Commit 09ad187fe24f
Changed files (8)
home
common
modules
nix-flake-updater
tools
nix-flake-update
home/common/AGENTS.md
@@ -0,0 +1,23 @@
+# Shared Home-Manager Configuration
+
+Code in this directory is shared across **all hosts** and **both nixpkgs channels**
+(unstable and stable/25.11). The same rules as `systems/common/AGENTS.md` apply.
+
+## Rules
+
+1. **Never assume a home-manager option or package exists on all channels.**
+ Use conditionals or host-specific overrides when needed.
+
+2. **Host-specific home-manager overrides** go in `systems/<hostname>/home.nix`,
+ not here.
+
+3. **Test both channels** after modifying shared code:
+ - Unstable: `make host/okinawa/build`
+ - Stable: `make host/rhea/build`
+
+## Directory Structure
+
+- `desktop/` — desktop environment programs (browser, terminal, media, etc.)
+- `dev/` — development tools (editors, languages, AI tools, etc.)
+- `services/` — user services (syncthing, gpg-agent, etc.)
+- `shell/` — shell configuration (zsh, starship, aliases, etc.)
modules/nix-flake-updater/default.nix
@@ -123,6 +123,50 @@ let
default = 3600;
description = "Random delay in seconds before starting (0-value)";
};
+
+ autoFix = {
+ enable = mkEnableOption "AI-powered auto-fix on build failure";
+
+ command = mkOption {
+ type = types.str;
+ default = "pi";
+ description = "Agent command to invoke (must support -p for non-interactive mode)";
+ };
+
+ extraArgs = mkOption {
+ type = types.listOf types.str;
+ default = [
+ "--provider"
+ "google-vertex-claude"
+ "--no-session"
+ "--no-themes"
+ "--no-skills"
+ ];
+ description = "Extra arguments passed to the agent command (note: do not use --no-extensions if the provider is an extension)";
+ };
+
+ maxAttempts = mkOption {
+ type = types.int;
+ default = 3;
+ description = "Maximum agent invocations per failing host before giving up";
+ };
+
+ envFile = mkOption {
+ type = types.nullOr types.path;
+ default = null;
+ description = "Optional file to source before running the agent (for API keys, credentials)";
+ };
+
+ environment = mkOption {
+ type = types.attrsOf types.str;
+ default = { };
+ example = {
+ GOOGLE_CLOUD_PROJECT = "my-project";
+ GOOGLE_CLOUD_LOCATION = "us-east5";
+ };
+ description = "Environment variables to set when running the agent";
+ };
+ };
};
};
@@ -144,6 +188,16 @@ let
${optionalString (
instanceCfg.ntfyTokenFile != null
) ''export NTFY_TOKEN_FILE="${instanceCfg.ntfyTokenFile}"''}
+ export AUTO_FIX="${toString instanceCfg.autoFix.enable}"
+ export AUTO_FIX_COMMAND="${instanceCfg.autoFix.command}"
+ export AUTO_FIX_EXTRA_ARGS="${concatStringsSep " " instanceCfg.autoFix.extraArgs}"
+ export AUTO_FIX_MAX_ATTEMPTS="${toString instanceCfg.autoFix.maxAttempts}"
+ ${optionalString (
+ instanceCfg.autoFix.envFile != null
+ ) ''export AUTO_FIX_ENV_FILE="${instanceCfg.autoFix.envFile}"''}
+ ${concatStringsSep "\n" (
+ mapAttrsToList (k: v: "export ${k}=\"${v}\"") instanceCfg.autoFix.environment
+ )}
# Execute the packaged update script (already has tools in PATH)
exec ${pkgs.nix-flake-update}/bin/nix-flake-update
@@ -176,6 +230,8 @@ let
"/home/${instanceCfg.user}/.cache/nix"
# Org inbox for TODOs
(dirOf instanceCfg.inboxOrg)
+ # Pi agent session/config directory (needed for auto-fix)
+ "/home/${instanceCfg.user}/.pi"
];
NoNewPrivileges = true;
modules/nix-flake-updater/README.md
@@ -1,6 +1,7 @@
# Nix Flake Updater Module
-Automated NixOS module for updating `flake.lock` with build verification and notifications.
+Automated NixOS module for updating `flake.lock` with build verification, notifications,
+and optional AI-powered auto-fix.
## Overview
@@ -8,8 +9,10 @@ This module provides automated, unattended flake.lock updates that:
- Run on a configurable schedule via systemd timers
- Verify builds across multiple systems before committing
+- Optionally use a coding agent (pi) to auto-fix build failures
- Create git branches for review workflow
- Send notifications via ntfy
+- Support multiple named instances (e.g., daily, biweekly)
- Support dry-run mode for testing
## Files
@@ -19,67 +22,126 @@ This module provides automated, unattended flake.lock updates that:
## Usage
-Import the module and configure:
+Import the module and configure instances:
```nix
{
- imports = [
- ../../modules/nix-flake-updater
- ];
+ imports = [ ../../modules/nix-flake-updater ];
services.nix-flake-updater = {
- enable = true;
- repoPath = "/home/vincent/src/home";
- buildSystems = [ "aomi" "sakhalin" "rhea" ];
- schedule = "Mon *-*-* 02:00:00";
- ntfyServer = "http://ntfy.sbr.pm";
- user = "vincent";
+ # Bi-weekly full update with auto-fix
+ biweekly = {
+ enable = true;
+ repoPath = "/home/vincent/src/home";
+ buildSystems = [ "okinawa" "kyushu" "rhea" "athena" ];
+ schedule = "Sun *-*-1..7,15..21 02:00:00";
+ ntfyServer = "https://ntfy.sbr.pm";
+ user = "vincent";
+
+ autoFix = {
+ enable = true;
+ command = "pir";
+ extraArgs = [ "--model" "claude-opus-4-6" "--no-session" "--no-extensions" "--no-themes" ];
+ maxAttempts = 3;
+ };
+ };
+
+ # Daily update for specific inputs with auto-merge
+ daily = {
+ enable = true;
+ repoPath = "/home/vincent/src/home";
+ flakeInputs = [ "chick-group" "chapeau-rouge" ];
+ autoMerge = true;
+ buildSystems = [ "okinawa" "kyushu" ];
+ schedule = "*-*-* 04:00:00";
+ user = "vincent";
+ };
};
}
```
+## Auto-Fix
+
+When `autoFix.enable = true`, build failures trigger a coding agent to attempt fixes:
+
+1. Build error stderr is captured (last 200 lines)
+2. The agent is invoked in non-interactive mode (`-p`) with the error context
+3. The prompt instructs the agent to read the AGENTS.md files in the relevant directories for channel-awareness rules
+4. If the fix works, it's committed separately from the flake.lock update
+5. A regression check rebuilds all hosts after fixes are applied
+6. Up to `maxAttempts` retries per failing host
+
+### Agent Authentication
+
+The `pir` command used in the example above uses `passage` for API key retrieval
+(the module's own default command is `pi`). For headless systemd execution, ensure
+the password store is accessible without interactive auth, or use `autoFix.envFile`
+to source credentials:
+
+```nix
+autoFix = {
+ enable = true;
+ envFile = config.age.secrets."vertex-ai-credentials".path;
+};
+```
+
+## Manual Trigger
+
+```bash
+# Run the bi-weekly update manually
+sudo systemctl start nix-flake-updater-biweekly
+
+# View logs
+journalctl -u nix-flake-updater-biweekly -f
+
+# Check timer schedule
+systemctl list-timers 'nix-flake-updater-*'
+```
+
+## Configuration Options
+
+### Core
+- `enable` - Enable this instance
+- `repoPath` - Git repository path
+- `buildSystems` - List of NixOS systems to build for verification
+- `schedule` - Systemd OnCalendar schedule
+- `flakeInputs` - Specific inputs to update (empty = all)
+- `user` - User to run as (needs git push access)
+
+### Git
+- `gitRemote` - Remote to push to (default: `origin`)
+- `mainBranch` - Main branch name (default: `main`)
+- `branchPrefix` - Prefix for update branches
+- `autoMerge` - Auto-merge to main on success (default: `false`)
+
+### Notifications
+- `ntfyServer` / `ntfyTopic` - ntfy notification settings
+- `ntfyTokenFile` - Authentication token file
+- `inboxOrg` - Org-mode inbox for TODO entries on failure
+
+### Auto-Fix
+- `autoFix.enable` - Enable AI-powered auto-fix
+- `autoFix.command` - Agent command (default: `pi`)
+- `autoFix.extraArgs` - Extra agent CLI arguments
+- `autoFix.maxAttempts` - Max retries per host (default: `3`)
+- `autoFix.envFile` - Source file for API credentials
+- `autoFix.environment` - Environment variables to set when running the agent
+
+### Other
+- `dryRun` - Don't push to remote
+- `randomizedDelaySec` - Random delay before start
+
+## Architecture
+
+The update script:
+1. Creates an isolated git worktree from main
+2. Updates flake.lock (all or specific inputs)
+3. Builds all specified systems
+4. On failure with auto-fix: invokes coding agent → rebuilds → regression check
+5. Commits flake.lock update + any fixes (separate commits)
+6. Pushes branch (or auto-merges to main)
+7. Sends ntfy notification with results
+8. Cleans up worktree
+
## Documentation
See:
- `/docs/nix-flake-updater-guide.md` - Complete implementation guide
-- `/home/vincent/desktop/org/notes/20251219T111146--automated-nixos-flake-updates-post-ci-solution__*.org` - Design notes
-
-## Architecture
-
-The module creates a systemd timer that:
-1. Pulls latest main branch
-2. Creates update branch
-3. Runs `nix flake update`
-4. Builds specified systems for verification
-5. Commits and pushes if builds succeed
-6. Sends ntfy notification with results
-
-## Configuration Options
-
-- `enable` - Enable the service
-- `repoPath` - Git repository path
-- `buildSystems` - List of systems to build for verification
-- `schedule` - Systemd OnCalendar schedule
-- `ntfyServer` / `ntfyTopic` - Notification settings
-- `gitRemote` - Remote to push to
-- `user` - User to run as (needs git push access)
-- `dryRun` - Test mode (don't push)
-
-## Example Deployment
-
-```bash
-# Build configuration
-make host/aomi/build
-
-# Deploy
-make host/aomi/switch
-
-# Verify timer
-systemctl list-timers nix-flake-updater
-
-# Test manually
-sudo systemctl start nix-flake-updater
-
-# View logs
-journalctl -u nix-flake-updater -f
-```
systems/common/AGENTS.md
@@ -0,0 +1,40 @@
+# Shared NixOS Configuration
+
+Code in this directory is used by **all hosts** across **both nixpkgs channels**
+(unstable and stable/25.11). See `../AGENTS.md` for the full channel map.
+
+## Rules for Modifying Shared Code
+
+1. **Never assume an option exists on all channels.** Options added or renamed
+ in nixpkgs-unstable won't exist on nixpkgs-25.11 (and vice versa for
+ backports). When in doubt, check the option exists before using it.
+
+2. **Use conditional patterns** when a change is channel-dependent:
+ ```nix
+ # Check if an option exists before using it
+ lib.optionalAttrs (builtins.hasAttr "newOption" options.services.foo) {
+ services.foo.newOption = true;
+ }
+
+ # Version-gated setting
+ lib.mkIf (lib.versionAtLeast config.system.nixos.release "25.11") {
+ services.foo.bar = "new-value";
+ }
+ ```
+
+3. **Prefer host-specific overrides** over conditionals when the change only
+ affects one or two hosts. Edit `systems/<hostname>/extra.nix` instead of
+ modifying shared modules.
+
+4. **Test both channels** after modifying shared code:
+ - Unstable: `make host/okinawa/build`
+ - Stable: `make host/rhea/build`
+
+## Directory Structure
+
+- `base/` — core system settings (nix, boot, locale, networking, security)
+- `desktop/` — desktop environment modules (sway, niri, waybar, etc.)
+- `hardware/` — hardware support (audio, bluetooth, GPU, etc.)
+- `programs/` — system-wide program configurations
+- `services/` — system service configurations
+- `users/` — user account definitions
systems/okinawa/extra.nix
@@ -156,8 +156,9 @@
# Automated flake.lock updates with build verification
services.nix-flake-updater = {
- # Weekly updates for all inputs
- weekly = {
+ # Bi-weekly updates for all inputs with AI-powered auto-fix
+ # Manual trigger: sudo systemctl start nix-flake-updater-biweekly
+ biweekly = {
enable = true;
repoPath = "/home/vincent/src/home";
@@ -177,8 +178,8 @@
"aix" # Raspberry Pi 4
];
- # Run weekly on Sunday at 2 AM
- schedule = "Sun *-*-* 02:00:00";
+ # Run bi-weekly: 1st and 3rd Sunday of each month at 2 AM
+ schedule = "Sun *-*-1..7,15..21 02:00:00";
# Notifications via ntfy
ntfyServer = "https://ntfy.sbr.pm";
@@ -194,6 +195,24 @@
# Add randomized delay to avoid conflicts
randomizedDelaySec = 1800; # 0-30 min delay
+
+ # AI-powered auto-fix on build failure
+ autoFix = {
+ enable = true;
+ command = "pi";
+ extraArgs = [
+ "--provider"
+ "google-vertex-claude"
+ "--no-session"
+ "--no-themes"
+ "--no-skills"
+ ];
+ maxAttempts = 3;
+ environment = {
+ GOOGLE_CLOUD_PROJECT = "itpc-gcp-pnd-pe-eng-claude";
+ GOOGLE_CLOUD_LOCATION = "us-east5";
+ };
+ };
};
# Daily automated updates for chick-group and chapeau-rouge with auto-merge
systems/AGENTS.md
@@ -0,0 +1,38 @@
+# Systems Architecture
+
+NixOS system configurations organized by hostname with shared modules in `common/`.
+
+## Channel Groups
+
+Hosts use different nixpkgs channels. Changes to shared code (`common/`)
+**must work across both channels**.
+
+### Unstable (`nixpkgs` — nixos-unstable)
+- **okinawa** — x86_64-linux, laptop / LLM build server (ASUS G14)
+- **kyushu** — x86_64-linux, work laptop
+- **aomi** — x86_64-linux, server
+- **sakhalin** — x86_64-linux, home server
+
+### Stable (`nixpkgs-25_11` — nixos-25.11)
+- **rhea** — aarch64-linux, media server (main)
+- **aion** — aarch64-linux, XMPP / podcast server
+- **athena** — aarch64-linux, Raspberry Pi 4
+- **demeter** — aarch64-linux, Raspberry Pi 4
+- **aix** — aarch64-linux, Raspberry Pi 4
+- **kerkouane** — x86_64-linux, VPS server
+- **carthage** — x86_64-linux, VPS server
+
+## Host File Layout
+
+Each host has a directory in `/systems/<hostname>/` containing:
+- `boot.nix` — bootloader, initrd, kernel modules
+- `hardware.nix` — hardware-specific settings, filesystem mounts
+- `extra.nix` (optional) — additional host-specific NixOS configuration
+- `home.nix` (optional) — host-specific home-manager configuration
+
+## Build Verification
+
+- Test current host: `make build`
+- Test a specific host: `make host/<hostname>/build`
+- Dry-build (eval only): `make host/<hostname>/dry-build`
+- Direct nix: `nix build .#nixosConfigurations.<hostname>.config.system.build.toplevel`
tools/nix-flake-update/default.nix
@@ -7,11 +7,12 @@
jq,
curl,
openssh,
+ pi-coding-agent,
}:
stdenv.mkDerivation {
pname = "nix-flake-update";
- version = "0.1.0";
+ version = "0.2.0";
src = ./.;
@@ -32,6 +33,7 @@ stdenv.mkDerivation {
jq
curl
openssh
+ pi-coding-agent
]
}
tools/nix-flake-update/nix-flake-update.sh
@@ -1,8 +1,9 @@
#!/usr/bin/env bash
set -euo pipefail
-# Automated NixOS flake.lock updater
-# This script updates flake.lock, builds verification systems, and pushes to remote
+# Automated NixOS flake.lock updater with optional AI-powered auto-fix
+# This script updates flake.lock, builds verification systems, optionally
+# uses a coding agent to fix build failures, and pushes to remote.
# Configuration from environment or defaults
REPO_PATH="${REPO_PATH:-/home/vincent/src/home}"
@@ -19,6 +20,13 @@ FLAKE_INPUTS="${FLAKE_INPUTS:-}" # Space-separated list of inputs to update (em
AUTO_MERGE="${AUTO_MERGE:-false}" # If true, merge to main on success
INBOX_ORG="${INBOX_ORG:-$HOME/desktop/org/inbox.org}" # Path to org-mode inbox
+# Auto-fix configuration
+AUTO_FIX="${AUTO_FIX:-false}"
+AUTO_FIX_COMMAND="${AUTO_FIX_COMMAND:-pir}"
+AUTO_FIX_EXTRA_ARGS="${AUTO_FIX_EXTRA_ARGS:---model claude-opus-4-6 --no-session --no-extensions --no-themes}"
+AUTO_FIX_MAX_ATTEMPTS="${AUTO_FIX_MAX_ATTEMPTS:-3}"
+AUTO_FIX_ENV_FILE="${AUTO_FIX_ENV_FILE:-}"
+
LOG_FILE="/var/log/nix-flake-updater/$(date +%Y%m%d-%H%M%S).log"
mkdir -p "$(dirname "$LOG_FILE")"
@@ -26,6 +34,11 @@ mkdir -p "$(dirname "$LOG_FILE")"
WORKTREE_DIR="$HOME/tmp/nix-flake-updater-$(date +%Y%m%d-%H%M%S)"
mkdir -p "$HOME/tmp"
+# Track auto-fix state
+FIXES_APPLIED=0
+FIXED_HOSTS=()
+UNFIXED_HOSTS=()
+
log() {
echo "[$(date '+%Y-%m-%d %H:%M:%S')] $*" | tee -a "$LOG_FILE"
}
@@ -37,7 +50,6 @@ notify() {
local tags="$4"
if [ -n "$NTFY_TOKEN_FILE" ] && [ -f "$NTFY_TOKEN_FILE" ]; then
- # Use authentication token
curl -s \
-H "Authorization: Bearer $(tr -d '\n' < "$NTFY_TOKEN_FILE")" \
-H "Title: $title" \
@@ -46,7 +58,6 @@ notify() {
-d "$message" \
"$NTFY_SERVER/$NTFY_TOPIC" || true
else
- # No authentication
curl -s \
-H "Title: $title" \
-H "Priority: $priority" \
@@ -63,7 +74,6 @@ add_todo_to_inbox() {
if [ -f "$INBOX_ORG" ]; then
log "Adding TODO to $INBOX_ORG"
- # Capture last 30 lines of the log file for quick diagnosis
local log_tail=""
if [ -f "$LOG_FILE" ]; then
log_tail=$(tail -30 "$LOG_FILE")
@@ -109,18 +119,17 @@ cleanup() {
if [ $exit_code -ne 0 ]; then
log "ERROR: Update process failed with exit code $exit_code"
-
- # Add TODO to inbox on failure
+
local input_desc="all inputs"
if [ -n "$FLAKE_INPUTS" ]; then
input_desc="inputs: $FLAKE_INPUTS"
fi
-
+
add_todo_to_inbox "Fix flake update failure" \
"Flake update failed for $input_desc.
Build systems: $BUILD_SYSTEMS
Auto-merge: $AUTO_MERGE"
-
+
notify "high" "❌ Flake Update Failed" \
"Build failed for $input_desc. TODO added to inbox. See logs: $LOG_FILE" \
"warning,flake"
@@ -129,6 +138,223 @@ Auto-merge: $AUTO_MERGE"
trap cleanup EXIT
+# Build a single host, capturing stderr. Returns 0 on success, 1 on failure.
+# Sets BUILD_ERROR with captured stderr on failure.
+build_host() {
+ local system="$1"
+ local error_file
+ error_file=$(mktemp)
+
+ log "Building system: $system"
+ if nix build ".#nixosConfigurations.$system.config.system.build.toplevel" \
+ --no-link \
+ --print-build-logs 2> >(tee "$error_file" | tee -a "$LOG_FILE" >&2); then
+ log "✓ $system built successfully"
+ rm -f "$error_file"
+ return 0
+ else
+ log "✗ $system build failed"
+ BUILD_ERROR=$(tail -200 "$error_file")
+ rm -f "$error_file"
+ return 1
+ fi
+}
+
+# Build all systems, returning list of failures.
+# Sets FAILED_SYSTEMS (space-separated) and PASSED_SYSTEMS.
+build_all_systems() {
+ FAILED_SYSTEMS=""
+ PASSED_SYSTEMS=""
+
+ for system in $BUILD_SYSTEMS; do
+ if build_host "$system"; then
+ PASSED_SYSTEMS="$PASSED_SYSTEMS $system"
+ else
+ FAILED_SYSTEMS="$FAILED_SYSTEMS $system"
+ fi
+ done
+
+ # Trim leading spaces
+ FAILED_SYSTEMS="${FAILED_SYSTEMS# }"
+ PASSED_SYSTEMS="${PASSED_SYSTEMS# }"
+}
+
+# Try to auto-fix a build failure using a coding agent.
+# Arguments: $1 = system name, $2 = build error text
+# Returns 0 if the agent ran (check rebuild to see if it actually fixed it).
+try_auto_fix() {
+ local system="$1"
+ local error_text="$2"
+
+ if [ "$AUTO_FIX" != "true" ] && [ "$AUTO_FIX" != "1" ]; then
+ return 1
+ fi
+
+ # Source env file if provided (for API keys, credentials)
+ if [ -n "$AUTO_FIX_ENV_FILE" ] && [ -f "$AUTO_FIX_ENV_FILE" ]; then
+ log "Sourcing auto-fix environment: $AUTO_FIX_ENV_FILE"
+ # shellcheck disable=SC1090
+ source "$AUTO_FIX_ENV_FILE"
+ fi
+
+ log "Running auto-fix agent for $system (command: $AUTO_FIX_COMMAND)"
+
+ # Build the prompt
+ local prompt
+ prompt="The NixOS build for host '${system}' failed after a flake.lock update.
+
+Working directory: $(pwd)
+
+Build command that failed:
+ nix build .#nixosConfigurations.${system}.config.system.build.toplevel
+
+Build error output (last 200 lines):
+\`\`\`
+${error_text}
+\`\`\`
+
+Fix the Nix configuration files to resolve this build error.
+
+Rules:
+- Do NOT modify flake.lock or flake.nix
+- Only edit .nix configuration files
+- Read the AGENTS.md files in the relevant directories for channel-awareness rules
+- If the fix is in a shared module (systems/common/ or home/common/), ensure it works across both nixpkgs channels
+- Prefer host-specific overrides (systems/${system}/extra.nix) over modifying shared code when possible
+- After making changes, verify with: nix build .#nixosConfigurations.${system}.config.system.build.toplevel --no-link"
+
+ # Run the agent
+ # shellcheck disable=SC2086
+ if $AUTO_FIX_COMMAND -p $AUTO_FIX_EXTRA_ARGS "$prompt" 2>&1 | tee -a "$LOG_FILE"; then
+ log "Auto-fix agent completed for $system"
+ return 0
+ else
+ log "Auto-fix agent failed/crashed for $system"
+ return 1
+ fi
+}
+
+# Run the auto-fix loop for all failed systems.
+# Attempts to fix each failing host, with retries and regression checking.
+#
+# Arguments: $1 = space-separated list of hosts whose build failed
+# Reads:     BUILD_SYSTEMS, AUTO_FIX_MAX_ATTEMPTS, and BUILD_ERROR (set by build_host)
+# Updates:   FIXES_APPLIED (count of successful agent fixes), FIXED_HOSTS,
+#            UNFIXED_HOSTS
+#
+# Phases:
+#   1. Up to AUTO_FIX_MAX_ATTEMPTS rounds; in each round every still-failing
+#      host is rebuilt, handed to the agent if it still fails, then rebuilt again.
+#   2. Hosts still failing after the last round are recorded in UNFIXED_HOSTS.
+#   3. If at least one agent fix succeeded, every host in BUILD_SYSTEMS is
+#      rebuilt and one extra agent attempt is made for each regression found.
+auto_fix_loop() {
+ local remaining_failures="$1"
+ local all_systems_list="$BUILD_SYSTEMS"
+ local round=0
+
+ while [ -n "$remaining_failures" ] && [ $round -lt "$AUTO_FIX_MAX_ATTEMPTS" ]; do
+ round=$((round + 1))
+ log "=== Auto-fix round $round/$AUTO_FIX_MAX_ATTEMPTS ==="
+
+ local still_failing=""
+
+ for system in $remaining_failures; do
+ # Try to build first (a previous fix might have resolved this too)
+ # NOTE(review): hosts recovered this way are added to FIXED_HOSTS without
+ # incrementing FIXES_APPLIED.
+ if build_host "$system"; then
+ log "✓ $system now builds (fixed by previous change)"
+ FIXED_HOSTS+=("$system")
+ continue
+ fi
+
+ # Run the agent
+ # BUILD_ERROR was just set by the failed build_host call above.
+ if try_auto_fix "$system" "$BUILD_ERROR"; then
+ # Check if the fix worked
+ if build_host "$system"; then
+ log "✓ $system fixed by auto-fix agent"
+ FIXES_APPLIED=$((FIXES_APPLIED + 1))
+ FIXED_HOSTS+=("$system")
+ else
+ log "✗ $system still failing after auto-fix attempt"
+ still_failing="$still_failing $system"
+ fi
+ else
+ log "✗ Auto-fix agent failed to run for $system"
+ still_failing="$still_failing $system"
+ fi
+ done
+
+ # Drop the leading separator; the next round retries only these hosts.
+ remaining_failures="${still_failing# }"
+
+ if [ -z "$remaining_failures" ]; then
+ log "All failures resolved after $round round(s)"
+ break
+ fi
+ done
+
+ # Record unfixed hosts
+ for system in $remaining_failures; do
+ UNFIXED_HOSTS+=("$system")
+ done
+
+ # Regression check: rebuild every host in BUILD_SYSTEMS (not only the ones
+ # that passed initially). Skipped entirely when no agent fix succeeded.
+ if [ $FIXES_APPLIED -gt 0 ]; then
+ log "=== Regression check: rebuilding all systems ==="
+ local regression_failures=""
+
+ for system in $all_systems_list; do
+ if ! build_host "$system"; then
+ # Check if this was already a known failure
+ # The ":-" default expands to a single empty word when the array is empty;
+ # presumably this guards the expansion under `set -u` — confirm.
+ local was_fixed=false
+ for fixed in "${FIXED_HOSTS[@]:-}"; do
+ if [ "$system" = "$fixed" ]; then
+ was_fixed=true
+ break
+ fi
+ done
+
+ if [ "$was_fixed" = true ]; then
+ log "⚠ Regression: $system was fixed but now fails again"
+ regression_failures="$regression_failures $system"
+ else
+ # Check if this is a new regression (was passing before)
+ # Hosts already listed in UNFIXED_HOSTS are known failures and are skipped.
+ local is_new_regression=true
+ for unfixed in "${UNFIXED_HOSTS[@]:-}"; do
+ if [ "$system" = "$unfixed" ]; then
+ is_new_regression=false
+ break
+ fi
+ done
+
+ if [ "$is_new_regression" = true ]; then
+ log "⚠ Regression: $system was passing but now fails after auto-fix changes"
+ regression_failures="$regression_failures $system"
+ fi
+ fi
+ fi
+ done
+
+ regression_failures="${regression_failures# }"
+
+ if [ -n "$regression_failures" ]; then
+ log "Regressions detected: $regression_failures"
+ log "Attempting to fix regressions..."
+
+ # One more attempt with regression context
+ # NOTE(review): a host whose regression cannot be fixed is appended to
+ # UNFIXED_HOSTS but is not removed from FIXED_HOSTS, so it can appear in both.
+ for system in $regression_failures; do
+ # Rebuild first: refreshes BUILD_ERROR, and skips the host if it builds now.
+ if ! build_host "$system"; then
+ local regression_prompt_extra="IMPORTANT: This is a REGRESSION. Host '$system' was building successfully before auto-fix changes were made to fix other hosts. Your previous fixes broke this host. Fix it WITHOUT breaking the other hosts."
+
+ if try_auto_fix "$system" "$BUILD_ERROR
+$regression_prompt_extra"; then
+ if build_host "$system"; then
+ log "✓ Regression fixed for $system"
+ FIXES_APPLIED=$((FIXES_APPLIED + 1))
+ else
+ log "✗ Could not fix regression for $system"
+ UNFIXED_HOSTS+=("$system")
+ fi
+ else
+ UNFIXED_HOSTS+=("$system")
+ fi
+ fi
+ done
+ fi
+ fi
+}
+
+# =============================================================================
+# Main script
+# =============================================================================
+
log "Starting flake update process"
cd "$REPO_PATH"
@@ -158,7 +384,7 @@ git checkout -b "$BRANCH_NAME"
# Save old flake.lock for before/after comparison
OLD_FLAKE_LOCK=$(cat flake.lock)
-# Update flake.lock (work in worktree, flake is at root)
+# Update flake.lock
log "Updating flake.lock"
if [ -n "$FLAKE_INPUTS" ]; then
log "Updating specific inputs: $FLAKE_INPUTS"
@@ -179,32 +405,118 @@ if ! git diff --quiet flake.lock; then
log "Flake input changes:"
git diff flake.lock | grep -E '^\+.*"(narHash|rev)"' | head -20 | tee -a "$LOG_FILE"
- # Build test systems (build from worktree)
- BUILD_SUCCESS=true
- for system in $BUILD_SYSTEMS; do
- log "Building system: $system"
- if nix build ".#nixosConfigurations.$system.config.system.build.toplevel" \
- --no-link \
- --print-build-logs 2>&1 | tee -a "$LOG_FILE"; then
- log "✓ $system built successfully"
- else
- log "✗ $system build failed"
- BUILD_SUCCESS=false
- break
- fi
- done
+ # Build all systems
+ build_all_systems
- if [ "$BUILD_SUCCESS" = true ]; then
- # Commit changes (we're already in WORKTREE_DIR)
+ if [ -n "$FAILED_SYSTEMS" ]; then
+ log "Build failures detected: $FAILED_SYSTEMS"
+
+ if [ "$AUTO_FIX" = "true" ] || [ "$AUTO_FIX" = "1" ]; then
+ # Commit flake.lock first so the agent works on a clean tree
+ git add flake.lock
+
+ input_desc="all inputs"
+ if [ -n "$FLAKE_INPUTS" ]; then
+ input_desc="$FLAKE_INPUTS"
+ fi
+
+ CHANGES=$(jq -n --argjson old "$OLD_FLAKE_LOCK" --argjson new "$(cat flake.lock)" -r '
+ def rev_map:
+ .nodes | to_entries
+ | map(select(.key != "root" and .value.locked != null))
+ | map({(.key): (.value.locked.rev // .value.locked.narHash // "unknown")})
+ | add // {};
+ ($old | rev_map) as $o |
+ ($new | rev_map) as $n |
+ [$n | to_entries[] | select($o[.key] != null and $o[.key] != .value)] |
+ group_by({old: $o[.key], new: .value}) |
+ map({
+ names: (map(.key) | join(", ")),
+ old: $o[.[0].key][0:12],
+ new: .[0].value[0:12]
+ }) |
+ map("- \(.names): \(.old) → \(.new)") |
+ join("\n")
+ ' 2>/dev/null || echo "Updated flake inputs")
+
+ git -c user.signingkey=/home/vincent/.ssh/id_ed25519 commit -m "chore(flake): update $input_desc
+
+$CHANGES"
+
+ # Run auto-fix loop
+ auto_fix_loop "$FAILED_SYSTEMS"
+
+ # Commit any fixes the agent made
+ if [ $FIXES_APPLIED -gt 0 ]; then
+ git add -A
+ if ! git diff --cached --quiet; then
+ local_fixed_list="${FIXED_HOSTS[*]:-}"
+ git -c user.signingkey=/home/vincent/.ssh/id_ed25519 commit -m "fix(nix): auto-fix build errors
+
+Fixed hosts: ${local_fixed_list}
+Agent: ${AUTO_FIX_COMMAND}
+Attempts used: ${FIXES_APPLIED}"
+ fi
+ fi
+
+ # Determine overall result
+ if [ ${#UNFIXED_HOSTS[@]} -gt 0 ]; then
+ log "Auto-fix partially succeeded. Unfixed: ${UNFIXED_HOSTS[*]}"
+
+ add_todo_to_inbox "Flake update: ${#UNFIXED_HOSTS[@]} hosts still failing" \
+ "Auto-fix resolved ${#FIXED_HOSTS[@]} host(s) but could not fix: ${UNFIXED_HOSTS[*]}
+Build systems: $BUILD_SYSTEMS
+Agent: $AUTO_FIX_COMMAND"
+
+ # Still push the branch with partial fixes
+ if [ "$DRY_RUN" != "false" ] && [ "$DRY_RUN" != "" ] && [ "$DRY_RUN" != "0" ]; then
+ log "DRY RUN: Would push partial-fix branch"
+ else
+ git push "$GIT_REMOTE" "$BRANCH_NAME"
+ fi
+
+ notify "high" "⚠️ Flake Updated (${#UNFIXED_HOSTS[@]} hosts still failing)" \
+ "Auto-fixed: ${FIXED_HOSTS[*]:-none}. Still failing: ${UNFIXED_HOSTS[*]}. Branch: $BRANCH_NAME" \
+ "warning,flake,robot"
+
+ exit 1
+ else
+ log "All failures resolved by auto-fix"
+ # Fall through to normal push/merge logic below
+ fi
+ else
+ # No auto-fix — original behavior
+ log "Build failed, not committing changes"
+
+ input_desc="all inputs"
+ if [ -n "$FLAKE_INPUTS" ]; then
+ input_desc="$FLAKE_INPUTS"
+ fi
+
+ add_todo_to_inbox "Flake update build failure" \
+ "Build failed after updating $input_desc.
+Build systems tested: $BUILD_SYSTEMS
+Auto-merge: $AUTO_MERGE"
+
+ notify "high" "❌ Flake Update Build Failed" \
+ "Builds failed for updated $input_desc. TODO added to inbox. Check logs: $LOG_FILE" \
+ "x,flake,warning"
+
+ exit 1
+ fi
+ fi
+
+ # If we get here, all builds passed (either initially or after auto-fix)
+
+ # Commit flake.lock if not already committed (no auto-fix path)
+ if ! git diff --quiet flake.lock || ! git diff --cached --quiet flake.lock; then
git add flake.lock
- # Generate commit message with changed inputs
input_desc="all inputs"
if [ -n "$FLAKE_INPUTS" ]; then
input_desc="$FLAKE_INPUTS"
fi
-
- # Generate before/after changelog, deduplicated by rev transition
+
CHANGES=$(jq -n --argjson old "$OLD_FLAKE_LOCK" --argjson new "$(cat flake.lock)" -r '
def rev_map:
.nodes | to_entries
@@ -231,77 +543,58 @@ $CHANGES
Built systems: $BUILD_SYSTEMS"
git -c user.signingkey=/home/vincent/.ssh/id_ed25519 commit -m "$COMMIT_MSG"
+ fi
- if [ "$DRY_RUN" != "false" ] && [ "$DRY_RUN" != "" ] && [ "$DRY_RUN" != "0" ]; then
- log "DRY RUN: Would push to $GIT_REMOTE/$BRANCH_NAME"
- notify "low" "🧪 Flake Update (Dry Run)" \
- "Branch $BRANCH_NAME created locally. All builds passed: $BUILD_SYSTEMS" \
- "test_tube,flake"
- elif [ "$AUTO_MERGE" = "true" ] || [ "$AUTO_MERGE" = "1" ]; then
- # Auto-merge: rebase onto main and push directly
- log "Auto-merge enabled: rebasing onto $GIT_REMOTE/$MAIN_BRANCH"
-
- # Fetch latest main
- git fetch "$GIT_REMOTE" "$MAIN_BRANCH"
-
- # Rebase our commit onto main
- if git rebase "$GIT_REMOTE/$MAIN_BRANCH"; then
- log "Rebase successful, pushing to $GIT_REMOTE/$MAIN_BRANCH"
-
- # Push directly to main
- git push "$GIT_REMOTE" "HEAD:$MAIN_BRANCH"
-
- # Notify success
- notify "default" "✅ Flake Auto-Updated & Merged" \
- "Updates for $input_desc merged to $MAIN_BRANCH. All builds passed: $BUILD_SYSTEMS" \
- "white_check_mark,flake,merged"
-
- log "SUCCESS: Flake updated and merged to $MAIN_BRANCH"
- else
- log "ERROR: Rebase failed, main branch may have moved"
- git rebase --abort || true
-
- add_todo_to_inbox "Flake update rebase conflict" \
- "Auto-merge failed due to rebase conflict.
+ # Determine notification details
+ fix_note=""
+ if [ $FIXES_APPLIED -gt 0 ]; then
+ fix_note=" ($FIXES_APPLIED auto-fix(es) applied: ${FIXED_HOSTS[*]})"
+ fi
+
+ if [ "$DRY_RUN" != "false" ] && [ "$DRY_RUN" != "" ] && [ "$DRY_RUN" != "0" ]; then
+ log "DRY RUN: Would push to $GIT_REMOTE/$BRANCH_NAME"
+ notify "low" "🧪 Flake Update (Dry Run)" \
+ "Branch $BRANCH_NAME created locally. All builds passed: $BUILD_SYSTEMS${fix_note}" \
+ "test_tube,flake"
+ elif [ "$AUTO_MERGE" = "true" ] || [ "$AUTO_MERGE" = "1" ]; then
+ # Auto-merge: rebase onto main and push directly
+ log "Auto-merge enabled: rebasing onto $GIT_REMOTE/$MAIN_BRANCH"
+
+ git fetch "$GIT_REMOTE" "$MAIN_BRANCH"
+
+ if git rebase "$GIT_REMOTE/$MAIN_BRANCH"; then
+ log "Rebase successful, pushing to $GIT_REMOTE/$MAIN_BRANCH"
+ git push "$GIT_REMOTE" "HEAD:$MAIN_BRANCH"
+
+ notify "default" "✅ Flake Auto-Updated & Merged" \
+ "Updates merged to $MAIN_BRANCH. All builds passed: $BUILD_SYSTEMS${fix_note}" \
+ "white_check_mark,flake,merged"
+
+ log "SUCCESS: Flake updated and merged to $MAIN_BRANCH"
+ else
+ log "ERROR: Rebase failed, main branch may have moved"
+ git rebase --abort || true
+
+ add_todo_to_inbox "Flake update rebase conflict" \
+ "Auto-merge failed due to rebase conflict.
Inputs: $input_desc
Branch: $BRANCH_NAME (in worktree, needs manual rebase)"
-
- notify "high" "⚠️ Flake Update Rebase Failed" \
- "Could not rebase $input_desc onto $MAIN_BRANCH. TODO added to inbox." \
- "warning,flake,conflict"
- exit 1
- fi
- else
- # Branch mode: push to feature branch
- log "Pushing to $GIT_REMOTE/$BRANCH_NAME"
- git push "$GIT_REMOTE" "$BRANCH_NAME"
- # Notify success
- notify "default" "✅ Flake Updated Successfully" \
- "Branch $BRANCH_NAME created and pushed. All builds passed: $BUILD_SYSTEMS" \
- "white_check_mark,flake"
-
- log "SUCCESS: Flake updated and pushed to $BRANCH_NAME"
+ notify "high" "⚠️ Flake Update Rebase Failed" \
+ "Could not rebase onto $MAIN_BRANCH. TODO added to inbox." \
+ "warning,flake,conflict"
+ exit 1
fi
-
else
- log "Build failed, not committing changes"
-
- input_desc="all inputs"
- if [ -n "$FLAKE_INPUTS" ]; then
- input_desc="$FLAKE_INPUTS"
- fi
-
- add_todo_to_inbox "Flake update build failure" \
- "Build failed after updating $input_desc.
-Build systems tested: $BUILD_SYSTEMS
-Auto-merge: $AUTO_MERGE"
-
- notify "high" "❌ Flake Update Build Failed" \
- "Builds failed for updated $input_desc. TODO added to inbox. Check logs: $LOG_FILE" \
- "x,flake,warning"
+ # Branch mode: push to feature branch
+ log "Pushing to $GIT_REMOTE/$BRANCH_NAME"
+ git push "$GIT_REMOTE" "$BRANCH_NAME"
- exit 1
+ notify "default" "✅ Flake Updated Successfully" \
+ "Branch $BRANCH_NAME pushed. All builds passed: $BUILD_SYSTEMS${fix_note}" \
+ "white_check_mark,flake"
+
+ log "SUCCESS: Flake updated and pushed to $BRANCH_NAME"
fi
else