Commit 281e3e9abc97

Vincent Demeester <vincent@sbr.pm>
2026-01-07 16:30:30
feat(kerkouane): Block AI scrapers with robots.txt and enforcement
- Protect personal content from unauthorized AI model training
- Implement hybrid approach: robots.txt compliance + HTTP 403 blocking
- Apply to all public services (static sites, media, git repos)

Signed-off-by: Vincent Demeester <vincent@sbr.pm>
1 parent c95c7ec
Changed files (1)
systems
kerkouane
systems/kerkouane/extra.nix
@@ -47,6 +47,56 @@ let
       -Server
     }
   '';
+
+  # Robots.txt snippet - polite request to AI scrapers.
+  #
+  # Caddyfile fragment that is interpolated into the virtual hosts'
+  # extraConfig below. It defines the @robots matcher for the exact path
+  # /robots.txt and answers matching requests directly with status 200 and
+  # an inline body, then closes the connection (`close`).
+  #
+  # The body disallows "/" for each of these crawler user agents: CCBot,
+  # ChatGPT-User, GPTBot, Google-Extended, anthropic-ai, Omgilibot, Omgili
+  # and FacebookBot. blockAIBotsSnippet lists the same names as User-Agent
+  # patterns, so the two lists should be kept in sync.
+  #
+  # The robots.txt body lines are less indented than the Caddy directives.
+  # NOTE(review): presumably this is so that the indented-string whitespace
+  # stripping leaves them at column 0 in the served file - confirm before
+  # re-indenting anything inside this string.
+  robotsTxtSnippet = ''
+        @robots path /robots.txt
+        handle @robots {
+          respond 200 {
+            body `User-agent: CCBot
+    Disallow: /
+
+    User-agent: ChatGPT-User
+    Disallow: /
+
+    User-agent: GPTBot
+    Disallow: /
+
+    User-agent: Google-Extended
+    Disallow: /
+
+    User-agent: anthropic-ai
+    Disallow: /
+
+    User-agent: Omgilibot
+    Disallow: /
+
+    User-agent: Omgili
+    Disallow: /
+
+    User-agent: FacebookBot
+    Disallow: /`
+            close
+          }
+        }
+  '';
+
+  # AI bot blocking snippet - enforcement via HTTP 403.
+  #
+  # Caddyfile fragment that is interpolated into the virtual hosts'
+  # extraConfig below. The @aibots matcher lists User-Agent glob patterns
+  # for the same crawler names that robotsTxtSnippet disallows; requests
+  # that match are answered with status 403 and the body
+  # "AI scraping not permitted".
+  #
+  # NOTE(review): this relies on Caddy treating several values given for the
+  # same header field as alternatives (OR), not as a conjunction - confirm
+  # against the header matcher documentation.
+  # NOTE(review): the pattern *Omgili* also matches every User-Agent that
+  # *Omgilibot* matches, so the latter looks redundant.
+  blockAIBotsSnippet = ''
+    @aibots {
+      header User-Agent *CCBot*
+      header User-Agent *ChatGPT-User*
+      header User-Agent *GPTBot*
+      header User-Agent *Google-Extended*
+      header User-Agent *anthropic-ai*
+      header User-Agent *Omgilibot*
+      header User-Agent *Omgili*
+      header User-Agent *FacebookBot*
+    }
+    handle @aibots {
+      respond "AI scraping not permitted" 403
+    }
+  '';
 in
 {
   imports = [
@@ -313,6 +363,9 @@ in
     virtualHosts = {
       # File server with directory browsing (replaces fancyindex)
       "dl.sbr.pm".extraConfig = ''
+        ${blockAIBotsSnippet}
+        ${robotsTxtSnippet}
+
         root * /var/www/dl.sbr.pm
         file_server browse {
           hide .fancyindex README.md HEADER.md
@@ -342,12 +395,18 @@ in
 
       # Static sites
       "paste.sbr.pm".extraConfig = ''
+        ${blockAIBotsSnippet}
+        ${robotsTxtSnippet}
+
         root * /var/www/paste.sbr.pm
         file_server
         ${securityHeaders}
       '';
 
       "sbr.pm".extraConfig = ''
+        ${blockAIBotsSnippet}
+        ${robotsTxtSnippet}
+
         root * /var/www/sbr.pm
         file_server
         ${securityHeaders}
@@ -368,6 +427,9 @@ in
 
       # Immich photo management (proxied to rhea)
       "immich.sbr.pm".extraConfig = ''
+        ${blockAIBotsSnippet}
+        ${robotsTxtSnippet}
+
         # Allow large photo/video uploads (50GB limit)
         request_body {
           max_size 50GB
@@ -428,6 +490,9 @@ in
 
       # Navidrome music streaming (proxied to aion)
       "navidrome.sbr.pm".extraConfig = ''
+        ${blockAIBotsSnippet}
+        ${robotsTxtSnippet}
+
         # Rate limiting for music streaming
         rate_limit {
           zone navidrome_general {
@@ -447,6 +512,9 @@ in
 
       # Jellyfin media server (proxied to rhea)
       "jellyfin.sbr.pm".extraConfig = ''
+        ${blockAIBotsSnippet}
+        ${robotsTxtSnippet}
+
         # Rate limiting for media server
         rate_limit {
           zone jellyfin_general {
@@ -466,6 +534,9 @@ in
 
       # Audiobookshelf audiobook server (proxied to aion)
       "audiobookshelf.sbr.pm".extraConfig = ''
+        ${blockAIBotsSnippet}
+        ${robotsTxtSnippet}
+
         # Rate limiting for audiobook streaming
         rate_limit {
           zone audiobookshelf_general {
@@ -485,6 +556,9 @@ in
 
       # Service aliases (user-friendly URLs - transparent proxy)
       "music.sbr.pm".extraConfig = ''
+        ${blockAIBotsSnippet}
+        ${robotsTxtSnippet}
+
         # Rate limiting for music streaming
         rate_limit {
           zone music_general {
@@ -503,6 +577,9 @@ in
       '';
 
       "photos.sbr.pm".extraConfig = ''
+        ${blockAIBotsSnippet}
+        ${robotsTxtSnippet}
+
         # Allow large photo/video uploads (50GB limit)
         request_body {
           max_size 50GB
@@ -562,6 +639,9 @@ in
       '';
 
       "podcasts.sbr.pm".extraConfig = ''
+        ${blockAIBotsSnippet}
+        ${robotsTxtSnippet}
+
         # Rate limiting for audiobook streaming
         rate_limit {
           zone podcasts_general {
@@ -588,6 +668,9 @@ in
 
       # Personal website with directory browsing
       "vincent.demeester.fr".extraConfig = ''
+        ${blockAIBotsSnippet}
+        ${robotsTxtSnippet}
+
         root * /var/www/vincent.demeester.fr
 
         # Try files with .html extension
@@ -602,6 +685,9 @@ in
 
       # Self-hosted git repositories (public only)
       "git.sbr.pm".extraConfig = ''
+        ${blockAIBotsSnippet}
+        ${robotsTxtSnippet}
+
         root * /home/vincent/git/public
         file_server browse {
           hide .fancyindex README.md HEADER.md