Bladeren bron

feat: enable loading config fragments (#321)

* feat(config): support importing bot policy snippets

This changes the grammar of the Anubis bot policy config to allow
importing from internal shared rules or external rules on the
filesystem.

This lets you create a file at `/data/policies/block-evilbot.yaml` and
then import it with:

```yaml
bots:
- import: /data/policies/block-evilbot.yaml
```

This also explodes the default policy file into a bunch of composable
snippets.

Thank you @Aibrew for your example gitea Atom / RSS feed rules!

Signed-off-by: Xe Iaso <me@xeiaso.net>

* fix(data): update botPolicies.json to use imports

Signed-off-by: Xe Iaso <me@xeiaso.net>

* fix(cmd/anubis): extract bot policies with --extract-resources

This allows a user that doesn't have anything but the Anubis binary to
figure out what the default configuration does.

* docs(data/botPolicies.yaml): document import syntax in-line

Signed-off-by: Xe Iaso <me@xeiaso.net>

* fix(lib/policy): better test importing from JSON snippets

Signed-off-by: Xe Iaso <me@xeiaso.net>

* docs(admin): Add import syntax documentation

This documents the import syntax and is based on the block comment at
the top of the default bot policy file.

* docs(changelog): add note about importing snippets

Signed-off-by: Xe Iaso <me@xeiaso.net>

* style(lib/policy/config): use an error value instead of an inline error

Signed-off-by: Xe Iaso <me@xeiaso.net>

---------

Signed-off-by: Xe Iaso <me@xeiaso.net>
Xe Iaso 2 maanden geleden
bovenliggende
commit
74e11505c6
37 gewijzigde bestanden met toevoegingen van 1210 en 1305 verwijderingen
  1. 5 1
      cmd/anubis/main.go
  2. 7 0
      data/apps/gitea-rss-feeds.yaml
  3. 14 645
      data/botPolicies.json
  4. 30 642
      data/botPolicies.yaml
  5. 4 0
      data/bots/ai-robots-txt.yaml
  6. 4 0
      data/bots/cloudflare-workers.yaml
  7. 9 0
      data/bots/headless-browsers.yaml
  8. 3 0
      data/bots/us-ai-scraper.yaml
  9. 15 0
      data/common/allow-private-addresses.yaml
  10. 10 0
      data/common/keep-internet-working.yaml
  11. 34 0
      data/crawlers/bingbot.yaml
  12. 275 0
      data/crawlers/duckduckbot.yaml
  13. 263 0
      data/crawlers/googlebot.yaml
  14. 8 0
      data/crawlers/internet-archive.yaml
  15. 10 0
      data/crawlers/kagibot.yaml
  16. 11 0
      data/crawlers/marginalia.yaml
  17. 5 0
      data/crawlers/mojeekbot.yaml
  18. 5 0
      data/crawlers/qwantbot.yaml
  19. 1 1
      data/embed.go
  20. 1 0
      docs/docs/CHANGELOG.md
  21. 147 0
      docs/docs/admin/configuration/import.mdx
  22. 1 0
      docs/docs/admin/policies.mdx
  23. 168 2
      lib/policy/config/config.go
  24. 109 5
      lib/policy/config/config_test.go
  25. 10 0
      lib/policy/config/testdata/bad/import_and_bot.json
  26. 6 0
      lib/policy/config/testdata/bad/import_and_bot.yaml
  27. 7 0
      lib/policy/config/testdata/bad/import_invalid_file.json
  28. 2 0
      lib/policy/config/testdata/bad/import_invalid_file.yaml
  29. 7 0
      lib/policy/config/testdata/good/import_filesystem.json
  30. 2 0
      lib/policy/config/testdata/good/import_filesystem.yaml
  31. 7 0
      lib/policy/config/testdata/good/import_keep_internet_working.json
  32. 2 0
      lib/policy/config/testdata/good/import_keep_internet_working.yaml
  33. 9 0
      lib/policy/config/testdata/hack-test.json
  34. 3 0
      lib/policy/config/testdata/hack-test.yaml
  35. 4 9
      lib/policy/policy.go
  36. 9 0
      lib/policy/testdata/hack-test.json
  37. 3 0
      lib/policy/testdata/hack-test.yaml

+ 5 - 1
cmd/anubis/main.go

@@ -27,6 +27,7 @@ import (
 	"time"
 
 	"github.com/TecharoHQ/anubis"
+	"github.com/TecharoHQ/anubis/data"
 	"github.com/TecharoHQ/anubis/internal"
 	libanubis "github.com/TecharoHQ/anubis/lib"
 	botPolicy "github.com/TecharoHQ/anubis/lib/policy"
@@ -184,6 +185,9 @@ func main() {
 	}
 
 	if *extractResources != "" {
+		if err := extractEmbedFS(data.BotPolicies, ".", *extractResources); err != nil {
+			log.Fatal(err)
+		}
 		if err := extractEmbedFS(web.Static, "static", *extractResources); err != nil {
 			log.Fatal(err)
 		}
@@ -347,7 +351,7 @@ func extractEmbedFS(fsys embed.FS, root string, destDir string) error {
 			return err
 		}
 
-		destPath := filepath.Join(destDir, relPath)
+		destPath := filepath.Join(destDir, root, relPath)
 
 		if d.IsDir() {
 			return os.MkdirAll(destPath, 0o700)

+ 7 - 0
data/apps/gitea-rss-feeds.yaml

@@ -0,0 +1,7 @@
+# By Aibrew: https://github.com/TecharoHQ/anubis/discussions/261#discussioncomment-12821065
+- name: gitea-feed-atom
+  action: ALLOW
+  path_regex: ^/[.A-Za-z0-9_-]{1,256}?[./A-Za-z0-9_-]*\.atom$
+- name: gitea-feed-rss
+  action: ALLOW
+  path_regex: ^/[.A-Za-z0-9_-]{1,256}?[./A-Za-z0-9_-]*\.rss$

+ 14 - 645
data/botPolicies.json

@@ -1,678 +1,47 @@
 {
   "bots": [
     {
-      "name": "cloudflare-workers",
-      "headers_regex": {
-        "CF-Worker": ".*"
-      },
-      "action": "DENY"
+      "import": "(data)/bots/ai-robots-txt.yaml"
     },
     {
-      "name": "ai-robots-txt",
-      "user_agent_regex": "AI2Bot|Ai2Bot-Dolma|Amazonbot|anthropic-ai|Applebot|Applebot-Extended|Brightbot 1.0|Bytespider|CCBot|ChatGPT-User|Claude-Web|ClaudeBot|cohere-ai|cohere-training-data-crawler|Crawlspace|Diffbot|DuckAssistBot|FacebookBot|FriendlyCrawler|Google-Extended|GoogleOther|GoogleOther-Image|GoogleOther-Video|GPTBot|iaskspider/2.0|ICC-Crawler|ImagesiftBot|img2dataset|ISSCyberRiskCrawler|Kangaroo Bot|Meta-ExternalAgent|Meta-ExternalFetcher|OAI-SearchBot|omgili|omgilibot|PanguBot|Perplexity-User|PerplexityBot|PetalBot|Scrapy|SemrushBot-OCOB|SemrushBot-SWA|Sidetrade indexer bot|Timpibot|VelenPublicWebCrawler|Webzio-Extended|YouBot",
-      "action": "DENY"
+      "import": "(data)/bots/cloudflare-workers.yaml"
     },
     {
-      "name": "googlebot",
-      "user_agent_regex": "\\+http\\://www\\.google\\.com/bot\\.html",
-      "action": "ALLOW",
-      "remote_addresses": [
-        "2001:4860:4801:10::/64",
-        "2001:4860:4801:11::/64",
-        "2001:4860:4801:12::/64",
-        "2001:4860:4801:13::/64",
-        "2001:4860:4801:14::/64",
-        "2001:4860:4801:15::/64",
-        "2001:4860:4801:16::/64",
-        "2001:4860:4801:17::/64",
-        "2001:4860:4801:18::/64",
-        "2001:4860:4801:19::/64",
-        "2001:4860:4801:1a::/64",
-        "2001:4860:4801:1b::/64",
-        "2001:4860:4801:1c::/64",
-        "2001:4860:4801:1d::/64",
-        "2001:4860:4801:1e::/64",
-        "2001:4860:4801:1f::/64",
-        "2001:4860:4801:20::/64",
-        "2001:4860:4801:21::/64",
-        "2001:4860:4801:22::/64",
-        "2001:4860:4801:23::/64",
-        "2001:4860:4801:24::/64",
-        "2001:4860:4801:25::/64",
-        "2001:4860:4801:26::/64",
-        "2001:4860:4801:27::/64",
-        "2001:4860:4801:28::/64",
-        "2001:4860:4801:29::/64",
-        "2001:4860:4801:2::/64",
-        "2001:4860:4801:2a::/64",
-        "2001:4860:4801:2b::/64",
-        "2001:4860:4801:2c::/64",
-        "2001:4860:4801:2d::/64",
-        "2001:4860:4801:2e::/64",
-        "2001:4860:4801:2f::/64",
-        "2001:4860:4801:31::/64",
-        "2001:4860:4801:32::/64",
-        "2001:4860:4801:33::/64",
-        "2001:4860:4801:34::/64",
-        "2001:4860:4801:35::/64",
-        "2001:4860:4801:36::/64",
-        "2001:4860:4801:37::/64",
-        "2001:4860:4801:38::/64",
-        "2001:4860:4801:39::/64",
-        "2001:4860:4801:3a::/64",
-        "2001:4860:4801:3b::/64",
-        "2001:4860:4801:3c::/64",
-        "2001:4860:4801:3d::/64",
-        "2001:4860:4801:3e::/64",
-        "2001:4860:4801:40::/64",
-        "2001:4860:4801:41::/64",
-        "2001:4860:4801:42::/64",
-        "2001:4860:4801:43::/64",
-        "2001:4860:4801:44::/64",
-        "2001:4860:4801:45::/64",
-        "2001:4860:4801:46::/64",
-        "2001:4860:4801:47::/64",
-        "2001:4860:4801:48::/64",
-        "2001:4860:4801:49::/64",
-        "2001:4860:4801:4a::/64",
-        "2001:4860:4801:4b::/64",
-        "2001:4860:4801:4c::/64",
-        "2001:4860:4801:50::/64",
-        "2001:4860:4801:51::/64",
-        "2001:4860:4801:52::/64",
-        "2001:4860:4801:53::/64",
-        "2001:4860:4801:54::/64",
-        "2001:4860:4801:55::/64",
-        "2001:4860:4801:56::/64",
-        "2001:4860:4801:60::/64",
-        "2001:4860:4801:61::/64",
-        "2001:4860:4801:62::/64",
-        "2001:4860:4801:63::/64",
-        "2001:4860:4801:64::/64",
-        "2001:4860:4801:65::/64",
-        "2001:4860:4801:66::/64",
-        "2001:4860:4801:67::/64",
-        "2001:4860:4801:68::/64",
-        "2001:4860:4801:69::/64",
-        "2001:4860:4801:6a::/64",
-        "2001:4860:4801:6b::/64",
-        "2001:4860:4801:6c::/64",
-        "2001:4860:4801:6d::/64",
-        "2001:4860:4801:6e::/64",
-        "2001:4860:4801:6f::/64",
-        "2001:4860:4801:70::/64",
-        "2001:4860:4801:71::/64",
-        "2001:4860:4801:72::/64",
-        "2001:4860:4801:73::/64",
-        "2001:4860:4801:74::/64",
-        "2001:4860:4801:75::/64",
-        "2001:4860:4801:76::/64",
-        "2001:4860:4801:77::/64",
-        "2001:4860:4801:78::/64",
-        "2001:4860:4801:79::/64",
-        "2001:4860:4801:80::/64",
-        "2001:4860:4801:81::/64",
-        "2001:4860:4801:82::/64",
-        "2001:4860:4801:83::/64",
-        "2001:4860:4801:84::/64",
-        "2001:4860:4801:85::/64",
-        "2001:4860:4801:86::/64",
-        "2001:4860:4801:87::/64",
-        "2001:4860:4801:88::/64",
-        "2001:4860:4801:90::/64",
-        "2001:4860:4801:91::/64",
-        "2001:4860:4801:92::/64",
-        "2001:4860:4801:93::/64",
-        "2001:4860:4801:94::/64",
-        "2001:4860:4801:95::/64",
-        "2001:4860:4801:96::/64",
-        "2001:4860:4801:a0::/64",
-        "2001:4860:4801:a1::/64",
-        "2001:4860:4801:a2::/64",
-        "2001:4860:4801:a3::/64",
-        "2001:4860:4801:a4::/64",
-        "2001:4860:4801:a5::/64",
-        "2001:4860:4801:c::/64",
-        "2001:4860:4801:f::/64",
-        "192.178.5.0/27",
-        "192.178.6.0/27",
-        "192.178.6.128/27",
-        "192.178.6.160/27",
-        "192.178.6.192/27",
-        "192.178.6.32/27",
-        "192.178.6.64/27",
-        "192.178.6.96/27",
-        "34.100.182.96/28",
-        "34.101.50.144/28",
-        "34.118.254.0/28",
-        "34.118.66.0/28",
-        "34.126.178.96/28",
-        "34.146.150.144/28",
-        "34.147.110.144/28",
-        "34.151.74.144/28",
-        "34.152.50.64/28",
-        "34.154.114.144/28",
-        "34.155.98.32/28",
-        "34.165.18.176/28",
-        "34.175.160.64/28",
-        "34.176.130.16/28",
-        "34.22.85.0/27",
-        "34.64.82.64/28",
-        "34.65.242.112/28",
-        "34.80.50.80/28",
-        "34.88.194.0/28",
-        "34.89.10.80/28",
-        "34.89.198.80/28",
-        "34.96.162.48/28",
-        "35.247.243.240/28",
-        "66.249.64.0/27",
-        "66.249.64.128/27",
-        "66.249.64.160/27",
-        "66.249.64.224/27",
-        "66.249.64.32/27",
-        "66.249.64.64/27",
-        "66.249.64.96/27",
-        "66.249.65.0/27",
-        "66.249.65.128/27",
-        "66.249.65.160/27",
-        "66.249.65.192/27",
-        "66.249.65.224/27",
-        "66.249.65.32/27",
-        "66.249.65.64/27",
-        "66.249.65.96/27",
-        "66.249.66.0/27",
-        "66.249.66.128/27",
-        "66.249.66.160/27",
-        "66.249.66.192/27",
-        "66.249.66.224/27",
-        "66.249.66.32/27",
-        "66.249.66.64/27",
-        "66.249.66.96/27",
-        "66.249.68.0/27",
-        "66.249.68.128/27",
-        "66.249.68.32/27",
-        "66.249.68.64/27",
-        "66.249.68.96/27",
-        "66.249.69.0/27",
-        "66.249.69.128/27",
-        "66.249.69.160/27",
-        "66.249.69.192/27",
-        "66.249.69.224/27",
-        "66.249.69.32/27",
-        "66.249.69.64/27",
-        "66.249.69.96/27",
-        "66.249.70.0/27",
-        "66.249.70.128/27",
-        "66.249.70.160/27",
-        "66.249.70.192/27",
-        "66.249.70.224/27",
-        "66.249.70.32/27",
-        "66.249.70.64/27",
-        "66.249.70.96/27",
-        "66.249.71.0/27",
-        "66.249.71.128/27",
-        "66.249.71.160/27",
-        "66.249.71.192/27",
-        "66.249.71.224/27",
-        "66.249.71.32/27",
-        "66.249.71.64/27",
-        "66.249.71.96/27",
-        "66.249.72.0/27",
-        "66.249.72.128/27",
-        "66.249.72.160/27",
-        "66.249.72.192/27",
-        "66.249.72.224/27",
-        "66.249.72.32/27",
-        "66.249.72.64/27",
-        "66.249.72.96/27",
-        "66.249.73.0/27",
-        "66.249.73.128/27",
-        "66.249.73.160/27",
-        "66.249.73.192/27",
-        "66.249.73.224/27",
-        "66.249.73.32/27",
-        "66.249.73.64/27",
-        "66.249.73.96/27",
-        "66.249.74.0/27",
-        "66.249.74.128/27",
-        "66.249.74.160/27",
-        "66.249.74.192/27",
-        "66.249.74.32/27",
-        "66.249.74.64/27",
-        "66.249.74.96/27",
-        "66.249.75.0/27",
-        "66.249.75.128/27",
-        "66.249.75.160/27",
-        "66.249.75.192/27",
-        "66.249.75.224/27",
-        "66.249.75.32/27",
-        "66.249.75.64/27",
-        "66.249.75.96/27",
-        "66.249.76.0/27",
-        "66.249.76.128/27",
-        "66.249.76.160/27",
-        "66.249.76.192/27",
-        "66.249.76.224/27",
-        "66.249.76.32/27",
-        "66.249.76.64/27",
-        "66.249.76.96/27",
-        "66.249.77.0/27",
-        "66.249.77.128/27",
-        "66.249.77.160/27",
-        "66.249.77.192/27",
-        "66.249.77.224/27",
-        "66.249.77.32/27",
-        "66.249.77.64/27",
-        "66.249.77.96/27",
-        "66.249.78.0/27",
-        "66.249.78.32/27",
-        "66.249.79.0/27",
-        "66.249.79.128/27",
-        "66.249.79.160/27",
-        "66.249.79.192/27",
-        "66.249.79.224/27",
-        "66.249.79.32/27",
-        "66.249.79.64/27",
-        "66.249.79.96/27"
-      ]
+      "import": "(data)/bots/headless-browsers.yaml"
     },
     {
-      "name": "bingbot",
-      "user_agent_regex": "\\+http\\://www\\.bing\\.com/bingbot\\.htm",
-      "action": "ALLOW",
-      "remote_addresses": [
-        "157.55.39.0/24",
-        "207.46.13.0/24",
-        "40.77.167.0/24",
-        "13.66.139.0/24",
-        "13.66.144.0/24",
-        "52.167.144.0/24",
-        "13.67.10.16/28",
-        "13.69.66.240/28",
-        "13.71.172.224/28",
-        "139.217.52.0/28",
-        "191.233.204.224/28",
-        "20.36.108.32/28",
-        "20.43.120.16/28",
-        "40.79.131.208/28",
-        "40.79.186.176/28",
-        "52.231.148.0/28",
-        "20.79.107.240/28",
-        "51.105.67.0/28",
-        "20.125.163.80/28",
-        "40.77.188.0/22",
-        "65.55.210.0/24",
-        "199.30.24.0/23",
-        "40.77.202.0/24",
-        "40.77.139.0/25",
-        "20.74.197.0/28",
-        "20.15.133.160/27",
-        "40.77.177.0/24",
-        "40.77.178.0/23"
-      ]
+      "import": "(data)/bots/us-ai-scraper.yaml"
     },
     {
-      "name": "duckduckbot",
-      "user_agent_regex": "\\+http\\://duckduckgo\\.com/duckduckbot\\.html",
-      "action": "ALLOW",
-      "remote_addresses": [
-        "57.152.72.128/32",
-        "51.8.253.152/32",
-        "40.80.242.63/32",
-        "20.12.141.99/32",
-        "20.49.136.28/32",
-        "51.116.131.221/32",
-        "51.107.40.209/32",
-        "20.40.133.240/32",
-        "20.50.168.91/32",
-        "51.120.48.122/32",
-        "20.193.45.113/32",
-        "40.76.173.151/32",
-        "40.76.163.7/32",
-        "20.185.79.47/32",
-        "52.142.26.175/32",
-        "20.185.79.15/32",
-        "52.142.24.149/32",
-        "40.76.162.208/32",
-        "40.76.163.23/32",
-        "40.76.162.191/32",
-        "40.76.162.247/32",
-        "40.88.21.235/32",
-        "20.191.45.212/32",
-        "52.146.59.12/32",
-        "52.146.59.156/32",
-        "52.146.59.154/32",
-        "52.146.58.236/32",
-        "20.62.224.44/32",
-        "51.104.180.53/32",
-        "51.104.180.47/32",
-        "51.104.180.26/32",
-        "51.104.146.225/32",
-        "51.104.146.235/32",
-        "20.73.202.147/32",
-        "20.73.132.240/32",
-        "20.71.12.143/32",
-        "20.56.197.58/32",
-        "20.56.197.63/32",
-        "20.43.150.93/32",
-        "20.43.150.85/32",
-        "20.44.222.1/32",
-        "40.89.243.175/32",
-        "13.89.106.77/32",
-        "52.143.242.6/32",
-        "52.143.241.111/32",
-        "52.154.60.82/32",
-        "20.197.209.11/32",
-        "20.197.209.27/32",
-        "20.226.133.105/32",
-        "191.234.216.4/32",
-        "191.234.216.178/32",
-        "20.53.92.211/32",
-        "20.53.91.2/32",
-        "20.207.99.197/32",
-        "20.207.97.190/32",
-        "40.81.250.205/32",
-        "40.64.106.11/32",
-        "40.64.105.247/32",
-        "20.72.242.93/32",
-        "20.99.255.235/32",
-        "20.113.3.121/32",
-        "52.224.16.221/32",
-        "52.224.21.53/32",
-        "52.224.20.204/32",
-        "52.224.21.19/32",
-        "52.224.20.249/32",
-        "52.224.20.203/32",
-        "52.224.20.190/32",
-        "52.224.16.229/32",
-        "52.224.21.20/32",
-        "52.146.63.80/32",
-        "52.224.20.227/32",
-        "52.224.20.193/32",
-        "52.190.37.160/32",
-        "52.224.21.23/32",
-        "52.224.20.223/32",
-        "52.224.20.181/32",
-        "52.224.21.49/32",
-        "52.224.21.55/32",
-        "52.224.21.61/32",
-        "52.224.19.152/32",
-        "52.224.20.186/32",
-        "52.224.21.27/32",
-        "52.224.21.51/32",
-        "52.224.20.174/32",
-        "52.224.21.4/32",
-        "51.104.164.109/32",
-        "51.104.167.71/32",
-        "51.104.160.177/32",
-        "51.104.162.149/32",
-        "51.104.167.95/32",
-        "51.104.167.54/32",
-        "51.104.166.111/32",
-        "51.104.167.88/32",
-        "51.104.161.32/32",
-        "51.104.163.250/32",
-        "51.104.164.189/32",
-        "51.104.167.19/32",
-        "51.104.160.167/32",
-        "51.104.167.110/32",
-        "20.191.44.119/32",
-        "51.104.167.104/32",
-        "20.191.44.234/32",
-        "51.104.164.215/32",
-        "51.104.167.52/32",
-        "20.191.44.22/32",
-        "51.104.167.87/32",
-        "51.104.167.96/32",
-        "20.191.44.16/32",
-        "51.104.167.61/32",
-        "51.104.164.147/32",
-        "20.50.48.159/32",
-        "40.114.182.172/32",
-        "20.50.50.130/32",
-        "20.50.50.163/32",
-        "20.50.50.46/32",
-        "40.114.182.153/32",
-        "20.50.50.118/32",
-        "20.50.49.55/32",
-        "20.50.49.25/32",
-        "40.114.183.251/32",
-        "20.50.50.123/32",
-        "20.50.49.237/32",
-        "20.50.48.192/32",
-        "20.50.50.134/32",
-        "51.138.90.233/32",
-        "40.114.183.196/32",
-        "20.50.50.146/32",
-        "40.114.183.88/32",
-        "20.50.50.145/32",
-        "20.50.50.121/32",
-        "20.50.49.40/32",
-        "51.138.90.206/32",
-        "40.114.182.45/32",
-        "51.138.90.161/32",
-        "20.50.49.0/32",
-        "40.119.232.215/32",
-        "104.43.55.167/32",
-        "40.119.232.251/32",
-        "40.119.232.50/32",
-        "40.119.232.146/32",
-        "40.119.232.218/32",
-        "104.43.54.127/32",
-        "104.43.55.117/32",
-        "104.43.55.116/32",
-        "104.43.55.166/32",
-        "52.154.169.50/32",
-        "52.154.171.70/32",
-        "52.154.170.229/32",
-        "52.154.170.113/32",
-        "52.154.171.44/32",
-        "52.154.172.2/32",
-        "52.143.244.81/32",
-        "52.154.171.87/32",
-        "52.154.171.250/32",
-        "52.154.170.28/32",
-        "52.154.170.122/32",
-        "52.143.243.117/32",
-        "52.143.247.235/32",
-        "52.154.171.235/32",
-        "52.154.171.196/32",
-        "52.154.171.0/32",
-        "52.154.170.243/32",
-        "52.154.170.26/32",
-        "52.154.169.200/32",
-        "52.154.170.96/32",
-        "52.154.170.88/32",
-        "52.154.171.150/32",
-        "52.154.171.205/32",
-        "52.154.170.117/32",
-        "52.154.170.209/32",
-        "191.235.202.48/32",
-        "191.233.3.202/32",
-        "191.235.201.214/32",
-        "191.233.3.197/32",
-        "191.235.202.38/32",
-        "20.53.78.144/32",
-        "20.193.24.10/32",
-        "20.53.78.236/32",
-        "20.53.78.138/32",
-        "20.53.78.123/32",
-        "20.53.78.106/32",
-        "20.193.27.215/32",
-        "20.193.25.197/32",
-        "20.193.12.126/32",
-        "20.193.24.251/32",
-        "20.204.242.101/32",
-        "20.207.72.113/32",
-        "20.204.242.19/32",
-        "20.219.45.67/32",
-        "20.207.72.11/32",
-        "20.219.45.190/32",
-        "20.204.243.55/32",
-        "20.204.241.148/32",
-        "20.207.72.110/32",
-        "20.204.240.172/32",
-        "20.207.72.21/32",
-        "20.204.246.81/32",
-        "20.207.107.181/32",
-        "20.204.246.254/32",
-        "20.219.43.246/32",
-        "52.149.25.43/32",
-        "52.149.61.51/32",
-        "52.149.58.139/32",
-        "52.149.60.38/32",
-        "52.148.165.38/32",
-        "52.143.95.162/32",
-        "52.149.56.151/32",
-        "52.149.30.45/32",
-        "52.149.58.173/32",
-        "52.143.95.204/32",
-        "52.149.28.83/32",
-        "52.149.58.69/32",
-        "52.148.161.87/32",
-        "52.149.58.27/32",
-        "52.149.28.18/32",
-        "20.79.226.26/32",
-        "20.79.239.66/32",
-        "20.79.238.198/32",
-        "20.113.14.159/32",
-        "20.75.144.152/32",
-        "20.43.172.120/32",
-        "20.53.134.160/32",
-        "20.201.15.208/32",
-        "20.93.28.24/32",
-        "20.61.34.40/32",
-        "52.242.224.168/32",
-        "20.80.129.80/32",
-        "20.195.108.47/32",
-        "4.195.133.120/32",
-        "4.228.76.163/32",
-        "4.182.131.108/32",
-        "4.209.224.56/32",
-        "108.141.83.74/32",
-        "4.213.46.14/32",
-        "172.169.17.165/32",
-        "51.8.71.117/32",
-        "20.3.1.178/32",
-        "52.149.56.151/32",
-        "52.149.30.45/32",
-        "52.149.58.173/32",
-        "52.143.95.204/32",
-        "52.149.28.83/32",
-        "52.149.58.69/32",
-        "52.148.161.87/32",
-        "52.149.58.27/32",
-        "52.149.28.18/32",
-        "20.79.226.26/32",
-        "20.79.239.66/32",
-        "20.79.238.198/32",
-        "20.113.14.159/32",
-        "20.75.144.152/32",
-        "20.43.172.120/32",
-        "20.53.134.160/32",
-        "20.201.15.208/32",
-        "20.93.28.24/32",
-        "20.61.34.40/32",
-        "52.242.224.168/32",
-        "20.80.129.80/32",
-        "20.195.108.47/32",
-        "4.195.133.120/32",
-        "4.228.76.163/32",
-        "4.182.131.108/32",
-        "4.209.224.56/32",
-        "108.141.83.74/32",
-        "4.213.46.14/32",
-        "172.169.17.165/32",
-        "51.8.71.117/32",
-        "20.3.1.178/32"
-      ]
+      "import": "(data)/crawlers/googlebot.yaml"
     },
     {
-      "name": "qwantbot",
-      "user_agent_regex": "\\+https\\://help\\.qwant\\.com/bot/",
-      "action": "ALLOW",
-      "remote_addresses": [
-        "91.242.162.0/24"
-      ]
+      "import": "(data)/crawlers/bingbot.yaml"
     },
     {
-      "name": "internet-archive",
-      "action": "ALLOW",
-      "remote_addresses": [
-        "207.241.224.0/20",
-        "208.70.24.0/21",
-        "2620:0:9c0::/48"
-      ]
+      "import": "(data)/crawlers/duckduckbot.yaml"
     },
     {
-      "name": "kagibot",
-      "user_agent_regex": "\\+https\\://kagi\\.com/bot",
-      "action": "ALLOW",
-      "remote_addresses": [
-        "216.18.205.234/32",
-        "35.212.27.76/32",
-        "104.254.65.50/32",
-        "209.151.156.194/32"
-      ]
+      "import": "(data)/crawlers/qwantbot.yaml"
     },
     {
-      "name": "marginalia",
-      "user_agent_regex": "search\\.marginalia\\.nu",
-      "action": "ALLOW",
-      "remote_addresses": [
-        "193.183.0.162/31",
-        "193.183.0.164/30",
-        "193.183.0.168/30",
-        "193.183.0.172/31",
-        "193.183.0.174/32"
-      ]
+      "import": "(data)/crawlers/internet-archive.yaml"
     },
     {
-      "name": "mojeekbot",
-      "user_agent_regex": "http\\://www\\.mojeek\\.com/bot\\.html",
-      "action": "ALLOW",
-      "remote_addresses": [
-        "5.102.173.71/32"
-      ]
+      "import": "(data)/crawlers/kagibot.yaml"
     },
     {
-      "name": "us-artificial-intelligence-scraper",
-      "user_agent_regex": "\\+https\\://github\\.com/US-Artificial-Intelligence/scraper",
-      "action": "DENY"
+      "import": "(data)/crawlers/marginalia.yaml"
     },
     {
-      "name": "well-known",
-      "path_regex": "^/.well-known/.*$",
-      "action": "ALLOW"
+      "import": "(data)/crawlers/mojeekbot.yaml"
     },
     {
-      "name": "favicon",
-      "path_regex": "^/favicon.ico$",
-      "action": "ALLOW"
-    },
-    {
-      "name": "robots-txt",
-      "path_regex": "^/robots.txt$",
-      "action": "ALLOW"
-    },
-    {
-      "name": "lightpanda",
-      "user_agent_regex": "^Lightpanda/.*$",
-      "action": "DENY"
-    },
-    {
-      "name": "headless-chrome",
-      "user_agent_regex": "HeadlessChrome",
-      "action": "DENY"
-    },
-    {
-      "name": "headless-chromium",
-      "user_agent_regex": "HeadlessChromium",
-      "action": "DENY"
+      "import": "(data)/common/keep-internet-working.yaml"
     },
     {
       "name": "generic-browser",
-      "user_agent_regex": "Mozilla|Opera",
+      "user_agent_regex": "Mozilla|Opera",
       "action": "CHALLENGE"
     }
   ],

+ 30 - 642
data/botPolicies.yaml

@@ -1,651 +1,38 @@
+## Anubis has the ability to let you import snippets of configuration into the main
+## configuration file. This allows you to break up your config into smaller parts
+## that get logically assembled into one big file.
+##
+## Of note, a bot rule can either have inline bot configuration or import a
+## bot config snippet. You cannot do both in a single bot rule.
+##
+## Import paths are either prefixed with (data), which imports from the common/shared
+## rules in the data folder of the Anubis source tree, or are absolute/relative paths
+## on your filesystem. If you don't have access to the Anubis source tree, check
+## /usr/share/docs/anubis/data or the tarball you extracted Anubis from.
+
 bots:
 # Pathological bots to deny
-- name: us-artificial-intelligence-scraper
-  user_agent_regex: \+https\://github\.com/US-Artificial-Intelligence/scraper
-  action: DENY
-- name: lightpanda
-  user_agent_regex: ^LightPanda/.*$
-  action: DENY
-- name: headless-chrome
-  user_agent_regex: HeadlessChrome
-  action: DENY
-- name: headless-chromium
-  user_agent_regex: HeadlessChromium
-  action: DENY
-- name: "ai-robots-txt"
-  user_agent_regex: >
-    AI2Bot|Ai2Bot-Dolma|Amazonbot|anthropic-ai|Applebot|Applebot-Extended|Brightbot 1.0|Bytespider|CCBot|ChatGPT-User|Claude-Web|ClaudeBot|cohere-ai|cohere-training-data-crawler|Crawlspace|Diffbot|DuckAssistBot|FacebookBot|FriendlyCrawler|Google-Extended|GoogleOther|GoogleOther-Image|GoogleOther-Video|GPTBot|iaskspider/2.0|ICC-Crawler|ImagesiftBot|img2dataset|ISSCyberRiskCrawler|Kangaroo Bot|Meta-ExternalAgent|Meta-ExternalFetcher|OAI-SearchBot|omgili|omgilibot|PanguBot|Perplexity-User|PerplexityBot|PetalBot|Scrapy|SemrushBot-OCOB|SemrushBot-SWA|Sidetrade indexer bot|Timpibot|VelenPublicWebCrawler|Webzio-Extended|YouBot
-  action: DENY
-- name: cloudflare-workers
-  headers_regex:
-    CF-Worker: .*
-  action: DENY
+- # This corresponds to data/bots/ai-robots-txt.yaml in the source tree
+  import: (data)/bots/ai-robots-txt.yaml
+- import: (data)/bots/cloudflare-workers.yaml
+- import: (data)/bots/headless-browsers.yaml
+- import: (data)/bots/us-ai-scraper.yaml
 
-# search engines to allow
-- name: googlebot
-  user_agent_regex: \+http\://www\.google\.com/bot\.html
-  action: ALLOW
-  # https://developers.google.com/static/search/apis/ipranges/googlebot.json
-  remote_addresses: [
-    "2001:4860:4801:10::/64",
-    "2001:4860:4801:11::/64",
-    "2001:4860:4801:12::/64",
-    "2001:4860:4801:13::/64",
-    "2001:4860:4801:14::/64",
-    "2001:4860:4801:15::/64",
-    "2001:4860:4801:16::/64",
-    "2001:4860:4801:17::/64",
-    "2001:4860:4801:18::/64",
-    "2001:4860:4801:19::/64",
-    "2001:4860:4801:1a::/64",
-    "2001:4860:4801:1b::/64",
-    "2001:4860:4801:1c::/64",
-    "2001:4860:4801:1d::/64",
-    "2001:4860:4801:1e::/64",
-    "2001:4860:4801:1f::/64",
-    "2001:4860:4801:20::/64",
-    "2001:4860:4801:21::/64",
-    "2001:4860:4801:22::/64",
-    "2001:4860:4801:23::/64",
-    "2001:4860:4801:24::/64",
-    "2001:4860:4801:25::/64",
-    "2001:4860:4801:26::/64",
-    "2001:4860:4801:27::/64",
-    "2001:4860:4801:28::/64",
-    "2001:4860:4801:29::/64",
-    "2001:4860:4801:2::/64",
-    "2001:4860:4801:2a::/64",
-    "2001:4860:4801:2b::/64",
-    "2001:4860:4801:2c::/64",
-    "2001:4860:4801:2d::/64",
-    "2001:4860:4801:2e::/64",
-    "2001:4860:4801:2f::/64",
-    "2001:4860:4801:31::/64",
-    "2001:4860:4801:32::/64",
-    "2001:4860:4801:33::/64",
-    "2001:4860:4801:34::/64",
-    "2001:4860:4801:35::/64",
-    "2001:4860:4801:36::/64",
-    "2001:4860:4801:37::/64",
-    "2001:4860:4801:38::/64",
-    "2001:4860:4801:39::/64",
-    "2001:4860:4801:3a::/64",
-    "2001:4860:4801:3b::/64",
-    "2001:4860:4801:3c::/64",
-    "2001:4860:4801:3d::/64",
-    "2001:4860:4801:3e::/64",
-    "2001:4860:4801:40::/64",
-    "2001:4860:4801:41::/64",
-    "2001:4860:4801:42::/64",
-    "2001:4860:4801:43::/64",
-    "2001:4860:4801:44::/64",
-    "2001:4860:4801:45::/64",
-    "2001:4860:4801:46::/64",
-    "2001:4860:4801:47::/64",
-    "2001:4860:4801:48::/64",
-    "2001:4860:4801:49::/64",
-    "2001:4860:4801:4a::/64",
-    "2001:4860:4801:4b::/64",
-    "2001:4860:4801:4c::/64",
-    "2001:4860:4801:50::/64",
-    "2001:4860:4801:51::/64",
-    "2001:4860:4801:52::/64",
-    "2001:4860:4801:53::/64",
-    "2001:4860:4801:54::/64",
-    "2001:4860:4801:55::/64",
-    "2001:4860:4801:56::/64",
-    "2001:4860:4801:60::/64",
-    "2001:4860:4801:61::/64",
-    "2001:4860:4801:62::/64",
-    "2001:4860:4801:63::/64",
-    "2001:4860:4801:64::/64",
-    "2001:4860:4801:65::/64",
-    "2001:4860:4801:66::/64",
-    "2001:4860:4801:67::/64",
-    "2001:4860:4801:68::/64",
-    "2001:4860:4801:69::/64",
-    "2001:4860:4801:6a::/64",
-    "2001:4860:4801:6b::/64",
-    "2001:4860:4801:6c::/64",
-    "2001:4860:4801:6d::/64",
-    "2001:4860:4801:6e::/64",
-    "2001:4860:4801:6f::/64",
-    "2001:4860:4801:70::/64",
-    "2001:4860:4801:71::/64",
-    "2001:4860:4801:72::/64",
-    "2001:4860:4801:73::/64",
-    "2001:4860:4801:74::/64",
-    "2001:4860:4801:75::/64",
-    "2001:4860:4801:76::/64",
-    "2001:4860:4801:77::/64",
-    "2001:4860:4801:78::/64",
-    "2001:4860:4801:79::/64",
-    "2001:4860:4801:80::/64",
-    "2001:4860:4801:81::/64",
-    "2001:4860:4801:82::/64",
-    "2001:4860:4801:83::/64",
-    "2001:4860:4801:84::/64",
-    "2001:4860:4801:85::/64",
-    "2001:4860:4801:86::/64",
-    "2001:4860:4801:87::/64",
-    "2001:4860:4801:88::/64",
-    "2001:4860:4801:90::/64",
-    "2001:4860:4801:91::/64",
-    "2001:4860:4801:92::/64",
-    "2001:4860:4801:93::/64",
-    "2001:4860:4801:94::/64",
-    "2001:4860:4801:95::/64",
-    "2001:4860:4801:96::/64",
-    "2001:4860:4801:a0::/64",
-    "2001:4860:4801:a1::/64",
-    "2001:4860:4801:a2::/64",
-    "2001:4860:4801:a3::/64",
-    "2001:4860:4801:a4::/64",
-    "2001:4860:4801:a5::/64",
-    "2001:4860:4801:c::/64",
-    "2001:4860:4801:f::/64",
-    "192.178.5.0/27",
-    "192.178.6.0/27",
-    "192.178.6.128/27",
-    "192.178.6.160/27",
-    "192.178.6.192/27",
-    "192.178.6.32/27",
-    "192.178.6.64/27",
-    "192.178.6.96/27",
-    "34.100.182.96/28",
-    "34.101.50.144/28",
-    "34.118.254.0/28",
-    "34.118.66.0/28",
-    "34.126.178.96/28",
-    "34.146.150.144/28",
-    "34.147.110.144/28",
-    "34.151.74.144/28",
-    "34.152.50.64/28",
-    "34.154.114.144/28",
-    "34.155.98.32/28",
-    "34.165.18.176/28",
-    "34.175.160.64/28",
-    "34.176.130.16/28",
-    "34.22.85.0/27",
-    "34.64.82.64/28",
-    "34.65.242.112/28",
-    "34.80.50.80/28",
-    "34.88.194.0/28",
-    "34.89.10.80/28",
-    "34.89.198.80/28",
-    "34.96.162.48/28",
-    "35.247.243.240/28",
-    "66.249.64.0/27",
-    "66.249.64.128/27",
-    "66.249.64.160/27",
-    "66.249.64.224/27",
-    "66.249.64.32/27",
-    "66.249.64.64/27",
-    "66.249.64.96/27",
-    "66.249.65.0/27",
-    "66.249.65.128/27",
-    "66.249.65.160/27",
-    "66.249.65.192/27",
-    "66.249.65.224/27",
-    "66.249.65.32/27",
-    "66.249.65.64/27",
-    "66.249.65.96/27",
-    "66.249.66.0/27",
-    "66.249.66.128/27",
-    "66.249.66.160/27",
-    "66.249.66.192/27",
-    "66.249.66.224/27",
-    "66.249.66.32/27",
-    "66.249.66.64/27",
-    "66.249.66.96/27",
-    "66.249.68.0/27",
-    "66.249.68.128/27",
-    "66.249.68.32/27",
-    "66.249.68.64/27",
-    "66.249.68.96/27",
-    "66.249.69.0/27",
-    "66.249.69.128/27",
-    "66.249.69.160/27",
-    "66.249.69.192/27",
-    "66.249.69.224/27",
-    "66.249.69.32/27",
-    "66.249.69.64/27",
-    "66.249.69.96/27",
-    "66.249.70.0/27",
-    "66.249.70.128/27",
-    "66.249.70.160/27",
-    "66.249.70.192/27",
-    "66.249.70.224/27",
-    "66.249.70.32/27",
-    "66.249.70.64/27",
-    "66.249.70.96/27",
-    "66.249.71.0/27",
-    "66.249.71.128/27",
-    "66.249.71.160/27",
-    "66.249.71.192/27",
-    "66.249.71.224/27",
-    "66.249.71.32/27",
-    "66.249.71.64/27",
-    "66.249.71.96/27",
-    "66.249.72.0/27",
-    "66.249.72.128/27",
-    "66.249.72.160/27",
-    "66.249.72.192/27",
-    "66.249.72.224/27",
-    "66.249.72.32/27",
-    "66.249.72.64/27",
-    "66.249.72.96/27",
-    "66.249.73.0/27",
-    "66.249.73.128/27",
-    "66.249.73.160/27",
-    "66.249.73.192/27",
-    "66.249.73.224/27",
-    "66.249.73.32/27",
-    "66.249.73.64/27",
-    "66.249.73.96/27",
-    "66.249.74.0/27",
-    "66.249.74.128/27",
-    "66.249.74.160/27",
-    "66.249.74.192/27",
-    "66.249.74.32/27",
-    "66.249.74.64/27",
-    "66.249.74.96/27",
-    "66.249.75.0/27",
-    "66.249.75.128/27",
-    "66.249.75.160/27",
-    "66.249.75.192/27",
-    "66.249.75.224/27",
-    "66.249.75.32/27",
-    "66.249.75.64/27",
-    "66.249.75.96/27",
-    "66.249.76.0/27",
-    "66.249.76.128/27",
-    "66.249.76.160/27",
-    "66.249.76.192/27",
-    "66.249.76.224/27",
-    "66.249.76.32/27",
-    "66.249.76.64/27",
-    "66.249.76.96/27",
-    "66.249.77.0/27",
-    "66.249.77.128/27",
-    "66.249.77.160/27",
-    "66.249.77.192/27",
-    "66.249.77.224/27",
-    "66.249.77.32/27",
-    "66.249.77.64/27",
-    "66.249.77.96/27",
-    "66.249.78.0/27",
-    "66.249.78.32/27",
-    "66.249.79.0/27",
-    "66.249.79.128/27",
-    "66.249.79.160/27",
-    "66.249.79.192/27",
-    "66.249.79.224/27",
-    "66.249.79.32/27",
-    "66.249.79.64/27",
-    "66.249.79.96/27"
-  ]
-- name: bingbot
-  user_agent_regex: \+http\://www\.bing\.com/bingbot\.htm
-  action: ALLOW
-  # https://www.bing.com/toolbox/bingbot.json
-  remote_addresses: [
-    "157.55.39.0/24",
-    "207.46.13.0/24",
-    "40.77.167.0/24",
-    "13.66.139.0/24",
-    "13.66.144.0/24",
-    "52.167.144.0/24",
-    "13.67.10.16/28",
-    "13.69.66.240/28",
-    "13.71.172.224/28",
-    "139.217.52.0/28",
-    "191.233.204.224/28",
-    "20.36.108.32/28",
-    "20.43.120.16/28",
-    "40.79.131.208/28",
-    "40.79.186.176/28",
-    "52.231.148.0/28",
-    "20.79.107.240/28",
-    "51.105.67.0/28",
-    "20.125.163.80/28",
-    "40.77.188.0/22",
-    "65.55.210.0/24",
-    "199.30.24.0/23",
-    "40.77.202.0/24",
-    "40.77.139.0/25",
-    "20.74.197.0/28",
-    "20.15.133.160/27",
-    "40.77.177.0/24",
-    "40.77.178.0/23"
-  ]
-- name: duckduckbot
-  user_agent_regex: DuckDuckBot/1\.1; \(\+http\://duckduckgo\.com/duckduckbot\.html\)
-  action: ALLOW
-  # https://duckduckgo.com/duckduckgo-help-pages/results/duckduckbot
-  remote_addresses: [
-    "57.152.72.128/32",
-    "51.8.253.152/32",
-    "40.80.242.63/32",
-    "20.12.141.99/32",
-    "20.49.136.28/32",
-    "51.116.131.221/32",
-    "51.107.40.209/32",
-    "20.40.133.240/32",
-    "20.50.168.91/32",
-    "51.120.48.122/32",
-    "20.193.45.113/32",
-    "40.76.173.151/32",
-    "40.76.163.7/32",
-    "20.185.79.47/32",
-    "52.142.26.175/32",
-    "20.185.79.15/32",
-    "52.142.24.149/32",
-    "40.76.162.208/32",
-    "40.76.163.23/32",
-    "40.76.162.191/32",
-    "40.76.162.247/32",
-    "40.88.21.235/32",
-    "20.191.45.212/32",
-    "52.146.59.12/32",
-    "52.146.59.156/32",
-    "52.146.59.154/32",
-    "52.146.58.236/32",
-    "20.62.224.44/32",
-    "51.104.180.53/32",
-    "51.104.180.47/32",
-    "51.104.180.26/32",
-    "51.104.146.225/32",
-    "51.104.146.235/32",
-    "20.73.202.147/32",
-    "20.73.132.240/32",
-    "20.71.12.143/32",
-    "20.56.197.58/32",
-    "20.56.197.63/32",
-    "20.43.150.93/32",
-    "20.43.150.85/32",
-    "20.44.222.1/32",
-    "40.89.243.175/32",
-    "13.89.106.77/32",
-    "52.143.242.6/32",
-    "52.143.241.111/32",
-    "52.154.60.82/32",
-    "20.197.209.11/32",
-    "20.197.209.27/32",
-    "20.226.133.105/32",
-    "191.234.216.4/32",
-    "191.234.216.178/32",
-    "20.53.92.211/32",
-    "20.53.91.2/32",
-    "20.207.99.197/32",
-    "20.207.97.190/32",
-    "40.81.250.205/32",
-    "40.64.106.11/32",
-    "40.64.105.247/32",
-    "20.72.242.93/32",
-    "20.99.255.235/32",
-    "20.113.3.121/32",
-    "52.224.16.221/32",
-    "52.224.21.53/32",
-    "52.224.20.204/32",
-    "52.224.21.19/32",
-    "52.224.20.249/32",
-    "52.224.20.203/32",
-    "52.224.20.190/32",
-    "52.224.16.229/32",
-    "52.224.21.20/32",
-    "52.146.63.80/32",
-    "52.224.20.227/32",
-    "52.224.20.193/32",
-    "52.190.37.160/32",
-    "52.224.21.23/32",
-    "52.224.20.223/32",
-    "52.224.20.181/32",
-    "52.224.21.49/32",
-    "52.224.21.55/32",
-    "52.224.21.61/32",
-    "52.224.19.152/32",
-    "52.224.20.186/32",
-    "52.224.21.27/32",
-    "52.224.21.51/32",
-    "52.224.20.174/32",
-    "52.224.21.4/32",
-    "51.104.164.109/32",
-    "51.104.167.71/32",
-    "51.104.160.177/32",
-    "51.104.162.149/32",
-    "51.104.167.95/32",
-    "51.104.167.54/32",
-    "51.104.166.111/32",
-    "51.104.167.88/32",
-    "51.104.161.32/32",
-    "51.104.163.250/32",
-    "51.104.164.189/32",
-    "51.104.167.19/32",
-    "51.104.160.167/32",
-    "51.104.167.110/32",
-    "20.191.44.119/32",
-    "51.104.167.104/32",
-    "20.191.44.234/32",
-    "51.104.164.215/32",
-    "51.104.167.52/32",
-    "20.191.44.22/32",
-    "51.104.167.87/32",
-    "51.104.167.96/32",
-    "20.191.44.16/32",
-    "51.104.167.61/32",
-    "51.104.164.147/32",
-    "20.50.48.159/32",
-    "40.114.182.172/32",
-    "20.50.50.130/32",
-    "20.50.50.163/32",
-    "20.50.50.46/32",
-    "40.114.182.153/32",
-    "20.50.50.118/32",
-    "20.50.49.55/32",
-    "20.50.49.25/32",
-    "40.114.183.251/32",
-    "20.50.50.123/32",
-    "20.50.49.237/32",
-    "20.50.48.192/32",
-    "20.50.50.134/32",
-    "51.138.90.233/32",
-    "40.114.183.196/32",
-    "20.50.50.146/32",
-    "40.114.183.88/32",
-    "20.50.50.145/32",
-    "20.50.50.121/32",
-    "20.50.49.40/32",
-    "51.138.90.206/32",
-    "40.114.182.45/32",
-    "51.138.90.161/32",
-    "20.50.49.0/32",
-    "40.119.232.215/32",
-    "104.43.55.167/32",
-    "40.119.232.251/32",
-    "40.119.232.50/32",
-    "40.119.232.146/32",
-    "40.119.232.218/32",
-    "104.43.54.127/32",
-    "104.43.55.117/32",
-    "104.43.55.116/32",
-    "104.43.55.166/32",
-    "52.154.169.50/32",
-    "52.154.171.70/32",
-    "52.154.170.229/32",
-    "52.154.170.113/32",
-    "52.154.171.44/32",
-    "52.154.172.2/32",
-    "52.143.244.81/32",
-    "52.154.171.87/32",
-    "52.154.171.250/32",
-    "52.154.170.28/32",
-    "52.154.170.122/32",
-    "52.143.243.117/32",
-    "52.143.247.235/32",
-    "52.154.171.235/32",
-    "52.154.171.196/32",
-    "52.154.171.0/32",
-    "52.154.170.243/32",
-    "52.154.170.26/32",
-    "52.154.169.200/32",
-    "52.154.170.96/32",
-    "52.154.170.88/32",
-    "52.154.171.150/32",
-    "52.154.171.205/32",
-    "52.154.170.117/32",
-    "52.154.170.209/32",
-    "191.235.202.48/32",
-    "191.233.3.202/32",
-    "191.235.201.214/32",
-    "191.233.3.197/32",
-    "191.235.202.38/32",
-    "20.53.78.144/32",
-    "20.193.24.10/32",
-    "20.53.78.236/32",
-    "20.53.78.138/32",
-    "20.53.78.123/32",
-    "20.53.78.106/32",
-    "20.193.27.215/32",
-    "20.193.25.197/32",
-    "20.193.12.126/32",
-    "20.193.24.251/32",
-    "20.204.242.101/32",
-    "20.207.72.113/32",
-    "20.204.242.19/32",
-    "20.219.45.67/32",
-    "20.207.72.11/32",
-    "20.219.45.190/32",
-    "20.204.243.55/32",
-    "20.204.241.148/32",
-    "20.207.72.110/32",
-    "20.204.240.172/32",
-    "20.207.72.21/32",
-    "20.204.246.81/32",
-    "20.207.107.181/32",
-    "20.204.246.254/32",
-    "20.219.43.246/32",
-    "52.149.25.43/32",
-    "52.149.61.51/32",
-    "52.149.58.139/32",
-    "52.149.60.38/32",
-    "52.148.165.38/32",
-    "52.143.95.162/32",
-    "52.149.56.151/32",
-    "52.149.30.45/32",
-    "52.149.58.173/32",
-    "52.143.95.204/32",
-    "52.149.28.83/32",
-    "52.149.58.69/32",
-    "52.148.161.87/32",
-    "52.149.58.27/32",
-    "52.149.28.18/32",
-    "20.79.226.26/32",
-    "20.79.239.66/32",
-    "20.79.238.198/32",
-    "20.113.14.159/32",
-    "20.75.144.152/32",
-    "20.43.172.120/32",
-    "20.53.134.160/32",
-    "20.201.15.208/32",
-    "20.93.28.24/32",
-    "20.61.34.40/32",
-    "52.242.224.168/32",
-    "20.80.129.80/32",
-    "20.195.108.47/32",
-    "4.195.133.120/32",
-    "4.228.76.163/32",
-    "4.182.131.108/32",
-    "4.209.224.56/32",
-    "108.141.83.74/32",
-    "4.213.46.14/32",
-    "172.169.17.165/32",
-    "51.8.71.117/32",
-    "20.3.1.178/32",
-    "52.149.56.151/32",
-    "52.149.30.45/32",
-    "52.149.58.173/32",
-    "52.143.95.204/32",
-    "52.149.28.83/32",
-    "52.149.58.69/32",
-    "52.148.161.87/32",
-    "52.149.58.27/32",
-    "52.149.28.18/32",
-    "20.79.226.26/32",
-    "20.79.239.66/32",
-    "20.79.238.198/32",
-    "20.113.14.159/32",
-    "20.75.144.152/32",
-    "20.43.172.120/32",
-    "20.53.134.160/32",
-    "20.201.15.208/32",
-    "20.93.28.24/32",
-    "20.61.34.40/32",
-    "52.242.224.168/32",
-    "20.80.129.80/32",
-    "20.195.108.47/32",
-    "4.195.133.120/32",
-    "4.228.76.163/32",
-    "4.182.131.108/32",
-    "4.209.224.56/32",
-    "108.141.83.74/32",
-    "4.213.46.14/32",
-    "172.169.17.165/32",
-    "51.8.71.117/32",
-    "20.3.1.178/32"
-  ]
-- name: qwantbot
-  user_agent_regex: \+https\://help\.qwant\.com/bot/
-  action: ALLOW
-  # https://help.qwant.com/wp-content/uploads/sites/2/2025/01/qwantbot.json
-  remote_addresses: [ "91.242.162.0/24" ]
-- name: internet-archive
-  action: ALLOW
-  # https://ipinfo.io/AS7941
-  remote_addresses: [
-    "207.241.224.0/20",
-    "208.70.24.0/21",
-    "2620:0:9c0::/48"
-  ]
-- name: kagibot
-  user_agent_regex: \+https\://kagi\.com/bot
-  action: ALLOW
-  # https://kagi.com/bot
-  remote_addresses: [
-    "216.18.205.234/32",
-    "35.212.27.76/32",
-    "104.254.65.50/32",
-    "209.151.156.194/32"
-  ]
-- name: marginalia
-  user_agent_regex: search\.marginalia\.nu
-  action: ALLOW
-  # Received directly over email
-  remote_addresses: [
-    "193.183.0.162/31",
-    "193.183.0.164/30",
-    "193.183.0.168/30",
-    "193.183.0.172/31",
-    "193.183.0.174/32"
-  ]
-- name: mojeekbot
-  user_agent_regex: http\://www\.mojeek\.com/bot\.html
-  action: ALLOW
-  # https://www.mojeek.com/bot.html
-  remote_addresses: [ "5.102.173.71/32" ]
+# Search engines to allow
+- import: (data)/crawlers/googlebot.yaml
+- import: (data)/crawlers/bingbot.yaml
+- import: (data)/crawlers/duckduckbot.yaml
+- import: (data)/crawlers/qwantbot.yaml
+- import: (data)/crawlers/internet-archive.yaml
+- import: (data)/crawlers/kagibot.yaml
+- import: (data)/crawlers/marginalia.yaml
+- import: (data)/crawlers/mojeekbot.yaml
 
-# Common "keeping the internet working" routes
-- name: well-known
-  path_regex: ^/.well-known/.*$
-  action: ALLOW
-- name: favicon
-  path_regex: ^/favicon.ico$
-  action: ALLOW
-- name: robots-txt
-  path_regex: ^/robots.txt$
-  action: ALLOW
+# Allow common "keeping the internet working" routes (well-known, favicon, robots.txt)
+- import: (data)/common/keep-internet-working.yaml
 
 # # Punish any bot with "bot" in the user-agent string
+# # This is known to have a high false-positive rate, use at your own risk
 # - name: generic-bot-catchall
 #   user_agent_regex: (?i:bot|crawler)
 #   action: CHALLENGE
@@ -654,6 +41,7 @@ bots:
 #     report_as: 4    # lie to the operator
 #     algorithm: slow # intentionally waste CPU cycles and time
 
+# Generic catchall rule
 - name: generic-browser
   user_agent_regex: >
     Mozilla|Opera

+ 4 - 0
data/bots/ai-robots-txt.yaml

@@ -0,0 +1,4 @@
+- name: "ai-robots-txt"
+  user_agent_regex: >-
+    AI2Bot|Ai2Bot-Dolma|Amazonbot|anthropic-ai|Applebot|Applebot-Extended|Brightbot 1.0|Bytespider|CCBot|ChatGPT-User|Claude-Web|ClaudeBot|cohere-ai|cohere-training-data-crawler|Crawlspace|Diffbot|DuckAssistBot|FacebookBot|FriendlyCrawler|Google-Extended|GoogleOther|GoogleOther-Image|GoogleOther-Video|GPTBot|iaskspider/2.0|ICC-Crawler|ImagesiftBot|img2dataset|ISSCyberRiskCrawler|Kangaroo Bot|Meta-ExternalAgent|Meta-ExternalFetcher|OAI-SearchBot|omgili|omgilibot|PanguBot|Perplexity-User|PerplexityBot|PetalBot|Scrapy|SemrushBot-OCOB|SemrushBot-SWA|Sidetrade indexer bot|Timpibot|VelenPublicWebCrawler|Webzio-Extended|YouBot
+  action: DENY

+ 4 - 0
data/bots/cloudflare-workers.yaml

@@ -0,0 +1,4 @@
+- name: cloudflare-workers
+  headers_regex:
+    CF-Worker: .*
+  action: DENY

+ 9 - 0
data/bots/headless-browsers.yaml

@@ -0,0 +1,9 @@
+- name: lightpanda
+  user_agent_regex: ^Lightpanda/.*$
+  action: DENY
+- name: headless-chrome
+  user_agent_regex: HeadlessChrome
+  action: DENY
+- name: headless-chromium
+  user_agent_regex: HeadlessChromium
+  action: DENY

+ 3 - 0
data/bots/us-ai-scraper.yaml

@@ -0,0 +1,3 @@
+- name: us-artificial-intelligence-scraper
+  user_agent_regex: \+https\://github\.com/US-Artificial-Intelligence/scraper
+  action: DENY

+ 15 - 0
data/common/allow-private-addresses.yaml

@@ -0,0 +1,15 @@
+- name: ipv4-rfc-1918
+  action: ALLOW
+  remote_addresses:
+  - 10.0.0.0/8
+  - 172.16.0.0/12
+  - 192.168.0.0/16
+  - 100.64.0.0/10 # CGNAT shared address space (RFC 6598), not part of RFC 1918
+- name: ipv6-ula
+  action: ALLOW
+  remote_addresses:
+  - fc00::/7
+- name: ipv6-link-local
+  action: ALLOW
+  remote_addresses:
+  - fe80::/10

+ 10 - 0
data/common/keep-internet-working.yaml

@@ -0,0 +1,10 @@
+# Common "keeping the internet working" routes
+- name: well-known
+  path_regex: ^/\.well-known/.*$
+  action: ALLOW
+- name: favicon
+  path_regex: ^/favicon\.ico$
+  action: ALLOW
+- name: robots-txt
+  path_regex: ^/robots\.txt$
+  action: ALLOW

+ 34 - 0
data/crawlers/bingbot.yaml

@@ -0,0 +1,34 @@
+- name: bingbot
+  user_agent_regex: \+http\://www\.bing\.com/bingbot\.htm
+  action: ALLOW
+  # https://www.bing.com/toolbox/bingbot.json
+  remote_addresses: [
+    "157.55.39.0/24",
+    "207.46.13.0/24",
+    "40.77.167.0/24",
+    "13.66.139.0/24",
+    "13.66.144.0/24",
+    "52.167.144.0/24",
+    "13.67.10.16/28",
+    "13.69.66.240/28",
+    "13.71.172.224/28",
+    "139.217.52.0/28",
+    "191.233.204.224/28",
+    "20.36.108.32/28",
+    "20.43.120.16/28",
+    "40.79.131.208/28",
+    "40.79.186.176/28",
+    "52.231.148.0/28",
+    "20.79.107.240/28",
+    "51.105.67.0/28",
+    "20.125.163.80/28",
+    "40.77.188.0/22",
+    "65.55.210.0/24",
+    "199.30.24.0/23",
+    "40.77.202.0/24",
+    "40.77.139.0/25",
+    "20.74.197.0/28",
+    "20.15.133.160/27",
+    "40.77.177.0/24",
+    "40.77.178.0/23"
+  ]

+ 275 - 0
data/crawlers/duckduckbot.yaml

@@ -0,0 +1,275 @@
+- name: duckduckbot
+  user_agent_regex: DuckDuckBot/1\.1; \(\+http\://duckduckgo\.com/duckduckbot\.html\)
+  action: ALLOW
+  # https://duckduckgo.com/duckduckgo-help-pages/results/duckduckbot
+  remote_addresses: [
+    "57.152.72.128/32",
+    "51.8.253.152/32",
+    "40.80.242.63/32",
+    "20.12.141.99/32",
+    "20.49.136.28/32",
+    "51.116.131.221/32",
+    "51.107.40.209/32",
+    "20.40.133.240/32",
+    "20.50.168.91/32",
+    "51.120.48.122/32",
+    "20.193.45.113/32",
+    "40.76.173.151/32",
+    "40.76.163.7/32",
+    "20.185.79.47/32",
+    "52.142.26.175/32",
+    "20.185.79.15/32",
+    "52.142.24.149/32",
+    "40.76.162.208/32",
+    "40.76.163.23/32",
+    "40.76.162.191/32",
+    "40.76.162.247/32",
+    "40.88.21.235/32",
+    "20.191.45.212/32",
+    "52.146.59.12/32",
+    "52.146.59.156/32",
+    "52.146.59.154/32",
+    "52.146.58.236/32",
+    "20.62.224.44/32",
+    "51.104.180.53/32",
+    "51.104.180.47/32",
+    "51.104.180.26/32",
+    "51.104.146.225/32",
+    "51.104.146.235/32",
+    "20.73.202.147/32",
+    "20.73.132.240/32",
+    "20.71.12.143/32",
+    "20.56.197.58/32",
+    "20.56.197.63/32",
+    "20.43.150.93/32",
+    "20.43.150.85/32",
+    "20.44.222.1/32",
+    "40.89.243.175/32",
+    "13.89.106.77/32",
+    "52.143.242.6/32",
+    "52.143.241.111/32",
+    "52.154.60.82/32",
+    "20.197.209.11/32",
+    "20.197.209.27/32",
+    "20.226.133.105/32",
+    "191.234.216.4/32",
+    "191.234.216.178/32",
+    "20.53.92.211/32",
+    "20.53.91.2/32",
+    "20.207.99.197/32",
+    "20.207.97.190/32",
+    "40.81.250.205/32",
+    "40.64.106.11/32",
+    "40.64.105.247/32",
+    "20.72.242.93/32",
+    "20.99.255.235/32",
+    "20.113.3.121/32",
+    "52.224.16.221/32",
+    "52.224.21.53/32",
+    "52.224.20.204/32",
+    "52.224.21.19/32",
+    "52.224.20.249/32",
+    "52.224.20.203/32",
+    "52.224.20.190/32",
+    "52.224.16.229/32",
+    "52.224.21.20/32",
+    "52.146.63.80/32",
+    "52.224.20.227/32",
+    "52.224.20.193/32",
+    "52.190.37.160/32",
+    "52.224.21.23/32",
+    "52.224.20.223/32",
+    "52.224.20.181/32",
+    "52.224.21.49/32",
+    "52.224.21.55/32",
+    "52.224.21.61/32",
+    "52.224.19.152/32",
+    "52.224.20.186/32",
+    "52.224.21.27/32",
+    "52.224.21.51/32",
+    "52.224.20.174/32",
+    "52.224.21.4/32",
+    "51.104.164.109/32",
+    "51.104.167.71/32",
+    "51.104.160.177/32",
+    "51.104.162.149/32",
+    "51.104.167.95/32",
+    "51.104.167.54/32",
+    "51.104.166.111/32",
+    "51.104.167.88/32",
+    "51.104.161.32/32",
+    "51.104.163.250/32",
+    "51.104.164.189/32",
+    "51.104.167.19/32",
+    "51.104.160.167/32",
+    "51.104.167.110/32",
+    "20.191.44.119/32",
+    "51.104.167.104/32",
+    "20.191.44.234/32",
+    "51.104.164.215/32",
+    "51.104.167.52/32",
+    "20.191.44.22/32",
+    "51.104.167.87/32",
+    "51.104.167.96/32",
+    "20.191.44.16/32",
+    "51.104.167.61/32",
+    "51.104.164.147/32",
+    "20.50.48.159/32",
+    "40.114.182.172/32",
+    "20.50.50.130/32",
+    "20.50.50.163/32",
+    "20.50.50.46/32",
+    "40.114.182.153/32",
+    "20.50.50.118/32",
+    "20.50.49.55/32",
+    "20.50.49.25/32",
+    "40.114.183.251/32",
+    "20.50.50.123/32",
+    "20.50.49.237/32",
+    "20.50.48.192/32",
+    "20.50.50.134/32",
+    "51.138.90.233/32",
+    "40.114.183.196/32",
+    "20.50.50.146/32",
+    "40.114.183.88/32",
+    "20.50.50.145/32",
+    "20.50.50.121/32",
+    "20.50.49.40/32",
+    "51.138.90.206/32",
+    "40.114.182.45/32",
+    "51.138.90.161/32",
+    "20.50.49.0/32",
+    "40.119.232.215/32",
+    "104.43.55.167/32",
+    "40.119.232.251/32",
+    "40.119.232.50/32",
+    "40.119.232.146/32",
+    "40.119.232.218/32",
+    "104.43.54.127/32",
+    "104.43.55.117/32",
+    "104.43.55.116/32",
+    "104.43.55.166/32",
+    "52.154.169.50/32",
+    "52.154.171.70/32",
+    "52.154.170.229/32",
+    "52.154.170.113/32",
+    "52.154.171.44/32",
+    "52.154.172.2/32",
+    "52.143.244.81/32",
+    "52.154.171.87/32",
+    "52.154.171.250/32",
+    "52.154.170.28/32",
+    "52.154.170.122/32",
+    "52.143.243.117/32",
+    "52.143.247.235/32",
+    "52.154.171.235/32",
+    "52.154.171.196/32",
+    "52.154.171.0/32",
+    "52.154.170.243/32",
+    "52.154.170.26/32",
+    "52.154.169.200/32",
+    "52.154.170.96/32",
+    "52.154.170.88/32",
+    "52.154.171.150/32",
+    "52.154.171.205/32",
+    "52.154.170.117/32",
+    "52.154.170.209/32",
+    "191.235.202.48/32",
+    "191.233.3.202/32",
+    "191.235.201.214/32",
+    "191.233.3.197/32",
+    "191.235.202.38/32",
+    "20.53.78.144/32",
+    "20.193.24.10/32",
+    "20.53.78.236/32",
+    "20.53.78.138/32",
+    "20.53.78.123/32",
+    "20.53.78.106/32",
+    "20.193.27.215/32",
+    "20.193.25.197/32",
+    "20.193.12.126/32",
+    "20.193.24.251/32",
+    "20.204.242.101/32",
+    "20.207.72.113/32",
+    "20.204.242.19/32",
+    "20.219.45.67/32",
+    "20.207.72.11/32",
+    "20.219.45.190/32",
+    "20.204.243.55/32",
+    "20.204.241.148/32",
+    "20.207.72.110/32",
+    "20.204.240.172/32",
+    "20.207.72.21/32",
+    "20.204.246.81/32",
+    "20.207.107.181/32",
+    "20.204.246.254/32",
+    "20.219.43.246/32",
+    "52.149.25.43/32",
+    "52.149.61.51/32",
+    "52.149.58.139/32",
+    "52.149.60.38/32",
+    "52.148.165.38/32",
+    "52.143.95.162/32",
+    "52.149.56.151/32",
+    "52.149.30.45/32",
+    "52.149.58.173/32",
+    "52.143.95.204/32",
+    "52.149.28.83/32",
+    "52.149.58.69/32",
+    "52.148.161.87/32",
+    "52.149.58.27/32",
+    "52.149.28.18/32",
+    "20.79.226.26/32",
+    "20.79.239.66/32",
+    "20.79.238.198/32",
+    "20.113.14.159/32",
+    "20.75.144.152/32",
+    "20.43.172.120/32",
+    "20.53.134.160/32",
+    "20.201.15.208/32",
+    "20.93.28.24/32",
+    "20.61.34.40/32",
+    "52.242.224.168/32",
+    "20.80.129.80/32",
+    "20.195.108.47/32",
+    "4.195.133.120/32",
+    "4.228.76.163/32",
+    "4.182.131.108/32",
+    "4.209.224.56/32",
+    "108.141.83.74/32",
+    "4.213.46.14/32",
+    "172.169.17.165/32",
+    "51.8.71.117/32",
+    "20.3.1.178/32",
+    "52.149.56.151/32",
+    "52.149.30.45/32",
+    "52.149.58.173/32",
+    "52.143.95.204/32",
+    "52.149.28.83/32",
+    "52.149.58.69/32",
+    "52.148.161.87/32",
+    "52.149.58.27/32",
+    "52.149.28.18/32",
+    "20.79.226.26/32",
+    "20.79.239.66/32",
+    "20.79.238.198/32",
+    "20.113.14.159/32",
+    "20.75.144.152/32",
+    "20.43.172.120/32",
+    "20.53.134.160/32",
+    "20.201.15.208/32",
+    "20.93.28.24/32",
+    "20.61.34.40/32",
+    "52.242.224.168/32",
+    "20.80.129.80/32",
+    "20.195.108.47/32",
+    "4.195.133.120/32",
+    "4.228.76.163/32",
+    "4.182.131.108/32",
+    "4.209.224.56/32",
+    "108.141.83.74/32",
+    "4.213.46.14/32",
+    "172.169.17.165/32",
+    "51.8.71.117/32",
+    "20.3.1.178/32"
+  ]

+ 263 - 0
data/crawlers/googlebot.yaml

@@ -0,0 +1,263 @@
+- name: googlebot
+  user_agent_regex: \+http\://www\.google\.com/bot\.html
+  action: ALLOW
+  # https://developers.google.com/static/search/apis/ipranges/googlebot.json
+  remote_addresses: [
+    "2001:4860:4801:10::/64",
+    "2001:4860:4801:11::/64",
+    "2001:4860:4801:12::/64",
+    "2001:4860:4801:13::/64",
+    "2001:4860:4801:14::/64",
+    "2001:4860:4801:15::/64",
+    "2001:4860:4801:16::/64",
+    "2001:4860:4801:17::/64",
+    "2001:4860:4801:18::/64",
+    "2001:4860:4801:19::/64",
+    "2001:4860:4801:1a::/64",
+    "2001:4860:4801:1b::/64",
+    "2001:4860:4801:1c::/64",
+    "2001:4860:4801:1d::/64",
+    "2001:4860:4801:1e::/64",
+    "2001:4860:4801:1f::/64",
+    "2001:4860:4801:20::/64",
+    "2001:4860:4801:21::/64",
+    "2001:4860:4801:22::/64",
+    "2001:4860:4801:23::/64",
+    "2001:4860:4801:24::/64",
+    "2001:4860:4801:25::/64",
+    "2001:4860:4801:26::/64",
+    "2001:4860:4801:27::/64",
+    "2001:4860:4801:28::/64",
+    "2001:4860:4801:29::/64",
+    "2001:4860:4801:2::/64",
+    "2001:4860:4801:2a::/64",
+    "2001:4860:4801:2b::/64",
+    "2001:4860:4801:2c::/64",
+    "2001:4860:4801:2d::/64",
+    "2001:4860:4801:2e::/64",
+    "2001:4860:4801:2f::/64",
+    "2001:4860:4801:31::/64",
+    "2001:4860:4801:32::/64",
+    "2001:4860:4801:33::/64",
+    "2001:4860:4801:34::/64",
+    "2001:4860:4801:35::/64",
+    "2001:4860:4801:36::/64",
+    "2001:4860:4801:37::/64",
+    "2001:4860:4801:38::/64",
+    "2001:4860:4801:39::/64",
+    "2001:4860:4801:3a::/64",
+    "2001:4860:4801:3b::/64",
+    "2001:4860:4801:3c::/64",
+    "2001:4860:4801:3d::/64",
+    "2001:4860:4801:3e::/64",
+    "2001:4860:4801:40::/64",
+    "2001:4860:4801:41::/64",
+    "2001:4860:4801:42::/64",
+    "2001:4860:4801:43::/64",
+    "2001:4860:4801:44::/64",
+    "2001:4860:4801:45::/64",
+    "2001:4860:4801:46::/64",
+    "2001:4860:4801:47::/64",
+    "2001:4860:4801:48::/64",
+    "2001:4860:4801:49::/64",
+    "2001:4860:4801:4a::/64",
+    "2001:4860:4801:4b::/64",
+    "2001:4860:4801:4c::/64",
+    "2001:4860:4801:50::/64",
+    "2001:4860:4801:51::/64",
+    "2001:4860:4801:52::/64",
+    "2001:4860:4801:53::/64",
+    "2001:4860:4801:54::/64",
+    "2001:4860:4801:55::/64",
+    "2001:4860:4801:56::/64",
+    "2001:4860:4801:60::/64",
+    "2001:4860:4801:61::/64",
+    "2001:4860:4801:62::/64",
+    "2001:4860:4801:63::/64",
+    "2001:4860:4801:64::/64",
+    "2001:4860:4801:65::/64",
+    "2001:4860:4801:66::/64",
+    "2001:4860:4801:67::/64",
+    "2001:4860:4801:68::/64",
+    "2001:4860:4801:69::/64",
+    "2001:4860:4801:6a::/64",
+    "2001:4860:4801:6b::/64",
+    "2001:4860:4801:6c::/64",
+    "2001:4860:4801:6d::/64",
+    "2001:4860:4801:6e::/64",
+    "2001:4860:4801:6f::/64",
+    "2001:4860:4801:70::/64",
+    "2001:4860:4801:71::/64",
+    "2001:4860:4801:72::/64",
+    "2001:4860:4801:73::/64",
+    "2001:4860:4801:74::/64",
+    "2001:4860:4801:75::/64",
+    "2001:4860:4801:76::/64",
+    "2001:4860:4801:77::/64",
+    "2001:4860:4801:78::/64",
+    "2001:4860:4801:79::/64",
+    "2001:4860:4801:80::/64",
+    "2001:4860:4801:81::/64",
+    "2001:4860:4801:82::/64",
+    "2001:4860:4801:83::/64",
+    "2001:4860:4801:84::/64",
+    "2001:4860:4801:85::/64",
+    "2001:4860:4801:86::/64",
+    "2001:4860:4801:87::/64",
+    "2001:4860:4801:88::/64",
+    "2001:4860:4801:90::/64",
+    "2001:4860:4801:91::/64",
+    "2001:4860:4801:92::/64",
+    "2001:4860:4801:93::/64",
+    "2001:4860:4801:94::/64",
+    "2001:4860:4801:95::/64",
+    "2001:4860:4801:96::/64",
+    "2001:4860:4801:a0::/64",
+    "2001:4860:4801:a1::/64",
+    "2001:4860:4801:a2::/64",
+    "2001:4860:4801:a3::/64",
+    "2001:4860:4801:a4::/64",
+    "2001:4860:4801:a5::/64",
+    "2001:4860:4801:c::/64",
+    "2001:4860:4801:f::/64",
+    "192.178.5.0/27",
+    "192.178.6.0/27",
+    "192.178.6.128/27",
+    "192.178.6.160/27",
+    "192.178.6.192/27",
+    "192.178.6.32/27",
+    "192.178.6.64/27",
+    "192.178.6.96/27",
+    "34.100.182.96/28",
+    "34.101.50.144/28",
+    "34.118.254.0/28",
+    "34.118.66.0/28",
+    "34.126.178.96/28",
+    "34.146.150.144/28",
+    "34.147.110.144/28",
+    "34.151.74.144/28",
+    "34.152.50.64/28",
+    "34.154.114.144/28",
+    "34.155.98.32/28",
+    "34.165.18.176/28",
+    "34.175.160.64/28",
+    "34.176.130.16/28",
+    "34.22.85.0/27",
+    "34.64.82.64/28",
+    "34.65.242.112/28",
+    "34.80.50.80/28",
+    "34.88.194.0/28",
+    "34.89.10.80/28",
+    "34.89.198.80/28",
+    "34.96.162.48/28",
+    "35.247.243.240/28",
+    "66.249.64.0/27",
+    "66.249.64.128/27",
+    "66.249.64.160/27",
+    "66.249.64.224/27",
+    "66.249.64.32/27",
+    "66.249.64.64/27",
+    "66.249.64.96/27",
+    "66.249.65.0/27",
+    "66.249.65.128/27",
+    "66.249.65.160/27",
+    "66.249.65.192/27",
+    "66.249.65.224/27",
+    "66.249.65.32/27",
+    "66.249.65.64/27",
+    "66.249.65.96/27",
+    "66.249.66.0/27",
+    "66.249.66.128/27",
+    "66.249.66.160/27",
+    "66.249.66.192/27",
+    "66.249.66.224/27",
+    "66.249.66.32/27",
+    "66.249.66.64/27",
+    "66.249.66.96/27",
+    "66.249.68.0/27",
+    "66.249.68.128/27",
+    "66.249.68.32/27",
+    "66.249.68.64/27",
+    "66.249.68.96/27",
+    "66.249.69.0/27",
+    "66.249.69.128/27",
+    "66.249.69.160/27",
+    "66.249.69.192/27",
+    "66.249.69.224/27",
+    "66.249.69.32/27",
+    "66.249.69.64/27",
+    "66.249.69.96/27",
+    "66.249.70.0/27",
+    "66.249.70.128/27",
+    "66.249.70.160/27",
+    "66.249.70.192/27",
+    "66.249.70.224/27",
+    "66.249.70.32/27",
+    "66.249.70.64/27",
+    "66.249.70.96/27",
+    "66.249.71.0/27",
+    "66.249.71.128/27",
+    "66.249.71.160/27",
+    "66.249.71.192/27",
+    "66.249.71.224/27",
+    "66.249.71.32/27",
+    "66.249.71.64/27",
+    "66.249.71.96/27",
+    "66.249.72.0/27",
+    "66.249.72.128/27",
+    "66.249.72.160/27",
+    "66.249.72.192/27",
+    "66.249.72.224/27",
+    "66.249.72.32/27",
+    "66.249.72.64/27",
+    "66.249.72.96/27",
+    "66.249.73.0/27",
+    "66.249.73.128/27",
+    "66.249.73.160/27",
+    "66.249.73.192/27",
+    "66.249.73.224/27",
+    "66.249.73.32/27",
+    "66.249.73.64/27",
+    "66.249.73.96/27",
+    "66.249.74.0/27",
+    "66.249.74.128/27",
+    "66.249.74.160/27",
+    "66.249.74.192/27",
+    "66.249.74.32/27",
+    "66.249.74.64/27",
+    "66.249.74.96/27",
+    "66.249.75.0/27",
+    "66.249.75.128/27",
+    "66.249.75.160/27",
+    "66.249.75.192/27",
+    "66.249.75.224/27",
+    "66.249.75.32/27",
+    "66.249.75.64/27",
+    "66.249.75.96/27",
+    "66.249.76.0/27",
+    "66.249.76.128/27",
+    "66.249.76.160/27",
+    "66.249.76.192/27",
+    "66.249.76.224/27",
+    "66.249.76.32/27",
+    "66.249.76.64/27",
+    "66.249.76.96/27",
+    "66.249.77.0/27",
+    "66.249.77.128/27",
+    "66.249.77.160/27",
+    "66.249.77.192/27",
+    "66.249.77.224/27",
+    "66.249.77.32/27",
+    "66.249.77.64/27",
+    "66.249.77.96/27",
+    "66.249.78.0/27",
+    "66.249.78.32/27",
+    "66.249.79.0/27",
+    "66.249.79.128/27",
+    "66.249.79.160/27",
+    "66.249.79.192/27",
+    "66.249.79.224/27",
+    "66.249.79.32/27",
+    "66.249.79.64/27",
+    "66.249.79.96/27"
+  ]

+ 8 - 0
data/crawlers/internet-archive.yaml

@@ -0,0 +1,8 @@
+- name: internet-archive
+  action: ALLOW
+  # https://ipinfo.io/AS7941
+  remote_addresses: [
+    "207.241.224.0/20",
+    "208.70.24.0/21",
+    "2620:0:9c0::/48"
+  ]

+ 10 - 0
data/crawlers/kagibot.yaml

@@ -0,0 +1,10 @@
+- name: kagibot
+  user_agent_regex: \+https\://kagi\.com/bot
+  action: ALLOW
+  # https://kagi.com/bot
+  remote_addresses: [
+    "216.18.205.234/32",
+    "35.212.27.76/32",
+    "104.254.65.50/32",
+    "209.151.156.194/32"
+  ]

+ 11 - 0
data/crawlers/marginalia.yaml

@@ -0,0 +1,11 @@
+- name: marginalia
+  user_agent_regex: search\.marginalia\.nu
+  action: ALLOW
+  # Received directly over email
+  remote_addresses: [
+    "193.183.0.162/31",
+    "193.183.0.164/30",
+    "193.183.0.168/30",
+    "193.183.0.172/31",
+    "193.183.0.174/32"
+  ]

+ 5 - 0
data/crawlers/mojeekbot.yaml

@@ -0,0 +1,5 @@
+- name: mojeekbot
+  user_agent_regex: http\://www\.mojeek\.com/bot\.html
+  action: ALLOW
+  # https://www.mojeek.com/bot.html
+  remote_addresses: [ "5.102.173.71/32" ]

+ 5 - 0
data/crawlers/qwantbot.yaml

@@ -0,0 +1,5 @@
+- name: qwantbot
+  user_agent_regex: \+https\://help\.qwant\.com/bot/
+  action: ALLOW
+  # https://help.qwant.com/wp-content/uploads/sites/2/2025/01/qwantbot.json
+  remote_addresses: [ "91.242.162.0/24" ]

+ 1 - 1
data/embed.go

@@ -3,6 +3,6 @@ package data
 import "embed"
 
 var (
-	//go:embed botPolicies.yaml botPolicies.json
+	//go:embed botPolicies.yaml botPolicies.json apps bots common crawlers
 	BotPolicies embed.FS
 )

+ 1 - 0
docs/docs/CHANGELOG.md

@@ -11,6 +11,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+- Enable [importing configuration snippets](./admin/configuration/import.mdx) ([#321](https://github.com/TecharoHQ/anubis/pull/321))
 - Refactor check logic to be more generic and work on a Checker type
 - Add more AI user agents based on the [ai.robots.txt](https://github.com/ai-robots-txt/ai.robots.txt) project
 - Embedded challenge data in initial HTML response to improve performance

+ 147 - 0
docs/docs/admin/configuration/import.mdx

@@ -0,0 +1,147 @@
+# Importing configuration rules
+
+import Tabs from "@theme/Tabs";
+import TabItem from "@theme/TabItem";
+
+Anubis has the ability to let you import snippets of configuration into the main configuration file. This allows you to break up your config into smaller parts that get logically assembled into one big file.
+
+For example:
+
+<Tabs>
+<TabItem value="json" label="JSON">
+
+```json
+{
+  "bots": [
+    {
+      "import": "(data)/bots/ai-robots-txt.yaml"
+    },
+    {
+      "import": "(data)/bots/cloudflare-workers.yaml"
+    }
+  ]
+}
+```
+
+</TabItem>
+<TabItem value="yaml" label="YAML" default>
+
+```yaml
+bots:
+  # Pathological bots to deny
+  - # This correlates to data/bots/ai-robots-txt.yaml in the source tree
+    import: (data)/bots/ai-robots-txt.yaml
+  - import: (data)/bots/cloudflare-workers.yaml
+```
+
+</TabItem>
+</Tabs>
+
+Of note, a bot rule can either have inline bot configuration or import a bot config snippet. You cannot do both in a single bot rule.
+
+<Tabs>
+<TabItem value="json" label="JSON">
+
+```json
+{
+  "bots": [
+    {
+      "import": "(data)/bots/ai-robots-txt.yaml",
+      "name": "generic-browser",
+      "user_agent_regex": "Mozilla|Opera\n",
+      "action": "CHALLENGE"
+    }
+  ]
+}
+```
+
+</TabItem>
+<TabItem value="yaml" label="YAML" default>
+
+```yaml
+bots:
+  - import: (data)/bots/ai-robots-txt.yaml
+    name: generic-browser
+    user_agent_regex: >
+      Mozilla|Opera
+    action: CHALLENGE
+```
+
+</TabItem>
+</Tabs>
+
+This will return an error like this:
+
+```text
+config is not valid:
+config.BotOrImport: can't set bot rules and import values at the same time
+```
+
+Paths can either be prefixed with `(data)` to import from [the data folder in the Anubis source tree](https://github.com/TecharoHQ/anubis/tree/main/data) or point anywhere on the filesystem. If you don't have access to the Anubis source tree, check `/usr/share/docs/anubis/data` or the tarball you extracted Anubis from.
+
+## Writing snippets
+
+Snippets can be written in either JSON or YAML, with a preference for YAML. When writing a snippet, write the bot rules you want directly at the top level of the file in a list.
+
+Here is an example snippet that allows [IPv6 Unique Local Addresses](https://en.wikipedia.org/wiki/Unique_local_address) through Anubis:
+
+<Tabs>
+<TabItem value="json" label="JSON">
+
+```json
+[
+  {
+    "name": "ipv6-ula",
+    "action": "ALLOW",
+    "remote_addresses": ["fc00::/7"]
+  }
+]
+```
+
+</TabItem>
+<TabItem value="yaml" label="YAML" default>
+
+```yaml
+- name: ipv6-ula
+  action: ALLOW
+  remote_addresses:
+    - fc00::/7
+```
+
+</TabItem>
+</Tabs>
+
+## Extracting Anubis' embedded filesystem
+
+You can always extract the list of rules embedded into the Anubis binary with this command:
+
+```text
+anubis --extract-resources=static
+```
+
+This will dump the contents of Anubis' embedded data to a new folder named `static`:
+
+```text
+static
+├── apps
+│   └── gitea-rss-feeds.yaml
+├── botPolicies.json
+├── botPolicies.yaml
+├── bots
+│   ├── ai-robots-txt.yaml
+│   ├── cloudflare-workers.yaml
+│   ├── headless-browsers.yaml
+│   └── us-ai-scraper.yaml
+├── common
+│   ├── allow-private-addresses.yaml
+│   └── keep-internet-working.yaml
+└── crawlers
+    ├── bingbot.yaml
+    ├── duckduckbot.yaml
+    ├── googlebot.yaml
+    ├── internet-archive.yaml
+    ├── kagibot.yaml
+    ├── marginalia.yaml
+    ├── mojeekbot.yaml
+    └── qwantbot.yaml
+```

+ 1 - 0
docs/docs/admin/policies.mdx

@@ -12,6 +12,7 @@ Bot policies let you customize the rules that Anubis uses to allow, deny, or cha
 - Request path
 - User agent string
 - HTTP request header values
+- [Importing other configuration snippets](./configuration/import.mdx)
 
 As of version v1.17.0 or later, configuration can be written in either JSON or YAML.
 

+ 168 - 2
lib/policy/config/config.go

@@ -3,8 +3,15 @@ package config
 import (
 	"errors"
 	"fmt"
+	"io"
+	"io/fs"
 	"net"
+	"os"
 	"regexp"
+	"strings"
+
+	"github.com/TecharoHQ/anubis/data"
+	"k8s.io/apimachinery/pkg/util/yaml"
 )
 
 var (
@@ -17,6 +24,9 @@ var (
 	ErrInvalidPathRegex                  = errors.New("config.Bot: invalid path regex")
 	ErrInvalidHeadersRegex               = errors.New("config.Bot: invalid headers regex")
 	ErrInvalidCIDR                       = errors.New("config.Bot: invalid CIDR")
+	ErrInvalidImportStatement            = errors.New("config.ImportStatement: invalid source file")
+	ErrCantSetBotAndImportValuesAtOnce   = errors.New("config.BotOrImport: can't set bot rules and import values at the same time")
+	ErrMustSetBotOrImportRules           = errors.New("config.BotOrImport: rule definition is invalid, you must set either bot rules or an import statement, not both")
 )
 
 type Rule string
@@ -47,6 +57,24 @@ type BotConfig struct {
 	Challenge      *ChallengeRules   `json:"challenge,omitempty"`
 }
 
+func (b BotConfig) Zero() bool { // Zero reports whether none of the fields checked below are set on b.
+	for _, cond := range []bool{ // each entry is true when the corresponding field is set
+		b.Name != "",
+		b.UserAgentRegex != nil,
+		b.PathRegex != nil,
+		len(b.HeadersRegex) != 0,
+		b.Action != "",
+		len(b.RemoteAddr) != 0,
+		b.Challenge != nil,
+	} {
+		if cond {
+			return false // at least one field is set
+		}
+	}
+
+	return true
+}
+
 func (b BotConfig) Valid() error {
 	var errs []error
 
@@ -151,9 +179,147 @@ func (cr ChallengeRules) Valid() error {
 	return nil
 }
 
+type ImportStatement struct { // ImportStatement is a bot rule entry that pulls in rules from another file.
+	Import string `json:"import"` // path to import; a "(data)/" prefix selects the embedded data.BotPolicies filesystem (see open)
+	Bots   []BotConfig // filled in by load once every imported rule validates; NOTE(review): no json tag — confirm it should not be settable from a config file
+}
+
+func (is *ImportStatement) open() (fs.File, error) { // open returns a handle to the file named by is.Import.
+	if strings.HasPrefix(is.Import, "(data)/") { // "(data)/" paths are read from the embedded data.BotPolicies filesystem
+		fname := strings.TrimPrefix(is.Import, "(data)/")
+		fin, err := data.BotPolicies.Open(fname)
+		return fin, err
+	}
+
+	return os.Open(is.Import) // anything else is opened as a path on the OS filesystem
+}
+
+func (is *ImportStatement) load() error { // load opens, parses and validates the imported rules, storing them in is.Bots on success.
+	fin, err := is.open()
+	if err != nil {
+		return fmt.Errorf("can't open %s: %w", is.Import, err)
+	}
+	defer fin.Close()
+
+	var result []BotConfig // the imported file is decoded as a top-level list of bot rules
+
+	if err := yaml.NewYAMLToJSONDecoder(fin).Decode(&result); err != nil {
+		return fmt.Errorf("can't parse %s: %w", is.Import, err)
+	}
+
+	var errs []error // collect every invalid rule instead of stopping at the first
+
+	for _, b := range result {
+		if err := b.Valid(); err != nil {
+			errs = append(errs, err)
+		}
+	}
+
+	if len(errs) != 0 {
+		return fmt.Errorf("config %s is not valid:\n%w", is.Import, errors.Join(errs...))
+	}
+
+	is.Bots = result // only assigned when every imported rule validated
+
+	return nil
+}
+
+func (is *ImportStatement) Valid() error { // Valid validates the import by loading it; on success is.Bots is populated as a side effect.
+	return is.load()
+}
+
+type BotOrImport struct { // BotOrImport is one entry of the "bots" list: either an inline bot rule or an import statement.
+	*BotConfig       `json:",inline"` // inline bot rule fields
+	*ImportStatement `json:",inline"` // import statement fields
+}
+
+func (boi *BotOrImport) Valid() error { // Valid requires exactly one of BotConfig or ImportStatement and validates whichever is set.
+	if boi.BotConfig != nil && boi.ImportStatement != nil {
+		return ErrCantSetBotAndImportValuesAtOnce
+	}
+
+	if boi.BotConfig != nil {
+		return boi.BotConfig.Valid()
+	}
+
+	if boi.ImportStatement != nil {
+		return boi.ImportStatement.Valid() // loads the imported file as part of validation
+	}
+
+	return ErrMustSetBotOrImportRules // neither is set; NOTE(review): this error's message ends with "not both", which reads like the both-set case
+}
+
+type fileConfig struct { // fileConfig is the shape Load decodes a policy file into, before imports are flattened into Config.
+	Bots  []BotOrImport `json:"bots"` // inline rules and import statements, in file order
+	DNSBL bool          `json:"dnsbl"` // copied unchanged into Config.DNSBL by Load
+}
+
+func (c fileConfig) Valid() error { // Valid checks that at least one entry exists and that every entry is valid, joining all failures into one error.
+	var errs []error
+
+	if len(c.Bots) == 0 {
+		errs = append(errs, ErrNoBotRulesDefined)
+	}
+
+	for _, b := range c.Bots {
+		if err := b.Valid(); err != nil { // for import entries this also loads the imported file
+			errs = append(errs, err)
+		}
+	}
+
+	if len(errs) != 0 {
+		return fmt.Errorf("config is not valid:\n%w", errors.Join(errs...))
+	}
+
+	return nil
+}
+
+func Load(fin io.Reader, fname string) (*Config, error) { // Load decodes a policy config from fin, validates it, and flattens imports into Config.Bots; fname is only used in error messages.
+	var c fileConfig
+	if err := yaml.NewYAMLToJSONDecoder(fin).Decode(&c); err != nil {
+		return nil, fmt.Errorf("can't parse policy config YAML %s: %w", fname, err)
+	}
+
+	if err := c.Valid(); err != nil {
+		return nil, err
+	}
+
+	result := &Config{
+		DNSBL: c.DNSBL,
+	}
+
+	var validationErrs []error // collected across all entries so every problem is reported at once
+
+	for _, boi := range c.Bots {
+		if boi.ImportStatement != nil {
+			if err := boi.load(); err != nil { // NOTE(review): c.Valid() above already called load() for this import, so the file is read a second time here
+				validationErrs = append(validationErrs, err)
+				continue
+			}
+
+			result.Bots = append(result.Bots, boi.ImportStatement.Bots...) // imported rules are spliced in at the import's position
+		}
+
+		if boi.BotConfig != nil {
+			if err := boi.BotConfig.Valid(); err != nil {
+				validationErrs = append(validationErrs, err)
+				continue
+			}
+
+			result.Bots = append(result.Bots, *boi.BotConfig)
+		}
+	}
+
+	if len(validationErrs) > 0 {
+		return nil, fmt.Errorf("errors validating policy config %s: %w", fname, errors.Join(validationErrs...))
+	}
+
+	return result, nil
+}
+
 type Config struct {
-	Bots  []BotConfig `json:"bots"`
-	DNSBL bool        `json:"dnsbl"`
+	Bots  []BotConfig
+	DNSBL bool
 }
 
 func (c Config) Valid() error {

+ 109 - 5
lib/policy/config/config_test.go

@@ -2,10 +2,12 @@ package config
 
 import (
 	"errors"
+	"io/fs"
 	"os"
 	"path/filepath"
 	"testing"
 
+	"github.com/TecharoHQ/anubis/data"
 	"k8s.io/apimachinery/pkg/util/yaml"
 )
 
@@ -219,13 +221,69 @@ func TestConfigValidKnownGood(t *testing.T) {
 			}
 			defer fin.Close()
 
-			var c Config
-			if err := yaml.NewYAMLToJSONDecoder(fin).Decode(&c); err != nil {
-				t.Fatalf("can't decode file: %v", err)
+			c, err := Load(fin, st.Name())
+			if err != nil {
+				t.Fatal(err)
 			}
 
 			if err := c.Valid(); err != nil {
-				t.Fatal(err)
+				t.Error(err)
+			}
+
+			if len(c.Bots) == 0 {
+				t.Error("wanted more than 0 bots, got zero")
+			}
+		})
+	}
+}
+
+func TestImportStatement(t *testing.T) {
+	type testCase struct {
+		name       string
+		importPath string
+		err        error
+	}
+
+	var tests []testCase
+
+	for _, folderName := range []string{
+		"apps",
+		"bots",
+		"common",
+		"crawlers",
+	} {
+		if err := fs.WalkDir(data.BotPolicies, folderName, func(path string, d fs.DirEntry, err error) error {
+			if err != nil {
+				return err
+			}
+			if d.IsDir() {
+				return nil
+			}
+
+			tests = append(tests, testCase{
+				name:       "(data)/" + path,
+				importPath: "(data)/" + path,
+				err:        nil,
+			})
+
+			return nil
+		}); err != nil {
+			t.Fatal(err)
+		}
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			is := &ImportStatement{
+				Import: tt.importPath,
+			}
+
+			if err := is.Valid(); err != nil {
+				t.Errorf("validation error: %v", err)
+			}
+
+			if len(is.Bots) == 0 {
+				t.Error("wanted bot definitions, but got none")
 			}
 		})
 	}
@@ -246,7 +304,7 @@ func TestConfigValidBad(t *testing.T) {
 			}
 			defer fin.Close()
 
-			var c Config
+			var c fileConfig
 			if err := yaml.NewYAMLToJSONDecoder(fin).Decode(&c); err != nil {
 				t.Fatalf("can't decode file: %v", err)
 			}
@@ -259,3 +317,49 @@ func TestConfigValidBad(t *testing.T) {
 		})
 	}
 }
+
+func TestBotConfigZero(t *testing.T) { // TestBotConfigZero checks Zero on the zero value and again after each field is set in turn.
+	var b BotConfig
+	if !b.Zero() {
+		t.Error("zero value BotConfig is not zero value")
+	}
+
+	b.Name = "hi" // NOTE(review): fields are never reset, so every later check passes once Name is set, regardless of the newly set field
+	if b.Zero() {
+		t.Error("BotConfig with name is zero value")
+	}
+
+	b.UserAgentRegex = p(".*") // p is a helper defined elsewhere in this test file
+	if b.Zero() {
+		t.Error("BotConfig with user agent regex is zero value")
+	}
+
+	b.PathRegex = p(".*")
+	if b.Zero() {
+		t.Error("BotConfig with path regex is zero value")
+	}
+
+	b.HeadersRegex = map[string]string{"hi": "there"}
+	if b.Zero() {
+		t.Error("BotConfig with headers regex is zero value")
+	}
+
+	b.Action = RuleAllow
+	if b.Zero() {
+		t.Error("BotConfig with action is zero value")
+	}
+
+	b.RemoteAddr = []string{"::/0"}
+	if b.Zero() {
+		t.Error("BotConfig with remote addresses is zero value")
+	}
+
+	b.Challenge = &ChallengeRules{
+		Difficulty: 4,
+		ReportAs:   4,
+		Algorithm:  AlgorithmFast,
+	}
+	if b.Zero() {
+		t.Error("BotConfig with challenge rules is zero value")
+	}
+}

+ 10 - 0
lib/policy/config/testdata/bad/import_and_bot.json

@@ -0,0 +1,10 @@
+{
+  "bots": [
+    {
+      "import": "(data)/bots/ai-robots-txt.yaml",
+      "name": "generic-browser",
+      "user_agent_regex": "Mozilla|Opera\n",
+      "action": "CHALLENGE"
+    }
+  ]
+}

+ 6 - 0
lib/policy/config/testdata/bad/import_and_bot.yaml

@@ -0,0 +1,6 @@
+bots:
+- import: (data)/bots/ai-robots-txt.yaml
+  name: generic-browser
+  user_agent_regex: >
+    Mozilla|Opera
+  action: CHALLENGE

+ 7 - 0
lib/policy/config/testdata/bad/import_invalid_file.json

@@ -0,0 +1,7 @@
+{
+  "bots": [
+    {
+      "import": "(data)/does-not-exist-fake-file.yaml"
+    }
+  ]
+}

+ 2 - 0
lib/policy/config/testdata/bad/import_invalid_file.yaml

@@ -0,0 +1,2 @@
+bots:
+- import: (data)/does-not-exist-fake-file.yaml

+ 7 - 0
lib/policy/config/testdata/good/import_filesystem.json

@@ -0,0 +1,7 @@
+{
+  "bots": [
+    {
+      "import": "./testdata/hack-test.json"
+    }
+  ]
+}

+ 2 - 0
lib/policy/config/testdata/good/import_filesystem.yaml

@@ -0,0 +1,2 @@
+bots:
+- import: ./testdata/hack-test.yaml

+ 7 - 0
lib/policy/config/testdata/good/import_keep_internet_working.json

@@ -0,0 +1,7 @@
+{
+  "bots": [
+    {
+      "import": "(data)/common/keep-internet-working.yaml"
+    }
+  ]
+}

+ 2 - 0
lib/policy/config/testdata/good/import_keep_internet_working.yaml

@@ -0,0 +1,2 @@
+bots:
+- import: (data)/common/keep-internet-working.yaml

+ 9 - 0
lib/policy/config/testdata/hack-test.json

@@ -0,0 +1,9 @@
+[
+  {
+    "name": "ipv6-ula",
+    "action": "ALLOW",
+    "remote_addresses": [
+      "fc00::/7"
+    ]
+  }
+]

+ 3 - 0
lib/policy/config/testdata/hack-test.yaml

@@ -0,0 +1,3 @@
+- name: well-known
+  path_regex: ^/.well-known/.*$
+  action: ALLOW

+ 4 - 9
lib/policy/policy.go

@@ -7,7 +7,6 @@ import (
 
 	"github.com/prometheus/client_golang/prometheus"
 	"github.com/prometheus/client_golang/prometheus/promauto"
-	"k8s.io/apimachinery/pkg/util/yaml"
 
 	"github.com/TecharoHQ/anubis/lib/policy/config"
 )
@@ -20,26 +19,22 @@ var (
 )
 
 type ParsedConfig struct {
-	orig config.Config
+	orig *config.Config
 
 	Bots              []Bot
 	DNSBL             bool
 	DefaultDifficulty int
 }
 
-func NewParsedConfig(orig config.Config) *ParsedConfig {
+func NewParsedConfig(orig *config.Config) *ParsedConfig {
 	return &ParsedConfig{
 		orig: orig,
 	}
 }
 
 func ParseConfig(fin io.Reader, fname string, defaultDifficulty int) (*ParsedConfig, error) {
-	var c config.Config
-	if err := yaml.NewYAMLToJSONDecoder(fin).Decode(&c); err != nil {
-		return nil, fmt.Errorf("can't parse policy config YAML %s: %w", fname, err)
-	}
-
-	if err := c.Valid(); err != nil {
+	c, err := config.Load(fin, fname)
+	if err != nil {
 		return nil, err
 	}
 

+ 9 - 0
lib/policy/testdata/hack-test.json

@@ -0,0 +1,9 @@
+[
+  {
+    "name": "ipv6-ula",
+    "action": "ALLOW",
+    "remote_addresses": [
+      "fc00::/7"
+    ]
+  }
+]

+ 3 - 0
lib/policy/testdata/hack-test.yaml

@@ -0,0 +1,3 @@
+- name: well-known
+  path_regex: ^/.well-known/.*$
+  action: ALLOW