@@ -11,44 +11,44 @@
## /usr/share/docs/anubis/data or in the tarball you extracted Anubis from.
bots:
-# Pathological bots to deny
-- # This correlates to data/bots/deny-pathological.yaml in the source tree
- # https://github.com/TecharoHQ/anubis/blob/main/data/bots/deny-pathological.yaml
- import: (data)/bots/_deny-pathological.yaml
-- import: (data)/bots/aggressive-brazilian-scrapers.yaml
-
-# Enforce https://github.com/ai-robots-txt/ai.robots.txt
-- import: (data)/bots/ai-robots-txt.yaml
-
-# Search engine crawlers to allow, defaults to:
-# - Google (so they don't try to bypass Anubis)
-# - Bing
-# - DuckDuckGo
-# - Qwant
-# - The Internet Archive
-# - Kagi
-# - Marginalia
-# - Mojeek
-- import: (data)/crawlers/_allow-good.yaml
-
-# Allow common "keeping the internet working" routes (well-known, favicon, robots.txt)
-- import: (data)/common/keep-internet-working.yaml
-
-# # Punish any bot with "bot" in the user-agent string
-# # This is known to have a high false-positive rate, use at your own risk
-# - name: generic-bot-catchall
-# user_agent_regex: (?i:bot|crawler)
-# action: CHALLENGE
-# challenge:
-# difficulty: 16 # impossible
-# report_as: 4 # lie to the operator
-# algorithm: slow # intentionally waste CPU cycles and time
-
-# Generic catchall rule
-- name: generic-browser
- user_agent_regex: >-
- Mozilla|Opera
- action: CHALLENGE
+ # Pathological bots to deny
+ - # This correlates to data/bots/deny-pathological.yaml in the source tree
+ # https://github.com/TecharoHQ/anubis/blob/main/data/bots/deny-pathological.yaml
+ import: (data)/bots/_deny-pathological.yaml
+ - import: (data)/bots/aggressive-brazilian-scrapers.yaml
+
+ # Enforce https://github.com/ai-robots-txt/ai.robots.txt
+ - import: (data)/bots/ai-robots-txt.yaml
+
+ # Search engine crawlers to allow, defaults to:
+ # - Google (so they don't try to bypass Anubis)
+ # - Bing
+ # - DuckDuckGo
+ # - Qwant
+ # - The Internet Archive
+ # - Kagi
+ # - Marginalia
+ # - Mojeek
+ - import: (data)/crawlers/_allow-good.yaml
+
+ # Allow common "keeping the internet working" routes (well-known, favicon, robots.txt)
+ - import: (data)/common/keep-internet-working.yaml
+
+ # # Punish any bot with "bot" in the user-agent string
+ # # This is known to have a high false-positive rate, use at your own risk
+ # - name: generic-bot-catchall
+ # user_agent_regex: (?i:bot|crawler)
+ # action: CHALLENGE
+ # challenge:
+ # difficulty: 16 # impossible
+ # report_as: 4 # lie to the operator
+ # algorithm: slow # intentionally waste CPU cycles and time
+
+ # Generic catchall rule
+ - name: generic-browser
+ user_agent_regex: >-
+ Mozilla|Opera
+ action: CHALLENGE
dnsbl: false
@@ -58,4 +58,4 @@ dnsbl: false
# will stop sending requests once they get it.
status_codes:
CHALLENGE: 200
- DENY: 200
+ DENY: 200