Просмотр исходного кода

feat: add default OpenGraph tags to configuration file (#694)

* feat(config): opengraph passthrough configuration

Signed-off-by: Xe Iaso <me@xeiaso.net>

* chore(ogtags): use config.OpenGraph for configuration

Signed-off-by: Xe Iaso <me@xeiaso.net>

* chore: wire up ogtags config in most of the app

Signed-off-by: Xe Iaso <me@xeiaso.net>

* feat(ogtags): return default tags if they are supplied

Signed-off-by: Xe Iaso <me@xeiaso.net>

* chore: make OpenGraph legal so we have some sanity in reviewing

Signed-off-by: Xe Iaso <me@xeiaso.net>

* chore: spelling

Signed-off-by: Xe Iaso <me@xeiaso.net>

* fix(lib): use OpenGraph.Enabled

Signed-off-by: Xe Iaso <me@xeiaso.net>

* test(lib): load default config file if one is not specified in spawnAnubis

Signed-off-by: Xe Iaso <me@xeiaso.net>

* chore(config): fix ST1005

Signed-off-by: Xe Iaso <me@xeiaso.net>

* docs: document open graph defaults and its new home in the policy file

Signed-off-by: Xe Iaso <me@xeiaso.net>

* docs(installation): point to weight threshold new home

Signed-off-by: Xe Iaso <me@xeiaso.net>

* chore: rename default to override

Signed-off-by: Xe Iaso <me@xeiaso.net>

* chore(default-config): add off-by-default opengraph settings to bot policy file

Signed-off-by: Xe Iaso <me@xeiaso.net>

* fix(anubis): make build

Signed-off-by: Xe Iaso <me@xeiaso.net>

* test(lib): fix build

Signed-off-by: Xe Iaso <me@xeiaso.net>

---------

Signed-off-by: Xe Iaso <me@xeiaso.net>
Xe Iaso 3 дней назад
Родитель
Сommit
4948036f39

+ 2 - 0
.github/actions/spelling/expect.txt

@@ -183,9 +183,11 @@ NONINFRINGEMENT
 nosleep
 OCOB
 ogtags
+ogtitle
 omgili
 omgilibot
 openai
+opengraph
 openrc
 pag
 palemoon

+ 0 - 8
.github/actions/spelling/line_forbidden.patterns

@@ -273,14 +273,6 @@
 # Most people only have two hands. Reword.
 \b(?i)on the third hand\b
 
-# Should be `Open Graph`
-# unless talking about a specific Open Graph implementation:
-# - Java
-# - Node
-# - Py
-# - Ruby
-\bOpenGraph\b
-
 # Should be `OpenShift`
 \bOpenshift\b
 

+ 21 - 15
cmd/anubis/main.go

@@ -331,22 +331,28 @@ func main() {
 		slog.Warn("REDIRECT_DOMAINS is not set, Anubis will only redirect to the same domain a request is coming from, see https://anubis.techaro.lol/docs/admin/configuration/redirect-domains")
 	}
 
+	// If Open Graph passthrough is not enabled in the config file, fall back to
+	// the values from flags / envvars (this also resets any configured overrides).
+	if !policy.OpenGraph.Enabled {
+		policy.OpenGraph.Enabled = *ogPassthrough
+		policy.OpenGraph.ConsiderHost = *ogCacheConsiderHost
+		policy.OpenGraph.TimeToLive = *ogTimeToLive
+		policy.OpenGraph.Override = map[string]string{}
+	}
+
 	s, err := libanubis.New(libanubis.Options{
-		BasePrefix:           *basePrefix,
-		StripBasePrefix:      *stripBasePrefix,
-		Next:                 rp,
-		Policy:               policy,
-		ServeRobotsTXT:       *robotsTxt,
-		PrivateKey:           priv,
-		CookieDomain:         *cookieDomain,
-		CookieExpiration:     *cookieExpiration,
-		CookiePartitioned:    *cookiePartitioned,
-		OGPassthrough:        *ogPassthrough,
-		OGTimeToLive:         *ogTimeToLive,
-		RedirectDomains:      redirectDomainsList,
-		Target:               *target,
-		WebmasterEmail:       *webmasterEmail,
-		OGCacheConsidersHost: *ogCacheConsiderHost,
+		BasePrefix:        *basePrefix,
+		StripBasePrefix:   *stripBasePrefix,
+		Next:              rp,
+		Policy:            policy,
+		ServeRobotsTXT:    *robotsTxt,
+		PrivateKey:        priv,
+		CookieDomain:      *cookieDomain,
+		CookieExpiration:  *cookieExpiration,
+		CookiePartitioned: *cookiePartitioned,
+		RedirectDomains:   redirectDomainsList,
+		Target:            *target,
+		WebmasterEmail:    *webmasterEmail,
 	})
 	if err != nil {
 		log.Fatalf("can't construct libanubis.Server: %v", err)

+ 24 - 0
data/botPolicies.yaml

@@ -84,6 +84,30 @@ bots:
 
 dnsbl: false
 
+# Open Graph passthrough configuration, see here for more information:
+# https://anubis.techaro.lol/docs/admin/configuration/open-graph/
+openGraph:
+  # Enables Open Graph passthrough
+  enabled: false
+  # Enables the use of the HTTP host in the cache key; this enables
+  # caching metadata for multiple HTTP hosts at once.
+  considerHost: false
+  # How long cached OpenGraph metadata should last in memory
+  ttl: 24h
+  # # If set, return these opengraph values instead of looking them up with
+  # # the target service.
+  # #
+  # # Correlates to properties in https://ogp.me/
+  # override:
+  #   # og:title is required, it is the title of the website
+  #   "og:title": "Techaro Anubis"
+  #   "og:description": >-
+  #     Anubis is a Web AI Firewall Utility that helps you fight the bots
+  #     away so that you can maintain uptime at work!
+  #   "description": >-
+  #     Anubis is a Web AI Firewall Utility that helps you fight the bots
+  #     away so that you can maintain uptime at work!
+
 # By default, send HTTP 200 back to clients that either get issued a challenge
 # or a denial. This seems weird, but this is load-bearing due to the fact that
 # the most aggressive scraper bots seem to really, really, want an HTTP 200 and

+ 2 - 0
docs/docs/CHANGELOG.md

@@ -11,6 +11,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+- Move Open Graph configuration [to the policy file](./admin/configuration/open-graph.mdx)
+- Enable support for default Open Graph metadata
 - Replace cidranger with bart for IP range checking, improving IP matching performance by 3-20x with zero heap
   allocations
 - Remove the unused `/test-error` endpoint and update the testing endpoint `/make-challenge` to only be enabled in

+ 33 - 0
docs/docs/admin/configuration/open-graph.mdx

@@ -9,12 +9,45 @@ This page provides detailed information on how to configure [Open Graph tag](htt
 
 ## Configuration Options
 
+Open Graph settings are configured in the `openGraph` section of the [Policy File](../policies.mdx).
+
+```yaml
+openGraph:
+  # Enables Open Graph passthrough
+  enabled: true
+  # Enables the use of the HTTP host in the cache key; this enables
+  # caching metadata for multiple HTTP hosts at once.
+  considerHost: true
+  # How long cached OpenGraph metadata should last in memory
+  ttl: 24h
+  # If set, return these opengraph values instead of looking them up with
+  # the target service.
+  #
+  # Correlates to properties in https://ogp.me/
+  override:
+    # og:title is required, it is the title of the website
+    "og:title": "Techaro Anubis"
+    "og:description": >-
+      Anubis is a Web AI Firewall Utility that helps you fight the bots
+      away so that you can maintain uptime at work!
+    "description": >-
+      Anubis is a Web AI Firewall Utility that helps you fight the bots
+      away so that you can maintain uptime at work!
+```
+
+<details>
+<summary>Configuration flags / envvars (old)</summary>
+
+Open Graph passthrough used to be configured with configuration flags / environment variables. References to these settings are maintained for backward compatibility.
+
 | Name                     | Description                                               | Type     | Default | Example                       |
 | ------------------------ | --------------------------------------------------------- | -------- | ------- | ----------------------------- |
 | `OG_PASSTHROUGH`         | Enables or disables the Open Graph tag passthrough system | Boolean  | `false` | `OG_PASSTHROUGH=true`         |
 | `OG_EXPIRY_TIME`         | Configurable cache expiration time for Open Graph tags    | Duration | `24h`   | `OG_EXPIRY_TIME=1h`           |
 | `OG_CACHE_CONSIDER_HOST` | Enables or disables the use of the host in the cache key  | Boolean  | `false` | `OG_CACHE_CONSIDER_HOST=true` |
 
+</details>
+
 ## Usage
 
 To configure Open Graph tags, set the following options as environment variables, in an environment file, or as flags in your Anubis configuration:

+ 19 - 7
docs/docs/admin/installation.mdx

@@ -4,7 +4,6 @@ title: Setting up Anubis
 
 import RandomKey from "@site/src/components/RandomKey";
 
-
 Anubis is meant to sit between your reverse proxy (such as Nginx or Caddy) and your target service. One instance of Anubis must be used per service you are protecting.
 
 <center>
@@ -30,7 +29,7 @@ TLS terminator)
 Anubis is shipped in the Docker repo [`ghcr.io/techarohq/anubis`](https://github.com/TecharoHQ/anubis/pkgs/container/anubis). The following tags exist for your convenience:
 
 | Tag                 | Meaning                                                                                                                            |
-|:--------------------|:-----------------------------------------------------------------------------------------------------------------------------------|
+| :------------------ | :--------------------------------------------------------------------------------------------------------------------------------- |
 | `latest`            | The latest [tagged release](https://github.com/TecharoHQ/anubis/releases), if you are in doubt, start here.                        |
 | `v<version number>` | The Anubis image for [any given tagged release](https://github.com/TecharoHQ/anubis/tags)                                          |
 | `main`              | The current build on the `main` branch. Only use this if you need the latest and greatest features as they are merged into `main`. |
@@ -43,12 +42,24 @@ Anubis has very minimal system requirements. I suspect that 128Mi of ram may be
 
 For more detailed information on installing Anubis with native packages, please read [the native install directions](./native-install.mdx).
 
-## Environment variables
+## Configuration
+
+Anubis is configurable via environment variables and [the policy file](./policies.mdx). Most settings are currently exposed with environment variables but they are being slowly moved over to the policy file.
+
+### Configuration via the policy file
+
+Currently the following settings are configurable via the policy file:
+
+- [Bot policies](./policies.mdx)
+- [Open Graph passthrough](./configuration/open-graph.mdx)
+- [Weight thresholds](./configuration/thresholds.mdx)
+
+### Environment variables
 
 Anubis uses these environment variables for configuration:
 
 | Environment Variable           | Default value           | Explanation                                                                                                                                                                                                                                                                                                                                                                                                                                                                                             |
-|:-------------------------------|:------------------------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| :----------------------------- | :---------------------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
 | `BASE_PREFIX`                  | unset                   | If set, adds a global prefix to all Anubis endpoints. For example, setting this to `/myapp` would make Anubis accessible at `/myapp/` instead of `/`. This is useful when running Anubis behind a reverse proxy that routes based on path prefixes.                                                                                                                                                                                                                                                     |
 | `BIND`                         | `:8923`                 | The network address that Anubis listens on. For `unix`, set this to a path: `/run/anubis/instance.sock`                                                                                                                                                                                                                                                                                                                                                                                                 |
 | `BIND_NETWORK`                 | `tcp`                   | The address family that Anubis listens on. Accepts `tcp`, `unix` and anything Go's [`net.Listen`](https://pkg.go.dev/net#Listen) supports.                                                                                                                                                                                                                                                                                                                                                              |
@@ -60,9 +71,9 @@ Anubis uses these environment variables for configuration:
 | `ED25519_PRIVATE_KEY_HEX_FILE` | unset                   | Path to a file containing the hex-encoded ed25519 private key. Only one of this or its sister option may be set.                                                                                                                                                                                                                                                                                                                                                                                        |
 | `METRICS_BIND`                 | `:9090`                 | The network address that Anubis serves Prometheus metrics on. See `BIND` for more information.                                                                                                                                                                                                                                                                                                                                                                                                          |
 | `METRICS_BIND_NETWORK`         | `tcp`                   | The address family that the Anubis metrics server listens on. See `BIND_NETWORK` for more information.                                                                                                                                                                                                                                                                                                                                                                                                  |
-| `OG_EXPIRY_TIME`               | `24h`                   | The expiration time for the Open Graph tag cache.                                                                                                                                                                                                                                                                                                                                                                                                                                                       |
-| `OG_PASSTHROUGH`               | `false`                 | If set to `true`, Anubis will enable Open Graph tag passthrough.                                                                                                                                                                                                                                                                                                                                                                                                                                        |
-| `OG_CACHE_CONSIDER_HOST`       | `false`                 | If set to `true`, Anubis will consider the host in the Open Graph tag cache key.                                                                                                                                                                                                                                                                                                                                                                                                                        |
+| `OG_EXPIRY_TIME`               | `24h`                   | The expiration time for the Open Graph tag cache. Prefer using [the policy file](./configuration/open-graph.mdx) to configure the Open Graph subsystem.                                                                                                                                                                                                                                                                                                                                                 |
+| `OG_PASSTHROUGH`               | `false`                 | If set to `true`, Anubis will enable Open Graph tag passthrough. Prefer using [the policy file](./configuration/open-graph.mdx) to configure the Open Graph subsystem.                                                                                                                                                                                                                                                                                                                                  |
+| `OG_CACHE_CONSIDER_HOST`       | `false`                 | If set to `true`, Anubis will consider the host in the Open Graph tag cache key. Prefer using [the policy file](./configuration/open-graph.mdx) to configure the Open Graph subsystem.                                                                                                                                                                                                                                                                                                                  |
 | `POLICY_FNAME`                 | unset                   | The file containing [bot policy configuration](./policies.mdx). See the bot policy documentation for more details. If unset, the default bot policy configuration is used.                                                                                                                                                                                                                                                                                                                              |
 | `REDIRECT_DOMAINS`             | unset                   | If set, restrict the domains that Anubis can redirect to when passing a challenge.<br/><br/>If this is unset, Anubis may redirect to any domain which could cause security issues in the unlikely case that an attacker passes a challenge for your browser and then tricks you into clicking a link to your domain.<br/><br/>Note that if you are hosting Anubis on a non-standard port (`https://example.com:8443`, `http://www.example.net:8080`, etc.), you must also include the port number here. |
 | `SERVE_ROBOTS_TXT`             | `false`                 | If set `true`, Anubis will serve a default `robots.txt` file that disallows all known AI scrapers by name and then additionally disallows every scraper. This is useful if facts and circumstances make it difficult to change the underlying service to serve such a `robots.txt` file.                                                                                                                                                                                                                |
@@ -138,6 +149,7 @@ STRIP_BASE_PREFIX=true
 ```
 
 With this configuration:
+
 - A request to `/myapp/api/users` would be forwarded to your target service as `/api/users`
 - A request to `/myapp/` would be forwarded as `/`
 

+ 4 - 0
internal/ogtags/cache.go

@@ -13,6 +13,10 @@ func (c *OGTagCache) GetOGTags(url *url.URL, originalHost string) (map[string]st
 		return nil, errors.New("nil URL provided, cannot fetch OG tags")
 	}
 
+	if len(c.ogOverride) != 0 {
+		return c.ogOverride, nil
+	}
+
 	target := c.getTarget(url)
 	cacheKey := c.generateCacheKey(target, originalHost)
 

+ 50 - 3
internal/ogtags/cache_test.go

@@ -7,10 +7,49 @@ import (
 	"reflect"
 	"testing"
 	"time"
+
+	"github.com/TecharoHQ/anubis/lib/policy/config"
 )
 
+func TestCacheReturnsDefault(t *testing.T) {
+	want := map[string]string{
+		"og:title":       "Foo bar",
+		"og:description": "The best website ever made!!!1!",
+	}
+	cache := NewOGTagCache("", config.OpenGraph{
+		Enabled:      true,
+		TimeToLive:   time.Minute,
+		ConsiderHost: false,
+		Override:     want,
+	})
+
+	u, err := url.Parse("https://anubis.techaro.lol")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	result, err := cache.GetOGTags(u, "anubis.techaro.lol")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	for k, v := range want {
+		t.Run(k, func(t *testing.T) {
+			if got := result[k]; got != v {
+				t.Logf("want: tags[%q] = %q", k, v)
+				t.Logf("got:  tags[%q] = %q", k, got)
+				t.Error("invalid result from function")
+			}
+		})
+	}
+}
+
 func TestCheckCache(t *testing.T) {
-	cache := NewOGTagCache("http://example.com", true, time.Minute, false)
+	cache := NewOGTagCache("http://example.com", config.OpenGraph{
+		Enabled:      true,
+		TimeToLive:   time.Minute,
+		ConsiderHost: false,
+	})
 
 	// Set up test data
 	urlStr := "http://example.com/page"
@@ -69,7 +108,11 @@ func TestGetOGTags(t *testing.T) {
 	defer ts.Close()
 
 	// Create an instance of OGTagCache with a short TTL for testing
-	cache := NewOGTagCache(ts.URL, true, 1*time.Minute, false)
+	cache := NewOGTagCache(ts.URL, config.OpenGraph{
+		Enabled:      true,
+		TimeToLive:   time.Minute,
+		ConsiderHost: false,
+	})
 
 	// Parse the test server URL
 	parsedURL, err := url.Parse(ts.URL)
@@ -216,7 +259,11 @@ func TestGetOGTagsWithHostConsideration(t *testing.T) {
 	for _, tc := range testCases {
 		t.Run(tc.name, func(t *testing.T) {
 			loadCount = 0 // Reset load count for each test case
-			cache := NewOGTagCache(ts.URL, true, 1*time.Minute, tc.ogCacheConsiderHost)
+			cache := NewOGTagCache(ts.URL, config.OpenGraph{
+				Enabled:      true,
+				TimeToLive:   time.Minute,
+				ConsiderHost: tc.ogCacheConsiderHost,
+			})
 
 			for i, req := range tc.requests {
 				ogTags, err := cache.GetOGTags(parsedURL, req.host)

+ 11 - 2
internal/ogtags/fetch_test.go

@@ -10,6 +10,7 @@ import (
 	"testing"
 	"time"
 
+	"github.com/TecharoHQ/anubis/lib/policy/config"
 	"golang.org/x/net/html"
 )
 
@@ -80,7 +81,11 @@ func TestFetchHTMLDocument(t *testing.T) {
 			}))
 			defer ts.Close()
 
-			cache := NewOGTagCache("", true, time.Minute, false)
+			cache := NewOGTagCache("", config.OpenGraph{
+				Enabled:      true,
+				TimeToLive:   time.Minute,
+				ConsiderHost: false,
+			})
 			doc, err := cache.fetchHTMLDocument(ts.URL, "anything")
 
 			if tt.expectError {
@@ -107,7 +112,11 @@ func TestFetchHTMLDocumentInvalidURL(t *testing.T) {
 		t.Skip("test requires theoretical network egress")
 	}
 
-	cache := NewOGTagCache("", true, time.Minute, false)
+	cache := NewOGTagCache("", config.OpenGraph{
+		Enabled:      true,
+		TimeToLive:   time.Minute,
+		ConsiderHost: false,
+	})
 
 	doc, err := cache.fetchHTMLDocument("http://invalid.url.that.doesnt.exist.example", "anything")
 

+ 7 - 1
internal/ogtags/integration_test.go

@@ -6,6 +6,8 @@ import (
 	"net/url"
 	"testing"
 	"time"
+
+	"github.com/TecharoHQ/anubis/lib/policy/config"
 )
 
 func TestIntegrationGetOGTags(t *testing.T) {
@@ -104,7 +106,11 @@ func TestIntegrationGetOGTags(t *testing.T) {
 	for _, tc := range testCases {
 		t.Run(tc.name, func(t *testing.T) {
 			// Create cache instance
-			cache := NewOGTagCache(ts.URL, true, 1*time.Minute, false)
+			cache := NewOGTagCache(ts.URL, config.OpenGraph{
+				Enabled:      true,
+				TimeToLive:   time.Minute,
+				ConsiderHost: false,
+			})
 
 			// Create URL for test
 			testURL, _ := url.Parse(ts.URL)

+ 4 - 3
internal/ogtags/mem_test.go

@@ -6,6 +6,7 @@ import (
 	"strings"
 	"testing"
 
+	"github.com/TecharoHQ/anubis/lib/policy/config"
 	"golang.org/x/net/html"
 )
 
@@ -29,7 +30,7 @@ func BenchmarkGetTarget(b *testing.B) {
 
 	for _, tt := range tests {
 		b.Run(tt.name, func(b *testing.B) {
-			cache := NewOGTagCache(tt.target, false, 0, false)
+			cache := NewOGTagCache(tt.target, config.OpenGraph{})
 			urls := make([]*url.URL, len(tt.paths))
 			for i, path := range tt.paths {
 				u, _ := url.Parse(path)
@@ -65,7 +66,7 @@ func BenchmarkExtractOGTags(b *testing.B) {
 		</head><body><div><p>Content</p></div></body></html>`,
 	}
 
-	cache := NewOGTagCache("http://example.com", false, 0, false)
+	cache := NewOGTagCache("http://example.com", config.OpenGraph{})
 	docs := make([]*html.Node, len(htmlSamples))
 
 	for i, sample := range htmlSamples {
@@ -83,7 +84,7 @@ func BenchmarkExtractOGTags(b *testing.B) {
 
 // Memory usage test
 func TestMemoryUsage(t *testing.T) {
-	cache := NewOGTagCache("http://example.com", false, 0, false)
+	cache := NewOGTagCache("http://example.com", config.OpenGraph{})
 
 	// Force GC and wait for it to complete
 	runtime.GC()

+ 7 - 4
internal/ogtags/ogtags.go

@@ -10,6 +10,7 @@ import (
 	"time"
 
 	"github.com/TecharoHQ/anubis/decaymap"
+	"github.com/TecharoHQ/anubis/lib/policy/config"
 )
 
 const (
@@ -32,9 +33,10 @@ type OGTagCache struct {
 	ogTimeToLive        time.Duration
 	ogCacheConsiderHost bool
 	ogPassthrough       bool
+	ogOverride          map[string]string
 }
 
-func NewOGTagCache(target string, ogPassthrough bool, ogTimeToLive time.Duration, ogTagsConsiderHost bool) *OGTagCache {
+func NewOGTagCache(target string, conf config.OpenGraph) *OGTagCache {
 	// Predefined approved tags and prefixes
 	defaultApprovedTags := []string{"description", "keywords", "author"}
 	defaultApprovedPrefixes := []string{"og:", "twitter:", "fediverse:"}
@@ -77,9 +79,10 @@ func NewOGTagCache(target string, ogPassthrough bool, ogTimeToLive time.Duration
 	return &OGTagCache{
 		cache:               decaymap.New[string, map[string]string](),
 		targetURL:           parsedTargetURL,
-		ogPassthrough:       ogPassthrough,
-		ogTimeToLive:        ogTimeToLive,
-		ogCacheConsiderHost: ogTagsConsiderHost,
+		ogPassthrough:       conf.Enabled,
+		ogTimeToLive:        conf.TimeToLive,
+		ogCacheConsiderHost: conf.ConsiderHost,
+		ogOverride:          conf.Override,
 		approvedTags:        defaultApprovedTags,
 		approvedPrefixes:    defaultApprovedPrefixes,
 		client:              client,

+ 6 - 5
internal/ogtags/ogtags_fuzz_test.go

@@ -6,6 +6,7 @@ import (
 	"testing"
 	"unicode/utf8"
 
+	"github.com/TecharoHQ/anubis/lib/policy/config"
 	"golang.org/x/net/html"
 )
 
@@ -45,7 +46,7 @@ func FuzzGetTarget(f *testing.F) {
 		}
 
 		// Create cache - should not panic
-		cache := NewOGTagCache(target, false, 0, false)
+		cache := NewOGTagCache(target, config.OpenGraph{})
 
 		// Create URL
 		u := &url.URL{
@@ -129,7 +130,7 @@ func FuzzExtractOGTags(f *testing.F) {
 			return
 		}
 
-		cache := NewOGTagCache("http://example.com", false, 0, false)
+		cache := NewOGTagCache("http://example.com", config.OpenGraph{})
 
 		// Should not panic
 		tags := cache.extractOGTags(doc)
@@ -185,7 +186,7 @@ func FuzzGetTargetRoundTrip(f *testing.F) {
 			t.Skip()
 		}
 
-		cache := NewOGTagCache(target, false, 0, false)
+		cache := NewOGTagCache(target, config.OpenGraph{})
 		u := &url.URL{Path: path, RawQuery: query}
 
 		result := cache.getTarget(u)
@@ -242,7 +243,7 @@ func FuzzExtractMetaTagInfo(f *testing.F) {
 			},
 		}
 
-		cache := NewOGTagCache("http://example.com", false, 0, false)
+		cache := NewOGTagCache("http://example.com", config.OpenGraph{})
 
 		// Should not panic
 		property, content := cache.extractMetaTagInfo(node)
@@ -295,7 +296,7 @@ func BenchmarkFuzzedGetTarget(b *testing.B) {
 
 	for _, input := range inputs {
 		b.Run(input.name, func(b *testing.B) {
-			cache := NewOGTagCache(input.target, false, 0, false)
+			cache := NewOGTagCache(input.target, config.OpenGraph{})
 			u := &url.URL{Path: input.path, RawQuery: input.query}
 
 			b.ResetTimer()

+ 25 - 5
internal/ogtags/ogtags_test.go

@@ -13,6 +13,8 @@ import (
 	"strings"
 	"testing"
 	"time"
+
+	"github.com/TecharoHQ/anubis/lib/policy/config"
 )
 
 func TestNewOGTagCache(t *testing.T) {
@@ -38,7 +40,11 @@ func TestNewOGTagCache(t *testing.T) {
 
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
-			cache := NewOGTagCache(tt.target, tt.ogPassthrough, tt.ogTimeToLive, false)
+			cache := NewOGTagCache(tt.target, config.OpenGraph{
+				Enabled:      tt.ogPassthrough,
+				TimeToLive:   tt.ogTimeToLive,
+				ConsiderHost: false,
+			})
 
 			if cache == nil {
 				t.Fatal("expected non-nil cache, got nil")
@@ -74,7 +80,11 @@ func TestNewOGTagCache_UnixSocket(t *testing.T) {
 	socketPath := filepath.Join(tempDir, "test.sock")
 	target := "unix://" + socketPath
 
-	cache := NewOGTagCache(target, true, 5*time.Minute, false)
+	cache := NewOGTagCache(target, config.OpenGraph{
+		Enabled:      true,
+		TimeToLive:   5 * time.Minute,
+		ConsiderHost: false,
+	})
 
 	if cache == nil {
 		t.Fatal("expected non-nil cache, got nil")
@@ -155,7 +165,11 @@ func TestGetTarget(t *testing.T) {
 
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
-			cache := NewOGTagCache(tt.target, false, time.Minute, false)
+			cache := NewOGTagCache(tt.target, config.OpenGraph{
+				Enabled:      true,
+				TimeToLive:   time.Minute,
+				ConsiderHost: false,
+			})
 
 			u := &url.URL{
 				Path:     tt.path,
@@ -175,7 +189,9 @@ func TestGetTarget(t *testing.T) {
 func TestIntegrationGetOGTags_UnixSocket(t *testing.T) {
 	tempDir := t.TempDir()
 
-	socketPath := filepath.Join(tempDir, "anubis-test.sock")
+	// XXX(Xe): if this is named longer, macOS fails with `bind: invalid argument`
+	// because the unix socket path is too long. I love computers.
+	socketPath := filepath.Join(tempDir, "t")
 
 	// Ensure the socket does not exist initially
 	_ = os.Remove(socketPath)
@@ -222,7 +238,11 @@ func TestIntegrationGetOGTags_UnixSocket(t *testing.T) {
 
 	// Create cache instance pointing to the Unix socket
 	targetURL := "unix://" + socketPath
-	cache := NewOGTagCache(targetURL, true, 1*time.Minute, false)
+	cache := NewOGTagCache(targetURL, config.OpenGraph{
+		Enabled:      true,
+		TimeToLive:   time.Minute,
+		ConsiderHost: false,
+	})
 
 	// Create a dummy URL for the request (path and query matter)
 	testReqURL, _ := url.Parse("/some/page?query=1")

+ 11 - 2
internal/ogtags/parse_test.go

@@ -6,13 +6,18 @@ import (
 	"testing"
 	"time"
 
+	"github.com/TecharoHQ/anubis/lib/policy/config"
 	"golang.org/x/net/html"
 )
 
 // TestExtractOGTags updated with correct expectations based on filtering logic
 func TestExtractOGTags(t *testing.T) {
 	// Use a cache instance that reflects the default approved lists
-	testCache := NewOGTagCache("", false, time.Minute, false)
+	testCache := NewOGTagCache("", config.OpenGraph{
+		Enabled:      false,
+		ConsiderHost: false,
+		TimeToLive:   time.Minute,
+	})
 	// Manually set approved tags/prefixes based on the user request for clarity
 	testCache.approvedTags = []string{"description"}
 	testCache.approvedPrefixes = []string{"og:"}
@@ -189,7 +194,11 @@ func TestIsOGMetaTag(t *testing.T) {
 
 func TestExtractMetaTagInfo(t *testing.T) {
 	// Use a cache instance that reflects the default approved lists
-	testCache := NewOGTagCache("", false, time.Minute, false)
+	testCache := NewOGTagCache("", config.OpenGraph{
+		Enabled:      false,
+		ConsiderHost: false,
+		TimeToLive:   time.Minute,
+	})
 	testCache.approvedTags = []string{"description"}
 	testCache.approvedPrefixes = []string{"og:"}
 

+ 4 - 0
lib/anubis_test.go

@@ -44,6 +44,10 @@ func loadPolicies(t *testing.T, fname string, difficulty int) *policy.ParsedConf
 func spawnAnubis(t *testing.T, opts Options) *Server {
 	t.Helper()
 
+	if opts.Policy == nil {
+		opts.Policy = loadPolicies(t, "", 4)
+	}
+
 	s, err := New(opts)
 	if err != nil {
 		t.Fatalf("can't construct libanubis.Server: %v", err)

+ 16 - 17
lib/config.go

@@ -21,27 +21,26 @@ import (
 	"github.com/TecharoHQ/anubis/internal/ogtags"
 	"github.com/TecharoHQ/anubis/lib/challenge"
 	"github.com/TecharoHQ/anubis/lib/policy"
+	"github.com/TecharoHQ/anubis/lib/policy/config"
 	"github.com/TecharoHQ/anubis/web"
 	"github.com/TecharoHQ/anubis/xess"
 )
 
 type Options struct {
-	Next                 http.Handler
-	Policy               *policy.ParsedConfig
-	Target               string
-	CookieDomain         string
-	CookieName           string
-	BasePrefix           string
-	WebmasterEmail       string
-	RedirectDomains      []string
-	PrivateKey           ed25519.PrivateKey
-	CookieExpiration     time.Duration
-	OGTimeToLive         time.Duration
-	StripBasePrefix      bool
-	OGCacheConsidersHost bool
-	OGPassthrough        bool
-	CookiePartitioned    bool
-	ServeRobotsTXT       bool
+	Next              http.Handler
+	Policy            *policy.ParsedConfig
+	Target            string
+	CookieDomain      string
+	CookieName        string
+	BasePrefix        string
+	WebmasterEmail    string
+	RedirectDomains   []string
+	PrivateKey        ed25519.PrivateKey
+	CookieExpiration  time.Duration
+	StripBasePrefix   bool
+	OpenGraph         config.OpenGraph
+	CookiePartitioned bool
+	ServeRobotsTXT    bool
 }
 
 func LoadPoliciesOrDefault(ctx context.Context, fname string, defaultDifficulty int) (*policy.ParsedConfig, error) {
@@ -112,7 +111,7 @@ func New(opts Options) (*Server, error) {
 		policy:     opts.Policy,
 		opts:       opts,
 		DNSBLCache: decaymap.New[string, dnsbl.DroneBLResponse](),
-		OGTags:     ogtags.NewOGTagCache(opts.Target, opts.OGPassthrough, opts.OGTimeToLive, opts.OGCacheConsidersHost),
+		OGTags:     ogtags.NewOGTagCache(opts.Target, opts.Policy.OpenGraph),
 		cookieName: cookieName,
 	}
 

+ 1 - 1
lib/http.go

@@ -80,7 +80,7 @@ func (s *Server) RenderIndex(w http.ResponseWriter, r *http.Request, rule *polic
 	challengeStr := s.challengeFor(r, rule.Challenge.Difficulty)
 
 	var ogTags map[string]string = nil
-	if s.opts.OGPassthrough {
+	if s.opts.OpenGraph.Enabled {
 		var err error
 		ogTags, err = s.OGTags.GetOGTags(r.URL, r.Host)
 		if err != nil {

+ 25 - 5
lib/policy/config/config.go

@@ -10,6 +10,7 @@ import (
 	"os"
 	"regexp"
 	"strings"
+	"time"
 
 	"github.com/TecharoHQ/anubis/data"
 	"k8s.io/apimachinery/pkg/util/yaml"
@@ -323,10 +324,11 @@ func (sc StatusCodes) Valid() error {
 }
 
 type fileConfig struct {
-	Bots        []BotOrImport `json:"bots"`
-	DNSBL       bool          `json:"dnsbl"`
-	StatusCodes StatusCodes   `json:"status_codes"`
-	Thresholds  []Threshold   `json:"thresholds"`
+	Bots        []BotOrImport       `json:"bots"`
+	DNSBL       bool                `json:"dnsbl"`
+	OpenGraph   openGraphFileConfig `json:"openGraph,omitempty"`
+	StatusCodes StatusCodes         `json:"status_codes"`
+	Thresholds  []Threshold         `json:"thresholds"`
 }
 
 func (c *fileConfig) Valid() error {
@@ -342,6 +344,12 @@ func (c *fileConfig) Valid() error {
 		}
 	}
 
+	if c.OpenGraph.Enabled {
+		if err := c.OpenGraph.Valid(); err != nil {
+			errs = append(errs, err)
+		}
+	}
+
 	if err := c.StatusCodes.Valid(); err != nil {
 		errs = append(errs, err)
 	}
@@ -376,10 +384,21 @@ func Load(fin io.Reader, fname string) (*Config, error) {
 	}
 
 	result := &Config{
-		DNSBL:       c.DNSBL,
+		DNSBL: c.DNSBL,
+		OpenGraph: OpenGraph{
+			Enabled:      c.OpenGraph.Enabled,
+			ConsiderHost: c.OpenGraph.ConsiderHost,
+			Override:     c.OpenGraph.Override,
+		},
 		StatusCodes: c.StatusCodes,
 	}
 
+	if c.OpenGraph.TimeToLive != "" {
+		// XXX(Xe): already validated in Valid()
+		ogTTL, _ := time.ParseDuration(c.OpenGraph.TimeToLive)
+		result.OpenGraph.TimeToLive = ogTTL
+	}
+
 	var validationErrs []error
 
 	for _, boi := range c.Bots {
@@ -426,6 +445,7 @@ type Config struct {
 	Bots        []BotConfig
 	Thresholds  []Threshold
 	DNSBL       bool
+	OpenGraph   OpenGraph
 	StatusCodes StatusCodes
 }
 

+ 51 - 0
lib/policy/config/opengraph.go

@@ -0,0 +1,51 @@
+package config
+
+import (
+	"errors"
+	"fmt"
+	"time"
+)
+
+// Sentinel errors produced while validating the OpenGraph section of a policy
+// file. They are always returned wrapped, so match them with errors.Is.
+var (
+	// ErrInvalidOpenGraphConfig is joined in front of every validation failure.
+	ErrInvalidOpenGraphConfig = errors.New("config.OpenGraph: invalid OpenGraph configuration")
+	// ErrOpenGraphTTLDoesNotParse wraps the time.ParseDuration failure for ttl.
+	ErrOpenGraphTTLDoesNotParse = errors.New("config.OpenGraph: ttl does not parse as a Duration, see https://pkg.go.dev/time#ParseDuration (formatted like 5m -> 5 minutes, 2h -> 2 hours, etc)")
+	// ErrOpenGraphMissingProperty reports a required tag absent from the
+	// override map; the message names "override" to match the config key.
+	ErrOpenGraphMissingProperty = errors.New("config.OpenGraph: override opengraph tags missing a property")
+)
+
+// openGraphFileConfig is the OpenGraph section as written in the policy file.
+// TimeToLive stays a string here and is parsed with time.ParseDuration during
+// validation and loading.
+type openGraphFileConfig struct {
+	Enabled      bool              `json:"enabled" yaml:"enabled"`
+	ConsiderHost bool              `json:"considerHost" yaml:"considerHost"`
+	TimeToLive   string            `json:"ttl" yaml:"ttl"`
+	Override     map[string]string `json:"override,omitempty" yaml:"override,omitempty"`
+}
+
+// OpenGraph is the loaded form of the OpenGraph settings, with TimeToLive
+// already converted to a time.Duration.
+type OpenGraph struct {
+	Enabled      bool              `json:"enabled" yaml:"enabled"`
+	ConsiderHost bool              `json:"considerHost" yaml:"considerHost"`
+	Override     map[string]string `json:"override,omitempty" yaml:"override,omitempty"`
+	TimeToLive   time.Duration     `json:"ttl" yaml:"ttl"`
+}
+
+// Valid checks the OpenGraph section and reports every problem it finds at once.
+//
+// TimeToLive must parse with time.ParseDuration, and a non-empty Override map
+// must contain each required tag (currently only og:title). All failures are
+// joined behind ErrInvalidOpenGraphConfig; a nil return means nothing was wrong.
+func (og *openGraphFileConfig) Valid() error {
+	var problems []error
+
+	_, ttlErr := time.ParseDuration(og.TimeToLive)
+	if ttlErr != nil {
+		problems = append(problems, fmt.Errorf("%w: ParseDuration(%q) returned: %w", ErrOpenGraphTTLDoesNotParse, og.TimeToLive, ttlErr))
+	}
+
+	// Required tags are only enforced once at least one override is supplied.
+	if len(og.Override) > 0 {
+		required := []string{"og:title"}
+		for _, name := range required {
+			if _, present := og.Override[name]; present {
+				continue
+			}
+			problems = append(problems, fmt.Errorf("%w: %s", ErrOpenGraphMissingProperty, name))
+		}
+	}
+
+	if len(problems) == 0 {
+		return nil
+	}
+
+	return errors.Join(ErrInvalidOpenGraphConfig, errors.Join(problems...))
+}

+ 67 - 0
lib/policy/config/opengraph_test.go

@@ -0,0 +1,67 @@
+package config
+
+import (
+	"errors"
+	"testing"
+)
+
+// TestOpenGraphFileConfigValid runs openGraphFileConfig.Valid over a table of
+// configurations and checks that the returned error matches the expected
+// sentinel (or is nil for the happy-path cases).
+func TestOpenGraphFileConfigValid(t *testing.T) {
+	for _, tt := range []struct {
+		name  string
+		input *openGraphFileConfig
+		err   error
+	}{
+		{
+			name: "basic happy path",
+			input: &openGraphFileConfig{
+				Enabled:      true,
+				ConsiderHost: false,
+				TimeToLive:   "1h",
+				Override:     map[string]string{},
+			},
+			err: nil,
+		},
+		{
+			name: "basic happy path with default",
+			input: &openGraphFileConfig{
+				Enabled:      true,
+				ConsiderHost: false,
+				TimeToLive:   "1h",
+				Override: map[string]string{
+					"og:title": "foobar",
+				},
+			},
+			err: nil,
+		},
+		{
+			name: "invalid time duration",
+			input: &openGraphFileConfig{
+				Enabled:      true,
+				ConsiderHost: false,
+				TimeToLive:   "taco",
+				Override:     map[string]string{},
+			},
+			err: ErrOpenGraphTTLDoesNotParse,
+		},
+		{
+			name: "missing og:title in defaults",
+			input: &openGraphFileConfig{
+				Enabled:      true,
+				ConsiderHost: false,
+				TimeToLive:   "1h",
+				Override: map[string]string{
+					"description": "foobar",
+				},
+			},
+			err: ErrOpenGraphMissingProperty,
+		},
+	} {
+		t.Run(tt.name, func(t *testing.T) {
+			// errors.Is with a nil target only matches a nil error, so the
+			// happy-path rows pass only when Valid reports no problems.
+			if err := tt.input.Valid(); !errors.Is(err, tt.err) {
+				t.Logf("wanted error: %v", tt.err)
+				t.Logf("got error:    %v", err)
+				t.Error("validation failed")
+			}
+		})
+	}
+}

+ 12 - 0
lib/policy/config/testdata/bad/opengraph_bad_ttl.yaml

@@ -0,0 +1,12 @@
+# Invalid on purpose: ttl is not a Go duration string.
+bots:
+  - name: everything
+    user_agent_regex: .*
+    action: DENY
+
+openGraph:
+  enabled: true
+  considerHost: false
+  ttl: taco
+  override:
+    "og:title": "Xe's magic land of fun"
+    "og:description": "We're no strangers to love, you know the rules and so do I"

+ 12 - 0
lib/policy/config/testdata/good/opengraph_all_good.yaml

@@ -0,0 +1,12 @@
+# Valid OpenGraph section: parseable ttl and an override map containing og:title.
+bots:
+  - name: everything
+    user_agent_regex: .*
+    action: DENY
+
+openGraph:
+  enabled: true
+  considerHost: false
+  ttl: 1h
+  override:
+    "og:title": "Xe's magic land of fun"
+    "og:description": "We're no strangers to love, you know the rules and so do I"

+ 2 - 0
lib/policy/policy.go

@@ -31,6 +31,7 @@ type ParsedConfig struct {
 	Bots              []Bot
 	Thresholds        []*Threshold
 	DNSBL             bool
+	OpenGraph         config.OpenGraph
 	DefaultDifficulty int
 	StatusCodes       config.StatusCodes
 }
@@ -38,6 +39,7 @@ type ParsedConfig struct {
+// NewParsedConfig returns a ParsedConfig that keeps a reference to orig and
+// copies its OpenGraph and StatusCodes settings. All other fields are left at
+// their zero values.
 func NewParsedConfig(orig *config.Config) *ParsedConfig {
 	return &ParsedConfig{
 		orig:        orig,
+		OpenGraph:   orig.OpenGraph,
 		StatusCodes: orig.StatusCodes,
 	}
 }