Bläddra i källkod

refactor(ogtags): optimize URL construction and memory allocations (#647)

* refactor(ogtags): optimize URL construction and memory allocations

* test(ogtags): add benchmarks and memory usage tests for OGTagCache

* refactor(ogtags): optimize OGTags subsystem to reduce allocations and improve request runtime by up to 66%

* Update docs/docs/CHANGELOG.md

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
Signed-off-by: Jason Cameron <jasoncameron.all@gmail.com>

* refactor(ogtags): optimize URL string construction to reduce allocations

* Update internal/ogtags/ogtags.go

Co-authored-by: Xe Iaso <me@xeiaso.net>
Signed-off-by: Jason Cameron <jasoncameron.all@gmail.com>

* test(ogtags): add fuzz tests for getTarget and extractOGTags functions

* fix(ogtags): update memory calculation logic

Prev it would say that we had allocated 18pb

=== RUN   TestMemoryUsage
    mem_test.go:107: Memory allocated for 10k getTarget calls: 18014398509481904.00 KB
    mem_test.go:135: Memory allocated for 1k extractOGTags calls: 18014398509481978.00

    Now it's fixed with

    === RUN   TestMemoryUsage
    mem_test.go:109: Memory allocated for 10k getTarget calls:
    mem_test.go:110:   Total: 630.56 KB (0.62 MB)
    mem_test.go:111:   Per operation: 64.57 bytes
    mem_test.go:140: Memory allocated for 1k extractOGTags calls:
    mem_test.go:141:   Total: 328.17 KB (0.32 MB)
    mem_test.go:142:   Per operation: 336.05 bytes

* refactor(ogtags): optimize meta tag extraction for improved performance

* Update metadata

check-spelling run (pull_request) for json/ogmem

Signed-off-by: check-spelling-bot <check-spelling-bot@users.noreply.github.com>
on-behalf-of: @check-spelling <check-spelling-bot@check-spelling.dev>

* chore: update CHANGELOG for recent optimizations and version bump

* refactor: improve URL construction and meta tag extraction logic

* style: clean up fuzz tests

---------

Signed-off-by: Jason Cameron <jasoncameron.all@gmail.com>
Signed-off-by: check-spelling-bot <check-spelling-bot@users.noreply.github.com>
Signed-off-by: Jason Cameron <git@jasoncameron.dev>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
Co-authored-by: Xe Iaso <me@xeiaso.net>
Jason Cameron 1 vecka sedan
förälder
incheckning
2904ff974b

+ 3 - 2
.github/actions/spelling/expect.txt

@@ -49,6 +49,7 @@ coreutils
 Cotoyogi
 CRDs
 crt
+Cscript
 daemonizing
 DDOS
 Debian
@@ -69,7 +70,6 @@ duckduckbot
 eerror
 ellenjoe
 enbyware
-euo
 everyones
 evilbot
 evilsite
@@ -108,6 +108,7 @@ hebis
 hec
 hmc
 hostable
+htmlc
 htmx
 httpdebug
 hypertext
@@ -119,7 +120,6 @@ imgproxy
 inp
 iss
 isset
-itv
 ivh
 Jenomis
 JGit
@@ -249,6 +249,7 @@ traefik
 uberspace
 unixhttpd
 unmarshal
+unparseable
 uuidgen
 uvx
 UXP

+ 1 - 0
docs/docs/CHANGELOG.md

@@ -20,6 +20,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Implement a no-JS challenge method: [`metarefresh`](./admin/configuration/challenges/metarefresh.mdx) ([#95](https://github.com/TecharoHQ/anubis/issues/95))
 - Bump AI-robots.txt to version 1.34
 - Make progress bar styling more compatible (UXP, etc)
+- Optimized the OGTags subsystem with reduced allocations and runtime per request by up to 66%
 - Add `--strip-base-prefix` flag/envvar to strip the base prefix from request paths when forwarding to target servers
 
 ## v1.19.1: Jenomis cen Lexentale - Echo 1

+ 148 - 0
internal/ogtags/mem_test.go

@@ -0,0 +1,148 @@
+package ogtags
+
+import (
+	"golang.org/x/net/html"
+	"net/url"
+	"runtime"
+	"strings"
+	"testing"
+)
+
+func BenchmarkGetTarget(b *testing.B) {
+	tests := []struct {
+		name   string
+		target string
+		paths  []string
+	}{
+		{
+			name:   "HTTP",
+			target: "http://example.com",
+			paths:  []string{"/", "/path", "/path/to/resource", "/path?query=1&foo=bar"},
+		},
+		{
+			name:   "Unix",
+			target: "unix:///var/run/app.sock",
+			paths:  []string{"/", "/api/endpoint", "/api/endpoint?param=value"},
+		},
+	}
+
+	for _, tt := range tests {
+		b.Run(tt.name, func(b *testing.B) {
+			cache := NewOGTagCache(tt.target, false, 0, false)
+			urls := make([]*url.URL, len(tt.paths))
+			for i, path := range tt.paths {
+				u, _ := url.Parse(path)
+				urls[i] = u
+			}
+
+			b.ResetTimer()
+			b.ReportAllocs()
+
+			for i := 0; i < b.N; i++ {
+				_ = cache.getTarget(urls[i%len(urls)])
+			}
+		})
+	}
+}
+
+func BenchmarkExtractOGTags(b *testing.B) {
+	htmlSamples := []string{
+		`<html><head>
+			<meta property="og:title" content="Test Title">
+			<meta property="og:description" content="Test Description">
+			<meta name="keywords" content="test,keywords">
+		</head><body></body></html>`,
+		`<html><head>
+			<meta property="og:title" content="Page Title">
+			<meta property="og:type" content="website">
+			<meta property="og:url" content="https://example.com">
+			<meta property="og:image" content="https://example.com/image.jpg">
+			<meta property="twitter:card" content="summary_large_image">
+			<meta property="twitter:title" content="Twitter Title">
+			<meta name="description" content="Page description">
+			<meta name="author" content="John Doe">
+		</head><body><div><p>Content</p></div></body></html>`,
+	}
+
+	cache := NewOGTagCache("http://example.com", false, 0, false)
+	docs := make([]*html.Node, len(htmlSamples))
+
+	for i, sample := range htmlSamples {
+		doc, _ := html.Parse(strings.NewReader(sample))
+		docs[i] = doc
+	}
+
+	b.ResetTimer()
+	b.ReportAllocs()
+
+	for i := 0; i < b.N; i++ {
+		_ = cache.extractOGTags(docs[i%len(docs)])
+	}
+}
+
+// Memory usage test
+func TestMemoryUsage(t *testing.T) {
+	cache := NewOGTagCache("http://example.com", false, 0, false)
+
+	// Force GC and wait for it to complete
+	runtime.GC()
+
+	var m1 runtime.MemStats
+	runtime.ReadMemStats(&m1)
+
+	// Run getTarget many times
+	u, _ := url.Parse("/path/to/resource?query=1&foo=bar&baz=qux")
+	for i := 0; i < 10000; i++ {
+		_ = cache.getTarget(u)
+	}
+
+	// Force GC after operations
+	runtime.GC()
+
+	var m2 runtime.MemStats
+	runtime.ReadMemStats(&m2)
+
+	allocatedBytes := int64(m2.TotalAlloc) - int64(m1.TotalAlloc)
+	allocatedKB := float64(allocatedBytes) / 1024.0
+	allocatedPerOp := float64(allocatedBytes) / 10000.0
+
+	t.Logf("Memory allocated for 10k getTarget calls:")
+	t.Logf("  Total: %.2f KB (%.2f MB)", allocatedKB, allocatedKB/1024.0)
+	t.Logf("  Per operation: %.2f bytes", allocatedPerOp)
+
+	// Test extractOGTags memory usage
+	htmlDoc := `<html><head>
+		<meta property="og:title" content="Test Title">
+		<meta property="og:description" content="Test Description">
+		<meta property="og:image" content="https://example.com/image.jpg">
+		<meta property="twitter:card" content="summary">
+		<meta name="keywords" content="test,keywords,example">
+		<meta name="author" content="Test Author">
+		<meta property="unknown:tag" content="Should be ignored">
+	</head><body></body></html>`
+
+	doc, _ := html.Parse(strings.NewReader(htmlDoc))
+
+	runtime.GC()
+	runtime.ReadMemStats(&m1)
+
+	for i := 0; i < 1000; i++ {
+		_ = cache.extractOGTags(doc)
+	}
+
+	runtime.GC()
+	runtime.ReadMemStats(&m2)
+
+	allocatedBytes = int64(m2.TotalAlloc) - int64(m1.TotalAlloc)
+	allocatedKB = float64(allocatedBytes) / 1024.0
+	allocatedPerOp = float64(allocatedBytes) / 1000.0
+
+	t.Logf("Memory allocated for 1k extractOGTags calls:")
+	t.Logf("  Total: %.2f KB (%.2f MB)", allocatedKB, allocatedKB/1024.0)
+	t.Logf("  Per operation: %.2f bytes", allocatedPerOp)
+
+	// Sanity checks
+	if allocatedPerOp > 10000 {
+		t.Errorf("extractOGTags allocating too much memory per operation: %.2f bytes", allocatedPerOp)
+	}
+}

+ 36 - 18
internal/ogtags/ogtags.go

@@ -13,8 +13,11 @@ import (
 )
 
 const (
-	maxContentLength = 16 << 20        // 16 MiB in bytes, if there is a reasonable reason that you need more than this...Why?
+	maxContentLength = 8 << 20         // 8 MiB is enough for anyone
 	httpTimeout      = 5 * time.Second /*todo: make this configurable?*/
+
+	schemeSeparatorLength = 3 // Length of "://"
+	querySeparatorLength  = 1 // Length of "?" for query strings
 )
 
 type OGTagCache struct {
@@ -26,11 +29,13 @@ type OGTagCache struct {
 	ogTimeToLive        time.Duration
 	ogCacheConsiderHost bool
 	ogPassthrough       bool
+
+	// Pre-built strings for optimization
+	unixPrefix string // "http://unix"
 }
 
 func NewOGTagCache(target string, ogPassthrough bool, ogTimeToLive time.Duration, ogTagsConsiderHost bool) *OGTagCache {
 	// Predefined approved tags and prefixes
-	// In the future, these could come from configuration
 	defaultApprovedTags := []string{"description", "keywords", "author"}
 	defaultApprovedPrefixes := []string{"og:", "twitter:", "fediverse:"}
 
@@ -71,37 +76,50 @@ func NewOGTagCache(target string, ogPassthrough bool, ogTimeToLive time.Duration
 
 	return &OGTagCache{
 		cache:               decaymap.New[string, map[string]string](),
-		targetURL:           parsedTargetURL, // Store the parsed URL
+		targetURL:           parsedTargetURL,
 		ogPassthrough:       ogPassthrough,
 		ogTimeToLive:        ogTimeToLive,
-		ogCacheConsiderHost: ogTagsConsiderHost, // todo: refactor to be a separate struct
+		ogCacheConsiderHost: ogTagsConsiderHost,
 		approvedTags:        defaultApprovedTags,
 		approvedPrefixes:    defaultApprovedPrefixes,
 		client:              client,
+		unixPrefix:          "http://unix",
 	}
 }
 
 // getTarget constructs the target URL string for fetching OG tags.
-// For Unix sockets, it creates a "fake" HTTP URL that the custom dialer understands.
+// Optimized to minimize allocations by building strings directly.
 func (c *OGTagCache) getTarget(u *url.URL) string {
+	var escapedPath = u.EscapedPath() // will cause an allocation if path contains special characters
 	if c.targetURL.Scheme == "unix" {
-		// The custom dialer ignores the host, but we need a valid http URL structure.
-		// Use "unix" as a placeholder host. Path and Query from original request are appended.
-		fakeURL := &url.URL{
-			Scheme:   "http", // Scheme must be http/https for client.Get
-			Host:     "unix", // Arbitrary host, ignored by custom dialer
-			Path:     u.Path,
-			RawQuery: u.RawQuery,
+		// Build URL string directly without creating intermediate URL object
+		var sb strings.Builder
+		sb.Grow(len(c.unixPrefix) + len(escapedPath) + len(u.RawQuery) + querySeparatorLength) // Pre-allocate
+		sb.WriteString(c.unixPrefix)
+		sb.WriteString(escapedPath)
+		if u.RawQuery != "" {
+			sb.WriteByte('?')
+			sb.WriteString(u.RawQuery)
 		}
-		return fakeURL.String()
+		return sb.String()
 	}
 
-	// For regular http/https targets
-	target := *c.targetURL // Make a copy
-	target.Path = u.Path
-	target.RawQuery = u.RawQuery
-	return target.String()
+	// For regular http/https targets, build URL string directly
+	var sb strings.Builder
+	// Pre-calculate size: scheme + "://" + host + path + "?" + query
+	estimatedSize := len(c.targetURL.Scheme) + schemeSeparatorLength + len(c.targetURL.Host) + len(escapedPath) + len(u.RawQuery) + querySeparatorLength
+	sb.Grow(estimatedSize)
+
+	sb.WriteString(c.targetURL.Scheme)
+	sb.WriteString("://")
+	sb.WriteString(c.targetURL.Host)
+	sb.WriteString(escapedPath)
+	if u.RawQuery != "" {
+		sb.WriteByte('?')
+		sb.WriteString(u.RawQuery)
+	}
 
+	return sb.String()
 }
 
 func (c *OGTagCache) Cleanup() {

+ 308 - 0
internal/ogtags/ogtags_fuzz_test.go

@@ -0,0 +1,308 @@
+package ogtags
+
+import (
+	"golang.org/x/net/html"
+	"net/url"
+	"strings"
+	"testing"
+	"unicode/utf8"
+)
+
+// FuzzGetTarget tests getTarget with various inputs
+func FuzzGetTarget(f *testing.F) {
+	// Seed corpus with interesting test cases
+	testCases := []struct {
+		target string
+		path   string
+		query  string
+	}{
+		{"http://example.com", "/", ""},
+		{"http://example.com", "/path", "q=1"},
+		{"unix:///tmp/socket", "/api", "key=value"},
+		{"https://example.com:8080", "/path/to/resource", "a=1&b=2"},
+		{"http://example.com", "/path with spaces", "q=hello world"},
+		{"http://example.com", "/path/❤️/emoji", "emoji=🎉"},
+		{"http://example.com", "/path/../../../etc/passwd", ""},
+		{"http://example.com", "/path%2F%2E%2E%2F", "q=%3Cscript%3E"},
+		{"unix:///var/run/app.sock", "/../../etc/passwd", ""},
+		{"http://[::1]:8080", "/ipv6", "test=1"},
+		{"http://example.com", strings.Repeat("/very/long/path", 100), strings.Repeat("param=value&", 100)},
+		{"http://example.com", "/path%20with%20encoded", "q=%20encoded%20"},
+		{"http://example.com", "/пример/кириллица", "q=тест"},
+		{"http://example.com", "/中文/路径", "查询=值"},
+		{"", "/path", "q=1"}, // Empty target
+	}
+
+	for _, tc := range testCases {
+		f.Add(tc.target, tc.path, tc.query)
+	}
+
+	f.Fuzz(func(t *testing.T, target, path, query string) {
+		// Skip invalid UTF-8 to focus on realistic inputs
+		if !utf8.ValidString(target) || !utf8.ValidString(path) || !utf8.ValidString(query) {
+			t.Skip()
+		}
+
+		// Create cache - should not panic
+		cache := NewOGTagCache(target, false, 0, false)
+
+		// Create URL
+		u := &url.URL{
+			Path:     path,
+			RawQuery: query,
+		}
+
+		// Call getTarget - should not panic
+		result := cache.getTarget(u)
+
+		// Basic validation
+		if result == "" {
+			t.Errorf("getTarget returned empty string for target=%q, path=%q, query=%q", target, path, query)
+		}
+
+		// Verify result is a valid URL (for non-empty targets)
+		if target != "" {
+			parsedResult, err := url.Parse(result)
+			if err != nil {
+				t.Errorf("getTarget produced invalid URL %q: %v", result, err)
+			} else {
+				// For unix sockets, verify the scheme is http
+				if strings.HasPrefix(target, "unix:") && parsedResult.Scheme != "http" {
+					t.Errorf("Unix socket URL should have http scheme, got %q", parsedResult.Scheme)
+				}
+			}
+		}
+
+		// Ensure no memory corruption by calling multiple times
+		for i := 0; i < 3; i++ {
+			result2 := cache.getTarget(u)
+			if result != result2 {
+				t.Errorf("getTarget not deterministic: %q != %q", result, result2)
+			}
+		}
+	})
+}
+
+// FuzzExtractOGTags tests extractOGTags with various HTML inputs
+func FuzzExtractOGTags(f *testing.F) {
+	// Seed corpus with interesting HTML cases
+	htmlCases := []string{
+		`<html><head><meta property="og:title" content="Test"></head></html>`,
+		`<meta property="og:title" content="No HTML tags">`,
+		`<html><head>` + strings.Repeat(`<meta property="og:title" content="Many tags">`, 1000) + `</head></html>`,
+		`<html><head><meta property="og:title" content="<script>alert('xss')</script>"></head></html>`,
+		`<html><head><meta property="og:title" content="Line1&#10;Line2"></head></html>`,
+		`<html><head><meta property="og:emoji" content="❤️🎉🎊"></head></html>`,
+		`<html><head><meta property="og:title" content="` + strings.Repeat("A", 10000) + `"></head></html>`,
+		`<html><head><meta property="og:title" content='Single quotes'></head></html>`,
+		`<html><head><meta property=og:title content=no-quotes></head></html>`,
+		`<html><head><meta name="keywords" content="test,keywords"></head></html>`,
+		`<html><head><meta property="unknown:tag" content="Should be ignored"></head></html>`,
+		`<html><head><meta property="` + strings.Repeat("og:", 100) + `title" content="Nested prefixes"></head></html>`,
+		`<html>` + strings.Repeat(`<div>`, 1000) + `<meta property="og:title" content="Deep nesting">` + strings.Repeat(`</div>`, 1000) + `</html>`,
+		`<!DOCTYPE html><html xmlns="http://www.w3.org/1999/xhtml"><head><meta property="og:title" content="With doctype"/></head></html>`,
+		`<html><head><meta property="" content="Empty property"></head></html>`,
+		`<html><head><meta content="Content only"></head></html>`,
+		`<html><head><meta property="og:title"></head></html>`, // No content
+		``, // Empty HTML
+		`<html><head><meta property="og:title" content="Кириллица"></head></html>`,
+		`<html><head><meta property="og:title" content="中文内容"></head></html>`,
+		`<html><head><!--<meta property="og:title" content="Commented out">--></head></html>`,
+		`<html><head><META PROPERTY="OG:TITLE" CONTENT="UPPERCASE"></head></html>`,
+	}
+
+	for _, htmlc := range htmlCases {
+		f.Add(htmlc)
+	}
+
+	f.Fuzz(func(t *testing.T, htmlContent string) {
+		// Skip invalid UTF-8
+		if !utf8.ValidString(htmlContent) {
+			t.Skip()
+		}
+
+		// Parse HTML - may fail on invalid input
+		doc, err := html.Parse(strings.NewReader(htmlContent))
+		if err != nil {
+			// This is expected for malformed HTML
+			return
+		}
+
+		cache := NewOGTagCache("http://example.com", false, 0, false)
+
+		// Should not panic
+		tags := cache.extractOGTags(doc)
+
+		// Validate results
+		for property, content := range tags {
+			// Ensure property is approved
+			approved := false
+			for _, prefix := range cache.approvedPrefixes {
+				if strings.HasPrefix(property, prefix) {
+					approved = true
+					break
+				}
+			}
+			if !approved {
+				for _, tag := range cache.approvedTags {
+					if property == tag {
+						approved = true
+						break
+					}
+				}
+			}
+			if !approved {
+				t.Errorf("Unapproved property %q was extracted", property)
+			}
+
+			// Ensure content is valid string
+			if !utf8.ValidString(content) {
+				t.Errorf("Invalid UTF-8 in content for property %q", property)
+			}
+		}
+
+		// Test determinism
+		tags2 := cache.extractOGTags(doc)
+		if len(tags) != len(tags2) {
+			t.Errorf("extractOGTags not deterministic: different lengths %d != %d", len(tags), len(tags2))
+		}
+		for k, v := range tags {
+			if tags2[k] != v {
+				t.Errorf("extractOGTags not deterministic: %q=%q != %q=%q", k, v, k, tags2[k])
+			}
+		}
+	})
+}
+
+// FuzzGetTargetRoundTrip tests that getTarget produces valid URLs that can be parsed back
+func FuzzGetTargetRoundTrip(f *testing.F) {
+	f.Add("http://example.com", "/path/to/resource", "key=value&foo=bar")
+	f.Add("unix:///tmp/socket", "/api/endpoint", "param=test")
+
+	f.Fuzz(func(t *testing.T, target, path, query string) {
+		if !utf8.ValidString(target) || !utf8.ValidString(path) || !utf8.ValidString(query) {
+			t.Skip()
+		}
+
+		cache := NewOGTagCache(target, false, 0, false)
+		u := &url.URL{Path: path, RawQuery: query}
+
+		result := cache.getTarget(u)
+		if result == "" {
+			return
+		}
+
+		// Parse the result back
+		parsed, err := url.Parse(result)
+		if err != nil {
+			t.Errorf("getTarget produced unparseable URL: %v", err)
+			return
+		}
+
+		// For non-unix targets, verify path preservation (accounting for encoding)
+		if !strings.HasPrefix(target, "unix:") && target != "" {
+			// The paths should match after normalization
+			expectedPath := u.EscapedPath()
+			if parsed.EscapedPath() != expectedPath {
+				t.Errorf("Path not preserved: want %q, got %q", expectedPath, parsed.EscapedPath())
+			}
+
+			// Query should be preserved exactly
+			if parsed.RawQuery != query {
+				t.Errorf("Query not preserved: want %q, got %q", query, parsed.RawQuery)
+			}
+		}
+	})
+}
+
+// FuzzExtractMetaTagInfo tests the extractMetaTagInfo function directly
+func FuzzExtractMetaTagInfo(f *testing.F) {
+	// Seed with various attribute combinations
+	f.Add("og:title", "Test Title", "property")
+	f.Add("keywords", "test,keywords", "name")
+	f.Add("og:description", "A description with \"quotes\"", "property")
+	f.Add("twitter:card", "summary", "property")
+	f.Add("unknown:tag", "Should be filtered", "property")
+	f.Add("", "Content without property", "property")
+	f.Add("og:title", "", "property") // Property without content
+
+	f.Fuzz(func(t *testing.T, propertyValue, contentValue, propertyKey string) {
+		if !utf8.ValidString(propertyValue) || !utf8.ValidString(contentValue) || !utf8.ValidString(propertyKey) {
+			t.Skip()
+		}
+
+		// Create a meta node
+		node := &html.Node{
+			Type: html.ElementNode,
+			Data: "meta",
+			Attr: []html.Attribute{
+				{Key: propertyKey, Val: propertyValue},
+				{Key: "content", Val: contentValue},
+			},
+		}
+
+		cache := NewOGTagCache("http://example.com", false, 0, false)
+
+		// Should not panic
+		property, content := cache.extractMetaTagInfo(node)
+
+		// If property is returned, it must be approved
+		if property != "" {
+			approved := false
+			for _, prefix := range cache.approvedPrefixes {
+				if strings.HasPrefix(property, prefix) {
+					approved = true
+					break
+				}
+			}
+			if !approved {
+				for _, tag := range cache.approvedTags {
+					if property == tag {
+						approved = true
+						break
+					}
+				}
+			}
+			if !approved {
+				t.Errorf("extractMetaTagInfo returned unapproved property: %q", property)
+			}
+		}
+
+		// Content should match input if property is approved
+		if property != "" && content != contentValue {
+			t.Errorf("Content mismatch: want %q, got %q", contentValue, content)
+		}
+	})
+}
+
+// Benchmark comparison for the fuzzed scenarios
+func BenchmarkFuzzedGetTarget(b *testing.B) {
+	// Test with various challenging inputs found during fuzzing
+	inputs := []struct {
+		name   string
+		target string
+		path   string
+		query  string
+	}{
+		{"Simple", "http://example.com", "/api", "k=v"},
+		{"LongPath", "http://example.com", strings.Repeat("/segment", 50), ""},
+		{"LongQuery", "http://example.com", "/", strings.Repeat("param=value&", 50)},
+		{"Unicode", "http://example.com", "/путь/路径/path", "q=значение"},
+		{"Encoded", "http://example.com", "/path%20with%20spaces", "q=%3Cscript%3E"},
+		{"Unix", "unix:///tmp/socket.sock", "/api/v1/resource", "id=123&format=json"},
+	}
+
+	for _, input := range inputs {
+		b.Run(input.name, func(b *testing.B) {
+			cache := NewOGTagCache(input.target, false, 0, false)
+			u := &url.URL{Path: input.path, RawQuery: input.query}
+
+			b.ResetTimer()
+			b.ReportAllocs()
+
+			for i := 0; i < b.N; i++ {
+				_ = cache.getTarget(u)
+			}
+		})
+	}
+}

+ 23 - 29
internal/ogtags/parse.go

@@ -12,15 +12,12 @@ func (c *OGTagCache) extractOGTags(doc *html.Node) map[string]string {
 
 	var traverseNodes func(*html.Node)
 	traverseNodes = func(n *html.Node) {
-		// isOGMetaTag only checks if it's a <meta> tag.
-		// The actual filtering happens in extractMetaTagInfo now.
 		if isOGMetaTag(n) {
 			property, content := c.extractMetaTagInfo(n)
 			if property != "" {
 				ogTags[property] = content
 			}
 		}
-
 		for child := n.FirstChild; child != nil; child = child.NextSibling {
 			traverseNodes(child)
 		}
@@ -39,43 +36,40 @@ func isOGMetaTag(n *html.Node) bool {
 }
 
 // extractMetaTagInfo extracts property and content from a meta tag
-// *and* checks if the property is approved.
-// Returns empty property string if the tag is not approved.
 func (c *OGTagCache) extractMetaTagInfo(n *html.Node) (property, content string) {
-	var rawProperty string // Store the property found before approval check
+	var propertyKey string
 
+	// Single pass through attributes, using range to avoid bounds checking
 	for _, attr := range n.Attr {
-		if attr.Key == "property" || attr.Key == "name" {
-			rawProperty = attr.Val
-		}
-		if attr.Key == "content" {
+		switch attr.Key {
+		case "property", "name":
+			propertyKey = attr.Val
+		case "content":
 			content = attr.Val
 		}
-	}
-
-	// Check if the rawProperty is approved
-	isApproved := false
-	for _, prefix := range c.approvedPrefixes {
-		if strings.HasPrefix(rawProperty, prefix) {
-			isApproved = true
+		// Early exit if we have both
+		if propertyKey != "" && content != "" {
 			break
 		}
 	}
-	// Check exact approved tags if not already approved by prefix
-	if !isApproved {
-		for _, tag := range c.approvedTags {
-			if rawProperty == tag {
-				isApproved = true
-				break
-			}
+
+	if propertyKey == "" {
+		return "", content
+	}
+
+	// Check prefixes first (more common case)
+	for _, prefix := range c.approvedPrefixes {
+		if strings.HasPrefix(propertyKey, prefix) {
+			return propertyKey, content
 		}
 	}
 
-	// Only return the property if it's approved
-	if isApproved {
-		property = rawProperty
+	// Check exact matches
+	for _, tag := range c.approvedTags {
+		if propertyKey == tag {
+			return propertyKey, content
+		}
 	}
 
-	// Content is returned regardless, but property will be "" if not approved
-	return property, content
+	return "", content
 }