1 неделя назад · b8e017e715
--- a/internal/cache/search.go
+++ b/internal/cache/search.go
@@ -182,7 +182,33 @@ func (si *SearchIndexer) createIndexMapping() mapping.IndexMapping {
 
				 }
			
 
				 
			
 
				 // handleConfigScan processes scanned config files and indexes them
			
 
				-func (si *SearchIndexer) handleConfigScan(configPath string, content []byte) error {
			
 
				+func (si *SearchIndexer) handleConfigScan(configPath string, content []byte) (err error) {
			
 
				+	// Add panic recovery to prevent the entire application from crashing
			
 
				+	defer func() {
			
 
				+		if r := recover(); r != nil {
			
 
				+			err = fmt.Errorf("panic during config scan: %v", r)
			
 
				+			logger.Error("Panic occurred while scanning config", "config_path", configPath, "content_size", len(content), "error", err)
			
 
				+		}
			
 
				+	}()
			
 
				+
			
 
				+	// File size limit: 10MB to prevent memory overflow
			
 
				+	const maxFileSize = 10 * 1024 * 1024 // 10MB
			
 
				+	if len(content) > maxFileSize {
			
 
				+		logger.Warn("Skipping file due to size limit", "path", configPath, "size", len(content), "limit", maxFileSize)
			
 
				+		return nil
			
 
				+	}
			
 
				+
			
 
				+	// Skip empty files
			
 
				+	if len(content) == 0 {
			
 
				+		return nil
			
 
				+	}
			
 
				+
			
 
				+	// Basic content validation: check if it's text content
			
 
				+	if !isTextContent(content) {
			
 
				+		logger.Warn("Skipping non-text file", "path", configPath)
			
 
				+		return nil
			
 
				+	}
			
 
				+
			
 
				 	docType := si.determineConfigType(configPath)
			
 
				 	if docType == "" {
			
 
				 		return nil // Skip unsupported file types
			
@@ -214,7 +240,15 @@ func (si *SearchIndexer) determineConfigType(configPath string) string {
 
				 }
			
 
				 
			
 
				 // IndexDocument indexes a single document
			
 
				-func (si *SearchIndexer) IndexDocument(doc SearchDocument) error {
			
 
				+func (si *SearchIndexer) IndexDocument(doc SearchDocument) (err error) {
			
 
				+	// Add panic recovery to prevent the entire application from crashing
			
 
				+	defer func() {
			
 
				+		if r := recover(); r != nil {
			
 
				+			err = fmt.Errorf("panic during indexing: %v", r)
			
 
				+			logger.Error("Panic occurred while indexing document", "document_id", doc.ID, "error", err)
			
 
				+		}
			
 
				+	}()
			
 
				+
			
 
				 	si.indexMutex.RLock()
			
 
				 	defer si.indexMutex.RUnlock()
			
 
				 
			
@@ -222,6 +256,11 @@ func (si *SearchIndexer) IndexDocument(doc SearchDocument) error {
 
				 		return fmt.Errorf("search index not initialized")
			
 
				 	}
			
 
				 
			
 
				+	// Additional size check as a safety measure
			
 
				+	if len(doc.Content) > 50*1024*1024 { // 50MB absolute limit
			
 
				+		return fmt.Errorf("document content too large: %d bytes", len(doc.Content))
			
 
				+	}
			
 
				+
			
 
				 	// logger.Debugf("Indexing document: ID=%s, Type=%s, Name=%s, Path=%s",
			
 
				 	// 	doc.ID, doc.Type, doc.Name, doc.Path)
			
 
				 
			
@@ -497,3 +536,50 @@ func SearchConfigs(ctx context.Context, query string, limit int) ([]SearchResult
 
				 // SearchAll forwards query and limit to the Search method of the indexer
				 // returned by GetSearchIndexer and hands back its results and error unchanged.
				 func SearchAll(ctx context.Context, query string, limit int) ([]SearchResult, error) {
				 	indexer := GetSearchIndexer()
				 	return indexer.Search(ctx, query, limit)
				 }
			
 
				+
			
 
				// isTextContent reports whether content looks like text rather than binary
				// data. It helps prevent indexing binary files that were misidentified as
				// configuration files.
				//
				// The decision is made in three steps:
				//  1. Empty content is treated as text.
				//  2. Content starting with a known binary signature (ELF, PNG, JPEG, ZIP)
				//     is rejected. Signatures are only checked when at least 4 bytes exist.
				//  3. Up to the first 8KB are decoded as UTF-8. Control characters (other
				//     than tab, LF and CR), DEL, C1 controls, U+FFFD and bytes that are
				//     not valid UTF-8 count as non-printable. The content is text when
				//     non-printable occurrences are at most 30% of the sampled bytes.
				//
				// Valid multi-byte UTF-8 characters are printable, so files with non-ASCII
				// comments or values are not mistaken for binary data.
				func isTextContent(content []byte) bool {
					if len(content) == 0 {
						return true // Empty content is considered text
					}

					// Reject well-known binary formats by their magic numbers. Comparing a
					// converted sub-slice against string constants keeps the table readable.
					if len(content) >= 4 {
						switch string(content[:4]) {
						case "\x7fELF", "\x89PNG": // ELF executable, PNG image
							return false
						case "PK\x03\x04", "PK\x05\x06", "PK\x07\x08": // ZIP: regular, empty, spanned
							return false
						}
						if string(content[:3]) == "\xff\xd8\xff" { // JPEG
							return false
						}
					}

					// Sample up to 8KB for performance.
					const maxSample = 8192
					sample := content
					if len(sample) > maxSample {
						sample = sample[:maxSample]
					}

					// Ranging over a string decodes UTF-8 and yields U+FFFD once for every
					// byte that cannot be decoded, so each invalid byte is counted exactly
					// once while a valid multi-byte character is counted as printable.
					// A character cut in half by the sample boundary adds at most three
					// invalid bytes, which is negligible against the 30% threshold.
					nonPrintable := 0
					for _, r := range string(sample) {
						switch {
						case r == '\t', r == '\n', r == '\r':
							// Ordinary whitespace is printable.
						case r < 0x20, r >= 0x7F && r <= 0x9F, r == '\uFFFD':
							nonPrintable++
						}
					}

					// If more than 30% of the sampled bytes are non-printable, consider the
					// content binary.
					threshold := float64(len(sample)) * 0.3
					return float64(nonPrintable) <= threshold
				}