search.go 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585
  1. package cache
import (
	"context"
	"fmt"
	"os"
	"path/filepath"
	"strings"
	"sync"
	"time"
	"unicode/utf8"

	"github.com/blevesearch/bleve/v2"
	"github.com/blevesearch/bleve/v2/analysis/lang/en"
	"github.com/blevesearch/bleve/v2/mapping"
	"github.com/blevesearch/bleve/v2/search/query"
	"github.com/uozi-tech/cosy/logger"
)
  16. // SearchDocument represents a document in the search index
  17. type SearchDocument struct {
  18. ID string `json:"id"`
  19. Type string `json:"type"` // "site", "stream", or "config"
  20. Name string `json:"name"` // extracted from filename
  21. Path string `json:"path"` // file path
  22. Content string `json:"content"` // file content
  23. UpdatedAt time.Time `json:"updated_at"`
  24. }
  25. // SearchResult represents a search result
  26. type SearchResult struct {
  27. Document SearchDocument `json:"document"`
  28. Score float64 `json:"score"`
  29. }
  30. // SearchIndexer manages the Bleve search index
  31. type SearchIndexer struct {
  32. index bleve.Index
  33. indexPath string
  34. indexMutex sync.RWMutex
  35. ctx context.Context
  36. cancel context.CancelFunc
  37. cleanupOnce sync.Once
  38. }
  39. var (
  40. searchIndexer *SearchIndexer
  41. searchIndexerOnce sync.Once
  42. )
  43. // GetSearchIndexer returns the singleton search indexer instance
  44. func GetSearchIndexer() *SearchIndexer {
  45. searchIndexerOnce.Do(func() {
  46. // Create a temporary directory for the index
  47. tempDir, err := os.MkdirTemp("", "nginx-ui-search-index-*")
  48. if err != nil {
  49. logger.Fatalf("Failed to create temp directory for search index: %v", err)
  50. }
  51. searchIndexer = &SearchIndexer{
  52. indexPath: tempDir,
  53. }
  54. })
  55. return searchIndexer
  56. }
  57. // InitSearchIndex initializes the search index
  58. func InitSearchIndex(ctx context.Context) error {
  59. indexer := GetSearchIndexer()
  60. return indexer.Initialize(ctx)
  61. }
  62. // Initialize sets up the Bleve search index
  63. func (si *SearchIndexer) Initialize(ctx context.Context) error {
  64. si.indexMutex.Lock()
  65. defer si.indexMutex.Unlock()
  66. // Create a derived context for cleanup
  67. si.ctx, si.cancel = context.WithCancel(ctx)
  68. // Check if context is cancelled
  69. select {
  70. case <-ctx.Done():
  71. return ctx.Err()
  72. default:
  73. }
  74. // Try to open existing index, create new if it fails
  75. var err error
  76. si.index, err = bleve.Open(si.indexPath)
  77. if err != nil {
  78. // Check context again before creating new index
  79. select {
  80. case <-ctx.Done():
  81. return ctx.Err()
  82. default:
  83. }
  84. logger.Info("Creating new search index at:", si.indexPath)
  85. si.index, err = bleve.New(si.indexPath, si.createIndexMapping())
  86. if err != nil {
  87. return fmt.Errorf("failed to create search index: %w", err)
  88. }
  89. }
  90. // Register callback for config scanning
  91. RegisterCallback(si.handleConfigScan)
  92. // Start cleanup goroutine
  93. go si.watchContext()
  94. logger.Info("Search index initialized successfully")
  95. return nil
  96. }
  97. // watchContext monitors the context and cleans up when it's cancelled
  98. func (si *SearchIndexer) watchContext() {
  99. <-si.ctx.Done()
  100. si.cleanup()
  101. }
  102. // cleanup closes the index and removes the temporary directory
  103. func (si *SearchIndexer) cleanup() {
  104. si.cleanupOnce.Do(func() {
  105. logger.Info("Cleaning up search index...")
  106. si.indexMutex.Lock()
  107. defer si.indexMutex.Unlock()
  108. if si.index != nil {
  109. si.index.Close()
  110. si.index = nil
  111. }
  112. // Remove the temporary directory
  113. if err := os.RemoveAll(si.indexPath); err != nil {
  114. logger.Error("Failed to remove search index directory:", err)
  115. } else {
  116. logger.Info("Search index directory removed successfully")
  117. }
  118. })
  119. }
  120. // createIndexMapping creates the mapping for the search index
  121. func (si *SearchIndexer) createIndexMapping() mapping.IndexMapping {
  122. docMapping := bleve.NewDocumentMapping()
  123. // Text fields with standard analyzer
  124. textField := bleve.NewTextFieldMapping()
  125. textField.Analyzer = en.AnalyzerName
  126. textField.Store = true
  127. textField.Index = true
  128. // Keyword fields for exact match
  129. keywordField := bleve.NewKeywordFieldMapping()
  130. keywordField.Store = true
  131. keywordField.Index = true
  132. // Date field
  133. dateField := bleve.NewDateTimeFieldMapping()
  134. dateField.Store = true
  135. dateField.Index = true
  136. // Map fields to types
  137. fieldMappings := map[string]*mapping.FieldMapping{
  138. "id": keywordField,
  139. "type": keywordField,
  140. "path": keywordField,
  141. "name": textField,
  142. "content": textField,
  143. "updated_at": dateField,
  144. }
  145. for field, fieldMapping := range fieldMappings {
  146. docMapping.AddFieldMappingsAt(field, fieldMapping)
  147. }
  148. indexMapping := bleve.NewIndexMapping()
  149. indexMapping.DefaultMapping = docMapping
  150. indexMapping.DefaultAnalyzer = en.AnalyzerName
  151. return indexMapping
  152. }
  153. // handleConfigScan processes scanned config files and indexes them
  154. func (si *SearchIndexer) handleConfigScan(configPath string, content []byte) (err error) {
  155. // Add panic recovery to prevent the entire application from crashing
  156. defer func() {
  157. if r := recover(); r != nil {
  158. err = fmt.Errorf("panic during config scan: %v", r)
  159. logger.Error("Panic occurred while scanning config", "config_path", configPath, "content_size", len(content), "error", err)
  160. }
  161. }()
  162. // File size limit: 10MB to prevent memory overflow
  163. const maxFileSize = 10 * 1024 * 1024 // 10MB
  164. if len(content) > maxFileSize {
  165. logger.Warn("Skipping file due to size limit", "path", configPath, "size", len(content), "limit", maxFileSize)
  166. return nil
  167. }
  168. // Skip empty files
  169. if len(content) == 0 {
  170. return nil
  171. }
  172. // Basic content validation: check if it's text content
  173. if !isTextContent(content) {
  174. logger.Warn("Skipping non-text file", "path", configPath)
  175. return nil
  176. }
  177. docType := si.determineConfigType(configPath)
  178. if docType == "" {
  179. return nil // Skip unsupported file types
  180. }
  181. doc := SearchDocument{
  182. ID: configPath,
  183. Type: docType,
  184. Name: filepath.Base(configPath),
  185. Path: configPath,
  186. Content: string(content),
  187. UpdatedAt: time.Now(),
  188. }
  189. return si.IndexDocument(doc)
  190. }
  191. // determineConfigType determines the type of config file based on path
  192. func (si *SearchIndexer) determineConfigType(configPath string) string {
  193. normalizedPath := filepath.ToSlash(configPath)
  194. switch {
  195. case strings.Contains(normalizedPath, "sites-available") || strings.Contains(normalizedPath, "sites-enabled"):
  196. return "site"
  197. case strings.Contains(normalizedPath, "streams-available") || strings.Contains(normalizedPath, "streams-enabled"):
  198. return "stream"
  199. default:
  200. return "config"
  201. }
  202. }
  203. // IndexDocument indexes a single document
  204. func (si *SearchIndexer) IndexDocument(doc SearchDocument) (err error) {
  205. // Add panic recovery to prevent the entire application from crashing
  206. defer func() {
  207. if r := recover(); r != nil {
  208. err = fmt.Errorf("panic during indexing: %v", r)
  209. logger.Error("Panic occurred while indexing document", "document_id", doc.ID, "error", err)
  210. }
  211. }()
  212. si.indexMutex.RLock()
  213. defer si.indexMutex.RUnlock()
  214. if si.index == nil {
  215. return fmt.Errorf("search index not initialized")
  216. }
  217. // Additional size check as a safety measure
  218. if len(doc.Content) > 50*1024*1024 { // 50MB absolute limit
  219. return fmt.Errorf("document content too large: %d bytes", len(doc.Content))
  220. }
  221. // logger.Debugf("Indexing document: ID=%s, Type=%s, Name=%s, Path=%s",
  222. // doc.ID, doc.Type, doc.Name, doc.Path)
  223. return si.index.Index(doc.ID, doc)
  224. }
  225. // Search performs a search query
  226. func (si *SearchIndexer) Search(ctx context.Context, queryStr string, limit int) ([]SearchResult, error) {
  227. return si.searchWithType(ctx, queryStr, "", limit)
  228. }
  229. // SearchByType performs a search filtered by document type
  230. func (si *SearchIndexer) SearchByType(ctx context.Context, queryStr string, docType string, limit int) ([]SearchResult, error) {
  231. return si.searchWithType(ctx, queryStr, docType, limit)
  232. }
  233. // searchWithType performs the actual search with optional type filtering
  234. func (si *SearchIndexer) searchWithType(ctx context.Context, queryStr string, docType string, limit int) ([]SearchResult, error) {
  235. si.indexMutex.RLock()
  236. defer si.indexMutex.RUnlock()
  237. // Check if context is cancelled
  238. select {
  239. case <-ctx.Done():
  240. return nil, ctx.Err()
  241. default:
  242. }
  243. if si.index == nil {
  244. return nil, fmt.Errorf("search index not initialized")
  245. }
  246. if limit <= 0 {
  247. limit = 500 // Increase default limit to handle more results
  248. }
  249. query := si.buildQuery(queryStr, docType)
  250. searchRequest := bleve.NewSearchRequest(query)
  251. searchRequest.Size = limit
  252. searchRequest.Fields = []string{"*"}
  253. // Use a channel to handle search with context cancellation
  254. type searchResult struct {
  255. result *bleve.SearchResult
  256. err error
  257. }
  258. resultChan := make(chan searchResult, 1)
  259. go func() {
  260. result, err := si.index.Search(searchRequest)
  261. resultChan <- searchResult{result: result, err: err}
  262. }()
  263. // Wait for search result or context cancellation
  264. select {
  265. case <-ctx.Done():
  266. return nil, ctx.Err()
  267. case res := <-resultChan:
  268. if res.err != nil {
  269. return nil, fmt.Errorf("search execution failed: %w", res.err)
  270. }
  271. results := si.convertResults(res.result)
  272. // Debug log the search execution
  273. logger.Debugf("Search index query '%s' (type: %s, limit: %d) returned %d results",
  274. queryStr, docType, limit, len(results))
  275. return results, nil
  276. }
  277. }
  278. // buildQuery builds a search query with optional type filtering
  279. func (si *SearchIndexer) buildQuery(queryStr string, docType string) query.Query {
  280. mainQuery := bleve.NewBooleanQuery()
  281. // Add type filter if specified
  282. if docType != "" {
  283. typeQuery := bleve.NewTermQuery(docType)
  284. typeQuery.SetField("type")
  285. mainQuery.AddMust(typeQuery)
  286. }
  287. // Add text search across name and content fields only
  288. textQuery := bleve.NewBooleanQuery()
  289. searchFields := []string{"name", "content"}
  290. for _, field := range searchFields {
  291. // Create a boolean query for this field to combine multiple query types
  292. fieldQuery := bleve.NewBooleanQuery()
  293. // 1. Exact match query (highest priority)
  294. matchQuery := bleve.NewMatchQuery(queryStr)
  295. matchQuery.SetField(field)
  296. matchQuery.SetBoost(3.0) // Higher boost for exact matches
  297. fieldQuery.AddShould(matchQuery)
  298. // 2. Prefix query for partial matches (e.g., "access" matches "access_log")
  299. prefixQuery := bleve.NewPrefixQuery(queryStr)
  300. prefixQuery.SetField(field)
  301. prefixQuery.SetBoost(2.0) // Medium boost for prefix matches
  302. fieldQuery.AddShould(prefixQuery)
  303. // 3. Wildcard query for more flexible matching
  304. wildcardQuery := bleve.NewWildcardQuery("*" + queryStr + "*")
  305. wildcardQuery.SetField(field)
  306. wildcardQuery.SetBoost(1.5) // Lower boost for wildcard matches
  307. fieldQuery.AddShould(wildcardQuery)
  308. // 4. Fuzzy match query (allows 1 character difference)
  309. fuzzyQuery := bleve.NewFuzzyQuery(queryStr)
  310. fuzzyQuery.SetField(field)
  311. fuzzyQuery.SetFuzziness(1)
  312. fuzzyQuery.SetBoost(1.0) // Lowest boost for fuzzy matches
  313. fieldQuery.AddShould(fuzzyQuery)
  314. textQuery.AddShould(fieldQuery)
  315. }
  316. if docType != "" {
  317. mainQuery.AddMust(textQuery)
  318. } else {
  319. return textQuery
  320. }
  321. return mainQuery
  322. }
  323. // convertResults converts Bleve search results to our SearchResult format
  324. func (si *SearchIndexer) convertResults(searchResult *bleve.SearchResult) []SearchResult {
  325. results := make([]SearchResult, 0, len(searchResult.Hits))
  326. for _, hit := range searchResult.Hits {
  327. doc := SearchDocument{
  328. ID: si.getStringField(hit.Fields, "id"),
  329. Type: si.getStringField(hit.Fields, "type"),
  330. Name: si.getStringField(hit.Fields, "name"),
  331. Path: si.getStringField(hit.Fields, "path"),
  332. Content: si.getStringField(hit.Fields, "content"),
  333. }
  334. // Parse updated_at if present
  335. if updatedAtStr := si.getStringField(hit.Fields, "updated_at"); updatedAtStr != "" {
  336. if updatedAt, err := time.Parse(time.RFC3339, updatedAtStr); err == nil {
  337. doc.UpdatedAt = updatedAt
  338. }
  339. }
  340. results = append(results, SearchResult{
  341. Document: doc,
  342. Score: hit.Score,
  343. })
  344. }
  345. return results
  346. }
  347. // getStringField safely gets a string field from search results
  348. func (si *SearchIndexer) getStringField(fields map[string]interface{}, fieldName string) string {
  349. if value, ok := fields[fieldName]; ok {
  350. if str, ok := value.(string); ok {
  351. return str
  352. }
  353. }
  354. return ""
  355. }
  356. // DeleteDocument removes a document from the index
  357. func (si *SearchIndexer) DeleteDocument(docID string) error {
  358. si.indexMutex.RLock()
  359. defer si.indexMutex.RUnlock()
  360. if si.index == nil {
  361. return fmt.Errorf("search index not initialized")
  362. }
  363. return si.index.Delete(docID)
  364. }
  365. // RebuildIndex rebuilds the entire search index
  366. func (si *SearchIndexer) RebuildIndex(ctx context.Context) error {
  367. si.indexMutex.Lock()
  368. defer si.indexMutex.Unlock()
  369. // Check if context is cancelled
  370. select {
  371. case <-ctx.Done():
  372. return ctx.Err()
  373. default:
  374. }
  375. if si.index != nil {
  376. si.index.Close()
  377. }
  378. // Check context before removing old index
  379. select {
  380. case <-ctx.Done():
  381. return ctx.Err()
  382. default:
  383. }
  384. // Remove old index
  385. if err := os.RemoveAll(si.indexPath); err != nil {
  386. logger.Error("Failed to remove old index:", err)
  387. }
  388. // Check context before creating new index
  389. select {
  390. case <-ctx.Done():
  391. return ctx.Err()
  392. default:
  393. }
  394. // Create new index
  395. var err error
  396. si.index, err = bleve.New(si.indexPath, si.createIndexMapping())
  397. if err != nil {
  398. return fmt.Errorf("failed to create new index: %w", err)
  399. }
  400. logger.Info("Search index rebuilt successfully")
  401. return nil
  402. }
  403. // GetIndexStats returns statistics about the search index
  404. func (si *SearchIndexer) GetIndexStats() (map[string]interface{}, error) {
  405. si.indexMutex.RLock()
  406. defer si.indexMutex.RUnlock()
  407. if si.index == nil {
  408. return nil, fmt.Errorf("search index not initialized")
  409. }
  410. docCount, err := si.index.DocCount()
  411. if err != nil {
  412. return nil, err
  413. }
  414. return map[string]interface{}{
  415. "document_count": docCount,
  416. "index_path": si.indexPath,
  417. }, nil
  418. }
  419. // Close closes the search index and triggers cleanup
  420. func (si *SearchIndexer) Close() error {
  421. if si.cancel != nil {
  422. si.cancel()
  423. }
  424. si.cleanup()
  425. return nil
  426. }
// Convenience functions for different search types.
  428. // SearchSites searches only site configurations
  429. func SearchSites(ctx context.Context, query string, limit int) ([]SearchResult, error) {
  430. return GetSearchIndexer().SearchByType(ctx, query, "site", limit)
  431. }
  432. // SearchStreams searches only stream configurations
  433. func SearchStreams(ctx context.Context, query string, limit int) ([]SearchResult, error) {
  434. return GetSearchIndexer().SearchByType(ctx, query, "stream", limit)
  435. }
  436. // SearchConfigs searches only general configurations
  437. func SearchConfigs(ctx context.Context, query string, limit int) ([]SearchResult, error) {
  438. return GetSearchIndexer().SearchByType(ctx, query, "config", limit)
  439. }
  440. // SearchAll searches across all configuration types
  441. func SearchAll(ctx context.Context, query string, limit int) ([]SearchResult, error) {
  442. return GetSearchIndexer().Search(ctx, query, limit)
  443. }
  444. // isTextContent checks if the content appears to be text-based
  445. // This helps prevent indexing binary files that might have been misidentified
  446. func isTextContent(content []byte) bool {
  447. if len(content) == 0 {
  448. return true // Empty content is considered text
  449. }
  450. // Check for common binary file signatures
  451. if len(content) >= 4 {
  452. // Check for some common binary file headers
  453. switch {
  454. case content[0] == 0x7F && content[1] == 0x45 && content[2] == 0x4C && content[3] == 0x46: // ELF
  455. return false
  456. case content[0] == 0x89 && content[1] == 0x50 && content[2] == 0x4E && content[3] == 0x47: // PNG
  457. return false
  458. case content[0] == 0xFF && content[1] == 0xD8 && content[2] == 0xFF: // JPEG
  459. return false
  460. case content[0] == 0x50 && content[1] == 0x4B && content[2] == 0x03 && content[3] == 0x04: // ZIP
  461. return false
  462. case content[0] == 0x50 && content[1] == 0x4B && content[2] == 0x05 && content[3] == 0x06: // ZIP (empty)
  463. return false
  464. case content[0] == 0x50 && content[1] == 0x4B && content[2] == 0x07 && content[3] == 0x08: // ZIP (spanned)
  465. return false
  466. }
  467. }
  468. // Check if the first part of the content contains mostly printable characters
  469. // Sample up to 8KB for performance
  470. sampleSize := len(content)
  471. if sampleSize > 8192 {
  472. sampleSize = 8192
  473. }
  474. nonPrintableCount := 0
  475. for i := 0; i < sampleSize; i++ {
  476. b := content[i]
  477. // Allow printable ASCII characters, newlines, tabs, and carriage returns
  478. if (b < 32 && b != 9 && b != 10 && b != 13) || b > 126 {
  479. nonPrintableCount++
  480. }
  481. }
  482. // If more than 30% of the sampled content is non-printable, consider it binary
  483. threshold := float64(sampleSize) * 0.3
  484. return float64(nonPrintableCount) <= threshold
  485. }