FuzzyMatch.cpp 4.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123
  1. /*
  2. * Copyright (c) 2021, Spencer Dixon <spencercdixon@gmail.com>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include "FuzzyMatch.h"
  7. #include <AK/CharacterTypes.h>
  8. #include <string.h>
  9. namespace Assistant {
  10. static constexpr const int RECURSION_LIMIT = 10;
  11. static constexpr const int MAX_MATCHES = 256;
  12. // Bonuses and penalties are used to build up a final score for the match.
  13. static constexpr const int SEQUENTIAL_BONUS = 15; // bonus for adjacent matches (needle: 'ca', haystack: 'cat')
  14. static constexpr const int SEPARATOR_BONUS = 30; // bonus if match occurs after a separator ('_' or ' ')
  15. static constexpr const int CAMEL_BONUS = 30; // bonus if match is uppercase and prev is lower (needle: 'myF' haystack: '/path/to/myFile.txt')
  16. static constexpr const int FIRST_LETTER_BONUS = 20; // bonus if the first letter is matched (needle: 'c' haystack: 'cat')
  17. static constexpr const int LEADING_LETTER_PENALTY = -5; // penalty applied for every letter in str before the first match
  18. static constexpr const int MAX_LEADING_LETTER_PENALTY = -15; // maximum penalty for leading letters
  19. static constexpr const int UNMATCHED_LETTER_PENALTY = -1; // penalty for every letter that doesn't matter
  20. static FuzzyMatchResult fuzzy_match_recursive(String const& needle, String const& haystack, size_t needle_idx, size_t haystack_idx,
  21. u8 const* src_matches, u8* matches, int next_match, int& recursion_count)
  22. {
  23. int out_score = 0;
  24. ++recursion_count;
  25. if (recursion_count >= RECURSION_LIMIT)
  26. return { false, out_score };
  27. if (needle.length() == needle_idx || haystack.length() == haystack_idx)
  28. return { false, out_score };
  29. bool had_recursive_match = false;
  30. constexpr size_t recursive_match_limit = 256;
  31. u8 best_recursive_matches[recursive_match_limit];
  32. int best_recursive_score = 0;
  33. bool first_match = true;
  34. while (needle_idx < needle.length() && haystack_idx < haystack.length()) {
  35. if (to_ascii_lowercase(needle[needle_idx]) == to_ascii_lowercase(haystack[haystack_idx])) {
  36. if (next_match >= MAX_MATCHES)
  37. return { false, out_score };
  38. if (first_match && src_matches) {
  39. memcpy(matches, src_matches, next_match);
  40. first_match = false;
  41. }
  42. u8 recursive_matches[recursive_match_limit] {};
  43. auto result = fuzzy_match_recursive(needle, haystack, needle_idx, haystack_idx + 1, matches, recursive_matches, next_match, recursion_count);
  44. if (result.matched) {
  45. if (!had_recursive_match || result.score > best_recursive_score) {
  46. memcpy(best_recursive_matches, recursive_matches, recursive_match_limit);
  47. best_recursive_score = result.score;
  48. }
  49. had_recursive_match = true;
  50. matches[next_match++] = haystack_idx;
  51. }
  52. needle_idx++;
  53. }
  54. haystack_idx++;
  55. }
  56. bool matched = needle_idx == needle.length();
  57. if (matched) {
  58. out_score = 100;
  59. int penalty = LEADING_LETTER_PENALTY * matches[0];
  60. if (penalty < MAX_LEADING_LETTER_PENALTY)
  61. penalty = MAX_LEADING_LETTER_PENALTY;
  62. out_score += penalty;
  63. int unmatched = haystack.length() - next_match;
  64. out_score += UNMATCHED_LETTER_PENALTY * unmatched;
  65. for (int i = 0; i < next_match; i++) {
  66. u8 current_idx = matches[i];
  67. if (i > 0) {
  68. u8 previous_idx = matches[i - 1];
  69. if (current_idx == previous_idx)
  70. out_score += SEQUENTIAL_BONUS;
  71. }
  72. if (current_idx > 0) {
  73. u32 current_character = haystack[current_idx];
  74. u32 neighbor_character = haystack[current_idx - 1];
  75. if (neighbor_character != to_ascii_uppercase(neighbor_character) && current_character != to_ascii_lowercase(current_character))
  76. out_score += CAMEL_BONUS;
  77. if (neighbor_character == '_' || neighbor_character == ' ')
  78. out_score += SEPARATOR_BONUS;
  79. } else {
  80. out_score += FIRST_LETTER_BONUS;
  81. }
  82. }
  83. if (had_recursive_match && (!matched || best_recursive_score > out_score)) {
  84. memcpy(matches, best_recursive_matches, MAX_MATCHES);
  85. out_score = best_recursive_score;
  86. return { true, out_score };
  87. } else if (matched) {
  88. return { true, out_score };
  89. }
  90. }
  91. return { false, out_score };
  92. }
  93. FuzzyMatchResult fuzzy_match(String needle, String haystack)
  94. {
  95. int recursion_count = 0;
  96. u8 matches[MAX_MATCHES] {};
  97. return fuzzy_match_recursive(needle, haystack, 0, 0, nullptr, matches, 0, recursion_count);
  98. }
  99. }