Explorar o código

AK: Make String::matches() capable of reporting match positions too

Also, rewrite StringUtils::match(), because the old implementation was
fairly broken, e.g. "acdcxb" would *not* match "a*?b".
AnotherTest hai 4 anos
pai
achega
0801b1fada
Modificáronse 7 ficheiros con 86 adicións e 33 borrados
  1. 5 0
      AK/String.cpp
  2. 1 0
      AK/String.h
  3. 40 32
      AK/StringUtils.cpp
  4. 15 1
      AK/StringUtils.h
  5. 5 0
      AK/StringView.cpp
  6. 1 0
      AK/StringView.h
  7. 19 0
      AK/Tests/TestStringUtils.cpp

+ 5 - 0
AK/String.cpp

@@ -270,6 +270,11 @@ String String::repeated(char ch, size_t count)
     return *impl;
 }
 
+bool String::matches(const StringView& mask, Vector<MaskSpan>& mask_spans, CaseSensitivity case_sensitivity) const
+{
+    return StringUtils::matches(*this, mask, case_sensitivity, &mask_spans);
+}
+
 bool String::matches(const StringView& mask, CaseSensitivity case_sensitivity) const
 {
     return StringUtils::matches(*this, mask, case_sensitivity);

+ 1 - 0
AK/String.h

@@ -112,6 +112,7 @@ public:
 
     static String repeated(char, size_t count);
     bool matches(const StringView& mask, CaseSensitivity = CaseSensitivity::CaseInsensitive) const;
+    bool matches(const StringView& mask, Vector<MaskSpan>&, CaseSensitivity = CaseSensitivity::CaseInsensitive) const;
 
     Optional<int> to_int() const;
     Optional<unsigned> to_uint() const;

+ 40 - 32
AK/StringUtils.cpp

@@ -30,62 +30,70 @@
 #include <AK/String.h>
 #include <AK/StringUtils.h>
 #include <AK/StringView.h>
+#include <AK/Vector.h>
 
 namespace AK {
 
 namespace StringUtils {
 
-bool matches(const StringView& str, const StringView& mask, CaseSensitivity case_sensitivity)
+bool matches(const StringView& str, const StringView& mask, CaseSensitivity case_sensitivity, Vector<MaskSpan>* match_spans)
 {
+    auto record_span = [&match_spans](size_t start, size_t length) {
+        if (match_spans)
+            match_spans->append({ start, length });
+    };
+
     if (str.is_null() || mask.is_null())
         return str.is_null() && mask.is_null();
 
+    if (mask == "*") {
+        record_span(0, str.length());
+        return true;
+    }
+
     if (case_sensitivity == CaseSensitivity::CaseInsensitive) {
         const String str_lower = String(str).to_lowercase();
         const String mask_lower = String(mask).to_lowercase();
-        return matches(str_lower, mask_lower, CaseSensitivity::CaseSensitive);
+        return matches(str_lower, mask_lower, CaseSensitivity::CaseSensitive, match_spans);
     }
 
     const char* string_ptr = str.characters_without_null_termination();
+    const char* string_start = str.characters_without_null_termination();
     const char* string_end = string_ptr + str.length();
     const char* mask_ptr = mask.characters_without_null_termination();
     const char* mask_end = mask_ptr + mask.length();
 
-    // Match string against mask directly unless we hit a *
-    while ((string_ptr < string_end) && (mask_ptr < mask_end) && (*mask_ptr != '*')) {
-        if ((*mask_ptr != *string_ptr) && (*mask_ptr != '?'))
-            return false;
-        mask_ptr++;
-        string_ptr++;
-    }
-
-    const char* cp = nullptr;
-    const char* mp = nullptr;
-
-    while (string_ptr < string_end) {
-        if ((mask_ptr < mask_end) && (*mask_ptr == '*')) {
-            // If we have only a * left, there is no way to not match.
-            if (++mask_ptr == mask_end)
+    auto matches_one = [](char ch, char p) {
+        if (p == '?')
+            return true;
+        return p == ch && ch != 0;
+    };
+    while (string_ptr < string_end && mask_ptr < mask_end) {
+        auto string_start_ptr = string_ptr;
+        switch (*mask_ptr) {
+        case '*':
+            if (mask_ptr[1] == 0) {
+                record_span(string_ptr - string_start, string_end - string_ptr);
                 return true;
-            mp = mask_ptr;
-            cp = string_ptr + 1;
-        } else if ((mask_ptr < mask_end) && ((*mask_ptr == *string_ptr) || (*mask_ptr == '?'))) {
-            mask_ptr++;
-            string_ptr++;
-        } else if ((cp != nullptr) && (mp != nullptr)) {
-            mask_ptr = mp;
-            string_ptr = cp++;
-        } else {
+            }
+            while (string_ptr < string_end && !matches(string_ptr, mask_ptr + 1))
+                ++string_ptr;
+            record_span(string_start_ptr - string_start, string_ptr - string_start_ptr);
+            --string_ptr;
+            break;
+        case '?':
+            record_span(string_ptr - string_start, 1);
+            break;
+        default:
+            if (!matches_one(*string_ptr, *mask_ptr))
+                return false;
             break;
         }
+        ++string_ptr;
+        ++mask_ptr;
     }
 
-    // Handle any trailing mask
-    while ((mask_ptr < mask_end) && (*mask_ptr == '*'))
-        mask_ptr++;
-
-    // If we 'ate' all of the mask and the string then we match.
-    return (mask_ptr == mask_end) && string_ptr == string_end;
+    return string_ptr == string_end && mask_ptr == mask_end;
 }
 
 Optional<int> convert_to_int(const StringView& str)

+ 15 - 1
AK/StringUtils.h

@@ -42,9 +42,23 @@ enum class TrimMode {
     Both
 };
 
+struct MaskSpan {
+    size_t start;
+    size_t length;
+
+    bool operator==(const MaskSpan& other) const
+    {
+        return start == other.start && length == other.length;
+    }
+    bool operator!=(const MaskSpan& other) const
+    {
+        return !(*this == other);
+    }
+};
+
 namespace StringUtils {
 
-bool matches(const StringView& str, const StringView& mask, CaseSensitivity = CaseSensitivity::CaseInsensitive);
+bool matches(const StringView& str, const StringView& mask, CaseSensitivity = CaseSensitivity::CaseInsensitive, Vector<MaskSpan>* match_spans = nullptr);
 Optional<int> convert_to_int(const StringView&);
 Optional<unsigned> convert_to_uint(const StringView&);
 Optional<unsigned> convert_to_uint_from_hex(const StringView&);

+ 5 - 0
AK/StringView.cpp

@@ -164,6 +164,11 @@ bool StringView::ends_with(const StringView& str, CaseSensitivity case_sensitivi
     return StringUtils::ends_with(*this, str, case_sensitivity);
 }
 
+bool StringView::matches(const StringView& mask, Vector<MaskSpan>& mask_spans, CaseSensitivity case_sensitivity) const
+{
+    return StringUtils::matches(*this, mask, case_sensitivity, &mask_spans);
+}
+
 bool StringView::matches(const StringView& mask, CaseSensitivity case_sensitivity) const
 {
     return StringUtils::matches(*this, mask, case_sensitivity);

+ 1 - 0
AK/StringView.h

@@ -87,6 +87,7 @@ public:
     bool starts_with(char) const;
     bool ends_with(char) const;
     bool matches(const StringView& mask, CaseSensitivity = CaseSensitivity::CaseInsensitive) const;
+    bool matches(const StringView& mask, Vector<MaskSpan>&, CaseSensitivity = CaseSensitivity::CaseInsensitive) const;
     bool contains(char) const;
     bool contains(const StringView&, CaseSensitivity = CaseSensitivity::CaseSensitive) const;
     bool equals_ignoring_case(const StringView& other) const;

+ 19 - 0
AK/Tests/TestStringUtils.cpp

@@ -67,6 +67,25 @@ TEST_CASE(matches_case_insensitive)
     EXPECT(!AK::StringUtils::matches("acdcb", "a*c?b"));
 }
 
+TEST_CASE(matches_with_positions)
+{
+    Vector<AK::MaskSpan> spans;
+    EXPECT(AK::StringUtils::matches("abbb", "a*", CaseSensitivity::CaseSensitive, &spans));
+    EXPECT(spans == Vector<AK::MaskSpan>({ { 1, 3 } }));
+
+    spans.clear();
+    EXPECT(AK::StringUtils::matches("abbb", "?*", CaseSensitivity::CaseSensitive, &spans));
+    EXPECT_EQ(spans, Vector<AK::MaskSpan>({ { 0, 1 }, { 1, 3 } }));
+
+    spans.clear();
+    EXPECT(AK::StringUtils::matches("acdcxb", "a*c?b", CaseSensitivity::CaseSensitive, &spans));
+    EXPECT_EQ(spans, Vector<AK::MaskSpan>({ { 1, 2 }, { 4, 1 } }));
+
+    spans.clear();
+    EXPECT(AK::StringUtils::matches("aaaa", "A*", CaseSensitivity::CaseInsensitive, &spans));
+    EXPECT_EQ(spans, Vector<AK::MaskSpan>({ { 1, 3 } }));
+}
+
 TEST_CASE(convert_to_int)
 {
     auto value = AK::StringUtils::convert_to_int(StringView());