Explorar el Código

LibRegex: Use a copy-on-write vector for fork state

Ali Mohammad Pur hace 2 años
padre
commit
f1851346d3

+ 4 - 2
Userland/Libraries/LibRegex/RegexByteCode.cpp

@@ -1060,9 +1060,11 @@ ALWAYS_INLINE ExecutionResult OpCode_Checkpoint::execute(MatchInput const& input
 
 
 ALWAYS_INLINE ExecutionResult OpCode_JumpNonEmpty::execute(MatchInput const& input, MatchState& state) const
 ALWAYS_INLINE ExecutionResult OpCode_JumpNonEmpty::execute(MatchInput const& input, MatchState& state) const
 {
 {
-    auto current_position = state.string_position;
+    u64 current_position = state.string_position;
     auto checkpoint_ip = state.instruction_position + size() + checkpoint();
     auto checkpoint_ip = state.instruction_position + size() + checkpoint();
-    if (input.checkpoints.get(checkpoint_ip).value_or(current_position) != current_position) {
+    auto checkpoint_position = input.checkpoints.find(checkpoint_ip);
+
+    if (checkpoint_position != input.checkpoints.end() && checkpoint_position->value != current_position) {
         auto form = this->form();
         auto form = this->form();
 
 
         if (form == OpCodeId::Jump) {
         if (form == OpCodeId::Jump) {

+ 133 - 3
Userland/Libraries/LibRegex/RegexMatch.h

@@ -12,6 +12,7 @@
 #include <AK/FlyString.h>
 #include <AK/FlyString.h>
 #include <AK/HashMap.h>
 #include <AK/HashMap.h>
 #include <AK/MemMem.h>
 #include <AK/MemMem.h>
+#include <AK/RedBlackTree.h>
 #include <AK/String.h>
 #include <AK/String.h>
 #include <AK/StringBuilder.h>
 #include <AK/StringBuilder.h>
 #include <AK/StringView.h>
 #include <AK/StringView.h>
@@ -23,6 +24,135 @@
 
 
 namespace regex {
 namespace regex {
 
 
+template<typename T>
+class COWVector {
+    struct Detail : RefCounted<Detail> {
+        Vector<T> m_members;
+    };
+
+public:
+    COWVector()
+        : m_detail(make_ref_counted<Detail>())
+    {
+    }
+
+    COWVector(COWVector const&) = default;
+    COWVector(COWVector&&) = default;
+
+    COWVector& operator=(COWVector const&) = default;
+    COWVector& operator=(COWVector&&) = default;
+
+    Vector<T> release() &&
+    {
+        if (m_detail->ref_count() == 1)
+            return exchange(m_detail->m_members, Vector<T>());
+
+        return m_detail->m_members;
+    }
+
+    void append(T const& value)
+    {
+        return append(T { value });
+    }
+
+    void append(T&& value)
+    {
+        copy();
+        m_detail->m_members.append(move(value));
+    }
+
+    void resize(size_t size)
+    {
+        copy();
+        m_detail->m_members.resize(size);
+    }
+
+    void ensure_capacity(size_t capacity)
+    {
+        if (m_detail->m_members.capacity() >= capacity)
+            return;
+
+        copy();
+        m_detail->m_members.ensure_capacity(capacity);
+    }
+
+    template<typename... Args>
+    void empend(Args&&... args)
+    {
+        copy();
+        m_detail->m_members.empend(forward<Args>(args)...);
+    }
+
+    void clear()
+    {
+        if (m_detail->ref_count() > 1)
+            m_detail = make_ref_counted<Detail>();
+        else
+            m_detail->m_members.clear();
+    }
+
+    T& at(size_t index)
+    {
+        // We're handing out a mutable reference, so make sure we own the data exclusively.
+        copy();
+        return m_detail->m_members.at(index);
+    }
+
+    T const& at(size_t index) const
+    {
+        return m_detail->m_members.at(index);
+    }
+
+    T& operator[](size_t index)
+    {
+        // We're handing out a mutable reference, so make sure we own the data exclusively.
+        copy();
+        return m_detail->m_members[index];
+    }
+
+    T const& operator[](size_t index) const
+    {
+        return m_detail->m_members[index];
+    }
+
+    size_t capacity() const
+    {
+        return m_detail->m_members.capacity();
+    }
+
+    size_t size() const
+    {
+        return m_detail->m_members.size();
+    }
+
+    bool is_empty() const
+    {
+        return m_detail->m_members.is_empty();
+    }
+
+    T const& first() const
+    {
+        return m_detail->m_members.first();
+    }
+
+    T const& last() const
+    {
+        return m_detail->m_members.last();
+    }
+
+private:
+    void copy()
+    {
+        if (m_detail->ref_count() <= 1)
+            return;
+        auto new_detail = make_ref_counted<Detail>();
+        new_detail->m_members = m_detail->m_members;
+        m_detail = new_detail;
+    }
+
+    NonnullRefPtr<Detail> m_detail;
+};
+
 class RegexStringView {
 class RegexStringView {
 public:
 public:
     RegexStringView() = default;
     RegexStringView() = default;
@@ -510,9 +640,9 @@ struct MatchState {
     size_t fork_at_position { 0 };
     size_t fork_at_position { 0 };
     size_t forks_since_last_save { 0 };
     size_t forks_since_last_save { 0 };
     Optional<size_t> initiating_fork;
     Optional<size_t> initiating_fork;
-    Vector<Match> matches;
-    Vector<Vector<Match>> capture_group_matches;
-    Vector<u64> repetition_marks;
+    COWVector<Match> matches;
+    COWVector<Vector<Match>> capture_group_matches;
+    COWVector<u64> repetition_marks;
 };
 };
 
 
 }
 }

+ 3 - 3
Userland/Libraries/LibRegex/RegexMatcher.cpp

@@ -156,7 +156,7 @@ RegexResult Matcher<Parser>::match(Vector<RegexStringView> const& views, Optiona
 
 
         for (size_t j = 0; j < c_match_preallocation_count; ++j) {
         for (size_t j = 0; j < c_match_preallocation_count; ++j) {
             state.matches.empend();
             state.matches.empend();
-            state.capture_group_matches.unchecked_append({});
+            state.capture_group_matches.empend();
             state.capture_group_matches.at(j).ensure_capacity(capture_groups_count);
             state.capture_group_matches.at(j).ensure_capacity(capture_groups_count);
             for (size_t k = 0; k < capture_groups_count; ++k)
             for (size_t k = 0; k < capture_groups_count; ++k)
                 state.capture_group_matches.at(j).unchecked_append({});
                 state.capture_group_matches.at(j).unchecked_append({});
@@ -306,8 +306,8 @@ RegexResult Matcher<Parser>::match(Vector<RegexStringView> const& views, Optiona
     RegexResult result {
     RegexResult result {
         match_count != 0,
         match_count != 0,
         match_count,
         match_count,
-        move(state.matches),
-        move(state.capture_group_matches),
+        move(state.matches).release(),
+        move(state.capture_group_matches).release(),
         operations,
         operations,
         m_pattern->parser_result.capture_groups_count,
         m_pattern->parser_result.capture_groups_count,
         m_pattern->parser_result.named_capture_groups_count,
         m_pattern->parser_result.named_capture_groups_count,