AK: Move parts for searching CircularBuffer into a new class

We will be adding extra logic to the CircularBuffer to optimize searching, but this would negatively impact the performance of CircularBuffer users that don't need that functionality.
Author: https://github.com/timschumi Commit: https://github.com/SerenityOS/serenity/commit/9e82ad758e Pull-request: https://github.com/SerenityOS/serenity/pull/19332 Reviewed-by: https://github.com/linusg ✅
2024-11-22 15:40:19 +00:00 · 2023-06-01 20:48:35 +02:00 · 2023-06-01 20:48:35 +02:00 · 9e82ad758e · 2024-07-17 00:23:42 +09:00
commit 9e82ad758e
parent 40c0dd67dd
6 changed files with 50 additions and 16 deletions
--- a/AK/CircularBuffer.cpp
+++ b/AK/CircularBuffer.cpp
@ -249,7 +249,30 @@ ErrorOr<size_t> CircularBuffer::copy_from_seekback(size_t distance, size_t lengt
    return length - remaining_length;
 }
-ErrorOr<Vector<CircularBuffer::Match>> CircularBuffer::find_copy_in_seekback(size_t maximum_length, size_t minimum_length, Optional<Vector<size_t> const&> distance_hints) const
+SearchableCircularBuffer::SearchableCircularBuffer(ByteBuffer buffer)
    : CircularBuffer(move(buffer))
 {
    // Empty body: the backing storage is handed to the CircularBuffer base constructor via move() above,
    // and this constructor initializes nothing else.
 }
 /// Creates a SearchableCircularBuffer backed by a newly allocated, uninitialized ByteBuffer of `size` bytes.
 /// A failure of ByteBuffer::create_uninitialized() is handed back to the caller through TRY().
 ErrorOr<SearchableCircularBuffer> SearchableCircularBuffer::create_empty(size_t size)
 {
    auto temporary_buffer = TRY(ByteBuffer::create_uninitialized(size));
    // The allocation is moved into the new object; no bookkeeping members are adjusted here.
    SearchableCircularBuffer circular_buffer { move(temporary_buffer) };
    return circular_buffer;
 }
 /// Creates a SearchableCircularBuffer that takes over `buffer` as its backing storage and
 /// records the whole buffer size as used space.
 /// NOTE(review): nothing in this body produces an error; the ErrorOr return type presumably
 /// keeps the signature parallel to create_empty() - confirm against callers.
 ErrorOr<SearchableCircularBuffer> SearchableCircularBuffer::create_initialized(ByteBuffer buffer)
 {
    SearchableCircularBuffer circular_buffer { move(buffer) };
    // Mark every byte of the supplied buffer as used.
    circular_buffer.m_used_space = circular_buffer.m_buffer.size();
    return circular_buffer;
 }
 ErrorOr<Vector<SearchableCircularBuffer::Match>> SearchableCircularBuffer::find_copy_in_seekback(size_t maximum_length, size_t minimum_length, Optional<Vector<size_t> const&> distance_hints) const
 {
    VERIFY(minimum_length > 0);
--- a/AK/CircularBuffer.h
+++ b/AK/CircularBuffer.h
@ -35,15 +35,6 @@ public:
    ErrorOr<size_t> copy_from_seekback(size_t distance, size_t length);
    struct Match {
        size_t distance;
        size_t length;
    };
    /// This searches the seekback buffer (between read head and limit) for occurrences where it matches the next `length` bytes from the read buffer.
    /// Supplying any hints will only consider those distances, in case existing offsets need to be validated.
    /// Note that, since we only start searching at the read head, the length between read head and write head is excluded from the distance.
    ErrorOr<Vector<Match>> find_copy_in_seekback(size_t maximum_length, size_t minimum_length = 2, Optional<Vector<size_t> const&> distance_hints = {}) const;
    [[nodiscard]] size_t empty_space() const;
    [[nodiscard]] size_t used_space() const;
    [[nodiscard]] size_t capacity() const;
@ -53,7 +44,7 @@ public:
    void clear();
-private:
+protected:
    CircularBuffer(ByteBuffer);
    [[nodiscard]] bool is_wrapping_around() const;
@ -69,4 +60,22 @@ private:
    size_t m_seekback_limit {};
 };
 /// A CircularBuffer that additionally offers searching of the seekback buffer.
 /// The search functionality lives in this separate class so that search-specific logic
 /// does not affect users of the plain CircularBuffer that do not need it.
 class SearchableCircularBuffer : public CircularBuffer {
 public:
    /// Factory: creates a buffer backed by a newly allocated ByteBuffer of `size` bytes.
    static ErrorOr<SearchableCircularBuffer> create_empty(size_t size);
    /// Factory: creates a buffer that takes over the given ByteBuffer and counts its whole size as used space.
    static ErrorOr<SearchableCircularBuffer> create_initialized(ByteBuffer);
    /// One entry of the result list returned by find_copy_in_seekback().
    struct Match {
        size_t distance;
        size_t length;
    };
    /// This searches the seekback buffer (between read head and limit) for occurrences where it matches the next `length` bytes from the read buffer.
    /// Supplying any hints will only consider those distances, in case existing offsets need to be validated.
    /// Note that, since we only start searching at the read head, the length between read head and write head is excluded from the distance.
    ErrorOr<Vector<Match>> find_copy_in_seekback(size_t maximum_length, size_t minimum_length = 2, Optional<Vector<size_t> const&> distance_hints = {}) const;
 private:
    // Private constructor: instances are obtained through the static factories above.
    SearchableCircularBuffer(ByteBuffer);
 };
 }
--- a/AK/Forward.h
+++ b/AK/Forward.h
@ -38,6 +38,7 @@ class JsonValue;
 class LexicalPath;
 class LittleEndianInputBitStream;
 class LittleEndianOutputBitStream;
 class SearchableCircularBuffer;
 class SeekableStream;
 class StackInfo;
 class Stream;
@ -187,6 +188,7 @@ using AK::Optional;
 using AK::OwnPtr;
 using AK::ReadonlyBytes;
 using AK::RefPtr;
 using AK::SearchableCircularBuffer;
 using AK::SeekableStream;
 using AK::SinglyLinkedList;
 using AK::Span;
--- a/Tests/AK/TestCircularBuffer.cpp
+++ b/Tests/AK/TestCircularBuffer.cpp
@ -335,7 +335,7 @@ TEST_CASE(find_copy_in_seekback)
    auto needle = "ABCD"sv.bytes();
    // Set up the buffer for testing.
-    auto buffer = MUST(CircularBuffer::create_empty(haystack.size() + needle.size()));
+    auto buffer = MUST(SearchableCircularBuffer::create_empty(haystack.size() + needle.size()));
    auto written_haystack_bytes = buffer.write(haystack);
    VERIFY(written_haystack_bytes == haystack.size());
    MUST(buffer.discard(haystack.size()));
--- a/Userland/Libraries/LibCompress/Lzma.cpp
+++ b/Userland/Libraries/LibCompress/Lzma.cpp
@ -1214,7 +1214,7 @@ void LzmaDecompressor::close()
 ErrorOr<NonnullOwnPtr<LzmaCompressor>> LzmaCompressor::create_container(MaybeOwned<Stream> stream, LzmaCompressorOptions const& options)
 {
-    auto dictionary = TRY(try_make<CircularBuffer>(TRY(CircularBuffer::create_empty(options.dictionary_size + largest_real_match_length))));
+    auto dictionary = TRY(try_make<SearchableCircularBuffer>(TRY(SearchableCircularBuffer::create_empty(options.dictionary_size + largest_real_match_length))));
    // "The LZMA Decoder uses (1 << (lc + lp)) tables with CProb values, where each table contains 0x300 CProb values."
    auto literal_probabilities = TRY(FixedArray<Probability>::create(literal_probability_table_size * (1 << (options.literal_context_bits + options.literal_position_bits))));
@ -1227,7 +1227,7 @@ ErrorOr<NonnullOwnPtr<LzmaCompressor>> LzmaCompressor::create_container(MaybeOwn
    return compressor;
 }
-LzmaCompressor::LzmaCompressor(MaybeOwned<AK::Stream> stream, Compress::LzmaCompressorOptions options, MaybeOwned<CircularBuffer> dictionary, FixedArray<Compress::LzmaState::Probability> literal_probabilities)
+LzmaCompressor::LzmaCompressor(MaybeOwned<AK::Stream> stream, Compress::LzmaCompressorOptions options, MaybeOwned<SearchableCircularBuffer> dictionary, FixedArray<Compress::LzmaState::Probability> literal_probabilities)
    : LzmaState(move(literal_probabilities))
    , m_stream(move(stream))
    , m_options(move(options))
--- a/Userland/Libraries/LibCompress/Lzma.h
+++ b/Userland/Libraries/LibCompress/Lzma.h
@ -229,7 +229,7 @@ public:
    virtual ~LzmaCompressor();
 private:
-    LzmaCompressor(MaybeOwned<Stream>, LzmaCompressorOptions, MaybeOwned<CircularBuffer>, FixedArray<Probability> literal_probabilities);
+    LzmaCompressor(MaybeOwned<Stream>, LzmaCompressorOptions, MaybeOwned<SearchableCircularBuffer>, FixedArray<Probability> literal_probabilities);
    ErrorOr<void> shift_range_encoder();
    ErrorOr<void> normalize_range_encoder();
@ -255,7 +255,7 @@ private:
    // This doubles as an input buffer, which is appended at the very front of the buffer.
    // Therefore, the size of this should at least be the dictionary size + the largest possible repetition length.
-    MaybeOwned<CircularBuffer> m_dictionary;
+    MaybeOwned<SearchableCircularBuffer> m_dictionary;
    // Range encoder state.
    u32 m_range_encoder_range { 0xFFFFFFFF };