AK: Move parts for searching CircularBuffer into a new class

We will be adding extra logic to the CircularBuffer to optimize
searching, but this would negatively impact the performance of
CircularBuffer users that don't need that functionality.
This commit is contained in:
Tim Schumacher 2023-06-01 20:48:35 +02:00 committed by Linus Groh
parent 40c0dd67dd
commit 9e82ad758e
Notes: sideshowbarker 2024-07-17 00:23:42 +09:00
6 changed files with 50 additions and 16 deletions

View file

@ -249,7 +249,30 @@ ErrorOr<size_t> CircularBuffer::copy_from_seekback(size_t distance, size_t lengt
return length - remaining_length; return length - remaining_length;
} }
ErrorOr<Vector<CircularBuffer::Match>> CircularBuffer::find_copy_in_seekback(size_t maximum_length, size_t minimum_length, Optional<Vector<size_t> const&> distance_hints) const SearchableCircularBuffer::SearchableCircularBuffer(ByteBuffer buffer)
: CircularBuffer(move(buffer))
{
}
/// Builds a SearchableCircularBuffer on top of `size` bytes of newly allocated, uninitialized storage.
/// If allocating the backing ByteBuffer fails, that error is returned to the caller instead.
ErrorOr<SearchableCircularBuffer> SearchableCircularBuffer::create_empty(size_t size)
{
    auto backing_storage = TRY(ByteBuffer::create_uninitialized(size));

    // Ownership of the storage moves into the buffer; this function does not adjust the used-space counter.
    return SearchableCircularBuffer { move(backing_storage) };
}
/// Builds a SearchableCircularBuffer that takes ownership of `buffer` and treats its
/// entire contents as data that has already been written.
ErrorOr<SearchableCircularBuffer> SearchableCircularBuffer::create_initialized(ByteBuffer buffer)
{
    SearchableCircularBuffer searchable { move(buffer) };

    // Every byte of the adopted storage counts as used space.
    auto const adopted_size = searchable.m_buffer.size();
    searchable.m_used_space = adopted_size;

    return searchable;
}
ErrorOr<Vector<SearchableCircularBuffer::Match>> SearchableCircularBuffer::find_copy_in_seekback(size_t maximum_length, size_t minimum_length, Optional<Vector<size_t> const&> distance_hints) const
{ {
VERIFY(minimum_length > 0); VERIFY(minimum_length > 0);

View file

@ -35,15 +35,6 @@ public:
ErrorOr<size_t> copy_from_seekback(size_t distance, size_t length); ErrorOr<size_t> copy_from_seekback(size_t distance, size_t length);
struct Match {
size_t distance;
size_t length;
};
/// This searches the seekback buffer (between read head and limit) for occurrences where it matches the next `length` bytes from the read buffer.
/// Supplying any hints will only consider those distances, in case existing offsets need to be validated.
/// Note that, since we only start searching at the read head, the length between read head and write head is excluded from the distance.
ErrorOr<Vector<Match>> find_copy_in_seekback(size_t maximum_length, size_t minimum_length = 2, Optional<Vector<size_t> const&> distance_hints = {}) const;
[[nodiscard]] size_t empty_space() const; [[nodiscard]] size_t empty_space() const;
[[nodiscard]] size_t used_space() const; [[nodiscard]] size_t used_space() const;
[[nodiscard]] size_t capacity() const; [[nodiscard]] size_t capacity() const;
@ -53,7 +44,7 @@ public:
void clear(); void clear();
private: protected:
CircularBuffer(ByteBuffer); CircularBuffer(ByteBuffer);
[[nodiscard]] bool is_wrapping_around() const; [[nodiscard]] bool is_wrapping_around() const;
@ -69,4 +60,22 @@ private:
size_t m_seekback_limit {}; size_t m_seekback_limit {};
}; };
/// A CircularBuffer that additionally supports searching its seekback buffer.
/// The search functionality lives in this separate class so that plain CircularBuffer
/// users do not have to carry logic they do not need.
class SearchableCircularBuffer : public CircularBuffer {
public:
    /// A single occurrence reported by find_copy_in_seekback().
    struct Match {
        size_t distance;
        size_t length;
    };

    /// Creates a buffer backed by `size` bytes of newly allocated storage.
    static ErrorOr<SearchableCircularBuffer> create_empty(size_t size);

    /// Creates a buffer that adopts the given ByteBuffer as its storage.
    static ErrorOr<SearchableCircularBuffer> create_initialized(ByteBuffer);

    /// Looks through the seekback buffer (the region between the read head and the seekback limit)
    /// for occurrences that match the next `length` bytes of the read buffer.
    /// When hints are supplied, only those distances are considered, which is useful for
    /// validating offsets that are already known.
    /// Because the search starts at the read head, the span between the read head and the
    /// write head is not included in the reported distance.
    ErrorOr<Vector<Match>> find_copy_in_seekback(size_t maximum_length, size_t minimum_length = 2, Optional<Vector<size_t> const&> distance_hints = {}) const;

private:
    // Construction goes through the create_* factories above.
    SearchableCircularBuffer(ByteBuffer);
};
} }

View file

@ -38,6 +38,7 @@ class JsonValue;
class LexicalPath; class LexicalPath;
class LittleEndianInputBitStream; class LittleEndianInputBitStream;
class LittleEndianOutputBitStream; class LittleEndianOutputBitStream;
class SearchableCircularBuffer;
class SeekableStream; class SeekableStream;
class StackInfo; class StackInfo;
class Stream; class Stream;
@ -187,6 +188,7 @@ using AK::Optional;
using AK::OwnPtr; using AK::OwnPtr;
using AK::ReadonlyBytes; using AK::ReadonlyBytes;
using AK::RefPtr; using AK::RefPtr;
using AK::SearchableCircularBuffer;
using AK::SeekableStream; using AK::SeekableStream;
using AK::SinglyLinkedList; using AK::SinglyLinkedList;
using AK::Span; using AK::Span;

View file

@ -335,7 +335,7 @@ TEST_CASE(find_copy_in_seekback)
auto needle = "ABCD"sv.bytes(); auto needle = "ABCD"sv.bytes();
// Set up the buffer for testing. // Set up the buffer for testing.
auto buffer = MUST(CircularBuffer::create_empty(haystack.size() + needle.size())); auto buffer = MUST(SearchableCircularBuffer::create_empty(haystack.size() + needle.size()));
auto written_haystack_bytes = buffer.write(haystack); auto written_haystack_bytes = buffer.write(haystack);
VERIFY(written_haystack_bytes == haystack.size()); VERIFY(written_haystack_bytes == haystack.size());
MUST(buffer.discard(haystack.size())); MUST(buffer.discard(haystack.size()));

View file

@ -1214,7 +1214,7 @@ void LzmaDecompressor::close()
ErrorOr<NonnullOwnPtr<LzmaCompressor>> LzmaCompressor::create_container(MaybeOwned<Stream> stream, LzmaCompressorOptions const& options) ErrorOr<NonnullOwnPtr<LzmaCompressor>> LzmaCompressor::create_container(MaybeOwned<Stream> stream, LzmaCompressorOptions const& options)
{ {
auto dictionary = TRY(try_make<CircularBuffer>(TRY(CircularBuffer::create_empty(options.dictionary_size + largest_real_match_length)))); auto dictionary = TRY(try_make<SearchableCircularBuffer>(TRY(SearchableCircularBuffer::create_empty(options.dictionary_size + largest_real_match_length))));
// "The LZMA Decoder uses (1 << (lc + lp)) tables with CProb values, where each table contains 0x300 CProb values." // "The LZMA Decoder uses (1 << (lc + lp)) tables with CProb values, where each table contains 0x300 CProb values."
auto literal_probabilities = TRY(FixedArray<Probability>::create(literal_probability_table_size * (1 << (options.literal_context_bits + options.literal_position_bits)))); auto literal_probabilities = TRY(FixedArray<Probability>::create(literal_probability_table_size * (1 << (options.literal_context_bits + options.literal_position_bits))));
@ -1227,7 +1227,7 @@ ErrorOr<NonnullOwnPtr<LzmaCompressor>> LzmaCompressor::create_container(MaybeOwn
return compressor; return compressor;
} }
LzmaCompressor::LzmaCompressor(MaybeOwned<AK::Stream> stream, Compress::LzmaCompressorOptions options, MaybeOwned<CircularBuffer> dictionary, FixedArray<Compress::LzmaState::Probability> literal_probabilities) LzmaCompressor::LzmaCompressor(MaybeOwned<AK::Stream> stream, Compress::LzmaCompressorOptions options, MaybeOwned<SearchableCircularBuffer> dictionary, FixedArray<Compress::LzmaState::Probability> literal_probabilities)
: LzmaState(move(literal_probabilities)) : LzmaState(move(literal_probabilities))
, m_stream(move(stream)) , m_stream(move(stream))
, m_options(move(options)) , m_options(move(options))

View file

@ -229,7 +229,7 @@ public:
virtual ~LzmaCompressor(); virtual ~LzmaCompressor();
private: private:
LzmaCompressor(MaybeOwned<Stream>, LzmaCompressorOptions, MaybeOwned<CircularBuffer>, FixedArray<Probability> literal_probabilities); LzmaCompressor(MaybeOwned<Stream>, LzmaCompressorOptions, MaybeOwned<SearchableCircularBuffer>, FixedArray<Probability> literal_probabilities);
ErrorOr<void> shift_range_encoder(); ErrorOr<void> shift_range_encoder();
ErrorOr<void> normalize_range_encoder(); ErrorOr<void> normalize_range_encoder();
@ -255,7 +255,7 @@ private:
// This doubles as an input buffer, which is appended at the very front of the buffer. // This doubles as an input buffer, which is appended at the very front of the buffer.
// Therefore, the size of this should at least be the dictionary size + the largest possible repetition length. // Therefore, the size of this should at least be the dictionary size + the largest possible repetition length.
MaybeOwned<CircularBuffer> m_dictionary; MaybeOwned<SearchableCircularBuffer> m_dictionary;
// Range encoder state. // Range encoder state.
u32 m_range_encoder_range { 0xFFFFFFFF }; u32 m_range_encoder_range { 0xFFFFFFFF };