mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2024-11-22 15:40:19 +00:00
AK: Split up CircularBuffer::find_copy_in_seekback
The "operation modes" of this function have very different focuses, and trying to combine both in a way that shares as much code as possible probably results in the worst performance. Instead, split up the function into "existing distances" and "no existing distances" so that we can optimize each case separately.
This commit is contained in:
parent
9e82ad758e
commit
046a9faeb3
Notes:
sideshowbarker
2024-07-17 04:34:25 +09:00
Author: https://github.com/timschumi Commit: https://github.com/SerenityOS/serenity/commit/046a9faeb3 Pull-request: https://github.com/SerenityOS/serenity/pull/19332 Reviewed-by: https://github.com/linusg ✅
4 changed files with 91 additions and 53 deletions
|
@ -272,7 +272,7 @@ ErrorOr<SearchableCircularBuffer> SearchableCircularBuffer::create_initialized(B
|
||||||
return circular_buffer;
|
return circular_buffer;
|
||||||
}
|
}
|
||||||
|
|
||||||
ErrorOr<Vector<SearchableCircularBuffer::Match>> SearchableCircularBuffer::find_copy_in_seekback(size_t maximum_length, size_t minimum_length, Optional<Vector<size_t> const&> distance_hints) const
|
ErrorOr<Vector<SearchableCircularBuffer::Match>> SearchableCircularBuffer::find_copy_in_seekback(size_t maximum_length, size_t minimum_length) const
|
||||||
{
|
{
|
||||||
VERIFY(minimum_length > 0);
|
VERIFY(minimum_length > 0);
|
||||||
|
|
||||||
|
@ -285,29 +285,7 @@ ErrorOr<Vector<SearchableCircularBuffer::Match>> SearchableCircularBuffer::find_
|
||||||
|
|
||||||
Vector<Match> matches;
|
Vector<Match> matches;
|
||||||
|
|
||||||
if (distance_hints.has_value()) {
|
// Use memmem to find the initial matches.
|
||||||
// If we have any hints, verify and use those.
|
|
||||||
for (auto const& distance : distance_hints.value()) {
|
|
||||||
// TODO: This does not yet support looping repetitions.
|
|
||||||
if (distance < minimum_length)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
auto needle_offset = (capacity() + m_reading_head) % capacity();
|
|
||||||
auto haystack_offset = (capacity() + m_reading_head - distance) % capacity();
|
|
||||||
|
|
||||||
for (size_t i = 0; i < minimum_length; i++) {
|
|
||||||
if (m_buffer[needle_offset] != m_buffer[haystack_offset])
|
|
||||||
break;
|
|
||||||
|
|
||||||
needle_offset = (needle_offset + 1) % capacity();
|
|
||||||
haystack_offset = (haystack_offset + 1) % capacity();
|
|
||||||
|
|
||||||
if (i + 1 == minimum_length)
|
|
||||||
TRY(matches.try_empend(distance, minimum_length));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// Otherwise, use memmem to find the initial matches.
|
|
||||||
// Note: We have the read head as our reference point, but `next_read_span_with_seekback` isn't aware of that and continues to use the write head.
|
// Note: We have the read head as our reference point, but `next_read_span_with_seekback` isn't aware of that and continues to use the write head.
|
||||||
// Therefore, we need to make sure to slice off the extraneous bytes from the end of the span and shift the returned distances by the correct amount.
|
// Therefore, we need to make sure to slice off the extraneous bytes from the end of the span and shift the returned distances by the correct amount.
|
||||||
size_t haystack_offset_from_start = 0;
|
size_t haystack_offset_from_start = 0;
|
||||||
|
@ -347,6 +325,65 @@ ErrorOr<Vector<SearchableCircularBuffer::Match>> SearchableCircularBuffer::find_
|
||||||
// Try and find the next match.
|
// Try and find the next match.
|
||||||
memmem_match = AK::memmem(haystack.begin(), haystack.end(), needle);
|
memmem_match = AK::memmem(haystack.begin(), haystack.end(), needle);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// From now on, all matches that we have stored have at least a length of `minimum_length` and they all refer to the same value.
|
||||||
|
// For the remaining part, we will keep checking the next byte incrementally and keep eliminating matches until we have eliminated all of them.
|
||||||
|
Vector<Match> next_matches;
|
||||||
|
|
||||||
|
for (size_t offset = minimum_length; offset < maximum_length; offset++) {
|
||||||
|
auto needle_data = m_buffer[(capacity() + m_reading_head + offset) % capacity()];
|
||||||
|
|
||||||
|
for (auto const& match : matches) {
|
||||||
|
auto haystack_data = m_buffer[(capacity() + m_reading_head - match.distance + offset) % capacity()];
|
||||||
|
|
||||||
|
if (haystack_data != needle_data)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
TRY(next_matches.try_empend(match.distance, match.length + 1));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (next_matches.size() == 0)
|
||||||
|
return matches;
|
||||||
|
|
||||||
|
swap(matches, next_matches);
|
||||||
|
next_matches.clear_with_capacity();
|
||||||
|
}
|
||||||
|
|
||||||
|
return matches;
|
||||||
|
}
|
||||||
|
|
||||||
|
ErrorOr<Vector<SearchableCircularBuffer::Match>> SearchableCircularBuffer::find_copy_in_seekback(Vector<size_t> const& distances, size_t maximum_length, size_t minimum_length) const
|
||||||
|
{
|
||||||
|
VERIFY(minimum_length > 0);
|
||||||
|
|
||||||
|
// Clip the maximum length to the amount of data that we actually store.
|
||||||
|
if (maximum_length > m_used_space)
|
||||||
|
maximum_length = m_used_space;
|
||||||
|
|
||||||
|
if (maximum_length < minimum_length)
|
||||||
|
return Vector<Match> {};
|
||||||
|
|
||||||
|
Vector<Match> matches;
|
||||||
|
|
||||||
|
// Verify all hints that we have.
|
||||||
|
for (auto const& distance : distances) {
|
||||||
|
// TODO: This does not yet support looping repetitions.
|
||||||
|
if (distance < minimum_length)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
auto needle_offset = (capacity() + m_reading_head) % capacity();
|
||||||
|
auto haystack_offset = (capacity() + m_reading_head - distance) % capacity();
|
||||||
|
|
||||||
|
for (size_t i = 0; i < minimum_length; i++) {
|
||||||
|
if (m_buffer[needle_offset] != m_buffer[haystack_offset])
|
||||||
|
break;
|
||||||
|
|
||||||
|
needle_offset = (needle_offset + 1) % capacity();
|
||||||
|
haystack_offset = (haystack_offset + 1) % capacity();
|
||||||
|
|
||||||
|
if (i + 1 == minimum_length)
|
||||||
|
TRY(matches.try_empend(distance, minimum_length));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// From now on, all matches that we have stored have at least a length of `minimum_length` and they all refer to the same value.
|
// From now on, all matches that we have stored have at least a length of `minimum_length` and they all refer to the same value.
|
||||||
|
|
|
@ -72,7 +72,8 @@ public:
|
||||||
/// This searches the seekback buffer (between read head and limit) for occurrences where it matches the next `length` bytes from the read buffer.
|
/// This searches the seekback buffer (between read head and limit) for occurrences where it matches the next `length` bytes from the read buffer.
|
||||||
/// Supplying any hints will only consider those distances, in case existing offsets need to be validated.
|
/// Supplying any hints will only consider those distances, in case existing offsets need to be validated.
|
||||||
/// Note that, since we only start searching at the read head, the length between read head and write head is excluded from the distance.
|
/// Note that, since we only start searching at the read head, the length between read head and write head is excluded from the distance.
|
||||||
ErrorOr<Vector<Match>> find_copy_in_seekback(size_t maximum_length, size_t minimum_length = 2, Optional<Vector<size_t> const&> distance_hints = {}) const;
|
ErrorOr<Vector<Match>> find_copy_in_seekback(size_t maximum_length, size_t minimum_length = 2) const;
|
||||||
|
ErrorOr<Vector<Match>> find_copy_in_seekback(Vector<size_t> const& distances, size_t maximum_length, size_t minimum_length = 2) const;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
SearchableCircularBuffer(ByteBuffer);
|
SearchableCircularBuffer(ByteBuffer);
|
||||||
|
|
|
@ -412,7 +412,7 @@ TEST_CASE(find_copy_in_seekback)
|
||||||
|
|
||||||
{
|
{
|
||||||
// Find the largest matches with a length between 1 and 2 (selected "AB", everything smaller gets eliminated).
|
// Find the largest matches with a length between 1 and 2 (selected "AB", everything smaller gets eliminated).
|
||||||
auto matches = MUST(buffer.find_copy_in_seekback(2, 1, Vector<size_t> { 6ul, 9ul }));
|
auto matches = MUST(buffer.find_copy_in_seekback(Vector<size_t> { 6ul, 9ul }, 2, 1));
|
||||||
EXPECT_EQ(matches.size(), 2ul);
|
EXPECT_EQ(matches.size(), 2ul);
|
||||||
EXPECT_EQ(matches[0].distance, 6ul);
|
EXPECT_EQ(matches[0].distance, 6ul);
|
||||||
EXPECT_EQ(matches[0].length, 2ul);
|
EXPECT_EQ(matches[0].length, 2ul);
|
||||||
|
@ -422,13 +422,13 @@ TEST_CASE(find_copy_in_seekback)
|
||||||
|
|
||||||
{
|
{
|
||||||
// Check that we don't find anything for hints before the valid range.
|
// Check that we don't find anything for hints before the valid range.
|
||||||
auto matches = MUST(buffer.find_copy_in_seekback(2, 1, Vector<size_t> { 0ul }));
|
auto matches = MUST(buffer.find_copy_in_seekback(Vector<size_t> { 0ul }, 2, 1));
|
||||||
EXPECT_EQ(matches.size(), 0ul);
|
EXPECT_EQ(matches.size(), 0ul);
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
// Check that we don't find anything for hints after the valid range.
|
// Check that we don't find anything for hints after the valid range.
|
||||||
auto matches = MUST(buffer.find_copy_in_seekback(2, 1, Vector<size_t> { 12ul }));
|
auto matches = MUST(buffer.find_copy_in_seekback(Vector<size_t> { 12ul }, 2, 1));
|
||||||
EXPECT_EQ(matches.size(), 0ul);
|
EXPECT_EQ(matches.size(), 0ul);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -992,13 +992,13 @@ ErrorOr<void> LzmaCompressor::encode_match_type(MatchType match_type)
|
||||||
ErrorOr<void> LzmaCompressor::encode_once()
|
ErrorOr<void> LzmaCompressor::encode_once()
|
||||||
{
|
{
|
||||||
// Check if any of our existing match distances are currently usable.
|
// Check if any of our existing match distances are currently usable.
|
||||||
Vector<size_t> const existing_distance_hints {
|
Vector<size_t> const existing_distances {
|
||||||
m_rep0 + normalized_to_real_match_distance_offset,
|
m_rep0 + normalized_to_real_match_distance_offset,
|
||||||
m_rep1 + normalized_to_real_match_distance_offset,
|
m_rep1 + normalized_to_real_match_distance_offset,
|
||||||
m_rep2 + normalized_to_real_match_distance_offset,
|
m_rep2 + normalized_to_real_match_distance_offset,
|
||||||
m_rep3 + normalized_to_real_match_distance_offset,
|
m_rep3 + normalized_to_real_match_distance_offset,
|
||||||
};
|
};
|
||||||
auto existing_distance_results = TRY(m_dictionary->find_copy_in_seekback(m_dictionary->used_space(), normalized_to_real_match_length_offset, existing_distance_hints));
|
auto existing_distance_results = TRY(m_dictionary->find_copy_in_seekback(existing_distances, m_dictionary->used_space(), normalized_to_real_match_length_offset));
|
||||||
|
|
||||||
if (existing_distance_results.size() > 0) {
|
if (existing_distance_results.size() > 0) {
|
||||||
auto selected_match = existing_distance_results[0];
|
auto selected_match = existing_distance_results[0];
|
||||||
|
|
Loading…
Reference in a new issue