mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2024-11-25 00:50:22 +00:00
Bitmap: Add Bitmap::find_next_range_of_unset_bits()
AK::Bitmap is extended with find_next_range_of_unset_bits(). The function is implemented using count_trailing_zeroes_32(), which is optimized on many platforms, that gives a huge performance boost. Functions find_longest_range_of_unset_bits() and find_first_fit() are implemented with find_next_range_of_unset_bits(). According to benchmarks, they are 60-100% faster.
This commit is contained in:
parent
471db7ce0a
commit
fbcc798f3f
Notes:
sideshowbarker
2024-07-19 07:52:28 +09:00
Author: https://github.com/nimelehin Commit: https://github.com/SerenityOS/serenity/commit/fbcc798f3f6
2 changed files with 112 additions and 54 deletions
164
AK/Bitmap.h
164
AK/Bitmap.h
|
@ -29,6 +29,7 @@
|
|||
#include <AK/Assertions.h>
|
||||
#include <AK/Noncopyable.h>
|
||||
#include <AK/Optional.h>
|
||||
#include <AK/Platform.h>
|
||||
#include <AK/StdLibExtras.h>
|
||||
#include <AK/Types.h>
|
||||
#include <AK/kmalloc.h>
|
||||
|
@ -119,14 +120,8 @@ public:
|
|||
|
||||
if (previous_data != nullptr) {
|
||||
__builtin_memcpy(m_data, previous_data, previous_size_bytes);
|
||||
|
||||
if ((previous_size % 8) != 0) {
|
||||
if (default_value)
|
||||
m_data[previous_size_bytes - 1] |= (0xff >> (previous_size % 8));
|
||||
else
|
||||
m_data[previous_size_bytes - 1] &= ~(0xff >> (previous_size % 8));
|
||||
}
|
||||
|
||||
if (previous_size % 8)
|
||||
set_range(previous_size, 8 - previous_size % 8, default_value);
|
||||
kfree(previous_data);
|
||||
}
|
||||
}
|
||||
|
@ -163,68 +158,129 @@ public:
|
|||
return {};
|
||||
}
|
||||
|
||||
Optional<size_t> find_longest_range_of_unset_bits(size_t max_length, size_t& found_range_size) const
|
||||
// The function will return the next range of unset bits starting from the
|
||||
// @from value.
|
||||
// @from: the postition from which the search starts. The var will be
|
||||
// changed and new value is the offset of the found block.
|
||||
// @min_length: minimum size of the range which will be returned.
|
||||
// @max_length: maximum size of the range which will be returned.
|
||||
// This is used to increase performance, since the range of
|
||||
// unset bits can be long, and we don't need the while range,
|
||||
// so we can stop when we've reached @max_length.
|
||||
inline Optional<size_t> find_next_range_of_unset_bits(size_t& from, size_t min_length = 1, size_t max_length = max_size) const
|
||||
{
|
||||
auto first_index = find_first_unset();
|
||||
if (!first_index.has_value())
|
||||
if (min_length > max_length) {
|
||||
return {};
|
||||
}
|
||||
|
||||
size_t free_region_start = first_index.value();
|
||||
size_t free_region_size = 1;
|
||||
u32* bitmap32 = (u32*)m_data;
|
||||
|
||||
size_t max_region_start = free_region_start;
|
||||
size_t max_region_size = free_region_size;
|
||||
// Calculating the start offset.
|
||||
size_t start_bucket_index = from / 32;
|
||||
size_t start_bucket_bit = from % 32;
|
||||
|
||||
// Let's try and find the best fit possible
|
||||
for (size_t j = first_index.value() + 1; j < m_size && free_region_size < max_length; j++) {
|
||||
if (!get(j)) {
|
||||
if (free_region_size == 0)
|
||||
free_region_start = j;
|
||||
free_region_size++;
|
||||
} else {
|
||||
if (max_region_size < free_region_size) {
|
||||
max_region_size = free_region_size;
|
||||
max_region_start = free_region_start;
|
||||
size_t* start_of_free_chunks = &from;
|
||||
size_t free_chunks = 0;
|
||||
|
||||
for (size_t bucket_index = start_bucket_index; bucket_index < m_size / 32; ++bucket_index) {
|
||||
if (bitmap32[bucket_index] == 0xffffffff) {
|
||||
// Skip over completely full bucket of size 32.
|
||||
if (free_chunks >= min_length) {
|
||||
return min(free_chunks, max_length);
|
||||
}
|
||||
free_chunks = 0;
|
||||
start_bucket_bit = 0;
|
||||
continue;
|
||||
}
|
||||
if (bitmap32[bucket_index] == 0x0) {
|
||||
// Skip over completely empty bucket of size 32.
|
||||
if (free_chunks == 0) {
|
||||
*start_of_free_chunks = bucket_index * 32;
|
||||
}
|
||||
free_chunks += 32;
|
||||
if (free_chunks >= max_length) {
|
||||
return max_length;
|
||||
}
|
||||
start_bucket_bit = 0;
|
||||
continue;
|
||||
}
|
||||
|
||||
u32 bucket = bitmap32[bucket_index];
|
||||
u8 viewed_bits = start_bucket_bit;
|
||||
u32 trailing_zeroes = 0;
|
||||
|
||||
bucket >>= viewed_bits;
|
||||
start_bucket_bit = 0;
|
||||
|
||||
while (viewed_bits < 32) {
|
||||
if (bucket == 0) {
|
||||
if (free_chunks == 0) {
|
||||
*start_of_free_chunks = bucket_index * 32 + viewed_bits;
|
||||
}
|
||||
free_chunks += 32 - viewed_bits;
|
||||
viewed_bits = 32;
|
||||
} else {
|
||||
trailing_zeroes = count_trailing_zeroes_32(bucket);
|
||||
bucket >>= trailing_zeroes;
|
||||
|
||||
if (free_chunks == 0) {
|
||||
*start_of_free_chunks = bucket_index * 32 + viewed_bits;
|
||||
}
|
||||
free_chunks += trailing_zeroes;
|
||||
viewed_bits += trailing_zeroes;
|
||||
|
||||
if (free_chunks >= min_length) {
|
||||
return min(free_chunks, max_length);
|
||||
}
|
||||
|
||||
// Deleting trailing ones.
|
||||
u32 trailing_ones = count_trailing_zeroes_32(~bucket);
|
||||
bucket >>= trailing_ones;
|
||||
viewed_bits += trailing_ones;
|
||||
free_chunks = 0;
|
||||
}
|
||||
free_region_start = 0;
|
||||
free_region_size = 0;
|
||||
}
|
||||
}
|
||||
|
||||
if (max_region_size < free_region_size) {
|
||||
max_region_size = free_region_size;
|
||||
max_region_start = free_region_start;
|
||||
if (free_chunks < min_length) {
|
||||
return {};
|
||||
}
|
||||
|
||||
return min(free_chunks, max_length);
|
||||
}
|
||||
|
||||
Optional<size_t> find_longest_range_of_unset_bits(size_t max_length, size_t& found_range_size) const
|
||||
{
|
||||
size_t start = 0;
|
||||
size_t max_region_start = 0;
|
||||
size_t max_region_size = 0;
|
||||
|
||||
while (true) {
|
||||
// Look for the next block which is bigger than currunt.
|
||||
auto length_of_found_range = find_next_range_of_unset_bits(start, max_region_size + 1, max_length);
|
||||
if (length_of_found_range.has_value()) {
|
||||
max_region_start = start;
|
||||
max_region_size = length_of_found_range.value();
|
||||
start += max_region_size;
|
||||
} else {
|
||||
// No ranges which are bigger than current were found.
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
found_range_size = max_region_size;
|
||||
if (max_region_size > 1)
|
||||
if (max_region_size) {
|
||||
return max_region_start;
|
||||
// if the max free region size is one, then return the earliest one found
|
||||
return first_index;
|
||||
}
|
||||
return {};
|
||||
}
|
||||
|
||||
Optional<size_t> find_first_fit(size_t minimum_length) const
|
||||
{
|
||||
auto first_index = find_first_unset();
|
||||
if (!first_index.has_value())
|
||||
return {};
|
||||
if (minimum_length == 1)
|
||||
return first_index;
|
||||
|
||||
size_t free_region_start = first_index.value();
|
||||
size_t free_region_size = 1;
|
||||
|
||||
// Let's try to find the first fit
|
||||
for (size_t j = first_index.value() + 1; j < m_size; j++) {
|
||||
if (!get(j)) {
|
||||
if (free_region_size == 0)
|
||||
free_region_start = j;
|
||||
if (++free_region_size == minimum_length)
|
||||
return free_region_start;
|
||||
} else {
|
||||
free_region_start = 0;
|
||||
free_region_size = 0;
|
||||
}
|
||||
size_t start = 0;
|
||||
auto length_of_found_range = find_next_range_of_unset_bits(start, minimum_length, minimum_length);
|
||||
if (length_of_found_range.has_value()) {
|
||||
return start;
|
||||
}
|
||||
return {};
|
||||
}
|
||||
|
@ -252,6 +308,8 @@ public:
|
|||
{
|
||||
}
|
||||
|
||||
static constexpr u32 max_size = 0xffffffff;
|
||||
|
||||
private:
|
||||
size_t size_in_bytes() const { return ceil_div(m_size, 8); }
|
||||
|
||||
|
|
|
@ -97,4 +97,4 @@ template<typename T>
|
|||
}
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue