Bitmap: Add Bitmap::find_next_range_of_unset_bits()

AK::Bitmap is extended with find_next_range_of_unset_bits().
The function is implemented using count_trailing_zeroes_32(), which is
optimized on many platforms, that gives a huge performance boost.
Functions find_longest_range_of_unset_bits() and find_first_fit() are
implemented with find_next_range_of_unset_bits(). According to
benchmarks, they are 60-100% faster.
This commit is contained in:
nimelehin 2020-03-08 17:53:38 +03:00 committed by Andreas Kling
parent 471db7ce0a
commit fbcc798f3f
Notes: sideshowbarker 2024-07-19 07:52:28 +09:00
2 changed files with 112 additions and 54 deletions

View file

@ -29,6 +29,7 @@
#include <AK/Assertions.h>
#include <AK/Noncopyable.h>
#include <AK/Optional.h>
#include <AK/Platform.h>
#include <AK/StdLibExtras.h>
#include <AK/Types.h>
#include <AK/kmalloc.h>
@ -119,14 +120,8 @@ public:
if (previous_data != nullptr) {
__builtin_memcpy(m_data, previous_data, previous_size_bytes);
if ((previous_size % 8) != 0) {
if (default_value)
m_data[previous_size_bytes - 1] |= (0xff >> (previous_size % 8));
else
m_data[previous_size_bytes - 1] &= ~(0xff >> (previous_size % 8));
}
if (previous_size % 8)
set_range(previous_size, 8 - previous_size % 8, default_value);
kfree(previous_data);
}
}
@ -163,68 +158,129 @@ public:
return {};
}
Optional<size_t> find_longest_range_of_unset_bits(size_t max_length, size_t& found_range_size) const
// The function will return the next range of unset bits starting from the
// @from value.
// @from: the postition from which the search starts. The var will be
// changed and new value is the offset of the found block.
// @min_length: minimum size of the range which will be returned.
// @max_length: maximum size of the range which will be returned.
// This is used to increase performance, since the range of
// unset bits can be long, and we don't need the while range,
// so we can stop when we've reached @max_length.
inline Optional<size_t> find_next_range_of_unset_bits(size_t& from, size_t min_length = 1, size_t max_length = max_size) const
{
auto first_index = find_first_unset();
if (!first_index.has_value())
if (min_length > max_length) {
return {};
}
size_t free_region_start = first_index.value();
size_t free_region_size = 1;
u32* bitmap32 = (u32*)m_data;
size_t max_region_start = free_region_start;
size_t max_region_size = free_region_size;
// Calculating the start offset.
size_t start_bucket_index = from / 32;
size_t start_bucket_bit = from % 32;
// Let's try and find the best fit possible
for (size_t j = first_index.value() + 1; j < m_size && free_region_size < max_length; j++) {
if (!get(j)) {
if (free_region_size == 0)
free_region_start = j;
free_region_size++;
} else {
if (max_region_size < free_region_size) {
max_region_size = free_region_size;
max_region_start = free_region_start;
size_t* start_of_free_chunks = &from;
size_t free_chunks = 0;
for (size_t bucket_index = start_bucket_index; bucket_index < m_size / 32; ++bucket_index) {
if (bitmap32[bucket_index] == 0xffffffff) {
// Skip over completely full bucket of size 32.
if (free_chunks >= min_length) {
return min(free_chunks, max_length);
}
free_chunks = 0;
start_bucket_bit = 0;
continue;
}
if (bitmap32[bucket_index] == 0x0) {
// Skip over completely empty bucket of size 32.
if (free_chunks == 0) {
*start_of_free_chunks = bucket_index * 32;
}
free_chunks += 32;
if (free_chunks >= max_length) {
return max_length;
}
start_bucket_bit = 0;
continue;
}
u32 bucket = bitmap32[bucket_index];
u8 viewed_bits = start_bucket_bit;
u32 trailing_zeroes = 0;
bucket >>= viewed_bits;
start_bucket_bit = 0;
while (viewed_bits < 32) {
if (bucket == 0) {
if (free_chunks == 0) {
*start_of_free_chunks = bucket_index * 32 + viewed_bits;
}
free_chunks += 32 - viewed_bits;
viewed_bits = 32;
} else {
trailing_zeroes = count_trailing_zeroes_32(bucket);
bucket >>= trailing_zeroes;
if (free_chunks == 0) {
*start_of_free_chunks = bucket_index * 32 + viewed_bits;
}
free_chunks += trailing_zeroes;
viewed_bits += trailing_zeroes;
if (free_chunks >= min_length) {
return min(free_chunks, max_length);
}
// Deleting trailing ones.
u32 trailing_ones = count_trailing_zeroes_32(~bucket);
bucket >>= trailing_ones;
viewed_bits += trailing_ones;
free_chunks = 0;
}
free_region_start = 0;
free_region_size = 0;
}
}
if (max_region_size < free_region_size) {
max_region_size = free_region_size;
max_region_start = free_region_start;
if (free_chunks < min_length) {
return {};
}
return min(free_chunks, max_length);
}
Optional<size_t> find_longest_range_of_unset_bits(size_t max_length, size_t& found_range_size) const
{
size_t start = 0;
size_t max_region_start = 0;
size_t max_region_size = 0;
while (true) {
// Look for the next block which is bigger than currunt.
auto length_of_found_range = find_next_range_of_unset_bits(start, max_region_size + 1, max_length);
if (length_of_found_range.has_value()) {
max_region_start = start;
max_region_size = length_of_found_range.value();
start += max_region_size;
} else {
// No ranges which are bigger than current were found.
break;
}
}
found_range_size = max_region_size;
if (max_region_size > 1)
if (max_region_size) {
return max_region_start;
// if the max free region size is one, then return the earliest one found
return first_index;
}
return {};
}
Optional<size_t> find_first_fit(size_t minimum_length) const
{
auto first_index = find_first_unset();
if (!first_index.has_value())
return {};
if (minimum_length == 1)
return first_index;
size_t free_region_start = first_index.value();
size_t free_region_size = 1;
// Let's try to find the first fit
for (size_t j = first_index.value() + 1; j < m_size; j++) {
if (!get(j)) {
if (free_region_size == 0)
free_region_start = j;
if (++free_region_size == minimum_length)
return free_region_start;
} else {
free_region_start = 0;
free_region_size = 0;
}
size_t start = 0;
auto length_of_found_range = find_next_range_of_unset_bits(start, minimum_length, minimum_length);
if (length_of_found_range.has_value()) {
return start;
}
return {};
}
@ -252,6 +308,8 @@ public:
{
}
static constexpr u32 max_size = 0xffffffff;
private:
size_t size_in_bytes() const { return ceil_div(m_size, 8); }

View file

@ -97,4 +97,4 @@ template<typename T>
}
return 0;
#endif
}
}