AK: Add thresholds to quickselect_inline and Statistics::Median

I did a bit of Profiling and made the quickselect and median algorithms
use the best of option for the respective input size.
This commit is contained in:
Staubfinger 2023-01-08 14:55:54 +01:00 committed by Jelle Raaijmakers
parent 6b9344e86c
commit da1023fcc5
Notes: sideshowbarker 2024-07-17 00:49:56 +09:00
4 changed files with 92 additions and 4 deletions

View file

@ -11,6 +11,9 @@
#include <AK/StdLibExtras.h>
namespace AK {
static constexpr int MEDIAN_OF_MEDIAN_CUTOFF = 4500;
// FIXME: Stole and adapted these two functions from `Userland/Demos/Tubes/Tubes.cpp` we really need something like this in `AK/Random.h`
static inline double random_double()
{
@ -161,9 +164,13 @@ size_t quickselect_inplace(Collection& collection, size_t k, PivotFn pivot_fn)
template<typename Collection>
size_t quickselect_inplace(Collection& collection, size_t k)
{
// By default, lets use middle_element to match `quicksort`
return quickselect_inplace(
collection, 0, collection.size() - 1, k, [](auto collection, size_t left, size_t right, auto less_than) { return PivotFunctions::middle_element(collection, left, right, less_than); }, [](auto& a, auto& b) { return a < b; });
if (collection.size() >= MEDIAN_OF_MEDIAN_CUTOFF)
return quickselect_inplace(
collection, 0, collection.size() - 1, k, [](auto collection, size_t left, size_t right, auto less_than) { return PivotFunctions::median_of_medians(collection, left, right, less_than); }, [](auto& a, auto& b) { return a < b; });
else
return quickselect_inplace(
collection, 0, collection.size() - 1, k, [](auto collection, size_t left, size_t right, auto less_than) { return PivotFunctions::random_element(collection, left, right, less_than); }, [](auto& a, auto& b) { return a < b; });
}
}

View file

@ -9,10 +9,14 @@
#include <AK/Concepts.h>
#include <AK/Math.h>
#include <AK/QuickSelect.h>
#include <AK/QuickSort.h>
#include <AK/Vector.h>
namespace AK {
static constexpr int ODD_NAIVE_MEDIAN_CUTOFF = 200;
static constexpr int EVEN_NAIVE_MEDIAN_CUTOFF = 350;
template<Arithmetic T = float>
class Statistics {
public:
@ -75,7 +79,13 @@ public:
return 0;
// If the number of values is even, the median is the arithmetic mean of the two middle values
if (size() % 2 == 0) {
if (size() <= EVEN_NAIVE_MEDIAN_CUTOFF && size() % 2 == 0) {
quick_sort(m_values);
return (m_values.at(size() / 2) + m_values.at(size() / 2 - 1)) / 2;
} else if (size() <= ODD_NAIVE_MEDIAN_CUTOFF && size() % 2 == 1) {
quick_sort(m_values);
return m_values.at(m_values.size() / 2);
} else if (size() % 2 == 0) {
auto index = size() / 2;
auto median1 = m_values.at(AK::quickselect_inplace(m_values, index));
auto median2 = m_values.at(AK::quickselect_inplace(m_values, index - 1));

View file

@ -67,6 +67,7 @@ set(AK_TEST_SOURCES
TestSourceLocation.cpp
TestSpan.cpp
TestStack.cpp
TestStatistics.cpp
TestStdLibExtras.cpp
TestString.cpp
TestStringFloatingPointConversions.cpp

View file

@ -0,0 +1,70 @@
/*
* Copyright (c) 2023, the SerenityOS developers.
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <AK/Statistics.h>
#include <LibTest/TestSuite.h>
TEST_CASE(Statistics)
{
// Setup Test Data
AK::Statistics<double> odd_number_elements;
AK::Statistics<double> even_number_elements;
AK::Statistics<double> odd_number_elements_large;
AK::Statistics<double> even_number_elements_large;
odd_number_elements.add(5.0);
odd_number_elements.add(4.0);
odd_number_elements.add(3.0);
odd_number_elements.add(2.0);
odd_number_elements.add(1.0);
even_number_elements.add(6.0);
even_number_elements.add(5.0);
even_number_elements.add(4.0);
even_number_elements.add(3.0);
even_number_elements.add(2.0);
even_number_elements.add(1.0);
for (int i = 201; i > 0; i--) {
odd_number_elements_large.add(i);
}
for (int i = 360; i > 0; i--) {
even_number_elements_large.add(i);
}
// Sum
EXPECT_APPROXIMATE(odd_number_elements.sum(), 15.0);
EXPECT_APPROXIMATE(even_number_elements.sum(), 21.0);
// Average
EXPECT_APPROXIMATE(odd_number_elements.average(), 3.0);
EXPECT_APPROXIMATE(even_number_elements.average(), 3.5);
// Min
EXPECT_APPROXIMATE(odd_number_elements.min(), 1.0);
EXPECT_APPROXIMATE(even_number_elements.min(), 1.0);
// Max
EXPECT_APPROXIMATE(odd_number_elements.max(), 5.0);
EXPECT_APPROXIMATE(even_number_elements.max(), 6.0);
// Median
EXPECT_APPROXIMATE(odd_number_elements.median(), 3.0);
EXPECT_APPROXIMATE(even_number_elements.median(), 3.5);
EXPECT_APPROXIMATE(odd_number_elements_large.median(), 101.0);
EXPECT_APPROXIMATE(even_number_elements_large.median(), 180.5);
// The expected values for standard deviation and variance were calculated by my school issued scientific calculator
// Standard Deviation
EXPECT_APPROXIMATE(odd_number_elements.standard_deviation(), 1.4142135623731);
EXPECT_APPROXIMATE(even_number_elements.standard_deviation(), 1.7078251276599);
// Variance
EXPECT_APPROXIMATE(odd_number_elements.variance(), 2.0);
EXPECT_APPROXIMATE(even_number_elements.variance(), 2.9166666666667);
}