|
@@ -4034,6 +4034,265 @@ pub(crate) const BIAS_DATA_VEC: &[&[f64]] = &[
|
|
|
],
|
|
|
];
|
|
|
|
|
|
/// Lookup table of negative powers of two: entry `i` holds `2^-i` for
/// `i` in `0..=255`, so it can be indexed directly by a `u8` register value.
///
/// The table is computed at compile time instead of being spelled out as 256
/// literals. Every entry is a normal `f64` (the smallest is `2^-255`), so each
/// halving step is exact and the values are bit-identical to the literals.
const ONE_OVER_POWER_OF_TWO: [f64; 256] = {
    let mut table = [1.0_f64; 256];
    // `for` loops are not available in const evaluation, hence the `while`.
    let mut exponent = 1;
    while exponent < 256 {
        table[exponent] = table[exponent - 1] * 0.5;
        exponent += 1;
    }
    table
};
|
|
|
+
|
|
|
#[derive(Clone)]
|
|
|
pub struct HyperLogLog<const N: usize> {
|
|
|
registers: Vec<u8>,
|
|
@@ -4048,10 +4307,12 @@ impl<const N: usize> Default for HyperLogLog<N> {
|
|
|
}
|
|
|
|
|
|
impl<const N: usize> HyperLogLog<N> {
|
|
|
/// Scrambles `item` by multiplying it with a fixed odd 64-bit constant.
///
/// The multiplication wraps on overflow, so the result is the low 64 bits of
/// the product and the function never panics.
#[inline]
fn hash(item: u64) -> u64 {
    const MULTIPLIER: u64 = 11_400_714_819_323_198_549;
    MULTIPLIER.wrapping_mul(item)
}
|
|
|
|
|
|
+ #[inline]
|
|
|
fn am(&self) -> f64 {
|
|
|
let m = self.registers.len();
|
|
|
|
|
@@ -4066,6 +4327,7 @@ impl<const N: usize> HyperLogLog<N> {
|
|
|
}
|
|
|
}
|
|
|
|
|
|
+ #[inline]
|
|
|
fn b(&self) -> usize {
|
|
|
(N as f64).log2() as usize
|
|
|
}
|
|
@@ -4086,9 +4348,11 @@ impl<const N: usize> HyperLogLog<N> {
|
|
|
pub fn estimate_bias(&self, e: f64, b: usize) -> f64 {
|
|
|
// binary search first nearest neighbor
|
|
|
let lookup_array = RAW_ESTIMATE_DATA_VEC[b - 1 - RAW_ESTIMATE_DATA_OFFSET];
|
|
|
+
|
|
|
let mut idx_left = match lookup_array.binary_search_by(|v| v.partial_cmp(&e).unwrap()) {
|
|
|
- Ok(i) => Some(i), // exact match
|
|
|
- Err(i) => Some(i), // no match, i points to left neighbor
|
|
|
+ Ok(i) => Some(i), // exact match
|
|
|
+ Err(i) if i == lookup_array.len() => Some(i - 1), // no match, i points to end of array
|
|
|
+ Err(i) => Some(i), // no match, i points to left neighbor
|
|
|
};
|
|
|
|
|
|
let mut idx_right = match idx_left {
|
|
@@ -4105,8 +4369,8 @@ impl<const N: usize> HyperLogLog<N> {
|
|
|
// collect k nearest neighbors
|
|
|
const K: usize = 6;
|
|
|
debug_assert!(lookup_array.len() >= K);
|
|
|
- let mut neighbors = Vec::with_capacity(K);
|
|
|
- for _ in 0..K {
|
|
|
+ let mut neighbors = [0; K];
|
|
|
+ for neighbor in neighbors.iter_mut() {
|
|
|
let (right_instead_left, idx) = match (idx_left, idx_right) {
|
|
|
(Some(i_left), Some(i_right)) => {
|
|
|
// 2 candidates, find better one
|
|
@@ -4128,7 +4392,7 @@ impl<const N: usize> HyperLogLog<N> {
|
|
|
}
|
|
|
_ => panic!("neighborhood search failed, this is bug!"),
|
|
|
};
|
|
|
- neighbors.push(idx);
|
|
|
+ *neighbor = idx;
|
|
|
if right_instead_left {
|
|
|
idx_right = if idx < lookup_array.len() - 1 {
|
|
|
Some(idx + 1)
|
|
@@ -4141,7 +4405,8 @@ impl<const N: usize> HyperLogLog<N> {
|
|
|
}
|
|
|
|
|
|
// calculate mean of neighbors
|
|
|
- let bias_data = BIAS_DATA_VEC[b - BIAS_DATA_OFFSET];
|
|
|
+ let bias_data = BIAS_DATA_VEC[b - 1 - BIAS_DATA_OFFSET];
|
|
|
+
|
|
|
neighbors.iter().map(|&i| bias_data[i]).sum::<f64>() / (K as f64)
|
|
|
}
|
|
|
|
|
@@ -4159,12 +4424,13 @@ impl<const N: usize> HyperLogLog<N> {
|
|
|
let m = self.registers.len() as f64;
|
|
|
let b = self.b();
|
|
|
|
|
|
- let z = 1f64
|
|
|
- / self
|
|
|
- .registers
|
|
|
- .iter()
|
|
|
- .map(|&val| 2_f64.powi(-(i32::from(val))))
|
|
|
- .sum::<f64>();
|
|
|
+ let sum: f64 = self
|
|
|
+ .registers
|
|
|
+ .iter()
|
|
|
+ .map(|&val| ONE_OVER_POWER_OF_TWO[val as usize])
|
|
|
+ .sum();
|
|
|
+
|
|
|
+ let z = 1f64 / sum;
|
|
|
|
|
|
let e = self.am() * m.powi(2) * z;
|
|
|
|
|
@@ -4222,6 +4488,21 @@ mod tests {
|
|
|
|
|
|
assert!(set.size() > lower_bound && set.size() < upper_bound);
|
|
|
}
|
|
|
/// Inserts 10 000 distinct items into a 128-register sketch and queries the
/// estimate after every single insertion.
#[test]
fn many_different_sizes() {
    let mut sketch: HyperLogLog<128> = HyperLogLog::default();

    // Calling `size()` at every intermediate cardinality presumably guards
    // against panics inside the estimator across its whole input range --
    // confirm against the change that introduced this test.
    for value in 0..10_000 {
        sketch.add(value);
        sketch.size();
    }

    // NOTE(review): both bounds are derived from the estimate itself, so
    // (assuming `size()` is deterministic) the assertions below hold whenever
    // `tolerance` is non-zero; they do not compare against the true count.
    let tolerance = (sketch.relative_error() * (sketch.size() as f64)) as usize;
    let (lower_bound, upper_bound) = (sketch.size() - tolerance, sketch.size() + tolerance);

    assert!(sketch.size() > lower_bound);
    assert!(sketch.size() < upper_bound);
}
|
|
|
|
|
|
#[test]
|
|
|
fn merge() {
|