Generator.h 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447
  1. /*
  2. * Copyright (c) 2023, Martin Janiczek <martin@janiczek.cz>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #pragma once
  7. #include <LibTest/Macros.h>
  8. #include <LibTest/Randomized/RandomRun.h>
  9. #include <AK/Function.h>
  10. #include <AK/Random.h>
  11. #include <AK/String.h>
  12. #include <AK/StringView.h>
  13. #include <AK/Tuple.h>
  14. #include <math.h>
  15. namespace Test {
  16. namespace Randomized {
  17. // Returns a random double value in range 0..1.
  18. // This is not a generator. It is meant to be used inside RandomnessSource::draw_value().
  19. // Based on: https://dotat.at/@/2023-06-23-random-double.html
  20. inline f64 get_random_probability()
  21. {
  22. return static_cast<f64>(AK::get_random<u64>() >> 11) * 0x1.0p-53;
  23. }
  24. // Generators take random bits from the RandomnessSource and return a value
  25. // back.
  26. //
  27. // Example:
  28. // - Gen::number_u64(5,10) --> 9, 7, 5, 10, 8, ...
  29. namespace Gen {
  30. // An unsigned integer generator.
  31. //
  32. // The minimum value will always be 0.
  33. // The maximum value is given by user in the argument.
  34. //
  35. // Gen::number_u64(10) -> value 5, RandomRun [5]
  36. // -> value 8, RandomRun [8]
  37. // etc.
  38. //
  39. // Shrinks towards 0.
  40. inline u64 number_u64(u64 max)
  41. {
  42. if (max == 0)
  43. return 0;
  44. u64 random = Test::randomness_source().draw_value(max, [&]() {
  45. // `clamp` to guard against integer overflow
  46. u64 exclusive_bound = AK::clamp(max + 1, max, NumericLimits<u64>::max());
  47. return AK::get_random_uniform_64(exclusive_bound);
  48. });
  49. return random;
  50. }
  51. // An unsigned integer generator in a particular range.
  52. //
  53. // Gen::number_u64(3,10) -> value 3, RandomRun [0]
  54. // -> value 8, RandomRun [5]
  55. // -> value 10, RandomRun [7]
  56. // etc.
  57. //
  58. // In case `min == max`, the RandomRun footprint will be smaller: no randomness
  59. // is needed.
  60. //
  61. // Gen::number_u64(3,3) -> value 3, RandomRun [] (always)
  62. //
  63. // Shrinks towards the minimum.
  64. inline u64 number_u64(u64 min, u64 max)
  65. {
  66. VERIFY(max >= min);
  67. return number_u64(max - min) + min;
  68. }
  69. // Randomly (uniformly) selects a value out of the given arguments.
  70. //
  71. // Gen::one_of(20,5,10) --> value 20, RandomRun [0]
  72. // --> value 5, RandomRun [1]
  73. // --> value 10, RandomRun [2]
  74. //
  75. // Shrinks towards the earlier arguments (above, towards 20).
  76. template<typename... Ts>
  77. requires(sizeof...(Ts) > 0)
  78. CommonType<Ts...> one_of(Ts... choices)
  79. {
  80. Vector<CommonType<Ts...>> choices_vec { choices... };
  81. constexpr size_t count = sizeof...(choices);
  82. size_t i = number_u64(count - 1);
  83. return choices_vec[i];
  84. }
  85. template<typename T>
  86. struct Choice {
  87. i32 weight;
  88. T value;
  89. };
  90. // Workaround for clang bug fixed in clang 17
  91. template<typename T>
  92. Choice(i32, T) -> Choice<T>;
  93. // Randomly (uniformly) selects a value out of the given weighted arguments.
  94. //
  95. // Gen::frequency(
  96. // Gen::Choice {5,999},
  97. // Gen::Choice {1,111},
  98. // )
  99. // --> value 999 (5 out of 6 times), RandomRun [0]
  100. // --> value 111 (1 out of 6 times), RandomRun [1]
  101. //
  102. // Shrinks towards the earlier arguments (above, towards 'x').
  103. template<typename... Ts>
  104. requires(sizeof...(Ts) > 0)
  105. CommonType<Ts...> frequency(Choice<Ts>... choices)
  106. {
  107. Vector<Choice<CommonType<Ts...>>> choices_vec { choices... };
  108. u64 sum = 0;
  109. for (auto const& choice : choices_vec) {
  110. VERIFY(choice.weight > 0);
  111. sum += static_cast<u64>(choice.weight);
  112. }
  113. u64 target = number_u64(sum);
  114. size_t i = 0;
  115. for (auto const& choice : choices_vec) {
  116. u64 weight = static_cast<u64>(choice.weight);
  117. if (weight >= target) {
  118. return choice.value;
  119. }
  120. target -= weight;
  121. ++i;
  122. }
  123. return choices_vec[i - 1].value;
  124. }
  125. // An unsigned integer generator in the full u64 range.
  126. //
  127. // Prefers 8bit numbers, then 4bit, 16bit, 32bit and 64bit ones.
  128. // Around 11% of the time it tries edge cases like 0 and various NumericLimits::max().
  129. //
  130. // Gen::number_u64() -> value 3, RandomRun [0,3]
  131. // -> value 8, RandomRun [1,8]
  132. // -> value 100, RandomRun [2,100]
  133. // -> value 5, RandomRun [3,5]
  134. // -> value 255, RandomRun [4,1]
  135. // -> value 65535, RandomRun [4,2]
  136. // etc.
  137. //
  138. // Shrinks towards 0.
  139. inline u64 number_u64()
  140. {
  141. u64 bits = frequency(
  142. // weight, bits
  143. Choice { 4, 4 },
  144. Choice { 8, 8 },
  145. Choice { 2, 16 },
  146. Choice { 1, 32 },
  147. Choice { 1, 64 },
  148. Choice { 2, 0 });
  149. // The special cases go last as they can be the most extreme (large) values.
  150. if (bits == 0) {
  151. // Special cases, eg. max integers for u8, u16, u32, u64.
  152. return one_of(
  153. 0U,
  154. NumericLimits<u8>::max(),
  155. NumericLimits<u16>::max(),
  156. NumericLimits<u32>::max(),
  157. NumericLimits<u64>::max());
  158. }
  159. u64 max = bits == 64
  160. ? NumericLimits<u64>::max()
  161. : ((u64)1 << bits) - 1;
  162. return number_u64(max);
  163. }
  164. // A generator returning `true` with the given `probability` (0..1).
  165. //
  166. // If probability <= 0, doesn't use any randomness and returns false.
  167. // If probability >= 1, doesn't use any randomness and returns true.
  168. //
  169. // In general case:
  170. // Gen::weighted_boolean(0.75)
  171. // -> value false, RandomRun [0]
  172. // -> value true, RandomRun [1]
  173. //
  174. // Shrinks towards false.
  175. inline bool weighted_boolean(f64 probability)
  176. {
  177. if (probability <= 0)
  178. return false;
  179. if (probability >= 1)
  180. return true;
  181. u64 random_int = Test::randomness_source().draw_value(1, [&]() {
  182. f64 drawn_probability = get_random_probability();
  183. return drawn_probability <= probability ? 1 : 0;
  184. });
  185. bool random_bool = random_int == 1;
  186. return random_bool;
  187. }
  188. // A (fair) boolean generator.
  189. //
  190. // Gen::boolean()
  191. // -> value false, RandomRun [0]
  192. // -> value true, RandomRun [1]
  193. //
  194. // Shrinks towards false.
  195. inline bool boolean()
  196. {
  197. return weighted_boolean(0.5);
  198. }
  199. // A vector generator of a random length between the given limits.
  200. //
  201. // Gen::vector(2,3,[]() { return Gen::number_u64(5); })
  202. // -> value [1,5], RandomRun [1,1,1,5,0]
  203. // -> value [1,5,0], RandomRun [1,1,1,5,1,0,0]
  204. // etc.
  205. //
  206. // In case `min == max`, the RandomRun footprint will be smaller, as there will
  207. // be no randomness involved in figuring out the length:
  208. //
  209. // Gen::vector(3,3,[]() { return Gen::number_u64(5); })
  210. // -> value [1,3], RandomRun [1,3]
  211. // -> value [5,2], RandomRun [5,2]
  212. // etc.
  213. //
  214. // Shrinks towards shorter vectors, with simpler elements inside.
  215. template<typename Fn>
  216. inline Vector<InvokeResult<Fn>> vector(size_t min, size_t max, Fn item_gen)
  217. {
  218. VERIFY(max >= min);
  219. size_t size = 0;
  220. Vector<InvokeResult<Fn>> acc;
  221. // Special case: no randomness for the boolean
  222. if (min == max) {
  223. while (size < min) {
  224. acc.append(item_gen());
  225. ++size;
  226. }
  227. return acc;
  228. }
  229. // General case: before each item we "flip a coin" to decide whether to
  230. // generate another one.
  231. //
  232. // This algorithm is used instead of the more intuitive "generate length,
  233. // then generate that many items" algorithm, because it produces RandomRun
  234. // patterns that shrink more easily.
  235. //
  236. // See the Hypothesis paper [1], section 3.3, around the paragraph starting
  237. // with "More commonly".
  238. //
  239. // [1]: https://drops.dagstuhl.de/opus/volltexte/2020/13170/pdf/LIPIcs-ECOOP-2020-13.pdf
  240. while (size < min) {
  241. acc.append(item_gen());
  242. ++size;
  243. }
  244. f64 average = static_cast<f64>(min + max) / 2.0;
  245. VERIFY(average > 0);
  246. // A geometric distribution: https://en.wikipedia.org/wiki/Geometric_distribution#Moments_and_cumulants
  247. // The below derives from the E(X) = 1/p formula.
  248. //
  249. // We need to flip the `p` to `1-p` as our success ("another item!") is
  250. // a "failure" in the geometric distribution's interpretation ("we fail X
  251. // times before succeeding the first time").
  252. //
  253. // That gives us `1 - 1/p`. Then, E(X) also contains the final success, so we
  254. // need to say `1 + average` instead of `average`, as it will mean "our X
  255. // items + the final failure that stops the process".
  256. f64 probability = 1.0 - 1.0 / (1.0 + average);
  257. while (size < max) {
  258. if (weighted_boolean(probability)) {
  259. acc.append(item_gen());
  260. ++size;
  261. } else {
  262. break;
  263. }
  264. }
  265. return acc;
  266. }
  267. // A vector generator of a given length.
  268. //
  269. // Gen::vector_of_length(3,[]() { return Gen::number_u64(5); })
  270. // -> value [1,5,0], RandomRun [1,1,1,5,1,0,0]
  271. // -> value [2,9,3], RandomRun [1,2,1,9,1,3,0]
  272. // etc.
  273. //
  274. // Shrinks towards shorter vectors, with simpler elements inside.
  275. template<typename Fn>
  276. inline Vector<InvokeResult<Fn>> vector(size_t length, Fn item_gen)
  277. {
  278. return vector(length, length, item_gen);
  279. }
  280. // A vector generator of a random length between 0 and 32 elements.
  281. //
  282. // If you need a different length, use vector(max,item_gen) or
  283. // vector(min,max,item_gen).
  284. //
  285. // Gen::vector([]() { return Gen::number_u64(5); })
  286. // -> value [], RandomRun [0]
  287. // -> value [1], RandomRun [1,1,0]
  288. // -> value [1,5], RandomRun [1,1,1,5,0]
  289. // -> value [1,5,0], RandomRun [1,1,1,5,1,0,0]
  290. // -> value [1,5,0,2], RandomRun [1,1,1,5,1,0,1,2,0]
  291. // etc.
  292. //
  293. // Shrinks towards shorter vectors, with simpler elements inside.
  294. template<typename Fn>
  295. inline Vector<InvokeResult<Fn>> vector(Fn item_gen)
  296. {
  297. return vector(0, 32, item_gen);
  298. }
  299. // A double generator in the [0,1) range.
  300. //
  301. // RandomRun footprint: a single number.
  302. //
  303. // Shrinks towards 0.
  304. //
  305. // Based on: https://dotat.at/@/2023-06-23-random-double.html
  306. inline f64 percentage()
  307. {
  308. return static_cast<f64>(number_u64() >> 11) * 0x1.0p-53;
  309. }
  310. // An internal double generator. This one won't make any attempt to shrink nicely.
  311. // Test writers should use number_f64(f64 min, f64 max) instead.
  312. inline f64 number_f64_scaled(f64 min, f64 max)
  313. {
  314. VERIFY(max >= min);
  315. if (min == max)
  316. return min;
  317. f64 p = percentage();
  318. return min * (1.0 - p) + max * p;
  319. }
  320. inline f64 number_f64(f64 min, f64 max)
  321. {
  322. // FIXME: after we figure out how to use frequency() with lambdas,
  323. // do edge cases and nicely shrinking float generators here
  324. return number_f64_scaled(min, max);
  325. }
  326. inline f64 number_f64()
  327. {
  328. // FIXME: this could be much nicer to the user, at the expense of code complexity
  329. // We could follow Hypothesis' lead and remap integers 0..MAXINT to _simple_
  330. // floats rather than small floats. Meaning, we would like to prefer integers
  331. // over floats with decimal digits, positive numbers over negative numbers etc.
  332. // As a result, users would get failures with floats like 0, 1, or 0.5 instead of
  333. // ones like 1.175494e-38.
  334. // Check the doc comment in Hypothesis: https://github.com/HypothesisWorks/hypothesis/blob/master/hypothesis-python/src/hypothesis/internal/conjecture/floats.py
  335. return number_f64(NumericLimits<f64>::lowest(), NumericLimits<f64>::max());
  336. }
  337. // A double generator.
  338. //
  339. // The minimum value will always be NumericLimits<f64>::lowest().
  340. // The maximum value is given by user in the argument.
  341. //
  342. // Prefers positive numbers, then negative numbers, then edge cases.
  343. //
  344. // Shrinks towards 0.
  345. inline f64 number_f64(f64 max)
  346. {
  347. // FIXME: after we figure out how to use frequency() with lambdas,
  348. // do edge cases and nicely shrinking float generators here
  349. return number_f64_scaled(NumericLimits<f64>::lowest(), max);
  350. }
  351. // TODO
  352. inline u32 number_u32(u32 max)
  353. {
  354. if (max == 0)
  355. return 0;
  356. u32 random = Test::randomness_source().draw_value(max, [&]() {
  357. // `clamp` to guard against integer overflow
  358. u32 exclusive_bound = AK::clamp(max + 1, max, NumericLimits<u32>::max());
  359. return AK::get_random_uniform(exclusive_bound);
  360. });
  361. return random;
  362. }
  363. // TODO
  364. inline u32 number_u32(u32 min, u32 max)
  365. {
  366. VERIFY(max >= min);
  367. return number_u32(max - min) + min;
  368. }
  369. // TODO
  370. inline u32 number_u32()
  371. {
  372. u32 bits = frequency(
  373. // weight, bits
  374. Choice { 4, 4 },
  375. Choice { 8, 8 },
  376. Choice { 2, 16 },
  377. Choice { 1, 32 },
  378. Choice { 1, 64 },
  379. Choice { 2, 0 });
  380. // The special cases go last as they can be the most extreme (large) values.
  381. if (bits == 0) {
  382. // Special cases, eg. max integers for u8, u16, u32.
  383. return one_of(
  384. 0U,
  385. NumericLimits<u8>::max(),
  386. NumericLimits<u16>::max(),
  387. NumericLimits<u32>::max());
  388. }
  389. u32 max = bits == 32
  390. ? NumericLimits<u32>::max()
  391. : ((u32)1 << bits) - 1;
  392. return number_u32(max);
  393. }
  394. } // namespace Gen
  395. } // namespace Randomized
  396. } // namespace Test