Huffman.h 2.5 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889
  1. /*
  2. * Copyright (c) 2024, the SerenityOS developers.
  3. * Copyright (c) 2021, Idan Horowitz <idan.horowitz@serenityos.org>
  4. *
  5. * SPDX-License-Identifier: BSD-2-Clause
  6. */
  7. #pragma once
  8. #include <AK/Array.h>
  9. #include <AK/BinaryHeap.h>
  10. namespace Compress {
  11. template<size_t Size>
  12. void generate_huffman_lengths(Array<u8, Size>& lengths, Array<u16, Size> const& frequencies, size_t max_bit_length, u16 frequency_cap = UINT16_MAX)
  13. {
  14. VERIFY((1u << max_bit_length) >= Size);
  15. u16 heap_keys[Size]; // Used for O(n) heap construction
  16. u16 heap_values[Size];
  17. u16 huffman_links[Size * 2] = { 0 };
  18. size_t non_zero_freqs = 0;
  19. for (size_t i = 0; i < Size; i++) {
  20. auto frequency = frequencies[i];
  21. if (frequency == 0)
  22. continue;
  23. if (frequency > frequency_cap) {
  24. frequency = frequency_cap;
  25. }
  26. heap_keys[non_zero_freqs] = frequency; // sort symbols by frequency
  27. heap_values[non_zero_freqs] = Size + non_zero_freqs; // huffman_links "links"
  28. non_zero_freqs++;
  29. }
  30. // special case for only 1 used symbol
  31. if (non_zero_freqs < 2) {
  32. for (size_t i = 0; i < Size; i++)
  33. lengths[i] = (frequencies[i] == 0) ? 0 : 1;
  34. return;
  35. }
  36. BinaryHeap<u16, u16, Size> heap { heap_keys, heap_values, non_zero_freqs };
  37. // build the huffman tree - binary heap is used for efficient frequency comparisons
  38. while (heap.size() > 1) {
  39. u16 lowest_frequency = heap.peek_min_key();
  40. u16 lowest_link = heap.pop_min();
  41. u16 second_lowest_frequency = heap.peek_min_key();
  42. u16 second_lowest_link = heap.pop_min();
  43. u16 new_link = heap.size() + 1;
  44. u32 sum = lowest_frequency + second_lowest_frequency;
  45. sum = min(sum, UINT16_MAX);
  46. heap.insert(sum, new_link);
  47. huffman_links[lowest_link] = new_link;
  48. huffman_links[second_lowest_link] = new_link;
  49. }
  50. non_zero_freqs = 0;
  51. for (size_t i = 0; i < Size; i++) {
  52. if (frequencies[i] == 0) {
  53. lengths[i] = 0;
  54. continue;
  55. }
  56. u16 link = huffman_links[Size + non_zero_freqs];
  57. non_zero_freqs++;
  58. size_t bit_length = 1;
  59. while (link != 1) {
  60. bit_length++;
  61. link = huffman_links[link];
  62. }
  63. if (bit_length > max_bit_length) {
  64. VERIFY(frequency_cap != 1);
  65. return generate_huffman_lengths(lengths, frequencies, max_bit_length, frequency_cap / 2);
  66. }
  67. lengths[i] = bit_length;
  68. }
  69. }
  70. }