mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2024-11-22 23:50:19 +00:00
1a9d8e8fbe
Deflate and WebP can store at most 15 bits per symbol, meaning their huffman trees can be at most 15 levels deep. During construction, when we hit this level, we used to try again with an ever lower frequency cap per symbol. This had the effect of giving the symbols with the highest frequency lower frequencies first, causing the most-frequent symbols to be merged. For example, maybe the most-frequent symbol had 1 bit, and the 2nd-frequent two bits (and everything else at least 3). With the cap, the two most frequent symbols might both have 2 symbols, freeing up bits for the lower levels of the tree. This has the effect of making the most-frequent symbols longer at first, which isn't great for file size. Instead of using a frequency cap, ignore ever more of the low bits of the frequency. This sacrifices resolution where it hurts the lower levels of the tree first, and those are stored less frequently. For deflate, the 64 kiB block size means this doesn't have a big effect, but for WebP it can have a big effect: sunset-retro.png (876K): 2.02M -> 1.73M -- now (very slightly) smaller than twice the input size! Maybe we'll be competitive one day. (For wow.webp and 7z7c.webp, it has no effect, since we don't hit the "tree too deep" case there, since those have relatively few colors.) No behavior change other than smaller file size. (No performance cost either, and it's less code too.)
87 lines
2.4 KiB
C++
87 lines
2.4 KiB
C++
/*
|
|
* Copyright (c) 2024, the SerenityOS developers.
|
|
* Copyright (c) 2021, Idan Horowitz <idan.horowitz@serenityos.org>
|
|
*
|
|
* SPDX-License-Identifier: BSD-2-Clause
|
|
*/
|
|
|
|
#pragma once
|
|
|
|
#include <AK/Array.h>
|
|
#include <AK/BinaryHeap.h>
|
|
|
|
namespace Compress {
|
|
|
|
template<size_t Size>
|
|
void generate_huffman_lengths(Array<u8, Size>& lengths, Array<u16, Size> const& frequencies, size_t max_bit_length, u16 shift = 0)
|
|
{
|
|
VERIFY((1u << max_bit_length) >= Size);
|
|
u16 heap_keys[Size]; // Used for O(n) heap construction
|
|
u16 heap_values[Size];
|
|
|
|
u16 huffman_links[Size * 2] = { 0 };
|
|
size_t non_zero_freqs = 0;
|
|
for (size_t i = 0; i < Size; i++) {
|
|
auto frequency = frequencies[i];
|
|
if (frequency == 0)
|
|
continue;
|
|
|
|
frequency = max(1, frequency >> shift);
|
|
|
|
heap_keys[non_zero_freqs] = frequency; // sort symbols by frequency
|
|
heap_values[non_zero_freqs] = Size + non_zero_freqs; // huffman_links "links"
|
|
non_zero_freqs++;
|
|
}
|
|
|
|
// special case for only 1 used symbol
|
|
if (non_zero_freqs < 2) {
|
|
for (size_t i = 0; i < Size; i++)
|
|
lengths[i] = (frequencies[i] == 0) ? 0 : 1;
|
|
return;
|
|
}
|
|
|
|
BinaryHeap<u16, u16, Size> heap { heap_keys, heap_values, non_zero_freqs };
|
|
|
|
// build the huffman tree - binary heap is used for efficient frequency comparisons
|
|
while (heap.size() > 1) {
|
|
u16 lowest_frequency = heap.peek_min_key();
|
|
u16 lowest_link = heap.pop_min();
|
|
u16 second_lowest_frequency = heap.peek_min_key();
|
|
u16 second_lowest_link = heap.pop_min();
|
|
|
|
u16 new_link = heap.size() + 1;
|
|
|
|
u32 sum = lowest_frequency + second_lowest_frequency;
|
|
sum = min(sum, UINT16_MAX);
|
|
heap.insert(sum, new_link);
|
|
|
|
huffman_links[lowest_link] = new_link;
|
|
huffman_links[second_lowest_link] = new_link;
|
|
}
|
|
|
|
non_zero_freqs = 0;
|
|
for (size_t i = 0; i < Size; i++) {
|
|
if (frequencies[i] == 0) {
|
|
lengths[i] = 0;
|
|
continue;
|
|
}
|
|
|
|
u16 link = huffman_links[Size + non_zero_freqs];
|
|
non_zero_freqs++;
|
|
|
|
size_t bit_length = 1;
|
|
while (link != 1) {
|
|
bit_length++;
|
|
link = huffman_links[link];
|
|
}
|
|
|
|
if (bit_length > max_bit_length) {
|
|
VERIFY(shift < 15);
|
|
return generate_huffman_lengths(lengths, frequencies, max_bit_length, shift + 1);
|
|
}
|
|
|
|
lengths[i] = bit_length;
|
|
}
|
|
}
|
|
|
|
}
|