2020-08-01 20:01:39 +00:00
/*
* Copyright ( c ) 2020 , the SerenityOS developers
2021-04-22 20:40:43 +00:00
* Copyright ( c ) 2021 , Idan Horowitz < idan . horowitz @ serenityos . org >
2020-08-01 20:01:39 +00:00
*
2021-04-22 08:24:48 +00:00
* SPDX - License - Identifier : BSD - 2 - Clause
2020-08-01 20:01:39 +00:00
*/
2020-09-06 19:40:46 +00:00
# include <AK/Array.h>
2020-08-01 20:01:39 +00:00
# include <AK/Assertions.h>
2021-03-12 23:17:18 +00:00
# include <AK/BinaryHeap.h>
2020-08-26 12:22:25 +00:00
# include <AK/BinarySearch.h>
2020-09-01 09:43:32 +00:00
# include <AK/MemoryStream.h>
2021-03-12 23:17:18 +00:00
# include <string.h>
2020-08-26 12:22:25 +00:00
2020-08-01 20:01:39 +00:00
# include <LibCompress/Deflate.h>
namespace Compress {
2020-09-10 12:06:50 +00:00
const CanonicalCode & CanonicalCode : : fixed_literal_codes ( )
2020-08-26 12:22:25 +00:00
{
2020-08-31 09:41:44 +00:00
static CanonicalCode code ;
static bool initialized = false ;
2020-08-26 12:22:25 +00:00
2020-08-31 09:41:44 +00:00
if ( initialized )
return code ;
2020-08-26 12:22:25 +00:00
2021-03-12 23:17:18 +00:00
code = CanonicalCode : : from_bytes ( fixed_literal_bit_lengths ) . value ( ) ;
2020-08-31 09:41:44 +00:00
initialized = true ;
return code ;
2020-08-01 20:01:39 +00:00
}
2020-09-10 12:06:50 +00:00
const CanonicalCode & CanonicalCode : : fixed_distance_codes ( )
2020-08-01 20:01:39 +00:00
{
2020-08-31 09:41:44 +00:00
static CanonicalCode code ;
static bool initialized = false ;
2020-08-01 20:01:39 +00:00
2020-08-31 09:41:44 +00:00
if ( initialized )
return code ;
2020-08-01 20:01:39 +00:00
2021-03-12 23:17:18 +00:00
code = CanonicalCode : : from_bytes ( fixed_distance_bit_lengths ) . value ( ) ;
2020-08-31 09:41:44 +00:00
initialized = true ;
return code ;
}
2020-09-10 12:06:50 +00:00
Optional < CanonicalCode > CanonicalCode : : from_bytes ( ReadonlyBytes bytes )
2020-08-31 09:41:44 +00:00
{
// FIXME: I can't quite follow the algorithm here, but it seems to work.
CanonicalCode code ;
2021-03-12 23:17:18 +00:00
auto non_zero_symbols = 0 ;
auto last_non_zero = - 1 ;
for ( size_t i = 0 ; i < bytes . size ( ) ; i + + ) {
if ( bytes [ i ] ! = 0 ) {
non_zero_symbols + + ;
last_non_zero = i ;
}
}
if ( non_zero_symbols = = 1 ) { // special case - only 1 symbol
code . m_symbol_codes . append ( 0b10 ) ;
code . m_symbol_values . append ( last_non_zero ) ;
code . m_bit_codes [ last_non_zero ] = 0 ;
code . m_bit_code_lengths [ last_non_zero ] = 1 ;
return code ;
}
2020-08-31 09:41:44 +00:00
auto next_code = 0 ;
for ( size_t code_length = 1 ; code_length < = 15 ; + + code_length ) {
next_code < < = 1 ;
auto start_bit = 1 < < code_length ;
for ( size_t symbol = 0 ; symbol < bytes . size ( ) ; + + symbol ) {
if ( bytes [ symbol ] ! = code_length )
continue ;
if ( next_code > start_bit )
return { } ;
code . m_symbol_codes . append ( start_bit | next_code ) ;
code . m_symbol_values . append ( symbol ) ;
2021-03-12 23:17:18 +00:00
code . m_bit_codes [ symbol ] = fast_reverse16 ( start_bit | next_code , code_length ) ; // DEFLATE writes huffman encoded symbols as lsb-first
code . m_bit_code_lengths [ symbol ] = code_length ;
2020-08-31 09:41:44 +00:00
next_code + + ;
}
}
if ( next_code ! = ( 1 < < 15 ) ) {
return { } ;
}
return code ;
2020-08-26 12:22:25 +00:00
}
2020-09-10 12:06:50 +00:00
u32 CanonicalCode : : read_symbol ( InputBitStream & stream ) const
2020-08-26 12:22:25 +00:00
{
u32 code_bits = 1 ;
for ( ; ; ) {
code_bits = code_bits < < 1 | stream . read_bits ( 1 ) ;
2021-03-16 18:25:58 +00:00
if ( code_bits > = ( 1 < < 16 ) )
return UINT32_MAX ; // the maximum symbol in deflate is 288, so we use UINT32_MAX (an impossible value) to indicate an error
2020-08-01 20:01:39 +00:00
2021-01-22 16:42:36 +00:00
// FIXME: This is very inefficient and could greatly be improved by implementing this
2021-01-01 20:32:59 +00:00
// algorithm: https://www.hanshq.net/zip.html#huffdec
2020-08-26 12:22:25 +00:00
size_t index ;
2021-02-25 20:10:47 +00:00
if ( binary_search ( m_symbol_codes . span ( ) , code_bits , & index ) )
2020-08-26 12:22:25 +00:00
return m_symbol_values [ index ] ;
2020-08-01 20:01:39 +00:00
}
}
2021-03-12 23:17:18 +00:00
void CanonicalCode : : write_symbol ( OutputBitStream & stream , u32 symbol ) const
{
stream . write_bits ( m_bit_codes [ symbol ] , m_bit_code_lengths [ symbol ] ) ;
}
2020-08-26 12:22:25 +00:00
DeflateDecompressor : : CompressedBlock : : CompressedBlock ( DeflateDecompressor & decompressor , CanonicalCode literal_codes , Optional < CanonicalCode > distance_codes )
: m_decompressor ( decompressor )
, m_literal_codes ( literal_codes )
, m_distance_codes ( distance_codes )
2020-08-01 20:01:39 +00:00
{
}
2020-08-26 12:22:25 +00:00
bool DeflateDecompressor : : CompressedBlock : : try_read_more ( )
2020-08-01 20:01:39 +00:00
{
2020-08-26 12:22:25 +00:00
if ( m_eof = = true )
return false ;
const auto symbol = m_literal_codes . read_symbol ( m_decompressor . m_input_stream ) ;
2021-03-17 16:07:27 +00:00
if ( symbol > = 286 ) { // invalid deflate literal/length symbol
2021-03-16 18:25:58 +00:00
m_decompressor . set_fatal_error ( ) ;
return false ;
}
2020-08-26 12:22:25 +00:00
if ( symbol < 256 ) {
m_decompressor . m_output_stream < < static_cast < u8 > ( symbol ) ;
return true ;
} else if ( symbol = = 256 ) {
m_eof = true ;
return false ;
2020-08-01 20:01:39 +00:00
} else {
2020-09-05 15:38:46 +00:00
if ( ! m_distance_codes . has_value ( ) ) {
m_decompressor . set_fatal_error ( ) ;
return false ;
}
2020-08-26 12:22:25 +00:00
2020-09-08 15:46:19 +00:00
const auto length = m_decompressor . decode_length ( symbol ) ;
2021-03-16 18:25:58 +00:00
const auto distance_symbol = m_distance_codes . value ( ) . read_symbol ( m_decompressor . m_input_stream ) ;
2021-03-17 16:07:27 +00:00
if ( distance_symbol > = 30 ) { // invalid deflate distance symbol
2021-03-16 18:25:58 +00:00
m_decompressor . set_fatal_error ( ) ;
return false ;
}
const auto distance = m_decompressor . decode_distance ( distance_symbol ) ;
2020-08-26 12:22:25 +00:00
2020-09-08 15:46:19 +00:00
for ( size_t idx = 0 ; idx < length ; + + idx ) {
2020-09-05 15:38:46 +00:00
u8 byte = 0 ;
m_decompressor . m_output_stream . read ( { & byte , sizeof ( byte ) } , distance ) ;
2021-03-17 15:42:08 +00:00
if ( m_decompressor . m_output_stream . handle_any_error ( ) ) {
m_decompressor . set_fatal_error ( ) ;
return false ; // a back reference was requested that was too far back (outside our current sliding window)
}
2020-09-05 15:38:46 +00:00
m_decompressor . m_output_stream < < byte ;
2020-08-31 10:30:16 +00:00
}
2020-08-26 12:22:25 +00:00
return true ;
2020-08-01 20:01:39 +00:00
}
}
2020-08-26 12:22:25 +00:00
DeflateDecompressor : : UncompressedBlock : : UncompressedBlock ( DeflateDecompressor & decompressor , size_t length )
: m_decompressor ( decompressor )
, m_bytes_remaining ( length )
2020-08-01 20:01:39 +00:00
{
2020-08-26 12:22:25 +00:00
}
2020-08-01 20:01:39 +00:00
2020-08-26 12:22:25 +00:00
bool DeflateDecompressor : : UncompressedBlock : : try_read_more ( )
{
if ( m_bytes_remaining = = 0 )
return false ;
2020-08-01 20:01:39 +00:00
2020-08-26 12:22:25 +00:00
const auto nread = min ( m_bytes_remaining , m_decompressor . m_output_stream . remaining_contigous_space ( ) ) ;
m_bytes_remaining - = nread ;
2020-08-01 20:01:39 +00:00
2020-08-26 12:22:25 +00:00
m_decompressor . m_input_stream > > m_decompressor . m_output_stream . reserve_contigous_space ( nread ) ;
2020-08-01 20:01:39 +00:00
2020-08-26 12:22:25 +00:00
return true ;
}
2020-08-01 20:01:39 +00:00
2020-08-26 12:22:25 +00:00
DeflateDecompressor : : DeflateDecompressor ( InputStream & stream )
: m_input_stream ( stream )
{
2020-08-01 20:01:39 +00:00
}
2020-08-26 12:22:25 +00:00
DeflateDecompressor : : ~ DeflateDecompressor ( )
2020-08-01 20:01:39 +00:00
{
2020-08-26 12:22:25 +00:00
if ( m_state = = State : : ReadingCompressedBlock )
m_compressed_block . ~ CompressedBlock ( ) ;
if ( m_state = = State : : ReadingUncompressedBlock )
m_uncompressed_block . ~ UncompressedBlock ( ) ;
}
2020-08-01 20:01:39 +00:00
2020-08-26 12:22:25 +00:00
size_t DeflateDecompressor : : read ( Bytes bytes )
{
2021-03-19 14:20:00 +00:00
size_t total_read = 0 ;
while ( total_read < bytes . size ( ) ) {
if ( has_any_error ( ) )
break ;
2020-08-01 20:01:39 +00:00
2021-03-19 14:20:00 +00:00
auto slice = bytes . slice ( total_read ) ;
2020-08-01 20:01:39 +00:00
2021-03-19 14:20:00 +00:00
if ( m_state = = State : : Idle ) {
if ( m_read_final_bock )
break ;
2020-08-01 20:01:39 +00:00
2021-03-19 14:20:00 +00:00
m_read_final_bock = m_input_stream . read_bit ( ) ;
const auto block_type = m_input_stream . read_bits ( 2 ) ;
2020-08-01 20:01:39 +00:00
2021-03-16 16:28:01 +00:00
if ( m_input_stream . has_any_error ( ) ) {
set_fatal_error ( ) ;
2021-03-19 14:20:00 +00:00
break ;
2020-08-01 20:01:39 +00:00
}
2021-03-19 14:20:00 +00:00
if ( block_type = = 0b00 ) {
m_input_stream . align_to_byte_boundary ( ) ;
2020-08-01 20:01:39 +00:00
2021-03-19 14:20:00 +00:00
LittleEndian < u16 > length , negated_length ;
m_input_stream > > length > > negated_length ;
2020-08-01 20:01:39 +00:00
2021-03-19 14:20:00 +00:00
if ( m_input_stream . has_any_error ( ) ) {
set_fatal_error ( ) ;
break ;
}
2020-08-01 20:01:39 +00:00
2021-03-19 14:20:00 +00:00
if ( ( length ^ 0xffff ) ! = negated_length ) {
set_fatal_error ( ) ;
break ;
}
2020-08-01 20:01:39 +00:00
2021-03-19 14:20:00 +00:00
m_state = State : : ReadingUncompressedBlock ;
new ( & m_uncompressed_block ) UncompressedBlock ( * this , length ) ;
2020-08-31 09:33:18 +00:00
2021-03-19 14:20:00 +00:00
continue ;
2021-03-16 16:28:01 +00:00
}
2021-03-19 14:20:00 +00:00
if ( block_type = = 0b01 ) {
m_state = State : : ReadingCompressedBlock ;
new ( & m_compressed_block ) CompressedBlock ( * this , CanonicalCode : : fixed_literal_codes ( ) , CanonicalCode : : fixed_distance_codes ( ) ) ;
2020-08-01 20:01:39 +00:00
2021-03-19 14:20:00 +00:00
continue ;
}
2020-08-01 20:01:39 +00:00
2021-03-19 14:20:00 +00:00
if ( block_type = = 0b10 ) {
CanonicalCode literal_codes ;
Optional < CanonicalCode > distance_codes ;
decode_codes ( literal_codes , distance_codes ) ;
2020-08-01 20:01:39 +00:00
2021-03-19 14:20:00 +00:00
if ( m_input_stream . has_any_error ( ) ) {
set_fatal_error ( ) ;
break ;
}
2020-08-01 20:01:39 +00:00
2021-03-19 14:20:00 +00:00
m_state = State : : ReadingCompressedBlock ;
new ( & m_compressed_block ) CompressedBlock ( * this , literal_codes , distance_codes ) ;
continue ;
}
2020-08-01 20:01:39 +00:00
2021-03-16 16:28:01 +00:00
set_fatal_error ( ) ;
2021-03-19 14:20:00 +00:00
break ;
2021-03-16 16:28:01 +00:00
}
2021-03-19 14:20:00 +00:00
if ( m_state = = State : : ReadingCompressedBlock ) {
auto nread = m_output_stream . read ( slice ) ;
2020-08-01 20:01:39 +00:00
2021-03-19 14:20:00 +00:00
while ( nread < slice . size ( ) & & m_compressed_block . try_read_more ( ) ) {
nread + = m_output_stream . read ( slice . slice ( nread ) ) ;
}
2020-08-01 20:01:39 +00:00
2021-03-19 14:20:00 +00:00
if ( m_input_stream . has_any_error ( ) ) {
set_fatal_error ( ) ;
break ;
}
2020-08-01 20:01:39 +00:00
2021-03-19 14:20:00 +00:00
total_read + = nread ;
if ( nread = = slice . size ( ) )
break ;
2020-08-01 20:01:39 +00:00
2021-03-19 14:20:00 +00:00
m_compressed_block . ~ CompressedBlock ( ) ;
m_state = State : : Idle ;
2020-08-01 20:01:39 +00:00
2021-03-19 14:20:00 +00:00
continue ;
2021-03-16 16:28:01 +00:00
}
2021-03-19 14:20:00 +00:00
if ( m_state = = State : : ReadingUncompressedBlock ) {
auto nread = m_output_stream . read ( slice ) ;
2020-08-01 20:01:39 +00:00
2021-03-19 14:20:00 +00:00
while ( nread < slice . size ( ) & & m_uncompressed_block . try_read_more ( ) ) {
nread + = m_output_stream . read ( slice . slice ( nread ) ) ;
}
2020-08-01 20:01:39 +00:00
2021-03-19 14:20:00 +00:00
if ( m_input_stream . has_any_error ( ) ) {
set_fatal_error ( ) ;
break ;
}
2020-08-01 20:01:39 +00:00
2021-03-19 14:20:00 +00:00
total_read + = nread ;
if ( nread = = slice . size ( ) )
break ;
m_uncompressed_block . ~ UncompressedBlock ( ) ;
m_state = State : : Idle ;
continue ;
}
VERIFY_NOT_REACHED ( ) ;
}
return total_read ;
2020-08-01 20:01:39 +00:00
}
2020-08-26 12:22:25 +00:00
bool DeflateDecompressor : : read_or_error ( Bytes bytes )
2020-08-01 20:01:39 +00:00
{
2020-08-26 12:22:25 +00:00
if ( read ( bytes ) < bytes . size ( ) ) {
2020-08-31 11:12:15 +00:00
set_fatal_error ( ) ;
2020-08-26 12:22:25 +00:00
return false ;
2020-08-01 20:01:39 +00:00
}
2020-08-26 12:22:25 +00:00
return true ;
2020-08-01 20:01:39 +00:00
}
2020-08-26 12:22:25 +00:00
bool DeflateDecompressor : : discard_or_error ( size_t count )
2020-08-01 20:01:39 +00:00
{
2020-08-26 12:22:25 +00:00
u8 buffer [ 4096 ] ;
2020-08-01 20:01:39 +00:00
2020-08-26 12:22:25 +00:00
size_t ndiscarded = 0 ;
while ( ndiscarded < count ) {
2020-09-13 10:24:17 +00:00
if ( unreliable_eof ( ) ) {
2020-08-31 11:12:15 +00:00
set_fatal_error ( ) ;
2020-08-26 12:22:25 +00:00
return false ;
}
2020-08-01 20:01:39 +00:00
2020-08-26 12:22:25 +00:00
ndiscarded + = read ( { buffer , min < size_t > ( count - ndiscarded , 4096 ) } ) ;
2020-08-01 20:01:39 +00:00
}
2020-08-26 12:22:25 +00:00
return true ;
2020-08-01 20:01:39 +00:00
}
2020-09-13 10:24:17 +00:00
bool DeflateDecompressor : : unreliable_eof ( ) const { return m_state = = State : : Idle & & m_read_final_bock ; }
2020-08-01 20:01:39 +00:00
2021-03-15 15:20:20 +00:00
bool DeflateDecompressor : : handle_any_error ( )
{
2021-03-16 13:11:08 +00:00
bool handled_errors = m_input_stream . handle_any_error ( ) ;
return Stream : : handle_any_error ( ) | | handled_errors ;
2021-03-15 15:20:20 +00:00
}
2020-09-08 14:49:05 +00:00
Optional < ByteBuffer > DeflateDecompressor : : decompress_all ( ReadonlyBytes bytes )
2020-08-01 20:01:39 +00:00
{
2020-08-26 12:22:25 +00:00
InputMemoryStream memory_stream { bytes } ;
2020-09-08 15:46:19 +00:00
DeflateDecompressor deflate_stream { memory_stream } ;
2020-09-15 09:48:54 +00:00
DuplexMemoryStream output_stream ;
2020-08-01 20:01:39 +00:00
2020-09-07 17:56:06 +00:00
u8 buffer [ 4096 ] ;
2020-09-13 10:24:17 +00:00
while ( ! deflate_stream . has_any_error ( ) & & ! deflate_stream . unreliable_eof ( ) ) {
2020-09-07 17:56:06 +00:00
const auto nread = deflate_stream . read ( { buffer , sizeof ( buffer ) } ) ;
output_stream . write_or_error ( { buffer , nread } ) ;
2020-08-01 20:01:39 +00:00
}
2020-09-07 17:56:06 +00:00
if ( deflate_stream . handle_any_error ( ) )
return { } ;
return output_stream . copy_into_contiguous_buffer ( ) ;
2020-08-01 20:01:39 +00:00
}
2020-09-08 15:46:19 +00:00
u32 DeflateDecompressor : : decode_length ( u32 symbol )
2020-08-01 20:01:39 +00:00
{
2020-08-26 12:22:25 +00:00
// FIXME: I can't quite follow the algorithm here, but it seems to work.
2020-08-01 20:01:39 +00:00
2020-08-26 12:22:25 +00:00
if ( symbol < = 264 )
return symbol - 254 ;
2020-08-01 20:01:39 +00:00
2020-08-26 12:22:25 +00:00
if ( symbol < = 284 ) {
auto extra_bits = ( symbol - 261 ) / 4 ;
return ( ( ( symbol - 265 ) % 4 + 4 ) < < extra_bits ) + 3 + m_input_stream . read_bits ( extra_bits ) ;
2020-08-01 20:01:39 +00:00
}
2020-08-26 12:22:25 +00:00
if ( symbol = = 285 )
return 258 ;
2021-02-23 19:42:32 +00:00
VERIFY_NOT_REACHED ( ) ;
2020-08-01 20:01:39 +00:00
}
2020-08-26 12:22:25 +00:00
u32 DeflateDecompressor : : decode_distance ( u32 symbol )
2020-08-01 20:01:39 +00:00
{
2020-08-26 12:22:25 +00:00
// FIXME: I can't quite follow the algorithm here, but it seems to work.
if ( symbol < = 3 )
return symbol + 1 ;
if ( symbol < = 29 ) {
auto extra_bits = ( symbol / 2 ) - 1 ;
return ( ( symbol % 2 + 2 ) < < extra_bits ) + 1 + m_input_stream . read_bits ( extra_bits ) ;
2020-08-01 20:01:39 +00:00
}
2021-02-23 19:42:32 +00:00
VERIFY_NOT_REACHED ( ) ;
2020-08-01 20:01:39 +00:00
}
2020-08-31 10:20:23 +00:00
void DeflateDecompressor : : decode_codes ( CanonicalCode & literal_code , Optional < CanonicalCode > & distance_code )
2020-08-01 20:01:39 +00:00
{
2020-08-31 10:20:23 +00:00
auto literal_code_count = m_input_stream . read_bits ( 5 ) + 257 ;
auto distance_code_count = m_input_stream . read_bits ( 5 ) + 1 ;
auto code_length_count = m_input_stream . read_bits ( 4 ) + 4 ;
// First we have to extract the code lengths of the code that was used to encode the code lengths of
// the code that was used to encode the block.
u8 code_lengths_code_lengths [ 19 ] = { 0 } ;
for ( size_t i = 0 ; i < code_length_count ; + + i ) {
2021-03-12 23:17:18 +00:00
code_lengths_code_lengths [ code_lengths_code_lengths_order [ i ] ] = m_input_stream . read_bits ( 3 ) ;
2020-08-31 10:20:23 +00:00
}
// Now we can extract the code that was used to encode the code lengths of the code that was used to
// encode the block.
auto code_length_code_result = CanonicalCode : : from_bytes ( { code_lengths_code_lengths , sizeof ( code_lengths_code_lengths ) } ) ;
if ( ! code_length_code_result . has_value ( ) ) {
2020-08-31 11:12:15 +00:00
set_fatal_error ( ) ;
2020-08-31 10:20:23 +00:00
return ;
}
const auto code_length_code = code_length_code_result . value ( ) ;
// Next we extract the code lengths of the code that was used to encode the block.
Vector < u8 > code_lengths ;
while ( code_lengths . size ( ) < literal_code_count + distance_code_count ) {
auto symbol = code_length_code . read_symbol ( m_input_stream ) ;
2021-03-16 18:25:58 +00:00
if ( symbol = = UINT32_MAX ) {
set_fatal_error ( ) ;
return ;
}
2021-03-12 23:17:18 +00:00
if ( symbol < DeflateSpecialCodeLengths : : COPY ) {
2020-08-31 10:20:23 +00:00
code_lengths . append ( static_cast < u8 > ( symbol ) ) ;
continue ;
2021-03-12 23:17:18 +00:00
} else if ( symbol = = DeflateSpecialCodeLengths : : ZEROS ) {
2020-08-31 10:20:23 +00:00
auto nrepeat = 3 + m_input_stream . read_bits ( 3 ) ;
for ( size_t j = 0 ; j < nrepeat ; + + j )
code_lengths . append ( 0 ) ;
continue ;
2021-03-12 23:17:18 +00:00
} else if ( symbol = = DeflateSpecialCodeLengths : : LONG_ZEROS ) {
2020-08-31 10:20:23 +00:00
auto nrepeat = 11 + m_input_stream . read_bits ( 7 ) ;
for ( size_t j = 0 ; j < nrepeat ; + + j )
code_lengths . append ( 0 ) ;
continue ;
} else {
2021-03-12 23:17:18 +00:00
VERIFY ( symbol = = DeflateSpecialCodeLengths : : COPY ) ;
2020-08-31 10:20:23 +00:00
if ( code_lengths . is_empty ( ) ) {
2020-08-31 11:12:15 +00:00
set_fatal_error ( ) ;
2020-08-31 10:20:23 +00:00
return ;
}
2020-09-04 12:30:53 +00:00
auto nrepeat = 3 + m_input_stream . read_bits ( 2 ) ;
2020-08-31 10:20:23 +00:00
for ( size_t j = 0 ; j < nrepeat ; + + j )
code_lengths . append ( code_lengths . last ( ) ) ;
}
}
if ( code_lengths . size ( ) ! = literal_code_count + distance_code_count ) {
2020-08-31 11:12:15 +00:00
set_fatal_error ( ) ;
2020-08-31 10:20:23 +00:00
return ;
}
// Now we extract the code that was used to encode literals and lengths in the block.
auto literal_code_result = CanonicalCode : : from_bytes ( code_lengths . span ( ) . trim ( literal_code_count ) ) ;
if ( ! literal_code_result . has_value ( ) ) {
2020-08-31 11:12:15 +00:00
set_fatal_error ( ) ;
2020-08-31 10:20:23 +00:00
return ;
}
literal_code = literal_code_result . value ( ) ;
// Now we extract the code that was used to encode distances in the block.
if ( distance_code_count = = 1 ) {
auto length = code_lengths [ literal_code_count ] ;
if ( length = = 0 ) {
return ;
} else if ( length ! = 1 ) {
2020-08-31 11:12:15 +00:00
set_fatal_error ( ) ;
2020-08-31 10:20:23 +00:00
return ;
}
}
auto distance_code_result = CanonicalCode : : from_bytes ( code_lengths . span ( ) . slice ( literal_code_count ) ) ;
if ( ! distance_code_result . has_value ( ) ) {
2020-08-31 11:12:15 +00:00
set_fatal_error ( ) ;
2020-08-31 10:20:23 +00:00
return ;
}
distance_code = distance_code_result . value ( ) ;
2020-08-01 20:01:39 +00:00
}
2020-08-18 18:49:59 +00:00
2021-03-12 23:17:18 +00:00
DeflateCompressor : : DeflateCompressor ( OutputStream & stream , CompressionLevel compression_level )
: m_compression_level ( compression_level )
, m_compression_constants ( compression_constants [ static_cast < int > ( m_compression_level ) ] )
, m_output_stream ( stream )
{
m_symbol_frequencies . fill ( 0 ) ;
m_distance_frequencies . fill ( 0 ) ;
}
DeflateCompressor : : ~ DeflateCompressor ( )
{
VERIFY ( m_finished ) ;
}
size_t DeflateCompressor : : write ( ReadonlyBytes bytes )
{
VERIFY ( ! m_finished ) ;
if ( bytes . size ( ) = = 0 )
return 0 ; // recursion base case
auto n_written = bytes . copy_trimmed_to ( pending_block ( ) . slice ( m_pending_block_size ) ) ;
m_pending_block_size + = n_written ;
if ( m_pending_block_size = = block_size )
flush ( ) ;
return n_written + write ( bytes . slice ( n_written ) ) ;
}
bool DeflateCompressor : : write_or_error ( ReadonlyBytes bytes )
{
if ( write ( bytes ) < bytes . size ( ) ) {
set_fatal_error ( ) ;
return false ;
}
return true ;
}
// Knuth's multiplicative hash on 4 bytes
u16 DeflateCompressor : : hash_sequence ( const u8 * bytes )
{
constexpr const u32 knuth_constant = 2654435761 ; // shares no common factors with 2^32
return ( ( bytes [ 0 ] | bytes [ 1 ] < < 8 | bytes [ 2 ] < < 16 | bytes [ 3 ] < < 24 ) * knuth_constant ) > > ( 32 - hash_bits ) ;
}
size_t DeflateCompressor : : compare_match_candidate ( size_t start , size_t candidate , size_t previous_match_length , size_t maximum_match_length )
{
VERIFY ( previous_match_length < maximum_match_length ) ;
2021-04-18 08:30:03 +00:00
// We firstly check that the match is at least (prev_match_length + 1) long, we check backwards as there's a higher chance the end mismatches
2021-03-12 23:17:18 +00:00
for ( ssize_t i = previous_match_length ; i > = 0 ; i - - ) {
if ( m_rolling_window [ start + i ] ! = m_rolling_window [ candidate + i ] )
return 0 ;
}
// Find the actual length
auto match_length = previous_match_length + 1 ;
while ( match_length < maximum_match_length & & m_rolling_window [ start + match_length ] = = m_rolling_window [ candidate + match_length ] ) {
match_length + + ;
}
VERIFY ( match_length > previous_match_length ) ;
VERIFY ( match_length < = maximum_match_length ) ;
return match_length ;
}
size_t DeflateCompressor : : find_back_match ( size_t start , u16 hash , size_t previous_match_length , size_t maximum_match_length , size_t & match_position )
{
auto max_chain_length = m_compression_constants . max_chain ;
if ( previous_match_length = = 0 )
previous_match_length = min_match_length - 1 ; // we only care about matches that are at least min_match_length long
if ( previous_match_length > = maximum_match_length )
2021-04-18 08:30:03 +00:00
return 0 ; // we can't improve a maximum length match
2021-03-12 23:17:18 +00:00
if ( previous_match_length > = m_compression_constants . max_lazy_length )
return 0 ; // the previous match is already pretty, we shouldn't waste another full search
if ( previous_match_length > = m_compression_constants . good_match_length )
max_chain_length / = 4 ; // we already have a pretty good much, so do a shorter search
auto candidate = m_hash_head [ hash ] ;
auto match_found = false ;
while ( max_chain_length - - ) {
if ( candidate = = empty_slot )
break ; // no remaining candidates
VERIFY ( candidate < start ) ;
if ( start - candidate > window_size )
break ; // outside the window
auto match_length = compare_match_candidate ( start , candidate , previous_match_length , maximum_match_length ) ;
if ( match_length ! = 0 ) {
match_found = true ;
match_position = candidate ;
previous_match_length = match_length ;
if ( match_length = = maximum_match_length )
return match_length ; // bail if we got the maximum possible length
}
candidate = m_hash_prev [ candidate % window_size ] ;
}
if ( ! match_found )
2021-04-18 08:30:03 +00:00
return 0 ; // we didn't find any matches
2021-03-12 23:17:18 +00:00
return previous_match_length ; // we found matches, but they were at most previous_match_length long
}
ALWAYS_INLINE u8 DeflateCompressor : : distance_to_base ( u16 distance )
{
return ( distance < = 256 ) ? distance_to_base_lo [ distance - 1 ] : distance_to_base_hi [ ( distance - 1 ) > > 7 ] ;
}
template < size_t Size >
2021-03-13 21:40:10 +00:00
void DeflateCompressor : : generate_huffman_lengths ( Array < u8 , Size > & lengths , const Array < u16 , Size > & frequencies , size_t max_bit_length , u16 frequency_cap )
2021-03-12 23:17:18 +00:00
{
VERIFY ( ( 1u < < max_bit_length ) > = Size ) ;
u16 heap_keys [ Size ] ; // Used for O(n) heap construction
u16 heap_values [ Size ] ;
u16 huffman_links [ Size * 2 + 1 ] = { 0 } ;
size_t non_zero_freqs = 0 ;
for ( size_t i = 0 ; i < Size ; i + + ) {
auto frequency = frequencies [ i ] ;
if ( frequency = = 0 )
continue ;
if ( frequency > frequency_cap ) {
frequency = frequency_cap ;
}
heap_keys [ non_zero_freqs ] = frequency ; // sort symbols by frequency
heap_values [ non_zero_freqs ] = Size + non_zero_freqs ; // huffman_links "links"
non_zero_freqs + + ;
}
// special case for only 1 used symbol
if ( non_zero_freqs < 2 ) {
for ( size_t i = 0 ; i < Size ; i + + )
lengths [ i ] = ( frequencies [ i ] = = 0 ) ? 0 : 1 ;
return ;
}
BinaryHeap < u16 , u16 , Size > heap { heap_keys , heap_values , non_zero_freqs } ;
// build the huffman tree - binary heap is used for efficient frequency comparisons
while ( heap . size ( ) > 1 ) {
u16 lowest_frequency = heap . peek_min_key ( ) ;
u16 lowest_link = heap . pop_min ( ) ;
u16 second_lowest_frequency = heap . peek_min_key ( ) ;
u16 second_lowest_link = heap . pop_min ( ) ;
u16 new_link = heap . size ( ) + 2 ;
heap . insert ( lowest_frequency + second_lowest_frequency , new_link ) ;
huffman_links [ lowest_link ] = new_link ;
huffman_links [ second_lowest_link ] = new_link ;
}
non_zero_freqs = 0 ;
for ( size_t i = 0 ; i < Size ; i + + ) {
if ( frequencies [ i ] = = 0 ) {
lengths [ i ] = 0 ;
continue ;
}
u16 link = huffman_links [ Size + non_zero_freqs ] ;
non_zero_freqs + + ;
size_t bit_length = 1 ;
while ( link ! = 2 ) {
bit_length + + ;
link = huffman_links [ link ] ;
}
if ( bit_length > max_bit_length ) {
VERIFY ( frequency_cap ! = 1 ) ;
2021-03-13 21:40:10 +00:00
return generate_huffman_lengths ( lengths , frequencies , max_bit_length , frequency_cap / 2 ) ;
2021-03-12 23:17:18 +00:00
}
lengths [ i ] = bit_length ;
}
}
void DeflateCompressor : : lz77_compress_block ( )
{
for ( auto & slot : m_hash_head ) { // initialize chained hash table
slot = empty_slot ;
}
auto insert_hash = [ & ] ( auto pos , auto hash ) {
auto window_pos = pos % window_size ;
m_hash_prev [ window_pos ] = m_hash_head [ hash ] ;
m_hash_head [ hash ] = window_pos ;
} ;
auto emit_literal = [ & ] ( auto literal ) {
VERIFY ( m_pending_symbol_size < = block_size + 1 ) ;
auto index = m_pending_symbol_size + + ;
m_symbol_buffer [ index ] . distance = 0 ;
m_symbol_buffer [ index ] . literal = literal ;
m_symbol_frequencies [ literal ] + + ;
} ;
auto emit_back_reference = [ & ] ( auto distance , auto length ) {
VERIFY ( m_pending_symbol_size < = block_size + 1 ) ;
auto index = m_pending_symbol_size + + ;
m_symbol_buffer [ index ] . distance = distance ;
m_symbol_buffer [ index ] . length = length ;
m_symbol_frequencies [ length_to_symbol [ length ] ] + + ;
m_distance_frequencies [ distance_to_base ( distance ) ] + + ;
} ;
size_t previous_match_length = 0 ;
size_t previous_match_position = 0 ;
VERIFY ( m_compression_constants . great_match_length < = max_match_length ) ;
// our block starts at block_size and is m_pending_block_size in length
auto block_end = block_size + m_pending_block_size ;
size_t current_position ;
for ( current_position = block_size ; current_position < block_end - min_match_length + 1 ; current_position + + ) {
auto hash = hash_sequence ( & m_rolling_window [ current_position ] ) ;
size_t match_position ;
auto match_length = find_back_match ( current_position , hash , previous_match_length ,
min ( m_compression_constants . great_match_length , block_end - current_position ) , match_position ) ;
insert_hash ( current_position , hash ) ;
// if the previous match is as good as the new match, just use it
if ( previous_match_length ! = 0 & & previous_match_length > = match_length ) {
emit_back_reference ( ( current_position - 1 ) - previous_match_position , previous_match_length ) ;
// skip all the bytes that are included in this match
for ( size_t j = current_position + 1 ; j < min ( current_position - 1 + previous_match_length , block_end - min_match_length + 1 ) ; j + + ) {
insert_hash ( j , hash_sequence ( & m_rolling_window [ j ] ) ) ;
}
current_position = ( current_position - 1 ) + previous_match_length - 1 ;
previous_match_length = 0 ;
continue ;
}
if ( match_length = = 0 ) {
VERIFY ( previous_match_length = = 0 ) ;
emit_literal ( m_rolling_window [ current_position ] ) ;
continue ;
}
// if this is a lazy match, and the new match is better than the old one, output previous as literal
if ( previous_match_length ! = 0 ) {
emit_literal ( m_rolling_window [ current_position - 1 ] ) ;
}
previous_match_length = match_length ;
previous_match_position = match_position ;
}
// clean up leftover lazy match
if ( previous_match_length ! = 0 ) {
emit_back_reference ( ( current_position - 1 ) - previous_match_position , previous_match_length ) ;
current_position = ( current_position - 1 ) + previous_match_length ;
}
// output remaining literals
while ( current_position < block_end ) {
emit_literal ( m_rolling_window [ current_position + + ] ) ;
}
}
size_t DeflateCompressor : : huffman_block_length ( const Array < u8 , max_huffman_literals > & literal_bit_lengths , const Array < u8 , max_huffman_distances > & distance_bit_lengths )
{
size_t length = 0 ;
for ( size_t i = 0 ; i < 286 ; i + + ) {
auto frequency = m_symbol_frequencies [ i ] ;
length + = literal_bit_lengths [ i ] * frequency ;
if ( i > = 257 ) // back reference length symbols
length + = packed_length_symbols [ i - 257 ] . extra_bits * frequency ;
}
for ( size_t i = 0 ; i < 30 ; i + + ) {
auto frequency = m_distance_frequencies [ i ] ;
length + = distance_bit_lengths [ i ] * frequency ;
length + = packed_distances [ i ] . extra_bits * frequency ;
}
return length ;
}
size_t DeflateCompressor : : uncompressed_block_length ( )
{
auto padding = 8 - ( ( m_output_stream . bit_offset ( ) + 3 ) % 8 ) ;
// 3 bit block header + align to byte + 2 * 16 bit length fields + block contents
return 3 + padding + ( 2 * 16 ) + m_pending_block_size * 8 ;
}
size_t DeflateCompressor : : fixed_block_length ( )
{
// block header + fixed huffman encoded block contents
return 3 + huffman_block_length ( fixed_literal_bit_lengths , fixed_distance_bit_lengths ) ;
}
size_t DeflateCompressor : : dynamic_block_length ( const Array < u8 , max_huffman_literals > & literal_bit_lengths , const Array < u8 , max_huffman_distances > & distance_bit_lengths , const Array < u8 , 19 > & code_lengths_bit_lengths , const Array < u16 , 19 > & code_lengths_frequencies , size_t code_lengths_count )
{
// block header + literal code count + distance code count + code length count
auto length = 3 + 5 + 5 + 4 ;
// 3 bits per code_length
length + = 3 * code_lengths_count ;
for ( size_t i = 0 ; i < code_lengths_frequencies . size ( ) ; i + + ) {
auto frequency = code_lengths_frequencies [ i ] ;
length + = code_lengths_bit_lengths [ i ] * frequency ;
if ( i = = DeflateSpecialCodeLengths : : COPY ) {
length + = 2 * frequency ;
} else if ( i = = DeflateSpecialCodeLengths : : ZEROS ) {
length + = 3 * frequency ;
} else if ( i = = DeflateSpecialCodeLengths : : LONG_ZEROS ) {
length + = 7 * frequency ;
}
}
return length + huffman_block_length ( literal_bit_lengths , distance_bit_lengths ) ;
}
2021-03-14 09:43:18 +00:00
void DeflateCompressor : : write_huffman ( const CanonicalCode & literal_code , const Optional < CanonicalCode > & distance_code )
2021-03-12 23:17:18 +00:00
{
2021-03-14 09:43:18 +00:00
auto has_distances = distance_code . has_value ( ) ;
2021-03-12 23:17:18 +00:00
for ( size_t i = 0 ; i < m_pending_symbol_size ; i + + ) {
if ( m_symbol_buffer [ i ] . distance = = 0 ) {
literal_code . write_symbol ( m_output_stream , m_symbol_buffer [ i ] . literal ) ;
continue ;
}
2021-03-14 09:43:18 +00:00
VERIFY ( has_distances ) ;
2021-03-12 23:17:18 +00:00
auto symbol = length_to_symbol [ m_symbol_buffer [ i ] . length ] ;
literal_code . write_symbol ( m_output_stream , symbol ) ;
// Emit extra bits if needed
m_output_stream . write_bits ( m_symbol_buffer [ i ] . length - packed_length_symbols [ symbol - 257 ] . base_length , packed_length_symbols [ symbol - 257 ] . extra_bits ) ;
auto base_distance = distance_to_base ( m_symbol_buffer [ i ] . distance ) ;
2021-03-14 09:43:18 +00:00
distance_code . value ( ) . write_symbol ( m_output_stream , base_distance ) ;
2021-03-12 23:17:18 +00:00
// Emit extra bits if needed
m_output_stream . write_bits ( m_symbol_buffer [ i ] . distance - packed_distances [ base_distance ] . base_distance , packed_distances [ base_distance ] . extra_bits ) ;
}
}
size_t DeflateCompressor : : encode_huffman_lengths ( const Array < u8 , max_huffman_literals + max_huffman_distances > & lengths , size_t lengths_count , Array < code_length_symbol , max_huffman_literals + max_huffman_distances > & encoded_lengths )
{
size_t encoded_count = 0 ;
size_t i = 0 ;
while ( i < lengths_count ) {
if ( lengths [ i ] = = 0 ) {
auto zero_count = 0 ;
for ( size_t j = i ; j < min ( lengths_count , i + 138 ) & & lengths [ j ] = = 0 ; j + + )
zero_count + + ;
if ( zero_count < 3 ) { // below minimum repeated zero count
encoded_lengths [ encoded_count + + ] . symbol = 0 ;
i + + ;
continue ;
}
if ( zero_count < = 10 ) {
encoded_lengths [ encoded_count ] . symbol = DeflateSpecialCodeLengths : : ZEROS ;
encoded_lengths [ encoded_count + + ] . count = zero_count ;
} else {
encoded_lengths [ encoded_count ] . symbol = DeflateSpecialCodeLengths : : LONG_ZEROS ;
encoded_lengths [ encoded_count + + ] . count = zero_count ;
}
i + = zero_count ;
continue ;
}
encoded_lengths [ encoded_count + + ] . symbol = lengths [ i + + ] ;
auto copy_count = 0 ;
for ( size_t j = i ; j < min ( lengths_count , i + 6 ) & & lengths [ j ] = = lengths [ i - 1 ] ; j + + )
copy_count + + ;
if ( copy_count > = 3 ) {
encoded_lengths [ encoded_count ] . symbol = DeflateSpecialCodeLengths : : COPY ;
encoded_lengths [ encoded_count + + ] . count = copy_count ;
i + = copy_count ;
continue ;
}
}
return encoded_count ;
}
size_t DeflateCompressor : : encode_block_lengths ( const Array < u8 , max_huffman_literals > & literal_bit_lengths , const Array < u8 , max_huffman_distances > & distance_bit_lengths , Array < code_length_symbol , max_huffman_literals + max_huffman_distances > & encoded_lengths , size_t & literal_code_count , size_t & distance_code_count )
{
literal_code_count = max_huffman_literals ;
distance_code_count = max_huffman_distances ;
VERIFY ( literal_bit_lengths [ 256 ] ! = 0 ) ; // Make sure at least the EndOfBlock marker is present
while ( literal_bit_lengths [ literal_code_count - 1 ] = = 0 )
literal_code_count - - ;
// Drop trailing zero lengths, keeping at least one
while ( distance_bit_lengths [ distance_code_count - 1 ] = = 0 & & distance_code_count > 1 )
distance_code_count - - ;
Array < u8 , max_huffman_literals + max_huffman_distances > all_lengths { } ;
size_t lengths_count = 0 ;
for ( size_t i = 0 ; i < literal_code_count ; i + + ) {
all_lengths [ lengths_count + + ] = literal_bit_lengths [ i ] ;
}
for ( size_t i = 0 ; i < distance_code_count ; i + + ) {
all_lengths [ lengths_count + + ] = distance_bit_lengths [ i ] ;
}
return encode_huffman_lengths ( all_lengths , lengths_count , encoded_lengths ) ;
}
2021-03-14 09:43:18 +00:00
void DeflateCompressor : : write_dynamic_huffman ( const CanonicalCode & literal_code , size_t literal_code_count , const Optional < CanonicalCode > & distance_code , size_t distance_code_count , const Array < u8 , 19 > & code_lengths_bit_lengths , size_t code_length_count , const Array < code_length_symbol , max_huffman_literals + max_huffman_distances > & encoded_lengths , size_t encoded_lengths_count )
2021-03-12 23:17:18 +00:00
{
m_output_stream . write_bits ( literal_code_count - 257 , 5 ) ;
m_output_stream . write_bits ( distance_code_count - 1 , 5 ) ;
m_output_stream . write_bits ( code_length_count - 4 , 4 ) ;
for ( size_t i = 0 ; i < code_length_count ; i + + ) {
m_output_stream . write_bits ( code_lengths_bit_lengths [ code_lengths_code_lengths_order [ i ] ] , 3 ) ;
}
auto code_lengths_code = CanonicalCode : : from_bytes ( code_lengths_bit_lengths ) ;
VERIFY ( code_lengths_code . has_value ( ) ) ;
for ( size_t i = 0 ; i < encoded_lengths_count ; i + + ) {
auto encoded_length = encoded_lengths [ i ] ;
code_lengths_code - > write_symbol ( m_output_stream , encoded_length . symbol ) ;
if ( encoded_length . symbol = = DeflateSpecialCodeLengths : : COPY ) {
m_output_stream . write_bits ( encoded_length . count - 3 , 2 ) ;
} else if ( encoded_length . symbol = = DeflateSpecialCodeLengths : : ZEROS ) {
m_output_stream . write_bits ( encoded_length . count - 3 , 3 ) ;
} else if ( encoded_length . symbol = = DeflateSpecialCodeLengths : : LONG_ZEROS ) {
m_output_stream . write_bits ( encoded_length . count - 11 , 7 ) ;
}
}
write_huffman ( literal_code , distance_code ) ;
}
void DeflateCompressor : : flush ( )
{
if ( m_output_stream . handle_any_error ( ) ) {
set_fatal_error ( ) ;
return ;
}
m_output_stream . write_bit ( m_finished ) ;
// if this is just an empty block to signify the end of the deflate stream use the smallest block possible (10 bits total)
if ( m_pending_block_size = = 0 ) {
VERIFY ( m_finished ) ; // we shouldn't be writing empty blocks unless this is the final one
m_output_stream . write_bits ( 0b01 , 2 ) ; // fixed huffman codes
m_output_stream . write_bits ( 0b0000000 , 7 ) ; // end of block symbol
m_output_stream . align_to_byte_boundary ( ) ;
return ;
}
auto write_uncompressed = [ & ] ( ) {
m_output_stream . write_bits ( 0b00 , 2 ) ; // no compression
m_output_stream . align_to_byte_boundary ( ) ;
LittleEndian < u16 > len = m_pending_block_size ;
m_output_stream < < len ;
LittleEndian < u16 > nlen = ~ m_pending_block_size ;
m_output_stream < < nlen ;
m_output_stream . write_or_error ( pending_block ( ) . slice ( 0 , m_pending_block_size ) ) ;
} ;
if ( m_compression_level = = CompressionLevel : : STORE ) { // disabled compression fast path
write_uncompressed ( ) ;
m_pending_block_size = 0 ;
return ;
}
// The following implementation of lz77 compression and huffman encoding is based on the reference implementation by Hans Wennborg https://www.hanshq.net/zip.html
// this reads from the pending block and writes to m_symbol_buffer
lz77_compress_block ( ) ;
// insert EndOfBlock marker to the symbol buffer
m_symbol_buffer [ m_pending_symbol_size ] . distance = 0 ;
m_symbol_buffer [ m_pending_symbol_size + + ] . literal = 256 ;
m_symbol_frequencies [ 256 ] + + ;
// generate optimal dynamic huffman code lengths
Array < u8 , max_huffman_literals > dynamic_literal_bit_lengths { } ;
Array < u8 , max_huffman_distances > dynamic_distance_bit_lengths { } ;
generate_huffman_lengths ( dynamic_literal_bit_lengths , m_symbol_frequencies , 15 ) ; // deflate data huffman can use up to 15 bits per symbol
generate_huffman_lengths ( dynamic_distance_bit_lengths , m_distance_frequencies , 15 ) ;
// encode literal and distance lengths together in deflate format
Array < code_length_symbol , max_huffman_literals + max_huffman_distances > encoded_lengths { } ;
size_t literal_code_count ;
size_t distance_code_count ;
auto encoded_lengths_count = encode_block_lengths ( dynamic_literal_bit_lengths , dynamic_distance_bit_lengths , encoded_lengths , literal_code_count , distance_code_count ) ;
// count code length frequencies
Array < u16 , 19 > code_lengths_frequencies { 0 } ;
for ( size_t i = 0 ; i < encoded_lengths_count ; i + + ) {
code_lengths_frequencies [ encoded_lengths [ i ] . symbol ] + + ;
}
// generate optimal huffman code lengths code lengths
Array < u8 , 19 > code_lengths_bit_lengths { } ;
generate_huffman_lengths ( code_lengths_bit_lengths , code_lengths_frequencies , 7 ) ; // deflate code length huffman can use up to 7 bits per symbol
// calculate actual code length code lengths count (without trailing zeros)
auto code_lengths_count = code_lengths_bit_lengths . size ( ) ;
while ( code_lengths_bit_lengths [ code_lengths_code_lengths_order [ code_lengths_count - 1 ] ] = = 0 )
code_lengths_count - - ;
auto uncompressed_size = uncompressed_block_length ( ) ;
auto fixed_huffman_size = fixed_block_length ( ) ;
auto dynamic_huffman_size = dynamic_block_length ( dynamic_literal_bit_lengths , dynamic_distance_bit_lengths , code_lengths_bit_lengths , code_lengths_frequencies , code_lengths_count ) ;
2021-04-18 08:30:03 +00:00
// If the compression somehow didn't reduce the size enough, just write out the block uncompressed as it allows for much faster decompression
2021-03-12 23:17:18 +00:00
if ( uncompressed_size < = min ( fixed_huffman_size , dynamic_huffman_size ) ) {
write_uncompressed ( ) ;
} else if ( fixed_huffman_size < = dynamic_huffman_size ) { // If the fixed and dynamic huffman codes come out the same size, prefer the fixed version, as it takes less time to decode
m_output_stream . write_bits ( 0b01 , 2 ) ; // fixed huffman codes
write_huffman ( CanonicalCode : : fixed_literal_codes ( ) , CanonicalCode : : fixed_distance_codes ( ) ) ;
} else {
m_output_stream . write_bits ( 0b10 , 2 ) ; // dynamic huffman codes
auto literal_code = CanonicalCode : : from_bytes ( dynamic_literal_bit_lengths ) ;
VERIFY ( literal_code . has_value ( ) ) ;
auto distance_code = CanonicalCode : : from_bytes ( dynamic_distance_bit_lengths ) ;
2021-03-14 09:43:18 +00:00
write_dynamic_huffman ( literal_code . value ( ) , literal_code_count , distance_code , distance_code_count , code_lengths_bit_lengths , code_lengths_count , encoded_lengths , encoded_lengths_count ) ;
2021-03-12 23:17:18 +00:00
}
if ( m_finished )
m_output_stream . align_to_byte_boundary ( ) ;
// reset all block specific members
m_pending_block_size = 0 ;
m_pending_symbol_size = 0 ;
m_symbol_frequencies . fill ( 0 ) ;
m_distance_frequencies . fill ( 0 ) ;
// On the final block this copy will potentially produce an invalid search window, but since its the final block we dont care
pending_block ( ) . copy_trimmed_to ( { m_rolling_window , block_size } ) ;
}
void DeflateCompressor : : final_flush ( )
{
VERIFY ( ! m_finished ) ;
m_finished = true ;
flush ( ) ;
}
Optional < ByteBuffer > DeflateCompressor : : compress_all ( const ReadonlyBytes & bytes , CompressionLevel compression_level )
{
DuplexMemoryStream output_stream ;
DeflateCompressor deflate_stream { output_stream , compression_level } ;
deflate_stream . write_or_error ( bytes ) ;
deflate_stream . final_flush ( ) ;
if ( deflate_stream . handle_any_error ( ) )
return { } ;
return output_stream . copy_into_contiguous_buffer ( ) ;
}
2020-08-01 20:01:39 +00:00
}