LZWDecoder.h 4.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158
  1. /*
  2. * Copyright (c) 2018-2021, Andreas Kling <kling@serenityos.org>
  3. * Copyright (c) 2022, the SerenityOS developers.
  4. *
  5. * SPDX-License-Identifier: BSD-2-Clause
  6. */
  7. #pragma once
  8. #include <AK/BitStream.h>
  9. #include <AK/Concepts.h>
  10. #include <AK/Debug.h>
  11. #include <AK/Format.h>
  12. #include <AK/IntegralMath.h>
  13. #include <AK/MemoryStream.h>
  14. #include <AK/Vector.h>
  15. namespace Compress {
  16. template<InputBitStream InputStream>
  17. class LZWDecoder {
  18. private:
  19. static constexpr int max_code_size = 12;
  20. public:
  21. explicit LZWDecoder(MaybeOwned<InputStream> lzw_stream, u8 min_code_size, i32 offset_for_size_change = 0)
  22. : m_bit_stream(move(lzw_stream))
  23. , m_code_size(min_code_size)
  24. , m_original_code_size(min_code_size)
  25. , m_table_capacity(AK::exp2<u32>(min_code_size))
  26. , m_offset_for_size_change(offset_for_size_change)
  27. {
  28. init_code_table();
  29. }
  30. static ErrorOr<ByteBuffer> decode_all(ReadonlyBytes bytes, u8 initial_code_size, i32 offset_for_size_change = 0)
  31. {
  32. auto memory_stream = make<FixedMemoryStream>(bytes);
  33. auto lzw_stream = make<InputStream>(MaybeOwned<Stream>(move(memory_stream)));
  34. Compress::LZWDecoder lzw_decoder { MaybeOwned<InputStream> { move(lzw_stream) }, initial_code_size, offset_for_size_change };
  35. ByteBuffer decoded;
  36. u16 const clear_code = lzw_decoder.add_control_code();
  37. u16 const end_of_data_code = lzw_decoder.add_control_code();
  38. while (true) {
  39. auto const code = TRY(lzw_decoder.next_code());
  40. if (code == clear_code) {
  41. lzw_decoder.reset();
  42. continue;
  43. }
  44. if (code == end_of_data_code)
  45. break;
  46. TRY(decoded.try_append(lzw_decoder.get_output()));
  47. }
  48. return decoded;
  49. }
  50. u16 add_control_code()
  51. {
  52. u16 const control_code = m_code_table.size();
  53. m_code_table.append(Vector<u8> {});
  54. m_original_code_table.append(Vector<u8> {});
  55. if (m_code_table.size() >= m_table_capacity && m_code_size < max_code_size) {
  56. ++m_code_size;
  57. ++m_original_code_size;
  58. m_table_capacity *= 2;
  59. }
  60. return control_code;
  61. }
  62. void reset()
  63. {
  64. m_code_table.clear();
  65. m_code_table.extend(m_original_code_table);
  66. m_code_size = m_original_code_size;
  67. m_table_capacity = AK::exp2<u32>(m_code_size);
  68. m_output.clear();
  69. }
  70. ErrorOr<u16> next_code()
  71. {
  72. m_current_code = TRY(m_bit_stream->template read_bits<u16>(m_code_size));
  73. if (m_current_code > m_code_table.size()) {
  74. dbgln_if(LZW_DEBUG, "Corrupted LZW stream, invalid code: {}, code table size: {}",
  75. m_current_code,
  76. m_code_table.size());
  77. return Error::from_string_literal("Corrupted LZW stream, invalid code");
  78. } else if (m_current_code == m_code_table.size() && m_output.is_empty()) {
  79. dbgln_if(LZW_DEBUG, "Corrupted LZW stream, valid new code but output buffer is empty: {}, code table size: {}",
  80. m_current_code,
  81. m_code_table.size());
  82. return Error::from_string_literal("Corrupted LZW stream, valid new code but output buffer is empty");
  83. }
  84. return m_current_code;
  85. }
  86. Vector<u8>& get_output()
  87. {
  88. VERIFY(m_current_code <= m_code_table.size());
  89. if (m_current_code < m_code_table.size()) {
  90. Vector<u8> new_entry = m_output;
  91. m_output = m_code_table.at(m_current_code);
  92. new_entry.append(m_output[0]);
  93. extend_code_table(new_entry);
  94. } else if (m_current_code == m_code_table.size()) {
  95. VERIFY(!m_output.is_empty());
  96. m_output.append(m_output[0]);
  97. extend_code_table(m_output);
  98. }
  99. return m_output;
  100. }
  101. private:
  102. void init_code_table()
  103. {
  104. m_code_table.ensure_capacity(m_table_capacity);
  105. for (u16 i = 0; i < m_table_capacity; ++i) {
  106. m_code_table.unchecked_append({ (u8)i });
  107. }
  108. m_original_code_table = m_code_table;
  109. }
  110. void extend_code_table(Vector<u8> const& entry)
  111. {
  112. if (entry.size() > 1 && m_code_table.size() < 4096) {
  113. m_code_table.append(entry);
  114. if (m_code_table.size() >= (m_table_capacity + m_offset_for_size_change) && m_code_size < max_code_size) {
  115. ++m_code_size;
  116. m_table_capacity *= 2;
  117. }
  118. }
  119. }
  120. MaybeOwned<InputStream> m_bit_stream;
  121. Vector<Vector<u8>> m_code_table {};
  122. Vector<Vector<u8>> m_original_code_table {};
  123. u8 m_code_size { 0 };
  124. u8 m_original_code_size { 0 };
  125. u32 m_table_capacity { 0 };
  126. i32 m_offset_for_size_change {};
  127. u16 m_current_code { 0 };
  128. Vector<u8> m_output {};
  129. };
  130. }