Filter.cpp 4.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174
  1. /*
  2. * Copyright (c) 2021, Matthew Olsson <mattco@serenityos.org>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include <AK/Hex.h>
  7. #include <LibCompress/Deflate.h>
  8. #include <LibPDF/CommonNames.h>
  9. #include <LibPDF/Filter.h>
  10. namespace PDF {
  11. Optional<ByteBuffer> Filter::decode(ReadonlyBytes const& bytes, FlyString const& encoding_type)
  12. {
  13. if (encoding_type == CommonNames::ASCIIHexDecode)
  14. return decode_ascii_hex(bytes);
  15. if (encoding_type == CommonNames::ASCII85Decode)
  16. return decode_ascii85(bytes);
  17. if (encoding_type == CommonNames::LZWDecode)
  18. return decode_lzw(bytes);
  19. if (encoding_type == CommonNames::FlateDecode)
  20. return decode_flate(bytes);
  21. if (encoding_type == CommonNames::RunLengthDecode)
  22. return decode_run_length(bytes);
  23. if (encoding_type == CommonNames::CCITTFaxDecode)
  24. return decode_ccitt(bytes);
  25. if (encoding_type == CommonNames::JBIG2Decode)
  26. return decode_jbig2(bytes);
  27. if (encoding_type == CommonNames::DCTDecode)
  28. return decode_dct(bytes);
  29. if (encoding_type == CommonNames::JPXDecode)
  30. return decode_jpx(bytes);
  31. if (encoding_type == CommonNames::Crypt)
  32. return decode_crypt(bytes);
  33. return {};
  34. }
  35. Optional<ByteBuffer> Filter::decode_ascii_hex(ReadonlyBytes const& bytes)
  36. {
  37. if (bytes.size() % 2 == 0)
  38. return decode_hex(bytes);
  39. // FIXME: Integrate this padding into AK/Hex?
  40. auto output = ByteBuffer::create_zeroed(bytes.size() / 2 + 1);
  41. for (size_t i = 0; i < bytes.size() / 2; ++i) {
  42. const auto c1 = decode_hex_digit(static_cast<char>(bytes[i * 2]));
  43. if (c1 >= 16)
  44. return {};
  45. const auto c2 = decode_hex_digit(static_cast<char>(bytes[i * 2 + 1]));
  46. if (c2 >= 16)
  47. return {};
  48. output[i] = (c1 << 4) + c2;
  49. }
  50. // Process last byte with a padded zero
  51. output[output.size() - 1] = decode_hex_digit(static_cast<char>(bytes[bytes.size() - 1])) * 16;
  52. return output;
  53. };
  54. Optional<ByteBuffer> Filter::decode_ascii85(ReadonlyBytes const& bytes)
  55. {
  56. Vector<u8> buff;
  57. buff.ensure_capacity(bytes.size());
  58. size_t byte_index = 0;
  59. while (byte_index < bytes.size()) {
  60. if (bytes[byte_index] == ' ') {
  61. byte_index++;
  62. continue;
  63. }
  64. if (bytes[byte_index] == 'z') {
  65. byte_index++;
  66. for (int i = 0; i < 4; i++)
  67. buff.append(0);
  68. continue;
  69. }
  70. u32 number = 0;
  71. if (byte_index + 5 >= bytes.size()) {
  72. auto to_write = bytes.size() - byte_index;
  73. for (int i = 0; i < 5; i++) {
  74. auto byte = byte_index >= bytes.size() ? 'u' : bytes[byte_index++];
  75. if (byte == ' ') {
  76. i--;
  77. continue;
  78. }
  79. number = number * 85 + byte - 33;
  80. }
  81. for (size_t i = 0; i < to_write - 1; i++)
  82. buff.append(reinterpret_cast<u8*>(&number)[3 - i]);
  83. break;
  84. } else {
  85. for (int i = 0; i < 5; i++) {
  86. auto byte = bytes[byte_index++];
  87. if (byte == ' ') {
  88. i--;
  89. continue;
  90. }
  91. number = number * 85 + byte - 33;
  92. }
  93. }
  94. for (int i = 0; i < 4; i++)
  95. buff.append(reinterpret_cast<u8*>(&number)[3 - i]);
  96. }
  97. return ByteBuffer::copy(buff.span());
  98. };
  99. Optional<ByteBuffer> Filter::decode_lzw(ReadonlyBytes const&)
  100. {
  101. dbgln("LZW decoding is not supported");
  102. VERIFY_NOT_REACHED();
  103. };
  104. Optional<ByteBuffer> Filter::decode_flate(ReadonlyBytes const& bytes)
  105. {
  106. // FIXME: The spec says Flate decoding is "based on" zlib, does that mean they
  107. // aren't exactly the same?
  108. auto buff = Compress::DeflateDecompressor::decompress_all(bytes.slice(2));
  109. VERIFY(buff.has_value());
  110. return buff.value();
  111. };
  112. Optional<ByteBuffer> Filter::decode_run_length(ReadonlyBytes const&)
  113. {
  114. // FIXME: Support RunLength decoding
  115. TODO();
  116. };
  117. Optional<ByteBuffer> Filter::decode_ccitt(ReadonlyBytes const&)
  118. {
  119. // FIXME: Support CCITT decoding
  120. TODO();
  121. };
  122. Optional<ByteBuffer> Filter::decode_jbig2(ReadonlyBytes const&)
  123. {
  124. // FIXME: Support JBIG2 decoding
  125. TODO();
  126. };
  127. Optional<ByteBuffer> Filter::decode_dct(ReadonlyBytes const&)
  128. {
  129. // FIXME: Support dct decoding
  130. TODO();
  131. };
  132. Optional<ByteBuffer> Filter::decode_jpx(ReadonlyBytes const&)
  133. {
  134. // FIXME: Support JPX decoding
  135. TODO();
  136. };
  137. Optional<ByteBuffer> Filter::decode_crypt(ReadonlyBytes const&)
  138. {
  139. // FIXME: Support Crypt decoding
  140. TODO();
  141. };
  142. }