JBIG2Loader.cpp 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354
  1. /*
  2. * Copyright (c) 2024, Nico Weber <thakis@chromium.org>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include <AK/Debug.h>
  7. #include <LibGfx/ImageFormats/JBIG2Loader.h>
  8. // Spec: ITU-T_T_88__08_2018.pdf in the zip file here:
  9. // https://www.itu.int/rec/T-REC-T.88-201808-I
  10. // Annex H has a datastream example.
  11. namespace Gfx {
  12. // JBIG2 spec, Annex D, D.4.1 ID string
  13. static constexpr u8 id_string[] = { 0x97, 0x4A, 0x42, 0x32, 0x0D, 0x0A, 0x1A, 0x0A };
  14. // 7.3 Segment types
  15. enum SegmentType {
  16. SymbolDictionary = 0,
  17. IntermediateTextRegion = 4,
  18. ImmediateTextRegion = 6,
  19. ImmediateLosslessTextRegion = 7,
  20. PatternDictionary = 16,
  21. IntermediateHalftoneRegion = 20,
  22. ImmediateHalftoneRegion = 22,
  23. ImmediateLosslessHalftoneRegion = 23,
  24. IntermediateGenericRegion = 36,
  25. ImmediateGenericRegion = 38,
  26. ImmediateLosslessGenericRegion = 39,
  27. IntermediateGenericRefinementRegion = 40,
  28. ImmediateGenericRefinementRegion = 42,
  29. ImmediateLosslessGenericRefinementRegion = 43,
  30. PageInformation = 48,
  31. EndOfPage = 49,
  32. EndOfStripe = 50,
  33. EndOfFile = 51,
  34. Profiles = 52,
  35. Tables = 53,
  36. ColorPalette = 54,
  37. Extension = 62,
  38. };
  39. // Annex D
  40. enum class Organization {
  41. // D.1 Sequential organization
  42. Sequential,
  43. // D.2 Random-access organization
  44. RandomAccess,
  45. // D.3 Embedded organization
  46. Embedded,
  47. };
  48. struct SegmentHeader {
  49. u32 segment_number;
  50. SegmentType type;
  51. Vector<u32> referred_to_segment_numbers;
  52. u32 page_association;
  53. Optional<u32> data_length;
  54. };
  55. struct SegmentData {
  56. SegmentHeader header;
  57. ReadonlyBytes data;
  58. };
  59. struct JBIG2LoadingContext {
  60. enum class State {
  61. NotDecoded = 0,
  62. Error,
  63. };
  64. State state { State::NotDecoded };
  65. Organization organization { Organization::Sequential };
  66. IntSize size;
  67. Optional<u32> number_of_pages;
  68. Vector<SegmentData> segments;
  69. };
  70. static ErrorOr<void> decode_jbig2_header(JBIG2LoadingContext& context, ReadonlyBytes data)
  71. {
  72. if (!JBIG2ImageDecoderPlugin::sniff(data))
  73. return Error::from_string_literal("JBIG2LoadingContext: Invalid JBIG2 header");
  74. FixedMemoryStream stream(data.slice(sizeof(id_string)));
  75. // D.4.2 File header flags
  76. u8 header_flags = TRY(stream.read_value<u8>());
  77. if (header_flags & 0b11110000)
  78. return Error::from_string_literal("JBIG2LoadingContext: Invalid header flags");
  79. context.organization = (header_flags & 1) ? Organization::Sequential : Organization::RandomAccess;
  80. dbgln_if(JBIG2_DEBUG, "JBIG2LoadingContext: Organization: {} ({})", (int)context.organization, context.organization == Organization::Sequential ? "Sequential" : "Random-access");
  81. bool has_known_number_of_pages = (header_flags & 2) ? false : true;
  82. bool uses_templates_with_12_AT_pixels = (header_flags & 4) ? true : false;
  83. bool contains_colored_region_segments = (header_flags & 8) ? true : false;
  84. // FIXME: Do something with these?
  85. (void)uses_templates_with_12_AT_pixels;
  86. (void)contains_colored_region_segments;
  87. // D.4.3 Number of pages
  88. if (has_known_number_of_pages) {
  89. context.number_of_pages = TRY(stream.read_value<BigEndian<u32>>());
  90. dbgln_if(JBIG2_DEBUG, "JBIG2LoadingContext: Number of pages: {}", context.number_of_pages.value());
  91. }
  92. return {};
  93. }
  94. static ErrorOr<SegmentHeader> decode_segment_header(SeekableStream& stream)
  95. {
  96. // 7.2.2 Segment number
  97. u32 segment_number = TRY(stream.read_value<BigEndian<u32>>());
  98. dbgln_if(JBIG2_DEBUG, "Segment number: {}", segment_number);
  99. // 7.2.3 Segment header flags
  100. u8 flags = TRY(stream.read_value<u8>());
  101. SegmentType type = static_cast<SegmentType>(flags & 0b11'1111);
  102. dbgln_if(JBIG2_DEBUG, "Segment type: {}", (int)type);
  103. bool segment_page_association_size_is_32_bits = (flags & 0b100'0000) != 0;
  104. bool segment_retained_only_by_itself_and_extension_segments = (flags & 0b1000'00000) != 0;
  105. // FIXME: Do something with these.
  106. (void)segment_page_association_size_is_32_bits;
  107. (void)segment_retained_only_by_itself_and_extension_segments;
  108. // 7.2.4 Referred-to segment count and retention flags
  109. u8 referred_to_segment_count_and_retention_flags = TRY(stream.read_value<u8>());
  110. u32 count_of_referred_to_segments = referred_to_segment_count_and_retention_flags >> 5;
  111. if (count_of_referred_to_segments == 5 || count_of_referred_to_segments == 6)
  112. return Error::from_string_literal("JBIG2ImageDecoderPlugin: Invalid count_of_referred_to_segments");
  113. u32 extra_count = 0;
  114. if (count_of_referred_to_segments == 7) {
  115. TRY(stream.seek(-1, SeekMode::FromCurrentPosition));
  116. count_of_referred_to_segments = TRY(stream.read_value<BigEndian<u32>>()) & 0x1FFF'FFFF;
  117. extra_count = ceil_div(count_of_referred_to_segments + 1, 8);
  118. TRY(stream.seek(extra_count, SeekMode::FromCurrentPosition));
  119. }
  120. dbgln_if(JBIG2_DEBUG, "Referred-to segment count: {}", count_of_referred_to_segments);
  121. // 7.2.5 Referred-to segment numbers
  122. Vector<u32> referred_to_segment_numbers;
  123. for (u32 i = 0; i < count_of_referred_to_segments; ++i) {
  124. u32 referred_to_segment_number;
  125. if (segment_number <= 256)
  126. referred_to_segment_number = TRY(stream.read_value<u8>());
  127. else if (segment_number <= 65536)
  128. referred_to_segment_number = TRY(stream.read_value<BigEndian<u16>>());
  129. else
  130. referred_to_segment_number = TRY(stream.read_value<BigEndian<u32>>());
  131. referred_to_segment_numbers.append(referred_to_segment_number);
  132. dbgln_if(JBIG2_DEBUG, "Referred-to segment number: {}", referred_to_segment_number);
  133. }
  134. // 7.2.6 Segment page association
  135. u32 segment_page_association;
  136. if (segment_page_association_size_is_32_bits) {
  137. segment_page_association = TRY(stream.read_value<BigEndian<u32>>());
  138. } else {
  139. segment_page_association = TRY(stream.read_value<u8>());
  140. }
  141. dbgln_if(JBIG2_DEBUG, "Segment page association: {}", segment_page_association);
  142. // 7.2.7 Segment data length
  143. u32 data_length = TRY(stream.read_value<BigEndian<u32>>());
  144. dbgln_if(JBIG2_DEBUG, "Segment data length: {}", data_length);
  145. // FIXME: Add some validity checks:
  146. // - check type is valid
  147. // - check referred_to_segment_numbers are smaller than segment_number
  148. // - 7.3.1 Rules for segment references
  149. // - 7.3.2 Rules for page associations
  150. Optional<u32> opt_data_length;
  151. if (data_length != 0xffff'ffff)
  152. opt_data_length = data_length;
  153. else if (type != ImmediateGenericRegion)
  154. return Error::from_string_literal("JBIG2ImageDecoderPlugin: Unknown data length only allowed for ImmediateGenericRegion");
  155. return SegmentHeader { segment_number, type, move(referred_to_segment_numbers), segment_page_association, opt_data_length };
  156. }
  157. static ErrorOr<size_t> scan_for_immediate_generic_region_size(ReadonlyBytes data)
  158. {
  159. // 7.2.7 Segment data length
  160. // "If the segment's type is "Immediate generic region", then the length field may contain the value 0xFFFFFFFF.
  161. // This value is intended to mean that the length of the segment's data part is unknown at the time that the segment header is written (...).
  162. // In this case, the true length of the segment's data part shall be determined through examination of the data:
  163. // if the segment uses template-based arithmetic coding, then the segment's data part ends with the two-byte sequence 0xFF 0xAC followed by a four-byte row count.
  164. // If the segment uses MMR coding, then the segment's data part ends with the two-byte sequence 0x00 0x00 followed by a four-byte row count.
  165. // The form of encoding used by the segment may be determined by examining the eighteenth byte of its segment data part,
  166. // and the end sequences can occur anywhere after that eighteenth byte."
  167. // 7.4.6.4 Decoding a generic region segment
  168. // "NOTE – The sequence 0x00 0x00 cannot occur within MMR-encoded data; the sequence 0xFF 0xAC can occur only at the end of arithmetically-coded data.
  169. // Thus, those sequences cannot occur by chance in the data that is decoded to generate the contents of the generic region."
  170. dbgln_if(JBIG2_DEBUG, "(Unknown data length, computing it)");
  171. if (data.size() < 18)
  172. return Error::from_string_literal("JBIG2ImageDecoderPlugin: Data too short to contain segment data header");
  173. // Per 7.4.6.1 Generic region segment data header, this starts with the 17 bytes described in
  174. // 7.4.1 Region segment information field, followed the byte described in 7.4.6.2 Generic region segment flags.
  175. // That byte's lowest bit stores if the segment uses MMR.
  176. u8 flags = data[17];
  177. bool uses_mmr = (flags & 1) != 0;
  178. auto end_sequence = uses_mmr ? to_array<u8>({ 0x00, 0x00 }) : to_array<u8>({ 0xFF, 0xAC });
  179. u8 const* end = static_cast<u8 const*>(memmem(data.data() + 19, data.size() - 19 - sizeof(u32), end_sequence.data(), end_sequence.size()));
  180. if (!end)
  181. return Error::from_string_literal("JBIG2ImageDecoderPlugin: Could not find end sequence in segment data");
  182. size_t size = end - data.data() + end_sequence.size() + sizeof(u32);
  183. dbgln_if(JBIG2_DEBUG, "(Computed size is {})", size);
  184. return size;
  185. }
  186. static ErrorOr<void> decode_segment_headers(JBIG2LoadingContext& context, ReadonlyBytes data)
  187. {
  188. FixedMemoryStream stream(data);
  189. Vector<ReadonlyBytes> segment_datas;
  190. auto store_and_skip_segment_data = [&](SegmentHeader const& segment_header) -> ErrorOr<void> {
  191. size_t start_offset = TRY(stream.tell());
  192. u32 data_length = TRY(segment_header.data_length.try_value_or_lazy_evaluated([&]() {
  193. return scan_for_immediate_generic_region_size(data.slice(start_offset));
  194. }));
  195. if (start_offset + data_length > data.size()) {
  196. dbgln_if(JBIG2_DEBUG, "JBIG2ImageDecoderPlugin: start_offset={}, data_length={}, data.size()={}", start_offset, data_length, data.size());
  197. return Error::from_string_literal("JBIG2ImageDecoderPlugin: Segment data length exceeds file size");
  198. }
  199. ReadonlyBytes segment_data = data.slice(start_offset, data_length);
  200. segment_datas.append(segment_data);
  201. TRY(stream.seek(data_length, SeekMode::FromCurrentPosition));
  202. return {};
  203. };
  204. Vector<SegmentHeader> segment_headers;
  205. while (!stream.is_eof()) {
  206. auto segment_header = TRY(decode_segment_header(stream));
  207. segment_headers.append(segment_header);
  208. if (context.organization != Organization::RandomAccess)
  209. TRY(store_and_skip_segment_data(segment_header));
  210. // Required per spec for files with RandomAccess organization.
  211. if (segment_header.type == SegmentType::EndOfFile)
  212. break;
  213. }
  214. if (context.organization == Organization::RandomAccess) {
  215. for (auto const& segment_header : segment_headers)
  216. TRY(store_and_skip_segment_data(segment_header));
  217. }
  218. if (segment_headers.size() != segment_datas.size())
  219. return Error::from_string_literal("JBIG2ImageDecoderPlugin: Segment headers and segment datas have different sizes");
  220. for (size_t i = 0; i < segment_headers.size(); ++i)
  221. context.segments.append({ segment_headers[i], segment_datas[i] });
  222. return {};
  223. }
  224. // 7.4.8 Page information segment syntax
  225. struct [[gnu::packed]] PageInformationSegment {
  226. BigEndian<u32> bitmap_width;
  227. BigEndian<u32> bitmap_height;
  228. BigEndian<u32> page_x_resolution; // In pixels/meter.
  229. BigEndian<u32> page_y_resolution; // In pixels/meter.
  230. u8 flags;
  231. BigEndian<u16> striping_information;
  232. };
  233. static_assert(AssertSize<PageInformationSegment, 19>());
  234. static ErrorOr<PageInformationSegment> decode_page_information_segment(ReadonlyBytes data)
  235. {
  236. // 7.4.8 Page information segment syntax
  237. if (data.size() != sizeof(PageInformationSegment))
  238. return Error::from_string_literal("JBIG2ImageDecoderPlugin: Invalid page information segment size");
  239. return *(PageInformationSegment const*)data.data();
  240. }
  241. static ErrorOr<void> scan_for_page_size(JBIG2LoadingContext& context)
  242. {
  243. // We only decode the first page at the moment.
  244. for (auto const& segment : context.segments) {
  245. if (segment.header.type != SegmentType::PageInformation)
  246. continue;
  247. auto page_information = TRY(decode_page_information_segment(segment.data));
  248. context.size = { page_information.bitmap_width, page_information.bitmap_height };
  249. return {};
  250. }
  251. return Error::from_string_literal("JBIG2ImageDecoderPlugin: No page information segment found");
  252. }
  253. JBIG2ImageDecoderPlugin::JBIG2ImageDecoderPlugin()
  254. {
  255. m_context = make<JBIG2LoadingContext>();
  256. }
  257. IntSize JBIG2ImageDecoderPlugin::size()
  258. {
  259. return m_context->size;
  260. }
  261. bool JBIG2ImageDecoderPlugin::sniff(ReadonlyBytes data)
  262. {
  263. return data.starts_with(id_string);
  264. }
  265. ErrorOr<NonnullOwnPtr<ImageDecoderPlugin>> JBIG2ImageDecoderPlugin::create(ReadonlyBytes data)
  266. {
  267. auto plugin = TRY(adopt_nonnull_own_or_enomem(new (nothrow) JBIG2ImageDecoderPlugin()));
  268. TRY(decode_jbig2_header(*plugin->m_context, data));
  269. data = data.slice(sizeof(id_string) + sizeof(u8) + (plugin->m_context->number_of_pages.has_value() ? sizeof(u32) : 0));
  270. TRY(decode_segment_headers(*plugin->m_context, data));
  271. TRY(scan_for_page_size(*plugin->m_context));
  272. return plugin;
  273. }
  274. ErrorOr<ImageFrameDescriptor> JBIG2ImageDecoderPlugin::frame(size_t index, Optional<IntSize>)
  275. {
  276. // FIXME: Use this for multi-page JBIG2 files?
  277. if (index != 0)
  278. return Error::from_string_literal("JBIG2ImageDecoderPlugin: Invalid frame index");
  279. if (m_context->state == JBIG2LoadingContext::State::Error)
  280. return Error::from_string_literal("JBIG2ImageDecoderPlugin: Decoding failed");
  281. return Error::from_string_literal("JBIG2ImageDecoderPlugin: Draw the rest of the owl");
  282. }
  283. ErrorOr<ByteBuffer> JBIG2ImageDecoderPlugin::decode_embedded(Vector<ReadonlyBytes> data)
  284. {
  285. auto plugin = TRY(adopt_nonnull_own_or_enomem(new (nothrow) JBIG2ImageDecoderPlugin()));
  286. plugin->m_context->organization = Organization::Embedded;
  287. for (auto const& segment_data : data)
  288. TRY(decode_segment_headers(*plugin->m_context, segment_data));
  289. TRY(scan_for_page_size(*plugin->m_context));
  290. return Error::from_string_literal("JBIG2ImageDecoderPlugin: Cannot decode embedded JBIG2 yet");
  291. }
  292. }