JBIG2Loader.cpp 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358
  1. /*
  2. * Copyright (c) 2024, Nico Weber <thakis@chromium.org>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include <AK/Debug.h>
  7. #include <LibGfx/ImageFormats/JBIG2Loader.h>
  8. // Spec: ITU-T_T_88__08_2018.pdf in the zip file here:
  9. // https://www.itu.int/rec/T-REC-T.88-201808-I
  10. // Annex H has a datastream example.
  11. namespace Gfx {
// JBIG2 spec, Annex D, D.4.1 ID string
// Byte sequence that sniff() requires at the very start of the input.
// decode_jbig2_header() skips sizeof(id_string) bytes before reading the file header flags.
static constexpr u8 id_string[] = { 0x97, 0x4A, 0x42, 0x32, 0x0D, 0x0A, 0x1A, 0x0A };
  14. // 7.3 Segment types
  15. enum SegmentType {
  16. SymbolDictionary = 0,
  17. IntermediateTextRegion = 4,
  18. ImmediateTextRegion = 6,
  19. ImmediateLosslessTextRegion = 7,
  20. PatternDictionary = 16,
  21. IntermediateHalftoneRegion = 20,
  22. ImmediateHalftoneRegion = 22,
  23. ImmediateLosslessHalftoneRegion = 23,
  24. IntermediateGenericRegion = 36,
  25. ImmediateGenericRegion = 38,
  26. ImmediateLosslessGenericRegion = 39,
  27. IntermediateGenericRefinementRegion = 40,
  28. ImmediateGenericRefinementRegion = 42,
  29. ImmediateLosslessGenericRefinementRegion = 43,
  30. PageInformation = 48,
  31. EndOfPage = 49,
  32. EndOfStripe = 50,
  33. EndOfFile = 51,
  34. Profiles = 52,
  35. Tables = 53,
  36. ColorPalette = 54,
  37. Extension = 62,
  38. };
  39. // Annex D
  40. enum class Organization {
  41. // D.1 Sequential organization
  42. Sequential,
  43. // D.2 Random-access organization
  44. RandomAccess,
  45. // D.3 Embedded organization
  46. Embedded,
  47. };
// Parsed form of one segment header, as produced by decode_segment_header().
// Member order matters: decode_segment_header() fills this struct with positional aggregate initialization.
struct SegmentHeader {
    // 7.2.2 Segment number
    u32 segment_number;

    // Taken from the low six bits of the segment header flags byte.
    SegmentType type;

    // 7.2.5 Referred-to segment numbers, in the order they were read.
    Vector<u32> referred_to_segment_numbers;

    // 7.2.6 Segment page association
    // "The first page must be numbered "1". This field may contain a value of zero; this value indicates that this segment is not associated with any page."
    u32 page_association;

    // Empty when the header stored 0xffff'ffff ("unknown length") in its data length field.
    Optional<u32> data_length;
};
// A segment header paired with the bytes of that segment's data part.
struct SegmentData {
    SegmentHeader header;

    // Slice of the input handed to decode_segment_headers(); this struct does not own the bytes.
    ReadonlyBytes data;
};
  61. struct JBIG2LoadingContext {
  62. enum class State {
  63. NotDecoded = 0,
  64. Error,
  65. };
  66. State state { State::NotDecoded };
  67. Organization organization { Organization::Sequential };
  68. IntSize size;
  69. Optional<u32> number_of_pages;
  70. Vector<SegmentData> segments;
  71. };
  72. static ErrorOr<void> decode_jbig2_header(JBIG2LoadingContext& context, ReadonlyBytes data)
  73. {
  74. if (!JBIG2ImageDecoderPlugin::sniff(data))
  75. return Error::from_string_literal("JBIG2LoadingContext: Invalid JBIG2 header");
  76. FixedMemoryStream stream(data.slice(sizeof(id_string)));
  77. // D.4.2 File header flags
  78. u8 header_flags = TRY(stream.read_value<u8>());
  79. if (header_flags & 0b11110000)
  80. return Error::from_string_literal("JBIG2LoadingContext: Invalid header flags");
  81. context.organization = (header_flags & 1) ? Organization::Sequential : Organization::RandomAccess;
  82. dbgln_if(JBIG2_DEBUG, "JBIG2LoadingContext: Organization: {} ({})", (int)context.organization, context.organization == Organization::Sequential ? "Sequential" : "Random-access");
  83. bool has_known_number_of_pages = (header_flags & 2) ? false : true;
  84. bool uses_templates_with_12_AT_pixels = (header_flags & 4) ? true : false;
  85. bool contains_colored_region_segments = (header_flags & 8) ? true : false;
  86. // FIXME: Do something with these?
  87. (void)uses_templates_with_12_AT_pixels;
  88. (void)contains_colored_region_segments;
  89. // D.4.3 Number of pages
  90. if (has_known_number_of_pages) {
  91. context.number_of_pages = TRY(stream.read_value<BigEndian<u32>>());
  92. dbgln_if(JBIG2_DEBUG, "JBIG2LoadingContext: Number of pages: {}", context.number_of_pages.value());
  93. }
  94. return {};
  95. }
  96. static ErrorOr<SegmentHeader> decode_segment_header(SeekableStream& stream)
  97. {
  98. // 7.2.2 Segment number
  99. u32 segment_number = TRY(stream.read_value<BigEndian<u32>>());
  100. dbgln_if(JBIG2_DEBUG, "Segment number: {}", segment_number);
  101. // 7.2.3 Segment header flags
  102. u8 flags = TRY(stream.read_value<u8>());
  103. SegmentType type = static_cast<SegmentType>(flags & 0b11'1111);
  104. dbgln_if(JBIG2_DEBUG, "Segment type: {}", (int)type);
  105. bool segment_page_association_size_is_32_bits = (flags & 0b100'0000) != 0;
  106. bool segment_retained_only_by_itself_and_extension_segments = (flags & 0b1000'00000) != 0;
  107. // FIXME: Do something with these.
  108. (void)segment_page_association_size_is_32_bits;
  109. (void)segment_retained_only_by_itself_and_extension_segments;
  110. // 7.2.4 Referred-to segment count and retention flags
  111. u8 referred_to_segment_count_and_retention_flags = TRY(stream.read_value<u8>());
  112. u32 count_of_referred_to_segments = referred_to_segment_count_and_retention_flags >> 5;
  113. if (count_of_referred_to_segments == 5 || count_of_referred_to_segments == 6)
  114. return Error::from_string_literal("JBIG2ImageDecoderPlugin: Invalid count_of_referred_to_segments");
  115. u32 extra_count = 0;
  116. if (count_of_referred_to_segments == 7) {
  117. TRY(stream.seek(-1, SeekMode::FromCurrentPosition));
  118. count_of_referred_to_segments = TRY(stream.read_value<BigEndian<u32>>()) & 0x1FFF'FFFF;
  119. extra_count = ceil_div(count_of_referred_to_segments + 1, 8);
  120. TRY(stream.seek(extra_count, SeekMode::FromCurrentPosition));
  121. }
  122. dbgln_if(JBIG2_DEBUG, "Referred-to segment count: {}", count_of_referred_to_segments);
  123. // 7.2.5 Referred-to segment numbers
  124. Vector<u32> referred_to_segment_numbers;
  125. for (u32 i = 0; i < count_of_referred_to_segments; ++i) {
  126. u32 referred_to_segment_number;
  127. if (segment_number <= 256)
  128. referred_to_segment_number = TRY(stream.read_value<u8>());
  129. else if (segment_number <= 65536)
  130. referred_to_segment_number = TRY(stream.read_value<BigEndian<u16>>());
  131. else
  132. referred_to_segment_number = TRY(stream.read_value<BigEndian<u32>>());
  133. referred_to_segment_numbers.append(referred_to_segment_number);
  134. dbgln_if(JBIG2_DEBUG, "Referred-to segment number: {}", referred_to_segment_number);
  135. }
  136. // 7.2.6 Segment page association
  137. u32 segment_page_association;
  138. if (segment_page_association_size_is_32_bits) {
  139. segment_page_association = TRY(stream.read_value<BigEndian<u32>>());
  140. } else {
  141. segment_page_association = TRY(stream.read_value<u8>());
  142. }
  143. dbgln_if(JBIG2_DEBUG, "Segment page association: {}", segment_page_association);
  144. // 7.2.7 Segment data length
  145. u32 data_length = TRY(stream.read_value<BigEndian<u32>>());
  146. dbgln_if(JBIG2_DEBUG, "Segment data length: {}", data_length);
  147. // FIXME: Add some validity checks:
  148. // - check type is valid
  149. // - check referred_to_segment_numbers are smaller than segment_number
  150. // - 7.3.1 Rules for segment references
  151. // - 7.3.2 Rules for page associations
  152. Optional<u32> opt_data_length;
  153. if (data_length != 0xffff'ffff)
  154. opt_data_length = data_length;
  155. else if (type != ImmediateGenericRegion)
  156. return Error::from_string_literal("JBIG2ImageDecoderPlugin: Unknown data length only allowed for ImmediateGenericRegion");
  157. return SegmentHeader { segment_number, type, move(referred_to_segment_numbers), segment_page_association, opt_data_length };
  158. }
  159. static ErrorOr<size_t> scan_for_immediate_generic_region_size(ReadonlyBytes data)
  160. {
  161. // 7.2.7 Segment data length
  162. // "If the segment's type is "Immediate generic region", then the length field may contain the value 0xFFFFFFFF.
  163. // This value is intended to mean that the length of the segment's data part is unknown at the time that the segment header is written (...).
  164. // In this case, the true length of the segment's data part shall be determined through examination of the data:
  165. // if the segment uses template-based arithmetic coding, then the segment's data part ends with the two-byte sequence 0xFF 0xAC followed by a four-byte row count.
  166. // If the segment uses MMR coding, then the segment's data part ends with the two-byte sequence 0x00 0x00 followed by a four-byte row count.
  167. // The form of encoding used by the segment may be determined by examining the eighteenth byte of its segment data part,
  168. // and the end sequences can occur anywhere after that eighteenth byte."
  169. // 7.4.6.4 Decoding a generic region segment
  170. // "NOTE – The sequence 0x00 0x00 cannot occur within MMR-encoded data; the sequence 0xFF 0xAC can occur only at the end of arithmetically-coded data.
  171. // Thus, those sequences cannot occur by chance in the data that is decoded to generate the contents of the generic region."
  172. dbgln_if(JBIG2_DEBUG, "(Unknown data length, computing it)");
  173. if (data.size() < 18)
  174. return Error::from_string_literal("JBIG2ImageDecoderPlugin: Data too short to contain segment data header");
  175. // Per 7.4.6.1 Generic region segment data header, this starts with the 17 bytes described in
  176. // 7.4.1 Region segment information field, followed the byte described in 7.4.6.2 Generic region segment flags.
  177. // That byte's lowest bit stores if the segment uses MMR.
  178. u8 flags = data[17];
  179. bool uses_mmr = (flags & 1) != 0;
  180. auto end_sequence = uses_mmr ? to_array<u8>({ 0x00, 0x00 }) : to_array<u8>({ 0xFF, 0xAC });
  181. u8 const* end = static_cast<u8 const*>(memmem(data.data() + 19, data.size() - 19 - sizeof(u32), end_sequence.data(), end_sequence.size()));
  182. if (!end)
  183. return Error::from_string_literal("JBIG2ImageDecoderPlugin: Could not find end sequence in segment data");
  184. size_t size = end - data.data() + end_sequence.size() + sizeof(u32);
  185. dbgln_if(JBIG2_DEBUG, "(Computed size is {})", size);
  186. return size;
  187. }
  188. static ErrorOr<void> decode_segment_headers(JBIG2LoadingContext& context, ReadonlyBytes data)
  189. {
  190. FixedMemoryStream stream(data);
  191. Vector<ReadonlyBytes> segment_datas;
  192. auto store_and_skip_segment_data = [&](SegmentHeader const& segment_header) -> ErrorOr<void> {
  193. size_t start_offset = TRY(stream.tell());
  194. u32 data_length = TRY(segment_header.data_length.try_value_or_lazy_evaluated([&]() {
  195. return scan_for_immediate_generic_region_size(data.slice(start_offset));
  196. }));
  197. if (start_offset + data_length > data.size()) {
  198. dbgln_if(JBIG2_DEBUG, "JBIG2ImageDecoderPlugin: start_offset={}, data_length={}, data.size()={}", start_offset, data_length, data.size());
  199. return Error::from_string_literal("JBIG2ImageDecoderPlugin: Segment data length exceeds file size");
  200. }
  201. ReadonlyBytes segment_data = data.slice(start_offset, data_length);
  202. segment_datas.append(segment_data);
  203. TRY(stream.seek(data_length, SeekMode::FromCurrentPosition));
  204. return {};
  205. };
  206. Vector<SegmentHeader> segment_headers;
  207. while (!stream.is_eof()) {
  208. auto segment_header = TRY(decode_segment_header(stream));
  209. segment_headers.append(segment_header);
  210. if (context.organization != Organization::RandomAccess)
  211. TRY(store_and_skip_segment_data(segment_header));
  212. // Required per spec for files with RandomAccess organization.
  213. if (segment_header.type == SegmentType::EndOfFile)
  214. break;
  215. }
  216. if (context.organization == Organization::RandomAccess) {
  217. for (auto const& segment_header : segment_headers)
  218. TRY(store_and_skip_segment_data(segment_header));
  219. }
  220. if (segment_headers.size() != segment_datas.size())
  221. return Error::from_string_literal("JBIG2ImageDecoderPlugin: Segment headers and segment datas have different sizes");
  222. for (size_t i = 0; i < segment_headers.size(); ++i)
  223. context.segments.append({ segment_headers[i], segment_datas[i] });
  224. return {};
  225. }
// 7.4.8 Page information segment syntax
// Mirrors the on-disk layout byte for byte: decode_page_information_segment() copies segment data
// straight into this struct, so the struct must stay packed and its members must keep their order.
struct [[gnu::packed]] PageInformationSegment {
    BigEndian<u32> bitmap_width;
    BigEndian<u32> bitmap_height;
    BigEndian<u32> page_x_resolution; // In pixels/meter.
    BigEndian<u32> page_y_resolution; // In pixels/meter.
    u8 flags;
    BigEndian<u16> striping_information;
};
// 4 * 4 + 1 + 2 bytes; guards against padding sneaking into the struct.
static_assert(AssertSize<PageInformationSegment, 19>());
  236. static ErrorOr<PageInformationSegment> decode_page_information_segment(ReadonlyBytes data)
  237. {
  238. // 7.4.8 Page information segment syntax
  239. if (data.size() != sizeof(PageInformationSegment))
  240. return Error::from_string_literal("JBIG2ImageDecoderPlugin: Invalid page information segment size");
  241. return *(PageInformationSegment const*)data.data();
  242. }
  243. static ErrorOr<void> scan_for_page_size(JBIG2LoadingContext& context)
  244. {
  245. // We only decode the first page at the moment.
  246. for (auto const& segment : context.segments) {
  247. if (segment.header.type != SegmentType::PageInformation || segment.header.page_association != 1)
  248. continue;
  249. auto page_information = TRY(decode_page_information_segment(segment.data));
  250. context.size = { page_information.bitmap_width, page_information.bitmap_height };
  251. return {};
  252. }
  253. return Error::from_string_literal("JBIG2ImageDecoderPlugin: No page information segment found for page 1");
  254. }
  255. JBIG2ImageDecoderPlugin::JBIG2ImageDecoderPlugin()
  256. {
  257. m_context = make<JBIG2LoadingContext>();
  258. }
  259. IntSize JBIG2ImageDecoderPlugin::size()
  260. {
  261. return m_context->size;
  262. }
  263. bool JBIG2ImageDecoderPlugin::sniff(ReadonlyBytes data)
  264. {
  265. return data.starts_with(id_string);
  266. }
  267. ErrorOr<NonnullOwnPtr<ImageDecoderPlugin>> JBIG2ImageDecoderPlugin::create(ReadonlyBytes data)
  268. {
  269. auto plugin = TRY(adopt_nonnull_own_or_enomem(new (nothrow) JBIG2ImageDecoderPlugin()));
  270. TRY(decode_jbig2_header(*plugin->m_context, data));
  271. data = data.slice(sizeof(id_string) + sizeof(u8) + (plugin->m_context->number_of_pages.has_value() ? sizeof(u32) : 0));
  272. TRY(decode_segment_headers(*plugin->m_context, data));
  273. TRY(scan_for_page_size(*plugin->m_context));
  274. return plugin;
  275. }
  276. ErrorOr<ImageFrameDescriptor> JBIG2ImageDecoderPlugin::frame(size_t index, Optional<IntSize>)
  277. {
  278. // FIXME: Use this for multi-page JBIG2 files?
  279. if (index != 0)
  280. return Error::from_string_literal("JBIG2ImageDecoderPlugin: Invalid frame index");
  281. if (m_context->state == JBIG2LoadingContext::State::Error)
  282. return Error::from_string_literal("JBIG2ImageDecoderPlugin: Decoding failed");
  283. return Error::from_string_literal("JBIG2ImageDecoderPlugin: Draw the rest of the owl");
  284. }
  285. ErrorOr<ByteBuffer> JBIG2ImageDecoderPlugin::decode_embedded(Vector<ReadonlyBytes> data)
  286. {
  287. auto plugin = TRY(adopt_nonnull_own_or_enomem(new (nothrow) JBIG2ImageDecoderPlugin()));
  288. plugin->m_context->organization = Organization::Embedded;
  289. for (auto const& segment_data : data)
  290. TRY(decode_segment_headers(*plugin->m_context, segment_data));
  291. TRY(scan_for_page_size(*plugin->m_context));
  292. return Error::from_string_literal("JBIG2ImageDecoderPlugin: Cannot decode embedded JBIG2 yet");
  293. }
  294. }