diff --git a/Tests/LibPDF/CMakeLists.txt b/Tests/LibPDF/CMakeLists.txt index fe57676933c..3f4970bdb29 100644 --- a/Tests/LibPDF/CMakeLists.txt +++ b/Tests/LibPDF/CMakeLists.txt @@ -12,6 +12,7 @@ set(TEST_FILES complex.pdf encoding.pdf encryption_nocopy.pdf + jbig2-globals.pdf linearized.pdf non-linearized.pdf oss-fuzz-testcase-62065.pdf diff --git a/Tests/LibPDF/TestPDF.cpp b/Tests/LibPDF/TestPDF.cpp index 0e019ed255e..8f3d7289b6e 100644 --- a/Tests/LibPDF/TestPDF.cpp +++ b/Tests/LibPDF/TestPDF.cpp @@ -121,6 +121,18 @@ TEST_CASE(encrypted_object_stream) EXPECT_EQ(MUST(info_dict.creator()).value(), "Acrobat PDFMaker 9.1 voor Word"); } +TEST_CASE(resolve_indirect_reference_during_parsing) +{ + auto file = MUST(Core::MappedFile::map("jbig2-globals.pdf"sv)); + auto document = MUST(PDF::Document::create(file->bytes())); + MUST(document->initialize()); + EXPECT_EQ(document->get_page_count(), 1U); + + auto jbig2_stream_value = MUST(document->get_or_load_value(5)); + auto jbig2_stream = MUST(document->resolve_to(jbig2_stream_value)); + EXPECT_EQ(jbig2_stream->bytes().size(), 20'000U); +} + TEST_CASE(malformed_pdf_document) { Array test_inputs = { diff --git a/Userland/Libraries/LibPDF/DocumentParser.cpp b/Userland/Libraries/LibPDF/DocumentParser.cpp index 8d7e6dc6a2b..d9924ae8ff0 100644 --- a/Userland/Libraries/LibPDF/DocumentParser.cpp +++ b/Userland/Libraries/LibPDF/DocumentParser.cpp @@ -70,11 +70,31 @@ PDFErrorOr DocumentParser::parse_object_with_index(u32 index) if (!m_xref_table->is_object_in_use(index)) return nullptr; + // If this is called to resolve an indirect object reference while parsing another object, + // make sure to restore the current position after parsing the indirect object, so that the + // parser can keep parsing the original object stream afterwards. + // parse_compressed_object_with_index() also moves the reader's position, so this needs + // to be before the potential call to parse_compressed_object_with_index(). + class SavePoint { + public: + SavePoint(Reader& reader) + : m_reader(reader) + { + m_reader.save(); + } + ~SavePoint() { m_reader.load(); } + + private: + Reader& m_reader; + }; + SavePoint restore_current_position { m_reader }; + if (m_xref_table->is_object_compressed(index)) // The object can be found in a object stream return parse_compressed_object_with_index(index); auto byte_offset = m_xref_table->byte_offset_for_object(index); + m_reader.move_to(byte_offset); auto indirect_value = TRY(parse_indirect_value()); VERIFY(indirect_value->index() == index); diff --git a/Userland/Libraries/LibPDF/Parser.cpp b/Userland/Libraries/LibPDF/Parser.cpp index c7842e88f0f..2fa72ca87ea 100644 --- a/Userland/Libraries/LibPDF/Parser.cpp +++ b/Userland/Libraries/LibPDF/Parser.cpp @@ -483,9 +483,7 @@ PDFErrorOr> Parser::parse_stream(NonnullRefPtrget(CommonNames::Length); if (maybe_length.has_value() && m_document->can_resolve_references()) { // The PDF writer has kindly provided us with the direct length of the stream - m_reader.save(); auto length = TRY(m_document->resolve_to(maybe_length.value())); - m_reader.load(); bytes = m_reader.bytes().slice(m_reader.offset(), length); m_reader.move_by(length); m_reader.consume_whitespace();