LibPDF: Make encrypted object streams work

There were two problems:
1. parse_compressed_object_with_index() parses indirect objects
   without going through Parser::parse_indirect_value(), so
   push_reference() / pop_reference() weren't called.
   Manually call them, both for the indirect object containing
   the object stream and for the indirect object within the
   object stream.
2. The indirect object within the object stream got decrypted
   twice: Once when the object stream data itself got decrypted,
   and then incorrectly a second time when the object data within
   the stream was read. To fix, disable encryption while parsing
   object stream data (since it's already decrypted).

The test file is from http://opf-labs.org/format-corpus/pdfCabinetOfHorrors/
and is CC0-licensed according to the readme.md at the same location.
This commit is contained in:
Nico Weber 2023-07-11 12:02:16 -04:00 committed by Linus Groh
parent 6200097bcc
commit 323d76fbb9
Notes: sideshowbarker 2024-07-17 07:19:27 +09:00
5 changed files with 31 additions and 2 deletions

View file

@ -8,6 +8,7 @@ endforeach()
set(TEST_FILES
complex.pdf
encryption_nocopy.pdf
linearized.pdf
non-linearized.pdf
password-is-sup.pdf

View file

@ -61,3 +61,15 @@ TEST_CASE(encrypted_with_aes)
EXPECT_EQ(MUST(info_dict.title()).value(), "sup");
EXPECT_EQ(MUST(info_dict.creator()).value(), "TextEdit");
}
// Regression test: loads encryption_nocopy.pdf, initializes the document, and
// checks the page count plus two info-dictionary fields.
// NOTE(review): presumably the checked values live inside an encrypted object
// stream in this file (hence the test name) -- confirm against the PDF.
TEST_CASE(encrypted_object_stream)
{
// Map the test PDF and build a document from its bytes; MUST() aborts the test on error.
auto file = MUST(Core::MappedFile::map("encryption_nocopy.pdf"sv));
auto document = MUST(PDF::Document::create(file->bytes()));
MUST(document->initialize());
EXPECT_EQ(document->get_page_count(), 1U);
// The author and creator strings must come back as readable text.
auto info_dict = MUST(document->info_dict()).value();
EXPECT_EQ(MUST(info_dict.author()).value(), "van der Knijff");
EXPECT_EQ(MUST(info_dict.creator()).value(), "Acrobat PDFMaker 9.1 voor Word");
}

Binary file not shown.

View file

@ -568,7 +568,9 @@ PDFErrorOr<Value> DocumentParser::parse_compressed_object_with_index(u32 index)
if (m_reader.matches_eol())
m_reader.consume_eol();
push_reference({ static_cast<u32>(first_number.get<int>()), static_cast<u32>(second_number.get<int>()) });
auto dict = TRY(parse_dict());
auto type = TRY(dict->get_name(m_document, CommonNames::Type))->name();
if (type != "ObjStm")
return error("Invalid object stream type");
@ -577,8 +579,13 @@ PDFErrorOr<Value> DocumentParser::parse_compressed_object_with_index(u32 index)
auto first_object_offset = dict->get_value("First").get_u32();
auto stream = TRY(parse_stream(dict));
pop_reference();
Parser stream_parser(m_document, stream->bytes());
// The data was already decrypted when reading the outer compressed ObjStm.
stream_parser.set_encryption_enabled(false);
for (u32 i = 0; i < object_count; ++i) {
auto object_number = TRY(stream_parser.parse_number());
auto object_offset = TRY(stream_parser.parse_number());
@ -589,7 +596,11 @@ PDFErrorOr<Value> DocumentParser::parse_compressed_object_with_index(u32 index)
}
}
return TRY(stream_parser.parse_value());
stream_parser.push_reference({ index, 0 });
auto value = TRY(stream_parser.parse_value());
stream_parser.pop_reference();
return value;
}
PDFErrorOr<DocumentParser::PageOffsetHintTable> DocumentParser::parse_page_offset_hint_table(ReadonlyBytes hint_stream_bytes)

View file

@ -62,10 +62,15 @@ public:
m_enable_filters = enabled;
}
protected:
// Sets the m_enable_encryption flag on this parser.
// NOTE(review): presumably this flag controls whether parsed data is run
// through decryption; callers would pass false when the bytes being parsed
// have already been decrypted -- confirm against the parsing code.
void set_encryption_enabled(bool enabled)
{
m_enable_encryption = enabled;
}
// Appends `ref` to the end of m_current_reference_stack.
void push_reference(Reference const& ref) { m_current_reference_stack.append(ref); }
// Removes the last entry of m_current_reference_stack via take_last(); the removed value is discarded.
void pop_reference() { m_current_reference_stack.take_last(); }
protected:
Error error(
DeprecatedString const& message
#ifdef PDF_DEBUG