Просмотр исходного кода

LibPDF: Make encrypted object streams work

There were two problems:
1. parse_compressed_object_with_index() parses indirect objects
   without going through Parser::parse_indirect_value(), so
   push_reference() / pop_reference() weren't called.
   Manually call them, both for the indirect object containing
   the object stream and for the indirect object within the
   object stream.
2. The indirect object within the object stream got decrypted
   twice: once when the object stream data itself got decrypted,
   and then, incorrectly, a second time when the object data within
   the stream was read. To fix, disable decryption (via
   set_encryption_enabled(false)) while parsing object stream data,
   since that data is already decrypted.

The test is from http://opf-labs.org/format-corpus/pdfCabinetOfHorrors/
which according to readme.md at the same location is CC0.
Nico Weber 2 года назад
Родитель
Commit
323d76fbb9

+ 1 - 0
Tests/LibPDF/CMakeLists.txt

@@ -8,6 +8,7 @@ endforeach()
 
 
 set(TEST_FILES
 set(TEST_FILES
     complex.pdf
     complex.pdf
+    encryption_nocopy.pdf
     linearized.pdf
     linearized.pdf
     non-linearized.pdf
     non-linearized.pdf
     password-is-sup.pdf
     password-is-sup.pdf

+ 12 - 0
Tests/LibPDF/TestPDF.cpp

@@ -61,3 +61,15 @@ TEST_CASE(encrypted_with_aes)
     EXPECT_EQ(MUST(info_dict.title()).value(), "sup");
     EXPECT_EQ(MUST(info_dict.title()).value(), "sup");
     EXPECT_EQ(MUST(info_dict.creator()).value(), "TextEdit");
     EXPECT_EQ(MUST(info_dict.creator()).value(), "TextEdit");
 }
 }
+
+TEST_CASE(encrypted_object_stream)
+{
+    auto file = MUST(Core::MappedFile::map("encryption_nocopy.pdf"sv));
+    auto document = MUST(PDF::Document::create(file->bytes()));
+    MUST(document->initialize());
+    EXPECT_EQ(document->get_page_count(), 1U);
+
+    auto info_dict = MUST(document->info_dict()).value();
+    EXPECT_EQ(MUST(info_dict.author()).value(), "van der Knijff");
+    EXPECT_EQ(MUST(info_dict.creator()).value(), "Acrobat PDFMaker 9.1 voor Word");
+}

BIN
Tests/LibPDF/encryption_nocopy.pdf


+ 12 - 1
Userland/Libraries/LibPDF/DocumentParser.cpp

@@ -568,7 +568,9 @@ PDFErrorOr<Value> DocumentParser::parse_compressed_object_with_index(u32 index)
     if (m_reader.matches_eol())
     if (m_reader.matches_eol())
         m_reader.consume_eol();
         m_reader.consume_eol();
 
 
+    push_reference({ static_cast<u32>(first_number.get<int>()), static_cast<u32>(second_number.get<int>()) });
     auto dict = TRY(parse_dict());
     auto dict = TRY(parse_dict());
+
     auto type = TRY(dict->get_name(m_document, CommonNames::Type))->name();
     auto type = TRY(dict->get_name(m_document, CommonNames::Type))->name();
     if (type != "ObjStm")
     if (type != "ObjStm")
         return error("Invalid object stream type");
         return error("Invalid object stream type");
@@ -577,8 +579,13 @@ PDFErrorOr<Value> DocumentParser::parse_compressed_object_with_index(u32 index)
     auto first_object_offset = dict->get_value("First").get_u32();
     auto first_object_offset = dict->get_value("First").get_u32();
 
 
     auto stream = TRY(parse_stream(dict));
     auto stream = TRY(parse_stream(dict));
+    pop_reference();
+
     Parser stream_parser(m_document, stream->bytes());
     Parser stream_parser(m_document, stream->bytes());
 
 
+    // The data was already decrypted when reading the outer compressed ObjStm.
+    stream_parser.set_encryption_enabled(false);
+
     for (u32 i = 0; i < object_count; ++i) {
     for (u32 i = 0; i < object_count; ++i) {
         auto object_number = TRY(stream_parser.parse_number());
         auto object_number = TRY(stream_parser.parse_number());
         auto object_offset = TRY(stream_parser.parse_number());
         auto object_offset = TRY(stream_parser.parse_number());
@@ -589,7 +596,11 @@ PDFErrorOr<Value> DocumentParser::parse_compressed_object_with_index(u32 index)
         }
         }
     }
     }
 
 
-    return TRY(stream_parser.parse_value());
+    stream_parser.push_reference({ index, 0 });
+    auto value = TRY(stream_parser.parse_value());
+    stream_parser.pop_reference();
+
+    return value;
 }
 }
 
 
 PDFErrorOr<DocumentParser::PageOffsetHintTable> DocumentParser::parse_page_offset_hint_table(ReadonlyBytes hint_stream_bytes)
 PDFErrorOr<DocumentParser::PageOffsetHintTable> DocumentParser::parse_page_offset_hint_table(ReadonlyBytes hint_stream_bytes)

+ 6 - 1
Userland/Libraries/LibPDF/Parser.h

@@ -62,10 +62,15 @@ public:
         m_enable_filters = enabled;
         m_enable_filters = enabled;
     }
     }
 
 
-protected:
+    void set_encryption_enabled(bool enabled)
+    {
+        m_enable_encryption = enabled;
+    }
+
     void push_reference(Reference const& ref) { m_current_reference_stack.append(ref); }
     void push_reference(Reference const& ref) { m_current_reference_stack.append(ref); }
     void pop_reference() { m_current_reference_stack.take_last(); }
     void pop_reference() { m_current_reference_stack.take_last(); }
 
 
+protected:
     Error error(
     Error error(
         DeprecatedString const& message
         DeprecatedString const& message
 #ifdef PDF_DEBUG
 #ifdef PDF_DEBUG