From 1b8f73b6b3afd8d78172fcd30b95db67f259f58a Mon Sep 17 00:00:00 2001 From: Wesley Moret Date: Wed, 14 Jul 2021 21:48:09 -0400 Subject: [PATCH] LibPDF: Fix treating not finding the linearized dict as a fatal error We now try to parse the first indirect value and see if it's the `Linearization Parameter Dictionary`. If it's not, we fall back to reading the xref table from the end of the document. --- Userland/Libraries/LibPDF/Parser.cpp | 31 +++++++++++++++++----------- Userland/Libraries/LibPDF/Parser.h | 8 ++++++- 2 files changed, 26 insertions(+), 13 deletions(-) diff --git a/Userland/Libraries/LibPDF/Parser.cpp b/Userland/Libraries/LibPDF/Parser.cpp index 51f59c99b61..263ea0a2700 100644 --- a/Userland/Libraries/LibPDF/Parser.cpp +++ b/Userland/Libraries/LibPDF/Parser.cpp @@ -45,14 +45,18 @@ bool Parser::initialize() if (!parse_header()) return {}; - if (!initialize_linearization_dict()) + const auto result = initialize_linearization_dict(); + if (result == LinearizationResult::Error) return {}; + if (result == LinearizationResult::NotLinearized) + return initialize_non_linearized_xref_table(); + bool is_linearized = m_linearization_dictionary.has_value(); if (is_linearized) { // The file may have been linearized at one point, but could have been updated afterwards, // which means it is no longer a linearized PDF file. 
- is_linearized = is_linearized && m_linearization_dictionary.value().length_of_file == m_reader.bytes().size(); + is_linearized = m_linearization_dictionary.value().length_of_file == m_reader.bytes().size(); if (!is_linearized) { // FIXME: The file shouldn't be treated as linearized, yet the xref tables are still @@ -113,21 +117,24 @@ bool Parser::parse_header() return true; } -bool Parser::initialize_linearization_dict() +Parser::LinearizationResult Parser::initialize_linearization_dict() { // parse_header() is called immediately before this, so we are at the right location auto dict_value = m_document->resolve(parse_indirect_value()); if (!dict_value || !dict_value.is_object()) - return false; + return LinearizationResult::Error; auto dict_object = dict_value.as_object(); if (!dict_object->is_dict()) - return false; + return LinearizationResult::NotLinearized; auto dict = object_cast(dict_object); + if (!dict->contains(CommonNames::Linearized)) + return LinearizationResult::NotLinearized; + if (!dict->contains(CommonNames::L, CommonNames::H, CommonNames::O, CommonNames::E, CommonNames::N, CommonNames::T)) - return true; + return LinearizationResult::Error; auto length_of_file = dict->get_value(CommonNames::L); auto hint_table = dict->get_value(CommonNames::H); @@ -144,17 +151,17 @@ bool Parser::initialize_linearization_dict() || !number_of_pages.is_int_type() || !offset_of_main_xref_table.is_int_type() || (first_page && !first_page.is_int_type())) { - return true; + return LinearizationResult::Error; } auto hint_table_object = hint_table.as_object(); if (!hint_table_object->is_array()) - return true; + return LinearizationResult::Error; auto hint_table_array = object_cast(hint_table_object); auto hint_table_size = hint_table_array->size(); if (hint_table_size != 2 && hint_table_size != 4) - return true; + return LinearizationResult::Error; auto primary_hint_stream_offset = hint_table_array->at(0); auto primary_hint_stream_length = hint_table_array->at(1); @@ 
-170,7 +177,7 @@ bool Parser::initialize_linearization_dict() || !primary_hint_stream_length.is_int_type() || (overflow_hint_stream_offset && !overflow_hint_stream_offset.is_int_type()) || (overflow_hint_stream_length && !overflow_hint_stream_length.is_int_type())) { - return true; + return LinearizationResult::Error; } m_linearization_dictionary = LinearizationDictionary { @@ -186,7 +193,7 @@ bool Parser::initialize_linearization_dict() first_page ? first_page.as_int_type() : NumericLimits::max(), }; - return true; + return LinearizationResult::Linearized; } bool Parser::initialize_linearized_xref_table() @@ -1023,7 +1030,7 @@ RefPtr Parser::parse_stream(NonnullRefPtr dict) ReadonlyBytes bytes; auto maybe_length = dict->get(CommonNames::Length); - if (maybe_length.has_value()) { + if (maybe_length.has_value() && (!maybe_length->is_ref() || m_xref_table)) { // The PDF writer has kindly provided us with the direct length of the stream m_reader.save(); auto length = m_document->resolve_to(maybe_length.value()); diff --git a/Userland/Libraries/LibPDF/Parser.h b/Userland/Libraries/LibPDF/Parser.h index 6dbe3b2f5f8..2dba1a0515b 100644 --- a/Userland/Libraries/LibPDF/Parser.h +++ b/Userland/Libraries/LibPDF/Parser.h @@ -18,6 +18,12 @@ class Document; class Parser final : public RefCounted { public: + enum class LinearizationResult { + Error, + NotLinearized, + Linearized, + }; + static Vector parse_graphics_commands(ReadonlyBytes const&); Parser(Badge, ReadonlyBytes const&); @@ -83,7 +89,7 @@ private: explicit Parser(ReadonlyBytes const&); bool parse_header(); - bool initialize_linearization_dict(); + LinearizationResult initialize_linearization_dict(); bool initialize_linearized_xref_table(); bool initialize_non_linearized_xref_table(); bool initialize_hint_tables();