From 77f5f7a6f452d4941fef123f216ecf41fbb33a04 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Julian=20Offenh=C3=A4user?= Date: Sat, 20 Aug 2022 15:17:15 +0200 Subject: [PATCH] LibPDF: Support parsing page tree nodes that are in object streams conditionally_parse_page_tree_node used to assume that the xref table contained a byte offset, even for compressed objects. It now uses the common facilities for parsing objects, at the expense of some performance. --- Userland/Libraries/LibPDF/DocumentParser.cpp | 59 ++++++------------- Userland/Libraries/LibPDF/ObjectDerivatives.h | 3 + 2 files changed, 20 insertions(+), 42 deletions(-) diff --git a/Userland/Libraries/LibPDF/DocumentParser.cpp b/Userland/Libraries/LibPDF/DocumentParser.cpp index 95e8c3b3a9b..71a645ddec1 100644 --- a/Userland/Libraries/LibPDF/DocumentParser.cpp +++ b/Userland/Libraries/LibPDF/DocumentParser.cpp @@ -644,51 +644,26 @@ bool DocumentParser::navigate_to_after_startxref() PDFErrorOr> DocumentParser::conditionally_parse_page_tree_node(u32 object_index) { - VERIFY(m_xref_table->has_object(object_index)); - auto byte_offset = m_xref_table->byte_offset_for_object(object_index); + auto dict_value = TRY(parse_object_with_index(object_index)); + auto dict_object = dict_value.get>(); + if (!dict_object->is()) + return error(String::formatted("Invalid page tree with xref index {}", object_index)); - m_reader.move_to(byte_offset); - TRY(parse_number()); - TRY(parse_number()); - if (!m_reader.matches("obj")) - return error(String::formatted("Invalid page tree offset {}", object_index)); + auto dict = dict_object->cast(); + if (!dict->contains_any_of(CommonNames::Type, CommonNames::Parent, CommonNames::Kids, CommonNames::Count)) + // This is a page, not a page tree node + return RefPtr {}; - m_reader.move_by(3); - m_reader.consume_whitespace(); + if (!dict->contains(CommonNames::Type)) + return RefPtr {}; + auto type_object = TRY(dict->get_object(m_document, CommonNames::Type)); + if (!type_object->is()) + return RefPtr {}; + auto type_name = type_object->cast(); + if (type_name->name() != CommonNames::Pages) + return RefPtr {}; - VERIFY(m_reader.consume('<') && m_reader.consume('<')); - - m_reader.consume_whitespace(); - HashMap map; - - while (true) { - if (m_reader.matches(">>")) - break; - auto name = TRY(parse_name()); - auto name_string = name->name(); - if (!name_string.is_one_of(CommonNames::Type, CommonNames::Parent, CommonNames::Kids, CommonNames::Count)) { - // This is a page, not a page tree node - return RefPtr {}; - } - - auto value = TRY(parse_value()); - if (name_string == CommonNames::Type) { - if (!value.has>()) - return RefPtr {}; - auto type_object = value.get>(); - if (!type_object->is()) - return RefPtr {}; - auto type_name = type_object->cast(); - if (type_name->name() != CommonNames::Pages) - return RefPtr {}; - } - map.set(name->name(), value); - } - - VERIFY(m_reader.consume('>') && m_reader.consume('>')); - m_reader.consume_whitespace(); - - return make_object(map); + return dict; } } diff --git a/Userland/Libraries/LibPDF/ObjectDerivatives.h b/Userland/Libraries/LibPDF/ObjectDerivatives.h index c719ef2f273..cdb8dd7ef76 100644 --- a/Userland/Libraries/LibPDF/ObjectDerivatives.h +++ b/Userland/Libraries/LibPDF/ObjectDerivatives.h @@ -113,6 +113,9 @@ public: template bool contains(Args&&... keys) const { return (m_map.contains(keys) && ...); } + template + bool contains_any_of(Args&&... keys) const { return (m_map.contains(keys) || ...); } + ALWAYS_INLINE Optional get(FlyString const& key) const { return m_map.get(key); } Value get_value(FlyString const& key) const