Переглянути джерело

LibPDF: Support parsing page tree nodes that are in object streams

conditionally_parse_page_tree_node used to assume that the xref table
contained a byte offset, even for compressed objects. It now uses the
common facilities for parsing objects, at the expense of some
performance.
Julian Offenhäuser 2 роки тому
батько
коміт
77f5f7a6f4

+ 18 - 43
Userland/Libraries/LibPDF/DocumentParser.cpp

@@ -644,51 +644,26 @@ bool DocumentParser::navigate_to_after_startxref()
 
 // Parses the object at xref index `object_index` and decides whether it is a page tree node.
 // Returns the parsed DictObject when its Type entry is a name equal to CommonNames::Pages,
 // a null RefPtr when the dictionary does not qualify, and an error when the parsed object
 // is not a dictionary. Failures inside the TRY(...) expressions are handed back to the caller.
 PDFErrorOr<RefPtr<DictObject>> DocumentParser::conditionally_parse_page_tree_node(u32 object_index)
 {
-    VERIFY(m_xref_table->has_object(object_index));
-    auto byte_offset = m_xref_table->byte_offset_for_object(object_index);
-
-    m_reader.move_to(byte_offset);
-    TRY(parse_number());
-    TRY(parse_number());
-    if (!m_reader.matches("obj"))
-        return error(String::formatted("Invalid page tree offset {}", object_index));
-
-    m_reader.move_by(3);
-    m_reader.consume_whitespace();
-
-    VERIFY(m_reader.consume('<') && m_reader.consume('<'));
-
-    m_reader.consume_whitespace();
-    HashMap<FlyString, Value> map;
-
-    while (true) {
-        if (m_reader.matches(">>"))
-            break;
-        auto name = TRY(parse_name());
-        auto name_string = name->name();
-        if (!name_string.is_one_of(CommonNames::Type, CommonNames::Parent, CommonNames::Kids, CommonNames::Count)) {
-            // This is a page, not a page tree node
-            return RefPtr<DictObject> {};
-        }
-
-        auto value = TRY(parse_value());
-        if (name_string == CommonNames::Type) {
-            if (!value.has<NonnullRefPtr<Object>>())
-                return RefPtr<DictObject> {};
-            auto type_object = value.get<NonnullRefPtr<Object>>();
-            if (!type_object->is<NameObject>())
-                return RefPtr<DictObject> {};
-            auto type_name = type_object->cast<NameObject>();
-            if (type_name->name() != CommonNames::Pages)
-                return RefPtr<DictObject> {};
-        }
-        map.set(name->name(), value);
-    }
+    auto dict_value = TRY(parse_object_with_index(object_index)); // Uses the common object-parsing path instead of seeking to a raw xref byte offset.
+    auto dict_object = dict_value.get<NonnullRefPtr<Object>>(); // NOTE(review): no has<>() check precedes this get<>() (the removed code checked has<>() first) — confirm the value always holds an Object here.
+    if (!dict_object->is<DictObject>())
+        return error(String::formatted("Invalid page tree with xref index {}", object_index));
 
-    VERIFY(m_reader.consume('>') && m_reader.consume('>'));
-    m_reader.consume_whitespace();
+    auto dict = dict_object->cast<DictObject>();
+    if (!dict->contains_any_of(CommonNames::Type, CommonNames::Parent, CommonNames::Kids, CommonNames::Count)) // NOTE(review): passes when at least one of the four keys exists; the removed loop instead rejected any dictionary holding a key outside this set. A dictionary failing here would also fail the Type check below with the same null result.
+        // This is a page, not a page tree node
+        return RefPtr<DictObject> {};
+
+    if (!dict->contains(CommonNames::Type)) // No Type entry: not treated as a page tree node.
+        return RefPtr<DictObject> {};
+    auto type_object = TRY(dict->get_object(m_document, CommonNames::Type)); // Presumably m_document is passed so an indirect reference can be resolved — TODO confirm.
+    if (!type_object->is<NameObject>())
+        return RefPtr<DictObject> {};
+    auto type_name = type_object->cast<NameObject>();
+    if (type_name->name() != CommonNames::Pages) // Only a Type equal to CommonNames::Pages qualifies.
+        return RefPtr<DictObject> {};
 
-    return make_object<DictObject>(map);
+    return dict;
 }
 
 }

+ 3 - 0
Userland/Libraries/LibPDF/ObjectDerivatives.h

@@ -113,6 +113,9 @@ public:
     template<typename... Args>
     bool contains(Args&&... keys) const { return (m_map.contains(keys) && ...); } // True only if every given key is present in m_map; the && fold yields true for an empty key list.
 
+    template<typename... Args>
+    bool contains_any_of(Args&&... keys) const { return (m_map.contains(keys) || ...); } // True if at least one given key is present in m_map; the || fold yields false for an empty key list.
+
     ALWAYS_INLINE Optional<Value> get(FlyString const& key) const { return m_map.get(key); } // Forwards the lookup to m_map.get(key) and returns its result as an Optional<Value>.
 
     Value get_value(FlyString const& key) const