Browse Source

LibPDF: Simplify outline construction

While the Outline Items making up the document's Outline have all sorts
of cross-references (parent, first/last chlid, next/previous sibling,
etc), not all documents out there have fully-consistent references. Our
implementation already discarded some of that information too (e.g.,
/Parent and /Prev were never read), and trusted that /First and /Next
were good enough to traverse the whole hierarchy.

Where the current implementation failed was in assuming that /Last was
also a good source of information. There are documents out there were
/Last also points to dead ends, and were therefore causing a crash when
we verified that the last child found on a chain was the /Last child
declared by the parent. To fix this I'm simply removing the check, and
simplifying the function call to remove any references to /Last. This
way we affirm our commitment to /First and /Next as the main sources of
information.
Rodrigo Tobar 2 years ago
parent
commit
cb1a7cc721
2 changed files with 9 additions and 10 deletions
  1. 8 9
      Userland/Libraries/LibPDF/Document.cpp
  2. 1 1
      Userland/Libraries/LibPDF/Document.h

+ 8 - 9
Userland/Libraries/LibPDF/Document.cpp

@@ -211,9 +211,8 @@ PDFErrorOr<void> Document::build_outline()
         return {};
         return {};
 
 
     auto first_ref = outline_dict->get_value(CommonNames::First);
     auto first_ref = outline_dict->get_value(CommonNames::First);
-    auto last_ref = outline_dict->get_value(CommonNames::Last);
 
 
-    auto children = TRY(build_outline_item_chain(first_ref, last_ref));
+    auto children = TRY(build_outline_item_chain(first_ref));
 
 
     m_outline = adopt_ref(*new OutlineDict());
     m_outline = adopt_ref(*new OutlineDict());
     m_outline->children = move(children);
     m_outline->children = move(children);
@@ -273,9 +272,7 @@ PDFErrorOr<NonnullRefPtr<OutlineItem>> Document::build_outline_item(NonnullRefPt
     if (outline_item_dict->contains(CommonNames::First)) {
     if (outline_item_dict->contains(CommonNames::First)) {
         VERIFY(outline_item_dict->contains(CommonNames::Last));
         VERIFY(outline_item_dict->contains(CommonNames::Last));
         auto first_ref = outline_item_dict->get_value(CommonNames::First);
         auto first_ref = outline_item_dict->get_value(CommonNames::First);
-        auto last_ref = outline_item_dict->get_value(CommonNames::Last);
-
-        auto children = TRY(build_outline_item_chain(first_ref, last_ref));
+        auto children = TRY(build_outline_item_chain(first_ref));
         outline_item->children = move(children);
         outline_item->children = move(children);
     }
     }
 
 
@@ -326,10 +323,14 @@ PDFErrorOr<NonnullRefPtr<OutlineItem>> Document::build_outline_item(NonnullRefPt
     return outline_item;
     return outline_item;
 }
 }
 
 
-PDFErrorOr<NonnullRefPtrVector<OutlineItem>> Document::build_outline_item_chain(Value const& first_ref, Value const& last_ref)
+PDFErrorOr<NonnullRefPtrVector<OutlineItem>> Document::build_outline_item_chain(Value const& first_ref)
 {
 {
+    // We used to receive a last_ref parameter, which was what the parent of this chain
+    // thought was this chain's last child. There are documents out there in the wild
+    // where this cross-references don't match though, and it seems like simply following
+    // the /First and /Next links is the way to go to construct the whole Outline
+    // (we already ignore the /Parent attribute too, which can also be out of sync).
     VERIFY(first_ref.has<Reference>());
     VERIFY(first_ref.has<Reference>());
-    VERIFY(last_ref.has<Reference>());
 
 
     NonnullRefPtrVector<OutlineItem> children;
     NonnullRefPtrVector<OutlineItem> children;
 
 
@@ -352,8 +353,6 @@ PDFErrorOr<NonnullRefPtrVector<OutlineItem>> Document::build_outline_item_chain(
         current_child_dict = move(next_child_dict);
         current_child_dict = move(next_child_dict);
     }
     }
 
 
-    VERIFY(last_ref.as_ref_index() == current_child_index);
-
     return children;
     return children;
 }
 }
 
 

+ 1 - 1
Userland/Libraries/LibPDF/Document.h

@@ -146,7 +146,7 @@ private:
 
 
     PDFErrorOr<void> build_outline();
     PDFErrorOr<void> build_outline();
     PDFErrorOr<NonnullRefPtr<OutlineItem>> build_outline_item(NonnullRefPtr<DictObject> const& outline_item_dict);
     PDFErrorOr<NonnullRefPtr<OutlineItem>> build_outline_item(NonnullRefPtr<DictObject> const& outline_item_dict);
-    PDFErrorOr<NonnullRefPtrVector<OutlineItem>> build_outline_item_chain(Value const& first_ref, Value const& last_ref);
+    PDFErrorOr<NonnullRefPtrVector<OutlineItem>> build_outline_item_chain(Value const& first_ref);
 
 
     PDFErrorOr<Destination> create_destination_from_parameters(NonnullRefPtr<ArrayObject>);
     PDFErrorOr<Destination> create_destination_from_parameters(NonnullRefPtr<ArrayObject>);