LibPDF: Refine the distinction between the Document and Parser
The Parser should hold information relevant for parsing, whereas the Document should hold information relevant for displaying pages. With this in mind, there is no reason for the Document to hold the xref table and trailer. These objects have been moved to the Parser, which allows the Parser to expose fewer public methods (which will be even more evident once linearized PDFs are supported).
This commit is contained in:
parent
69410d7f4e
commit
78bc9d1539
Notes:
sideshowbarker
2024-07-18 12:21:26 +09:00
Author: https://github.com/mattco98 Commit: https://github.com/SerenityOS/serenity/commit/78bc9d1539a Pull-request: https://github.com/SerenityOS/serenity/pull/7675 Reviewed-by: https://github.com/FireFox317 Reviewed-by: https://github.com/alimpfard
4 changed files with 43 additions and 56 deletions
|
@ -39,17 +39,10 @@ RefPtr<Document> Document::create(const ReadonlyBytes& bytes)
|
|||
auto parser = adopt_ref(*new Parser({}, bytes));
|
||||
auto document = adopt_ref(*new Document(parser));
|
||||
|
||||
VERIFY(parser->perform_validation());
|
||||
auto xref_table_and_trailer_opt = parser->parse_last_xref_table_and_trailer();
|
||||
if (!xref_table_and_trailer_opt.has_value())
|
||||
if (!parser->initialize())
|
||||
return {};
|
||||
|
||||
auto [xref_table, trailer] = xref_table_and_trailer_opt.value();
|
||||
|
||||
document->m_xref_table = xref_table;
|
||||
document->m_trailer = trailer;
|
||||
|
||||
document->m_catalog = document->m_trailer->get_dict(document, CommonNames::Root);
|
||||
document->m_catalog = parser->trailer()->get_dict(document, CommonNames::Root);
|
||||
document->build_page_tree();
|
||||
document->build_outline();
|
||||
|
||||
|
@ -68,13 +61,9 @@ Value Document::get_or_load_value(u32 index)
|
|||
if (value)
|
||||
return value;
|
||||
|
||||
VERIFY(m_xref_table.has_object(index));
|
||||
auto byte_offset = m_xref_table.byte_offset_for_object(index);
|
||||
auto indirect_value = m_parser->parse_indirect_value_at_offset(byte_offset);
|
||||
VERIFY(indirect_value->index() == index);
|
||||
value = indirect_value->value();
|
||||
m_values.set(index, value);
|
||||
return value;
|
||||
auto object = m_parser->parse_object_with_index(index);
|
||||
m_values.set(index, object);
|
||||
return object;
|
||||
}
|
||||
|
||||
u32 Document::get_first_page_index() const
|
||||
|
@ -179,9 +168,8 @@ bool Document::add_page_tree_node_to_page_tree(NonnullRefPtr<DictObject> page_tr
|
|||
|
||||
for (auto& value : *kids_array) {
|
||||
auto reference_index = value.as_ref_index();
|
||||
auto byte_offset = m_xref_table.byte_offset_for_object(reference_index);
|
||||
bool ok;
|
||||
auto maybe_page_tree_node = m_parser->conditionally_parse_page_tree_node_at_offset(byte_offset, ok);
|
||||
auto maybe_page_tree_node = m_parser->conditionally_parse_page_tree_node(reference_index, ok);
|
||||
if (!ok)
|
||||
return false;
|
||||
if (maybe_page_tree_node) {
|
||||
|
|
|
@ -75,8 +75,6 @@ class Document final : public RefCounted<Document> {
|
|||
public:
|
||||
static RefPtr<Document> create(const ReadonlyBytes& bytes);
|
||||
|
||||
ALWAYS_INLINE const XRefTable& xref_table() const { return m_xref_table; }
|
||||
ALWAYS_INLINE const DictObject& trailer() const { return *m_trailer; }
|
||||
ALWAYS_INLINE const RefPtr<OutlineDict>& outline() const { return m_outline; }
|
||||
|
||||
[[nodiscard]] Value get_or_load_value(u32 index);
|
||||
|
@ -92,12 +90,6 @@ public:
|
|||
return m_values.get(index).value_or({});
|
||||
}
|
||||
|
||||
ALWAYS_INLINE void set_value(u32 index, const Value& value)
|
||||
{
|
||||
m_values.ensure_capacity(index);
|
||||
m_values.set(index, value);
|
||||
}
|
||||
|
||||
// Strips away the layer of indirection by turning indirect value
|
||||
// refs into the value they reference, and indirect values into
|
||||
// the value being wrapped.
|
||||
|
@ -139,8 +131,6 @@ private:
|
|||
NonnullRefPtrVector<OutlineItem> build_outline_item_chain(const Value& first_ref, const Value& last_ref);
|
||||
|
||||
NonnullRefPtr<Parser> m_parser;
|
||||
XRefTable m_xref_table;
|
||||
RefPtr<DictObject> m_trailer;
|
||||
RefPtr<DictObject> m_catalog;
|
||||
Vector<u32> m_page_object_indices;
|
||||
HashMap<u32, Page> m_pages;
|
||||
|
|
|
@ -38,43 +38,47 @@ Parser::Parser(const ReadonlyBytes& bytes)
|
|||
{
|
||||
}
|
||||
|
||||
bool Parser::perform_validation()
|
||||
bool Parser::initialize()
|
||||
{
|
||||
return !sloppy_is_linearized() && parse_header();
|
||||
}
|
||||
if (!parse_header())
|
||||
return {};
|
||||
|
||||
Optional<Parser::XRefTableAndTrailer> Parser::parse_last_xref_table_and_trailer()
|
||||
{
|
||||
m_reader.move_to(m_reader.bytes().size() - 1);
|
||||
if (!navigate_to_before_eof_marker())
|
||||
return {};
|
||||
return false;
|
||||
if (!navigate_to_after_startxref())
|
||||
return {};
|
||||
return false;
|
||||
if (m_reader.done())
|
||||
return {};
|
||||
return false;
|
||||
|
||||
m_reader.set_reading_forwards();
|
||||
auto xref_offset_value = parse_number();
|
||||
if (!xref_offset_value.is_int())
|
||||
return {};
|
||||
return false;
|
||||
auto xref_offset = xref_offset_value.as_int();
|
||||
|
||||
m_reader.move_to(xref_offset);
|
||||
auto xref_table = parse_xref_table();
|
||||
if (!xref_table.has_value())
|
||||
return {};
|
||||
return false;
|
||||
auto trailer = parse_file_trailer();
|
||||
if (!trailer)
|
||||
return {};
|
||||
return false;
|
||||
|
||||
return XRefTableAndTrailer { xref_table.value(), trailer.release_nonnull() };
|
||||
m_xref_table = xref_table.value();
|
||||
m_trailer = trailer;
|
||||
return true;
|
||||
}
|
||||
|
||||
RefPtr<IndirectValue> Parser::parse_indirect_value_at_offset(size_t offset)
|
||||
Value Parser::parse_object_with_index(u32 index)
|
||||
{
|
||||
m_reader.set_reading_forwards();
|
||||
m_reader.move_to(offset);
|
||||
return parse_indirect_value();
|
||||
VERIFY(m_xref_table.has_object(index));
|
||||
auto byte_offset = m_xref_table.byte_offset_for_object(index);
|
||||
m_reader.move_to(byte_offset);
|
||||
auto indirect_value = parse_indirect_value();
|
||||
VERIFY(indirect_value);
|
||||
VERIFY(indirect_value->index() == index);
|
||||
return indirect_value->value();
|
||||
}
|
||||
|
||||
bool Parser::parse_header()
|
||||
|
@ -647,11 +651,14 @@ RefPtr<DictObject> Parser::parse_dict()
|
|||
return make_object<DictObject>(map);
|
||||
}
|
||||
|
||||
RefPtr<DictObject> Parser::conditionally_parse_page_tree_node_at_offset(size_t offset, bool& ok)
|
||||
RefPtr<DictObject> Parser::conditionally_parse_page_tree_node(u32 object_index, bool& ok)
|
||||
{
|
||||
ok = true;
|
||||
|
||||
m_reader.move_to(offset);
|
||||
VERIFY(m_xref_table.has_object(object_index));
|
||||
auto byte_offset = m_xref_table.byte_offset_for_object(object_index);
|
||||
|
||||
m_reader.move_to(byte_offset);
|
||||
parse_number();
|
||||
parse_number();
|
||||
if (!m_reader.matches("obj")) {
|
||||
|
|
|
@ -22,19 +22,19 @@ public:
|
|||
|
||||
Parser(Badge<Document>, const ReadonlyBytes&);
|
||||
|
||||
void set_document(RefPtr<Document> document) { m_document = document; }
|
||||
[[nodiscard]] ALWAYS_INLINE const RefPtr<DictObject>& trailer() const { return m_trailer; }
|
||||
void set_document(const RefPtr<Document>& document) { m_document = document; }
|
||||
|
||||
bool perform_validation();
|
||||
// Parses the header and initializes the xref table and trailer
|
||||
bool initialize();
|
||||
|
||||
struct XRefTableAndTrailer {
|
||||
XRefTable xref_table;
|
||||
NonnullRefPtr<DictObject> trailer;
|
||||
};
|
||||
Optional<XRefTableAndTrailer> parse_last_xref_table_and_trailer();
|
||||
Value parse_object_with_index(u32 index);
|
||||
|
||||
RefPtr<IndirectValue> parse_indirect_value_at_offset(size_t offset);
|
||||
|
||||
RefPtr<DictObject> conditionally_parse_page_tree_node_at_offset(size_t offset, bool& ok);
|
||||
// Specialized version of parse_dict which aborts early if the dict being parsed
|
||||
// is not a page object. A null RefPtr return indicates that the dict at this index
|
||||
// is not a page tree node, whereas ok == false indicates a malformed PDF file and
|
||||
// should cause an abort of the current operation.
|
||||
RefPtr<DictObject> conditionally_parse_page_tree_node(u32 object_index, bool& ok);
|
||||
|
||||
private:
|
||||
explicit Parser(const ReadonlyBytes&);
|
||||
|
@ -85,6 +85,8 @@ private:
|
|||
|
||||
Reader m_reader;
|
||||
RefPtr<Document> m_document;
|
||||
XRefTable m_xref_table;
|
||||
RefPtr<DictObject> m_trailer;
|
||||
};
|
||||
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue