DocumentParser.h 3.6 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697
/*
 * Copyright (c) 2021-2022, Matthew Olsson <mattco@serenityos.org>
 *
 * SPDX-License-Identifier: BSD-2-Clause
 */
  6. #pragma once
  7. #include <LibPDF/Parser.h>
  8. namespace PDF {
  9. class DocumentParser final : public RefCounted<DocumentParser>
  10. , public Parser {
  11. public:
  12. DocumentParser(Document*, ReadonlyBytes);
  13. enum class LinearizationResult {
  14. NotLinearized,
  15. Linearized,
  16. };
  17. [[nodiscard]] ALWAYS_INLINE RefPtr<DictObject> const& trailer() const { return m_trailer; }
  18. // Parses the header and initializes the xref table and trailer
  19. PDFErrorOr<void> initialize();
  20. PDFErrorOr<Value> parse_object_with_index(u32 index);
  21. // Specialized version of parse_dict which aborts early if the dict being parsed
  22. // is not a page object
  23. PDFErrorOr<RefPtr<DictObject>> conditionally_parse_page_tree_node(u32 object_index);
  24. private:
  25. struct LinearizationDictionary {
  26. u32 length_of_file { 0 };
  27. u32 primary_hint_stream_offset { 0 };
  28. u32 primary_hint_stream_length { 0 };
  29. u32 overflow_hint_stream_offset { 0 };
  30. u32 overflow_hint_stream_length { 0 };
  31. u32 first_page_object_number { 0 };
  32. u32 offset_of_first_page_end { 0 };
  33. u16 number_of_pages { 0 };
  34. u32 offset_of_main_xref_table { 0 };
  35. u32 first_page { 0 }; // The page to initially open (I think, the spec isn't all that clear here)
  36. };
  37. struct PageOffsetHintTable {
  38. u32 least_number_of_objects_in_a_page { 0 };
  39. u32 location_of_first_page_object { 0 };
  40. u16 bits_required_for_object_number { 0 };
  41. u32 least_length_of_a_page { 0 };
  42. u16 bits_required_for_page_length { 0 };
  43. u32 least_offset_of_any_content_stream { 0 };
  44. u16 bits_required_for_content_stream_offsets { 0 };
  45. u32 least_content_stream_length { 0 };
  46. u16 bits_required_for_content_stream_length { 0 };
  47. u16 bits_required_for_number_of_shared_obj_refs { 0 };
  48. u16 bits_required_for_greatest_shared_obj_identifier { 0 };
  49. u16 bits_required_for_fraction_numerator { 0 };
  50. u16 shared_object_reference_fraction_denominator { 0 };
  51. };
  52. struct PageOffsetHintTableEntry {
  53. u32 objects_in_page_number { 0 };
  54. u32 page_length_number { 0 };
  55. u32 number_of_shared_objects { 0 };
  56. Vector<u32> shared_object_identifiers {};
  57. Vector<u32> shared_object_location_numerators {};
  58. u32 page_content_stream_offset_number { 0 };
  59. u32 page_content_stream_length_number { 0 };
  60. };
  61. friend struct AK::Formatter<LinearizationDictionary>;
  62. friend struct AK::Formatter<PageOffsetHintTable>;
  63. friend struct AK::Formatter<PageOffsetHintTableEntry>;
  64. PDFErrorOr<void> parse_header();
  65. PDFErrorOr<LinearizationResult> initialize_linearization_dict();
  66. PDFErrorOr<void> initialize_linearized_xref_table();
  67. PDFErrorOr<void> initialize_non_linearized_xref_table();
  68. PDFErrorOr<void> initialize_hint_tables();
  69. PDFErrorOr<PageOffsetHintTable> parse_page_offset_hint_table(ReadonlyBytes hint_stream_bytes);
  70. Vector<PageOffsetHintTableEntry> parse_all_page_offset_hint_table_entries(PageOffsetHintTable const&, ReadonlyBytes hint_stream_bytes);
  71. PDFErrorOr<NonnullRefPtr<XRefTable>> parse_xref_stream();
  72. PDFErrorOr<NonnullRefPtr<XRefTable>> parse_xref_table();
  73. PDFErrorOr<NonnullRefPtr<DictObject>> parse_file_trailer();
  74. bool navigate_to_before_eof_marker();
  75. bool navigate_to_after_startxref();
  76. RefPtr<XRefTable> m_xref_table;
  77. RefPtr<DictObject> m_trailer;
  78. Optional<LinearizationDictionary> m_linearization_dictionary;
  79. };
  80. }