// Parser.h
/*
 * Copyright (c) 2021, Matthew Olsson <mattco@serenityos.org>
 *
 * SPDX-License-Identifier: BSD-2-Clause
 */
#pragma once

#include <AK/NonnullRefPtrVector.h>
#include <AK/StdLibExtras.h>
#include <LibPDF/Command.h>
#include <LibPDF/Object.h>
#include <LibPDF/Reader.h>
#include <LibPDF/XRefTable.h>
  12. namespace PDF {
  13. class Document;
  14. class Parser {
  15. public:
  16. static Vector<Command> parse_graphics_commands(const ReadonlyBytes&);
  17. Parser(Badge<Document>, const ReadonlyBytes&);
  18. void set_document(RefPtr<Document> document) { m_document = document; }
  19. bool perform_validation();
  20. struct XRefTableAndTrailer {
  21. XRefTable xref_table;
  22. NonnullRefPtr<DictObject> trailer;
  23. };
  24. XRefTableAndTrailer parse_last_xref_table_and_trailer();
  25. NonnullRefPtr<IndirectValue> parse_indirect_value_at_offset(size_t offset);
  26. RefPtr<DictObject> conditionally_parse_page_tree_node_at_offset(size_t offset);
  27. private:
  28. explicit Parser(const ReadonlyBytes&);
  29. bool parse_header();
  30. XRefTable parse_xref_table();
  31. NonnullRefPtr<DictObject> parse_file_trailer();
  32. bool navigate_to_before_eof_marker();
  33. bool navigate_to_after_startxref();
  34. // If the PDF is linearized, the first object will be the linearization
  35. // parameter dictionary, and it will always occur within the first 1024 bytes.
  36. // We do a very sloppy and context-free search for this object. A return value
  37. // of true does not necessarily mean this PDF is linearized, but a return value
  38. // of false does mean this PDF is not linearized.
  39. // FIXME: false doesn't guarantee non-linearization, but we VERIFY the result!
  40. bool sloppy_is_linearized();
  41. String parse_comment();
  42. Value parse_value();
  43. Value parse_possible_indirect_value_or_ref();
  44. NonnullRefPtr<IndirectValue> parse_indirect_value(int index, int generation);
  45. NonnullRefPtr<IndirectValue> parse_indirect_value();
  46. Value parse_number();
  47. NonnullRefPtr<NameObject> parse_name();
  48. NonnullRefPtr<StringObject> parse_string();
  49. String parse_literal_string();
  50. String parse_hex_string();
  51. NonnullRefPtr<ArrayObject> parse_array();
  52. NonnullRefPtr<DictObject> parse_dict();
  53. NonnullRefPtr<StreamObject> parse_stream(NonnullRefPtr<DictObject> dict);
  54. Vector<Command> parse_graphics_commands();
  55. bool matches_eol() const;
  56. bool matches_whitespace() const;
  57. bool matches_number() const;
  58. bool matches_delimiter() const;
  59. bool matches_regular_character() const;
  60. void consume_eol();
  61. bool consume_whitespace();
  62. char consume();
  63. void consume(char);
  64. Reader m_reader;
  65. RefPtr<Document> m_document;
  66. };
  67. }