// Document.h (6.9 KB)
  1. /*
  2. * Copyright (c) 2021, Matthew Olsson <mattco@serenityos.org>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #pragma once
  7. #include <AK/Format.h>
  8. #include <AK/HashMap.h>
  9. #include <AK/RefCounted.h>
  10. #include <LibGfx/Color.h>
  11. #include <LibPDF/Object.h>
  12. #include <LibPDF/Parser.h>
  13. namespace PDF {
  // A rectangle described by the coordinates of its lower-left and
  // upper-right corners.
  //
  // Every coordinate defaults to zero so that a default-constructed Rectangle
  // never exposes indeterminate values, matching how the other structs in this
  // file brace-initialize their scalar members. The struct is still an
  // aggregate, so `Rectangle { llx, lly, urx, ury }` keeps working unchanged.
  struct Rectangle {
      float lower_left_x { 0.0f };
      float lower_left_y { 0.0f };
      float upper_right_x { 0.0f };
      float upper_right_y { 0.0f };
  };
  20. struct Page {
  21. NonnullRefPtr<DictObject> resources;
  22. NonnullRefPtr<Object> contents;
  23. Rectangle media_box;
  24. Rectangle crop_box;
  25. float user_unit;
  26. int rotate;
  27. };
  28. struct Destination {
  29. enum class Type {
  30. XYZ,
  31. Fit,
  32. FitH,
  33. FitV,
  34. FitR,
  35. FitB,
  36. FitBH,
  37. FitBV,
  38. };
  39. Type type;
  40. Value page;
  41. Vector<float> parameters;
  42. };
  43. struct OutlineItem final : public RefCounted<OutlineItem> {
  44. RefPtr<OutlineItem> parent;
  45. NonnullRefPtrVector<OutlineItem> children;
  46. String title;
  47. i32 count { 0 };
  48. Destination dest;
  49. Gfx::Color color { Color::NamedColor::Black }; // 'C' in the PDF spec
  50. bool italic { false }; // bit 0 of 'F' in the PDF spec
  51. bool bold { false }; // bit 0 of 'F' in the PDF spec
  52. OutlineItem() = default;
  53. String to_string(int indent) const;
  54. };
  55. struct OutlineDict final : public RefCounted<OutlineDict> {
  56. NonnullRefPtrVector<OutlineItem> children;
  57. u32 count { 0 };
  58. OutlineDict() = default;
  59. };
  60. class Document final : public RefCounted<Document> {
  61. public:
  62. static RefPtr<Document> create(ReadonlyBytes const& bytes);
  63. ALWAYS_INLINE RefPtr<OutlineDict> const& outline() const { return m_outline; }
  64. [[nodiscard]] Value get_or_load_value(u32 index);
  65. [[nodiscard]] u32 get_first_page_index() const;
  66. [[nodiscard]] u32 get_page_count() const;
  67. [[nodiscard]] Page get_page(u32 index);
  68. ALWAYS_INLINE Value get_value(u32 index) const
  69. {
  70. return m_values.get(index).value_or({});
  71. }
  72. // Strips away the layer of indirection by turning indirect value
  73. // refs into the value they reference, and indirect values into
  74. // the value being wrapped.
  75. Value resolve(Value const& value);
  76. // Like resolve, but unwraps the Value into the given type. Accepts
  77. // any object type, and the three primitive Value types.
  78. template<IsValueType T>
  79. UnwrappedValueType<T> resolve_to(Value const& value)
  80. {
  81. auto resolved = resolve(value);
  82. if constexpr (IsSame<T, bool>)
  83. return resolved.as_bool();
  84. if constexpr (IsSame<T, int>)
  85. return resolved.as_int();
  86. if constexpr (IsSame<T, float>)
  87. return resolved.as_float();
  88. if constexpr (IsObject<T>)
  89. return object_cast<T>(resolved.as_object());
  90. VERIFY_NOT_REACHED();
  91. }
  92. private:
  93. explicit Document(NonnullRefPtr<Parser> const& parser);
  94. // FIXME: Currently, to improve performance, we don't load any pages at Document
  95. // construction, rather we just load the page structure and populate
  96. // m_page_object_indices. However, we can be even lazier and defer page tree node
  97. // parsing, as good PDF writers will layout the page tree in a balanced tree to
  98. // improve lookup time. This would reduce the initial overhead by not loading
  99. // every page tree node of, say, a 1000+ page PDF file.
  100. bool build_page_tree();
  101. bool add_page_tree_node_to_page_tree(NonnullRefPtr<DictObject> const& page_tree);
  102. void build_outline();
  103. NonnullRefPtr<OutlineItem> build_outline_item(NonnullRefPtr<DictObject> const& outline_item_dict);
  104. NonnullRefPtrVector<OutlineItem> build_outline_item_chain(Value const& first_ref, Value const& last_ref);
  105. NonnullRefPtr<Parser> m_parser;
  106. RefPtr<DictObject> m_catalog;
  107. Vector<u32> m_page_object_indices;
  108. HashMap<u32, Page> m_pages;
  109. HashMap<u32, Value> m_values;
  110. RefPtr<OutlineDict> m_outline;
  111. };
  112. }
  113. namespace AK {
  114. template<>
  115. struct Formatter<PDF::Rectangle> : Formatter<StringView> {
  116. void format(FormatBuilder& builder, PDF::Rectangle const& rectangle)
  117. {
  118. Formatter<StringView>::format(builder,
  119. String::formatted("Rectangle {{ ll=({}, {}), ur=({}, {}) }}",
  120. rectangle.lower_left_x,
  121. rectangle.lower_left_y,
  122. rectangle.upper_right_x,
  123. rectangle.upper_right_y));
  124. }
  125. };
  126. template<>
  127. struct Formatter<PDF::Page> : Formatter<StringView> {
  128. void format(FormatBuilder& builder, PDF::Page const& page)
  129. {
  130. constexpr auto fmt_string = "Page {{\n resources={}\n contents={}\n media_box={}\n crop_box={}\n user_unit={}\n rotate={}\n}}";
  131. auto str = String::formatted(fmt_string,
  132. page.resources->to_string(1),
  133. page.contents->to_string(1),
  134. page.media_box,
  135. page.crop_box,
  136. page.user_unit,
  137. page.rotate);
  138. Formatter<StringView>::format(builder, str);
  139. }
  140. };
  141. template<>
  142. struct Formatter<PDF::Destination> : Formatter<StringView> {
  143. void format(FormatBuilder& builder, PDF::Destination const& destination)
  144. {
  145. String type_str;
  146. switch (destination.type) {
  147. case PDF::Destination::Type::XYZ:
  148. type_str = "XYZ";
  149. break;
  150. case PDF::Destination::Type::Fit:
  151. type_str = "Fit";
  152. break;
  153. case PDF::Destination::Type::FitH:
  154. type_str = "FitH";
  155. break;
  156. case PDF::Destination::Type::FitV:
  157. type_str = "FitV";
  158. break;
  159. case PDF::Destination::Type::FitR:
  160. type_str = "FitR";
  161. break;
  162. case PDF::Destination::Type::FitB:
  163. type_str = "FitB";
  164. break;
  165. case PDF::Destination::Type::FitBH:
  166. type_str = "FitBH";
  167. break;
  168. case PDF::Destination::Type::FitBV:
  169. type_str = "FitBV";
  170. break;
  171. }
  172. StringBuilder param_builder;
  173. for (auto& param : destination.parameters)
  174. param_builder.appendff("{} ", param);
  175. auto str = String::formatted("{{ type={} page={} params={} }}", type_str, destination.page, param_builder.to_string());
  176. Formatter<StringView>::format(builder, str);
  177. }
  178. };
  179. template<>
  180. struct Formatter<PDF::OutlineItem> : Formatter<StringView> {
  181. void format(FormatBuilder& builder, PDF::OutlineItem const& item)
  182. {
  183. Formatter<StringView>::format(builder, item.to_string(0));
  184. }
  185. };
  186. template<>
  187. struct Formatter<PDF::OutlineDict> : Formatter<StringView> {
  188. void format(FormatBuilder& builder, PDF::OutlineDict const& dict)
  189. {
  190. StringBuilder child_builder;
  191. child_builder.append('[');
  192. for (auto& child : dict.children)
  193. child_builder.appendff("{}\n", child.to_string(2));
  194. child_builder.append(" ]");
  195. Formatter<StringView>::format(builder,
  196. String::formatted("OutlineDict {{\n count={}\n children={}\n}}", dict.count, child_builder.to_string()));
  197. }
  198. };
  199. }