Procházet zdrojové kódy

LibPDF: Parse graphics commands

Matthew Olsson před 4 roky
rodič
revize
101639e526

+ 137 - 0
Userland/Libraries/LibPDF/Command.h

@@ -0,0 +1,137 @@
+/*
+ * Copyright (c) 2021, Matthew Olsson <mattco@serenityos.org>
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#pragma once
+
+#include <AK/Format.h>
+#include <AK/String.h>
+#include <AK/StringBuilder.h>
+#include <LibPDF/Value.h>
+
+// X-macro table of the graphics commands known to LibPDF. The supplied macro is
+// invoked once per entry as (EnumeratorName, snake_case_name, symbol), where
+// `symbol` is the operator text as it is matched against the parsed input.
+// The "'" operator is not listed here; users of this table add it by hand
+// (presumably because a lone apostrophe cannot be passed as a macro argument).
+#define ENUMERATE_COMMANDS(ENUMERATOR) \
+    ENUMERATOR(SaveState, save_state, q) \
+    ENUMERATOR(RestoreState, restore_state, Q) \
+    ENUMERATOR(ConcatenateMatrix, concatenate_matrix, cm) \
+    ENUMERATOR(SetLineWidth, set_line_width, w) \
+    ENUMERATOR(SetLineCap, set_line_cap, J) \
+    ENUMERATOR(SetLineJoin, set_line_join, j) \
+    ENUMERATOR(SetMiterLimit, set_miter_limit, M) \
+    ENUMERATOR(SetDashPattern, set_dash_pattern, d) \
+    ENUMERATOR(PathBegin, path_begin, m) \
+    ENUMERATOR(PathEnd, path_end, n) \
+    ENUMERATOR(PathLine, path_line, l) \
+    ENUMERATOR(PathClose, path_close, h) \
+    ENUMERATOR(PathAppendRect, path_append_rect, re) \
+    ENUMERATOR(PathStroke, path_stroke, S) \
+    ENUMERATOR(PathCloseAndStroke, path_close_and_stroke, s) \
+    ENUMERATOR(PathFillNonZero, path_fill_nonzero, f) \
+    ENUMERATOR(PathFillNonZeroDeprecated, path_fill_nonzero_deprecated, F) \
+    ENUMERATOR(PathFillEvenOdd, path_fill_evenodd, f*) \
+    ENUMERATOR(PathFillStrokeNonZero, path_fill_stroke_nonzero, B) \
+    ENUMERATOR(PathFillStrokeEvenOdd, path_fill_stroke_evenodd, B*) \
+    ENUMERATOR(PathCloseFillStrokeNonZero, path_close_fill_stroke_nonzero, b) \
+    ENUMERATOR(PathCloseFillStrokeEvenOdd, path_close_fill_stroke_evenodd, b*) \
+    ENUMERATOR(TextSetCharSpace, text_set_char_space, Tc) \
+    ENUMERATOR(TextSetWordSpace, text_set_word_space, Tw) \
+    ENUMERATOR(TextSetHorizontalScale, text_set_horizontal_scale, Tz) \
+    ENUMERATOR(TextSetLeading, text_set_leading, TL) \
+    ENUMERATOR(TextSetFont, text_set_font, Tf) \
+    ENUMERATOR(TextSetRenderingMode, text_set_rendering_mode, Tr) \
+    ENUMERATOR(TextSetRise, text_set_rise, Ts) \
+    ENUMERATOR(TextBegin, text_begin, BT) \
+    ENUMERATOR(TextEnd, text_end, ET) \
+    ENUMERATOR(TextNextLineOffset, text_next_line_offset, Td) \
+    ENUMERATOR(TextNextLineAndSetLeading, text_next_line_and_set_leading, TD) \
+    ENUMERATOR(TextSetMatrixAndLineMatrix, text_set_matrix_and_line_matrix, Tm) \
+    ENUMERATOR(TextNextLine, text_next_line, T*) \
+    ENUMERATOR(TextShowString, text_show_string, Tj)
+
+namespace PDF {
+
+// One enumerator per graphics command: every ENUMERATE_COMMANDS entry, plus
+// the "'" operator, which is not part of that table and is appended by hand.
+enum class CommandType {
+#define COMMAND_TYPE_ENUMERATOR(name, snake_name, symbol) name,
+    ENUMERATE_COMMANDS(COMMAND_TYPE_ENUMERATOR)
+#undef COMMAND_TYPE_ENUMERATOR
+    TextNextLineShowString,
+};
+
+// A single parsed graphics command: the operator type together with the
+// argument values that were collected for it.
+class Command {
+public:
+    // Maps an operator's textual symbol (e.g. "cm", "f*", "'") to its
+    // CommandType. An unrecognized symbol is logged and then trips
+    // VERIFY_NOT_REACHED().
+    static CommandType command_type_from_symbol(const StringView& symbol_string)
+    {
+        // The apostrophe operator is not part of ENUMERATE_COMMANDS.
+        if (symbol_string == "'")
+            return CommandType::TextNextLineShowString;
+
+#define MATCH_SYMBOL(name, snake_name, symbol) \
+    if (symbol_string == #symbol)              \
+        return CommandType::name;
+        ENUMERATE_COMMANDS(MATCH_SYMBOL)
+#undef MATCH_SYMBOL
+
+        dbgln("unsupported graphics symbol {}", symbol_string);
+        VERIFY_NOT_REACHED();
+    }
+
+    // Returns the enumerator's name as a string literal (e.g. "SaveState").
+    static const char* command_name(CommandType command_name)
+    {
+        switch (command_name) {
+#define NAME_CASE(name, snake_name, symbol) \
+    case CommandType::name:                 \
+        return #name;
+            ENUMERATE_COMMANDS(NAME_CASE)
+#undef NAME_CASE
+        case CommandType::TextNextLineShowString:
+            return "TextNextLineShowString";
+        }
+
+        // Only reachable for a value outside the enumerator set.
+        VERIFY_NOT_REACHED();
+    }
+
+    // Returns the operator symbol for a command type (e.g. "q" for SaveState).
+    static const char* command_symbol(CommandType command_name)
+    {
+        switch (command_name) {
+#define SYMBOL_CASE(name, snake_name, symbol) \
+    case CommandType::name:                   \
+        return #symbol;
+            ENUMERATE_COMMANDS(SYMBOL_CASE)
+#undef SYMBOL_CASE
+        case CommandType::TextNextLineShowString:
+            return "'";
+        }
+
+        // Only reachable for a value outside the enumerator set.
+        VERIFY_NOT_REACHED();
+    }
+
+    // Takes ownership of `arguments` by moving them into the command.
+    Command(CommandType command_type, Vector<Value> arguments)
+        : m_command_type(command_type)
+        , m_arguments(move(arguments))
+    {
+    }
+
+    [[nodiscard]] ALWAYS_INLINE CommandType command_type() const { return m_command_type; }
+    [[nodiscard]] ALWAYS_INLINE const Vector<Value>& arguments() const { return m_arguments; }
+
+private:
+    CommandType m_command_type;
+    Vector<Value> m_arguments;
+};
+
+}
+
+namespace AK {
+
+// Formatter for PDF::Command: renders the command name followed by its
+// arguments as "Name [ arg1 arg2 ]" (or "Name [ ]" when there are none).
+template<>
+struct Formatter<PDF::Command> : Formatter<StringView> {
+    void format(FormatBuilder& format_builder, const PDF::Command& command)
+    {
+        StringBuilder builder;
+        // No trailing space after '[': each argument brings its own leading
+        // space, so adding one here would double the space after the bracket.
+        builder.appendff("{} [", PDF::Command::command_name(command.command_type()));
+        for (auto& argument : command.arguments())
+            builder.appendff(" {}", argument);
+        builder.append(" ]");
+        Formatter<StringView>::format(format_builder, builder.to_string());
+    }
+};
+
+}

+ 46 - 0
Userland/Libraries/LibPDF/Parser.cpp

@@ -19,11 +19,22 @@ static NonnullRefPtr<T> make_object(Args... args) requires(IsBaseOf<Object, T>)
     return adopt_ref(*new T(forward<Args>(args)...));
 }
 
+// Static entry point: parses `bytes` as graphics commands using a temporary
+// Parser built through the private bytes-only constructor.
+Vector<Command> Parser::parse_graphics_commands(const ReadonlyBytes& bytes)
+{
+    return Parser(bytes).parse_graphics_commands();
+}
+
 // Creates a parser that reads from `bytes`. The Badge<Document> parameter is
 // unused here; presumably it limits this overload to Document (AK::Badge idiom).
 Parser::Parser(Badge<Document>, const ReadonlyBytes& bytes)
     : m_reader(bytes)
 {
 }
 
+// Creates a parser that reads from `bytes`, without a Badge argument. It is
+// used by the static parse_graphics_commands() helper.
+Parser::Parser(const ReadonlyBytes& bytes)
+    : m_reader(bytes)
+{
+}
+
 bool Parser::perform_validation()
 {
     return !sloppy_is_linearized() && parse_header();
@@ -650,6 +661,41 @@ NonnullRefPtr<StreamObject> Parser::parse_stream(NonnullRefPtr<DictObject> dict)
     return make_object<StreamObject>(dict, bytes);
 }
 
+// Parses everything remaining in the reader as a sequence of graphics
+// commands. Values are collected as pending arguments until an operator is
+// reached; the operator and the arguments gathered so far are then emitted as
+// one Command. Arguments left over after the last operator are not returned.
+// An unrecognized operator aborts inside Command::command_type_from_symbol().
+Vector<Command> Parser::parse_graphics_commands()
+{
+    Vector<Command> commands;
+    Vector<Value> command_args;
+
+    constexpr static auto is_command_char = [](char ch) {
+        // The <ctype.h> classifiers have undefined behavior for negative
+        // arguments other than EOF, so bytes >= 0x80 must go through
+        // unsigned char first when plain char is signed.
+        return isalpha(static_cast<unsigned char>(ch)) || ch == '*' || ch == '\'';
+    };
+
+    while (!m_reader.done()) {
+        auto ch = m_reader.peek();
+        if (is_command_char(ch)) {
+            // Consume the whole run of operator characters, stopping at the
+            // end of input so that peek() is never called past the end.
+            auto command_start = m_reader.offset();
+            while (is_command_char(ch)) {
+                consume();
+                if (m_reader.done())
+                    break;
+                ch = m_reader.peek();
+            }
+
+            auto command_string = StringView(m_reader.bytes().slice(command_start, m_reader.offset() - command_start));
+            auto command_type = Command::command_type_from_symbol(command_string);
+            commands.append(Command(command_type, move(command_args)));
+            // Put the moved-from vector back into a known-empty state.
+            command_args = Vector<Value>();
+            consume_whitespace();
+
+            continue;
+        }
+
+        // Anything that does not start an operator is an argument value.
+        command_args.append(parse_value());
+    }
+
+    return commands;
+}
+
 bool Parser::matches_eol() const
 {
     return m_reader.matches_any(0xa, 0xd);

+ 7 - 0
Userland/Libraries/LibPDF/Parser.h

@@ -7,6 +7,7 @@
 #pragma once
 
 #include <AK/NonnullRefPtrVector.h>
+#include <LibPDF/Command.h>
 #include <LibPDF/Object.h>
 #include <LibPDF/Reader.h>
 #include <LibPDF/XRefTable.h>
@@ -17,6 +18,8 @@ class Document;
 
 class Parser {
 public:
+    static Vector<Command> parse_graphics_commands(const ReadonlyBytes&);
+
     Parser(Badge<Document>, const ReadonlyBytes&);
 
     void set_document(RefPtr<Document> document) { m_document = document; }
@@ -34,6 +37,8 @@ public:
     RefPtr<DictObject> conditionally_parse_page_tree_node_at_offset(size_t offset);
 
 private:
+    explicit Parser(const ReadonlyBytes&);
+
     bool parse_header();
     XRefTable parse_xref_table();
     NonnullRefPtr<DictObject> parse_file_trailer();
@@ -64,6 +69,8 @@ private:
     NonnullRefPtr<DictObject> parse_dict();
     NonnullRefPtr<StreamObject> parse_stream(NonnullRefPtr<DictObject> dict);
 
+    Vector<Command> parse_graphics_commands();
+
     bool matches_eol() const;
     bool matches_whitespace() const;
     bool matches_number() const;