Explorar o código

LibPDF: Add Type0 and TrueType fonts

Matthew Olsson hai 3 anos
pai
achega
4d0f74a15c

+ 2 - 0
Userland/Libraries/LibPDF/CMakeLists.txt

@@ -6,6 +6,8 @@ set(SOURCES
     Encryption.cpp
     Filter.cpp
     Fonts/PDFFont.cpp
+    Fonts/TrueTypeFont.cpp
+    Fonts/Type0Font.cpp
     Fonts/Type1Font.cpp
     ObjectDerivatives.cpp
     Parser.cpp

+ 2 - 0
Userland/Libraries/LibPDF/CommonNames.cpp

@@ -12,4 +12,6 @@ namespace PDF {
 ENUMERATE_COMMON_NAMES(ENUMERATE)
 #undef ENUMERATE
 
+// Name of the "Identity-H" encoding. Defined by hand rather than through
+// ENUMERATE_COMMON_NAMES, presumably because the hyphen cannot appear in an
+// identifier generated by that macro.
+FlyString CommonNames::IdentityH = "Identity-H";
+
 }

+ 10 - 0
Userland/Libraries/LibPDF/CommonNames.h

@@ -23,13 +23,17 @@
     A(CA)                         \
     A(CCITTFaxDecode)             \
     A(CalRGB)                     \
+    A(CIDSystemInfo)              \
+    A(CIDToGIDMap)                \
     A(ColorSpace)                 \
     A(Contents)                   \
     A(Count)                      \
     A(CropBox)                    \
     A(Crypt)                      \
     A(D)                          \
+    A(DW)                         \
     A(DCTDecode)                  \
+    A(DescendantFonts)            \
     A(Dest)                       \
     A(Dests)                      \
     A(DeviceCMYK)                 \
@@ -87,6 +91,7 @@
     A(O)                          \
     A(OP)                         \
     A(OPM)                        \
+    A(Ordering)                   \
     A(Outlines)                   \
     A(P)                          \
     A(Pages)                      \
@@ -95,6 +100,7 @@
     A(Prev)                       \
     A(R)                          \
     A(RI)                         \
+    A(Registry)                   \
     A(Resources)                  \
     A(Root)                       \
     A(Rotate)                     \
@@ -103,6 +109,7 @@
     A(SM)                         \
     A(SMask)                      \
     A(Subtype)                    \
+    A(Supplement)                 \
     A(T)                          \
     A(TK)                         \
     A(TR)                         \
@@ -114,6 +121,7 @@
     A(UCR)                        \
     A(UseBlackPTComp)             \
     A(UserUnit)                   \
+    A(W)                          \
     A(WhitePoint)                 \
     A(Widths)                     \
     A(XYZ)                        \
@@ -127,6 +135,8 @@ public:
 #define ENUMERATE(name) static FlyString name;
     ENUMERATE_COMMON_NAMES(ENUMERATE)
 #undef ENUMERATE
+
+    static FlyString IdentityH;
 };
 
 }

+ 6 - 0
Userland/Libraries/LibPDF/Error.h

@@ -18,6 +18,12 @@ public:
         MalformedPDF,
     };
 
+    // Wraps an AK::Error as a PDF error of Type::Internal, embedding the AK
+    // error's string literal in the message. The constructor is implicit
+    // (presumably so AK errors can be propagated out of functions that return
+    // a PDF error type -- confirm against callers).
+    // NOTE(review): only string_literal() is consulted; confirm what it yields
+    //               for AK::Errors that were not created from a string literal.
+    Error(AK::Error error)
+        : m_type(Type::Internal)
+        , m_message(String::formatted("Internal error while processing PDF file: {}", error.string_literal()))
+    {
+    }
+
     Error(Type type, String const& message)
         : m_type(type)
     {

+ 6 - 0
Userland/Libraries/LibPDF/Fonts/PDFFont.cpp

@@ -6,6 +6,8 @@
 
 #include <LibPDF/CommonNames.h>
 #include <LibPDF/Fonts/PDFFont.h>
+#include <LibPDF/Fonts/TrueTypeFont.h>
+#include <LibPDF/Fonts/Type0Font.h>
 #include <LibPDF/Fonts/Type1Font.h>
 
 namespace PDF {
@@ -14,8 +16,12 @@ PDFErrorOr<NonnullRefPtr<PDFFont>> PDFFont::create(Document* document, NonnullRe
 {
     auto subtype = TRY(dict->get_name(document, CommonNames::Subtype))->name();
 
+    if (subtype == "Type0")
+        return TRY(Type0Font::create(document, dict));
     if (subtype == "Type1")
         return TRY(Type1Font::create(document, dict));
+    if (subtype == "TrueType")
+        return TRY(TrueTypeFont::create(document, dict));
 
     dbgln("Unknown font subtype: {}", subtype);
     TODO();

+ 65 - 0
Userland/Libraries/LibPDF/Fonts/TrueTypeFont.cpp

@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2022, Matthew Olsson <mattco@serenityos.org>
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#include <LibPDF/CommonNames.h>
+#include <LibPDF/Fonts/TrueTypeFont.h>
+#include <LibPDF/Fonts/Type1Font.h>
+
+namespace PDF {
+
+// Builds a PDF TrueType font from its font dictionary.
+//
+// The embedded font program (FontFile2) is an entry of the font descriptor,
+// not of the font dictionary itself, so that is where it is looked up. When
+// the descriptor carries no embedded program, the dictionary is handed to
+// Type1Font::create() instead. Errors from any lookup or from loading the
+// font program are propagated to the caller.
+PDFErrorOr<NonnullRefPtr<PDFFont>> TrueTypeFont::create(Document* document, NonnullRefPtr<DictObject> dict)
+{
+    auto font_descriptor = TRY(dict->get_dict(document, CommonNames::FontDescriptor));
+
+    if (!font_descriptor->contains(CommonNames::FontFile2)) {
+        // FIXME: The TTF is one of the standard 14 fonts. These should be built into
+        //        the system, and their attributes hardcoded. Until we have them, just
+        //        treat this as a Type1 font (which are very similar to TTF fonts)
+        return TRY(Type1Font::create(document, dict));
+    }
+
+    auto font_file = TRY(font_descriptor->get_stream(document, CommonNames::FontFile2));
+    // NOTE(review): the loader's name says the memory is externally owned, so the
+    //               stream's bytes presumably have to outlive the returned font --
+    //               confirm that the document keeps the stream object alive.
+    auto ttf_font = TRY(TTF::Font::try_load_from_externally_owned_memory(font_file->bytes()));
+    auto data = TRY(Type1Font::parse_data(document, dict));
+
+    return adopt_ref(*new TrueTypeFont(ttf_font, move(data)));
+}
+
+// Takes ownership of the loaded TrueType font and the parsed font data. Both
+// parameters are received by value, so they are moved into the members
+// instead of being copied a second time.
+TrueTypeFont::TrueTypeFont(NonnullRefPtr<TTF::Font> ttf_font, Type1Font::Data data)
+    : m_ttf_font(move(ttf_font))
+    , m_data(move(data))
+{
+}
+
+// Translates a character code into a code point by asking the font's encoding
+// for the matching descriptor.
+u32 TrueTypeFont::char_code_to_code_point(u16 char_code) const
+{
+    // Fonts that carry a to_unicode entry are not handled yet.
+    if (m_data.to_unicode) {
+        TODO();
+    }
+
+    return m_data.encoding->get_char_code_descriptor(char_code).code_point;
+}
+
+// Returns the width of a character code divided by 1000. A width from the
+// font data's widths table wins; otherwise the advance width of the matching
+// glyph in the TrueType font, scaled for font_size, is used.
+float TrueTypeFont::get_char_width(u16 char_code, float font_size) const
+{
+    // Both sources are narrowed to u16 before the division.
+    auto const to_thousandths = [](u16 glyph_width) {
+        return static_cast<float>(glyph_width) / 1000.0f;
+    };
+
+    auto explicit_width = m_data.widths.get(char_code);
+    if (explicit_width.has_value())
+        return to_thousandths(explicit_width.value());
+
+    // FIXME: Should we do something with m_data.missing_width here?
+    float units_per_em = m_ttf_font->units_per_em();
+    auto scale = (font_size * DEFAULT_DPI) / (POINTS_PER_INCH * units_per_em);
+
+    auto glyph_id = m_ttf_font->glyph_id_for_code_point(char_code_to_code_point(char_code));
+    auto glyph_metrics = m_ttf_font->glyph_metrics(glyph_id, scale, scale);
+
+    u16 const fallback_width = glyph_metrics.advance_width;
+    return to_thousandths(fallback_width);
+}
+
+}

+ 29 - 0
Userland/Libraries/LibPDF/Fonts/TrueTypeFont.h

@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2022, Matthew Olsson <mattco@serenityos.org>
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#pragma once
+
+#include <LibGfx/TrueTypeFont/Font.h>
+#include <LibPDF/Fonts/Type1Font.h>
+
+namespace PDF {
+
+// A PDFFont backed by a TrueType font program, combined with the font data
+// (encoding, widths, ...) parsed by Type1Font::parse_data().
+class TrueTypeFont : public PDFFont {
+public:
+    // Creates a font from a font dictionary; see the definition for the
+    // fallback taken when no font program is embedded.
+    static PDFErrorOr<NonnullRefPtr<PDFFont>> create(Document*, NonnullRefPtr<DictObject>);
+
+    TrueTypeFont(NonnullRefPtr<TTF::Font> ttf_font, Type1Font::Data);
+    ~TrueTypeFont() override = default;
+
+    // Maps a character code to a code point via the font's encoding.
+    u32 char_code_to_code_point(u16 char_code) const override;
+    // Returns the character's width divided by 1000.
+    float get_char_width(u16 char_code, float font_size) const override;
+
+private:
+    // The loaded TrueType font, used for glyph metrics.
+    NonnullRefPtr<TTF::Font> m_ttf_font;
+    // Encoding, widths and related data shared with the Type1 implementation.
+    Type1Font::Data m_data;
+};
+
+}

+ 97 - 0
Userland/Libraries/LibPDF/Fonts/Type0Font.cpp

@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2022, Matthew Olsson <mattco@serenityos.org>
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#include <LibPDF/CommonNames.h>
+#include <LibPDF/Fonts/Type0Font.h>
+
+namespace PDF {
+
+// Builds a composite (Type0) font from its font dictionary.
+//
+// Only the Identity-H encoding is handled. CIDSystemInfo, the default width
+// (DW) and the per-code widths (W) are read from the first descendant font.
+// Lookup failures are propagated, and a W array whose range entry is cut
+// short yields a MalformedPDF error instead of reading past the array.
+PDFErrorOr<NonnullRefPtr<Type0Font>> Type0Font::create(Document* document, NonnullRefPtr<DictObject> dict)
+{
+    // FIXME: Support arbitrary CMaps
+    auto cmap_value = TRY(dict->get_object(document, CommonNames::Encoding));
+    if (!cmap_value->is<NameObject>() || cmap_value->cast<NameObject>()->name() != CommonNames::IdentityH)
+        TODO();
+
+    auto descendant_font_value = TRY(dict->get_array(document, CommonNames::DescendantFonts));
+    auto descendant_font = TRY(descendant_font_value->get_dict_at(document, 0));
+
+    auto system_info_dict = TRY(descendant_font->get_dict(document, CommonNames::CIDSystemInfo));
+    auto registry = TRY(system_info_dict->get_string(document, CommonNames::Registry))->string();
+    auto ordering = TRY(system_info_dict->get_string(document, CommonNames::Ordering))->string();
+    u8 supplement = system_info_dict->get_value(CommonNames::Supplement).get<int>();
+    CIDSystemInfo system_info { registry, ordering, supplement };
+
+    // Not used yet; fetching it still reports a missing descriptor as an error.
+    auto font_descriptor = TRY(descendant_font->get_dict(document, CommonNames::FontDescriptor));
+
+    u16 default_width = 1000;
+    if (descendant_font->contains(CommonNames::DW))
+        default_width = descendant_font->get_value(CommonNames::DW).get<int>();
+
+    HashMap<u16, u16> widths;
+
+    if (descendant_font->contains(CommonNames::W)) {
+        // Propagate a bad W entry like every other lookup here instead of aborting.
+        auto widths_array = TRY(descendant_font->get_array(document, CommonNames::W));
+        Optional<u16> pending_code;
+
+        // The array mixes two entry forms:
+        //     first [ w1 w2 ... ]   consecutive codes starting at `first`
+        //     first last w          every code in [first, last] has width w
+        for (size_t i = 0; i < widths_array->size(); i++) {
+            auto& value = widths_array->at(i);
+            if (!pending_code.has_value()) {
+                pending_code = value.get<int>();
+            } else if (value.has<NonnullRefPtr<Object>>()) {
+                auto array = value.get<NonnullRefPtr<Object>>()->cast<ArrayObject>();
+                auto code = pending_code.release_value();
+                for (auto& width : *array)
+                    widths.set(code++, width.get<int>());
+            } else {
+                // The range form needs one more element for the width.
+                if (i + 1 >= widths_array->size())
+                    return Error { Error::Type::MalformedPDF, "Truncated range entry in the W array of a Type0 font" };
+
+                int first_code = pending_code.release_value();
+                int last_code = value.get<int>();
+                auto width = widths_array->at(i + 1).get<int>();
+
+                // Count with an int and cap the range at the largest u16 key: a u16
+                // counter wraps around and never terminates once last_code >= 0xffff.
+                if (last_code > 0xffff)
+                    last_code = 0xffff;
+                for (int code = first_code; code <= last_code; code++)
+                    widths.set(static_cast<u16>(code), width);
+
+                i++;
+            }
+        }
+    }
+
+    // NOTE(review): the PDF specification lists CIDToGIDMap as an entry of the
+    //               CIDFont (descendant) dictionary; confirm whether this lookup
+    //               should use descendant_font rather than dict.
+    if (dict->contains(CommonNames::CIDToGIDMap)) {
+        auto value = TRY(dict->get_object(document, CommonNames::CIDToGIDMap));
+        if (value->is<StreamObject>()) {
+            TODO();
+        } else if (value->cast<NameObject>()->name() != "Identity") {
+            TODO();
+        }
+    }
+
+    return adopt_ref(*new Type0Font(system_info, widths, default_width));
+}
+
+// Stores copies of the CID system info and the width table, together with the
+// width to report for character codes that have no entry in that table.
+Type0Font::Type0Font(CIDSystemInfo const& system_info, HashMap<u16, u16> const& widths, u16 missing_width)
+    : m_system_info(system_info)
+    , m_widths(widths)
+    , m_missing_width(missing_width)
+{
+}
+
+// The character code is returned unchanged, widened to 32 bits.
+u32 Type0Font::char_code_to_code_point(u16 char_code) const
+{
+    return static_cast<u32>(char_code);
+}
+
+// Returns the width recorded for char_code, or the missing width when the
+// table has no entry, divided by 1000. The font size does not affect it.
+float Type0Font::get_char_width(u16 char_code, float) const
+{
+    auto const explicit_width = m_widths.get(char_code);
+    u16 const glyph_width = explicit_width.has_value() ? explicit_width.value() : m_missing_width;
+    return static_cast<float>(glyph_width) / 1000.0f;
+}
+
+}

+ 35 - 0
Userland/Libraries/LibPDF/Fonts/Type0Font.h

@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2022, Matthew Olsson <mattco@serenityos.org>
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#pragma once
+
+#include <LibPDF/Fonts/PDFFont.h>
+
+namespace PDF {
+
+// The Registry, Ordering and Supplement entries read from a descendant font's
+// CIDSystemInfo dictionary.
+struct CIDSystemInfo {
+    String registry;
+    String ordering;
+    // Stored as u8; the value read from the dictionary is narrowed on assignment.
+    u8 supplement;
+};
+
+// A composite (Type0) PDF font. It keeps the CID system info and a width table
+// taken from its first descendant font.
+class Type0Font : public PDFFont {
+public:
+    // Creates a font from a font dictionary; only the Identity-H encoding is
+    // handled so far.
+    static PDFErrorOr<NonnullRefPtr<Type0Font>> create(Document*, NonnullRefPtr<DictObject>);
+
+    Type0Font(CIDSystemInfo const&, HashMap<u16, u16> const& widths, u16 missing_width);
+    ~Type0Font() override = default;
+
+    // Returns the character code unchanged.
+    u32 char_code_to_code_point(u16 char_code) const override;
+    // Returns the table width (or the missing width) divided by 1000; the
+    // font size is ignored.
+    float get_char_width(u16 char_code, float font_size) const override;
+
+private:
+    CIDSystemInfo m_system_info;
+    // Explicit widths keyed by character code.
+    HashMap<u16, u16> m_widths;
+    // Width used for character codes absent from m_widths.
+    u16 m_missing_width;
+};
+
+}