Forráskód Böngészése

LibPDF: Simplify Encoding to align with simple font requirements

All "Simple Fonts" in PDF (all but Type0 fonts) have the property that
glyphs are selected with single-byte character codes. This means that
the Encoding objects should use u8 for representing these character
codes. Moreover, as mentioned in a previous commit, there is no need to
store the Unicode code point associated with a character (which was in
turn wrongly associated with a glyph).

This commit greatly simplifies the Encoding class. Namely it:

 * Removes the unnecessary CharDescriptor class.
 * Changes the internal maps to be u8 -> FlyString and vice versa,
   effectively providing two-way lookups.
 * Adds a new method to set a two-way u8 -> FlyString mapping and uses
   it in all possible places.
 * Simplifies the creation of Encoding objects.
 * Changes how the WinAnsi special treatment for bullet points is
   implemented.
Rodrigo Tobar 2 éve
szülő
commit
286e3e6872

+ 29 - 42
Userland/Libraries/LibPDF/Encoding.cpp

@@ -11,15 +11,9 @@
 
 
 namespace PDF {
 namespace PDF {
 
 
-PDFErrorOr<NonnullRefPtr<Encoding>> Encoding::create(HashMap<u16, CharDescriptor> descriptors)
+NonnullRefPtr<Encoding> Encoding::create()
 {
 {
-    auto encoding = adopt_ref(*new Encoding());
-    encoding->m_descriptors = descriptors;
-
-    for (auto& descriptor : descriptors)
-        encoding->m_name_mapping.set(descriptor.value.name, descriptor.value.code_point);
-
-    return encoding;
+    return adopt_ref(*new Encoding());
 }
 }
 
 
 PDFErrorOr<NonnullRefPtr<Encoding>> Encoding::from_object(Document* document, NonnullRefPtr<Object> const& obj)
 PDFErrorOr<NonnullRefPtr<Encoding>> Encoding::from_object(Document* document, NonnullRefPtr<Object> const& obj)
@@ -49,8 +43,8 @@ PDFErrorOr<NonnullRefPtr<Encoding>> Encoding::from_object(Document* document, No
 
 
     auto encoding = adopt_ref(*new Encoding());
     auto encoding = adopt_ref(*new Encoding());
 
 
-    encoding->m_descriptors = base_encoding->descriptors();
-    encoding->m_name_mapping = base_encoding->name_mapping();
+    encoding->m_descriptors = base_encoding->m_descriptors;
+    encoding->m_name_mapping = base_encoding->m_name_mapping;
 
 
     auto differences_array = TRY(dict->get_array(document, CommonNames::Differences));
     auto differences_array = TRY(dict->get_array(document, CommonNames::Differences));
 
 
@@ -66,8 +60,7 @@ PDFErrorOr<NonnullRefPtr<Encoding>> Encoding::from_object(Document* document, No
             VERIFY(!first);
             VERIFY(!first);
             auto& object = item.get<NonnullRefPtr<Object>>();
             auto& object = item.get<NonnullRefPtr<Object>>();
             auto name = object->cast<NameObject>()->name();
             auto name = object->cast<NameObject>()->name();
-
-            encoding->m_descriptors.set(current_code_point, { name, base_encoding->m_name_mapping.ensure(name) });
+            encoding->set(current_code_point, name);
             current_code_point++;
             current_code_point++;
         }
         }
     }
     }
@@ -75,13 +68,18 @@ PDFErrorOr<NonnullRefPtr<Encoding>> Encoding::from_object(Document* document, No
     return encoding;
     return encoding;
 }
 }
 
 
+void Encoding::set(CharCodeType char_code, DeprecatedFlyString const& glyph_name)
+{
+    m_descriptors.set(char_code, glyph_name);
+    m_name_mapping.set(glyph_name, char_code);
+}
+
 NonnullRefPtr<Encoding> Encoding::standard_encoding()
 NonnullRefPtr<Encoding> Encoding::standard_encoding()
 {
 {
     static NonnullRefPtr<Encoding> encoding = adopt_ref(*new Encoding());
     static NonnullRefPtr<Encoding> encoding = adopt_ref(*new Encoding());
     if (encoding->m_descriptors.is_empty()) {
     if (encoding->m_descriptors.is_empty()) {
 #define ENUMERATE(name, standard_code, mac_code, win_code, pdf_code) \
 #define ENUMERATE(name, standard_code, mac_code, win_code, pdf_code) \
-    encoding->m_descriptors.set(standard_code, { #name, 0 });        \
-    encoding->m_name_mapping.set(#name, standard_code);
+    encoding->set(standard_code, #name);
         ENUMERATE_LATIN_CHARACTER_SET(ENUMERATE)
         ENUMERATE_LATIN_CHARACTER_SET(ENUMERATE)
 #undef ENUMERATE
 #undef ENUMERATE
     }
     }
@@ -94,8 +92,7 @@ NonnullRefPtr<Encoding> Encoding::mac_encoding()
     static NonnullRefPtr<Encoding> encoding = adopt_ref(*new Encoding());
     static NonnullRefPtr<Encoding> encoding = adopt_ref(*new Encoding());
     if (encoding->m_descriptors.is_empty()) {
     if (encoding->m_descriptors.is_empty()) {
 #define ENUMERATE(name, standard_code, mac_code, win_code, pdf_code) \
 #define ENUMERATE(name, standard_code, mac_code, win_code, pdf_code) \
-    encoding->m_descriptors.set(mac_code, { #name, 0 });             \
-    encoding->m_name_mapping.set(#name, mac_code);
+    encoding->set(mac_code, #name);
         ENUMERATE_LATIN_CHARACTER_SET(ENUMERATE)
         ENUMERATE_LATIN_CHARACTER_SET(ENUMERATE)
 #undef ENUMERATE
 #undef ENUMERATE
     }
     }
@@ -108,13 +105,20 @@ NonnullRefPtr<Encoding> Encoding::windows_encoding()
     static NonnullRefPtr<Encoding> encoding = adopt_ref(*new Encoding());
     static NonnullRefPtr<Encoding> encoding = adopt_ref(*new Encoding());
     if (encoding->m_descriptors.is_empty()) {
     if (encoding->m_descriptors.is_empty()) {
 #define ENUMERATE(name, standard_code, mac_code, win_code, pdf_code) \
 #define ENUMERATE(name, standard_code, mac_code, win_code, pdf_code) \
-    encoding->m_descriptors.set(win_code, { #name, 0 });             \
-    encoding->m_name_mapping.set(#name, win_code);
+    encoding->set(win_code, #name);
         ENUMERATE_LATIN_CHARACTER_SET(ENUMERATE)
         ENUMERATE_LATIN_CHARACTER_SET(ENUMERATE)
 #undef ENUMERATE
 #undef ENUMERATE
-        encoding->m_windows = true;
-    }
 
 
+        // PDF Annex D table D.2, note 3:
+        // In WinAnsiEncoding, all unused codes greater than 40 (octal) map to the bullet character. However, only
+        // code 225 (octal) shall be specifically assigned to the bullet character; other codes are subject to future re-assignment.
+        //
+        // Since CharCodeType is u8 *and* we need to include 255, we iterate in reverse order to have more readable code.
+        for (CharCodeType char_code = 255; char_code > 040; char_code--) {
+            if (!encoding->m_descriptors.contains(char_code))
+                encoding->set(char_code, "bullet");
+        }
+    }
     return encoding;
     return encoding;
 }
 }
 
 
@@ -123,8 +127,7 @@ NonnullRefPtr<Encoding> Encoding::pdf_doc_encoding()
     static NonnullRefPtr<Encoding> encoding = adopt_ref(*new Encoding());
     static NonnullRefPtr<Encoding> encoding = adopt_ref(*new Encoding());
     if (encoding->m_descriptors.is_empty()) {
     if (encoding->m_descriptors.is_empty()) {
 #define ENUMERATE(name, standard_code, mac_code, win_code, pdf_code) \
 #define ENUMERATE(name, standard_code, mac_code, win_code, pdf_code) \
-    encoding->m_descriptors.set(pdf_code, { #name, 0 });             \
-    encoding->m_name_mapping.set(#name, pdf_code);
+    encoding->set(pdf_code, #name);
         ENUMERATE_LATIN_CHARACTER_SET(ENUMERATE)
         ENUMERATE_LATIN_CHARACTER_SET(ENUMERATE)
 #undef ENUMERATE
 #undef ENUMERATE
     }
     }
@@ -136,9 +139,8 @@ NonnullRefPtr<Encoding> Encoding::symbol_encoding()
 {
 {
     static NonnullRefPtr<Encoding> encoding = adopt_ref(*new Encoding());
     static NonnullRefPtr<Encoding> encoding = adopt_ref(*new Encoding());
     if (encoding->m_descriptors.is_empty()) {
     if (encoding->m_descriptors.is_empty()) {
-#define ENUMERATE(name, code)                        \
-    encoding->m_descriptors.set(code, { #name, 0 }); \
-    encoding->m_name_mapping.set(#name, code);
+#define ENUMERATE(name, code) \
+    encoding->set(code, #name);
         ENUMERATE_SYMBOL_CHARACTER_SET(ENUMERATE)
         ENUMERATE_SYMBOL_CHARACTER_SET(ENUMERATE)
 #undef ENUMERATE
 #undef ENUMERATE
     }
     }
@@ -150,21 +152,14 @@ NonnullRefPtr<Encoding> Encoding::zapf_encoding()
 {
 {
     static NonnullRefPtr<Encoding> encoding = adopt_ref(*new Encoding());
     static NonnullRefPtr<Encoding> encoding = adopt_ref(*new Encoding());
     if (encoding->m_descriptors.is_empty()) {
     if (encoding->m_descriptors.is_empty()) {
-#define ENUMERATE(name, code)                        \
-    encoding->m_descriptors.set(code, { #name, 0 }); \
-    encoding->m_name_mapping.set(#name, code);
+#define ENUMERATE(name, code) \
+    encoding->set(code, #name);
         ENUMERATE_ZAPF_DINGBATS_CHARACTER_SET(ENUMERATE)
         ENUMERATE_ZAPF_DINGBATS_CHARACTER_SET(ENUMERATE)
 #undef ENUMERATE
 #undef ENUMERATE
     }
     }
-
     return encoding;
     return encoding;
 }
 }
 
 
-CharDescriptor const& Encoding::get_char_code_descriptor(u16 char_code) const
-{
-    return const_cast<Encoding*>(this)->m_descriptors.ensure(char_code);
-}
-
 u16 Encoding::get_char_code(DeprecatedString const& name) const
 u16 Encoding::get_char_code(DeprecatedString const& name) const
 {
 {
     auto code_iterator = m_name_mapping.find(name);
     auto code_iterator = m_name_mapping.find(name);
@@ -173,12 +168,4 @@ u16 Encoding::get_char_code(DeprecatedString const& name) const
     return 0;
     return 0;
 }
 }
 
 
-bool Encoding::should_map_to_bullet(u16 char_code) const
-{
-    // PDF Annex D table D.2, note 3:
-    // In WinAnsiEncoding, all unused codes greater than 40 (octal) map to the bullet character. However, only
-    // code 225 (octal) shall be specifically assigned to the bullet character; other codes are subject to future re-assignment.
-    return m_windows && char_code > 040 && !m_descriptors.contains(char_code);
-}
-
 }
 }

+ 6 - 13
Userland/Libraries/LibPDF/Encoding.h

@@ -625,14 +625,10 @@
 
 
 namespace PDF {
 namespace PDF {
 
 
-struct CharDescriptor {
-    DeprecatedString name;
-    u32 code_point;
-};
-
 class Encoding : public RefCounted<Encoding> {
 class Encoding : public RefCounted<Encoding> {
 public:
 public:
-    static PDFErrorOr<NonnullRefPtr<Encoding>> create(HashMap<u16, CharDescriptor> descriptors);
+    using CharCodeType = u8;
+    static NonnullRefPtr<Encoding> create();
     static PDFErrorOr<NonnullRefPtr<Encoding>> from_object(Document*, NonnullRefPtr<Object> const&);
     static PDFErrorOr<NonnullRefPtr<Encoding>> from_object(Document*, NonnullRefPtr<Object> const&);
 
 
     static NonnullRefPtr<Encoding> standard_encoding();
     static NonnullRefPtr<Encoding> standard_encoding();
@@ -642,17 +638,14 @@ public:
     static NonnullRefPtr<Encoding> symbol_encoding();
     static NonnullRefPtr<Encoding> symbol_encoding();
     static NonnullRefPtr<Encoding> zapf_encoding();
     static NonnullRefPtr<Encoding> zapf_encoding();
 
 
-    HashMap<u16, CharDescriptor> const& descriptors() const { return m_descriptors; }
-    HashMap<DeprecatedString, u16> const& name_mapping() const { return m_name_mapping; }
+    HashMap<DeprecatedString, CharCodeType> const& name_mapping() const { return m_name_mapping; }
 
 
     u16 get_char_code(DeprecatedString const&) const;
     u16 get_char_code(DeprecatedString const&) const;
-    CharDescriptor const& get_char_code_descriptor(u16 char_code) const;
-
-    bool should_map_to_bullet(u16 char_code) const;
+    void set(CharCodeType char_code, DeprecatedFlyString const& glyph_name);
 
 
 protected:
 protected:
-    HashMap<u16, CharDescriptor> m_descriptors;
-    HashMap<DeprecatedString, u16> m_name_mapping;
+    HashMap<CharCodeType, DeprecatedFlyString> m_descriptors;
+    HashMap<DeprecatedString, CharCodeType> m_name_mapping;
 
 
     bool m_windows { false };
     bool m_windows { false };
 };
 };

+ 4 - 4
Userland/Libraries/LibPDF/Fonts/CFF.cpp

@@ -121,19 +121,19 @@ PDFErrorOr<NonnullRefPtr<CFF>> CFF::create(ReadonlyBytes const& cff_bytes, RefPt
         }
         }
         cff->set_encoding(move(encoding));
         cff->set_encoding(move(encoding));
     } else {
     } else {
-        HashMap<u16, CharDescriptor> descriptors;
+        auto encoding = Encoding::create();
         for (size_t i = 0; i < glyphs.size(); i++) {
         for (size_t i = 0; i < glyphs.size(); i++) {
             if (i == 0) {
             if (i == 0) {
                 TRY(cff->add_glyph(0, move(glyphs[0])));
                 TRY(cff->add_glyph(0, move(glyphs[0])));
-                descriptors.set(0, CharDescriptor { ".notdef", 0 });
+                encoding->set(0, ".notdef");
                 continue;
                 continue;
             }
             }
             auto code = encoding_codes[i - 1];
             auto code = encoding_codes[i - 1];
             auto char_name = charset[i - 1];
             auto char_name = charset[i - 1];
             TRY(cff->add_glyph(code, move(glyphs[i])));
             TRY(cff->add_glyph(code, move(glyphs[i])));
-            descriptors.set(code, CharDescriptor { char_name, code });
+            encoding->set(code, char_name);
         }
         }
-        cff->set_encoding(TRY(Encoding::create(descriptors)));
+        cff->set_encoding(move(encoding));
     }
     }
 
 
     return cff;
     return cff;

+ 4 - 5
Userland/Libraries/LibPDF/Fonts/PS1FontProgram.cpp

@@ -36,19 +36,18 @@ PDFErrorOr<NonnullRefPtr<Type1FontProgram>> PS1FontProgram::create(ReadonlyBytes
         if (TRY(parse_word(reader)) == "StandardEncoding") {
         if (TRY(parse_word(reader)) == "StandardEncoding") {
             font_program->set_encoding(Encoding::standard_encoding());
             font_program->set_encoding(Encoding::standard_encoding());
         } else {
         } else {
-            HashMap<u16, CharDescriptor> descriptors;
-
+            auto encoding = Encoding::create();
             while (reader.remaining()) {
             while (reader.remaining()) {
                 auto word = TRY(parse_word(reader));
                 auto word = TRY(parse_word(reader));
                 if (word == "readonly") {
                 if (word == "readonly") {
                     break;
                     break;
                 } else if (word == "dup") {
                 } else if (word == "dup") {
-                    u32 char_code = TRY(parse_int(reader));
+                    u8 char_code = TRY(parse_int(reader));
                     auto name = TRY(parse_word(reader));
                     auto name = TRY(parse_word(reader));
-                    descriptors.set(char_code, { name.starts_with('/') ? name.substring_view(1) : name.view(), char_code });
+                    encoding->set(char_code, name.starts_with('/') ? name.substring_view(1) : name.view());
                 }
                 }
             }
             }
-            font_program->set_encoding(TRY(Encoding::create(descriptors)));
+            font_program->set_encoding(move(encoding));
         }
         }
     }
     }