Browse Source

LibPDF: Add more built-in SIDs

The first iteration had enough SIDs to display simple documents, but
as we tried more and more documents we started to need more of these
SIDs to be properly defined. Adding them is a copy/paste exercise from
the CFF specification document, which is tedious, so it will continue
in small increments.

This commit fills all the gaps up to SID 228, which covers the whole
ISOAdobe charset and should be enough for most use cases. Since this is
a contiguous range starting at 0, we now use an Array instead of a Map
to store these names, which should be more performant. Also, to
simplify things, I've moved the Array out of the CFF class, making it a
plain static variable, which allows us to use template type deduction.
Rodrigo Tobar 2 years ago
parent
commit
c4507bb56e
2 changed files with 235 additions and 95 deletions
  1. 235 93
      Userland/Libraries/LibPDF/Fonts/CFF.cpp
  2. 0 2
      Userland/Libraries/LibPDF/Fonts/CFF.h

+ 235 - 93
Userland/Libraries/LibPDF/Fonts/CFF.cpp

@@ -138,105 +138,247 @@ PDFErrorOr<NonnullRefPtr<CFF>> CFF::create(ReadonlyBytes const& cff_bytes, RefPt
     return cff;
     return cff;
 }
 }
 
 
-HashMap<CFF::SID, DeprecatedFlyString> CFF::builtin_names {
-    { 0, ".notdef" },
-    { 1, "space" },
-    { 9, "parenleft" },
-    { 10, "parenright" },
-    { 13, "comma" },
-    { 14, "hyphen" },
-    { 15, "period" },
-
-    { 17, "zero" },
-    { 18, "one" },
-    { 19, "two" },
-    { 20, "three" },
-    { 21, "four" },
-    { 22, "five" },
-    { 23, "six" },
-    { 24, "seven" },
-    { 25, "eight" },
-    { 26, "nine" },
-    { 27, "colon" },
-    { 28, "semicolon" },
-
-    { 34, "A" },
-    { 35, "B" },
-    { 36, "C" },
-    { 37, "D" },
-    { 38, "E" },
-    { 39, "F" },
-    { 40, "G" },
-    { 41, "H" },
-    { 42, "I" },
-    { 43, "J" },
-    { 44, "K" },
-    { 45, "L" },
-    { 46, "M" },
-    { 47, "N" },
-    { 48, "O" },
-    { 49, "P" },
-    { 50, "Q" },
-    { 51, "R" },
-    { 52, "S" },
-    { 53, "T" },
-    { 54, "U" },
-    { 55, "V" },
-    { 56, "W" },
-    { 57, "X" },
-    { 58, "Y" },
-    { 59, "Z" },
-    { 66, "a" },
-    { 67, "b" },
-    { 68, "c" },
-    { 69, "d" },
-    { 70, "e" },
-    { 71, "f" },
-    { 72, "g" },
-    { 73, "h" },
-    { 74, "i" },
-    { 75, "j" },
-    { 76, "k" },
-    { 77, "l" },
-    { 78, "m" },
-    { 79, "n" },
-    { 80, "o" },
-    { 81, "p" },
-    { 82, "q" },
-    { 83, "r" },
-    { 84, "s" },
-    { 85, "t" },
-    { 86, "u" },
-    { 87, "v" },
-    { 88, "w" },
-    { 89, "x" },
-    { 90, "y" },
-    { 91, "z" },
-
-    { 104, "quotesingle" },
-    { 105, "quotedblleft" },
-
-    { 111, "endash" },
-
-    { 116, "bullet" },
-
-    { 119, "quotedblright" },
-
-    { 137, "emdash" },
-
-    { 170, "copyright" },
+/// CFF spec Appendix A (Standard Strings), SIDs 0-228, indexed by SID; covers the ISOAdobe charset (Appendix C: Predefined Charsets)
+static constexpr Array s_cff_builtin_names {
+    ".notdef"sv,
+    "space"sv,
+    "exclam"sv,
+    "quotedbl"sv,
+    "numbersign"sv,
+    "dollar"sv,
+    "percent"sv,
+    "ampersand"sv,
+    "quoteright"sv,
+    "parenleft"sv,
+    "parenright"sv,
+    "asterisk"sv,
+    "plus"sv,
+    "comma"sv,
+    "hyphen"sv,
+    "period"sv,
+    "slash"sv,
+    "zero"sv,
+    "one"sv,
+    "two"sv,
+    "three"sv,
+    "four"sv,
+    "five"sv,
+    "six"sv,
+    "seven"sv,
+    "eight"sv,
+    "nine"sv,
+    "colon"sv,
+    "semicolon"sv,
+    "less"sv,
+    "equal"sv,
+    "greater"sv,
+    "question"sv,
+    "at"sv,
+    "A"sv,
+    "B"sv,
+    "C"sv,
+    "D"sv,
+    "E"sv,
+    "F"sv,
+    "G"sv,
+    "H"sv,
+    "I"sv,
+    "J"sv,
+    "K"sv,
+    "L"sv,
+    "M"sv,
+    "N"sv,
+    "O"sv,
+    "P"sv,
+    "Q"sv,
+    "R"sv,
+    "S"sv,
+    "T"sv,
+    "U"sv,
+    "V"sv,
+    "W"sv,
+    "X"sv,
+    "Y"sv,
+    "Z"sv,
+    "bracketleft"sv,
+    "backslash"sv,
+    "bracketright"sv,
+    "asciicircum"sv,
+    "underscore"sv,
+    "quoteleft"sv,
+    "a"sv,
+    "b"sv,
+    "c"sv,
+    "d"sv,
+    "e"sv,
+    "f"sv,
+    "g"sv,
+    "h"sv,
+    "i"sv,
+    "j"sv,
+    "k"sv,
+    "l"sv,
+    "m"sv,
+    "n"sv,
+    "o"sv,
+    "p"sv,
+    "q"sv,
+    "r"sv,
+    "s"sv,
+    "t"sv,
+    "u"sv,
+    "v"sv,
+    "w"sv,
+    "x"sv,
+    "y"sv,
+    "z"sv,
+    "braceleft"sv,
+    "bar"sv,
+    "braceright"sv,
+    "asciitilde"sv,
+    "exclamdown"sv,
+    "cent"sv,
+    "sterling"sv,
+    "fraction"sv,
+    "yen"sv,
+    "florin"sv,
+    "section"sv,
+    "currency"sv,
+    "quotesingle"sv,
+    "quotedblleft"sv,
+    "guillemotleft"sv,
+    "guilsinglleft"sv,
+    "guilsinglright"sv,
+    "fi"sv,
+    "fl"sv,
+    "endash"sv,
+    "dagger"sv,
+    "daggerdbl"sv,
+    "periodcentered"sv,
+    "paragraph"sv,
+    "bullet"sv,
+    "quotesinglbase"sv,
+    "quotedblbase"sv,
+    "quotedblright"sv,
+    "guillemotright"sv,
+    "ellipsis"sv,
+    "perthousand"sv,
+    "questiondown"sv,
+    "grave"sv,
+    "acute"sv,
+    "circumflex"sv,
+    "tilde"sv,
+    "macron"sv,
+    "breve"sv,
+    "dotaccent"sv,
+    "dieresis"sv,
+    "ring"sv,
+    "cedilla"sv,
+    "hungarumlaut"sv,
+    "ogonek"sv,
+    "caron"sv,
+    "emdash"sv,
+    "AE"sv,
+    "ordfeminine"sv,
+    "Lslash"sv,
+    "Oslash"sv,
+    "OE"sv,
+    "ordmasculine"sv,
+    "ae"sv,
+    "dotlessi"sv,
+    "lslash"sv,
+    "oslash"sv,
+    "oe"sv,
+    "germandbls"sv,
+    "onesuperior"sv,
+    "logicalnot"sv,
+    "mu"sv,
+    "trademark"sv,
+    "Eth"sv,
+    "onehalf"sv,
+    "plusminus"sv,
+    "Thorn"sv,
+    "onequarter"sv,
+    "divide"sv,
+    "brokenbar"sv,
+    "degree"sv,
+    "thorn"sv,
+    "threequarters"sv,
+    "twosuperior"sv,
+    "registered"sv,
+    "minus"sv,
+    "eth"sv,
+    "multiply"sv,
+    "threesuperior"sv,
+    "copyright"sv,
+    "Aacute"sv,
+    "Acircumflex"sv,
+    "Adieresis"sv,
+    "Agrave"sv,
+    "Aring"sv,
+    "Atilde"sv,
+    "Ccedilla"sv,
+    "Eacute"sv,
+    "Ecircumflex"sv,
+    "Edieresis"sv,
+    "Egrave"sv,
+    "Iacute"sv,
+    "Icircumflex"sv,
+    "Idieresis"sv,
+    "Igrave"sv,
+    "Ntilde"sv,
+    "Oacute"sv,
+    "Ocircumflex"sv,
+    "Odieresis"sv,
+    "Ograve"sv,
+    "Otilde"sv,
+    "Scaron"sv,
+    "Uacute"sv,
+    "Ucircumflex"sv,
+    "Udieresis"sv,
+    "Ugrave"sv,
+    "Yacute"sv,
+    "Ydieresis"sv,
+    "Zcaron"sv,
+    "aacute"sv,
+    "acircumflex"sv,
+    "adieresis"sv,
+    "agrave"sv,
+    "aring"sv,
+    "atilde"sv,
+    "ccedilla"sv,
+    "eacute"sv,
+    "ecircumflex"sv,
+    "edieresis"sv,
+    "egrave"sv,
+    "iacute"sv,
+    "icircumflex"sv,
+    "idieresis"sv,
+    "igrave"sv,
+    "ntilde"sv,
+    "oacute"sv,
+    "ocircumflex"sv,
+    "odieresis"sv,
+    "ograve"sv,
+    "otilde"sv,
+    "scaron"sv,
+    "uacute"sv,
+    "ucircumflex"sv,
+    "udieresis"sv,
+    "ugrave"sv,
+    "yacute"sv,
+    "ydieresis"sv,
+    "zcaron"sv,
 };
 };
 
 
 PDFErrorOr<Vector<DeprecatedFlyString>> CFF::parse_charset(Reader&& reader, size_t glyph_count)
 PDFErrorOr<Vector<DeprecatedFlyString>> CFF::parse_charset(Reader&& reader, size_t glyph_count)
 {
 {
     Vector<DeprecatedFlyString> names;
     Vector<DeprecatedFlyString> names;
     auto resolve = [](SID sid) {
     auto resolve = [](SID sid) {
-        auto x = builtin_names.find(sid);
-        if (x == builtin_names.end()) {
-            dbgln("Cound't find string for SID {}, going with space", sid);
-            return DeprecatedFlyString("space");
-        }
-        return x->value;
+        if (sid < s_cff_builtin_names.size())
+            return DeprecatedFlyString(s_cff_builtin_names[sid]);
+        dbgln("Cound't find string for SID {}, going with space", sid);
+        return DeprecatedFlyString("space");
     };
     };
 
 
     auto format = TRY(reader.try_read<Card8>());
     auto format = TRY(reader.try_read<Card8>());

+ 0 - 2
Userland/Libraries/LibPDF/Fonts/CFF.h

@@ -79,8 +79,6 @@ public:
 
 
     static PDFErrorOr<Vector<DeprecatedFlyString>> parse_charset(Reader&&, size_t);
     static PDFErrorOr<Vector<DeprecatedFlyString>> parse_charset(Reader&&, size_t);
     static PDFErrorOr<Vector<u8>> parse_encoding(Reader&&);
     static PDFErrorOr<Vector<u8>> parse_encoding(Reader&&);
-
-    static HashMap<SID, DeprecatedFlyString> builtin_names;
 };
 };
 
 
 }
 }