LibPDF: Add more built-in SIDs

The first iteration had enough SIDs to display simple documents, but as
we tried more and more documents we started to need more of these SIDs
to be properly defined. This is a copy/paste exercise from the CFF
specification, which is tedious, so it will continue in small batches.

This commit fills all the gaps up to SID 228, which covers the whole
ISOAdobe space and should be enough for most use cases. Since this is a
contiguous range starting at 0, we now use an Array instead of a Map to
store these names, which should be more performant. Also, to simplify
things, I've moved the Array out of the CFF class, making it a plain
static variable, which allows us to use template type deduction.
This commit is contained in:
Rodrigo Tobar 2023-02-11 14:11:12 +08:00 committed by Linus Groh
parent fb0c226da3
commit c4507bb56e
Notes: sideshowbarker 2024-07-17 10:08:28 +09:00
2 changed files with 235 additions and 95 deletions

View file

@ -138,105 +138,247 @@ PDFErrorOr<NonnullRefPtr<CFF>> CFF::create(ReadonlyBytes const& cff_bytes, RefPt
return cff;
}
HashMap<CFF::SID, DeprecatedFlyString> CFF::builtin_names {
{ 0, ".notdef" },
{ 1, "space" },
{ 9, "parenleft" },
{ 10, "parenright" },
{ 13, "comma" },
{ 14, "hyphen" },
{ 15, "period" },
{ 17, "zero" },
{ 18, "one" },
{ 19, "two" },
{ 20, "three" },
{ 21, "four" },
{ 22, "five" },
{ 23, "six" },
{ 24, "seven" },
{ 25, "eight" },
{ 26, "nine" },
{ 27, "colon" },
{ 28, "semicolon" },
{ 34, "A" },
{ 35, "B" },
{ 36, "C" },
{ 37, "D" },
{ 38, "E" },
{ 39, "F" },
{ 40, "G" },
{ 41, "H" },
{ 42, "I" },
{ 43, "J" },
{ 44, "K" },
{ 45, "L" },
{ 46, "M" },
{ 47, "N" },
{ 48, "O" },
{ 49, "P" },
{ 50, "Q" },
{ 51, "R" },
{ 52, "S" },
{ 53, "T" },
{ 54, "U" },
{ 55, "V" },
{ 56, "W" },
{ 57, "X" },
{ 58, "Y" },
{ 59, "Z" },
{ 66, "a" },
{ 67, "b" },
{ 68, "c" },
{ 69, "d" },
{ 70, "e" },
{ 71, "f" },
{ 72, "g" },
{ 73, "h" },
{ 74, "i" },
{ 75, "j" },
{ 76, "k" },
{ 77, "l" },
{ 78, "m" },
{ 79, "n" },
{ 80, "o" },
{ 81, "p" },
{ 82, "q" },
{ 83, "r" },
{ 84, "s" },
{ 85, "t" },
{ 86, "u" },
{ 87, "v" },
{ 88, "w" },
{ 89, "x" },
{ 90, "y" },
{ 91, "z" },
{ 104, "quotesingle" },
{ 105, "quotedblleft" },
{ 111, "endash" },
{ 116, "bullet" },
{ 119, "quotedblright" },
{ 137, "emdash" },
{ 170, "copyright" },
/// Appendix C: Predefined Charsets
///
/// Glyph names for the built-in string IDs (SIDs). The table is positional:
/// the array index IS the SID, so entry 0 is ".notdef" and the final entry,
/// "zcaron", is SID 228 (229 entries in total).
///
/// Because the position of each name is its meaning, entries must never be
/// reordered or removed, and no name may be inserted in the middle; new names
/// can only be appended after the current last entry.
///
/// NOTE(review): the 0-228 range presumably corresponds to the ISOAdobe
/// predefined charset, while the names themselves look like the CFF
/// "standard strings" list - confirm which appendix of the spec should be
/// cited here.
static constexpr Array s_cff_builtin_names {
// SIDs 0-33: .notdef, space, ASCII punctuation and digits
".notdef"sv,
"space"sv,
"exclam"sv,
"quotedbl"sv,
"numbersign"sv,
"dollar"sv,
"percent"sv,
"ampersand"sv,
"quoteright"sv,
"parenleft"sv,
"parenright"sv,
"asterisk"sv,
"plus"sv,
"comma"sv,
"hyphen"sv,
"period"sv,
"slash"sv,
"zero"sv,
"one"sv,
"two"sv,
"three"sv,
"four"sv,
"five"sv,
"six"sv,
"seven"sv,
"eight"sv,
"nine"sv,
"colon"sv,
"semicolon"sv,
"less"sv,
"equal"sv,
"greater"sv,
"question"sv,
"at"sv,
// SIDs 34-59: uppercase letters
"A"sv,
"B"sv,
"C"sv,
"D"sv,
"E"sv,
"F"sv,
"G"sv,
"H"sv,
"I"sv,
"J"sv,
"K"sv,
"L"sv,
"M"sv,
"N"sv,
"O"sv,
"P"sv,
"Q"sv,
"R"sv,
"S"sv,
"T"sv,
"U"sv,
"V"sv,
"W"sv,
"X"sv,
"Y"sv,
"Z"sv,
// SIDs 60-65: punctuation between the two letter ranges
"bracketleft"sv,
"backslash"sv,
"bracketright"sv,
"asciicircum"sv,
"underscore"sv,
"quoteleft"sv,
// SIDs 66-91: lowercase letters
"a"sv,
"b"sv,
"c"sv,
"d"sv,
"e"sv,
"f"sv,
"g"sv,
"h"sv,
"i"sv,
"j"sv,
"k"sv,
"l"sv,
"m"sv,
"n"sv,
"o"sv,
"p"sv,
"q"sv,
"r"sv,
"s"sv,
"t"sv,
"u"sv,
"v"sv,
"w"sv,
"x"sv,
"y"sv,
"z"sv,
// SIDs 92-95: remaining ASCII punctuation
"braceleft"sv,
"bar"sv,
"braceright"sv,
"asciitilde"sv,
// SIDs 96-137: currency signs, quotes, ligatures, typographic marks, accents
"exclamdown"sv,
"cent"sv,
"sterling"sv,
"fraction"sv,
"yen"sv,
"florin"sv,
"section"sv,
"currency"sv,
"quotesingle"sv,
"quotedblleft"sv,
"guillemotleft"sv,
"guilsinglleft"sv,
"guilsinglright"sv,
"fi"sv,
"fl"sv,
"endash"sv,
"dagger"sv,
"daggerdbl"sv,
"periodcentered"sv,
"paragraph"sv,
"bullet"sv,
"quotesinglbase"sv,
"quotedblbase"sv,
"quotedblright"sv,
"guillemotright"sv,
"ellipsis"sv,
"perthousand"sv,
"questiondown"sv,
"grave"sv,
"acute"sv,
"circumflex"sv,
"tilde"sv,
"macron"sv,
"breve"sv,
"dotaccent"sv,
"dieresis"sv,
"ring"sv,
"cedilla"sv,
"hungarumlaut"sv,
"ogonek"sv,
"caron"sv,
"emdash"sv,
// SIDs 138-170: special letters, fractions, superscripts, math and legal signs
"AE"sv,
"ordfeminine"sv,
"Lslash"sv,
"Oslash"sv,
"OE"sv,
"ordmasculine"sv,
"ae"sv,
"dotlessi"sv,
"lslash"sv,
"oslash"sv,
"oe"sv,
"germandbls"sv,
"onesuperior"sv,
"logicalnot"sv,
"mu"sv,
"trademark"sv,
"Eth"sv,
"onehalf"sv,
"plusminus"sv,
"Thorn"sv,
"onequarter"sv,
"divide"sv,
"brokenbar"sv,
"degree"sv,
"thorn"sv,
"threequarters"sv,
"twosuperior"sv,
"registered"sv,
"minus"sv,
"eth"sv,
"multiply"sv,
"threesuperior"sv,
"copyright"sv,
// SIDs 171-199: accented uppercase letters
"Aacute"sv,
"Acircumflex"sv,
"Adieresis"sv,
"Agrave"sv,
"Aring"sv,
"Atilde"sv,
"Ccedilla"sv,
"Eacute"sv,
"Ecircumflex"sv,
"Edieresis"sv,
"Egrave"sv,
"Iacute"sv,
"Icircumflex"sv,
"Idieresis"sv,
"Igrave"sv,
"Ntilde"sv,
"Oacute"sv,
"Ocircumflex"sv,
"Odieresis"sv,
"Ograve"sv,
"Otilde"sv,
"Scaron"sv,
"Uacute"sv,
"Ucircumflex"sv,
"Udieresis"sv,
"Ugrave"sv,
"Yacute"sv,
"Ydieresis"sv,
"Zcaron"sv,
// SIDs 200-228: accented lowercase letters
"aacute"sv,
"acircumflex"sv,
"adieresis"sv,
"agrave"sv,
"aring"sv,
"atilde"sv,
"ccedilla"sv,
"eacute"sv,
"ecircumflex"sv,
"edieresis"sv,
"egrave"sv,
"iacute"sv,
"icircumflex"sv,
"idieresis"sv,
"igrave"sv,
"ntilde"sv,
"oacute"sv,
"ocircumflex"sv,
"odieresis"sv,
"ograve"sv,
"otilde"sv,
"scaron"sv,
"uacute"sv,
"ucircumflex"sv,
"udieresis"sv,
"ugrave"sv,
"yacute"sv,
"ydieresis"sv,
"zcaron"sv,
};
PDFErrorOr<Vector<DeprecatedFlyString>> CFF::parse_charset(Reader&& reader, size_t glyph_count)
{
Vector<DeprecatedFlyString> names;
auto resolve = [](SID sid) {
auto x = builtin_names.find(sid);
if (x == builtin_names.end()) {
dbgln("Cound't find string for SID {}, going with space", sid);
return DeprecatedFlyString("space");
}
return x->value;
if (sid < s_cff_builtin_names.size())
return DeprecatedFlyString(s_cff_builtin_names[sid]);
dbgln("Cound't find string for SID {}, going with space", sid);
return DeprecatedFlyString("space");
};
auto format = TRY(reader.try_read<Card8>());

View file

@ -79,8 +79,6 @@ public:
static PDFErrorOr<Vector<DeprecatedFlyString>> parse_charset(Reader&&, size_t);
static PDFErrorOr<Vector<u8>> parse_encoding(Reader&&);
static HashMap<SID, DeprecatedFlyString> builtin_names;
};
}