CFF.cpp 31 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983
  1. /*
  2. * Copyright (c) 2023, Rodrigo Tobar <rtobarc@gmail.com>.
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. // CFF spec: https://adobe-type-tools.github.io/font-tech-notes/pdfs/5176.CFF.pdf
  7. #include <AK/Debug.h>
  8. #include <AK/Endian.h>
  9. #include <AK/String.h>
  10. #include <LibGfx/Forward.h>
  11. #include <LibPDF/Encoding.h>
  12. #include <LibPDF/Error.h>
  13. #include <LibPDF/Fonts/CFF.h>
  14. #include <LibPDF/Reader.h>
  15. namespace PDF {
  16. // The built-in encodings map codes to SIDs.
  17. // CFF spec, "Appendix B Predefined Encodings, Standard Encoding"
  18. // clang-format off
  19. static constexpr Array s_predefined_encoding_standard {
  20. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  21. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
  22. 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
  23. 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
  24. 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
  25. 90, 91, 92, 93, 94, 95, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  26. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 0, 111, 112,
  27. 113, 114, 0, 115, 116, 117, 118, 119, 120, 121, 122, 0, 123, 0, 124, 125, 126, 127, 128, 129, 130, 131, 0, 132, 133, 0, 134, 135, 136,
  28. 137, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 138, 0, 139, 0, 0, 0, 0, 140, 141, 142, 143, 0,
  29. 0, 0, 0, 0, 144, 0, 0,
  30. 0, 145, 0, 0, 146, 147, 148,
  31. 149, 0, 0, 0, 0,
  32. };
  33. static_assert(s_predefined_encoding_standard.size() == 256);
  34. // clang-format on
  35. // CFF spec, "Appendix B Predefined Encodings, Expert Encoding"
  36. // clang-format off
  37. static constexpr Array s_predefined_encoding_expert {
  38. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  39. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 229, 230, 0,
  40. 231, 232, 233, 234, 235, 236, 237, 238, 13, 14, 15, 99, 239, 240, 241, 242, 243, 244,
  41. 245, 246, 247, 248, 27, 28, 249, 250, 251, 252, 0, 253, 254, 255, 256, 257, 0, 0, 0, 258, 0, 0, 259, 260, 261, 262, 0, 0, 263,
  42. 264, 265, 0, 266, 109, 110, 267, 268, 269, 0, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288,
  43. 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  44. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 304, 305, 306, 0, 0, 307, 308, 309, 310,
  45. 311, 0, 312, 0, 0, 313, 0, 0, 314, 315, 0, 0, 316, 317, 318, 0, 0, 0, 158, 155, 163, 319, 320, 321, 322, 323, 324, 325, 0,
  46. 0, 326, 150, 164, 169, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350,
  47. 351, 352, 353, 354, 355, 356, 357, 358, 359, 360,
  48. 361, 362, 363, 364, 365, 366, 367, 368, 369, 370,
  49. 371, 372, 373, 374, 375, 376, 377, 378,
  50. };
  51. static_assert(s_predefined_encoding_expert.size() == 256);
  52. // clang-format on
  53. // Charsets map GIDs to SIDs.
  54. // CFF spec, "Appendix C Predefined Charsets, Expert"
  55. // clang-format off
  56. static constexpr Array s_predefined_charset_expert {
  57. 1, 229, 230, 231, 232,
  58. 233, 234, 235, 236, 237,
  59. 238, 13, 14, 15, 99,
  60. 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 27, 28, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 109, 110,
  61. 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298,
  62. 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 158, 155, 163, 319, 320, 321, 322, 323, 324, 325, 326, 150,
  63. 164, 169, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342,
  64. 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360,
  65. 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 378,
  66. };
  67. // clang-format on
  68. // CFF spec, "Appendix C Predefined Charsets, Expert Subset"
  69. // clang-format off
  70. static constexpr Array s_predefined_charset_expert_subset {
  71. 1, 231, 232, 235, 236, 237, 238, 13, 14, 15, 99,
  72. 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 27,
  73. 28, 249, 250, 251, 253, 254, 255, 256, 257, 258, 259,
  74. 260, 261, 262, 263, 264, 265, 266, 109, 110, 267, 268, 269, 270, 272, 300, 301, 302, 305,
  75. 314, 315, 158, 155, 163, 320, 321, 322, 323, 324, 325, 326, 150, 164, 169, 327, 328, 329,
  76. 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346,
  77. };
  78. // clang-format on
  79. PDFErrorOr<NonnullRefPtr<CFF>> CFF::create(ReadonlyBytes const& cff_bytes, RefPtr<Encoding> encoding)
  80. {
  81. Reader reader(cff_bytes);
  82. // CFF spec, "6 Header"
  83. // skip major, minor version
  84. reader.consume(2);
  85. auto header_size = TRY(reader.try_read<Card8>());
  86. // skip offset size
  87. reader.consume(1);
  88. reader.move_to(header_size);
  89. // CFF spec, "7 Name INDEX"
  90. Vector<String> font_names;
  91. TRY(parse_index(reader, [&](ReadonlyBytes const& data) -> PDFErrorOr<void> {
  92. auto font_name = TRY(String::from_utf8(data));
  93. dbgln_if(CFF_DEBUG, "CFF font name '{}'", font_name);
  94. return TRY(font_names.try_append(font_name));
  95. }));
  96. if (font_names.size() != 1)
  97. return error("CFFs with more than one font not yet implemented");
  98. auto cff = adopt_ref(*new CFF());
  99. cff->set_font_matrix({ 0.001f, 0.0f, 0.0f, 0.001f, 0.0f, 0.0f });
  100. // CFF spec, "8 Top DICT INDEX"
  101. int charset_offset = 0;
  102. int encoding_offset = 0;
  103. auto charstrings_offset = 0;
  104. Vector<ByteBuffer> local_subroutines;
  105. float defaultWidthX = 0;
  106. float nominalWidthX = 0;
  107. TRY(parse_index(reader, [&](ReadonlyBytes const& element_data) {
  108. Reader element_reader { element_data };
  109. return parse_dict<TopDictOperator>(element_reader, [&](TopDictOperator op, Vector<DictOperand> const& operands) -> PDFErrorOr<void> {
  110. switch (op) {
  111. case TopDictOperator::Version:
  112. case TopDictOperator::Notice:
  113. case TopDictOperator::FullName:
  114. case TopDictOperator::FamilyName:
  115. case TopDictOperator::Weight:
  116. case TopDictOperator::FontBBox:
  117. case TopDictOperator::UniqueID:
  118. case TopDictOperator::XUID:
  119. case TopDictOperator::Copyright:
  120. case TopDictOperator::IsFixedPitch:
  121. case TopDictOperator::ItalicAngle:
  122. case TopDictOperator::UnderlinePosition:
  123. case TopDictOperator::UnderlineThickness:
  124. case TopDictOperator::PaintType:
  125. case TopDictOperator::FontMatrix:
  126. case TopDictOperator::StrokeWidth:
  127. case TopDictOperator::PostScript:
  128. case TopDictOperator::BaseFontName:
  129. case TopDictOperator::BaseFontBlend:
  130. break;
  131. case TopDictOperator::CharstringType: {
  132. int charstring_type = 2;
  133. if (!operands.is_empty())
  134. charstring_type = operands[0].get<int>();
  135. if (charstring_type != 2)
  136. dbgln("CFF: has unimplemented CharstringType, might not look right");
  137. break;
  138. }
  139. case TopDictOperator::SyntheticBase:
  140. dbgln("CFF: has unimplemented SyntheticBase, might not look right");
  141. break;
  142. case TopDictOperator::Encoding: {
  143. if (!operands.is_empty())
  144. encoding_offset = operands[0].get<int>();
  145. break;
  146. }
  147. case TopDictOperator::Charset: {
  148. if (!operands.is_empty())
  149. charset_offset = operands[0].get<int>();
  150. break;
  151. }
  152. case TopDictOperator::CharStrings: {
  153. if (!operands.is_empty())
  154. charstrings_offset = operands[0].get<int>();
  155. break;
  156. }
  157. case TopDictOperator::Private: {
  158. auto private_dict_size = operands[0].get<int>();
  159. auto private_dict_offset = operands[1].get<int>();
  160. Reader priv_dict_reader { cff_bytes.slice(private_dict_offset, private_dict_size) };
  161. TRY(parse_dict<PrivDictOperator>(priv_dict_reader, [&](PrivDictOperator op, Vector<DictOperand> const& operands) -> PDFErrorOr<void> {
  162. switch (op) {
  163. case PrivDictOperator::Subrs: {
  164. // CFF spec, "16 Local/Global Subrs INDEXes"
  165. // "Local subrs are stored in an INDEX structure which is located via the offset operand of the Subrs operator in the Private DICT."
  166. auto subrs_offset = operands[0].get<int>();
  167. Reader subrs_reader { cff_bytes.slice(private_dict_offset + subrs_offset) };
  168. TRY(parse_index(subrs_reader, [&](ReadonlyBytes const& subroutine_bytes) -> PDFErrorOr<void> {
  169. return TRY(local_subroutines.try_append(TRY(ByteBuffer::copy(subroutine_bytes))));
  170. }));
  171. dbgln_if(CFF_DEBUG, "CFF has {} subr entries", local_subroutines.size());
  172. break;
  173. }
  174. case PrivDictOperator::DefaultWidthX:
  175. if (!operands.is_empty())
  176. defaultWidthX = to_number(operands[0]);
  177. break;
  178. case PrivDictOperator::NominalWidthX:
  179. if (!operands.is_empty())
  180. nominalWidthX = to_number(operands[0]);
  181. break;
  182. }
  183. return {};
  184. }));
  185. break;
  186. }
  187. default:
  188. dbgln("CFF: Unhandled top dict entry {}", static_cast<int>(op));
  189. }
  190. return {};
  191. });
  192. }));
  193. auto strings = TRY(parse_strings(reader));
  194. // CFF spec "16 Local/Global Subrs INDEXes"
  195. // "Global subrs are stored in an INDEX structure which follows the String INDEX."
  196. Vector<ByteBuffer> global_subroutines;
  197. TRY(parse_index(reader, [&](ReadonlyBytes const& subroutine_bytes) -> PDFErrorOr<void> {
  198. return TRY(global_subroutines.try_append(TRY(ByteBuffer::copy(subroutine_bytes))));
  199. }));
  200. dbgln_if(CFF_DEBUG, "CFF has {} gsubr entries", global_subroutines.size());
  201. // Create glyphs (now that we have the subroutines) and associate missing information to store them and their encoding
  202. auto glyphs = TRY(parse_charstrings(Reader(cff_bytes.slice(charstrings_offset)), local_subroutines, global_subroutines));
  203. // CFF spec, "Table 16 Encoding ID"
  204. // FIXME: Only read this if the built-in encoding is actually needed? (ie. `if (!encoding)`)
  205. Vector<u8> encoding_codes; // Maps GID to its codepoint.
  206. HashMap<Card8, SID> encoding_supplemental; // Maps codepoint to SID.
  207. switch (encoding_offset) {
  208. case 0:
  209. dbgln_if(CFF_DEBUG, "CFF predefined encoding Standard");
  210. for (size_t i = 1; i < s_predefined_encoding_standard.size(); ++i)
  211. TRY(encoding_supplemental.try_set(i, s_predefined_encoding_standard[i]));
  212. break;
  213. case 1:
  214. dbgln_if(CFF_DEBUG, "CFF predefined encoding Expert");
  215. for (size_t i = 1; i < s_predefined_encoding_expert.size(); ++i)
  216. TRY(encoding_supplemental.try_set(i, s_predefined_encoding_expert[i]));
  217. break;
  218. default:
  219. encoding_codes = TRY(parse_encoding(Reader(cff_bytes.slice(encoding_offset)), encoding_supplemental));
  220. break;
  221. }
  222. // CFF spec, "Table 22 Charset ID"
  223. Vector<DeprecatedFlyString> charset_names;
  224. switch (charset_offset) {
  225. case 0:
  226. dbgln_if(CFF_DEBUG, "CFF predefined charset ISOAdobe");
  227. // CFF spec, "Appendix C Predefined Charsets, ISOAdobe"
  228. for (SID sid = 1; sid <= 228; sid++)
  229. TRY(charset_names.try_append(resolve_sid(sid, strings)));
  230. break;
  231. case 1:
  232. dbgln_if(CFF_DEBUG, "CFF predefined charset Expert");
  233. for (SID sid : s_predefined_charset_expert)
  234. TRY(charset_names.try_append(resolve_sid(sid, strings)));
  235. break;
  236. case 2:
  237. dbgln_if(CFF_DEBUG, "CFF predefined charset Expert Subset");
  238. for (SID sid : s_predefined_charset_expert_subset)
  239. TRY(charset_names.try_append(resolve_sid(sid, strings)));
  240. break;
  241. default: {
  242. auto charset = TRY(parse_charset(Reader { cff_bytes.slice(charset_offset) }, glyphs.size()));
  243. for (SID sid : charset)
  244. TRY(charset_names.try_append(resolve_sid(sid, strings)));
  245. break;
  246. }
  247. }
  248. // Adjust glyphs' widths as they are deltas from nominalWidthX
  249. for (auto& glyph : glyphs) {
  250. if (!glyph.has_width())
  251. glyph.set_width(defaultWidthX);
  252. else
  253. glyph.set_width(glyph.width() + nominalWidthX);
  254. }
  255. for (size_t i = 0; i < glyphs.size(); i++) {
  256. if (i == 0) {
  257. TRY(cff->add_glyph(0, move(glyphs[0])));
  258. continue;
  259. }
  260. auto const& name = charset_names[i - 1];
  261. TRY(cff->add_glyph(name, move(glyphs[i])));
  262. }
  263. cff->consolidate_glyphs();
  264. // Encoding given or read
  265. if (encoding) {
  266. dbgln_if(CFF_DEBUG, "CFF using external encoding");
  267. cff->set_encoding(move(encoding));
  268. } else {
  269. dbgln_if(CFF_DEBUG, "CFF using embedded encoding");
  270. auto encoding = Encoding::create();
  271. for (size_t i = 0; i < glyphs.size(); i++) {
  272. if (i == 0) {
  273. encoding->set(0, ".notdef");
  274. continue;
  275. }
  276. if (i >= encoding_codes.size() || i >= charset_names.size())
  277. break;
  278. auto code = encoding_codes[i - 1];
  279. auto char_name = charset_names[i - 1];
  280. encoding->set(code, char_name);
  281. }
  282. for (auto const& entry : encoding_supplemental)
  283. encoding->set(entry.key, resolve_sid(entry.value, strings));
  284. cff->set_encoding(move(encoding));
  285. }
  286. return cff;
  287. }
  288. /// Appendix A: Standard Strings
  289. static constexpr Array s_cff_builtin_names {
  290. ".notdef"sv,
  291. "space"sv,
  292. "exclam"sv,
  293. "quotedbl"sv,
  294. "numbersign"sv,
  295. "dollar"sv,
  296. "percent"sv,
  297. "ampersand"sv,
  298. "quoteright"sv,
  299. "parenleft"sv,
  300. "parenright"sv,
  301. "asterisk"sv,
  302. "plus"sv,
  303. "comma"sv,
  304. "hyphen"sv,
  305. "period"sv,
  306. "slash"sv,
  307. "zero"sv,
  308. "one"sv,
  309. "two"sv,
  310. "three"sv,
  311. "four"sv,
  312. "five"sv,
  313. "six"sv,
  314. "seven"sv,
  315. "eight"sv,
  316. "nine"sv,
  317. "colon"sv,
  318. "semicolon"sv,
  319. "less"sv,
  320. "equal"sv,
  321. "greater"sv,
  322. "question"sv,
  323. "at"sv,
  324. "A"sv,
  325. "B"sv,
  326. "C"sv,
  327. "D"sv,
  328. "E"sv,
  329. "F"sv,
  330. "G"sv,
  331. "H"sv,
  332. "I"sv,
  333. "J"sv,
  334. "K"sv,
  335. "L"sv,
  336. "M"sv,
  337. "N"sv,
  338. "O"sv,
  339. "P"sv,
  340. "Q"sv,
  341. "R"sv,
  342. "S"sv,
  343. "T"sv,
  344. "U"sv,
  345. "V"sv,
  346. "W"sv,
  347. "X"sv,
  348. "Y"sv,
  349. "Z"sv,
  350. "bracketleft"sv,
  351. "backslash"sv,
  352. "bracketright"sv,
  353. "asciicircum"sv,
  354. "underscore"sv,
  355. "quoteleft"sv,
  356. "a"sv,
  357. "b"sv,
  358. "c"sv,
  359. "d"sv,
  360. "e"sv,
  361. "f"sv,
  362. "g"sv,
  363. "h"sv,
  364. "i"sv,
  365. "j"sv,
  366. "k"sv,
  367. "l"sv,
  368. "m"sv,
  369. "n"sv,
  370. "o"sv,
  371. "p"sv,
  372. "q"sv,
  373. "r"sv,
  374. "s"sv,
  375. "t"sv,
  376. "u"sv,
  377. "v"sv,
  378. "w"sv,
  379. "x"sv,
  380. "y"sv,
  381. "z"sv,
  382. "braceleft"sv,
  383. "bar"sv,
  384. "braceright"sv,
  385. "asciitilde"sv,
  386. "exclamdown"sv,
  387. "cent"sv,
  388. "sterling"sv,
  389. "fraction"sv,
  390. "yen"sv,
  391. "florin"sv,
  392. "section"sv,
  393. "currency"sv,
  394. "quotesingle"sv,
  395. "quotedblleft"sv,
  396. "guillemotleft"sv,
  397. "guilsinglleft"sv,
  398. "guilsinglright"sv,
  399. "fi"sv,
  400. "fl"sv,
  401. "endash"sv,
  402. "dagger"sv,
  403. "daggerdbl"sv,
  404. "periodcentered"sv,
  405. "paragraph"sv,
  406. "bullet"sv,
  407. "quotesinglbase"sv,
  408. "quotedblbase"sv,
  409. "quotedblright"sv,
  410. "guillemotright"sv,
  411. "ellipsis"sv,
  412. "perthousand"sv,
  413. "questiondown"sv,
  414. "grave"sv,
  415. "acute"sv,
  416. "circumflex"sv,
  417. "tilde"sv,
  418. "macron"sv,
  419. "breve"sv,
  420. "dotaccent"sv,
  421. "dieresis"sv,
  422. "ring"sv,
  423. "cedilla"sv,
  424. "hungarumlaut"sv,
  425. "ogonek"sv,
  426. "caron"sv,
  427. "emdash"sv,
  428. "AE"sv,
  429. "ordfeminine"sv,
  430. "Lslash"sv,
  431. "Oslash"sv,
  432. "OE"sv,
  433. "ordmasculine"sv,
  434. "ae"sv,
  435. "dotlessi"sv,
  436. "lslash"sv,
  437. "oslash"sv,
  438. "oe"sv,
  439. "germandbls"sv,
  440. "onesuperior"sv,
  441. "logicalnot"sv,
  442. "mu"sv,
  443. "trademark"sv,
  444. "Eth"sv,
  445. "onehalf"sv,
  446. "plusminus"sv,
  447. "Thorn"sv,
  448. "onequarter"sv,
  449. "divide"sv,
  450. "brokenbar"sv,
  451. "degree"sv,
  452. "thorn"sv,
  453. "threequarters"sv,
  454. "twosuperior"sv,
  455. "registered"sv,
  456. "minus"sv,
  457. "eth"sv,
  458. "multiply"sv,
  459. "threesuperior"sv,
  460. "copyright"sv,
  461. "Aacute"sv,
  462. "Acircumflex"sv,
  463. "Adieresis"sv,
  464. "Agrave"sv,
  465. "Aring"sv,
  466. "Atilde"sv,
  467. "Ccedilla"sv,
  468. "Eacute"sv,
  469. "Ecircumflex"sv,
  470. "Edieresis"sv,
  471. "Egrave"sv,
  472. "Iacute"sv,
  473. "Icircumflex"sv,
  474. "Idieresis"sv,
  475. "Igrave"sv,
  476. "Ntilde"sv,
  477. "Oacute"sv,
  478. "Ocircumflex"sv,
  479. "Odieresis"sv,
  480. "Ograve"sv,
  481. "Otilde"sv,
  482. "Scaron"sv,
  483. "Uacute"sv,
  484. "Ucircumflex"sv,
  485. "Udieresis"sv,
  486. "Ugrave"sv,
  487. "Yacute"sv,
  488. "Ydieresis"sv,
  489. "Zcaron"sv,
  490. "aacute"sv,
  491. "acircumflex"sv,
  492. "adieresis"sv,
  493. "agrave"sv,
  494. "aring"sv,
  495. "atilde"sv,
  496. "ccedilla"sv,
  497. "eacute"sv,
  498. "ecircumflex"sv,
  499. "edieresis"sv,
  500. "egrave"sv,
  501. "iacute"sv,
  502. "icircumflex"sv,
  503. "idieresis"sv,
  504. "igrave"sv,
  505. "ntilde"sv,
  506. "oacute"sv,
  507. "ocircumflex"sv,
  508. "odieresis"sv,
  509. "ograve"sv,
  510. "otilde"sv,
  511. "scaron"sv,
  512. "uacute"sv,
  513. "ucircumflex"sv,
  514. "udieresis"sv,
  515. "ugrave"sv,
  516. "yacute"sv,
  517. "ydieresis"sv,
  518. "zcaron"sv,
  519. "exclamsmall"sv,
  520. "Hungarumlautsmall"sv,
  521. "dollaroldstyle"sv,
  522. "dollarsuperior"sv,
  523. "ampersandsmall"sv,
  524. "Acutesmall"sv,
  525. "parenleftsuperior"sv,
  526. "parenrightsuperior"sv,
  527. "twodotenleader"sv,
  528. "onedotenleader"sv,
  529. "zerooldstyle"sv,
  530. "oneoldstyle"sv,
  531. "twooldstyle"sv,
  532. "threeoldstyle"sv,
  533. "fouroldstyle"sv,
  534. "fiveoldstyle"sv,
  535. "sixoldstyle"sv,
  536. "sevenoldstyle"sv,
  537. "eightoldstyle"sv,
  538. "nineoldstyle"sv,
  539. "commasuperior"sv,
  540. "threequartersemdash"sv,
  541. "periodsuperior"sv,
  542. "questionsmall"sv,
  543. "asuperior"sv,
  544. "bsuperior"sv,
  545. "centsuperior"sv,
  546. "dsuperior"sv,
  547. "esuperior"sv,
  548. "isuperior"sv,
  549. "lsuperior"sv,
  550. "msuperior"sv,
  551. "nsuperior"sv,
  552. "osuperior"sv,
  553. "rsuperior"sv,
  554. "ssuperior"sv,
  555. "tsuperior"sv,
  556. "ff"sv,
  557. "ffi"sv,
  558. "ffl"sv,
  559. "parenleftinferior"sv,
  560. "parenrightinferior"sv,
  561. "Circumflexsmall"sv,
  562. "hyphensuperior"sv,
  563. "Gravesmall"sv,
  564. "Asmall"sv,
  565. "Bsmall"sv,
  566. "Csmall"sv,
  567. "Dsmall"sv,
  568. "Esmall"sv,
  569. "Fsmall"sv,
  570. "Gsmall"sv,
  571. "Hsmall"sv,
  572. "Ismall"sv,
  573. "Jsmall"sv,
  574. "Ksmall"sv,
  575. "Lsmall"sv,
  576. "Msmall"sv,
  577. "Nsmall"sv,
  578. "Osmall"sv,
  579. "Psmall"sv,
  580. "Qsmall"sv,
  581. "Rsmall"sv,
  582. "Ssmall"sv,
  583. "Tsmall"sv,
  584. "Usmall"sv,
  585. "Vsmall"sv,
  586. "Wsmall"sv,
  587. "Xsmall"sv,
  588. "Ysmall"sv,
  589. "Zsmall"sv,
  590. "colonmonetary"sv,
  591. "onefitted"sv,
  592. "rupiah"sv,
  593. "Tildesmall"sv,
  594. "exclamdownsmall"sv,
  595. "centoldstyle"sv,
  596. "Lslashsmall"sv,
  597. "Scaronsmall"sv,
  598. "Zcaronsmall"sv,
  599. "Dieresissmall"sv,
  600. "Brevesmall"sv,
  601. "Caronsmall"sv,
  602. "Dotaccentsmall"sv,
  603. "Macronsmall"sv,
  604. "figuredash"sv,
  605. "hypheninferior"sv,
  606. "Ogoneksmall"sv,
  607. "Ringsmall"sv,
  608. "Cedillasmall"sv,
  609. "questiondownsmall"sv,
  610. "oneeighth"sv,
  611. "threeeighths"sv,
  612. "fiveeighths"sv,
  613. "seveneighths"sv,
  614. "onethird"sv,
  615. "twothirds"sv,
  616. "zerosuperior"sv,
  617. "foursuperior"sv,
  618. "fivesuperior"sv,
  619. "sixsuperior"sv,
  620. "sevensuperior"sv,
  621. "eightsuperior"sv,
  622. "ninesuperior"sv,
  623. "zeroinferior"sv,
  624. "oneinferior"sv,
  625. "twoinferior"sv,
  626. "threeinferior"sv,
  627. "fourinferior"sv,
  628. "fiveinferior"sv,
  629. "sixinferior"sv,
  630. "seveninferior"sv,
  631. "eightinferior"sv,
  632. "nineinferior"sv,
  633. "centinferior"sv,
  634. "dollarinferior"sv,
  635. "periodinferior"sv,
  636. "commainferior"sv,
  637. "Agravesmall"sv,
  638. "Aacutesmall"sv,
  639. "Acircumflexsmall"sv,
  640. "Atildesmall"sv,
  641. "Adieresissmall"sv,
  642. "Aringsmall"sv,
  643. "AEsmall"sv,
  644. "Ccedillasmall"sv,
  645. "Egravesmall"sv,
  646. "Eacutesmall"sv,
  647. "Ecircumflexsmall"sv,
  648. "Edieresissmall"sv,
  649. "Igravesmall"sv,
  650. "Iacutesmall"sv,
  651. "Icircumflexsmall"sv,
  652. "Idieresissmall"sv,
  653. "Ethsmall"sv,
  654. "Ntildesmall"sv,
  655. "Ogravesmall"sv,
  656. "Oacutesmall"sv,
  657. "Ocircumflexsmall"sv,
  658. "Otildesmall"sv,
  659. "Odieresissmall"sv,
  660. "OEsmall"sv,
  661. "Oslashsmall"sv,
  662. "Ugravesmall"sv,
  663. "Uacutesmall"sv,
  664. "Ucircumflexsmall"sv,
  665. "Udieresissmall"sv,
  666. "Yacutesmall"sv,
  667. "Thornsmall"sv,
  668. "Ydieresissmall"sv,
  669. "001.000"sv,
  670. "001.001"sv,
  671. "001.002"sv,
  672. "001.003"sv,
  673. "Black"sv,
  674. "Bold"sv,
  675. "Book"sv,
  676. "Light"sv,
  677. "Medium"sv,
  678. "Regular"sv,
  679. "Roman"sv,
  680. "Semibold"sv,
  681. };
  682. PDFErrorOr<Vector<StringView>> CFF::parse_strings(Reader& reader)
  683. {
  684. // CFF spec "10 String Index"
  685. Vector<StringView> strings;
  686. TRY(parse_index(reader, [&](ReadonlyBytes const& string) -> PDFErrorOr<void> {
  687. return TRY(strings.try_append(string));
  688. }));
  689. dbgln_if(CFF_DEBUG, "CFF has {} additional strings in string table", strings.size());
  690. return strings;
  691. }
  692. DeprecatedFlyString CFF::resolve_sid(SID sid, Vector<StringView> const& strings)
  693. {
  694. if (sid < s_cff_builtin_names.size())
  695. return DeprecatedFlyString(s_cff_builtin_names[sid]);
  696. if (sid - s_cff_builtin_names.size() < strings.size())
  697. return DeprecatedFlyString(strings[sid - s_cff_builtin_names.size()]);
  698. dbgln("Couldn't find string for SID {}, going with space", sid);
  699. return DeprecatedFlyString("space");
  700. }
  701. PDFErrorOr<Vector<CFF::SID>> CFF::parse_charset(Reader&& reader, size_t glyph_count)
  702. {
  703. // CFF spec, "13 Charsets"
  704. Vector<SID> names;
  705. auto format = TRY(reader.try_read<Card8>());
  706. if (format == 0) {
  707. // CFF spec, "Table 17 Format 0"
  708. dbgln_if(CFF_DEBUG, "CFF charset format 0");
  709. for (size_t i = 0; i < glyph_count - 1; i++) {
  710. SID sid = TRY(reader.try_read<BigEndian<SID>>());
  711. TRY(names.try_append(sid));
  712. }
  713. } else if (format == 1) {
  714. // CFF spec, "Table 18 Format 1"
  715. dbgln_if(CFF_DEBUG, "CFF charset format 1");
  716. while (names.size() < glyph_count - 1) {
  717. // CFF spec, "Table 19 Range1 Format (Charset)"
  718. auto first_sid = TRY(reader.try_read<BigEndian<SID>>());
  719. int left = TRY(reader.try_read<Card8>());
  720. for (SID sid = first_sid; left >= 0; left--, sid++)
  721. TRY(names.try_append(sid));
  722. }
  723. } else if (format == 2) {
  724. // CFF spec, "Table 20 Format 2"
  725. // "Format 2 differs from format 1 only in the size of the Left field in each range."
  726. dbgln_if(CFF_DEBUG, "CFF charset format 2");
  727. while (names.size() < glyph_count - 1) {
  728. // CFF spec, "Table 21 Range2 Format"
  729. auto first_sid = TRY(reader.try_read<BigEndian<SID>>());
  730. int left = TRY(reader.try_read<BigEndian<Card16>>());
  731. for (SID sid = first_sid; left >= 0; left--, sid++)
  732. TRY(names.try_append(sid));
  733. }
  734. } else {
  735. dbgln("CFF: Unknown charset format {}", format);
  736. }
  737. return names;
  738. }
  739. PDFErrorOr<Vector<CFF::Glyph>> CFF::parse_charstrings(Reader&& reader, Vector<ByteBuffer> const& local_subroutines, Vector<ByteBuffer> const& global_subroutines)
  740. {
  741. // CFF spec, "14 CharStrings INDEX"
  742. Vector<Glyph> glyphs;
  743. TRY(parse_index(reader, [&](ReadonlyBytes const& charstring_data) -> PDFErrorOr<void> {
  744. GlyphParserState state;
  745. auto glyph = TRY(parse_glyph(charstring_data, local_subroutines, global_subroutines, state, true));
  746. return TRY(glyphs.try_append(glyph));
  747. }));
  748. dbgln_if(CFF_DEBUG, "CFF has {} glyphs", glyphs.size());
  749. return glyphs;
  750. }
  751. PDFErrorOr<Vector<u8>> CFF::parse_encoding(Reader&& reader, HashMap<Card8, SID>& supplemental)
  752. {
  753. // CFF spec, "12 Encodings"
  754. Vector<u8> encoding_codes;
  755. auto format_raw = TRY(reader.try_read<Card8>());
  756. auto format = format_raw & 0x7f;
  757. if (format == 0) {
  758. // CFF spec, "Table 11 Format 0"
  759. auto n_codes = TRY(reader.try_read<Card8>());
  760. dbgln_if(CFF_DEBUG, "CFF encoding format 0, {} codes", n_codes);
  761. for (u8 i = 0; i < n_codes; i++) {
  762. TRY(encoding_codes.try_append(TRY(reader.try_read<Card8>())));
  763. }
  764. } else if (format == 1) {
  765. // CFF spec, "Table 12 Format 1"
  766. auto n_ranges = TRY(reader.try_read<Card8>());
  767. dbgln_if(CFF_DEBUG, "CFF encoding format 1, {} ranges", n_ranges);
  768. for (u8 i = 0; i < n_ranges; i++) {
  769. // CFF spec, "Table 13 Range1 Format (Encoding)"
  770. auto first_code = TRY(reader.try_read<Card8>());
  771. int left = TRY(reader.try_read<Card8>());
  772. for (u8 code = first_code; left >= 0; left--, code++)
  773. TRY(encoding_codes.try_append(code));
  774. }
  775. } else
  776. return error(ByteString::formatted("Invalid encoding format: {}", format));
  777. if (format_raw & 0x80) {
  778. // CFF spec, "Table 14 Supplemental Encoding Data"
  779. auto n_sups = TRY(reader.try_read<Card8>());
  780. dbgln_if(CFF_DEBUG, "CFF encoding, {} supplemental entries", n_sups);
  781. for (u8 i = 0; i < n_sups; i++) {
  782. // CFF spec, "Table 15 Supplement Format"
  783. auto code = TRY(reader.try_read<Card8>());
  784. SID name = TRY(reader.try_read<SID>());
  785. TRY(supplemental.try_set(code, name));
  786. }
  787. }
  788. return encoding_codes;
  789. }
  790. template<typename OperatorT>
  791. PDFErrorOr<void> CFF::parse_dict(Reader& reader, DictEntryHandler<OperatorT>&& handler)
  792. {
  793. // CFF spec, "4 DICT data"
  794. Vector<DictOperand> operands;
  795. while (reader.remaining() > 0) {
  796. auto b0 = reader.read<u8>();
  797. // "Operators and operands may be distinguished by inspection of their first byte: 0-21 specify operators"
  798. if (b0 <= 21) {
  799. auto op = TRY(parse_dict_operator<OperatorT>(b0, reader));
  800. TRY(handler(op, operands));
  801. operands.clear();
  802. continue;
  803. }
  804. // An operand
  805. TRY(operands.try_append(TRY(load_dict_operand(b0, reader))));
  806. }
  807. return {};
  808. }
  809. template PDFErrorOr<void> CFF::parse_dict<CFF::TopDictOperator>(Reader&, DictEntryHandler<TopDictOperator>&&);
  810. template PDFErrorOr<void> CFF::parse_dict<CFF::PrivDictOperator>(Reader&, DictEntryHandler<PrivDictOperator>&&);
  811. template<typename OperatorT>
  812. PDFErrorOr<OperatorT> CFF::parse_dict_operator(u8 b0, Reader& reader)
  813. {
  814. // CFF spec, "4 DICT data"
  815. VERIFY(b0 <= 21);
  816. // "Two-byte operators have an initial escape byte of 12."
  817. if (b0 != 12)
  818. return OperatorT { (int)b0 };
  819. auto b1 = TRY(reader.try_read<u8>());
  820. return OperatorT { b0 << 8 | b1 };
  821. }
  822. template PDFErrorOr<CFF::TopDictOperator> CFF::parse_dict_operator(u8, Reader&);
  823. PDFErrorOr<void> CFF::parse_index(Reader& reader, IndexDataHandler&& data_handler)
  824. {
  825. // CFF spec, "5 INDEX Data"
  826. Card16 count = TRY(reader.try_read<BigEndian<Card16>>());
  827. if (count == 0)
  828. return {};
  829. auto offset_size = TRY(reader.try_read<OffSize>());
  830. if (offset_size > 4)
  831. return error("CFF INDEX Data offset_size > 4 not supported");
  832. return parse_index_data(offset_size, count, reader, data_handler);
  833. }
  834. PDFErrorOr<void> CFF::parse_index_data(OffSize offset_size, Card16 count, Reader& reader, IndexDataHandler& handler)
  835. {
  836. // CFF spec, "5 INDEX Data"
  837. u32 last_data_end = 1;
  838. auto read_offset = [&]() -> PDFErrorOr<u32> {
  839. u32 offset = 0;
  840. for (OffSize i = 0; i < offset_size; ++i)
  841. offset = (offset << 8) | TRY(reader.try_read<u8>());
  842. return offset;
  843. };
  844. auto offset_refpoint = reader.offset() + offset_size * (count + 1) - 1;
  845. for (u16 i = 0; i < count; i++) {
  846. reader.save();
  847. reader.move_by(offset_size * i);
  848. u32 data_start = TRY(read_offset());
  849. last_data_end = TRY(read_offset());
  850. auto data_size = last_data_end - data_start;
  851. reader.move_to(offset_refpoint + data_start);
  852. TRY(handler(reader.bytes().slice(reader.offset(), data_size)));
  853. reader.load();
  854. }
  855. reader.move_to(offset_refpoint + last_data_end);
  856. return {};
  857. }
  858. int CFF::load_int_dict_operand(u8 b0, Reader& reader)
  859. {
  860. // CFF spec, "Table 3 Operand Encoding"
  861. if (b0 >= 32 && b0 <= 246) {
  862. return b0 - 139;
  863. }
  864. if (b0 >= 247 && b0 <= 250) {
  865. auto b1 = reader.read<u8>();
  866. return (b0 - 247) * 256 + b1 + 108;
  867. }
  868. if (b0 >= 251 && b0 <= 254) {
  869. auto b1 = reader.read<u8>();
  870. return -(b0 - 251) * 256 - b1 - 108;
  871. }
  872. if (b0 == 28) {
  873. auto b1 = reader.read<u8>();
  874. auto b2 = reader.read<u8>();
  875. return b1 << 8 | b2;
  876. }
  877. if (b0 == 29) {
  878. auto b1 = reader.read<u8>();
  879. auto b2 = reader.read<u8>();
  880. auto b3 = reader.read<u8>();
  881. auto b4 = reader.read<u8>();
  882. return b1 << 24 | b2 << 16 | b3 << 8 | b4;
  883. }
  884. VERIFY_NOT_REACHED();
  885. }
  886. float CFF::load_float_dict_operand(Reader& reader)
  887. {
  888. // CFF spec, "Table 5 Nibble Definitions"
  889. StringBuilder sb;
  890. auto add_nibble = [&](char nibble) {
  891. if (nibble < 0xa)
  892. sb.append('0' + nibble);
  893. else if (nibble == 0xa)
  894. sb.append('.');
  895. else if (nibble == 0xb)
  896. sb.append('E');
  897. else if (nibble == 0xc)
  898. sb.append("E-"sv);
  899. else if (nibble == 0xe)
  900. sb.append('-');
  901. };
  902. while (true) {
  903. auto byte = reader.read<u8>();
  904. char nibble1 = (byte & 0xf0) >> 4;
  905. char nibble2 = byte & 0x0f;
  906. if (nibble1 == 0xf)
  907. break;
  908. add_nibble(nibble1);
  909. if (nibble2 == 0xf)
  910. break;
  911. add_nibble(nibble2);
  912. }
  913. auto result = AK::StringUtils::convert_to_floating_point<float>(sb.string_view());
  914. return result.release_value();
  915. }
  916. PDFErrorOr<CFF::DictOperand> CFF::load_dict_operand(u8 b0, Reader& reader)
  917. {
  918. // CFF spec, "4 DICT data"
  919. if (b0 == 30)
  920. return load_float_dict_operand(reader);
  921. if (b0 >= 28)
  922. return load_int_dict_operand(b0, reader);
  923. return Error { Error::Type::MalformedPDF, ByteString::formatted("Unknown CFF dict element prefix: {}", b0) };
  924. }
  925. }