CFF.cpp 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575
  1. /*
  2. * Copyright (c) 2023, Rodrigo Tobar <rtobarc@gmail.com>.
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include <AK/Endian.h>
  7. #include <AK/String.h>
  8. #include <LibGfx/Forward.h>
  9. #include <LibPDF/Encoding.h>
  10. #include <LibPDF/Error.h>
  11. #include <LibPDF/Fonts/CFF.h>
  12. #include <LibPDF/Reader.h>
  13. namespace PDF {
  14. PDFErrorOr<NonnullRefPtr<CFF>> CFF::create(ReadonlyBytes const& cff_bytes, RefPtr<Encoding> encoding)
  15. {
  16. Reader reader(cff_bytes);
  17. // Header
  18. // skip major, minor version
  19. reader.consume(2);
  20. auto header_size = TRY(reader.try_read<Card8>());
  21. // skip offset size
  22. reader.consume(1);
  23. reader.move_to(header_size);
  24. // Name INDEX
  25. Vector<String> font_names;
  26. TRY(parse_index(reader, [&](ReadonlyBytes const& data) -> PDFErrorOr<void> {
  27. auto string = TRY(String::from_utf8(data));
  28. return TRY(font_names.try_append(string));
  29. }));
  30. auto cff = adopt_ref(*new CFF());
  31. cff->set_font_matrix({ 0.001f, 0.0f, 0.0f, 0.001f, 0.0f, 0.0f });
  32. // Top DICT INDEX
  33. int charset_offset = 0;
  34. Vector<u8> encoding_codes;
  35. auto charstrings_offset = 0;
  36. Vector<ByteBuffer> subroutines;
  37. int defaultWidthX = 0;
  38. int nominalWidthX = 0;
  39. TRY(parse_index(reader, [&](ReadonlyBytes const& element_data) {
  40. Reader element_reader { element_data };
  41. return parse_dict<TopDictOperator>(element_reader, [&](TopDictOperator op, Vector<DictOperand> const& operands) -> PDFErrorOr<void> {
  42. switch (op) {
  43. case TopDictOperator::Encoding: {
  44. auto encoding_offset = 0;
  45. if (!operands.is_empty())
  46. encoding_offset = operands[0].get<int>();
  47. encoding_codes = TRY(parse_encoding(Reader(cff_bytes.slice(encoding_offset))));
  48. break;
  49. }
  50. case TopDictOperator::Charset: {
  51. if (!operands.is_empty())
  52. charset_offset = operands[0].get<int>();
  53. break;
  54. }
  55. case TopDictOperator::CharStrings: {
  56. if (!operands.is_empty())
  57. charstrings_offset = operands[0].get<int>();
  58. break;
  59. }
  60. case TopDictOperator::Private: {
  61. auto private_dict_size = operands[0].get<int>();
  62. auto private_dict_offset = operands[1].get<int>();
  63. Reader priv_dict_reader { cff_bytes.slice(private_dict_offset, private_dict_size) };
  64. TRY(parse_dict<PrivDictOperator>(priv_dict_reader, [&](PrivDictOperator op, Vector<DictOperand> const& operands) -> PDFErrorOr<void> {
  65. switch (op) {
  66. case PrivDictOperator::Subrs: {
  67. auto subrs_offset = operands[0].get<int>();
  68. Reader subrs_reader { cff_bytes.slice(private_dict_offset + subrs_offset) };
  69. dbgln("Parsing Subrs INDEX");
  70. TRY(parse_index(subrs_reader, [&](ReadonlyBytes const& subroutine_bytes) -> PDFErrorOr<void> {
  71. return TRY(subroutines.try_append(TRY(ByteBuffer::copy(subroutine_bytes))));
  72. }));
  73. break;
  74. }
  75. case PrivDictOperator::DefaultWidthX:
  76. defaultWidthX = operands[0].get<int>();
  77. break;
  78. case PrivDictOperator::NominalWidthX:
  79. nominalWidthX = operands[0].get<int>();
  80. break;
  81. }
  82. return {};
  83. }));
  84. break;
  85. }
  86. default:;
  87. }
  88. return {};
  89. });
  90. }));
  91. // Create glyphs (now that we have the subroutines) and associate missing information to store them and their encoding
  92. auto glyphs = TRY(parse_charstrings(Reader(cff_bytes.slice(charstrings_offset)), subroutines));
  93. auto charset = TRY(parse_charset(Reader { cff_bytes.slice(charset_offset) }, glyphs.size()));
  94. // Adjust glyphs' widths as they are deltas from nominalWidthX
  95. for (auto& glyph : glyphs) {
  96. if (!glyph.has_width())
  97. glyph.set_width(float(defaultWidthX));
  98. else
  99. glyph.set_width(glyph.width() + float(nominalWidthX));
  100. }
  101. for (size_t i = 0; i < glyphs.size(); i++) {
  102. if (i == 0) {
  103. TRY(cff->add_glyph(0, move(glyphs[0])));
  104. continue;
  105. }
  106. auto const& name = charset[i - 1];
  107. TRY(cff->add_glyph(name, move(glyphs[i])));
  108. }
  109. cff->consolidate_glyphs();
  110. // Encoding given or read
  111. if (encoding) {
  112. cff->set_encoding(move(encoding));
  113. } else {
  114. auto encoding = Encoding::create();
  115. for (size_t i = 0; i < glyphs.size(); i++) {
  116. if (i == 0) {
  117. encoding->set(0, ".notdef");
  118. continue;
  119. }
  120. auto code = encoding_codes[i - 1];
  121. auto char_name = charset[i - 1];
  122. encoding->set(code, char_name);
  123. }
  124. cff->set_encoding(move(encoding));
  125. }
  126. return cff;
  127. }
  128. /// Appendix C: Predefined Charsets
  129. static constexpr Array s_cff_builtin_names {
  130. ".notdef"sv,
  131. "space"sv,
  132. "exclam"sv,
  133. "quotedbl"sv,
  134. "numbersign"sv,
  135. "dollar"sv,
  136. "percent"sv,
  137. "ampersand"sv,
  138. "quoteright"sv,
  139. "parenleft"sv,
  140. "parenright"sv,
  141. "asterisk"sv,
  142. "plus"sv,
  143. "comma"sv,
  144. "hyphen"sv,
  145. "period"sv,
  146. "slash"sv,
  147. "zero"sv,
  148. "one"sv,
  149. "two"sv,
  150. "three"sv,
  151. "four"sv,
  152. "five"sv,
  153. "six"sv,
  154. "seven"sv,
  155. "eight"sv,
  156. "nine"sv,
  157. "colon"sv,
  158. "semicolon"sv,
  159. "less"sv,
  160. "equal"sv,
  161. "greater"sv,
  162. "question"sv,
  163. "at"sv,
  164. "A"sv,
  165. "B"sv,
  166. "C"sv,
  167. "D"sv,
  168. "E"sv,
  169. "F"sv,
  170. "G"sv,
  171. "H"sv,
  172. "I"sv,
  173. "J"sv,
  174. "K"sv,
  175. "L"sv,
  176. "M"sv,
  177. "N"sv,
  178. "O"sv,
  179. "P"sv,
  180. "Q"sv,
  181. "R"sv,
  182. "S"sv,
  183. "T"sv,
  184. "U"sv,
  185. "V"sv,
  186. "W"sv,
  187. "X"sv,
  188. "Y"sv,
  189. "Z"sv,
  190. "bracketleft"sv,
  191. "backslash"sv,
  192. "bracketright"sv,
  193. "asciicircum"sv,
  194. "underscore"sv,
  195. "quoteleft"sv,
  196. "a"sv,
  197. "b"sv,
  198. "c"sv,
  199. "d"sv,
  200. "e"sv,
  201. "f"sv,
  202. "g"sv,
  203. "h"sv,
  204. "i"sv,
  205. "j"sv,
  206. "k"sv,
  207. "l"sv,
  208. "m"sv,
  209. "n"sv,
  210. "o"sv,
  211. "p"sv,
  212. "q"sv,
  213. "r"sv,
  214. "s"sv,
  215. "t"sv,
  216. "u"sv,
  217. "v"sv,
  218. "w"sv,
  219. "x"sv,
  220. "y"sv,
  221. "z"sv,
  222. "braceleft"sv,
  223. "bar"sv,
  224. "braceright"sv,
  225. "asciitilde"sv,
  226. "exclamdown"sv,
  227. "cent"sv,
  228. "sterling"sv,
  229. "fraction"sv,
  230. "yen"sv,
  231. "florin"sv,
  232. "section"sv,
  233. "currency"sv,
  234. "quotesingle"sv,
  235. "quotedblleft"sv,
  236. "guillemotleft"sv,
  237. "guilsinglleft"sv,
  238. "guilsinglright"sv,
  239. "fi"sv,
  240. "fl"sv,
  241. "endash"sv,
  242. "dagger"sv,
  243. "daggerdbl"sv,
  244. "periodcentered"sv,
  245. "paragraph"sv,
  246. "bullet"sv,
  247. "quotesinglbase"sv,
  248. "quotedblbase"sv,
  249. "quotedblright"sv,
  250. "guillemotright"sv,
  251. "ellipsis"sv,
  252. "perthousand"sv,
  253. "questiondown"sv,
  254. "grave"sv,
  255. "acute"sv,
  256. "circumflex"sv,
  257. "tilde"sv,
  258. "macron"sv,
  259. "breve"sv,
  260. "dotaccent"sv,
  261. "dieresis"sv,
  262. "ring"sv,
  263. "cedilla"sv,
  264. "hungarumlaut"sv,
  265. "ogonek"sv,
  266. "caron"sv,
  267. "emdash"sv,
  268. "AE"sv,
  269. "ordfeminine"sv,
  270. "Lslash"sv,
  271. "Oslash"sv,
  272. "OE"sv,
  273. "ordmasculine"sv,
  274. "ae"sv,
  275. "dotlessi"sv,
  276. "lslash"sv,
  277. "oslash"sv,
  278. "oe"sv,
  279. "germandbls"sv,
  280. "onesuperior"sv,
  281. "logicalnot"sv,
  282. "mu"sv,
  283. "trademark"sv,
  284. "Eth"sv,
  285. "onehalf"sv,
  286. "plusminus"sv,
  287. "Thorn"sv,
  288. "onequarter"sv,
  289. "divide"sv,
  290. "brokenbar"sv,
  291. "degree"sv,
  292. "thorn"sv,
  293. "threequarters"sv,
  294. "twosuperior"sv,
  295. "registered"sv,
  296. "minus"sv,
  297. "eth"sv,
  298. "multiply"sv,
  299. "threesuperior"sv,
  300. "copyright"sv,
  301. "Aacute"sv,
  302. "Acircumflex"sv,
  303. "Adieresis"sv,
  304. "Agrave"sv,
  305. "Aring"sv,
  306. "Atilde"sv,
  307. "Ccedilla"sv,
  308. "Eacute"sv,
  309. "Ecircumflex"sv,
  310. "Edieresis"sv,
  311. "Egrave"sv,
  312. "Iacute"sv,
  313. "Icircumflex"sv,
  314. "Idieresis"sv,
  315. "Igrave"sv,
  316. "Ntilde"sv,
  317. "Oacute"sv,
  318. "Ocircumflex"sv,
  319. "Odieresis"sv,
  320. "Ograve"sv,
  321. "Otilde"sv,
  322. "Scaron"sv,
  323. "Uacute"sv,
  324. "Ucircumflex"sv,
  325. "Udieresis"sv,
  326. "Ugrave"sv,
  327. "Yacute"sv,
  328. "Ydieresis"sv,
  329. "Zcaron"sv,
  330. "aacute"sv,
  331. "acircumflex"sv,
  332. "adieresis"sv,
  333. "agrave"sv,
  334. "aring"sv,
  335. "atilde"sv,
  336. "ccedilla"sv,
  337. "eacute"sv,
  338. "ecircumflex"sv,
  339. "edieresis"sv,
  340. "egrave"sv,
  341. "iacute"sv,
  342. "icircumflex"sv,
  343. "idieresis"sv,
  344. "igrave"sv,
  345. "ntilde"sv,
  346. "oacute"sv,
  347. "ocircumflex"sv,
  348. "odieresis"sv,
  349. "ograve"sv,
  350. "otilde"sv,
  351. "scaron"sv,
  352. "uacute"sv,
  353. "ucircumflex"sv,
  354. "udieresis"sv,
  355. "ugrave"sv,
  356. "yacute"sv,
  357. "ydieresis"sv,
  358. "zcaron"sv,
  359. };
  360. PDFErrorOr<Vector<DeprecatedFlyString>> CFF::parse_charset(Reader&& reader, size_t glyph_count)
  361. {
  362. Vector<DeprecatedFlyString> names;
  363. auto resolve = [](SID sid) {
  364. if (sid < s_cff_builtin_names.size())
  365. return DeprecatedFlyString(s_cff_builtin_names[sid]);
  366. dbgln("Cound't find string for SID {}, going with space", sid);
  367. return DeprecatedFlyString("space");
  368. };
  369. auto format = TRY(reader.try_read<Card8>());
  370. if (format == 0) {
  371. for (u8 i = 0; i < glyph_count - 1; i++) {
  372. SID sid = TRY(reader.try_read<BigEndian<SID>>());
  373. TRY(names.try_append(resolve(sid)));
  374. }
  375. } else if (format == 1) {
  376. while (names.size() < glyph_count - 1) {
  377. auto first_sid = TRY(reader.try_read<BigEndian<SID>>());
  378. int left = TRY(reader.try_read<Card8>());
  379. for (u8 sid = first_sid; left >= 0; left--, sid++)
  380. TRY(names.try_append(resolve(sid)));
  381. }
  382. }
  383. return names;
  384. }
  385. PDFErrorOr<Vector<CFF::Glyph>> CFF::parse_charstrings(Reader&& reader, Vector<ByteBuffer> const& subroutines)
  386. {
  387. Vector<Glyph> glyphs;
  388. TRY(parse_index(reader, [&](ReadonlyBytes const& charstring_data) -> PDFErrorOr<void> {
  389. GlyphParserState state;
  390. auto glyph = TRY(parse_glyph(charstring_data, subroutines, state, true));
  391. return TRY(glyphs.try_append(glyph));
  392. }));
  393. return glyphs;
  394. }
  395. PDFErrorOr<Vector<u8>> CFF::parse_encoding(Reader&& reader)
  396. {
  397. Vector<u8> encoding_codes;
  398. auto format_raw = TRY(reader.try_read<Card8>());
  399. // TODO: support encoding supplements when highest bit is set
  400. auto format = format_raw & 0x7f;
  401. if (format == 0) {
  402. auto n_codes = TRY(reader.try_read<Card8>());
  403. for (u8 i = 0; i < n_codes; i++) {
  404. TRY(encoding_codes.try_append(TRY(reader.try_read<Card8>())));
  405. }
  406. } else if (format == 1) {
  407. auto n_ranges = TRY(reader.try_read<Card8>());
  408. for (u8 i = 0; i < n_ranges; i++) {
  409. auto first_code = TRY(reader.try_read<Card8>());
  410. int left = TRY(reader.try_read<Card8>());
  411. for (u8 code = first_code; left >= 0; left--, code++)
  412. TRY(encoding_codes.try_append(code));
  413. }
  414. } else
  415. return error(DeprecatedString::formatted("Invalid encoding format: {}", format));
  416. return encoding_codes;
  417. }
  418. template<typename OperatorT>
  419. PDFErrorOr<void> CFF::parse_dict(Reader& reader, DictEntryHandler<OperatorT>&& handler)
  420. {
  421. Vector<DictOperand> operands;
  422. while (reader.remaining() > 0) {
  423. auto b0 = reader.read<u8>();
  424. // A command
  425. if (b0 <= 21) {
  426. auto op = TRY(parse_dict_operator<OperatorT>(b0, reader));
  427. TRY(handler(op, operands));
  428. operands.clear();
  429. continue;
  430. }
  431. // An operand
  432. TRY(operands.try_append(TRY(load_dict_operand(b0, reader))));
  433. }
  434. return {};
  435. }
  436. template PDFErrorOr<void> CFF::parse_dict<CFF::TopDictOperator>(Reader&, DictEntryHandler<TopDictOperator>&&);
  437. template PDFErrorOr<void> CFF::parse_dict<CFF::PrivDictOperator>(Reader&, DictEntryHandler<PrivDictOperator>&&);
  438. template<typename OperatorT>
  439. PDFErrorOr<OperatorT> CFF::parse_dict_operator(u8 b0, Reader& reader)
  440. {
  441. VERIFY(b0 <= 21);
  442. if (b0 != 12)
  443. return OperatorT { (int)b0 };
  444. auto b1 = TRY(reader.try_read<u8>());
  445. return OperatorT { b0 << 8 | b1 };
  446. }
  447. template PDFErrorOr<CFF::TopDictOperator> CFF::parse_dict_operator(u8, Reader&);
  448. PDFErrorOr<void> CFF::parse_index(Reader& reader, IndexDataHandler&& data_handler)
  449. {
  450. Card16 count = TRY(reader.try_read<BigEndian<Card16>>());
  451. if (count == 0)
  452. return {};
  453. auto offset_size = TRY(reader.try_read<OffSize>());
  454. if (offset_size == 1)
  455. return parse_index_data<u8>(count, reader, data_handler);
  456. if (offset_size == 2)
  457. return parse_index_data<u16>(count, reader, data_handler);
  458. if (offset_size == 4)
  459. return parse_index_data<u32>(count, reader, data_handler);
  460. VERIFY_NOT_REACHED();
  461. }
  462. template<typename OffsetType>
  463. PDFErrorOr<void> CFF::parse_index_data(Card16 count, Reader& reader, IndexDataHandler& handler)
  464. {
  465. OffsetType last_data_end = 1;
  466. auto offset_refpoint = reader.offset() + sizeof(OffsetType) * (count + 1) - 1;
  467. for (u16 i = 0; i < count; i++) {
  468. reader.save();
  469. reader.move_by(sizeof(OffsetType) * i);
  470. OffsetType data_start = reader.read<BigEndian<OffsetType>>();
  471. last_data_end = reader.read<BigEndian<OffsetType>>();
  472. auto data_size = last_data_end - data_start;
  473. reader.move_to(offset_refpoint + data_start);
  474. TRY(handler(reader.bytes().slice(reader.offset(), data_size)));
  475. reader.load();
  476. }
  477. reader.move_to(offset_refpoint + last_data_end);
  478. return {};
  479. }
  480. template PDFErrorOr<void> CFF::parse_index_data<u8>(Card16, Reader&, IndexDataHandler&);
  481. template PDFErrorOr<void> CFF::parse_index_data<u16>(Card16, Reader&, IndexDataHandler&);
  482. template PDFErrorOr<void> CFF::parse_index_data<u32>(Card16, Reader&, IndexDataHandler&);
  483. // 4 DICT DATA, Table 3 Operand Encoding
  484. int CFF::load_int_dict_operand(u8 b0, Reader& reader)
  485. {
  486. if (b0 >= 32 && b0 <= 246) {
  487. return b0 - 139;
  488. }
  489. if (b0 >= 247 && b0 <= 250) {
  490. auto b1 = reader.read<u8>();
  491. return (b0 - 247) * 256 + b1 + 108;
  492. }
  493. if (b0 >= 251 && b0 <= 254) {
  494. auto b1 = reader.read<u8>();
  495. return -(b0 - 251) * 256 - b1 - 108;
  496. }
  497. if (b0 == 28) {
  498. auto b1 = reader.read<u8>();
  499. auto b2 = reader.read<u8>();
  500. return b1 << 8 | b2;
  501. }
  502. if (b0 == 29) {
  503. auto b1 = reader.read<u8>();
  504. auto b2 = reader.read<u8>();
  505. auto b3 = reader.read<u8>();
  506. auto b4 = reader.read<u8>();
  507. return b1 << 24 | b2 << 16 | b3 << 8 | b4;
  508. }
  509. VERIFY_NOT_REACHED();
  510. }
  511. float CFF::load_float_dict_operand(Reader& reader)
  512. {
  513. StringBuilder sb;
  514. auto add_nibble = [&](char nibble) {
  515. if (nibble < 0xa)
  516. sb.append('0' + nibble);
  517. else if (nibble == 0xa)
  518. sb.append('.');
  519. else if (nibble == 0xb)
  520. sb.append('E');
  521. else if (nibble == 0xc)
  522. sb.append("E-"sv);
  523. else if (nibble == 0xe)
  524. sb.append('-');
  525. };
  526. while (true) {
  527. auto byte = reader.read<u8>();
  528. char nibble1 = (byte & 0xf0) >> 4;
  529. char nibble2 = byte & 0x0f;
  530. if (nibble1 == 0xf)
  531. break;
  532. add_nibble(nibble1);
  533. if (nibble2 == 0xf)
  534. break;
  535. add_nibble(nibble2);
  536. }
  537. auto result = AK::StringUtils::convert_to_floating_point<float>(sb.string_view());
  538. return result.release_value();
  539. }
  540. PDFErrorOr<CFF::DictOperand> CFF::load_dict_operand(u8 b0, Reader& reader)
  541. {
  542. if (b0 == 30)
  543. return load_float_dict_operand(reader);
  544. if (b0 >= 28)
  545. return load_int_dict_operand(b0, reader);
  546. return Error { Error::Type::MalformedPDF, DeprecatedString::formatted("Unknown CFF dict element prefix: {}", b0) };
  547. }
  548. }