CFF.cpp 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431
  1. /*
  2. * Copyright (c) 2023, Rodrigo Tobar <rtobarc@gmail.com>.
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include <AK/Endian.h>
  7. #include <AK/String.h>
  8. #include <LibGfx/Forward.h>
  9. #include <LibPDF/Encoding.h>
  10. #include <LibPDF/Error.h>
  11. #include <LibPDF/Fonts/CFF.h>
  12. #include <LibPDF/Reader.h>
  13. namespace PDF {
  14. PDFErrorOr<NonnullRefPtr<CFF>> CFF::create(ReadonlyBytes const& cff_bytes, RefPtr<Encoding> encoding)
  15. {
  16. Reader reader(cff_bytes);
  17. // Header
  18. // skip major, minor version
  19. reader.consume(2);
  20. auto header_size = TRY(reader.try_read<Card8>());
  21. // skip offset size
  22. reader.consume(1);
  23. reader.move_to(header_size);
  24. // Name INDEX
  25. Vector<String> font_names;
  26. TRY(parse_index(reader, [&](ReadonlyBytes const& data) -> PDFErrorOr<void> {
  27. auto string = TRY(String::from_utf8(data));
  28. return TRY(font_names.try_append(string));
  29. }));
  30. auto cff = adopt_ref(*new CFF());
  31. cff->set_font_matrix({ 0.001f, 0.0f, 0.0f, 0.001f, 0.0f, 0.0f });
  32. // Top DICT INDEX
  33. int charset_offset = 0;
  34. Vector<u8> encoding_codes;
  35. auto charstrings_offset = 0;
  36. Vector<ByteBuffer> subroutines;
  37. int defaultWidthX = 0;
  38. int nominalWidthX = 0;
  39. TRY(parse_index(reader, [&](ReadonlyBytes const& element_data) {
  40. Reader element_reader { element_data };
  41. return parse_dict<TopDictOperator>(element_reader, [&](TopDictOperator op, Vector<DictOperand> const& operands) -> PDFErrorOr<void> {
  42. switch (op) {
  43. case TopDictOperator::Encoding: {
  44. auto encoding_offset = 0;
  45. if (!operands.is_empty())
  46. encoding_offset = operands[0].get<int>();
  47. encoding_codes = TRY(parse_encoding(Reader(cff_bytes.slice(encoding_offset))));
  48. break;
  49. }
  50. case TopDictOperator::Charset: {
  51. if (!operands.is_empty())
  52. charset_offset = operands[0].get<int>();
  53. break;
  54. }
  55. case TopDictOperator::CharStrings: {
  56. if (!operands.is_empty())
  57. charstrings_offset = operands[0].get<int>();
  58. break;
  59. }
  60. case TopDictOperator::Private: {
  61. auto private_dict_size = operands[0].get<int>();
  62. auto private_dict_offset = operands[1].get<int>();
  63. Reader priv_dict_reader { cff_bytes.slice(private_dict_offset, private_dict_size) };
  64. TRY(parse_dict<PrivDictOperator>(priv_dict_reader, [&](PrivDictOperator op, Vector<DictOperand> const& operands) -> PDFErrorOr<void> {
  65. switch (op) {
  66. case PrivDictOperator::Subrs: {
  67. auto subrs_offset = operands[0].get<int>();
  68. Reader subrs_reader { cff_bytes.slice(private_dict_offset + subrs_offset) };
  69. dbgln("Parsing Subrs INDEX");
  70. TRY(parse_index(subrs_reader, [&](ReadonlyBytes const& subroutine_bytes) -> PDFErrorOr<void> {
  71. return TRY(subroutines.try_append(TRY(ByteBuffer::copy(subroutine_bytes))));
  72. }));
  73. break;
  74. }
  75. case PrivDictOperator::DefaultWidthX:
  76. defaultWidthX = operands[0].get<int>();
  77. break;
  78. case PrivDictOperator::NominalWidthX:
  79. nominalWidthX = operands[0].get<int>();
  80. break;
  81. }
  82. return {};
  83. }));
  84. break;
  85. }
  86. default:;
  87. }
  88. return {};
  89. });
  90. }));
  91. // Create glpyhs (now that we have the subroutines) and associate missing information to store them and their encoding
  92. auto glyphs = TRY(parse_charstrings(Reader(cff_bytes.slice(charstrings_offset)), subroutines));
  93. auto charset = TRY(parse_charset(Reader { cff_bytes.slice(charset_offset) }, glyphs.size()));
  94. // Adjust glyphs' widths as they are deltas from nominalWidthX
  95. for (auto& glyph : glyphs) {
  96. if (!glyph.has_width())
  97. glyph.set_width(float(defaultWidthX));
  98. else
  99. glyph.set_width(glyph.width() + float(nominalWidthX));
  100. }
  101. for (size_t i = 0; i < glyphs.size(); i++) {
  102. if (i == 0) {
  103. TRY(cff->add_glyph(0, move(glyphs[0])));
  104. continue;
  105. }
  106. auto const& name = charset[i - 1];
  107. TRY(cff->add_glyph(name, move(glyphs[i])));
  108. }
  109. cff->consolidate_glyphs();
  110. // Encoding given or read
  111. if (encoding) {
  112. cff->set_encoding(move(encoding));
  113. } else {
  114. auto encoding = Encoding::create();
  115. for (size_t i = 0; i < glyphs.size(); i++) {
  116. if (i == 0) {
  117. encoding->set(0, ".notdef");
  118. continue;
  119. }
  120. auto code = encoding_codes[i - 1];
  121. auto char_name = charset[i - 1];
  122. encoding->set(code, char_name);
  123. }
  124. cff->set_encoding(move(encoding));
  125. }
  126. return cff;
  127. }
// Glyph names for the string IDs (SIDs) known to this parser, keyed by SID.
// The table is sparse: parse_charset() substitutes "space" for any SID that is not listed here.
HashMap<CFF::SID, DeprecatedFlyString> CFF::builtin_names {
    { 0, ".notdef" },
    { 1, "space" },
    { 9, "parenleft" },
    { 10, "parenright" },
    { 13, "comma" },
    { 14, "hyphen" },
    { 15, "period" },
    { 17, "zero" },
    { 18, "one" },
    { 19, "two" },
    { 20, "three" },
    { 21, "four" },
    { 22, "five" },
    { 23, "six" },
    { 24, "seven" },
    { 25, "eight" },
    { 26, "nine" },
    { 27, "colon" },
    { 28, "semicolon" },
    { 34, "A" },
    { 35, "B" },
    { 36, "C" },
    { 37, "D" },
    { 38, "E" },
    { 39, "F" },
    { 40, "G" },
    { 41, "H" },
    { 42, "I" },
    { 43, "J" },
    { 44, "K" },
    { 45, "L" },
    { 46, "M" },
    { 47, "N" },
    { 48, "O" },
    { 49, "P" },
    { 50, "Q" },
    { 51, "R" },
    { 52, "S" },
    { 53, "T" },
    { 54, "U" },
    { 55, "V" },
    { 56, "W" },
    { 57, "X" },
    { 58, "Y" },
    { 59, "Z" },
    { 66, "a" },
    { 67, "b" },
    { 68, "c" },
    { 69, "d" },
    { 70, "e" },
    { 71, "f" },
    { 72, "g" },
    { 73, "h" },
    { 74, "i" },
    { 75, "j" },
    { 76, "k" },
    { 77, "l" },
    { 78, "m" },
    { 79, "n" },
    { 80, "o" },
    { 81, "p" },
    { 82, "q" },
    { 83, "r" },
    { 84, "s" },
    { 85, "t" },
    { 86, "u" },
    { 87, "v" },
    { 88, "w" },
    { 89, "x" },
    { 90, "y" },
    { 91, "z" },
    { 104, "quotesingle" },
    { 105, "quotedblleft" },
    { 111, "endash" },
    { 116, "bullet" },
    { 119, "quotedblright" },
    { 137, "emdash" },
    { 170, "copyright" },
};
  208. PDFErrorOr<Vector<DeprecatedFlyString>> CFF::parse_charset(Reader&& reader, size_t glyph_count)
  209. {
  210. Vector<DeprecatedFlyString> names;
  211. auto resolve = [](SID sid) {
  212. auto x = builtin_names.find(sid);
  213. if (x == builtin_names.end()) {
  214. dbgln("Cound't find string for SID {}, going with space", sid);
  215. return DeprecatedFlyString("space");
  216. }
  217. return x->value;
  218. };
  219. auto format = TRY(reader.try_read<Card8>());
  220. if (format == 0) {
  221. for (u8 i = 0; i < glyph_count - 1; i++) {
  222. SID sid = TRY(reader.try_read<BigEndian<SID>>());
  223. TRY(names.try_append(resolve(sid)));
  224. }
  225. } else if (format == 1) {
  226. while (names.size() < glyph_count - 1) {
  227. auto first_sid = TRY(reader.try_read<BigEndian<SID>>());
  228. int left = TRY(reader.try_read<Card8>());
  229. for (u8 sid = first_sid; left >= 0; left--, sid++)
  230. TRY(names.try_append(resolve(sid)));
  231. }
  232. }
  233. return names;
  234. }
  235. PDFErrorOr<Vector<CFF::Glyph>> CFF::parse_charstrings(Reader&& reader, Vector<ByteBuffer> const& subroutines)
  236. {
  237. Vector<Glyph> glyphs;
  238. TRY(parse_index(reader, [&](ReadonlyBytes const& charstring_data) -> PDFErrorOr<void> {
  239. GlyphParserState state;
  240. auto glyph = TRY(parse_glyph(charstring_data, subroutines, state, true));
  241. return TRY(glyphs.try_append(glyph));
  242. }));
  243. return glyphs;
  244. }
  245. PDFErrorOr<Vector<u8>> CFF::parse_encoding(Reader&& reader)
  246. {
  247. Vector<u8> encoding_codes;
  248. auto format = TRY(reader.try_read<Card8>());
  249. if (format == 0) {
  250. auto n_codes = TRY(reader.try_read<Card8>());
  251. for (u8 i = 0; i < n_codes; i++) {
  252. TRY(encoding_codes.try_append(TRY(reader.try_read<Card8>())));
  253. }
  254. } else if (format == 1) {
  255. auto n_ranges = TRY(reader.try_read<Card8>());
  256. for (u8 i = 0; i < n_ranges; i++) {
  257. auto first_code = TRY(reader.try_read<Card8>());
  258. int left = TRY(reader.try_read<Card8>());
  259. for (u8 code = first_code; left >= 0; left--, code++)
  260. TRY(encoding_codes.try_append(code));
  261. }
  262. } else
  263. return error(DeprecatedString::formatted("Invalid encoding format: {}", format));
  264. return encoding_codes;
  265. }
  266. template<typename OperatorT>
  267. PDFErrorOr<void> CFF::parse_dict(Reader& reader, DictEntryHandler<OperatorT>&& handler)
  268. {
  269. Vector<DictOperand> operands;
  270. while (reader.remaining() > 0) {
  271. auto b0 = reader.read<u8>();
  272. // A command
  273. if (b0 <= 21) {
  274. auto op = TRY(parse_dict_operator<OperatorT>(b0, reader));
  275. TRY(handler(op, operands));
  276. operands.clear();
  277. continue;
  278. }
  279. // An operand
  280. TRY(operands.try_append(TRY(load_dict_operand(b0, reader))));
  281. }
  282. return {};
  283. }
  284. template PDFErrorOr<void> CFF::parse_dict<CFF::TopDictOperator>(Reader&, DictEntryHandler<TopDictOperator>&&);
  285. template PDFErrorOr<void> CFF::parse_dict<CFF::PrivDictOperator>(Reader&, DictEntryHandler<PrivDictOperator>&&);
  286. template<typename OperatorT>
  287. PDFErrorOr<OperatorT> CFF::parse_dict_operator(u8 b0, Reader& reader)
  288. {
  289. VERIFY(b0 <= 21);
  290. if (b0 != 12)
  291. return OperatorT { (int)b0 };
  292. auto b1 = TRY(reader.try_read<u8>());
  293. return OperatorT { b0 << 8 | b1 };
  294. }
  295. template PDFErrorOr<CFF::TopDictOperator> CFF::parse_dict_operator(u8, Reader&);
  296. PDFErrorOr<void> CFF::parse_index(Reader& reader, IndexDataHandler&& data_handler)
  297. {
  298. Card16 count = TRY(reader.try_read<BigEndian<Card16>>());
  299. if (count == 0)
  300. return {};
  301. auto offset_size = TRY(reader.try_read<OffSize>());
  302. if (offset_size == 1)
  303. return parse_index_data<u8>(count, reader, data_handler);
  304. if (offset_size == 2)
  305. return parse_index_data<u16>(count, reader, data_handler);
  306. if (offset_size == 4)
  307. return parse_index_data<u32>(count, reader, data_handler);
  308. VERIFY_NOT_REACHED();
  309. }
  310. template<typename OffsetType>
  311. PDFErrorOr<void> CFF::parse_index_data(Card16 count, Reader& reader, IndexDataHandler& handler)
  312. {
  313. OffsetType last_data_end = 1;
  314. auto offset_refpoint = reader.offset() + sizeof(OffsetType) * (count + 1) - 1;
  315. for (u16 i = 0; i < count; i++) {
  316. reader.save();
  317. reader.move_by(sizeof(OffsetType) * i);
  318. OffsetType data_start = reader.read<BigEndian<OffsetType>>();
  319. last_data_end = reader.read<BigEndian<OffsetType>>();
  320. auto data_size = last_data_end - data_start;
  321. reader.move_to(offset_refpoint + data_start);
  322. TRY(handler(reader.bytes().slice(reader.offset(), data_size)));
  323. reader.load();
  324. }
  325. reader.move_to(offset_refpoint + last_data_end);
  326. return {};
  327. }
  328. template PDFErrorOr<void> CFF::parse_index_data<u8>(Card16, Reader&, IndexDataHandler&);
  329. template PDFErrorOr<void> CFF::parse_index_data<u16>(Card16, Reader&, IndexDataHandler&);
  330. template PDFErrorOr<void> CFF::parse_index_data<u32>(Card16, Reader&, IndexDataHandler&);
  331. // 4 DICT DATA, Table 3 Operand Encoding
  332. int CFF::load_int_dict_operand(u8 b0, Reader& reader)
  333. {
  334. if (b0 >= 32 && b0 <= 246) {
  335. return b0 - 139;
  336. }
  337. if (b0 >= 247 && b0 <= 250) {
  338. auto b1 = reader.read<u8>();
  339. return (b0 - 247) * 256 + b1 + 108;
  340. }
  341. if (b0 >= 251 && b0 <= 254) {
  342. auto b1 = reader.read<u8>();
  343. return -(b0 - 251) * 256 - b1 - 108;
  344. }
  345. if (b0 == 28) {
  346. auto b1 = reader.read<u8>();
  347. auto b2 = reader.read<u8>();
  348. return b1 << 8 | b2;
  349. }
  350. if (b0 == 29) {
  351. auto b1 = reader.read<u8>();
  352. auto b2 = reader.read<u8>();
  353. auto b3 = reader.read<u8>();
  354. auto b4 = reader.read<u8>();
  355. return b1 << 24 | b2 << 16 | b3 << 8 | b4;
  356. }
  357. VERIFY_NOT_REACHED();
  358. }
  359. float CFF::load_float_dict_operand(Reader& reader)
  360. {
  361. StringBuilder sb;
  362. auto add_nibble = [&](char nibble) {
  363. if (nibble < 0xa)
  364. sb.append('0' + nibble);
  365. else if (nibble == 0xa)
  366. sb.append('.');
  367. else if (nibble == 0xb)
  368. sb.append('E');
  369. else if (nibble == 0xc)
  370. sb.append("E-"sv);
  371. else if (nibble == 0xe)
  372. sb.append('-');
  373. };
  374. while (true) {
  375. auto byte = reader.read<u8>();
  376. char nibble1 = (byte & 0xf0) >> 4;
  377. char nibble2 = byte & 0x0f;
  378. if (nibble1 == 0xf)
  379. break;
  380. add_nibble(nibble1);
  381. if (nibble2 == 0xf)
  382. break;
  383. add_nibble(nibble2);
  384. }
  385. auto result = AK::StringUtils::convert_to_floating_point<float>(sb.string_view());
  386. return result.release_value();
  387. }
  388. PDFErrorOr<CFF::DictOperand> CFF::load_dict_operand(u8 b0, Reader& reader)
  389. {
  390. if (b0 == 30)
  391. return load_float_dict_operand(reader);
  392. if (b0 >= 28)
  393. return load_int_dict_operand(b0, reader);
  394. return Error { Error::Type::MalformedPDF, DeprecatedString::formatted("Unknown CFF dict element prefix: {}", b0) };
  395. }
  396. }