123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575 |
- /*
- * Copyright (c) 2023, Rodrigo Tobar <rtobarc@gmail.com>.
- *
- * SPDX-License-Identifier: BSD-2-Clause
- */
- #include <AK/Endian.h>
- #include <AK/String.h>
- #include <LibGfx/Forward.h>
- #include <LibPDF/Encoding.h>
- #include <LibPDF/Error.h>
- #include <LibPDF/Fonts/CFF.h>
- #include <LibPDF/Reader.h>
- namespace PDF {
- PDFErrorOr<NonnullRefPtr<CFF>> CFF::create(ReadonlyBytes const& cff_bytes, RefPtr<Encoding> encoding)
- {
- Reader reader(cff_bytes);
- // Header
- // skip major, minor version
- reader.consume(2);
- auto header_size = TRY(reader.try_read<Card8>());
- // skip offset size
- reader.consume(1);
- reader.move_to(header_size);
- // Name INDEX
- Vector<String> font_names;
- TRY(parse_index(reader, [&](ReadonlyBytes const& data) -> PDFErrorOr<void> {
- auto string = TRY(String::from_utf8(data));
- return TRY(font_names.try_append(string));
- }));
- auto cff = adopt_ref(*new CFF());
- cff->set_font_matrix({ 0.001f, 0.0f, 0.0f, 0.001f, 0.0f, 0.0f });
- // Top DICT INDEX
- int charset_offset = 0;
- Vector<u8> encoding_codes;
- auto charstrings_offset = 0;
- Vector<ByteBuffer> subroutines;
- int defaultWidthX = 0;
- int nominalWidthX = 0;
- TRY(parse_index(reader, [&](ReadonlyBytes const& element_data) {
- Reader element_reader { element_data };
- return parse_dict<TopDictOperator>(element_reader, [&](TopDictOperator op, Vector<DictOperand> const& operands) -> PDFErrorOr<void> {
- switch (op) {
- case TopDictOperator::Encoding: {
- auto encoding_offset = 0;
- if (!operands.is_empty())
- encoding_offset = operands[0].get<int>();
- encoding_codes = TRY(parse_encoding(Reader(cff_bytes.slice(encoding_offset))));
- break;
- }
- case TopDictOperator::Charset: {
- if (!operands.is_empty())
- charset_offset = operands[0].get<int>();
- break;
- }
- case TopDictOperator::CharStrings: {
- if (!operands.is_empty())
- charstrings_offset = operands[0].get<int>();
- break;
- }
- case TopDictOperator::Private: {
- auto private_dict_size = operands[0].get<int>();
- auto private_dict_offset = operands[1].get<int>();
- Reader priv_dict_reader { cff_bytes.slice(private_dict_offset, private_dict_size) };
- TRY(parse_dict<PrivDictOperator>(priv_dict_reader, [&](PrivDictOperator op, Vector<DictOperand> const& operands) -> PDFErrorOr<void> {
- switch (op) {
- case PrivDictOperator::Subrs: {
- auto subrs_offset = operands[0].get<int>();
- Reader subrs_reader { cff_bytes.slice(private_dict_offset + subrs_offset) };
- dbgln("Parsing Subrs INDEX");
- TRY(parse_index(subrs_reader, [&](ReadonlyBytes const& subroutine_bytes) -> PDFErrorOr<void> {
- return TRY(subroutines.try_append(TRY(ByteBuffer::copy(subroutine_bytes))));
- }));
- break;
- }
- case PrivDictOperator::DefaultWidthX:
- defaultWidthX = operands[0].get<int>();
- break;
- case PrivDictOperator::NominalWidthX:
- nominalWidthX = operands[0].get<int>();
- break;
- }
- return {};
- }));
- break;
- }
- default:;
- }
- return {};
- });
- }));
- // Create glyphs (now that we have the subroutines) and associate missing information to store them and their encoding
- auto glyphs = TRY(parse_charstrings(Reader(cff_bytes.slice(charstrings_offset)), subroutines));
- auto charset = TRY(parse_charset(Reader { cff_bytes.slice(charset_offset) }, glyphs.size()));
- // Adjust glyphs' widths as they are deltas from nominalWidthX
- for (auto& glyph : glyphs) {
- if (!glyph.has_width())
- glyph.set_width(float(defaultWidthX));
- else
- glyph.set_width(glyph.width() + float(nominalWidthX));
- }
- for (size_t i = 0; i < glyphs.size(); i++) {
- if (i == 0) {
- TRY(cff->add_glyph(0, move(glyphs[0])));
- continue;
- }
- auto const& name = charset[i - 1];
- TRY(cff->add_glyph(name, move(glyphs[i])));
- }
- cff->consolidate_glyphs();
- // Encoding given or read
- if (encoding) {
- cff->set_encoding(move(encoding));
- } else {
- auto encoding = Encoding::create();
- for (size_t i = 0; i < glyphs.size(); i++) {
- if (i == 0) {
- encoding->set(0, ".notdef");
- continue;
- }
- auto code = encoding_codes[i - 1];
- auto char_name = charset[i - 1];
- encoding->set(code, char_name);
- }
- cff->set_encoding(move(encoding));
- }
- return cff;
- }
- /// Appendix C: Predefined Charsets
- static constexpr Array s_cff_builtin_names {
- ".notdef"sv,
- "space"sv,
- "exclam"sv,
- "quotedbl"sv,
- "numbersign"sv,
- "dollar"sv,
- "percent"sv,
- "ampersand"sv,
- "quoteright"sv,
- "parenleft"sv,
- "parenright"sv,
- "asterisk"sv,
- "plus"sv,
- "comma"sv,
- "hyphen"sv,
- "period"sv,
- "slash"sv,
- "zero"sv,
- "one"sv,
- "two"sv,
- "three"sv,
- "four"sv,
- "five"sv,
- "six"sv,
- "seven"sv,
- "eight"sv,
- "nine"sv,
- "colon"sv,
- "semicolon"sv,
- "less"sv,
- "equal"sv,
- "greater"sv,
- "question"sv,
- "at"sv,
- "A"sv,
- "B"sv,
- "C"sv,
- "D"sv,
- "E"sv,
- "F"sv,
- "G"sv,
- "H"sv,
- "I"sv,
- "J"sv,
- "K"sv,
- "L"sv,
- "M"sv,
- "N"sv,
- "O"sv,
- "P"sv,
- "Q"sv,
- "R"sv,
- "S"sv,
- "T"sv,
- "U"sv,
- "V"sv,
- "W"sv,
- "X"sv,
- "Y"sv,
- "Z"sv,
- "bracketleft"sv,
- "backslash"sv,
- "bracketright"sv,
- "asciicircum"sv,
- "underscore"sv,
- "quoteleft"sv,
- "a"sv,
- "b"sv,
- "c"sv,
- "d"sv,
- "e"sv,
- "f"sv,
- "g"sv,
- "h"sv,
- "i"sv,
- "j"sv,
- "k"sv,
- "l"sv,
- "m"sv,
- "n"sv,
- "o"sv,
- "p"sv,
- "q"sv,
- "r"sv,
- "s"sv,
- "t"sv,
- "u"sv,
- "v"sv,
- "w"sv,
- "x"sv,
- "y"sv,
- "z"sv,
- "braceleft"sv,
- "bar"sv,
- "braceright"sv,
- "asciitilde"sv,
- "exclamdown"sv,
- "cent"sv,
- "sterling"sv,
- "fraction"sv,
- "yen"sv,
- "florin"sv,
- "section"sv,
- "currency"sv,
- "quotesingle"sv,
- "quotedblleft"sv,
- "guillemotleft"sv,
- "guilsinglleft"sv,
- "guilsinglright"sv,
- "fi"sv,
- "fl"sv,
- "endash"sv,
- "dagger"sv,
- "daggerdbl"sv,
- "periodcentered"sv,
- "paragraph"sv,
- "bullet"sv,
- "quotesinglbase"sv,
- "quotedblbase"sv,
- "quotedblright"sv,
- "guillemotright"sv,
- "ellipsis"sv,
- "perthousand"sv,
- "questiondown"sv,
- "grave"sv,
- "acute"sv,
- "circumflex"sv,
- "tilde"sv,
- "macron"sv,
- "breve"sv,
- "dotaccent"sv,
- "dieresis"sv,
- "ring"sv,
- "cedilla"sv,
- "hungarumlaut"sv,
- "ogonek"sv,
- "caron"sv,
- "emdash"sv,
- "AE"sv,
- "ordfeminine"sv,
- "Lslash"sv,
- "Oslash"sv,
- "OE"sv,
- "ordmasculine"sv,
- "ae"sv,
- "dotlessi"sv,
- "lslash"sv,
- "oslash"sv,
- "oe"sv,
- "germandbls"sv,
- "onesuperior"sv,
- "logicalnot"sv,
- "mu"sv,
- "trademark"sv,
- "Eth"sv,
- "onehalf"sv,
- "plusminus"sv,
- "Thorn"sv,
- "onequarter"sv,
- "divide"sv,
- "brokenbar"sv,
- "degree"sv,
- "thorn"sv,
- "threequarters"sv,
- "twosuperior"sv,
- "registered"sv,
- "minus"sv,
- "eth"sv,
- "multiply"sv,
- "threesuperior"sv,
- "copyright"sv,
- "Aacute"sv,
- "Acircumflex"sv,
- "Adieresis"sv,
- "Agrave"sv,
- "Aring"sv,
- "Atilde"sv,
- "Ccedilla"sv,
- "Eacute"sv,
- "Ecircumflex"sv,
- "Edieresis"sv,
- "Egrave"sv,
- "Iacute"sv,
- "Icircumflex"sv,
- "Idieresis"sv,
- "Igrave"sv,
- "Ntilde"sv,
- "Oacute"sv,
- "Ocircumflex"sv,
- "Odieresis"sv,
- "Ograve"sv,
- "Otilde"sv,
- "Scaron"sv,
- "Uacute"sv,
- "Ucircumflex"sv,
- "Udieresis"sv,
- "Ugrave"sv,
- "Yacute"sv,
- "Ydieresis"sv,
- "Zcaron"sv,
- "aacute"sv,
- "acircumflex"sv,
- "adieresis"sv,
- "agrave"sv,
- "aring"sv,
- "atilde"sv,
- "ccedilla"sv,
- "eacute"sv,
- "ecircumflex"sv,
- "edieresis"sv,
- "egrave"sv,
- "iacute"sv,
- "icircumflex"sv,
- "idieresis"sv,
- "igrave"sv,
- "ntilde"sv,
- "oacute"sv,
- "ocircumflex"sv,
- "odieresis"sv,
- "ograve"sv,
- "otilde"sv,
- "scaron"sv,
- "uacute"sv,
- "ucircumflex"sv,
- "udieresis"sv,
- "ugrave"sv,
- "yacute"sv,
- "ydieresis"sv,
- "zcaron"sv,
- };
- PDFErrorOr<Vector<DeprecatedFlyString>> CFF::parse_charset(Reader&& reader, size_t glyph_count)
- {
- Vector<DeprecatedFlyString> names;
- auto resolve = [](SID sid) {
- if (sid < s_cff_builtin_names.size())
- return DeprecatedFlyString(s_cff_builtin_names[sid]);
- dbgln("Cound't find string for SID {}, going with space", sid);
- return DeprecatedFlyString("space");
- };
- auto format = TRY(reader.try_read<Card8>());
- if (format == 0) {
- for (u8 i = 0; i < glyph_count - 1; i++) {
- SID sid = TRY(reader.try_read<BigEndian<SID>>());
- TRY(names.try_append(resolve(sid)));
- }
- } else if (format == 1) {
- while (names.size() < glyph_count - 1) {
- auto first_sid = TRY(reader.try_read<BigEndian<SID>>());
- int left = TRY(reader.try_read<Card8>());
- for (u8 sid = first_sid; left >= 0; left--, sid++)
- TRY(names.try_append(resolve(sid)));
- }
- }
- return names;
- }
- PDFErrorOr<Vector<CFF::Glyph>> CFF::parse_charstrings(Reader&& reader, Vector<ByteBuffer> const& subroutines)
- {
- Vector<Glyph> glyphs;
- TRY(parse_index(reader, [&](ReadonlyBytes const& charstring_data) -> PDFErrorOr<void> {
- GlyphParserState state;
- auto glyph = TRY(parse_glyph(charstring_data, subroutines, state, true));
- return TRY(glyphs.try_append(glyph));
- }));
- return glyphs;
- }
- PDFErrorOr<Vector<u8>> CFF::parse_encoding(Reader&& reader)
- {
- Vector<u8> encoding_codes;
- auto format_raw = TRY(reader.try_read<Card8>());
- // TODO: support encoding supplements when highest bit is set
- auto format = format_raw & 0x7f;
- if (format == 0) {
- auto n_codes = TRY(reader.try_read<Card8>());
- for (u8 i = 0; i < n_codes; i++) {
- TRY(encoding_codes.try_append(TRY(reader.try_read<Card8>())));
- }
- } else if (format == 1) {
- auto n_ranges = TRY(reader.try_read<Card8>());
- for (u8 i = 0; i < n_ranges; i++) {
- auto first_code = TRY(reader.try_read<Card8>());
- int left = TRY(reader.try_read<Card8>());
- for (u8 code = first_code; left >= 0; left--, code++)
- TRY(encoding_codes.try_append(code));
- }
- } else
- return error(DeprecatedString::formatted("Invalid encoding format: {}", format));
- return encoding_codes;
- }
- template<typename OperatorT>
- PDFErrorOr<void> CFF::parse_dict(Reader& reader, DictEntryHandler<OperatorT>&& handler)
- {
- Vector<DictOperand> operands;
- while (reader.remaining() > 0) {
- auto b0 = reader.read<u8>();
- // A command
- if (b0 <= 21) {
- auto op = TRY(parse_dict_operator<OperatorT>(b0, reader));
- TRY(handler(op, operands));
- operands.clear();
- continue;
- }
- // An operand
- TRY(operands.try_append(TRY(load_dict_operand(b0, reader))));
- }
- return {};
- }
- template PDFErrorOr<void> CFF::parse_dict<CFF::TopDictOperator>(Reader&, DictEntryHandler<TopDictOperator>&&);
- template PDFErrorOr<void> CFF::parse_dict<CFF::PrivDictOperator>(Reader&, DictEntryHandler<PrivDictOperator>&&);
- template<typename OperatorT>
- PDFErrorOr<OperatorT> CFF::parse_dict_operator(u8 b0, Reader& reader)
- {
- VERIFY(b0 <= 21);
- if (b0 != 12)
- return OperatorT { (int)b0 };
- auto b1 = TRY(reader.try_read<u8>());
- return OperatorT { b0 << 8 | b1 };
- }
- template PDFErrorOr<CFF::TopDictOperator> CFF::parse_dict_operator(u8, Reader&);
- PDFErrorOr<void> CFF::parse_index(Reader& reader, IndexDataHandler&& data_handler)
- {
- Card16 count = TRY(reader.try_read<BigEndian<Card16>>());
- if (count == 0)
- return {};
- auto offset_size = TRY(reader.try_read<OffSize>());
- if (offset_size == 1)
- return parse_index_data<u8>(count, reader, data_handler);
- if (offset_size == 2)
- return parse_index_data<u16>(count, reader, data_handler);
- if (offset_size == 4)
- return parse_index_data<u32>(count, reader, data_handler);
- VERIFY_NOT_REACHED();
- }
- template<typename OffsetType>
- PDFErrorOr<void> CFF::parse_index_data(Card16 count, Reader& reader, IndexDataHandler& handler)
- {
- OffsetType last_data_end = 1;
- auto offset_refpoint = reader.offset() + sizeof(OffsetType) * (count + 1) - 1;
- for (u16 i = 0; i < count; i++) {
- reader.save();
- reader.move_by(sizeof(OffsetType) * i);
- OffsetType data_start = reader.read<BigEndian<OffsetType>>();
- last_data_end = reader.read<BigEndian<OffsetType>>();
- auto data_size = last_data_end - data_start;
- reader.move_to(offset_refpoint + data_start);
- TRY(handler(reader.bytes().slice(reader.offset(), data_size)));
- reader.load();
- }
- reader.move_to(offset_refpoint + last_data_end);
- return {};
- }
- template PDFErrorOr<void> CFF::parse_index_data<u8>(Card16, Reader&, IndexDataHandler&);
- template PDFErrorOr<void> CFF::parse_index_data<u16>(Card16, Reader&, IndexDataHandler&);
- template PDFErrorOr<void> CFF::parse_index_data<u32>(Card16, Reader&, IndexDataHandler&);
- // 4 DICT DATA, Table 3 Operand Encoding
- int CFF::load_int_dict_operand(u8 b0, Reader& reader)
- {
- if (b0 >= 32 && b0 <= 246) {
- return b0 - 139;
- }
- if (b0 >= 247 && b0 <= 250) {
- auto b1 = reader.read<u8>();
- return (b0 - 247) * 256 + b1 + 108;
- }
- if (b0 >= 251 && b0 <= 254) {
- auto b1 = reader.read<u8>();
- return -(b0 - 251) * 256 - b1 - 108;
- }
- if (b0 == 28) {
- auto b1 = reader.read<u8>();
- auto b2 = reader.read<u8>();
- return b1 << 8 | b2;
- }
- if (b0 == 29) {
- auto b1 = reader.read<u8>();
- auto b2 = reader.read<u8>();
- auto b3 = reader.read<u8>();
- auto b4 = reader.read<u8>();
- return b1 << 24 | b2 << 16 | b3 << 8 | b4;
- }
- VERIFY_NOT_REACHED();
- }
- float CFF::load_float_dict_operand(Reader& reader)
- {
- StringBuilder sb;
- auto add_nibble = [&](char nibble) {
- if (nibble < 0xa)
- sb.append('0' + nibble);
- else if (nibble == 0xa)
- sb.append('.');
- else if (nibble == 0xb)
- sb.append('E');
- else if (nibble == 0xc)
- sb.append("E-"sv);
- else if (nibble == 0xe)
- sb.append('-');
- };
- while (true) {
- auto byte = reader.read<u8>();
- char nibble1 = (byte & 0xf0) >> 4;
- char nibble2 = byte & 0x0f;
- if (nibble1 == 0xf)
- break;
- add_nibble(nibble1);
- if (nibble2 == 0xf)
- break;
- add_nibble(nibble2);
- }
- auto result = AK::StringUtils::convert_to_floating_point<float>(sb.string_view());
- return result.release_value();
- }
- PDFErrorOr<CFF::DictOperand> CFF::load_dict_operand(u8 b0, Reader& reader)
- {
- if (b0 == 30)
- return load_float_dict_operand(reader);
- if (b0 >= 28)
- return load_int_dict_operand(b0, reader);
- return Error { Error::Type::MalformedPDF, DeprecatedString::formatted("Unknown CFF dict element prefix: {}", b0) };
- }
- }
|