
The Type1FontProgram logic was based on the Adobe Type 1 Font Format; in particular, it implemented the CharStrings Dictionary section (charstring decoding, and most commands). In the case of Type1, these charstrings are read from a PS1 dictionary, with one entry per character in the font's charset. This has served us well for Type1 font rendering. When implementing Type1C font rendering, this wasn't enough. Type1C PDF fonts are specified in embedded CFF (Compact Font Format) streams, which also contain a charstring dictionary with an entry for each character in the font's charset. These entries can be slightly different from those in a PS1 Font Program though: depending on a flag in the CFF, the entries will be encoded either in the original charstring format from the Adobe Type 1 Font Format, or in the "Type 2 Charstring Format" (Adobe's Technical Note #5177). This new format is for the most part a super-set of the original, with small differences, all in the name of making the representation as compact as possible: * The glyph's width is not specified via a separate command; instead it's an optional additional argument to the first command of the charstring stream (and even then, it's only the *difference* to a nominal character width specified in the CFF). * The interpretation of a 4-byte number is different from Type 1: in Type 1 this is a 4-byte signed integer, whereas in Type 2 it's a fixed-point number with 16 bits of fractional part. * Many commands accept a variable number of arguments, so they can draw more than one line/curve in a single go. These are all backwards-compatible with Type 1's commands. All these changes are implemented in this patch in a backwards-compatible way. To select between Type 1 and Type 2 behavior, a new parameter indicates which behavior is desired when decoding the charstring stream. I also took the chance to centralise some logic that was previously duplicated across the parse_glyph function.
Common lambdas capture the logic for moving to, or drawing a line/curve to, a given point and updating the glyph state. Similarly, some command logic, including the reading of parameters, is shared by several commands. Finally, I've re-organised the cases in the main switch to group together related commands.
243 lines
7.4 KiB
C++
243 lines
7.4 KiB
C++
/*
|
||
* Copyright (c) 2022, Julian Offenhäuser <offenhaeuser@protonmail.com>
|
||
*
|
||
* SPDX-License-Identifier: BSD-2-Clause
|
||
*/
|
||
|
||
#include <LibGfx/Font/PathRasterizer.h>
|
||
#include <LibPDF/CommonNames.h>
|
||
#include <LibPDF/Encoding.h>
|
||
#include <LibPDF/Fonts/PS1FontProgram.h>
|
||
#include <LibPDF/Reader.h>
|
||
#include <ctype.h>
|
||
#include <math.h>
|
||
|
||
namespace PDF {
|
||
|
||
// Builds a font program from the raw bytes of a PostScript Type 1 font.
//
// The cleartext portion is scanned for the /Encoding and /FontMatrix entries. The
// `encrypted_length` bytes that start at offset `cleartext_length` are then decrypted
// (key 55665, first 4 bytes discarded) and passed to parse_encrypted_portion().
//
// If `encoding` is non-null it is used instead of the encoding declared by the font.
// Returns an error when the data is empty, does not start with "%!", lacks an
// /Encoding entry, or when the two section lengths do not fit inside `bytes`.
PDFErrorOr<NonnullRefPtr<Type1FontProgram>> PS1FontProgram::create(ReadonlyBytes const& bytes, RefPtr<Encoding> encoding, size_t cleartext_length, size_t encrypted_length)
{
    // Number of leading bytes of the encrypted section that decrypt() discards.
    constexpr size_t discarded_prefix_length = 4;

    Reader reader(bytes);
    if (reader.remaining() == 0)
        return error("Empty font program");

    reader.move_to(0);
    if (reader.remaining() < 2 || !reader.matches("%!"))
        return error("Not a font program");

    if (!seek_name(reader, CommonNames::Encoding))
        return error("Missing encoding array");

    auto font_program = adopt_ref(*new PS1FontProgram());
    if (encoding) {
        // 9.6.6.2 Encodings for Type 1 Fonts:
        // An Encoding entry may override a Type 1 font’s mapping from character codes to character names.
        font_program->set_encoding(move(encoding));
    } else {
        if (TRY(parse_word(reader)) == "StandardEncoding") {
            font_program->set_encoding(Encoding::standard_encoding());
        } else {
            // Custom encoding: collect every "dup <code> /<name>" entry until "readonly".
            HashMap<u16, CharDescriptor> descriptors;

            while (reader.remaining()) {
                auto word = TRY(parse_word(reader));
                if (word == "readonly") {
                    break;
                } else if (word == "dup") {
                    u32 char_code = TRY(parse_int(reader));
                    auto name = TRY(parse_word(reader));
                    descriptors.set(char_code, { name.starts_with('/') ? name.substring_view(1) : name.view(), char_code });
                }
            }
            font_program->set_encoding(TRY(Encoding::create(descriptors)));
        }
    }

    bool found_font_matrix = seek_name(reader, "FontMatrix");
    if (found_font_matrix) {
        auto array = TRY(parse_number_array(reader, 6));
        font_program->set_font_matrix({ array[0], array[1], array[2], array[3], array[4], array[5] });
    } else {
        // Fall back to a 0.001 scale on both axes when the font declares no matrix.
        font_program->set_font_matrix({ 0.001f, 0.0f, 0.0f, 0.001f, 0.0f, 0.0f });
    }

    // The section lengths come from outside this function; reject values that do not
    // fit inside the data instead of slicing out of bounds.
    auto const total_size = reader.bytes().size();
    if (cleartext_length > total_size || encrypted_length > total_size - cleartext_length)
        return error("Font program section lengths are out of bounds");
    if (encrypted_length < discarded_prefix_length)
        return error("Encrypted font program section is too short");

    auto decrypted = TRY(decrypt(reader.bytes().slice(cleartext_length, encrypted_length), 55665, discarded_prefix_length));
    TRY(font_program->parse_encrypted_portion(decrypted));
    return font_program;
}
|
||
|
||
// Parses the already-decrypted private portion of the font program.
//
// Reads the optional /lenIV value, the /Subrs array and then every entry of the
// /CharStrings dictionary. Each charstring is decrypted with m_encryption_key and
// m_lenIV, parsed into a glyph and registered under the character code that the
// current encoding maps its name to.
//
// Returns an error if /Subrs or /CharStrings is missing, if the dictionary ends
// unexpectedly, or if a charstring claims more bytes than the buffer contains.
PDFErrorOr<void> PS1FontProgram::parse_encrypted_portion(ByteBuffer const& buffer)
{
    Reader reader(buffer);

    if (seek_name(reader, "lenIV"))
        m_lenIV = TRY(parse_int(reader));

    if (!seek_name(reader, "Subrs"))
        return error("Missing subroutine array");
    auto subroutines = TRY(parse_subroutines(reader));

    if (!seek_name(reader, "CharStrings"))
        return error("Missing char strings array");

    while (reader.remaining()) {
        auto word = TRY(parse_word(reader));
        // An empty word means the reader could not make progress (end of data or an
        // unexpected delimiter). Report it rather than asserting on untrusted input.
        if (word.is_empty())
            return error("Unexpected end of char strings dictionary");

        if (word == "end")
            break;

        if (word[0] == '/') {
            auto encrypted_size = TRY(parse_int(reader));
            auto rd = TRY(parse_word(reader));
            if (rd == "-|" || rd == "RD") {
                // Make sure the declared size fits in what is left of the buffer
                // before slicing it.
                auto const total_size = reader.bytes().size();
                auto const offset = reader.offset();
                if (encrypted_size < 0 || offset > total_size || static_cast<size_t>(encrypted_size) > total_size - offset)
                    return error("Char string size is out of bounds");

                auto line = TRY(decrypt(reader.bytes().slice(offset, encrypted_size), m_encryption_key, m_lenIV));
                reader.move_by(encrypted_size);

                // Translate the glyph name (without its leading '/') to a character code.
                auto name_mapping = encoding()->name_mapping();
                auto char_code = name_mapping.ensure(word.substring_view(1));

                GlyphParserState state;
                TRY(add_glyph(char_code, TRY(parse_glyph(line, subroutines, state, false))));
            }
        }
    }

    return {};
}
|
||
|
||
// Parses the /Subrs array that the reader is currently positioned at.
//
// The reader must be at the array length. Each following "dup <index> <entry>"
// statement fills one slot: if <entry> starts with a digit it is a byte count
// followed by "RD" or "-|" and that many encrypted bytes, otherwise the entry's
// own text is stored. Parsing stops at the first "index" word or at end of data.
//
// Returns the array (unfilled slots stay empty), or an error for a missing,
// negative or too large (> 1024) length, an out-of-range index, or an entry that
// claims more bytes than are available.
PDFErrorOr<Vector<ByteBuffer>> PS1FontProgram::parse_subroutines(Reader& reader) const
{
    if (!reader.matches_number())
        return error("Expected array length");

    auto length = TRY(parse_int(reader));
    // The length comes from the font data, so reject bad values instead of asserting.
    if (length < 0 || length > 1024)
        return error("Unsupported subroutine array length");

    Vector<ByteBuffer> array;
    TRY(array.try_resize(length));

    while (reader.remaining()) {
        auto word = TRY(parse_word(reader));
        // An empty word means the reader could not advance; stop with an error.
        if (word.is_empty())
            return error("Unexpected end of subroutine array");

        if (word == "index")
            break;
        if (word != "dup")
            continue;

        auto index = TRY(parse_int(reader));
        auto entry = TRY(parse_word(reader));

        if (entry.is_empty())
            return error("Empty array entry");

        // A negative index would otherwise slip past the upper-bound check.
        if (index < 0 || index >= length)
            return error("Array index out of bounds");

        // Cast first: passing a negative char to isdigit() is undefined.
        if (isdigit(static_cast<unsigned char>(entry[0]))) {
            auto maybe_encrypted_size = entry.to_int();
            if (!maybe_encrypted_size.has_value())
                return error("Malformed array");

            auto rd = TRY(parse_word(reader));
            if (rd == "-|" || rd == "RD") {
                auto const encrypted_size = maybe_encrypted_size.value();
                auto const total_size = reader.bytes().size();
                auto const offset = reader.offset();
                if (encrypted_size < 0 || offset > total_size || static_cast<size_t>(encrypted_size) > total_size - offset)
                    return error("Subroutine size is out of bounds");

                array[index] = TRY(decrypt(reader.bytes().slice(offset, encrypted_size), m_encryption_key, m_lenIV));
                reader.move_by(encrypted_size);
            }
        } else {
            array[index] = TRY(ByteBuffer::copy(entry.bytes()));
        }
    }

    return array;
}
|
||
|
||
// Reads a bracketed array of exactly `length` numbers, e.g. "[1 0 0 1 0 0]".
// Leading whitespace before '[' and directly after it is skipped. Fails if the
// opening or closing bracket is not found where expected.
PDFErrorOr<Vector<float>> PS1FontProgram::parse_number_array(Reader& reader, size_t length)
{
    Vector<float> numbers;
    TRY(numbers.try_resize(length));

    reader.consume_whitespace();
    bool const has_opening_bracket = reader.consume('[');
    if (!has_opening_bracket)
        return error("Expected array to start with '['");
    reader.consume_whitespace();

    // The vector already holds `length` slots; fill them in order.
    for (auto& number : numbers)
        number = TRY(parse_float(reader));

    bool const has_closing_bracket = reader.consume(']');
    if (!has_closing_bracket)
        return error("Expected array to end with ']'");

    return numbers;
}
|
||
|
||
// Skips leading whitespace and returns the next word. A word ends at whitespace
// or at a '[' / ']' character; one whitespace character following the word is
// consumed, brackets are left in place.
PDFErrorOr<DeprecatedString> PS1FontProgram::parse_word(Reader& reader)
{
    reader.consume_whitespace();

    auto const word_begin = reader.offset();
    reader.move_while([&](char character) {
        if (reader.matches_whitespace())
            return false;
        return character != '[' && character != ']';
    });
    auto const word_end = reader.offset();

    // Step over a single separator so the next call starts past it.
    if (reader.matches_whitespace())
        reader.consume();

    auto const* word_data = reader.bytes().data() + word_begin;
    auto const word_length = word_end - word_begin;
    return StringView(word_data, word_length);
}
|
||
|
||
// Reads the next word and converts it with strtof(). No validation is done on
// the text: whatever strtof() yields for it is returned.
PDFErrorOr<float> PS1FontProgram::parse_float(Reader& reader)
{
    auto const token = TRY(parse_word(reader));
    float const value = strtof(token.characters(), nullptr);
    return value;
}
|
||
|
||
// Reads the next word and interprets it as an integer.
// Returns an "Invalid int" error when the word is not a valid integer.
PDFErrorOr<int> PS1FontProgram::parse_int(Reader& reader)
{
    auto const token = TRY(parse_word(reader));
    if (auto const parsed = token.to_int(); parsed.has_value())
        return parsed.value();
    return error("Invalid int");
}
|
||
|
||
// Decrypts `encrypted` with a running 16-bit key and drops the first `skip`
// bytes of the result.
//
// For each cipher byte C the plain byte is C ^ (R >> 8), after which the key is
// advanced as R = ((C + R) * 52845 + 22719) mod 2^16, starting from R = `key`.
// The first `skip` bytes still take part in the key schedule but are not written
// to the output, so the result has `encrypted.size() - skip` bytes.
//
// Returns an error if `skip` is larger than the input or if allocation fails.
PDFErrorOr<ByteBuffer> PS1FontProgram::decrypt(ReadonlyBytes const& encrypted, u16 key, size_t skip)
{
    // Guard the subtraction below: it would wrap around for skip > size.
    if (skip > encrypted.size())
        return error("Encrypted data is shorter than the bytes to skip");

    auto decrypted = TRY(ByteBuffer::create_uninitialized(encrypted.size() - skip));

    u16 R = key;
    // Kept as u32 so the key update is done in unsigned arithmetic. With u16
    // operands the product is computed in (signed) int and can exceed INT_MAX,
    // which is undefined behaviour.
    u32 const c1 = 52845;
    u32 const c2 = 22719;

    for (size_t i = 0; i < encrypted.size(); ++i) {
        u8 const C = encrypted[i];
        u8 const P = C ^ (R >> 8);
        R = static_cast<u16>((static_cast<u32>(C) + R) * c1 + c2);
        if (i >= skip)
            decrypted[i - skip] = P;
    }

    return decrypted;
}
|
||
|
||
// Searches the reader's data, from its very beginning, for "/<name>".
// On success the reader is left just past the name and any whitespace that
// follows it, and true is returned. On failure the reader is moved back to the
// offset it had on entry and false is returned.
bool PS1FontProgram::seek_name(Reader& reader, DeprecatedString const& name)
{
    auto const original_offset = reader.offset();

    for (reader.move_to(0); reader.remaining();) {
        bool const found = reader.consume('/') && reader.matches(name.characters());
        if (!found)
            continue;

        // Step over the name itself, then the whitespace separating it from its value.
        reader.move_while([&](char) {
            return reader.matches_regular_character();
        });
        reader.consume_whitespace();
        return true;
    }

    // Nothing found: restore the caller's position.
    reader.move_to(original_offset);
    return false;
}
|
||
|
||
}
|