Encoding.cpp 6.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190
  1. /*
  2. * Copyright (c) 2022, Matthew Olsson <mattco@serenityos.org>
  3. * Copyright (c) 2022, Julian Offenhäuser <offenhaeuser@protonmail.com>
  4. *
  5. * SPDX-License-Identifier: BSD-2-Clause
  6. */
  7. #include <AK/Utf8View.h>
  8. #include <LibPDF/CommonNames.h>
  9. #include <LibPDF/Encoding.h>
  10. namespace PDF {
  11. NonnullRefPtr<Encoding> Encoding::create()
  12. {
  13. return adopt_ref(*new Encoding());
  14. }
  15. PDFErrorOr<NonnullRefPtr<Encoding>> Encoding::from_object(Document* document, NonnullRefPtr<Object> const& obj)
  16. {
  17. if (obj->is<NameObject>()) {
  18. // PDF 1.7 spec, 5.5.5 "Character Encoding"
  19. auto name = obj->cast<NameObject>()->name();
  20. if (name == "StandardEncoding")
  21. return standard_encoding();
  22. // FIXME: MacExpertEncoding
  23. if (name == "MacRomanEncoding")
  24. return mac_encoding();
  25. if (name == "WinAnsiEncoding")
  26. return windows_encoding();
  27. VERIFY_NOT_REACHED();
  28. }
  29. // Make a custom encoding
  30. auto dict = obj->cast<DictObject>();
  31. RefPtr<Encoding> base_encoding;
  32. if (dict->contains(CommonNames::BaseEncoding)) {
  33. auto base_encoding_obj = MUST(dict->get_object(document, CommonNames::BaseEncoding));
  34. base_encoding = TRY(Encoding::from_object(document, base_encoding_obj));
  35. } else {
  36. // FIXME:
  37. // "If this entry is absent, the Differences entry describes differences from an implicit base encoding.
  38. // For a font program that is embedded in the PDF file, the implicit base encoding is the font program’s built-in encoding,
  39. // as described above and further elaborated in the sections on specific font types below.
  40. // Otherwise, for a nonsymbolic font, it is StandardEncoding, and for a symbolic font, it is the font’s built-in encoding."
  41. base_encoding = Encoding::standard_encoding();
  42. }
  43. auto encoding = adopt_ref(*new Encoding());
  44. encoding->m_descriptors = TRY(base_encoding->m_descriptors.clone());
  45. encoding->m_name_mapping = TRY(base_encoding->m_name_mapping.clone());
  46. if (dict->contains(CommonNames::Differences)) {
  47. auto differences_array = TRY(dict->get_array(document, CommonNames::Differences));
  48. u16 current_code_point = 0;
  49. bool first = true;
  50. for (auto& item : *differences_array) {
  51. if (item.has_u32()) {
  52. current_code_point = item.to_int();
  53. first = false;
  54. } else {
  55. VERIFY(item.has<NonnullRefPtr<Object>>());
  56. VERIFY(!first);
  57. auto& object = item.get<NonnullRefPtr<Object>>();
  58. auto name = object->cast<NameObject>()->name();
  59. encoding->set(current_code_point, name);
  60. current_code_point++;
  61. }
  62. }
  63. }
  64. return encoding;
  65. }
  66. void Encoding::set(CharCodeType char_code, DeprecatedFlyString const& glyph_name)
  67. {
  68. m_descriptors.set(char_code, glyph_name);
  69. m_name_mapping.set(glyph_name, char_code);
  70. }
  71. NonnullRefPtr<Encoding> Encoding::standard_encoding()
  72. {
  73. static NonnullRefPtr<Encoding> encoding = adopt_ref(*new Encoding());
  74. if (encoding->m_descriptors.is_empty()) {
  75. #define ENUMERATE(name, standard_code, mac_code, win_code, pdf_code) \
  76. encoding->set(standard_code, #name);
  77. ENUMERATE_LATIN_CHARACTER_SET(ENUMERATE)
  78. #undef ENUMERATE
  79. }
  80. return encoding;
  81. }
  82. NonnullRefPtr<Encoding> Encoding::mac_encoding()
  83. {
  84. static NonnullRefPtr<Encoding> encoding = adopt_ref(*new Encoding());
  85. if (encoding->m_descriptors.is_empty()) {
  86. #define ENUMERATE(name, standard_code, mac_code, win_code, pdf_code) \
  87. encoding->set(mac_code, #name);
  88. ENUMERATE_LATIN_CHARACTER_SET(ENUMERATE)
  89. #undef ENUMERATE
  90. }
  91. return encoding;
  92. }
  93. NonnullRefPtr<Encoding> Encoding::windows_encoding()
  94. {
  95. static NonnullRefPtr<Encoding> encoding = adopt_ref(*new Encoding());
  96. if (encoding->m_descriptors.is_empty()) {
  97. #define ENUMERATE(name, standard_code, mac_code, win_code, pdf_code) \
  98. encoding->set(win_code, #name);
  99. ENUMERATE_LATIN_CHARACTER_SET(ENUMERATE)
  100. #undef ENUMERATE
  101. // PDF Annex D table D.2, note 3:
  102. // In WinAnsiEncoding, all unused codes greater than 40 (octal) map to the bullet character. However, only
  103. // code 225 (octal) shall be specifically assigned to the bullet character; other codes are subject to future re-assignment.
  104. //
  105. // Since CharCodeType is u8 *and* we need to include 255, we iterate in reverse order to have more readable code.
  106. for (CharCodeType char_code = 255; char_code > 040; char_code--) {
  107. if (!encoding->m_descriptors.contains(char_code))
  108. encoding->set(char_code, "bullet");
  109. }
  110. }
  111. return encoding;
  112. }
  113. NonnullRefPtr<Encoding> Encoding::pdf_doc_encoding()
  114. {
  115. static NonnullRefPtr<Encoding> encoding = adopt_ref(*new Encoding());
  116. if (encoding->m_descriptors.is_empty()) {
  117. #define ENUMERATE(name, standard_code, mac_code, win_code, pdf_code) \
  118. encoding->set(pdf_code, #name);
  119. ENUMERATE_LATIN_CHARACTER_SET(ENUMERATE)
  120. #undef ENUMERATE
  121. }
  122. return encoding;
  123. }
  124. NonnullRefPtr<Encoding> Encoding::symbol_encoding()
  125. {
  126. static NonnullRefPtr<Encoding> encoding = adopt_ref(*new Encoding());
  127. if (encoding->m_descriptors.is_empty()) {
  128. #define ENUMERATE(name, code) \
  129. encoding->set(code, #name);
  130. ENUMERATE_SYMBOL_CHARACTER_SET(ENUMERATE)
  131. #undef ENUMERATE
  132. }
  133. return encoding;
  134. }
  135. NonnullRefPtr<Encoding> Encoding::zapf_encoding()
  136. {
  137. static NonnullRefPtr<Encoding> encoding = adopt_ref(*new Encoding());
  138. if (encoding->m_descriptors.is_empty()) {
  139. #define ENUMERATE(name, code) \
  140. encoding->set(code, #name);
  141. ENUMERATE_ZAPF_DINGBATS_CHARACTER_SET(ENUMERATE)
  142. #undef ENUMERATE
  143. }
  144. return encoding;
  145. }
  146. u16 Encoding::get_char_code(ByteString const& name) const
  147. {
  148. auto code_iterator = m_name_mapping.find(name);
  149. if (code_iterator != m_name_mapping.end())
  150. return code_iterator->value;
  151. return 0;
  152. }
  153. DeprecatedFlyString Encoding::get_name(u8 char_code) const
  154. {
  155. auto name_iterator = m_descriptors.find(char_code);
  156. if (name_iterator != m_descriptors.end())
  157. return name_iterator->value;
  158. return 0;
  159. }
  160. }