Encoding.cpp 6.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190
  1. /*
  2. * Copyright (c) 2022, Matthew Olsson <mattco@serenityos.org>
  3. * Copyright (c) 2022, Julian Offenhäuser <offenhaeuser@protonmail.com>
  4. *
  5. * SPDX-License-Identifier: BSD-2-Clause
  6. */
  7. #include <AK/Utf8View.h>
  8. #include <LibPDF/CommonNames.h>
  9. #include <LibPDF/Encoding.h>
  10. namespace PDF {
  11. PDFErrorOr<NonnullRefPtr<Encoding>> Encoding::create(HashMap<u16, CharDescriptor> descriptors)
  12. {
  13. auto encoding = adopt_ref(*new Encoding());
  14. encoding->m_descriptors = descriptors;
  15. for (auto& descriptor : descriptors)
  16. encoding->m_name_mapping.set(descriptor.value.name, descriptor.value.code_point);
  17. return encoding;
  18. }
  19. PDFErrorOr<NonnullRefPtr<Encoding>> Encoding::from_object(Document* document, NonnullRefPtr<Object> const& obj)
  20. {
  21. if (obj->is<NameObject>()) {
  22. auto name = obj->cast<NameObject>()->name();
  23. if (name == "StandardEncoding")
  24. return standard_encoding();
  25. if (name == "MacRomanEncoding")
  26. return mac_encoding();
  27. if (name == "WinAnsiEncoding")
  28. return windows_encoding();
  29. VERIFY_NOT_REACHED();
  30. }
  31. // Make a custom encoding
  32. auto dict = obj->cast<DictObject>();
  33. RefPtr<Encoding> base_encoding;
  34. if (dict->contains(CommonNames::BaseEncoding)) {
  35. auto base_encoding_obj = MUST(dict->get_object(document, CommonNames::BaseEncoding));
  36. base_encoding = TRY(Encoding::from_object(document, base_encoding_obj));
  37. } else {
  38. base_encoding = Encoding::standard_encoding();
  39. }
  40. auto encoding = adopt_ref(*new Encoding());
  41. encoding->m_descriptors = base_encoding->descriptors();
  42. encoding->m_name_mapping = base_encoding->name_mapping();
  43. auto differences_array = TRY(dict->get_array(document, CommonNames::Differences));
  44. u16 current_code_point = 0;
  45. bool first = true;
  46. for (auto& item : *differences_array) {
  47. if (item.has_u32()) {
  48. current_code_point = item.to_int();
  49. first = false;
  50. } else {
  51. VERIFY(item.has<NonnullRefPtr<Object>>());
  52. VERIFY(!first);
  53. auto& object = item.get<NonnullRefPtr<Object>>();
  54. auto name = object->cast<NameObject>()->name();
  55. encoding->m_descriptors.set(current_code_point, { name, base_encoding->m_name_mapping.ensure(name) });
  56. current_code_point++;
  57. }
  58. }
  59. return encoding;
  60. }
  61. NonnullRefPtr<Encoding> Encoding::standard_encoding()
  62. {
  63. static NonnullRefPtr<Encoding> encoding = adopt_ref(*new Encoding());
  64. if (encoding->m_descriptors.is_empty()) {
  65. #define ENUMERATE(string, name, standard_code, mac_code, win_code, pdf_code) \
  66. auto name##_code_point = *Utf8View(string##sv).begin(); \
  67. encoding->m_descriptors.set(standard_code, { string, name##_code_point }); \
  68. encoding->m_name_mapping.set(#name, name##_code_point);
  69. ENUMERATE_LATIN_CHARACTER_SET(ENUMERATE)
  70. #undef ENUMERATE
  71. }
  72. return encoding;
  73. }
  74. NonnullRefPtr<Encoding> Encoding::mac_encoding()
  75. {
  76. static NonnullRefPtr<Encoding> encoding = adopt_ref(*new Encoding());
  77. if (encoding->m_descriptors.is_empty()) {
  78. #define ENUMERATE(string, name, standard_code, mac_code, win_code, pdf_code) \
  79. auto name##_code_point = *Utf8View(string##sv).begin(); \
  80. encoding->m_descriptors.set(mac_code, { string, name##_code_point }); \
  81. encoding->m_name_mapping.set(#name, name##_code_point);
  82. ENUMERATE_LATIN_CHARACTER_SET(ENUMERATE)
  83. #undef ENUMERATE
  84. }
  85. return encoding;
  86. }
  87. NonnullRefPtr<Encoding> Encoding::windows_encoding()
  88. {
  89. static NonnullRefPtr<Encoding> encoding = adopt_ref(*new Encoding());
  90. if (encoding->m_descriptors.is_empty()) {
  91. #define ENUMERATE(string, name, standard_code, mac_code, win_code, pdf_code) \
  92. auto name##_code_point = *Utf8View(string##sv).begin(); \
  93. encoding->m_descriptors.set(win_code, { string, name##_code_point }); \
  94. encoding->m_name_mapping.set(#name, name##_code_point);
  95. ENUMERATE_LATIN_CHARACTER_SET(ENUMERATE)
  96. #undef ENUMERATE
  97. encoding->m_windows = true;
  98. }
  99. return encoding;
  100. }
  101. NonnullRefPtr<Encoding> Encoding::pdf_doc_encoding()
  102. {
  103. static NonnullRefPtr<Encoding> encoding = adopt_ref(*new Encoding());
  104. if (encoding->m_descriptors.is_empty()) {
  105. #define ENUMERATE(string, name, standard_code, mac_code, win_code, pdf_code) \
  106. auto name##_code_point = *Utf8View(string##sv).begin(); \
  107. encoding->m_descriptors.set(pdf_code, { string, name##_code_point }); \
  108. encoding->m_name_mapping.set(#name, name##_code_point);
  109. ENUMERATE_LATIN_CHARACTER_SET(ENUMERATE)
  110. #undef ENUMERATE
  111. }
  112. return encoding;
  113. }
  114. NonnullRefPtr<Encoding> Encoding::symbol_encoding()
  115. {
  116. static NonnullRefPtr<Encoding> encoding = adopt_ref(*new Encoding());
  117. if (encoding->m_descriptors.is_empty()) {
  118. #define ENUMERATE(string, name, code) \
  119. auto name##_code_point = *Utf8View(string##sv).begin(); \
  120. encoding->m_descriptors.set(code, { string, name##_code_point }); \
  121. encoding->m_name_mapping.set(#name, name##_code_point);
  122. ENUMERATE_SYMBOL_CHARACTER_SET(ENUMERATE)
  123. #undef ENUMERATE
  124. }
  125. return encoding;
  126. }
  127. NonnullRefPtr<Encoding> Encoding::zapf_encoding()
  128. {
  129. static NonnullRefPtr<Encoding> encoding = adopt_ref(*new Encoding());
  130. if (encoding->m_descriptors.is_empty()) {
  131. #define ENUMERATE(string, name, code) \
  132. auto name##_code_point = *Utf8View(string##sv).begin(); \
  133. encoding->m_descriptors.set(code, { string, name##_code_point }); \
  134. encoding->m_name_mapping.set(#name, name##_code_point);
  135. ENUMERATE_ZAPF_DINGBATS_CHARACTER_SET(ENUMERATE)
  136. #undef ENUMERATE
  137. }
  138. return encoding;
  139. }
  140. CharDescriptor const& Encoding::get_char_code_descriptor(u16 char_code) const
  141. {
  142. return const_cast<Encoding*>(this)->m_descriptors.ensure(char_code);
  143. }
  144. u16 Encoding::get_char_code(DeprecatedString const& name) const
  145. {
  146. auto code_iterator = m_name_mapping.find(name);
  147. if (code_iterator != m_name_mapping.end())
  148. return code_iterator->value;
  149. return 0;
  150. }
  151. bool Encoding::should_map_to_bullet(u16 char_code) const
  152. {
  153. // PDF Annex D table D.2, note 3:
  154. // In WinAnsiEncoding, all unused codes greater than 40 (octal) map to the bullet character. However, only
  155. // code 225 (octal) shall be specifically assigned to the bullet character; other codes are subject to future re-assignment.
  156. return m_windows && char_code > 040 && !m_descriptors.contains(char_code);
  157. }
  158. }