Encoding.cpp 5.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179
  1. /*
  2. * Copyright (c) 2022, Matthew Olsson <mattco@serenityos.org>
  3. * Copyright (c) 2022, Julian Offenhäuser <offenhaeuser@protonmail.com>
  4. *
  5. * SPDX-License-Identifier: BSD-2-Clause
  6. */
  7. #include <AK/Utf8View.h>
  8. #include <LibPDF/CommonNames.h>
  9. #include <LibPDF/Encoding.h>
  10. namespace PDF {
  11. NonnullRefPtr<Encoding> Encoding::create()
  12. {
  13. return adopt_ref(*new Encoding());
  14. }
  15. PDFErrorOr<NonnullRefPtr<Encoding>> Encoding::from_object(Document* document, NonnullRefPtr<Object> const& obj)
  16. {
  17. if (obj->is<NameObject>()) {
  18. auto name = obj->cast<NameObject>()->name();
  19. if (name == "StandardEncoding")
  20. return standard_encoding();
  21. if (name == "MacRomanEncoding")
  22. return mac_encoding();
  23. if (name == "WinAnsiEncoding")
  24. return windows_encoding();
  25. VERIFY_NOT_REACHED();
  26. }
  27. // Make a custom encoding
  28. auto dict = obj->cast<DictObject>();
  29. RefPtr<Encoding> base_encoding;
  30. if (dict->contains(CommonNames::BaseEncoding)) {
  31. auto base_encoding_obj = MUST(dict->get_object(document, CommonNames::BaseEncoding));
  32. base_encoding = TRY(Encoding::from_object(document, base_encoding_obj));
  33. } else {
  34. base_encoding = Encoding::standard_encoding();
  35. }
  36. auto encoding = adopt_ref(*new Encoding());
  37. encoding->m_descriptors = TRY(base_encoding->m_descriptors.clone());
  38. encoding->m_name_mapping = TRY(base_encoding->m_name_mapping.clone());
  39. auto differences_array = TRY(dict->get_array(document, CommonNames::Differences));
  40. u16 current_code_point = 0;
  41. bool first = true;
  42. for (auto& item : *differences_array) {
  43. if (item.has_u32()) {
  44. current_code_point = item.to_int();
  45. first = false;
  46. } else {
  47. VERIFY(item.has<NonnullRefPtr<Object>>());
  48. VERIFY(!first);
  49. auto& object = item.get<NonnullRefPtr<Object>>();
  50. auto name = object->cast<NameObject>()->name();
  51. encoding->set(current_code_point, name);
  52. current_code_point++;
  53. }
  54. }
  55. return encoding;
  56. }
  57. void Encoding::set(CharCodeType char_code, DeprecatedFlyString const& glyph_name)
  58. {
  59. m_descriptors.set(char_code, glyph_name);
  60. m_name_mapping.set(glyph_name, char_code);
  61. }
  62. NonnullRefPtr<Encoding> Encoding::standard_encoding()
  63. {
  64. static NonnullRefPtr<Encoding> encoding = adopt_ref(*new Encoding());
  65. if (encoding->m_descriptors.is_empty()) {
  66. #define ENUMERATE(name, standard_code, mac_code, win_code, pdf_code) \
  67. encoding->set(standard_code, #name);
  68. ENUMERATE_LATIN_CHARACTER_SET(ENUMERATE)
  69. #undef ENUMERATE
  70. }
  71. return encoding;
  72. }
  73. NonnullRefPtr<Encoding> Encoding::mac_encoding()
  74. {
  75. static NonnullRefPtr<Encoding> encoding = adopt_ref(*new Encoding());
  76. if (encoding->m_descriptors.is_empty()) {
  77. #define ENUMERATE(name, standard_code, mac_code, win_code, pdf_code) \
  78. encoding->set(mac_code, #name);
  79. ENUMERATE_LATIN_CHARACTER_SET(ENUMERATE)
  80. #undef ENUMERATE
  81. }
  82. return encoding;
  83. }
  84. NonnullRefPtr<Encoding> Encoding::windows_encoding()
  85. {
  86. static NonnullRefPtr<Encoding> encoding = adopt_ref(*new Encoding());
  87. if (encoding->m_descriptors.is_empty()) {
  88. #define ENUMERATE(name, standard_code, mac_code, win_code, pdf_code) \
  89. encoding->set(win_code, #name);
  90. ENUMERATE_LATIN_CHARACTER_SET(ENUMERATE)
  91. #undef ENUMERATE
  92. // PDF Annex D table D.2, note 3:
  93. // In WinAnsiEncoding, all unused codes greater than 40 (octal) map to the bullet character. However, only
  94. // code 225 (octal) shall be specifically assigned to the bullet character; other codes are subject to future re-assignment.
  95. //
  96. // Since CharCodeType is u8 *and* we need to include 255, we iterate in reverse order to have more readable code.
  97. for (CharCodeType char_code = 255; char_code > 040; char_code--) {
  98. if (!encoding->m_descriptors.contains(char_code))
  99. encoding->set(char_code, "bullet");
  100. }
  101. }
  102. return encoding;
  103. }
  104. NonnullRefPtr<Encoding> Encoding::pdf_doc_encoding()
  105. {
  106. static NonnullRefPtr<Encoding> encoding = adopt_ref(*new Encoding());
  107. if (encoding->m_descriptors.is_empty()) {
  108. #define ENUMERATE(name, standard_code, mac_code, win_code, pdf_code) \
  109. encoding->set(pdf_code, #name);
  110. ENUMERATE_LATIN_CHARACTER_SET(ENUMERATE)
  111. #undef ENUMERATE
  112. }
  113. return encoding;
  114. }
  115. NonnullRefPtr<Encoding> Encoding::symbol_encoding()
  116. {
  117. static NonnullRefPtr<Encoding> encoding = adopt_ref(*new Encoding());
  118. if (encoding->m_descriptors.is_empty()) {
  119. #define ENUMERATE(name, code) \
  120. encoding->set(code, #name);
  121. ENUMERATE_SYMBOL_CHARACTER_SET(ENUMERATE)
  122. #undef ENUMERATE
  123. }
  124. return encoding;
  125. }
  126. NonnullRefPtr<Encoding> Encoding::zapf_encoding()
  127. {
  128. static NonnullRefPtr<Encoding> encoding = adopt_ref(*new Encoding());
  129. if (encoding->m_descriptors.is_empty()) {
  130. #define ENUMERATE(name, code) \
  131. encoding->set(code, #name);
  132. ENUMERATE_ZAPF_DINGBATS_CHARACTER_SET(ENUMERATE)
  133. #undef ENUMERATE
  134. }
  135. return encoding;
  136. }
  137. u16 Encoding::get_char_code(DeprecatedString const& name) const
  138. {
  139. auto code_iterator = m_name_mapping.find(name);
  140. if (code_iterator != m_name_mapping.end())
  141. return code_iterator->value;
  142. return 0;
  143. }
  144. DeprecatedFlyString Encoding::get_name(u8 char_code) const
  145. {
  146. auto name_iterator = m_descriptors.find(char_code);
  147. if (name_iterator != m_descriptors.end())
  148. return name_iterator->value;
  149. return 0;
  150. }
  151. }