Encoding.cpp 5.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166
  1. /*
  2. * Copyright (c) 2022, Matthew Olsson <mattco@serenityos.org>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include <AK/Utf8View.h>
  7. #include <LibPDF/CommonNames.h>
  8. #include <LibPDF/Encoding.h>
  9. namespace PDF {
  10. PDFErrorOr<NonnullRefPtr<Encoding>> Encoding::from_object(Document* document, NonnullRefPtr<Object> const& obj)
  11. {
  12. if (obj->is<NameObject>()) {
  13. auto name = obj->cast<NameObject>()->name();
  14. if (name == "StandardEncoding")
  15. return standard_encoding();
  16. if (name == "MacRomanEncoding")
  17. return mac_encoding();
  18. if (name == "WinAnsiEncoding")
  19. return windows_encoding();
  20. VERIFY_NOT_REACHED();
  21. }
  22. // Make a custom encoding
  23. auto dict = obj->cast<DictObject>();
  24. RefPtr<Encoding> base_encoding;
  25. if (dict->contains(CommonNames::BaseEncoding)) {
  26. auto base_encoding_obj = MUST(dict->get_object(document, CommonNames::BaseEncoding));
  27. base_encoding = TRY(Encoding::from_object(document, base_encoding_obj));
  28. } else {
  29. base_encoding = Encoding::standard_encoding();
  30. }
  31. auto encoding = adopt_ref(*new Encoding());
  32. // Build a String -> Character mapping for handling the differences map
  33. HashMap<String, CharDescriptor> base_encoding_name_mapping;
  34. for (auto& [code_point, descriptor] : base_encoding->descriptors()) {
  35. encoding->m_descriptors.set(code_point, descriptor);
  36. base_encoding_name_mapping.set(descriptor.name, descriptor);
  37. }
  38. auto differences_array = TRY(dict->get_array(document, CommonNames::Differences));
  39. HashMap<u16, String> differences_map;
  40. u16 current_code_point = 0;
  41. bool first = true;
  42. for (auto& item : *differences_array) {
  43. if (item.has_u32()) {
  44. current_code_point = item.to_int();
  45. first = false;
  46. } else {
  47. VERIFY(item.has<NonnullRefPtr<Object>>());
  48. VERIFY(!first);
  49. auto& object = item.get<NonnullRefPtr<Object>>();
  50. auto name = object->cast<NameObject>()->name();
  51. auto character = base_encoding_name_mapping.get(name);
  52. // FIXME: This should always have a value. This does cause crashes in certain
  53. // documents, so we must be missing something here.
  54. if (character.has_value())
  55. encoding->m_descriptors.set(current_code_point, character.value());
  56. current_code_point++;
  57. }
  58. }
  59. return encoding;
  60. }
  61. NonnullRefPtr<Encoding> Encoding::standard_encoding()
  62. {
  63. static NonnullRefPtr<Encoding> encoding = adopt_ref(*new Encoding());
  64. if (encoding->m_descriptors.is_empty()) {
  65. #define ENUMERATE(string, name, standard_code, mac_code, win_code, pdf_code) \
  66. auto name##_code_point = *Utf8View(StringView(string)).begin(); \
  67. encoding->m_descriptors.set(standard_code, { string, name##_code_point });
  68. ENUMERATE_LATIN_CHARACTER_SET(ENUMERATE)
  69. #undef ENUMERATE
  70. }
  71. return encoding;
  72. }
  73. NonnullRefPtr<Encoding> Encoding::mac_encoding()
  74. {
  75. static NonnullRefPtr<Encoding> encoding = adopt_ref(*new Encoding());
  76. if (encoding->m_descriptors.is_empty()) {
  77. #define ENUMERATE(string, name, standard_code, mac_code, win_code, pdf_code) \
  78. auto name##_code_point = *Utf8View(StringView(string)).begin(); \
  79. encoding->m_descriptors.set(mac_code, { string, name##_code_point });
  80. ENUMERATE_LATIN_CHARACTER_SET(ENUMERATE)
  81. #undef ENUMERATE
  82. }
  83. return encoding;
  84. }
  85. NonnullRefPtr<Encoding> Encoding::windows_encoding()
  86. {
  87. static NonnullRefPtr<Encoding> encoding = adopt_ref(*new Encoding());
  88. if (encoding->m_descriptors.is_empty()) {
  89. #define ENUMERATE(string, name, standard_code, mac_code, win_code, pdf_code) \
  90. auto name##_code_point = *Utf8View(StringView(string)).begin(); \
  91. encoding->m_descriptors.set(win_code, { string, name##_code_point });
  92. ENUMERATE_LATIN_CHARACTER_SET(ENUMERATE)
  93. #undef ENUMERATE
  94. }
  95. return encoding;
  96. }
  97. NonnullRefPtr<Encoding> Encoding::pdf_doc_encoding()
  98. {
  99. static NonnullRefPtr<Encoding> encoding = adopt_ref(*new Encoding());
  100. if (encoding->m_descriptors.is_empty()) {
  101. #define ENUMERATE(string, name, standard_code, mac_code, win_code, pdf_code) \
  102. auto name##_code_point = *Utf8View(StringView(string)).begin(); \
  103. encoding->m_descriptors.set(pdf_code, { string, name##_code_point });
  104. ENUMERATE_LATIN_CHARACTER_SET(ENUMERATE)
  105. #undef ENUMERATE
  106. }
  107. return encoding;
  108. }
  109. NonnullRefPtr<Encoding> Encoding::symbol_encoding()
  110. {
  111. static NonnullRefPtr<Encoding> encoding = adopt_ref(*new Encoding());
  112. if (encoding->m_descriptors.is_empty()) {
  113. #define ENUMERATE(string, name, code) \
  114. auto name##_code_point = *Utf8View(StringView(string)).begin(); \
  115. encoding->m_descriptors.set(code, { string, name##_code_point });
  116. ENUMERATE_SYMBOL_CHARACTER_SET(ENUMERATE)
  117. #undef ENUMERATE
  118. }
  119. return encoding;
  120. }
  121. NonnullRefPtr<Encoding> Encoding::zapf_encoding()
  122. {
  123. static NonnullRefPtr<Encoding> encoding = adopt_ref(*new Encoding());
  124. if (encoding->m_descriptors.is_empty()) {
  125. #define ENUMERATE(string, name, code) \
  126. auto name##_code_point = *Utf8View(StringView(string)).begin(); \
  127. encoding->m_descriptors.set(code, { string, name##_code_point });
  128. ENUMERATE_ZAPF_DINGBATS_CHARACTER_SET(ENUMERATE)
  129. #undef ENUMERATE
  130. }
  131. return encoding;
  132. }
  133. CharDescriptor const& Encoding::get_char_code_descriptor(u16 char_code) const
  134. {
  135. return const_cast<Encoding*>(this)->m_descriptors.ensure(char_code);
  136. }
  137. }