Encoding.cpp 5.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164
  1. /*
  2. * Copyright (c) 2022, Matthew Olsson <mattco@serenityos.org>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include <AK/Utf8View.h>
  7. #include <LibPDF/CommonNames.h>
  8. #include <LibPDF/Encoding.h>
  9. namespace PDF {
  10. PDFErrorOr<NonnullRefPtr<Encoding>> Encoding::from_object(Document* document, NonnullRefPtr<Object> const& obj)
  11. {
  12. if (obj->is<NameObject>()) {
  13. auto name = obj->cast<NameObject>()->name();
  14. if (name == "StandardEncoding")
  15. return standard_encoding();
  16. if (name == "MacRomanEncoding")
  17. return mac_encoding();
  18. if (name == "WinAnsiEncoding")
  19. return windows_encoding();
  20. VERIFY_NOT_REACHED();
  21. }
  22. // Make a custom encoding
  23. auto dict = obj->cast<DictObject>();
  24. // FIXME: If this entry is absent, the Differences entry shall describe differences
  25. // from an implicit base encoding. For a font program that is embedded in the
  26. // PDF file, the implicit base encoding shall be a font program's built-in
  27. // encoding [...]. Otherwise, for a nonsymbolic font, it shall be
  28. // StandardEncoding, and for a symbolic font, it shall be the font's built-in
  29. // encoding.
  30. auto base_encoding_obj = MUST(dict->get_object(document, CommonNames::BaseEncoding));
  31. auto base_encoding = TRY(Encoding::from_object(document, base_encoding_obj));
  32. auto encoding = adopt_ref(*new Encoding());
  33. // Build a String -> Character mapping for handling the differences map
  34. HashMap<String, CharDescriptor> base_encoding_name_mapping;
  35. for (auto& [code_point, descriptor] : base_encoding->descriptors()) {
  36. encoding->m_descriptors.set(code_point, descriptor);
  37. base_encoding_name_mapping.set(descriptor.name, descriptor);
  38. }
  39. auto differences_array = TRY(dict->get_array(document, CommonNames::Differences));
  40. HashMap<u16, String> differences_map;
  41. u16 current_code_point = 0;
  42. bool first = true;
  43. for (auto& item : *differences_array) {
  44. if (item.has_u32()) {
  45. current_code_point = item.to_int();
  46. first = false;
  47. } else {
  48. VERIFY(item.has<NonnullRefPtr<Object>>());
  49. VERIFY(!first);
  50. auto& object = item.get<NonnullRefPtr<Object>>();
  51. auto name = object->cast<NameObject>()->name();
  52. auto character = base_encoding_name_mapping.get(name);
  53. VERIFY(character.has_value());
  54. encoding->m_descriptors.set(current_code_point, character.value());
  55. current_code_point++;
  56. }
  57. }
  58. return encoding;
  59. }
  60. NonnullRefPtr<Encoding> Encoding::standard_encoding()
  61. {
  62. static NonnullRefPtr<Encoding> encoding = adopt_ref(*new Encoding());
  63. if (encoding->m_descriptors.is_empty()) {
  64. #define ENUMERATE(string, name, standard_code, mac_code, win_code, pdf_code) \
  65. auto name##_code_point = *Utf8View(StringView(string)).begin(); \
  66. encoding->m_descriptors.set(standard_code, { string, name##_code_point });
  67. ENUMERATE_LATIN_CHARACTER_SET(ENUMERATE)
  68. #undef ENUMERATE
  69. }
  70. return encoding;
  71. }
  72. NonnullRefPtr<Encoding> Encoding::mac_encoding()
  73. {
  74. static NonnullRefPtr<Encoding> encoding = adopt_ref(*new Encoding());
  75. if (encoding->m_descriptors.is_empty()) {
  76. #define ENUMERATE(string, name, standard_code, mac_code, win_code, pdf_code) \
  77. auto name##_code_point = *Utf8View(StringView(string)).begin(); \
  78. encoding->m_descriptors.set(mac_code, { string, name##_code_point });
  79. ENUMERATE_LATIN_CHARACTER_SET(ENUMERATE)
  80. #undef ENUMERATE
  81. }
  82. return encoding;
  83. }
  84. NonnullRefPtr<Encoding> Encoding::windows_encoding()
  85. {
  86. static NonnullRefPtr<Encoding> encoding = adopt_ref(*new Encoding());
  87. if (encoding->m_descriptors.is_empty()) {
  88. #define ENUMERATE(string, name, standard_code, mac_code, win_code, pdf_code) \
  89. auto name##_code_point = *Utf8View(StringView(string)).begin(); \
  90. encoding->m_descriptors.set(win_code, { string, name##_code_point });
  91. ENUMERATE_LATIN_CHARACTER_SET(ENUMERATE)
  92. #undef ENUMERATE
  93. }
  94. return encoding;
  95. }
  96. NonnullRefPtr<Encoding> Encoding::pdf_doc_encoding()
  97. {
  98. static NonnullRefPtr<Encoding> encoding = adopt_ref(*new Encoding());
  99. if (encoding->m_descriptors.is_empty()) {
  100. #define ENUMERATE(string, name, standard_code, mac_code, win_code, pdf_code) \
  101. auto name##_code_point = *Utf8View(StringView(string)).begin(); \
  102. encoding->m_descriptors.set(pdf_code, { string, name##_code_point });
  103. ENUMERATE_LATIN_CHARACTER_SET(ENUMERATE)
  104. #undef ENUMERATE
  105. }
  106. return encoding;
  107. }
  108. NonnullRefPtr<Encoding> Encoding::symbol_encoding()
  109. {
  110. static NonnullRefPtr<Encoding> encoding = adopt_ref(*new Encoding());
  111. if (encoding->m_descriptors.is_empty()) {
  112. #define ENUMERATE(string, name, code) \
  113. auto name##_code_point = *Utf8View(StringView(string)).begin(); \
  114. encoding->m_descriptors.set(code, { string, name##_code_point });
  115. ENUMERATE_SYMBOL_CHARACTER_SET(ENUMERATE)
  116. #undef ENUMERATE
  117. }
  118. return encoding;
  119. }
  120. NonnullRefPtr<Encoding> Encoding::zapf_encoding()
  121. {
  122. static NonnullRefPtr<Encoding> encoding = adopt_ref(*new Encoding());
  123. if (encoding->m_descriptors.is_empty()) {
  124. #define ENUMERATE(string, name, code) \
  125. auto name##_code_point = *Utf8View(StringView(string)).begin(); \
  126. encoding->m_descriptors.set(code, { string, name##_code_point });
  127. ENUMERATE_ZAPF_DINGBATS_CHARACTER_SET(ENUMERATE)
  128. #undef ENUMERATE
  129. }
  130. return encoding;
  131. }
  132. CharDescriptor const& Encoding::get_char_code_descriptor(u16 char_code) const
  133. {
  134. return const_cast<Encoding*>(this)->m_descriptors.ensure(char_code);
  135. }
  136. }