PrimitiveString.cpp 2.3 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485
  1. /*
  2. * Copyright (c) 2020, Andreas Kling <kling@serenityos.org>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include <AK/CharacterTypes.h>
  7. #include <AK/Utf16View.h>
  8. #include <LibJS/Runtime/PrimitiveString.h>
  9. #include <LibJS/Runtime/VM.h>
  10. namespace JS {
  11. PrimitiveString::PrimitiveString(String string)
  12. : m_string(move(string))
  13. {
  14. }
  15. PrimitiveString::~PrimitiveString()
  16. {
  17. }
  18. Vector<u16> const& PrimitiveString::utf16_string() const
  19. {
  20. if (m_utf16_string.is_empty() && !m_string.is_empty())
  21. m_utf16_string = AK::utf8_to_utf16(m_string);
  22. return m_utf16_string;
  23. }
  24. Utf16View PrimitiveString::utf16_string_view() const
  25. {
  26. return Utf16View { utf16_string() };
  27. }
  28. PrimitiveString* js_string(Heap& heap, Utf16View const& string)
  29. {
  30. if (string.is_empty())
  31. return &heap.vm().empty_string();
  32. if (string.length_in_code_units() == 1) {
  33. u16 code_unit = string.code_unit_at(0);
  34. if (is_ascii(code_unit))
  35. return &heap.vm().single_ascii_character_string(static_cast<u8>(code_unit));
  36. }
  37. auto utf8_string = string.to_utf8(Utf16View::AllowInvalidCodeUnits::Yes);
  38. return heap.allocate_without_global_object<PrimitiveString>(move(utf8_string));
  39. }
  40. PrimitiveString* js_string(VM& vm, Utf16View const& string)
  41. {
  42. return js_string(vm.heap(), string);
  43. }
  44. PrimitiveString* js_string(Heap& heap, String string)
  45. {
  46. if (string.is_empty())
  47. return &heap.vm().empty_string();
  48. if (string.length() == 1) {
  49. auto ch = static_cast<u8>(string.characters()[0]);
  50. if (is_ascii(ch))
  51. return &heap.vm().single_ascii_character_string(ch);
  52. }
  53. // UTF-8 strings must first be transcoded to UTF-16, even though they are stored as String objects
  54. // internally, to parse encoded surrogate pairs. As an optimization to reduce string copying, only
  55. // perform that transcoding if there are non-ASCII codepoints in the string.
  56. for (auto it : string) {
  57. auto ch = static_cast<u8>(it);
  58. if (!is_ascii(ch)) {
  59. auto utf16_string = AK::utf8_to_utf16(string);
  60. return js_string(heap, Utf16View { utf16_string });
  61. }
  62. }
  63. return heap.allocate_without_global_object<PrimitiveString>(move(string));
  64. }
  65. PrimitiveString* js_string(VM& vm, String string)
  66. {
  67. return js_string(vm.heap(), move(string));
  68. }
  69. }