Segmenter.cpp 4.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109
  1. /*
  2. * Copyright (c) 2022, Idan Horowitz <idan.horowitz@serenityos.org>
  3. * Copyright (c) 2023, Tim Flynn <trflynn89@serenityos.org>
  4. *
  5. * SPDX-License-Identifier: BSD-2-Clause
  6. */
  7. #include <AK/Utf16View.h>
  8. #include <LibJS/Runtime/GlobalObject.h>
  9. #include <LibJS/Runtime/Intl/Segmenter.h>
  10. namespace JS::Intl {
  11. JS_DEFINE_ALLOCATOR(Segmenter);
  12. // 18 Segmenter Objects, https://tc39.es/ecma402/#segmenter-objects
  13. Segmenter::Segmenter(Object& prototype)
  14. : Object(ConstructWithPrototypeTag::Tag, prototype)
  15. {
  16. }
  17. // 18.7.1 CreateSegmentDataObject ( segmenter, string, startIndex, endIndex ), https://tc39.es/ecma402/#sec-createsegmentdataobject
  18. ThrowCompletionOr<NonnullGCPtr<Object>> create_segment_data_object(VM& vm, ::Locale::Segmenter const& segmenter, Utf16View const& string, size_t start_index, size_t end_index)
  19. {
  20. auto& realm = *vm.current_realm();
  21. // 1. Let len be the length of string.
  22. auto length = string.length_in_code_units();
  23. // 2. Assert: startIndex ≥ 0.
  24. // NOTE: This is always true because the type is size_t.
  25. // 3. Assert: endIndex ≤ len.
  26. VERIFY(end_index <= length);
  27. // 4. Assert: startIndex < endIndex.
  28. VERIFY(start_index < end_index);
  29. // 5. Let result be OrdinaryObjectCreate(%Object.prototype%).
  30. auto result = Object::create(realm, realm.intrinsics().object_prototype());
  31. // 6. Let segment be the substring of string from startIndex to endIndex.
  32. auto segment = string.substring_view(start_index, end_index - start_index);
  33. // 7. Perform ! CreateDataPropertyOrThrow(result, "segment", segment).
  34. MUST(result->create_data_property_or_throw(vm.names.segment, PrimitiveString::create(vm, Utf16String::create(segment))));
  35. // 8. Perform ! CreateDataPropertyOrThrow(result, "index", 𝔽(startIndex)).
  36. MUST(result->create_data_property_or_throw(vm.names.index, Value(start_index)));
  37. // 9. Perform ! CreateDataPropertyOrThrow(result, "input", string).
  38. MUST(result->create_data_property_or_throw(vm.names.input, PrimitiveString::create(vm, Utf16String::create(string))));
  39. // 10. Let granularity be segmenter.[[SegmenterGranularity]].
  40. auto granularity = segmenter.segmenter_granularity();
  41. // 11. If granularity is "word", then
  42. if (granularity == ::Locale::SegmenterGranularity::Word) {
  43. // a. Let isWordLike be a Boolean value indicating whether the segment in string is "word-like" according to locale segmenter.[[Locale]].
  44. auto is_word_like = segmenter.is_current_boundary_word_like();
  45. // b. Perform ! CreateDataPropertyOrThrow(result, "isWordLike", isWordLike).
  46. MUST(result->create_data_property_or_throw(vm.names.isWordLike, Value(is_word_like)));
  47. }
  48. // 12. Return result.
  49. return result;
  50. }
  51. // 18.8.1 FindBoundary ( segmenter, string, startIndex, direction ), https://tc39.es/ecma402/#sec-findboundary
  52. size_t find_boundary(::Locale::Segmenter& segmenter, Utf16View const& string, size_t start_index, Direction direction)
  53. {
  54. // 1. Let len be the length of string.
  55. auto length = string.length_in_code_units();
  56. // 2. Assert: startIndex < len.
  57. VERIFY(start_index < length);
  58. // 3. Let locale be segmenter.[[Locale]].
  59. // 4. Let granularity be segmenter.[[SegmenterGranularity]].
  60. // 5. If direction is before, then
  61. if (direction == Direction::Before) {
  62. // a. Search string for the last segmentation boundary that is preceded by at most startIndex code units from
  63. // the beginning, using locale locale and text element granularity granularity.
  64. auto boundary = segmenter.previous_boundary(start_index, ::Locale::Segmenter::Inclusive::Yes);
  65. // b. If a boundary is found, return the count of code units in string preceding it.
  66. if (boundary.has_value())
  67. return *boundary;
  68. // c. Return 0.
  69. return 0;
  70. }
  71. // 6. Assert: direction is after.
  72. // 7. Search string for the first segmentation boundary that follows the code unit at index startIndex, using locale
  73. // locale and text element granularity granularity.
  74. auto boundary = segmenter.next_boundary(start_index);
  75. // 8. If a boundary is found, return the count of code units in string preceding it.
  76. if (boundary.has_value())
  77. return *boundary;
  78. // 9. Return len.
  79. return length;
  80. }
  81. }