|
@@ -8,7 +8,6 @@
|
|
#include <AK/Utf16View.h>
|
|
#include <AK/Utf16View.h>
|
|
#include <LibJS/Runtime/GlobalObject.h>
|
|
#include <LibJS/Runtime/GlobalObject.h>
|
|
#include <LibJS/Runtime/Intl/Segmenter.h>
|
|
#include <LibJS/Runtime/Intl/Segmenter.h>
|
|
-#include <LibUnicode/Segmentation.h>
|
|
|
|
|
|
|
|
namespace JS::Intl {
|
|
namespace JS::Intl {
|
|
|
|
|
|
@@ -20,34 +19,8 @@ Segmenter::Segmenter(Object& prototype)
|
|
{
|
|
{
|
|
}
|
|
}
|
|
|
|
|
|
-void Segmenter::set_segmenter_granularity(StringView segmenter_granularity)
|
|
|
|
-{
|
|
|
|
- if (segmenter_granularity == "grapheme"sv)
|
|
|
|
- m_segmenter_granularity = SegmenterGranularity::Grapheme;
|
|
|
|
- else if (segmenter_granularity == "word"sv)
|
|
|
|
- m_segmenter_granularity = SegmenterGranularity::Word;
|
|
|
|
- else if (segmenter_granularity == "sentence"sv)
|
|
|
|
- m_segmenter_granularity = SegmenterGranularity::Sentence;
|
|
|
|
- else
|
|
|
|
- VERIFY_NOT_REACHED();
|
|
|
|
-}
|
|
|
|
-
|
|
|
|
-StringView Segmenter::segmenter_granularity_string() const
|
|
|
|
-{
|
|
|
|
- switch (m_segmenter_granularity) {
|
|
|
|
- case SegmenterGranularity::Grapheme:
|
|
|
|
- return "grapheme"sv;
|
|
|
|
- case SegmenterGranularity::Word:
|
|
|
|
- return "word"sv;
|
|
|
|
- case SegmenterGranularity::Sentence:
|
|
|
|
- return "sentence"sv;
|
|
|
|
- default:
|
|
|
|
- VERIFY_NOT_REACHED();
|
|
|
|
- }
|
|
|
|
-}
|
|
|
|
-
|
|
|
|
// 18.7.1 CreateSegmentDataObject ( segmenter, string, startIndex, endIndex ), https://tc39.es/ecma402/#sec-createsegmentdataobject
|
|
// 18.7.1 CreateSegmentDataObject ( segmenter, string, startIndex, endIndex ), https://tc39.es/ecma402/#sec-createsegmentdataobject
|
|
-ThrowCompletionOr<NonnullGCPtr<Object>> create_segment_data_object(VM& vm, Segmenter const& segmenter, Utf16View const& string, double start_index, double end_index)
|
|
|
|
|
|
+ThrowCompletionOr<NonnullGCPtr<Object>> create_segment_data_object(VM& vm, ::Locale::Segmenter const& segmenter, Utf16View const& string, size_t start_index, size_t end_index)
|
|
{
|
|
{
|
|
auto& realm = *vm.current_realm();
|
|
auto& realm = *vm.current_realm();
|
|
|
|
|
|
@@ -55,7 +28,7 @@ ThrowCompletionOr<NonnullGCPtr<Object>> create_segment_data_object(VM& vm, Segme
|
|
auto length = string.length_in_code_units();
|
|
auto length = string.length_in_code_units();
|
|
|
|
|
|
// 2. Assert: startIndex ≥ 0.
|
|
// 2. Assert: startIndex ≥ 0.
|
|
- VERIFY(start_index >= 0);
|
|
|
|
|
|
+ // NOTE: This is always true because the type is size_t.
|
|
|
|
|
|
// 3. Assert: endIndex ≤ len.
|
|
// 3. Assert: endIndex ≤ len.
|
|
VERIFY(end_index <= length);
|
|
VERIFY(end_index <= length);
|
|
@@ -82,89 +55,52 @@ ThrowCompletionOr<NonnullGCPtr<Object>> create_segment_data_object(VM& vm, Segme
|
|
auto granularity = segmenter.segmenter_granularity();
|
|
auto granularity = segmenter.segmenter_granularity();
|
|
|
|
|
|
// 11. If granularity is "word", then
|
|
// 11. If granularity is "word", then
|
|
- if (granularity == Segmenter::SegmenterGranularity::Word) {
|
|
|
|
|
|
+ if (granularity == ::Locale::SegmenterGranularity::Word) {
|
|
// a. Let isWordLike be a Boolean value indicating whether the segment in string is "word-like" according to locale segmenter.[[Locale]].
|
|
// a. Let isWordLike be a Boolean value indicating whether the segment in string is "word-like" according to locale segmenter.[[Locale]].
|
|
- // TODO
|
|
|
|
|
|
+ auto is_word_like = segmenter.is_current_boundary_word_like();
|
|
|
|
|
|
// b. Perform ! CreateDataPropertyOrThrow(result, "isWordLike", isWordLike).
|
|
// b. Perform ! CreateDataPropertyOrThrow(result, "isWordLike", isWordLike).
|
|
- MUST(result->create_data_property_or_throw(vm.names.isWordLike, Value(false)));
|
|
|
|
|
|
+ MUST(result->create_data_property_or_throw(vm.names.isWordLike, Value(is_word_like)));
|
|
}
|
|
}
|
|
|
|
|
|
// 12. Return result.
|
|
// 12. Return result.
|
|
return result;
|
|
return result;
|
|
}
|
|
}
|
|
|
|
|
|
-static Optional<size_t> find_previous_boundary_index(Utf16View const& string, size_t index, Segmenter::SegmenterGranularity granularity)
|
|
|
|
-{
|
|
|
|
- switch (granularity) {
|
|
|
|
- case Segmenter::SegmenterGranularity::Grapheme:
|
|
|
|
- return Unicode::previous_grapheme_segmentation_boundary(string, index);
|
|
|
|
- case Segmenter::SegmenterGranularity::Word:
|
|
|
|
- return Unicode::previous_word_segmentation_boundary(string, index);
|
|
|
|
- case Segmenter::SegmenterGranularity::Sentence:
|
|
|
|
- return Unicode::previous_sentence_segmentation_boundary(string, index);
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- VERIFY_NOT_REACHED();
|
|
|
|
-}
|
|
|
|
-
|
|
|
|
-static Optional<size_t> find_next_boundary_index(Utf16View const& string, size_t index, Segmenter::SegmenterGranularity granularity)
|
|
|
|
-{
|
|
|
|
- switch (granularity) {
|
|
|
|
- case Segmenter::SegmenterGranularity::Grapheme:
|
|
|
|
- return Unicode::next_grapheme_segmentation_boundary(string, index);
|
|
|
|
- case Segmenter::SegmenterGranularity::Word:
|
|
|
|
- return Unicode::next_word_segmentation_boundary(string, index);
|
|
|
|
- case Segmenter::SegmenterGranularity::Sentence:
|
|
|
|
- return Unicode::next_sentence_segmentation_boundary(string, index);
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- VERIFY_NOT_REACHED();
|
|
|
|
-}
|
|
|
|
-
|
|
|
|
// 18.8.1 FindBoundary ( segmenter, string, startIndex, direction ), https://tc39.es/ecma402/#sec-findboundary
|
|
// 18.8.1 FindBoundary ( segmenter, string, startIndex, direction ), https://tc39.es/ecma402/#sec-findboundary
|
|
-double find_boundary(Segmenter const& segmenter, Utf16View const& string, double start_index, Direction direction)
|
|
|
|
|
|
+size_t find_boundary(::Locale::Segmenter& segmenter, Utf16View const& string, size_t start_index, Direction direction)
|
|
{
|
|
{
|
|
- // 1. Let locale be segmenter.[[Locale]].
|
|
|
|
- // FIXME: Support locale-sensitive boundaries
|
|
|
|
|
|
+ // 1. Let len be the length of string.
|
|
|
|
+ auto length = string.length_in_code_units();
|
|
|
|
|
|
- // 2. Let granularity be segmenter.[[SegmenterGranularity]].
|
|
|
|
- auto granularity = segmenter.segmenter_granularity();
|
|
|
|
|
|
+ // 2. Assert: startIndex < len.
|
|
|
|
+ VERIFY(start_index < length);
|
|
|
|
|
|
- // 3. Let len be the length of string.
|
|
|
|
- auto length = string.length_in_code_units();
|
|
|
|
|
|
+ // 3. Let locale be segmenter.[[Locale]].
|
|
|
|
+ // 4. Let granularity be segmenter.[[SegmenterGranularity]].
|
|
|
|
|
|
- // 4. If direction is before, then
|
|
|
|
|
|
+ // 5. If direction is before, then
|
|
if (direction == Direction::Before) {
|
|
if (direction == Direction::Before) {
|
|
- // a. Assert: startIndex ≥ 0.
|
|
|
|
- VERIFY(start_index >= 0);
|
|
|
|
- // b. Assert: startIndex < len.
|
|
|
|
- VERIFY(start_index < length);
|
|
|
|
|
|
+ // a. Search string for the last segmentation boundary that is preceded by at most startIndex code units from
|
|
|
|
+ // the beginning, using locale locale and text element granularity granularity.
|
|
|
|
+ auto boundary = segmenter.previous_boundary(start_index, ::Locale::Segmenter::Inclusive::Yes);
|
|
|
|
|
|
- // c. Search string for the last segmentation boundary that is preceded by at most startIndex code units from the beginning, using locale locale and text element granularity granularity.
|
|
|
|
- auto boundary_index = find_previous_boundary_index(string, static_cast<size_t>(start_index) + 1, granularity);
|
|
|
|
|
|
+ // b. If a boundary is found, return the count of code units in string preceding it.
|
|
|
|
+ if (boundary.has_value())
|
|
|
|
+ return *boundary;
|
|
|
|
|
|
- // d. If a boundary is found, return the count of code units in string preceding it.
|
|
|
|
- if (boundary_index.has_value())
|
|
|
|
- return static_cast<double>(*boundary_index);
|
|
|
|
-
|
|
|
|
- // e. Return 0.
|
|
|
|
|
|
+ // c. Return 0.
|
|
return 0;
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
|
|
- // 5. Assert: direction is after.
|
|
|
|
- VERIFY(direction == Direction::After);
|
|
|
|
-
|
|
|
|
- // 6. If len is 0 or startIndex ≥ len, return +∞.
|
|
|
|
- if (length == 0 || start_index >= length)
|
|
|
|
- return INFINITY;
|
|
|
|
-
|
|
|
|
- // 7. Search string for the first segmentation boundary that follows the code unit at index startIndex, using locale locale and text element granularity granularity.
|
|
|
|
- auto boundary_index = find_next_boundary_index(string, static_cast<size_t>(start_index), granularity);
|
|
|
|
|
|
+ // 6. Assert: direction is after.
|
|
|
|
+ // 7. Search string for the first segmentation boundary that follows the code unit at index startIndex, using locale
|
|
|
|
+ // locale and text element granularity granularity.
|
|
|
|
+ auto boundary = segmenter.next_boundary(start_index);
|
|
|
|
|
|
// 8. If a boundary is found, return the count of code units in string preceding it.
|
|
// 8. If a boundary is found, return the count of code units in string preceding it.
|
|
- if (boundary_index.has_value())
|
|
|
|
- return static_cast<double>(*boundary_index);
|
|
|
|
|
|
+ if (boundary.has_value())
|
|
|
|
+ return *boundary;
|
|
|
|
|
|
// 9. Return len.
|
|
// 9. Return len.
|
|
return length;
|
|
return length;
|