LibJS: Port Intl.Segmenter to the ICU text segmenter

This also lets us fully implement detecting if a segment is word-like,
although that is not tested by test262.
This commit is contained in:
Timothy Flynn 2024-06-18 18:51:06 -04:00 committed by Andreas Kling
parent 3fe0a27fbd
commit 14071c52f9
Notes: sideshowbarker 2024-07-17 03:45:48 +09:00
12 changed files with 95 additions and 162 deletions

View file

@ -863,8 +863,6 @@ ErrorOr<void> print_intl_segments(JS::PrintContext& print_context, JS::Intl::Seg
TRY(print_type(print_context, "Segments"sv));
out("\n string: ");
TRY(print_value(print_context, JS::PrimitiveString::create(segments.vm(), move(segments_string)), seen_objects));
out("\n segmenter: ");
TRY(print_value(print_context, &segments.segments_segmenter(), seen_objects));
return {};
}

View file

@ -13,7 +13,7 @@ namespace JS::Intl {
JS_DEFINE_ALLOCATOR(SegmentIterator);
// 18.6.1 CreateSegmentIterator ( segmenter, string ), https://tc39.es/ecma402/#sec-createsegmentiterator
NonnullGCPtr<SegmentIterator> SegmentIterator::create(Realm& realm, Segmenter& segmenter, Utf16View const& string, Segments const& segments)
NonnullGCPtr<SegmentIterator> SegmentIterator::create(Realm& realm, ::Locale::Segmenter const& segmenter, Utf16View const& string, Segments const& segments)
{
// 1. Let internalSlotsList be « [[IteratingSegmenter]], [[IteratedString]], [[IteratedStringNextSegmentCodeUnitIndex]] ».
// 2. Let iterator be OrdinaryObjectCreate(%SegmentIteratorPrototype%, internalSlotsList).
@ -21,22 +21,22 @@ NonnullGCPtr<SegmentIterator> SegmentIterator::create(Realm& realm, Segmenter& s
// 4. Set iterator.[[IteratedString]] to string.
// 5. Set iterator.[[IteratedStringNextSegmentCodeUnitIndex]] to 0.
// 6. Return iterator.
return realm.heap().allocate<SegmentIterator>(realm, realm, segmenter, move(string), segments);
return realm.heap().allocate<SegmentIterator>(realm, realm, segmenter, string, segments);
}
// 18.6 Segment Iterator Objects, https://tc39.es/ecma402/#sec-segment-iterator-objects
SegmentIterator::SegmentIterator(Realm& realm, Segmenter& segmenter, Utf16View const& string, Segments const& segments)
SegmentIterator::SegmentIterator(Realm& realm, ::Locale::Segmenter const& segmenter, Utf16View const& string, Segments const& segments)
: Object(ConstructWithPrototypeTag::Tag, realm.intrinsics().intl_segment_iterator_prototype())
, m_iterating_segmenter(segmenter)
, m_iterating_segmenter(segmenter.clone())
, m_iterated_string(string)
, m_segments(segments)
{
m_iterating_segmenter->set_segmented_text(m_iterated_string);
}
void SegmentIterator::visit_edges(Cell::Visitor& visitor)
{
Base::visit_edges(visitor);
visitor.visit(m_iterating_segmenter);
visitor.visit(m_segments);
}

View file

@ -9,6 +9,7 @@
#include <AK/Utf16View.h>
#include <LibJS/Runtime/Intl/Segmenter.h>
#include <LibJS/Runtime/Object.h>
#include <LibLocale/Segmenter.h>
namespace JS::Intl {
@ -17,25 +18,23 @@ class SegmentIterator final : public Object {
JS_DECLARE_ALLOCATOR(SegmentIterator);
public:
static NonnullGCPtr<SegmentIterator> create(Realm&, Segmenter&, Utf16View const&, Segments const&);
static NonnullGCPtr<SegmentIterator> create(Realm&, ::Locale::Segmenter const&, Utf16View const&, Segments const&);
virtual ~SegmentIterator() override = default;
Segmenter const& iterating_segmenter() const { return m_iterating_segmenter; }
::Locale::Segmenter& iterating_segmenter() { return *m_iterating_segmenter; }
Utf16View const& iterated_string() const { return m_iterated_string; }
size_t iterated_string_next_segment_code_unit_index() const { return m_iterated_string_next_segment_code_unit_index; }
void set_iterated_string_next_segment_code_unit_index(size_t index) { m_iterated_string_next_segment_code_unit_index = index; }
size_t iterated_string_next_segment_code_unit_index() const { return m_iterating_segmenter->current_boundary(); }
Segments const& segments() { return m_segments; }
private:
SegmentIterator(Realm&, Segmenter&, Utf16View const&, Segments const&);
SegmentIterator(Realm&, ::Locale::Segmenter const&, Utf16View const&, Segments const&);
virtual void visit_edges(Cell::Visitor&) override;
NonnullGCPtr<Segmenter> m_iterating_segmenter; // [[IteratingSegmenter]]
Utf16View m_iterated_string; // [[IteratedString]]
size_t m_iterated_string_next_segment_code_unit_index { 0 }; // [[IteratedStringNextSegmentCodeUnitIndex]]
NonnullOwnPtr<::Locale::Segmenter> m_iterating_segmenter; // [[IteratingSegmenter]]
Utf16View m_iterated_string; // [[IteratedString]]
NonnullGCPtr<Segments const> m_segments;
};

View file

@ -41,7 +41,7 @@ JS_DEFINE_NATIVE_FUNCTION(SegmentIteratorPrototype::next)
auto iterator = TRY(typed_this_object(vm));
// 3. Let segmenter be iterator.[[IteratingSegmenter]].
auto const& segmenter = iterator->iterating_segmenter();
auto& segmenter = iterator->iterating_segmenter();
// 4. Let string be iterator.[[IteratedString]].
auto const& string = iterator->iterated_string();
@ -49,22 +49,25 @@ JS_DEFINE_NATIVE_FUNCTION(SegmentIteratorPrototype::next)
// 5. Let startIndex be iterator.[[IteratedStringNextSegmentCodeUnitIndex]].
auto start_index = iterator->iterated_string_next_segment_code_unit_index();
// 6. Let endIndex be ! FindBoundary(segmenter, string, startIndex, after).
auto end_index = find_boundary(segmenter, string, start_index, Direction::After);
// 6. Let len be the length of string.
auto length = string.length_in_code_units();
// 7. If endIndex is not finite, then
if (!Value(end_index).is_finite_number()) {
// 7. If startIndex ≥ len, then
if (start_index >= length) {
// a. Return CreateIterResultObject(undefined, true).
return create_iterator_result_object(vm, js_undefined(), true);
}
// 8. Set iterator.[[IteratedStringNextSegmentCodeUnitIndex]] to endIndex.
iterator->set_iterated_string_next_segment_code_unit_index(end_index);
// 8. Let endIndex be FindBoundary(segmenter, string, startIndex, after).
auto end_index = find_boundary(segmenter, string, start_index, Direction::After);
// 9. Let segmentData be ! CreateSegmentDataObject(segmenter, string, startIndex, endIndex).
// 9. Set iterator.[[IteratedStringNextSegmentCodeUnitIndex]] to endIndex.
// NOTE: This is already handled by LibLocale.
// 10. Let segmentData be CreateSegmentDataObject(segmenter, string, startIndex, endIndex).
auto segment_data = TRY(create_segment_data_object(vm, segmenter, string, start_index, end_index));
// 10. Return CreateIterResultObject(segmentData, false).
// 11. Return CreateIterResultObject(segmentData, false).
return create_iterator_result_object(vm, segment_data, false);
}

View file

@ -8,7 +8,6 @@
#include <AK/Utf16View.h>
#include <LibJS/Runtime/GlobalObject.h>
#include <LibJS/Runtime/Intl/Segmenter.h>
#include <LibUnicode/Segmentation.h>
namespace JS::Intl {
@ -20,34 +19,8 @@ Segmenter::Segmenter(Object& prototype)
{
}
// Stores the [[SegmenterGranularity]] value that corresponds to the given option string.
// Only "grapheme", "word" and "sentence" are accepted; any other input reaches VERIFY_NOT_REACHED().
void Segmenter::set_segmenter_granularity(StringView segmenter_granularity)
{
    if (segmenter_granularity == "grapheme"sv) {
        m_segmenter_granularity = SegmenterGranularity::Grapheme;
        return;
    }

    if (segmenter_granularity == "word"sv) {
        m_segmenter_granularity = SegmenterGranularity::Word;
        return;
    }

    if (segmenter_granularity == "sentence"sv) {
        m_segmenter_granularity = SegmenterGranularity::Sentence;
        return;
    }

    // None of the three recognized spellings matched.
    VERIFY_NOT_REACHED();
}
// Returns the option-string spelling ("grapheme", "word" or "sentence") of the stored
// [[SegmenterGranularity]] value. A value outside the three enumerators reaches VERIFY_NOT_REACHED().
StringView Segmenter::segmenter_granularity_string() const
{
    switch (m_segmenter_granularity) {
    case SegmenterGranularity::Grapheme:
        return "grapheme"sv;
    case SegmenterGranularity::Word:
        return "word"sv;
    case SegmenterGranularity::Sentence:
        return "sentence"sv;
    }

    // Every enumerator returns above; falling out of the switch means the stored value is invalid.
    VERIFY_NOT_REACHED();
}
// 18.7.1 CreateSegmentDataObject ( segmenter, string, startIndex, endIndex ), https://tc39.es/ecma402/#sec-createsegmentdataobject
ThrowCompletionOr<NonnullGCPtr<Object>> create_segment_data_object(VM& vm, Segmenter const& segmenter, Utf16View const& string, double start_index, double end_index)
ThrowCompletionOr<NonnullGCPtr<Object>> create_segment_data_object(VM& vm, ::Locale::Segmenter const& segmenter, Utf16View const& string, size_t start_index, size_t end_index)
{
auto& realm = *vm.current_realm();
@ -55,7 +28,7 @@ ThrowCompletionOr<NonnullGCPtr<Object>> create_segment_data_object(VM& vm, Segme
auto length = string.length_in_code_units();
// 2. Assert: startIndex ≥ 0.
VERIFY(start_index >= 0);
// NOTE: This is always true because the type is size_t.
// 3. Assert: endIndex ≤ len.
VERIFY(end_index <= length);
@ -82,89 +55,52 @@ ThrowCompletionOr<NonnullGCPtr<Object>> create_segment_data_object(VM& vm, Segme
auto granularity = segmenter.segmenter_granularity();
// 11. If granularity is "word", then
if (granularity == Segmenter::SegmenterGranularity::Word) {
if (granularity == ::Locale::SegmenterGranularity::Word) {
// a. Let isWordLike be a Boolean value indicating whether the segment in string is "word-like" according to locale segmenter.[[Locale]].
// TODO
auto is_word_like = segmenter.is_current_boundary_word_like();
// b. Perform ! CreateDataPropertyOrThrow(result, "isWordLike", isWordLike).
MUST(result->create_data_property_or_throw(vm.names.isWordLike, Value(false)));
MUST(result->create_data_property_or_throw(vm.names.isWordLike, Value(is_word_like)));
}
// 12. Return result.
return result;
}
// Dispatches to the Unicode "previous boundary" helper that matches the requested granularity,
// forwarding the string and index unchanged and returning whatever the helper returns.
static Optional<size_t> find_previous_boundary_index(Utf16View const& string, size_t index, Segmenter::SegmenterGranularity granularity)
{
    using Granularity = Segmenter::SegmenterGranularity;

    if (granularity == Granularity::Grapheme)
        return Unicode::previous_grapheme_segmentation_boundary(string, index);

    if (granularity == Granularity::Word)
        return Unicode::previous_word_segmentation_boundary(string, index);

    if (granularity == Granularity::Sentence)
        return Unicode::previous_sentence_segmentation_boundary(string, index);

    // The granularity value is not one of the three enumerators.
    VERIFY_NOT_REACHED();
}
// Dispatches to the Unicode "next boundary" helper that matches the requested granularity,
// forwarding the string and index unchanged and returning whatever the helper returns.
static Optional<size_t> find_next_boundary_index(Utf16View const& string, size_t index, Segmenter::SegmenterGranularity granularity)
{
    using Granularity = Segmenter::SegmenterGranularity;

    if (granularity == Granularity::Grapheme)
        return Unicode::next_grapheme_segmentation_boundary(string, index);

    if (granularity == Granularity::Word)
        return Unicode::next_word_segmentation_boundary(string, index);

    if (granularity == Granularity::Sentence)
        return Unicode::next_sentence_segmentation_boundary(string, index);

    // The granularity value is not one of the three enumerators.
    VERIFY_NOT_REACHED();
}
// 18.8.1 FindBoundary ( segmenter, string, startIndex, direction ), https://tc39.es/ecma402/#sec-findboundary
double find_boundary(Segmenter const& segmenter, Utf16View const& string, double start_index, Direction direction)
size_t find_boundary(::Locale::Segmenter& segmenter, Utf16View const& string, size_t start_index, Direction direction)
{
// 1. Let locale be segmenter.[[Locale]].
// FIXME: Support locale-sensitive boundaries
// 2. Let granularity be segmenter.[[SegmenterGranularity]].
auto granularity = segmenter.segmenter_granularity();
// 3. Let len be the length of string.
// 1. Let len be the length of string.
auto length = string.length_in_code_units();
// 4. If direction is before, then
// 2. Assert: startIndex < len.
VERIFY(start_index < length);
// 3. Let locale be segmenter.[[Locale]].
// 4. Let granularity be segmenter.[[SegmenterGranularity]].
// 5. If direction is before, then
if (direction == Direction::Before) {
// a. Assert: startIndex ≥ 0.
VERIFY(start_index >= 0);
// b. Assert: startIndex < len.
VERIFY(start_index < length);
// a. Search string for the last segmentation boundary that is preceded by at most startIndex code units from
// the beginning, using locale locale and text element granularity granularity.
auto boundary = segmenter.previous_boundary(start_index, ::Locale::Segmenter::Inclusive::Yes);
// c. Search string for the last segmentation boundary that is preceded by at most startIndex code units from the beginning, using locale locale and text element granularity granularity.
auto boundary_index = find_previous_boundary_index(string, static_cast<size_t>(start_index) + 1, granularity);
// b. If a boundary is found, return the count of code units in string preceding it.
if (boundary.has_value())
return *boundary;
// d. If a boundary is found, return the count of code units in string preceding it.
if (boundary_index.has_value())
return static_cast<double>(*boundary_index);
// e. Return 0.
// c. Return 0.
return 0;
}
// 5. Assert: direction is after.
VERIFY(direction == Direction::After);
// 6. If len is 0 or startIndex ≥ len, return +∞.
if (length == 0 || start_index >= length)
return INFINITY;
// 7. Search string for the first segmentation boundary that follows the code unit at index startIndex, using locale locale and text element granularity granularity.
auto boundary_index = find_next_boundary_index(string, static_cast<size_t>(start_index), granularity);
// 6. Assert: direction is after.
// 7. Search string for the first segmentation boundary that follows the code unit at index startIndex, using locale
// locale and text element granularity granularity.
auto boundary = segmenter.next_boundary(start_index);
// 8. If a boundary is found, return the count of code units in string preceding it.
if (boundary_index.has_value())
return static_cast<double>(*boundary_index);
if (boundary.has_value())
return *boundary;
// 9. Return len.
return length;

View file

@ -9,6 +9,7 @@
#include <AK/String.h>
#include <LibJS/Runtime/Object.h>
#include <LibLocale/Segmenter.h>
namespace JS::Intl {
@ -17,34 +18,34 @@ class Segmenter final : public Object {
JS_DECLARE_ALLOCATOR(Segmenter);
public:
enum class SegmenterGranularity {
Grapheme,
Word,
Sentence,
};
virtual ~Segmenter() override = default;
String const& locale() const { return m_locale; }
void set_locale(String locale) { m_locale = move(locale); }
SegmenterGranularity segmenter_granularity() const { return m_segmenter_granularity; }
void set_segmenter_granularity(StringView);
StringView segmenter_granularity_string() const;
::Locale::SegmenterGranularity segmenter_granularity() const { return m_segmenter_granularity; }
void set_segmenter_granularity(StringView segmenter_granularity) { m_segmenter_granularity = ::Locale::segmenter_granularity_from_string(segmenter_granularity); }
StringView segmenter_granularity_string() const { return ::Locale::segmenter_granularity_to_string(m_segmenter_granularity); }
::Locale::Segmenter const& segmenter() const { return *m_segmenter; }
void set_segmenter(NonnullOwnPtr<::Locale::Segmenter> segmenter) { m_segmenter = move(segmenter); }
private:
explicit Segmenter(Object& prototype);
String m_locale; // [[Locale]]
SegmenterGranularity m_segmenter_granularity { SegmenterGranularity::Grapheme }; // [[SegmenterGranularity]]
String m_locale; // [[Locale]]
::Locale::SegmenterGranularity m_segmenter_granularity { ::Locale::SegmenterGranularity::Grapheme }; // [[SegmenterGranularity]]
// Non-standard. Stores the ICU segmenter for the Intl object's segmentation options.
OwnPtr<::Locale::Segmenter> m_segmenter;
};
ThrowCompletionOr<NonnullGCPtr<Object>> create_segment_data_object(VM&, Segmenter const&, Utf16View const&, double start_index, double end_index);
ThrowCompletionOr<NonnullGCPtr<Object>> create_segment_data_object(VM&, ::Locale::Segmenter const&, Utf16View const&, size_t start_index, size_t end_index);
enum class Direction {
Before,
After,
};
double find_boundary(Segmenter const&, Utf16View const&, double start_index, Direction);
size_t find_boundary(::Locale::Segmenter&, Utf16View const&, size_t start_index, Direction);
}

View file

@ -71,21 +71,22 @@ ThrowCompletionOr<NonnullGCPtr<Object>> SegmenterConstructor::construct(Function
// 8. Set opt.[[localeMatcher]] to matcher.
opt.locale_matcher = matcher;
// 9. Let localeData be %Segmenter%.[[LocaleData]].
// 10. Let r be ResolveLocale(%Segmenter%.[[AvailableLocales]], requestedLocales, opt, %Segmenter%.[[RelevantExtensionKeys]], localeData).
// 9. Let r be ResolveLocale(%Intl.Segmenter%.[[AvailableLocales]], requestedLocales, opt, %Intl.Segmenter%.[[RelevantExtensionKeys]], %Intl.Segmenter%.[[LocaleData]]).
auto result = resolve_locale(requested_locales, opt, {});
// 11. Set segmenter.[[Locale]] to r.[[locale]].
// 10. Set segmenter.[[Locale]] to r.[[locale]].
segmenter->set_locale(move(result.locale));
// 12. Let granularity be ? GetOption(options, "granularity", string, « "grapheme", "word", "sentence" », "grapheme").
// 11. Let granularity be ? GetOption(options, "granularity", string, « "grapheme", "word", "sentence" », "grapheme").
auto granularity = TRY(get_option(vm, *options, vm.names.granularity, OptionType::String, { "grapheme"sv, "word"sv, "sentence"sv }, "grapheme"sv));
// 13. Set segmenter.[[SegmenterGranularity]] to granularity.
// 12. Set segmenter.[[SegmenterGranularity]] to granularity.
segmenter->set_segmenter_granularity(granularity.as_string().utf8_string_view());
// 14. Return segmenter.
auto locale_segmenter = ::Locale::Segmenter::create(segmenter->locale(), segmenter->segmenter_granularity());
segmenter->set_segmenter(move(locale_segmenter));
// 13. Return segmenter.
return segmenter;
}

View file

@ -70,7 +70,7 @@ JS_DEFINE_NATIVE_FUNCTION(SegmenterPrototype::segment)
auto string = TRY(vm.argument(0).to_utf16_string(vm));
// 4. Return ! CreateSegmentsObject(segmenter, string).
return Segments::create(realm, segmenter, move(string));
return Segments::create(realm, segmenter->segmenter(), move(string));
}
}

View file

@ -13,7 +13,7 @@ namespace JS::Intl {
JS_DEFINE_ALLOCATOR(Segments);
// 18.5.1 CreateSegmentsObject ( segmenter, string ), https://tc39.es/ecma402/#sec-createsegmentsobject
NonnullGCPtr<Segments> Segments::create(Realm& realm, Segmenter& segmenter, Utf16String string)
NonnullGCPtr<Segments> Segments::create(Realm& realm, ::Locale::Segmenter const& segmenter, Utf16String string)
{
// 1. Let internalSlotsList be « [[SegmentsSegmenter]], [[SegmentsString]] ».
// 2. Let segments be OrdinaryObjectCreate(%SegmentsPrototype%, internalSlotsList).
@ -24,17 +24,12 @@ NonnullGCPtr<Segments> Segments::create(Realm& realm, Segmenter& segmenter, Utf1
}
// 18.5 Segments Objects, https://tc39.es/ecma402/#sec-segments-objects
Segments::Segments(Realm& realm, Segmenter& segmenter, Utf16String string)
Segments::Segments(Realm& realm, ::Locale::Segmenter const& segmenter, Utf16String string)
: Object(ConstructWithPrototypeTag::Tag, realm.intrinsics().intl_segments_prototype())
, m_segments_segmenter(segmenter)
, m_segments_segmenter(segmenter.clone())
, m_segments_string(move(string))
{
}
void Segments::visit_edges(Cell::Visitor& visitor)
{
Base::visit_edges(visitor);
visitor.visit(m_segments_segmenter);
m_segments_segmenter->set_segmented_text(m_segments_string.view());
}
}

View file

@ -9,6 +9,7 @@
#include <AK/Utf16View.h>
#include <LibJS/Runtime/Intl/Segmenter.h>
#include <LibJS/Runtime/Object.h>
#include <LibLocale/Segmenter.h>
namespace JS::Intl {
@ -17,21 +18,19 @@ class Segments final : public Object {
JS_DECLARE_ALLOCATOR(Segments);
public:
static NonnullGCPtr<Segments> create(Realm&, Segmenter&, Utf16String);
static NonnullGCPtr<Segments> create(Realm&, ::Locale::Segmenter const&, Utf16String);
virtual ~Segments() override = default;
Segmenter& segments_segmenter() const { return m_segments_segmenter; }
::Locale::Segmenter& segments_segmenter() const { return *m_segments_segmenter; }
Utf16View segments_string() const { return m_segments_string.view(); }
private:
Segments(Realm&, Segmenter&, Utf16String);
Segments(Realm&, ::Locale::Segmenter const&, Utf16String);
virtual void visit_edges(Cell::Visitor&) override;
NonnullGCPtr<Segmenter> m_segments_segmenter; // [[SegmentsSegmenter]]
Utf16String m_segments_string; // [[SegmentsString]]
NonnullOwnPtr<::Locale::Segmenter> m_segments_segmenter; // [[SegmentsSegmenter]]
Utf16String m_segments_string; // [[SegmentsString]]
};
}

View file

@ -38,10 +38,10 @@ JS_DEFINE_NATIVE_FUNCTION(SegmentsPrototype::containing)
auto segments = TRY(typed_this_object(vm));
// 3. Let segmenter be segments.[[SegmentsSegmenter]].
auto const& segmenter = segments->segments_segmenter();
auto& segmenter = segments->segments_segmenter();
// 4. Let string be segments.[[SegmentsString]].
auto string = segments->segments_string();
auto const& string = segments->segments_string();
// 5. Let len be the length of string.
auto length = string.length_in_code_units();
@ -50,16 +50,16 @@ JS_DEFINE_NATIVE_FUNCTION(SegmentsPrototype::containing)
auto n = TRY(vm.argument(0).to_integer_or_infinity(vm));
// 7. If n < 0 or n ≥ len, return undefined.
if (n < 0 || n >= length)
if (n < 0 || n >= static_cast<double>(length))
return js_undefined();
// 8. Let startIndex be ! FindBoundary(segmenter, string, n, before).
auto start_index = find_boundary(segmenter, string, n, Direction::Before);
// 8. Let startIndex be FindBoundary(segmenter, string, n, before).
auto start_index = find_boundary(segmenter, string, static_cast<size_t>(n), Direction::Before);
// 9. Let endIndex be ! FindBoundary(segmenter, string, n, after).
auto end_index = find_boundary(segmenter, string, n, Direction::After);
// 9. Let endIndex be FindBoundary(segmenter, string, n, after).
auto end_index = find_boundary(segmenter, string, static_cast<size_t>(n), Direction::After);
// 10. Return ! CreateSegmentDataObject(segmenter, string, startIndex, endIndex).
// 10. Return CreateSegmentDataObject(segmenter, string, startIndex, endIndex).
return TRY(create_segment_data_object(vm, segmenter, string, start_index, end_index));
}

View file

@ -31,7 +31,7 @@ describe("correct behavior", () => {
expect(wordSegment0.segment).toBe("hello");
expect(wordSegment0.index).toBe(0);
expect(wordSegment0.input).toBe(string);
// FIXME: expect(wordSegment0.isWordLike).toBeTrue();
expect(wordSegment0.isWordLike).toBeTrue();
const wordSegment5 = wordSegments.containing(5);
expect(wordSegment5.segment).toBe(" ");
expect(wordSegment5.index).toBe(5);
@ -85,7 +85,7 @@ describe("correct behavior", () => {
expect(segment.segment).toBe(expectedSegments[index].segment);
expect(segment.index).toBe(expectedSegments[index].index);
expect(segment.input).toBe(string);
// FIXME: expect(segment.isWordLike).toBe(expectedSegments[index].isWordLike);
expect(segment.isWordLike).toBe(expectedSegments[index].isWordLike);
index++;
}
expect(index).toBe(expectedSegments.length);
@ -139,6 +139,7 @@ describe("correct behavior", () => {
for (const segment of segments) {
expect(segment.segment).toBe(expectedSegments[index].segment);
expect(segment.index).toBe(expectedSegments[index].index);
expect(segment.isWordLike).toBe(expectedSegments[index].isWordLike);
expect(segment.input).toBe(string);
index++;
}