mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2024-11-22 15:40:19 +00:00
LibRegex: Partially implement the ECMAScript unicodeSets proposal
This skips the new string unicode properties additions, along with \q{}.
This commit is contained in:
parent
7734914909
commit
598dc74a76
Notes:
sideshowbarker
2024-07-17 08:44:43 +09:00
Author: https://github.com/alimpfard Commit: https://github.com/SerenityOS/serenity/commit/598dc74a76 Pull-request: https://github.com/SerenityOS/serenity/pull/14592 Reviewed-by: https://github.com/linusg ✅
9 changed files with 611 additions and 69 deletions
|
@ -765,6 +765,45 @@ TEST_CASE(ECMA262_unicode_match)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST_CASE(ECMA262_unicode_sets_match)
{
    // One row of the table below: `pattern` is compiled with the UnicodeSets flag
    // OR-ed with `options`, run against `subject`, and the match outcome is
    // compared with `matches`.
    struct Expectation {
        StringView pattern;
        StringView subject;
        bool matches { true };
        ECMAScriptFlags options {};
    };

    constexpr Expectation expectations[] {
        // Subtraction ("--") and intersection ("&&") with a single character operand.
        { "[\\w--x]"sv, "x"sv, false },
        { "[\\w&&x]"sv, "y"sv, false },
        { "[\\w--x]"sv, "y"sv, true },
        { "[\\w&&x]"sv, "x"sv, true },
        // Chained subtraction from a nested class.
        { "[[0-9\\w]--x--6]"sv, "6"sv, false },
        { "[[0-9\\w]--x--6]"sv, "x"sv, false },
        { "[[0-9\\w]--x--6]"sv, "y"sv, true },
        { "[[0-9\\w]--x--6]"sv, "9"sv, true },
        // Intersection of two character class escapes.
        { "[\\w&&\\d]"sv, "a"sv, false },
        { "[\\w&&\\d]"sv, "4"sv, true },
    };

    for (auto const& test : expectations) {
        Regex<ECMA262> re(test.pattern, static_cast<ECMAScriptFlags>(regex::AllFlags::UnicodeSets) | test.options);

        // Dump the generated bytecode when regex debugging is compiled in.
        if constexpr (REGEX_DEBUG) {
            dbgln("\n");
            RegexDebug regex_dbg(stderr);
            regex_dbg.print_raw_bytecode(re);
            regex_dbg.print_header();
            regex_dbg.print_bytecode(re);
            dbgln("\n");
        }

        // Every pattern in the table is expected to parse without error.
        EXPECT_EQ(re.parser_result.error, regex::Error::NoError);

        auto result = re.match(test.subject).success;
        EXPECT_EQ(result, test.matches);
    }
}
|
||||||
|
|
||||||
TEST_CASE(ECMA262_property_match)
|
TEST_CASE(ECMA262_property_match)
|
||||||
{
|
{
|
||||||
struct _test {
|
struct _test {
|
||||||
|
|
|
@ -39,6 +39,7 @@ enum __Regex_Error {
|
||||||
__Regex_InvalidNameForCaptureGroup, // Name of capture group is invalid.
|
__Regex_InvalidNameForCaptureGroup, // Name of capture group is invalid.
|
||||||
__Regex_InvalidNameForProperty, // Name of property is invalid.
|
__Regex_InvalidNameForProperty, // Name of property is invalid.
|
||||||
__Regex_DuplicateNamedCapture, // Duplicate named capture group
|
__Regex_DuplicateNamedCapture, // Duplicate named capture group
|
||||||
|
__Regex_InvalidCharacterClassEscape, // Invalid escaped entity in character class.
|
||||||
};
|
};
|
||||||
|
|
||||||
enum ReError {
|
enum ReError {
|
||||||
|
@ -82,10 +83,11 @@ enum __RegexAllFlags {
|
||||||
__Regex_Multiline = __Regex_Global << 12, // Handle newline characters. Match each line, one by one.
|
__Regex_Multiline = __Regex_Global << 12, // Handle newline characters. Match each line, one by one.
|
||||||
__Regex_SkipTrimEmptyMatches = __Regex_Global << 13, // Do not remove empty capture group results.
|
__Regex_SkipTrimEmptyMatches = __Regex_Global << 13, // Do not remove empty capture group results.
|
||||||
__Regex_SingleMatch = __Regex_Global << 14, // Stop after acquiring a single match.
|
__Regex_SingleMatch = __Regex_Global << 14, // Stop after acquiring a single match.
|
||||||
__Regex_Internal_Stateful = __Regex_Global << 15, // Internal flag; enables stateful matches.
|
__Regex_UnicodeSets = __Regex_Global << 15, // ECMA262 Parser specific: Allow set operations in char classes.
|
||||||
__Regex_Internal_BrowserExtended = __Regex_Global << 16, // Internal flag; enable browser-specific ECMA262 extensions.
|
__Regex_Internal_Stateful = __Regex_Global << 16, // Internal flag; enables stateful matches.
|
||||||
__Regex_Internal_ConsiderNewline = __Regex_Global << 17, // Internal flag; allow matchers to consider newlines as line separators.
|
__Regex_Internal_BrowserExtended = __Regex_Global << 17, // Internal flag; enable browser-specific ECMA262 extensions.
|
||||||
__Regex_Last = __Regex_SingleMatch
|
__Regex_Internal_ConsiderNewline = __Regex_Global << 18, // Internal flag; allow matchers to consider newlines as line separators.
|
||||||
|
__Regex_Last = __Regex_UnicodeSets,
|
||||||
};
|
};
|
||||||
|
|
||||||
// Values for the cflags parameter to the regcomp() function:
|
// Values for the cflags parameter to the regcomp() function:
|
||||||
|
|
|
@ -435,6 +435,20 @@ ALWAYS_INLINE ExecutionResult OpCode_Compare::execute(MatchInput const& input, M
|
||||||
bool inverse { false };
|
bool inverse { false };
|
||||||
bool temporary_inverse { false };
|
bool temporary_inverse { false };
|
||||||
bool reset_temp_inverse { false };
|
bool reset_temp_inverse { false };
|
||||||
|
struct DisjunctionState {
|
||||||
|
bool active { false };
|
||||||
|
bool is_conjunction { false };
|
||||||
|
bool fail { false };
|
||||||
|
size_t initial_position;
|
||||||
|
size_t initial_code_unit_position;
|
||||||
|
Optional<size_t> last_accepted_position {};
|
||||||
|
Optional<size_t> last_accepted_code_unit_position {};
|
||||||
|
};
|
||||||
|
|
||||||
|
Vector<DisjunctionState, 4> disjunction_states;
|
||||||
|
disjunction_states.empend();
|
||||||
|
|
||||||
|
auto current_disjunction_state = [&]() -> DisjunctionState& { return disjunction_states.last(); };
|
||||||
|
|
||||||
auto current_inversion_state = [&]() -> bool { return temporary_inverse ^ inverse; };
|
auto current_inversion_state = [&]() -> bool { return temporary_inverse ^ inverse; };
|
||||||
|
|
||||||
|
@ -602,16 +616,69 @@ ALWAYS_INLINE ExecutionResult OpCode_Compare::execute(MatchInput const& input, M
|
||||||
auto script = static_cast<Unicode::Script>(m_bytecode->at(offset++));
|
auto script = static_cast<Unicode::Script>(m_bytecode->at(offset++));
|
||||||
compare_script_extension(input, state, script, current_inversion_state(), inverse_matched);
|
compare_script_extension(input, state, script, current_inversion_state(), inverse_matched);
|
||||||
|
|
||||||
|
} else if (compare_type == CharacterCompareType::And) {
|
||||||
|
disjunction_states.append({
|
||||||
|
.active = true,
|
||||||
|
.is_conjunction = false,
|
||||||
|
.fail = false,
|
||||||
|
.initial_position = state.string_position,
|
||||||
|
.initial_code_unit_position = state.string_position_in_code_units,
|
||||||
|
});
|
||||||
|
continue;
|
||||||
|
|
||||||
|
} else if (compare_type == CharacterCompareType::Or) {
|
||||||
|
disjunction_states.append({
|
||||||
|
.active = true,
|
||||||
|
.is_conjunction = true,
|
||||||
|
.fail = true,
|
||||||
|
.initial_position = state.string_position,
|
||||||
|
.initial_code_unit_position = state.string_position_in_code_units,
|
||||||
|
});
|
||||||
|
continue;
|
||||||
|
|
||||||
|
} else if (compare_type == CharacterCompareType::EndAndOr) {
|
||||||
|
auto disjunction_state = disjunction_states.take_last();
|
||||||
|
if (!disjunction_state.fail) {
|
||||||
|
state.string_position = disjunction_state.last_accepted_position.value_or(disjunction_state.initial_position);
|
||||||
|
state.string_position_in_code_units = disjunction_state.last_accepted_code_unit_position.value_or(disjunction_state.initial_code_unit_position);
|
||||||
|
}
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
warnln("Undefined comparison: {}", (int)compare_type);
|
warnln("Undefined comparison: {}", (int)compare_type);
|
||||||
VERIFY_NOT_REACHED();
|
VERIFY_NOT_REACHED();
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (current_inversion_state() && !inverse && !inverse_matched) {
|
auto& new_disjunction_state = current_disjunction_state();
|
||||||
|
if (current_inversion_state() && (!inverse || new_disjunction_state.active) && !inverse_matched) {
|
||||||
advance_string_position(state, input.view);
|
advance_string_position(state, input.view);
|
||||||
inverse_matched = true;
|
inverse_matched = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (new_disjunction_state.active) {
|
||||||
|
auto failed = (!had_zero_length_match && string_position == state.string_position) || state.string_position > input.view.length();
|
||||||
|
|
||||||
|
if (!failed) {
|
||||||
|
new_disjunction_state.last_accepted_position = state.string_position;
|
||||||
|
new_disjunction_state.last_accepted_code_unit_position = state.string_position_in_code_units;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (new_disjunction_state.is_conjunction)
|
||||||
|
new_disjunction_state.fail = failed && new_disjunction_state.fail;
|
||||||
|
else
|
||||||
|
new_disjunction_state.fail = failed || new_disjunction_state.fail;
|
||||||
|
|
||||||
|
state.string_position = new_disjunction_state.initial_position;
|
||||||
|
state.string_position_in_code_units = new_disjunction_state.initial_code_unit_position;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
auto& new_disjunction_state = current_disjunction_state();
|
||||||
|
if (new_disjunction_state.active) {
|
||||||
|
if (!new_disjunction_state.fail) {
|
||||||
|
state.string_position = new_disjunction_state.last_accepted_position.value_or(new_disjunction_state.initial_position);
|
||||||
|
state.string_position_in_code_units = new_disjunction_state.last_accepted_code_unit_position.value_or(new_disjunction_state.initial_code_unit_position);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (current_inversion_state() && !inverse_matched)
|
if (current_inversion_state() && !inverse_matched)
|
||||||
|
@ -843,6 +910,12 @@ Vector<CompareTypeAndValuePair> OpCode_Compare::flat_compares() const
|
||||||
auto count = m_bytecode->at(offset++);
|
auto count = m_bytecode->at(offset++);
|
||||||
for (size_t i = 0; i < count; ++i)
|
for (size_t i = 0; i < count; ++i)
|
||||||
result.append({ CharacterCompareType::CharRange, m_bytecode->at(offset++) });
|
result.append({ CharacterCompareType::CharRange, m_bytecode->at(offset++) });
|
||||||
|
} else if (compare_type == CharacterCompareType::GeneralCategory
|
||||||
|
|| compare_type == CharacterCompareType::Property
|
||||||
|
|| compare_type == CharacterCompareType::Script
|
||||||
|
|| compare_type == CharacterCompareType::ScriptExtension) {
|
||||||
|
auto value = m_bytecode->at(offset++);
|
||||||
|
result.append({ compare_type, value });
|
||||||
} else {
|
} else {
|
||||||
result.append({ compare_type, 0 });
|
result.append({ compare_type, 0 });
|
||||||
}
|
}
|
||||||
|
@ -936,6 +1009,13 @@ Vector<String> OpCode_Compare::variable_arguments_to_string(Optional<MatchInput>
|
||||||
result.empend(String::formatted(
|
result.empend(String::formatted(
|
||||||
" compare against: '{}'",
|
" compare against: '{}'",
|
||||||
input.value().view.substring_view(string_start_offset, state().string_position > view.length() ? 0 : 1).to_string()));
|
input.value().view.substring_view(string_start_offset, state().string_position > view.length() ? 0 : 1).to_string()));
|
||||||
|
} else if (compare_type == CharacterCompareType::GeneralCategory
|
||||||
|
|| compare_type == CharacterCompareType::Property
|
||||||
|
|| compare_type == CharacterCompareType::Script
|
||||||
|
|| compare_type == CharacterCompareType::ScriptExtension) {
|
||||||
|
|
||||||
|
auto value = m_bytecode->at(offset++);
|
||||||
|
result.empend(String::formatted(" value={}", value));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return result;
|
return result;
|
||||||
|
|
|
@ -76,7 +76,10 @@ enum class OpCodeId : ByteCodeValueType {
|
||||||
__ENUMERATE_CHARACTER_COMPARE_TYPE(Script) \
|
__ENUMERATE_CHARACTER_COMPARE_TYPE(Script) \
|
||||||
__ENUMERATE_CHARACTER_COMPARE_TYPE(ScriptExtension) \
|
__ENUMERATE_CHARACTER_COMPARE_TYPE(ScriptExtension) \
|
||||||
__ENUMERATE_CHARACTER_COMPARE_TYPE(RangeExpressionDummy) \
|
__ENUMERATE_CHARACTER_COMPARE_TYPE(RangeExpressionDummy) \
|
||||||
__ENUMERATE_CHARACTER_COMPARE_TYPE(LookupTable)
|
__ENUMERATE_CHARACTER_COMPARE_TYPE(LookupTable) \
|
||||||
|
__ENUMERATE_CHARACTER_COMPARE_TYPE(And) \
|
||||||
|
__ENUMERATE_CHARACTER_COMPARE_TYPE(Or) \
|
||||||
|
__ENUMERATE_CHARACTER_COMPARE_TYPE(EndAndOr)
|
||||||
|
|
||||||
enum class CharacterCompareType : ByteCodeValueType {
|
enum class CharacterCompareType : ByteCodeValueType {
|
||||||
#define __ENUMERATE_CHARACTER_COMPARE_TYPE(x) x,
|
#define __ENUMERATE_CHARACTER_COMPARE_TYPE(x) x,
|
||||||
|
|
|
@ -36,6 +36,7 @@ enum class Error : u8 {
|
||||||
InvalidNameForCaptureGroup = __Regex_InvalidNameForCaptureGroup, // Name of capture group is invalid.
|
InvalidNameForCaptureGroup = __Regex_InvalidNameForCaptureGroup, // Name of capture group is invalid.
|
||||||
InvalidNameForProperty = __Regex_InvalidNameForProperty, // Name of property is invalid.
|
InvalidNameForProperty = __Regex_InvalidNameForProperty, // Name of property is invalid.
|
||||||
DuplicateNamedCapture = __Regex_DuplicateNamedCapture, // Duplicate named capture group.
|
DuplicateNamedCapture = __Regex_DuplicateNamedCapture, // Duplicate named capture group.
|
||||||
|
InvalidCharacterClassEscape = __Regex_InvalidCharacterClassEscape, // Invalid escaped entity in character class.
|
||||||
};
|
};
|
||||||
|
|
||||||
inline String get_error_string(Error error)
|
inline String get_error_string(Error error)
|
||||||
|
@ -79,6 +80,8 @@ inline String get_error_string(Error error)
|
||||||
return "Name of property is invalid.";
|
return "Name of property is invalid.";
|
||||||
case Error::DuplicateNamedCapture:
|
case Error::DuplicateNamedCapture:
|
||||||
return "Duplicate capture group name";
|
return "Duplicate capture group name";
|
||||||
|
case Error::InvalidCharacterClassEscape:
|
||||||
|
return "Invalid escaped entity in character class.";
|
||||||
}
|
}
|
||||||
return "Undefined error.";
|
return "Undefined error.";
|
||||||
}
|
}
|
||||||
|
|
|
@ -205,6 +205,9 @@ static bool has_overlap(Vector<CompareTypeAndValuePair> const& lhs, Vector<Compa
|
||||||
case CharacterCompareType::GeneralCategory:
|
case CharacterCompareType::GeneralCategory:
|
||||||
case CharacterCompareType::Script:
|
case CharacterCompareType::Script:
|
||||||
case CharacterCompareType::ScriptExtension:
|
case CharacterCompareType::ScriptExtension:
|
||||||
|
case CharacterCompareType::And:
|
||||||
|
case CharacterCompareType::Or:
|
||||||
|
case CharacterCompareType::EndAndOr:
|
||||||
// FIXME: These are too difficult to handle, so bail out.
|
// FIXME: These are too difficult to handle, so bail out.
|
||||||
return true;
|
return true;
|
||||||
case CharacterCompareType::Undefined:
|
case CharacterCompareType::Undefined:
|
||||||
|
@ -274,6 +277,9 @@ static bool has_overlap(Vector<CompareTypeAndValuePair> const& lhs, Vector<Compa
|
||||||
case CharacterCompareType::GeneralCategory:
|
case CharacterCompareType::GeneralCategory:
|
||||||
case CharacterCompareType::Script:
|
case CharacterCompareType::Script:
|
||||||
case CharacterCompareType::ScriptExtension:
|
case CharacterCompareType::ScriptExtension:
|
||||||
|
case CharacterCompareType::And:
|
||||||
|
case CharacterCompareType::Or:
|
||||||
|
case CharacterCompareType::EndAndOr:
|
||||||
// FIXME: These are too difficult to handle, so bail out.
|
// FIXME: These are too difficult to handle, so bail out.
|
||||||
return true;
|
return true;
|
||||||
case CharacterCompareType::Undefined:
|
case CharacterCompareType::Undefined:
|
||||||
|
@ -785,6 +791,8 @@ enum class LookupTableInsertionOutcome {
|
||||||
ReplaceWithAnyChar,
|
ReplaceWithAnyChar,
|
||||||
TemporaryInversionNeeded,
|
TemporaryInversionNeeded,
|
||||||
PermanentInversionNeeded,
|
PermanentInversionNeeded,
|
||||||
|
FlushOnInsertion,
|
||||||
|
FinishFlushOnInsertion,
|
||||||
CannotPlaceInTable,
|
CannotPlaceInTable,
|
||||||
};
|
};
|
||||||
static LookupTableInsertionOutcome insert_into_lookup_table(RedBlackTree<ByteCodeValueType, CharRange>& table, CompareTypeAndValuePair pair)
|
static LookupTableInsertionOutcome insert_into_lookup_table(RedBlackTree<ByteCodeValueType, CharRange>& table, CompareTypeAndValuePair pair)
|
||||||
|
@ -806,11 +814,16 @@ static LookupTableInsertionOutcome insert_into_lookup_table(RedBlackTree<ByteCod
|
||||||
table.insert(range.from, range);
|
table.insert(range.from, range);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
case CharacterCompareType::EndAndOr:
|
||||||
|
return LookupTableInsertionOutcome::FinishFlushOnInsertion;
|
||||||
|
case CharacterCompareType::And:
|
||||||
|
return LookupTableInsertionOutcome::FlushOnInsertion;
|
||||||
case CharacterCompareType::Reference:
|
case CharacterCompareType::Reference:
|
||||||
case CharacterCompareType::Property:
|
case CharacterCompareType::Property:
|
||||||
case CharacterCompareType::GeneralCategory:
|
case CharacterCompareType::GeneralCategory:
|
||||||
case CharacterCompareType::Script:
|
case CharacterCompareType::Script:
|
||||||
case CharacterCompareType::ScriptExtension:
|
case CharacterCompareType::ScriptExtension:
|
||||||
|
case CharacterCompareType::Or:
|
||||||
return LookupTableInsertionOutcome::CannotPlaceInTable;
|
return LookupTableInsertionOutcome::CannotPlaceInTable;
|
||||||
case CharacterCompareType::Undefined:
|
case CharacterCompareType::Undefined:
|
||||||
case CharacterCompareType::RangeExpressionDummy:
|
case CharacterCompareType::RangeExpressionDummy:
|
||||||
|
@ -830,7 +843,12 @@ void Optimizer::append_character_class(ByteCode& target, Vector<CompareTypeAndVa
|
||||||
if (pairs.size() <= 1) {
|
if (pairs.size() <= 1) {
|
||||||
for (auto& pair : pairs) {
|
for (auto& pair : pairs) {
|
||||||
arguments.append(to_underlying(pair.type));
|
arguments.append(to_underlying(pair.type));
|
||||||
if (pair.type != CharacterCompareType::AnyChar && pair.type != CharacterCompareType::TemporaryInverse && pair.type != CharacterCompareType::Inverse)
|
if (pair.type != CharacterCompareType::AnyChar
|
||||||
|
&& pair.type != CharacterCompareType::TemporaryInverse
|
||||||
|
&& pair.type != CharacterCompareType::Inverse
|
||||||
|
&& pair.type != CharacterCompareType::And
|
||||||
|
&& pair.type != CharacterCompareType::Or
|
||||||
|
&& pair.type != CharacterCompareType::EndAndOr)
|
||||||
arguments.append(pair.value);
|
arguments.append(pair.value);
|
||||||
++argument_count;
|
++argument_count;
|
||||||
}
|
}
|
||||||
|
@ -881,8 +899,12 @@ void Optimizer::append_character_class(ByteCode& target, Vector<CompareTypeAndVa
|
||||||
arguments.append(to_underlying(CharacterCompareType::TemporaryInverse));
|
arguments.append(to_underlying(CharacterCompareType::TemporaryInverse));
|
||||||
append_table(inverted_table);
|
append_table(inverted_table);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
table.clear();
|
||||||
|
inverted_table.clear();
|
||||||
};
|
};
|
||||||
|
|
||||||
|
auto flush_on_every_insertion = false;
|
||||||
for (auto& value : pairs) {
|
for (auto& value : pairs) {
|
||||||
auto should_invert_after_this_iteration = invert_for_next_iteration;
|
auto should_invert_after_this_iteration = invert_for_next_iteration;
|
||||||
invert_for_next_iteration = false;
|
invert_for_next_iteration = false;
|
||||||
|
@ -890,6 +912,8 @@ void Optimizer::append_character_class(ByteCode& target, Vector<CompareTypeAndVa
|
||||||
auto insertion_result = insert_into_lookup_table(*current_table, value);
|
auto insertion_result = insert_into_lookup_table(*current_table, value);
|
||||||
switch (insertion_result) {
|
switch (insertion_result) {
|
||||||
case LookupTableInsertionOutcome::Successful:
|
case LookupTableInsertionOutcome::Successful:
|
||||||
|
if (flush_on_every_insertion)
|
||||||
|
flush_tables();
|
||||||
break;
|
break;
|
||||||
case LookupTableInsertionOutcome::ReplaceWithAnyChar: {
|
case LookupTableInsertionOutcome::ReplaceWithAnyChar: {
|
||||||
table.clear();
|
table.clear();
|
||||||
|
@ -908,12 +932,24 @@ void Optimizer::append_character_class(ByteCode& target, Vector<CompareTypeAndVa
|
||||||
arguments.append(to_underlying(CharacterCompareType::Inverse));
|
arguments.append(to_underlying(CharacterCompareType::Inverse));
|
||||||
++argument_count;
|
++argument_count;
|
||||||
break;
|
break;
|
||||||
|
case LookupTableInsertionOutcome::FlushOnInsertion:
|
||||||
|
case LookupTableInsertionOutcome::FinishFlushOnInsertion:
|
||||||
|
flush_tables();
|
||||||
|
flush_on_every_insertion = insertion_result == LookupTableInsertionOutcome::FlushOnInsertion;
|
||||||
|
[[fallthrough]];
|
||||||
case LookupTableInsertionOutcome::CannotPlaceInTable:
|
case LookupTableInsertionOutcome::CannotPlaceInTable:
|
||||||
if (is_currently_inverted) {
|
if (is_currently_inverted) {
|
||||||
arguments.append(to_underlying(CharacterCompareType::TemporaryInverse));
|
arguments.append(to_underlying(CharacterCompareType::TemporaryInverse));
|
||||||
++argument_count;
|
++argument_count;
|
||||||
}
|
}
|
||||||
arguments.append(to_underlying(value.type));
|
arguments.append(to_underlying(value.type));
|
||||||
|
|
||||||
|
if (value.type != CharacterCompareType::AnyChar
|
||||||
|
&& value.type != CharacterCompareType::TemporaryInverse
|
||||||
|
&& value.type != CharacterCompareType::Inverse
|
||||||
|
&& value.type != CharacterCompareType::And
|
||||||
|
&& value.type != CharacterCompareType::Or
|
||||||
|
&& value.type != CharacterCompareType::EndAndOr)
|
||||||
arguments.append(value.value);
|
arguments.append(value.value);
|
||||||
++argument_count;
|
++argument_count;
|
||||||
break;
|
break;
|
||||||
|
|
|
@ -34,6 +34,7 @@ enum class AllFlags {
|
||||||
Multiline = __Regex_Multiline, // Handle newline characters. Match each line, one by one.
|
Multiline = __Regex_Multiline, // Handle newline characters. Match each line, one by one.
|
||||||
SkipTrimEmptyMatches = __Regex_SkipTrimEmptyMatches, // Do not remove empty capture group results.
|
SkipTrimEmptyMatches = __Regex_SkipTrimEmptyMatches, // Do not remove empty capture group results.
|
||||||
SingleMatch = __Regex_SingleMatch, // Stop after acquiring a single match.
|
SingleMatch = __Regex_SingleMatch, // Stop after acquiring a single match.
|
||||||
|
UnicodeSets = __Regex_UnicodeSets, // Only for ECMA262, Allow set operations in character classes.
|
||||||
Internal_Stateful = __Regex_Internal_Stateful, // Make global matches match one result at a time, and further match() calls on the same instance continue where the previous one left off.
|
Internal_Stateful = __Regex_Internal_Stateful, // Make global matches match one result at a time, and further match() calls on the same instance continue where the previous one left off.
|
||||||
Internal_BrowserExtended = __Regex_Internal_BrowserExtended, // Only for ECMA262, Enable the behaviors defined in section B.1.4. of the ECMA262 spec.
|
Internal_BrowserExtended = __Regex_Internal_BrowserExtended, // Only for ECMA262, Enable the behaviors defined in section B.1.4. of the ECMA262 spec.
|
||||||
Internal_ConsiderNewline = __Regex_Internal_ConsiderNewline, // Only for ECMA262, Allow multiline matches to consider newlines as line boundaries.
|
Internal_ConsiderNewline = __Regex_Internal_ConsiderNewline, // Only for ECMA262, Allow multiline matches to consider newlines as line boundaries.
|
||||||
|
@ -66,6 +67,7 @@ enum class ECMAScriptFlags : FlagsUnderlyingType {
|
||||||
Sticky = (FlagsUnderlyingType)AllFlags::Sticky,
|
Sticky = (FlagsUnderlyingType)AllFlags::Sticky,
|
||||||
Multiline = (FlagsUnderlyingType)AllFlags::Multiline,
|
Multiline = (FlagsUnderlyingType)AllFlags::Multiline,
|
||||||
StringCopyMatches = (FlagsUnderlyingType)AllFlags::StringCopyMatches,
|
StringCopyMatches = (FlagsUnderlyingType)AllFlags::StringCopyMatches,
|
||||||
|
UnicodeSets = (FlagsUnderlyingType)AllFlags::UnicodeSets,
|
||||||
BrowserExtended = (FlagsUnderlyingType)AllFlags::Internal_BrowserExtended,
|
BrowserExtended = (FlagsUnderlyingType)AllFlags::Internal_BrowserExtended,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -930,16 +930,17 @@ bool PosixExtendedParser::parse_root(ByteCode& stack, size_t& match_length_minim
|
||||||
bool ECMA262Parser::parse_internal(ByteCode& stack, size_t& match_length_minimum)
|
bool ECMA262Parser::parse_internal(ByteCode& stack, size_t& match_length_minimum)
|
||||||
{
|
{
|
||||||
auto unicode = m_parser_state.regex_options.has_flag_set(AllFlags::Unicode);
|
auto unicode = m_parser_state.regex_options.has_flag_set(AllFlags::Unicode);
|
||||||
if (unicode) {
|
auto unicode_sets = m_parser_state.regex_options.has_flag_set(AllFlags::UnicodeSets);
|
||||||
return parse_pattern(stack, match_length_minimum, { .unicode = true, .named = true });
|
if (unicode || unicode_sets) {
|
||||||
|
return parse_pattern(stack, match_length_minimum, { .unicode = true, .named = true, .unicode_sets = unicode_sets });
|
||||||
}
|
}
|
||||||
|
|
||||||
ByteCode new_stack;
|
ByteCode new_stack;
|
||||||
size_t new_match_length = 0;
|
size_t new_match_length = 0;
|
||||||
auto res = parse_pattern(new_stack, new_match_length, { .unicode = false, .named = false });
|
auto res = parse_pattern(new_stack, new_match_length, { .unicode = false, .named = false, .unicode_sets = false });
|
||||||
if (m_parser_state.named_capture_groups_count > 0) {
|
if (m_parser_state.named_capture_groups_count > 0) {
|
||||||
reset();
|
reset();
|
||||||
return parse_pattern(stack, match_length_minimum, { .unicode = false, .named = true });
|
return parse_pattern(stack, match_length_minimum, { .unicode = false, .named = true, .unicode_sets = false });
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!res)
|
if (!res)
|
||||||
|
@ -1136,7 +1137,7 @@ bool ECMA262Parser::parse_quantifiable_assertion(ByteCode& stack, size_t&, Parse
|
||||||
size_t match_length_minimum = 0;
|
size_t match_length_minimum = 0;
|
||||||
|
|
||||||
if (try_skip("="sv)) {
|
if (try_skip("="sv)) {
|
||||||
if (!parse_inner_disjunction(assertion_stack, match_length_minimum, { .unicode = false, .named = flags.named }))
|
if (!parse_inner_disjunction(assertion_stack, match_length_minimum, { .unicode = false, .named = flags.named, .unicode_sets = false }))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
stack.insert_bytecode_lookaround(move(assertion_stack), ByteCode::LookAroundType::LookAhead);
|
stack.insert_bytecode_lookaround(move(assertion_stack), ByteCode::LookAroundType::LookAhead);
|
||||||
|
@ -1149,7 +1150,7 @@ bool ECMA262Parser::parse_quantifiable_assertion(ByteCode& stack, size_t&, Parse
|
||||||
exit_capture_group_scope();
|
exit_capture_group_scope();
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
if (!parse_inner_disjunction(assertion_stack, match_length_minimum, { .unicode = false, .named = flags.named }))
|
if (!parse_inner_disjunction(assertion_stack, match_length_minimum, { .unicode = false, .named = flags.named, .unicode_sets = false }))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
stack.insert_bytecode_lookaround(move(assertion_stack), ByteCode::LookAroundType::NegatedLookAhead);
|
stack.insert_bytecode_lookaround(move(assertion_stack), ByteCode::LookAroundType::NegatedLookAhead);
|
||||||
|
@ -1756,6 +1757,7 @@ bool ECMA262Parser::parse_character_class(ByteCode& stack, size_t& match_length_
|
||||||
compares.empend(CompareTypeAndValuePair { CharacterCompareType::Inverse, 0 });
|
compares.empend(CompareTypeAndValuePair { CharacterCompareType::Inverse, 0 });
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ClassContents :: [empty]
|
||||||
if (match(TokenType::RightBracket)) {
|
if (match(TokenType::RightBracket)) {
|
||||||
consume();
|
consume();
|
||||||
// Should only have at most an 'Inverse'
|
// Should only have at most an 'Inverse'
|
||||||
|
@ -1764,7 +1766,12 @@ bool ECMA262Parser::parse_character_class(ByteCode& stack, size_t& match_length_
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!parse_nonempty_class_ranges(compares, flags))
|
// ClassContents :: [~UnicodeSetsMode] NonemptyClassRanges[?UnicodeMode]
|
||||||
|
if (!flags.unicode_sets && !parse_nonempty_class_ranges(compares, flags))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
// ClassContents :: [+UnicodeSetsMode] ClassSetExpression
|
||||||
|
if (flags.unicode_sets && !parse_class_set_expression(compares))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
match_length_minimum += 1;
|
match_length_minimum += 1;
|
||||||
|
@ -2029,6 +2036,364 @@ bool ECMA262Parser::parse_nonempty_class_ranges(Vector<CompareTypeAndValuePair>&
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Parses the contents of a character class in UnicodeSets mode.
//
// ClassSetExpression :: ClassUnion | ClassIntersection | ClassSubtraction
//
// The alternatives are attempted in the order subtraction, intersection, union.
// The first one that succeeds wins; the closing bracket is then requested via
// consume(), passing Error::MismatchingBracket as its error argument, and true
// is returned regardless of what consume() reports. If an alternative fails
// with a parser error set, no further alternative is tried. Returns false when
// no alternative succeeds.
bool ECMA262Parser::parse_class_set_expression(Vector<CompareTypeAndValuePair>& compares)
{
    auto const expression_start = tell();

    // Called before the second and third alternative, with the same distance
    // computation the individual sub-parsers use in their own scope guards.
    auto rewind_to_start = [&] { back(tell() - expression_start + 1); };

    // Shared tail of every successful alternative.
    auto finish_expression = [&] {
        consume(TokenType::RightBracket, Error::MismatchingBracket);
        return true;
    };

    if (parse_class_subtraction(compares))
        return finish_expression();
    if (has_error())
        return false;

    rewind_to_start();
    if (parse_class_intersection(compares))
        return finish_expression();
    if (has_error())
        return false;

    rewind_to_start();
    if (!parse_class_union(compares))
        return false;

    return finish_expression();
}
|
||||||
|
|
||||||
|
// Parses a ClassUnion, appending what it parses to `compares`.
//
// ClassUnion :: ClassSetRange ClassUnion[opt] | ClassSetOperand ClassUnion[opt]
//
// Each iteration first tries a range and falls back to a single operand.
// Parsing stops at a right bracket, at a parser error, or at the first item
// that is neither a range nor an operand. Returns false if the very first item
// cannot be parsed or if a parser error is set; on those early-return paths the
// scope guard calls back() with the distance travelled since entry.
bool ECMA262Parser::parse_class_union(Vector<regex::CompareTypeAndValuePair>& compares)
{
    auto const union_start = tell();
    ArmedScopeGuard rewind_on_failure { [&] { back(tell() - union_start + 1); } };

    bool parsed_any_item = false;

    while (true) {
        bool const parsed_range = parse_class_set_range(compares);
        if (!parsed_range) {
            // Not a range: stop on error or at the end of the class...
            if (has_error())
                break;
            if (match(TokenType::RightBracket))
                break;

            // ...otherwise it has to be a plain operand.
            if (!parse_class_set_operand(compares)) {
                // A union needs at least one item.
                if (!parsed_any_item || has_error())
                    return false;
                break;
            }
        }

        parsed_any_item = true;
    }

    rewind_on_failure.disarm();
    return !has_error();
}
|
||||||
|
|
||||||
|
// Parses a ClassIntersection.
//
// ClassIntersection :: ClassSetOperand "&&" [lookahead != "&"] ClassSetOperand
//                    | ClassIntersection "&&" [lookahead != "&"] ClassSetOperand
//
// On success, appends to `compares`, in this order: an `And` marker, the
// compares of every operand in source order, and an `EndAndOr` marker; then
// returns true.
//
// On failure, returns false and leaves `compares` exactly as it was on entry,
// so the caller can attempt another alternative with the same vector. The
// armed scope guard also calls back() with the distance travelled since entry.
bool ECMA262Parser::parse_class_intersection(Vector<CompareTypeAndValuePair>& compares)
{
    Vector<CompareTypeAndValuePair> lhs;
    Vector<CompareTypeAndValuePair> rhs;
    // Everything is staged here and only committed to `compares` on success;
    // appending to `compares` directly would leave a dangling `And` + operands
    // behind whenever a later operand fails to parse.
    Vector<CompareTypeAndValuePair> staged;

    auto start_position = tell();
    ArmedScopeGuard restore_position { [&] { back(tell() - start_position + 1); } };

    if (!parse_class_set_operand(lhs))
        return false;

    if (!try_skip("&&"sv))
        return false;

    staged.append({ CharacterCompareType::And, 0 });
    staged.extend(move(lhs));

    do {
        rhs.clear_with_capacity();
        if (!parse_class_set_operand(rhs))
            return false;

        staged.extend(rhs);

        // An operand may not be followed by three ampersands in a row.
        if (try_skip("&&&"sv))
            return false;
    } while (!has_error() && try_skip("&&"sv));

    staged.append({ CharacterCompareType::EndAndOr, 0 });
    compares.extend(move(staged));

    restore_position.disarm();
    return true;
}
|
||||||
|
|
||||||
|
// Parses a ClassSubtraction.
//
// ClassSubtraction :: ClassSetOperand "--" ClassSetOperand
//                   | ClassSubtraction "--" ClassSetOperand
//
// On success, appends to `compares`, in this order: an `And` marker, the
// compares of the first operand, then for every subtracted operand a
// `TemporaryInverse` marker followed by that operand's compares, and finally
// an `EndAndOr` marker; then returns true.
//
// On failure, returns false and leaves `compares` exactly as it was on entry,
// so the caller can attempt another alternative with the same vector. The
// armed scope guard also calls back() with the distance travelled since entry.
bool ECMA262Parser::parse_class_subtraction(Vector<CompareTypeAndValuePair>& compares)
{
    Vector<CompareTypeAndValuePair> lhs;
    Vector<CompareTypeAndValuePair> rhs;
    // Everything is staged here and only committed to `compares` on success;
    // appending to `compares` directly would leave a dangling `And` + operands
    // behind whenever a later operand fails to parse.
    Vector<CompareTypeAndValuePair> staged;

    auto start_position = tell();
    ArmedScopeGuard restore_position { [&] { back(tell() - start_position + 1); } };

    if (!parse_class_set_operand(lhs))
        return false;

    if (!try_skip("--"sv))
        return false;

    staged.append({ CharacterCompareType::And, 0 });
    staged.extend(move(lhs));

    do {
        rhs.clear_with_capacity();
        if (!parse_class_set_operand(rhs))
            return false;

        // "A -- B" is emitted as "A and not B".
        staged.append({ CharacterCompareType::TemporaryInverse, 0 });
        staged.extend(rhs);
    } while (!has_error() && try_skip("--"sv));

    staged.append({ CharacterCompareType::EndAndOr, 0 });
    compares.extend(move(staged));

    restore_position.disarm();
    return true;
}
|
||||||
|
|
||||||
|
bool ECMA262Parser::parse_class_set_range(Vector<CompareTypeAndValuePair>& compares)
{
    // ClassSetRange :: ClassSetCharacter "-" ClassSetCharacter
    //
    // On success, appends a single CharRange compare to `compares` and returns true.
    // Returns false (with the input rewound by the scope guard) when the input is not a range.
    // A syntactically valid range whose start is greater than its end is an early error in the
    // specification; it is reported as Error::InvalidRange.
    auto start_position = tell();
    ArmedScopeGuard restore_position { [&] { back(tell() - start_position + 1); } };

    auto lhs = parse_class_set_character();
    if (!lhs.has_value())
        return false;

    if (!match(TokenType::HyphenMinus))
        return false;
    consume();

    auto rhs = parse_class_set_character();
    if (!rhs.has_value())
        return false;

    // Reject out-of-order ranges such as "z-a" instead of emitting a range that can never match.
    if (lhs.value() > rhs.value()) {
        set_error(Error::InvalidRange);
        return false;
    }

    compares.append({
        CharacterCompareType::CharRange,
        CharRange { lhs.value(), rhs.value() },
    });
    restore_position.disarm();
    return true;
}
|
||||||
|
|
||||||
|
Optional<u32> ECMA262Parser::parse_class_set_character()
{
    // ClassSetCharacter :: [lookahead ∉ ClassSetReservedDoublePunctuator] SourceCharacter but not ClassSetSyntaxCharacter
    //                    | "\" CharacterEscape[+UnicodeMode]
    //                    | "\" ClassSetReservedPunctuator
    //                    | "\" b
    // ClassSetReservedDoublePunctuator :: one of "&&" "!!" "##" "$$" "%%" "**" "++" ",," ".." "::" ";;" "<<" "==" ">>" "??" "@@" "^^" "``" "~~"
    // ClassSetSyntaxCharacter :: one of "(" ")" "{" "}" "[" "]" "/" "-" "\" "|"
    // ClassSetReservedPunctuator :: one of "&" "-" "!" "#" "%" "," ":" ";" "<" "=" ">" "@" "`" "~"
    //
    // Returns the parsed character, or an empty Optional when the input at the current position is
    // not a ClassSetCharacter. In the latter case the scope guard below rewinds the input to where
    // this function started. A backslash at the very end of the pattern sets
    // Error::InvalidTrailingEscape.

    constexpr auto class_set_reserved_double_punctuator = Array {
        "&&"sv, "!!"sv, "##"sv, "$$"sv, "%%"sv, "**"sv, "++"sv, ",,"sv, ".."sv, "::"sv, ";;"sv, "<<"sv, "=="sv, ">>"sv, "??"sv, "@@"sv, "^^"sv, "``"sv, "~~"sv
    };
    constexpr auto class_set_reserved_punctuator = Array {
        "&"sv, "-"sv, "!"sv, "#"sv, "%"sv, ","sv, ":"sv, ";"sv, "<"sv, "="sv, ">"sv, "@"sv, "`"sv, "~"sv
    };

    auto start_position = tell();
    ArmedScopeGuard restore { [&] { back(tell() - start_position + 1); } };

    if (try_skip("\\"sv)) {
        if (done()) {
            set_error(Error::InvalidTrailingEscape);
            return {};
        }

        // "\" ClassSetReservedPunctuator
        // The escape covers exactly one punctuator character, so match against the single-character
        // list; whatever follows it is left in the input for the caller.
        for (auto const& reserved : class_set_reserved_punctuator) {
            if (try_skip(reserved)) {
                restore.disarm();
                return reserved[0];
            }
        }

        // "\" b
        if (try_skip("b"sv)) {
            restore.disarm();
            return '\b';
        }

        // "\" CharacterEscape[+UnicodeMode]
        Vector<CompareTypeAndValuePair> compares;
        size_t minimum_length = 0;
        if (parse_character_escape(compares, minimum_length, { .unicode = true })) {
            // The escape is expected to have produced exactly one plain character compare.
            VERIFY(compares.size() == 1);
            auto& compare = compares.first();
            VERIFY(compare.type == CharacterCompareType::Char);
            restore.disarm();
            return compare.value;
        }

        return {};
    }

    // [lookahead ∉ ClassSetReservedDoublePunctuator] SourceCharacter but not ClassSetSyntaxCharacter
    // try_skip() consumes on a match; that is fine here because every match below returns with the
    // guard still armed, which rewinds the input.
    auto lookahead_matches = any_of(class_set_reserved_double_punctuator, [this](auto& reserved) {
        return try_skip(reserved);
    });

    if (lookahead_matches)
        return {};

    for (auto character : { "("sv, ")"sv, "{"sv, "}"sv, "["sv, "]"sv, "/"sv, "-"sv, "\\"sv, "|"sv }) {
        if (try_skip(character))
            return {};
    }

    restore.disarm();
    return skip();
}
|
||||||
|
|
||||||
|
bool ECMA262Parser::parse_class_set_operand(Vector<regex::CompareTypeAndValuePair>& compares)
{
    // Tries each ClassSetOperand alternative in turn and appends the resulting compares to
    // `compares`. Returns true on the first alternative that matches. Returns false when an error
    // was recorded, or (after rewinding to the entry position) when nothing matched.
    auto const entry_position = tell();

    // ClassSetOperand :: ClassSetCharacter | ClassStringDisjunction | NestedClass
    auto literal = parse_class_set_character();
    if (literal.has_value()) {
        compares.append({ CharacterCompareType::Char, literal.value() });
        return true;
    }

    // NestedClass :: "[" [lookahead != "^"] ClassContents[+UnicodeMode +UnicodeSetsMode] "]"
    //              | "[" "^" ClassContents[+UnicodeMode +UnicodeSetsMode] "]"
    //              | "\" CharacterClassEscape[+UnicodeMode]
    if (parse_nested_class(compares))
        return true;

    if (has_error())
        return false;

    bool is_negated = false;
    auto class_escape = parse_character_class_escape(is_negated, true);
    if (class_escape.has_value()) {
        if (is_negated)
            compares.append({ CharacterCompareType::TemporaryInverse, 1 });
        compares.append({ CharacterCompareType::CharClass, (ByteCodeValueType)class_escape.value() });
        return true;
    }

    // NOTE(review): a negated property escape emits Inverse here, while a negated class escape
    // above emits TemporaryInverse - confirm that this asymmetry is intended.
    PropertyEscape escape {};
    if (parse_unicode_property_escape(escape, is_negated)) {
        if (is_negated)
            compares.empend(CompareTypeAndValuePair { CharacterCompareType::Inverse, 0 });

        escape.visit(
            [&](Unicode::Property unicode_property) {
                compares.empend(CompareTypeAndValuePair { CharacterCompareType::Property, (ByteCodeValueType)unicode_property });
            },
            [&](Unicode::GeneralCategory category) {
                compares.empend(CompareTypeAndValuePair { CharacterCompareType::GeneralCategory, (ByteCodeValueType)category });
            },
            [&](Script script_escape) {
                auto const compare_type = script_escape.is_extension
                    ? CharacterCompareType::ScriptExtension
                    : CharacterCompareType::Script;
                compares.empend(CompareTypeAndValuePair { compare_type, (ByteCodeValueType)script_escape.script });
            },
            [](Empty&) { VERIFY_NOT_REACHED(); });
        return true;
    }

    if (has_error())
        return false;

    // ClassStringDisjunction :: "\q{" ClassStringDisjunctionContents "}"
    // ClassStringDisjunctionContents :: ClassString | ClassString "|" ClassStringDisjunctionContents
    // ClassString :: [empty] | NonEmptyClassString
    // NonEmptyClassString :: ClassCharacter NonEmptyClassString[opt]
    if (try_skip("\\q{"sv)) {
        // FIXME: Implement this :P
        return set_error(Error::InvalidCharacterClass);
    }

    // Nothing matched: undo whatever was consumed since entry.
    back(tell() - entry_position + 1);
    return false;
}
|
||||||
|
|
||||||
|
bool ECMA262Parser::parse_nested_class(Vector<regex::CompareTypeAndValuePair>& compares)
{
    // NestedClass :: "[" [lookahead ≠ ^ ] ClassContents [+UnicodeMode, +UnicodeSetsMode] "]"
    //              | "[" "^" ClassContents[+UnicodeMode, +UnicodeSetsMode] "]"
    //              | "\" CharacterClassEscape[+UnicodeMode]
    //
    // Appends the compares for the nested class to `compares` and returns true on a match.
    // `compares` may already hold entries from earlier operands of the enclosing class, so nothing
    // in here may assume it starts out empty.
    auto start_position = tell();
    auto const initial_compare_count = compares.size();

    if (match(TokenType::LeftBracket)) {
        consume();

        compares.append(CompareTypeAndValuePair { CharacterCompareType::Or, 0 });

        if (match(TokenType::Circumflex)) {
            // Negated charclass
            consume();
            compares.empend(CompareTypeAndValuePair { CharacterCompareType::Inverse, 0 });
        }

        // ClassContents :: [empty]
        if (match(TokenType::RightBracket)) {
            consume();
            // This call should only have added at most an 'Inverse' (after an 'Or').
            // Measured relative to the entry size, since the caller's vector may be non-empty.
            VERIFY(compares.size() - initial_compare_count <= 2);
            compares.append(CompareTypeAndValuePair { CharacterCompareType::EndAndOr, 0 });
            return true;
        }

        // ClassContents :: [+UnicodeSetsMode] ClassSetExpression
        if (!parse_class_set_expression(compares))
            return false;

        compares.append(CompareTypeAndValuePair { CharacterCompareType::EndAndOr, 0 });
        return true;
    }

    if (try_skip("\\"sv)) {
        auto negated = false;
        if (auto char_class = parse_character_class_escape(negated); char_class.has_value()) {
            if (negated)
                compares.append({ CharacterCompareType::TemporaryInverse, 1 });
            compares.append({ CharacterCompareType::CharClass, (ByteCodeValueType)char_class.value() });
            return true;
        }

        // NOTE(review): a negated property escape emits Inverse here, while a negated class escape
        // above emits TemporaryInverse - confirm that this asymmetry is intended.
        PropertyEscape property {};
        if (parse_unicode_property_escape(property, negated)) {
            if (negated)
                compares.empend(CompareTypeAndValuePair { CharacterCompareType::Inverse, 0 });
            property.visit(
                [&](Unicode::Property property) {
                    compares.empend(CompareTypeAndValuePair { CharacterCompareType::Property, (ByteCodeValueType)property });
                },
                [&](Unicode::GeneralCategory general_category) {
                    compares.empend(CompareTypeAndValuePair { CharacterCompareType::GeneralCategory, (ByteCodeValueType)general_category });
                },
                [&](Script script) {
                    if (script.is_extension)
                        compares.empend(CompareTypeAndValuePair { CharacterCompareType::ScriptExtension, (ByteCodeValueType)script.script });
                    else
                        compares.empend(CompareTypeAndValuePair { CharacterCompareType::Script, (ByteCodeValueType)script.script });
                },
                [](Empty&) { VERIFY_NOT_REACHED(); });
            return true;
        }

        if (has_error())
            return false;
    }

    // Neither alternative matched: undo whatever was consumed since entry.
    back(tell() - start_position + 1);
    return false;
}
|
||||||
|
|
||||||
bool ECMA262Parser::parse_unicode_property_escape(PropertyEscape& property, bool& negated)
|
bool ECMA262Parser::parse_unicode_property_escape(PropertyEscape& property, bool& negated)
|
||||||
{
|
{
|
||||||
negated = false;
|
negated = false;
|
||||||
|
|
|
@ -92,6 +92,8 @@ protected:
|
||||||
ALWAYS_INLINE bool done() const;
|
ALWAYS_INLINE bool done() const;
|
||||||
ALWAYS_INLINE bool set_error(Error error);
|
ALWAYS_INLINE bool set_error(Error error);
|
||||||
|
|
||||||
|
// Returns the position reported by the parser state's current token.
size_t tell() const { return m_parser_state.current_token.position(); }
|
||||||
|
|
||||||
struct NamedCaptureGroup {
|
struct NamedCaptureGroup {
|
||||||
size_t group_index { 0 };
|
size_t group_index { 0 };
|
||||||
size_t minimum_length { 0 };
|
size_t minimum_length { 0 };
|
||||||
|
@ -223,6 +225,7 @@ private:
|
||||||
struct ParseFlags {
|
struct ParseFlags {
|
||||||
bool unicode { false };
|
bool unicode { false };
|
||||||
bool named { false };
|
bool named { false };
|
||||||
|
bool unicode_sets { false };
|
||||||
};
|
};
|
||||||
|
|
||||||
enum class ReadDigitsInitialZeroState {
|
enum class ReadDigitsInitialZeroState {
|
||||||
|
@ -257,6 +260,15 @@ private:
|
||||||
|
|
||||||
bool parse_character_escape(Vector<CompareTypeAndValuePair>&, size_t&, ParseFlags);
|
bool parse_character_escape(Vector<CompareTypeAndValuePair>&, size_t&, ParseFlags);
|
||||||
|
|
||||||
|
bool parse_class_set_expression(Vector<CompareTypeAndValuePair>&);
|
||||||
|
bool parse_class_union(Vector<CompareTypeAndValuePair>&);
|
||||||
|
bool parse_class_intersection(Vector<CompareTypeAndValuePair>&);
|
||||||
|
bool parse_class_subtraction(Vector<CompareTypeAndValuePair>&);
|
||||||
|
bool parse_class_set_range(Vector<CompareTypeAndValuePair>&);
|
||||||
|
bool parse_class_set_operand(Vector<CompareTypeAndValuePair>&);
|
||||||
|
bool parse_nested_class(Vector<CompareTypeAndValuePair>&);
|
||||||
|
Optional<u32> parse_class_set_character();
|
||||||
|
|
||||||
// Used only by B.1.4, Regular Expression Patterns (Extended for use in browsers)
|
// Used only by B.1.4, Regular Expression Patterns (Extended for use in browsers)
|
||||||
bool parse_quantifiable_assertion(ByteCode&, size_t&, ParseFlags);
|
bool parse_quantifiable_assertion(ByteCode&, size_t&, ParseFlags);
|
||||||
bool parse_extended_atom(ByteCode&, size_t&, ParseFlags);
|
bool parse_extended_atom(ByteCode&, size_t&, ParseFlags);
|
||||||
|
|
Loading…
Reference in a new issue