LibRegex: Make ^ and $ accept all LineTerminators instead of just '\n'

Also adds a couple of tests.
This commit is contained in:
Ali Mohammad Pur 2023-03-25 16:36:05 +03:30 committed by Andreas Kling
parent 59a76b1279
commit 6fc9f5fa28
Notes: sideshowbarker 2024-07-17 03:10:07 +09:00
3 changed files with 13 additions and 8 deletions

View file

@ -624,6 +624,8 @@ TEST_CASE(ECMA262_parse)
TEST_CASE(ECMA262_match)
{
constexpr auto global_multiline = ECMAScriptFlags::Global | ECMAScriptFlags::Multiline;
struct _test {
StringView pattern;
StringView subject;
@ -698,6 +700,9 @@ TEST_CASE(ECMA262_match)
{ "^[a-sy-z]$"sv, "y"sv, true, ECMAScriptFlags::Insensitive },
{ "^[a-sy-z]$"sv, "u"sv, false, ECMAScriptFlags::Insensitive },
{ "."sv, "\n\r\u2028\u2029"sv, false }, // Dot should not match any of CR/LF/LS/PS in ECMA262 mode without DotAll.
{ "a$"sv, "a\r\n"sv, true, global_multiline.value() }, // $ should accept all LineTerminators in ECMA262 mode with Multiline.
{ "^a"sv, "\ra"sv, true, global_multiline.value() },
{ "^(.*?):[ \\t]*([^\\r\\n]*)$"sv, "content-length: 488\r\ncontent-type: application/json; charset=utf-8\r\n"sv, true, global_multiline.value() },
};
// clang-format on

View file

@ -11,6 +11,11 @@
#include <AK/StringBuilder.h>
#include <LibUnicode/CharacterTypes.h>
// Unicode code points that are compared alongside '\r' and '\n' when checking for a line terminator.
// U+2028 LINE SEPARATOR
static constexpr u32 LineSeparator = 0x2028;
// U+2029 PARAGRAPH SEPARATOR
static constexpr u32 ParagraphSeparator = 0x2029;
namespace regex {
StringView OpCode::name(OpCodeId opcode_id)
@ -277,7 +282,7 @@ ALWAYS_INLINE ExecutionResult OpCode_CheckBegin::execute(MatchInput const& input
if (input.regex_options.has_flag_set(AllFlags::Multiline) && input.regex_options.has_flag_set(AllFlags::Internal_ConsiderNewline)) {
auto input_view = input.view.substring_view(state.string_position - 1, 1)[0];
return input_view == '\n';
return input_view == '\r' || input_view == '\n' || input_view == LineSeparator || input_view == ParagraphSeparator;
}
return false;
@ -330,7 +335,7 @@ ALWAYS_INLINE ExecutionResult OpCode_CheckEnd::execute(MatchInput const& input,
if (input.regex_options.has_flag_set(AllFlags::Multiline) && input.regex_options.has_flag_set(AllFlags::Internal_ConsiderNewline)) {
auto input_view = input.view.substring_view(state.string_position, 1)[0];
return input_view == '\n';
return input_view == '\r' || input_view == '\n' || input_view == LineSeparator || input_view == ParagraphSeparator;
}
return false;
@ -499,11 +504,6 @@ ALWAYS_INLINE ExecutionResult OpCode_Compare::execute(MatchInput const& input, M
if (input.view.length() <= state.string_position)
return ExecutionResult::Failed_ExecuteLowPrioForks;
// U+2028 LINE SEPARATOR
constexpr static u32 const LineSeparator { 0x2028 };
// U+2029 PARAGRAPH SEPARATOR
constexpr static u32 const ParagraphSeparator { 0x2029 };
auto input_view = input.view.substring_view(state.string_position, 1)[0];
auto is_equivalent_to_newline = input_view == '\n'
|| (input.regex_options.has_flag_set(AllFlags::Internal_ECMA262DotSemantics)

View file

@ -116,7 +116,7 @@ public:
// Clears every bit that is set in `flag` from the stored flags; all other bits are left as they were.
void reset_flag(T flag)
{
    // Inverted mask: 1 for every bit that must survive, 0 for the bits being cleared.
    auto const keep_mask = ~(FlagsUnderlyingType)flag;
    m_flags = (T)((FlagsUnderlyingType)m_flags & keep_mask);
}
// Turns on `flag` by OR-assigning it into this object through operator|=.
void set_flag(T flag) { *this |= flag; }
// Returns true only when every bit of `flag` is present in the stored flags.
// A `flag` with no bits set therefore always yields true.
bool has_flag_set(T flag) const
{
    auto const wanted_bits = (FlagsUnderlyingType)flag;
    return wanted_bits == ((FlagsUnderlyingType)m_flags & wanted_bits);
}
T value() const { return m_flags; }
constexpr T value() const { return m_flags; }
private:
T m_flags { T::Default };