Selaa lähdekoodia

LibRegex: Make ^ and $ accept all `LineTerminator`s instead of just '\n'

Also adds a couple of tests.
Ali Mohammad Pur 2 vuotta sitten
vanhempi
commit
6fc9f5fa28

+ 5 - 0
Tests/LibRegex/Regex.cpp

@@ -624,6 +624,8 @@ TEST_CASE(ECMA262_parse)
 
 TEST_CASE(ECMA262_match)
 {
+    constexpr auto global_multiline = ECMAScriptFlags::Global | ECMAScriptFlags::Multiline;
+
     struct _test {
         StringView pattern;
         StringView subject;
@@ -698,6 +700,9 @@ TEST_CASE(ECMA262_match)
         { "^[a-sy-z]$"sv, "y"sv, true, ECMAScriptFlags::Insensitive },
         { "^[a-sy-z]$"sv, "u"sv, false, ECMAScriptFlags::Insensitive },
         { "."sv, "\n\r\u2028\u2029"sv, false }, // Dot should not match any of CR/LF/LS/PS in ECMA262 mode without DotAll.
+        { "a$"sv, "a\r\n"sv, true, global_multiline.value() }, // $ should accept all LineTerminators in ECMA262 mode with Multiline.
+        { "^a"sv, "\ra"sv, true, global_multiline.value() },
+        { "^(.*?):[ \\t]*([^\\r\\n]*)$"sv, "content-length: 488\r\ncontent-type: application/json; charset=utf-8\r\n"sv, true, global_multiline.value() },
     };
     // clang-format on
 

+ 7 - 7
Userland/Libraries/LibRegex/RegexByteCode.cpp

@@ -11,6 +11,11 @@
 #include <AK/StringBuilder.h>
 #include <LibUnicode/CharacterTypes.h>
 
+// U+2028 LINE SEPARATOR
+constexpr static u32 const LineSeparator { 0x2028 };
+// U+2029 PARAGRAPH SEPARATOR
+constexpr static u32 const ParagraphSeparator { 0x2029 };
+
 namespace regex {
 
 StringView OpCode::name(OpCodeId opcode_id)
@@ -277,7 +282,7 @@ ALWAYS_INLINE ExecutionResult OpCode_CheckBegin::execute(MatchInput const& input
 
         if (input.regex_options.has_flag_set(AllFlags::Multiline) && input.regex_options.has_flag_set(AllFlags::Internal_ConsiderNewline)) {
             auto input_view = input.view.substring_view(state.string_position - 1, 1)[0];
-            return input_view == '\n';
+            return input_view == '\r' || input_view == '\n' || input_view == LineSeparator || input_view == ParagraphSeparator;
         }
 
         return false;
@@ -330,7 +335,7 @@ ALWAYS_INLINE ExecutionResult OpCode_CheckEnd::execute(MatchInput const& input,
 
         if (input.regex_options.has_flag_set(AllFlags::Multiline) && input.regex_options.has_flag_set(AllFlags::Internal_ConsiderNewline)) {
             auto input_view = input.view.substring_view(state.string_position, 1)[0];
-            return input_view == '\n';
+            return input_view == '\r' || input_view == '\n' || input_view == LineSeparator || input_view == ParagraphSeparator;
         }
 
         return false;
@@ -499,11 +504,6 @@ ALWAYS_INLINE ExecutionResult OpCode_Compare::execute(MatchInput const& input, M
             if (input.view.length() <= state.string_position)
                 return ExecutionResult::Failed_ExecuteLowPrioForks;
 
-            // U+2028 LINE SEPARATOR
-            constexpr static u32 const LineSeparator { 0x2028 };
-            // U+2029 PARAGRAPH SEPARATOR
-            constexpr static u32 const ParagraphSeparator { 0x2029 };
-
             auto input_view = input.view.substring_view(state.string_position, 1)[0];
             auto is_equivalent_to_newline = input_view == '\n'
                 || (input.regex_options.has_flag_set(AllFlags::Internal_ECMA262DotSemantics)

+ 1 - 1
Userland/Libraries/LibRegex/RegexOptions.h

@@ -116,7 +116,7 @@ public:
     void reset_flag(T flag) { m_flags = (T)((FlagsUnderlyingType)m_flags & ~(FlagsUnderlyingType)flag); }
     void set_flag(T flag) { *this |= flag; }
     bool has_flag_set(T flag) const { return (FlagsUnderlyingType)flag == ((FlagsUnderlyingType)m_flags & (FlagsUnderlyingType)flag); }
-    T value() const { return m_flags; }
+    constexpr T value() const { return m_flags; }
 
 private:
     T m_flags { T::Default };