浏览代码

LibRegex: Fix ECMA-262 parsing of invalid identity escapes

* Only alphabetic (A-Z, a-z) characters may be escaped with \c. The loop
  that currently parses \c also accepts the six code points that lie
  between the upper- and lowercase groups ('[', '\', ']', '^', '_', '`').
* In Unicode mode, all invalid identity escapes should cause a parser
  error, even in browser-extended mode.
* Avoid an infinite loop when parsing the pattern "\c" on its own.
Timothy Flynn 3 年之前
父节点
当前提交
e887314472
共有 2 个文件被更改,包括 34 次插入和 10 次删除
  1. 13 0
      Tests/LibRegex/Regex.cpp
  2. 21 10
      Userland/Libraries/LibRegex/RegexParser.cpp

+ 13 - 0
Tests/LibRegex/Regex.cpp

@@ -525,6 +525,18 @@ TEST_CASE(ECMA262_parse)
         { "\\\\p{1}", regex::Error::NoError, ECMAScriptFlags::Unicode },
         { "\\\\p{1}", regex::Error::NoError, ECMAScriptFlags::Unicode },
         { "\\\\p{AsCiI}", regex::Error::InvalidPattern, ECMAScriptFlags::Unicode },
         { "\\\\p{AsCiI}", regex::Error::InvalidPattern, ECMAScriptFlags::Unicode },
         { "\\\\p{ASCII}", regex::Error::InvalidPattern, ECMAScriptFlags::Unicode },
         { "\\\\p{ASCII}", regex::Error::InvalidPattern, ECMAScriptFlags::Unicode },
+        { "\\c", regex::Error::NoError, ECMAScriptFlags::BrowserExtended },
+        { "\\c", regex::Error::InvalidPattern, ECMAScriptFlags::Unicode },
+        { "[\\c]", regex::Error::NoError, ECMAScriptFlags::BrowserExtended },
+        { "[\\c]", regex::Error::InvalidPattern, ECMAScriptFlags::Unicode },
+        { "\\c`", regex::Error::NoError, ECMAScriptFlags::BrowserExtended },
+        { "\\c`", regex::Error::InvalidPattern, ECMAScriptFlags::Unicode },
+        { "[\\c`]", regex::Error::NoError, ECMAScriptFlags::BrowserExtended },
+        { "[\\c`]", regex::Error::InvalidPattern, ECMAScriptFlags::Unicode },
+        { "\\A", regex::Error::NoError, ECMAScriptFlags::BrowserExtended },
+        { "\\A", regex::Error::InvalidCharacterClass, ECMAScriptFlags::Unicode },
+        { "[\\A]", regex::Error::NoError, ECMAScriptFlags::BrowserExtended },
+        { "[\\A]", regex::Error::InvalidPattern, ECMAScriptFlags::Unicode },
     };
     };
 
 
     for (auto& test : tests) {
     for (auto& test : tests) {
@@ -579,6 +591,7 @@ TEST_CASE(ECMA262_match)
         { "\\05", "\5", true, ECMAScriptFlags::BrowserExtended },
         { "\\05", "\5", true, ECMAScriptFlags::BrowserExtended },
         { "\\455", "\45""5", true, ECMAScriptFlags::BrowserExtended },
         { "\\455", "\45""5", true, ECMAScriptFlags::BrowserExtended },
         { "\\314", "\314", true, ECMAScriptFlags::BrowserExtended },
         { "\\314", "\314", true, ECMAScriptFlags::BrowserExtended },
+        { "\\c", "\\c", true, ECMAScriptFlags::BrowserExtended },
         { "\\cf", "\06", true, ECMAScriptFlags::BrowserExtended },
         { "\\cf", "\06", true, ECMAScriptFlags::BrowserExtended },
         { "\\c1", "\\c1", true, ECMAScriptFlags::BrowserExtended },
         { "\\c1", "\\c1", true, ECMAScriptFlags::BrowserExtended },
         { "[\\c1]", "\x11", true, ECMAScriptFlags::BrowserExtended },
         { "[\\c1]", "\x11", true, ECMAScriptFlags::BrowserExtended },

+ 21 - 10
Userland/Libraries/LibRegex/RegexParser.cpp

@@ -17,6 +17,7 @@
 namespace regex {
 namespace regex {
 
 
 static constexpr size_t s_maximum_repetition_count = 1024 * 1024;
 static constexpr size_t s_maximum_repetition_count = 1024 * 1024;
+static constexpr auto s_alphabetic_characters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"sv;
 
 
 ALWAYS_INLINE bool Parser::set_error(Error error)
 ALWAYS_INLINE bool Parser::set_error(Error error)
 {
 {
@@ -1400,7 +1401,7 @@ bool ECMA262Parser::parse_atom_escape(ByteCode& stack, size_t& match_length_mini
 
 
     // CharacterEscape > ControlLetter
     // CharacterEscape > ControlLetter
     if (try_skip("c")) {
     if (try_skip("c")) {
-        for (auto c = 'A'; c <= 'z'; ++c) {
+        for (auto c : s_alphabetic_characters) {
             if (try_skip({ &c, 1 })) {
             if (try_skip({ &c, 1 })) {
                 match_length_minimum += 1;
                 match_length_minimum += 1;
                 stack.insert_bytecode_compare_values({ { CharacterCompareType::Char, (ByteCodeValueType)(c % 32) } });
                 stack.insert_bytecode_compare_values({ { CharacterCompareType::Char, (ByteCodeValueType)(c % 32) } });
@@ -1408,18 +1409,18 @@ bool ECMA262Parser::parse_atom_escape(ByteCode& stack, size_t& match_length_mini
             }
             }
         }
         }
 
 
+        if (unicode) {
+            set_error(Error::InvalidPattern);
+            return false;
+        }
+
         if (m_should_use_browser_extended_grammar) {
         if (m_should_use_browser_extended_grammar) {
-            back(2);
+            back(1 + !done());
             stack.insert_bytecode_compare_values({ { CharacterCompareType::Char, (ByteCodeValueType)'\\' } });
             stack.insert_bytecode_compare_values({ { CharacterCompareType::Char, (ByteCodeValueType)'\\' } });
             match_length_minimum += 1;
             match_length_minimum += 1;
             return true;
             return true;
         }
         }
 
 
-        if (unicode) {
-            set_error(Error::InvalidPattern);
-            return false;
-        }
-
         // Allow '\c' in non-unicode mode, just matches 'c'.
         // Allow '\c' in non-unicode mode, just matches 'c'.
         match_length_minimum += 1;
         match_length_minimum += 1;
         stack.insert_bytecode_compare_values({ { CharacterCompareType::Char, (ByteCodeValueType)'c' } });
         stack.insert_bytecode_compare_values({ { CharacterCompareType::Char, (ByteCodeValueType)'c' } });
@@ -1768,10 +1769,17 @@ bool ECMA262Parser::parse_nonempty_class_ranges(Vector<CompareTypeAndValuePair>&
 
 
             // CharacterEscape > ControlLetter
             // CharacterEscape > ControlLetter
             if (try_skip("c")) {
             if (try_skip("c")) {
-                for (auto c = 'A'; c <= 'z'; ++c) {
-                    if (try_skip({ &c, 1 }))
+                for (auto c : s_alphabetic_characters) {
+                    if (try_skip({ &c, 1 })) {
                         return { CharClassRangeElement { .code_point = (u32)(c % 32), .is_character_class = false } };
                         return { CharClassRangeElement { .code_point = (u32)(c % 32), .is_character_class = false } };
+                    }
+                }
+
+                if (unicode) {
+                    set_error(Error::InvalidPattern);
+                    return {};
                 }
                 }
+
                 if (m_should_use_browser_extended_grammar) {
                 if (m_should_use_browser_extended_grammar) {
                     for (auto c = '0'; c <= '9'; ++c) {
                     for (auto c = '0'; c <= '9'; ++c) {
                         if (try_skip({ &c, 1 }))
                         if (try_skip({ &c, 1 }))
@@ -1780,7 +1788,7 @@ bool ECMA262Parser::parse_nonempty_class_ranges(Vector<CompareTypeAndValuePair>&
                     if (try_skip("_"))
                     if (try_skip("_"))
                         return { CharClassRangeElement { .code_point = (u32)('_' % 32), .is_character_class = false } };
                         return { CharClassRangeElement { .code_point = (u32)('_' % 32), .is_character_class = false } };
 
 
-                    back(2);
+                    back(1 + !done());
                     return { CharClassRangeElement { .code_point = '\\', .is_character_class = false } };
                     return { CharClassRangeElement { .code_point = '\\', .is_character_class = false } };
                 }
                 }
             }
             }
@@ -1856,6 +1864,9 @@ bool ECMA262Parser::parse_nonempty_class_ranges(Vector<CompareTypeAndValuePair>&
                 // Any unrecognised escape is allowed in non-unicode mode.
                 // Any unrecognised escape is allowed in non-unicode mode.
                 return { CharClassRangeElement { .code_point = (u32)skip(), .is_character_class = false } };
                 return { CharClassRangeElement { .code_point = (u32)skip(), .is_character_class = false } };
             }
             }
+
+            set_error(Error::InvalidPattern);
+            return {};
         }
         }
 
 
         if (match(TokenType::RightBracket) || match(TokenType::HyphenMinus))
         if (match(TokenType::RightBracket) || match(TokenType::HyphenMinus))