Browse Source

LibRegex: Avoid calling GenericLexer::consume() past EOF

The consume(size_t) overload consumes "at most" as many bytes as
requested, but consume() consumes exactly one byte.
This commit makes sure to avoid consuming past EOF.

Fixes #18324.
Fixes #18325.
Ali Mohammad Pur 2 years ago
parent
commit
eba466b8e7
2 changed files with 8 additions and 1 deletion
  1. + 2 - 0
      Tests/LibRegex/Regex.cpp
  2. + 6 - 1
      Userland/Libraries/LibRegex/RegexParser.cpp

+ 2 - 0
Tests/LibRegex/Regex.cpp

@@ -606,6 +606,8 @@ TEST_CASE(ECMA262_parse)
         { "((?=lg)?[vl]k\\-?\\d{3}) bui| 3\\.[-\\w; ]{10}lg?-([06cv9]{3,4})"sv, regex::Error::NoError, ECMAScriptFlags::BrowserExtended }, // #12373, quantifiable assertions.
         { "((?=lg)?[vl]k\\-?\\d{3}) bui| 3\\.[-\\w; ]{10}lg?-([06cv9]{3,4})"sv, regex::Error::NoError, ECMAScriptFlags::BrowserExtended }, // #12373, quantifiable assertions.
         { parse_test_case_long_disjunction_chain.view() },                                                                                 // A whole lot of disjunctions, should not overflow the stack.
         { parse_test_case_long_disjunction_chain.view() },                                                                                 // A whole lot of disjunctions, should not overflow the stack.
         { "(\"|')(?:(?!\\2)[^\\\\\\r\\n]|\\\\.)*\\2"sv, regex::Error::NoError, ECMAScriptFlags::BrowserExtended },                         // LegacyOctalEscapeSequence should not consume too many chars (and should not crash)
         { "(\"|')(?:(?!\\2)[^\\\\\\r\\n]|\\\\.)*\\2"sv, regex::Error::NoError, ECMAScriptFlags::BrowserExtended },                         // LegacyOctalEscapeSequence should not consume too many chars (and should not crash)
+        // #18324, Capture group counter skipped past EOF.
+        { "\\1[\\"sv, regex::Error::InvalidNumber },
     };
     };
 
 
     for (auto& test : tests) {
     for (auto& test : tests) {

+ 6 - 1
Userland/Libraries/LibRegex/RegexParser.cpp

@@ -2695,17 +2695,22 @@ size_t ECMA262Parser::ensure_total_number_of_capturing_parenthesis()
     while (!lexer.is_eof()) {
     while (!lexer.is_eof()) {
         switch (lexer.peek()) {
         switch (lexer.peek()) {
         case '\\':
         case '\\':
-            lexer.consume(2);
+            lexer.consume(min(lexer.tell_remaining(), 2));
             continue;
             continue;
         case '[':
         case '[':
             while (!lexer.is_eof()) {
             while (!lexer.is_eof()) {
                 if (lexer.consume_specific('\\')) {
                 if (lexer.consume_specific('\\')) {
+                    if (lexer.is_eof())
+                        break;
                     lexer.consume();
                     lexer.consume();
                     continue;
                     continue;
                 }
                 }
                 if (lexer.consume_specific(']')) {
                 if (lexer.consume_specific(']')) {
                     break;
                     break;
                 }
                 }
+
+                if (lexer.is_eof())
+                    break;
                 lexer.consume();
                 lexer.consume();
             }
             }
             break;
             break;