Browse Source

LibRegex: Use match_ordinary_characters() in ECMA262Parser::parse_atom()

Previously, ECMA262Parser::parse_atom() only matched TokenType::Char, so all of these patterns were rejected as invalid:

- /foo,bar/
- /foo\/bar/
- /foo=bar/
- /foo-bar/
- /foo:bar/

Fixes #4243.
Linus Groh 4 years ago
parent
commit
eea7cabdbc

+ 14 - 14
Libraries/LibRegex/RegexParser.cpp

@@ -154,6 +154,19 @@ Parser::Result Parser::parse(Optional<AllOptions> regex_options)
     };
 }
 
+ALWAYS_INLINE bool Parser::match_ordinary_characters()
+{
+    // NOTE: This method must not be called during bracket and repetition parsing!
+    // FIXME: Add assertion for that?
+    auto type = m_parser_state.current_token.type();
+    return (type == TokenType::Char
+        || type == TokenType::Comma
+        || type == TokenType::Slash
+        || type == TokenType::EqualSign
+        || type == TokenType::HyphenMinus
+        || type == TokenType::Colon);
+}
+
 // =============================
 // PosixExtended Parser
 // =============================
@@ -172,19 +185,6 @@ ALWAYS_INLINE bool PosixExtendedParser::match_repetition_symbol()
         || type == TokenType::LeftCurly);
 }
 
-ALWAYS_INLINE bool PosixExtendedParser::match_ordinary_characters()
-{
-    // NOTE: This method must not be called during bracket and repetition parsing!
-    // FIXME: Add assertion for that?
-    auto type = m_parser_state.current_token.type();
-    return (type == TokenType::Char
-        || type == TokenType::Comma
-        || type == TokenType::Slash
-        || type == TokenType::EqualSign
-        || type == TokenType::HyphenMinus
-        || type == TokenType::Colon);
-}
-
 ALWAYS_INLINE bool PosixExtendedParser::parse_repetition_symbol(ByteCode& bytecode_to_repeat, size_t& match_length_minimum)
 {
     if (match(TokenType::LeftCurly)) {
@@ -964,7 +964,7 @@ bool ECMA262Parser::parse_atom(ByteCode& stack, size_t& match_length_minimum, bo
         return false;
     }
 
-    if (match(TokenType::Char)) {
+    if (match_ordinary_characters()) {
         auto token = consume().value();
         match_length_minimum += 1;
         stack.insert_bytecode_compare_values({ { CharacterCompareType::Char, (ByteCodeValueType)token[0] } });

+ 1 - 1
Libraries/LibRegex/RegexParser.h

@@ -90,6 +90,7 @@ protected:
 
     ALWAYS_INLINE bool match(TokenType type) const;
     ALWAYS_INLINE bool match(char ch) const;
+    ALWAYS_INLINE bool match_ordinary_characters();
     ALWAYS_INLINE Token consume();
     ALWAYS_INLINE Token consume(TokenType type, Error error);
     ALWAYS_INLINE bool consume(const String&);
@@ -145,7 +146,6 @@ public:
 
 private:
     ALWAYS_INLINE bool match_repetition_symbol();
-    ALWAYS_INLINE bool match_ordinary_characters();
 
     bool parse_internal(ByteCode&, size_t&) override;
 

+ 1 - 0
Libraries/LibRegex/Tests/Regex.cpp

@@ -488,6 +488,7 @@ TEST_CASE(ECMA262_parse)
         "^[\\w+/_-]+[=]{0,2}$",                        // #4189
         "^(?:[^<]*(<[\\w\\W]+>)[^>]*$|#([\\w\\-]*)$)", // #4189
         "\\/",                                         // #4189
+        ",/=-:",                                       // #4243
         "\\x",                                         // Even invalid escapes are allowed if ~unicode.
     };