4 tahun lalu · 4fb96afafc
--- a/Libraries/LibJS/Parser.cpp
+++ b/Libraries/LibJS/Parser.cpp
@@ -836,23 +836,41 @@ NonnullRefPtr<ArrayExpression> Parser::parse_array_expression()
 
				     return create_ast_node<ArrayExpression>(move(elements));
			
 
				 }
			
 
				 
			
 
				-NonnullRefPtr<StringLiteral> Parser::parse_string_literal(Token token)
			
 
				+NonnullRefPtr<StringLiteral> Parser::parse_string_literal(Token token, bool in_template_literal)
			
 
				 {
			
 
				     auto status = Token::StringValueStatus::Ok;
			
 
				     auto string = token.string_value(status);
			
 
				     if (status != Token::StringValueStatus::Ok) {
			
 
				         String message;
			
 
				-        if (status == Token::StringValueStatus::MalformedHexEscape || status == Token::StringValueStatus::MalformedUnicodeEscape) {
			
 
				+        if (status == Token::StringValueStatus::LegacyOctalEscapeSequence) {
			
 
				+            m_parser_state.m_string_legacy_octal_escape_sequence_in_scope = true;
			
 
				+            if (in_template_literal)
			
 
				+                message = "Octal escape sequence not allowed in template literal";
			
 
				+            else if (m_parser_state.m_strict_mode)
			
 
				+                message = "Octal escape sequence in string literal not allowed in strict mode";
			
 
				+        } else if (status == Token::StringValueStatus::MalformedHexEscape || status == Token::StringValueStatus::MalformedUnicodeEscape) {
			
 
				             auto type = status == Token::StringValueStatus::MalformedUnicodeEscape ? "unicode" : "hexadecimal";
			
 
				             message = String::formatted("Malformed {} escape sequence", type);
			
 
				         } else if (status == Token::StringValueStatus::UnicodeEscapeOverflow) {
			
 
				             message = "Unicode code_point must not be greater than 0x10ffff in escape sequence";
			
 
				+        } else {
			
 
				+            ASSERT_NOT_REACHED();
			
 
				         }
			
 
				 
			
 
				         if (!message.is_empty())
			
 
				             syntax_error(message, token.line_number(), token.line_column());
			
 
				     }
			
 
				 
			
 
				+    // It is possible for string literals to precede a Use Strict Directive that places the
			
 
				+    // enclosing code in strict mode, and implementations must take care to not use this
			
 
				+    // extended definition of EscapeSequence with such literals. For example, attempting to
			
 
				+    // parse the following source text must fail:
			
 
				+    //
			
 
				+    // function invalid() { "\7"; "use strict"; }
			
 
				+
			
 
				+    if (m_parser_state.m_string_legacy_octal_escape_sequence_in_scope && string == "use strict")
			
 
				+        syntax_error("Octal escape sequence in string literal not allowed in strict mode");
			
 
				+
			
 
				     if (m_parser_state.m_use_strict_directive == UseStrictDirectiveState::Looking) {
			
 
				         if (string == "use strict" && token.type() != TokenType::TemplateLiteralString) {
			
 
				             m_parser_state.m_use_strict_directive = UseStrictDirectiveState::Found;
			
@@ -884,7 +902,7 @@ NonnullRefPtr<TemplateLiteral> Parser::parse_template_literal(bool is_tagged)
 
				     while (!done() && !match(TokenType::TemplateLiteralEnd) && !match(TokenType::UnterminatedTemplateLiteral)) {
			
 
				         if (match(TokenType::TemplateLiteralString)) {
			
 
				             auto token = consume();
			
 
				-            expressions.append(parse_string_literal(token));
			
 
				+            expressions.append(parse_string_literal(token, true));
			
 
				             if (is_tagged)
			
 
				                 raw_strings.append(create_ast_node<StringLiteral>(token.value()));
			
 
				         } else if (match(TokenType::TemplateLiteralExprStart)) {
			
@@ -1249,6 +1267,7 @@ NonnullRefPtr<BlockStatement> Parser::parse_block_statement(bool& is_strict)
 
				         first = false;
			
 
				     }
			
 
				     m_parser_state.m_strict_mode = initial_strict_mode_state;
			
 
				+    m_parser_state.m_string_legacy_octal_escape_sequence_in_scope = false;
			
 
				     consume(TokenType::CurlyClose);
			
 
				     block->add_variables(m_parser_state.m_let_scopes.last());
			
 
				     block->add_functions(m_parser_state.m_function_scopes.last());
			
--- a/Libraries/LibJS/Parser.h
+++ b/Libraries/LibJS/Parser.h
@@ -87,7 +87,7 @@ public:
 
				     NonnullRefPtr<RegExpLiteral> parse_regexp_literal();
			
 
				     NonnullRefPtr<ObjectExpression> parse_object_expression();
			
 
				     NonnullRefPtr<ArrayExpression> parse_array_expression();
			
 
				-    NonnullRefPtr<StringLiteral> parse_string_literal(Token token);
			
 
				+    NonnullRefPtr<StringLiteral> parse_string_literal(Token token, bool in_template_literal = false);
			
 
				     NonnullRefPtr<TemplateLiteral> parse_template_literal(bool is_tagged);
			
 
				     NonnullRefPtr<Expression> parse_secondary_expression(NonnullRefPtr<Expression>, int min_precedence, Associativity associate = Associativity::Right);
			
 
				     NonnullRefPtr<CallExpression> parse_call_expression(NonnullRefPtr<Expression>);
			
@@ -184,6 +184,7 @@ private:
 
				         bool m_in_function_context { false };
			
 
				         bool m_in_break_context { false };
			
 
				         bool m_in_continue_context { false };
			
 
				+        bool m_string_legacy_octal_escape_sequence_in_scope { false };
			
 
				 
			
 
				         explicit ParserState(Lexer);
			
 
				     };
			
--- a/Libraries/LibJS/Tests/string-escapes.js
+++ b/Libraries/LibJS/Tests/string-escapes.js
@@ -13,3 +13,32 @@ test("unicode escapes", () => {
 
				     expect(`\u{1f41e}`).toBe("🐞");
			
 
				     expect("\u00ff").toBe(String.fromCharCode(0xff));
			
 
				 });
			
 
				+
			
 
				+describe("octal escapes", () => {
			
 
				+    test("basic functionality", () => {
			
 
				+        expect("\1").toBe("\u0001");
			
 
				+        expect("\2").toBe("\u0002");
			
 
				+        expect("\3").toBe("\u0003");
			
 
				+        expect("\4").toBe("\u0004");
			
 
				+        expect("\5").toBe("\u0005");
			
 
				+        expect("\6").toBe("\u0006");
			
 
				+        expect("\7").toBe("\u0007");
			
 
				+        expect("\8").toBe("8");
			
 
				+        expect("\9").toBe("9");
			
 
				+        expect("\128").toBe("\n8");
			
 
				+        expect("\141bc").toBe("abc");
			
 
				+        expect("f\157o\142a\162").toBe("foobar");
			
 
				+        expect("\123\145\162\145\156\151\164\171\117\123").toBe("SerenityOS");
			
 
				+    });
			
 
				+
			
 
				+    test("syntax error in template literal", () => {
			
 
				+        expect("`\\123`").not.toEval();
			
 
				+    });
			
 
				+
			
 
				+    test("syntax error in strict mode", () => {
			
 
				+        expect("'use strict'; '\\123'").not.toEval();
			
 
				+        expect('"use strict"; "\\123"').not.toEval();
			
 
				+        // Special case, string literal precedes use strict directive
			
 
				+        expect("'\\123'; somethingElse; 'use strict'").not.toEval();
			
 
				+    });
			
 
				+});
			
--- a/Libraries/LibJS/Token.cpp
+++ b/Libraries/LibJS/Token.cpp
@@ -1,5 +1,6 @@
 
				 /*
			
 
				  * Copyright (c) 2020, Stephan Unverwerth <s.unverwerth@gmx.de>
			
 
				+ * Copyright (c) 2020, Linus Groh <mail@linusgroh.de>
			
 
				  * All rights reserved.
			
 
				  *
			
 
				  * Redistribution and use in source and binary forms, with or without
			
@@ -103,8 +104,19 @@ String Token::string_value(StringValueStatus& status) const
 
				 {
			
 
				     ASSERT(type() == TokenType::StringLiteral || type() == TokenType::TemplateLiteralString);
			
 
				     auto is_template = type() == TokenType::TemplateLiteralString;
			
 
				+    auto offset = is_template ? 0 : 1;
			
 
				 
			
 
				-    auto offset = type() == TokenType::TemplateLiteralString ? 0 : 1;
			
 
				+    size_t i;
			
 
				+
			
 
				+    auto lookahead = [&]<typename T>(T fn, size_t distance = 1) -> bool {
			
 
				+        if (i + distance >= m_value.length() - offset)
			
 
				+            return false;
			
 
				+        return fn(m_value[i + distance]);
			
 
				+    };
			
 
				+
			
 
				+    auto is_octal_digit = [](char c) {
			
 
				+        return c >= '0' && c <= '7';
			
 
				+    };
			
 
				 
			
 
				     auto encoding_failure = [&status](StringValueStatus parse_status) -> String {
			
 
				         status = parse_status;
			
@@ -112,7 +124,7 @@ String Token::string_value(StringValueStatus& status) const
 
				     };
			
 
				 
			
 
				     StringBuilder builder;
			
 
				-    for (size_t i = offset; i < m_value.length() - offset; ++i) {
			
 
				+    for (i = offset; i < m_value.length() - offset; ++i) {
			
 
				         if (m_value[i] == '\\' && i + 1 < m_value.length() - offset) {
			
 
				             i++;
			
 
				             switch (m_value[i]) {
			
@@ -134,9 +146,6 @@ String Token::string_value(StringValueStatus& status) const
 
				             case 'v':
			
 
				                 builder.append('\v');
			
 
				                 break;
			
 
				-            case '0':
			
 
				-                builder.append((char)0);
			
 
				-                break;
			
 
				             case '\'':
			
 
				                 builder.append('\'');
			
 
				                 break;
			
@@ -200,9 +209,43 @@ String Token::string_value(StringValueStatus& status) const
 
				                     builder.append(m_value[i]);
			
 
				                     break;
			
 
				                 }
			
 
				+                if (m_value[i] == '0' && !lookahead(isdigit)) {
			
 
				+                    builder.append((char)0);
			
 
				+                    break;
			
 
				+                }
			
 
				 
			
 
				-                // FIXME: Also parse octal. Should anything else generate a syntax error?
			
 
				-                builder.append(m_value[i]);
			
 
				+                // In non-strict mode LegacyOctalEscapeSequence is allowed in strings:
			
 
				+                // https://tc39.es/ecma262/#sec-additional-syntax-string-literals
			
 
				+                String octal_str;
			
 
				+
			
 
				+                // OctalDigit [lookahead ∉ OctalDigit]
			
 
				+                if (is_octal_digit(m_value[i]) && !lookahead(is_octal_digit)) {
			
 
				+                    status = StringValueStatus::LegacyOctalEscapeSequence;
			
 
				+                    octal_str = String(&m_value[i], 1);
			
 
				+                }
			
 
				+                // ZeroToThree OctalDigit [lookahead ∉ OctalDigit]
			
 
				+                else if (m_value[i] >= '0' && m_value[i] <= '3' && lookahead(is_octal_digit) && !lookahead(is_octal_digit, 2)) {
			
 
				+                    status = StringValueStatus::LegacyOctalEscapeSequence;
			
 
				+                    octal_str = String(m_value.substring_view(i, 2));
			
 
				+                    i++;
			
 
				+                }
			
 
				+                // FourToSeven OctalDigit
			
 
				+                else if (m_value[i] >= '4' && m_value[i] <= '7' && lookahead(is_octal_digit)) {
			
 
				+                    status = StringValueStatus::LegacyOctalEscapeSequence;
			
 
				+                    octal_str = String(m_value.substring_view(i, 2));
			
 
				+                    i++;
			
 
				+                }
			
 
				+                // ZeroToThree OctalDigit OctalDigit
			
 
				+                else if (m_value[i] >= '0' && m_value[i] <= '3' && lookahead(is_octal_digit) && lookahead(is_octal_digit, 2)) {
			
 
				+                    status = StringValueStatus::LegacyOctalEscapeSequence;
			
 
				+                    octal_str = String(m_value.substring_view(i, 3));
			
 
				+                    i += 2;
			
 
				+                }
			
 
				+
			
 
				+                if (!octal_str.is_empty()) // octal_str is per-escape; `status` may be stale from an earlier octal escape in this string
			
 
				+                    builder.append_code_point(strtoul(octal_str.characters(), nullptr, 8));
			
 
				+                else
			
 
				+                    builder.append(m_value[i]);
			
 
				             }
			
 
				         } else {
			
 
				             builder.append(m_value[i]);
			
--- a/Libraries/LibJS/Token.h
+++ b/Libraries/LibJS/Token.h
@@ -208,6 +208,7 @@ public:
 
				         MalformedHexEscape,
			
 
				         MalformedUnicodeEscape,
			
 
				         UnicodeEscapeOverflow,
			
 
				+        LegacyOctalEscapeSequence,
			
 
				     };
			
 
				     String string_value(StringValueStatus& status) const;