4 years ago · 3dbf4c62b0
--- a/Libraries/LibJS/Tests/string-escapes.js
+++ b/Libraries/LibJS/Tests/string-escapes.js
@@ -4,6 +4,11 @@ test("hex escapes", () => {
 
				     expect(`\x55`).toBe("U");
			
 
				     expect(`\X55`).toBe("X55");
			
 
				     expect("\xff").toBe(String.fromCharCode(0xff));
			
 
				+    expect("'\\x'").not.toEval();
			
 
				+    expect("'\\x1'").not.toEval();
			
 
				+    expect("'\\xz'").not.toEval();
			
 
				+    expect("'\\xzz'").not.toEval();
			
 
				+    expect("'\\x🐞'").not.toEval();
			
 
				 });
			
 
				 
			
 
				 test("unicode escapes", () => {
			
@@ -12,6 +17,18 @@ test("unicode escapes", () => {
 
				     expect("\u{1f41e}").toBe("🐞");
			
 
				     expect(`\u{1f41e}`).toBe("🐞");
			
 
				     expect("\u00ff").toBe(String.fromCharCode(0xff));
			
 
				+    expect("'\\u'").not.toEval();
			
 
				+    expect("'\\u1'").not.toEval();
			
 
				+    expect("'\\uf'").not.toEval();
			
 
				+    expect("'\\u123'").not.toEval();
			
 
				+    expect("'\\u123z'").not.toEval();
			
 
				+    expect("'\\uz'").not.toEval();
			
 
				+    expect("'\\uzz'").not.toEval();
			
 
				+    expect("'\\uzzzz'").not.toEval();
			
 
				+    expect("'\\u{'").not.toEval();
			
 
				+    expect("'\\u{}'").not.toEval();
			
 
				+    expect("'\\u{z}'").not.toEval();
			
 
				+    expect("'\\u🐞'").not.toEval();
			
 
				 });
			
 
				 
			
 
				 describe("octal escapes", () => {
			
--- a/Libraries/LibJS/Token.cpp
+++ b/Libraries/LibJS/Token.cpp
@@ -27,8 +27,8 @@
 
				 
			
 
				 #include "Token.h"
			
 
				 #include <AK/Assertions.h>
			
 
				+#include <AK/GenericLexer.h>
			
 
				 #include <AK/StringBuilder.h>
			
 
				-#include <AK/Utf32View.h>
			
 
				 #include <ctype.h>
			
 
				 
			
 
				 namespace JS {
			
@@ -104,20 +104,9 @@ static u32 hex2int(char x)
 
				 String Token::string_value(StringValueStatus& status) const
			
 
				 {
			
 
				     ASSERT(type() == TokenType::StringLiteral || type() == TokenType::TemplateLiteralString);
			
 
				-    auto is_template = type() == TokenType::TemplateLiteralString;
			
 
				-    auto offset = is_template ? 0 : 1;
			
 
				-
			
 
				-    size_t i;
			
 
				-
			
 
				-    auto lookahead = [&]<typename T>(T fn, size_t distance = 1) -> bool {
			
 
				-        if (i + distance >= m_value.length() - offset)
			
 
				-            return false;
			
 
				-        return fn(m_value[i + distance]);
			
 
				-    };
			
 
				 
			
 
				-    auto is_octal_digit = [](char c) {
			
 
				-        return c >= '0' && c <= '7';
			
 
				-    };
			
 
				+    auto is_template = type() == TokenType::TemplateLiteralString;
			
 
				+    GenericLexer lexer(is_template ? m_value : m_value.substring_view(1, m_value.length() - 2));
			
 
				 
			
 
				     auto encoding_failure = [&status](StringValueStatus parse_status) -> String {
			
 
				         status = parse_status;
			
@@ -125,144 +114,101 @@ String Token::string_value(StringValueStatus& status) const
 
				     };
			
 
				 
			
 
				     StringBuilder builder;
			
 
				-    for (i = offset; i < m_value.length() - offset; ++i) {
			
 
				-        if (m_value[i] == '\\' && i + 1 < m_value.length() - offset) {
			
 
				-            i++;
			
 
				-            switch (m_value[i]) {
			
 
				-            case 'b':
			
 
				-                builder.append('\b');
			
 
				-                break;
			
 
				-            case 'f':
			
 
				-                builder.append('\f');
			
 
				-                break;
			
 
				-            case 'n':
			
 
				-                builder.append('\n');
			
 
				-                break;
			
 
				-            case 'r':
			
 
				-                builder.append('\r');
			
 
				-                break;
			
 
				-            case 't':
			
 
				-                builder.append('\t');
			
 
				-                break;
			
 
				-            case 'v':
			
 
				-                builder.append('\v');
			
 
				-                break;
			
 
				-            case '\'':
			
 
				-                builder.append('\'');
			
 
				-                break;
			
 
				-            case '"':
			
 
				-                builder.append('"');
			
 
				-                break;
			
 
				-            case '\\':
			
 
				-                builder.append('\\');
			
 
				-                break;
			
 
				-            case '\n':
			
 
				-                break;
			
 
				-            case '\r':
			
 
				-                break;
			
 
				-            case 'x': {
			
 
				-                if (i + 2 >= m_value.length() - offset)
			
 
				-                    return encoding_failure(StringValueStatus::MalformedHexEscape);
			
 
				-
			
 
				-                auto digit1 = m_value[++i];
			
 
				-                auto digit2 = m_value[++i];
			
 
				-                if (!isxdigit(digit1) || !isxdigit(digit2))
			
 
				-                    return encoding_failure(StringValueStatus::MalformedHexEscape);
			
 
				-                builder.append_code_point(hex2int(digit1) * 16 + hex2int(digit2));
			
 
				-                break;
			
 
				-            }
			
 
				-            case 'u': {
			
 
				-                if (i + 1 >= m_value.length() - offset)
			
 
				-                    return encoding_failure(StringValueStatus::MalformedUnicodeEscape);
			
 
				-                u32 code_point = m_value[++i];
			
 
				-
			
 
				-                if (code_point == '{') {
			
 
				-                    code_point = 0;
			
 
				-                    while (true) {
			
 
				-                        if (i + 1 >= m_value.length() - offset)
			
 
				-                            return encoding_failure(StringValueStatus::MalformedUnicodeEscape);
			
 
				+    while (!lexer.is_eof()) {
			
 
				+        // No escape, consume one char and continue
			
 
				+        if (!lexer.next_is('\\')) {
			
 
				+            builder.append(lexer.consume());
			
 
				+            continue;
			
 
				+        }
			
 
				 
			
 
				-                        auto ch = m_value[++i];
			
 
				-                        if (ch == '}')
			
 
				-                            break;
			
 
				-                        if (!isxdigit(ch))
			
 
				-                            return encoding_failure(StringValueStatus::MalformedUnicodeEscape);
			
 
				+        lexer.ignore();
			
 
				+        ASSERT(!lexer.is_eof());
			
 
				 
			
 
				-                        auto new_code_point = (code_point << 4u) | hex2int(ch);
			
 
				-                        if (new_code_point < code_point)
			
 
				-                            return encoding_failure(StringValueStatus::UnicodeEscapeOverflow);
			
 
				-                        code_point = new_code_point;
			
 
				-                    }
			
 
				-                } else {
			
 
				-                    if (i + 3 >= m_value.length() - offset || !isxdigit(code_point))
			
 
				+        // Line continuation
			
 
				+        if (lexer.next_is('\n') || lexer.next_is('\r')) {
			
 
				+            lexer.ignore();
			
 
				+            continue;
			
 
				+        }
			
 
				+        // Line continuation
			
 
				+        if (lexer.next_is(LINE_SEPARATOR) || lexer.next_is(PARAGRAPH_SEPARATOR)) {
			
 
				+            lexer.ignore(3);
			
 
				+            continue;
			
 
				+        }
			
 
				+        // Null-byte escape
			
 
				+        if (lexer.next_is('0') && !isdigit(lexer.peek(1))) {
			
 
				+            lexer.ignore();
			
 
				+            builder.append('\0');
			
 
				+            continue;
			
 
				+        }
			
 
				+        // Hex escape
			
 
				+        if (lexer.next_is('x')) {
			
 
				+            lexer.ignore();
			
 
				+            if (!isxdigit(lexer.peek()) || !isxdigit(lexer.peek(1)))
			
 
				+                return encoding_failure(StringValueStatus::MalformedHexEscape);
			
 
				+            auto code_point = hex2int(lexer.consume()) * 16 + hex2int(lexer.consume());
			
 
				+            ASSERT(code_point <= 255);
			
 
				+            builder.append_code_point(code_point);
			
 
				+            continue;
			
 
				+        }
			
 
				+        // Unicode escape
			
 
				+        if (lexer.next_is('u')) {
			
 
				+            lexer.ignore();
			
 
				+            u32 code_point = 0;
			
 
				+            if (lexer.next_is('{')) {
			
 
				+                lexer.ignore();
			
 
				+                while (true) {
			
 
				+                    if (!lexer.next_is(isxdigit))
			
 
				                         return encoding_failure(StringValueStatus::MalformedUnicodeEscape);
			
 
				-
			
 
				-                    code_point = hex2int(code_point);
			
 
				-                    for (int j = 0; j < 3; ++j) {
			
 
				-                        auto ch = m_value[++i];
			
 
				-                        if (!isxdigit(ch))
			
 
				-                            return encoding_failure(StringValueStatus::MalformedUnicodeEscape);
			
 
				-                        code_point = (code_point << 4u) | hex2int(ch);
			
 
				-                    }
			
 
				-                }
			
 
				-
			
 
				-                builder.append_code_point(code_point);
			
 
				-                break;
			
 
				-            }
			
 
				-            default:
			
 
				-                if (i + 2 < m_value.length() - offset) {
			
 
				-                    auto three_chars_view = m_value.substring_view(i, 3);
			
 
				-                    if (three_chars_view == LINE_SEPARATOR || three_chars_view == PARAGRAPH_SEPARATOR) {
			
 
				-                        // line continuation with LS or PS
			
 
				-                        i += 2;
			
 
				+                    auto new_code_point = (code_point << 4u) | hex2int(lexer.consume());
			
 
				+                    if (new_code_point < code_point)
			
 
				+                        return encoding_failure(StringValueStatus::UnicodeEscapeOverflow);
			
 
				+                    code_point = new_code_point;
			
 
				+                    if (lexer.next_is('}'))
			
 
				                         break;
			
 
				-                    }
			
 
				-                }
			
 
				-                if (is_template && (m_value[i] == '$' || m_value[i] == '`')) {
			
 
				-                    builder.append(m_value[i]);
			
 
				-                    break;
			
 
				                 }
			
 
				-                if (m_value[i] == '0' && !lookahead(isdigit)) {
			
 
				-                    builder.append((char)0);
			
 
				-                    break;
			
 
				+                lexer.ignore();
			
 
				+            } else {
			
 
				+                for (int j = 0; j < 4; ++j) {
			
 
				+                    if (!lexer.next_is(isxdigit))
			
 
				+                        return encoding_failure(StringValueStatus::MalformedUnicodeEscape);
			
 
				+                    code_point = (code_point << 4u) | hex2int(lexer.consume());
			
 
				                 }
			
 
				+            }
			
 
				+            builder.append_code_point(code_point);
			
 
				+            continue;
			
 
				+        }
			
 
				 
			
 
				-                // In non-strict mode LegacyOctalEscapeSequence is allowed in strings:
			
 
				-                // https://tc39.es/ecma262/#sec-additional-syntax-string-literals
			
 
				-                String octal_str;
			
 
				+        // In non-strict mode LegacyOctalEscapeSequence is allowed in strings:
			
 
				+        // https://tc39.es/ecma262/#sec-additional-syntax-string-literals
			
 
				+        String octal_str;
			
 
				 
			
 
				-                // OctalDigit [lookahead ∉ OctalDigit]
			
 
				-                if (is_octal_digit(m_value[i]) && !lookahead(is_octal_digit)) {
			
 
				-                    status = StringValueStatus::LegacyOctalEscapeSequence;
			
 
				-                    octal_str = String(&m_value[i], 1);
			
 
				-                }
			
 
				-                // ZeroToThree OctalDigit [lookahead ∉ OctalDigit]
			
 
				-                else if (m_value[i] >= '0' && m_value[i] <= '3' && lookahead(is_octal_digit) && !lookahead(is_octal_digit, 2)) {
			
 
				-                    status = StringValueStatus::LegacyOctalEscapeSequence;
			
 
				-                    octal_str = String(m_value.substring_view(i, 2));
			
 
				-                    i++;
			
 
				-                }
			
 
				-                // FourToSeven OctalDigit
			
 
				-                else if (m_value[i] >= '4' && m_value[i] <= '7' && lookahead(is_octal_digit)) {
			
 
				-                    status = StringValueStatus::LegacyOctalEscapeSequence;
			
 
				-                    octal_str = String(m_value.substring_view(i, 2));
			
 
				-                    i++;
			
 
				-                }
			
 
				-                // ZeroToThree OctalDigit OctalDigit
			
 
				-                else if (m_value[i] >= '0' && m_value[i] <= '3' && lookahead(is_octal_digit) && lookahead(is_octal_digit, 2)) {
			
 
				-                    status = StringValueStatus::LegacyOctalEscapeSequence;
			
 
				-                    octal_str = String(m_value.substring_view(i, 3));
			
 
				-                    i += 2;
			
 
				-                }
			
 
				+        auto is_octal_digit = [](char ch) { return ch >= '0' && ch <= '7'; };
			
 
				+        auto is_zero_to_three = [](char ch) { return ch >= '0' && ch <= '3'; };
			
 
				+        auto is_four_to_seven = [](char ch) { return ch >= '4' && ch <= '7'; };
			
 
				 
			
 
				-                if (status == StringValueStatus::LegacyOctalEscapeSequence)
			
 
				-                    builder.append_code_point(strtoul(octal_str.characters(), nullptr, 8));
			
 
				-                else
			
 
				-                    builder.append(m_value[i]);
			
 
				-            }
			
 
				-        } else {
			
 
				-            builder.append(m_value[i]);
			
 
				+        // OctalDigit [lookahead ∉ OctalDigit]
			
 
				+        if (is_octal_digit(lexer.peek()) && !is_octal_digit(lexer.peek(1)))
			
 
				+            octal_str = lexer.consume(1);
			
 
				+        // ZeroToThree OctalDigit [lookahead ∉ OctalDigit]
			
 
				+        else if (is_zero_to_three(lexer.peek()) && is_octal_digit(lexer.peek(1)) && !is_octal_digit(lexer.peek(2)))
			
 
				+            octal_str = lexer.consume(2);
			
 
				+        // FourToSeven OctalDigit
			
 
				+        else if (is_four_to_seven(lexer.peek()) && is_octal_digit(lexer.peek(1)))
			
 
				+            octal_str = lexer.consume(2);
			
 
				+        // ZeroToThree OctalDigit OctalDigit
			
 
				+        else if (is_zero_to_three(lexer.peek()) && is_octal_digit(lexer.peek(1)) && is_octal_digit(lexer.peek(2)))
			
 
				+            octal_str = lexer.consume(3);
			
 
				+
			
 
				+        if (!octal_str.is_null()) {
			
 
				+            status = StringValueStatus::LegacyOctalEscapeSequence;
			
 
				+            auto code_point = strtoul(octal_str.characters(), nullptr, 8);
			
 
				+            ASSERT(code_point <= 255);
			
 
				+            builder.append_code_point(code_point);
			
 
				+            continue;
			
 
				         }
			
 
				+
			
 
				+        lexer.retreat();
			
 
				+        builder.append(lexer.consume_escaped_character('\\', "b\bf\fn\nr\rt\tv\v"));
			
 
				     }
			
 
				     return builder.to_string();
			
 
				 }