2020-03-11 18:27:43 +00:00
|
|
|
/*
|
2021-05-29 10:38:28 +00:00
|
|
|
* Copyright (c) 2020, Stephan Unverwerth <s.unverwerth@serenityos.org>
|
2020-03-11 18:27:43 +00:00
|
|
|
*
|
2021-04-22 08:24:48 +00:00
|
|
|
* SPDX-License-Identifier: BSD-2-Clause
|
2020-03-11 18:27:43 +00:00
|
|
|
*/
|
|
|
|
|
|
|
|
#pragma once
|
|
|
|
|
2021-08-18 20:34:25 +00:00
|
|
|
#include <AK/FlyString.h>
|
2020-03-11 18:27:43 +00:00
|
|
|
#include <AK/String.h>
|
|
|
|
#include <AK/StringView.h>
|
2021-08-18 20:34:25 +00:00
|
|
|
#include <AK/Variant.h>
|
2020-03-11 18:27:43 +00:00
|
|
|
|
|
|
|
namespace JS {
|
|
|
|
|
2020-10-21 21:16:45 +00:00
|
|
|
// U+2028 LINE SEPARATOR
|
|
|
|
constexpr const char line_separator_chars[] { (char)0xe2, (char)0x80, (char)0xa8, 0 };
|
2021-08-14 15:07:47 +00:00
|
|
|
constexpr const StringView LINE_SEPARATOR_STRING { line_separator_chars };
|
|
|
|
constexpr const u32 LINE_SEPARATOR { 0x2028 };
|
2020-10-21 21:16:45 +00:00
|
|
|
|
|
|
|
// U+2029 PARAGRAPH SEPARATOR
|
|
|
|
constexpr const char paragraph_separator_chars[] { (char)0xe2, (char)0x80, (char)0xa9, 0 };
|
2021-08-14 15:07:47 +00:00
|
|
|
constexpr const StringView PARAGRAPH_SEPARATOR_STRING { paragraph_separator_chars };
|
|
|
|
constexpr const u32 PARAGRAPH_SEPARATOR { 0x2029 };
|
|
|
|
|
|
|
|
// U+00A0 NO BREAK SPACE
|
|
|
|
constexpr const u32 NO_BREAK_SPACE { 0x00A0 };
|
|
|
|
|
|
|
|
// U+200C ZERO WIDTH NON-JOINER
|
|
|
|
constexpr const u32 ZERO_WIDTH_NON_JOINER { 0x200C };
|
|
|
|
|
2021-08-21 09:31:36 +00:00
|
|
|
// U+FEFF ZERO WIDTH NO-BREAK SPACE
|
|
|
|
constexpr const u32 ZERO_WIDTH_NO_BREAK_SPACE { 0xFEFF };
|
|
|
|
|
2021-08-14 15:07:47 +00:00
|
|
|
// U+200D ZERO WIDTH JOINER
|
|
|
|
constexpr const u32 ZERO_WIDTH_JOINER { 0x200D };
|
2020-10-21 21:16:45 +00:00
|
|
|
|
LibJS: Unify syntax highlighting
So far we have three different syntax highlighters for LibJS:
- js's Line::Editor stylization
- JS::MarkupGenerator
- GUI::JSSyntaxHighlighter
This not only caused repetition of most token types in each highlighter
but also a lot of inconsistency regarding the styling of certain tokens:
- JSSyntaxHighlighter was considering TokenType::Period to be an
operator whereas MarkupGenerator categorized it as punctuation.
- MarkupGenerator was considering TokenType::{Break,Case,Continue,
Default,Switch,With} control keywords whereas JSSyntaxHighlighter just
disregarded them
- MarkupGenerator considered some future reserved keywords invalid and
others not. JSSyntaxHighlighter and js disregarded most
Adding a new token type meant adding it to ENUMERATE_JS_TOKENS as well
as each individual highlighter's switch/case construct.
I added a TokenCategory enum, and each TokenType is now associated to a
certain category, which the syntax highlighters then can use for styling
rather than operating on the token type directly. This also makes
changing a token's category everywhere easier, should we need to do that
(e.g. I decided to make TokenType::{Period,QuestionMarkPeriod}
TokenCategory::Operator for now, but we might want to change them to
Punctuation).
2020-10-04 21:28:59 +00:00
|
|
|
// X-macro listing every token type together with its TokenCategory.
// To use it, define __ENUMERATE_JS_TOKEN(type, category), expand
// ENUMERATE_JS_TOKENS, then #undef __ENUMERATE_JS_TOKEN again.
// The order of entries determines the numeric value of each TokenType.
// Break is a ControlKeyword so that it is categorized like the other jump
// statements (Continue, Return, Throw).
#define ENUMERATE_JS_TOKENS \
    __ENUMERATE_JS_TOKEN(Ampersand, Operator) \
    __ENUMERATE_JS_TOKEN(AmpersandEquals, Operator) \
    __ENUMERATE_JS_TOKEN(Arrow, Operator) \
    __ENUMERATE_JS_TOKEN(Asterisk, Operator) \
    __ENUMERATE_JS_TOKEN(AsteriskEquals, Operator) \
    __ENUMERATE_JS_TOKEN(Async, Keyword) \
    __ENUMERATE_JS_TOKEN(Await, Keyword) \
    __ENUMERATE_JS_TOKEN(BigIntLiteral, Number) \
    __ENUMERATE_JS_TOKEN(BoolLiteral, Keyword) \
    __ENUMERATE_JS_TOKEN(BracketClose, Punctuation) \
    __ENUMERATE_JS_TOKEN(BracketOpen, Punctuation) \
    __ENUMERATE_JS_TOKEN(Break, ControlKeyword) \
    __ENUMERATE_JS_TOKEN(Caret, Operator) \
    __ENUMERATE_JS_TOKEN(CaretEquals, Operator) \
    __ENUMERATE_JS_TOKEN(Case, ControlKeyword) \
    __ENUMERATE_JS_TOKEN(Catch, ControlKeyword) \
    __ENUMERATE_JS_TOKEN(Class, Keyword) \
    __ENUMERATE_JS_TOKEN(Colon, Punctuation) \
    __ENUMERATE_JS_TOKEN(Comma, Punctuation) \
    __ENUMERATE_JS_TOKEN(Const, Keyword) \
    __ENUMERATE_JS_TOKEN(Continue, ControlKeyword) \
    __ENUMERATE_JS_TOKEN(CurlyClose, Punctuation) \
    __ENUMERATE_JS_TOKEN(CurlyOpen, Punctuation) \
    __ENUMERATE_JS_TOKEN(Debugger, Keyword) \
    __ENUMERATE_JS_TOKEN(Default, ControlKeyword) \
    __ENUMERATE_JS_TOKEN(Delete, Keyword) \
    __ENUMERATE_JS_TOKEN(Do, ControlKeyword) \
    __ENUMERATE_JS_TOKEN(DoubleAmpersand, Operator) \
    __ENUMERATE_JS_TOKEN(DoubleAmpersandEquals, Operator) \
    __ENUMERATE_JS_TOKEN(DoubleAsterisk, Operator) \
    __ENUMERATE_JS_TOKEN(DoubleAsteriskEquals, Operator) \
    __ENUMERATE_JS_TOKEN(DoublePipe, Operator) \
    __ENUMERATE_JS_TOKEN(DoublePipeEquals, Operator) \
    __ENUMERATE_JS_TOKEN(DoubleQuestionMark, Operator) \
    __ENUMERATE_JS_TOKEN(DoubleQuestionMarkEquals, Operator) \
    __ENUMERATE_JS_TOKEN(Else, ControlKeyword) \
    __ENUMERATE_JS_TOKEN(Enum, Keyword) \
    __ENUMERATE_JS_TOKEN(Eof, Invalid) \
    __ENUMERATE_JS_TOKEN(Equals, Operator) \
    __ENUMERATE_JS_TOKEN(EqualsEquals, Operator) \
    __ENUMERATE_JS_TOKEN(EqualsEqualsEquals, Operator) \
    __ENUMERATE_JS_TOKEN(EscapedKeyword, Identifier) \
    __ENUMERATE_JS_TOKEN(ExclamationMark, Operator) \
    __ENUMERATE_JS_TOKEN(ExclamationMarkEquals, Operator) \
    __ENUMERATE_JS_TOKEN(ExclamationMarkEqualsEquals, Operator) \
    __ENUMERATE_JS_TOKEN(Export, Keyword) \
    __ENUMERATE_JS_TOKEN(Extends, Keyword) \
    __ENUMERATE_JS_TOKEN(Finally, ControlKeyword) \
    __ENUMERATE_JS_TOKEN(For, ControlKeyword) \
    __ENUMERATE_JS_TOKEN(Function, Keyword) \
    __ENUMERATE_JS_TOKEN(GreaterThan, Operator) \
    __ENUMERATE_JS_TOKEN(GreaterThanEquals, Operator) \
    __ENUMERATE_JS_TOKEN(Identifier, Identifier) \
    __ENUMERATE_JS_TOKEN(If, ControlKeyword) \
    __ENUMERATE_JS_TOKEN(Implements, Keyword) \
    __ENUMERATE_JS_TOKEN(Import, Keyword) \
    __ENUMERATE_JS_TOKEN(In, Keyword) \
    __ENUMERATE_JS_TOKEN(Instanceof, Keyword) \
    __ENUMERATE_JS_TOKEN(Interface, Keyword) \
    __ENUMERATE_JS_TOKEN(Invalid, Invalid) \
    __ENUMERATE_JS_TOKEN(LessThan, Operator) \
    __ENUMERATE_JS_TOKEN(LessThanEquals, Operator) \
    __ENUMERATE_JS_TOKEN(Let, Keyword) \
    __ENUMERATE_JS_TOKEN(Minus, Operator) \
    __ENUMERATE_JS_TOKEN(MinusEquals, Operator) \
    __ENUMERATE_JS_TOKEN(MinusMinus, Operator) \
    __ENUMERATE_JS_TOKEN(New, Keyword) \
    __ENUMERATE_JS_TOKEN(NullLiteral, Keyword) \
    __ENUMERATE_JS_TOKEN(NumericLiteral, Number) \
    __ENUMERATE_JS_TOKEN(Package, Keyword) \
    __ENUMERATE_JS_TOKEN(ParenClose, Punctuation) \
    __ENUMERATE_JS_TOKEN(ParenOpen, Punctuation) \
    __ENUMERATE_JS_TOKEN(Percent, Operator) \
    __ENUMERATE_JS_TOKEN(PercentEquals, Operator) \
    __ENUMERATE_JS_TOKEN(Period, Operator) \
    __ENUMERATE_JS_TOKEN(Pipe, Operator) \
    __ENUMERATE_JS_TOKEN(PipeEquals, Operator) \
    __ENUMERATE_JS_TOKEN(Plus, Operator) \
    __ENUMERATE_JS_TOKEN(PlusEquals, Operator) \
    __ENUMERATE_JS_TOKEN(PlusPlus, Operator) \
    __ENUMERATE_JS_TOKEN(Private, Keyword) \
    __ENUMERATE_JS_TOKEN(Protected, Keyword) \
    __ENUMERATE_JS_TOKEN(Public, Keyword) \
    __ENUMERATE_JS_TOKEN(QuestionMark, Operator) \
    __ENUMERATE_JS_TOKEN(QuestionMarkPeriod, Operator) \
    __ENUMERATE_JS_TOKEN(RegexFlags, String) \
    __ENUMERATE_JS_TOKEN(RegexLiteral, String) \
    __ENUMERATE_JS_TOKEN(Return, ControlKeyword) \
    __ENUMERATE_JS_TOKEN(Semicolon, Punctuation) \
    __ENUMERATE_JS_TOKEN(ShiftLeft, Operator) \
    __ENUMERATE_JS_TOKEN(ShiftLeftEquals, Operator) \
    __ENUMERATE_JS_TOKEN(ShiftRight, Operator) \
    __ENUMERATE_JS_TOKEN(ShiftRightEquals, Operator) \
    __ENUMERATE_JS_TOKEN(Slash, Operator) \
    __ENUMERATE_JS_TOKEN(SlashEquals, Operator) \
    __ENUMERATE_JS_TOKEN(Static, Keyword) \
    __ENUMERATE_JS_TOKEN(StringLiteral, String) \
    __ENUMERATE_JS_TOKEN(Super, Keyword) \
    __ENUMERATE_JS_TOKEN(Switch, ControlKeyword) \
    __ENUMERATE_JS_TOKEN(TemplateLiteralEnd, String) \
    __ENUMERATE_JS_TOKEN(TemplateLiteralExprEnd, Punctuation) \
    __ENUMERATE_JS_TOKEN(TemplateLiteralExprStart, Punctuation) \
    __ENUMERATE_JS_TOKEN(TemplateLiteralStart, String) \
    __ENUMERATE_JS_TOKEN(TemplateLiteralString, String) \
    __ENUMERATE_JS_TOKEN(This, Keyword) \
    __ENUMERATE_JS_TOKEN(Throw, ControlKeyword) \
    __ENUMERATE_JS_TOKEN(Tilde, Operator) \
    __ENUMERATE_JS_TOKEN(TripleDot, Operator) \
    __ENUMERATE_JS_TOKEN(Try, ControlKeyword) \
    __ENUMERATE_JS_TOKEN(Typeof, Keyword) \
    __ENUMERATE_JS_TOKEN(UnsignedShiftRight, Operator) \
    __ENUMERATE_JS_TOKEN(UnsignedShiftRightEquals, Operator) \
    __ENUMERATE_JS_TOKEN(UnterminatedRegexLiteral, String) \
    __ENUMERATE_JS_TOKEN(UnterminatedStringLiteral, String) \
    __ENUMERATE_JS_TOKEN(UnterminatedTemplateLiteral, String) \
    __ENUMERATE_JS_TOKEN(Var, Keyword) \
    __ENUMERATE_JS_TOKEN(Void, Keyword) \
    __ENUMERATE_JS_TOKEN(While, ControlKeyword) \
    __ENUMERATE_JS_TOKEN(With, ControlKeyword) \
    __ENUMERATE_JS_TOKEN(Yield, ControlKeyword)
|
2020-03-30 11:11:07 +00:00
|
|
|
|
2020-03-11 18:27:43 +00:00
|
|
|
enum class TokenType {
|
LibJS: Unify syntax highlighting
So far we have three different syntax highlighters for LibJS:
- js's Line::Editor stylization
- JS::MarkupGenerator
- GUI::JSSyntaxHighlighter
This not only caused repetition of most token types in each highlighter
but also a lot of inconsistency regarding the styling of certain tokens:
- JSSyntaxHighlighter was considering TokenType::Period to be an
operator whereas MarkupGenerator categorized it as punctuation.
- MarkupGenerator was considering TokenType::{Break,Case,Continue,
Default,Switch,With} control keywords whereas JSSyntaxHighlighter just
disregarded them
- MarkupGenerator considered some future reserved keywords invalid and
others not. JSSyntaxHighlighter and js disregarded most
Adding a new token type meant adding it to ENUMERATE_JS_TOKENS as well
as each individual highlighter's switch/case construct.
I added a TokenCategory enum, and each TokenType is now associated to a
certain category, which the syntax highlighters then can use for styling
rather than operating on the token type directly. This also makes
changing a token's category everywhere easier, should we need to do that
(e.g. I decided to make TokenType::{Period,QuestionMarkPeriod}
TokenCategory::Operator for now, but we might want to change them to
Punctuation.
2020-10-04 21:28:59 +00:00
|
|
|
#define __ENUMERATE_JS_TOKEN(type, category) type,
|
2020-03-30 11:11:07 +00:00
|
|
|
ENUMERATE_JS_TOKENS
|
|
|
|
#undef __ENUMERATE_JS_TOKEN
|
2020-08-18 16:46:36 +00:00
|
|
|
_COUNT_OF_TOKENS
|
2020-03-11 18:27:43 +00:00
|
|
|
};
|
2020-08-18 16:46:36 +00:00
|
|
|
constexpr size_t cs_num_of_js_tokens = static_cast<size_t>(TokenType::_COUNT_OF_TOKENS);
|
2020-03-11 18:27:43 +00:00
|
|
|
|
LibJS: Unify syntax highlighting
So far we have three different syntax highlighters for LibJS:
- js's Line::Editor stylization
- JS::MarkupGenerator
- GUI::JSSyntaxHighlighter
This not only caused repetition of most token types in each highlighter
but also a lot of inconsistency regarding the styling of certain tokens:
- JSSyntaxHighlighter was considering TokenType::Period to be an
operator whereas MarkupGenerator categorized it as punctuation.
- MarkupGenerator was considering TokenType::{Break,Case,Continue,
Default,Switch,With} control keywords whereas JSSyntaxHighlighter just
disregarded them
- MarkupGenerator considered some future reserved keywords invalid and
others not. JSSyntaxHighlighter and js disregarded most
Adding a new token type meant adding it to ENUMERATE_JS_TOKENS as well
as each individual highlighter's switch/case construct.
I added a TokenCategory enum, and each TokenType is now associated to a
certain category, which the syntax highlighters then can use for styling
rather than operating on the token type directly. This also makes
changing a token's category everywhere easier, should we need to do that
(e.g. I decided to make TokenType::{Period,QuestionMarkPeriod}
TokenCategory::Operator for now, but we might want to change them to
Punctuation).
2020-10-04 21:28:59 +00:00
|
|
|
// Coarse classification attached to every TokenType through the second
// argument of ENUMERATE_JS_TOKENS.
enum class TokenCategory {
    Invalid,        // e.g. Eof, Invalid
    Number,         // e.g. NumericLiteral, BigIntLiteral
    String,         // e.g. StringLiteral, RegexLiteral, template literal parts
    Punctuation,    // e.g. brackets, parentheses, Comma, Semicolon
    Operator,       // e.g. Plus, Equals, Period
    Keyword,        // e.g. Class, Const, Function
    ControlKeyword, // e.g. If, For, Return
    Identifier,     // Identifier, EscapedKeyword
};
|
|
|
|
|
2020-03-11 18:27:43 +00:00
|
|
|
class Token {
|
|
|
|
public:
|
2021-08-18 20:34:25 +00:00
|
|
|
Token() = default;
|
|
|
|
|
2021-07-10 20:46:17 +00:00
|
|
|
Token(TokenType type, String message, StringView trivia, StringView value, StringView filename, size_t line_number, size_t line_column, size_t offset)
|
2020-03-11 18:27:43 +00:00
|
|
|
: m_type(type)
|
2020-10-26 20:08:01 +00:00
|
|
|
, m_message(message)
|
2020-03-11 18:27:43 +00:00
|
|
|
, m_trivia(trivia)
|
2021-08-18 20:34:25 +00:00
|
|
|
, m_original_value(value)
|
2020-03-11 18:27:43 +00:00
|
|
|
, m_value(value)
|
2021-02-28 09:42:34 +00:00
|
|
|
, m_filename(filename)
|
2020-04-05 09:34:03 +00:00
|
|
|
, m_line_number(line_number)
|
|
|
|
, m_line_column(line_column)
|
2021-07-10 20:46:17 +00:00
|
|
|
, m_offset(offset)
|
2020-03-11 18:27:43 +00:00
|
|
|
{
|
|
|
|
}
|
|
|
|
|
2021-08-18 20:34:25 +00:00
|
|
|
Token(TokenType type, String message, StringView trivia, StringView original_value, FlyString value, StringView filename, size_t line_number, size_t line_column, size_t offset)
|
|
|
|
: m_type(type)
|
|
|
|
, m_message(message)
|
|
|
|
, m_trivia(trivia)
|
|
|
|
, m_original_value(original_value)
|
|
|
|
, m_value(move(value))
|
|
|
|
, m_filename(filename)
|
|
|
|
, m_line_number(line_number)
|
|
|
|
, m_line_column(line_column)
|
|
|
|
, m_offset(offset)
|
|
|
|
{
|
|
|
|
}
|
|
|
|
|
2020-03-11 18:27:43 +00:00
|
|
|
TokenType type() const { return m_type; }
|
LibJS: Unify syntax highlighting
So far we have three different syntax highlighters for LibJS:
- js's Line::Editor stylization
- JS::MarkupGenerator
- GUI::JSSyntaxHighlighter
This not only caused repetition of most token types in each highlighter
but also a lot of inconsistency regarding the styling of certain tokens:
- JSSyntaxHighlighter was considering TokenType::Period to be an
operator whereas MarkupGenerator categorized it as punctuation.
- MarkupGenerator was considering TokenType::{Break,Case,Continue,
Default,Switch,With} control keywords whereas JSSyntaxHighlighter just
disregarded them
- MarkupGenerator considered some future reserved keywords invalid and
others not. JSSyntaxHighlighter and js disregarded most
Adding a new token type meant adding it to ENUMERATE_JS_TOKENS as well
as each individual highlighter's switch/case construct.
I added a TokenCategory enum, and each TokenType is now associated to a
certain category, which the syntax highlighters then can use for styling
rather than operating on the token type directly. This also makes
changing a token's category everywhere easier, should we need to do that
(e.g. I decided to make TokenType::{Period,QuestionMarkPeriod}
TokenCategory::Operator for now, but we might want to change them to
Punctuation.
2020-10-04 21:28:59 +00:00
|
|
|
TokenCategory category() const;
|
|
|
|
static TokenCategory category(TokenType);
|
2020-03-11 18:27:43 +00:00
|
|
|
const char* name() const;
|
|
|
|
static const char* name(TokenType);
|
|
|
|
|
2020-10-26 20:08:01 +00:00
|
|
|
const String& message() const { return m_message; }
|
2020-03-11 18:27:43 +00:00
|
|
|
const StringView& trivia() const { return m_trivia; }
|
2021-08-18 20:34:25 +00:00
|
|
|
const StringView& original_value() const { return m_original_value; }
|
|
|
|
StringView value() const
|
|
|
|
{
|
|
|
|
return m_value.visit(
|
|
|
|
[](StringView const& view) { return view; },
|
|
|
|
[](FlyString const& identifier) { return identifier.view(); },
|
|
|
|
[](Empty) -> StringView { VERIFY_NOT_REACHED(); });
|
|
|
|
}
|
2021-02-28 09:42:34 +00:00
|
|
|
const StringView& filename() const { return m_filename; }
|
2020-04-05 09:34:03 +00:00
|
|
|
size_t line_number() const { return m_line_number; }
|
|
|
|
size_t line_column() const { return m_line_column; }
|
2021-07-10 20:46:17 +00:00
|
|
|
size_t offset() const { return m_offset; }
|
2020-03-11 18:27:43 +00:00
|
|
|
double double_value() const;
|
|
|
|
bool bool_value() const;
|
|
|
|
|
2020-05-17 06:27:25 +00:00
|
|
|
enum class StringValueStatus {
|
|
|
|
Ok,
|
|
|
|
MalformedHexEscape,
|
|
|
|
MalformedUnicodeEscape,
|
|
|
|
UnicodeEscapeOverflow,
|
2020-10-24 12:30:57 +00:00
|
|
|
LegacyOctalEscapeSequence,
|
2020-05-17 06:27:25 +00:00
|
|
|
};
|
|
|
|
String string_value(StringValueStatus& status) const;
|
2021-09-01 16:34:19 +00:00
|
|
|
String raw_template_value() const;
|
2020-05-17 06:27:25 +00:00
|
|
|
|
2020-04-18 18:31:27 +00:00
|
|
|
bool is_identifier_name() const;
|
2020-10-21 21:16:45 +00:00
|
|
|
bool trivia_contains_line_terminator() const;
|
2020-04-18 18:31:27 +00:00
|
|
|
|
2020-03-11 18:27:43 +00:00
|
|
|
private:
|
2021-08-18 20:34:25 +00:00
|
|
|
TokenType m_type { TokenType::Invalid };
|
2020-10-26 20:08:01 +00:00
|
|
|
String m_message;
|
2020-03-11 18:27:43 +00:00
|
|
|
StringView m_trivia;
|
2021-08-18 20:34:25 +00:00
|
|
|
StringView m_original_value;
|
|
|
|
Variant<Empty, StringView, FlyString> m_value { Empty {} };
|
2021-02-28 09:42:34 +00:00
|
|
|
StringView m_filename;
|
2021-08-18 20:34:25 +00:00
|
|
|
size_t m_line_number { 0 };
|
|
|
|
size_t m_line_column { 0 };
|
|
|
|
size_t m_offset { 0 };
|
2020-03-11 18:27:43 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
}
|