123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819 |
- /*
- * Copyright (c) 2018-2020, Andreas Kling <kling@serenityos.org>
- * Copyright (c) 2023, Volodymyr V. <vvmposeydon@gmail.com>
- *
- * SPDX-License-Identifier: BSD-2-Clause
- */
- #include "Lexer.h"
- #include <AK/CharacterTypes.h>
- #include <AK/Function.h>
- #include <AK/HashTable.h>
- #include <AK/StdLibExtras.h>
- #include <AK/String.h>
- namespace GLSL {
- Lexer::Lexer(StringView input, size_t start_line)
- : m_input(input)
- , m_previous_position { start_line, 0 }
- , m_position { start_line, 0 }
- {
- }
- char Lexer::peek(size_t offset) const
- {
- if ((m_index + offset) >= m_input.length())
- return 0;
- return m_input[m_index + offset];
- }
- char Lexer::consume()
- {
- VERIFY(m_index < m_input.length());
- char ch = m_input[m_index++];
- m_previous_position = m_position;
- if (ch == '\n') {
- m_position.line++;
- m_position.column = 0;
- } else {
- m_position.column++;
- }
- return ch;
- }
- constexpr bool is_valid_first_character_of_identifier(char ch)
- {
- return is_ascii_alpha(ch) || ch == '_' || ch == '$';
- }
- constexpr bool is_valid_nonfirst_character_of_identifier(char ch)
- {
- return is_valid_first_character_of_identifier(ch) || is_ascii_digit(ch);
- }
- // NOTE: some of these keywords are not used at the moment, however they are reserved for future use and should not be used as identifiers
- constexpr Array<StringView, 66> s_known_keywords = {
- "asm"sv,
- "attribute"sv,
- "break"sv,
- "case"sv,
- "cast"sv,
- "centroid"sv,
- "class"sv,
- "common"
- "partition"sv,
- "active"sv,
- "const"sv,
- "continue"sv,
- "default"sv,
- "discard"sv,
- "do"sv,
- "else"sv,
- "enum"sv,
- "extern"sv,
- "external"sv,
- "false"sv,
- "filter"sv,
- "fixed"sv,
- "flat"sv,
- "for"sv,
- "goto"sv,
- "half"sv,
- "highp"sv,
- "if"sv,
- "in"sv,
- "inline"sv,
- "inout"sv,
- "input"sv,
- "interface"sv,
- "invariant"sv,
- "layout"sv,
- "lowp"sv,
- "mediump"sv,
- "namespace"sv,
- "noinline"sv,
- "noperspective"sv,
- "out"sv,
- "output"sv,
- "packed"sv,
- "patch"sv,
- "precision"sv,
- "public"sv,
- "return"sv,
- "row_major"sv,
- "sample"sv,
- "sizeof"sv,
- "smooth"sv,
- "static"sv,
- "struct"sv,
- "subroutine"sv,
- "superp"sv,
- "switch"sv,
- "template"sv,
- "this"sv,
- "true"sv,
- "typedef"sv,
- "uniform"sv,
- "union"sv,
- "using"sv,
- "varying"sv,
- "volatile"sv,
- "while"sv,
- };
- constexpr Array<StringView, 120> s_known_types = {
- "bool"sv,
- "bvec2"sv,
- "bvec3"sv,
- "bvec4"sv,
- "dmat2"sv,
- "dmat2x2"sv,
- "dmat2x3"sv,
- "dmat2x4"sv,
- "dmat3"sv,
- "dmat3x2"sv,
- "dmat3x3"sv,
- "dmat3x4"sv,
- "dmat4"sv,
- "dmat4x2"sv,
- "dmat4x3"sv,
- "dmat4x4"sv,
- "double"sv,
- "dvec2"sv,
- "dvec3"sv,
- "dvec4"sv,
- "float"sv,
- "fvec2"sv,
- "fvec3"sv,
- "fvec4"sv,
- "hvec2"sv,
- "hvec3"sv,
- "hvec4"sv,
- "iimage1D"sv,
- "iimage1DArray"sv,
- "iimage2D"sv,
- "iimage2DArray"sv,
- "iimage3D"sv,
- "iimageBuffer"sv,
- "iimageCube"sv,
- "image1D"sv,
- "image1DArray"sv,
- "image1DArrayShadow"sv,
- "image1DShadow"sv,
- "image2D"sv,
- "image2DArray"sv,
- "image2DArrayShadow"sv,
- "image2DShadow"sv,
- "image3D"sv,
- "imageBuffer"sv,
- "imageCube"sv,
- "int"sv,
- "isampler1D"sv,
- "isampler1DArray"sv,
- "isampler2D"sv,
- "isampler2DArray"sv,
- "isampler2DMS"sv,
- "isampler2DMSArray"sv,
- "isampler2DRect"sv,
- "isampler3D"sv,
- "isamplerBuffer"sv,
- "isamplerCube"sv,
- "isamplerCubeArray"sv,
- "ivec2"sv,
- "ivec3"sv,
- "ivec4"sv,
- "long"sv,
- "mat2"sv,
- "mat2x2"sv,
- "mat2x3"sv,
- "mat2x4"sv,
- "mat3"sv,
- "mat3x2"sv,
- "mat3x3"sv,
- "mat3x4"sv,
- "mat4"sv,
- "mat4x2"sv,
- "mat4x3"sv,
- "mat4x4"sv,
- "sampler1D"sv,
- "sampler1DArray"sv,
- "sampler1DArrayShadow"sv,
- "sampler1DShadow"sv,
- "sampler2D"sv,
- "sampler2DArray"sv,
- "sampler2DArrayShadow"sv,
- "sampler2DMS"sv,
- "sampler2DMSArray"sv,
- "sampler2DRect"sv,
- "sampler2DRectShadow"sv,
- "sampler2DShadow"sv,
- "sampler3D"sv,
- "sampler3DRect"sv,
- "samplerBuffer"sv,
- "samplerCube"sv,
- "samplerCubeArray"sv,
- "samplerCubeArrayShadow"sv,
- "samplerCubeShadow"sv,
- "short"sv,
- "uimage1D"sv,
- "uimage1DArray"sv,
- "uimage2D"sv,
- "uimage2DArray"sv,
- "uimage3D"sv,
- "uimageBuffer"sv,
- "uimageCube"sv,
- "uint"sv,
- "unsigned"sv,
- "usampler1D"sv,
- "usampler1DArray"sv,
- "usampler2D"sv,
- "usampler2DArray"sv,
- "usampler2DMS"sv,
- "usampler2DMSArray"sv,
- "usampler2DRect"sv,
- "usampler3D"sv,
- "usamplerBuffer"sv,
- "usamplerCube"sv,
- "usamplerCubeArray"sv,
- "uvec2"sv,
- "uvec3"sv,
- "uvec4"sv,
- "vec2"sv,
- "vec3"sv,
- "vec4"sv,
- "void"sv,
- };
- static bool is_keyword(StringView string)
- {
- return AK::find(s_known_keywords.begin(), s_known_keywords.end(), string) != s_known_keywords.end();
- }
- static bool is_known_type(StringView string)
- {
- return AK::find(s_known_types.begin(), s_known_types.end(), string) != s_known_types.end();
- }
- void Lexer::lex_impl(Function<void(Token)> callback)
- {
- size_t token_start_index = 0;
- Position token_start_position;
- auto emit_single_char_token = [&](auto type) {
- callback(Token(type, m_position, m_position, m_input.substring_view(m_index, 1)));
- consume();
- };
- auto begin_token = [&] {
- token_start_index = m_index;
- token_start_position = m_position;
- };
- auto commit_token = [&](auto type) {
- if (m_options.ignore_whitespace && type == Token::Type::Whitespace)
- return;
- callback(Token(type, token_start_position, m_previous_position, m_input.substring_view(token_start_index, m_index - token_start_index)));
- };
- auto emit_token_equals = [&](auto type, auto equals_type) {
- if (peek(1) == '=') {
- begin_token();
- consume();
- consume();
- commit_token(equals_type);
- return;
- }
- emit_single_char_token(type);
- };
- auto match_escape_sequence = [&]() -> size_t {
- switch (peek(1)) {
- case '\'':
- case '"':
- case '?':
- case '\\':
- case 'a':
- case 'b':
- case 'f':
- case 'n':
- case 'r':
- case 't':
- case 'v':
- return 2;
- case '0':
- case '1':
- case '2':
- case '3':
- case '4':
- case '5':
- case '6':
- case '7': {
- size_t octal_digits = 1;
- for (size_t i = 0; i < 2; ++i) {
- char next = peek(2 + i);
- if (next < '0' || next > '7')
- break;
- ++octal_digits;
- }
- return 1 + octal_digits;
- }
- case 'x': {
- size_t hex_digits = 0;
- while (is_ascii_hex_digit(peek(2 + hex_digits)))
- ++hex_digits;
- return 2 + hex_digits;
- }
- case 'u':
- case 'U': {
- bool is_unicode = true;
- size_t number_of_digits = peek(1) == 'u' ? 4 : 8;
- for (size_t i = 0; i < number_of_digits; ++i) {
- if (!is_ascii_hex_digit(peek(2 + i))) {
- is_unicode = false;
- break;
- }
- }
- return is_unicode ? 2 + number_of_digits : 0;
- }
- default:
- return 0;
- }
- };
- auto match_string_prefix = [&](char quote) -> size_t {
- if (peek() == quote)
- return 1;
- if (peek() == 'L' && peek(1) == quote)
- return 2;
- if (peek() == 'u') {
- if (peek(1) == quote)
- return 2;
- if (peek(1) == '8' && peek(2) == quote)
- return 3;
- }
- if (peek() == 'U' && peek(1) == quote)
- return 2;
- return 0;
- };
- while (m_index < m_input.length()) {
- auto ch = peek();
- if (is_ascii_space(ch)) {
- begin_token();
- while (is_ascii_space(peek()))
- consume();
- commit_token(Token::Type::Whitespace);
- continue;
- }
- if (ch == '(') {
- emit_single_char_token(Token::Type::LeftParen);
- continue;
- }
- if (ch == ')') {
- emit_single_char_token(Token::Type::RightParen);
- continue;
- }
- if (ch == '{') {
- emit_single_char_token(Token::Type::LeftCurly);
- continue;
- }
- if (ch == '}') {
- emit_single_char_token(Token::Type::RightCurly);
- continue;
- }
- if (ch == '[') {
- emit_single_char_token(Token::Type::LeftBracket);
- continue;
- }
- if (ch == ']') {
- emit_single_char_token(Token::Type::RightBracket);
- continue;
- }
- if (ch == '<') {
- begin_token();
- consume();
- if (peek() == '<') {
- consume();
- if (peek() == '=') {
- consume();
- commit_token(Token::Type::LessLessEquals);
- continue;
- }
- commit_token(Token::Type::LessLess);
- continue;
- }
- if (peek() == '=') {
- consume();
- commit_token(Token::Type::LessEquals);
- continue;
- }
- commit_token(Token::Type::Less);
- continue;
- }
- if (ch == '>') {
- begin_token();
- consume();
- if (peek() == '>') {
- consume();
- if (peek() == '=') {
- consume();
- commit_token(Token::Type::GreaterGreaterEquals);
- continue;
- }
- commit_token(Token::Type::GreaterGreater);
- continue;
- }
- if (peek() == '=') {
- consume();
- commit_token(Token::Type::GreaterEquals);
- continue;
- }
- commit_token(Token::Type::Greater);
- continue;
- }
- if (ch == ',') {
- emit_single_char_token(Token::Type::Comma);
- continue;
- }
- if (ch == '+') {
- begin_token();
- consume();
- if (peek() == '+') {
- consume();
- commit_token(Token::Type::PlusPlus);
- continue;
- }
- if (peek() == '=') {
- consume();
- commit_token(Token::Type::PlusEquals);
- continue;
- }
- commit_token(Token::Type::Plus);
- continue;
- }
- if (ch == '-') {
- begin_token();
- consume();
- if (peek() == '-') {
- consume();
- commit_token(Token::Type::MinusMinus);
- continue;
- }
- if (peek() == '=') {
- consume();
- commit_token(Token::Type::MinusEquals);
- continue;
- }
- commit_token(Token::Type::Minus);
- continue;
- }
- if (ch == '*') {
- emit_token_equals(Token::Type::Asterisk, Token::Type::AsteriskEquals);
- continue;
- }
- if (ch == '%') {
- emit_token_equals(Token::Type::Percent, Token::Type::PercentEquals);
- continue;
- }
- if (ch == '^') {
- begin_token();
- consume();
- if (peek() == '^') {
- consume();
- commit_token(Token::Type::CaretCaret);
- continue;
- }
- if (peek() == '=') {
- consume();
- commit_token(Token::Type::CaretEquals);
- continue;
- }
- commit_token(Token::Type::Caret);
- continue;
- }
- if (ch == '!') {
- emit_token_equals(Token::Type::ExclamationMark, Token::Type::ExclamationMarkEquals);
- continue;
- }
- if (ch == '=') {
- emit_token_equals(Token::Type::Equals, Token::Type::EqualsEquals);
- continue;
- }
- if (ch == '&') {
- begin_token();
- consume();
- if (peek() == '&') {
- consume();
- commit_token(Token::Type::AndAnd);
- continue;
- }
- if (peek() == '=') {
- consume();
- commit_token(Token::Type::AndEquals);
- continue;
- }
- commit_token(Token::Type::And);
- continue;
- }
- if (ch == '|') {
- begin_token();
- consume();
- if (peek() == '|') {
- consume();
- commit_token(Token::Type::PipePipe);
- continue;
- }
- if (peek() == '=') {
- consume();
- commit_token(Token::Type::PipeEquals);
- continue;
- }
- commit_token(Token::Type::Pipe);
- continue;
- }
- if (ch == '~') {
- emit_single_char_token(Token::Type::Tilde);
- continue;
- }
- if (ch == '?') {
- emit_single_char_token(Token::Type::QuestionMark);
- continue;
- }
- if (ch == ':') {
- emit_single_char_token(Token::Type::Colon);
- continue;
- }
- if (ch == ';') {
- emit_single_char_token(Token::Type::Semicolon);
- continue;
- }
- if (ch == '.') {
- emit_single_char_token(Token::Type::Dot);
- continue;
- }
- if (ch == '#') {
- begin_token();
- consume();
- while (AK::is_ascii_space(peek()))
- consume();
- size_t directive_start = m_index;
- if (is_valid_first_character_of_identifier(peek()))
- while (peek() && is_valid_nonfirst_character_of_identifier(peek()))
- consume();
- auto directive = StringView(m_input.characters_without_null_termination() + directive_start, m_index - directive_start);
- if (directive == "include"sv) {
- commit_token(Token::Type::IncludeStatement);
- if (is_ascii_space(peek())) {
- begin_token();
- do {
- consume();
- } while (is_ascii_space(peek()));
- commit_token(Token::Type::Whitespace);
- }
- begin_token();
- if (peek() == '<' || peek() == '"') {
- char closing = consume() == '<' ? '>' : '"';
- while (peek() && peek() != closing && peek() != '\n')
- consume();
- if (peek() && consume() == '\n') {
- commit_token(Token::Type::IncludePath);
- continue;
- }
- commit_token(Token::Type::IncludePath);
- begin_token();
- }
- } else {
- while (peek()) {
- if (peek() == '\\' && peek(1) == '\n') {
- consume();
- consume();
- } else if (peek() == '\n') {
- break;
- } else {
- consume();
- }
- }
- commit_token(Token::Type::PreprocessorStatement);
- }
- continue;
- }
- if (ch == '/' && peek(1) == '/') {
- while (peek() && peek() != '\n')
- consume();
- continue;
- }
- if (ch == '/' && peek(1) == '*') {
- consume();
- consume();
- bool comment_block_ends = false;
- while (peek()) {
- if (peek() == '*' && peek(1) == '/') {
- comment_block_ends = true;
- break;
- }
- consume();
- }
- if (comment_block_ends) {
- consume();
- consume();
- }
- continue;
- }
- if (ch == '/') {
- emit_token_equals(Token::Type::Slash, Token::Type::SlashEquals);
- continue;
- }
- if (size_t prefix = match_string_prefix('"'); prefix > 0) {
- begin_token();
- for (size_t i = 0; i < prefix; ++i)
- consume();
- while (peek()) {
- if (peek() == '\\') {
- if (size_t escape = match_escape_sequence(); escape > 0) {
- commit_token(Token::Type::DoubleQuotedString);
- begin_token();
- for (size_t i = 0; i < escape; ++i)
- consume();
- commit_token(Token::Type::EscapeSequence);
- begin_token();
- continue;
- }
- }
- // If string is not terminated - stop before EOF
- if (!peek(1))
- break;
- if (consume() == '"')
- break;
- }
- commit_token(Token::Type::DoubleQuotedString);
- continue;
- }
- if (size_t prefix = match_string_prefix('R'); prefix > 0 && peek(prefix) == '"') {
- begin_token();
- for (size_t i = 0; i < prefix + 1; ++i)
- consume();
- size_t prefix_start = m_index;
- while (peek() && peek() != '(')
- consume();
- StringView prefix_string = m_input.substring_view(prefix_start, m_index - prefix_start);
- while (peek()) {
- if (consume() == '"') {
- VERIFY(m_index >= prefix_string.length() + 2);
- VERIFY(m_input[m_index - 1] == '"');
- if (m_input[m_index - 1 - prefix_string.length() - 1] == ')') {
- StringView suffix_string = m_input.substring_view(m_index - 1 - prefix_string.length(), prefix_string.length());
- if (prefix_string == suffix_string)
- break;
- }
- }
- }
- commit_token(Token::Type::RawString);
- continue;
- }
- if (size_t prefix = match_string_prefix('\''); prefix > 0) {
- begin_token();
- for (size_t i = 0; i < prefix; ++i)
- consume();
- while (peek()) {
- if (peek() == '\\') {
- if (size_t escape = match_escape_sequence(); escape > 0) {
- commit_token(Token::Type::SingleQuotedString);
- begin_token();
- for (size_t i = 0; i < escape; ++i)
- consume();
- commit_token(Token::Type::EscapeSequence);
- begin_token();
- continue;
- }
- }
- if (consume() == '\'')
- break;
- }
- commit_token(Token::Type::SingleQuotedString);
- continue;
- }
- if (is_ascii_digit(ch) || (ch == '.' && is_ascii_digit(peek(1)))) {
- begin_token();
- consume();
- auto type = ch == '.' ? Token::Type::Float : Token::Type::Integer;
- bool is_hex = false;
- bool is_binary = false;
- auto match_exponent = [&]() -> size_t {
- char ch = peek();
- if (ch != 'e' && ch != 'E' && ch != 'p' && ch != 'P')
- return 0;
- type = Token::Type::Float;
- size_t length = 1;
- ch = peek(length);
- if (ch == '+' || ch == '-') {
- ++length;
- }
- for (ch = peek(length); is_ascii_digit(ch); ch = peek(length)) {
- ++length;
- }
- return length;
- };
- auto match_type_literal = [&]() -> size_t {
- size_t length = 0;
- for (;;) {
- char ch = peek(length);
- if ((ch == 'u' || ch == 'U') && type == Token::Type::Integer) {
- ++length;
- } else if ((ch == 'f' || ch == 'F') && !is_binary) {
- type = Token::Type::Float;
- ++length;
- } else if (ch == 'l' || ch == 'L') {
- ++length;
- } else
- return length;
- }
- };
- if (peek() == 'b' || peek() == 'B') {
- consume();
- is_binary = true;
- for (char ch = peek(); ch == '0' || ch == '1' || (ch == '\'' && peek(1) != '\''); ch = peek()) {
- consume();
- }
- } else {
- if (peek() == 'x' || peek() == 'X') {
- consume();
- is_hex = true;
- }
- for (char ch = peek(); (is_hex ? is_ascii_hex_digit(ch) : is_ascii_digit(ch)) || (ch == '\'' && peek(1) != '\'') || ch == '.'; ch = peek()) {
- if (ch == '.') {
- if (type == Token::Type::Integer) {
- type = Token::Type::Float;
- } else
- break;
- };
- consume();
- }
- }
- if (!is_binary) {
- size_t length = match_exponent();
- for (size_t i = 0; i < length; ++i)
- consume();
- }
- size_t length = match_type_literal();
- for (size_t i = 0; i < length; ++i)
- consume();
- commit_token(type);
- continue;
- }
- if (is_valid_first_character_of_identifier(ch)) {
- begin_token();
- while (peek() && is_valid_nonfirst_character_of_identifier(peek()))
- consume();
- auto token_view = StringView(m_input.characters_without_null_termination() + token_start_index, m_index - token_start_index);
- if (is_keyword(token_view))
- commit_token(Token::Type::Keyword);
- else if (is_known_type(token_view))
- commit_token(Token::Type::KnownType);
- else
- commit_token(Token::Type::Identifier);
- continue;
- }
- if (ch == '\\' && peek(1) == '\n') {
- consume();
- consume();
- continue;
- }
- dbgln("Unimplemented token character: {}", ch);
- emit_single_char_token(Token::Type::Unknown);
- }
- }
- Vector<Token> Lexer::lex()
- {
- Vector<Token> tokens;
- lex_impl([&](auto token) {
- tokens.append(move(token));
- });
- return tokens;
- }
- }
|