Pārlūkot izejas kodu

LibSQL: Lex string and blob literals

Blob literals are the same as string literals except prefixed with an
'x' or 'X'.
Timothy Flynn 4 gadi atpakaļ
vecāks
revīzija
570aa57df3

+ 52 - 7
Userland/Libraries/LibSQL/Lexer.cpp

@@ -75,6 +75,18 @@ Token Lexer::next()
 
     if (is_eof()) {
         token_type = found_invalid_comment ? TokenType::Invalid : TokenType::Eof;
+    } else if (is_numeric_literal_start()) {
+        token_type = TokenType::NumericLiteral;
+        if (!consume_numeric_literal())
+            token_type = TokenType::Invalid;
+    } else if (is_string_literal_start()) {
+        token_type = TokenType::StringLiteral;
+        if (!consume_string_literal())
+            token_type = TokenType::Invalid;
+    } else if (is_blob_literal_start()) {
+        token_type = TokenType::BlobLiteral;
+        if (!consume_blob_literal())
+            token_type = TokenType::Invalid;
     } else if (is_identifier_start()) {
         do {
             consume();
@@ -85,10 +97,6 @@ Token Lexer::next()
         } else {
             token_type = TokenType::Identifier;
         }
-    } else if (is_numeric_literal_start()) {
-        token_type = TokenType::NumericLiteral;
-        if (!consume_numeric_literal())
-            token_type = TokenType::Invalid;
     } else {
         bool found_two_char_token = false;
         if (m_position < m_source.length()) {
@@ -190,7 +198,7 @@ bool Lexer::consume_whitespace_and_comments()
 
 bool Lexer::consume_numeric_literal()
 {
-    // https://www.sqlite.org/draft/syntax/numeric-literal.html
+    // https://sqlite.org/syntax/numeric-literal.html
     bool is_valid_numeric_literal = true;
 
     if (m_current_char == '0') {
@@ -227,6 +235,29 @@ bool Lexer::consume_numeric_literal()
     return is_valid_numeric_literal;
 }
 
+bool Lexer::consume_string_literal()
+{
+    // https://sqlite.org/lang_expr.html - See "3. Literal Values (Constants)"
+    //
+    // Consumes a single-quoted string literal, starting at its opening quote.
+    // Returns true if a closing quote was found, and false if the end of the
+    // input was reached first (i.e. the literal is unterminated).
+
+    // Skip the opening single quote.
+    consume();
+
+    while (!is_eof() && !is_string_literal_end()) {
+        // Two adjacent single quotes are an escaped quote inside the literal.
+        // Skip the first one here so that the second one is consumed below as
+        // ordinary content, rather than being mistaken for the closing quote
+        // on the next iteration.
+        if (match('\'', '\''))
+            consume();
+        consume();
+    }
+
+    // Running out of input before the closing quote makes the literal invalid.
+    bool is_valid_string_literal = !is_eof();
+
+    // Step past the closing quote. As before, this is also called when the
+    // input ran out.
+    consume();
+
+    return is_valid_string_literal;
+}
+
+bool Lexer::consume_blob_literal()
+{
+    // https://sqlite.org/lang_expr.html - See "3. Literal Values (Constants)"
+    //
+    // A blob literal is lexed as a string literal with a one-character prefix:
+    // step over the prefix, then consume the quoted part as a string literal.
+    // The result is whatever consume_string_literal() reports.
+    consume();
+    bool const is_valid_blob_literal = consume_string_literal();
+    return is_valid_blob_literal;
+}
+
 bool Lexer::consume_exponent()
 {
     consume();
@@ -259,8 +290,7 @@ bool Lexer::match(char a, char b) const
     if (m_position >= m_source.length())
         return false;
 
-    return m_current_char == a
-        && m_source[m_position] == b;
+    return m_current_char == a && m_source[m_position] == b;
 }
 
 bool Lexer::is_identifier_start() const
@@ -278,6 +308,21 @@ bool Lexer::is_numeric_literal_start() const
     return isdigit(m_current_char) || (m_current_char == '.' && m_position < m_source.length() && isdigit(m_source[m_position]));
 }
 
+bool Lexer::is_string_literal_start() const
+{
+    // A string literal begins at a single quote.
+    constexpr char single_quote = '\'';
+    return m_current_char == single_quote;
+}
+
+bool Lexer::is_string_literal_end() const
+{
+    // Only a single quote can close a string literal.
+    if (m_current_char != '\'')
+        return false;
+
+    // A quote that is immediately followed by another quote is not the end.
+    bool const has_next_char = m_position < m_source.length();
+    if (has_next_char && m_source[m_position] == '\'')
+        return false;
+
+    return true;
+}
+
+bool Lexer::is_blob_literal_start() const
+{
+    // A blob literal starts with 'x' or 'X' directly followed by a single quote.
+    if (match('x', '\''))
+        return true;
+    return match('X', '\'');
+}
+
 bool Lexer::is_line_comment_start() const
 {
     return match('-', '-');

+ 5 - 0
Userland/Libraries/LibSQL/Lexer.h

@@ -44,6 +44,8 @@ private:
 
     bool consume_whitespace_and_comments();
     bool consume_numeric_literal();
+    bool consume_string_literal();
+    bool consume_blob_literal();
     bool consume_exponent();
     bool consume_hexadecimal_number();
 
@@ -51,6 +53,9 @@ private:
     bool is_identifier_start() const;
     bool is_identifier_middle() const;
     bool is_numeric_literal_start() const;
+    bool is_string_literal_start() const;
+    bool is_string_literal_end() const;
+    bool is_blob_literal_start() const;
     bool is_line_comment_start() const;
     bool is_block_comment_start() const;
     bool is_block_comment_end() const;

+ 4 - 0
Userland/Libraries/LibSQL/Token.h

@@ -182,6 +182,8 @@ namespace SQL {
     __ENUMERATE_SQL_TOKEN("WITHOUT", Without, Keyword)                    \
     __ENUMERATE_SQL_TOKEN("_identifier_", Identifier, Identifier)         \
     __ENUMERATE_SQL_TOKEN("_numeric_", NumericLiteral, Number)            \
+    __ENUMERATE_SQL_TOKEN("_string_", StringLiteral, String)              \
+    __ENUMERATE_SQL_TOKEN("_blob_", BlobLiteral, Blob)                    \
     __ENUMERATE_SQL_TOKEN("_eof_", Eof, Invalid)                          \
     __ENUMERATE_SQL_TOKEN("_invalid_", Invalid, Invalid)                  \
     __ENUMERATE_SQL_TOKEN("&", Ampersand, Operator)                       \
@@ -221,6 +223,8 @@ enum class TokenCategory {
     Keyword,
     Identifier,
     Number,
+    String,
+    Blob,
     Operator,
     Punctuation,
 };