ladybird/Userland/Libraries/LibSQL/AST/Parser.cpp
Jan de Visser 5c4890411b LibSQL: Make lexer and parser more standard SQL compliant
SQL was standardized before consensus on sane language syntax
constructs had evolved. The language is mostly case-insensitive, with
unquoted text converted to upper case. Identifiers can include lower
case characters and other 'special' characters by enclosing the
identifier with double quotes. A double quote is escaped by doubling it.
Likewise, a single quote in a literal string is escaped by doubling it.

All this means that the strategy used in the lexer, where a token's
value is a StringView 'window' on the source string, does not work,
because the value needs to be massaged before being handed to the
parser. Therefore a token now has a String containing its value. Given
the limited lifetime of a token, this is acceptable overhead.

Not doing this means that for example quote removal and double quote
escaping would need to be done in the parser or in AST node
construction, which would spread lexing basically all over the place.
Which would be suboptimal.

There was some impact on the sql utility and SyntaxHighlighter component
which was addressed by storing the token's end position together with
the start position in order to properly highlight it.

Finally, reviewing the tests for parsing numeric literals revealed an
inconsistency in which tokens we accept or reject: `1a` is accepted but
`1e` is rejected. Related to this is the fate of `0x`. Added a FIXME
reminding us to address this.
2021-06-24 00:36:53 +02:00

1089 lines
39 KiB
C++

/*
* Copyright (c) 2021, Tim Flynn <trflynn89@pm.me>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include "Parser.h"
#include <AK/ScopeGuard.h>
#include <AK/TypeCasts.h>
namespace SQL::AST {
// Constructs a parser over the given lexer. The lexer is moved into m_parser_state.
Parser::Parser(Lexer lexer)
: m_parser_state(move(lexer))
{
}
// Parses one complete statement, including an optional leading WITH clause, and then consumes the
// terminating semicolon. If the WITH clause yields no common table expression list, an ErrorStatement
// is returned immediately and the semicolon is not consumed.
NonnullRefPtr<Statement> Parser::next_statement()
{
    if (!match(TokenType::With)) {
        auto statement = parse_statement();
        consume(TokenType::SemiColon);
        return statement;
    }

    auto cte_list = parse_common_table_expression_list();
    if (!cte_list)
        return create_ast_node<ErrorStatement>();

    auto statement = parse_statement_with_expression_list(move(cte_list));
    consume(TokenType::SemiColon);
    return statement;
}
// Dispatches on the type of the current token to the parser of a statement that is not preceded by a
// WITH clause. For any other token, expected() is called with the list of accepted keywords and an
// ErrorStatement is returned.
NonnullRefPtr<Statement> Parser::parse_statement()
{
    const auto token_type = m_parser_state.m_token.type();

    if (token_type == TokenType::Create)
        return parse_create_table_statement();
    if (token_type == TokenType::Alter)
        return parse_alter_table_statement();
    if (token_type == TokenType::Drop)
        return parse_drop_table_statement();

    // These statements may carry a common table expression list; none is available here.
    if (token_type == TokenType::Insert)
        return parse_insert_statement({});
    if (token_type == TokenType::Update)
        return parse_update_statement({});
    if (token_type == TokenType::Delete)
        return parse_delete_statement({});
    if (token_type == TokenType::Select)
        return parse_select_statement({});

    expected("CREATE, ALTER, DROP, INSERT, UPDATE, DELETE, or SELECT");
    return create_ast_node<ErrorStatement>();
}
// Dispatches on the type of the current token to the parser of a statement that follows a WITH clause,
// handing the already parsed common table expression list over to it. For any other token, expected()
// is called and an ErrorStatement is returned.
NonnullRefPtr<Statement> Parser::parse_statement_with_expression_list(RefPtr<CommonTableExpressionList> common_table_expression_list)
{
    const auto token_type = m_parser_state.m_token.type();

    if (token_type == TokenType::Insert)
        return parse_insert_statement(move(common_table_expression_list));
    if (token_type == TokenType::Update)
        return parse_update_statement(move(common_table_expression_list));
    if (token_type == TokenType::Delete)
        return parse_delete_statement(move(common_table_expression_list));
    if (token_type == TokenType::Select)
        return parse_select_statement(move(common_table_expression_list));

    expected("INSERT, UPDATE, DELETE or SELECT");
    return create_ast_node<ErrorStatement>();
}
// Parses "CREATE [TEMP | TEMPORARY] TABLE [IF NOT EXISTS] [schema.]table" followed by either
// "AS select-stmt" or a parenthesized, comma separated list of column definitions.
NonnullRefPtr<CreateTable> Parser::parse_create_table_statement()
{
    // https://sqlite.org/lang_createtable.html
    consume(TokenType::Create);

    bool is_temporary = consume_if(TokenType::Temp) || consume_if(TokenType::Temporary);

    consume(TokenType::Table);

    // is_error_if_table_exists is cleared only when an IF NOT EXISTS clause is present.
    bool has_if_not_exists = consume_if(TokenType::If);
    if (has_if_not_exists) {
        consume(TokenType::Not);
        consume(TokenType::Exists);
    }
    bool is_error_if_table_exists = !has_if_not_exists;

    String schema_name;
    String table_name;
    parse_schema_and_table_name(schema_name, table_name);

    if (!consume_if(TokenType::As)) {
        NonnullRefPtrVector<ColumnDefinition> column_definitions;
        parse_comma_separated_list(true, [&]() { column_definitions.append(parse_column_definition()); });

        // FIXME: Parse "table-constraint".

        return create_ast_node<CreateTable>(move(schema_name), move(table_name), move(column_definitions), is_temporary, is_error_if_table_exists);
    }

    auto select_statement = parse_select_statement({});
    return create_ast_node<CreateTable>(move(schema_name), move(table_name), move(select_statement), is_temporary, is_error_if_table_exists);
}
// Parses "ALTER TABLE [schema.]table" followed by one of:
//   ADD [COLUMN] column-def
//   DROP [COLUMN] column-name
//   RENAME TO new-table-name
//   RENAME [COLUMN] column-name TO new-column-name
NonnullRefPtr<AlterTable> Parser::parse_alter_table_statement()
{
    // https://sqlite.org/lang_altertable.html
    consume(TokenType::Alter);
    consume(TokenType::Table);

    String schema_name;
    String table_name;
    parse_schema_and_table_name(schema_name, table_name);

    if (consume_if(TokenType::Add)) {
        consume_if(TokenType::Column); // The COLUMN keyword may be omitted.
        auto column_definition = parse_column_definition();
        return create_ast_node<AddColumn>(move(schema_name), move(table_name), move(column_definition));
    }

    if (consume_if(TokenType::Drop)) {
        consume_if(TokenType::Column); // The COLUMN keyword may be omitted.
        auto dropped_column = consume(TokenType::Identifier).value();
        return create_ast_node<DropColumn>(move(schema_name), move(table_name), move(dropped_column));
    }

    // Everything that is neither ADD nor DROP has to be a RENAME.
    consume(TokenType::Rename);

    if (!consume_if(TokenType::To)) {
        consume_if(TokenType::Column); // The COLUMN keyword may be omitted.
        auto old_column_name = consume(TokenType::Identifier).value();
        consume(TokenType::To);
        auto renamed_column_name = consume(TokenType::Identifier).value();
        return create_ast_node<RenameColumn>(move(schema_name), move(table_name), move(old_column_name), move(renamed_column_name));
    }

    auto renamed_table_name = consume(TokenType::Identifier).value();
    return create_ast_node<RenameTable>(move(schema_name), move(table_name), move(renamed_table_name));
}
// Parses "DROP TABLE [IF EXISTS] [schema.]table".
NonnullRefPtr<DropTable> Parser::parse_drop_table_statement()
{
    // https://sqlite.org/lang_droptable.html
    consume(TokenType::Drop);
    consume(TokenType::Table);

    // is_error_if_table_does_not_exist is cleared only when an IF EXISTS clause is present.
    bool has_if_exists = consume_if(TokenType::If);
    if (has_if_exists)
        consume(TokenType::Exists);
    bool is_error_if_table_does_not_exist = !has_if_exists;

    String schema_name;
    String table_name;
    parse_schema_and_table_name(schema_name, table_name);

    return create_ast_node<DropTable>(move(schema_name), move(table_name), is_error_if_table_does_not_exist);
}
// Parses an INSERT statement: optional conflict resolution, target table with optional alias and
// column list, and one of three value sources (VALUES lists, a SELECT statement, or DEFAULT VALUES),
// followed by an optional RETURNING clause.
NonnullRefPtr<Insert> Parser::parse_insert_statement(RefPtr<CommonTableExpressionList> common_table_expression_list)
{
    // https://sqlite.org/lang_insert.html
    consume(TokenType::Insert);
    auto conflict_resolution = parse_conflict_resolution();
    consume(TokenType::Into);

    String schema_name;
    String table_name;
    parse_schema_and_table_name(schema_name, table_name);

    String alias;
    if (consume_if(TokenType::As))
        alias = consume(TokenType::Identifier).value();

    Vector<String> column_names;
    if (match(TokenType::ParenOpen)) {
        parse_comma_separated_list(true, [&]() {
            column_names.append(consume(TokenType::Identifier).value());
        });
    }

    NonnullRefPtrVector<ChainedExpression> chained_expressions;
    RefPtr<Select> select_statement;

    if (consume_if(TokenType::Values)) {
        parse_comma_separated_list(false, [&]() {
            auto chained_expression = parse_chained_expression();
            if (!chained_expression.has_value()) {
                expected("Chained expression");
                return;
            }
            chained_expressions.append(move(chained_expression.value()));
        });
    } else if (match(TokenType::Select)) {
        select_statement = parse_select_statement({});
    } else {
        consume(TokenType::Default);
        consume(TokenType::Values);
    }

    // NOTE: The RETURNING clause is parsed here, but it is not handed to any of the Insert nodes below.
    RefPtr<ReturningClause> returning_clause;
    if (match(TokenType::Returning))
        returning_clause = parse_returning_clause();

    // FIXME: Parse 'upsert-clause'.

    // Pick the Insert flavor that corresponds to the value source that was actually collected.
    if (!chained_expressions.is_empty())
        return create_ast_node<Insert>(move(common_table_expression_list), conflict_resolution, move(schema_name), move(table_name), move(alias), move(column_names), move(chained_expressions));

    if (!select_statement.is_null())
        return create_ast_node<Insert>(move(common_table_expression_list), conflict_resolution, move(schema_name), move(table_name), move(alias), move(column_names), move(select_statement));

    return create_ast_node<Insert>(move(common_table_expression_list), conflict_resolution, move(schema_name), move(table_name), move(alias), move(column_names));
}
// Parses an UPDATE statement: optional conflict resolution, the qualified target table, the SET
// assignments, and the optional FROM, WHERE and RETURNING clauses.
NonnullRefPtr<Update> Parser::parse_update_statement(RefPtr<CommonTableExpressionList> common_table_expression_list)
{
    // https://sqlite.org/lang_update.html
    consume(TokenType::Update);
    auto conflict_resolution = parse_conflict_resolution();
    auto qualified_table_name = parse_qualified_table_name();
    consume(TokenType::Set);

    Vector<Update::UpdateColumns> update_columns;
    parse_comma_separated_list(false, [&]() {
        // The left-hand side of an assignment is either one column name or a parenthesized list of them.
        Vector<String> assigned_columns;
        if (!match(TokenType::ParenOpen)) {
            assigned_columns.append(consume(TokenType::Identifier).value());
        } else {
            parse_comma_separated_list(true, [&]() { assigned_columns.append(consume(TokenType::Identifier).value()); });
        }

        consume(TokenType::Equals);
        auto assigned_value = parse_expression();
        update_columns.append({ move(assigned_columns), move(assigned_value) });
    });

    NonnullRefPtrVector<TableOrSubquery> table_or_subquery_list;
    if (consume_if(TokenType::From)) {
        // FIXME: Parse join-clause.
        parse_comma_separated_list(false, [&]() { table_or_subquery_list.append(parse_table_or_subquery()); });
    }

    RefPtr<Expression> where_clause;
    if (consume_if(TokenType::Where))
        where_clause = parse_expression();

    RefPtr<ReturningClause> returning_clause;
    if (match(TokenType::Returning))
        returning_clause = parse_returning_clause();

    return create_ast_node<Update>(move(common_table_expression_list), conflict_resolution, move(qualified_table_name), move(update_columns), move(table_or_subquery_list), move(where_clause), move(returning_clause));
}
// Parses "DELETE FROM qualified-table-name [WHERE expr] [returning-clause]".
NonnullRefPtr<Delete> Parser::parse_delete_statement(RefPtr<CommonTableExpressionList> common_table_expression_list)
{
    // https://sqlite.org/lang_delete.html
    consume(TokenType::Delete);
    consume(TokenType::From);
    auto target_table = parse_qualified_table_name();

    // Both trailing clauses are optional and stay null when absent.
    RefPtr<Expression> condition;
    if (consume_if(TokenType::Where))
        condition = parse_expression();

    RefPtr<ReturningClause> returning;
    if (match(TokenType::Returning))
        returning = parse_returning_clause();

    return create_ast_node<Delete>(move(common_table_expression_list), move(target_table), move(condition), move(returning));
}
// Parses a SELECT statement: [DISTINCT | ALL] result columns, followed by the optional FROM, WHERE,
// GROUP BY [HAVING], ORDER BY and LIMIT [OFFSET] clauses, in that order.
NonnullRefPtr<Select> Parser::parse_select_statement(RefPtr<CommonTableExpressionList> common_table_expression_list)
{
    // https://sqlite.org/lang_select.html
    consume(TokenType::Select);

    bool select_all = true;
    if (consume_if(TokenType::Distinct))
        select_all = false;
    consume_if(TokenType::All); // ALL is the default, so it carries no additional information.

    NonnullRefPtrVector<ResultColumn> result_column_list;
    parse_comma_separated_list(false, [&]() {
        result_column_list.append(parse_result_column());
    });

    NonnullRefPtrVector<TableOrSubquery> table_or_subquery_list;
    if (consume_if(TokenType::From)) {
        // FIXME: Parse join-clause.
        parse_comma_separated_list(false, [&]() {
            table_or_subquery_list.append(parse_table_or_subquery());
        });
    }

    RefPtr<Expression> where_clause;
    if (consume_if(TokenType::Where))
        where_clause = parse_expression();

    RefPtr<GroupByClause> group_by_clause;
    if (consume_if(TokenType::Group)) {
        consume(TokenType::By);

        NonnullRefPtrVector<Expression> grouping_terms;
        parse_comma_separated_list(false, [&]() {
            grouping_terms.append(parse_expression());
        });

        // HAVING is only looked for (and a clause only built) when at least one grouping term was collected.
        if (!grouping_terms.is_empty()) {
            RefPtr<Expression> having_clause;
            if (consume_if(TokenType::Having))
                having_clause = parse_expression();

            group_by_clause = create_ast_node<GroupByClause>(move(grouping_terms), move(having_clause));
        }
    }

    // FIXME: Parse 'WINDOW window-name AS window-defn'.
    // FIXME: Parse 'compound-operator'.

    NonnullRefPtrVector<OrderingTerm> ordering_term_list;
    if (consume_if(TokenType::Order)) {
        consume(TokenType::By);
        parse_comma_separated_list(false, [&]() {
            ordering_term_list.append(parse_ordering_term());
        });
    }

    RefPtr<LimitClause> limit_clause;
    if (consume_if(TokenType::Limit)) {
        auto limit_expression = parse_expression();

        RefPtr<Expression> offset_expression;
        if (consume_if(TokenType::Offset)) {
            offset_expression = parse_expression();
        } else if (consume_if(TokenType::Comma)) {
            // Note: SQLite also accepts "LIMIT offset-expression, limit-expression", which reverses the order of the
            // two expressions. SQLite itself calls this "counter-intuitive" and states that "to avoid confusion,
            // programmers are strongly encouraged to ... avoid using a LIMIT clause with a comma-separated offset."
            syntax_error("LIMIT clauses of the form 'LIMIT <expr>, <expr>' are not supported");
        }

        limit_clause = create_ast_node<LimitClause>(move(limit_expression), move(offset_expression));
    }

    return create_ast_node<Select>(move(common_table_expression_list), select_all, move(result_column_list), move(table_or_subquery_list), move(where_clause), move(group_by_clause), move(ordering_term_list), move(limit_clause));
}
// Parses "WITH [RECURSIVE] common-table-expression [, ...]". Returns a null pointer, after calling
// expected(), when no common table expression was collected.
RefPtr<CommonTableExpressionList> Parser::parse_common_table_expression_list()
{
    consume(TokenType::With);
    bool recursive = consume_if(TokenType::Recursive);

    NonnullRefPtrVector<CommonTableExpression> expressions;
    parse_comma_separated_list(false, [&]() {
        expressions.append(parse_common_table_expression());
    });

    if (!expressions.is_empty())
        return create_ast_node<CommonTableExpressionList>(recursive, move(expressions));

    expected("Common table expression list");
    return {};
}
// Parses a single expression: a primary expression, optionally followed by one secondary expression
// that takes the primary expression as its left-hand operand.
//
// Nesting is bounded by Limits::maximum_expression_tree_depth. Once the bound is exceeded, a syntax
// error is reported and an ErrorExpression is returned without descending any further.
NonnullRefPtr<Expression> Parser::parse_expression()
{
    // Account for this level of nesting for the duration of the call. The guard undoes the increment on
    // every exit path, including the depth-exceeded return below. Otherwise the counter would stay
    // elevated after such an error and shrink the usable depth of everything parsed afterwards.
    ++m_parser_state.m_current_expression_depth;
    ScopeGuard depth_guard([&]() { --m_parser_state.m_current_expression_depth; });

    if (m_parser_state.m_current_expression_depth > Limits::maximum_expression_tree_depth) {
        syntax_error(String::formatted("Exceeded maximum expression tree depth of {}", Limits::maximum_expression_tree_depth));
        return create_ast_node<ErrorExpression>();
    }

    // https://sqlite.org/lang_expr.html
    auto expression = parse_primary_expression();

    if (match_secondary_expression())
        expression = parse_secondary_expression(move(expression));

    // FIXME: Parse 'bind-parameter'.
    // FIXME: Parse 'function-name'.
    // FIXME: Parse 'raise-function'.

    return expression;
}
// Tries each kind of primary expression in a fixed order and returns the first one that applies.
// If none applies, expected() is called, the current token is consumed, and an ErrorExpression is returned.
NonnullRefPtr<Expression> Parser::parse_primary_expression()
{
    auto literal_value = parse_literal_value_expression();
    if (literal_value.has_value())
        return move(literal_value.value());

    auto column_name = parse_column_name_expression();
    if (column_name.has_value())
        return move(column_name.value());

    auto unary_operator = parse_unary_operator_expression();
    if (unary_operator.has_value())
        return move(unary_operator.value());

    auto chained = parse_chained_expression();
    if (chained.has_value())
        return move(chained.value());

    auto cast_expression = parse_cast_expression();
    if (cast_expression.has_value())
        return move(cast_expression.value());

    auto case_expression = parse_case_expression();
    if (case_expression.has_value())
        return move(case_expression.value());

    auto exists_expression = parse_exists_expression(false);
    if (exists_expression.has_value())
        return move(exists_expression.value());

    expected("Primary Expression");
    consume();

    return create_ast_node<ErrorExpression>();
}
// Tries each kind of secondary expression, using `primary` as the left-hand operand, and returns the
// first one that applies. An optional NOT is consumed before the match/null/between/in forms are tried
// and is passed on to them as the inversion flag. If nothing applies, expected() is called, the current
// token is consumed, and an ErrorExpression is returned.
NonnullRefPtr<Expression> Parser::parse_secondary_expression(NonnullRefPtr<Expression> primary)
{
    auto binary_operator = parse_binary_operator_expression(primary);
    if (binary_operator.has_value())
        return move(binary_operator.value());

    auto collate_expression = parse_collate_expression(primary);
    if (collate_expression.has_value())
        return move(collate_expression.value());

    auto is_expression = parse_is_expression(primary);
    if (is_expression.has_value())
        return move(is_expression.value());

    bool invert_expression = consume_if(TokenType::Not);

    auto match_expression = parse_match_expression(primary, invert_expression);
    if (match_expression.has_value())
        return move(match_expression.value());

    auto null_expression = parse_null_expression(primary, invert_expression);
    if (null_expression.has_value())
        return move(null_expression.value());

    auto between_expression = parse_between_expression(primary, invert_expression);
    if (between_expression.has_value())
        return move(between_expression.value());

    auto in_expression = parse_in_expression(primary, invert_expression);
    if (in_expression.has_value())
        return move(in_expression.value());

    expected("Secondary Expression");
    consume();

    return create_ast_node<ErrorExpression>();
}
bool Parser::match_secondary_expression() const
{
return match(TokenType::Not)
|| match(TokenType::DoublePipe)
|| match(TokenType::Asterisk)
|| match(TokenType::Divide)
|| match(TokenType::Modulus)
|| match(TokenType::Plus)
|| match(TokenType::Minus)
|| match(TokenType::ShiftLeft)
|| match(TokenType::ShiftRight)
|| match(TokenType::Ampersand)
|| match(TokenType::Pipe)
|| match(TokenType::LessThan)
|| match(TokenType::LessThanEquals)
|| match(TokenType::GreaterThan)
|| match(TokenType::GreaterThanEquals)
|| match(TokenType::Equals)
|| match(TokenType::EqualsEquals)
|| match(TokenType::NotEquals1)
|| match(TokenType::NotEquals2)
|| match(TokenType::And)
|| match(TokenType::Or)
|| match(TokenType::Collate)
|| match(TokenType::Is)
|| match(TokenType::Like)
|| match(TokenType::Glob)
|| match(TokenType::Match)
|| match(TokenType::Regexp)
|| match(TokenType::Isnull)
|| match(TokenType::Notnull)
|| match(TokenType::Between)
|| match(TokenType::In);
}
// Parses a numeric, string or blob literal, or the NULL keyword. Returns an empty Optional, without
// consuming anything, when the current token is none of these.
Optional<NonnullRefPtr<Expression>> Parser::parse_literal_value_expression()
{
    if (match(TokenType::NumericLiteral))
        return create_ast_node<NumericLiteral>(consume().double_value());

    // TODO: Should the surrounding ' ' be removed here?
    if (match(TokenType::StringLiteral))
        return create_ast_node<StringLiteral>(consume().value());

    // TODO: Should the surrounding x' ' be removed here?
    if (match(TokenType::BlobLiteral))
        return create_ast_node<BlobLiteral>(consume().value());

    if (consume_if(TokenType::Null))
        return create_ast_node<NullLiteral>();

    return {};
}
// Parses "[[schema-name.]table-name.]column-name".
//
// A caller that has already consumed the first identifier (and possibly the period after it) passes
// them in through with_parsed_identifier / with_parsed_period. Returns an empty Optional only when no
// identifier was handed in and the current token is not an identifier either.
Optional<NonnullRefPtr<Expression>> Parser::parse_column_name_expression(String with_parsed_identifier, bool with_parsed_period)
{
    const bool has_preparsed_identifier = !with_parsed_identifier.is_null();
    if (!has_preparsed_identifier && !match(TokenType::Identifier))
        return {};

    String leading_identifier;
    if (has_preparsed_identifier)
        leading_identifier = move(with_parsed_identifier);
    else
        leading_identifier = consume(TokenType::Identifier).value();

    String schema_name;
    String table_name;
    String column_name;

    const bool is_qualified = with_parsed_period || consume_if(TokenType::Period);
    if (!is_qualified) {
        // A lone identifier is the column name.
        column_name = move(leading_identifier);
    } else {
        String following_identifier = consume(TokenType::Identifier).value();

        if (consume_if(TokenType::Period)) {
            // Three parts: schema.table.column
            schema_name = move(leading_identifier);
            table_name = move(following_identifier);
            column_name = consume(TokenType::Identifier).value();
        } else {
            // Two parts: table.column
            table_name = move(leading_identifier);
            column_name = move(following_identifier);
        }
    }

    return create_ast_node<ColumnNameExpression>(move(schema_name), move(table_name), move(column_name));
}
// Parses a prefix operator ('-', '+', '~' or NOT) followed by its operand. "NOT EXISTS" is routed to
// the EXISTS parser with the inversion flag set. Returns an empty Optional when the current token is
// not a prefix operator.
Optional<NonnullRefPtr<Expression>> Parser::parse_unary_operator_expression()
{
    // Invoked once the operator token has been consumed: parses the operand and builds the node.
    auto parse_operand_of = [this](UnaryOperator unary_operator) -> Optional<NonnullRefPtr<Expression>> {
        return create_ast_node<UnaryOperatorExpression>(unary_operator, parse_expression());
    };

    if (consume_if(TokenType::Minus))
        return parse_operand_of(UnaryOperator::Minus);

    if (consume_if(TokenType::Plus))
        return parse_operand_of(UnaryOperator::Plus);

    if (consume_if(TokenType::Tilde))
        return parse_operand_of(UnaryOperator::BitwiseNot);

    if (!consume_if(TokenType::Not))
        return {};

    if (match(TokenType::Exists))
        return parse_exists_expression(true);

    return parse_operand_of(UnaryOperator::Not);
}
// Parses "<binary-operator> expr" with `lhs` as the left-hand operand. Returns an empty Optional,
// without consuming anything, when the current token is not a binary operator.
Optional<NonnullRefPtr<Expression>> Parser::parse_binary_operator_expression(NonnullRefPtr<Expression> lhs)
{
    struct OperatorMapping {
        TokenType token_type;
        BinaryOperator binary_operator;
    };

    // Both spellings of equality and of inequality map onto one operator each.
    static constexpr OperatorMapping operator_mappings[] = {
        { TokenType::DoublePipe, BinaryOperator::Concatenate },
        { TokenType::Asterisk, BinaryOperator::Multiplication },
        { TokenType::Divide, BinaryOperator::Division },
        { TokenType::Modulus, BinaryOperator::Modulo },
        { TokenType::Plus, BinaryOperator::Plus },
        { TokenType::Minus, BinaryOperator::Minus },
        { TokenType::ShiftLeft, BinaryOperator::ShiftLeft },
        { TokenType::ShiftRight, BinaryOperator::ShiftRight },
        { TokenType::Ampersand, BinaryOperator::BitwiseAnd },
        { TokenType::Pipe, BinaryOperator::BitwiseOr },
        { TokenType::LessThan, BinaryOperator::LessThan },
        { TokenType::LessThanEquals, BinaryOperator::LessThanEquals },
        { TokenType::GreaterThan, BinaryOperator::GreaterThan },
        { TokenType::GreaterThanEquals, BinaryOperator::GreaterThanEquals },
        { TokenType::Equals, BinaryOperator::Equals },
        { TokenType::EqualsEquals, BinaryOperator::Equals },
        { TokenType::NotEquals1, BinaryOperator::NotEquals },
        { TokenType::NotEquals2, BinaryOperator::NotEquals },
        { TokenType::And, BinaryOperator::And },
        { TokenType::Or, BinaryOperator::Or },
    };

    for (const auto& mapping : operator_mappings) {
        if (consume_if(mapping.token_type))
            return create_ast_node<BinaryOperatorExpression>(mapping.binary_operator, move(lhs), parse_expression());
    }

    return {};
}
// Parses a parenthesized, comma separated chain of expressions. A "(" that is directly followed by
// SELECT is handed to parse_exists_expression() in its SELECT-opened mode instead. Returns an empty
// Optional when the current token is not an opening parenthesis.
Optional<NonnullRefPtr<Expression>> Parser::parse_chained_expression()
{
    if (!consume_if(TokenType::ParenOpen))
        return {};

    if (!match(TokenType::Select)) {
        NonnullRefPtrVector<Expression> chain;
        parse_comma_separated_list(false, [&]() {
            chain.append(parse_expression());
        });
        consume(TokenType::ParenClose);

        return create_ast_node<ChainedExpression>(move(chain));
    }

    return parse_exists_expression(false, TokenType::Select);
}
// Parses "CAST ( expr AS type-name )". Returns an empty Optional when the current token is not CAST.
Optional<NonnullRefPtr<Expression>> Parser::parse_cast_expression()
{
    if (!consume_if(TokenType::Cast))
        return {};

    consume(TokenType::ParenOpen);
    auto operand = parse_expression();
    consume(TokenType::As);
    auto target_type = parse_type_name();
    consume(TokenType::ParenClose);

    return create_ast_node<CastExpression>(move(operand), move(target_type));
}
// Parses "CASE [expr] WHEN expr THEN expr [WHEN ...] [ELSE expr] END". Returns an empty Optional when
// the current token is not CASE.
Optional<NonnullRefPtr<Expression>> Parser::parse_case_expression()
{
    if (!match(TokenType::Case))
        return {};
    consume();

    // The base expression is only present when CASE is not immediately followed by WHEN.
    RefPtr<Expression> case_expression;
    if (!match(TokenType::When))
        case_expression = parse_expression();

    // At least one WHEN/THEN pair is required; continue for as long as another WHEN follows.
    Vector<CaseExpression::WhenThenClause> when_then_clauses;
    do {
        consume(TokenType::When);
        auto condition = parse_expression();
        consume(TokenType::Then);
        auto result = parse_expression();

        when_then_clauses.append({ move(condition), move(result) });
    } while (match(TokenType::When));

    RefPtr<Expression> else_expression;
    if (consume_if(TokenType::Else))
        else_expression = parse_expression();

    consume(TokenType::End);
    return create_ast_node<CaseExpression>(move(case_expression), move(when_then_clauses), move(else_expression));
}
// Parses a parenthesized SELECT statement into an ExistsExpression.
//
// With opening_token == TokenType::Exists, "EXISTS (" is consumed first, and an empty Optional is
// returned when the current token is not EXISTS. With opening_token == TokenType::Select, the caller
// is expected to have consumed the opening parenthesis already. Any other opening_token is a
// programming error.
Optional<NonnullRefPtr<Expression>> Parser::parse_exists_expression(bool invert_expression, TokenType opening_token)
{
    const bool opened_by_exists = (opening_token == TokenType::Exists);
    VERIFY(opened_by_exists || (opening_token == TokenType::Select));

    if (opened_by_exists) {
        if (!consume_if(TokenType::Exists))
            return {};
        consume(TokenType::ParenOpen);
    }

    auto subquery = parse_select_statement({});
    consume(TokenType::ParenClose);

    return create_ast_node<ExistsExpression>(move(subquery), invert_expression);
}
// Parses "COLLATE collation-name" applied to `expression`. Returns an empty Optional when the current
// token is not COLLATE.
Optional<NonnullRefPtr<Expression>> Parser::parse_collate_expression(NonnullRefPtr<Expression> expression)
{
    if (!consume_if(TokenType::Collate))
        return {};

    String name_of_collation = consume(TokenType::Identifier).value();
    return create_ast_node<CollateExpression>(move(expression), move(name_of_collation));
}
// Parses "IS [NOT] expr" with `expression` as the left-hand operand. Returns an empty Optional when
// the current token is not IS.
Optional<NonnullRefPtr<Expression>> Parser::parse_is_expression(NonnullRefPtr<Expression> expression)
{
    if (!consume_if(TokenType::Is))
        return {};

    // A NOT directly after IS inverts the comparison.
    bool invert_expression = consume_if(TokenType::Not);

    auto right_operand = parse_expression();
    return create_ast_node<IsExpression>(move(expression), move(right_operand), invert_expression);
}
// Parses "LIKE | GLOB | MATCH | REGEXP expr [ESCAPE expr]" with `lhs` as the left-hand operand.
// invert_expression is passed through to the resulting MatchExpression. Returns an empty Optional,
// without consuming anything, when the current token is none of the four operators.
Optional<NonnullRefPtr<Expression>> Parser::parse_match_expression(NonnullRefPtr<Expression> lhs, bool invert_expression)
{
    // Invoked once the operator token has been consumed.
    //
    // The pattern has to be parsed before the optional ESCAPE clause is looked for. Both used to be
    // written as sibling arguments of a single call, whose evaluation order the language leaves
    // unspecified, so ESCAPE could be probed before the pattern was consumed. Sequence them explicitly.
    auto parse_operands = [&](MatchOperator match_operator) -> Optional<NonnullRefPtr<Expression>> {
        auto rhs = parse_expression();

        RefPtr<Expression> escape;
        if (consume_if(TokenType::Escape))
            escape = parse_expression();

        return create_ast_node<MatchExpression>(match_operator, move(lhs), move(rhs), move(escape), invert_expression);
    };

    if (consume_if(TokenType::Like))
        return parse_operands(MatchOperator::Like);

    if (consume_if(TokenType::Glob))
        return parse_operands(MatchOperator::Glob);

    if (consume_if(TokenType::Match))
        return parse_operands(MatchOperator::Match);

    if (consume_if(TokenType::Regexp))
        return parse_operands(MatchOperator::Regexp);

    return {};
}
// Parses the postfix null tests ISNULL and NOTNULL, plus NULL itself when a NOT was consumed just
// before (invert_expression is set). NOTNULL forces the inversion flag on. Returns an empty Optional
// when the current token is none of these.
Optional<NonnullRefPtr<Expression>> Parser::parse_null_expression(NonnullRefPtr<Expression> expression, bool invert_expression)
{
    const bool is_null_test = match(TokenType::Isnull)
        || match(TokenType::Notnull)
        || (invert_expression && match(TokenType::Null));
    if (!is_null_test)
        return {};

    if (consume().type() == TokenType::Notnull)
        invert_expression = true;

    return create_ast_node<NullExpression>(move(expression), invert_expression);
}
// Parses "BETWEEN expr AND expr" with `expression` as the tested operand.
//
// The bounds are parsed as one ordinary expression, which is required to come out as a binary AND
// expression; its two operands are then used as the bounds. Anything else yields an ErrorExpression
// after calling expected(). Returns an empty Optional when the current token is not BETWEEN.
Optional<NonnullRefPtr<Expression>> Parser::parse_between_expression(NonnullRefPtr<Expression> expression, bool invert_expression)
{
    if (!match(TokenType::Between))
        return {};
    consume();

    auto bounds = parse_expression();
    if (!is<BinaryOperatorExpression>(*bounds)) {
        expected("Binary Expression");
        return create_ast_node<ErrorExpression>();
    }

    const auto& bounds_as_binary = static_cast<const BinaryOperatorExpression&>(*bounds);
    if (bounds_as_binary.type() == BinaryOperator::And)
        return create_ast_node<BetweenExpression>(move(expression), bounds_as_binary.lhs(), bounds_as_binary.rhs(), invert_expression);

    expected("AND Expression");
    return create_ast_node<ErrorExpression>();
}
// Parses the right-hand side of "expr [NOT] IN ...", with `expression` as the tested operand:
//   IN ( select-stmt )      -> InSelectionExpression
//   IN ( [expr [, ...]] )   -> InChainedExpression (the chain may be empty)
//   IN [schema.]table       -> InTableExpression
// Returns an empty Optional when the current token is not IN, and also for the not yet supported
// table-function form.
Optional<NonnullRefPtr<Expression>> Parser::parse_in_expression(NonnullRefPtr<Expression> expression, bool invert_expression)
{
    if (!match(TokenType::In))
        return {};
    consume();

    if (consume_if(TokenType::ParenOpen)) {
        if (match(TokenType::Select)) {
            auto select_statement = parse_select_statement({});
            // The subquery is enclosed in parentheses; consume the closing one so that it is not left
            // behind for whatever is parsed next (this matches the SELECT-opened path of
            // parse_exists_expression()).
            consume(TokenType::ParenClose);
            return create_ast_node<InSelectionExpression>(move(expression), move(select_statement), invert_expression);
        }

        // FIXME: Consolidate this with parse_chained_expression(). That method consumes the opening paren as
        //        well, and also requires at least one expression (whereas this allows for an empty chain).
        NonnullRefPtrVector<Expression> expressions;
        if (!match(TokenType::ParenClose))
            parse_comma_separated_list(false, [&]() { expressions.append(parse_expression()); });
        consume(TokenType::ParenClose);

        auto chain = create_ast_node<ChainedExpression>(move(expressions));
        return create_ast_node<InChainedExpression>(move(expression), move(chain), invert_expression);
    }

    String schema_name;
    String table_name;
    parse_schema_and_table_name(schema_name, table_name);

    if (match(TokenType::ParenOpen)) {
        // FIXME: Parse "table-function".
        return {};
    }

    return create_ast_node<InTableExpression>(move(expression), move(schema_name), move(table_name), invert_expression);
}
// Parses "column-name [type-name]". A column without an explicit type is given the type name "BLOB"
// with no signed-number arguments.
NonnullRefPtr<ColumnDefinition> Parser::parse_column_definition()
{
    // https://sqlite.org/syntax/column-def.html
    auto column_name = consume(TokenType::Identifier).value();

    // FIXME: Parse "column-constraint".

    if (match(TokenType::Identifier)) {
        auto declared_type = parse_type_name();
        return create_ast_node<ColumnDefinition>(move(column_name), move(declared_type));
    }

    // https://www.sqlite.org/datatype3.html: If no type is specified then the column has affinity BLOB.
    auto default_type = create_ast_node<TypeName>("BLOB", NonnullRefPtrVector<SignedNumber> {});
    return create_ast_node<ColumnDefinition>(move(column_name), move(default_type));
}
// Parses "name [ ( signed-number [, signed-number] ) ]".
NonnullRefPtr<TypeName> Parser::parse_type_name()
{
    // https://sqlite.org/syntax/type-name.html
    auto type_identifier = consume(TokenType::Identifier).value();
    NonnullRefPtrVector<SignedNumber> arguments;

    if (!consume_if(TokenType::ParenOpen))
        return create_ast_node<TypeName>(move(type_identifier), move(arguments));

    // Inside the parentheses, one number is mandatory and a second one is optional.
    arguments.append(parse_signed_number());
    if (consume_if(TokenType::Comma))
        arguments.append(parse_signed_number());
    consume(TokenType::ParenClose);

    return create_ast_node<TypeName>(move(type_identifier), move(arguments));
}
// Parses an optional '+' or '-' sign followed by a numeric literal. When no numeric literal follows,
// expected() is called and a SignedNumber of 0 is returned.
NonnullRefPtr<SignedNumber> Parser::parse_signed_number()
{
    // https://sqlite.org/syntax/signed-number.html
    // At most one sign token is consumed; a '-' is only looked for when there was no '+'.
    const bool is_negative = !consume_if(TokenType::Plus) && consume_if(TokenType::Minus);

    if (!match(TokenType::NumericLiteral)) {
        expected("NumericLiteral");
        return create_ast_node<SignedNumber>(0);
    }

    auto magnitude = consume(TokenType::NumericLiteral).double_value();
    return create_ast_node<SignedNumber>(is_negative ? (magnitude * -1) : magnitude);
}
// Parses "table-name [ ( column-name [, ...] ) ] AS ( select-stmt )".
NonnullRefPtr<CommonTableExpression> Parser::parse_common_table_expression()
{
    // https://sqlite.org/syntax/common-table-expression.html
    auto expression_name = consume(TokenType::Identifier).value();

    // The column name list is optional and, when present, enclosed in parentheses.
    Vector<String> column_names;
    if (match(TokenType::ParenOpen)) {
        parse_comma_separated_list(true, [&]() {
            column_names.append(consume(TokenType::Identifier).value());
        });
    }

    consume(TokenType::As);

    consume(TokenType::ParenOpen);
    auto defining_select = parse_select_statement({});
    consume(TokenType::ParenClose);

    return create_ast_node<CommonTableExpression>(move(expression_name), move(column_names), move(defining_select));
}
// Parses "[schema-name.]table-name [AS alias]".
NonnullRefPtr<QualifiedTableName> Parser::parse_qualified_table_name()
{
    // https://sqlite.org/syntax/qualified-table-name.html
    String schema_name;
    String table_name;
    parse_schema_and_table_name(schema_name, table_name);

    String table_alias;
    if (consume_if(TokenType::As))
        table_alias = consume(TokenType::Identifier).value();

    // Note: The qualified-table-name grammar also allows an "INDEXED BY index-name" or "NOT INDEXED" clause. It is a SQLite
    // extension "designed to help detect undesirable query plan changes during regression testing", and "application developers
    // are admonished to omit all use of INDEXED BY during application design, implementation, testing, and tuning". It is
    // deliberately left unparsed here until there is a good reason to support it.

    return create_ast_node<QualifiedTableName>(move(schema_name), move(table_name), move(table_alias));
}
// Parses "RETURNING *" or "RETURNING expr [[AS] column-alias] [, ...]".
NonnullRefPtr<ReturningClause> Parser::parse_returning_clause()
{
    // https://sqlite.org/syntax/returning-clause.html
    consume(TokenType::Returning);

    if (consume_if(TokenType::Asterisk))
        return create_ast_node<ReturningClause>();

    Vector<ReturningClause::ColumnClause> column_clauses;
    parse_comma_separated_list(false, [&]() {
        auto returned_expression = parse_expression();

        // An alias is introduced either by AS or simply by an identifier following the expression.
        String alias;
        const bool has_alias = consume_if(TokenType::As) || match(TokenType::Identifier);
        if (has_alias)
            alias = consume(TokenType::Identifier).value();

        column_clauses.append({ move(returned_expression), move(alias) });
    });

    return create_ast_node<ReturningClause>(move(column_clauses));
}
// Parses one result column of a SELECT statement: "*", "table-name.*", or "expr [[AS] column-alias]".
NonnullRefPtr<ResultColumn> Parser::parse_result_column()
{
// https://sqlite.org/syntax/result-column.html
if (consume_if(TokenType::Asterisk))
return create_ast_node<ResultColumn>();
// If we match an identifier now, we don't know whether it is a table-name of the form "table-name.*", or if it is the start of a
// column-name-expression, until we try to parse the asterisk. So if we consume an identifier and a period, but don't find an
// asterisk, hold onto that information to form a column-name-expression later.
String table_name;
bool parsed_period = false;
if (match(TokenType::Identifier)) {
table_name = consume().value();
parsed_period = consume_if(TokenType::Period);
if (parsed_period && consume_if(TokenType::Asterisk))
return create_ast_node<ResultColumn>(move(table_name));
}
// When no identifier was consumed above, parse a general expression. Otherwise hand the consumed identifier
// (and whether a period followed it) to the column name parser.
auto expression = table_name.is_null()
? parse_expression()
: static_cast<NonnullRefPtr<Expression>>(*parse_column_name_expression(move(table_name), parsed_period));
// An alias is introduced either by AS or simply by an identifier following the expression.
String column_alias;
if (consume_if(TokenType::As) || match(TokenType::Identifier))
column_alias = consume(TokenType::Identifier).value();
return create_ast_node<ResultColumn>(move(expression), move(column_alias));
}
// Parses either a (schema-qualified) table name with an optional alias, or a parenthesized, comma separated
// list of nested table-or-subquery entries.
NonnullRefPtr<TableOrSubquery> Parser::parse_table_or_subquery()
{
    // Track the nesting depth so that overly deep input is reported as a syntax error. Note that parsing
    // continues after the error has been recorded.
    if (++m_parser_state.m_current_subquery_depth > Limits::maximum_subquery_depth)
        syntax_error(String::formatted("Exceeded maximum subquery depth of {}", Limits::maximum_subquery_depth));

    // Undo the increment above on every exit path from this function.
    ScopeGuard guard([&]() { --m_parser_state.m_current_subquery_depth; });

    // https://sqlite.org/syntax/table-or-subquery.html
    if (!match(TokenType::Identifier)) {
        // FIXME: Parse join-clause.
        NonnullRefPtrVector<TableOrSubquery> nested_entries;
        parse_comma_separated_list(true, [&]() { nested_entries.append(parse_table_or_subquery()); });

        return create_ast_node<TableOrSubquery>(move(nested_entries));
    }

    String schema;
    String table;
    parse_schema_and_table_name(schema, table);

    // The alias may be introduced by AS, or simply follow the table name as a bare identifier.
    String alias;
    const bool has_alias = consume_if(TokenType::As) || match(TokenType::Identifier);
    if (has_alias)
        alias = consume(TokenType::Identifier).value();

    return create_ast_node<TableOrSubquery>(move(schema), move(table), move(alias));
}
// Parses a single ordering term: an expression, an optional collation, an optional sort direction (ASC or DESC)
// and an optional "NULLS FIRST" / "NULLS LAST" clause.
NonnullRefPtr<OrderingTerm> Parser::parse_ordering_term()
{
    // https://sqlite.org/syntax/ordering-term.html
    auto expression = parse_expression();

    String collation_name;
    if (is<CollateExpression>(*expression)) {
        // The parsed expression is itself a COLLATE expression: lift its collation name and its inner expression
        // out so that the ordering term carries them separately.
        const auto& collate = static_cast<const CollateExpression&>(*expression);
        collation_name = collate.collation_name();
        expression = collate.expression();
    } else if (consume_if(TokenType::Collate)) {
        collation_name = consume(TokenType::Identifier).value();
    }

    // The grammar allows at most one of ASC or DESC. Only look for ASC when DESC was not given, so that input
    // such as "DESC ASC" is not silently accepted; the stray token is left for the caller to reject.
    Order order = Order::Ascending;
    if (consume_if(TokenType::Desc))
        order = Order::Descending;
    else
        consume_if(TokenType::Asc); // ASC is the default, so ignore it if specified.

    // Without an explicit NULLS clause, NULLs sort first for ascending order and last for descending order.
    Nulls nulls = order == Order::Ascending ? Nulls::First : Nulls::Last;
    if (consume_if(TokenType::Nulls)) {
        if (consume_if(TokenType::First))
            nulls = Nulls::First;
        else if (consume_if(TokenType::Last))
            nulls = Nulls::Last;
        else
            expected("FIRST or LAST");
    }

    return create_ast_node<OrderingTerm>(move(expression), move(collation_name), order, nulls);
}
// Parses "table-name" or "schema-name.table-name". `table_name` is always assigned; `schema_name` is only
// assigned when a period follows the first identifier.
void Parser::parse_schema_and_table_name(String& schema_name, String& table_name)
{
    String first_identifier = consume(TokenType::Identifier).value();

    // Without a period, the single identifier is the table name.
    if (!consume_if(TokenType::Period)) {
        table_name = move(first_identifier);
        return;
    }

    schema_name = move(first_identifier);
    table_name = consume(TokenType::Identifier).value();
}
// Parses an optional "OR <resolution>" clause. Returns ConflictResolution::Abort when the clause is absent, and
// also after recording a syntax error for an unrecognized resolution keyword.
ConflictResolution Parser::parse_conflict_resolution()
{
    // https://sqlite.org/lang_conflict.html
    if (!consume_if(TokenType::Or))
        return ConflictResolution::Abort;

    struct ResolutionKeyword {
        TokenType token_type;
        ConflictResolution resolution;
    };
    const ResolutionKeyword keywords[] = {
        { TokenType::Abort, ConflictResolution::Abort },
        { TokenType::Fail, ConflictResolution::Fail },
        { TokenType::Ignore, ConflictResolution::Ignore },
        { TokenType::Replace, ConflictResolution::Replace },
        { TokenType::Rollback, ConflictResolution::Rollback },
    };

    for (const auto& keyword : keywords) {
        if (consume_if(keyword.token_type))
            return keyword.resolution;
    }

    expected("ABORT, FAIL, IGNORE, REPLACE, or ROLLBACK");
    return ConflictResolution::Abort;
}
// Advances to the next token from the lexer and returns the token that was current before advancing.
Token Parser::consume()
{
    auto consumed_token = m_parser_state.m_token;
    m_parser_state.m_token = m_parser_state.m_lexer.next();

    return consumed_token;
}
// Consumes the current token, recording a syntax error first if it is not of the expected type. The token is
// consumed and returned in either case.
Token Parser::consume(TokenType expected_type)
{
    if (match(expected_type))
        return consume();

    expected(Token::name(expected_type));
    return consume();
}
// Consumes the current token only if it has the given type. Returns whether a token was consumed.
bool Parser::consume_if(TokenType expected_type)
{
    if (match(expected_type)) {
        consume();
        return true;
    }

    return false;
}
// Returns whether the current token has the given type, without consuming it.
bool Parser::match(TokenType type) const
{
    const auto current_type = m_parser_state.m_token.type();
    return current_type == type;
}
// Records a syntax error stating that the current token was unexpected and describing what was expected instead.
void Parser::expected(StringView what)
{
    auto message = String::formatted("Unexpected token {}, expected {}", m_parser_state.m_token.name(), what);
    syntax_error(move(message));
}
// Appends an error with the given message, tagged with the position of the current token, to the parser's error list.
void Parser::syntax_error(String message)
{
    const auto error_position = position();
    m_parser_state.m_errors.append({ move(message), error_position });
}
// Returns the start position of the current (not yet consumed) token.
SourcePosition Parser::position() const
{
    const auto& current_token = m_parser_state.m_token;
    return current_token.start_position();
}
// Takes ownership of the lexer and immediately fetches the first token from it, so that the parser state starts
// out with a current token.
Parser::ParserState::ParserState(Lexer lexer)
: m_lexer(move(lexer))
// NOTE(review): this initializer reads m_lexer, so it relies on m_lexer being declared before m_token in the
// class definition (not visible here) — confirm against the header.
, m_token(m_lexer.next())
{
}
}