/* * Copyright (c) 2021, Tim Flynn * * SPDX-License-Identifier: BSD-2-Clause */ #include "Parser.h" #include namespace SQL { Parser::Parser(Lexer lexer) : m_parser_state(move(lexer)) { } NonnullRefPtr Parser::next_statement() { if (match(TokenType::With)) { auto common_table_expression_list = parse_common_table_expression_list(); return parse_statement_with_expression_list(move(common_table_expression_list)); } return parse_statement(); } NonnullRefPtr Parser::parse_statement() { switch (m_parser_state.m_token.type()) { case TokenType::Create: return parse_create_table_statement(); case TokenType::Drop: return parse_drop_table_statement(); case TokenType::Delete: return parse_delete_statement({}); case TokenType::Select: return parse_select_statement({}); default: expected("CREATE, DROP, DELETE, or SELECT"); return create_ast_node(); } } NonnullRefPtr Parser::parse_statement_with_expression_list(RefPtr common_table_expression_list) { switch (m_parser_state.m_token.type()) { case TokenType::Delete: return parse_delete_statement(move(common_table_expression_list)); case TokenType::Select: return parse_select_statement(move(common_table_expression_list)); default: expected("DELETE or SELECT"); return create_ast_node(); } } NonnullRefPtr Parser::parse_create_table_statement() { // https://sqlite.org/lang_createtable.html consume(TokenType::Create); bool is_temporary = false; if (consume_if(TokenType::Temp) || consume_if(TokenType::Temporary)) is_temporary = true; consume(TokenType::Table); bool is_error_if_table_exists = true; if (consume_if(TokenType::If)) { consume(TokenType::Not); consume(TokenType::Exists); is_error_if_table_exists = false; } String schema_name; String table_name; parse_schema_and_table_name(schema_name, table_name); // FIXME: Parse "AS select-stmt". NonnullRefPtrVector column_definitions; parse_comma_separated_list(true, [&]() { column_definitions.append(parse_column_definition()); }); // FIXME: Parse "table-constraint". consume(TokenType::SemiColon); return create_ast_node(move(schema_name), move(table_name), move(column_definitions), is_temporary, is_error_if_table_exists); } NonnullRefPtr Parser::parse_drop_table_statement() { // https://sqlite.org/lang_droptable.html consume(TokenType::Drop); consume(TokenType::Table); bool is_error_if_table_does_not_exist = true; if (consume_if(TokenType::If)) { consume(TokenType::Exists); is_error_if_table_does_not_exist = false; } String schema_name; String table_name; parse_schema_and_table_name(schema_name, table_name); consume(TokenType::SemiColon); return create_ast_node(move(schema_name), move(table_name), is_error_if_table_does_not_exist); } NonnullRefPtr Parser::parse_delete_statement(RefPtr common_table_expression_list) { // https://sqlite.org/lang_delete.html consume(TokenType::Delete); consume(TokenType::From); auto qualified_table_name = parse_qualified_table_name(); RefPtr where_clause; if (consume_if(TokenType::Where)) where_clause = parse_expression(); RefPtr returning_clause; if (match(TokenType::Returning)) returning_clause = parse_returning_clause(); consume(TokenType::SemiColon); return create_ast_node(move(common_table_expression_list), move(qualified_table_name), move(where_clause), move(returning_clause)); } NonnullRefPtr(move(common_table_expression_list), select_all, move(result_column_list), move(table_or_subquery_list), move(where_clause), move(group_by_clause), move(ordering_term_list), move(limit_clause)); } NonnullRefPtr Parser::parse_common_table_expression_list() { consume(TokenType::With); bool recursive = consume_if(TokenType::Recursive); NonnullRefPtrVector common_table_expression; parse_comma_separated_list(false, [&]() { common_table_expression.append(parse_common_table_expression()); }); return create_ast_node(recursive, move(common_table_expression)); } NonnullRefPtr Parser::parse_expression() { // https://sqlite.org/lang_expr.html auto expression = parse_primary_expression(); if (match_secondary_expression()) expression = parse_secondary_expression(move(expression)); // FIXME: Parse 'bind-parameter'. // FIXME: Parse 'function-name'. // FIXME: Parse 'exists'. // FIXME: Parse 'raise-function'. return expression; } NonnullRefPtr Parser::parse_primary_expression() { if (auto expression = parse_literal_value_expression(); expression.has_value()) return move(expression.value()); if (auto expression = parse_column_name_expression(); expression.has_value()) return move(expression.value()); if (auto expression = parse_unary_operator_expression(); expression.has_value()) return move(expression.value()); if (auto expression = parse_chained_expression(); expression.has_value()) return move(expression.value()); if (auto expression = parse_cast_expression(); expression.has_value()) return move(expression.value()); if (auto expression = parse_case_expression(); expression.has_value()) return move(expression.value()); expected("Primary Expression"); consume(); return create_ast_node(); } NonnullRefPtr Parser::parse_secondary_expression(NonnullRefPtr primary) { if (auto expression = parse_binary_operator_expression(primary); expression.has_value()) return move(expression.value()); if (auto expression = parse_collate_expression(primary); expression.has_value()) return move(expression.value()); if (auto expression = parse_is_expression(primary); expression.has_value()) return move(expression.value()); bool invert_expression = false; if (consume_if(TokenType::Not)) invert_expression = true; if (auto expression = parse_match_expression(primary, invert_expression); expression.has_value()) return move(expression.value()); if (auto expression = parse_null_expression(primary, invert_expression); expression.has_value()) return move(expression.value()); if (auto expression = parse_between_expression(primary, invert_expression); expression.has_value()) return move(expression.value()); if (auto expression = parse_in_expression(primary, invert_expression); expression.has_value()) return move(expression.value()); expected("Secondary Expression"); consume(); return create_ast_node(); } bool Parser::match_secondary_expression() const { return match(TokenType::Not) || match(TokenType::DoublePipe) || match(TokenType::Asterisk) || match(TokenType::Divide) || match(TokenType::Modulus) || match(TokenType::Plus) || match(TokenType::Minus) || match(TokenType::ShiftLeft) || match(TokenType::ShiftRight) || match(TokenType::Ampersand) || match(TokenType::Pipe) || match(TokenType::LessThan) || match(TokenType::LessThanEquals) || match(TokenType::GreaterThan) || match(TokenType::GreaterThanEquals) || match(TokenType::Equals) || match(TokenType::EqualsEquals) || match(TokenType::NotEquals1) || match(TokenType::NotEquals2) || match(TokenType::And) || match(TokenType::Or) || match(TokenType::Collate) || match(TokenType::Is) || match(TokenType::Like) || match(TokenType::Glob) || match(TokenType::Match) || match(TokenType::Regexp) || match(TokenType::Isnull) || match(TokenType::Notnull) || match(TokenType::Between) || match(TokenType::In); } Optional> Parser::parse_literal_value_expression() { if (match(TokenType::NumericLiteral)) { auto value = consume().double_value(); return create_ast_node(value); } if (match(TokenType::StringLiteral)) { // TODO: Should the surrounding ' ' be removed here? auto value = consume().value(); return create_ast_node(value); } if (match(TokenType::BlobLiteral)) { // TODO: Should the surrounding x' ' be removed here? auto value = consume().value(); return create_ast_node(value); } if (consume_if(TokenType::Null)) return create_ast_node(); return {}; } Optional> Parser::parse_column_name_expression(String with_parsed_identifier, bool with_parsed_period) { if (with_parsed_identifier.is_null() && !match(TokenType::Identifier)) return {}; String first_identifier; if (with_parsed_identifier.is_null()) first_identifier = consume(TokenType::Identifier).value(); else first_identifier = move(with_parsed_identifier); String schema_name; String table_name; String column_name; if (with_parsed_period || consume_if(TokenType::Period)) { String second_identifier = consume(TokenType::Identifier).value(); if (consume_if(TokenType::Period)) { schema_name = move(first_identifier); table_name = move(second_identifier); column_name = consume(TokenType::Identifier).value(); } else { table_name = move(first_identifier); column_name = move(second_identifier); } } else { column_name = move(first_identifier); } return create_ast_node(move(schema_name), move(table_name), move(column_name)); } Optional> Parser::parse_unary_operator_expression() { if (consume_if(TokenType::Minus)) return create_ast_node(UnaryOperator::Minus, parse_expression()); if (consume_if(TokenType::Plus)) return create_ast_node(UnaryOperator::Plus, parse_expression()); if (consume_if(TokenType::Tilde)) return create_ast_node(UnaryOperator::BitwiseNot, parse_expression()); if (consume_if(TokenType::Not)) return create_ast_node(UnaryOperator::Not, parse_expression()); return {}; } Optional> Parser::parse_binary_operator_expression(NonnullRefPtr lhs) { if (consume_if(TokenType::DoublePipe)) return create_ast_node(BinaryOperator::Concatenate, move(lhs), parse_expression()); if (consume_if(TokenType::Asterisk)) return create_ast_node(BinaryOperator::Multiplication, move(lhs), parse_expression()); if (consume_if(TokenType::Divide)) return create_ast_node(BinaryOperator::Division, move(lhs), parse_expression()); if (consume_if(TokenType::Modulus)) return create_ast_node(BinaryOperator::Modulo, move(lhs), parse_expression()); if (consume_if(TokenType::Plus)) return create_ast_node(BinaryOperator::Plus, move(lhs), parse_expression()); if (consume_if(TokenType::Minus)) return create_ast_node(BinaryOperator::Minus, move(lhs), parse_expression()); if (consume_if(TokenType::ShiftLeft)) return create_ast_node(BinaryOperator::ShiftLeft, move(lhs), parse_expression()); if (consume_if(TokenType::ShiftRight)) return create_ast_node(BinaryOperator::ShiftRight, move(lhs), parse_expression()); if (consume_if(TokenType::Ampersand)) return create_ast_node(BinaryOperator::BitwiseAnd, move(lhs), parse_expression()); if (consume_if(TokenType::Pipe)) return create_ast_node(BinaryOperator::BitwiseOr, move(lhs), parse_expression()); if (consume_if(TokenType::LessThan)) return create_ast_node(BinaryOperator::LessThan, move(lhs), parse_expression()); if (consume_if(TokenType::LessThanEquals)) return create_ast_node(BinaryOperator::LessThanEquals, move(lhs), parse_expression()); if (consume_if(TokenType::GreaterThan)) return create_ast_node(BinaryOperator::GreaterThan, move(lhs), parse_expression()); if (consume_if(TokenType::GreaterThanEquals)) return create_ast_node(BinaryOperator::GreaterThanEquals, move(lhs), parse_expression()); if (consume_if(TokenType::Equals) || consume_if(TokenType::EqualsEquals)) return create_ast_node(BinaryOperator::Equals, move(lhs), parse_expression()); if (consume_if(TokenType::NotEquals1) || consume_if(TokenType::NotEquals2)) return create_ast_node(BinaryOperator::NotEquals, move(lhs), parse_expression()); if (consume_if(TokenType::And)) return create_ast_node(BinaryOperator::And, move(lhs), parse_expression()); if (consume_if(TokenType::Or)) return create_ast_node(BinaryOperator::Or, move(lhs), parse_expression()); return {}; } Optional> Parser::parse_chained_expression() { if (!match(TokenType::ParenOpen)) return {}; NonnullRefPtrVector expressions; parse_comma_separated_list(true, [&]() { expressions.append(parse_expression()); }); return create_ast_node(move(expressions)); } Optional> Parser::parse_cast_expression() { if (!match(TokenType::Cast)) return {}; consume(TokenType::Cast); consume(TokenType::ParenOpen); auto expression = parse_expression(); consume(TokenType::As); auto type_name = parse_type_name(); consume(TokenType::ParenClose); return create_ast_node(move(expression), move(type_name)); } Optional> Parser::parse_case_expression() { if (!match(TokenType::Case)) return {}; consume(); RefPtr case_expression; if (!match(TokenType::When)) { case_expression = parse_expression(); } Vector when_then_clauses; do { consume(TokenType::When); auto when = parse_expression(); consume(TokenType::Then); auto then = parse_expression(); when_then_clauses.append({ move(when), move(then) }); if (!match(TokenType::When)) break; } while (!match(TokenType::Eof)); RefPtr else_expression; if (consume_if(TokenType::Else)) else_expression = parse_expression(); consume(TokenType::End); return create_ast_node(move(case_expression), move(when_then_clauses), move(else_expression)); } Optional> Parser::parse_collate_expression(NonnullRefPtr expression) { if (!match(TokenType::Collate)) return {}; consume(); String collation_name = consume(TokenType::Identifier).value(); return create_ast_node(move(expression), move(collation_name)); } Optional> Parser::parse_is_expression(NonnullRefPtr expression) { if (!match(TokenType::Is)) return {}; consume(); bool invert_expression = false; if (match(TokenType::Not)) { consume(); invert_expression = true; } auto rhs = parse_expression(); return create_ast_node(move(expression), move(rhs), invert_expression); } Optional> Parser::parse_match_expression(NonnullRefPtr lhs, bool invert_expression) { auto parse_escape = [this]() { RefPtr escape; if (consume_if(TokenType::Escape)) escape = parse_expression(); return escape; }; if (consume_if(TokenType::Like)) return create_ast_node(MatchOperator::Like, move(lhs), parse_expression(), parse_escape(), invert_expression); if (consume_if(TokenType::Glob)) return create_ast_node(MatchOperator::Glob, move(lhs), parse_expression(), parse_escape(), invert_expression); if (consume_if(TokenType::Match)) return create_ast_node(MatchOperator::Match, move(lhs), parse_expression(), parse_escape(), invert_expression); if (consume_if(TokenType::Regexp)) return create_ast_node(MatchOperator::Regexp, move(lhs), parse_expression(), parse_escape(), invert_expression); return {}; } Optional> Parser::parse_null_expression(NonnullRefPtr expression, bool invert_expression) { if (!match(TokenType::Isnull) && !match(TokenType::Notnull) && !(invert_expression && match(TokenType::Null))) return {}; auto type = consume().type(); invert_expression |= (type == TokenType::Notnull); return create_ast_node(move(expression), invert_expression); } Optional> Parser::parse_between_expression(NonnullRefPtr expression, bool invert_expression) { if (!match(TokenType::Between)) return {}; consume(); auto nested = parse_expression(); if (!is(*nested)) { expected("Binary Expression"); return create_ast_node(); } const auto& binary_expression = static_cast(*nested); if (binary_expression.type() != BinaryOperator::And) { expected("AND Expression"); return create_ast_node(); } return create_ast_node(move(expression), binary_expression.lhs(), binary_expression.rhs(), invert_expression); } Optional> Parser::parse_in_expression(NonnullRefPtr expression, bool invert_expression) { if (!match(TokenType::In)) return {}; consume(); if (consume_if(TokenType::ParenOpen)) { if (match(TokenType::Select)) { // FIXME: Parse "select-stmt". return {}; } // FIXME: Consolidate this with parse_chained_expression(). That method consumes the opening paren as // well, and also requires at least one expression (whereas this allows for an empty chain). NonnullRefPtrVector expressions; if (!match(TokenType::ParenClose)) parse_comma_separated_list(false, [&]() { expressions.append(parse_expression()); }); consume(TokenType::ParenClose); auto chain = create_ast_node(move(expressions)); return create_ast_node(move(expression), move(chain), invert_expression); } String schema_name; String table_name; parse_schema_and_table_name(schema_name, table_name); if (match(TokenType::ParenOpen)) { // FIXME: Parse "table-function". return {}; } return create_ast_node(move(expression), move(schema_name), move(table_name), invert_expression); } NonnullRefPtr Parser::parse_column_definition() { // https://sqlite.org/syntax/column-def.html auto name = consume(TokenType::Identifier).value(); auto type_name = match(TokenType::Identifier) ? parse_type_name() // https://www.sqlite.org/datatype3.html: If no type is specified then the column has affinity BLOB. : create_ast_node("BLOB", NonnullRefPtrVector {}); // FIXME: Parse "column-constraint". return create_ast_node(move(name), move(type_name)); } NonnullRefPtr Parser::parse_type_name() { // https: //sqlite.org/syntax/type-name.html auto name = consume(TokenType::Identifier).value(); NonnullRefPtrVector signed_numbers; if (consume_if(TokenType::ParenOpen)) { signed_numbers.append(parse_signed_number()); if (consume_if(TokenType::Comma)) signed_numbers.append(parse_signed_number()); consume(TokenType::ParenClose); } return create_ast_node(move(name), move(signed_numbers)); } NonnullRefPtr Parser::parse_signed_number() { // https://sqlite.org/syntax/signed-number.html bool is_positive = true; if (consume_if(TokenType::Plus)) is_positive = true; else if (consume_if(TokenType::Minus)) is_positive = false; if (match(TokenType::NumericLiteral)) { auto number = consume(TokenType::NumericLiteral).double_value(); return create_ast_node(is_positive ? number : (number * -1)); } expected("NumericLiteral"); return create_ast_node(0); } NonnullRefPtr Parser::parse_common_table_expression() { // https://sqlite.org/syntax/common-table-expression.html auto table_name = consume(TokenType::Identifier).value(); Vector column_names; if (match(TokenType::ParenOpen)) parse_comma_separated_list(true, [&]() { column_names.append(consume(TokenType::Identifier).value()); }); consume(TokenType::As); consume(TokenType::ParenOpen); // FIXME: Parse "select-stmt". consume(TokenType::ParenClose); return create_ast_node(move(table_name), move(column_names)); } NonnullRefPtr Parser::parse_qualified_table_name() { // https://sqlite.org/syntax/qualified-table-name.html String schema_name; String table_name; parse_schema_and_table_name(schema_name, table_name); String alias; if (consume_if(TokenType::As)) alias = consume(TokenType::Identifier).value(); // Note: The qualified-table-name spec may include an "INDEXED BY index-name" or "NOT INDEXED" clause. This is a SQLite extension // "designed to help detect undesirable query plan changes during regression testing", and "application developers are admonished // to omit all use of INDEXED BY during application design, implementation, testing, and tuning". Our implementation purposefully // omits parsing INDEXED BY for now until there is good reason to add support. return create_ast_node(move(schema_name), move(table_name), move(alias)); } NonnullRefPtr Parser::parse_returning_clause() { // https://sqlite.org/syntax/returning-clause.html consume(TokenType::Returning); if (consume_if(TokenType::Asterisk)) return create_ast_node(); Vector columns; parse_comma_separated_list(false, [&]() { auto expression = parse_expression(); String column_alias; if (consume_if(TokenType::As) || match(TokenType::Identifier)) column_alias = consume(TokenType::Identifier).value(); columns.append({ move(expression), move(column_alias) }); }); return create_ast_node(move(columns)); } NonnullRefPtr Parser::parse_result_column() { // https://sqlite.org/syntax/result-column.html if (consume_if(TokenType::Asterisk)) return create_ast_node(); // If we match an identifier now, we don't know whether it is a table-name of the form "table-name.*", or if it is the start of a // column-name-expression, until we try to parse the asterisk. So if we consume an indentifier and a period, but don't find an // asterisk, hold onto that information to form a column-name-expression later. String table_name; bool parsed_period = false; if (match(TokenType::Identifier)) { table_name = consume().value(); parsed_period = consume_if(TokenType::Period); if (parsed_period && consume_if(TokenType::Asterisk)) return create_ast_node(move(table_name)); } auto expression = table_name.is_null() ? parse_expression() : static_cast>(*parse_column_name_expression(move(table_name), parsed_period)); String column_alias; if (consume_if(TokenType::As) || match(TokenType::Identifier)) column_alias = consume(TokenType::Identifier).value(); return create_ast_node(move(expression), move(column_alias)); } NonnullRefPtr Parser::parse_table_or_subquery() { // https://sqlite.org/syntax/table-or-subquery.html if (match(TokenType::Identifier)) { String schema_name; String table_name; parse_schema_and_table_name(schema_name, table_name); String table_alias; if (consume_if(TokenType::As) || match(TokenType::Identifier)) table_alias = consume(TokenType::Identifier).value(); return create_ast_node(move(schema_name), move(table_name), move(table_alias)); } // FIXME: Parse join-clause. NonnullRefPtrVector subqueries; parse_comma_separated_list(true, [&]() { subqueries.append(parse_table_or_subquery()); }); return create_ast_node(move(subqueries)); } NonnullRefPtr Parser::parse_ordering_term() { // https://sqlite.org/syntax/ordering-term.html auto expression = parse_expression(); String collation_name; if (is(*expression)) { const auto& collate = static_cast(*expression); collation_name = collate.collation_name(); expression = collate.expression(); } else if (consume_if(TokenType::Collate)) { collation_name = consume(TokenType::Identifier).value(); } Order order = consume_if(TokenType::Desc) ? Order::Descending : Order::Ascending; consume_if(TokenType::Asc); // ASC is the default, so ignore it if specified. Nulls nulls = order == Order::Ascending ? Nulls::First : Nulls::Last; if (consume_if(TokenType::Nulls)) { if (consume_if(TokenType::First)) nulls = Nulls::First; else if (consume_if(TokenType::Last)) nulls = Nulls::Last; else expected("FIRST or LAST"); } return create_ast_node(move(expression), move(collation_name), order, nulls); } void Parser::parse_schema_and_table_name(String& schema_name, String& table_name) { String schema_or_table_name = consume(TokenType::Identifier).value(); if (consume_if(TokenType::Period)) { schema_name = move(schema_or_table_name); table_name = consume(TokenType::Identifier).value(); } else { table_name = move(schema_or_table_name); } } Token Parser::consume() { auto old_token = m_parser_state.m_token; m_parser_state.m_token = m_parser_state.m_lexer.next(); return old_token; } Token Parser::consume(TokenType expected_type) { if (!match(expected_type)) { expected(Token::name(expected_type)); } return consume(); } bool Parser::consume_if(TokenType expected_type) { if (!match(expected_type)) return false; consume(); return true; } bool Parser::match(TokenType type) const { return m_parser_state.m_token.type() == type; } void Parser::expected(StringView what) { syntax_error(String::formatted("Unexpected token {}, expected {}", m_parser_state.m_token.name(), what)); } void Parser::syntax_error(String message) { m_parser_state.m_errors.append({ move(message), position() }); } Parser::Position Parser::position() const { return { m_parser_state.m_token.line_number(), m_parser_state.m_token.line_column() }; } Parser::ParserState::ParserState(Lexer lexer) : m_lexer(move(lexer)) , m_token(m_lexer.next()) { } }