From 66216d3af682328ca0a019d93ec988b3b6e40ab7 Mon Sep 17 00:00:00 2001 From: mnlrsn Date: Sat, 22 Jan 2022 15:43:30 +0100 Subject: [PATCH] LibSQL: Add simple REGEXP match The implementation of LIKE uses regexes under the hood, and this implementation of REGEXP takes the same approach. It employs PosixExtended from LibRegex with the case-insensitive and Unicode flags set. The implementation of LIKE is based on the SQLite specs, but SQLite does not give directions for built-in regex functionality, so this one uses LibRegex. --- Tests/LibSQL/TestSqlStatementExecution.cpp | 62 ++++++++++++++++++++ Userland/Libraries/LibSQL/AST/Expression.cpp | 19 +++++- 2 files changed, 80 insertions(+), 1 deletion(-) diff --git a/Tests/LibSQL/TestSqlStatementExecution.cpp b/Tests/LibSQL/TestSqlStatementExecution.cpp index 562d6e5c0b3..aabe474a872 100644 --- a/Tests/LibSQL/TestSqlStatementExecution.cpp +++ b/Tests/LibSQL/TestSqlStatementExecution.cpp @@ -449,6 +449,68 @@ TEST_CASE(select_with_order) EXPECT_EQ(rows[4].row[0].to_string(), "Test_5"); } +TEST_CASE(select_with_regexp) +{ + ScopeGuard guard([]() { unlink(db_name); }); + auto database = SQL::Database::construct(db_name); + EXPECT(!database->open().is_error()); + create_table(database); + auto result = execute(database, + "INSERT INTO TestSchema.TestTable ( TextColumn, IntColumn ) VALUES " + "( 'Test+1', 42 ), " + "( 'Pröv+2', 43 ), " + "( 'Test(3)', 44 ), " + "( 'Test[4]', 45 ), " + "( 'Test+5', 46 ), " + "( 'Another-Test_6', 47 );"); + EXPECT(result->error().code == SQL::SQLErrorCode::NoError); + EXPECT(result->inserted() == 6); + + // Simple match + result = execute(database, "SELECT TextColumn FROM TestSchema.TestTable WHERE TextColumn REGEXP 'Test\\+1';"); + EXPECT(result->error().code == SQL::SQLErrorCode::NoError); + EXPECT(result->has_results()); + EXPECT_EQ(result->results().size(), 1u); + + // Match all + result = execute(database, "SELECT TextColumn FROM TestSchema.TestTable WHERE TextColumn REGEXP '.*';"); + 
EXPECT(result->error().code == SQL::SQLErrorCode::NoError); + EXPECT(result->has_results()); + EXPECT_EQ(result->results().size(), 6u); + + // Anchored match with wildcards: only values that start with 'Test' qualify + result = execute(database, "SELECT TextColumn FROM TestSchema.TestTable WHERE TextColumn REGEXP '^Test.+';"); + EXPECT(result->error().code == SQL::SQLErrorCode::NoError); + EXPECT(result->has_results()); + EXPECT_EQ(result->results().size(), 4u); + + // Match with case-insensitive basic Latin and case-sensitive Swedish ö + // FIXME: If LibRegex gains case-insensitive matching of Unicode characters, + // this test should be updated to match 'PRÖV' instead. + result = execute(database, "SELECT TextColumn FROM TestSchema.TestTable WHERE TextColumn REGEXP 'PRöV.*';"); + EXPECT(result->error().code == SQL::SQLErrorCode::NoError); + EXPECT(result->has_results()); + EXPECT_EQ(result->results().size(), 1u); +} + +TEST_CASE(handle_regexp_errors) +{ + ScopeGuard guard([]() { unlink(db_name); }); + auto database = SQL::Database::construct(db_name); + EXPECT(!database->open().is_error()); + create_table(database); + auto result = execute(database, + "INSERT INTO TestSchema.TestTable ( TextColumn, IntColumn ) VALUES " + "( 'Test', 0 );"); + EXPECT(result->error().code == SQL::SQLErrorCode::NoError); + EXPECT(result->inserted() == 1); + + // Malformed regex (unmatched square bracket): the query must report an error and yield no results + result = execute(database, "SELECT TextColumn FROM TestSchema.TestTable WHERE TextColumn REGEXP 'Test\\+[0-9.*';"); + EXPECT(result->error().code != SQL::SQLErrorCode::NoError); + EXPECT(!result->has_results()); +} + TEST_CASE(select_with_order_two_columns) { ScopeGuard guard([]() { unlink(db_name); }); diff --git a/Userland/Libraries/LibSQL/AST/Expression.cpp b/Userland/Libraries/LibSQL/AST/Expression.cpp index ad189c5eb65..5fe0fc670e8 100644 --- a/Userland/Libraries/LibSQL/AST/Expression.cpp +++ b/Userland/Libraries/LibSQL/AST/Expression.cpp @@ -242,9 +242,26 @@ Value MatchExpression::evaluate(ExecutionContext& 
context) const auto result = regex.match(lhs_value.to_string(), PosixFlags::Insensitive | PosixFlags::Unicode); return Value(invert_expression() ? !result.success : result.success); } + case MatchOperator::Regexp: { /* REGEXP: match the left-hand value against the right-hand value used as the pattern. */ + Value lhs_value = lhs()->evaluate(context); + Value rhs_value = rhs()->evaluate(context); + + auto regex = Regex<PosixExtended>(rhs_value.to_string()); /* Compile the pattern with the PosixExtended parser. */ + auto err = regex.parser_result.error; + if (err != regex::Error::NoError) { /* Pattern failed to parse: record a syntax error on the result and evaluate to false. */ + StringBuilder builder; + builder.append("Regular expression: "); + builder.append(get_error_string(err)); + + context.result->set_error(SQLErrorCode::SyntaxError, builder.build()); + return Value(false); + } + + auto result = regex.match(lhs_value.to_string(), PosixFlags::Insensitive | PosixFlags::Unicode); /* Matched with the Insensitive and Unicode flags; negated below when invert_expression() is true. */ + return Value(invert_expression() ? !result.success : result.success); + } case MatchOperator::Glob: case MatchOperator::Match: - case MatchOperator::Regexp: default: VERIFY_NOT_REACHED(); }