LibSQL: Add simple REGEXP match

The implementation of LIKE uses regexes under the hood, and this
implementation of REGEXP takes the same approach. It employs
PosixExtended from LibRegex with case insensitive and Unicode flags
set. The implementation of LIKE is based on the SQLite specs, but SQLite
does not offer directions for built-in regex functionality, so this
one uses LibRegex.
This commit is contained in:
mnlrsn 2022-01-22 15:43:30 +01:00 committed by Ali Mohammad Pur
parent ace36681ff
commit 66216d3af6
Notes: sideshowbarker 2024-07-17 20:22:42 +09:00
2 changed files with 80 additions and 1 deletions

View file

@ -449,6 +449,68 @@ TEST_CASE(select_with_order)
EXPECT_EQ(rows[4].row[0].to_string(), "Test_5");
}
// Exercises the REGEXP operator in a WHERE clause: six rows are inserted and
// several patterns are run against TextColumn, checking only how many rows
// each pattern selects.
TEST_CASE(select_with_regexp)
{
    // Delete the database file once the test scope is left.
    ScopeGuard cleanup([] { unlink(db_name); });
    auto db = SQL::Database::construct(db_name);
    EXPECT(!db->open().is_error());
    create_table(db);

    // Seed data: several values contain regex metacharacters (+, (, ), [, ]).
    auto insert_result = execute(db,
        "INSERT INTO TestSchema.TestTable ( TextColumn, IntColumn ) VALUES "
        "( 'Test+1', 42 ), "
        "( 'Pröv+2', 43 ), "
        "( 'Test(3)', 44 ), "
        "( 'Test[4]', 45 ), "
        "( 'Test+5', 46 ), "
        "( 'Another-Test_6', 47 );");
    EXPECT(insert_result->error().code == SQL::SQLErrorCode::NoError);
    EXPECT(insert_result->inserted() == 6);

    // Runs one SELECT and checks that it succeeded, produced results, and
    // returned exactly `expected_rows` rows.
    auto expect_row_count = [&](char const* query, size_t expected_rows) {
        auto select_result = execute(db, query);
        EXPECT(select_result->error().code == SQL::SQLErrorCode::NoError);
        EXPECT(select_result->has_results());
        EXPECT_EQ(select_result->results().size(), expected_rows);
    };

    // Simple match
    expect_row_count("SELECT TextColumn FROM TestSchema.TestTable WHERE TextColumn REGEXP 'Test\\+1';", 1u);

    // Match all
    expect_row_count("SELECT TextColumn FROM TestSchema.TestTable WHERE TextColumn REGEXP '.*';", 6u);

    // Match with wildcards
    expect_row_count("SELECT TextColumn FROM TestSchema.TestTable WHERE TextColumn REGEXP '^Test.+';", 4u);

    // Match with case insensitive basic Latin and case sensitive Swedish ö
    // FIXME: If LibRegex is changed to support case insensitive matches of Unicode characters
    //        This test should be updated and changed to match 'PRÖV'.
    expect_row_count("SELECT TextColumn FROM TestSchema.TestTable WHERE TextColumn REGEXP 'PRöV.*';", 1u);
}
// Checks that a REGEXP pattern which fails to parse yields an error result
// with no rows, rather than a successful (possibly empty) result.
TEST_CASE(handle_regexp_errors)
{
    // Delete the database file once the test scope is left.
    ScopeGuard cleanup([] { unlink(db_name); });
    auto db = SQL::Database::construct(db_name);
    EXPECT(!db->open().is_error());
    create_table(db);

    // A single row is enough: the WHERE clause has to be evaluated at least once.
    auto insert_result = execute(db,
        "INSERT INTO TestSchema.TestTable ( TextColumn, IntColumn ) VALUES "
        "( 'Test', 0 );");
    EXPECT(insert_result->error().code == SQL::SQLErrorCode::NoError);
    EXPECT(insert_result->inserted() == 1);

    // Malformed regex, unmatched square bracket
    auto select_result = execute(db, "SELECT TextColumn FROM TestSchema.TestTable WHERE TextColumn REGEXP 'Test\\+[0-9.*';");
    EXPECT(select_result->error().code != SQL::SQLErrorCode::NoError);
    EXPECT(!select_result->has_results());
}
TEST_CASE(select_with_order_two_columns)
{
ScopeGuard guard([]() { unlink(db_name); });

View file

@ -242,9 +242,26 @@ Value MatchExpression::evaluate(ExecutionContext& context) const
auto result = regex.match(lhs_value.to_string(), PosixFlags::Insensitive | PosixFlags::Unicode);
return Value(invert_expression() ? !result.success : result.success);
}
case MatchOperator::Regexp: {
Value lhs_value = lhs()->evaluate(context);
Value rhs_value = rhs()->evaluate(context);
auto regex = Regex<PosixExtended>(rhs_value.to_string());
auto err = regex.parser_result.error;
if (err != regex::Error::NoError) {
StringBuilder builder;
builder.append("Regular expression: ");
builder.append(get_error_string(err));
context.result->set_error(SQLErrorCode::SyntaxError, builder.build());
return Value(false);
}
auto result = regex.match(lhs_value.to_string(), PosixFlags::Insensitive | PosixFlags::Unicode);
return Value(invert_expression() ? !result.success : result.success);
}
case MatchOperator::Glob:
case MatchOperator::Match:
case MatchOperator::Regexp:
default:
VERIFY_NOT_REACHED();
}