ソースを参照

LibSQL: Add simple REGEXP match

The implementation of LIKE uses regexes under the hood, and this
implementation of REGEXP takes the same approach. It employs
PosixExtended from LibRegex with the case-insensitive and Unicode flags
set. The implementation of LIKE is based on the SQLite specs, but SQLite
gives no guidance for built-in regex functionality, so this one simply
uses LibRegex.
mnlrsn 3 年 前
コミット
66216d3af6

+ 62 - 0
Tests/LibSQL/TestSqlStatementExecution.cpp

@@ -449,6 +449,68 @@ TEST_CASE(select_with_order)
     EXPECT_EQ(rows[4].row[0].to_string(), "Test_5");
 }
 
+TEST_CASE(select_with_regexp) // Inserts six rows, then checks how many rows each REGEXP pattern selects.
+{
+    ScopeGuard guard([]() { unlink(db_name); }); // Remove the database file once the test is done.
+    auto database = SQL::Database::construct(db_name);
+    EXPECT(!database->open().is_error());
+    create_table(database);
+    auto result = execute(database,
+        "INSERT INTO TestSchema.TestTable ( TextColumn, IntColumn ) VALUES "
+        "( 'Test+1', 42 ), "
+        "( 'Pröv+2', 43 ), "
+        "( 'Test(3)', 44 ), "
+        "( 'Test[4]', 45 ), "
+        "( 'Test+5', 46 ), "
+        "( 'Another-Test_6', 47 );");
+    EXPECT(result->error().code == SQL::SQLErrorCode::NoError);
+    EXPECT(result->inserted() == 6);
+
+    // Simple match: the '+' is escaped so it is taken literally; only 'Test+1' is expected.
+    result = execute(database, "SELECT TextColumn FROM TestSchema.TestTable WHERE TextColumn REGEXP 'Test\\+1';");
+    EXPECT(result->error().code == SQL::SQLErrorCode::NoError);
+    EXPECT(result->has_results());
+    EXPECT_EQ(result->results().size(), 1u);
+
+    // Match all: '.*' is expected to select every one of the six inserted rows.
+    result = execute(database, "SELECT TextColumn FROM TestSchema.TestTable WHERE TextColumn REGEXP '.*';");
+    EXPECT(result->error().code == SQL::SQLErrorCode::NoError);
+    EXPECT(result->has_results());
+    EXPECT_EQ(result->results().size(), 6u);
+
+    // Match with wildcards: anchored at the start, so the four rows beginning with 'Test' are expected ('Another-Test_6' is not).
+    result = execute(database, "SELECT TextColumn FROM TestSchema.TestTable WHERE TextColumn REGEXP '^Test.+';");
+    EXPECT(result->error().code == SQL::SQLErrorCode::NoError);
+    EXPECT(result->has_results());
+    EXPECT_EQ(result->results().size(), 4u);
+
+    // Match with case-insensitive basic Latin letters ('PR' / 'V' vs. 'Pr' / 'v') and a case-sensitive Swedish 'ö'.
+    // FIXME: Once LibRegex supports case-insensitive matching of non-ASCII Unicode characters,
+    //        this test should be updated to use the pattern 'PRÖV' instead.
+    result = execute(database, "SELECT TextColumn FROM TestSchema.TestTable WHERE TextColumn REGEXP 'PRöV.*';");
+    EXPECT(result->error().code == SQL::SQLErrorCode::NoError);
+    EXPECT(result->has_results());
+    EXPECT_EQ(result->results().size(), 1u);
+}
+
+TEST_CASE(handle_regexp_errors) // Checks that a malformed REGEXP pattern yields an error and no results.
+{
+    ScopeGuard guard([]() { unlink(db_name); }); // Remove the database file once the test is done.
+    auto database = SQL::Database::construct(db_name);
+    EXPECT(!database->open().is_error());
+    create_table(database);
+    auto result = execute(database,
+        "INSERT INTO TestSchema.TestTable ( TextColumn, IntColumn ) VALUES "
+        "( 'Test', 0 );");
+    EXPECT(result->error().code == SQL::SQLErrorCode::NoError);
+    EXPECT(result->inserted() == 1);
+
+    // Malformed regex: the '[' opening the bracket expression is never closed, so the statement must fail.
+    result = execute(database, "SELECT TextColumn FROM TestSchema.TestTable WHERE TextColumn REGEXP 'Test\\+[0-9.*';");
+    EXPECT(result->error().code != SQL::SQLErrorCode::NoError); // Any error code other than NoError is accepted.
+    EXPECT(!result->has_results());
+}
+
 TEST_CASE(select_with_order_two_columns)
 {
     ScopeGuard guard([]() { unlink(db_name); });

+ 18 - 1
Userland/Libraries/LibSQL/AST/Expression.cpp

@@ -242,9 +242,26 @@ Value MatchExpression::evaluate(ExecutionContext& context) const
         auto result = regex.match(lhs_value.to_string(), PosixFlags::Insensitive | PosixFlags::Unicode);
         return Value(invert_expression() ? !result.success : result.success);
     }
+    case MatchOperator::Regexp: {
+        Value lhs_value = lhs()->evaluate(context);
+        Value rhs_value = rhs()->evaluate(context);
+
+        auto regex = Regex<PosixExtended>(rhs_value.to_string());
+        auto err = regex.parser_result.error;
+        if (err != regex::Error::NoError) {
+            StringBuilder builder;
+            builder.append("Regular expression: ");
+            builder.append(get_error_string(err));
+
+            context.result->set_error(SQLErrorCode::SyntaxError, builder.build());
+            return Value(false);
+        }
+
+        auto result = regex.match(lhs_value.to_string(), PosixFlags::Insensitive | PosixFlags::Unicode);
+        return Value(invert_expression() ? !result.success : result.success);
+    }
     case MatchOperator::Glob:
     case MatchOperator::Match:
-    case MatchOperator::Regexp:
     default:
         VERIFY_NOT_REACHED();
     }