Ver código fonte

[webgraph] filters in group_by queries

Mikkel Denker 9 meses atrás
pai
commit
48eebbef43
1 arquivo alterado com 74 adições e 13 exclusões
  1. 74 13
      crates/core/src/webgraph/query/group_by.rs

+ 74 - 13
crates/core/src/webgraph/query/group_by.rs

@@ -15,6 +15,7 @@
 // along with this program.  If not, see <https://www.gnu.org/licenses/>.
 // along with this program.  If not, see <https://www.gnu.org/licenses/>.
 
 
 use rustc_hash::{FxHashMap, FxHashSet};
 use rustc_hash::{FxHashMap, FxHashSet};
+use tantivy::query::{BooleanQuery, Occur};
 
 
 use crate::{
 use crate::{
     hyperloglog::HyperLogLog,
     hyperloglog::HyperLogLog,
@@ -26,7 +27,7 @@ use crate::{
 
 
 use super::{
 use super::{
     collector::{GroupExactCollector, GroupSketchCollector},
     collector::{GroupExactCollector, GroupSketchCollector},
-    raw, Query,
+    raw, AndFilter, Filter, FilterEnum, Query,
 };
 };
 
 
 #[derive(Debug, Clone, bincode::Encode, bincode::Decode)]
 #[derive(Debug, Clone, bincode::Encode, bincode::Decode)]
@@ -40,6 +41,7 @@ pub struct HostGroupSketchQuery {
     node: LinksDirection,
     node: LinksDirection,
     group: FieldEnum,
     group: FieldEnum,
     value: FieldEnum,
     value: FieldEnum,
+    filters: Vec<FilterEnum>,
 }
 }
 
 
 impl HostGroupSketchQuery {
 impl HostGroupSketchQuery {
@@ -52,6 +54,7 @@ impl HostGroupSketchQuery {
             node,
             node,
             group: group.into(),
             group: group.into(),
             value: value.into(),
             value: value.into(),
+            filters: Vec::new(),
         }
         }
     }
     }
 
 
@@ -66,29 +69,57 @@ impl HostGroupSketchQuery {
     ) -> Self {
     ) -> Self {
         Self::new(LinksDirection::From(node), group, value)
         Self::new(LinksDirection::From(node), group, value)
     }
     }
+
+    pub fn filter<F: Filter>(mut self, filter: F) -> Self {
+        self.filters.push(filter.into());
+        self
+    }
+
+    fn filter_as_and(&self) -> Option<AndFilter> {
+        if self.filters.is_empty() {
+            None
+        } else {
+            let mut filter = AndFilter::new();
+
+            for f in self.filters.clone() {
+                filter = filter.and(f);
+            }
+
+            Some(filter)
+        }
+    }
 }
 }
 
 
 impl Query for HostGroupSketchQuery {
 impl Query for HostGroupSketchQuery {
     type Collector = GroupSketchCollector;
     type Collector = GroupSketchCollector;
-    type TantivyQuery = raw::HostLinksQuery;
+    type TantivyQuery = Box<dyn tantivy::query::Query>;
     type IntermediateOutput = FxHashMap<u64, HyperLogLog<4069>>;
     type IntermediateOutput = FxHashMap<u64, HyperLogLog<4069>>;
     type Output = FxHashMap<u64, HyperLogLog<4069>>;
     type Output = FxHashMap<u64, HyperLogLog<4069>>;
 
 
     fn tantivy_query(&self, searcher: &crate::webgraph::searcher::Searcher) -> Self::TantivyQuery {
     fn tantivy_query(&self, searcher: &crate::webgraph::searcher::Searcher) -> Self::TantivyQuery {
-        match self.node {
-            LinksDirection::From(node) => raw::HostLinksQuery::new(
+        let mut raw: Self::TantivyQuery = match self.node {
+            LinksDirection::From(node) => Box::new(raw::HostLinksQuery::new(
                 node,
                 node,
                 FromHostId,
                 FromHostId,
                 ToHostId,
                 ToHostId,
                 searcher.warmed_column_fields().clone(),
                 searcher.warmed_column_fields().clone(),
-            ),
-            LinksDirection::To(node) => raw::HostLinksQuery::new(
+            )),
+            LinksDirection::To(node) => Box::new(raw::HostLinksQuery::new(
                 node,
                 node,
                 ToHostId,
                 ToHostId,
                 FromHostId,
                 FromHostId,
                 searcher.warmed_column_fields().clone(),
                 searcher.warmed_column_fields().clone(),
-            ),
+            )),
+        };
+
+        if let Some(filter) = self.filter_as_and().and_then(|f| f.inverted_index_filter()) {
+            let filter = filter.query(searcher);
+            let mut queries = vec![(Occur::Must, raw)];
+            queries.extend(filter);
+            raw = Box::new(BooleanQuery::new(queries));
         }
         }
+
+        raw
     }
     }
 
 
     fn collector(&self, searcher: &crate::webgraph::searcher::Searcher) -> Self::Collector {
     fn collector(&self, searcher: &crate::webgraph::searcher::Searcher) -> Self::Collector {
@@ -126,6 +157,7 @@ pub struct HostGroupQuery {
     node: LinksDirection,
     node: LinksDirection,
     group: FieldEnum,
     group: FieldEnum,
     value: FieldEnum,
     value: FieldEnum,
+    filters: Vec<FilterEnum>,
 }
 }
 
 
 impl HostGroupQuery {
 impl HostGroupQuery {
@@ -138,6 +170,7 @@ impl HostGroupQuery {
             node,
             node,
             group: group.into(),
             group: group.into(),
             value: value.into(),
             value: value.into(),
+            filters: Vec::new(),
         }
         }
     }
     }
 
 
@@ -152,29 +185,57 @@ impl HostGroupQuery {
     ) -> Self {
     ) -> Self {
         Self::new(LinksDirection::From(node), group, value)
         Self::new(LinksDirection::From(node), group, value)
     }
     }
+
+    pub fn filter<F: Filter>(mut self, filter: F) -> Self {
+        self.filters.push(filter.into());
+        self
+    }
+
+    fn filter_as_and(&self) -> Option<AndFilter> {
+        if self.filters.is_empty() {
+            None
+        } else {
+            let mut filter = AndFilter::new();
+
+            for f in self.filters.clone() {
+                filter = filter.and(f);
+            }
+
+            Some(filter)
+        }
+    }
 }
 }
 
 
 impl Query for HostGroupQuery {
 impl Query for HostGroupQuery {
     type Collector = GroupExactCollector;
     type Collector = GroupExactCollector;
-    type TantivyQuery = raw::HostLinksQuery;
+    type TantivyQuery = Box<dyn tantivy::query::Query>;
     type IntermediateOutput = FxHashMap<u64, FxHashSet<u64>>;
     type IntermediateOutput = FxHashMap<u64, FxHashSet<u64>>;
     type Output = FxHashMap<u64, FxHashSet<u64>>;
     type Output = FxHashMap<u64, FxHashSet<u64>>;
 
 
     fn tantivy_query(&self, searcher: &crate::webgraph::searcher::Searcher) -> Self::TantivyQuery {
     fn tantivy_query(&self, searcher: &crate::webgraph::searcher::Searcher) -> Self::TantivyQuery {
-        match self.node {
-            LinksDirection::From(node) => raw::HostLinksQuery::new(
+        let mut raw: Self::TantivyQuery = match self.node {
+            LinksDirection::From(node) => Box::new(raw::HostLinksQuery::new(
                 node,
                 node,
                 FromHostId,
                 FromHostId,
                 ToHostId,
                 ToHostId,
                 searcher.warmed_column_fields().clone(),
                 searcher.warmed_column_fields().clone(),
-            ),
-            LinksDirection::To(node) => raw::HostLinksQuery::new(
+            )),
+            LinksDirection::To(node) => Box::new(raw::HostLinksQuery::new(
                 node,
                 node,
                 ToHostId,
                 ToHostId,
                 FromHostId,
                 FromHostId,
                 searcher.warmed_column_fields().clone(),
                 searcher.warmed_column_fields().clone(),
-            ),
+            )),
+        };
+
+        if let Some(filter) = self.filter_as_and().and_then(|f| f.inverted_index_filter()) {
+            let filter = filter.query(searcher);
+            let mut queries = vec![(Occur::Must, raw)];
+            queries.extend(filter);
+            raw = Box::new(BooleanQuery::new(queries));
         }
         }
+
+        raw
     }
     }
 
 
     fn collector(&self, searcher: &crate::webgraph::searcher::Searcher) -> Self::Collector {
     fn collector(&self, searcher: &crate::webgraph::searcher::Searcher) -> Self::Collector {