Browse Source

use workspace fst in tantivy

Mikkel Denker 1 year ago
parent
commit
3e5875839b

+ 1 - 12
Cargo.lock

@@ -5211,6 +5211,7 @@ dependencies = [
  "fastdivide",
  "fastdivide",
  "fnv",
  "fnv",
  "fs4",
  "fs4",
+ "fst",
  "futures",
  "futures",
  "futures-util",
  "futures-util",
  "htmlescape",
  "htmlescape",
@@ -5242,7 +5243,6 @@ dependencies = [
  "serde",
  "serde",
  "serde_json",
  "serde_json",
  "smallvec",
  "smallvec",
- "tantivy-fst",
  "tempfile",
  "tempfile",
  "thiserror",
  "thiserror",
  "time",
  "time",
@@ -5252,17 +5252,6 @@ dependencies = [
  "zstd 0.13.1",
  "zstd 0.13.1",
 ]
 ]
 
 
-[[package]]
-name = "tantivy-fst"
-version = "0.5.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d60769b80ad7953d8a7b2c70cdfe722bbcdcac6bccc8ac934c40c034d866fc18"
-dependencies = [
- "byteorder 1.5.0",
- "regex-syntax 0.8.4",
- "utf8-ranges",
-]
-
 [[package]]
 [[package]]
 name = "tap"
 name = "tap"
 version = "1.0.1"
 version = "1.0.1"

+ 1 - 31
assets/licenses.html

@@ -45,7 +45,7 @@
         <h2>Overview of licenses:</h2>
         <h2>Overview of licenses:</h2>
         <ul class="licenses-overview">
         <ul class="licenses-overview">
             <li><a href="#Apache-2.0">Apache License 2.0</a> (419)</li>
             <li><a href="#Apache-2.0">Apache License 2.0</a> (419)</li>
-            <li><a href="#MIT">MIT License</a> (188)</li>
+            <li><a href="#MIT">MIT License</a> (187)</li>
             <li><a href="#BSD-3-Clause">BSD 3-Clause &quot;New&quot; or &quot;Revised&quot; License</a> (9)</li>
             <li><a href="#BSD-3-Clause">BSD 3-Clause &quot;New&quot; or &quot;Revised&quot; License</a> (9)</li>
             <li><a href="#MPL-2.0">Mozilla Public License 2.0</a> (8)</li>
             <li><a href="#MPL-2.0">Mozilla Public License 2.0</a> (8)</li>
             <li><a href="#AGPL-3.0">GNU Affero General Public License v3.0</a> (5)</li>
             <li><a href="#AGPL-3.0">GNU Affero General Public License v3.0</a> (5)</li>
@@ -14763,36 +14763,6 @@ furnished to do so, subject to the following conditions:
 The above copyright notice and this permission notice shall be included in
 The above copyright notice and this permission notice shall be included in
 all copies or substantial portions of the Software.
 all copies or substantial portions of the Software.
 
 
-THE SOFTWARE IS PROVIDED &quot;AS IS&quot;, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-</pre>
-            </li>
-            <li class="license">
-                <h3 id="MIT">MIT License</h3>
-                <h4>Used by:</h4>
-                <ul class="license-used-by">
-                    <li><a href=" https://github.com/quickwit-inc/fst ">tantivy-fst 0.5.0</a></li>
-                </ul>
-                <pre class="license-text">The MIT License (MIT)
-
-Copyright (c) 2015 Andrew Gallant
-Copyright (c) 2019 Paul Masurel
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the &quot;Software&quot;), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
 THE SOFTWARE IS PROVIDED &quot;AS IS&quot;, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 THE SOFTWARE IS PROVIDED &quot;AS IS&quot;, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE

+ 1 - 1
crates/tantivy/Cargo.toml

@@ -30,6 +30,7 @@ fail = {version = "0.5.0", optional = true}
 fastdivide = "0.4.0"
 fastdivide = "0.4.0"
 fnv = "1.0.7"
 fnv = "1.0.7"
 fs4 = {version = "0.8.0", optional = true}
 fs4 = {version = "0.8.0", optional = true}
+fst.workspace = true
 futures-util = {version = "0.3.28", optional = true}
 futures-util = {version = "0.3.28", optional = true}
 htmlescape = "0.3.1"
 htmlescape = "0.3.1"
 itertools = "0.13.0"
 itertools = "0.13.0"
@@ -54,7 +55,6 @@ rustc-hash = "1.1.0"
 serde = {version = "1.0.136", features = ["derive"]}
 serde = {version = "1.0.136", features = ["derive"]}
 serde_json = "1.0.79"
 serde_json = "1.0.79"
 smallvec = "1.8.0"
 smallvec = "1.8.0"
-tantivy-fst = "0.5"
 tempfile = {version = "3.3.0", optional = true}
 tempfile = {version = "3.3.0", optional = true}
 thiserror = "1.0.30"
 thiserror = "1.0.30"
 time = {version = "0.3.10", features = ["serde-well-known"]}
 time = {version = "0.3.10", features = ["serde-well-known"]}

+ 2 - 2
crates/tantivy/src/query/automaton_weight.rs

@@ -2,7 +2,7 @@ use std::io;
 use std::sync::Arc;
 use std::sync::Arc;
 
 
 use crate::common::BitSet;
 use crate::common::BitSet;
-use tantivy_fst::Automaton;
+use fst::Automaton;
 
 
 use super::phrase_prefix_query::prefix_end;
 use super::phrase_prefix_query::prefix_end;
 use crate::index::SegmentReader;
 use crate::index::SegmentReader;
@@ -111,7 +111,7 @@ where
 
 
 #[cfg(test)]
 #[cfg(test)]
 mod tests {
 mod tests {
-    use tantivy_fst::Automaton;
+    use fst::Automaton;
 
 
     use super::AutomatonWeight;
     use super::AutomatonWeight;
     use crate::docset::TERMINATED;
     use crate::docset::TERMINATED;

+ 1 - 1
crates/tantivy/src/query/fuzzy_query.rs

@@ -1,6 +1,6 @@
+use fst::Automaton;
 use levenshtein_automata::{Distance, LevenshteinAutomatonBuilder, DFA};
 use levenshtein_automata::{Distance, LevenshteinAutomatonBuilder, DFA};
 use once_cell::sync::OnceCell;
 use once_cell::sync::OnceCell;
-use tantivy_fst::Automaton;
 
 
 use crate::query::{AutomatonWeight, EnableScoring, Query, Weight};
 use crate::query::{AutomatonWeight, EnableScoring, Query, Weight};
 use crate::schema::{Term, Type};
 use crate::schema::{Term, Type};

+ 0 - 2
crates/tantivy/src/query/mod.rs

@@ -18,7 +18,6 @@ mod phrase_query;
 mod query;
 mod query;
 mod query_parser;
 mod query_parser;
 mod range_query;
 mod range_query;
-mod regex_query;
 mod reqopt_scorer;
 mod reqopt_scorer;
 mod scorer;
 mod scorer;
 mod set_query;
 mod set_query;
@@ -55,7 +54,6 @@ pub use self::phrase_query::PhraseQuery;
 pub use self::query::{EnableScoring, Query, QueryClone};
 pub use self::query::{EnableScoring, Query, QueryClone};
 pub use self::query_parser::{QueryParser, QueryParserError};
 pub use self::query_parser::{QueryParser, QueryParserError};
 pub use self::range_query::{FastFieldRangeWeight, IPFastFieldRangeWeight, RangeQuery};
 pub use self::range_query::{FastFieldRangeWeight, IPFastFieldRangeWeight, RangeQuery};
-pub use self::regex_query::RegexQuery;
 pub use self::reqopt_scorer::RequiredOptionalScorer;
 pub use self::reqopt_scorer::RequiredOptionalScorer;
 pub use self::score_combiner::{
 pub use self::score_combiner::{
     DisjunctionMaxCombiner, ScoreCombiner, SumCombiner, SumWithCoordsCombiner,
     DisjunctionMaxCombiner, ScoreCombiner, SumCombiner, SumWithCoordsCombiner,

+ 0 - 191
crates/tantivy/src/query/regex_query.rs

@@ -1,191 +0,0 @@
-use std::clone::Clone;
-use std::sync::Arc;
-
-use tantivy_fst::Regex;
-
-use crate::error::TantivyError;
-use crate::query::{AutomatonWeight, EnableScoring, Query, Weight};
-use crate::schema::Field;
-
-/// A Regex Query matches all of the documents
-/// containing a specific term that matches
-/// a regex pattern.
-///
-/// Wildcard queries (e.g. ho*se) can be achieved
-/// by converting them to their regex counterparts.
-///
-/// ```rust
-/// use tantivy::collector::Count;
-/// use tantivy::query::RegexQuery;
-/// use tantivy::schema::{Schema, TEXT};
-/// use tantivy::{doc, Index, IndexWriter, Term};
-///
-/// # fn test() -> tantivy::Result<()> {
-/// let mut schema_builder = Schema::builder();
-/// let title = schema_builder.add_text_field("title", TEXT);
-/// let schema = schema_builder.build();
-/// let index = Index::create_in_ram(schema);
-/// {
-///     let mut index_writer: IndexWriter = index.writer(15_000_000)?;
-///     index_writer.add_document(doc!(
-///         title => "The Name of the Wind",
-///     ))?;
-///     index_writer.add_document(doc!(
-///         title => "The Diary of Muadib",
-///     ))?;
-///     index_writer.add_document(doc!(
-///         title => "A Dairy Cow",
-///     ))?;
-///     index_writer.add_document(doc!(
-///         title => "The Diary of a Young Girl",
-///     ))?;
-///     index_writer.commit()?;
-/// }
-///
-/// let reader = index.reader()?;
-/// let searcher = reader.searcher();
-///
-/// let term = Term::from_field_text(title, "Diary");
-/// let query = RegexQuery::from_pattern("d[ai]{2}ry", title)?;
-/// let count = searcher.search(&query, &Count)?;
-/// assert_eq!(count, 3);
-/// Ok(())
-/// # }
-/// # assert!(test().is_ok());
-/// ```
-#[derive(Debug, Clone)]
-pub struct RegexQuery {
-    regex: Arc<Regex>,
-    field: Field,
-}
-
-impl RegexQuery {
-    /// Creates a new RegexQuery from a given pattern
-    pub fn from_pattern(regex_pattern: &str, field: Field) -> crate::Result<Self> {
-        let regex = Regex::new(regex_pattern)
-            .map_err(|err| TantivyError::InvalidArgument(format!("RegexQueryError: {err}")))?;
-        Ok(RegexQuery::from_regex(regex, field))
-    }
-
-    /// Creates a new RegexQuery from a fully built Regex
-    pub fn from_regex<T: Into<Arc<Regex>>>(regex: T, field: Field) -> Self {
-        RegexQuery {
-            regex: regex.into(),
-            field,
-        }
-    }
-
-    fn specialized_weight(&self) -> AutomatonWeight<Regex> {
-        AutomatonWeight::new(self.field, self.regex.clone())
-    }
-}
-
-impl Query for RegexQuery {
-    fn weight(&self, _enabled_scoring: EnableScoring<'_>) -> crate::Result<Box<dyn Weight>> {
-        Ok(Box::new(self.specialized_weight()))
-    }
-}
-
-#[cfg(test)]
-mod test {
-    use std::sync::Arc;
-
-    use tantivy_fst::Regex;
-
-    use super::RegexQuery;
-    use crate::collector::TopDocs;
-    use crate::schema::{Field, Schema, TEXT};
-    use crate::{assert_nearly_equals, Index, IndexReader, IndexWriter};
-
-    fn build_test_index() -> crate::Result<(IndexReader, Field)> {
-        let mut schema_builder = Schema::builder();
-        let country_field = schema_builder.add_text_field("country", TEXT);
-        let schema = schema_builder.build();
-        let index = Index::create_in_ram(schema);
-        {
-            let mut index_writer: IndexWriter = index.writer_for_tests().unwrap();
-            index_writer.add_document(doc!(
-                country_field => "japan",
-            ))?;
-            index_writer.add_document(doc!(
-                country_field => "korea",
-            ))?;
-            index_writer.commit()?;
-        }
-        let reader = index.reader()?;
-
-        Ok((reader, country_field))
-    }
-
-    fn verify_regex_query(
-        query_matching_one: RegexQuery,
-        query_matching_zero: RegexQuery,
-        reader: IndexReader,
-    ) {
-        let searcher = reader.searcher();
-        {
-            let scored_docs = searcher
-                .search(&query_matching_one, &TopDocs::with_limit(2))
-                .unwrap();
-            assert_eq!(scored_docs.len(), 1, "Expected only 1 document");
-            let (score, _) = scored_docs[0];
-            assert_nearly_equals!(1.0, score);
-        }
-        let top_docs = searcher
-            .search(&query_matching_zero, &TopDocs::with_limit(2))
-            .unwrap();
-        assert!(top_docs.is_empty(), "Expected ZERO document");
-    }
-
-    #[test]
-    pub fn test_regex_query() -> crate::Result<()> {
-        let (reader, field) = build_test_index()?;
-
-        let matching_one = RegexQuery::from_pattern("jap[ao]n", field)?;
-        let matching_zero = RegexQuery::from_pattern("jap[A-Z]n", field)?;
-        verify_regex_query(matching_one, matching_zero, reader);
-        Ok(())
-    }
-
-    #[test]
-    pub fn test_construct_from_regex() -> crate::Result<()> {
-        let (reader, field) = build_test_index()?;
-
-        let matching_one = RegexQuery::from_regex(Regex::new("jap[ao]n").unwrap(), field);
-        let matching_zero = RegexQuery::from_regex(Regex::new("jap[A-Z]n").unwrap(), field);
-
-        verify_regex_query(matching_one, matching_zero, reader);
-        Ok(())
-    }
-
-    #[test]
-    pub fn test_construct_from_reused_regex() -> crate::Result<()> {
-        let r1 = Arc::new(Regex::new("jap[ao]n").unwrap());
-        let r2 = Arc::new(Regex::new("jap[A-Z]n").unwrap());
-
-        let (reader, field) = build_test_index()?;
-
-        let matching_one = RegexQuery::from_regex(r1.clone(), field);
-        let matching_zero = RegexQuery::from_regex(r2.clone(), field);
-
-        verify_regex_query(matching_one, matching_zero, reader.clone());
-
-        let matching_one = RegexQuery::from_regex(r1, field);
-        let matching_zero = RegexQuery::from_regex(r2, field);
-
-        verify_regex_query(matching_one, matching_zero, reader);
-        Ok(())
-    }
-
-    #[test]
-    pub fn test_pattern_error() {
-        let (_reader, field) = build_test_index().unwrap();
-
-        match RegexQuery::from_pattern(r"(foo", field) {
-            Err(crate::TantivyError::InvalidArgument(msg)) => {
-                assert!(msg.contains("error: unclosed group"))
-            }
-            res => panic!("unexpected result: {res:?}"),
-        }
-    }
-}

+ 2 - 2
crates/tantivy/src/query/set_query.rs

@@ -1,7 +1,7 @@
 use std::collections::HashMap;
 use std::collections::HashMap;
 
 
-use tantivy_fst::raw::CompiledAddr;
-use tantivy_fst::{Automaton, Map};
+use fst::raw::CompiledAddr;
+use fst::{Automaton, Map};
 
 
 use crate::query::score_combiner::DoNothingCombiner;
 use crate::query::score_combiner::DoNothingCombiner;
 use crate::query::{AutomatonWeight, BooleanWeight, EnableScoring, Occur, Query, Weight};
 use crate::query::{AutomatonWeight, BooleanWeight, EnableScoring, Occur, Query, Weight};

+ 2 - 2
crates/tantivy/src/sstable/dictionary.rs

@@ -6,8 +6,8 @@ use std::sync::Arc;
 
 
 use crate::common::file_slice::FileSlice;
 use crate::common::file_slice::FileSlice;
 use crate::common::{BinarySerializable, OwnedBytes};
 use crate::common::{BinarySerializable, OwnedBytes};
-use tantivy_fst::automaton::AlwaysMatch;
-use tantivy_fst::Automaton;
+use fst::automaton::AlwaysMatch;
+use fst::Automaton;
 
 
 use super::sstable_index_v3::SSTableIndexV3Empty;
 use super::sstable_index_v3::SSTableIndexV3Empty;
 use super::streamer::{Streamer, StreamerBuilder};
 use super::streamer::{Streamer, StreamerBuilder};

+ 5 - 5
crates/tantivy/src/sstable/sstable_index_v3.rs

@@ -4,8 +4,8 @@ use std::sync::Arc;
 
 
 use crate::bitpacker::{compute_num_bits, BitPacker};
 use crate::bitpacker::{compute_num_bits, BitPacker};
 use crate::common::{BinarySerializable, FixedSize, OwnedBytes};
 use crate::common::{BinarySerializable, FixedSize, OwnedBytes};
-use tantivy_fst::raw::Fst;
-use tantivy_fst::{IntoStreamer, Map, MapBuilder, Streamer};
+use fst::raw::Fst;
+use fst::{IntoStreamer, Map, MapBuilder, Streamer};
 
 
 use super::{common_prefix_len, SSTableDataCorruption, TermOrdinal};
 use super::{common_prefix_len, SSTableDataCorruption, TermOrdinal};
 
 
@@ -303,10 +303,10 @@ impl SSTableIndexBuilder {
     }
     }
 }
 }
 
 
-fn fst_error_to_io_error(error: tantivy_fst::Error) -> io::Error {
+fn fst_error_to_io_error(error: fst::Error) -> io::Error {
     match error {
     match error {
-        tantivy_fst::Error::Fst(fst_error) => io::Error::new(io::ErrorKind::Other, fst_error),
-        tantivy_fst::Error::Io(ioerror) => ioerror,
+        fst::Error::Fst(fst_error) => io::Error::new(io::ErrorKind::Other, fst_error),
+        fst::Error::Io(ioerror) => ioerror,
     }
     }
 }
 }
 
 

+ 2 - 17
crates/tantivy/src/sstable/streamer.rs

@@ -1,8 +1,8 @@
 use std::io;
 use std::io;
 use std::ops::Bound;
 use std::ops::Bound;
 
 
-use tantivy_fst::automaton::AlwaysMatch;
-use tantivy_fst::Automaton;
+use fst::automaton::AlwaysMatch;
+use fst::Automaton;
 
 
 use super::dictionary::Dictionary;
 use super::dictionary::Dictionary;
 use super::{DeltaReader, SSTable, TermOrdinal};
 use super::{DeltaReader, SSTable, TermOrdinal};
@@ -313,19 +313,4 @@ mod tests {
         assert!(!streamer.advance());
         assert!(!streamer.advance());
         Ok(())
         Ok(())
     }
     }
-
-    #[test]
-    fn test_sstable_search() -> io::Result<()> {
-        let term_dict = create_test_dictionary()?;
-        let ptn = tantivy_fst::Regex::new("ab.*t.*").unwrap();
-        let mut term_streamer = term_dict.search(ptn).into_stream()?;
-        assert!(term_streamer.advance());
-        assert_eq!(term_streamer.key(), b"abalation");
-        assert_eq!(term_streamer.value(), &1u64);
-        assert!(term_streamer.advance());
-        assert_eq!(term_streamer.key(), b"abalienate");
-        assert_eq!(term_streamer.value(), &2u64);
-        assert!(!term_streamer.advance());
-        Ok(())
-    }
 }
 }

+ 3 - 3
crates/tantivy/src/termdict/fst_termdict/merger.rs

@@ -1,6 +1,6 @@
-use tantivy_fst::map::{OpBuilder, Union};
-use tantivy_fst::raw::IndexedValue;
-use tantivy_fst::Streamer;
+use fst::map::{OpBuilder, Union};
+use fst::raw::IndexedValue;
+use fst::Streamer;
 
 
 use super::termdict::TermDictionary;
 use super::termdict::TermDictionary;
 use crate::postings::TermInfo;
 use crate::postings::TermInfo;

+ 11 - 13
crates/tantivy/src/termdict/fst_termdict/streamer.rs

@@ -1,8 +1,8 @@
 use std::io;
 use std::io;
 
 
-use tantivy_fst::automaton::AlwaysMatch;
-use tantivy_fst::map::{Stream, StreamBuilder};
-use tantivy_fst::{Automaton, IntoStreamer, Streamer};
+use fst::automaton::AlwaysMatch;
+use fst::map::{Stream, StreamBuilder};
+use fst::{Automaton, IntoStreamer, Streamer};
 
 
 use super::TermDictionary;
 use super::TermDictionary;
 use crate::postings::TermInfo;
 use crate::postings::TermInfo;
@@ -11,14 +11,16 @@ use crate::termdict::TermOrdinal;
 /// `TermStreamerBuilder` is a helper object used to define
 /// `TermStreamerBuilder` is a helper object used to define
 /// a range of terms that should be streamed.
 /// a range of terms that should be streamed.
 pub struct TermStreamerBuilder<'a, A = AlwaysMatch>
 pub struct TermStreamerBuilder<'a, A = AlwaysMatch>
-where A: Automaton
+where
+    A: Automaton,
 {
 {
     fst_map: &'a TermDictionary,
     fst_map: &'a TermDictionary,
     stream_builder: StreamBuilder<'a, A>,
     stream_builder: StreamBuilder<'a, A>,
 }
 }
 
 
 impl<'a, A> TermStreamerBuilder<'a, A>
 impl<'a, A> TermStreamerBuilder<'a, A>
-where A: Automaton
+where
+    A: Automaton,
 {
 {
     pub(crate) fn new(fst_map: &'a TermDictionary, stream_builder: StreamBuilder<'a, A>) -> Self {
     pub(crate) fn new(fst_map: &'a TermDictionary, stream_builder: StreamBuilder<'a, A>) -> Self {
         TermStreamerBuilder {
         TermStreamerBuilder {
@@ -51,12 +53,6 @@ where A: Automaton
         self
         self
     }
     }
 
 
-    /// Iterate over the range backwards.
-    pub fn backward(mut self) -> Self {
-        self.stream_builder = self.stream_builder.backward();
-        self
-    }
-
     /// Creates the stream corresponding to the range
     /// Creates the stream corresponding to the range
     /// of terms defined using the `TermStreamerBuilder`.
     /// of terms defined using the `TermStreamerBuilder`.
     pub fn into_stream(self) -> io::Result<TermStreamer<'a, A>> {
     pub fn into_stream(self) -> io::Result<TermStreamer<'a, A>> {
@@ -73,7 +69,8 @@ where A: Automaton
 /// `TermStreamer` acts as a cursor over a range of terms of a segment.
 /// `TermStreamer` acts as a cursor over a range of terms of a segment.
 /// Terms are guaranteed to be sorted.
 /// Terms are guaranteed to be sorted.
 pub struct TermStreamer<'a, A = AlwaysMatch>
 pub struct TermStreamer<'a, A = AlwaysMatch>
-where A: Automaton
+where
+    A: Automaton,
 {
 {
     pub(crate) fst_map: &'a TermDictionary,
     pub(crate) fst_map: &'a TermDictionary,
     pub(crate) stream: Stream<'a, A>,
     pub(crate) stream: Stream<'a, A>,
@@ -83,7 +80,8 @@ where A: Automaton
 }
 }
 
 
 impl<'a, A> TermStreamer<'a, A>
 impl<'a, A> TermStreamer<'a, A>
-where A: Automaton
+where
+    A: Automaton,
 {
 {
     /// Advance position the stream on the next item.
     /// Advance position the stream on the next item.
     /// Before the first call to `.advance()`, the stream
     /// Before the first call to `.advance()`, the stream

+ 8 - 8
crates/tantivy/src/termdict/fst_termdict/termdict.rs

@@ -1,9 +1,9 @@
 use std::io::{self, Write};
 use std::io::{self, Write};
 
 
 use crate::common::{BinarySerializable, CountingWriter};
 use crate::common::{BinarySerializable, CountingWriter};
+use fst::raw::Fst;
+use fst::Automaton;
 use once_cell::sync::Lazy;
 use once_cell::sync::Lazy;
-use tantivy_fst::raw::Fst;
-use tantivy_fst::Automaton;
 
 
 use super::term_info_store::{TermInfoStore, TermInfoStoreWriter};
 use super::term_info_store::{TermInfoStore, TermInfoStoreWriter};
 use super::{TermStreamer, TermStreamerBuilder};
 use super::{TermStreamer, TermStreamerBuilder};
@@ -11,7 +11,7 @@ use crate::directory::{FileSlice, OwnedBytes};
 use crate::postings::TermInfo;
 use crate::postings::TermInfo;
 use crate::termdict::TermOrdinal;
 use crate::termdict::TermOrdinal;
 
 
-fn convert_fst_error(e: tantivy_fst::Error) -> io::Error {
+fn convert_fst_error(e: fst::Error) -> io::Error {
     io::Error::new(io::ErrorKind::Other, e)
     io::Error::new(io::ErrorKind::Other, e)
 }
 }
 
 
@@ -21,7 +21,7 @@ const FST_VERSION: u32 = 1;
 ///
 ///
 /// Inserting must be done in the order of the `keys`.
 /// Inserting must be done in the order of the `keys`.
 pub struct TermDictionaryBuilder<W> {
 pub struct TermDictionaryBuilder<W> {
-    fst_builder: tantivy_fst::MapBuilder<W>,
+    fst_builder: fst::MapBuilder<W>,
     term_info_store_writer: TermInfoStoreWriter,
     term_info_store_writer: TermInfoStoreWriter,
     term_ord: u64,
     term_ord: u64,
 }
 }
@@ -32,7 +32,7 @@ where
 {
 {
     /// Creates a new `TermDictionaryBuilder`
     /// Creates a new `TermDictionaryBuilder`
     pub fn create(w: W) -> io::Result<Self> {
     pub fn create(w: W) -> io::Result<Self> {
-        let fst_builder = tantivy_fst::MapBuilder::new(w).map_err(convert_fst_error)?;
+        let fst_builder = fst::MapBuilder::new(w).map_err(convert_fst_error)?;
         Ok(TermDictionaryBuilder {
         Ok(TermDictionaryBuilder {
             fst_builder,
             fst_builder,
             term_info_store_writer: TermInfoStoreWriter::new(),
             term_info_store_writer: TermInfoStoreWriter::new(),
@@ -89,7 +89,7 @@ where
     }
     }
 }
 }
 
 
-fn open_fst_index(fst_file: FileSlice) -> io::Result<tantivy_fst::Map<OwnedBytes>> {
+fn open_fst_index(fst_file: FileSlice) -> io::Result<fst::Map<OwnedBytes>> {
     let bytes = fst_file.read_bytes()?;
     let bytes = fst_file.read_bytes()?;
     let fst = Fst::new(bytes).map_err(|err| {
     let fst = Fst::new(bytes).map_err(|err| {
         io::Error::new(
         io::Error::new(
@@ -97,7 +97,7 @@ fn open_fst_index(fst_file: FileSlice) -> io::Result<tantivy_fst::Map<OwnedBytes
             format!("Fst data is corrupted: {err:?}"),
             format!("Fst data is corrupted: {err:?}"),
         )
         )
     })?;
     })?;
-    Ok(tantivy_fst::Map::from(fst))
+    Ok(fst::Map::from(fst))
 }
 }
 
 
 static EMPTY_TERM_DICT_FILE: Lazy<FileSlice> = Lazy::new(|| {
 static EMPTY_TERM_DICT_FILE: Lazy<FileSlice> = Lazy::new(|| {
@@ -115,7 +115,7 @@ static EMPTY_TERM_DICT_FILE: Lazy<FileSlice> = Lazy::new(|| {
 /// respective `TermOrdinal`. The `TermInfoStore` then makes it
 /// respective `TermOrdinal`. The `TermInfoStore` then makes it
 /// possible to fetch the associated `TermInfo`.
 /// possible to fetch the associated `TermInfo`.
 pub struct TermDictionary {
 pub struct TermDictionary {
-    fst_index: tantivy_fst::Map<OwnedBytes>,
+    fst_index: fst::Map<OwnedBytes>,
     term_info_store: TermInfoStore,
     term_info_store: TermInfoStore,
 }
 }
 
 

+ 1 - 1
crates/tantivy/src/termdict/mod.rs

@@ -31,7 +31,7 @@ use std::io;
 
 
 use crate::common::file_slice::FileSlice;
 use crate::common::file_slice::FileSlice;
 use crate::common::BinarySerializable;
 use crate::common::BinarySerializable;
-use tantivy_fst::Automaton;
+use fst::Automaton;
 
 
 use self::termdict::{
 use self::termdict::{
     TermDictionary as InnerTermDict, TermDictionaryBuilder as InnerTermDictBuilder,
     TermDictionary as InnerTermDict, TermDictionaryBuilder as InnerTermDictBuilder,

+ 0 - 62
crates/tantivy/src/termdict/tests.rs

@@ -302,68 +302,6 @@ fn test_stream_range_boundaries_forward() -> crate::Result<()> {
     Ok(())
     Ok(())
 }
 }
 
 
-#[cfg(not(feature = "quickwit"))]
-#[test]
-fn test_stream_range_boundaries_backward() -> crate::Result<()> {
-    let term_dictionary = stream_range_test_dict()?;
-    let value_list_backward = |mut streamer: TermStreamer<'_>| {
-        let mut res: Vec<u32> = vec![];
-        while let Some((_, v)) = streamer.next() {
-            res.push(v.doc_freq);
-        }
-        res.reverse();
-        res
-    };
-    {
-        let range = term_dictionary.range().backward().into_stream()?;
-        assert_eq!(
-            value_list_backward(range),
-            vec![0u32, 1u32, 2u32, 3u32, 4u32, 5u32, 6u32, 7u32, 8u32, 9u32]
-        );
-    }
-    {
-        let range = term_dictionary.range().ge([2u8]).backward().into_stream()?;
-        assert_eq!(
-            value_list_backward(range),
-            vec![2u32, 3u32, 4u32, 5u32, 6u32, 7u32, 8u32, 9u32]
-        );
-    }
-    {
-        let range = term_dictionary.range().gt([2u8]).backward().into_stream()?;
-        assert_eq!(
-            value_list_backward(range),
-            vec![3u32, 4u32, 5u32, 6u32, 7u32, 8u32, 9u32]
-        );
-    }
-    {
-        let range = term_dictionary.range().lt([6u8]).backward().into_stream()?;
-        assert_eq!(
-            value_list_backward(range),
-            vec![0u32, 1u32, 2u32, 3u32, 4u32, 5u32]
-        );
-    }
-    {
-        let range = term_dictionary.range().le([6u8]).backward().into_stream()?;
-        assert_eq!(
-            value_list_backward(range),
-            vec![0u32, 1u32, 2u32, 3u32, 4u32, 5u32, 6u32]
-        );
-    }
-    {
-        let range = term_dictionary
-            .range()
-            .ge([0u8])
-            .lt([5u8])
-            .backward()
-            .into_stream()?;
-        assert_eq!(
-            value_list_backward(range),
-            vec![0u32, 1u32, 2u32, 3u32, 4u32]
-        );
-    }
-    Ok(())
-}
-
 #[test]
 #[test]
 fn test_ord_to_term() -> crate::Result<()> {
 fn test_ord_to_term() -> crate::Result<()> {
     let termdict = stream_range_test_dict()?;
     let termdict = stream_range_test_dict()?;