From cb3348191b1672afbfc99939cce4bc6f2b63eb4a Mon Sep 17 00:00:00 2001 From: AnotherTest Date: Sat, 26 Dec 2020 13:52:44 +0330 Subject: [PATCH] AK: Add a prefix tree implementation `AK::Trie` can be keyed by any given hashable type, and can store any metadata (including nothing at all). Also adds a test. --- AK/Tests/CMakeLists.txt | 1 + AK/Tests/TestTrie.cpp | 83 ++++++++++++++ AK/Trie.h | 232 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 316 insertions(+) create mode 100644 AK/Tests/TestTrie.cpp create mode 100644 AK/Trie.h diff --git a/AK/Tests/CMakeLists.txt b/AK/Tests/CMakeLists.txt index 675e2a4b64f..aba78f94091 100644 --- a/AK/Tests/CMakeLists.txt +++ b/AK/Tests/CMakeLists.txt @@ -33,6 +33,7 @@ set(AK_TEST_SOURCES TestString.cpp TestStringUtils.cpp TestStringView.cpp + TestTrie.cpp TestTypedTransfer.cpp TestURL.cpp TestUtf8.cpp diff --git a/AK/Tests/TestTrie.cpp b/AK/Tests/TestTrie.cpp new file mode 100644 index 00000000000..cae1b5b63f0 --- /dev/null +++ b/AK/Tests/TestTrie.cpp @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2020, the SerenityOS developers. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +#include + +TEST_CASE(normal_behaviour) +{ + Trie dictionary('/', ""); + constexpr static const char* data[] { "test", "example", "foo", "foobar" }; + constexpr static const size_t total_chars = 18; // root (1), 'test' (4), 'example' (7), 'foo' (3), 'foobar' (3, "foo" already stored). + for (auto& entry : data) { + StringView view { entry }; + auto it = view.begin(); + dictionary.insert(it, view.end(), view, [](auto& parent, auto& it) -> Optional { return String::formatted("{}{}", parent.metadata_value(), *it); }); + } + + size_t i = 0; + for ([[maybe_unused]] auto& node : dictionary) + ++i; + EXPECT_EQ(i, total_chars); + + for (auto& entry : data) { + StringView view { entry }; + auto it = view.begin(); + auto& node = dictionary.traverse_until_last_accessible_node(it, view.end()); + EXPECT(it.is_end()); + EXPECT(node.metadata().has_value()); + EXPECT_EQ(view, node.metadata_value()); + } + + constexpr static const char* test_data_with_prefix_in_dict[] { "testx", "exampley", "fooa", "foobarb", "fox", "text" }; + for (auto& entry : test_data_with_prefix_in_dict) { + StringView view { entry }; + auto it = view.begin(); + auto& node = dictionary.traverse_until_last_accessible_node(it, view.end()); + EXPECT(!it.is_end()); + EXPECT(node.metadata().has_value()); + EXPECT(view.starts_with(node.metadata_value())); + } +} + +TEST_CASE(iterate) +{ + Trie bunch_of_numbers { 0 }; + Array input; + for (size_t i = 0; i < input.size(); ++i) + input[i] = i; + + // Iteration order is preorder (order between adjacent nodes is not defined, but parents come before children) + // in this case, the tree is linear. + size_t i = 0; + for (auto& node : bunch_of_numbers) { + EXPECT_EQ(input[i], node.value()); + ++i; + } +} + +TEST_MAIN(AllOf) diff --git a/AK/Trie.h b/AK/Trie.h new file mode 100644 index 00000000000..ae9f52806f9 --- /dev/null +++ b/AK/Trie.h @@ -0,0 +1,232 @@ +/* + * Copyright (c) 2020, the SerenityOS developers. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#pragma once + +#include +#include +#include + +namespace AK { + +namespace Detail { + +template +struct SubstituteIfVoid { + using Type = TypeA; +}; + +template +struct SubstituteIfVoid { + using Type = Default; +}; + +template +class Trie { + using BaseType = typename SubstituteIfVoid::Type; + + class ConstIterator { + + public: + static ConstIterator end() { return {}; } + + bool operator==(const ConstIterator& other) const { return m_current_node == other.m_current_node; } + + const BaseType& operator*() const { return static_cast(*m_current_node); } + const BaseType* operator->() const { return static_cast(m_current_node); } + void operator++() { skip_to_next(); } + + explicit ConstIterator(const Trie& node) + { + m_current_node = &node; + m_state.empend(false, node.m_children.begin(), node.m_children.end()); + } + + private: + void skip_to_next() + { + auto& current_state = m_state.last(); + if (current_state.did_generate_root) + ++current_state.it; + else + current_state.did_generate_root = true; + if (current_state.it == current_state.end) + return pop_and_get_next(); + + m_current_node = &*(*current_state.it).value; + m_state.empend(false, m_current_node->m_children.begin(), m_current_node->m_children.end()); + } + void pop_and_get_next() + { + m_state.take_last(); + if (m_state.is_empty()) { + m_current_node = nullptr; + return; + } + + skip_to_next(); + } + + ConstIterator() = default; + + struct State { + bool did_generate_root { false }; + typename HashMap, ValueTraits>::ConstIteratorType it; + typename HashMap, ValueTraits>::ConstIteratorType end; + }; + Vector m_state; + const Trie* m_current_node { nullptr }; + }; + +public: + using MetadataType = MetadataT; + + Trie(ValueType value, Optional metadata) + : m_value(move(value)) + , m_metadata(move(metadata)) + { + } + + template + BaseType& traverse_until_last_accessible_node(It& it, const It& end) + { + Trie* node = this; + for (; it < end; ++it) { + auto next_it = node->m_children.find(*it); + if (next_it == node->m_children.end()) + return static_cast(*node); + node = &*(*next_it).value; + } + return static_cast(*node); + } + + template + const BaseType& traverse_until_last_accessible_node(It& it, const It& end) const { return const_cast(this)->traverse_until_last_accessible_node(it, end); } + + template + BaseType& traverse_until_last_accessible_node(const It& begin, const It& end) + { + auto it = begin; + return const_cast(this)->traverse_until_last_accessible_node(it, end); + } + + template + const BaseType& traverse_until_last_accessible_node(const It& begin, const It& end) const + { + auto it = begin; + return const_cast(this)->traverse_until_last_accessible_node(it, end); + } + + Optional metadata() const requires(!IsSame::value) { return m_metadata; } + void set_metadata(MetadataType metadata) requires(!IsSame::value) { m_metadata = move(metadata); } + const MetadataType& metadata_value() const requires(!IsSame::value) { return m_metadata.value(); } + + const ValueType& value() const { return m_value; } + ValueType& value() { return m_value; } + + Trie& ensure_child(ValueType value, Optional metadata = {}) + { + auto it = m_children.find(value); + if (it == m_children.end()) { + auto node = make(value, move(metadata)); + auto& node_ref = *node; + m_children.set(move(value), move(node)); + return static_cast(node_ref); + } + + auto& node_ref = *it->value; + if (metadata.has_value()) + node_ref.m_metadata = move(metadata); + return static_cast(node_ref); + } + + template + BaseType& insert( + It& it, const It& end, MetadataType metadata, ProvideMetadataFunction provide_missing_metadata) requires(!IsSame::value) + { + Trie* last_root_node = &traverse_until_last_accessible_node(it, end); + for (; it != end; ++it) + last_root_node = static_cast(&last_root_node->ensure_child(*it, provide_missing_metadata(static_cast(*last_root_node), it))); + last_root_node->set_metadata(move(metadata)); + return static_cast(*last_root_node); + } + + template + BaseType& insert(It& it, const It& end) requires(IsSame::value) + { + Trie* last_root_node = &traverse_until_last_accessible_node(it, end); + for (; it != end; ++it) + last_root_node = static_cast(&last_root_node->ensure_child(*it, {})); + return static_cast(*last_root_node); + } + + ConstIterator begin() const { return ConstIterator(*this); } + ConstIterator end() const { return ConstIterator::end(); } + + bool is_empty() const { return m_children.is_empty(); } + void clear() { m_children.clear(); } + + BaseType deep_copy() + { + Trie root(m_value, m_metadata); + for (auto& it : m_children) + root.m_children.set(it.key, make(it.value->deep_copy())); + return static_cast(move(root)); + } + +private: + ValueType m_value; + Optional m_metadata; + HashMap, ValueTraits> m_children; +}; + +template +class Trie : public Trie { + using Trie::Trie; +}; + +} + +template, typename BaseT = void> +class Trie : public Detail::Trie, ValueType, MetadataT, ValueTraits> { +public: + using DetailTrie = Detail::Trie, ValueType, MetadataT, ValueTraits>; + using MetadataType = typename DetailTrie::MetadataType; + + Trie(ValueType value, MetadataType metadata) requires(!IsSame::value && !IsSame::value) + : DetailTrie(move(value), move(metadata)) + { + } + + explicit Trie(ValueType value) + : DetailTrie(move(value), Optional {}) + { + } +}; + +} + +using AK::Trie;