Explorar el Código

LibPublicSuffix: Add Library and Generators

Cameron Youell hace 2 años
padre
commit
8fcf42f684

+ 1 - 0
Meta/CMake/common_options.cmake

@@ -17,6 +17,7 @@ serenity_option(ENABLE_COMPILETIME_HEADER_CHECK OFF CACHE BOOL "Enable compileti
 
 serenity_option(ENABLE_TIME_ZONE_DATABASE_DOWNLOAD ON CACHE BOOL "Enable download of the IANA Time Zone Database at build time")
 serenity_option(ENABLE_UNICODE_DATABASE_DOWNLOAD ON CACHE BOOL "Enable download of Unicode UCD and CLDR files at build time")
+serenity_option(ENABLE_PUBLIC_SUFFIX_DOWNLOAD ON CACHE BOOL "Enable download of the Public Suffix List at build time")
 serenity_option(INCLUDE_WASM_SPEC_TESTS OFF CACHE BOOL "Download and include the WebAssembly spec testsuite")
 serenity_option(INCLUDE_FLAC_SPEC_TESTS OFF CACHE BOOL "Download and include the FLAC spec testsuite")
 serenity_option(ENABLE_CACERT_DOWNLOAD ON CACHE BOOL "Enable download of cacert.pem at build time")

+ 25 - 0
Meta/CMake/public_suffix.cmake

@@ -0,0 +1,25 @@
+# Downloads the Public Suffix List and wires up the generator that turns it into
+# PublicSuffixData.h / PublicSuffixData.cpp. Everything here is skipped (and
+# PUBLIC_SUFFIX_SOURCES is left unset) when ENABLE_PUBLIC_SUFFIX_DOWNLOAD is off.
+include(${CMAKE_CURRENT_LIST_DIR}/utils.cmake)
+
+if (ENABLE_PUBLIC_SUFFIX_DOWNLOAD)
+    # Where the raw list is cached, and where it is fetched from.
+    set(PUBLIC_SUFFIX_PATH "${SERENITY_CACHE_DIR}/PublicSuffix" CACHE PATH "Download location for PublicSuffix files")
+    set(PUBLIC_SUFFIX_DATA_URL "https://raw.githubusercontent.com/publicsuffix/list/master/public_suffix_list.dat")
+    set(PUBLIC_SUFFIX_DATA_PATH "${PUBLIC_SUFFIX_PATH}/public_suffix_list.dat")
+
+    # Names of the files produced by the generator.
+    set(PUBLIC_SUFFIX_DATA_HEADER PublicSuffixData.h)
+    set(PUBLIC_SUFFIX_DATA_IMPLEMENTATION PublicSuffixData.cpp)
+
+    download_file("${PUBLIC_SUFFIX_DATA_URL}" "${PUBLIC_SUFFIX_DATA_PATH}")
+
+    # The generator receives the downloaded list through its -p option.
+    invoke_generator(
+        "PublicSuffixData"
+        Lagom::GeneratePublicSuffixData
+        "${PUBLIC_SUFFIX_PATH}/"
+        "${PUBLIC_SUFFIX_DATA_HEADER}"
+        "${PUBLIC_SUFFIX_DATA_IMPLEMENTATION}"
+        arguments -p "${PUBLIC_SUFFIX_DATA_PATH}"
+    )
+
+    # Consumed by the library's CMakeLists.txt to add the generated files to its sources.
+    set(PUBLIC_SUFFIX_SOURCES
+        ${PUBLIC_SUFFIX_DATA_HEADER}
+        ${PUBLIC_SUFFIX_DATA_IMPLEMENTATION}
+    )
+endif()

+ 1 - 0
Meta/Lagom/CMakeLists.txt

@@ -404,6 +404,7 @@ if (BUILD_LAGOM)
         Markdown
         PDF
         Protocol
+        PublicSuffix
         Regex
         SoftGPU
         SQL

+ 1 - 0
Meta/Lagom/Tools/CodeGenerators/CMakeLists.txt

@@ -3,6 +3,7 @@ add_subdirectory(IPCCompiler)
 add_subdirectory(LibEDID)
 add_subdirectory(LibGL)
 add_subdirectory(LibLocale)
+add_subdirectory(LibPublicSuffix)
 add_subdirectory(LibTimeZone)
 add_subdirectory(LibUnicode)
 add_subdirectory(LibWeb)

+ 1 - 0
Meta/Lagom/Tools/CodeGenerators/LibPublicSuffix/CMakeLists.txt

@@ -0,0 +1 @@
+# Code generator that reads the Public Suffix List and writes PublicSuffixData.h / PublicSuffixData.cpp.
+lagom_tool(GeneratePublicSuffixData        SOURCES GeneratePublicSuffixData.cpp LIBS LibMain)

+ 185 - 0
Meta/Lagom/Tools/CodeGenerators/LibPublicSuffix/GeneratePublicSuffixData.cpp

@@ -0,0 +1,185 @@
+/*
+ * Copyright (c) 2023, Cameron Youell <cameronyouell@gmail.com>
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#include "../LibUnicode/GeneratorUtil.h"
+#include <AK/SourceGenerator.h>
+#include <AK/StringBuilder.h>
+#include <LibCore/ArgsParser.h>
+#include <LibCore/File.h>
+#include <LibMain/Main.h>
+
+ErrorOr<void> generate_header_file(Core::InputBufferedFile&, Core::File&);
+ErrorOr<void> generate_implementation_file(Core::InputBufferedFile&, Core::File&);
+
+// Entry point of the generator: parses the three path options, opens the public suffix
+// list for reading and both output files for writing, then emits the header followed by
+// the implementation. Any failure to open or write a file is propagated as an error.
+ErrorOr<int> serenity_main(Main::Arguments arguments)
+{
+    // Filled in by the argument parser below.
+    StringView header_output_path;
+    StringView implementation_output_path;
+    StringView suffix_list_input_path;
+
+    Core::ArgsParser parser;
+    parser.add_option(header_output_path, "Path to the header file to generate", "generated-header-path", 'h', "generated-header-path");
+    parser.add_option(implementation_output_path, "Path to the implementation file to generate", "generated-implementation-path", 'c', "generated-implementation-path");
+    parser.add_option(suffix_list_input_path, "Path to the public suffix list", "public-suffix-list-path", 'p', "public-suffix-list-path");
+    parser.parse(arguments);
+
+    auto suffix_list = TRY(open_file(suffix_list_input_path, Core::File::OpenMode::Read));
+
+    auto header_output = TRY(Core::File::open(header_output_path, Core::File::OpenMode::Write));
+    auto implementation_output = TRY(Core::File::open(implementation_output_path, Core::File::OpenMode::Write));
+
+    // The header does not depend on the list contents; only the implementation consumes it.
+    TRY(generate_header_file(*suffix_list, *header_output));
+    TRY(generate_implementation_file(*suffix_list, *implementation_output));
+
+    return 0;
+}
+
+// Writes the fixed PublicSuffixData.h text to `file`. The input list is not consulted;
+// the header only declares the PublicSuffixData singleton and its lookup method.
+ErrorOr<void> generate_header_file(Core::InputBufferedFile&, Core::File& file)
+{
+    StringBuilder header_text;
+    SourceGenerator header { header_text };
+
+    // Everything between the raw-string delimiters is emitted verbatim.
+    header.append(R"~~~(
+#pragma once
+
+#include <AK/DeprecatedString.h>
+#include <AK/Forward.h>
+#include <AK/Trie.h>
+
+namespace PublicSuffix {
+
+class PublicSuffixData {
+protected:
+    PublicSuffixData();
+
+public:
+    PublicSuffixData(PublicSuffixData const&) = delete;
+    PublicSuffixData& operator=(PublicSuffixData const&) = delete;
+
+    static PublicSuffixData* the()
+    {
+        static PublicSuffixData* s_the;
+        if (!s_the)
+            s_the = new PublicSuffixData;
+        return s_the;
+    }
+
+    ErrorOr<Optional<String>> get_public_suffix(StringView string);
+
+private:
+    Trie<char, DeprecatedString> m_dictionary;
+};
+
+} // namespace PublicSuffix
+
+)~~~");
+
+    auto const rendered = header.as_string_view();
+    TRY(file.write_until_depleted(rendered.bytes()));
+    return {};
+}
+
+// Writes PublicSuffixData.cpp to `file`.
+//
+// Each non-empty, non-comment line of `input` is split on '.', its labels are reversed
+// (so "co.uk" becomes "uk.co") and the result is emitted as one entry of the generated
+// s_public_suffixes table. The generated constructor inserts every entry into a trie and
+// the generated get_public_suffix() walks a host name label by label from the right.
+//
+// NOTE(review): lines starting with '!' (exception rules in the Public Suffix List
+// format) are not treated specially here; they are emitted like any other entry.
+ErrorOr<void> generate_implementation_file(Core::InputBufferedFile& input, Core::File& file)
+{
+    StringBuilder implementation_text;
+    SourceGenerator implementation { implementation_text };
+    implementation.append(R"~~~(
+#include <LibPublicSuffix/PublicSuffixData.h>
+#include <AK/Vector.h>
+#include <AK/String.h>
+
+namespace PublicSuffix {
+
+static Vector<StringView> s_public_suffixes {)~~~");
+
+    // Scratch space for a single line of the list.
+    Array<u8, 1024> line_buffer {};
+
+    while (TRY(input.can_read_line())) {
+        auto line = TRY(input.read_line(line_buffer));
+
+        // Skip blank lines and `//` comment lines.
+        if (line.is_empty() || line.starts_with("//"sv))
+            continue;
+
+        // Store the labels right-to-left so that lookups can start at the TLD.
+        auto labels = line.split_view("."sv);
+        labels.reverse();
+
+        StringBuilder reversed_suffix;
+        reversed_suffix.join("."sv, labels);
+
+        implementation.set("line", reversed_suffix.string_view());
+        implementation.append(R"~~~(
+    {"@line@"sv},)~~~");
+    }
+
+    // NOTE(review): the insert callback in the generated constructor gives every
+    // intermediate trie node a metadata value, while can_find() only tests
+    // metadata().has_value(). It looks like a strict prefix of a stored entry could
+    // therefore be reported as found - confirm against AK::Trie before relying on it.
+    implementation.append(R"~~~(
+};
+
+PublicSuffixData::PublicSuffixData()
+    : m_dictionary('/', "")
+{
+    // FIXME: Reduce the depth of this trie
+    for (auto str : s_public_suffixes) {
+        MUST(m_dictionary.insert(str.begin(), str.end(), str, [](auto& parent, auto& it) -> Optional<DeprecatedString> { 
+            return DeprecatedString::formatted("{}{}", parent.metadata_value(), *it);
+        }));
+    }
+}
+
+ErrorOr<Optional<String>> PublicSuffixData::get_public_suffix(StringView string)
+{
+    auto input = string.split_view("."sv);
+    input.reverse();
+
+    auto can_find = [&](StringView input) -> bool {
+        auto it = input.begin();
+        auto& node = m_dictionary.traverse_until_last_accessible_node(it, input.end());
+        return it.is_end() && node.metadata().has_value();
+    };
+
+    StringBuilder overall_search_string;
+    StringBuilder search_string;
+    for (auto part : input) {
+        search_string.clear();
+        TRY(search_string.try_append(TRY(overall_search_string.to_string())));
+        TRY(search_string.try_append(part));
+
+        if (can_find(search_string.string_view())) {
+            overall_search_string.append(TRY(String::from_utf8(part)));
+            overall_search_string.append("."sv);
+            continue;
+        }
+
+        search_string.clear();
+        TRY(search_string.try_append(TRY(overall_search_string.to_string())));
+        TRY(search_string.try_append("*"sv));
+
+        if (can_find(search_string.string_view())) {
+            overall_search_string.append(TRY(String::from_utf8(part)));
+            overall_search_string.append("."sv);
+            continue;
+        }
+
+        break;
+    }
+
+    auto view = overall_search_string.string_view().split_view("."sv);
+    view.reverse();
+
+    StringBuilder return_string_builder;
+    return_string_builder.join('.', view);
+    auto returnString = TRY(return_string_builder.to_string());
+    if (!returnString.is_empty())
+        return returnString;
+
+    return Optional<String> {};
+}
+
+} // namespace PublicSuffix
+
+)~~~");
+
+    auto const rendered = implementation.as_string_view();
+    TRY(file.write_until_depleted(rendered.bytes()));
+    return {};
+}

+ 1 - 0
Userland/Libraries/CMakeLists.txt

@@ -44,6 +44,7 @@ add_subdirectory(LibPartition)
 add_subdirectory(LibPCIDB)
 add_subdirectory(LibPDF)
 add_subdirectory(LibProtocol)
+add_subdirectory(LibPublicSuffix)
 add_subdirectory(LibRegex)
 add_subdirectory(LibSanitizer)
 add_subdirectory(LibSoftGPU)

+ 10 - 0
Userland/Libraries/LibPublicSuffix/CMakeLists.txt

@@ -0,0 +1,10 @@
+# Sets PUBLIC_SUFFIX_SOURCES to the generated data files when ENABLE_PUBLIC_SUFFIX_DOWNLOAD is on.
+include(${SerenityOS_SOURCE_DIR}/Meta/CMake/public_suffix.cmake)
+
+set(SOURCES
+    URL.cpp
+    ${PUBLIC_SUFFIX_SOURCES}
+)
+set(GENERATED_SOURCES ${CURRENT_LIB_GENERATED})
+
+serenity_lib(LibPublicSuffix publicsuffix)
+# $<BOOL:...> expands to 0 or 1, so the macro is always defined for this library's sources;
+# only its value tells whether the generated data is available.
+target_compile_definitions(LibPublicSuffix PRIVATE ENABLE_PUBLIC_SUFFIX_DOWNLOAD=$<BOOL:${ENABLE_PUBLIC_SUFFIX_DOWNLOAD}>)

+ 47 - 0
Userland/Libraries/LibPublicSuffix/URL.cpp

@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2023, Cameron Youell <cameronyouell@gmail.com>
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#include <AK/String.h>
+#include <AK/URL.h>
+#include <LibPublicSuffix/URL.h>
+// The build system always defines ENABLE_PUBLIC_SUFFIX_DOWNLOAD (as 0 or 1), so its value
+// has to be tested; `defined(...)` would be true even when the generated data is absent.
+#if ENABLE_PUBLIC_SUFFIX_DOWNLOAD
+#    include <LibPublicSuffix/PublicSuffixData.h>
+#endif
+
+namespace PublicSuffix {
+
+// Turns user-entered text into an absolute URL string, or fails with "Invalid URL".
+//
+// Without the generated public suffix data the input is returned unchanged. Otherwise:
+//  - "https://" is prepended when the text contains no "://";
+//  - the result must parse as a valid URL;
+//  - URLs with an IPv4/IPv6 host, or with a scheme other than http/https, are accepted as-is;
+//  - a named host is accepted when it has a known public suffix, or when it is a local
+//    name ("localhost", "*.localhost" or "*.local").
+ErrorOr<String> absolute_url(StringView url)
+{
+    String out = TRY(String::from_utf8(url));
+#if !ENABLE_PUBLIC_SUFFIX_DOWNLOAD
+    return out;
+#else
+    if (!out.contains("://"sv))
+        out = TRY(String::formatted("https://{}"sv, out));
+
+    auto final_url = URL::create_with_url_or_path(out.to_deprecated_string());
+    if (!final_url.is_valid())
+        return Error::from_string_view("Invalid URL"sv);
+
+    // IP literals have no public suffix to check.
+    if (final_url.host().has<URL::IPv4Address>() || final_url.host().has<URL::IPv6Address>())
+        return out;
+
+    // Only web URLs are subject to the public suffix check.
+    if (final_url.scheme() != "http"sv && final_url.scheme() != "https"sv)
+        return out;
+
+    if (final_url.host().has<String>()) {
+        auto string_host = final_url.host().get<String>();
+        auto maybe_public_suffix = TRY(PublicSuffixData::the()->get_public_suffix(string_host));
+        if (maybe_public_suffix.has_value())
+            return out;
+
+        // Local names carry no public suffix but are still acceptable. Match whole labels
+        // only, so that e.g. "notlocalhost" is not mistaken for "localhost".
+        if (string_host.ends_with_bytes(".local"sv) || string_host == "localhost"sv || string_host.ends_with_bytes(".localhost"sv))
+            return out;
+    }
+
+    return Error::from_string_view("Invalid URL"sv);
+#endif
+}
+}

+ 15 - 0
Userland/Libraries/LibPublicSuffix/URL.h

@@ -0,0 +1,15 @@
+/*
+ * Copyright (c) 2023, Cameron Youell <cameronyouell@gmail.com>
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#pragma once
+
+#include <AK/Forward.h>
+
+namespace PublicSuffix {
+
+// Returns `url` as an absolute URL string, prefixing "https://" when it contains no "://".
+// Fails with an "Invalid URL" error when the result does not parse, or when its http(s)
+// host is a name that neither has a known public suffix nor is a local name.
+// When the library is built without the public suffix data, `url` is returned unchanged.
+ErrorOr<String> absolute_url(StringView url);
+
+}