LibPublicSuffix: Add Library and Generators
This commit is contained in:
parent
c53d3e7aa4
commit
8fcf42f684
Notes:
sideshowbarker
2024-07-17 05:02:42 +09:00
Author: https://github.com/cammo1123 Commit: https://github.com/SerenityOS/serenity/commit/8fcf42f684 Pull-request: https://github.com/SerenityOS/serenity/pull/19064 Reviewed-by: https://github.com/AtkinsSJ
10 changed files with 287 additions and 0 deletions
|
@ -17,6 +17,7 @@ serenity_option(ENABLE_COMPILETIME_HEADER_CHECK OFF CACHE BOOL "Enable compileti
|
|||
|
||||
serenity_option(ENABLE_TIME_ZONE_DATABASE_DOWNLOAD ON CACHE BOOL "Enable download of the IANA Time Zone Database at build time")
|
||||
serenity_option(ENABLE_UNICODE_DATABASE_DOWNLOAD ON CACHE BOOL "Enable download of Unicode UCD and CLDR files at build time")
|
||||
serenity_option(ENABLE_PUBLIC_SUFFIX_DOWNLOAD ON CACHE BOOL "Enable download of the Public Suffix List at build time")
|
||||
serenity_option(INCLUDE_WASM_SPEC_TESTS OFF CACHE BOOL "Download and include the WebAssembly spec testsuite")
|
||||
serenity_option(INCLUDE_FLAC_SPEC_TESTS OFF CACHE BOOL "Download and include the FLAC spec testsuite")
|
||||
serenity_option(ENABLE_CACERT_DOWNLOAD ON CACHE BOOL "Enable download of cacert.pem at build time")
|
||||
|
|
25
Meta/CMake/public_suffix.cmake
Normal file
25
Meta/CMake/public_suffix.cmake
Normal file
|
@ -0,0 +1,25 @@
|
|||
# Build-time setup for the Public Suffix List data used by LibPublicSuffix.
# Everything below is only configured when ENABLE_PUBLIC_SUFFIX_DOWNLOAD is on;
# otherwise PUBLIC_SUFFIX_SOURCES is left unset by this file.
include(${CMAKE_CURRENT_LIST_DIR}/utils.cmake)
|
||||
|
||||
if (ENABLE_PUBLIC_SUFFIX_DOWNLOAD)
|
||||
# Where the downloaded list is stored, the upstream URL, and the resulting local file.
set(PUBLIC_SUFFIX_PATH "${SERENITY_CACHE_DIR}/PublicSuffix" CACHE PATH "Download location for PublicSuffix files")
|
||||
set(PUBLIC_SUFFIX_DATA_URL "https://raw.githubusercontent.com/publicsuffix/list/master/public_suffix_list.dat")
|
||||
set(PUBLIC_SUFFIX_DATA_PATH "${PUBLIC_SUFFIX_PATH}/public_suffix_list.dat")
|
||||
|
||||
# Names of the two generated source files.
set(PUBLIC_SUFFIX_DATA_HEADER PublicSuffixData.h)
|
||||
set(PUBLIC_SUFFIX_DATA_IMPLEMENTATION PublicSuffixData.cpp)
|
||||
|
||||
download_file("${PUBLIC_SUFFIX_DATA_URL}" "${PUBLIC_SUFFIX_DATA_PATH}")
|
||||
# Runs Lagom::GeneratePublicSuffixData with `-p <list file>` to produce the header and implementation.
# NOTE(review): the third argument is the download directory rather than the list file itself;
# presumably invoke_generator uses it as a dependency -- confirm against utils.cmake.
invoke_generator(
|
||||
"PublicSuffixData"
|
||||
Lagom::GeneratePublicSuffixData
|
||||
"${PUBLIC_SUFFIX_PATH}/"
|
||||
"${PUBLIC_SUFFIX_DATA_HEADER}"
|
||||
"${PUBLIC_SUFFIX_DATA_IMPLEMENTATION}"
|
||||
arguments -p "${PUBLIC_SUFFIX_DATA_PATH}"
|
||||
)
|
||||
|
||||
# Generated files for the including CMakeLists.txt to add to its source list.
set(PUBLIC_SUFFIX_SOURCES
|
||||
${PUBLIC_SUFFIX_DATA_HEADER}
|
||||
${PUBLIC_SUFFIX_DATA_IMPLEMENTATION}
|
||||
)
|
||||
endif()
|
|
@ -404,6 +404,7 @@ if (BUILD_LAGOM)
|
|||
Markdown
|
||||
PDF
|
||||
Protocol
|
||||
PublicSuffix
|
||||
Regex
|
||||
SoftGPU
|
||||
SQL
|
||||
|
|
|
@ -3,6 +3,7 @@ add_subdirectory(IPCCompiler)
|
|||
add_subdirectory(LibEDID)
|
||||
add_subdirectory(LibGL)
|
||||
add_subdirectory(LibLocale)
|
||||
add_subdirectory(LibPublicSuffix)
|
||||
add_subdirectory(LibTimeZone)
|
||||
add_subdirectory(LibUnicode)
|
||||
add_subdirectory(LibWeb)
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
# Declares the GeneratePublicSuffixData code-generator tool, built from GeneratePublicSuffixData.cpp with LibMain.
lagom_tool(GeneratePublicSuffixData SOURCES GeneratePublicSuffixData.cpp LIBS LibMain)
|
|
@ -0,0 +1,185 @@
|
|||
/*
|
||||
* Copyright (c) 2023, Cameron Youell <cameronyouell@gmail.com>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
#include "../LibUnicode/GeneratorUtil.h"
|
||||
#include <AK/SourceGenerator.h>
|
||||
#include <AK/StringBuilder.h>
|
||||
#include <LibCore/ArgsParser.h>
|
||||
#include <LibCore/File.h>
|
||||
#include <LibMain/Main.h>
|
||||
|
||||
// Forward declarations of the two emitters defined later in this file.
// Both take the opened public suffix list and the output file to write to.
ErrorOr<void> generate_header_file(Core::InputBufferedFile&, Core::File&);
|
||||
ErrorOr<void> generate_implementation_file(Core::InputBufferedFile&, Core::File&);
|
||||
|
||||
// Generator entry point: reads the three paths from the command line, opens the
// public suffix list for reading and both outputs for writing, then emits the
// header followed by the implementation. Any failure is propagated via TRY.
ErrorOr<int> serenity_main(Main::Arguments arguments)
{
    StringView header_output_path;
    StringView implementation_output_path;
    StringView suffix_list_input_path;

    Core::ArgsParser parser;
    parser.add_option(header_output_path, "Path to the header file to generate", "generated-header-path", 'h', "generated-header-path");
    parser.add_option(implementation_output_path, "Path to the implementation file to generate", "generated-implementation-path", 'c', "generated-implementation-path");
    parser.add_option(suffix_list_input_path, "Path to the public suffix list", "public-suffix-list-path", 'p', "public-suffix-list-path");
    parser.parse(arguments);

    // The input is opened before either output, in the same order as before.
    auto suffix_list = TRY(open_file(suffix_list_input_path, Core::File::OpenMode::Read));
    auto header_output = TRY(Core::File::open(header_output_path, Core::File::OpenMode::Write));
    auto implementation_output = TRY(Core::File::open(implementation_output_path, Core::File::OpenMode::Write));

    TRY(generate_header_file(*suffix_list, *header_output));
    TRY(generate_implementation_file(*suffix_list, *implementation_output));

    return 0;
}
|
||||
|
||||
// Writes the generated PublicSuffixData.h to `file`.
// The header text is a fixed template; the input-list parameter is unnamed and not read here.
// Returns an error if writing to `file` fails.
ErrorOr<void> generate_header_file(Core::InputBufferedFile&, Core::File& file)
|
||||
{
|
||||
StringBuilder builder;
|
||||
SourceGenerator generator { builder };
|
||||
// Everything up to the closing `)~~~"` is emitted verbatim as the header's contents.
generator.append(R"~~~(
|
||||
#pragma once
|
||||
|
||||
#include <AK/DeprecatedString.h>
|
||||
#include <AK/Forward.h>
|
||||
#include <AK/Trie.h>
|
||||
|
||||
namespace PublicSuffix {
|
||||
|
||||
class PublicSuffixData {
|
||||
protected:
|
||||
PublicSuffixData();
|
||||
|
||||
public:
|
||||
PublicSuffixData(PublicSuffixData const&) = delete;
|
||||
PublicSuffixData& operator=(PublicSuffixData const&) = delete;
|
||||
|
||||
static PublicSuffixData* the()
|
||||
{
|
||||
static PublicSuffixData* s_the;
|
||||
if (!s_the)
|
||||
s_the = new PublicSuffixData;
|
||||
return s_the;
|
||||
}
|
||||
|
||||
ErrorOr<Optional<String>> get_public_suffix(StringView string);
|
||||
|
||||
private:
|
||||
Trie<char, DeprecatedString> m_dictionary;
|
||||
};
|
||||
|
||||
} // namespace PublicSuffix
|
||||
|
||||
)~~~");
|
||||
|
||||
// Flush the accumulated text to the output file in one write.
TRY(file.write_until_depleted(generator.as_string_view().bytes()));
|
||||
return {};
|
||||
}
|
||||
|
||||
// Writes the generated PublicSuffixData.cpp to `file`.
// The output consists of: a fixed preamble opening the `s_public_suffixes` vector, one
// entry per usable line of `input`, and a fixed tail that closes the vector and defines
// the PublicSuffixData constructor and get_public_suffix().
// Returns an error if reading `input` or writing `file` fails.
ErrorOr<void> generate_implementation_file(Core::InputBufferedFile& input, Core::File& file)
|
||||
{
|
||||
StringBuilder builder;
|
||||
SourceGenerator generator { builder };
|
||||
generator.append(R"~~~(
|
||||
#include <LibPublicSuffix/PublicSuffixData.h>
|
||||
#include <AK/Vector.h>
|
||||
#include <AK/String.h>
|
||||
|
||||
namespace PublicSuffix {
|
||||
|
||||
static Vector<StringView> s_public_suffixes {)~~~");
|
||||
|
||||
// Scratch space each line is read into.
// NOTE(review): presumably read_line fails for lines longer than this buffer -- confirm.
Array<u8, 1024> buffer {};
|
||||
|
||||
while (TRY(input.can_read_line())) {
|
||||
auto line = TRY(input.read_line(buffer));
|
||||
|
||||
// Skip comment lines (starting with "//") and blank lines of the list.
if (line.starts_with("//"sv) || line.is_empty())
|
||||
continue;
|
||||
|
||||
// Reverse the dot-separated labels, so e.g. "co.uk" is stored as "uk.co".
auto view = line.split_view("."sv);
|
||||
view.reverse();
|
||||
|
||||
// Note: this per-line builder shadows the function-level `builder` above.
StringBuilder builder;
|
||||
builder.join("."sv, view);
|
||||
auto val = builder.string_view();
|
||||
|
||||
// Emit one `{"<reversed suffix>"sv},` initializer entry for this line.
generator.set("line", val);
|
||||
generator.append(R"~~~(
|
||||
{"@line@"sv},)~~~");
|
||||
}
|
||||
|
||||
// Fixed tail: closes the vector and emits the runtime lookup code verbatim.
generator.append(R"~~~(
|
||||
};
|
||||
|
||||
PublicSuffixData::PublicSuffixData()
|
||||
: m_dictionary('/', "")
|
||||
{
|
||||
// FIXME: Reduce the depth of this trie
|
||||
for (auto str : s_public_suffixes) {
|
||||
MUST(m_dictionary.insert(str.begin(), str.end(), str, [](auto& parent, auto& it) -> Optional<DeprecatedString> {
|
||||
return DeprecatedString::formatted("{}{}", parent.metadata_value(), *it);
|
||||
}));
|
||||
}
|
||||
}
|
||||
|
||||
ErrorOr<Optional<String>> PublicSuffixData::get_public_suffix(StringView string)
|
||||
{
|
||||
auto input = string.split_view("."sv);
|
||||
input.reverse();
|
||||
|
||||
auto can_find = [&](StringView input) -> bool {
|
||||
auto it = input.begin();
|
||||
auto& node = m_dictionary.traverse_until_last_accessible_node(it, input.end());
|
||||
return it.is_end() && node.metadata().has_value();
|
||||
};
|
||||
|
||||
StringBuilder overall_search_string;
|
||||
StringBuilder search_string;
|
||||
for (auto part : input) {
|
||||
search_string.clear();
|
||||
TRY(search_string.try_append(TRY(overall_search_string.to_string())));
|
||||
TRY(search_string.try_append(part));
|
||||
|
||||
if (can_find(search_string.string_view())) {
|
||||
overall_search_string.append(TRY(String::from_utf8(part)));
|
||||
overall_search_string.append("."sv);
|
||||
continue;
|
||||
}
|
||||
|
||||
search_string.clear();
|
||||
TRY(search_string.try_append(TRY(overall_search_string.to_string())));
|
||||
TRY(search_string.try_append("*"sv));
|
||||
|
||||
if (can_find(search_string.string_view())) {
|
||||
overall_search_string.append(TRY(String::from_utf8(part)));
|
||||
overall_search_string.append("."sv);
|
||||
continue;
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
auto view = overall_search_string.string_view().split_view("."sv);
|
||||
view.reverse();
|
||||
|
||||
StringBuilder return_string_builder;
|
||||
return_string_builder.join('.', view);
|
||||
auto returnString = TRY(return_string_builder.to_string());
|
||||
if (!returnString.is_empty())
|
||||
return returnString;
|
||||
|
||||
return Optional<String> {};
|
||||
}
|
||||
|
||||
} // namespace PublicSuffix
|
||||
|
||||
)~~~");
|
||||
|
||||
// Flush the accumulated text to the output file in one write.
TRY(file.write_until_depleted(generator.as_string_view().bytes()));
|
||||
return {};
|
||||
}
|
|
@ -44,6 +44,7 @@ add_subdirectory(LibPartition)
|
|||
add_subdirectory(LibPCIDB)
|
||||
add_subdirectory(LibPDF)
|
||||
add_subdirectory(LibProtocol)
|
||||
add_subdirectory(LibPublicSuffix)
|
||||
add_subdirectory(LibRegex)
|
||||
add_subdirectory(LibSanitizer)
|
||||
add_subdirectory(LibSoftGPU)
|
||||
|
|
10
Userland/Libraries/LibPublicSuffix/CMakeLists.txt
Normal file
10
Userland/Libraries/LibPublicSuffix/CMakeLists.txt
Normal file
|
@ -0,0 +1,10 @@
|
|||
# Build description for LibPublicSuffix: URL.cpp plus the sources listed in PUBLIC_SUFFIX_SOURCES.
include(${SerenityOS_SOURCE_DIR}/Meta/CMake/public_suffix.cmake)
|
||||
|
||||
set(SOURCES
|
||||
URL.cpp
|
||||
${PUBLIC_SUFFIX_SOURCES}
|
||||
)
|
||||
set(GENERATED_SOURCES ${CURRENT_LIB_GENERATED})
|
||||
|
||||
serenity_lib(LibPublicSuffix publicsuffix)
|
||||
# $<BOOL:...> expands to 1 or 0, so ENABLE_PUBLIC_SUFFIX_DOWNLOAD is always defined for this target.
# C++ code therefore has to test the macro's value (`#if ENABLE_PUBLIC_SUFFIX_DOWNLOAD`), not `defined(...)`.
target_compile_definitions(LibPublicSuffix PRIVATE ENABLE_PUBLIC_SUFFIX_DOWNLOAD=$<BOOL:${ENABLE_PUBLIC_SUFFIX_DOWNLOAD}>)
|
47
Userland/Libraries/LibPublicSuffix/URL.cpp
Normal file
47
Userland/Libraries/LibPublicSuffix/URL.cpp
Normal file
|
@ -0,0 +1,47 @@
|
|||
/*
|
||||
* Copyright (c) 2023, Cameron Youell <cameronyouell@gmail.com>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
#include <AK/String.h>
|
||||
#include <AK/URL.h>
|
||||
#include <LibPublicSuffix/URL.h>
|
||||
#if defined(ENABLE_PUBLIC_SUFFIX_DOWNLOAD)
|
||||
# include <LibPublicSuffix/PublicSuffixData.h>
|
||||
#endif
|
||||
|
||||
namespace PublicSuffix {
|
||||
ErrorOr<String> absolute_url(StringView url)
|
||||
{
|
||||
String out = TRY(String::from_utf8(url));
|
||||
#if !defined(ENABLE_PUBLIC_SUFFIX_DOWNLOAD)
|
||||
return out;
|
||||
#else
|
||||
if (!out.contains("://"sv))
|
||||
out = TRY(String::formatted("https://{}"sv, out));
|
||||
|
||||
auto final_url = URL::create_with_url_or_path(out.to_deprecated_string());
|
||||
if (!final_url.is_valid())
|
||||
return Error::from_string_view("Invalid URL"sv);
|
||||
|
||||
if (final_url.host().has<URL::IPv4Address>() || final_url.host().has<URL::IPv6Address>())
|
||||
return out;
|
||||
|
||||
if (final_url.scheme() != "http"sv && final_url.scheme() != "https"sv)
|
||||
return out;
|
||||
|
||||
if (final_url.host().has<String>()) {
|
||||
auto string_host = final_url.host().get<String>();
|
||||
auto maybe_public_suffix = TRY(PublicSuffixData::the()->get_public_suffix(string_host));
|
||||
if (maybe_public_suffix.has_value())
|
||||
return out;
|
||||
|
||||
if (string_host.ends_with_bytes(".local"sv) || string_host.ends_with_bytes("localhost"sv))
|
||||
return out;
|
||||
}
|
||||
|
||||
return Error::from_string_view("Invalid URL"sv);
|
||||
#endif
|
||||
}
|
||||
}
|
15
Userland/Libraries/LibPublicSuffix/URL.h
Normal file
15
Userland/Libraries/LibPublicSuffix/URL.h
Normal file
|
@ -0,0 +1,15 @@
|
|||
/*
|
||||
* Copyright (c) 2023, Cameron Youell <cameronyouell@gmail.com>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <AK/Forward.h>
|
||||
|
||||
namespace PublicSuffix {
|
||||
|
||||
// Converts `url` into an absolute URL string (defined in URL.cpp).
// Returns the resulting String, or an error such as "Invalid URL" when the input
// cannot be accepted.
ErrorOr<String> absolute_url(StringView url);
|
||||
|
||||
}
|
Loading…
Add table
Reference in a new issue