URL.cpp 4.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147
  1. /*
  2. * Copyright (c) 2023, Tim Flynn <trflynn89@serenityos.org>
  3. * Copyright (c) 2023, Cameron Youell <cameronyouell@gmail.com>
  4. *
  5. * SPDX-License-Identifier: BSD-2-Clause
  6. */
  7. #include <AK/String.h>
  8. #include <LibCore/System.h>
  9. #include <LibFileSystem/FileSystem.h>
  10. #include <LibWebView/URL.h>
  11. #if defined(ENABLE_PUBLIC_SUFFIX)
  12. # include <LibWebView/PublicSuffixData.h>
  13. #endif
  14. namespace WebView {
  15. static Optional<URL> query_public_suffix_list(StringView url_string)
  16. {
  17. auto out = MUST(String::from_utf8(url_string));
  18. if (!out.contains("://"sv))
  19. out = MUST(String::formatted("https://{}"sv, out));
  20. auto url = URL::create_with_url_or_path(out.to_deprecated_string());
  21. if (!url.is_valid())
  22. return {};
  23. if (url.host().has<URL::IPv4Address>() || url.host().has<URL::IPv6Address>())
  24. return url;
  25. if (url.scheme() != "http"sv && url.scheme() != "https"sv)
  26. return url;
  27. if (url.host().has<String>()) {
  28. auto const& host = url.host().get<String>();
  29. if (auto public_suffix = get_public_suffix(host); public_suffix.has_value())
  30. return url;
  31. if (host.ends_with_bytes(".local"sv) || host.ends_with_bytes("localhost"sv))
  32. return url;
  33. }
  34. return {};
  35. }
  36. bool is_public_suffix([[maybe_unused]] StringView host)
  37. {
  38. #if defined(ENABLE_PUBLIC_SUFFIX)
  39. return PublicSuffixData::the()->is_public_suffix(host);
  40. #else
  41. return false;
  42. #endif
  43. }
  44. Optional<String> get_public_suffix([[maybe_unused]] StringView host)
  45. {
  46. #if defined(ENABLE_PUBLIC_SUFFIX)
  47. return MUST(PublicSuffixData::the()->get_public_suffix(host));
  48. #else
  49. return {};
  50. #endif
  51. }
  52. Optional<URL> sanitize_url(StringView url, Optional<StringView> search_engine, AppendTLD append_tld)
  53. {
  54. if (FileSystem::exists(url)) {
  55. auto path = FileSystem::real_path(url);
  56. if (path.is_error())
  57. return {};
  58. return URL::create_with_file_scheme(path.value().to_deprecated_string());
  59. }
  60. auto format_search_engine = [&]() -> Optional<URL> {
  61. if (!search_engine.has_value())
  62. return {};
  63. return MUST(String::formatted(*search_engine, URL::percent_decode(url)));
  64. };
  65. String url_buffer;
  66. if (append_tld == AppendTLD::Yes) {
  67. // FIXME: Expand the list of top level domains.
  68. if (!url.ends_with(".com"sv) && !url.ends_with(".net"sv) && !url.ends_with(".org"sv)) {
  69. url_buffer = MUST(String::formatted("{}.com", url));
  70. url = url_buffer;
  71. }
  72. }
  73. auto result = query_public_suffix_list(url);
  74. if (!result.has_value())
  75. return format_search_engine();
  76. return result.release_value();
  77. }
  78. static URLParts break_file_url_into_parts(URL const& url, StringView url_string)
  79. {
  80. auto scheme = url_string.substring_view(0, url.scheme().bytes_as_string_view().length() + "://"sv.length());
  81. auto path = url_string.substring_view(scheme.length());
  82. return URLParts { scheme, path, {} };
  83. }
  84. static URLParts break_web_url_into_parts(URL const& url, StringView url_string)
  85. {
  86. auto host = MUST(url.serialized_host());
  87. auto public_suffix = get_public_suffix(host);
  88. if (!public_suffix.has_value())
  89. return {};
  90. auto public_suffix_start = url_string.find(*public_suffix);
  91. auto public_suffix_end = public_suffix_start.value() + public_suffix->bytes_as_string_view().length();
  92. auto scheme_and_subdomain = url_string.substring_view(0, *public_suffix_start);
  93. scheme_and_subdomain = scheme_and_subdomain.trim("."sv, TrimMode::Right);
  94. if (auto index = scheme_and_subdomain.find_last('.'); index.has_value())
  95. scheme_and_subdomain = scheme_and_subdomain.substring_view(0, *index + 1);
  96. else
  97. scheme_and_subdomain = scheme_and_subdomain.substring_view(0, url.scheme().bytes_as_string_view().length() + "://"sv.length());
  98. auto effective_tld_plus_one = url_string.substring_view(scheme_and_subdomain.length(), public_suffix_end - scheme_and_subdomain.length());
  99. auto remainder = url_string.substring_view(public_suffix_end);
  100. return URLParts { scheme_and_subdomain, effective_tld_plus_one, remainder };
  101. }
  102. Optional<URLParts> break_url_into_parts(StringView url_string)
  103. {
  104. auto url = URL::create_with_url_or_path(url_string);
  105. if (!url.is_valid())
  106. return {};
  107. if (url.scheme() == "file"sv)
  108. return break_file_url_into_parts(url, url_string);
  109. if (url.scheme().is_one_of("http"sv, "https"sv, "gemini"sv))
  110. return break_web_url_into_parts(url, url_string);
  111. return {};
  112. }
  113. }