URL.cpp 5.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203
  1. /*
  2. * Copyright (c) 2023, Tim Flynn <trflynn89@serenityos.org>
  3. * Copyright (c) 2023, Cameron Youell <cameronyouell@gmail.com>
  4. *
  5. * SPDX-License-Identifier: BSD-2-Clause
  6. */
  7. #include <AK/LexicalPath.h>
  8. #include <AK/String.h>
  9. #include <LibCore/System.h>
  10. #include <LibFileSystem/FileSystem.h>
  11. #include <LibUnicode/URL.h>
  12. #include <LibWebView/URL.h>
  13. #if defined(ENABLE_PUBLIC_SUFFIX)
  14. # include <LibWebView/PublicSuffixData.h>
  15. #endif
  16. namespace WebView {
  17. static Optional<URL> create_url_with_url_or_path(String const& url_or_path)
  18. {
  19. auto url = Unicode::create_unicode_url(url_or_path);
  20. if (!url.is_error() && url.value().is_valid())
  21. return url.release_value();
  22. auto path = LexicalPath::canonicalized_path(url_or_path.to_byte_string());
  23. auto url_from_path = URL::create_with_file_scheme(path);
  24. if (url_from_path.is_valid())
  25. return url_from_path;
  26. return {};
  27. }
  28. static Optional<URL> query_public_suffix_list(StringView url_string)
  29. {
  30. auto out = MUST(String::from_utf8(url_string));
  31. if (!out.starts_with_bytes("about:"sv) && !out.contains("://"sv))
  32. out = MUST(String::formatted("https://{}"sv, out));
  33. auto maybe_url = create_url_with_url_or_path(out);
  34. if (!maybe_url.has_value())
  35. return {};
  36. auto url = maybe_url.release_value();
  37. if (url.host().has<URL::IPv4Address>() || url.host().has<URL::IPv6Address>())
  38. return url;
  39. if (url.scheme() != "http"sv && url.scheme() != "https"sv)
  40. return url;
  41. if (url.host().has<String>()) {
  42. auto const& host = url.host().get<String>();
  43. if (auto public_suffix = get_public_suffix(host); public_suffix.has_value())
  44. return url;
  45. if (host.ends_with_bytes(".local"sv) || host.ends_with_bytes("localhost"sv))
  46. return url;
  47. }
  48. return {};
  49. }
  50. bool is_public_suffix([[maybe_unused]] StringView host)
  51. {
  52. #if defined(ENABLE_PUBLIC_SUFFIX)
  53. return PublicSuffixData::the()->is_public_suffix(host);
  54. #else
  55. return false;
  56. #endif
  57. }
  58. Optional<String> get_public_suffix([[maybe_unused]] StringView host)
  59. {
  60. #if defined(ENABLE_PUBLIC_SUFFIX)
  61. return MUST(PublicSuffixData::the()->get_public_suffix(host));
  62. #else
  63. return {};
  64. #endif
  65. }
  66. Optional<URL> sanitize_url(StringView url, Optional<StringView> search_engine, AppendTLD append_tld)
  67. {
  68. if (FileSystem::exists(url)) {
  69. auto path = FileSystem::real_path(url);
  70. if (path.is_error())
  71. return {};
  72. return URL::create_with_file_scheme(path.value());
  73. }
  74. auto format_search_engine = [&]() -> Optional<URL> {
  75. if (!search_engine.has_value())
  76. return {};
  77. return MUST(String::formatted(*search_engine, URL::percent_decode(url)));
  78. };
  79. String url_buffer;
  80. if (append_tld == AppendTLD::Yes) {
  81. // FIXME: Expand the list of top level domains.
  82. if (!url.ends_with(".com"sv) && !url.ends_with(".net"sv) && !url.ends_with(".org"sv)) {
  83. url_buffer = MUST(String::formatted("{}.com", url));
  84. url = url_buffer;
  85. }
  86. }
  87. auto result = query_public_suffix_list(url);
  88. if (!result.has_value())
  89. return format_search_engine();
  90. return result.release_value();
  91. }
  92. static URLParts break_file_url_into_parts(URL const& url, StringView url_string)
  93. {
  94. auto scheme = url_string.substring_view(0, url.scheme().bytes_as_string_view().length() + "://"sv.length());
  95. auto path = url_string.substring_view(scheme.length());
  96. return URLParts { scheme, path, {} };
  97. }
  98. static URLParts break_web_url_into_parts(URL const& url, StringView url_string)
  99. {
  100. auto scheme = url_string.substring_view(0, url.scheme().bytes_as_string_view().length() + "://"sv.length());
  101. auto url_without_scheme = url_string.substring_view(scheme.length());
  102. StringView domain;
  103. StringView remainder;
  104. if (auto index = url_without_scheme.find_any_of("/?#"sv); index.has_value()) {
  105. domain = url_without_scheme.substring_view(0, *index);
  106. remainder = url_without_scheme.substring_view(*index);
  107. } else {
  108. domain = url_without_scheme;
  109. }
  110. auto public_suffix = get_public_suffix(domain);
  111. if (!public_suffix.has_value() || !domain.ends_with(*public_suffix))
  112. return { scheme, domain, remainder };
  113. auto subdomain = domain.substring_view(0, domain.length() - public_suffix->bytes_as_string_view().length());
  114. subdomain = subdomain.trim("."sv, TrimMode::Right);
  115. if (auto index = subdomain.find_last('.'); index.has_value()) {
  116. subdomain = subdomain.substring_view(0, *index + 1);
  117. domain = domain.substring_view(subdomain.length());
  118. } else {
  119. subdomain = {};
  120. }
  121. auto scheme_and_subdomain = url_string.substring_view(0, scheme.length() + subdomain.length());
  122. return { scheme_and_subdomain, domain, remainder };
  123. }
  124. Optional<URLParts> break_url_into_parts(StringView url_string)
  125. {
  126. auto url = URL::create_with_url_or_path(url_string);
  127. if (!url.is_valid())
  128. return {};
  129. auto const& scheme = url.scheme();
  130. auto scheme_length = scheme.bytes_as_string_view().length();
  131. if (!url_string.starts_with(scheme))
  132. return {};
  133. if (!url_string.substring_view(scheme_length).starts_with("://"sv))
  134. return {};
  135. if (url.scheme() == "file"sv)
  136. return break_file_url_into_parts(url, url_string);
  137. if (url.scheme().is_one_of("http"sv, "https"sv, "gemini"sv))
  138. return break_web_url_into_parts(url, url_string);
  139. return {};
  140. }
  141. URLType url_type(URL const& url)
  142. {
  143. if (url.scheme() == "mailto"sv)
  144. return URLType::Email;
  145. if (url.scheme() == "tel"sv)
  146. return URLType::Telephone;
  147. return URLType::Other;
  148. }
  149. String url_text_to_copy(URL const& url)
  150. {
  151. auto url_text = MUST(url.to_string());
  152. if (url.scheme() == "mailto"sv)
  153. return MUST(url_text.substring_from_byte_offset("mailto:"sv.length()));
  154. if (url.scheme() == "tel"sv)
  155. return MUST(url_text.substring_from_byte_offset("tel:"sv.length()));
  156. return url_text;
  157. }
  158. }