URL.cpp 5.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176
  1. /*
  2. * Copyright (c) 2023, Tim Flynn <trflynn89@serenityos.org>
  3. * Copyright (c) 2023, Cameron Youell <cameronyouell@gmail.com>
  4. *
  5. * SPDX-License-Identifier: BSD-2-Clause
  6. */
  7. #include <AK/String.h>
  8. #include <LibCore/System.h>
  9. #include <LibFileSystem/FileSystem.h>
  10. #include <LibWebView/URL.h>
  11. #if defined(ENABLE_PUBLIC_SUFFIX)
  12. # include <LibWebView/PublicSuffixData.h>
  13. #endif
  14. namespace WebView {
  15. bool is_public_suffix([[maybe_unused]] StringView host)
  16. {
  17. #if defined(ENABLE_PUBLIC_SUFFIX)
  18. return PublicSuffixData::the()->is_public_suffix(host);
  19. #else
  20. return false;
  21. #endif
  22. }
  23. Optional<String> get_public_suffix([[maybe_unused]] StringView host)
  24. {
  25. #if defined(ENABLE_PUBLIC_SUFFIX)
  26. return MUST(PublicSuffixData::the()->get_public_suffix(host));
  27. #else
  28. return {};
  29. #endif
  30. }
  31. Optional<URL::URL> sanitize_url(StringView url, Optional<StringView> search_engine, AppendTLD append_tld)
  32. {
  33. if (FileSystem::exists(url.trim_whitespace())) {
  34. auto path = FileSystem::real_path(url.trim_whitespace());
  35. if (path.is_error())
  36. return {};
  37. return URL::create_with_file_scheme(path.value());
  38. }
  39. auto format_search_engine = [&]() -> Optional<URL::URL> {
  40. if (!search_engine.has_value())
  41. return {};
  42. return MUST(String::formatted(*search_engine, URL::percent_decode(url)));
  43. };
  44. String url_buffer;
  45. if (append_tld == AppendTLD::Yes) {
  46. // FIXME: Expand the list of top level domains.
  47. if (!url.ends_with(".com"sv) && !url.ends_with(".net"sv) && !url.ends_with(".org"sv)) {
  48. url_buffer = MUST(String::formatted("{}.com", url));
  49. url = url_buffer;
  50. }
  51. }
  52. ByteString url_with_scheme = url;
  53. if (!(url_with_scheme.starts_with("about:"sv) || url_with_scheme.contains("://"sv) || url_with_scheme.starts_with("data:"sv)))
  54. url_with_scheme = ByteString::formatted("https://{}"sv, url_with_scheme);
  55. auto result = URL::create_with_url_or_path(url_with_scheme);
  56. if (!result.is_valid())
  57. return format_search_engine();
  58. return result;
  59. }
  60. Vector<URL::URL> sanitize_urls(ReadonlySpan<ByteString> raw_urls, URL::URL const& new_tab_page_url)
  61. {
  62. Vector<URL::URL> sanitized_urls;
  63. sanitized_urls.ensure_capacity(raw_urls.size());
  64. for (auto const& raw_url : raw_urls) {
  65. if (auto url = sanitize_url(raw_url); url.has_value())
  66. sanitized_urls.unchecked_append(url.release_value());
  67. }
  68. if (sanitized_urls.is_empty())
  69. sanitized_urls.append(new_tab_page_url);
  70. return sanitized_urls;
  71. }
  72. static URLParts break_file_url_into_parts(URL::URL const& url, StringView url_string)
  73. {
  74. auto scheme = url_string.substring_view(0, url.scheme().bytes_as_string_view().length() + "://"sv.length());
  75. auto path = url_string.substring_view(scheme.length());
  76. return URLParts { scheme, path, {} };
  77. }
  78. static URLParts break_web_url_into_parts(URL::URL const& url, StringView url_string)
  79. {
  80. auto scheme = url_string.substring_view(0, url.scheme().bytes_as_string_view().length() + "://"sv.length());
  81. auto url_without_scheme = url_string.substring_view(scheme.length());
  82. StringView domain;
  83. StringView remainder;
  84. if (auto index = url_without_scheme.find_any_of("/?#"sv); index.has_value()) {
  85. domain = url_without_scheme.substring_view(0, *index);
  86. remainder = url_without_scheme.substring_view(*index);
  87. } else {
  88. domain = url_without_scheme;
  89. }
  90. auto public_suffix = get_public_suffix(domain);
  91. if (!public_suffix.has_value() || !domain.ends_with(*public_suffix))
  92. return { scheme, domain, remainder };
  93. auto subdomain = domain.substring_view(0, domain.length() - public_suffix->bytes_as_string_view().length());
  94. subdomain = subdomain.trim("."sv, TrimMode::Right);
  95. if (auto index = subdomain.find_last('.'); index.has_value()) {
  96. subdomain = subdomain.substring_view(0, *index + 1);
  97. domain = domain.substring_view(subdomain.length());
  98. } else {
  99. subdomain = {};
  100. }
  101. auto scheme_and_subdomain = url_string.substring_view(0, scheme.length() + subdomain.length());
  102. return { scheme_and_subdomain, domain, remainder };
  103. }
  104. Optional<URLParts> break_url_into_parts(StringView url_string)
  105. {
  106. auto url = URL::create_with_url_or_path(url_string);
  107. if (!url.is_valid())
  108. return {};
  109. auto const& scheme = url.scheme();
  110. auto scheme_length = scheme.bytes_as_string_view().length();
  111. if (!url_string.starts_with(scheme))
  112. return {};
  113. if (!url_string.substring_view(scheme_length).starts_with("://"sv))
  114. return {};
  115. if (url.scheme() == "file"sv)
  116. return break_file_url_into_parts(url, url_string);
  117. if (url.scheme().is_one_of("http"sv, "https"sv))
  118. return break_web_url_into_parts(url, url_string);
  119. return {};
  120. }
  121. URLType url_type(URL::URL const& url)
  122. {
  123. if (url.scheme() == "mailto"sv)
  124. return URLType::Email;
  125. if (url.scheme() == "tel"sv)
  126. return URLType::Telephone;
  127. return URLType::Other;
  128. }
  129. String url_text_to_copy(URL::URL const& url)
  130. {
  131. auto url_text = MUST(url.to_string());
  132. if (url.scheme() == "mailto"sv)
  133. return MUST(url_text.substring_from_byte_offset("mailto:"sv.length()));
  134. if (url.scheme() == "tel"sv)
  135. return MUST(url_text.substring_from_byte_offset("tel:"sv.length()));
  136. return url_text;
  137. }
  138. }