URL.cpp 4.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160
  1. /*
  2. * Copyright (c) 2023, Tim Flynn <trflynn89@serenityos.org>
  3. * Copyright (c) 2023, Cameron Youell <cameronyouell@gmail.com>
  4. *
  5. * SPDX-License-Identifier: BSD-2-Clause
  6. */
  7. #include <AK/String.h>
  8. #include <LibCore/System.h>
  9. #include <LibFileSystem/FileSystem.h>
  10. #include <LibWebView/URL.h>
  11. #if defined(ENABLE_PUBLIC_SUFFIX)
  12. # include <LibWebView/PublicSuffixData.h>
  13. #endif
  14. namespace WebView {
  15. bool is_public_suffix([[maybe_unused]] StringView host)
  16. {
  17. #if defined(ENABLE_PUBLIC_SUFFIX)
  18. return PublicSuffixData::the()->is_public_suffix(host);
  19. #else
  20. return false;
  21. #endif
  22. }
  23. Optional<String> get_public_suffix([[maybe_unused]] StringView host)
  24. {
  25. #if defined(ENABLE_PUBLIC_SUFFIX)
  26. return MUST(PublicSuffixData::the()->get_public_suffix(host));
  27. #else
  28. return {};
  29. #endif
  30. }
  31. Optional<URL::URL> sanitize_url(StringView url, Optional<StringView> search_engine, AppendTLD append_tld)
  32. {
  33. if (FileSystem::exists(url)) {
  34. auto path = FileSystem::real_path(url);
  35. if (path.is_error())
  36. return {};
  37. return URL::create_with_file_scheme(path.value());
  38. }
  39. auto format_search_engine = [&]() -> Optional<URL::URL> {
  40. if (!search_engine.has_value())
  41. return {};
  42. return MUST(String::formatted(*search_engine, URL::percent_decode(url)));
  43. };
  44. String url_buffer;
  45. if (append_tld == AppendTLD::Yes) {
  46. // FIXME: Expand the list of top level domains.
  47. if (!url.ends_with(".com"sv) && !url.ends_with(".net"sv) && !url.ends_with(".org"sv)) {
  48. url_buffer = MUST(String::formatted("{}.com", url));
  49. url = url_buffer;
  50. }
  51. }
  52. ByteString url_with_scheme = url;
  53. if (!(url_with_scheme.starts_with("about:"sv) || url_with_scheme.contains("://"sv) || url_with_scheme.starts_with("data:"sv)))
  54. url_with_scheme = ByteString::formatted("https://{}"sv, url_with_scheme);
  55. auto result = URL::create_with_url_or_path(url_with_scheme);
  56. if (!result.is_valid())
  57. return format_search_engine();
  58. return result;
  59. }
  60. static URLParts break_file_url_into_parts(URL::URL const& url, StringView url_string)
  61. {
  62. auto scheme = url_string.substring_view(0, url.scheme().bytes_as_string_view().length() + "://"sv.length());
  63. auto path = url_string.substring_view(scheme.length());
  64. return URLParts { scheme, path, {} };
  65. }
  66. static URLParts break_web_url_into_parts(URL::URL const& url, StringView url_string)
  67. {
  68. auto scheme = url_string.substring_view(0, url.scheme().bytes_as_string_view().length() + "://"sv.length());
  69. auto url_without_scheme = url_string.substring_view(scheme.length());
  70. StringView domain;
  71. StringView remainder;
  72. if (auto index = url_without_scheme.find_any_of("/?#"sv); index.has_value()) {
  73. domain = url_without_scheme.substring_view(0, *index);
  74. remainder = url_without_scheme.substring_view(*index);
  75. } else {
  76. domain = url_without_scheme;
  77. }
  78. auto public_suffix = get_public_suffix(domain);
  79. if (!public_suffix.has_value() || !domain.ends_with(*public_suffix))
  80. return { scheme, domain, remainder };
  81. auto subdomain = domain.substring_view(0, domain.length() - public_suffix->bytes_as_string_view().length());
  82. subdomain = subdomain.trim("."sv, TrimMode::Right);
  83. if (auto index = subdomain.find_last('.'); index.has_value()) {
  84. subdomain = subdomain.substring_view(0, *index + 1);
  85. domain = domain.substring_view(subdomain.length());
  86. } else {
  87. subdomain = {};
  88. }
  89. auto scheme_and_subdomain = url_string.substring_view(0, scheme.length() + subdomain.length());
  90. return { scheme_and_subdomain, domain, remainder };
  91. }
  92. Optional<URLParts> break_url_into_parts(StringView url_string)
  93. {
  94. auto url = URL::create_with_url_or_path(url_string);
  95. if (!url.is_valid())
  96. return {};
  97. auto const& scheme = url.scheme();
  98. auto scheme_length = scheme.bytes_as_string_view().length();
  99. if (!url_string.starts_with(scheme))
  100. return {};
  101. if (!url_string.substring_view(scheme_length).starts_with("://"sv))
  102. return {};
  103. if (url.scheme() == "file"sv)
  104. return break_file_url_into_parts(url, url_string);
  105. if (url.scheme().is_one_of("http"sv, "https"sv))
  106. return break_web_url_into_parts(url, url_string);
  107. return {};
  108. }
  109. URLType url_type(URL::URL const& url)
  110. {
  111. if (url.scheme() == "mailto"sv)
  112. return URLType::Email;
  113. if (url.scheme() == "tel"sv)
  114. return URLType::Telephone;
  115. return URLType::Other;
  116. }
  117. String url_text_to_copy(URL::URL const& url)
  118. {
  119. auto url_text = MUST(url.to_string());
  120. if (url.scheme() == "mailto"sv)
  121. return MUST(url_text.substring_from_byte_offset("mailto:"sv.length()));
  122. if (url.scheme() == "tel"sv)
  123. return MUST(url_text.substring_from_byte_offset("tel:"sv.length()));
  124. return url_text;
  125. }
  126. }