URL.cpp 4.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151
  1. /*
  2. * Copyright (c) 2023, Tim Flynn <trflynn89@serenityos.org>
  3. * Copyright (c) 2023, Cameron Youell <cameronyouell@gmail.com>
  4. *
  5. * SPDX-License-Identifier: BSD-2-Clause
  6. */
  7. #include <AK/String.h>
  8. #include <LibFileSystem/FileSystem.h>
  9. #include <LibWebView/URL.h>
  10. namespace WebView {
  11. Optional<URL::URL> sanitize_url(StringView url, Optional<StringView> search_engine, AppendTLD append_tld)
  12. {
  13. if (FileSystem::exists(url.trim_whitespace())) {
  14. auto path = FileSystem::real_path(url.trim_whitespace());
  15. if (path.is_error())
  16. return {};
  17. return URL::create_with_file_scheme(path.value());
  18. }
  19. auto format_search_engine = [&]() -> Optional<URL::URL> {
  20. if (!search_engine.has_value())
  21. return {};
  22. return MUST(String::formatted(*search_engine, URL::percent_decode(url)));
  23. };
  24. String url_buffer;
  25. if (append_tld == AppendTLD::Yes) {
  26. // FIXME: Expand the list of top level domains.
  27. if (!url.ends_with(".com"sv) && !url.ends_with(".net"sv) && !url.ends_with(".org"sv)) {
  28. url_buffer = MUST(String::formatted("{}.com", url));
  29. url = url_buffer;
  30. }
  31. }
  32. ByteString url_with_scheme = url;
  33. if (!(url_with_scheme.starts_with("about:"sv) || url_with_scheme.contains("://"sv) || url_with_scheme.starts_with("data:"sv) || url_with_scheme.contains("."sv)) {
  34. url_with_scheme = ByteString::formatted("https://{}"sv, url_with_scheme);
  35. return URL::create_with_url_or_path(url_with_scheme);
  36. }
  37. return format_search_engine();;
  38. }
  39. Vector<URL::URL> sanitize_urls(ReadonlySpan<ByteString> raw_urls, URL::URL const& new_tab_page_url)
  40. {
  41. Vector<URL::URL> sanitized_urls;
  42. sanitized_urls.ensure_capacity(raw_urls.size());
  43. for (auto const& raw_url : raw_urls) {
  44. if (auto url = sanitize_url(raw_url); url.has_value())
  45. sanitized_urls.unchecked_append(url.release_value());
  46. }
  47. if (sanitized_urls.is_empty())
  48. sanitized_urls.append(new_tab_page_url);
  49. return sanitized_urls;
  50. }
  51. static URLParts break_file_url_into_parts(URL::URL const& url, StringView url_string)
  52. {
  53. auto scheme = url_string.substring_view(0, url.scheme().bytes_as_string_view().length() + "://"sv.length());
  54. auto path = url_string.substring_view(scheme.length());
  55. return URLParts { scheme, path, {} };
  56. }
  57. static URLParts break_web_url_into_parts(URL::URL const& url, StringView url_string)
  58. {
  59. auto scheme = url_string.substring_view(0, url.scheme().bytes_as_string_view().length() + "://"sv.length());
  60. auto url_without_scheme = url_string.substring_view(scheme.length());
  61. StringView domain;
  62. StringView remainder;
  63. if (auto index = url_without_scheme.find_any_of("/?#"sv); index.has_value()) {
  64. domain = url_without_scheme.substring_view(0, *index);
  65. remainder = url_without_scheme.substring_view(*index);
  66. } else {
  67. domain = url_without_scheme;
  68. }
  69. auto public_suffix = URL::get_public_suffix(domain);
  70. if (!public_suffix.has_value() || !domain.ends_with(*public_suffix))
  71. return { scheme, domain, remainder };
  72. auto subdomain = domain.substring_view(0, domain.length() - public_suffix->bytes_as_string_view().length());
  73. subdomain = subdomain.trim("."sv, TrimMode::Right);
  74. if (auto index = subdomain.find_last('.'); index.has_value()) {
  75. subdomain = subdomain.substring_view(0, *index + 1);
  76. domain = domain.substring_view(subdomain.length());
  77. } else {
  78. subdomain = {};
  79. }
  80. auto scheme_and_subdomain = url_string.substring_view(0, scheme.length() + subdomain.length());
  81. return { scheme_and_subdomain, domain, remainder };
  82. }
  83. Optional<URLParts> break_url_into_parts(StringView url_string)
  84. {
  85. auto url = URL::create_with_url_or_path(url_string);
  86. if (!url.is_valid())
  87. return {};
  88. auto const& scheme = url.scheme();
  89. auto scheme_length = scheme.bytes_as_string_view().length();
  90. if (!url_string.starts_with(scheme))
  91. return {};
  92. if (!url_string.substring_view(scheme_length).starts_with("://"sv))
  93. return {};
  94. if (url.scheme() == "file"sv)
  95. return break_file_url_into_parts(url, url_string);
  96. if (url.scheme().is_one_of("http"sv, "https"sv))
  97. return break_web_url_into_parts(url, url_string);
  98. return {};
  99. }
  100. URLType url_type(URL::URL const& url)
  101. {
  102. if (url.scheme() == "mailto"sv)
  103. return URLType::Email;
  104. if (url.scheme() == "tel"sv)
  105. return URLType::Telephone;
  106. return URLType::Other;
  107. }
  108. String url_text_to_copy(URL::URL const& url)
  109. {
  110. auto url_text = url.to_string();
  111. if (url.scheme() == "mailto"sv)
  112. return MUST(url_text.substring_from_byte_offset("mailto:"sv.length()));
  113. if (url.scheme() == "tel"sv)
  114. return MUST(url_text.substring_from_byte_offset("tel:"sv.length()));
  115. return url_text;
  116. }
  117. }