URL.h 8.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229
  1. /*
  2. * Copyright (c) 2018-2020, Andreas Kling <kling@serenityos.org>
  3. * Copyright (c) 2021, Max Wipfli <mail@maxwipfli.ch>
  4. * Copyright (c) 2023-2024, Shannon Booth <shannon@serenityos.org>
  5. *
  6. * SPDX-License-Identifier: BSD-2-Clause
  7. */
  8. #pragma once
  9. #include <AK/ByteString.h>
  10. #include <AK/CopyOnWrite.h>
  11. #include <AK/String.h>
  12. #include <AK/StringView.h>
  13. #include <AK/Vector.h>
  14. // On Linux distros that use mlibc `basename` is defined as a macro that expands to `__mlibc_gnu_basename` or `__mlibc_gnu_basename_c`, so we undefine it.
  15. #if defined(AK_OS_LINUX) && defined(basename)
  16. # undef basename
  17. #endif
  18. namespace URL {
  // Selects which percent-encode set a percent-encoding helper should apply.
  // NOTE(review): the enumerator names appear to mirror the percent-encode sets of the WHATWG URL standard
  // (plus an encodeURI-style set) — confirm against the implementation.
  // Enumerator order is significant: the underlying values are implicit.
  enum class PercentEncodeSet {
      C0Control,
      Fragment,
      Query,
      SpecialQuery,
      Path,
      Userinfo,
      Component,
      ApplicationXWWWFormUrlencoded,
      EncodeURI,
  };
  // Two-state flag passed to URL::serialize() and URL::equals(); both default it to ExcludeFragment::No.
  enum class ExcludeFragment {
      No,
      Yes,
  };
  34. // https://url.spec.whatwg.org/#concept-ipv4
  35. // An IPv4 address is a 32-bit unsigned integer that identifies a network address. [RFC791]
  36. // FIXME: It would be nice if this were an AK::IPv4Address
  37. using IPv4Address = u32;
  38. // https://url.spec.whatwg.org/#concept-ipv6
  39. // An IPv6 address is a 128-bit unsigned integer that identifies a network address. For the purposes of this standard
  40. // it is represented as a list of eight 16-bit unsigned integers, also known as IPv6 pieces. [RFC4291]
  41. // FIXME: It would be nice if this were an AK::IPv6Address
  42. using IPv6Address = Array<u16, 8>;
  43. // https://url.spec.whatwg.org/#concept-host
  44. // A host is a domain, an IP address, an opaque host, or an empty host. Typically a host serves as a network address,
  45. // but it is sometimes used as opaque identifier in URLs where a network address is not necessary.
  46. using Host = Variant<IPv4Address, IPv6Address, String, Empty>;
  47. // https://w3c.github.io/FileAPI/#blob-url-entry
  48. // NOTE: This represents the raw bytes behind a 'Blob' (and does not yet support a MediaSourceQuery).
  49. struct BlobURLEntry {
  50. String type;
  51. ByteBuffer byte_buffer;
  52. };
  53. void append_percent_encoded_if_necessary(StringBuilder&, u32 code_point, PercentEncodeSet set = PercentEncodeSet::Userinfo);
  54. void append_percent_encoded(StringBuilder&, u32 code_point);
  55. bool code_point_is_in_percent_encode_set(u32 code_point, PercentEncodeSet);
  56. Optional<u16> default_port_for_scheme(StringView);
  57. bool is_special_scheme(StringView);
  // Two-state option accepted by percent_encode(), which defaults it to SpaceAsPlus::No.
  // NOTE(review): Yes presumably makes a space encode as '+' — confirm against percent_encode()'s definition.
  enum class SpaceAsPlus {
      No,
      Yes,
  };
  62. String percent_encode(StringView input, PercentEncodeSet set = PercentEncodeSet::Userinfo, SpaceAsPlus = SpaceAsPlus::No);
  63. ByteString percent_decode(StringView input);
  64. // https://url.spec.whatwg.org/#url-representation
  65. // A URL is a struct that represents a universal identifier. To disambiguate from a valid URL string it can also be referred to as a URL record.
  66. class URL {
  67. friend class Parser;
  68. public:
  69. URL() = default;
  70. URL(StringView);
  71. URL(ByteString const& string)
  72. : URL(string.view())
  73. {
  74. }
  75. URL(String const& string)
  76. : URL(string.bytes_as_string_view())
  77. {
  78. }
  79. bool is_valid() const { return m_data->valid; }
  80. String const& scheme() const { return m_data->scheme; }
  81. String const& username() const { return m_data->username; }
  82. String const& password() const { return m_data->password; }
  83. Host const& host() const { return m_data->host; }
  84. ErrorOr<String> serialized_host() const;
  85. ByteString basename() const;
  86. Optional<String> const& query() const { return m_data->query; }
  87. Optional<String> const& fragment() const { return m_data->fragment; }
  88. Optional<u16> port() const { return m_data->port; }
  89. ByteString path_segment_at_index(size_t index) const;
  90. size_t path_segment_count() const { return m_data->paths.size(); }
  91. u16 port_or_default() const { return m_data->port.value_or(default_port_for_scheme(m_data->scheme).value_or(0)); }
  92. bool cannot_be_a_base_url() const { return m_data->cannot_be_a_base_url; }
  93. bool cannot_have_a_username_or_password_or_port() const;
  94. bool includes_credentials() const { return !m_data->username.is_empty() || !m_data->password.is_empty(); }
  95. bool is_special() const { return is_special_scheme(m_data->scheme); }
  96. void set_scheme(String);
  97. void set_username(StringView);
  98. void set_password(StringView);
  99. void set_host(Host);
  100. void set_port(Optional<u16>);
  101. void set_paths(Vector<ByteString> const&);
  102. Vector<String> const& paths() const { return m_data->paths; }
  103. void set_query(Optional<String> query) { m_data->query = move(query); }
  104. void set_fragment(Optional<String> fragment) { m_data->fragment = move(fragment); }
  105. void set_cannot_be_a_base_url(bool value) { m_data->cannot_be_a_base_url = value; }
  106. void append_path(StringView);
  107. void append_slash()
  108. {
  109. // NOTE: To indicate that we want to end the path with a slash, we have to append an empty path segment.
  110. m_data->paths.append(String {});
  111. }
  112. String serialize_path() const;
  113. ByteString serialize(ExcludeFragment = ExcludeFragment::No) const;
  114. ByteString serialize_for_display() const;
  115. ByteString to_byte_string() const { return serialize(); }
  116. ErrorOr<String> to_string() const;
  117. // HTML origin
  118. ByteString serialize_origin() const;
  119. bool equals(URL const& other, ExcludeFragment = ExcludeFragment::No) const;
  120. URL complete_url(StringView) const;
  121. [[nodiscard]] bool operator==(URL const& other) const
  122. {
  123. if (m_data.ptr() == other.m_data.ptr())
  124. return true;
  125. return equals(other, ExcludeFragment::No);
  126. }
  127. Optional<BlobURLEntry> const& blob_url_entry() const { return m_data->blob_url_entry; }
  128. void set_blob_url_entry(Optional<BlobURLEntry> entry) { m_data->blob_url_entry = move(entry); }
  129. private:
  130. bool compute_validity() const;
  131. struct Data : public RefCounted<Data> {
  132. NonnullRefPtr<Data> clone()
  133. {
  134. auto clone = adopt_ref(*new Data);
  135. clone->valid = valid;
  136. clone->scheme = scheme;
  137. clone->username = username;
  138. clone->password = password;
  139. clone->host = host;
  140. clone->port = port;
  141. clone->paths = paths;
  142. clone->query = query;
  143. clone->fragment = fragment;
  144. clone->cannot_be_a_base_url = cannot_be_a_base_url;
  145. clone->blob_url_entry = blob_url_entry;
  146. return clone;
  147. }
  148. bool valid { false };
  149. // A URL’s scheme is an ASCII string that identifies the type of URL and can be used to dispatch a URL for further processing after parsing. It is initially the empty string.
  150. String scheme;
  151. // A URL’s username is an ASCII string identifying a username. It is initially the empty string.
  152. String username;
  153. // A URL’s password is an ASCII string identifying a password. It is initially the empty string.
  154. String password;
  155. // A URL’s host is null or a host. It is initially null.
  156. Host host;
  157. // A URL’s port is either null or a 16-bit unsigned integer that identifies a networking port. It is initially null.
  158. Optional<u16> port;
  159. // A URL’s path is either a URL path segment or a list of zero or more URL path segments, usually identifying a location. It is initially « ».
  160. // A URL path segment is an ASCII string. It commonly refers to a directory or a file, but has no predefined meaning.
  161. Vector<String> paths;
  162. // A URL’s query is either null or an ASCII string. It is initially null.
  163. Optional<String> query;
  164. // A URL’s fragment is either null or an ASCII string that can be used for further processing on the resource the URL’s other components identify. It is initially null.
  165. Optional<String> fragment;
  166. bool cannot_be_a_base_url { false };
  167. // https://url.spec.whatwg.org/#concept-url-blob-entry
  168. // A URL also has an associated blob URL entry that is either null or a blob URL entry. It is initially null.
  169. Optional<BlobURLEntry> blob_url_entry;
  170. };
  171. AK::CopyOnWrite<Data> m_data;
  172. };
  173. URL create_with_url_or_path(ByteString const&);
  174. URL create_with_file_scheme(ByteString const& path, ByteString const& fragment = {}, ByteString const& hostname = {});
  175. URL create_with_data(StringView mime_type, StringView payload, bool is_base64 = false);
  176. }
  177. template<>
  178. struct AK::Formatter<URL::URL> : AK::Formatter<StringView> {
  179. ErrorOr<void> format(FormatBuilder& builder, URL::URL const& value)
  180. {
  181. return Formatter<StringView>::format(builder, value.serialize());
  182. }
  183. };
  184. template<>
  185. struct AK::Traits<URL::URL> : public AK::DefaultTraits<URL::URL> {
  186. static unsigned hash(URL::URL const& url) { return url.to_byte_string().hash(); }
  187. };