URL.cpp 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361
  1. /*
  2. * Copyright (c) 2021, Idan Horowitz <idan.horowitz@serenityos.org>
  3. * Copyright (c) 2021, the SerenityOS developers.
  4. *
  5. * SPDX-License-Identifier: BSD-2-Clause
  6. */
  7. #include <AK/IPv4Address.h>
  8. #include <AK/IPv6Address.h>
  9. #include <AK/URLParser.h>
  10. #include <LibWeb/Bindings/Intrinsics.h>
  11. #include <LibWeb/URL/URL.h>
  12. namespace Web::URL {
  13. WebIDL::ExceptionOr<JS::NonnullGCPtr<URL>> URL::create(JS::Realm& realm, AK::URL url, JS::NonnullGCPtr<URLSearchParams> query)
  14. {
  15. return MUST_OR_THROW_OOM(realm.heap().allocate<URL>(realm, realm, move(url), move(query)));
  16. }
  17. WebIDL::ExceptionOr<JS::NonnullGCPtr<URL>> URL::construct_impl(JS::Realm& realm, DeprecatedString const& url, DeprecatedString const& base)
  18. {
  19. // 1. Let parsedBase be null.
  20. Optional<AK::URL> parsed_base;
  21. // 2. If base is given, then:
  22. if (!base.is_null()) {
  23. // 1. Let parsedBase be the result of running the basic URL parser on base.
  24. parsed_base = base;
  25. // 2. If parsedBase is failure, then throw a TypeError.
  26. if (!parsed_base->is_valid())
  27. return WebIDL::SimpleException { WebIDL::SimpleExceptionType::TypeError, "Invalid base URL"sv };
  28. }
  29. // 3. Let parsedURL be the result of running the basic URL parser on url with parsedBase.
  30. AK::URL parsed_url;
  31. if (parsed_base.has_value())
  32. parsed_url = parsed_base->complete_url(url);
  33. else
  34. parsed_url = url;
  35. // 4. If parsedURL is failure, then throw a TypeError.
  36. if (!parsed_url.is_valid())
  37. return WebIDL::SimpleException { WebIDL::SimpleExceptionType::TypeError, "Invalid URL"sv };
  38. // 5. Let query be parsedURL’s query, if that is non-null, and the empty string otherwise.
  39. auto& query = parsed_url.query().is_null() ? DeprecatedString::empty() : parsed_url.query();
  40. // 6. Set this’s URL to parsedURL.
  41. // 7. Set this’s query object to a new URLSearchParams object.
  42. auto query_object = MUST(URLSearchParams::construct_impl(realm, query));
  43. // 8. Initialize this’s query object with query.
  44. auto result_url = TRY(URL::create(realm, move(parsed_url), move(query_object)));
  45. // 9. Set this’s query object’s URL object to this.
  46. result_url->m_query->m_url = result_url;
  47. return result_url;
  48. }
  49. URL::URL(JS::Realm& realm, AK::URL url, JS::NonnullGCPtr<URLSearchParams> query)
  50. : PlatformObject(realm)
  51. , m_url(move(url))
  52. , m_query(move(query))
  53. {
  54. }
// No hand-written cleanup: the compiler-generated destructor is used, defined out of line here.
URL::~URL() = default;
  56. JS::ThrowCompletionOr<void> URL::initialize(JS::Realm& realm)
  57. {
  58. MUST_OR_THROW_OOM(Base::initialize(realm));
  59. set_prototype(&Bindings::ensure_web_prototype<Bindings::URLPrototype>(realm, "URL"));
  60. return {};
  61. }
  62. void URL::visit_edges(Cell::Visitor& visitor)
  63. {
  64. Base::visit_edges(visitor);
  65. visitor.visit(m_query.ptr());
  66. }
  67. DeprecatedString URL::href() const
  68. {
  69. // return the serialization of this’s URL.
  70. return m_url.serialize();
  71. }
  72. DeprecatedString URL::to_json() const
  73. {
  74. // return the serialization of this’s URL.
  75. return m_url.serialize();
  76. }
  77. WebIDL::ExceptionOr<void> URL::set_href(DeprecatedString const& href)
  78. {
  79. // 1. Let parsedURL be the result of running the basic URL parser on the given value.
  80. AK::URL parsed_url = href;
  81. // 2. If parsedURL is failure, then throw a TypeError.
  82. if (!parsed_url.is_valid())
  83. return WebIDL::SimpleException { WebIDL::SimpleExceptionType::TypeError, "Invalid URL"sv };
  84. // 3. Set this’s URL to parsedURL.
  85. m_url = move(parsed_url);
  86. // 4. Empty this’s query object’s list.
  87. m_query->m_list.clear();
  88. // 5. Let query be this’s URL’s query.
  89. auto& query = m_url.query();
  90. // 6. If query is non-null, then set this’s query object’s list to the result of parsing query.
  91. if (!query.is_null())
  92. m_query->m_list = url_decode(query);
  93. return {};
  94. }
  95. DeprecatedString URL::origin() const
  96. {
  97. // return the serialization of this’s URL’s origin.
  98. return m_url.serialize_origin();
  99. }
  100. DeprecatedString URL::protocol() const
  101. {
  102. // return this’s URL’s scheme, followed by U+003A (:).
  103. return DeprecatedString::formatted("{}:", m_url.scheme());
  104. }
  105. void URL::set_protocol(DeprecatedString const& protocol)
  106. {
  107. // basic URL parse the given value, followed by U+003A (:), with this’s URL as url and scheme start state as state override.
  108. auto result_url = URLParser::parse(DeprecatedString::formatted("{}:", protocol), nullptr, m_url, URLParser::State::SchemeStart);
  109. if (result_url.is_valid())
  110. m_url = move(result_url);
  111. }
  112. DeprecatedString URL::username() const
  113. {
  114. // return this’s URL’s username.
  115. return m_url.username();
  116. }
  117. void URL::set_username(DeprecatedString const& username)
  118. {
  119. // 1. If this’s URL cannot have a username/password/port, then return.
  120. if (m_url.cannot_have_a_username_or_password_or_port())
  121. return;
  122. // 2. Set the username given this’s URL and the given value.
  123. m_url.set_username(AK::URL::percent_encode(username, AK::URL::PercentEncodeSet::Userinfo));
  124. }
  125. DeprecatedString URL::password() const
  126. {
  127. // return this’s URL’s password.
  128. return m_url.password();
  129. }
  130. void URL::set_password(DeprecatedString const& password)
  131. {
  132. // 1. If this’s URL cannot have a username/password/port, then return.
  133. if (m_url.cannot_have_a_username_or_password_or_port())
  134. return;
  135. // 2. Set the password given this’s URL and the given value.
  136. m_url.set_password(AK::URL::percent_encode(password, AK::URL::PercentEncodeSet::Userinfo));
  137. }
  138. DeprecatedString URL::host() const
  139. {
  140. // 1. Let url be this’s URL.
  141. auto& url = m_url;
  142. // 2. If url’s host is null, then return the empty string.
  143. if (url.host().is_null())
  144. return DeprecatedString::empty();
  145. // 3. If url’s port is null, return url’s host, serialized.
  146. if (!url.port().has_value())
  147. return url.host();
  148. // 4. Return url’s host, serialized, followed by U+003A (:) and url’s port, serialized.
  149. return DeprecatedString::formatted("{}:{}", url.host(), *url.port());
  150. }
  151. void URL::set_host(DeprecatedString const& host)
  152. {
  153. // 1. If this’s URL’s cannot-be-a-base-URL is true, then return.
  154. if (m_url.cannot_be_a_base_url())
  155. return;
  156. // 2. Basic URL parse the given value with this’s URL as url and host state as state override.
  157. auto result_url = URLParser::parse(host, nullptr, m_url, URLParser::State::Host);
  158. if (result_url.is_valid())
  159. m_url = move(result_url);
  160. }
  161. DeprecatedString URL::hostname() const
  162. {
  163. // 1. If this’s URL’s host is null, then return the empty string.
  164. if (m_url.host().is_null())
  165. return DeprecatedString::empty();
  166. // 2. Return this’s URL’s host, serialized.
  167. return m_url.host();
  168. }
  169. void URL::set_hostname(DeprecatedString const& hostname)
  170. {
  171. // 1. If this’s URL’s cannot-be-a-base-URL is true, then return.
  172. if (m_url.cannot_be_a_base_url())
  173. return;
  174. // 2. Basic URL parse the given value with this’s URL as url and hostname state as state override.
  175. auto result_url = URLParser::parse(hostname, nullptr, m_url, URLParser::State::Hostname);
  176. if (result_url.is_valid())
  177. m_url = move(result_url);
  178. }
  179. DeprecatedString URL::port() const
  180. {
  181. // 1. If this’s URL’s port is null, then return the empty string.
  182. if (!m_url.port().has_value())
  183. return {};
  184. // 2. Return this’s URL’s port, serialized.
  185. return DeprecatedString::formatted("{}", *m_url.port());
  186. }
  187. void URL::set_port(DeprecatedString const& port)
  188. {
  189. // 1. If this’s URL cannot have a username/password/port, then return.
  190. if (m_url.cannot_have_a_username_or_password_or_port())
  191. return;
  192. // 2. If the given value is the empty string, then set this’s URL’s port to null.
  193. if (port.is_empty()) {
  194. m_url.set_port({});
  195. return;
  196. }
  197. // 3. Otherwise, basic URL parse the given value with this’s URL as url and port state as state override.
  198. auto result_url = URLParser::parse(port, nullptr, m_url, URLParser::State::Port);
  199. if (result_url.is_valid())
  200. m_url = move(result_url);
  201. }
  202. DeprecatedString URL::pathname() const
  203. {
  204. // 1. If this’s URL’s cannot-be-a-base-URL is true, then return this’s URL’s path[0].
  205. // 2. If this’s URL’s path is empty, then return the empty string.
  206. // 3. Return U+002F (/), followed by the strings in this’s URL’s path (including empty strings), if any, separated from each other by U+002F (/).
  207. return m_url.path();
  208. }
  209. void URL::set_pathname(DeprecatedString const& pathname)
  210. {
  211. // 1. If this’s URL’s cannot-be-a-base-URL is true, then return.
  212. if (m_url.cannot_be_a_base_url())
  213. return;
  214. // 2. Empty this’s URL’s path.
  215. auto url = m_url; // We copy the URL here to follow other browser's behaviour of reverting the path change if the parse failed.
  216. url.set_paths({});
  217. // 3. Basic URL parse the given value with this’s URL as url and path start state as state override.
  218. auto result_url = URLParser::parse(pathname, nullptr, move(url), URLParser::State::PathStart);
  219. if (result_url.is_valid())
  220. m_url = move(result_url);
  221. }
  222. DeprecatedString URL::search() const
  223. {
  224. // 1. If this’s URL’s query is either null or the empty string, then return the empty string.
  225. if (m_url.query().is_null() || m_url.query().is_empty())
  226. return DeprecatedString::empty();
  227. // 2. Return U+003F (?), followed by this’s URL’s query.
  228. return DeprecatedString::formatted("?{}", m_url.query());
  229. }
  230. void URL::set_search(DeprecatedString const& search)
  231. {
  232. // 1. Let url be this’s URL.
  233. auto& url = m_url;
  234. // If the given value is the empty string, set url’s query to null, empty this’s query object’s list, and then return.
  235. if (search.is_empty()) {
  236. url.set_query({});
  237. m_query->m_list.clear();
  238. return;
  239. }
  240. // 2. Let input be the given value with a single leading U+003F (?) removed, if any.
  241. auto input = search.substring_view(search.starts_with('?'));
  242. // 3. Set url’s query to the empty string.
  243. auto url_copy = url; // We copy the URL here to follow other browser's behaviour of reverting the search change if the parse failed.
  244. url_copy.set_query(DeprecatedString::empty());
  245. // 4. Basic URL parse input with url as url and query state as state override.
  246. auto result_url = URLParser::parse(input, nullptr, move(url_copy), URLParser::State::Query);
  247. if (result_url.is_valid()) {
  248. m_url = move(result_url);
  249. // 5. Set this’s query object’s list to the result of parsing input.
  250. m_query->m_list = url_decode(input);
  251. }
  252. }
  253. URLSearchParams const* URL::search_params() const
  254. {
  255. return m_query;
  256. }
  257. DeprecatedString URL::hash() const
  258. {
  259. // 1. If this’s URL’s fragment is either null or the empty string, then return the empty string.
  260. if (m_url.fragment().is_null() || m_url.fragment().is_empty())
  261. return DeprecatedString::empty();
  262. // 2. Return U+0023 (#), followed by this’s URL’s fragment.
  263. return DeprecatedString::formatted("#{}", m_url.fragment());
  264. }
  265. void URL::set_hash(DeprecatedString const& hash)
  266. {
  267. // 1. If the given value is the empty string, then set this’s URL’s fragment to null and return.
  268. if (hash.is_empty()) {
  269. m_url.set_fragment({});
  270. return;
  271. }
  272. // 2. Let input be the given value with a single leading U+0023 (#) removed, if any.
  273. auto input = hash.substring_view(hash.starts_with('#'));
  274. // 3. Set this’s URL’s fragment to the empty string.
  275. auto url = m_url; // We copy the URL here to follow other browser's behaviour of reverting the hash change if the parse failed.
  276. url.set_fragment(DeprecatedString::empty());
  277. // 4. Basic URL parse input with this’s URL as url and fragment state as state override.
  278. auto result_url = URLParser::parse(input, nullptr, move(url), URLParser::State::Fragment);
  279. if (result_url.is_valid())
  280. m_url = move(result_url);
  281. }
  282. // https://url.spec.whatwg.org/#concept-url-origin
  283. HTML::Origin url_origin(AK::URL const& url)
  284. {
  285. // FIXME: We should probably have an extended version of AK::URL for LibWeb instead of standalone functions like this.
  286. // The origin of a URL url is the origin returned by running these steps, switching on url’s scheme:
  287. // "blob"
  288. if (url.scheme() == "blob"sv) {
  289. // FIXME: Support 'blob://' URLs
  290. return HTML::Origin {};
  291. }
  292. // "ftp"
  293. // "http"
  294. // "https"
  295. // "ws"
  296. // "wss"
  297. if (url.scheme().is_one_of("ftp"sv, "http"sv, "https"sv, "ws"sv, "wss"sv)) {
  298. // Return the tuple origin (url’s scheme, url’s host, url’s port, null).
  299. return HTML::Origin(url.scheme(), url.host(), url.port().value_or(0));
  300. }
  301. // "file"
  302. if (url.scheme() == "file"sv) {
  303. // Unfortunate as it is, this is left as an exercise to the reader. When in doubt, return a new opaque origin.
  304. // Note: We must return an origin with the `file://' protocol for `file://' iframes to work from `file://' pages.
  305. return HTML::Origin(url.scheme(), DeprecatedString(), 0);
  306. }
  307. // Return a new opaque origin.
  308. return HTML::Origin {};
  309. }
  310. // https://url.spec.whatwg.org/#concept-domain
  311. bool host_is_domain(StringView host)
  312. {
  313. // A domain is a non-empty ASCII string that identifies a realm within a network.
  314. return !host.is_empty()
  315. && !IPv4Address::from_string(host).has_value()
  316. && !IPv6Address::from_string(host).has_value();
  317. }
  318. }