URL.cpp 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493
  1. /*
  2. * Copyright (c) 2021, Idan Horowitz <idan.horowitz@serenityos.org>
  3. * Copyright (c) 2021, the SerenityOS developers.
  4. * Copyright (c) 2023, networkException <networkexception@serenityos.org>
  5. *
  6. * SPDX-License-Identifier: BSD-2-Clause
  7. */
  8. #include <AK/IPv4Address.h>
  9. #include <AK/IPv6Address.h>
  10. #include <AK/URLParser.h>
  11. #include <LibWeb/Bindings/Intrinsics.h>
  12. #include <LibWeb/URL/URL.h>
  13. namespace Web::URL {
  14. WebIDL::ExceptionOr<JS::NonnullGCPtr<URL>> URL::create(JS::Realm& realm, AK::URL url, JS::NonnullGCPtr<URLSearchParams> query)
  15. {
  16. return MUST_OR_THROW_OOM(realm.heap().allocate<URL>(realm, realm, move(url), move(query)));
  17. }
  18. // https://url.spec.whatwg.org/#api-url-parser
  19. static Optional<AK::URL> parse_api_url(String const& url, Optional<String> const& base)
  20. {
  21. // FIXME: We somewhat awkwardly have two failure states encapsulated in the return type (and convert between them in the steps),
  22. // ideally we'd get rid of URL's valid flag
  23. // 1. Let parsedBase be null.
  24. Optional<AK::URL> parsed_base;
  25. // 2. If base is non-null:
  26. if (base.has_value()) {
  27. // 1. Set parsedBase to the result of running the basic URL parser on base.
  28. auto parsed_base_url = URLParser::parse(*base);
  29. // 2. If parsedBase is failure, then return failure.
  30. if (!parsed_base_url.is_valid())
  31. return {};
  32. parsed_base = parsed_base_url;
  33. }
  34. // 3. Return the result of running the basic URL parser on url with parsedBase.
  35. auto parsed = URLParser::parse(url, parsed_base);
  36. return parsed.is_valid() ? parsed : Optional<AK::URL> {};
  37. }
  38. // https://url.spec.whatwg.org/#dom-url-url
  39. WebIDL::ExceptionOr<JS::NonnullGCPtr<URL>> URL::construct_impl(JS::Realm& realm, String const& url, Optional<String> const& base)
  40. {
  41. auto& vm = realm.vm();
  42. // 1. Let parsedURL be the result of running the API URL parser on url with base, if given.
  43. auto parsed_url = parse_api_url(url, base);
  44. // 2. If parsedURL is failure, then throw a TypeError.
  45. if (!parsed_url.has_value())
  46. return WebIDL::SimpleException { WebIDL::SimpleExceptionType::TypeError, "Invalid URL"sv };
  47. // 3. Let query be parsedURL’s query, if that is non-null, and the empty string otherwise.
  48. auto query = parsed_url->query().is_null() ? String {} : TRY_OR_THROW_OOM(vm, String::from_deprecated_string(parsed_url->query()));
  49. // 4. Set this’s URL to parsedURL.
  50. // 5. Set this’s query object to a new URLSearchParams object.
  51. auto query_object = MUST(URLSearchParams::construct_impl(realm, query));
  52. // 6. Initialize this’s query object with query.
  53. auto result_url = TRY(URL::create(realm, parsed_url.release_value(), move(query_object)));
  54. // 7. Set this’s query object’s URL object to this.
  55. result_url->m_query->m_url = result_url;
  56. return result_url;
  57. }
  58. URL::URL(JS::Realm& realm, AK::URL url, JS::NonnullGCPtr<URLSearchParams> query)
  59. : PlatformObject(realm)
  60. , m_url(move(url))
  61. , m_query(move(query))
  62. {
  63. }
  64. URL::~URL() = default;
  65. JS::ThrowCompletionOr<void> URL::initialize(JS::Realm& realm)
  66. {
  67. MUST_OR_THROW_OOM(Base::initialize(realm));
  68. set_prototype(&Bindings::ensure_web_prototype<Bindings::URLPrototype>(realm, "URL"));
  69. return {};
  70. }
  71. void URL::visit_edges(Cell::Visitor& visitor)
  72. {
  73. Base::visit_edges(visitor);
  74. visitor.visit(m_query.ptr());
  75. }
  76. // https://url.spec.whatwg.org/#dom-url-canparse
  77. bool URL::can_parse(JS::VM&, String const& url, Optional<String> const& base)
  78. {
  79. // 1. Let parsedURL be the result of running the API URL parser on url with base, if given.
  80. auto parsed_url = parse_api_url(url, base);
  81. // 2. If parsedURL is failure, then return false.
  82. if (!parsed_url.has_value())
  83. return false;
  84. // 3. Return true.
  85. return true;
  86. }
  87. // https://url.spec.whatwg.org/#dom-url-href
  88. WebIDL::ExceptionOr<String> URL::href() const
  89. {
  90. auto& vm = realm().vm();
  91. // The href getter steps and the toJSON() method steps are to return the serialization of this’s URL.
  92. return TRY_OR_THROW_OOM(vm, String::from_deprecated_string(m_url.serialize()));
  93. }
  94. // https://url.spec.whatwg.org/#dom-url-tojson
  95. WebIDL::ExceptionOr<String> URL::to_json() const
  96. {
  97. auto& vm = realm().vm();
  98. // The href getter steps and the toJSON() method steps are to return the serialization of this’s URL.
  99. return TRY_OR_THROW_OOM(vm, String::from_deprecated_string(m_url.serialize()));
  100. }
  101. // https://url.spec.whatwg.org/#ref-for-dom-url-href②
  102. WebIDL::ExceptionOr<void> URL::set_href(String const& href)
  103. {
  104. auto& vm = realm().vm();
  105. // 1. Let parsedURL be the result of running the basic URL parser on the given value.
  106. AK::URL parsed_url = href;
  107. // 2. If parsedURL is failure, then throw a TypeError.
  108. if (!parsed_url.is_valid())
  109. return WebIDL::SimpleException { WebIDL::SimpleExceptionType::TypeError, "Invalid URL"sv };
  110. // 3. Set this’s URL to parsedURL.
  111. m_url = move(parsed_url);
  112. // 4. Empty this’s query object’s list.
  113. m_query->m_list.clear();
  114. // 5. Let query be this’s URL’s query.
  115. auto query = m_url.query();
  116. // 6. If query is non-null, then set this’s query object’s list to the result of parsing query.
  117. if (!query.is_null())
  118. m_query->m_list = TRY_OR_THROW_OOM(vm, url_decode(query));
  119. return {};
  120. }
  121. // https://url.spec.whatwg.org/#dom-url-origin
  122. WebIDL::ExceptionOr<String> URL::origin() const
  123. {
  124. auto& vm = realm().vm();
  125. // The origin getter steps are to return the serialization of this’s URL’s origin. [HTML]
  126. return TRY_OR_THROW_OOM(vm, String::from_deprecated_string(m_url.serialize_origin()));
  127. }
  128. // https://url.spec.whatwg.org/#dom-url-protocol
  129. WebIDL::ExceptionOr<String> URL::protocol() const
  130. {
  131. auto& vm = realm().vm();
  132. // The protocol getter steps are to return this’s URL’s scheme, followed by U+003A (:).
  133. return TRY_OR_THROW_OOM(vm, String::formatted("{}:", m_url.scheme()));
  134. }
  135. // https://url.spec.whatwg.org/#ref-for-dom-url-protocol%E2%91%A0
  136. WebIDL::ExceptionOr<void> URL::set_protocol(String const& protocol)
  137. {
  138. auto& vm = realm().vm();
  139. // The protocol setter steps are to basic URL parse the given value, followed by U+003A (:), with this’s URL as
  140. // url and scheme start state as state override.
  141. auto result_url = URLParser::parse(TRY_OR_THROW_OOM(vm, String::formatted("{}:", protocol)), {}, m_url, URLParser::State::SchemeStart);
  142. if (result_url.is_valid())
  143. m_url = move(result_url);
  144. return {};
  145. }
  146. // https://url.spec.whatwg.org/#dom-url-username
  147. WebIDL::ExceptionOr<String> URL::username() const
  148. {
  149. auto& vm = realm().vm();
  150. // The username getter steps are to return this’s URL’s username.
  151. return TRY_OR_THROW_OOM(vm, String::from_deprecated_string(m_url.username()));
  152. }
  153. // https://url.spec.whatwg.org/#ref-for-dom-url-username%E2%91%A0
  154. void URL::set_username(String const& username)
  155. {
  156. // 1. If this’s URL cannot have a username/password/port, then return.
  157. if (m_url.cannot_have_a_username_or_password_or_port())
  158. return;
  159. // 2. Set the username given this’s URL and the given value.
  160. m_url.set_username(username.to_deprecated_string(), AK::URL::ApplyPercentEncoding::Yes);
  161. }
  162. // https://url.spec.whatwg.org/#dom-url-password
  163. WebIDL::ExceptionOr<String> URL::password() const
  164. {
  165. auto& vm = realm().vm();
  166. // The password getter steps are to return this’s URL’s password.
  167. return TRY_OR_THROW_OOM(vm, String::from_deprecated_string(m_url.password()));
  168. }
  169. // https://url.spec.whatwg.org/#ref-for-dom-url-password%E2%91%A0
  170. void URL::set_password(String const& password)
  171. {
  172. // 1. If this’s URL cannot have a username/password/port, then return.
  173. if (m_url.cannot_have_a_username_or_password_or_port())
  174. return;
  175. // 2. Set the password given this’s URL and the given value.
  176. m_url.set_password(password.to_deprecated_string(), AK::URL::ApplyPercentEncoding::Yes);
  177. }
  178. // https://url.spec.whatwg.org/#dom-url-host
  179. WebIDL::ExceptionOr<String> URL::host() const
  180. {
  181. auto& vm = realm().vm();
  182. // 1. Let url be this’s URL.
  183. auto& url = m_url;
  184. // 2. If url’s host is null, then return the empty string.
  185. if (url.host().is_null())
  186. return String {};
  187. // 3. If url’s port is null, return url’s host, serialized.
  188. if (!url.port().has_value())
  189. return TRY_OR_THROW_OOM(vm, String::from_deprecated_string(url.host()));
  190. // 4. Return url’s host, serialized, followed by U+003A (:) and url’s port, serialized.
  191. return TRY_OR_THROW_OOM(vm, String::formatted("{}:{}", url.host(), *url.port()));
  192. }
  193. // https://url.spec.whatwg.org/#dom-url-hostref-for-dom-url-host%E2%91%A0
  194. void URL::set_host(String const& host)
  195. {
  196. // 1. If this’s URL’s cannot-be-a-base-URL is true, then return.
  197. if (m_url.cannot_be_a_base_url())
  198. return;
  199. // 2. Basic URL parse the given value with this’s URL as url and host state as state override.
  200. auto result_url = URLParser::parse(host, {}, m_url, URLParser::State::Host);
  201. if (result_url.is_valid())
  202. m_url = move(result_url);
  203. }
  204. // https://url.spec.whatwg.org/#dom-url-hostname
  205. WebIDL::ExceptionOr<String> URL::hostname() const
  206. {
  207. auto& vm = realm().vm();
  208. // 1. If this’s URL’s host is null, then return the empty string.
  209. if (m_url.host().is_null())
  210. return String {};
  211. // 2. Return this’s URL’s host, serialized.
  212. return TRY_OR_THROW_OOM(vm, String::from_deprecated_string(m_url.host()));
  213. }
  214. // https://url.spec.whatwg.org/#ref-for-dom-url-hostname①
  215. void URL::set_hostname(String const& hostname)
  216. {
  217. // 1. If this’s URL’s cannot-be-a-base-URL is true, then return.
  218. if (m_url.cannot_be_a_base_url())
  219. return;
  220. // 2. Basic URL parse the given value with this’s URL as url and hostname state as state override.
  221. auto result_url = URLParser::parse(hostname, {}, m_url, URLParser::State::Hostname);
  222. if (result_url.is_valid())
  223. m_url = move(result_url);
  224. }
  225. // https://url.spec.whatwg.org/#ref-for-dom-url-hostname①
  226. WebIDL::ExceptionOr<String> URL::port() const
  227. {
  228. auto& vm = realm().vm();
  229. // 1. If this’s URL’s port is null, then return the empty string.
  230. if (!m_url.port().has_value())
  231. return String {};
  232. // 2. Return this’s URL’s port, serialized.
  233. return TRY_OR_THROW_OOM(vm, String::formatted("{}", *m_url.port()));
  234. }
  235. // https://url.spec.whatwg.org/#ref-for-dom-url-port%E2%91%A0
  236. void URL::set_port(String const& port)
  237. {
  238. // 1. If this’s URL cannot have a username/password/port, then return.
  239. if (m_url.cannot_have_a_username_or_password_or_port())
  240. return;
  241. // 2. If the given value is the empty string, then set this’s URL’s port to null.
  242. if (port.is_empty()) {
  243. m_url.set_port({});
  244. }
  245. // 3. Otherwise, basic URL parse the given value with this’s URL as url and port state as state override.
  246. else {
  247. auto result_url = URLParser::parse(port, {}, m_url, URLParser::State::Port);
  248. if (result_url.is_valid())
  249. m_url = move(result_url);
  250. }
  251. }
  252. // https://url.spec.whatwg.org/#dom-url-pathname
  253. WebIDL::ExceptionOr<String> URL::pathname() const
  254. {
  255. auto& vm = realm().vm();
  256. // The pathname getter steps are to return the result of URL path serializing this’s URL.
  257. return TRY_OR_THROW_OOM(vm, String::from_deprecated_string(m_url.serialize_path()));
  258. }
  259. // https://url.spec.whatwg.org/#ref-for-dom-url-pathname%E2%91%A0
  260. void URL::set_pathname(String const& pathname)
  261. {
  262. // FIXME: These steps no longer match the speci.
  263. // 1. If this’s URL’s cannot-be-a-base-URL is true, then return.
  264. if (m_url.cannot_be_a_base_url())
  265. return;
  266. // 2. Empty this’s URL’s path.
  267. auto url = m_url; // We copy the URL here to follow other browser's behaviour of reverting the path change if the parse failed.
  268. url.set_paths({});
  269. // 3. Basic URL parse the given value with this’s URL as url and path start state as state override.
  270. auto result_url = URLParser::parse(pathname, {}, move(url), URLParser::State::PathStart);
  271. if (result_url.is_valid())
  272. m_url = move(result_url);
  273. }
  274. // https://url.spec.whatwg.org/#dom-url-search
  275. WebIDL::ExceptionOr<String> URL::search() const
  276. {
  277. auto& vm = realm().vm();
  278. // 1. If this’s URL’s query is either null or the empty string, then return the empty string.
  279. if (m_url.query().is_null() || m_url.query().is_empty())
  280. return String {};
  281. // 2. Return U+003F (?), followed by this’s URL’s query.
  282. return TRY_OR_THROW_OOM(vm, String::formatted("?{}", m_url.query()));
  283. }
  284. // https://url.spec.whatwg.org/#ref-for-dom-url-search%E2%91%A0
  285. WebIDL::ExceptionOr<void> URL::set_search(String const& search)
  286. {
  287. auto& vm = realm().vm();
  288. // 1. Let url be this’s URL.
  289. auto& url = m_url;
  290. // 2. If the given value is the empty string:
  291. if (search.is_empty()) {
  292. // 1. Set url’s query to null.
  293. url.set_query({});
  294. // 2. Empty this’s query object’s list.
  295. m_query->m_list.clear();
  296. // FIXME: 3. Potentially strip trailing spaces from an opaque path with this.
  297. // 4. Return.
  298. return {};
  299. }
  300. // 3. Let input be the given value with a single leading U+003F (?) removed, if any.
  301. auto search_as_string_view = search.bytes_as_string_view();
  302. auto input = search_as_string_view.substring_view(search_as_string_view.starts_with('?'));
  303. // 4. Set url’s query to the empty string.
  304. auto url_copy = url; // We copy the URL here to follow other browser's behaviour of reverting the search change if the parse failed.
  305. url_copy.set_query(DeprecatedString::empty());
  306. // 5. Basic URL parse input with url as url and query state as state override.
  307. auto result_url = URLParser::parse(input, {}, move(url_copy), URLParser::State::Query);
  308. if (result_url.is_valid()) {
  309. m_url = move(result_url);
  310. // 6. Set this’s query object’s list to the result of parsing input.
  311. m_query->m_list = TRY_OR_THROW_OOM(vm, url_decode(input));
  312. }
  313. return {};
  314. }
  315. // https://url.spec.whatwg.org/#dom-url-searchparams
  316. JS::NonnullGCPtr<URLSearchParams const> URL::search_params() const
  317. {
  318. // The searchParams getter steps are to return this’s query object.
  319. return m_query;
  320. }
  321. // https://url.spec.whatwg.org/#dom-url-hash
  322. WebIDL::ExceptionOr<String> URL::hash() const
  323. {
  324. auto& vm = realm().vm();
  325. // 1. If this’s URL’s fragment is either null or the empty string, then return the empty string.
  326. if (m_url.fragment().is_null() || m_url.fragment().is_empty())
  327. return String {};
  328. // 2. Return U+0023 (#), followed by this’s URL’s fragment.
  329. return TRY_OR_THROW_OOM(vm, String::formatted("#{}", m_url.fragment()));
  330. }
  331. // https://url.spec.whatwg.org/#ref-for-dom-url-hash%E2%91%A0
  332. void URL::set_hash(String const& hash)
  333. {
  334. // 1. If the given value is the empty string:
  335. if (hash.is_empty()) {
  336. // 1. Set this’s URL’s fragment to null.
  337. m_url.set_fragment({});
  338. // FIXME: 2. Potentially strip trailing spaces from an opaque path with this.
  339. // 3. Return.
  340. return;
  341. }
  342. // 2. Let input be the given value with a single leading U+0023 (#) removed, if any.
  343. auto hash_as_string_view = hash.bytes_as_string_view();
  344. auto input = hash_as_string_view.substring_view(hash_as_string_view.starts_with('#'));
  345. // 3. Set this’s URL’s fragment to the empty string.
  346. auto url = m_url; // We copy the URL here to follow other browser's behaviour of reverting the hash change if the parse failed.
  347. url.set_fragment(DeprecatedString::empty());
  348. // 4. Basic URL parse input with this’s URL as url and fragment state as state override.
  349. auto result_url = URLParser::parse(input, {}, move(url), URLParser::State::Fragment);
  350. if (result_url.is_valid())
  351. m_url = move(result_url);
  352. }
  353. // https://url.spec.whatwg.org/#concept-url-origin
  354. HTML::Origin url_origin(AK::URL const& url)
  355. {
  356. // FIXME: We should probably have an extended version of AK::URL for LibWeb instead of standalone functions like this.
  357. // The origin of a URL url is the origin returned by running these steps, switching on url’s scheme:
  358. // -> "blob"
  359. if (url.scheme() == "blob"sv) {
  360. // FIXME: Support 'blob://' URLs
  361. return HTML::Origin {};
  362. }
  363. // -> "ftp"
  364. // -> "http"
  365. // -> "https"
  366. // -> "ws"
  367. // -> "wss"
  368. if (url.scheme().is_one_of("ftp"sv, "http"sv, "https"sv, "ws"sv, "wss"sv)) {
  369. // Return the tuple origin (url’s scheme, url’s host, url’s port, null).
  370. return HTML::Origin(url.scheme(), url.host(), url.port().value_or(0));
  371. }
  372. // -> "file"
  373. if (url.scheme() == "file"sv) {
  374. // Unfortunate as it is, this is left as an exercise to the reader. When in doubt, return a new opaque origin.
  375. // Note: We must return an origin with the `file://' protocol for `file://' iframes to work from `file://' pages.
  376. return HTML::Origin(url.scheme(), DeprecatedString(), 0);
  377. }
  378. // -> Otherwise
  379. // Return a new opaque origin.
  380. return HTML::Origin {};
  381. }
  382. // https://url.spec.whatwg.org/#concept-domain
  383. bool host_is_domain(StringView host)
  384. {
  385. // A domain is a non-empty ASCII string that identifies a realm within a network.
  386. return !host.is_empty()
  387. && !IPv4Address::from_string(host).has_value()
  388. && !IPv6Address::from_string(host).has_value();
  389. }
  390. }