URL.cpp 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561
  1. /*
  2. * Copyright (c) 2021, Idan Horowitz <idan.horowitz@serenityos.org>
  3. * Copyright (c) 2021, the SerenityOS developers.
  4. * Copyright (c) 2023, networkException <networkexception@serenityos.org>
  5. *
  6. * SPDX-License-Identifier: BSD-2-Clause
  7. */
  8. #include <AK/IPv4Address.h>
  9. #include <AK/IPv6Address.h>
  10. #include <AK/URLParser.h>
  11. #include <LibWeb/Bindings/Intrinsics.h>
  12. #include <LibWeb/FileAPI/Blob.h>
  13. #include <LibWeb/FileAPI/BlobURLStore.h>
  14. #include <LibWeb/URL/URL.h>
  15. namespace Web::URL {
  16. JS::NonnullGCPtr<URL> URL::create(JS::Realm& realm, AK::URL url, JS::NonnullGCPtr<URLSearchParams> query)
  17. {
  18. return realm.heap().allocate<URL>(realm, realm, move(url), move(query));
  19. }
  20. // https://url.spec.whatwg.org/#api-url-parser
  21. static Optional<AK::URL> parse_api_url(String const& url, Optional<String> const& base)
  22. {
  23. // FIXME: We somewhat awkwardly have two failure states encapsulated in the return type (and convert between them in the steps),
  24. // ideally we'd get rid of URL's valid flag
  25. // 1. Let parsedBase be null.
  26. Optional<AK::URL> parsed_base;
  27. // 2. If base is non-null:
  28. if (base.has_value()) {
  29. // 1. Set parsedBase to the result of running the basic URL parser on base.
  30. auto parsed_base_url = URLParser::basic_parse(*base);
  31. // 2. If parsedBase is failure, then return failure.
  32. if (!parsed_base_url.is_valid())
  33. return {};
  34. parsed_base = parsed_base_url;
  35. }
  36. // 3. Return the result of running the basic URL parser on url with parsedBase.
  37. auto parsed = URLParser::basic_parse(url, parsed_base);
  38. return parsed.is_valid() ? parsed : Optional<AK::URL> {};
  39. }
  40. // https://url.spec.whatwg.org/#dom-url-url
  41. WebIDL::ExceptionOr<JS::NonnullGCPtr<URL>> URL::construct_impl(JS::Realm& realm, String const& url, Optional<String> const& base)
  42. {
  43. // 1. Let parsedURL be the result of running the API URL parser on url with base, if given.
  44. auto parsed_url = parse_api_url(url, base);
  45. // 2. If parsedURL is failure, then throw a TypeError.
  46. if (!parsed_url.has_value())
  47. return WebIDL::SimpleException { WebIDL::SimpleExceptionType::TypeError, "Invalid URL"sv };
  48. // 3. Let query be parsedURL’s query, if that is non-null, and the empty string otherwise.
  49. auto query = parsed_url->query().value_or(String {});
  50. // 4. Set this’s URL to parsedURL.
  51. // 5. Set this’s query object to a new URLSearchParams object.
  52. auto query_object = MUST(URLSearchParams::construct_impl(realm, query));
  53. // 6. Initialize this’s query object with query.
  54. auto result_url = URL::create(realm, parsed_url.release_value(), move(query_object));
  55. // 7. Set this’s query object’s URL object to this.
  56. result_url->m_query->m_url = result_url;
  57. return result_url;
  58. }
  59. URL::URL(JS::Realm& realm, AK::URL url, JS::NonnullGCPtr<URLSearchParams> query)
  60. : PlatformObject(realm)
  61. , m_url(move(url))
  62. , m_query(move(query))
  63. {
  64. }
  65. URL::~URL() = default;
  66. void URL::initialize(JS::Realm& realm)
  67. {
  68. Base::initialize(realm);
  69. set_prototype(&Bindings::ensure_web_prototype<Bindings::URLPrototype>(realm, "URL"));
  70. }
  71. void URL::visit_edges(Cell::Visitor& visitor)
  72. {
  73. Base::visit_edges(visitor);
  74. visitor.visit(m_query.ptr());
  75. }
  76. // https://w3c.github.io/FileAPI/#dfn-createObjectURL
  77. WebIDL::ExceptionOr<String> URL::create_object_url(JS::VM& vm, JS::NonnullGCPtr<FileAPI::Blob> object)
  78. {
  79. // The createObjectURL(obj) static method must return the result of adding an entry to the blob URL store for obj.
  80. return TRY_OR_THROW_OOM(vm, FileAPI::add_entry_to_blob_url_store(object));
  81. }
  82. // https://w3c.github.io/FileAPI/#dfn-revokeObjectURL
  83. WebIDL::ExceptionOr<void> URL::revoke_object_url(JS::VM& vm, StringView url)
  84. {
  85. // 1. Let url record be the result of parsing url.
  86. auto url_record = parse(url);
  87. // 2. If url record’s scheme is not "blob", return.
  88. if (url_record.scheme() != "blob"sv)
  89. return {};
  90. // 3. Let origin be the origin of url record.
  91. auto origin = url_origin(url_record);
  92. // 4. Let settings be the current settings object.
  93. auto& settings = HTML::current_settings_object();
  94. // 5. If origin is not same origin with settings’s origin, return.
  95. if (!origin.is_same_origin(settings.origin()))
  96. return {};
  97. // 6. Remove an entry from the Blob URL Store for url.
  98. TRY_OR_THROW_OOM(vm, FileAPI::remove_entry_from_blob_url_store(url));
  99. return {};
  100. }
  101. // https://url.spec.whatwg.org/#dom-url-canparse
  102. bool URL::can_parse(JS::VM&, String const& url, Optional<String> const& base)
  103. {
  104. // 1. Let parsedURL be the result of running the API URL parser on url with base, if given.
  105. auto parsed_url = parse_api_url(url, base);
  106. // 2. If parsedURL is failure, then return false.
  107. if (!parsed_url.has_value())
  108. return false;
  109. // 3. Return true.
  110. return true;
  111. }
  112. // https://url.spec.whatwg.org/#dom-url-href
  113. WebIDL::ExceptionOr<String> URL::href() const
  114. {
  115. auto& vm = realm().vm();
  116. // The href getter steps and the toJSON() method steps are to return the serialization of this’s URL.
  117. return TRY_OR_THROW_OOM(vm, String::from_deprecated_string(m_url.serialize()));
  118. }
  119. // https://url.spec.whatwg.org/#dom-url-tojson
  120. WebIDL::ExceptionOr<String> URL::to_json() const
  121. {
  122. auto& vm = realm().vm();
  123. // The href getter steps and the toJSON() method steps are to return the serialization of this’s URL.
  124. return TRY_OR_THROW_OOM(vm, String::from_deprecated_string(m_url.serialize()));
  125. }
  126. // https://url.spec.whatwg.org/#ref-for-dom-url-href②
  127. WebIDL::ExceptionOr<void> URL::set_href(String const& href)
  128. {
  129. auto& vm = realm().vm();
  130. // 1. Let parsedURL be the result of running the basic URL parser on the given value.
  131. AK::URL parsed_url = href;
  132. // 2. If parsedURL is failure, then throw a TypeError.
  133. if (!parsed_url.is_valid())
  134. return WebIDL::SimpleException { WebIDL::SimpleExceptionType::TypeError, "Invalid URL"sv };
  135. // 3. Set this’s URL to parsedURL.
  136. m_url = move(parsed_url);
  137. // 4. Empty this’s query object’s list.
  138. m_query->m_list.clear();
  139. // 5. Let query be this’s URL’s query.
  140. auto query = m_url.query();
  141. // 6. If query is non-null, then set this’s query object’s list to the result of parsing query.
  142. if (query.has_value())
  143. m_query->m_list = TRY_OR_THROW_OOM(vm, url_decode(*query));
  144. return {};
  145. }
  146. // https://url.spec.whatwg.org/#dom-url-origin
  147. WebIDL::ExceptionOr<String> URL::origin() const
  148. {
  149. auto& vm = realm().vm();
  150. // The origin getter steps are to return the serialization of this’s URL’s origin. [HTML]
  151. return TRY_OR_THROW_OOM(vm, String::from_deprecated_string(m_url.serialize_origin()));
  152. }
  153. // https://url.spec.whatwg.org/#dom-url-protocol
  154. WebIDL::ExceptionOr<String> URL::protocol() const
  155. {
  156. auto& vm = realm().vm();
  157. // The protocol getter steps are to return this’s URL’s scheme, followed by U+003A (:).
  158. return TRY_OR_THROW_OOM(vm, String::formatted("{}:", m_url.scheme()));
  159. }
  160. // https://url.spec.whatwg.org/#ref-for-dom-url-protocol%E2%91%A0
  161. WebIDL::ExceptionOr<void> URL::set_protocol(String const& protocol)
  162. {
  163. auto& vm = realm().vm();
  164. // The protocol setter steps are to basic URL parse the given value, followed by U+003A (:), with this’s URL as
  165. // url and scheme start state as state override.
  166. auto result_url = URLParser::basic_parse(TRY_OR_THROW_OOM(vm, String::formatted("{}:", protocol)), {}, m_url, URLParser::State::SchemeStart);
  167. if (result_url.is_valid())
  168. m_url = move(result_url);
  169. return {};
  170. }
  171. // https://url.spec.whatwg.org/#dom-url-username
  172. WebIDL::ExceptionOr<String> URL::username() const
  173. {
  174. auto& vm = realm().vm();
  175. // The username getter steps are to return this’s URL’s username.
  176. return TRY_OR_THROW_OOM(vm, m_url.username());
  177. }
  178. // https://url.spec.whatwg.org/#ref-for-dom-url-username%E2%91%A0
  179. void URL::set_username(String const& username)
  180. {
  181. // 1. If this’s URL cannot have a username/password/port, then return.
  182. if (m_url.cannot_have_a_username_or_password_or_port())
  183. return;
  184. // 2. Set the username given this’s URL and the given value.
  185. MUST(m_url.set_username(username));
  186. }
  187. // https://url.spec.whatwg.org/#dom-url-password
  188. WebIDL::ExceptionOr<String> URL::password() const
  189. {
  190. auto& vm = realm().vm();
  191. // The password getter steps are to return this’s URL’s password.
  192. return TRY_OR_THROW_OOM(vm, m_url.password());
  193. }
  194. // https://url.spec.whatwg.org/#ref-for-dom-url-password%E2%91%A0
  195. void URL::set_password(String const& password)
  196. {
  197. // 1. If this’s URL cannot have a username/password/port, then return.
  198. if (m_url.cannot_have_a_username_or_password_or_port())
  199. return;
  200. // 2. Set the password given this’s URL and the given value.
  201. MUST(m_url.set_password(password));
  202. }
  203. // https://url.spec.whatwg.org/#dom-url-host
  204. WebIDL::ExceptionOr<String> URL::host() const
  205. {
  206. auto& vm = realm().vm();
  207. // 1. Let url be this’s URL.
  208. auto& url = m_url;
  209. // 2. If url’s host is null, then return the empty string.
  210. if (url.host().has<Empty>())
  211. return String {};
  212. // 3. If url’s port is null, return url’s host, serialized.
  213. if (!url.port().has_value())
  214. return TRY_OR_THROW_OOM(vm, url.serialized_host());
  215. // 4. Return url’s host, serialized, followed by U+003A (:) and url’s port, serialized.
  216. return TRY_OR_THROW_OOM(vm, String::formatted("{}:{}", TRY_OR_THROW_OOM(vm, url.serialized_host()), *url.port()));
  217. }
  218. // https://url.spec.whatwg.org/#dom-url-hostref-for-dom-url-host%E2%91%A0
  219. void URL::set_host(String const& host)
  220. {
  221. // 1. If this’s URL’s cannot-be-a-base-URL is true, then return.
  222. if (m_url.cannot_be_a_base_url())
  223. return;
  224. // 2. Basic URL parse the given value with this’s URL as url and host state as state override.
  225. auto result_url = URLParser::basic_parse(host, {}, m_url, URLParser::State::Host);
  226. if (result_url.is_valid())
  227. m_url = move(result_url);
  228. }
  229. // https://url.spec.whatwg.org/#dom-url-hostname
  230. WebIDL::ExceptionOr<String> URL::hostname() const
  231. {
  232. auto& vm = realm().vm();
  233. // 1. If this’s URL’s host is null, then return the empty string.
  234. if (m_url.host().has<Empty>())
  235. return String {};
  236. // 2. Return this’s URL’s host, serialized.
  237. return TRY_OR_THROW_OOM(vm, m_url.serialized_host());
  238. }
  239. // https://url.spec.whatwg.org/#ref-for-dom-url-hostname①
  240. void URL::set_hostname(String const& hostname)
  241. {
  242. // 1. If this’s URL’s cannot-be-a-base-URL is true, then return.
  243. if (m_url.cannot_be_a_base_url())
  244. return;
  245. // 2. Basic URL parse the given value with this’s URL as url and hostname state as state override.
  246. auto result_url = URLParser::basic_parse(hostname, {}, m_url, URLParser::State::Hostname);
  247. if (result_url.is_valid())
  248. m_url = move(result_url);
  249. }
  250. // https://url.spec.whatwg.org/#ref-for-dom-url-hostname①
  251. WebIDL::ExceptionOr<String> URL::port() const
  252. {
  253. auto& vm = realm().vm();
  254. // 1. If this’s URL’s port is null, then return the empty string.
  255. if (!m_url.port().has_value())
  256. return String {};
  257. // 2. Return this’s URL’s port, serialized.
  258. return TRY_OR_THROW_OOM(vm, String::formatted("{}", *m_url.port()));
  259. }
  260. // https://url.spec.whatwg.org/#ref-for-dom-url-port%E2%91%A0
  261. void URL::set_port(String const& port)
  262. {
  263. // 1. If this’s URL cannot have a username/password/port, then return.
  264. if (m_url.cannot_have_a_username_or_password_or_port())
  265. return;
  266. // 2. If the given value is the empty string, then set this’s URL’s port to null.
  267. if (port.is_empty()) {
  268. m_url.set_port({});
  269. }
  270. // 3. Otherwise, basic URL parse the given value with this’s URL as url and port state as state override.
  271. else {
  272. auto result_url = URLParser::basic_parse(port, {}, m_url, URLParser::State::Port);
  273. if (result_url.is_valid())
  274. m_url = move(result_url);
  275. }
  276. }
  277. // https://url.spec.whatwg.org/#dom-url-pathname
  278. WebIDL::ExceptionOr<String> URL::pathname() const
  279. {
  280. auto& vm = realm().vm();
  281. // The pathname getter steps are to return the result of URL path serializing this’s URL.
  282. return TRY_OR_THROW_OOM(vm, String::from_deprecated_string(m_url.serialize_path()));
  283. }
  284. // https://url.spec.whatwg.org/#ref-for-dom-url-pathname%E2%91%A0
  285. void URL::set_pathname(String const& pathname)
  286. {
  287. // FIXME: These steps no longer match the speci.
  288. // 1. If this’s URL’s cannot-be-a-base-URL is true, then return.
  289. if (m_url.cannot_be_a_base_url())
  290. return;
  291. // 2. Empty this’s URL’s path.
  292. auto url = m_url; // We copy the URL here to follow other browser's behavior of reverting the path change if the parse failed.
  293. url.set_paths({});
  294. // 3. Basic URL parse the given value with this’s URL as url and path start state as state override.
  295. auto result_url = URLParser::basic_parse(pathname, {}, move(url), URLParser::State::PathStart);
  296. if (result_url.is_valid())
  297. m_url = move(result_url);
  298. }
  299. // https://url.spec.whatwg.org/#dom-url-search
  300. WebIDL::ExceptionOr<String> URL::search() const
  301. {
  302. auto& vm = realm().vm();
  303. // 1. If this’s URL’s query is either null or the empty string, then return the empty string.
  304. if (!m_url.query().has_value() || m_url.query()->is_empty())
  305. return String {};
  306. // 2. Return U+003F (?), followed by this’s URL’s query.
  307. return TRY_OR_THROW_OOM(vm, String::formatted("?{}", *m_url.query()));
  308. }
  309. // https://url.spec.whatwg.org/#ref-for-dom-url-search%E2%91%A0
  310. WebIDL::ExceptionOr<void> URL::set_search(String const& search)
  311. {
  312. auto& vm = realm().vm();
  313. // 1. Let url be this’s URL.
  314. auto& url = m_url;
  315. // 2. If the given value is the empty string:
  316. if (search.is_empty()) {
  317. // 1. Set url’s query to null.
  318. url.set_query({});
  319. // 2. Empty this’s query object’s list.
  320. m_query->m_list.clear();
  321. // FIXME: 3. Potentially strip trailing spaces from an opaque path with this.
  322. // 4. Return.
  323. return {};
  324. }
  325. // 3. Let input be the given value with a single leading U+003F (?) removed, if any.
  326. auto search_as_string_view = search.bytes_as_string_view();
  327. auto input = search_as_string_view.substring_view(search_as_string_view.starts_with('?'));
  328. // 4. Set url’s query to the empty string.
  329. auto url_copy = url; // We copy the URL here to follow other browser's behavior of reverting the search change if the parse failed.
  330. url_copy.set_query(String {});
  331. // 5. Basic URL parse input with url as url and query state as state override.
  332. auto result_url = URLParser::basic_parse(input, {}, move(url_copy), URLParser::State::Query);
  333. if (result_url.is_valid()) {
  334. m_url = move(result_url);
  335. // 6. Set this’s query object’s list to the result of parsing input.
  336. m_query->m_list = TRY_OR_THROW_OOM(vm, url_decode(input));
  337. }
  338. return {};
  339. }
  340. // https://url.spec.whatwg.org/#dom-url-searchparams
  341. JS::NonnullGCPtr<URLSearchParams const> URL::search_params() const
  342. {
  343. // The searchParams getter steps are to return this’s query object.
  344. return m_query;
  345. }
  346. // https://url.spec.whatwg.org/#dom-url-hash
  347. WebIDL::ExceptionOr<String> URL::hash() const
  348. {
  349. auto& vm = realm().vm();
  350. // 1. If this’s URL’s fragment is either null or the empty string, then return the empty string.
  351. if (m_url.fragment().is_null() || m_url.fragment().is_empty())
  352. return String {};
  353. // 2. Return U+0023 (#), followed by this’s URL’s fragment.
  354. return TRY_OR_THROW_OOM(vm, String::formatted("#{}", m_url.fragment()));
  355. }
  356. // https://url.spec.whatwg.org/#ref-for-dom-url-hash%E2%91%A0
  357. void URL::set_hash(String const& hash)
  358. {
  359. // 1. If the given value is the empty string:
  360. if (hash.is_empty()) {
  361. // 1. Set this’s URL’s fragment to null.
  362. m_url.set_fragment({});
  363. // FIXME: 2. Potentially strip trailing spaces from an opaque path with this.
  364. // 3. Return.
  365. return;
  366. }
  367. // 2. Let input be the given value with a single leading U+0023 (#) removed, if any.
  368. auto hash_as_string_view = hash.bytes_as_string_view();
  369. auto input = hash_as_string_view.substring_view(hash_as_string_view.starts_with('#'));
  370. // 3. Set this’s URL’s fragment to the empty string.
  371. auto url = m_url; // We copy the URL here to follow other browser's behavior of reverting the hash change if the parse failed.
  372. url.set_fragment(DeprecatedString::empty());
  373. // 4. Basic URL parse input with this’s URL as url and fragment state as state override.
  374. auto result_url = URLParser::basic_parse(input, {}, move(url), URLParser::State::Fragment);
  375. if (result_url.is_valid())
  376. m_url = move(result_url);
  377. }
  378. // https://url.spec.whatwg.org/#concept-url-origin
  379. HTML::Origin url_origin(AK::URL const& url)
  380. {
  381. // FIXME: We should probably have an extended version of AK::URL for LibWeb instead of standalone functions like this.
  382. // The origin of a URL url is the origin returned by running these steps, switching on url’s scheme:
  383. // -> "blob"
  384. if (url.scheme() == "blob"sv) {
  385. auto url_string = url.to_string().release_value_but_fixme_should_propagate_errors();
  386. // 1. If url’s blob URL entry is non-null, then return url’s blob URL entry’s environment’s origin.
  387. if (auto blob_url_entry = FileAPI::blob_url_store().get(url_string); blob_url_entry.has_value())
  388. return blob_url_entry->environment->origin();
  389. // 2. Let pathURL be the result of parsing the result of URL path serializing url.
  390. auto path_url = parse(url.serialize_path());
  391. // 3. If pathURL is failure, then return a new opaque origin.
  392. if (!path_url.is_valid())
  393. return HTML::Origin {};
  394. // 4. If pathURL’s scheme is "http", "https", or "file", then return pathURL’s origin.
  395. if (path_url.scheme().is_one_of("http"sv, "https"sv, "file"sv))
  396. return url_origin(path_url);
  397. // 5. Return a new opaque origin.
  398. return HTML::Origin {};
  399. }
  400. // -> "ftp"
  401. // -> "http"
  402. // -> "https"
  403. // -> "ws"
  404. // -> "wss"
  405. if (url.scheme().is_one_of("ftp"sv, "http"sv, "https"sv, "ws"sv, "wss"sv)) {
  406. // Return the tuple origin (url’s scheme, url’s host, url’s port, null).
  407. return HTML::Origin(url.scheme().to_deprecated_string(), url.host(), url.port().value_or(0));
  408. }
  409. // -> "file"
  410. if (url.scheme() == "file"sv) {
  411. // Unfortunate as it is, this is left as an exercise to the reader. When in doubt, return a new opaque origin.
  412. // Note: We must return an origin with the `file://' protocol for `file://' iframes to work from `file://' pages.
  413. return HTML::Origin(url.scheme().to_deprecated_string(), String {}, 0);
  414. }
  415. // -> Otherwise
  416. // Return a new opaque origin.
  417. return HTML::Origin {};
  418. }
  419. // https://url.spec.whatwg.org/#concept-domain
  420. bool host_is_domain(AK::URL::Host const& host)
  421. {
  422. // A domain is a non-empty ASCII string that identifies a realm within a network.
  423. return host.has<String>() && host.get<String>() != String {};
  424. }
  425. // https://url.spec.whatwg.org/#concept-url-parser
  426. AK::URL parse(StringView input, Optional<AK::URL> const& base_url)
  427. {
  428. // FIXME: We should probably have an extended version of AK::URL for LibWeb instead of standalone functions like this.
  429. // 1. Let url be the result of running the basic URL parser on input with base and encoding.
  430. auto url = URLParser::basic_parse(input, base_url);
  431. // 2. If url is failure, return failure.
  432. if (!url.is_valid())
  433. return {};
  434. // 3. If url’s scheme is not "blob",
  435. if (url.scheme() != "blob")
  436. return url;
  437. // FIXME: 4. Set url’s blob URL entry to the result of resolving the blob URL url,
  438. // FIXME: 5. if that did not return failure, and null otherwise.
  439. // 6. Return url
  440. return url;
  441. }
  442. }