URL.cpp 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431
  1. /*
  2. * Copyright (c) 2021, Idan Horowitz <idan.horowitz@serenityos.org>
  3. * Copyright (c) 2021, the SerenityOS developers.
  4. * Copyright (c) 2023, networkException <networkexception@serenityos.org>
  5. *
  6. * SPDX-License-Identifier: BSD-2-Clause
  7. */
  8. #include <AK/IPv4Address.h>
  9. #include <AK/IPv6Address.h>
  10. #include <AK/URLParser.h>
  11. #include <LibWeb/Bindings/Intrinsics.h>
  12. #include <LibWeb/URL/URL.h>
  13. namespace Web::URL {
  14. WebIDL::ExceptionOr<JS::NonnullGCPtr<URL>> URL::create(JS::Realm& realm, AK::URL url, JS::NonnullGCPtr<URLSearchParams> query)
  15. {
  16. return MUST_OR_THROW_OOM(realm.heap().allocate<URL>(realm, realm, move(url), move(query)));
  17. }
  18. // https://url.spec.whatwg.org/#api-url-parser
  19. static Optional<AK::URL> parse_api_url(String const& url, Optional<String> const& base)
  20. {
  21. // FIXME: We somewhat awkwardly have two failure states encapsulated in the return type (and convert between them in the steps),
  22. // ideally we'd get rid of URL's valid flag
  23. // 1. Let parsedBase be null.
  24. Optional<AK::URL> parsed_base;
  25. // 2. If base is non-null:
  26. if (base.has_value()) {
  27. // 1. Set parsedBase to the result of running the basic URL parser on base.
  28. auto parsed_base_url = URLParser::parse(*base);
  29. // 2. If parsedBase is failure, then return failure.
  30. if (!parsed_base_url.is_valid())
  31. return {};
  32. parsed_base = parsed_base_url;
  33. }
  34. // 3. Return the result of running the basic URL parser on url with parsedBase.
  35. auto parsed = URLParser::parse(url, parsed_base);
  36. return parsed.is_valid() ? parsed : Optional<AK::URL> {};
  37. }
  38. // https://url.spec.whatwg.org/#dom-url-url
  39. WebIDL::ExceptionOr<JS::NonnullGCPtr<URL>> URL::construct_impl(JS::Realm& realm, String const& url, Optional<String> const& base)
  40. {
  41. auto& vm = realm.vm();
  42. // 1. Let parsedURL be the result of running the API URL parser on url with base, if given.
  43. auto parsed_url = parse_api_url(url, base);
  44. // 2. If parsedURL is failure, then throw a TypeError.
  45. if (!parsed_url.has_value())
  46. return WebIDL::SimpleException { WebIDL::SimpleExceptionType::TypeError, "Invalid URL"sv };
  47. // 3. Let query be parsedURL’s query, if that is non-null, and the empty string otherwise.
  48. auto query = parsed_url->query().is_null() ? String {} : TRY_OR_THROW_OOM(vm, String::from_deprecated_string(parsed_url->query()));
  49. // 4. Set this’s URL to parsedURL.
  50. // 5. Set this’s query object to a new URLSearchParams object.
  51. auto query_object = MUST(URLSearchParams::construct_impl(realm, query));
  52. // 6. Initialize this’s query object with query.
  53. auto result_url = TRY(URL::create(realm, parsed_url.release_value(), move(query_object)));
  54. // 7. Set this’s query object’s URL object to this.
  55. result_url->m_query->m_url = result_url;
  56. return result_url;
  57. }
  58. URL::URL(JS::Realm& realm, AK::URL url, JS::NonnullGCPtr<URLSearchParams> query)
  59. : PlatformObject(realm)
  60. , m_url(move(url))
  61. , m_query(move(query))
  62. {
  63. }
  64. URL::~URL() = default;
  65. JS::ThrowCompletionOr<void> URL::initialize(JS::Realm& realm)
  66. {
  67. MUST_OR_THROW_OOM(Base::initialize(realm));
  68. set_prototype(&Bindings::ensure_web_prototype<Bindings::URLPrototype>(realm, "URL"));
  69. return {};
  70. }
  71. void URL::visit_edges(Cell::Visitor& visitor)
  72. {
  73. Base::visit_edges(visitor);
  74. visitor.visit(m_query.ptr());
  75. }
  76. // https://url.spec.whatwg.org/#dom-url-canparse
  77. bool URL::can_parse(JS::VM&, String const& url, Optional<String> const& base)
  78. {
  79. // 1. Let parsedURL be the result of running the API URL parser on url with base, if given.
  80. auto parsed_url = parse_api_url(url, base);
  81. // 2. If parsedURL is failure, then return false.
  82. if (!parsed_url.has_value())
  83. return false;
  84. // 3. Return true.
  85. return true;
  86. }
  87. WebIDL::ExceptionOr<String> URL::href() const
  88. {
  89. auto& vm = realm().vm();
  90. // return the serialization of this’s URL.
  91. return TRY_OR_THROW_OOM(vm, String::from_deprecated_string(m_url.serialize()));
  92. }
  93. WebIDL::ExceptionOr<String> URL::to_json() const
  94. {
  95. auto& vm = realm().vm();
  96. // return the serialization of this’s URL.
  97. return TRY_OR_THROW_OOM(vm, String::from_deprecated_string(m_url.serialize()));
  98. }
  99. WebIDL::ExceptionOr<void> URL::set_href(String const& href)
  100. {
  101. auto& vm = realm().vm();
  102. // 1. Let parsedURL be the result of running the basic URL parser on the given value.
  103. AK::URL parsed_url = href;
  104. // 2. If parsedURL is failure, then throw a TypeError.
  105. if (!parsed_url.is_valid())
  106. return WebIDL::SimpleException { WebIDL::SimpleExceptionType::TypeError, "Invalid URL"sv };
  107. // 3. Set this’s URL to parsedURL.
  108. m_url = move(parsed_url);
  109. // 4. Empty this’s query object’s list.
  110. m_query->m_list.clear();
  111. // 5. Let query be this’s URL’s query.
  112. auto& query = m_url.query();
  113. // 6. If query is non-null, then set this’s query object’s list to the result of parsing query.
  114. if (!query.is_null())
  115. m_query->m_list = TRY_OR_THROW_OOM(vm, url_decode(query));
  116. return {};
  117. }
  118. WebIDL::ExceptionOr<String> URL::origin() const
  119. {
  120. auto& vm = realm().vm();
  121. // return the serialization of this’s URL’s origin.
  122. return TRY_OR_THROW_OOM(vm, String::from_deprecated_string(m_url.serialize_origin()));
  123. }
  124. WebIDL::ExceptionOr<String> URL::protocol() const
  125. {
  126. auto& vm = realm().vm();
  127. // return this’s URL’s scheme, followed by U+003A (:).
  128. return TRY_OR_THROW_OOM(vm, String::formatted("{}:", m_url.scheme()));
  129. }
  130. WebIDL::ExceptionOr<void> URL::set_protocol(String const& protocol)
  131. {
  132. auto& vm = realm().vm();
  133. // basic URL parse the given value, followed by U+003A (:), with this’s URL as url and scheme start state as state override.
  134. auto result_url = URLParser::parse(TRY_OR_THROW_OOM(vm, String::formatted("{}:", protocol)), {}, m_url, URLParser::State::SchemeStart);
  135. if (result_url.is_valid())
  136. m_url = move(result_url);
  137. return {};
  138. }
  139. WebIDL::ExceptionOr<String> URL::username() const
  140. {
  141. auto& vm = realm().vm();
  142. // return this’s URL’s username.
  143. return TRY_OR_THROW_OOM(vm, String::from_deprecated_string(m_url.username()));
  144. }
  145. void URL::set_username(String const& username)
  146. {
  147. // 1. If this’s URL cannot have a username/password/port, then return.
  148. if (m_url.cannot_have_a_username_or_password_or_port())
  149. return;
  150. // 2. Set the username given this’s URL and the given value.
  151. m_url.set_username(AK::URL::percent_encode(username, AK::URL::PercentEncodeSet::Userinfo));
  152. }
  153. WebIDL::ExceptionOr<String> URL::password() const
  154. {
  155. auto& vm = realm().vm();
  156. // return this’s URL’s password.
  157. return TRY_OR_THROW_OOM(vm, String::from_deprecated_string(m_url.password()));
  158. }
  159. void URL::set_password(String const& password)
  160. {
  161. // 1. If this’s URL cannot have a username/password/port, then return.
  162. if (m_url.cannot_have_a_username_or_password_or_port())
  163. return;
  164. // 2. Set the password given this’s URL and the given value.
  165. m_url.set_password(AK::URL::percent_encode(password, AK::URL::PercentEncodeSet::Userinfo));
  166. }
  167. WebIDL::ExceptionOr<String> URL::host() const
  168. {
  169. auto& vm = realm().vm();
  170. // 1. Let url be this’s URL.
  171. auto& url = m_url;
  172. // 2. If url’s host is null, then return the empty string.
  173. if (url.host().is_null())
  174. return String {};
  175. // 3. If url’s port is null, return url’s host, serialized.
  176. if (!url.port().has_value())
  177. return TRY_OR_THROW_OOM(vm, String::from_deprecated_string(url.host()));
  178. // 4. Return url’s host, serialized, followed by U+003A (:) and url’s port, serialized.
  179. return TRY_OR_THROW_OOM(vm, String::formatted("{}:{}", url.host(), *url.port()));
  180. }
  181. void URL::set_host(String const& host)
  182. {
  183. // 1. If this’s URL’s cannot-be-a-base-URL is true, then return.
  184. if (m_url.cannot_be_a_base_url())
  185. return;
  186. // 2. Basic URL parse the given value with this’s URL as url and host state as state override.
  187. auto result_url = URLParser::parse(host, {}, m_url, URLParser::State::Host);
  188. if (result_url.is_valid())
  189. m_url = move(result_url);
  190. }
  191. WebIDL::ExceptionOr<String> URL::hostname() const
  192. {
  193. auto& vm = realm().vm();
  194. // 1. If this’s URL’s host is null, then return the empty string.
  195. if (m_url.host().is_null())
  196. return String {};
  197. // 2. Return this’s URL’s host, serialized.
  198. return TRY_OR_THROW_OOM(vm, String::from_deprecated_string(m_url.host()));
  199. }
  200. void URL::set_hostname(String const& hostname)
  201. {
  202. // 1. If this’s URL’s cannot-be-a-base-URL is true, then return.
  203. if (m_url.cannot_be_a_base_url())
  204. return;
  205. // 2. Basic URL parse the given value with this’s URL as url and hostname state as state override.
  206. auto result_url = URLParser::parse(hostname, {}, m_url, URLParser::State::Hostname);
  207. if (result_url.is_valid())
  208. m_url = move(result_url);
  209. }
  210. WebIDL::ExceptionOr<String> URL::port() const
  211. {
  212. auto& vm = realm().vm();
  213. // 1. If this’s URL’s port is null, then return the empty string.
  214. if (!m_url.port().has_value())
  215. return String {};
  216. // 2. Return this’s URL’s port, serialized.
  217. return TRY_OR_THROW_OOM(vm, String::formatted("{}", *m_url.port()));
  218. }
  219. void URL::set_port(String const& port)
  220. {
  221. // 1. If this’s URL cannot have a username/password/port, then return.
  222. if (m_url.cannot_have_a_username_or_password_or_port())
  223. return;
  224. // 2. If the given value is the empty string, then set this’s URL’s port to null.
  225. if (port.is_empty()) {
  226. m_url.set_port({});
  227. return;
  228. }
  229. // 3. Otherwise, basic URL parse the given value with this’s URL as url and port state as state override.
  230. auto result_url = URLParser::parse(port, {}, m_url, URLParser::State::Port);
  231. if (result_url.is_valid())
  232. m_url = move(result_url);
  233. }
  234. WebIDL::ExceptionOr<String> URL::pathname() const
  235. {
  236. auto& vm = realm().vm();
  237. // 1. If this’s URL’s cannot-be-a-base-URL is true, then return this’s URL’s path[0].
  238. // 2. If this’s URL’s path is empty, then return the empty string.
  239. // 3. Return U+002F (/), followed by the strings in this’s URL’s path (including empty strings), if any, separated from each other by U+002F (/).
  240. return TRY_OR_THROW_OOM(vm, String::from_deprecated_string(m_url.path()));
  241. }
  242. void URL::set_pathname(String const& pathname)
  243. {
  244. // 1. If this’s URL’s cannot-be-a-base-URL is true, then return.
  245. if (m_url.cannot_be_a_base_url())
  246. return;
  247. // 2. Empty this’s URL’s path.
  248. auto url = m_url; // We copy the URL here to follow other browser's behaviour of reverting the path change if the parse failed.
  249. url.set_paths({});
  250. // 3. Basic URL parse the given value with this’s URL as url and path start state as state override.
  251. auto result_url = URLParser::parse(pathname, {}, move(url), URLParser::State::PathStart);
  252. if (result_url.is_valid())
  253. m_url = move(result_url);
  254. }
  255. WebIDL::ExceptionOr<String> URL::search() const
  256. {
  257. auto& vm = realm().vm();
  258. // 1. If this’s URL’s query is either null or the empty string, then return the empty string.
  259. if (m_url.query().is_null() || m_url.query().is_empty())
  260. return String {};
  261. // 2. Return U+003F (?), followed by this’s URL’s query.
  262. return TRY_OR_THROW_OOM(vm, String::formatted("?{}", m_url.query()));
  263. }
  264. WebIDL::ExceptionOr<void> URL::set_search(String const& search)
  265. {
  266. auto& vm = realm().vm();
  267. // 1. Let url be this’s URL.
  268. auto& url = m_url;
  269. // If the given value is the empty string, set url’s query to null, empty this’s query object’s list, and then return.
  270. if (search.is_empty()) {
  271. url.set_query({});
  272. m_query->m_list.clear();
  273. return {};
  274. }
  275. // 2. Let input be the given value with a single leading U+003F (?) removed, if any.
  276. auto search_as_string_view = search.bytes_as_string_view();
  277. auto input = search_as_string_view.substring_view(search_as_string_view.starts_with('?'));
  278. // 3. Set url’s query to the empty string.
  279. auto url_copy = url; // We copy the URL here to follow other browser's behaviour of reverting the search change if the parse failed.
  280. url_copy.set_query(DeprecatedString::empty());
  281. // 4. Basic URL parse input with url as url and query state as state override.
  282. auto result_url = URLParser::parse(input, {}, move(url_copy), URLParser::State::Query);
  283. if (result_url.is_valid()) {
  284. m_url = move(result_url);
  285. // 5. Set this’s query object’s list to the result of parsing input.
  286. m_query->m_list = TRY_OR_THROW_OOM(vm, url_decode(input));
  287. }
  288. return {};
  289. }
  290. URLSearchParams const* URL::search_params() const
  291. {
  292. return m_query;
  293. }
  294. WebIDL::ExceptionOr<String> URL::hash() const
  295. {
  296. auto& vm = realm().vm();
  297. // 1. If this’s URL’s fragment is either null or the empty string, then return the empty string.
  298. if (m_url.fragment().is_null() || m_url.fragment().is_empty())
  299. return String {};
  300. // 2. Return U+0023 (#), followed by this’s URL’s fragment.
  301. return TRY_OR_THROW_OOM(vm, String::formatted("#{}", m_url.fragment()));
  302. }
  303. void URL::set_hash(String const& hash)
  304. {
  305. // 1. If the given value is the empty string, then set this’s URL’s fragment to null and return.
  306. if (hash.is_empty()) {
  307. m_url.set_fragment({});
  308. return;
  309. }
  310. // 2. Let input be the given value with a single leading U+0023 (#) removed, if any.
  311. auto hash_as_string_view = hash.bytes_as_string_view();
  312. auto input = hash_as_string_view.substring_view(hash_as_string_view.starts_with('#'));
  313. // 3. Set this’s URL’s fragment to the empty string.
  314. auto url = m_url; // We copy the URL here to follow other browser's behaviour of reverting the hash change if the parse failed.
  315. url.set_fragment(DeprecatedString::empty());
  316. // 4. Basic URL parse input with this’s URL as url and fragment state as state override.
  317. auto result_url = URLParser::parse(input, {}, move(url), URLParser::State::Fragment);
  318. if (result_url.is_valid())
  319. m_url = move(result_url);
  320. }
  321. // https://url.spec.whatwg.org/#concept-url-origin
  322. HTML::Origin url_origin(AK::URL const& url)
  323. {
  324. // FIXME: We should probably have an extended version of AK::URL for LibWeb instead of standalone functions like this.
  325. // The origin of a URL url is the origin returned by running these steps, switching on url’s scheme:
  326. // "blob"
  327. if (url.scheme() == "blob"sv) {
  328. // FIXME: Support 'blob://' URLs
  329. return HTML::Origin {};
  330. }
  331. // "ftp"
  332. // "http"
  333. // "https"
  334. // "ws"
  335. // "wss"
  336. if (url.scheme().is_one_of("ftp"sv, "http"sv, "https"sv, "ws"sv, "wss"sv)) {
  337. // Return the tuple origin (url’s scheme, url’s host, url’s port, null).
  338. return HTML::Origin(url.scheme(), url.host(), url.port().value_or(0));
  339. }
  340. // "file"
  341. if (url.scheme() == "file"sv) {
  342. // Unfortunate as it is, this is left as an exercise to the reader. When in doubt, return a new opaque origin.
  343. // Note: We must return an origin with the `file://' protocol for `file://' iframes to work from `file://' pages.
  344. return HTML::Origin(url.scheme(), DeprecatedString(), 0);
  345. }
  346. // Return a new opaque origin.
  347. return HTML::Origin {};
  348. }
  349. // https://url.spec.whatwg.org/#concept-domain
  350. bool host_is_domain(StringView host)
  351. {
  352. // A domain is a non-empty ASCII string that identifies a realm within a network.
  353. return !host.is_empty()
  354. && !IPv4Address::from_string(host).has_value()
  355. && !IPv6Address::from_string(host).has_value();
  356. }
  357. }