ResourceLoader.cpp 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561
  1. /*
  2. * Copyright (c) 2018-2023, Andreas Kling <kling@serenityos.org>
  3. * Copyright (c) 2022, Dex♪ <dexes.ttp@gmail.com>
  4. *
  5. * SPDX-License-Identifier: BSD-2-Clause
  6. */
  7. #include <AK/Debug.h>
  8. #include <LibCore/DateTime.h>
  9. #include <LibCore/Directory.h>
  10. #include <LibCore/ElapsedTimer.h>
  11. #include <LibCore/MimeData.h>
  12. #include <LibCore/Resource.h>
  13. #include <LibWeb/Cookie/Cookie.h>
  14. #include <LibWeb/Cookie/ParsedCookie.h>
  15. #include <LibWeb/Fetch/Infrastructure/URL.h>
  16. #include <LibWeb/Loader/ContentFilter.h>
  17. #include <LibWeb/Loader/GeneratedPagesLoader.h>
  18. #include <LibWeb/Loader/LoadRequest.h>
  19. #include <LibWeb/Loader/ProxyMappings.h>
  20. #include <LibWeb/Loader/Resource.h>
  21. #include <LibWeb/Loader/ResourceLoader.h>
  22. #include <LibWeb/Platform/EventLoopPlugin.h>
  23. #include <LibWeb/Platform/Timer.h>
  24. namespace Web {
// Out-of-line, compiler-generated constructors and destructors for the
// connector interfaces declared in ResourceLoader.h.
ResourceLoaderConnectorRequest::ResourceLoaderConnectorRequest() = default;
ResourceLoaderConnectorRequest::~ResourceLoaderConnectorRequest() = default;
ResourceLoaderConnector::ResourceLoaderConnector() = default;
ResourceLoaderConnector::~ResourceLoaderConnector() = default;
// The file-local ResourceLoader instance. It is assigned by ResourceLoader::initialize()
// and handed out by ResourceLoader::the(); it stays null until initialize() has been
// called with a non-null connector.
static RefPtr<ResourceLoader> s_resource_loader;
  30. void ResourceLoader::initialize(RefPtr<ResourceLoaderConnector> connector)
  31. {
  32. if (connector)
  33. s_resource_loader = ResourceLoader::try_create(connector.release_nonnull()).release_value_but_fixme_should_propagate_errors();
  34. }
  35. ResourceLoader& ResourceLoader::the()
  36. {
  37. if (!s_resource_loader) {
  38. dbgln("Web::ResourceLoader was not initialized");
  39. VERIFY_NOT_REACHED();
  40. }
  41. return *s_resource_loader;
  42. }
  43. ErrorOr<NonnullRefPtr<ResourceLoader>> ResourceLoader::try_create(NonnullRefPtr<ResourceLoaderConnector> connector)
  44. {
  45. return adopt_nonnull_ref_or_enomem(new (nothrow) ResourceLoader(move(connector)));
  46. }
// Builds a loader around `connector` and seeds the navigator-related settings
// with their defaults: the default user agent and platform strings, a single
// preferred language ("en-US"), and the default navigator compatibility mode.
// The UTF-8 conversions are wrapped in MUST(), i.e. they are not expected to fail.
ResourceLoader::ResourceLoader(NonnullRefPtr<ResourceLoaderConnector> connector)
    : m_connector(move(connector))
    , m_user_agent(MUST(String::from_utf8(default_user_agent)))
    , m_platform(MUST(String::from_utf8(default_platform)))
    , m_preferred_languages({ "en-US"_string })
    , m_navigator_compatibility_mode(default_navigator_compatibility_mode)
{
}
  55. void ResourceLoader::prefetch_dns(URL::URL const& url)
  56. {
  57. if (url.scheme().is_one_of("file"sv, "data"sv))
  58. return;
  59. if (ContentFilter::the().is_filtered(url)) {
  60. dbgln("ResourceLoader: Refusing to prefetch DNS for '{}': \033[31;1mURL was filtered\033[0m", url);
  61. return;
  62. }
  63. m_connector->prefetch_dns(url);
  64. }
  65. void ResourceLoader::preconnect(URL::URL const& url)
  66. {
  67. if (url.scheme().is_one_of("file"sv, "data"sv))
  68. return;
  69. if (ContentFilter::the().is_filtered(url)) {
  70. dbgln("ResourceLoader: Refusing to pre-connect to '{}': \033[31;1mURL was filtered\033[0m", url);
  71. return;
  72. }
  73. m_connector->preconnect(url);
  74. }
// Cache of Resource objects keyed by the LoadRequest that produced them.
// load_resource() fills it for every non-file URL; entries are dropped by
// clear_cache(), evict_from_cache(), and when a response carries a
// "Cache-Control" header containing "no-store".
static HashMap<LoadRequest, NonnullRefPtr<Resource>> s_resource_cache;
  76. RefPtr<Resource> ResourceLoader::load_resource(Resource::Type type, LoadRequest& request)
  77. {
  78. if (!request.is_valid())
  79. return nullptr;
  80. bool use_cache = request.url().scheme() != "file";
  81. if (use_cache) {
  82. auto it = s_resource_cache.find(request);
  83. if (it != s_resource_cache.end()) {
  84. if (it->value->type() != type) {
  85. dbgln("FIXME: Not using cached resource for {} since there's a type mismatch.", request.url());
  86. } else {
  87. dbgln_if(CACHE_DEBUG, "Reusing cached resource for: {}", request.url());
  88. return it->value;
  89. }
  90. }
  91. }
  92. auto resource = Resource::create({}, type, request);
  93. if (use_cache)
  94. s_resource_cache.set(request, resource);
  95. load(
  96. request,
  97. [=](auto data, auto& headers, auto status_code) {
  98. const_cast<Resource&>(*resource).did_load({}, data, headers, status_code);
  99. },
  100. [=](auto& error, auto status_code, auto, auto) {
  101. const_cast<Resource&>(*resource).did_fail({}, error, status_code);
  102. });
  103. return resource;
  104. }
  105. static ByteString sanitized_url_for_logging(URL::URL const& url)
  106. {
  107. if (url.scheme() == "data"sv)
  108. return "[data URL]"sv;
  109. return url.to_byte_string();
  110. }
  111. static void store_response_cookies(Page& page, URL::URL const& url, ByteString const& set_cookie_entry)
  112. {
  113. auto cookie = Cookie::parse_cookie(url, set_cookie_entry);
  114. if (!cookie.has_value())
  115. return;
  116. page.client().page_did_set_cookie(url, cookie.value(), Cookie::Source::Http); // FIXME: Determine cookie source correctly
  117. }
  118. static HTTP::HeaderMap response_headers_for_file(StringView path, Optional<time_t> const& modified_time)
  119. {
  120. // For file:// and resource:// URLs, we have to guess the MIME type, since there's no HTTP header to tell us what
  121. // it is. We insert a fake Content-Type header here, so that clients can use it to learn the MIME type.
  122. auto mime_type = Core::guess_mime_type_based_on_filename(path);
  123. HTTP::HeaderMap response_headers;
  124. response_headers.set("Content-Type"sv, mime_type);
  125. if (modified_time.has_value()) {
  126. auto const datetime = Core::DateTime::from_timestamp(modified_time.value());
  127. response_headers.set("Last-Modified"sv, datetime.to_byte_string("%a, %d %b %Y %H:%M:%S GMT"sv, Core::DateTime::LocalTime::No));
  128. }
  129. return response_headers;
  130. }
  131. static void log_request_start(LoadRequest const& request)
  132. {
  133. auto url_for_logging = sanitized_url_for_logging(request.url());
  134. dbgln_if(SPAM_DEBUG, "ResourceLoader: Starting load of: \"{}\"", url_for_logging);
  135. }
  136. static void log_success(LoadRequest const& request)
  137. {
  138. auto url_for_logging = sanitized_url_for_logging(request.url());
  139. auto load_time_ms = request.load_time().to_milliseconds();
  140. dbgln_if(SPAM_DEBUG, "ResourceLoader: Finished load of: \"{}\", Duration: {}ms", url_for_logging, load_time_ms);
  141. }
  142. template<typename ErrorType>
  143. static void log_failure(LoadRequest const& request, ErrorType const& error)
  144. {
  145. auto url_for_logging = sanitized_url_for_logging(request.url());
  146. auto load_time_ms = request.load_time().to_milliseconds();
  147. dbgln("ResourceLoader: Failed load of: \"{}\", \033[31;1mError: {}\033[0m, Duration: {}ms", url_for_logging, error, load_time_ms);
  148. }
  149. static void log_filtered_request(LoadRequest const& request)
  150. {
  151. auto url_for_logging = sanitized_url_for_logging(request.url());
  152. dbgln("ResourceLoader: Filtered request to: \"{}\"", url_for_logging);
  153. }
  154. static bool should_block_request(LoadRequest const& request)
  155. {
  156. auto const& url = request.url();
  157. auto is_port_blocked = [](int port) {
  158. static constexpr auto ports = to_array({ 1, 7, 9, 11, 13, 15, 17, 19, 20, 21, 22, 23, 25, 37, 42,
  159. 43, 53, 77, 79, 87, 95, 101, 102, 103, 104, 109, 110, 111, 113, 115, 117, 119, 123, 135, 139,
  160. 143, 179, 389, 465, 512, 513, 514, 515, 526, 530, 531, 532, 540, 556, 563, 587, 601, 636,
  161. 993, 995, 2049, 3659, 4045, 6000, 6379, 6665, 6666, 6667, 6668, 6669 });
  162. return ports.first_index_of(port).has_value();
  163. };
  164. if (is_port_blocked(url.port_or_default())) {
  165. log_failure(request, ByteString::formatted("Port #{} is blocked", url.port_or_default()));
  166. return true;
  167. }
  168. if (ContentFilter::the().is_filtered(url)) {
  169. log_filtered_request(request);
  170. return true;
  171. }
  172. return false;
  173. }
  174. void ResourceLoader::load(LoadRequest& request, SuccessCallback success_callback, ErrorCallback error_callback, Optional<u32> timeout, TimeoutCallback timeout_callback)
  175. {
  176. auto const& url = request.url();
  177. log_request_start(request);
  178. request.start_timer();
  179. if (should_block_request(request)) {
  180. error_callback("Request was blocked", {}, {}, {});
  181. return;
  182. }
  183. auto respond_directory_page = [](LoadRequest const& request, URL::URL const& url, SuccessCallback const& success_callback, ErrorCallback const& error_callback) {
  184. auto maybe_response = load_file_directory_page(url);
  185. if (maybe_response.is_error()) {
  186. log_failure(request, maybe_response.error());
  187. if (error_callback)
  188. error_callback(ByteString::formatted("{}", maybe_response.error()), 500u, {}, {});
  189. return;
  190. }
  191. log_success(request);
  192. HTTP::HeaderMap response_headers;
  193. response_headers.set("Content-Type"sv, "text/html"sv);
  194. success_callback(maybe_response.release_value().bytes(), response_headers, {});
  195. };
  196. if (url.scheme() == "about") {
  197. dbgln_if(SPAM_DEBUG, "Loading about: URL {}", url);
  198. log_success(request);
  199. HTTP::HeaderMap response_headers;
  200. response_headers.set("Content-Type", "text/html; charset=UTF-8");
  201. // About version page
  202. if (url.path_segment_at_index(0) == "version") {
  203. success_callback(MUST(load_about_version_page()).bytes(), response_headers, {});
  204. return;
  205. }
  206. // Other about static HTML pages
  207. auto resource = Core::Resource::load_from_uri(MUST(String::formatted("resource://ladybird/{}.html", url.path_segment_at_index(0))));
  208. if (!resource.is_error()) {
  209. auto data = resource.value()->data();
  210. success_callback(data, response_headers, {});
  211. return;
  212. }
  213. Platform::EventLoopPlugin::the().deferred_invoke([success_callback = move(success_callback), response_headers = move(response_headers)] {
  214. success_callback(ByteString::empty().to_byte_buffer(), response_headers, {});
  215. });
  216. return;
  217. }
  218. if (url.scheme() == "data") {
  219. auto data_url_or_error = Fetch::Infrastructure::process_data_url(url);
  220. if (data_url_or_error.is_error()) {
  221. auto error_message = data_url_or_error.error().string_literal();
  222. log_failure(request, error_message);
  223. error_callback(error_message, {}, {}, {});
  224. return;
  225. }
  226. auto data_url = data_url_or_error.release_value();
  227. dbgln_if(SPAM_DEBUG, "ResourceLoader loading a data URL with mime-type: '{}', payload='{}'",
  228. MUST(data_url.mime_type.serialized()),
  229. StringView(data_url.body.bytes()));
  230. HTTP::HeaderMap response_headers;
  231. response_headers.set("Content-Type", MUST(data_url.mime_type.serialized()).to_byte_string());
  232. log_success(request);
  233. Platform::EventLoopPlugin::the().deferred_invoke([data = move(data_url.body), response_headers = move(response_headers), success_callback = move(success_callback)] {
  234. success_callback(data, response_headers, {});
  235. });
  236. return;
  237. }
  238. if (url.scheme() == "resource") {
  239. auto resource = Core::Resource::load_from_uri(url.serialize());
  240. if (resource.is_error()) {
  241. log_failure(request, resource.error());
  242. if (error_callback)
  243. error_callback(ByteString::formatted("{}", resource.error()), {}, {}, {});
  244. return;
  245. }
  246. // When resource URI is a directory use file directory loader to generate response
  247. if (resource.value()->is_directory()) {
  248. respond_directory_page(request, resource.value()->file_url(), success_callback, error_callback);
  249. return;
  250. }
  251. auto data = resource.value()->data();
  252. auto response_headers = response_headers_for_file(URL::percent_decode(url.serialize_path()), resource.value()->modified_time());
  253. log_success(request);
  254. success_callback(data, response_headers, {});
  255. return;
  256. }
  257. if (url.scheme() == "file") {
  258. if (request.page())
  259. m_page = request.page();
  260. if (!m_page.has_value()) {
  261. log_failure(request, "INTERNAL ERROR: No Page for request");
  262. return;
  263. }
  264. FileRequest file_request(URL::percent_decode(url.serialize_path()), [this, success_callback = move(success_callback), error_callback = move(error_callback), request, respond_directory_page](ErrorOr<i32> file_or_error) {
  265. --m_pending_loads;
  266. if (on_load_counter_change)
  267. on_load_counter_change();
  268. if (file_or_error.is_error()) {
  269. log_failure(request, file_or_error.error());
  270. if (error_callback)
  271. error_callback(ByteString::formatted("{}", file_or_error.error()), {}, {}, {});
  272. return;
  273. }
  274. auto const fd = file_or_error.value();
  275. // When local file is a directory use file directory loader to generate response
  276. auto maybe_is_valid_directory = Core::Directory::is_valid_directory(fd);
  277. if (!maybe_is_valid_directory.is_error() && maybe_is_valid_directory.value()) {
  278. respond_directory_page(request, request.url(), success_callback, error_callback);
  279. return;
  280. }
  281. auto st_or_error = Core::System::fstat(fd);
  282. if (st_or_error.is_error()) {
  283. log_failure(request, st_or_error.error());
  284. if (error_callback)
  285. error_callback(ByteString::formatted("{}", st_or_error.error()), {}, {}, {});
  286. return;
  287. }
  288. // Try to read file normally
  289. auto maybe_file = Core::File::adopt_fd(fd, Core::File::OpenMode::Read);
  290. if (maybe_file.is_error()) {
  291. log_failure(request, maybe_file.error());
  292. if (error_callback)
  293. error_callback(ByteString::formatted("{}", maybe_file.error()), {}, {}, {});
  294. return;
  295. }
  296. auto file = maybe_file.release_value();
  297. auto maybe_data = file->read_until_eof();
  298. if (maybe_data.is_error()) {
  299. log_failure(request, maybe_data.error());
  300. if (error_callback)
  301. error_callback(ByteString::formatted("{}", maybe_data.error()), {}, {}, {});
  302. return;
  303. }
  304. auto data = maybe_data.release_value();
  305. auto response_headers = response_headers_for_file(URL::percent_decode(request.url().serialize_path()), st_or_error.value().st_mtime);
  306. log_success(request);
  307. success_callback(data, response_headers, {});
  308. });
  309. (*m_page)->client().request_file(move(file_request));
  310. ++m_pending_loads;
  311. if (on_load_counter_change)
  312. on_load_counter_change();
  313. return;
  314. }
  315. if (url.scheme() == "http" || url.scheme() == "https") {
  316. auto protocol_request = start_network_request(request);
  317. if (!protocol_request) {
  318. if (error_callback)
  319. error_callback("Failed to start network request"sv, {}, {}, {});
  320. return;
  321. }
  322. if (timeout.has_value() && timeout.value() > 0) {
  323. auto timer = Platform::Timer::create_single_shot(timeout.value(), nullptr);
  324. timer->on_timeout = [timer, protocol_request, timeout_callback = move(timeout_callback)] {
  325. protocol_request->stop();
  326. if (timeout_callback)
  327. timeout_callback();
  328. };
  329. timer->start();
  330. }
  331. auto on_buffered_request_finished = [this, success_callback = move(success_callback), error_callback = move(error_callback), request, &protocol_request = *protocol_request](bool success, auto, auto& response_headers, auto status_code, ReadonlyBytes payload) mutable {
  332. handle_network_response_headers(request, response_headers);
  333. finish_network_request(protocol_request);
  334. if (!success || (status_code.has_value() && *status_code >= 400 && *status_code <= 599 && (payload.is_empty() || !request.is_main_resource()))) {
  335. StringBuilder error_builder;
  336. if (status_code.has_value())
  337. error_builder.appendff("Load failed: {}", *status_code);
  338. else
  339. error_builder.append("Load failed"sv);
  340. log_failure(request, error_builder.string_view());
  341. if (error_callback)
  342. error_callback(error_builder.to_byte_string(), status_code, payload, response_headers);
  343. return;
  344. }
  345. log_success(request);
  346. success_callback(payload, response_headers, status_code);
  347. };
  348. protocol_request->set_buffered_request_finished_callback(move(on_buffered_request_finished));
  349. return;
  350. }
  351. auto not_implemented_error = ByteString::formatted("Protocol not implemented: {}", url.scheme());
  352. log_failure(request, not_implemented_error);
  353. if (error_callback)
  354. error_callback(not_implemented_error, {}, {}, {});
  355. }
  356. void ResourceLoader::load_unbuffered(LoadRequest& request, OnHeadersReceived on_headers_received, OnDataReceived on_data_received, OnComplete on_complete)
  357. {
  358. auto const& url = request.url();
  359. log_request_start(request);
  360. request.start_timer();
  361. if (should_block_request(request)) {
  362. on_complete(false, "Request was blocked"sv);
  363. return;
  364. }
  365. if (!url.scheme().is_one_of("http"sv, "https"sv)) {
  366. // FIXME: Non-network requests from fetch should not go through this path.
  367. on_complete(false, "Cannot establish connection non-network scheme"sv);
  368. return;
  369. }
  370. auto protocol_request = start_network_request(request);
  371. if (!protocol_request) {
  372. on_complete(false, "Failed to start network request"sv);
  373. return;
  374. }
  375. auto protocol_headers_received = [this, on_headers_received = move(on_headers_received), request](auto const& response_headers, auto status_code) {
  376. handle_network_response_headers(request, response_headers);
  377. on_headers_received(response_headers, move(status_code));
  378. };
  379. auto protocol_data_received = [on_data_received = move(on_data_received)](auto data) {
  380. on_data_received(data);
  381. };
  382. auto protocol_complete = [this, on_complete = move(on_complete), request, &protocol_request = *protocol_request](bool success, u64) {
  383. finish_network_request(protocol_request);
  384. if (success) {
  385. log_success(request);
  386. on_complete(true, {});
  387. } else {
  388. log_failure(request, "Request finished with error"sv);
  389. on_complete(false, "Request finished with error"sv);
  390. }
  391. };
  392. protocol_request->set_unbuffered_request_callbacks(move(protocol_headers_received), move(protocol_data_received), move(protocol_complete));
  393. }
  394. RefPtr<ResourceLoaderConnectorRequest> ResourceLoader::start_network_request(LoadRequest const& request)
  395. {
  396. auto proxy = ProxyMappings::the().proxy_for_url(request.url());
  397. HTTP::HeaderMap headers;
  398. for (auto const& it : request.headers()) {
  399. headers.set(it.key, it.value);
  400. }
  401. if (!headers.contains("User-Agent"))
  402. headers.set("User-Agent", m_user_agent.to_byte_string());
  403. auto protocol_request = m_connector->start_request(request.method(), request.url(), headers, request.body(), proxy);
  404. if (!protocol_request) {
  405. log_failure(request, "Failed to initiate load"sv);
  406. return nullptr;
  407. }
  408. protocol_request->on_certificate_requested = []() -> ResourceLoaderConnectorRequest::CertificateAndKey {
  409. return {};
  410. };
  411. ++m_pending_loads;
  412. if (on_load_counter_change)
  413. on_load_counter_change();
  414. m_active_requests.set(*protocol_request);
  415. return protocol_request;
  416. }
  417. void ResourceLoader::handle_network_response_headers(LoadRequest const& request, HTTP::HeaderMap const& response_headers)
  418. {
  419. if (!request.page())
  420. return;
  421. for (auto const& [header, value] : response_headers.headers()) {
  422. if (header.equals_ignoring_ascii_case("Set-Cookie"sv)) {
  423. store_response_cookies(*request.page(), request.url(), value);
  424. }
  425. }
  426. if (auto cache_control = response_headers.get("Cache-Control"); cache_control.has_value()) {
  427. if (cache_control.value().contains("no-store"sv))
  428. s_resource_cache.remove(request);
  429. }
  430. }
  431. void ResourceLoader::finish_network_request(NonnullRefPtr<ResourceLoaderConnectorRequest> const& protocol_request)
  432. {
  433. --m_pending_loads;
  434. if (on_load_counter_change)
  435. on_load_counter_change();
  436. Platform::EventLoopPlugin::the().deferred_invoke([this, protocol_request] {
  437. m_active_requests.remove(protocol_request);
  438. });
  439. }
  440. void ResourceLoader::clear_cache()
  441. {
  442. dbgln_if(CACHE_DEBUG, "Clearing {} items from ResourceLoader cache", s_resource_cache.size());
  443. s_resource_cache.clear();
  444. }
  445. void ResourceLoader::evict_from_cache(LoadRequest const& request)
  446. {
  447. dbgln_if(CACHE_DEBUG, "Removing resource {} from cache", request.url());
  448. s_resource_cache.remove(request);
  449. }
  450. }