// ResourceLoader.cpp (19 KB)
  1. /*
  2. * Copyright (c) 2018-2023, Andreas Kling <kling@serenityos.org>
  3. * Copyright (c) 2022, Dex♪ <dexes.ttp@gmail.com>
  4. *
  5. * SPDX-License-Identifier: BSD-2-Clause
  6. */
  7. #include <AK/Debug.h>
  8. #include <AK/JsonObject.h>
  9. #include <LibCore/DateTime.h>
  10. #include <LibCore/Directory.h>
  11. #include <LibCore/ElapsedTimer.h>
  12. #include <LibCore/MimeData.h>
  13. #include <LibCore/Resource.h>
  14. #include <LibWeb/Cookie/Cookie.h>
  15. #include <LibWeb/Cookie/ParsedCookie.h>
  16. #include <LibWeb/Fetch/Infrastructure/URL.h>
  17. #include <LibWeb/Loader/ContentFilter.h>
  18. #include <LibWeb/Loader/GeneratedPagesLoader.h>
  19. #include <LibWeb/Loader/LoadRequest.h>
  20. #include <LibWeb/Loader/ProxyMappings.h>
  21. #include <LibWeb/Loader/Resource.h>
  22. #include <LibWeb/Loader/ResourceLoader.h>
  23. #include <LibWeb/Loader/UserAgent.h>
  24. #include <LibWeb/Platform/EventLoopPlugin.h>
  25. #include <LibWeb/Platform/Timer.h>
  26. #ifdef AK_OS_SERENITY
  27. # include <serenity.h>
  28. #endif
  29. namespace Web {
  30. ResourceLoaderConnectorRequest::ResourceLoaderConnectorRequest() = default;
  31. ResourceLoaderConnectorRequest::~ResourceLoaderConnectorRequest() = default;
  32. ResourceLoaderConnector::ResourceLoaderConnector() = default;
  33. ResourceLoaderConnector::~ResourceLoaderConnector() = default;
  34. static RefPtr<ResourceLoader> s_resource_loader;
  35. void ResourceLoader::initialize(RefPtr<ResourceLoaderConnector> connector)
  36. {
  37. if (connector)
  38. s_resource_loader = ResourceLoader::try_create(connector.release_nonnull()).release_value_but_fixme_should_propagate_errors();
  39. }
  40. ResourceLoader& ResourceLoader::the()
  41. {
  42. if (!s_resource_loader) {
  43. dbgln("Web::ResourceLoader was not initialized");
  44. VERIFY_NOT_REACHED();
  45. }
  46. return *s_resource_loader;
  47. }
  48. ErrorOr<NonnullRefPtr<ResourceLoader>> ResourceLoader::try_create(NonnullRefPtr<ResourceLoaderConnector> connector)
  49. {
  50. return adopt_nonnull_ref_or_enomem(new (nothrow) ResourceLoader(move(connector)));
  51. }
  52. ResourceLoader::ResourceLoader(NonnullRefPtr<ResourceLoaderConnector> connector)
  53. : m_connector(move(connector))
  54. , m_user_agent(MUST(String::from_utf8(default_user_agent)))
  55. , m_platform(MUST(String::from_utf8(default_platform)))
  56. {
  57. }
  58. void ResourceLoader::prefetch_dns(URL::URL const& url)
  59. {
  60. if (url.scheme().is_one_of("file"sv, "data"sv))
  61. return;
  62. if (ContentFilter::the().is_filtered(url)) {
  63. dbgln("ResourceLoader: Refusing to prefetch DNS for '{}': \033[31;1mURL was filtered\033[0m", url);
  64. return;
  65. }
  66. m_connector->prefetch_dns(url);
  67. }
  68. void ResourceLoader::preconnect(URL::URL const& url)
  69. {
  70. if (url.scheme().is_one_of("file"sv, "data"sv))
  71. return;
  72. if (ContentFilter::the().is_filtered(url)) {
  73. dbgln("ResourceLoader: Refusing to pre-connect to '{}': \033[31;1mURL was filtered\033[0m", url);
  74. return;
  75. }
  76. m_connector->preconnect(url);
  77. }
  78. static HashMap<LoadRequest, NonnullRefPtr<Resource>> s_resource_cache;
  79. RefPtr<Resource> ResourceLoader::load_resource(Resource::Type type, LoadRequest& request)
  80. {
  81. if (!request.is_valid())
  82. return nullptr;
  83. bool use_cache = request.url().scheme() != "file";
  84. if (use_cache) {
  85. auto it = s_resource_cache.find(request);
  86. if (it != s_resource_cache.end()) {
  87. if (it->value->type() != type) {
  88. dbgln("FIXME: Not using cached resource for {} since there's a type mismatch.", request.url());
  89. } else {
  90. dbgln_if(CACHE_DEBUG, "Reusing cached resource for: {}", request.url());
  91. return it->value;
  92. }
  93. }
  94. }
  95. auto resource = Resource::create({}, type, request);
  96. if (use_cache)
  97. s_resource_cache.set(request, resource);
  98. load(
  99. request,
  100. [=](auto data, auto& headers, auto status_code) {
  101. const_cast<Resource&>(*resource).did_load({}, data, headers, status_code);
  102. },
  103. [=](auto& error, auto status_code, auto, auto) {
  104. const_cast<Resource&>(*resource).did_fail({}, error, status_code);
  105. });
  106. return resource;
  107. }
  108. static ByteString sanitized_url_for_logging(URL::URL const& url)
  109. {
  110. if (url.scheme() == "data"sv)
  111. return "[data URL]"sv;
  112. return url.to_byte_string();
  113. }
  114. static void emit_signpost(ByteString const& message, int id)
  115. {
  116. #ifdef AK_OS_SERENITY
  117. auto string_id = perf_register_string(message.characters(), message.length());
  118. perf_event(PERF_EVENT_SIGNPOST, string_id, id);
  119. #else
  120. (void)message;
  121. (void)id;
  122. #endif
  123. }
  124. static void store_response_cookies(Page& page, URL::URL const& url, ByteString const& cookies)
  125. {
  126. auto set_cookie_json_value = MUST(JsonValue::from_string(cookies));
  127. VERIFY(set_cookie_json_value.type() == JsonValue::Type::Array);
  128. for (auto const& set_cookie_entry : set_cookie_json_value.as_array().values()) {
  129. VERIFY(set_cookie_entry.type() == JsonValue::Type::String);
  130. auto cookie = Cookie::parse_cookie(set_cookie_entry.as_string());
  131. if (!cookie.has_value())
  132. continue;
  133. page.client().page_did_set_cookie(url, cookie.value(), Cookie::Source::Http); // FIXME: Determine cookie source correctly
  134. }
  135. }
  136. static size_t resource_id = 0;
  137. static HashMap<ByteString, ByteString, CaseInsensitiveStringTraits> response_headers_for_file(StringView path, Optional<time_t> const& modified_time)
  138. {
  139. // For file:// and resource:// URLs, we have to guess the MIME type, since there's no HTTP header to tell us what
  140. // it is. We insert a fake Content-Type header here, so that clients can use it to learn the MIME type.
  141. auto mime_type = Core::guess_mime_type_based_on_filename(path);
  142. HashMap<ByteString, ByteString, CaseInsensitiveStringTraits> response_headers;
  143. response_headers.set("Content-Type"sv, mime_type);
  144. if (modified_time.has_value()) {
  145. auto const datetime = Core::DateTime::from_timestamp(modified_time.value());
  146. response_headers.set("Last-Modified"sv, datetime.to_byte_string("%a, %d %b %Y %H:%M:%S GMT"sv, Core::DateTime::LocalTime::No));
  147. }
  148. return response_headers;
  149. }
  150. void ResourceLoader::load(LoadRequest& request, SuccessCallback success_callback, ErrorCallback error_callback, Optional<u32> timeout, TimeoutCallback timeout_callback)
  151. {
  152. auto& url = request.url();
  153. request.start_timer();
  154. auto id = resource_id++;
  155. auto url_for_logging = sanitized_url_for_logging(url);
  156. emit_signpost(ByteString::formatted("Starting load: {}", url_for_logging), id);
  157. dbgln_if(SPAM_DEBUG, "ResourceLoader: Starting load of: \"{}\"", url_for_logging);
  158. auto const log_success = [url_for_logging, id](auto const& request) {
  159. auto load_time_ms = request.load_time().to_milliseconds();
  160. emit_signpost(ByteString::formatted("Finished load: {}", url_for_logging), id);
  161. dbgln_if(SPAM_DEBUG, "ResourceLoader: Finished load of: \"{}\", Duration: {}ms", url_for_logging, load_time_ms);
  162. };
  163. auto const log_failure = [url_for_logging, id](auto const& request, auto const& error_message) {
  164. auto load_time_ms = request.load_time().to_milliseconds();
  165. emit_signpost(ByteString::formatted("Failed load: {}", url_for_logging), id);
  166. dbgln("ResourceLoader: Failed load of: \"{}\", \033[31;1mError: {}\033[0m, Duration: {}ms", url_for_logging, error_message, load_time_ms);
  167. };
  168. auto respond_directory_page = [log_success, log_failure](LoadRequest const& request, URL::URL const& url, SuccessCallback const& success_callback, ErrorCallback const& error_callback) {
  169. auto maybe_response = load_file_directory_page(url);
  170. if (maybe_response.is_error()) {
  171. log_failure(request, maybe_response.error());
  172. if (error_callback)
  173. error_callback(ByteString::formatted("{}", maybe_response.error()), 500u, {}, {});
  174. return;
  175. }
  176. log_success(request);
  177. HashMap<ByteString, ByteString, CaseInsensitiveStringTraits> response_headers;
  178. response_headers.set("Content-Type"sv, "text/html"sv);
  179. success_callback(maybe_response.release_value().bytes(), response_headers, {});
  180. };
  181. if (is_port_blocked(url.port_or_default())) {
  182. log_failure(request, ByteString::formatted("The port #{} is blocked", url.port_or_default()));
  183. return;
  184. }
  185. if (ContentFilter::the().is_filtered(url)) {
  186. auto filter_message = "URL was filtered"sv;
  187. log_failure(request, filter_message);
  188. error_callback(filter_message, {}, {}, {});
  189. return;
  190. }
  191. if (url.scheme() == "about") {
  192. dbgln_if(SPAM_DEBUG, "Loading about: URL {}", url);
  193. log_success(request);
  194. HashMap<ByteString, ByteString, CaseInsensitiveStringTraits> response_headers;
  195. response_headers.set("Content-Type", "text/html; charset=UTF-8");
  196. // About version page
  197. if (url.path_segment_at_index(0) == "version") {
  198. success_callback(MUST(load_about_version_page()).bytes(), response_headers, {});
  199. return;
  200. }
  201. // Other about static HTML pages
  202. auto resource = Core::Resource::load_from_uri(MUST(String::formatted("resource://ladybird/{}.html", url.path_segment_at_index(0))));
  203. if (!resource.is_error()) {
  204. auto data = resource.value()->data();
  205. success_callback(data, response_headers, {});
  206. return;
  207. }
  208. Platform::EventLoopPlugin::the().deferred_invoke([success_callback = move(success_callback), response_headers = move(response_headers)] {
  209. success_callback(ByteString::empty().to_byte_buffer(), response_headers, {});
  210. });
  211. return;
  212. }
  213. if (url.scheme() == "data") {
  214. auto data_url_or_error = Fetch::Infrastructure::process_data_url(url);
  215. if (data_url_or_error.is_error()) {
  216. auto error_message = data_url_or_error.error().string_literal();
  217. log_failure(request, error_message);
  218. error_callback(error_message, {}, {}, {});
  219. return;
  220. }
  221. auto data_url = data_url_or_error.release_value();
  222. dbgln_if(SPAM_DEBUG, "ResourceLoader loading a data URL with mime-type: '{}', payload='{}'",
  223. data_url.mime_type,
  224. StringView(data_url.body.bytes()));
  225. HashMap<ByteString, ByteString, CaseInsensitiveStringTraits> response_headers;
  226. response_headers.set("Content-Type", data_url.mime_type.to_byte_string());
  227. log_success(request);
  228. Platform::EventLoopPlugin::the().deferred_invoke([data = move(data_url.body), response_headers = move(response_headers), success_callback = move(success_callback)] {
  229. success_callback(data, response_headers, {});
  230. });
  231. return;
  232. }
  233. if (url.scheme() == "resource") {
  234. auto resource = Core::Resource::load_from_uri(url.serialize());
  235. if (resource.is_error()) {
  236. log_failure(request, resource.error());
  237. return;
  238. }
  239. // When resource URI is a directory use file directory loader to generate response
  240. if (resource.value()->is_directory()) {
  241. respond_directory_page(request, resource.value()->file_url(), success_callback, error_callback);
  242. return;
  243. }
  244. auto data = resource.value()->data();
  245. auto response_headers = response_headers_for_file(url.serialize_path(), resource.value()->modified_time());
  246. log_success(request);
  247. success_callback(data, response_headers, {});
  248. return;
  249. }
  250. if (url.scheme() == "file") {
  251. if (request.page())
  252. m_page = request.page();
  253. if (!m_page.has_value()) {
  254. log_failure(request, "INTERNAL ERROR: No Page for request");
  255. return;
  256. }
  257. FileRequest file_request(url.serialize_path(), [this, success_callback = move(success_callback), error_callback = move(error_callback), log_success, log_failure, request, respond_directory_page](ErrorOr<i32> file_or_error) {
  258. --m_pending_loads;
  259. if (on_load_counter_change)
  260. on_load_counter_change();
  261. if (file_or_error.is_error()) {
  262. log_failure(request, file_or_error.error());
  263. if (error_callback) {
  264. auto status = file_or_error.error().code() == ENOENT ? 404u : 500u;
  265. error_callback(ByteString::formatted("{}", file_or_error.error()), status, {}, {});
  266. }
  267. return;
  268. }
  269. auto const fd = file_or_error.value();
  270. // When local file is a directory use file directory loader to generate response
  271. auto maybe_is_valid_directory = Core::Directory::is_valid_directory(fd);
  272. if (!maybe_is_valid_directory.is_error() && maybe_is_valid_directory.value()) {
  273. respond_directory_page(request, request.url(), success_callback, error_callback);
  274. return;
  275. }
  276. auto st_or_error = Core::System::fstat(fd);
  277. if (st_or_error.is_error()) {
  278. log_failure(request, st_or_error.error());
  279. if (error_callback)
  280. error_callback(ByteString::formatted("{}", st_or_error.error()), 500u, {}, {});
  281. return;
  282. }
  283. // Try to read file normally
  284. auto maybe_file = Core::File::adopt_fd(fd, Core::File::OpenMode::Read);
  285. if (maybe_file.is_error()) {
  286. log_failure(request, maybe_file.error());
  287. if (error_callback)
  288. error_callback(ByteString::formatted("{}", maybe_file.error()), 500u, {}, {});
  289. return;
  290. }
  291. auto file = maybe_file.release_value();
  292. auto maybe_data = file->read_until_eof();
  293. if (maybe_data.is_error()) {
  294. log_failure(request, maybe_data.error());
  295. if (error_callback)
  296. error_callback(ByteString::formatted("{}", maybe_data.error()), 500u, {}, {});
  297. return;
  298. }
  299. auto data = maybe_data.release_value();
  300. auto response_headers = response_headers_for_file(request.url().serialize_path(), st_or_error.value().st_mtime);
  301. log_success(request);
  302. success_callback(data, response_headers, {});
  303. });
  304. (*m_page)->client().request_file(move(file_request));
  305. ++m_pending_loads;
  306. if (on_load_counter_change)
  307. on_load_counter_change();
  308. return;
  309. }
  310. if (url.scheme() == "http" || url.scheme() == "https" || url.scheme() == "gemini") {
  311. auto proxy = ProxyMappings::the().proxy_for_url(url);
  312. HashMap<ByteString, ByteString> headers;
  313. headers.set("User-Agent", m_user_agent.to_byte_string());
  314. headers.set("Accept-Encoding", "gzip, deflate, br");
  315. for (auto& it : request.headers()) {
  316. headers.set(it.key, it.value);
  317. }
  318. auto protocol_request = m_connector->start_request(request.method(), url, headers, request.body(), proxy);
  319. if (!protocol_request) {
  320. auto start_request_failure_msg = "Failed to initiate load"sv;
  321. log_failure(request, start_request_failure_msg);
  322. if (error_callback)
  323. error_callback(start_request_failure_msg, {}, {}, {});
  324. return;
  325. }
  326. if (timeout.has_value() && timeout.value() > 0) {
  327. auto timer = Platform::Timer::create_single_shot(timeout.value(), nullptr);
  328. timer->on_timeout = [timer, protocol_request, timeout_callback = move(timeout_callback)] {
  329. protocol_request->stop();
  330. if (timeout_callback)
  331. timeout_callback();
  332. };
  333. timer->start();
  334. }
  335. m_active_requests.set(*protocol_request);
  336. auto on_buffered_request_finished = [this, success_callback = move(success_callback), error_callback = move(error_callback), log_success, log_failure, request, &protocol_request = *protocol_request](bool success, auto, auto& response_headers, auto status_code, ReadonlyBytes payload) mutable {
  337. --m_pending_loads;
  338. if (on_load_counter_change)
  339. on_load_counter_change();
  340. if (request.page()) {
  341. if (auto set_cookie = response_headers.get("Set-Cookie"); set_cookie.has_value())
  342. store_response_cookies(*request.page(), request.url(), *set_cookie);
  343. if (auto cache_control = response_headers.get("cache-control"); cache_control.has_value()) {
  344. if (cache_control.value().contains("no-store"sv)) {
  345. s_resource_cache.remove(request);
  346. }
  347. }
  348. }
  349. if (!success || (status_code.has_value() && *status_code >= 400 && *status_code <= 599 && (payload.is_empty() || !request.is_main_resource()))) {
  350. StringBuilder error_builder;
  351. if (status_code.has_value())
  352. error_builder.appendff("Load failed: {}", *status_code);
  353. else
  354. error_builder.append("Load failed"sv);
  355. log_failure(request, error_builder.string_view());
  356. if (error_callback)
  357. error_callback(error_builder.to_byte_string(), status_code, payload, response_headers);
  358. return;
  359. }
  360. log_success(request);
  361. success_callback(payload, response_headers, status_code);
  362. Platform::EventLoopPlugin::the().deferred_invoke([this, &protocol_request] {
  363. m_active_requests.remove(protocol_request);
  364. });
  365. };
  366. protocol_request->set_buffered_request_finished_callback(move(on_buffered_request_finished));
  367. protocol_request->on_certificate_requested = []() -> ResourceLoaderConnectorRequest::CertificateAndKey {
  368. return {};
  369. };
  370. ++m_pending_loads;
  371. if (on_load_counter_change)
  372. on_load_counter_change();
  373. return;
  374. }
  375. auto not_implemented_error = ByteString::formatted("Protocol not implemented: {}", url.scheme());
  376. log_failure(request, not_implemented_error);
  377. if (error_callback)
  378. error_callback(not_implemented_error, {}, {}, {});
  379. }
  380. bool ResourceLoader::is_port_blocked(int port)
  381. {
  382. int ports[] { 1, 7, 9, 11, 13, 15, 17, 19, 20, 21, 22, 23, 25, 37, 42,
  383. 43, 53, 77, 79, 87, 95, 101, 102, 103, 104, 109, 110, 111, 113,
  384. 115, 117, 119, 123, 135, 139, 143, 179, 389, 465, 512, 513, 514,
  385. 515, 526, 530, 531, 532, 540, 556, 563, 587, 601, 636, 993, 995,
  386. 2049, 3659, 4045, 6000, 6379, 6665, 6666, 6667, 6668, 6669 };
  387. for (auto blocked_port : ports)
  388. if (port == blocked_port)
  389. return true;
  390. return false;
  391. }
  392. void ResourceLoader::clear_cache()
  393. {
  394. dbgln_if(CACHE_DEBUG, "Clearing {} items from ResourceLoader cache", s_resource_cache.size());
  395. s_resource_cache.clear();
  396. }
  397. void ResourceLoader::evict_from_cache(LoadRequest const& request)
  398. {
  399. dbgln_if(CACHE_DEBUG, "Removing resource {} from cache", request.url());
  400. s_resource_cache.remove(request);
  401. }
  402. }