ResourceLoader.cpp 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400
  1. /*
  2. * Copyright (c) 2018-2023, Andreas Kling <kling@serenityos.org>
  3. * Copyright (c) 2022, Dex♪ <dexes.ttp@gmail.com>
  4. *
  5. * SPDX-License-Identifier: BSD-2-Clause
  6. */
  7. #include <AK/Base64.h>
  8. #include <AK/Debug.h>
  9. #include <AK/JsonObject.h>
  10. #include <LibCore/ElapsedTimer.h>
  11. #include <LibCore/MimeData.h>
  12. #include <LibWeb/Cookie/Cookie.h>
  13. #include <LibWeb/Cookie/ParsedCookie.h>
  14. #include <LibWeb/Loader/ContentFilter.h>
  15. #include <LibWeb/Loader/LoadRequest.h>
  16. #include <LibWeb/Loader/ProxyMappings.h>
  17. #include <LibWeb/Loader/Resource.h>
  18. #include <LibWeb/Loader/ResourceLoader.h>
  19. #include <LibWeb/Platform/EventLoopPlugin.h>
  20. #include <LibWeb/Platform/Timer.h>
  21. #ifdef AK_OS_SERENITY
  22. # include <serenity.h>
  23. #endif
  24. namespace Web {
  25. ResourceLoaderConnectorRequest::ResourceLoaderConnectorRequest() = default;
  26. ResourceLoaderConnectorRequest::~ResourceLoaderConnectorRequest() = default;
  27. ResourceLoaderConnector::ResourceLoaderConnector() = default;
  28. ResourceLoaderConnector::~ResourceLoaderConnector() = default;
  29. static RefPtr<ResourceLoader> s_resource_loader;
  30. void ResourceLoader::initialize(RefPtr<ResourceLoaderConnector> connector)
  31. {
  32. if (connector)
  33. s_resource_loader = ResourceLoader::try_create(connector.release_nonnull()).release_value_but_fixme_should_propagate_errors();
  34. }
  35. ResourceLoader& ResourceLoader::the()
  36. {
  37. if (!s_resource_loader) {
  38. dbgln("Web::ResourceLoader was not initialized");
  39. VERIFY_NOT_REACHED();
  40. }
  41. return *s_resource_loader;
  42. }
  43. ErrorOr<NonnullRefPtr<ResourceLoader>> ResourceLoader::try_create(NonnullRefPtr<ResourceLoaderConnector> connector)
  44. {
  45. return adopt_nonnull_ref_or_enomem(new (nothrow) ResourceLoader(move(connector)));
  46. }
  47. ResourceLoader::ResourceLoader(NonnullRefPtr<ResourceLoaderConnector> connector)
  48. : m_connector(move(connector))
  49. , m_user_agent(default_user_agent)
  50. {
  51. }
  52. void ResourceLoader::prefetch_dns(AK::URL const& url)
  53. {
  54. if (ContentFilter::the().is_filtered(url)) {
  55. dbgln("ResourceLoader: Refusing to prefetch DNS for '{}': \033[31;1mURL was filtered\033[0m", url);
  56. return;
  57. }
  58. m_connector->prefetch_dns(url);
  59. }
  60. void ResourceLoader::preconnect(AK::URL const& url)
  61. {
  62. if (ContentFilter::the().is_filtered(url)) {
  63. dbgln("ResourceLoader: Refusing to pre-connect to '{}': \033[31;1mURL was filtered\033[0m", url);
  64. return;
  65. }
  66. m_connector->preconnect(url);
  67. }
  68. static HashMap<LoadRequest, NonnullRefPtr<Resource>> s_resource_cache;
  69. RefPtr<Resource> ResourceLoader::load_resource(Resource::Type type, LoadRequest& request)
  70. {
  71. if (!request.is_valid())
  72. return nullptr;
  73. bool use_cache = request.url().scheme() != "file";
  74. if (use_cache) {
  75. auto it = s_resource_cache.find(request);
  76. if (it != s_resource_cache.end()) {
  77. if (it->value->type() != type) {
  78. dbgln("FIXME: Not using cached resource for {} since there's a type mismatch.", request.url());
  79. } else {
  80. dbgln_if(CACHE_DEBUG, "Reusing cached resource for: {}", request.url());
  81. return it->value;
  82. }
  83. }
  84. }
  85. auto resource = Resource::create({}, type, request);
  86. if (use_cache)
  87. s_resource_cache.set(request, resource);
  88. load(
  89. request,
  90. [=](auto data, auto& headers, auto status_code) {
  91. const_cast<Resource&>(*resource).did_load({}, data, headers, status_code);
  92. },
  93. [=](auto& error, auto status_code) {
  94. const_cast<Resource&>(*resource).did_fail({}, error, status_code);
  95. });
  96. return resource;
  97. }
  98. static DeprecatedString sanitized_url_for_logging(AK::URL const& url)
  99. {
  100. if (url.scheme() == "data"sv)
  101. return DeprecatedString::formatted("[data URL, mime-type={}, size={}]", url.data_mime_type(), url.data_payload().length());
  102. return url.to_deprecated_string();
  103. }
  104. static void emit_signpost(DeprecatedString const& message, int id)
  105. {
  106. #ifdef AK_OS_SERENITY
  107. auto string_id = perf_register_string(message.characters(), message.length());
  108. perf_event(PERF_EVENT_SIGNPOST, string_id, id);
  109. #else
  110. (void)message;
  111. (void)id;
  112. #endif
  113. }
  114. static void store_response_cookies(Page& page, AK::URL const& url, DeprecatedString const& cookies)
  115. {
  116. auto set_cookie_json_value = MUST(JsonValue::from_string(cookies));
  117. VERIFY(set_cookie_json_value.type() == JsonValue::Type::Array);
  118. for (auto const& set_cookie_entry : set_cookie_json_value.as_array().values()) {
  119. VERIFY(set_cookie_entry.type() == JsonValue::Type::String);
  120. auto cookie = Cookie::parse_cookie(set_cookie_entry.as_string());
  121. if (!cookie.has_value())
  122. continue;
  123. page.client().page_did_set_cookie(url, cookie.value(), Cookie::Source::Http); // FIXME: Determine cookie source correctly
  124. }
  125. }
  126. static size_t resource_id = 0;
  127. void ResourceLoader::load(LoadRequest& request, Function<void(ReadonlyBytes, HashMap<DeprecatedString, DeprecatedString, CaseInsensitiveStringTraits> const& response_headers, Optional<u32> status_code)> success_callback, Function<void(DeprecatedString const&, Optional<u32> status_code)> error_callback, Optional<u32> timeout, Function<void()> timeout_callback)
  128. {
  129. auto& url = request.url();
  130. request.start_timer();
  131. auto id = resource_id++;
  132. auto url_for_logging = sanitized_url_for_logging(url);
  133. emit_signpost(DeprecatedString::formatted("Starting load: {}", url_for_logging), id);
  134. dbgln("ResourceLoader: Starting load of: \"{}\"", url_for_logging);
  135. auto const log_success = [url_for_logging, id](auto const& request) {
  136. auto load_time_ms = request.load_time().to_milliseconds();
  137. emit_signpost(DeprecatedString::formatted("Finished load: {}", url_for_logging), id);
  138. dbgln("ResourceLoader: Finished load of: \"{}\", Duration: {}ms", url_for_logging, load_time_ms);
  139. };
  140. auto const log_failure = [url_for_logging, id](auto const& request, auto const& error_message) {
  141. auto load_time_ms = request.load_time().to_milliseconds();
  142. emit_signpost(DeprecatedString::formatted("Failed load: {}", url_for_logging), id);
  143. dbgln("ResourceLoader: Failed load of: \"{}\", \033[31;1mError: {}\033[0m, Duration: {}ms", url_for_logging, error_message, load_time_ms);
  144. };
  145. if (is_port_blocked(url.port_or_default())) {
  146. log_failure(request, DeprecatedString::formatted("The port #{} is blocked", url.port_or_default()));
  147. return;
  148. }
  149. if (ContentFilter::the().is_filtered(url)) {
  150. auto filter_message = "URL was filtered"sv;
  151. log_failure(request, filter_message);
  152. error_callback(filter_message, {});
  153. return;
  154. }
  155. if (url.scheme() == "about") {
  156. dbgln_if(SPAM_DEBUG, "Loading about: URL {}", url);
  157. log_success(request);
  158. HashMap<DeprecatedString, DeprecatedString, CaseInsensitiveStringTraits> response_headers;
  159. response_headers.set("Content-Type", "text/html; charset=UTF-8");
  160. Platform::EventLoopPlugin::the().deferred_invoke([success_callback = move(success_callback), response_headers = move(response_headers)] {
  161. success_callback(DeprecatedString::empty().to_byte_buffer(), response_headers, {});
  162. });
  163. return;
  164. }
  165. if (url.scheme() == "data") {
  166. dbgln_if(SPAM_DEBUG, "ResourceLoader loading a data URL with mime-type: '{}', base64={}, payload='{}'",
  167. url.data_mime_type(),
  168. url.data_payload_is_base64(),
  169. url.data_payload());
  170. ByteBuffer data;
  171. if (url.data_payload_is_base64()) {
  172. auto data_maybe = decode_base64(url.data_payload());
  173. if (data_maybe.is_error()) {
  174. auto error_message = data_maybe.error().string_literal();
  175. log_failure(request, error_message);
  176. error_callback(error_message, {});
  177. return;
  178. }
  179. data = data_maybe.value();
  180. } else {
  181. data = url.data_payload().to_byte_buffer();
  182. }
  183. log_success(request);
  184. Platform::EventLoopPlugin::the().deferred_invoke([data = move(data), success_callback = move(success_callback)] {
  185. success_callback(data, {}, {});
  186. });
  187. return;
  188. }
  189. if (url.scheme() == "file") {
  190. if (request.page().has_value())
  191. m_page = request.page().value();
  192. if (!m_page.has_value())
  193. return;
  194. FileRequest file_request(url.serialize_path(), [this, success_callback = move(success_callback), error_callback = move(error_callback), log_success, log_failure, request](ErrorOr<i32> file_or_error) {
  195. --m_pending_loads;
  196. if (on_load_counter_change)
  197. on_load_counter_change();
  198. if (file_or_error.is_error()) {
  199. log_failure(request, file_or_error.error());
  200. if (error_callback) {
  201. auto status = file_or_error.error().code() == ENOENT ? 404u : 500u;
  202. error_callback(DeprecatedString::formatted("{}", file_or_error.error()), status);
  203. }
  204. return;
  205. }
  206. auto const fd = file_or_error.value();
  207. auto maybe_file = Core::File::adopt_fd(fd, Core::File::OpenMode::Read);
  208. if (maybe_file.is_error()) {
  209. log_failure(request, maybe_file.error());
  210. if (error_callback)
  211. error_callback(DeprecatedString::formatted("{}", maybe_file.error()), 500u);
  212. return;
  213. }
  214. auto file = maybe_file.release_value();
  215. auto maybe_data = file->read_until_eof();
  216. if (maybe_data.is_error()) {
  217. log_failure(request, maybe_data.error());
  218. if (error_callback)
  219. error_callback(DeprecatedString::formatted("{}", maybe_data.error()), 500u);
  220. return;
  221. }
  222. auto data = maybe_data.release_value();
  223. log_success(request);
  224. // NOTE: For file:// URLs, we have to guess the MIME type, since there's no HTTP header to tell us what this is.
  225. // We insert a fake Content-Type header here, so that clients can use it to learn the MIME type.
  226. HashMap<DeprecatedString, DeprecatedString, CaseInsensitiveStringTraits> response_headers;
  227. auto mime_type = Core::guess_mime_type_based_on_filename(request.url().serialize_path());
  228. response_headers.set("Content-Type"sv, mime_type);
  229. success_callback(data, response_headers, {});
  230. });
  231. m_page->client().request_file(move(file_request));
  232. ++m_pending_loads;
  233. if (on_load_counter_change)
  234. on_load_counter_change();
  235. return;
  236. }
  237. if (url.scheme() == "http" || url.scheme() == "https" || url.scheme() == "gemini") {
  238. auto proxy = ProxyMappings::the().proxy_for_url(url);
  239. HashMap<DeprecatedString, DeprecatedString> headers;
  240. headers.set("User-Agent", m_user_agent);
  241. headers.set("Accept-Encoding", "gzip, deflate, br");
  242. for (auto& it : request.headers()) {
  243. headers.set(it.key, it.value);
  244. }
  245. auto protocol_request = m_connector->start_request(request.method(), url, headers, request.body(), proxy);
  246. if (!protocol_request) {
  247. auto start_request_failure_msg = "Failed to initiate load"sv;
  248. log_failure(request, start_request_failure_msg);
  249. if (error_callback)
  250. error_callback(start_request_failure_msg, {});
  251. return;
  252. }
  253. if (timeout.has_value() && timeout.value() > 0) {
  254. auto timer = Platform::Timer::create_single_shot(timeout.value(), nullptr);
  255. timer->on_timeout = [timer, protocol_request, timeout_callback = move(timeout_callback)] {
  256. protocol_request->stop();
  257. if (timeout_callback)
  258. timeout_callback();
  259. };
  260. timer->start();
  261. }
  262. m_active_requests.set(*protocol_request);
  263. protocol_request->on_buffered_request_finish = [this, success_callback = move(success_callback), error_callback = move(error_callback), log_success, log_failure, request, &protocol_request = *protocol_request](bool success, auto, auto& response_headers, auto status_code, ReadonlyBytes payload) mutable {
  264. --m_pending_loads;
  265. if (on_load_counter_change)
  266. on_load_counter_change();
  267. if (request.page().has_value()) {
  268. if (auto set_cookie = response_headers.get("Set-Cookie"); set_cookie.has_value())
  269. store_response_cookies(request.page().value(), request.url(), *set_cookie);
  270. }
  271. if (!success || (status_code.has_value() && *status_code >= 400 && *status_code <= 599 && (payload.is_empty() || !request.is_main_resource()))) {
  272. StringBuilder error_builder;
  273. if (status_code.has_value())
  274. error_builder.appendff("Load failed: {}", *status_code);
  275. else
  276. error_builder.append("Load failed"sv);
  277. log_failure(request, error_builder.string_view());
  278. if (error_callback)
  279. error_callback(error_builder.to_deprecated_string(), status_code);
  280. return;
  281. }
  282. log_success(request);
  283. success_callback(payload, response_headers, status_code);
  284. Platform::EventLoopPlugin::the().deferred_invoke([this, &protocol_request] {
  285. m_active_requests.remove(protocol_request);
  286. });
  287. };
  288. protocol_request->set_should_buffer_all_input(true);
  289. protocol_request->on_certificate_requested = []() -> ResourceLoaderConnectorRequest::CertificateAndKey {
  290. return {};
  291. };
  292. ++m_pending_loads;
  293. if (on_load_counter_change)
  294. on_load_counter_change();
  295. return;
  296. }
  297. auto not_implemented_error = DeprecatedString::formatted("Protocol not implemented: {}", url.scheme());
  298. log_failure(request, not_implemented_error);
  299. if (error_callback)
  300. error_callback(not_implemented_error, {});
  301. }
  302. void ResourceLoader::load(const AK::URL& url, Function<void(ReadonlyBytes, HashMap<DeprecatedString, DeprecatedString, CaseInsensitiveStringTraits> const& response_headers, Optional<u32> status_code)> success_callback, Function<void(DeprecatedString const&, Optional<u32> status_code)> error_callback, Optional<u32> timeout, Function<void()> timeout_callback)
  303. {
  304. LoadRequest request;
  305. request.set_url(url);
  306. load(request, move(success_callback), move(error_callback), timeout, move(timeout_callback));
  307. }
  308. bool ResourceLoader::is_port_blocked(int port)
  309. {
  310. int ports[] { 1, 7, 9, 11, 13, 15, 17, 19, 20, 21, 22, 23, 25, 37, 42,
  311. 43, 53, 77, 79, 87, 95, 101, 102, 103, 104, 109, 110, 111, 113,
  312. 115, 117, 119, 123, 135, 139, 143, 179, 389, 465, 512, 513, 514,
  313. 515, 526, 530, 531, 532, 540, 556, 563, 587, 601, 636, 993, 995,
  314. 2049, 3659, 4045, 6000, 6379, 6665, 6666, 6667, 6668, 6669, 9000 };
  315. for (auto blocked_port : ports)
  316. if (port == blocked_port)
  317. return true;
  318. return false;
  319. }
  320. void ResourceLoader::clear_cache()
  321. {
  322. dbgln_if(CACHE_DEBUG, "Clearing {} items from ResourceLoader cache", s_resource_cache.size());
  323. s_resource_cache.clear();
  324. }
  325. void ResourceLoader::evict_from_cache(LoadRequest const& request)
  326. {
  327. dbgln_if(CACHE_DEBUG, "Removing resource {} from cache", request.url());
  328. s_resource_cache.remove(request);
  329. }
  330. }