ResourceLoader.cpp 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312
  1. /*
  2. * Copyright (c) 2018-2021, Andreas Kling <kling@serenityos.org>
  3. * Copyright (c) 2022, Dex♪ <dexes.ttp@gmail.com>
  4. *
  5. * SPDX-License-Identifier: BSD-2-Clause
  6. */
  7. #include <AK/Base64.h>
  8. #include <AK/Debug.h>
  9. #include <AK/JsonObject.h>
  10. #include <LibCore/ElapsedTimer.h>
  11. #include <LibCore/EventLoop.h>
  12. #include <LibCore/File.h>
  13. #include <LibWeb/Loader/ContentFilter.h>
  14. #include <LibWeb/Loader/LoadRequest.h>
  15. #include <LibWeb/Loader/ProxyMappings.h>
  16. #include <LibWeb/Loader/Resource.h>
  17. #include <LibWeb/Loader/ResourceLoader.h>
  18. #ifdef __serenity__
  19. # include <serenity.h>
  20. #endif
  21. namespace Web {
  22. ResourceLoaderConnectorRequest::ResourceLoaderConnectorRequest() = default;
  23. ResourceLoaderConnectorRequest::~ResourceLoaderConnectorRequest() = default;
  24. ResourceLoaderConnector::ResourceLoaderConnector() = default;
  25. ResourceLoaderConnector::~ResourceLoaderConnector() = default;
  26. static RefPtr<ResourceLoader> s_resource_loader;
  27. void ResourceLoader::initialize(RefPtr<ResourceLoaderConnector> connector)
  28. {
  29. if (connector)
  30. s_resource_loader = ResourceLoader::try_create(connector.release_nonnull()).release_value_but_fixme_should_propagate_errors();
  31. }
  32. ResourceLoader& ResourceLoader::the()
  33. {
  34. if (!s_resource_loader) {
  35. dbgln("Web::ResourceLoader was not initialized");
  36. VERIFY_NOT_REACHED();
  37. }
  38. return *s_resource_loader;
  39. }
  40. ErrorOr<NonnullRefPtr<ResourceLoader>> ResourceLoader::try_create(NonnullRefPtr<ResourceLoaderConnector> connector)
  41. {
  42. return adopt_nonnull_ref_or_enomem(new (nothrow) ResourceLoader(move(connector)));
  43. }
  44. ResourceLoader::ResourceLoader(NonnullRefPtr<ResourceLoaderConnector> connector)
  45. : m_connector(move(connector))
  46. , m_user_agent(default_user_agent)
  47. {
  48. }
  49. void ResourceLoader::prefetch_dns(AK::URL const& url)
  50. {
  51. m_connector->prefetch_dns(url);
  52. }
  53. void ResourceLoader::preconnect(AK::URL const& url)
  54. {
  55. m_connector->preconnect(url);
  56. }
  57. static HashMap<LoadRequest, NonnullRefPtr<Resource>> s_resource_cache;
  58. RefPtr<Resource> ResourceLoader::load_resource(Resource::Type type, LoadRequest& request)
  59. {
  60. if (!request.is_valid())
  61. return nullptr;
  62. bool use_cache = request.url().protocol() != "file";
  63. if (use_cache) {
  64. auto it = s_resource_cache.find(request);
  65. if (it != s_resource_cache.end()) {
  66. if (it->value->type() != type) {
  67. dbgln("FIXME: Not using cached resource for {} since there's a type mismatch.", request.url());
  68. } else {
  69. dbgln_if(CACHE_DEBUG, "Reusing cached resource for: {}", request.url());
  70. return it->value;
  71. }
  72. }
  73. }
  74. auto resource = Resource::create({}, type, request);
  75. if (use_cache)
  76. s_resource_cache.set(request, resource);
  77. load(
  78. request,
  79. [=](auto data, auto& headers, auto status_code) {
  80. const_cast<Resource&>(*resource).did_load({}, data, headers, status_code);
  81. },
  82. [=](auto& error, auto status_code) {
  83. const_cast<Resource&>(*resource).did_fail({}, error, status_code);
  84. });
  85. return resource;
  86. }
  87. static String sanitized_url_for_logging(AK::URL const& url)
  88. {
  89. if (url.protocol() == "data"sv)
  90. return String::formatted("[data URL, mime-type={}, size={}]", url.data_mime_type(), url.data_payload().length());
  91. return url.to_string();
  92. }
  93. static void emit_signpost(String const& message, int id)
  94. {
  95. #ifdef __serenity__
  96. auto string_id = perf_register_string(message.characters(), message.length());
  97. perf_event(PERF_EVENT_SIGNPOST, string_id, id);
  98. #else
  99. (void)message;
  100. (void)id;
  101. #endif
  102. }
  103. static size_t resource_id = 0;
  104. void ResourceLoader::load(LoadRequest& request, Function<void(ReadonlyBytes, HashMap<String, String, CaseInsensitiveStringTraits> const& response_headers, Optional<u32> status_code)> success_callback, Function<void(String const&, Optional<u32> status_code)> error_callback)
  105. {
  106. auto& url = request.url();
  107. request.start_timer();
  108. auto id = resource_id++;
  109. auto url_for_logging = sanitized_url_for_logging(url);
  110. emit_signpost(String::formatted("Starting load: {}", url_for_logging), id);
  111. dbgln("ResourceLoader: Starting load of: \"{}\"", url_for_logging);
  112. auto const log_success = [url_for_logging, id](auto const& request) {
  113. auto load_time_ms = request.load_time().to_milliseconds();
  114. emit_signpost(String::formatted("Finished load: {}", url_for_logging), id);
  115. dbgln("ResourceLoader: Finished load of: \"{}\", Duration: {}ms", url_for_logging, load_time_ms);
  116. };
  117. auto const log_failure = [url_for_logging, id](auto const& request, auto const error_message) {
  118. auto load_time_ms = request.load_time().to_milliseconds();
  119. emit_signpost(String::formatted("Failed load: {}", url_for_logging), id);
  120. dbgln("ResourceLoader: Failed load of: \"{}\", \033[31;1mError: {}\033[0m, Duration: {}ms", url_for_logging, error_message, load_time_ms);
  121. };
  122. if (is_port_blocked(url.port_or_default())) {
  123. log_failure(request, String::formatted("The port #{} is blocked", url.port_or_default()));
  124. return;
  125. }
  126. if (ContentFilter::the().is_filtered(url)) {
  127. auto filter_message = "URL was filtered"sv;
  128. log_failure(request, filter_message);
  129. error_callback(filter_message, {});
  130. return;
  131. }
  132. if (url.protocol() == "about") {
  133. dbgln_if(SPAM_DEBUG, "Loading about: URL {}", url);
  134. log_success(request);
  135. HashMap<String, String, CaseInsensitiveStringTraits> response_headers;
  136. response_headers.set("Content-Type", "text/html; charset=UTF-8");
  137. deferred_invoke([success_callback = move(success_callback), response_headers = move(response_headers)] {
  138. success_callback(String::empty().to_byte_buffer(), response_headers, {});
  139. });
  140. return;
  141. }
  142. if (url.protocol() == "data") {
  143. dbgln_if(SPAM_DEBUG, "ResourceLoader loading a data URL with mime-type: '{}', base64={}, payload='{}'",
  144. url.data_mime_type(),
  145. url.data_payload_is_base64(),
  146. url.data_payload());
  147. ByteBuffer data;
  148. if (url.data_payload_is_base64()) {
  149. auto data_maybe = decode_base64(url.data_payload());
  150. if (data_maybe.is_error()) {
  151. auto error_message = data_maybe.error().string_literal();
  152. log_failure(request, error_message);
  153. error_callback(error_message, {});
  154. return;
  155. }
  156. data = data_maybe.value();
  157. } else {
  158. data = url.data_payload().to_byte_buffer();
  159. }
  160. log_success(request);
  161. deferred_invoke([data = move(data), success_callback = move(success_callback)] {
  162. success_callback(data, {}, {});
  163. });
  164. return;
  165. }
  166. if (url.protocol() == "file") {
  167. auto file_result = Core::File::open(url.path(), Core::OpenMode::ReadOnly);
  168. if (file_result.is_error()) {
  169. auto& error = file_result.error();
  170. log_failure(request, error);
  171. if (error_callback)
  172. error_callback(String::formatted("{}", error), error.code());
  173. return;
  174. }
  175. auto file = file_result.release_value();
  176. auto data = file->read_all();
  177. log_success(request);
  178. deferred_invoke([data = move(data), success_callback = move(success_callback)] {
  179. success_callback(data, {}, {});
  180. });
  181. return;
  182. }
  183. if (url.protocol() == "http" || url.protocol() == "https" || url.protocol() == "gemini") {
  184. auto proxy = ProxyMappings::the().proxy_for_url(url);
  185. HashMap<String, String> headers;
  186. headers.set("User-Agent", m_user_agent);
  187. headers.set("Accept-Encoding", "gzip, deflate");
  188. for (auto& it : request.headers()) {
  189. headers.set(it.key, it.value);
  190. }
  191. auto protocol_request = m_connector->start_request(request.method(), url, headers, request.body(), proxy);
  192. if (!protocol_request) {
  193. auto start_request_failure_msg = "Failed to initiate load"sv;
  194. log_failure(request, start_request_failure_msg);
  195. if (error_callback)
  196. error_callback(start_request_failure_msg, {});
  197. return;
  198. }
  199. m_active_requests.set(*protocol_request);
  200. protocol_request->on_buffered_request_finish = [this, success_callback = move(success_callback), error_callback = move(error_callback), log_success, log_failure, request, &protocol_request = *protocol_request](bool success, auto, auto& response_headers, auto status_code, ReadonlyBytes payload) {
  201. --m_pending_loads;
  202. if (on_load_counter_change)
  203. on_load_counter_change();
  204. if (!success || (status_code.has_value() && *status_code >= 400 && *status_code <= 599)) {
  205. StringBuilder error_builder;
  206. if (status_code.has_value())
  207. error_builder.appendff("Load failed: {}", *status_code);
  208. else
  209. error_builder.append("Load failed");
  210. log_failure(request, error_builder.string_view());
  211. if (error_callback)
  212. error_callback(error_builder.to_string(), {});
  213. return;
  214. }
  215. log_success(request);
  216. success_callback(payload, response_headers, status_code);
  217. deferred_invoke([this, &protocol_request] {
  218. m_active_requests.remove(protocol_request);
  219. });
  220. };
  221. protocol_request->set_should_buffer_all_input(true);
  222. protocol_request->on_certificate_requested = []() -> ResourceLoaderConnectorRequest::CertificateAndKey {
  223. return {};
  224. };
  225. ++m_pending_loads;
  226. if (on_load_counter_change)
  227. on_load_counter_change();
  228. return;
  229. }
  230. auto not_implemented_error = String::formatted("Protocol not implemented: {}", url.protocol());
  231. log_failure(request, not_implemented_error);
  232. if (error_callback)
  233. error_callback(not_implemented_error, {});
  234. }
  235. void ResourceLoader::load(const AK::URL& url, Function<void(ReadonlyBytes, HashMap<String, String, CaseInsensitiveStringTraits> const& response_headers, Optional<u32> status_code)> success_callback, Function<void(String const&, Optional<u32> status_code)> error_callback)
  236. {
  237. LoadRequest request;
  238. request.set_url(url);
  239. load(request, move(success_callback), move(error_callback));
  240. }
  241. bool ResourceLoader::is_port_blocked(int port)
  242. {
  243. int ports[] { 1, 7, 9, 11, 13, 15, 17, 19, 20, 21, 22, 23, 25, 37, 42,
  244. 43, 53, 77, 79, 87, 95, 101, 102, 103, 104, 109, 110, 111, 113,
  245. 115, 117, 119, 123, 135, 139, 143, 179, 389, 465, 512, 513, 514,
  246. 515, 526, 530, 531, 532, 540, 556, 563, 587, 601, 636, 993, 995,
  247. 2049, 3659, 4045, 6000, 6379, 6665, 6666, 6667, 6668, 6669, 9000 };
  248. for (auto blocked_port : ports)
  249. if (port == blocked_port)
  250. return true;
  251. return false;
  252. }
  253. void ResourceLoader::clear_cache()
  254. {
  255. dbgln_if(CACHE_DEBUG, "Clearing {} items from ResourceLoader cache", s_resource_cache.size());
  256. s_resource_cache.clear();
  257. }
  258. void ResourceLoader::evict_from_cache(LoadRequest const& request)
  259. {
  260. dbgln_if(CACHE_DEBUG, "Removing resource {} from cache", request.url());
  261. s_resource_cache.remove(request);
  262. }
  263. }