mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2024-12-15 02:40:38 +00:00
8751be09f9
In order to follow spec text to achieve this, we need to change the underlying representation of a host in AK::URL to deserialized format. Before this, we were parsing the host and then immediately serializing it again. Making that change resulted in a whole bunch of fallout. After this change, callers can access the serialized data through this concept-host-serializer. The functional end result of this change is that IPv6 hosts are now correctly serialized to be surrounded with '[' and ']'.
421 lines
16 KiB
C++
421 lines
16 KiB
C++
/*
|
|
* Copyright (c) 2018-2020, Andreas Kling <kling@serenityos.org>
|
|
* Copyright (c) 2022, Thomas Keppler <serenity@tkeppler.de>
|
|
*
|
|
* SPDX-License-Identifier: BSD-2-Clause
|
|
*/
|
|
|
|
#include <AK/Base64.h>
|
|
#include <AK/CharacterTypes.h>
|
|
#include <AK/GenericLexer.h>
|
|
#include <AK/LexicalPath.h>
|
|
#include <AK/MaybeOwned.h>
|
|
#include <AK/NumberFormat.h>
|
|
#include <AK/String.h>
|
|
#include <AK/URL.h>
|
|
#include <LibCore/ArgsParser.h>
|
|
#include <LibCore/EventLoop.h>
|
|
#include <LibCore/System.h>
|
|
#include <LibFileSystem/FileSystem.h>
|
|
#include <LibHTTP/HttpResponse.h>
|
|
#include <LibMain/Main.h>
|
|
#include <LibProtocol/Request.h>
|
|
#include <LibProtocol/RequestClient.h>
|
|
#include <ctype.h>
|
|
#include <stdio.h>
|
|
|
|
// FIXME: Move this somewhere else when it's needed (e.g. in the Browser)
|
|
class ContentDispositionParser {
|
|
public:
|
|
ContentDispositionParser(StringView value)
|
|
{
|
|
GenericLexer lexer(value);
|
|
|
|
lexer.ignore_while(is_ascii_space);
|
|
|
|
if (lexer.consume_specific("inline")) {
|
|
m_kind = Kind::Inline;
|
|
if (!lexer.is_eof())
|
|
m_might_be_wrong = true;
|
|
return;
|
|
}
|
|
|
|
if (lexer.consume_specific("attachment")) {
|
|
m_kind = Kind::Attachment;
|
|
if (lexer.consume_specific(";")) {
|
|
lexer.ignore_while(is_ascii_space);
|
|
if (lexer.consume_specific("filename=")) {
|
|
// RFC 2183: "A short (length <= 78 characters)
|
|
// parameter value containing only non-`tspecials' characters SHOULD be
|
|
// represented as a single `token'."
|
|
// Some people seem to take this as generic advice of "if it doesn't have special characters,
|
|
// it's safe to specify as a single token"
|
|
// So let's just be as lenient as possible.
|
|
if (lexer.next_is('"'))
|
|
m_filename = lexer.consume_quoted_string();
|
|
else
|
|
m_filename = lexer.consume_until(is_any_of("()<>@,;:\\\"/[]?= "sv));
|
|
} else {
|
|
m_might_be_wrong = true;
|
|
}
|
|
}
|
|
return;
|
|
}
|
|
|
|
if (lexer.consume_specific("form-data")) {
|
|
m_kind = Kind::FormData;
|
|
while (lexer.consume_specific(";")) {
|
|
lexer.ignore_while(is_ascii_space);
|
|
if (lexer.consume_specific("name=")) {
|
|
m_name = lexer.consume_quoted_string();
|
|
} else if (lexer.consume_specific("filename=")) {
|
|
if (lexer.next_is('"'))
|
|
m_filename = lexer.consume_quoted_string();
|
|
else
|
|
m_filename = lexer.consume_until(is_any_of("()<>@,;:\\\"/[]?= "sv));
|
|
} else {
|
|
m_might_be_wrong = true;
|
|
}
|
|
}
|
|
|
|
return;
|
|
}
|
|
|
|
// FIXME: Support 'filename*'
|
|
m_might_be_wrong = true;
|
|
}
|
|
|
|
enum class Kind {
|
|
Inline,
|
|
Attachment,
|
|
FormData,
|
|
};
|
|
|
|
StringView filename() const { return m_filename; }
|
|
StringView name() const { return m_name; }
|
|
Kind kind() const { return m_kind; }
|
|
bool might_be_wrong() const { return m_might_be_wrong; }
|
|
|
|
private:
|
|
StringView m_filename;
|
|
StringView m_name;
|
|
Kind m_kind { Kind::Inline };
|
|
bool m_might_be_wrong { false };
|
|
};
|
|
|
|
/// Wraps a stream to silently ignore writes when the condition isn't true.
|
|
template<typename ConditionT>
|
|
class ConditionalOutputStream final : public Stream {
|
|
public:
|
|
ConditionalOutputStream(ConditionT&& condition, MaybeOwned<Stream> stream)
|
|
: m_stream(move(stream))
|
|
, m_condition(condition)
|
|
{
|
|
}
|
|
|
|
virtual ErrorOr<Bytes> read_some(Bytes) override
|
|
{
|
|
return Error::from_errno(EBADF);
|
|
}
|
|
|
|
virtual ErrorOr<size_t> write_some(ReadonlyBytes bytes) override
|
|
{
|
|
// Pretend that we wrote the whole buffer if the condition is untrue.
|
|
if (!m_condition())
|
|
return bytes.size();
|
|
|
|
return m_stream->write_some(bytes);
|
|
}
|
|
|
|
virtual bool is_eof() const override
|
|
{
|
|
return true;
|
|
}
|
|
|
|
virtual bool is_open() const override
|
|
{
|
|
return m_stream->is_open();
|
|
}
|
|
|
|
virtual void close() override
|
|
{
|
|
}
|
|
|
|
private:
|
|
MaybeOwned<Stream> m_stream;
|
|
ConditionT m_condition;
|
|
};
|
|
|
|
/// Entry point of the download utility.
///
/// Parses the command line, starts a request for the given URL through Protocol::RequestClient,
/// streams the response body to stdout (or to a file when -O is given), optionally follows
/// 3xx redirects, and reports download progress on stderr.
///
/// Returns 1 when the URL is invalid; otherwise the exit code of the event loop, which is
/// 0 for a finished request and 1 when the request failed or the output file could not be set up.
ErrorOr<int> serenity_main(Main::Arguments arguments)
{
    StringView url_str;
    bool save_at_provided_name = false;
    bool should_follow_url = false;
    bool verbose_output = false;
    StringView data;
    StringView proxy_spec;
    DeprecatedString method = "GET";
    StringView method_override;
    HashMap<DeprecatedString, DeprecatedString, CaseInsensitiveStringTraits> request_headers;
    String credentials;

    Core::ArgsParser args_parser;
    args_parser.set_general_help(
        "Request a file from an arbitrary URL. This command uses RequestServer, "
        "and thus supports at least http, https, and gemini.");
    args_parser.add_option(save_at_provided_name, "Write to a file named as the remote file", nullptr, 'O');
    args_parser.add_option(data, "(HTTP only) Send the provided data via an HTTP POST request", "data", 'd', "data");
    args_parser.add_option(method_override, "(HTTP only) HTTP method to use for the request (eg, GET, POST, etc)", "method", 'm', "method");
    args_parser.add_option(should_follow_url, "(HTTP only) Follow the Location header if a 3xx status is encountered", "follow", 'l');
    args_parser.add_option(Core::ArgsParser::Option {
        .argument_mode = Core::ArgsParser::OptionArgumentMode::Required,
        .help_string = "Add a header entry to the request",
        .long_name = "header",
        .short_name = 'H',
        .value_name = "key:value",
        .accept_value = [&](StringView header) {
            // Everything before the first ':' is the key, everything after it is the value.
            auto split = header.find(':');
            if (!split.has_value())
                return false;
            request_headers.set(header.substring_view(0, split.value()), header.substring_view(split.value() + 1));
            return true;
        } });
    args_parser.add_option(Core::ArgsParser::Option {
        .argument_mode = Core::ArgsParser::OptionArgumentMode::Required,
        .help_string = "(HTTP only) Provide basic authentication credentials",
        .long_name = "auth",
        .short_name = 'u',
        .value_name = "username:password",
        .accept_value = [&](StringView input) {
            if (!input.contains(':'))
                return false;

            // NOTE: Input is explicitly not trimmed, but instead taken in raw;
            //       Space prepended usernames and appended passwords might be legal in the user's context.
            auto maybe_credentials = String::from_utf8(input);
            if (maybe_credentials.is_error())
                return false;

            credentials = maybe_credentials.release_value();
            return true;
        } });
    args_parser.add_option(proxy_spec, "Specify a proxy server to use for this request (proto://ip:port)", "proxy", 'p', "proxy");
    args_parser.add_option(verbose_output, "(HTTP only) Log request and response metadata", "verbose", 'v');
    args_parser.add_positional_argument(url_str, "URL to download from", "url");
    args_parser.parse(arguments);

    // If writing to a file was requested, we'll open a new file descriptor with the same number later.
    // Until then, we just clone the stdout file descriptor, because we shouldn't be reopening the actual stdout.
    int const output_fd = TRY(Core::System::dup(STDOUT_FILENO));

    // An explicit method wins; otherwise providing data implies POST.
    if (!method_override.is_empty()) {
        method = method_override;
    } else if (!data.is_empty()) {
        method = "POST";
        // FIXME: Content-Type?
    }

    URL url(url_str);
    if (!url.is_valid()) {
        warnln("'{}' is not a valid URL", url_str);
        return 1;
    }

    bool const is_http_url = url.scheme().is_one_of("http"sv, "https"sv);

    Core::ProxyData proxy_data {};
    if (!proxy_spec.is_empty())
        proxy_data = TRY(Core::ProxyData::parse_url(proxy_spec));

    Core::EventLoop loop;
    bool received_actual_headers = false;
    bool should_save_stream_data = false;
    bool following_url = false;

    u64 previous_downloaded_size = 0;
    u64 current_bytes_per_second_speed = 0;
    u32 const report_time_in_ms = 100;
    u32 const speed_update_time_in_ms = 1000;

    auto previous_report_time = MonotonicTime::now();
    auto previous_speed_update_time = previous_report_time;

    RefPtr<Protocol::Request> request;
    auto protocol_client = TRY(Protocol::RequestClient::try_create());
    // Body bytes are only written out once should_save_stream_data has been set by the header callback.
    auto output_stream = ConditionalOutputStream { [&] { return should_save_stream_data; }, TRY(Core::File::adopt_fd(output_fd, Core::File::OpenMode::Write)) };

    // https://httpwg.org/specs/rfc9110.html#authentication
    auto const has_credentials = !credentials.is_empty();
    auto const has_manual_authorization_header = request_headers.contains("Authorization");
    if (is_http_url && has_credentials && !has_manual_authorization_header) {
        // 11.2. Authentication Parameters
        // The authentication scheme is followed by additional information necessary for achieving authentication via
        // that scheme as (...) or a single sequence of characters capable of holding base64-encoded information.
        auto const encoded_credentials = TRY(encode_base64(credentials.bytes()));
        auto const authorization = TRY(String::formatted("Basic {}", encoded_credentials));
        request_headers.set("Authorization", authorization.to_deprecated_string());
    } else {
        if (is_http_url && has_credentials && has_manual_authorization_header)
            warnln("* Skipping encoding provided authorization, manual header present.");
        if (!is_http_url && has_credentials)
            warnln("* Skipping adding Authorization header, request was not for the HTTP protocol.");
    }

    // Prints the progress line on stderr. Unless force_update is set, output is rate-limited to
    // one report per report_time_in_ms; the speed figure is refreshed every speed_update_time_in_ms.
    auto update_progress = [&](Optional<u64> maybe_total_size, u64 downloaded_size, bool force_update) {
        auto current_time = MonotonicTime::now();
        if (!force_update && (current_time - previous_report_time).to_milliseconds() < report_time_in_ms)
            return;

        previous_report_time = current_time;
        // Return to the start of the line and erase it, so the progress line is rewritten in place.
        warn("\r\033[2K");
        if (maybe_total_size.has_value()) {
            // NOTE(review): OSC 9 sequence carrying downloaded/total; presumably terminal progress reporting - confirm.
            warn("\033]9;{};{};\033\\", downloaded_size, maybe_total_size.value());
            warn("Download progress: {} / {}", human_readable_size(downloaded_size), human_readable_size(maybe_total_size.value()));
        } else {
            warn("Download progress: {} / ???", human_readable_size(downloaded_size));
        }

        auto time_diff_ms = (current_time - previous_speed_update_time).to_milliseconds();
        if ((force_update && previous_downloaded_size == 0) || time_diff_ms > speed_update_time_in_ms) {
            auto size_diff = downloaded_size - previous_downloaded_size;
            previous_speed_update_time = current_time;
            previous_downloaded_size = downloaded_size;
            // A forced update can arrive within the same millisecond as the previous sample;
            // clamp the elapsed time to 1ms so we never divide by zero.
            auto const elapsed_ms = time_diff_ms > 0 ? time_diff_ms : 1;
            current_bytes_per_second_speed = size_diff * 1000 / elapsed_ms;
        }

        if (previous_downloaded_size == 0)
            warn(" at --.-B/s");
        else
            warn(" at {}/s", human_readable_size(current_bytes_per_second_speed));
    };

    // Installs the callbacks on the current `request` and starts streaming it into output_stream.
    // Called for the initial request and again for every followed redirect.
    Function<void()> setup_request = [&] {
        if (!request) {
            warnln("Failed to start request for '{}'", url_str);
            exit(1);
        }

        if (verbose_output && is_http_url) {
            warnln("* Setting up request");
            warnln("> Method={}, URL={}", method, url);
            for (auto const& header : request_headers) {
                warnln("> {}: {}", header.key, header.value);
            }
        }

        request->on_progress = [&](Optional<u64> maybe_total_size, u64 downloaded_size) {
            update_progress(move(maybe_total_size), downloaded_size, false);
        };
        request->on_headers_received = [&](auto& response_headers, auto status_code) {
            if (received_actual_headers)
                return;
            dbgln("Received headers! response code = {}", status_code.value_or(0));
            received_actual_headers = true; // And not trailers!
            should_save_stream_data = true;

            if (verbose_output && is_http_url) {
                warnln("* Received headers");
                auto const value = status_code.value_or(0);
                auto const reason_phrase = (value != 0)
                    ? HTTP::HttpResponse::reason_phrase_for_code(value)
                    : "UNKNOWN"sv;
                warnln("< Code={}, Reason={}", value, reason_phrase);
                for (auto const& header : response_headers) {
                    warnln("< {}: {}", header.key, header.value);
                }
            }

            if (!following_url && save_at_provided_name) {
                // Pick the output file name: Content-Disposition filename first, then the URL path.
                DeprecatedString output_name;
                if (auto content_disposition = response_headers.get("Content-Disposition"); content_disposition.has_value()) {
                    auto& value = content_disposition.value();
                    ContentDispositionParser parser(value);
                    output_name = parser.filename();
                }

                if (output_name.is_empty())
                    output_name = url.serialize_path();

                // Only keep the last path component, so the file lands in the current directory.
                LexicalPath path { output_name };
                output_name = path.basename();

                // The URL didn't have a name component, e.g. 'serenityos.org'
                if (output_name.is_empty() || output_name == "/") {
                    // Fall back to the host name, appending ".0", ".1", ... until the name is unused.
                    auto const host_name = url.serialized_host().release_value_but_fixme_should_propagate_errors().to_deprecated_string();
                    int i = -1;
                    do {
                        output_name = host_name;
                        if (i > -1)
                            output_name = DeprecatedString::formatted("{}.{}", output_name, i);
                        ++i;
                    } while (FileSystem::exists(output_name));
                }

                int target_file_fd = open(output_name.characters(), O_WRONLY | O_CREAT | O_TRUNC, 0644);
                if (target_file_fd < 0) {
                    perror("target file open");
                    loop.quit(1);
                    return;
                }

                // Make output_fd (which output_stream writes to) refer to the target file.
                if (dup2(target_file_fd, output_fd) < 0) {
                    perror("target file dup2");
                    // Don't leak the freshly opened descriptor on the error path.
                    close(target_file_fd);
                    loop.quit(1);
                    return;
                }

                if (close(target_file_fd) < 0) {
                    perror("target file close");
                    loop.quit(1);
                    return;
                }
            }

            auto status_code_value = status_code.value_or(0);
            if (should_follow_url && status_code_value >= 300 && status_code_value < 400) {
                if (auto location = response_headers.get("Location"); location.has_value()) {
                    auto was_following_url = following_url;
                    following_url = true;
                    received_actual_headers = false;
                    should_save_stream_data = false;
                    // Detach from the redirecting request before stopping it.
                    request->on_finish = nullptr;
                    request->on_headers_received = nullptr;
                    request->on_progress = nullptr;
                    request->stop();

                    // Start the follow-up request from the event loop rather than from inside this callback.
                    // Note that the redirected request is sent without a body.
                    Core::deferred_invoke([&, was_following_url, url = location.value()] {
                        warnln("{}Following to {}", was_following_url ? "" : "\n", url);
                        request = protocol_client->start_request(method, url, request_headers, ReadonlyBytes {}, proxy_data);
                        setup_request();
                    });
                }
            } else {
                following_url = false;

                if (status_code_value >= 400)
                    warnln("Request returned error {}", status_code_value);
            }
        };
        request->on_finish = [&](bool success, u64 total_size) {
            if (following_url)
                return;

            if (success)
                update_progress(total_size, total_size, true);

            warn("\033]9;-1;\033\\");
            warnln();
            if (!success)
                warnln("Request failed :(");
            // Report a failed request through the exit status so callers can detect it.
            loop.quit(success ? 0 : 1);
        };

        request->stream_into(output_stream);
    };

    request = protocol_client->start_request(method, url, request_headers, data.bytes(), proxy_data);
    setup_request();

    dbgln("started request with id {}", request->id());

    return loop.exec();
}
|