2020-01-18 08:38:21 +00:00
|
|
|
/*
|
|
|
|
* Copyright (c) 2018-2020, Andreas Kling <kling@serenityos.org>
|
|
|
|
* All rights reserved.
|
|
|
|
*
|
|
|
|
* Redistribution and use in source and binary forms, with or without
|
|
|
|
* modification, are permitted provided that the following conditions are met:
|
|
|
|
*
|
|
|
|
* 1. Redistributions of source code must retain the above copyright notice, this
|
|
|
|
* list of conditions and the following disclaimer.
|
|
|
|
*
|
|
|
|
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
|
|
|
* this list of conditions and the following disclaimer in the documentation
|
|
|
|
* and/or other materials provided with the distribution.
|
|
|
|
*
|
|
|
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
|
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
|
|
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
|
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
|
|
|
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
|
|
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
|
|
|
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
|
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
*/
|
|
|
|
|
2019-11-18 21:04:39 +00:00
|
|
|
#include <AK/FileSystemPath.h>
|
2019-08-10 15:27:56 +00:00
|
|
|
#include <AK/StringBuilder.h>
|
|
|
|
#include <AK/URL.h>
|
|
|
|
|
|
|
|
namespace AK {
|
|
|
|
|
|
|
|
// Returns true if `ch` may appear in a URL protocol name.
// Only the lowercase ASCII letters 'a' through 'z' are accepted.
static inline bool is_valid_protocol_character(char ch)
{
    if (ch < 'a')
        return false;
    return ch <= 'z';
}
|
|
|
|
|
|
|
|
// Returns true if `ch` may appear in a hostname.
// Everything is accepted except the NUL character and the two delimiters
// that end a hostname: '/' (start of the path) and ':' (start of the port).
static inline bool is_valid_hostname_character(char ch)
{
    switch (ch) {
    case '\0':
    case '/':
    case ':':
        return false;
    default:
        return true;
    }
}
|
|
|
|
|
|
|
|
// Returns true if `ch` is an ASCII decimal digit ('0' through '9').
static inline bool is_digit(char ch)
{
    const bool outside_digit_range = ch < '0' || ch > '9';
    return !outside_digit_range;
}
|
|
|
|
|
|
|
|
bool URL::parse(const StringView& string)
|
|
|
|
{
|
|
|
|
enum class State {
|
|
|
|
InProtocol,
|
|
|
|
InHostname,
|
|
|
|
InPort,
|
|
|
|
InPath,
|
2020-04-11 22:38:28 +00:00
|
|
|
InQuery,
|
|
|
|
InFragment,
|
2020-04-26 20:48:54 +00:00
|
|
|
InDataMimeType,
|
|
|
|
InDataPayload,
|
2019-08-10 15:27:56 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
Vector<char, 256> buffer;
|
|
|
|
State state { State::InProtocol };
|
|
|
|
|
2019-12-09 16:45:40 +00:00
|
|
|
size_t index = 0;
|
2019-08-10 15:27:56 +00:00
|
|
|
|
|
|
|
auto peek = [&] {
|
|
|
|
if (index >= string.length())
|
|
|
|
return '\0';
|
|
|
|
return string[index];
|
|
|
|
};
|
|
|
|
|
|
|
|
auto consume = [&] {
|
|
|
|
if (index >= string.length())
|
|
|
|
return '\0';
|
|
|
|
return string[index++];
|
|
|
|
};
|
|
|
|
|
|
|
|
while (index < string.length()) {
|
|
|
|
switch (state) {
|
2020-04-26 20:48:54 +00:00
|
|
|
case State::InProtocol: {
|
2019-08-10 15:27:56 +00:00
|
|
|
if (is_valid_protocol_character(peek())) {
|
|
|
|
buffer.append(consume());
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (consume() != ':')
|
|
|
|
return false;
|
2020-04-26 20:48:54 +00:00
|
|
|
|
|
|
|
m_protocol = String::copy(buffer);
|
|
|
|
|
|
|
|
if (m_protocol == "data") {
|
|
|
|
buffer.clear();
|
|
|
|
state = State::InDataMimeType;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2020-05-10 09:11:48 +00:00
|
|
|
if (m_protocol == "about") {
|
|
|
|
buffer.clear();
|
|
|
|
state = State::InPath;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2019-08-10 15:27:56 +00:00
|
|
|
if (consume() != '/')
|
|
|
|
return false;
|
|
|
|
if (consume() != '/')
|
|
|
|
return false;
|
|
|
|
if (buffer.is_empty())
|
|
|
|
return false;
|
2019-10-21 15:19:17 +00:00
|
|
|
if (m_protocol == "http")
|
|
|
|
m_port = 80;
|
|
|
|
else if (m_protocol == "https")
|
|
|
|
m_port = 443;
|
2020-05-16 00:19:17 +00:00
|
|
|
else if (m_protocol == "gemini")
|
|
|
|
m_port = 1965;
|
2020-05-09 14:14:37 +00:00
|
|
|
state = State::InHostname;
|
2020-04-26 20:48:54 +00:00
|
|
|
buffer.clear();
|
2019-08-10 15:27:56 +00:00
|
|
|
continue;
|
2020-04-26 20:48:54 +00:00
|
|
|
}
|
2019-08-10 15:27:56 +00:00
|
|
|
case State::InHostname:
|
|
|
|
if (is_valid_hostname_character(peek())) {
|
|
|
|
buffer.append(consume());
|
|
|
|
continue;
|
|
|
|
}
|
2020-05-09 14:47:05 +00:00
|
|
|
if (buffer.is_empty()) {
|
|
|
|
if (m_protocol == "file") {
|
|
|
|
m_host = "";
|
|
|
|
state = State::InPath;
|
|
|
|
continue;
|
|
|
|
}
|
2019-08-10 15:27:56 +00:00
|
|
|
return false;
|
2020-05-09 14:47:05 +00:00
|
|
|
}
|
2019-08-10 15:27:56 +00:00
|
|
|
m_host = String::copy(buffer);
|
|
|
|
buffer.clear();
|
|
|
|
if (peek() == ':') {
|
|
|
|
consume();
|
|
|
|
state = State::InPort;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (peek() == '/') {
|
|
|
|
state = State::InPath;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
case State::InPort:
|
|
|
|
if (is_digit(peek())) {
|
|
|
|
buffer.append(consume());
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (buffer.is_empty())
|
|
|
|
return false;
|
|
|
|
{
|
|
|
|
bool ok;
|
|
|
|
m_port = String::copy(buffer).to_uint(ok);
|
|
|
|
buffer.clear();
|
|
|
|
if (!ok)
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
if (peek() == '/') {
|
|
|
|
state = State::InPath;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
case State::InPath:
|
2020-04-11 22:38:28 +00:00
|
|
|
if (peek() == '?' || peek() == '#') {
|
|
|
|
m_path = String::copy(buffer);
|
|
|
|
buffer.clear();
|
|
|
|
state = peek() == '?' ? State::InQuery : State::InFragment;
|
|
|
|
consume();
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
buffer.append(consume());
|
|
|
|
continue;
|
|
|
|
case State::InQuery:
|
|
|
|
if (peek() == '#') {
|
|
|
|
m_query = String::copy(buffer);
|
|
|
|
buffer.clear();
|
|
|
|
consume();
|
|
|
|
state = State::InFragment;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
buffer.append(consume());
|
|
|
|
continue;
|
|
|
|
case State::InFragment:
|
2019-08-10 15:27:56 +00:00
|
|
|
buffer.append(consume());
|
|
|
|
continue;
|
2020-04-26 20:48:54 +00:00
|
|
|
case State::InDataMimeType: {
|
|
|
|
if (peek() != ';' && peek() != ',') {
|
|
|
|
buffer.append(consume());
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
m_data_mime_type = String::copy(buffer);
|
|
|
|
buffer.clear();
|
|
|
|
|
|
|
|
if (peek() == ';') {
|
|
|
|
consume();
|
|
|
|
if (consume() != 'b')
|
|
|
|
return false;
|
|
|
|
if (consume() != 'a')
|
|
|
|
return false;
|
|
|
|
if (consume() != 's')
|
|
|
|
return false;
|
|
|
|
if (consume() != 'e')
|
|
|
|
return false;
|
|
|
|
if (consume() != '6')
|
|
|
|
return false;
|
|
|
|
if (consume() != '4')
|
|
|
|
return false;
|
|
|
|
m_data_payload_is_base64 = true;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (consume() != ',')
|
|
|
|
return false;
|
|
|
|
|
|
|
|
state = State::InDataPayload;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case State::InDataPayload:
|
|
|
|
buffer.append(consume());
|
|
|
|
break;
|
2019-08-10 15:27:56 +00:00
|
|
|
}
|
|
|
|
}
|
2019-10-17 18:21:57 +00:00
|
|
|
if (state == State::InHostname) {
|
|
|
|
// We're still in the hostname, so e.g "http://serenityos.org"
|
|
|
|
if (buffer.is_empty())
|
|
|
|
return false;
|
|
|
|
m_host = String::copy(buffer);
|
|
|
|
m_path = "/";
|
|
|
|
}
|
2020-04-19 08:36:56 +00:00
|
|
|
if (state == State::InProtocol)
|
|
|
|
return false;
|
2020-04-11 22:38:28 +00:00
|
|
|
if (state == State::InPath)
|
|
|
|
m_path = String::copy(buffer);
|
|
|
|
if (state == State::InQuery)
|
|
|
|
m_query = String::copy(buffer);
|
|
|
|
if (state == State::InFragment)
|
|
|
|
m_fragment = String::copy(buffer);
|
2020-04-26 20:48:54 +00:00
|
|
|
if (state == State::InDataPayload)
|
|
|
|
m_data_payload = String::copy(buffer);
|
2020-04-11 22:38:28 +00:00
|
|
|
if (m_query.is_null())
|
|
|
|
m_query = "";
|
|
|
|
if (m_fragment.is_null())
|
|
|
|
m_fragment = "";
|
2019-08-10 15:27:56 +00:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Constructs a URL by parsing `string`, recording in m_valid whether the
// parse succeeded.
URL::URL(const StringView& string)
{
    const bool parsed_successfully = parse(string);
    m_valid = parsed_successfully;
}
|
|
|
|
|
|
|
|
// Serializes this URL back into a string.
//
// about: URLs are written as "about:<path>", and data: URLs as
// "data:<mimetype>[;base64],<payload>". Every other protocol is written as
// "<protocol>://<host>[:<port>]<path>[?<query>][#<fragment>]", where the port
// is omitted for file: URLs and for http/https/gemini on their default ports.
String URL::to_string() const
{
    StringBuilder builder;
    builder.append(m_protocol);

    if (m_protocol == "about") {
        builder.append(':');
        builder.append(m_path);
        return builder.to_string();
    }

    if (m_protocol == "data") {
        builder.append(':');
        builder.append(m_data_mime_type);
        if (m_data_payload_is_base64)
            builder.append(";base64");
        builder.append(',');
        builder.append(m_data_payload);
        return builder.to_string();
    }

    builder.append("://");
    builder.append(m_host);

    if (protocol() != "file") {
        const bool uses_default_port = (protocol() == "http" && port() == 80)
            || (protocol() == "https" && port() == 443)
            || (protocol() == "gemini" && port() == 1965);
        if (!uses_default_port) {
            builder.append(':');
            builder.append(String::number(m_port));
        }
    }

    builder.append(m_path);

    // Writes `separator` followed by `component`, unless `component` is empty.
    auto append_optional_component = [&](char separator, const String& component) {
        if (component.is_empty())
            return;
        builder.append(separator);
        builder.append(component);
    };
    append_optional_component('?', m_query);
    append_optional_component('#', m_fragment);

    return builder.to_string();
}
|
|
|
|
|
2019-11-18 21:04:39 +00:00
|
|
|
// Resolves `string` against this URL and returns the resulting URL.
//
// Resolution order:
//   1. If `string` parses as a URL on its own, that URL is returned.
//   2. If it starts with "//", this URL's protocol is prepended and the result
//      is returned if it parses.
//   3. If it starts with "/", it replaces this URL's path.
//   4. Otherwise it is appended to the directory portion of this URL's path
//      and the joined path is passed through FileSystemPath.
//
// NOTE(review): in cases 3 and 4 the result is a copy of this URL with only
// the path replaced, so this URL's query and fragment are carried over.
URL URL::complete_url(const String& string) const
{
    URL url(string);
    if (url.is_valid())
        return url;

    if (string.starts_with("//")) {
        URL protocol_relative_url(String::format("%s:%s", m_protocol.characters(), string.characters()));
        if (protocol_relative_url.is_valid())
            return protocol_relative_url;
    }

    if (string.starts_with("/")) {
        url = *this;
        url.set_path(string);
        return url;
    }

    StringBuilder builder;
    FileSystemPath fspath(path());
    builder.append('/');

    // An empty path has no last character to inspect, so guard the index;
    // such a path is treated as not ending in a slash.
    bool document_url_ends_in_slash = path().length() > 0 && path()[path().length() - 1] == '/';

    // Keep every path component, except that the last one is dropped when the
    // document path does not end in a slash.
    for (size_t i = 0; i < fspath.parts().size(); ++i) {
        if (i == fspath.parts().size() - 1 && !document_url_ends_in_slash)
            break;
        builder.append(fspath.parts()[i]);
        builder.append('/');
    }
    builder.append(string);
    auto built = builder.to_string();
    fspath = FileSystemPath(built);

    url = *this;
    url.set_path(fspath.string());
    return url;
}
|
|
|
|
|
2020-04-11 21:07:23 +00:00
|
|
|
void URL::set_protocol(const String& protocol)
|
|
|
|
{
|
|
|
|
m_protocol = protocol;
|
|
|
|
m_valid = compute_validity();
|
|
|
|
}
|
|
|
|
|
|
|
|
void URL::set_host(const String& host)
|
|
|
|
{
|
|
|
|
m_host = host;
|
|
|
|
m_valid = compute_validity();
|
|
|
|
}
|
|
|
|
|
|
|
|
void URL::set_path(const String& path)
|
|
|
|
{
|
|
|
|
m_path = path;
|
|
|
|
m_valid = compute_validity();
|
|
|
|
}
|
|
|
|
|
|
|
|
void URL::set_query(const String& query)
|
|
|
|
{
|
|
|
|
m_query = query;
|
|
|
|
}
|
|
|
|
|
2020-04-11 22:38:13 +00:00
|
|
|
void URL::set_fragment(const String& fragment)
|
|
|
|
{
|
|
|
|
m_fragment = fragment;
|
|
|
|
}
|
|
|
|
|
2020-04-11 21:07:23 +00:00
|
|
|
bool URL::compute_validity() const
|
|
|
|
{
|
|
|
|
// FIXME: This is by no means complete.
|
|
|
|
if (m_protocol.is_empty())
|
|
|
|
return false;
|
2020-04-19 08:36:56 +00:00
|
|
|
if (m_protocol == "file") {
|
2020-04-11 21:07:23 +00:00
|
|
|
if (m_path.is_empty())
|
|
|
|
return false;
|
2020-04-19 08:36:56 +00:00
|
|
|
} else {
|
|
|
|
if (m_host.is_empty())
|
|
|
|
return false;
|
2020-04-11 21:07:23 +00:00
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2020-04-18 20:02:04 +00:00
|
|
|
// Builds a file: URL whose path is `path`.
URL URL::create_with_file_protocol(const String& path)
{
    URL file_url;
    file_url.set_protocol("file");
    file_url.set_path(path);
    return file_url;
}
|
|
|
|
|
2020-04-19 08:55:59 +00:00
|
|
|
// Interprets `url_or_path` as a URL if it parses as one; otherwise treats it
// as a filesystem path, canonicalizes it and wraps it in a file: URL.
URL URL::create_with_url_or_path(const String& url_or_path)
{
    URL parsed_url = url_or_path;
    if (!parsed_url.is_valid()) {
        String canonical_path = canonicalized_path(url_or_path);
        return URL::create_with_file_protocol(canonical_path);
    }
    return parsed_url;
}
|
|
|
|
|
2020-05-05 21:56:35 +00:00
|
|
|
// Returns the basename of this URL's path as computed by FileSystemPath, or a
// default-constructed String when the URL is not valid.
String URL::basename() const
{
    if (m_valid)
        return FileSystemPath(m_path).basename();
    return {};
}
|
|
|
|
|
2019-08-10 15:27:56 +00:00
|
|
|
}
|