mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2024-11-22 07:30:19 +00:00
AK: Replace old URL parser with new URLParser::parse()
This replaces the old URL::parse() and URL::complete_url() parsing mechanisms with the new spec-compliant URLParser::parse().
This commit is contained in:
parent
1697f3c35b
commit
81f03e7a5d
Notes:
sideshowbarker
2024-07-18 17:04:14 +09:00
Author: https://github.com/MaxWipfli Commit: https://github.com/SerenityOS/serenity/commit/81f03e7a5d2 Pull-request: https://github.com/SerenityOS/serenity/pull/7478 Reviewed-by: https://github.com/awesomekling
2 changed files with 11 additions and 276 deletions
286
AK/URL.cpp
286
AK/URL.cpp
|
@ -5,9 +5,11 @@
|
|||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
#include <AK/Debug.h>
|
||||
#include <AK/LexicalPath.h>
|
||||
#include <AK/StringBuilder.h>
|
||||
#include <AK/URL.h>
|
||||
#include <AK/URLParser.h>
|
||||
#include <AK/Utf8View.h>
|
||||
|
||||
namespace AK {
|
||||
|
@ -32,231 +34,16 @@ constexpr bool is_ascii_hex_digit(u32 code_point)
|
|||
return is_ascii_digit(code_point) || (code_point >= 'a' && code_point <= 'f') || (code_point >= 'A' && code_point <= 'F');
|
||||
}
|
||||
|
||||
static inline bool is_valid_scheme_character(char ch)
|
||||
{
|
||||
return ch >= 'a' && ch <= 'z';
|
||||
}
|
||||
|
||||
static inline bool is_valid_hostname_character(char ch)
|
||||
{
|
||||
return ch && ch != '/' && ch != ':';
|
||||
}
|
||||
|
||||
static inline bool is_digit(char ch)
|
||||
{
|
||||
return ch >= '0' && ch <= '9';
|
||||
}
|
||||
|
||||
bool URL::parse(const StringView& string)
|
||||
{
|
||||
if (string.is_null())
|
||||
return false;
|
||||
|
||||
enum class State {
|
||||
InScheme,
|
||||
InHostname,
|
||||
InPort,
|
||||
InPath,
|
||||
InQuery,
|
||||
InFragment,
|
||||
InDataMimeType,
|
||||
InDataPayload,
|
||||
};
|
||||
|
||||
Vector<char, 256> buffer;
|
||||
State state { State::InScheme };
|
||||
|
||||
size_t index = 0;
|
||||
|
||||
auto peek = [&] {
|
||||
if (index >= string.length())
|
||||
return '\0';
|
||||
return string[index];
|
||||
};
|
||||
|
||||
auto consume = [&] {
|
||||
if (index >= string.length())
|
||||
return '\0';
|
||||
return string[index++];
|
||||
};
|
||||
|
||||
while (index < string.length()) {
|
||||
switch (state) {
|
||||
case State::InScheme: {
|
||||
if (is_valid_scheme_character(peek())) {
|
||||
buffer.append(consume());
|
||||
continue;
|
||||
}
|
||||
if (consume() != ':')
|
||||
return false;
|
||||
|
||||
m_scheme = String::copy(buffer);
|
||||
|
||||
if (m_scheme == "data") {
|
||||
buffer.clear();
|
||||
m_host = "";
|
||||
state = State::InDataMimeType;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (m_scheme == "about") {
|
||||
buffer.clear();
|
||||
m_host = "";
|
||||
state = State::InPath;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (consume() != '/')
|
||||
return false;
|
||||
if (consume() != '/')
|
||||
return false;
|
||||
if (buffer.is_empty())
|
||||
return false;
|
||||
state = State::InHostname;
|
||||
buffer.clear();
|
||||
continue;
|
||||
}
|
||||
case State::InHostname:
|
||||
if (is_valid_hostname_character(peek())) {
|
||||
buffer.append(consume());
|
||||
continue;
|
||||
}
|
||||
if (buffer.is_empty()) {
|
||||
if (m_scheme == "file") {
|
||||
m_host = "";
|
||||
state = State::InPath;
|
||||
continue;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
m_host = String::copy(buffer);
|
||||
buffer.clear();
|
||||
if (peek() == ':') {
|
||||
consume();
|
||||
state = State::InPort;
|
||||
continue;
|
||||
}
|
||||
if (peek() == '/') {
|
||||
state = State::InPath;
|
||||
continue;
|
||||
}
|
||||
return false;
|
||||
case State::InPort:
|
||||
if (is_digit(peek())) {
|
||||
buffer.append(consume());
|
||||
continue;
|
||||
}
|
||||
if (buffer.is_empty())
|
||||
return false;
|
||||
{
|
||||
auto port_opt = String::copy(buffer).to_uint();
|
||||
buffer.clear();
|
||||
if (!port_opt.has_value())
|
||||
return false;
|
||||
m_port = port_opt.value();
|
||||
}
|
||||
if (peek() == '/') {
|
||||
state = State::InPath;
|
||||
continue;
|
||||
}
|
||||
return false;
|
||||
case State::InPath:
|
||||
if (peek() == '?' || peek() == '#') {
|
||||
m_path = String::copy(buffer);
|
||||
buffer.clear();
|
||||
state = peek() == '?' ? State::InQuery : State::InFragment;
|
||||
consume();
|
||||
continue;
|
||||
}
|
||||
buffer.append(consume());
|
||||
continue;
|
||||
case State::InQuery:
|
||||
if (peek() == '#') {
|
||||
m_query = String::copy(buffer);
|
||||
buffer.clear();
|
||||
consume();
|
||||
state = State::InFragment;
|
||||
continue;
|
||||
}
|
||||
buffer.append(consume());
|
||||
continue;
|
||||
case State::InFragment:
|
||||
buffer.append(consume());
|
||||
continue;
|
||||
case State::InDataMimeType: {
|
||||
if (peek() != ';' && peek() != ',') {
|
||||
buffer.append(consume());
|
||||
continue;
|
||||
}
|
||||
|
||||
m_data_mime_type = String::copy(buffer);
|
||||
buffer.clear();
|
||||
|
||||
if (peek() == ';') {
|
||||
consume();
|
||||
if (consume() != 'b')
|
||||
return false;
|
||||
if (consume() != 'a')
|
||||
return false;
|
||||
if (consume() != 's')
|
||||
return false;
|
||||
if (consume() != 'e')
|
||||
return false;
|
||||
if (consume() != '6')
|
||||
return false;
|
||||
if (consume() != '4')
|
||||
return false;
|
||||
m_data_payload_is_base64 = true;
|
||||
}
|
||||
|
||||
if (consume() != ',')
|
||||
return false;
|
||||
|
||||
state = State::InDataPayload;
|
||||
break;
|
||||
}
|
||||
case State::InDataPayload:
|
||||
buffer.append(consume());
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (state == State::InHostname) {
|
||||
// We're still in the hostname, so e.g "http://serenityos.org"
|
||||
if (buffer.is_empty())
|
||||
return false;
|
||||
m_host = String::copy(buffer);
|
||||
m_path = "/";
|
||||
}
|
||||
if (state == State::InScheme)
|
||||
return false;
|
||||
if (state == State::InPath)
|
||||
m_path = String::copy(buffer);
|
||||
if (state == State::InQuery)
|
||||
m_query = String::copy(buffer);
|
||||
if (state == State::InFragment)
|
||||
m_fragment = String::copy(buffer);
|
||||
if (state == State::InDataPayload)
|
||||
m_data_payload = URL::percent_decode(String::copy(buffer));
|
||||
if (state == State::InPort) {
|
||||
auto port_opt = String::copy(buffer).to_uint();
|
||||
if (port_opt.has_value())
|
||||
m_port = port_opt.value();
|
||||
}
|
||||
|
||||
if (m_query.is_null())
|
||||
m_query = "";
|
||||
if (m_fragment.is_null())
|
||||
m_fragment = "";
|
||||
|
||||
if (!m_port && scheme_requires_port(m_scheme))
|
||||
set_port(default_port_for_scheme(m_scheme));
|
||||
|
||||
return compute_validity();
|
||||
}
|
||||
|
||||
// FIXME: It could make sense to force users of URL to use URLParser::parse() explicitly instead of using a constructor.
|
||||
URL::URL(const StringView& string)
|
||||
: URL(URLParser::parse({}, string))
|
||||
{
|
||||
m_valid = parse(string);
|
||||
if constexpr (URL_PARSER_DEBUG) {
|
||||
if (m_valid)
|
||||
dbgln("URL constructor: Parsed URL to be '{}'.", serialize());
|
||||
else
|
||||
dbgln("URL constructor: Parsed URL to be invalid.");
|
||||
}
|
||||
}
|
||||
|
||||
String URL::path() const
|
||||
|
@ -318,58 +105,7 @@ URL URL::complete_url(const String& string) const
|
|||
if (!is_valid())
|
||||
return {};
|
||||
|
||||
URL url(string);
|
||||
if (url.is_valid())
|
||||
return url;
|
||||
|
||||
if (scheme() == "data")
|
||||
return {};
|
||||
|
||||
if (string.starts_with("//")) {
|
||||
URL url(String::formatted("{}:{}", m_scheme, string));
|
||||
if (url.is_valid())
|
||||
return url;
|
||||
}
|
||||
|
||||
if (string.starts_with("/")) {
|
||||
url = *this;
|
||||
url.set_path(string);
|
||||
return url;
|
||||
}
|
||||
|
||||
if (string.starts_with("#")) {
|
||||
url = *this;
|
||||
url.set_fragment(string.substring(1, string.length() - 1));
|
||||
return url;
|
||||
}
|
||||
|
||||
StringBuilder builder;
|
||||
LexicalPath lexical_path(path());
|
||||
builder.append('/');
|
||||
|
||||
bool document_url_ends_in_slash = path()[path().length() - 1] == '/';
|
||||
|
||||
for (size_t i = 0; i < lexical_path.parts().size(); ++i) {
|
||||
if (i == lexical_path.parts().size() - 1 && !document_url_ends_in_slash)
|
||||
break;
|
||||
builder.append(lexical_path.parts()[i]);
|
||||
builder.append('/');
|
||||
}
|
||||
builder.append(string);
|
||||
auto built = builder.to_string();
|
||||
lexical_path = LexicalPath(built);
|
||||
|
||||
built = lexical_path.string();
|
||||
if (string.ends_with('/') && !built.ends_with('/')) {
|
||||
builder.clear();
|
||||
builder.append(built);
|
||||
builder.append('/');
|
||||
built = builder.to_string();
|
||||
}
|
||||
|
||||
url = *this;
|
||||
url.set_path(built);
|
||||
return url;
|
||||
return URLParser::parse({}, string, this);
|
||||
}
|
||||
|
||||
void URL::set_scheme(const String& scheme)
|
||||
|
|
1
AK/URL.h
1
AK/URL.h
|
@ -121,7 +121,6 @@ private:
|
|||
{
|
||||
}
|
||||
|
||||
bool parse(const StringView&);
|
||||
bool compute_validity() const;
|
||||
String serialize_data_url() const;
|
||||
|
||||
|
|
Loading…
Reference in a new issue