From 1697f3c35b2fdf3312c79452029e03949ae5cbef Mon Sep 17 00:00:00 2001 From: Max Wipfli Date: Tue, 25 May 2021 22:32:39 +0200 Subject: [PATCH] AK: Add spec-compliant URL serialization methods This adds URL serialization methods which are more in line with the specification. The serialize_for_display() method should be used e.g. in the browser address bar, and as per the spec should not display username and password. Furthermore, it could decode most percent-encoded code points, although that is not implemented yet. --- AK/URL.cpp | 122 +++++++++++++++++++++++++++++++++++++++++++++++++++++ AK/URL.h | 9 ++++ 2 files changed, 131 insertions(+) diff --git a/AK/URL.cpp b/AK/URL.cpp index 6d811c831db..22d4f189fe1 100644 --- a/AK/URL.cpp +++ b/AK/URL.cpp @@ -519,6 +519,128 @@ bool URL::is_special_scheme(const StringView& scheme) return scheme.is_one_of("ftp", "file", "http", "https", "ws", "wss"); } +String URL::serialize_data_url() const +{ + VERIFY(m_scheme == "data"); + VERIFY(!m_data_mime_type.is_null()); + VERIFY(!m_data_payload.is_null()); + StringBuilder builder; + builder.append(m_scheme); + builder.append(':'); + builder.append(m_data_mime_type); + if (m_data_payload_is_base64) + builder.append(";base64"); + builder.append(','); + // NOTE: The specification does not say anything about encoding this, but we should encode at least control and non-ASCII + // characters (since this is also a valid representation of the same data URL). + builder.append(URL::percent_encode(m_data_payload, PercentEncodeSet::C0Control)); + return builder.to_string(); +} + +// https://url.spec.whatwg.org/#concept-url-serializer +String URL::serialize(ExcludeFragment exclude_fragment) const +{ + if (m_scheme == "data") + return serialize_data_url(); + StringBuilder builder; + builder.append(m_scheme); + builder.append(':'); + + if (!m_host.is_null()) { + builder.append("//"); + + if (includes_credentials()) { + builder.append(percent_encode(m_username, PercentEncodeSet::Userinfo)); + if (!m_password.is_empty()) { + builder.append(':'); + builder.append(percent_encode(m_password, PercentEncodeSet::Userinfo)); + } + builder.append('@'); + } + + builder.append(m_host); + if (m_port != 0) + builder.appendff(":{}", m_port); + } + + if (cannot_be_a_base_url()) { + builder.append(percent_encode(m_paths[0], PercentEncodeSet::Path)); + } else { + // FIXME: Temporary m_path hack + if (!m_path.is_null()) { + builder.append(path()); + } else { + if (m_host.is_null() && m_paths.size() > 1 && m_paths[0].is_empty()) + builder.append("/."); + for (auto& segment : m_paths) { + builder.append('/'); + builder.append(percent_encode(segment, PercentEncodeSet::Path)); + } + } + } + + if (!m_query.is_null()) { + builder.append('?'); + builder.append(percent_encode(m_query, is_special() ? URL::PercentEncodeSet::SpecialQuery : URL::PercentEncodeSet::Query)); + } + + if (exclude_fragment == ExcludeFragment::No && !m_fragment.is_null()) { + builder.append('#'); + builder.append(percent_encode(m_fragment, PercentEncodeSet::Fragment)); + } + + return builder.to_string(); +} + +// https://url.spec.whatwg.org/#url-rendering +// NOTE: This does e.g. not display credentials. +// FIXME: Parts of the URL other than the host should have their sequences of percent-encoded bytes replaced with code points +// resulting from percent-decoding those sequences converted to bytes, unless that renders those sequences invisible. +String URL::serialize_for_display() const +{ + VERIFY(m_valid); + if (m_scheme == "data") + return serialize_data_url(); + StringBuilder builder; + builder.append(m_scheme); + builder.append(':'); + + if (!m_host.is_null()) { + builder.append("//"); + builder.append(m_host); + if (m_port != 0) + builder.appendff(":{}", m_port); + } + + if (cannot_be_a_base_url()) { + builder.append(percent_encode(m_paths[0], PercentEncodeSet::Path)); + } else { + // FIXME: Temporary m_path hack + if (!m_path.is_null()) { + builder.append(path()); + } else { + if (m_host.is_null() && m_paths.size() > 1 && m_paths[0].is_empty()) + builder.append("/."); + for (auto& segment : m_paths) { + builder.append('/'); + builder.append(percent_encode(segment, PercentEncodeSet::Path)); + } + } + } + + if (!m_query.is_null()) { + builder.append('?'); + builder.append(percent_encode(m_query, is_special() ? URL::PercentEncodeSet::SpecialQuery : URL::PercentEncodeSet::Query)); + } + + if (!m_fragment.is_null()) { + builder.append('#'); + builder.append(percent_encode(m_fragment, PercentEncodeSet::Fragment)); + } + + return builder.to_string(); +} + String URL::basename() const { if (!m_valid) diff --git a/AK/URL.h b/AK/URL.h index f71f9209297..c83b87f1ba7 100644 --- a/AK/URL.h +++ b/AK/URL.h @@ -30,6 +30,11 @@ public: EncodeURI }; + enum class ExcludeFragment { + No, + Yes + }; + URL() = default; URL(const StringView&); URL(const char* string) @@ -78,6 +83,9 @@ public: return percent_encode(to_string(), PercentEncodeSet::EncodeURI); } + String serialize(ExcludeFragment = ExcludeFragment::No) const; + String serialize_for_display() const; + URL complete_url(const String&) const; bool data_payload_is_base64() const { return m_data_payload_is_base64; } @@ -115,6 +123,7 @@ private: bool parse(const StringView&); bool compute_validity() const; + String serialize_data_url() const; static void append_percent_encoded_if_necessary(StringBuilder&, u32 code_point, PercentEncodeSet set = PercentEncodeSet::Userinfo); static void append_percent_encoded(StringBuilder&, u32 code_point);