LibWeb: Sort URLSearchParams using UTF-16 code units

We were previously sorting using code points, which could give the wrong result for certain inputs. Fixes the last two failing tests on https://wpt.live/url/urlsearchparams-sort.any.html
Author: https://github.com/shannonbooth Commit: https://github.com/LadybirdBrowser/ladybird/commit/d56da8cf9a0 Pull-request: https://github.com/LadybirdBrowser/ladybird/pull/1081
2024-11-25 17:10:23 +00:00 · 2024-08-15 20:40:10 +12:00 · 2024-08-15 20:40:10 +12:00 · d56da8cf9a · 2024-08-17 05:45:28 +00:00
commit d56da8cf9a
parent cff3e78a14
3 changed files with 31 additions and 13 deletions
--- a/Tests/LibWeb/Text/expected/URL/url-search-params-sort-utf16-code-units.txt
+++ b/Tests/LibWeb/Text/expected/URL/url-search-params-sort-utf16-code-units.txt
@ -0,0 +1,4 @@
 %EF%BF%BC=&%EF%BF%BD=x&%EF%BF%BD=a
 '\ufffc' => ''
 '\ufffd' => '\u0078'
 '\ufffd' => '\u0061'
--- a/Tests/LibWeb/Text/input/URL/url-search-params-sort-utf16-code-units.html
+++ b/Tests/LibWeb/Text/input/URL/url-search-params-sort-utf16-code-units.html
@ -0,0 +1,17 @@
 <script src="../include.js"></script>
 <script>
    // Rewrites `str` so that every UTF-16 code unit appears as a `\uXXXX` escape
    // (lowercase hex, zero-padded to four digits). The pattern has no `u` flag, so
    // surrogate pairs are escaped as two separate units.
    function escapeUnicode(str) {
        const escapeCodeUnit = (unit) => {
            const hex = unit.charCodeAt(0).toString(16);
            return `\\u${hex.padStart(4, '0')}`;
        };
        return str.replace(/[\s\S]/g, escapeCodeUnit);
    }
    // Sorts a query string whose names are \uFFFD, \uFFFC, \uFFFD and prints the
    // serialized result followed by each escaped name/value pair. The expected
    // output lists \uFFFC first, then the two \uFFFD entries in their input
    // order (x, then a).
    test(() => {
        const searchParams = new URLSearchParams("\uFFFD=x&\uFFFC&\uFFFD=a");
        searchParams.sort();
        println(searchParams.toString());
        for (const entry of searchParams) {
            const name = escapeUnicode(entry[0]);
            const value = escapeUnicode(entry[1]);
            println(`'${name}' => '${value}'`);
        }
    });
 </script>
--- a/Userland/Libraries/LibWeb/DOMURL/URLSearchParams.cpp
+++ b/Userland/Libraries/LibWeb/DOMURL/URLSearchParams.cpp
@ -325,26 +325,23 @@ void URLSearchParams::set(String const& name, String const& value)
    update();
 }
 // https://url.spec.whatwg.org/#dom-urlsearchparams-sort
 void URLSearchParams::sort()
 {
    // 1. Sort all name-value pairs, if any, by their names. Sorting must be done by comparison of code units. The relative order between name-value pairs with equal names must be preserved.
    insertion_sort(m_list, [](auto& a, auto& b) {
-        Utf8View a_code_points { a.name };
+        // FIXME: There should be a way to do this without converting to utf16
-        Utf8View b_code_points { b.name };
+        auto a_utf16 = MUST(utf8_to_utf16(a.name));
        auto b_utf16 = MUST(utf8_to_utf16(b.name));
-        if (a_code_points.starts_with(b_code_points))
+        auto common_length = min(a_utf16.size(), b_utf16.size());
            return false;
        if (b_code_points.starts_with(a_code_points))
            return true;
-        for (auto k = a_code_points.begin(), l = b_code_points.begin();
+        for (size_t position = 0; position < common_length; ++position) {
-             k != a_code_points.end() && l != b_code_points.end();
+            if (a_utf16[position] != b_utf16[position])
-             ++k, ++l) {
+                return a_utf16[position] < b_utf16[position];
            if (*k != *l) {
                return *k < *l;
            }
        }
-        VERIFY_NOT_REACHED();
+
        return a_utf16.size() < b_utf16.size();
    });
    // 2. Update this.