LibWeb: Sort URLSearchParams using UTF-16 code units

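We were previously sorting by code points, which could give the wrong result for certain inputs: characters outside the Basic Multilingual Plane are encoded in UTF-16 as surrogate pairs (0xD800-0xDFFF), so they compare lower than code units in the range 0xE000-0xFFFF even though their code points are higher. Fixes the last two failing tests on: https://wpt.live/url/urlsearchparams-sort.any.html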
Author: https://github.com/shannonbooth Commit: https://github.com/LadybirdBrowser/ladybird/commit/d56da8cf9a0 Pull-request: https://github.com/LadybirdBrowser/ladybird/pull/1081
2024-11-25 09:00:22 +00:00 · 2024-08-15 20:40:10 +12:00 · 2024-08-15 20:40:10 +12:00 · d56da8cf9a · 2024-08-17 05:45:28 +00:00
commit d56da8cf9a
parent cff3e78a14
3 changed files with 31 additions and 13 deletions
--- a/Tests/LibWeb/Text/expected/URL/url-search-params-sort-utf16-code-units.txt
+++ b/Tests/LibWeb/Text/expected/URL/url-search-params-sort-utf16-code-units.txt
@ -0,0 +1,4 @@
+%EF%BF%BC=&%EF%BF%BD=x&%EF%BF%BD=a
+'\ufffc' => ''
+'\ufffd' => '\u0078'
+'\ufffd' => '\u0061'
--- a/Tests/LibWeb/Text/input/URL/url-search-params-sort-utf16-code-units.html
+++ b/Tests/LibWeb/Text/input/URL/url-search-params-sort-utf16-code-units.html
@ -0,0 +1,17 @@
+<script src="../include.js"></script>
+<script>
+    function escapeUnicode(str) {
+        return str.replace(/[\s\S]/g, function(c) {
+            return '\\u' + ('0000' + c.charCodeAt(0).toString(16)).slice(-4);
+        });
+    }
+
+    test(() => {
+        let params = new URLSearchParams("\uFFFD=x&\uFFFC&\uFFFD=a");
+        params.sort();
+        println(params.toString())
+        for (const [key, value] of params) {
+            println(`'${escapeUnicode(key)}' => '${escapeUnicode(value)}'`);
+        }
+    })
+</script>
--- a/Userland/Libraries/LibWeb/DOMURL/URLSearchParams.cpp
+++ b/Userland/Libraries/LibWeb/DOMURL/URLSearchParams.cpp
@ -325,26 +325,23 @@ void URLSearchParams::set(String const& name, String const& value)
    update();
 }

+// https://url.spec.whatwg.org/#dom-urlsearchparams-sort
 void URLSearchParams::sort()
 {
    // 1. Sort all name-value pairs, if any, by their names. Sorting must be done by comparison of code units. The relative order between name-value pairs with equal names must be preserved.
    insertion_sort(m_list, [](auto& a, auto& b) {
-        Utf8View a_code_points { a.name };
-        Utf8View b_code_points { b.name };
+        // FIXME: There should be a way to do this without converting to utf16
+        auto a_utf16 = MUST(utf8_to_utf16(a.name));
+        auto b_utf16 = MUST(utf8_to_utf16(b.name));

-        if (a_code_points.starts_with(b_code_points))
-            return false;
-        if (b_code_points.starts_with(a_code_points))
-            return true;
+        auto common_length = min(a_utf16.size(), b_utf16.size());

-        for (auto k = a_code_points.begin(), l = b_code_points.begin();
-             k != a_code_points.end() && l != b_code_points.end();
-             ++k, ++l) {
-            if (*k != *l) {
-                return *k < *l;
-            }
+        for (size_t position = 0; position < common_length; ++position) {
+            if (a_utf16[position] != b_utf16[position])
+                return a_utf16[position] < b_utf16[position];
        }
-        VERIFY_NOT_REACHED();
+
+        return a_utf16.size() < b_utf16.size();
    });

    // 2. Update this.