LibURL: Allow inputs containing only whitespace

The check `if (start_index >= end_index) return {};`, which was meant to prevent an out-of-bounds access when trimming leading and trailing whitespace from the input, was preventing valid URLs (inputs containing only whitespace) from being parsed. Instead, prevent start_index from ever getting above end_index in the first place, and don't treat empty inputs as an error. Fixes one WPT test on: https://wpt.live/url/url-constructor.any.html
Author: https://github.com/shannonbooth Commit: https://github.com/LadybirdBrowser/ladybird/commit/d6af5bf5eb8 Pull-request: https://github.com/LadybirdBrowser/ladybird/pull/973 Reviewed-by: https://github.com/tcl3 ✅
2024-11-21 15:10:19 +00:00 · 2024-08-06 02:00:52 +12:00 · 2024-08-06 02:00:52 +12:00 · d6af5bf5eb · 2024-08-05 16:22:19 +00:00
commit d6af5bf5eb
parent 4f5af3e90e
3 changed files with 30 additions and 16 deletions
--- a/Tests/LibWeb/Text/expected/URL/url.txt
+++ b/Tests/LibWeb/Text/expected/URL/url.txt
@@ -108,6 +108,16 @@ port => '9000'
 pathname => '/path'
 search => '?query'
 hash => '#frag'
+new URL('  \t', 'http://ladybird.org/foo/bar')
+protocol => 'http:'
+username => ''
+password => ''
+host => 'ladybird.org'
+hostname => 'ladybird.org'
+port => ''
+pathname => '/foo/bar'
+search => ''
+hash => ''
 =========================================
 URL.parse('ftp://serenityos.org:21', undefined)
 protocol => 'ftp:'
@@ -219,3 +229,13 @@ port => '9000'
 pathname => '/path'
 search => '?query'
 hash => '#frag'
+URL.parse('  \t', 'http://ladybird.org/foo/bar')
+protocol => 'http:'
+username => ''
+password => ''
+host => 'ladybird.org'
+hostname => 'ladybird.org'
+port => ''
+pathname => '/foo/bar'
+search => ''
+hash => ''
--- a/Tests/LibWeb/Text/input/URL/url.html
+++ b/Tests/LibWeb/Text/input/URL/url.html
@@ -32,6 +32,7 @@
            { input: 'file://a%C2%ADb/p' },
            { input: 'http://user%20name:pa%40ss%3Aword@www.ladybird.org' },
            { input: 'h\tt\nt\rp://h\to\ns\rt:9\t0\n0\r0/p\ta\nt\rh?q\tu\ne\rry#f\tr\na\rg' },
+            { input: '  \t', base: 'http://ladybird.org/foo/bar' },
        ];

        for (url of urls) {
--- a/Userland/Libraries/LibURL/Parser.cpp
+++ b/Userland/Libraries/LibURL/Parser.cpp
@@ -808,29 +808,22 @@ URL Parser::basic_parse(StringView raw_input, Optional<URL> const& base_url, Opt
        // 2. If input contains any leading or trailing C0 control or space, invalid-URL-unit validation error.
        // 3. Remove any leading and trailing C0 control or space from input.
        bool has_validation_error = false;
-        for (size_t i = 0; i < raw_input.length(); ++i) {
-            u8 ch = raw_input[i];
-            if (is_ascii_c0_control_or_space(ch)) {
-                ++start_index;
-                has_validation_error = true;
-            } else {
+
+        for (; start_index < raw_input.length(); ++start_index) {
+            if (!is_ascii_c0_control_or_space(raw_input[start_index]))
                break;
-            }
+            has_validation_error = true;
        }
-        for (ssize_t i = raw_input.length() - 1; i >= 0; --i) {
-            u8 ch = raw_input[i];
-            if (is_ascii_c0_control_or_space(ch)) {
-                --end_index;
-                has_validation_error = true;
-            } else {
+
+        for (; end_index > start_index; --end_index) {
+            if (!is_ascii_c0_control_or_space(raw_input[end_index - 1]))
                break;
-            }
+            has_validation_error = true;
        }
+
        if (has_validation_error)
            report_validation_error();
    }
-    if (start_index >= end_index)
-        return {};

    ByteString processed_input = raw_input.substring_view(start_index, end_index - start_index);