Explorar o código

LibURL: Also remove carriage returns from URL input

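The Infra Standard's definition of "ASCII tab or newline" (used by the URL parser) covers U+0009 TAB, U+000A LF, and U+000D CR, so CR must be stripped from the input as well.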

This fixes 3 subtests in:

https://wpt.live/url/url-constructor.any.html
Shannon Booth hai 1 ano
pai
achega
41cf9f6fe3

+ 20 - 0
Tests/LibWeb/Text/expected/URL/url.txt

@@ -98,6 +98,16 @@ port => ''
 pathname => '/'
 search => ''
 hash => ''
+new URL('h\tt\nt\rp://h\to\ns\rt:9\t0\n0\r0/p\ta\nt\rh?q\tu\ne\rry#f\tr\na\rg', undefined)
+protocol => 'http:'
+username => ''
+password => ''
+host => 'host:9000'
+hostname => 'host'
+port => '9000'
+pathname => '/path'
+search => '?query'
+hash => '#frag'
 =========================================
 URL.parse('ftp://serenityos.org:21', undefined)
 protocol => 'ftp:'
@@ -199,3 +209,13 @@ port => ''
 pathname => '/'
 search => ''
 hash => ''
+URL.parse('h\tt\nt\rp://h\to\ns\rt:9\t0\n0\r0/p\ta\nt\rh?q\tu\ne\rry#f\tr\na\rg', undefined)
+protocol => 'http:'
+username => ''
+password => ''
+host => 'host:9000'
+hostname => 'host'
+port => '9000'
+pathname => '/path'
+search => '?query'
+hash => '#frag'

+ 12 - 4
Tests/LibWeb/Text/input/URL/url.html

@@ -1,6 +1,13 @@
 <script src="../include.js"></script>
 <script>
     test(() => {
+        function escapeWhitespace(str) {
+            return str
+                .replace(/\t/g, '\\t')
+                .replace(/\n/g, '\\n')
+                .replace(/\r/g, '\\r');
+        }
+
         function printURL(url) {
             println(`protocol => '${url.protocol}'`);
             println(`username => '${url.username}'`);
@@ -24,13 +31,14 @@
             { input: '//d:/..', base: 'file:///C:/a/b' },
             { input: 'file://a%C2%ADb/p' },
             { input: 'http://user%20name:pa%40ss%3Aword@www.ladybird.org' },
+            { input: 'h\tt\nt\rp://h\to\ns\rt:9\t0\n0\r0/p\ta\nt\rh?q\tu\ne\rry#f\tr\na\rg' },
         ];
 
         for (url of urls) {
             if (url.base === undefined)
-                println(`new URL('${url.input}', ${url.base})`);
+                println(`new URL('${escapeWhitespace(url.input)}', ${url.base})`);
             else
-                println(`new URL('${url.input}', '${url.base}')`);
+                println(`new URL('${escapeWhitespace(url.input)}', '${escapeWhitespace(url.base)}')`);
 
             printURL(new URL(url.input, url.base));
         }
@@ -39,9 +47,9 @@
 
         for (url of urls) {
             if (url.base === undefined)
-                println(`URL.parse('${url.input}', ${url.base})`);
+                println(`URL.parse('${escapeWhitespace(url.input)}', ${url.base})`);
             else
-                println(`URL.parse('${url.input}', '${url.base}')`);
+                println(`URL.parse('${escapeWhitespace(url.input)}', '${escapeWhitespace(url.base)}')`);
 
             printURL(URL.parse(url.input, url.base));
         }

+ 2 - 2
Userland/Libraries/LibURL/Parser.cpp

@@ -845,9 +845,9 @@ URL Parser::basic_parse(StringView raw_input, Optional<URL> const& base_url, Opt
     // 2. If input contains any ASCII tab or newline, invalid-URL-unit validation error.
     // 3. Remove all ASCII tab or newline from input.
     for (auto const ch : processed_input) {
-        if (ch == '\t' || ch == '\n') {
+        if (ch == '\t' || ch == '\n' || ch == '\r') {
             report_validation_error();
-            processed_input = processed_input.replace("\t"sv, ""sv, ReplaceMode::All).replace("\n"sv, ""sv, ReplaceMode::All);
+            processed_input = processed_input.replace("\t"sv, ""sv, ReplaceMode::All).replace("\n"sv, ""sv, ReplaceMode::All).replace("\r"sv, ""sv, ReplaceMode::All);
             break;
         }
     }