فهرست منبع

LibC: Consume all whitespace in `scanf` if present in format

We were consuming all whitespace from the format lexer, but not from the
input lexer - that was left to the individual conversion code. However,
the conversion specifier '[' did not skip leading whitespace in the
input, causing whitespace to end up in the output variables.

Fix this by always consuming all whitespace and removing the whitespace
logic from the conversion code.
Jelle Raaijmakers 2 سال پیش
والد
کامیت
325263f0e8
2 فایلهای تغییر یافته به همراه 6 افزوده شده و 20 حذف شده
  1. 1 0
      Tests/LibC/TestScanf.cpp
  2. 5 20
      Userland/Libraries/LibC/scanf.cpp

+ 1 - 0
Tests/LibC/TestScanf.cpp

@@ -180,6 +180,7 @@ const TestSuite test_suites[] {
     { "%d %n", "1 a", 1, 2, { intarg0, intarg1 }, { to_value_t(1), to_value_t(2) } },
     { "%*d", "  42", 0, 0, {}, {} },
     { "%d%*1[:/]%d", "24/7", 2, 2, { intarg0, intarg1 }, { to_value_t(24), to_value_t(7) } },
+    { " %[^a]", " b", 1, 1, { charstararg0 }, { str_to_value_t("b") } },
 };
 
 bool g_any_failed = false;

+ 5 - 20
Userland/Libraries/LibC/scanf.cpp

@@ -62,8 +62,6 @@ template<typename ApT, ReadKind kind>
 struct ReadElementConcrete<int, ApT, kind> {
     bool operator()(GenericLexer& lexer, va_list* ap, bool suppress_assignment)
     {
-        lexer.ignore_while(isspace);
-
         long value = 0;
         char* endptr = nullptr;
         auto nptr = lexer.remaining().characters_without_null_termination();
@@ -116,8 +114,6 @@ template<typename ApT, ReadKind kind>
 struct ReadElementConcrete<unsigned, ApT, kind> {
     bool operator()(GenericLexer& lexer, va_list* ap, bool suppress_assignment)
     {
-        lexer.ignore_while(isspace);
-
         unsigned long value = 0;
         char* endptr = nullptr;
         auto nptr = lexer.remaining().characters_without_null_termination();
@@ -152,8 +148,6 @@ template<typename ApT, ReadKind kind>
 struct ReadElementConcrete<long long, ApT, kind> {
     bool operator()(GenericLexer& lexer, va_list* ap, bool suppress_assignment)
     {
-        lexer.ignore_while(isspace);
-
         long long value = 0;
         char* endptr = nullptr;
         auto nptr = lexer.remaining().characters_without_null_termination();
@@ -188,8 +182,6 @@ template<typename ApT, ReadKind kind>
 struct ReadElementConcrete<unsigned long long, ApT, kind> {
     bool operator()(GenericLexer& lexer, va_list* ap, bool suppress_assignment)
     {
-        lexer.ignore_while(isspace);
-
         unsigned long long value = 0;
         char* endptr = nullptr;
         auto nptr = lexer.remaining().characters_without_null_termination();
@@ -224,8 +216,6 @@ template<typename ApT, ReadKind kind>
 struct ReadElementConcrete<float, ApT, kind> {
     bool operator()(GenericLexer& lexer, va_list* ap, bool suppress_assignment)
     {
-        lexer.ignore_while(isspace);
-
         double value = 0;
         char* endptr = nullptr;
         auto nptr = lexer.remaining().characters_without_null_termination();
@@ -299,7 +289,6 @@ struct ReadElement<char*, ReadKind::Normal> {
     ReadElement(StringView scan_set = {}, bool invert = false)
         : scan_set(scan_set.is_null() ? " \t\n\f\r"sv : scan_set)
         , invert(scan_set.is_null() ? true : invert)
-        , was_null(scan_set.is_null())
     {
     }
 
@@ -309,9 +298,6 @@ struct ReadElement<char*, ReadKind::Normal> {
         if (length_modifier != LengthModifier::Default)
             return false;
 
-        if (was_null)
-            input_lexer.ignore_while(isspace);
-
         auto str = input_lexer.consume_while([this](auto c) { return this->matches(c); });
         if (str.is_empty())
             return false;
@@ -333,7 +319,6 @@ private:
 
     const StringView scan_set;
     bool invert { false };
-    bool was_null { false };
 };
 
 template<>
@@ -343,8 +328,6 @@ struct ReadElement<void*, ReadKind::Normal> {
         if (length_modifier != LengthModifier::Default)
             return false;
 
-        input_lexer.ignore_while(isspace);
-
         auto str = input_lexer.consume_while([this](auto c) { return this->should_consume(c); });
 
         if (count != 8) {
@@ -395,10 +378,13 @@ extern "C" int vsscanf(char const* input, char const* format, va_list ap)
     __builtin_va_copy(copy, ap);
 
     while (!format_lexer.is_eof()) {
-        format_lexer.ignore_while(isspace);
+        if (format_lexer.next_is(isspace)) {
+            format_lexer.ignore_while(isspace);
+            input_lexer.ignore_while(isspace);
+        }
+
         if (!format_lexer.next_is('%')) {
         read_one_literal:;
-            input_lexer.ignore_while(isspace);
             if (format_lexer.is_eof())
                 break;
 
@@ -619,7 +605,6 @@ extern "C" int vsscanf(char const* input, char const* format, va_list ap)
                 ++elements_matched;
             break;
         case ConversionSpecifier::OutputNumberOfBytes: {
-            input_lexer.ignore_while(isspace);
             if (!suppress_assignment) {
                 auto* ptr = va_arg(copy, int*);
                 *ptr = input_lexer.tell();