From 8bc1bcb34bff61301a66ce62532604abaad86337 Mon Sep 17 00:00:00 2001 From: AnotherTest Date: Fri, 12 Feb 2021 12:31:14 +0330 Subject: [PATCH] LibC: Reimplement scanf from the ground up This adds support for some previously unsupported features (e.g. length modifiers) and fixes at least one FIXME. Fixes #90. --- Userland/Libraries/LibC/scanf.cpp | 721 +++++++++++++++++++++--------- Userland/Tests/LibC/scanf.cpp | 253 +++++++++++ 2 files changed, 773 insertions(+), 201 deletions(-) create mode 100644 Userland/Tests/LibC/scanf.cpp diff --git a/Userland/Libraries/LibC/scanf.cpp b/Userland/Libraries/LibC/scanf.cpp index 550240fa239..e1b30f6f696 100644 --- a/Userland/Libraries/LibC/scanf.cpp +++ b/Userland/Libraries/LibC/scanf.cpp @@ -1,227 +1,546 @@ /* - * Copyright (c) 2000-2002 Opsycon AB (www.opsycon.se) - * + * Copyright (c) 2021, the SerenityOS developers. + * All rights reserved. + * * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by Opsycon AB. - * 4. The name of the author may not be used to endorse or promote products - * derived from this software without specific prior written permission. + * modification, are permitted provided that the following conditions are met: * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS - * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ + #include +#include +#include +#include #include #include -#include #include +#include #include -static const char* determine_base(const char* p, int& base) -{ - if (p[0] == '0') { - switch (p[1]) { - case 'x': - base = 16; - break; - case 't': - case 'n': - base = 10; - break; - case 'o': - base = 8; - break; - default: - base = 10; - return p; - } - return p + 2; +enum LengthModifier { + None, + Default, + Char, + Short, + Long, + LongLong, + IntMax, + Size, + PtrDiff, + LongDouble, +}; + +enum ConversionSpecifier { + Unspecified, + Decimal, + Integer, + Octal, + Unsigned, + Hex, + Floating, + String, + UseScanList, + Character, + Pointer, + OutputNumberOfBytes, + Invalid, +}; + +enum class ReadKind { + Normal, + Octal, + Hex, + Infer, +}; + +template +struct read_element_concrete { + bool operator()(GenericLexer&, va_list) + { + return false; } - base = 10; - return p; -} +}; -static int _atob(unsigned long* vp, const char* p, int base) -{ - unsigned long value, v1, v2; - const char* q; - char tmp[20]; - int digit; +template +struct read_element_concrete { + bool operator()(GenericLexer& lexer, va_list* ap) + { + lexer.ignore_while(isspace); - if (p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) { - base = 16; - p += 2; + auto* ptr = va_arg(*ap, ApT*); + long value = 0; + char* endptr = nullptr; + auto nptr = lexer.remaining().characters_without_null_termination(); + if constexpr (kind == ReadKind::Normal) + value = strtol(nptr, &endptr, 10); + if constexpr (kind == ReadKind::Octal) + value = strtol(nptr, &endptr, 8); + if constexpr (kind == ReadKind::Hex) + value = strtol(nptr, &endptr, 16); + if constexpr (kind == ReadKind::Infer) + value = strtol(nptr, &endptr, 0); + + if (!endptr) + return false; + + if (endptr == nptr) + return false; + + auto diff = endptr - nptr; + ASSERT(diff > 0); + lexer.ignore((size_t)diff); + + *ptr = value; + return true; } +}; - if (base == 16 && (q = strchr(p, '.')) != 0) { - if (q - p > (ssize_t)sizeof(tmp) - 1) - return 0; - memcpy(tmp, p, q - p); - tmp[q - p] = '\0'; +template +struct read_element_concrete { + bool operator()(GenericLexer& lexer, va_list* ap) + { + static_assert(kind == ReadKind::Normal, "Can't read a non-normal character"); - if (!_atob(&v1, tmp, 16)) - return 0; - ++q; - if (strchr(q, '.')) - return 0; - if (!_atob(&v2, q, 16)) - return 0; - *vp = (v1 << 16) + v2; - return 1; + auto* ptr = va_arg(*ap, ApT*); + + if (lexer.is_eof()) + return false; + + auto ch = lexer.consume(); + *ptr = ch; + return true; } +}; - value = *vp = 0; - for (; *p; p++) { - if (*p >= '0' && *p <= '9') - digit = *p - '0'; - else if (*p >= 'a' && *p <= 'f') - digit = *p - 'a' + 10; - else if (*p >= 'A' && *p <= 'F') - digit = *p - 'A' + 10; +template +struct read_element_concrete { + bool operator()(GenericLexer& lexer, va_list* ap) + { + lexer.ignore_while(isspace); + + auto* ptr = va_arg(*ap, ApT*); + unsigned long value = 0; + char* endptr = nullptr; + auto nptr = lexer.remaining().characters_without_null_termination(); + if constexpr (kind == ReadKind::Normal) + value = strtoul(nptr, &endptr, 10); + if constexpr (kind == ReadKind::Octal) + value = strtoul(nptr, &endptr, 8); + if constexpr (kind == ReadKind::Hex) + value = strtoul(nptr, &endptr, 16); + if constexpr (kind == ReadKind::Infer) + value = strtoul(nptr, &endptr, 0); + + if (!endptr) + return false; + + if (endptr == nptr) + return false; + + auto diff = endptr - nptr; + ASSERT(diff > 0); + lexer.ignore((size_t)diff); + + *ptr = value; + return true; + } +}; + +template +struct read_element_concrete { + bool operator()(GenericLexer& lexer, va_list* ap) + { + lexer.ignore_while(isspace); + + auto* ptr = va_arg(*ap, ApT*); + + double value = 0; + char* endptr = nullptr; + auto nptr = lexer.remaining().characters_without_null_termination(); + if constexpr (kind == ReadKind::Normal) + value = strtod(nptr, &endptr); else - return 0; + return false; - if (digit >= base) - return 0; - value *= base; - value += digit; + if (!endptr) + return false; + + if (endptr == nptr) + return false; + + auto diff = endptr - nptr; + ASSERT(diff > 0); + lexer.ignore((size_t)diff); + + *ptr = value; + return true; } - *vp = value; - return 1; -} +}; -static int atob(unsigned int* vp, const char* p, int base) -{ - unsigned long v; - - if (base == 0) - p = determine_base(p, base); - if (_atob(&v, p, base)) { - *vp = v; - return 1; - } - return 0; -} - -#define ISSPACE " \t\n\r\f\v" - -int vsscanf(const char* buf, const char* s, va_list ap) -{ - int base = 10; - char* t; - char tmp[BUFSIZ]; - bool noassign = false; - int count = 0; - int width = 0; - - // FIXME: This doesn't work quite right. For example, it fails to match 'SSH-2.0-OpenSSH_8.2p1 Ubuntu-4ubuntu0.1\r\n' - // with 'SSH-%d.%d-%[^\n]\n' - - while (*s && *buf) { - while (isspace(*s)) - s++; - if (*s == '%') { - s++; - for (; *s; s++) { - if (strchr("dibouxcsefg%", *s)) - break; - if (*s == '*') - noassign = true; - else if (*s >= '1' && *s <= '9') { - const char* tc; - for (tc = s; isdigit(*s); s++) - ; - ASSERT((ssize_t)sizeof(tmp) >= s - tc + 1); - memcpy(tmp, tc, s - tc); - tmp[s - tc] = '\0'; - atob((uint32_t*)&width, tmp, 10); - s--; - } - } - if (*s == 's') { - while (isspace(*buf)) - buf++; - if (!width) - width = strcspn(buf, ISSPACE); - if (!noassign) { - // In this case, we have no way to ensure the user buffer is not overflown :( - memcpy(t = va_arg(ap, char*), buf, width); - t[width] = '\0'; - } - buf += width; - } else if (*s == 'c') { - if (!width) - width = 1; - if (!noassign) { - memcpy(t = va_arg(ap, char*), buf, width); - // No null terminator! - } - buf += width; - } else if (strchr("dobxu", *s)) { - while (isspace(*buf)) - buf++; - if (*s == 'd' || *s == 'u') - base = 10; - else if (*s == 'x') - base = 16; - else if (*s == 'o') - base = 8; - else if (*s == 'b') - base = 2; - if (!width) { - if (isspace(*(s + 1)) || *(s + 1) == 0) { - width = strcspn(buf, ISSPACE); - } else { - auto* p = strchr(buf, *(s + 1)); - if (p) - width = p - buf; - else { - noassign = true; - width = 0; - } - } - } - memcpy(tmp, buf, width); - tmp[width] = '\0'; - buf += width; - if (!noassign) { - if (!atob(va_arg(ap, uint32_t*), tmp, base)) - noassign = true; - } - } - if (!noassign) - ++count; - width = 0; - noassign = false; - ++s; - } else { - while (isspace(*buf)) - buf++; - if (*s != *buf) - break; - else { - ++s; - ++buf; - } +template +struct read_element { + bool operator()(LengthModifier length_modifier, GenericLexer& input_lexer, va_list* ap) + { + switch (length_modifier) { + default: + case None: + ASSERT_NOT_REACHED(); + case Default: + return read_element_concrete {}(input_lexer, ap); + case Char: + return read_element_concrete {}(input_lexer, ap); + case Short: + return read_element_concrete {}(input_lexer, ap); + case Long: + if constexpr (IsSame::value) + return read_element_concrete {}(input_lexer, ap); + if constexpr (IsSame::value) + return read_element_concrete {}(input_lexer, ap); + return false; + case LongLong: + if constexpr (IsSame::value) + return read_element_concrete {}(input_lexer, ap); + if constexpr (IsSame::value) + return read_element_concrete {}(input_lexer, ap); + return false; + case IntMax: + return read_element_concrete {}(input_lexer, ap); + case Size: + return read_element_concrete {}(input_lexer, ap); + case PtrDiff: + return read_element_concrete {}(input_lexer, ap); + case LongDouble: + return read_element_concrete {}(input_lexer, ap); } } - return count; +}; + +template<> +struct read_element { + read_element(StringView scan_set = {}, bool invert = false) + : scan_set(scan_set.is_null() ? " \t\n\f\r" : scan_set) + , invert(scan_set.is_null() ? true : invert) + , was_null(scan_set.is_null()) + { + } + + bool operator()(LengthModifier length_modifier, GenericLexer& input_lexer, va_list* ap) + { + // FIXME: Implement wide strings and such. + if (length_modifier != LengthModifier::Default) + return false; + + if (was_null) + input_lexer.ignore_while(isspace); + + auto* ptr = va_arg(*ap, char*); + auto str = input_lexer.consume_while([this](auto c) { return this->matches(c); }); + if (str.is_empty()) + return false; + + memcpy(ptr, str.characters_without_null_termination(), str.length()); + ptr[str.length()] = 0; + + return true; + } + +private: + bool matches(char c) const + { + return invert ^ scan_set.contains(c); + } + + const StringView scan_set; + bool invert { false }; + bool was_null { false }; +}; + +template<> +struct read_element { + bool operator()(LengthModifier length_modifier, GenericLexer& input_lexer, va_list* ap) + { + if (length_modifier != LengthModifier::Default) + return false; + + input_lexer.ignore_while(isspace); + + auto* ptr = va_arg(*ap, void**); + auto str = input_lexer.consume_while([this](auto c) { return this->should_consume(c); }); + + if (count != 8) { + fail:; + for (size_t i = 0; i < count; ++i) + input_lexer.retreat(); + return false; + } + + char buf[9] { 0 }; + memcpy(buf, str.characters_without_null_termination(), 8); + buf[8] = 0; + char* endptr = nullptr; + auto value = strtoull(buf, &endptr, 16); + + if (endptr != &buf[8]) + goto fail; + + memcpy(ptr, &value, sizeof(value)); + return true; + } + +private: + bool should_consume(char c) + { + if (count == 8) + return false; + if (!isxdigit(c)) + return false; + + ++count; + return true; + } + size_t count { 0 }; +}; + +extern "C" int vsscanf(const char* input, const char* format, va_list ap) +{ + GenericLexer format_lexer { format }; + GenericLexer input_lexer { input }; + + int elements_matched = 0; + + while (!format_lexer.is_eof()) { + format_lexer.ignore_while(isspace); + if (!format_lexer.next_is('%')) { + read_one_literal:; + input_lexer.ignore_while(isspace); + if (format_lexer.is_eof()) + break; + + auto next_char = format_lexer.consume(); + if (!input_lexer.consume_specific(next_char)) + return elements_matched; + continue; + } + + if (format_lexer.next_is("%%")) { + format_lexer.ignore(); + goto read_one_literal; + } + + format_lexer.ignore(); // '%' + + bool invert_scanlist = false; + StringView scanlist; + LengthModifier length_modifier { None }; + ConversionSpecifier conversion_specifier { Unspecified }; + reread_lookahead:; + auto format_lookahead = format_lexer.peek(); + if (length_modifier == None) { + switch (format_lookahead) { + case 'h': + if (format_lexer.peek(1) == 'h') { + format_lexer.consume(2); + length_modifier = Char; + } else { + format_lexer.consume(1); + length_modifier = Short; + } + break; + case 'l': + if (format_lexer.peek(1) == 'l') { + format_lexer.consume(2); + length_modifier = LongLong; + } else { + format_lexer.consume(1); + length_modifier = Long; + } + break; + case 'j': + format_lexer.consume(); + length_modifier = IntMax; + break; + case 'z': + format_lexer.consume(); + length_modifier = Size; + break; + case 't': + format_lexer.consume(); + length_modifier = PtrDiff; + break; + case 'L': + format_lexer.consume(); + length_modifier = LongDouble; + break; + default: + length_modifier = Default; + break; + } + goto reread_lookahead; + } + if (conversion_specifier == Unspecified) { + switch (format_lookahead) { + case 'd': + format_lexer.consume(); + conversion_specifier = Decimal; + break; + case 'i': + format_lexer.consume(); + conversion_specifier = Integer; + break; + case 'o': + format_lexer.consume(); + conversion_specifier = Octal; + break; + case 'u': + format_lexer.consume(); + conversion_specifier = Unsigned; + break; + case 'x': + format_lexer.consume(); + conversion_specifier = Hex; + break; + case 'a': + case 'e': + case 'f': + case 'g': + format_lexer.consume(); + conversion_specifier = Floating; + break; + case 's': + format_lexer.consume(); + conversion_specifier = String; + break; + case '[': + format_lexer.consume(); + scanlist = format_lexer.consume_until(']'); + if (scanlist.starts_with('^')) { + scanlist = scanlist.substring_view(1); + invert_scanlist = true; + } + conversion_specifier = UseScanList; + break; + case 'c': + format_lexer.consume(); + conversion_specifier = Character; + break; + case 'p': + format_lexer.consume(); + conversion_specifier = Pointer; + break; + case 'n': + format_lexer.consume(); + conversion_specifier = OutputNumberOfBytes; + break; + case 'C': + format_lexer.consume(); + length_modifier = Long; + conversion_specifier = Character; + break; + case 'S': + format_lexer.consume(); + length_modifier = Long; + conversion_specifier = String; + break; + default: + format_lexer.consume(); + conversion_specifier = Invalid; + break; + } + } + + // Now try to read. + switch (conversion_specifier) { + case Invalid: + case Unspecified: + default: + // "undefined behaviour", let's be nice and crash. + dbgln("Invalid conversion specifier {} in scanf!", (int)conversion_specifier); + ASSERT_NOT_REACHED(); + case Decimal: + if (!read_element {}(length_modifier, input_lexer, &ap)) + format_lexer.consume_all(); + else + ++elements_matched; + break; + case Integer: + if (!read_element {}(length_modifier, input_lexer, &ap)) + format_lexer.consume_all(); + else + ++elements_matched; + break; + case Octal: + if (!read_element {}(length_modifier, input_lexer, &ap)) + format_lexer.consume_all(); + else + ++elements_matched; + break; + case Unsigned: + if (!read_element {}(length_modifier, input_lexer, &ap)) + format_lexer.consume_all(); + else + ++elements_matched; + break; + case Hex: + if (!read_element {}(length_modifier, input_lexer, &ap)) + format_lexer.consume_all(); + else + ++elements_matched; + break; + case Floating: + if (!read_element {}(length_modifier, input_lexer, &ap)) + format_lexer.consume_all(); + else + ++elements_matched; + break; + case String: + if (!read_element {}(length_modifier, input_lexer, &ap)) + format_lexer.consume_all(); + else + ++elements_matched; + break; + case UseScanList: + if (!read_element { scanlist, invert_scanlist }(length_modifier, input_lexer, &ap)) + format_lexer.consume_all(); + else + ++elements_matched; + break; + case Character: + if (!read_element {}(length_modifier, input_lexer, &ap)) + format_lexer.consume_all(); + else + ++elements_matched; + break; + case Pointer: + if (!read_element {}(length_modifier, input_lexer, &ap)) + format_lexer.consume_all(); + else + ++elements_matched; + break; + case OutputNumberOfBytes: { + auto* ptr = va_arg(ap, int*); + *ptr = input_lexer.tell(); + ++elements_matched; + break; + } + } + } + + return elements_matched; } diff --git a/Userland/Tests/LibC/scanf.cpp b/Userland/Tests/LibC/scanf.cpp new file mode 100644 index 00000000000..4da0d928561 --- /dev/null +++ b/Userland/Tests/LibC/scanf.cpp @@ -0,0 +1,253 @@ +/* + * Copyright (c) 2021, the SerenityOS developers. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include + +typedef long double longdouble; +typedef long long longlong; +typedef char charstar[32]; + +template +constexpr static Array to_value_t(T x) +{ + // The endianness doesn't really matter, since we're going to convert both sides with this anyway. + union Value { + u8 v[32]; + T t; + }; + + auto value = Value { .t = x }; + + return { + value.v[0], + value.v[1], + value.v[2], + value.v[3], + value.v[4], + value.v[5], + value.v[6], + value.v[7], + value.v[8], + value.v[9], + value.v[10], + value.v[11], + value.v[12], + value.v[13], + value.v[14], + value.v[15], + value.v[16], + value.v[17], + value.v[18], + value.v[19], + value.v[20], + value.v[21], + value.v[22], + value.v[23], + value.v[24], + value.v[25], + value.v[26], + value.v[27], + value.v[28], + value.v[29], + value.v[30], + value.v[31], + }; +} + +template +constexpr static Array str_to_value_t(const char (&x)[N]) +{ + Array value { 0 }; + for (size_t i = 0; i < N; ++i) + value[i] = x[i]; + return value; +} + +struct Argument { + size_t size; + void* data; +}; + +static Array arg_to_value_t(const Argument& arg) +{ + if (arg.size == 1) + return to_value_t(*(u8*)arg.data); + + if (arg.size == 2) + return to_value_t(*(u16*)arg.data); + + if (arg.size == 4) + return to_value_t(*(u32*)arg.data); + + if (arg.size == 8) + return to_value_t(*(u64*)arg.data); + + if (arg.size == 16) { + auto& data = *(charstar*)arg.data; + Array value { 0 }; + for (size_t i = 0; i < 16; ++i) + value[i] = data[i]; + return value; + } + + if (arg.size == 32) { + auto& data = *(charstar*)arg.data; + auto length = strlen(data); + Array value { 0 }; + for (size_t i = 0; i < length; ++i) + value[i] = data[i]; + return value; + } + + ASSERT_NOT_REACHED(); +} + +#define DECL_WITH_TYPE(ty) \ + ty _##ty##arg0; \ + ty _##ty##arg1; \ + ty _##ty##arg2; \ + Argument ty##arg0 { sizeof(ty), &_##ty##arg0 }; \ + Argument ty##arg1 { sizeof(ty), &_##ty##arg1 }; \ + Argument ty##arg2 { sizeof(ty), &_##ty##arg2 }; + +DECL_WITH_TYPE(int); +DECL_WITH_TYPE(unsigned); +DECL_WITH_TYPE(long); +DECL_WITH_TYPE(longlong); +DECL_WITH_TYPE(float); +DECL_WITH_TYPE(double); +DECL_WITH_TYPE(longdouble); + +#undef DECL_WITH_TYPE + +charstar _charstararg0; +charstar _charstararg1; +charstar _charstararg2; +Argument charstararg0 { sizeof(charstar), &_charstararg0[0] }; +Argument charstararg1 { sizeof(charstar), &_charstararg1[0] }; +Argument charstararg2 { sizeof(charstar), &_charstararg2[0] }; + +struct TestSuite { + const char* format; + const char* input; + int expected_output; + size_t argument_count; + Argument arguments[8]; + Array expected_values[8]; // 32 bytes for each argument's value. +}; + +const TestSuite test_suites[] { + { "%d", "", 0, 0, {}, {} }, + { "%x", "0x519", 1, 1, { unsignedarg0 }, { to_value_t(0x519) } }, + { "%x", "0x51g", 1, 1, { unsignedarg0 }, { to_value_t(0x51u) } }, + { "\"%%%d#", "\"%42#", 1, 1, { intarg0 }, { to_value_t(42) } }, + { " %d", "42", 1, 1, { intarg0 }, { to_value_t(42) } }, + { "%d", " 42", 1, 1, { intarg0 }, { to_value_t(42) } }, + { "%ld", "42", 1, 1, { longarg0 }, { to_value_t(42l) } }, + { "%lld", "42", 1, 1, { longlongarg0 }, { to_value_t(42ll) } }, + { "%f", "42", 1, 1, { floatarg0 }, { to_value_t(42.0f) } }, + { "%lf", "42", 1, 1, { doublearg0 }, { to_value_t(42.0) } }, + { "%s", "42", 1, 1, { charstararg0 }, { str_to_value_t("42") } }, + { "%d%s", "42yoinks", 2, 2, { intarg0, charstararg0 }, { to_value_t(42), str_to_value_t("yoinks") } }, + { "%[^\n]", "aaaa\n", 1, 1, { charstararg0 }, { str_to_value_t("aaaa") } }, + { "%u.%u.%u", "3.19", 2, 3, { unsignedarg0, unsignedarg1, unsignedarg2 }, { to_value_t(3u), to_value_t(19u) } }, + // Failing test case from previous impl: + { "SSH-%d.%d-%[^\n]\n", "SSH-2.0-OpenSSH_8.2p1 Ubuntu-4ubuntu0.1\n", 3, 3, { intarg0, intarg1, charstararg0 }, { to_value_t(2), to_value_t(0), str_to_value_t("OpenSSH_8.2p1 Ubuntu-4ubuntu0.1") } }, +}; + +bool g_any_failed = false; + +static bool check_value_conformance(const TestSuite& test) +{ + bool fail = false; + for (int i = 0; i < test.expected_output; ++i) { + auto& arg = test.arguments[i]; + auto arg_value = arg_to_value_t(arg); + auto& value = test.expected_values[i]; + if (arg_value != value) { + auto arg_ptr = (const u32*)arg_value.data(); + auto value_ptr = (const u32*)value.data(); + printf(" value %d FAIL, expected %04x%04x%04x%04x%04x%04x%04x%04x but got %04x%04x%04x%04x%04x%04x%04x%04x\n", + i, + value_ptr[0], value_ptr[1], value_ptr[2], value_ptr[3], + value_ptr[4], value_ptr[5], value_ptr[6], value_ptr[7], + arg_ptr[0], arg_ptr[1], arg_ptr[2], arg_ptr[3], + arg_ptr[4], arg_ptr[5], arg_ptr[6], arg_ptr[7]); + fail = true; + } else { + printf(" value %d PASS\n", i); + } + } + + return !fail; +} + +static void do_one_test(const TestSuite& test) +{ + printf("Testing '%s' against '%s'...\n", test.input, test.format); + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wformat-nonliteral" + auto rc = sscanf(test.input, test.format, + test.arguments[0].data, test.arguments[1].data, test.arguments[2].data, test.arguments[3].data, + test.arguments[4].data, test.arguments[5].data, test.arguments[6].data, test.arguments[7].data); +#pragma GCC diagnostic pop + + bool overall = true; + printf(" output value...\n"); + if (rc != test.expected_output) { + printf(" output value FAIL, expected %d but got %d\n", test.expected_output, rc); + overall = false; + } else { + printf(" output value PASS\n"); + } + + printf(" read values...\n"); + if (check_value_conformance(test)) { + printf(" read values PASS\n"); + } else { + printf(" read values FAIL\n"); + overall = false; + } + + if (overall) + printf(" overall PASS\n"); + else + printf(" overall FAIL\n"); + + g_any_failed = g_any_failed || !overall; +} + +int main() +{ + for (auto& test : test_suites) + do_one_test(test); + + return g_any_failed ? 1 : 0; +}