LibC: Reimplement scanf from the ground up

This adds support for some previously unsupported features
(e.g. length modifiers) and fixes at least one FIXME.
Fixes #90.
This commit is contained in:
AnotherTest 2021-02-12 12:31:14 +03:30 committed by Andreas Kling
parent 0bf496f864
commit 8bc1bcb34b
Notes: sideshowbarker 2024-07-18 22:14:22 +09:00
2 changed files with 773 additions and 201 deletions

View file

@ -1,227 +1,546 @@
/*
* Copyright (c) 2000-2002 Opsycon AB (www.opsycon.se)
*
* Copyright (c) 2021, the SerenityOS developers.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Opsycon AB.
* 4. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
* modification, are permitted provided that the following conditions are met:
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <AK/Assertions.h>
#include <AK/GenericLexer.h>
#include <AK/LogStream.h>
#include <AK/StdLibExtras.h>
#include <ctype.h>
#include <stdarg.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
static const char* determine_base(const char* p, int& base)
{
if (p[0] == '0') {
switch (p[1]) {
case 'x':
base = 16;
break;
case 't':
case 'n':
base = 10;
break;
case 'o':
base = 8;
break;
default:
base = 10;
return p;
}
return p + 2;
// Length modifier of a conversion directive. The element readers use it to pick
// the pointee type of the destination pointer pulled from the va_list.
enum LengthModifier {
None, // Nothing parsed yet (initial state inside vsscanf).
Default, // The directive carries no length modifier.
Char, // "hh"
Short, // "h"
Long, // "l" (also set by vsscanf for the 'C' and 'S' conversions)
LongLong, // "ll"
IntMax, // "j"
Size, // "z"
PtrDiff, // "t"
LongDouble, // "L"
};
// Conversion specifier of a directive, as classified by vsscanf.
enum ConversionSpecifier {
Unspecified, // Nothing parsed yet (initial state inside vsscanf).
Decimal, // %d
Integer, // %i
Octal, // %o
Unsigned, // %u
Hex, // %x
Floating, // %a, %e, %f, %g
String, // %s (and %S)
UseScanList, // %[...]
Character, // %c (and %C)
Pointer, // %p
OutputNumberOfBytes, // %n
Invalid, // Any conversion character vsscanf does not recognise.
};
// Selects how a numeric element is parsed; the integer readers translate it into
// the base argument of strtol()/strtoul().
enum class ReadKind {
Normal, // Base 10 for integers; the only kind the char and float readers accept.
Octal, // Base 8.
Hex, // Base 16.
Infer, // Base 0, i.e. strtol()/strtoul() decide from the input's prefix.
};
// Primary template: the fallback for element types T that have no dedicated
// specialization. It consumes no input and always reports failure.
template<typename T, typename ApT, ReadKind kind = ReadKind::Normal>
struct read_element_concrete {
bool operator()(GenericLexer&, va_list)
{
return false;
}
base = 10;
return p;
}
};
static int _atob(unsigned long* vp, const char* p, int base)
{
unsigned long value, v1, v2;
const char* q;
char tmp[20];
int digit;
// Signed integer reader: skips leading whitespace, parses with strtol() in the
// base selected by `kind`, and stores the result through the next va_list
// argument, which is treated as an ApT*.
// Returns false when strtol() converted no characters. The destination pointer
// is pulled from the va_list before parsing, so it is consumed either way.
// The parsed value is held in a `long` and converted to ApT on the final store.
template<typename ApT, ReadKind kind>
struct read_element_concrete<int, ApT, kind> {
bool operator()(GenericLexer& lexer, va_list* ap)
{
lexer.ignore_while(isspace);
if (p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
base = 16;
p += 2;
auto* ptr = va_arg(*ap, ApT*);
long value = 0;
char* endptr = nullptr;
auto nptr = lexer.remaining().characters_without_null_termination();
if constexpr (kind == ReadKind::Normal)
value = strtol(nptr, &endptr, 10);
if constexpr (kind == ReadKind::Octal)
value = strtol(nptr, &endptr, 8);
if constexpr (kind == ReadKind::Hex)
value = strtol(nptr, &endptr, 16);
if constexpr (kind == ReadKind::Infer)
value = strtol(nptr, &endptr, 0);
if (!endptr)
return false;
if (endptr == nptr)
return false;
auto diff = endptr - nptr;
ASSERT(diff > 0);
// Advance the lexer past exactly the characters strtol() accepted.
lexer.ignore((size_t)diff);
*ptr = value;
return true;
}
};
if (base == 16 && (q = strchr(p, '.')) != 0) {
if (q - p > (ssize_t)sizeof(tmp) - 1)
return 0;
memcpy(tmp, p, q - p);
tmp[q - p] = '\0';
// Character reader: consumes exactly one character, without skipping whitespace
// first, and stores it through the next va_list argument (treated as an ApT*).
// Returns false at end of input; the destination pointer has already been pulled
// from the va_list at that point.
template<typename ApT, ReadKind kind>
struct read_element_concrete<char, ApT, kind> {
bool operator()(GenericLexer& lexer, va_list* ap)
{
static_assert(kind == ReadKind::Normal, "Can't read a non-normal character");
if (!_atob(&v1, tmp, 16))
return 0;
++q;
if (strchr(q, '.'))
return 0;
if (!_atob(&v2, q, 16))
return 0;
*vp = (v1 << 16) + v2;
return 1;
auto* ptr = va_arg(*ap, ApT*);
if (lexer.is_eof())
return false;
auto ch = lexer.consume();
*ptr = ch;
return true;
}
};
value = *vp = 0;
for (; *p; p++) {
if (*p >= '0' && *p <= '9')
digit = *p - '0';
else if (*p >= 'a' && *p <= 'f')
digit = *p - 'a' + 10;
else if (*p >= 'A' && *p <= 'F')
digit = *p - 'A' + 10;
// Unsigned integer reader: skips leading whitespace, parses with strtoul() in the
// base selected by `kind`, and stores the result through the next va_list
// argument, which is treated as an ApT*.
// Returns false when strtoul() converted no characters.
template<typename ApT, ReadKind kind>
struct read_element_concrete<unsigned, ApT, kind> {
    bool operator()(GenericLexer& lexer, va_list* ap)
    {
        lexer.ignore_while(isspace);

        // The destination is pulled before parsing, so it is consumed from the
        // va_list even if the conversion fails below.
        auto* destination = va_arg(*ap, ApT*);

        // Base 0 makes strtoul() infer the base from the input's prefix.
        constexpr int base = kind == ReadKind::Octal
            ? 8
            : (kind == ReadKind::Hex ? 16 : (kind == ReadKind::Infer ? 0 : 10));

        auto start = lexer.remaining().characters_without_null_termination();
        char* end = nullptr;
        unsigned long parsed = strtoul(start, &end, base);

        if (end == nullptr || end == start)
            return false;

        // Advance the lexer past exactly the characters strtoul() accepted.
        auto consumed = end - start;
        ASSERT(consumed > 0);
        lexer.ignore((size_t)consumed);

        *destination = parsed;
        return true;
    }
};
// Floating-point reader: skips leading whitespace, parses with strtod(), and
// stores the result through the next va_list argument (treated as an ApT*).
// Only ReadKind::Normal is supported; any other kind reports failure.
// Returns false when strtod() converted no characters. The value is held in a
// `double` and converted to ApT on the final store.
template<typename ApT, ReadKind kind>
struct read_element_concrete<float, ApT, kind> {
bool operator()(GenericLexer& lexer, va_list* ap)
{
lexer.ignore_while(isspace);
auto* ptr = va_arg(*ap, ApT*);
double value = 0;
char* endptr = nullptr;
auto nptr = lexer.remaining().characters_without_null_termination();
if constexpr (kind == ReadKind::Normal)
value = strtod(nptr, &endptr);
else
return 0;
return false;
if (digit >= base)
return 0;
value *= base;
value += digit;
if (!endptr)
return false;
if (endptr == nptr)
return false;
auto diff = endptr - nptr;
ASSERT(diff > 0);
// Advance the lexer past exactly the characters strtod() accepted.
lexer.ignore((size_t)diff);
*ptr = value;
return true;
}
*vp = value;
return 1;
}
};
static int atob(unsigned int* vp, const char* p, int base)
{
unsigned long v;
if (base == 0)
p = determine_base(p, base);
if (_atob(&v, p, base)) {
*vp = v;
return 1;
}
return 0;
}
#define ISSPACE " \t\n\r\f\v"
int vsscanf(const char* buf, const char* s, va_list ap)
{
int base = 10;
char* t;
char tmp[BUFSIZ];
bool noassign = false;
int count = 0;
int width = 0;
// FIXME: This doesn't work quite right. For example, it fails to match 'SSH-2.0-OpenSSH_8.2p1 Ubuntu-4ubuntu0.1\r\n'
// with 'SSH-%d.%d-%[^\n]\n'
while (*s && *buf) {
while (isspace(*s))
s++;
if (*s == '%') {
s++;
for (; *s; s++) {
if (strchr("dibouxcsefg%", *s))
break;
if (*s == '*')
noassign = true;
else if (*s >= '1' && *s <= '9') {
const char* tc;
for (tc = s; isdigit(*s); s++)
;
ASSERT((ssize_t)sizeof(tmp) >= s - tc + 1);
memcpy(tmp, tc, s - tc);
tmp[s - tc] = '\0';
atob((uint32_t*)&width, tmp, 10);
s--;
}
}
if (*s == 's') {
while (isspace(*buf))
buf++;
if (!width)
width = strcspn(buf, ISSPACE);
if (!noassign) {
// In this case, we have no way to ensure the user buffer is not overflown :(
memcpy(t = va_arg(ap, char*), buf, width);
t[width] = '\0';
}
buf += width;
} else if (*s == 'c') {
if (!width)
width = 1;
if (!noassign) {
memcpy(t = va_arg(ap, char*), buf, width);
// No null terminator!
}
buf += width;
} else if (strchr("dobxu", *s)) {
while (isspace(*buf))
buf++;
if (*s == 'd' || *s == 'u')
base = 10;
else if (*s == 'x')
base = 16;
else if (*s == 'o')
base = 8;
else if (*s == 'b')
base = 2;
if (!width) {
if (isspace(*(s + 1)) || *(s + 1) == 0) {
width = strcspn(buf, ISSPACE);
} else {
auto* p = strchr(buf, *(s + 1));
if (p)
width = p - buf;
else {
noassign = true;
width = 0;
}
}
}
memcpy(tmp, buf, width);
tmp[width] = '\0';
buf += width;
if (!noassign) {
if (!atob(va_arg(ap, uint32_t*), tmp, base))
noassign = true;
}
}
if (!noassign)
++count;
width = 0;
noassign = false;
++s;
} else {
while (isspace(*buf))
buf++;
if (*s != *buf)
break;
else {
++s;
++buf;
}
// Dispatches one conversion of element type T to read_element_concrete, using
// the length modifier to choose the destination pointee type (ApT).
// LengthModifier::None must never reach this point and asserts.
// NOTE(review): Long and LongLong are only handled for T = int and T = float;
// for any other T (e.g. unsigned, as used for %u/%o/%x) they return false, so a
// directive such as "%lu" fails to match. Confirm whether that is intended.
template<typename T, ReadKind kind>
struct read_element {
bool operator()(LengthModifier length_modifier, GenericLexer& input_lexer, va_list* ap)
{
switch (length_modifier) {
default:
case None:
ASSERT_NOT_REACHED();
case Default:
return read_element_concrete<T, T, kind> {}(input_lexer, ap);
case Char:
return read_element_concrete<T, char, kind> {}(input_lexer, ap);
case Short:
return read_element_concrete<T, short, kind> {}(input_lexer, ap);
case Long:
if constexpr (IsSame<T, int>::value)
return read_element_concrete<T, long, kind> {}(input_lexer, ap);
if constexpr (IsSame<T, float>::value)
return read_element_concrete<T, double, kind> {}(input_lexer, ap);
return false;
case LongLong:
if constexpr (IsSame<T, int>::value)
return read_element_concrete<T, long long, kind> {}(input_lexer, ap);
if constexpr (IsSame<T, float>::value)
return read_element_concrete<T, double, kind> {}(input_lexer, ap);
return false;
case IntMax:
return read_element_concrete<T, intmax_t, kind> {}(input_lexer, ap);
case Size:
return read_element_concrete<T, size_t, kind> {}(input_lexer, ap);
case PtrDiff:
return read_element_concrete<T, ptrdiff_t, kind> {}(input_lexer, ap);
case LongDouble:
return read_element_concrete<T, long double, kind> {}(input_lexer, ap);
}
}
return count;
};
// Reader for the %s and %[...] conversions.
//
// Constructed without a scan set (%s) it skips leading whitespace and then
// consumes everything up to the next whitespace character. Constructed with a
// scan set (%[...]) it skips nothing and consumes the longest run of characters
// that are in the set, or that are not in it when `invert` is true.
template<>
struct read_element<char*, ReadKind::Normal> {
    // The default delimiter set lists every character isspace() accepts in the
    // "C" locale, including '\v', so that %s stops at exactly the characters the
    // leading-whitespace skip would also skip.
    read_element(StringView scan_set = {}, bool invert = false)
        : scan_set(scan_set.is_null() ? " \t\n\f\r\v" : scan_set)
        , invert(scan_set.is_null() ? true : invert)
        , was_null(scan_set.is_null())
    {
    }

    // Copies the matched characters plus a terminating NUL into the char* taken
    // from the va_list. Returns false for any non-default length modifier and
    // when no character matched. No field width is enforced, so the caller's
    // buffer has to be large enough for whatever matches.
    bool operator()(LengthModifier length_modifier, GenericLexer& input_lexer, va_list* ap)
    {
        // FIXME: Implement wide strings and such.
        if (length_modifier != LengthModifier::Default)
            return false;

        if (was_null)
            input_lexer.ignore_while(isspace);

        auto* ptr = va_arg(*ap, char*);
        auto str = input_lexer.consume_while([this](auto c) { return this->matches(c); });
        if (str.is_empty())
            return false;

        memcpy(ptr, str.characters_without_null_termination(), str.length());
        ptr[str.length()] = 0;
        return true;
    }

private:
    // True when `c` belongs to the run being consumed.
    bool matches(char c) const
    {
        return invert ^ scan_set.contains(c);
    }

    const StringView scan_set;
    bool invert { false };
    bool was_null { false };
};
// Reader for the %p conversion: after optional leading whitespace it expects
// exactly eight hexadecimal digits and stores them as a pointer value.
template<>
struct read_element<void*, ReadKind::Normal> {
    // Returns false (with the consumed digits handed back to the lexer) when the
    // length modifier is not the default or fewer than eight hex digits follow.
    bool operator()(LengthModifier length_modifier, GenericLexer& input_lexer, va_list* ap)
    {
        if (length_modifier != LengthModifier::Default)
            return false;

        input_lexer.ignore_while(isspace);

        auto* ptr = va_arg(*ap, void**);
        auto str = input_lexer.consume_while([this](auto c) { return this->should_consume(c); });

        if (count != 8) {
        fail:;
            // Hand back every digit that was consumed so the input is untouched.
            for (size_t i = 0; i < count; ++i)
                input_lexer.retreat();
            return false;
        }

        char buf[9] { 0 };
        memcpy(buf, str.characters_without_null_termination(), 8);
        buf[8] = 0;
        char* endptr = nullptr;
        auto value = strtoull(buf, &endptr, 16);

        if (endptr != &buf[8])
            goto fail;

        // Store exactly one pointer. `value` is an unsigned long long, so copying
        // sizeof(value) bytes would write past the caller's void* wherever
        // pointers are narrower than 8 bytes.
        *ptr = reinterpret_cast<void*>(static_cast<uintptr_t>(value));
        return true;
    }

private:
    // Accepts hex digits until eight have been taken, counting them as it goes.
    bool should_consume(char c)
    {
        if (count == 8)
            return false;
        if (!isxdigit(c))
            return false;
        ++count;
        return true;
    }

    size_t count { 0 };
};
// Parses `input` according to `format`, storing converted values through the
// pointers supplied in `ap`, and returns the number of successful assignments.
//
// Handled: literal characters, "%%", the length modifiers hh/h/l/ll/j/z/t/L and
// the conversions d i o u x a e f g s [ c p n C S. Any other conversion
// character is treated as a programming error and asserts.
// Whitespace in the format is skipped, and whitespace in the input is skipped
// before every literal character. The first failing conversion or mismatching
// literal ends the scan.
extern "C" int vsscanf(const char* input, const char* format, va_list ap)
{
    GenericLexer format_lexer { format };
    GenericLexer input_lexer { input };

    // The element readers take a va_list*. Where va_list is an array type, a
    // va_list *parameter* decays to a pointer and `&ap` no longer has the type
    // va_list*, so work on a local copy whose address can be taken.
    va_list args;
    va_copy(args, ap);

    int elements_matched = 0;

    // Bookkeeping shared by all conversions that assign: count a success, or
    // exhaust the format so that the main loop terminates on a failure.
    auto record_conversion = [&](bool matched) {
        if (matched)
            ++elements_matched;
        else
            format_lexer.consume_all();
    };

    while (!format_lexer.is_eof()) {
        format_lexer.ignore_while(isspace);
        if (!format_lexer.next_is('%')) {
        read_one_literal:;
            input_lexer.ignore_while(isspace);
            if (format_lexer.is_eof())
                break;

            auto next_char = format_lexer.consume();
            // A literal that does not match ends the scan.
            if (!input_lexer.consume_specific(next_char))
                break;
            continue;
        }

        if (format_lexer.next_is("%%")) {
            // Drop the first '%' and match the second one as a literal.
            format_lexer.ignore();
            goto read_one_literal;
        }

        format_lexer.ignore(); // '%'

        bool invert_scanlist = false;
        StringView scanlist;

        // Optional length modifier.
        LengthModifier length_modifier { Default };
        switch (format_lexer.peek()) {
        case 'h':
            if (format_lexer.peek(1) == 'h') {
                format_lexer.consume(2);
                length_modifier = Char;
            } else {
                format_lexer.consume(1);
                length_modifier = Short;
            }
            break;
        case 'l':
            if (format_lexer.peek(1) == 'l') {
                format_lexer.consume(2);
                length_modifier = LongLong;
            } else {
                format_lexer.consume(1);
                length_modifier = Long;
            }
            break;
        case 'j':
            format_lexer.consume();
            length_modifier = IntMax;
            break;
        case 'z':
            format_lexer.consume();
            length_modifier = Size;
            break;
        case 't':
            format_lexer.consume();
            length_modifier = PtrDiff;
            break;
        case 'L':
            format_lexer.consume();
            length_modifier = LongDouble;
            break;
        default:
            break;
        }

        // Conversion specifier.
        ConversionSpecifier conversion_specifier { Unspecified };
        switch (format_lexer.peek()) {
        case 'd':
            format_lexer.consume();
            conversion_specifier = Decimal;
            break;
        case 'i':
            format_lexer.consume();
            conversion_specifier = Integer;
            break;
        case 'o':
            format_lexer.consume();
            conversion_specifier = Octal;
            break;
        case 'u':
            format_lexer.consume();
            conversion_specifier = Unsigned;
            break;
        case 'x':
            format_lexer.consume();
            conversion_specifier = Hex;
            break;
        case 'a':
        case 'e':
        case 'f':
        case 'g':
            format_lexer.consume();
            conversion_specifier = Floating;
            break;
        case 's':
            format_lexer.consume();
            conversion_specifier = String;
            break;
        case '[':
            format_lexer.consume();
            scanlist = format_lexer.consume_until(']');
            if (scanlist.starts_with('^')) {
                scanlist = scanlist.substring_view(1);
                invert_scanlist = true;
            }
            conversion_specifier = UseScanList;
            break;
        case 'c':
            format_lexer.consume();
            conversion_specifier = Character;
            break;
        case 'p':
            format_lexer.consume();
            conversion_specifier = Pointer;
            break;
        case 'n':
            format_lexer.consume();
            conversion_specifier = OutputNumberOfBytes;
            break;
        case 'C':
            // %C is shorthand for %lc.
            format_lexer.consume();
            length_modifier = Long;
            conversion_specifier = Character;
            break;
        case 'S':
            // %S is shorthand for %ls.
            format_lexer.consume();
            length_modifier = Long;
            conversion_specifier = String;
            break;
        default:
            format_lexer.consume();
            conversion_specifier = Invalid;
            break;
        }

        // Now try to read.
        switch (conversion_specifier) {
        case Invalid:
        case Unspecified:
        default:
            // "undefined behaviour", let's be nice and crash.
            dbgln("Invalid conversion specifier {} in scanf!", (int)conversion_specifier);
            ASSERT_NOT_REACHED();
        case Decimal:
            record_conversion(read_element<int, ReadKind::Normal> {}(length_modifier, input_lexer, &args));
            break;
        case Integer:
            record_conversion(read_element<int, ReadKind::Infer> {}(length_modifier, input_lexer, &args));
            break;
        case Octal:
            record_conversion(read_element<unsigned, ReadKind::Octal> {}(length_modifier, input_lexer, &args));
            break;
        case Unsigned:
            record_conversion(read_element<unsigned, ReadKind::Normal> {}(length_modifier, input_lexer, &args));
            break;
        case Hex:
            record_conversion(read_element<unsigned, ReadKind::Hex> {}(length_modifier, input_lexer, &args));
            break;
        case Floating:
            record_conversion(read_element<float, ReadKind::Normal> {}(length_modifier, input_lexer, &args));
            break;
        case String:
            record_conversion(read_element<char*, ReadKind::Normal> {}(length_modifier, input_lexer, &args));
            break;
        case UseScanList:
            record_conversion(read_element<char*, ReadKind::Normal> { scanlist, invert_scanlist }(length_modifier, input_lexer, &args));
            break;
        case Character:
            record_conversion(read_element<char, ReadKind::Normal> {}(length_modifier, input_lexer, &args));
            break;
        case Pointer:
            record_conversion(read_element<void*, ReadKind::Normal> {}(length_modifier, input_lexer, &args));
            break;
        case OutputNumberOfBytes: {
            // %n reports how much input has been consumed so far. The C standard
            // says it does not contribute to the returned assignment count.
            auto* ptr = va_arg(args, int*);
            *ptr = static_cast<int>(input_lexer.tell());
            break;
        }
        }
    }

    va_end(args);
    return elements_matched;
}

View file

@ -0,0 +1,253 @@
/*
* Copyright (c) 2021, the SerenityOS developers.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <AK/Array.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// Single-token aliases, so that these type names can be pasted into identifiers
// by the DECL_WITH_TYPE macro.
typedef long double longdouble;
typedef long long longlong;
// Fixed-size character buffer used as the destination for string conversions.
typedef char charstar[32];
// Returns the object representation of `x` in the first sizeof(T) bytes of a
// 32-byte array; every remaining byte is zero. This gives values of different
// types one common, directly comparable form.
template<typename T>
constexpr static Array<unsigned char, 32> to_value_t(T x)
{
    static_assert(sizeof(T) <= 32, "to_value_t() only has room for 32 bytes");

    // The endianness doesn't really matter, since we're going to convert both sides with this anyway.
    // Copy the bytes instead of reading them back through a union: that avoids
    // reading an inactive union member and guarantees the tail is zeroed.
    Array<unsigned char, 32> value { 0 };
    memcpy(value.data(), &x, sizeof(T));
    return value;
}
// Converts a string literal (including its terminating NUL) into the 32-byte
// comparison form: the characters first, all remaining bytes zero.
template<size_t N>
constexpr static Array<unsigned char, 32> str_to_value_t(const char (&x)[N])
{
    // Longer literals would be written past the end of the array.
    static_assert(N <= 32, "str_to_value_t() only has room for 32 bytes");

    Array<unsigned char, 32> value { 0 };
    for (size_t i = 0; i < N; ++i)
        value[i] = x[i];
    return value;
}
// One destination handed to sscanf(): a pointer to the storage together with the
// size of the object stored there.
struct Argument {
size_t size; // sizeof() of the pointed-to object; arg_to_value_t() branches on it.
void* data; // Address passed to sscanf() as the conversion's destination.
};
// Converts the current contents of an argument's storage into the 32-byte
// comparison form (payload first, remaining bytes zero).
//
// A charstar-sized argument is treated as a string: only the bytes before the
// first NUL are copied. Every smaller argument is copied byte for byte.
// Sizes larger than a charstar are not supported and assert.
static Array<u8, 32> arg_to_value_t(const Argument& arg)
{
    Array<u8, 32> value { 0 };
    // Reading through a byte pointer is valid for any object type, unlike
    // reinterpreting e.g. a float as a u32.
    auto* bytes = static_cast<const u8*>(arg.data);

    if (arg.size == sizeof(charstar)) {
        // Bounded scan: never read past the buffer, even without a terminator.
        for (size_t i = 0; i < sizeof(charstar) && bytes[i] != 0; ++i)
            value[i] = bytes[i];
        return value;
    }

    if (arg.size > sizeof(charstar))
        ASSERT_NOT_REACHED();

    for (size_t i = 0; i < arg.size; ++i)
        value[i] = bytes[i];
    return value;
}
// For a type `ty`, declares three global storage slots (_<ty>arg0.._<ty>arg2) and
// three matching Argument descriptors (<ty>arg0..<ty>arg2) that point at them.
#define DECL_WITH_TYPE(ty) \
ty _##ty##arg0; \
ty _##ty##arg1; \
ty _##ty##arg2; \
Argument ty##arg0 { sizeof(ty), &_##ty##arg0 }; \
Argument ty##arg1 { sizeof(ty), &_##ty##arg1 }; \
Argument ty##arg2 { sizeof(ty), &_##ty##arg2 };
DECL_WITH_TYPE(int);
DECL_WITH_TYPE(unsigned);
DECL_WITH_TYPE(long);
DECL_WITH_TYPE(longlong);
DECL_WITH_TYPE(float);
DECL_WITH_TYPE(double);
DECL_WITH_TYPE(longdouble);
#undef DECL_WITH_TYPE
// The string slots are declared by hand: their descriptors point at the first
// character of each buffer instead of at the array object.
charstar _charstararg0;
charstar _charstararg1;
charstar _charstararg2;
Argument charstararg0 { sizeof(charstar), &_charstararg0[0] };
Argument charstararg1 { sizeof(charstar), &_charstararg1[0] };
Argument charstararg2 { sizeof(charstar), &_charstararg2[0] };
// One sscanf() test case: a format, an input, and the expected outcome.
struct TestSuite {
const char* format; // Format string passed to sscanf().
const char* input; // Input string passed to sscanf().
int expected_output; // Expected sscanf() return value; also the number of leading arguments whose values are checked.
size_t argument_count; // Number of populated entries in `arguments`.
Argument arguments[8]; // Destinations passed to sscanf(), in order.
Array<unsigned char, 32> expected_values[8]; // 32 bytes for each argument's value.
};
// Table of test cases, in the field order of TestSuite:
// { format, input, expected return value, argument count, arguments, expected values }.
// Cases share the global argument slots, so several of them write to the same storage.
const TestSuite test_suites[] {
{ "%d", "", 0, 0, {}, {} },
{ "%x", "0x519", 1, 1, { unsignedarg0 }, { to_value_t(0x519) } },
// Parsing stops at the first character that is not a hex digit.
{ "%x", "0x51g", 1, 1, { unsignedarg0 }, { to_value_t(0x51u) } },
{ "\"%%%d#", "\"%42#", 1, 1, { intarg0 }, { to_value_t(42) } },
{ " %d", "42", 1, 1, { intarg0 }, { to_value_t(42) } },
{ "%d", " 42", 1, 1, { intarg0 }, { to_value_t(42) } },
{ "%ld", "42", 1, 1, { longarg0 }, { to_value_t(42l) } },
{ "%lld", "42", 1, 1, { longlongarg0 }, { to_value_t(42ll) } },
{ "%f", "42", 1, 1, { floatarg0 }, { to_value_t(42.0f) } },
{ "%lf", "42", 1, 1, { doublearg0 }, { to_value_t(42.0) } },
{ "%s", "42", 1, 1, { charstararg0 }, { str_to_value_t("42") } },
{ "%d%s", "42yoinks", 2, 2, { intarg0, charstararg0 }, { to_value_t(42), str_to_value_t("yoinks") } },
{ "%[^\n]", "aaaa\n", 1, 1, { charstararg0 }, { str_to_value_t("aaaa") } },
// Only two of the three conversions can match, so only two values are expected.
{ "%u.%u.%u", "3.19", 2, 3, { unsignedarg0, unsignedarg1, unsignedarg2 }, { to_value_t(3u), to_value_t(19u) } },
// Failing test case from previous impl:
{ "SSH-%d.%d-%[^\n]\n", "SSH-2.0-OpenSSH_8.2p1 Ubuntu-4ubuntu0.1\n", 3, 3, { intarg0, intarg1, charstararg0 }, { to_value_t(2), to_value_t(0), str_to_value_t("OpenSSH_8.2p1 Ubuntu-4ubuntu0.1") } },
};
// Set once any test case fails; main() turns it into the process exit status.
bool g_any_failed = false;
static bool check_value_conformance(const TestSuite& test)
{
bool fail = false;
for (int i = 0; i < test.expected_output; ++i) {
auto& arg = test.arguments[i];
auto arg_value = arg_to_value_t(arg);
auto& value = test.expected_values[i];
if (arg_value != value) {
auto arg_ptr = (const u32*)arg_value.data();
auto value_ptr = (const u32*)value.data();
printf(" value %d FAIL, expected %04x%04x%04x%04x%04x%04x%04x%04x but got %04x%04x%04x%04x%04x%04x%04x%04x\n",
i,
value_ptr[0], value_ptr[1], value_ptr[2], value_ptr[3],
value_ptr[4], value_ptr[5], value_ptr[6], value_ptr[7],
arg_ptr[0], arg_ptr[1], arg_ptr[2], arg_ptr[3],
arg_ptr[4], arg_ptr[5], arg_ptr[6], arg_ptr[7]);
fail = true;
} else {
printf(" value %d PASS\n", i);
}
}
return !fail;
}
static void do_one_test(const TestSuite& test)
{
printf("Testing '%s' against '%s'...\n", test.input, test.format);
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wformat-nonliteral"
auto rc = sscanf(test.input, test.format,
test.arguments[0].data, test.arguments[1].data, test.arguments[2].data, test.arguments[3].data,
test.arguments[4].data, test.arguments[5].data, test.arguments[6].data, test.arguments[7].data);
#pragma GCC diagnostic pop
bool overall = true;
printf(" output value...\n");
if (rc != test.expected_output) {
printf(" output value FAIL, expected %d but got %d\n", test.expected_output, rc);
overall = false;
} else {
printf(" output value PASS\n");
}
printf(" read values...\n");
if (check_value_conformance(test)) {
printf(" read values PASS\n");
} else {
printf(" read values FAIL\n");
overall = false;
}
if (overall)
printf(" overall PASS\n");
else
printf(" overall FAIL\n");
g_any_failed = g_any_failed || !overall;
}
// Runs every entry of test_suites in order; the exit status is 1 if at least one
// case failed and 0 otherwise.
int main()
{
    constexpr size_t test_count = sizeof(test_suites) / sizeof(test_suites[0]);
    for (size_t index = 0; index < test_count; ++index)
        do_one_test(test_suites[index]);

    if (g_any_failed)
        return 1;
    return 0;
}