mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2024-11-27 01:50:24 +00:00
LibC: Reimplement scanf from the ground up
This adds support for some previously unsupported features (e.g. length modifiers) and fixes at least one FIXME. Fixes #90.
This commit is contained in:
parent
0bf496f864
commit
8bc1bcb34b
Notes:
sideshowbarker
2024-07-18 22:14:22 +09:00
Author: https://github.com/alimpfard Commit: https://github.com/SerenityOS/serenity/commit/8bc1bcb34bf Pull-request: https://github.com/SerenityOS/serenity/pull/5325 Issue: https://github.com/SerenityOS/serenity/issues/90 Reviewed-by: https://github.com/BenWiederhake Reviewed-by: https://github.com/linusg Reviewed-by: https://github.com/sunverwerth
2 changed files with 773 additions and 201 deletions
|
@ -1,227 +1,546 @@
|
|||
/*
|
||||
* Copyright (c) 2000-2002 Opsycon AB (www.opsycon.se)
|
||||
*
|
||||
* Copyright (c) 2021, the SerenityOS developers.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by Opsycon AB.
|
||||
* 4. The name of the author may not be used to endorse or promote products
|
||||
* derived from this software without specific prior written permission.
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
|
||||
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
|
||||
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <AK/Assertions.h>
|
||||
#include <AK/GenericLexer.h>
|
||||
#include <AK/LogStream.h>
|
||||
#include <AK/StdLibExtras.h>
|
||||
#include <ctype.h>
|
||||
#include <stdarg.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
static const char* determine_base(const char* p, int& base)
|
||||
{
|
||||
if (p[0] == '0') {
|
||||
switch (p[1]) {
|
||||
case 'x':
|
||||
base = 16;
|
||||
break;
|
||||
case 't':
|
||||
case 'n':
|
||||
base = 10;
|
||||
break;
|
||||
case 'o':
|
||||
base = 8;
|
||||
break;
|
||||
default:
|
||||
base = 10;
|
||||
return p;
|
||||
}
|
||||
return p + 2;
|
||||
// Length modifier that may sit between '%' and the conversion character of a
// scanf directive. It selects the type of the destination the parsed value is
// stored through.
enum LengthModifier {
|
||||
None, // Nothing parsed yet; the initial state while a directive is being read.
|
||||
Default, // The directive carries no length modifier.
|
||||
Char, // "hh"
|
||||
Short, // "h"
|
||||
Long, // "l" (also set for the "C" and "S" conversions)
|
||||
LongLong, // "ll"
|
||||
IntMax, // "j"
|
||||
Size, // "z"
|
||||
PtrDiff, // "t"
|
||||
LongDouble, // "L"
|
||||
};
|
||||
|
||||
// Conversion requested by a scanf directive, as decoded from the format string.
enum ConversionSpecifier {
|
||||
Unspecified, // Nothing parsed yet; the initial state while a directive is being read.
|
||||
Decimal, // 'd'
|
||||
Integer, // 'i'
|
||||
Octal, // 'o'
|
||||
Unsigned, // 'u'
|
||||
Hex, // 'x'
|
||||
Floating, // 'a', 'e', 'f', 'g'
|
||||
String, // 's' (and 'S', which also forces the Long length modifier)
|
||||
UseScanList, // '[' ... ']'
|
||||
Character, // 'c' (and 'C', which also forces the Long length modifier)
|
||||
Pointer, // 'p'
|
||||
OutputNumberOfBytes, // 'n'
|
||||
Invalid, // Any other character after '%'.
|
||||
};
|
||||
|
||||
// How the numeric readers interpret the digits they consume.
enum class ReadKind {
|
||||
Normal, // Base 10 for the integer readers; the only kind the floating-point reader accepts.
|
||||
Octal, // Base 8.
|
||||
Hex, // Base 16.
|
||||
Infer, // Base 0 is handed to strtol()/strtoul(), i.e. the base is derived from the input.
|
||||
};
|
||||
|
||||
template<typename T, typename ApT, ReadKind kind = ReadKind::Normal>
|
||||
struct read_element_concrete {
|
||||
bool operator()(GenericLexer&, va_list)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
base = 10;
|
||||
return p;
|
||||
}
|
||||
};
|
||||
|
||||
static int _atob(unsigned long* vp, const char* p, int base)
|
||||
{
|
||||
unsigned long value, v1, v2;
|
||||
const char* q;
|
||||
char tmp[20];
|
||||
int digit;
|
||||
template<typename ApT, ReadKind kind>
|
||||
struct read_element_concrete<int, ApT, kind> {
|
||||
bool operator()(GenericLexer& lexer, va_list* ap)
|
||||
{
|
||||
lexer.ignore_while(isspace);
|
||||
|
||||
if (p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
|
||||
base = 16;
|
||||
p += 2;
|
||||
auto* ptr = va_arg(*ap, ApT*);
|
||||
long value = 0;
|
||||
char* endptr = nullptr;
|
||||
auto nptr = lexer.remaining().characters_without_null_termination();
|
||||
if constexpr (kind == ReadKind::Normal)
|
||||
value = strtol(nptr, &endptr, 10);
|
||||
if constexpr (kind == ReadKind::Octal)
|
||||
value = strtol(nptr, &endptr, 8);
|
||||
if constexpr (kind == ReadKind::Hex)
|
||||
value = strtol(nptr, &endptr, 16);
|
||||
if constexpr (kind == ReadKind::Infer)
|
||||
value = strtol(nptr, &endptr, 0);
|
||||
|
||||
if (!endptr)
|
||||
return false;
|
||||
|
||||
if (endptr == nptr)
|
||||
return false;
|
||||
|
||||
auto diff = endptr - nptr;
|
||||
ASSERT(diff > 0);
|
||||
lexer.ignore((size_t)diff);
|
||||
|
||||
*ptr = value;
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
if (base == 16 && (q = strchr(p, '.')) != 0) {
|
||||
if (q - p > (ssize_t)sizeof(tmp) - 1)
|
||||
return 0;
|
||||
memcpy(tmp, p, q - p);
|
||||
tmp[q - p] = '\0';
|
||||
template<typename ApT, ReadKind kind>
|
||||
struct read_element_concrete<char, ApT, kind> {
|
||||
bool operator()(GenericLexer& lexer, va_list* ap)
|
||||
{
|
||||
static_assert(kind == ReadKind::Normal, "Can't read a non-normal character");
|
||||
|
||||
if (!_atob(&v1, tmp, 16))
|
||||
return 0;
|
||||
++q;
|
||||
if (strchr(q, '.'))
|
||||
return 0;
|
||||
if (!_atob(&v2, q, 16))
|
||||
return 0;
|
||||
*vp = (v1 << 16) + v2;
|
||||
return 1;
|
||||
auto* ptr = va_arg(*ap, ApT*);
|
||||
|
||||
if (lexer.is_eof())
|
||||
return false;
|
||||
|
||||
auto ch = lexer.consume();
|
||||
*ptr = ch;
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
value = *vp = 0;
|
||||
for (; *p; p++) {
|
||||
if (*p >= '0' && *p <= '9')
|
||||
digit = *p - '0';
|
||||
else if (*p >= 'a' && *p <= 'f')
|
||||
digit = *p - 'a' + 10;
|
||||
else if (*p >= 'A' && *p <= 'F')
|
||||
digit = *p - 'A' + 10;
|
||||
template<typename ApT, ReadKind kind>
|
||||
struct read_element_concrete<unsigned, ApT, kind> {
|
||||
bool operator()(GenericLexer& lexer, va_list* ap)
|
||||
{
|
||||
lexer.ignore_while(isspace);
|
||||
|
||||
auto* ptr = va_arg(*ap, ApT*);
|
||||
unsigned long value = 0;
|
||||
char* endptr = nullptr;
|
||||
auto nptr = lexer.remaining().characters_without_null_termination();
|
||||
if constexpr (kind == ReadKind::Normal)
|
||||
value = strtoul(nptr, &endptr, 10);
|
||||
if constexpr (kind == ReadKind::Octal)
|
||||
value = strtoul(nptr, &endptr, 8);
|
||||
if constexpr (kind == ReadKind::Hex)
|
||||
value = strtoul(nptr, &endptr, 16);
|
||||
if constexpr (kind == ReadKind::Infer)
|
||||
value = strtoul(nptr, &endptr, 0);
|
||||
|
||||
if (!endptr)
|
||||
return false;
|
||||
|
||||
if (endptr == nptr)
|
||||
return false;
|
||||
|
||||
auto diff = endptr - nptr;
|
||||
ASSERT(diff > 0);
|
||||
lexer.ignore((size_t)diff);
|
||||
|
||||
*ptr = value;
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
template<typename ApT, ReadKind kind>
|
||||
struct read_element_concrete<float, ApT, kind> {
|
||||
bool operator()(GenericLexer& lexer, va_list* ap)
|
||||
{
|
||||
lexer.ignore_while(isspace);
|
||||
|
||||
auto* ptr = va_arg(*ap, ApT*);
|
||||
|
||||
double value = 0;
|
||||
char* endptr = nullptr;
|
||||
auto nptr = lexer.remaining().characters_without_null_termination();
|
||||
if constexpr (kind == ReadKind::Normal)
|
||||
value = strtod(nptr, &endptr);
|
||||
else
|
||||
return 0;
|
||||
return false;
|
||||
|
||||
if (digit >= base)
|
||||
return 0;
|
||||
value *= base;
|
||||
value += digit;
|
||||
if (!endptr)
|
||||
return false;
|
||||
|
||||
if (endptr == nptr)
|
||||
return false;
|
||||
|
||||
auto diff = endptr - nptr;
|
||||
ASSERT(diff > 0);
|
||||
lexer.ignore((size_t)diff);
|
||||
|
||||
*ptr = value;
|
||||
return true;
|
||||
}
|
||||
*vp = value;
|
||||
return 1;
|
||||
}
|
||||
};
|
||||
|
||||
static int atob(unsigned int* vp, const char* p, int base)
|
||||
{
|
||||
unsigned long v;
|
||||
|
||||
if (base == 0)
|
||||
p = determine_base(p, base);
|
||||
if (_atob(&v, p, base)) {
|
||||
*vp = v;
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define ISSPACE " \t\n\r\f\v"
|
||||
|
||||
int vsscanf(const char* buf, const char* s, va_list ap)
|
||||
{
|
||||
int base = 10;
|
||||
char* t;
|
||||
char tmp[BUFSIZ];
|
||||
bool noassign = false;
|
||||
int count = 0;
|
||||
int width = 0;
|
||||
|
||||
// FIXME: This doesn't work quite right. For example, it fails to match 'SSH-2.0-OpenSSH_8.2p1 Ubuntu-4ubuntu0.1\r\n'
|
||||
// with 'SSH-%d.%d-%[^\n]\n'
|
||||
|
||||
while (*s && *buf) {
|
||||
while (isspace(*s))
|
||||
s++;
|
||||
if (*s == '%') {
|
||||
s++;
|
||||
for (; *s; s++) {
|
||||
if (strchr("dibouxcsefg%", *s))
|
||||
break;
|
||||
if (*s == '*')
|
||||
noassign = true;
|
||||
else if (*s >= '1' && *s <= '9') {
|
||||
const char* tc;
|
||||
for (tc = s; isdigit(*s); s++)
|
||||
;
|
||||
ASSERT((ssize_t)sizeof(tmp) >= s - tc + 1);
|
||||
memcpy(tmp, tc, s - tc);
|
||||
tmp[s - tc] = '\0';
|
||||
atob((uint32_t*)&width, tmp, 10);
|
||||
s--;
|
||||
}
|
||||
}
|
||||
if (*s == 's') {
|
||||
while (isspace(*buf))
|
||||
buf++;
|
||||
if (!width)
|
||||
width = strcspn(buf, ISSPACE);
|
||||
if (!noassign) {
|
||||
// In this case, we have no way to ensure the user buffer is not overflown :(
|
||||
memcpy(t = va_arg(ap, char*), buf, width);
|
||||
t[width] = '\0';
|
||||
}
|
||||
buf += width;
|
||||
} else if (*s == 'c') {
|
||||
if (!width)
|
||||
width = 1;
|
||||
if (!noassign) {
|
||||
memcpy(t = va_arg(ap, char*), buf, width);
|
||||
// No null terminator!
|
||||
}
|
||||
buf += width;
|
||||
} else if (strchr("dobxu", *s)) {
|
||||
while (isspace(*buf))
|
||||
buf++;
|
||||
if (*s == 'd' || *s == 'u')
|
||||
base = 10;
|
||||
else if (*s == 'x')
|
||||
base = 16;
|
||||
else if (*s == 'o')
|
||||
base = 8;
|
||||
else if (*s == 'b')
|
||||
base = 2;
|
||||
if (!width) {
|
||||
if (isspace(*(s + 1)) || *(s + 1) == 0) {
|
||||
width = strcspn(buf, ISSPACE);
|
||||
} else {
|
||||
auto* p = strchr(buf, *(s + 1));
|
||||
if (p)
|
||||
width = p - buf;
|
||||
else {
|
||||
noassign = true;
|
||||
width = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
memcpy(tmp, buf, width);
|
||||
tmp[width] = '\0';
|
||||
buf += width;
|
||||
if (!noassign) {
|
||||
if (!atob(va_arg(ap, uint32_t*), tmp, base))
|
||||
noassign = true;
|
||||
}
|
||||
}
|
||||
if (!noassign)
|
||||
++count;
|
||||
width = 0;
|
||||
noassign = false;
|
||||
++s;
|
||||
} else {
|
||||
while (isspace(*buf))
|
||||
buf++;
|
||||
if (*s != *buf)
|
||||
break;
|
||||
else {
|
||||
++s;
|
||||
++buf;
|
||||
}
|
||||
template<typename T, ReadKind kind>
|
||||
struct read_element {
|
||||
bool operator()(LengthModifier length_modifier, GenericLexer& input_lexer, va_list* ap)
|
||||
{
|
||||
switch (length_modifier) {
|
||||
default:
|
||||
case None:
|
||||
ASSERT_NOT_REACHED();
|
||||
case Default:
|
||||
return read_element_concrete<T, T, kind> {}(input_lexer, ap);
|
||||
case Char:
|
||||
return read_element_concrete<T, char, kind> {}(input_lexer, ap);
|
||||
case Short:
|
||||
return read_element_concrete<T, short, kind> {}(input_lexer, ap);
|
||||
case Long:
|
||||
if constexpr (IsSame<T, int>::value)
|
||||
return read_element_concrete<T, long, kind> {}(input_lexer, ap);
|
||||
if constexpr (IsSame<T, float>::value)
|
||||
return read_element_concrete<T, double, kind> {}(input_lexer, ap);
|
||||
return false;
|
||||
case LongLong:
|
||||
if constexpr (IsSame<T, int>::value)
|
||||
return read_element_concrete<T, long long, kind> {}(input_lexer, ap);
|
||||
if constexpr (IsSame<T, float>::value)
|
||||
return read_element_concrete<T, double, kind> {}(input_lexer, ap);
|
||||
return false;
|
||||
case IntMax:
|
||||
return read_element_concrete<T, intmax_t, kind> {}(input_lexer, ap);
|
||||
case Size:
|
||||
return read_element_concrete<T, size_t, kind> {}(input_lexer, ap);
|
||||
case PtrDiff:
|
||||
return read_element_concrete<T, ptrdiff_t, kind> {}(input_lexer, ap);
|
||||
case LongDouble:
|
||||
return read_element_concrete<T, long double, kind> {}(input_lexer, ap);
|
||||
}
|
||||
}
|
||||
return count;
|
||||
};
|
||||
|
||||
template<>
|
||||
struct read_element<char*, ReadKind::Normal> {
|
||||
read_element(StringView scan_set = {}, bool invert = false)
|
||||
: scan_set(scan_set.is_null() ? " \t\n\f\r" : scan_set)
|
||||
, invert(scan_set.is_null() ? true : invert)
|
||||
, was_null(scan_set.is_null())
|
||||
{
|
||||
}
|
||||
|
||||
bool operator()(LengthModifier length_modifier, GenericLexer& input_lexer, va_list* ap)
|
||||
{
|
||||
// FIXME: Implement wide strings and such.
|
||||
if (length_modifier != LengthModifier::Default)
|
||||
return false;
|
||||
|
||||
if (was_null)
|
||||
input_lexer.ignore_while(isspace);
|
||||
|
||||
auto* ptr = va_arg(*ap, char*);
|
||||
auto str = input_lexer.consume_while([this](auto c) { return this->matches(c); });
|
||||
if (str.is_empty())
|
||||
return false;
|
||||
|
||||
memcpy(ptr, str.characters_without_null_termination(), str.length());
|
||||
ptr[str.length()] = 0;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
private:
|
||||
bool matches(char c) const
|
||||
{
|
||||
return invert ^ scan_set.contains(c);
|
||||
}
|
||||
|
||||
const StringView scan_set;
|
||||
bool invert { false };
|
||||
bool was_null { false };
|
||||
};
|
||||
|
||||
template<>
|
||||
struct read_element<void*, ReadKind::Normal> {
|
||||
bool operator()(LengthModifier length_modifier, GenericLexer& input_lexer, va_list* ap)
|
||||
{
|
||||
if (length_modifier != LengthModifier::Default)
|
||||
return false;
|
||||
|
||||
input_lexer.ignore_while(isspace);
|
||||
|
||||
auto* ptr = va_arg(*ap, void**);
|
||||
auto str = input_lexer.consume_while([this](auto c) { return this->should_consume(c); });
|
||||
|
||||
if (count != 8) {
|
||||
fail:;
|
||||
for (size_t i = 0; i < count; ++i)
|
||||
input_lexer.retreat();
|
||||
return false;
|
||||
}
|
||||
|
||||
char buf[9] { 0 };
|
||||
memcpy(buf, str.characters_without_null_termination(), 8);
|
||||
buf[8] = 0;
|
||||
char* endptr = nullptr;
|
||||
auto value = strtoull(buf, &endptr, 16);
|
||||
|
||||
if (endptr != &buf[8])
|
||||
goto fail;
|
||||
|
||||
memcpy(ptr, &value, sizeof(value));
|
||||
return true;
|
||||
}
|
||||
|
||||
private:
|
||||
bool should_consume(char c)
|
||||
{
|
||||
if (count == 8)
|
||||
return false;
|
||||
if (!isxdigit(c))
|
||||
return false;
|
||||
|
||||
++count;
|
||||
return true;
|
||||
}
|
||||
size_t count { 0 };
|
||||
};
|
||||
|
||||
extern "C" int vsscanf(const char* input, const char* format, va_list ap)
|
||||
{
|
||||
GenericLexer format_lexer { format };
|
||||
GenericLexer input_lexer { input };
|
||||
|
||||
int elements_matched = 0;
|
||||
|
||||
while (!format_lexer.is_eof()) {
|
||||
format_lexer.ignore_while(isspace);
|
||||
if (!format_lexer.next_is('%')) {
|
||||
read_one_literal:;
|
||||
input_lexer.ignore_while(isspace);
|
||||
if (format_lexer.is_eof())
|
||||
break;
|
||||
|
||||
auto next_char = format_lexer.consume();
|
||||
if (!input_lexer.consume_specific(next_char))
|
||||
return elements_matched;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (format_lexer.next_is("%%")) {
|
||||
format_lexer.ignore();
|
||||
goto read_one_literal;
|
||||
}
|
||||
|
||||
format_lexer.ignore(); // '%'
|
||||
|
||||
bool invert_scanlist = false;
|
||||
StringView scanlist;
|
||||
LengthModifier length_modifier { None };
|
||||
ConversionSpecifier conversion_specifier { Unspecified };
|
||||
reread_lookahead:;
|
||||
auto format_lookahead = format_lexer.peek();
|
||||
if (length_modifier == None) {
|
||||
switch (format_lookahead) {
|
||||
case 'h':
|
||||
if (format_lexer.peek(1) == 'h') {
|
||||
format_lexer.consume(2);
|
||||
length_modifier = Char;
|
||||
} else {
|
||||
format_lexer.consume(1);
|
||||
length_modifier = Short;
|
||||
}
|
||||
break;
|
||||
case 'l':
|
||||
if (format_lexer.peek(1) == 'l') {
|
||||
format_lexer.consume(2);
|
||||
length_modifier = LongLong;
|
||||
} else {
|
||||
format_lexer.consume(1);
|
||||
length_modifier = Long;
|
||||
}
|
||||
break;
|
||||
case 'j':
|
||||
format_lexer.consume();
|
||||
length_modifier = IntMax;
|
||||
break;
|
||||
case 'z':
|
||||
format_lexer.consume();
|
||||
length_modifier = Size;
|
||||
break;
|
||||
case 't':
|
||||
format_lexer.consume();
|
||||
length_modifier = PtrDiff;
|
||||
break;
|
||||
case 'L':
|
||||
format_lexer.consume();
|
||||
length_modifier = LongDouble;
|
||||
break;
|
||||
default:
|
||||
length_modifier = Default;
|
||||
break;
|
||||
}
|
||||
goto reread_lookahead;
|
||||
}
|
||||
if (conversion_specifier == Unspecified) {
|
||||
switch (format_lookahead) {
|
||||
case 'd':
|
||||
format_lexer.consume();
|
||||
conversion_specifier = Decimal;
|
||||
break;
|
||||
case 'i':
|
||||
format_lexer.consume();
|
||||
conversion_specifier = Integer;
|
||||
break;
|
||||
case 'o':
|
||||
format_lexer.consume();
|
||||
conversion_specifier = Octal;
|
||||
break;
|
||||
case 'u':
|
||||
format_lexer.consume();
|
||||
conversion_specifier = Unsigned;
|
||||
break;
|
||||
case 'x':
|
||||
format_lexer.consume();
|
||||
conversion_specifier = Hex;
|
||||
break;
|
||||
case 'a':
|
||||
case 'e':
|
||||
case 'f':
|
||||
case 'g':
|
||||
format_lexer.consume();
|
||||
conversion_specifier = Floating;
|
||||
break;
|
||||
case 's':
|
||||
format_lexer.consume();
|
||||
conversion_specifier = String;
|
||||
break;
|
||||
case '[':
|
||||
format_lexer.consume();
|
||||
scanlist = format_lexer.consume_until(']');
|
||||
if (scanlist.starts_with('^')) {
|
||||
scanlist = scanlist.substring_view(1);
|
||||
invert_scanlist = true;
|
||||
}
|
||||
conversion_specifier = UseScanList;
|
||||
break;
|
||||
case 'c':
|
||||
format_lexer.consume();
|
||||
conversion_specifier = Character;
|
||||
break;
|
||||
case 'p':
|
||||
format_lexer.consume();
|
||||
conversion_specifier = Pointer;
|
||||
break;
|
||||
case 'n':
|
||||
format_lexer.consume();
|
||||
conversion_specifier = OutputNumberOfBytes;
|
||||
break;
|
||||
case 'C':
|
||||
format_lexer.consume();
|
||||
length_modifier = Long;
|
||||
conversion_specifier = Character;
|
||||
break;
|
||||
case 'S':
|
||||
format_lexer.consume();
|
||||
length_modifier = Long;
|
||||
conversion_specifier = String;
|
||||
break;
|
||||
default:
|
||||
format_lexer.consume();
|
||||
conversion_specifier = Invalid;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Now try to read.
|
||||
switch (conversion_specifier) {
|
||||
case Invalid:
|
||||
case Unspecified:
|
||||
default:
|
||||
// "undefined behaviour", let's be nice and crash.
|
||||
dbgln("Invalid conversion specifier {} in scanf!", (int)conversion_specifier);
|
||||
ASSERT_NOT_REACHED();
|
||||
case Decimal:
|
||||
if (!read_element<int, ReadKind::Normal> {}(length_modifier, input_lexer, &ap))
|
||||
format_lexer.consume_all();
|
||||
else
|
||||
++elements_matched;
|
||||
break;
|
||||
case Integer:
|
||||
if (!read_element<int, ReadKind::Infer> {}(length_modifier, input_lexer, &ap))
|
||||
format_lexer.consume_all();
|
||||
else
|
||||
++elements_matched;
|
||||
break;
|
||||
case Octal:
|
||||
if (!read_element<unsigned, ReadKind::Octal> {}(length_modifier, input_lexer, &ap))
|
||||
format_lexer.consume_all();
|
||||
else
|
||||
++elements_matched;
|
||||
break;
|
||||
case Unsigned:
|
||||
if (!read_element<unsigned, ReadKind::Normal> {}(length_modifier, input_lexer, &ap))
|
||||
format_lexer.consume_all();
|
||||
else
|
||||
++elements_matched;
|
||||
break;
|
||||
case Hex:
|
||||
if (!read_element<unsigned, ReadKind::Hex> {}(length_modifier, input_lexer, &ap))
|
||||
format_lexer.consume_all();
|
||||
else
|
||||
++elements_matched;
|
||||
break;
|
||||
case Floating:
|
||||
if (!read_element<float, ReadKind::Normal> {}(length_modifier, input_lexer, &ap))
|
||||
format_lexer.consume_all();
|
||||
else
|
||||
++elements_matched;
|
||||
break;
|
||||
case String:
|
||||
if (!read_element<char*, ReadKind::Normal> {}(length_modifier, input_lexer, &ap))
|
||||
format_lexer.consume_all();
|
||||
else
|
||||
++elements_matched;
|
||||
break;
|
||||
case UseScanList:
|
||||
if (!read_element<char*, ReadKind::Normal> { scanlist, invert_scanlist }(length_modifier, input_lexer, &ap))
|
||||
format_lexer.consume_all();
|
||||
else
|
||||
++elements_matched;
|
||||
break;
|
||||
case Character:
|
||||
if (!read_element<char, ReadKind::Normal> {}(length_modifier, input_lexer, &ap))
|
||||
format_lexer.consume_all();
|
||||
else
|
||||
++elements_matched;
|
||||
break;
|
||||
case Pointer:
|
||||
if (!read_element<void*, ReadKind::Normal> {}(length_modifier, input_lexer, &ap))
|
||||
format_lexer.consume_all();
|
||||
else
|
||||
++elements_matched;
|
||||
break;
|
||||
case OutputNumberOfBytes: {
|
||||
auto* ptr = va_arg(ap, int*);
|
||||
*ptr = input_lexer.tell();
|
||||
++elements_matched;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return elements_matched;
|
||||
}
|
||||
|
|
253
Userland/Tests/LibC/scanf.cpp
Normal file
253
Userland/Tests/LibC/scanf.cpp
Normal file
|
@ -0,0 +1,253 @@
|
|||
/*
|
||||
* Copyright (c) 2021, the SerenityOS developers.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <AK/Array.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
// Single-token aliases, so these type names can be pasted into identifiers by
// the DECL_WITH_TYPE macro below.
typedef long double longdouble;
|
||||
typedef long long longlong;
|
||||
// Fixed 32-byte buffer used as the destination of string conversions.
typedef char charstar[32];
|
||||
|
||||
template<typename T>
|
||||
constexpr static Array<unsigned char, 32> to_value_t(T x)
|
||||
{
|
||||
// The endianness doesn't really matter, since we're going to convert both sides with this anyway.
|
||||
union Value {
|
||||
u8 v[32];
|
||||
T t;
|
||||
};
|
||||
|
||||
auto value = Value { .t = x };
|
||||
|
||||
return {
|
||||
value.v[0],
|
||||
value.v[1],
|
||||
value.v[2],
|
||||
value.v[3],
|
||||
value.v[4],
|
||||
value.v[5],
|
||||
value.v[6],
|
||||
value.v[7],
|
||||
value.v[8],
|
||||
value.v[9],
|
||||
value.v[10],
|
||||
value.v[11],
|
||||
value.v[12],
|
||||
value.v[13],
|
||||
value.v[14],
|
||||
value.v[15],
|
||||
value.v[16],
|
||||
value.v[17],
|
||||
value.v[18],
|
||||
value.v[19],
|
||||
value.v[20],
|
||||
value.v[21],
|
||||
value.v[22],
|
||||
value.v[23],
|
||||
value.v[24],
|
||||
value.v[25],
|
||||
value.v[26],
|
||||
value.v[27],
|
||||
value.v[28],
|
||||
value.v[29],
|
||||
value.v[30],
|
||||
value.v[31],
|
||||
};
|
||||
}
|
||||
|
||||
template<size_t N>
|
||||
constexpr static Array<unsigned char, 32> str_to_value_t(const char (&x)[N])
|
||||
{
|
||||
Array<unsigned char, 32> value { 0 };
|
||||
for (size_t i = 0; i < N; ++i)
|
||||
value[i] = x[i];
|
||||
return value;
|
||||
}
|
||||
|
||||
// Describes one destination handed to sscanf(): where it lives and how many
// bytes of it take part in the comparison against the expected value.
struct Argument {
|
||||
size_t size; // sizeof() of the destination object.
|
||||
void* data; // Address of the destination object; this is what gets passed to sscanf().
|
||||
};
|
||||
|
||||
// Reads back the object an Argument points at and encodes it in the same fixed
// 32-byte, zero-padded representation that the expected values use.
//
// Objects smaller than 32 bytes are copied verbatim (this covers every scalar
// destination, whatever its size on the target). A 32-byte object is treated as
// a `charstar` string buffer: bytes are copied up to, but not including, the
// first NUL, and never past the end of the buffer.
static Array<u8, 32> arg_to_value_t(const Argument& arg)
{
    Array<u8, 32> value { 0 };
    auto* bytes = static_cast<const u8*>(arg.data);

    if (arg.size < 32) {
        memcpy(value.data(), bytes, arg.size);
        return value;
    }

    if (arg.size == 32) {
        // Bounded on purpose: an unterminated buffer must not index past `value`.
        for (size_t i = 0; i < 32 && bytes[i] != 0; ++i)
            value[i] = bytes[i];
        return value;
    }

    ASSERT_NOT_REACHED();
}
|
||||
|
||||
// For a type `ty`, declares three storage objects (_<ty>arg0.._<ty>arg2) and
// three Argument descriptors (<ty>arg0..<ty>arg2) that point at them.
#define DECL_WITH_TYPE(ty) \
|
||||
ty _##ty##arg0; \
|
||||
ty _##ty##arg1; \
|
||||
ty _##ty##arg2; \
|
||||
Argument ty##arg0 { sizeof(ty), &_##ty##arg0 }; \
|
||||
Argument ty##arg1 { sizeof(ty), &_##ty##arg1 }; \
|
||||
Argument ty##arg2 { sizeof(ty), &_##ty##arg2 };
|
||||
|
||||
DECL_WITH_TYPE(int);
|
||||
DECL_WITH_TYPE(unsigned);
|
||||
DECL_WITH_TYPE(long);
|
||||
DECL_WITH_TYPE(longlong);
|
||||
DECL_WITH_TYPE(float);
|
||||
DECL_WITH_TYPE(double);
|
||||
DECL_WITH_TYPE(longdouble);
|
||||
|
||||
#undef DECL_WITH_TYPE
|
||||
|
||||
// String destinations are declared by hand: the descriptor stores the address of
// the first character of each 32-byte buffer.
charstar _charstararg0;
|
||||
charstar _charstararg1;
|
||||
charstar _charstararg2;
|
||||
Argument charstararg0 { sizeof(charstar), &_charstararg0[0] };
|
||||
Argument charstararg1 { sizeof(charstar), &_charstararg1[0] };
|
||||
Argument charstararg2 { sizeof(charstar), &_charstararg2[0] };
|
||||
|
||||
// One sscanf() test case: the call to make and the results it must produce.
struct TestSuite {
|
||||
const char* format; // Format string passed to sscanf().
|
||||
const char* input; // Input string passed to sscanf().
|
||||
int expected_output; // Expected return value; also the number of leading arguments whose values are checked.
|
||||
size_t argument_count;
|
||||
Argument arguments[8]; // Destinations; all eight data pointers are always passed to sscanf().
|
||||
Array<unsigned char, 32> expected_values[8]; // 32 bytes for each argument's value.
|
||||
};
|
||||
|
||||
// Test table. Each entry is:
// { format, input, expected return value, argument count, { arguments... }, { expected values... } }
const TestSuite test_suites[] {
|
||||
{ "%d", "", 0, 0, {}, {} },
|
||||
{ "%x", "0x519", 1, 1, { unsignedarg0 }, { to_value_t(0x519) } },
|
||||
{ "%x", "0x51g", 1, 1, { unsignedarg0 }, { to_value_t(0x51u) } },
|
||||
{ "\"%%%d#", "\"%42#", 1, 1, { intarg0 }, { to_value_t(42) } },
|
||||
{ " %d", "42", 1, 1, { intarg0 }, { to_value_t(42) } },
|
||||
{ "%d", " 42", 1, 1, { intarg0 }, { to_value_t(42) } },
|
||||
{ "%ld", "42", 1, 1, { longarg0 }, { to_value_t(42l) } },
|
||||
{ "%lld", "42", 1, 1, { longlongarg0 }, { to_value_t(42ll) } },
|
||||
{ "%f", "42", 1, 1, { floatarg0 }, { to_value_t(42.0f) } },
|
||||
{ "%lf", "42", 1, 1, { doublearg0 }, { to_value_t(42.0) } },
|
||||
{ "%s", "42", 1, 1, { charstararg0 }, { str_to_value_t("42") } },
|
||||
{ "%d%s", "42yoinks", 2, 2, { intarg0, charstararg0 }, { to_value_t(42), str_to_value_t("yoinks") } },
|
||||
{ "%[^\n]", "aaaa\n", 1, 1, { charstararg0 }, { str_to_value_t("aaaa") } },
|
||||
// Only the first two conversions can succeed here; the third has no input left.
{ "%u.%u.%u", "3.19", 2, 3, { unsignedarg0, unsignedarg1, unsignedarg2 }, { to_value_t(3u), to_value_t(19u) } },
|
||||
// Failing test case from previous impl:
|
||||
{ "SSH-%d.%d-%[^\n]\n", "SSH-2.0-OpenSSH_8.2p1 Ubuntu-4ubuntu0.1\n", 3, 3, { intarg0, intarg1, charstararg0 }, { to_value_t(2), to_value_t(0), str_to_value_t("OpenSSH_8.2p1 Ubuntu-4ubuntu0.1") } },
|
||||
};
|
||||
|
||||
// Set once any test case fails; main() turns it into the process exit status.
bool g_any_failed = false;
|
||||
|
||||
static bool check_value_conformance(const TestSuite& test)
|
||||
{
|
||||
bool fail = false;
|
||||
for (int i = 0; i < test.expected_output; ++i) {
|
||||
auto& arg = test.arguments[i];
|
||||
auto arg_value = arg_to_value_t(arg);
|
||||
auto& value = test.expected_values[i];
|
||||
if (arg_value != value) {
|
||||
auto arg_ptr = (const u32*)arg_value.data();
|
||||
auto value_ptr = (const u32*)value.data();
|
||||
printf(" value %d FAIL, expected %04x%04x%04x%04x%04x%04x%04x%04x but got %04x%04x%04x%04x%04x%04x%04x%04x\n",
|
||||
i,
|
||||
value_ptr[0], value_ptr[1], value_ptr[2], value_ptr[3],
|
||||
value_ptr[4], value_ptr[5], value_ptr[6], value_ptr[7],
|
||||
arg_ptr[0], arg_ptr[1], arg_ptr[2], arg_ptr[3],
|
||||
arg_ptr[4], arg_ptr[5], arg_ptr[6], arg_ptr[7]);
|
||||
fail = true;
|
||||
} else {
|
||||
printf(" value %d PASS\n", i);
|
||||
}
|
||||
}
|
||||
|
||||
return !fail;
|
||||
}
|
||||
|
||||
static void do_one_test(const TestSuite& test)
|
||||
{
|
||||
printf("Testing '%s' against '%s'...\n", test.input, test.format);
|
||||
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wformat-nonliteral"
|
||||
auto rc = sscanf(test.input, test.format,
|
||||
test.arguments[0].data, test.arguments[1].data, test.arguments[2].data, test.arguments[3].data,
|
||||
test.arguments[4].data, test.arguments[5].data, test.arguments[6].data, test.arguments[7].data);
|
||||
#pragma GCC diagnostic pop
|
||||
|
||||
bool overall = true;
|
||||
printf(" output value...\n");
|
||||
if (rc != test.expected_output) {
|
||||
printf(" output value FAIL, expected %d but got %d\n", test.expected_output, rc);
|
||||
overall = false;
|
||||
} else {
|
||||
printf(" output value PASS\n");
|
||||
}
|
||||
|
||||
printf(" read values...\n");
|
||||
if (check_value_conformance(test)) {
|
||||
printf(" read values PASS\n");
|
||||
} else {
|
||||
printf(" read values FAIL\n");
|
||||
overall = false;
|
||||
}
|
||||
|
||||
if (overall)
|
||||
printf(" overall PASS\n");
|
||||
else
|
||||
printf(" overall FAIL\n");
|
||||
|
||||
g_any_failed = g_any_failed || !overall;
|
||||
}
|
||||
|
||||
int main()
|
||||
{
|
||||
for (auto& test : test_suites)
|
||||
do_one_test(test);
|
||||
|
||||
return g_any_failed ? 1 : 0;
|
||||
}
|
Loading…
Reference in a new issue