mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2024-11-25 00:50:22 +00:00
AK: Borrow exact format syntax from std::format.
Instead of just implementing format specifiers ad hoc, this commit implements the exact syntax std::format uses. There are still a ton of features that are not supported by this implementation; however, the format specifiers should be parsed correctly. In some cases, however, the format specifiers aren't quite parsed correctly. For example, String::formatted("{:{}}", 42, 4) should produce the string "  42", but an (unrelated) assertion fails instead. This is because vformat doesn't consider nested braces. I have to spend some time coming up with a simple way of doing this, and I don't feel like doing that right now. The fundamental code for this already exists: by limiting the number of format arguments (arbitrarily) to 256, large width values can be used to encode that the width should be taken from another format parameter.
This commit is contained in:
parent
84d276dba0
commit
2030084746
Notes:
sideshowbarker
2024-07-19 02:13:28 +09:00
Author: https://github.com/asynts Commit: https://github.com/SerenityOS/serenity/commit/20300847460 Pull-request: https://github.com/SerenityOS/serenity/pull/3597 Reviewed-by: https://github.com/benit8
2 changed files with 203 additions and 60 deletions
189
AK/Format.cpp
189
AK/Format.cpp
|
@ -73,28 +73,32 @@ static void write_escaped_literal(StringBuilder& builder, StringView literal)
|
|||
}
|
||||
}
|
||||
|
||||
static size_t parse_number(StringView input)
|
||||
static bool parse_number(GenericLexer& lexer, size_t& value)
|
||||
{
|
||||
size_t value = 0;
|
||||
value = 0;
|
||||
|
||||
for (char ch : input) {
|
||||
value *= 10;
|
||||
value += ch - '0';
|
||||
bool consumed_at_least_one = false;
|
||||
while (!lexer.is_eof()) {
|
||||
if (lexer.next_is(is_digit)) {
|
||||
value *= 10;
|
||||
value += lexer.consume() - '0';
|
||||
consumed_at_least_one = true;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return value;
|
||||
return consumed_at_least_one;
|
||||
}
|
||||
|
||||
constexpr size_t use_next_index = NumericLimits<size_t>::max();
|
||||
|
||||
static bool parse_format_specifier(StringView input, FormatSpecifier& specifier)
|
||||
{
|
||||
specifier.index = NumericLimits<size_t>::max();
|
||||
|
||||
GenericLexer lexer { input };
|
||||
|
||||
auto index = lexer.consume_while([](char ch) { return StringView { "0123456789" }.contains(ch); });
|
||||
|
||||
if (index.length() > 0)
|
||||
specifier.index = parse_number(index);
|
||||
if (!parse_number(lexer, specifier.index))
|
||||
specifier.index = use_next_index;
|
||||
|
||||
if (!lexer.consume_specific(':'))
|
||||
return lexer.is_eof();
|
||||
|
@ -103,6 +107,20 @@ static bool parse_format_specifier(StringView input, FormatSpecifier& specifier)
|
|||
return true;
|
||||
}
|
||||
|
||||
static bool parse_nested_replacement_field(GenericLexer& lexer, size_t& index)
|
||||
{
|
||||
if (!lexer.consume_specific('{'))
|
||||
return false;
|
||||
|
||||
if (!parse_number(lexer, index))
|
||||
index = use_next_index;
|
||||
|
||||
if (!lexer.consume_specific('}'))
|
||||
ASSERT_NOT_REACHED();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
namespace AK {
|
||||
|
@ -137,8 +155,7 @@ void vformat(StringBuilder& builder, StringView fmtstr, AK::Span<const TypeErase
|
|||
ASSERT_NOT_REACHED();
|
||||
|
||||
auto& parameter = parameters[specifier.index];
|
||||
if (!parameter.formatter(builder, parameter.value, specifier.flags))
|
||||
ASSERT_NOT_REACHED();
|
||||
parameter.formatter(builder, parameter.value, specifier.flags, parameters);
|
||||
|
||||
vformat(builder, fmtstr.substring_view(closing + 1), parameters, argument_index);
|
||||
}
|
||||
|
@ -149,43 +166,133 @@ void vformat(const LogStream& stream, StringView fmtstr, Span<const TypeErasedPa
|
|||
stream << builder.to_string();
|
||||
}
|
||||
|
||||
bool Formatter<StringView>::parse(StringView flags)
|
||||
void StandardFormatter::parse(StringView specifier)
|
||||
{
|
||||
return flags.is_empty();
|
||||
GenericLexer lexer { specifier };
|
||||
|
||||
if (StringView { "<^>" }.contains(lexer.peek(1))) {
|
||||
ASSERT(!lexer.next_is(is_any_of("{}")));
|
||||
m_fill = lexer.consume();
|
||||
}
|
||||
|
||||
if (lexer.consume_specific('<'))
|
||||
m_align = Align::Left;
|
||||
else if (lexer.consume_specific('^'))
|
||||
m_align = Align::Center;
|
||||
else if (lexer.consume_specific('>'))
|
||||
m_align = Align::Right;
|
||||
|
||||
if (lexer.consume_specific('-'))
|
||||
m_sign = Sign::NegativeOnly;
|
||||
else if (lexer.consume_specific('+'))
|
||||
m_sign = Sign::PositiveAndNegative;
|
||||
else if (lexer.consume_specific(' '))
|
||||
m_sign = Sign::ReserveSpace;
|
||||
|
||||
if (lexer.consume_specific('#'))
|
||||
m_alternative_form = true;
|
||||
|
||||
if (lexer.consume_specific('0'))
|
||||
m_zero_pad = true;
|
||||
|
||||
if (size_t index = 0; parse_nested_replacement_field(lexer, index))
|
||||
m_width = value_from_arg + index;
|
||||
else if (size_t width = 0; parse_number(lexer, width))
|
||||
m_width = width;
|
||||
|
||||
if (lexer.consume_specific('.')) {
|
||||
if (size_t index = 0; parse_nested_replacement_field(lexer, index))
|
||||
m_precision = value_from_arg + index;
|
||||
else if (size_t precision = 0; parse_number(lexer, precision))
|
||||
m_precision = precision;
|
||||
}
|
||||
|
||||
if (lexer.consume_specific('b'))
|
||||
m_mode = Mode::Binary;
|
||||
else if (lexer.consume_specific('d'))
|
||||
m_mode = Mode::Decimal;
|
||||
else if (lexer.consume_specific('o'))
|
||||
m_mode = Mode::Octal;
|
||||
else if (lexer.consume_specific('x'))
|
||||
m_mode = Mode::Hexadecimal;
|
||||
else if (lexer.consume_specific('c'))
|
||||
m_mode = Mode::Character;
|
||||
else if (lexer.consume_specific('s'))
|
||||
m_mode = Mode::String;
|
||||
else if (lexer.consume_specific('p'))
|
||||
m_mode = Mode::Pointer;
|
||||
|
||||
if (!lexer.is_eof())
|
||||
dbg() << __PRETTY_FUNCTION__ << " did not consume '" << lexer.remaining() << "'";
|
||||
|
||||
ASSERT(lexer.is_eof());
|
||||
}
|
||||
void Formatter<StringView>::format(StringBuilder& builder, StringView value)
|
||||
|
||||
void Formatter<StringView>::format(StringBuilder& builder, StringView value, Span<const TypeErasedParameter>)
|
||||
{
|
||||
if (m_align != Align::Default)
|
||||
TODO();
|
||||
if (m_sign != Sign::Default)
|
||||
ASSERT_NOT_REACHED();
|
||||
if (m_alternative_form)
|
||||
ASSERT_NOT_REACHED();
|
||||
if (m_zero_pad)
|
||||
ASSERT_NOT_REACHED();
|
||||
if (m_width != value_not_set)
|
||||
TODO();
|
||||
if (m_precision != value_not_set)
|
||||
TODO();
|
||||
if (m_mode != Mode::Default && m_mode != Mode::String)
|
||||
ASSERT_NOT_REACHED();
|
||||
|
||||
builder.append(value);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
bool Formatter<T, typename EnableIf<IsIntegral<T>::value>::Type>::parse(StringView flags)
|
||||
void Formatter<T, typename EnableIf<IsIntegral<T>::value>::Type>::format(StringBuilder& builder, T value, Span<const TypeErasedParameter> parameters)
|
||||
{
|
||||
GenericLexer lexer { flags };
|
||||
if (m_align != Align::Default)
|
||||
TODO();
|
||||
if (m_sign != Sign::Default)
|
||||
TODO();
|
||||
if (m_alternative_form)
|
||||
TODO();
|
||||
if (m_precision != value_not_set)
|
||||
ASSERT_NOT_REACHED();
|
||||
if (m_mode == Mode::Character)
|
||||
TODO();
|
||||
|
||||
if (lexer.consume_specific('0'))
|
||||
zero_pad = true;
|
||||
|
||||
auto field_width = lexer.consume_while([](char ch) { return StringView { "0123456789" }.contains(ch); });
|
||||
if (field_width.length() > 0)
|
||||
this->field_width = parse_number(field_width);
|
||||
|
||||
if (lexer.consume_specific('x'))
|
||||
hexadecimal = true;
|
||||
|
||||
return lexer.is_eof();
|
||||
}
|
||||
template<typename T>
|
||||
void Formatter<T, typename EnableIf<IsIntegral<T>::value>::Type>::format(StringBuilder& builder, T value)
|
||||
{
|
||||
char* bufptr;
|
||||
|
||||
if (hexadecimal)
|
||||
PrintfImplementation::print_hex([&](auto, char ch) { builder.append(ch); }, bufptr, value, false, false, false, zero_pad, field_width);
|
||||
else if (IsSame<typename MakeUnsigned<T>::Type, T>::value)
|
||||
PrintfImplementation::print_u64([&](auto, char ch) { builder.append(ch); }, bufptr, value, false, zero_pad, field_width);
|
||||
int base;
|
||||
if (m_mode == Mode::Binary)
|
||||
TODO();
|
||||
else if (m_mode == Mode::Octal)
|
||||
TODO();
|
||||
else if (m_mode == Mode::Decimal || m_mode == Mode::Default)
|
||||
base = 10;
|
||||
else if (m_mode == Mode::Hexadecimal)
|
||||
base = 16;
|
||||
else
|
||||
PrintfImplementation::print_i64([&](auto, char ch) { builder.append(ch); }, bufptr, value, false, zero_pad, field_width);
|
||||
ASSERT_NOT_REACHED();
|
||||
|
||||
size_t width = m_width;
|
||||
if (m_width >= value_from_arg) {
|
||||
const auto parameter = parameters.at(m_width - value_from_arg);
|
||||
|
||||
// FIXME: Totally unsafe cast. We should store the type in TypeErasedParameter. For compactness it could be smart to
|
||||
// find a few addresses that can not be valid function pointers and encode the type information there?
|
||||
width = *reinterpret_cast<const size_t*>(parameter.value);
|
||||
}
|
||||
|
||||
// FIXME: We really need one canonical print implementation that just takes a base.
|
||||
(void)base;
|
||||
|
||||
char* bufptr;
|
||||
if (m_mode == Mode::Hexadecimal)
|
||||
PrintfImplementation::print_hex([&](auto, char ch) { builder.append(ch); }, bufptr, value, false, false, false, m_zero_pad, width);
|
||||
else if (IsSame<typename MakeUnsigned<T>::Type, T>::value)
|
||||
PrintfImplementation::print_u64([&](auto, char ch) { builder.append(ch); }, bufptr, value, false, m_zero_pad, width);
|
||||
else
|
||||
PrintfImplementation::print_i64([&](auto, char ch) { builder.append(ch); }, bufptr, value, false, m_zero_pad, width);
|
||||
}
|
||||
|
||||
template struct Formatter<unsigned char, void>;
|
||||
|
|
74
AK/Format.h
74
AK/Format.h
|
@ -38,35 +38,75 @@ namespace AK {
|
|||
template<typename T, typename = void>
|
||||
struct Formatter;
|
||||
|
||||
struct TypeErasedParameter {
|
||||
const void* value;
|
||||
void (*formatter)(StringBuilder& builder, const void* value, StringView specifier, Span<const TypeErasedParameter> parameters);
|
||||
};
|
||||
|
||||
} // namespace AK
|
||||
|
||||
namespace AK::Detail::Format {
|
||||
|
||||
template<typename T>
|
||||
bool format_value(StringBuilder& builder, const void* value, StringView flags)
|
||||
void format_value(StringBuilder& builder, const void* value, StringView specifier, AK::Span<const TypeErasedParameter> parameters)
|
||||
{
|
||||
Formatter<T> formatter;
|
||||
|
||||
if (!formatter.parse(flags))
|
||||
return false;
|
||||
|
||||
formatter.format(builder, *static_cast<const T*>(value));
|
||||
return true;
|
||||
formatter.parse(specifier);
|
||||
formatter.format(builder, *static_cast<const T*>(value), parameters);
|
||||
}
|
||||
|
||||
} // namespace AK::Detail::Format
|
||||
|
||||
namespace AK {
|
||||
|
||||
struct TypeErasedParameter {
|
||||
const void* value;
|
||||
bool (*formatter)(StringBuilder& builder, const void* value, StringView flags);
|
||||
constexpr size_t max_format_arguments = 256;
|
||||
|
||||
// We use the same format for most types for consistency. This is taken directly from std::format.
|
||||
// Not all valid options do anything yet.
|
||||
// https://en.cppreference.com/w/cpp/utility/format/formatter#Standard_format_specification
|
||||
struct StandardFormatter {
|
||||
enum class Align {
|
||||
Default,
|
||||
Left,
|
||||
Right,
|
||||
Center,
|
||||
};
|
||||
enum class Sign {
|
||||
NegativeOnly,
|
||||
PositiveAndNegative,
|
||||
ReserveSpace,
|
||||
Default = NegativeOnly
|
||||
};
|
||||
enum class Mode {
|
||||
Default,
|
||||
Binary,
|
||||
Decimal,
|
||||
Octal,
|
||||
Hexadecimal,
|
||||
Character,
|
||||
String,
|
||||
Pointer,
|
||||
};
|
||||
|
||||
static constexpr size_t value_not_set = 0;
|
||||
static constexpr size_t value_from_arg = NumericLimits<size_t>::max() - max_format_arguments;
|
||||
|
||||
Align m_align = Align::Default;
|
||||
Sign m_sign = Sign::NegativeOnly;
|
||||
Mode m_mode = Mode::Default;
|
||||
bool m_alternative_form = false;
|
||||
char m_fill = ' ';
|
||||
bool m_zero_pad = false;
|
||||
size_t m_width = value_not_set;
|
||||
size_t m_precision = value_not_set;
|
||||
|
||||
void parse(StringView specifier);
|
||||
};
|
||||
|
||||
template<>
|
||||
struct Formatter<StringView> {
|
||||
bool parse(StringView flags);
|
||||
void format(StringBuilder& builder, StringView value);
|
||||
struct Formatter<StringView> : StandardFormatter {
|
||||
void format(StringBuilder& builder, StringView value, Span<const TypeErasedParameter>);
|
||||
};
|
||||
template<>
|
||||
struct Formatter<const char*> : Formatter<StringView> {
|
||||
|
@ -82,18 +122,14 @@ struct Formatter<String> : Formatter<StringView> {
|
|||
};
|
||||
|
||||
template<typename T>
|
||||
struct Formatter<T, typename EnableIf<IsIntegral<T>::value>::Type> {
|
||||
bool parse(StringView flags);
|
||||
void format(StringBuilder&, T value);
|
||||
|
||||
bool zero_pad { false };
|
||||
bool hexadecimal { false };
|
||||
size_t field_width { 0 };
|
||||
struct Formatter<T, typename EnableIf<IsIntegral<T>::value>::Type> : StandardFormatter {
|
||||
void format(StringBuilder&, T value, Span<const TypeErasedParameter>);
|
||||
};
|
||||
|
||||
template<typename... Parameters>
|
||||
Array<TypeErasedParameter, sizeof...(Parameters)> make_type_erased_parameters(const Parameters&... parameters)
|
||||
{
|
||||
static_assert(sizeof...(Parameters) <= max_format_arguments);
|
||||
return { TypeErasedParameter { &parameters, Detail::Format::format_value<Parameters> }... };
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in a new issue