AK+LibPDF: Make Format print floats in a roundtrip-safe way by default

Previously we assumed a default precision of 6, which made the printed
values quite odd in some cases.
This commit changes that default to print them with just enough
precision to produce the exact same float when roundtripped.

This commit adds some new tests that assert exact format outputs, which
have to be modified if we decide to change the default behaviour.
This commit is contained in:
Ali Mohammad Pur 2023-10-30 23:31:47 +03:30 committed by Ali Mohammad Pur
parent 26a5d84d91
commit 78c04cb8b2
Notes: sideshowbarker 2024-07-17 03:14:39 +09:00
4 changed files with 193 additions and 9 deletions

View file

@ -30,6 +30,10 @@
# include <android/log.h>
#endif
#ifndef KERNEL
# include <AK/StringFloatingPointConversions.h>
#endif
namespace AK {
class FormatParser : public GenericLexer {
@ -470,7 +474,7 @@ static ErrorOr<void> round_up_digits(StringBuilder& digits_builder)
return digits_builder.try_append(digits_buffer);
}
ErrorOr<void> FormatBuilder::put_f64(
ErrorOr<void> FormatBuilder::put_f64_with_precision(
double value,
u8 base,
bool upper_case,
@ -543,6 +547,130 @@ ErrorOr<void> FormatBuilder::put_f64(
return put_string(string_builder.string_view(), align, min_width, NumericLimits<size_t>::max(), fill);
}
template<OneOf<f32, f64> T>
ErrorOr<void> FormatBuilder::put_f32_or_f64(
T value,
u8 base,
bool upper_case,
bool zero_pad,
bool use_separator,
Align align,
size_t min_width,
Optional<size_t> precision,
char fill,
SignMode sign_mode,
RealNumberDisplayMode display_mode)
{
if (precision.has_value() || base != 10)
return put_f64_with_precision(value, base, upper_case, zero_pad, use_separator, align, min_width, precision.value_or(6), fill, sign_mode, display_mode);
// No precision specified, so pick the best precision with roundtrip guarantees.
StringBuilder builder;
// Special cases: NaN, inf, -inf, 0 and -0.
auto const is_nan = isnan(value);
auto const is_inf = isinf(value);
auto const is_zero = value == static_cast<T>(0.0) || value == static_cast<T>(-0.0);
if (is_nan || is_inf || is_zero) {
if (value < 0)
TRY(builder.try_append('-'));
else if (sign_mode == SignMode::Always)
TRY(builder.try_append('+'));
else if (sign_mode == SignMode::Reserved)
TRY(builder.try_append(' '));
if (is_nan)
TRY(builder.try_append(upper_case ? "NAN"sv : "nan"sv));
else if (is_inf)
TRY(builder.try_append(upper_case ? "INF"sv : "inf"sv));
else
TRY(builder.try_append('0'));
return put_string(builder.string_view(), align, min_width, NumericLimits<size_t>::max(), fill);
}
auto const [sign, mantissa, exponent] = convert_floating_point_to_decimal_exponential_form(value);
auto convert_to_decimal_digits_array = [](auto x, auto& digits) -> size_t {
size_t length = 0;
for (; x; x /= 10)
digits[length++] = x % 10 | '0';
for (size_t i = 0; 2 * i + 1 < length; ++i)
swap(digits[i], digits[length - i - 1]);
return length;
};
Array<u8, 20> mantissa_digits;
auto mantissa_length = convert_to_decimal_digits_array(mantissa, mantissa_digits);
if (sign)
TRY(builder.try_append('-'));
else if (sign_mode == SignMode::Always)
TRY(builder.try_append('+'));
else if (sign_mode == SignMode::Reserved)
TRY(builder.try_append(' '));
auto const n = exponent + static_cast<i32>(mantissa_length);
auto const mantissa_text = StringView { mantissa_digits.span().slice(0, mantissa_length) };
size_t integral_part_end = 0;
// NOTE: Range from ECMA262, seems like an okay default.
if (n >= -5 && n <= 21) {
if (exponent >= 0) {
TRY(builder.try_append(mantissa_text));
TRY(builder.try_append_repeated('0', exponent));
integral_part_end = builder.length();
} else if (n > 0) {
TRY(builder.try_append(mantissa_text.substring_view(0, n)));
integral_part_end = builder.length();
TRY(builder.try_append('.'));
TRY(builder.try_append(mantissa_text.substring_view(n)));
} else {
TRY(builder.try_append("0."sv));
TRY(builder.try_append_repeated('0', -n));
TRY(builder.try_append(mantissa_text));
integral_part_end = 1;
}
} else {
auto const exponent_sign = n < 0 ? '-' : '+';
Array<u8, 5> exponent_digits;
auto const exponent_length = convert_to_decimal_digits_array(abs(n - 1), exponent_digits);
auto const exponent_text = StringView { exponent_digits.span().slice(0, exponent_length) };
integral_part_end = 1;
if (mantissa_length == 1) {
// <mantissa>e<exponent>
TRY(builder.try_append(mantissa_text));
TRY(builder.try_append('e'));
TRY(builder.try_append(exponent_sign));
TRY(builder.try_append(exponent_text));
} else {
// <mantissa>.<mantissa[1..]>e<exponent>
TRY(builder.try_append(mantissa_text.substring_view(0, 1)));
TRY(builder.try_append('.'));
TRY(builder.try_append(mantissa_text.substring_view(1)));
TRY(builder.try_append('e'));
TRY(builder.try_append(exponent_sign));
TRY(builder.try_append(exponent_text));
}
}
if (use_separator && integral_part_end > 3) {
// Go backwards from the end of the integral part, inserting commas every 3 consecutive digits.
StringBuilder separated_builder;
auto const string_view = builder.string_view();
for (size_t i = 0; i < integral_part_end; ++i) {
auto const index_from_end = integral_part_end - i - 1;
if (index_from_end > 0 && index_from_end != integral_part_end - 1 && index_from_end % 3 == 2)
TRY(separated_builder.try_append(','));
TRY(separated_builder.try_append(string_view[i]));
}
TRY(separated_builder.try_append(string_view.substring_view(integral_part_end)));
builder = move(separated_builder);
}
return put_string(builder.string_view(), align, min_width, NumericLimits<size_t>::max(), fill);
}
ErrorOr<void> FormatBuilder::put_f80(
long double value,
u8 base,
@ -909,15 +1037,33 @@ ErrorOr<void> Formatter<double>::format(FormatBuilder& builder, double value)
}
m_width = m_width.value_or(0);
m_precision = m_precision.value_or(6);
return builder.put_f64(value, base, upper_case, m_zero_pad, m_use_separator, m_align, m_width.value(), m_precision.value(), m_fill, m_sign_mode, real_number_display_mode);
return builder.put_f32_or_f64(value, base, upper_case, m_zero_pad, m_use_separator, m_align, m_width.value(), m_precision, m_fill, m_sign_mode, real_number_display_mode);
}
ErrorOr<void> Formatter<float>::format(FormatBuilder& builder, float value)
{
Formatter<double> formatter { *this };
return formatter.format(builder, value);
u8 base;
bool upper_case;
FormatBuilder::RealNumberDisplayMode real_number_display_mode = FormatBuilder::RealNumberDisplayMode::General;
if (m_mode == Mode::Default || m_mode == Mode::FixedPoint) {
base = 10;
upper_case = false;
if (m_mode == Mode::FixedPoint)
real_number_display_mode = FormatBuilder::RealNumberDisplayMode::FixedPoint;
} else if (m_mode == Mode::Hexfloat) {
base = 16;
upper_case = false;
} else if (m_mode == Mode::HexfloatUppercase) {
base = 16;
upper_case = true;
} else {
VERIFY_NOT_REACHED();
}
m_width = m_width.value_or(0);
return builder.put_f32_or_f64(value, base, upper_case, m_zero_pad, m_use_separator, m_align, m_width.value(), m_precision, m_fill, m_sign_mode, real_number_display_mode);
}
#endif

View file

@ -238,15 +238,16 @@ public:
SignMode sign_mode = SignMode::OnlyIfNeeded,
RealNumberDisplayMode = RealNumberDisplayMode::Default);
ErrorOr<void> put_f64(
double value,
template<OneOf<f32, f64> T>
ErrorOr<void> put_f32_or_f64(
T value,
u8 base = 10,
bool upper_case = false,
bool zero_pad = false,
bool use_separator = false,
Align align = Align::Right,
size_t min_width = 0,
size_t precision = 6,
Optional<size_t> precision = {},
char fill = ' ',
SignMode sign_mode = SignMode::OnlyIfNeeded,
RealNumberDisplayMode = RealNumberDisplayMode::Default);
@ -265,6 +266,21 @@ public:
private:
StringBuilder& m_builder;
#ifndef KERNEL
ErrorOr<void> put_f64_with_precision(
double value,
u8 base,
bool upper_case,
bool zero_pad,
bool use_separator,
Align align,
size_t min_width,
size_t precision,
char fill,
SignMode sign_mode,
RealNumberDisplayMode);
#endif
};
class TypeErasedFormatParams {

View file

@ -283,6 +283,28 @@ TEST_CASE(floating_point_numbers)
EXPECT_EQ(DeprecatedString::formatted("{:x>5.1}", 1.12), "xx1.1");
}
TEST_CASE(floating_point_default_precision)
{
#define EXPECT_FORMAT(lit, value) \
EXPECT_EQ(DeprecatedString::formatted("{}", lit), value##sv)
EXPECT_FORMAT(10.3f, "10.3");
EXPECT_FORMAT(123e4f, "1230000");
EXPECT_FORMAT(1.23e4f, "12300");
EXPECT_FORMAT(134232544.4365f, "134232540");
EXPECT_FORMAT(1.43e24, "1.43e+24");
EXPECT_FORMAT(1.43e-24, "1.43e-24");
EXPECT_FORMAT(0.0f, "0");
EXPECT_FORMAT(3.14159265358979, "3.14159265358979");
EXPECT_FORMAT(1.61803399, "1.61803399");
EXPECT_FORMAT(2.71827, "2.71827");
EXPECT_FORMAT(42.f, "42");
EXPECT_FORMAT(123456.78, "123456.78");
EXPECT_FORMAT(23456.78910, "23456.7891");
#undef EXPECT_FORMAT
}
TEST_CASE(no_precision_no_trailing_number)
{
EXPECT_EQ(DeprecatedString::formatted("{:.0}", 0.1), "0");

View file

@ -232,7 +232,7 @@ struct Formatter<PDF::Destination> : Formatter<FormatString> {
TRY(builder.put_literal(" parameters="sv));
for (auto const& param : destination.parameters) {
if (param.has_value())
TRY(builder.put_f64(double(param.value())));
TRY(builder.put_f32_or_f64(param.value()));
else
TRY(builder.put_literal("{{}}"sv));
TRY(builder.put_literal(" "sv));