AK+Userland: Extend the compiletime format string check to other functions

Thanks to @trflynn89 for the neat implicit consteval ctor trick!
This allows us to basically slap `CheckedFormatString` on any
formatting function, and have its format argument checked at compiletime.

Note that there is a validator bug where it doesn't parse inner replaced
fields like `{:~>{}}` correctly (what should be 'left align with next
argument as size' is parsed as `{:~>{` following a literal closing
brace), so the compiletime checks are disabled on these temporarily by
forcing them to be StringViews.

This commit also removes the now unused `AK::StringLiteral` type (which
was introduced for use with NTTP strings).
This commit is contained in:
AnotherTest 2021-02-22 02:37:24 +03:30 committed by Andreas Kling
parent 29c8d34be7
commit 347d741afb
Notes: sideshowbarker 2024-07-18 21:59:54 +09:00
6 changed files with 277 additions and 229 deletions

242
AK/CheckedFormatString.h Normal file
View file

@ -0,0 +1,242 @@
/*
* Copyright (c) 2021, the SerenityOS developers.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include <AK/AllOf.h>
#include <AK/AnyOf.h>
#include <AK/StdLibExtras.h>
#include <AK/StringView.h>
#ifndef DBGLN_NO_COMPILETIME_FORMAT_CHECK
// FIXME: Seems like clang doesn't like calling 'consteval' functions inside 'consteval' functions quite the same way as GCC does,
// it seems to entirely forget that it accepted that parameters to a 'consteval' function to begin with.
# ifdef __clang__
# define DBGLN_NO_COMPILETIME_FORMAT_CHECK
# endif
#endif
#ifndef DBGLN_NO_COMPILETIME_FORMAT_CHECK
namespace AK::Format::Detail {
// We have to define a local "purely constexpr" Array that doesn't lead back to us (via e.g. ASSERT)
template<typename T, size_t Size>
struct Array {
constexpr static size_t size() { return Size; }
constexpr const T& operator[](size_t index) const { return __data[index]; }
constexpr T& operator[](size_t index) { return __data[index]; }
using ConstIterator = SimpleIterator<const Array, const T>;
using Iterator = SimpleIterator<Array, T>;
constexpr ConstIterator begin() const { return ConstIterator::begin(*this); }
constexpr Iterator begin() { return Iterator::begin(*this); }
constexpr ConstIterator end() const { return ConstIterator::end(*this); }
constexpr Iterator end() { return Iterator::end(*this); }
T __data[Size];
};
template<typename... Args>
void compiletime_fail(Args...);
template<size_t N>
consteval auto extract_used_argument_index(const char (&fmt)[N], size_t specifier_start_index, size_t specifier_end_index, size_t& next_implicit_argument_index)
{
struct {
size_t index_value { 0 };
bool saw_explicit_index { false };
} state;
for (size_t i = specifier_start_index; i < specifier_end_index; ++i) {
auto c = fmt[i];
if (c > '9' || c < '0')
break;
state.index_value *= 10;
state.index_value += c - '0';
state.saw_explicit_index = true;
}
if (!state.saw_explicit_index)
return next_implicit_argument_index++;
return state.index_value;
}
// FIXME: We should rather parse these format strings at compile-time if possible.
template<size_t N>
consteval auto count_fmt_params(const char (&fmt)[N])
{
struct {
// FIXME: Switch to variable-sized storage whenever we can come up with one :)
Array<size_t, 128> used_arguments { 0 };
size_t total_used_argument_count { 0 };
size_t next_implicit_argument_index { 0 };
bool has_explicit_argument_references { false };
size_t unclosed_braces { 0 };
size_t extra_closed_braces { 0 };
Array<size_t, 4> last_format_specifier_start { 0 };
size_t total_used_last_format_specifier_start_count { 0 };
} result;
for (size_t i = 0; i < N; ++i) {
auto ch = fmt[i];
switch (ch) {
case '{':
if (i + 1 < N && fmt[i + 1] == '{') {
++i;
continue;
}
// Note: There's no compile-time throw, so we have to abuse a compile-time string to store errors.
if (result.total_used_last_format_specifier_start_count >= result.last_format_specifier_start.size() - 1)
compiletime_fail("Format-String Checker internal error: Format specifier nested too deep");
result.last_format_specifier_start[result.total_used_last_format_specifier_start_count++] = i + 1;
++result.unclosed_braces;
break;
case '}':
if (i + 1 < N && fmt[i + 1] == '}') {
++i;
continue;
}
if (result.unclosed_braces) {
--result.unclosed_braces;
if (result.total_used_last_format_specifier_start_count == 0)
compiletime_fail("Format-String Checker internal error: Expected location information");
const auto specifier_start_index = result.last_format_specifier_start[--result.total_used_last_format_specifier_start_count];
if (result.total_used_argument_count >= result.used_arguments.size())
compiletime_fail("Format-String Checker internal error: Too many format arguments in format string");
auto used_argument_index = extract_used_argument_index<N>(fmt, specifier_start_index, i, result.next_implicit_argument_index);
if (used_argument_index + 1 != result.next_implicit_argument_index)
result.has_explicit_argument_references = true;
result.used_arguments[result.total_used_argument_count++] = used_argument_index;
} else {
++result.extra_closed_braces;
}
break;
default:
continue;
}
}
return result;
}
}
#endif
namespace AK::Format::Detail {
template<typename... Args>
struct CheckedFormatString {
template<size_t N>
consteval CheckedFormatString(const char (&fmt)[N])
: m_string { fmt }
{
#ifndef DBGLN_NO_COMPILETIME_FORMAT_CHECK
check_format_parameter_consistency<N, sizeof...(Args)>(fmt);
#endif
}
template<typename T>
CheckedFormatString(const T& unchecked_fmt) requires(requires(T t) { StringView { t }; })
: m_string(unchecked_fmt)
{
}
auto view() const { return m_string; }
private:
#ifndef DBGLN_NO_COMPILETIME_FORMAT_CHECK
template<size_t N, size_t param_count>
consteval static bool check_format_parameter_consistency(const char (&fmt)[N])
{
auto check = count_fmt_params<N>(fmt);
if (check.unclosed_braces != 0)
compiletime_fail("Extra unclosed braces in format string");
if (check.extra_closed_braces != 0)
compiletime_fail("Extra closing braces in format string");
{
auto begin = check.used_arguments.begin();
auto end = check.used_arguments.begin() + check.total_used_argument_count;
auto has_all_referenced_arguments = !AK::any_of(begin, end, [](auto& entry) { return entry >= param_count; });
if (!has_all_referenced_arguments)
compiletime_fail("Format string references nonexistent parameter");
}
if (!check.has_explicit_argument_references && check.total_used_argument_count != param_count)
compiletime_fail("Format string does not reference all passed parameters");
// Ensure that no passed parameter is ignored or otherwise not referenced in the format
// As this check is generally pretty expensive, try to avoid it where it cannot fail.
// We will only do this check if the format string has explicit argument refs
// otherwise, the check above covers this check too, as implicit refs
// monotonically increase, and cannot have 'gaps'.
if (check.has_explicit_argument_references) {
auto all_parameters = iota_array<size_t, param_count>(0);
constexpr auto contains = [](auto begin, auto end, auto entry) {
for (; begin != end; begin++) {
if (*begin == entry)
return true;
}
return false;
};
auto references_all_arguments = AK::all_of(
all_parameters.begin(),
all_parameters.end(),
[&](auto& entry) {
return contains(
check.used_arguments.begin(),
check.used_arguments.begin() + check.total_used_argument_count,
entry);
});
if (!references_all_arguments)
compiletime_fail("Format string does not reference all passed parameters");
}
return true;
}
#endif
StringView m_string;
};
}
namespace AK {
template<typename... Args>
using CheckedFormatString = Format::Detail::CheckedFormatString<typename IdentityType<Args>::Type...>;
}

View file

@ -26,6 +26,8 @@
#pragma once
#include <AK/CheckedFormatString.h>
#include <AK/AllOf.h>
#include <AK/AnyOf.h>
#include <AK/Array.h>
@ -37,161 +39,6 @@
# include <stdio.h>
#endif
#ifndef DBGLN_NO_COMPILETIME_FORMAT_CHECK
// Note: Clang 12 adds support for CTAD, but still fails to build the dbgln() checks, so they're disabled altogether for now.
// See https://oss-fuzz-build-logs.storage.googleapis.com/log-79750138-f41e-4f39-8812-7c536f1d2e35.txt, for example.
# if defined(__clang__)
# define DBGLN_NO_COMPILETIME_FORMAT_CHECK
# endif
#endif
#ifndef DBGLN_NO_COMPILETIME_FORMAT_CHECK
namespace {
template<size_t N>
consteval auto extract_used_argument_index(const char (&fmt)[N], size_t specifier_start_index, size_t specifier_end_index, size_t& next_implicit_argument_index)
{
struct {
size_t index_value { 0 };
bool saw_explicit_index { false };
} state;
for (size_t i = specifier_start_index; i < specifier_end_index; ++i) {
auto c = fmt[i];
if (c > '9' || c < '0')
break;
state.index_value *= 10;
state.index_value += c - '0';
state.saw_explicit_index = true;
}
if (!state.saw_explicit_index)
return next_implicit_argument_index++;
return state.index_value;
}
// FIXME: We should rather parse these format strings at compile-time if possible.
template<size_t N>
consteval auto count_fmt_params(const char (&fmt)[N])
{
struct {
// FIXME: Switch to variable-sized storage whenever we can come up with one :)
Array<size_t, 128> used_arguments { 0 };
size_t total_used_argument_count { 0 };
size_t next_implicit_argument_index { 0 };
bool has_explicit_argument_references { false };
size_t unclosed_braces { 0 };
size_t extra_closed_braces { 0 };
Array<size_t, 4> last_format_specifier_start { 0 };
size_t total_used_last_format_specifier_start_count { 0 };
StringLiteral<128> internal_error { { 0 } };
} result;
for (size_t i = 0; i < N; ++i) {
auto ch = fmt[i];
switch (ch) {
case '{':
if (i + 1 < N && fmt[i + 1] == '{') {
++i;
continue;
}
// Note: There's no compile-time throw, so we have to abuse a compile-time string to store errors.
if (result.total_used_last_format_specifier_start_count >= result.last_format_specifier_start.size() - 1)
result.internal_error = "Format-String Checker internal error: Format specifier nested too deep";
result.last_format_specifier_start[result.total_used_last_format_specifier_start_count++] = i + 1;
++result.unclosed_braces;
break;
case '}':
if (i + 1 < N && fmt[i + 1] == '}') {
++i;
continue;
}
if (result.unclosed_braces) {
--result.unclosed_braces;
if (result.total_used_last_format_specifier_start_count == 0)
result.internal_error = "Format-String Checker internal error: Expected location information";
const auto specifier_start_index = result.last_format_specifier_start[--result.total_used_last_format_specifier_start_count];
if (result.total_used_argument_count >= result.used_arguments.size())
result.internal_error = "Format-String Checker internal error: Too many format arguments in format string";
auto used_argument_index = extract_used_argument_index<N>(fmt, specifier_start_index, i, result.next_implicit_argument_index);
if (used_argument_index + 1 != result.next_implicit_argument_index)
result.has_explicit_argument_references = true;
result.used_arguments[result.total_used_argument_count++] = used_argument_index;
} else {
++result.extra_closed_braces;
}
break;
default:
continue;
}
}
return result;
}
}
template<size_t N, StringLiteral<N> fmt, size_t param_count, auto check = count_fmt_params<N>(fmt.data)>
constexpr bool check_format_parameter_consistency()
{
static_assert(check.internal_error.data[0] == 0, "Some internal error occured, try looking at the check function type for the error");
static_assert(check.unclosed_braces == 0, "Extra unclosed braces in format string");
static_assert(check.extra_closed_braces == 0, "Extra closing braces in format string");
{
constexpr auto begin = check.used_arguments.begin();
constexpr auto end = check.used_arguments.begin() + check.total_used_argument_count;
constexpr auto has_all_referenced_arguments = !AK::any_of(begin, end, [](auto& entry) { return entry >= param_count; });
static_assert(has_all_referenced_arguments, "Format string references nonexistent parameter");
}
if constexpr (!check.has_explicit_argument_references)
static_assert(check.total_used_argument_count == param_count, "Format string does not reference all passed parameters");
// Ensure that no passed parameter is ignored or otherwise not referenced in the format
// As this check is generally pretty expensive, try to avoid it where it cannot fail.
// We will only do this check if the format string has explicit argument refs
// otherwise, the check above covers this check too, as implicit refs
// monotonically increase, and cannot have 'gaps'.
if constexpr (check.has_explicit_argument_references) {
constexpr auto all_parameters = iota_array<size_t, param_count>(0);
auto contains = [](auto begin, auto end, auto entry) {
for (; begin != end; begin++) {
if (*begin == entry)
return true;
}
return false;
};
constexpr auto references_all_arguments = AK::all_of(
all_parameters.begin(),
all_parameters.end(),
[&](auto& entry) {
return contains(
check.used_arguments.begin(),
check.used_arguments.begin() + check.total_used_argument_count,
entry);
});
static_assert(references_all_arguments, "Format string does not reference all passed parameters");
}
return true;
}
template<auto fmt, auto param_count>
concept ConsistentFormatParameters = check_format_parameter_consistency<fmt.size, fmt, param_count>();
#endif
namespace AK {
class TypeErasedFormatParams;
@ -523,48 +370,38 @@ void vformat(const LogStream& stream, StringView fmtstr, TypeErasedFormatParams)
void vout(FILE*, StringView fmtstr, TypeErasedFormatParams, bool newline = false);
template<typename... Parameters>
void out(FILE* file, StringView fmtstr, const Parameters&... parameters) { vout(file, fmtstr, VariadicFormatParams { parameters... }); }
void out(FILE* file, CheckedFormatString<Parameters...>&& fmtstr, const Parameters&... parameters) { vout(file, fmtstr.view(), VariadicFormatParams { parameters... }); }
template<typename... Parameters>
void outln(FILE* file, StringView fmtstr, const Parameters&... parameters) { vout(file, fmtstr, VariadicFormatParams { parameters... }, true); }
template<typename... Parameters>
void outln(FILE* file, const char* fmtstr, const Parameters&... parameters) { vout(file, fmtstr, VariadicFormatParams { parameters... }, true); }
void outln(FILE* file, CheckedFormatString<Parameters...>&& fmtstr, const Parameters&... parameters) { vout(file, fmtstr.view(), VariadicFormatParams { parameters... }, true); }
inline void outln(FILE* file) { fputc('\n', file); }
template<typename... Parameters>
void out(StringView fmtstr, const Parameters&... parameters) { out(stdout, fmtstr, parameters...); }
void out(CheckedFormatString<Parameters...>&& fmtstr, const Parameters&... parameters) { out(stdout, move(fmtstr), parameters...); }
template<typename... Parameters>
void outln(StringView fmtstr, const Parameters&... parameters) { outln(stdout, fmtstr, parameters...); }
template<typename... Parameters>
void outln(const char* fmtstr, const Parameters&... parameters) { outln(stdout, fmtstr, parameters...); }
void outln(CheckedFormatString<Parameters...>&& fmtstr, const Parameters&... parameters) { outln(stdout, move(fmtstr), parameters...); }
inline void outln() { outln(stdout); }
template<typename... Parameters>
void warn(StringView fmtstr, const Parameters&... parameters) { out(stderr, fmtstr, parameters...); }
void warn(CheckedFormatString<Parameters...>&& fmtstr, const Parameters&... parameters) { out(stderr, move(fmtstr), parameters...); }
template<typename... Parameters>
void warnln(StringView fmtstr, const Parameters&... parameters) { outln(stderr, fmtstr, parameters...); }
template<typename... Parameters>
void warnln(const char* fmtstr, const Parameters&... parameters) { outln(stderr, fmtstr, parameters...); }
void warnln(CheckedFormatString<Parameters...>&& fmtstr, const Parameters&... parameters) { outln(stderr, move(fmtstr), parameters...); }
inline void warnln() { outln(stderr); }
#endif
void vdbgln(StringView fmtstr, TypeErasedFormatParams);
#ifndef DBGLN_NO_COMPILETIME_FORMAT_CHECK
template<StringLiteral fmt, bool enabled = true, typename... Parameters>
void dbgln(const Parameters&... parameters) requires ConsistentFormatParameters<fmt, sizeof...(Parameters)>
{
dbgln<enabled>(StringView { fmt.data }, parameters...);
}
#endif
template<bool enabled = true, typename... Parameters>
void dbgln(StringView fmtstr, const Parameters&... parameters)
void dbgln(CheckedFormatString<Parameters...>&& fmtstr, const Parameters&... parameters)
{
if constexpr (enabled)
vdbgln(fmtstr, VariadicFormatParams { parameters... });
vdbgln(fmtstr.view(), VariadicFormatParams { parameters... });
}
template<bool enabled = true, typename... Parameters>
void dbgln(const char* fmtstr, const Parameters&... parameters) { dbgln<enabled>(StringView { fmtstr }, parameters...); }
template<bool enabled = true>
void dbgln() { dbgln<enabled>(""); }
@ -575,10 +412,10 @@ void set_debug_enabled(bool);
void vdmesgln(StringView fmtstr, TypeErasedFormatParams);
template<typename... Parameters>
void dmesgln(StringView fmtstr, const Parameters&... parameters) { vdmesgln(fmtstr, VariadicFormatParams { parameters... }); }
template<typename... Parameters>
void dmesgln(const char* fmtstr, const Parameters&... parameters) { vdmesgln(fmtstr, VariadicFormatParams { parameters... }); }
inline void dmesgln() { dmesgln(""); }
void dmesgln(CheckedFormatString<Parameters...>&& fmt, const Parameters&... parameters)
{
vdmesgln(fmt.view(), VariadicFormatParams { parameters... });
}
#endif
template<typename T, typename = void>
@ -647,13 +484,8 @@ using AK::warnln;
using AK::dbgln;
using AK::CheckedFormatString;
using AK::FormatIfSupported;
using AK::FormatString;
#ifdef DBGLN_NO_COMPILETIME_FORMAT_CHECK
# define dbgln(fmt, ...) dbgln(fmt, ##__VA_ARGS__)
# define dbgln_if(flag, fmt, ...) dbgln<flag>(fmt, ##__VA_ARGS__)
#else
# define dbgln(fmt, ...) dbgln<fmt>(__VA_ARGS__)
# define dbgln_if(flag, fmt, ...) dbgln<fmt, flag>(__VA_ARGS__)
#endif
#define dbgln_if(flag, fmt, ...) dbgln<flag>(fmt, ##__VA_ARGS__)

View file

@ -565,36 +565,9 @@ using MakeIntegerSequence = decltype(make_integer_sequence_impl<T, N>());
template<unsigned N>
using MakeIndexSequence = MakeIntegerSequence<unsigned, N>;
template<unsigned long N>
struct StringLiteral {
constexpr StringLiteral(const char (&in)[N])
: data {}
, size { N }
{
for (unsigned long i = 0; i < N; ++i)
data[i] = in[i];
}
template<unsigned long Nx>
constexpr StringLiteral& operator=(const StringLiteral<Nx>& other)
{
static_assert(Nx <= N, "Storing a string literal in a smaller one");
for (unsigned long i = 0; i < Nx; ++i)
data[i] = other[i];
return *this;
}
template<unsigned long Nx>
constexpr StringLiteral& operator=(const char (&other)[Nx])
{
static_assert(Nx <= N, "Storing a string literal in a smaller one");
for (unsigned long i = 0; i < Nx; ++i)
data[i] = other[i];
return *this;
}
char data[N];
unsigned long size;
template<typename T>
struct IdentityType {
using Type = T;
};
}
@ -608,6 +581,7 @@ using AK::declval;
using AK::DependentFalse;
using AK::exchange;
using AK::forward;
using AK::IdentityType;
using AK::IndexSequence;
using AK::IntegerSequence;
using AK::is_trivial;
@ -630,6 +604,5 @@ using AK::max;
using AK::min;
using AK::move;
using AK::RemoveConst;
using AK::StringLiteral;
using AK::swap;
using AK::Void;

View file

@ -27,11 +27,12 @@
#pragma once
#include <AK/Assertions.h>
#include <AK/CheckedFormatString.h>
namespace AK {
template<typename... Parameters>
void warnln(const char* fmtstr, const Parameters&...);
void warnln(CheckedFormatString<Parameters...>&& fmtstr, const Parameters&...);
}

View file

@ -1894,16 +1894,16 @@ void Shell::possibly_print_error() const
warn("\x1b[31m");
size_t length_written_so_far = 0;
if (line == (i64)source_position.position->start_line.line_number) {
warn("{:~>{}}", "", 5 + source_position.position->start_line.line_column);
warn(StringView { "{:~>{}}" }, "", 5 + source_position.position->start_line.line_column);
length_written_so_far += source_position.position->start_line.line_column;
} else {
warn("{:~>{}}", "", 5);
warn(StringView { "{:~>{}}" }, "", 5);
}
if (line == (i64)source_position.position->end_line.line_number) {
warn("{:^>{}}", "", source_position.position->end_line.line_column - length_written_so_far);
warn(StringView { "{:^>{}}" }, "", source_position.position->end_line.line_column - length_written_so_far);
length_written_so_far += source_position.position->start_line.line_column;
} else {
warn("{:^>{}}", "", current_line.length() - length_written_so_far);
warn(StringView { "{:^>{}}" }, "", current_line.length() - length_written_so_far);
}
warnln("\x1b[0m");
}

View file

@ -45,11 +45,11 @@ Print the value of EXPRESSION to standard output.)");
exit(0);
}
template<typename... Args>
[[noreturn]] void fail(Args&&... args)
template<typename Fmt, typename... Args>
[[noreturn]] void fail(Fmt&& fmt, Args&&... args)
{
warn("ERROR: \e[31m");
warnln(args...);
warnln(StringView { fmt }, args...);
warn("\e[0m");
exit(1);
}