ladybird/Userland/Utilities/xxd.cpp
Nico Weber 24a469f521 Everywhere: Prefer {:#x} over 0x{:x} in format strings
The former automatically adapts the prefix to binary and octal
output, and is what we already use in the majority of cases.

Patch generated by:

    rg -l '0x\{' | xargs sed -i '' -e 's/0x{:/{:#/'

I ran it 4 times (until it stopped changing things) since each
invocation only converted one instance per line.

No behavior change.
2024-02-21 17:54:38 +01:00

372 lines
12 KiB
C++

/*
* Copyright (c) 2024, Nils Ollrogge <nils-ollrogge@outlook.de>.
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <AK/AllOf.h>
#include <AK/CharacterTypes.h>
#include <AK/StdLibExtras.h>
#include <AK/String.h>
#include <AK/StringUtils.h>
#include <LibCore/ArgsParser.h>
#include <LibCore/File.h>
#include <LibCore/System.h>
#include <LibMain/Main.h>
static constexpr size_t BYTES_PER_LINE_HEX = 16;
static constexpr size_t BYTES_PER_LINE_C = 12;
static constexpr size_t BYTES_PER_LINE_BITS = 6;
static constexpr size_t BYTES_PER_LINE_PLAIN_HEX = 30;
static constexpr size_t BYTES_PER_LINE_MAX = 256;
static constexpr size_t GROUP_SIZE_HEX = 2;
static constexpr size_t GROUP_SIZE_HEX_LITTLE_ENDIAN = 4;
static constexpr size_t GROUP_SIZE_BITS = 1;
static constexpr size_t GROUP_SIZE_PLAIN_HEX = 0;
enum class DisplayStyle {
Hex,
PlainHex,
HexLittleEndian,
CStyle,
Bits
};
static void print_ascii(Bytes line)
{
for (auto const& byte : line) {
if (is_ascii_printable(byte)) {
putchar(byte);
} else {
putchar('.');
}
}
}
static void print_line_hex(Bytes line, size_t line_length_config, size_t group_size, bool uppercase)
{
for (size_t i = 0; i < line_length_config; ++i) {
if (i < line.size()) {
if (uppercase) {
out("{:02X}", line[i]);
} else {
out("{:02x}", line[i]);
}
} else {
out(" ");
}
if (group_size != 0 && (i + 1) % group_size == 0) {
out(" ");
}
}
out(" ");
}
static void print_line_little_endian_hex(Bytes line, size_t line_length_config, size_t group_size, bool uppercase)
{
if (group_size == 1) {
print_line_hex(line, line_length_config, group_size, uppercase);
return;
}
if (group_size == 0 || group_size > BYTES_PER_LINE_HEX) {
group_size = BYTES_PER_LINE_HEX;
}
for (size_t i = 0; i < line_length_config; i += group_size) {
if (i < line.size()) {
size_t size = i + group_size < line.size() ? group_size : line.size() - i;
auto group = line.slice(i, size);
if (size < group_size) {
for (size_t i = 0; i < group_size - size; ++i) {
out(" ");
}
}
for (ssize_t i = group.size() - 1; i >= 0; --i) {
if (uppercase) {
out("{:02X}", group[i]);
} else {
out("{:02x}", group[i]);
}
}
} else {
for (size_t i = 0; i < group_size; ++i) {
out(" ");
}
}
out(" ");
}
out(" ");
}
static void print_line_bits(Bytes line, size_t line_length_config, size_t group_size)
{
auto print_byte = [](u8 byte) {
for (ssize_t i = 7; i >= 0; --i) {
out("{}", (byte >> i) & 1 ? '1' : '0');
}
};
for (size_t i = 0; i < line_length_config; ++i) {
if (i < line.size()) {
print_byte(line[i]);
} else {
out(" ");
}
if (group_size > 0 && (i + 1) % group_size == 0) {
out(" ");
}
}
out(" ");
}
static void print_line_c_style(Bytes line)
{
out(" ");
for (size_t i = 0; i < line.size() - 1; ++i) {
out("{:#02x}, ", line[i]);
}
out("{:#02x}", line[line.size() - 1]);
}
static ErrorOr<String> path_to_variable_name(StringView path)
{
auto work = path.to_byte_string();
work = work.replace("."sv, "_"sv, ReplaceMode::All);
work = work.replace("/"sv, "_"sv, ReplaceMode::All);
return TRY(String::from_byte_string(work));
}
ErrorOr<int> serenity_main(Main::Arguments args)
{
TRY(Core::System::pledge("stdio rpath"));
Core::ArgsParser args_parser;
StringView path;
bool autoskip = false;
bool c_include_file_style = false;
bool capitalize_c_include_file_style = false;
bool binary_digit_formatting = false;
bool little_endian_hexdump = false;
bool offset_in_decimal = false;
bool plain_hexdump_style = false;
bool uppercase_hex = false;
bool revert = false;
Optional<size_t> line_length_option;
Optional<size_t> group_size_option;
Optional<size_t> max_bytes;
Optional<size_t> position_offset;
Optional<off_t> seek_to;
String c_include_file_style_variable_name;
StringView colorize_output_option;
args_parser.add_positional_argument(path, "Input file", "input", Core::ArgsParser::Required::No);
args_parser.add_option(autoskip, "Replace nul-lines with '*'", "autoskip", 'a');
args_parser.add_option(binary_digit_formatting, "Binary digit formatting", "bits", 'b');
args_parser.add_option(capitalize_c_include_file_style, "Capitalize C include file style (-i).", "capitalize", 'C');
args_parser.add_option(line_length_option, "Amount of bytes shown per line (max 256)", "cols", 'c', "cols");
args_parser.add_option(offset_in_decimal, "Show file offset in decimal", "decimal", 'd');
args_parser.add_option(little_endian_hexdump, "Little-endian hex dump", nullptr, 'e');
args_parser.add_option(group_size_option, "Separate the output of every amount bytes", "groupsize", 'g', "amount");
args_parser.add_option(c_include_file_style, "Output in C include file style", "include", 'i');
args_parser.add_option(max_bytes, "Truncate to fixed number of bytes", "len", 'l', "bytes");
args_parser.add_option(c_include_file_style_variable_name, "Set variable name used in C include ouput (-i)", "name", 'n', "include_style");
args_parser.add_option(position_offset, "Add offset to displayed file position", nullptr, 'o', "offset");
args_parser.add_option(plain_hexdump_style, "Output in plain hex dump style", "plain", 'p');
args_parser.add_option(revert, "Patch hex dump into binary", "revert", 'r');
args_parser.add_option(colorize_output_option, "Colorize output", nullptr, 'R', "when");
args_parser.add_option(seek_to, "Seek to a byte offset", "seek", 's', "[-]offset");
args_parser.add_option(uppercase_hex, "Use upper case hex letters", nullptr, 'u');
args_parser.parse(args);
auto file = TRY(Core::File::open_file_or_standard_stream(path, Core::File::OpenMode::Read));
auto display_style = DisplayStyle::Hex;
size_t line_length_config = BYTES_PER_LINE_HEX;
size_t group_size = GROUP_SIZE_HEX;
auto formatting_options_provided = 0x0;
if (c_include_file_style) {
formatting_options_provided++;
display_style = DisplayStyle::CStyle;
line_length_config = BYTES_PER_LINE_C;
if (c_include_file_style_variable_name.is_empty()) {
c_include_file_style_variable_name = TRY(path_to_variable_name(path));
}
if (capitalize_c_include_file_style) {
c_include_file_style_variable_name = TRY(c_include_file_style_variable_name.to_uppercase());
}
if (file->fd() != STDIN_FILENO) {
outln("unsigned char {}[] = {{", c_include_file_style_variable_name);
}
}
if (little_endian_hexdump) {
formatting_options_provided++;
display_style = DisplayStyle::HexLittleEndian;
group_size = GROUP_SIZE_HEX_LITTLE_ENDIAN;
}
if (plain_hexdump_style) {
formatting_options_provided++;
display_style = DisplayStyle::PlainHex;
line_length_config = BYTES_PER_LINE_PLAIN_HEX;
group_size = GROUP_SIZE_PLAIN_HEX;
}
if (binary_digit_formatting) {
formatting_options_provided++;
display_style = DisplayStyle::Bits;
group_size = GROUP_SIZE_BITS;
line_length_config = BYTES_PER_LINE_BITS;
}
if (formatting_options_provided > 1) {
warnln("Only one of the following flags can be used at a time: -i, -e, -p, -b");
return 1;
}
if (line_length_option.has_value() && line_length_option.value() > 0) {
line_length_config = line_length_option.value();
if (line_length_config > BYTES_PER_LINE_MAX && !plain_hexdump_style) {
warnln("Invalid number of columns (max is 256).");
return 1;
}
}
if (group_size_option.has_value()) {
group_size = group_size_option.value();
if (little_endian_hexdump) {
if (group_size != 0 && !is_power_of_two(group_size)) {
warnln("Group size must be a power of 2 with -e");
return 1;
}
}
}
// TODO: colorize output
if (!colorize_output_option.is_null()) {
warnln("Colorizing output is not supported");
}
if (revert) {
warnln("Patching is not supported");
return 1;
}
Array<u8, BUFSIZ> contents;
Bytes bytes;
size_t total_bytes_read = 0x0;
const size_t max_read_size = contents.size() - (contents.size() % line_length_config);
bool is_input_remaining = true;
// TODO: seek relative to current stdin file position
if (seek_to.has_value()) {
auto file_size = 0x0;
if (auto size = file->size(); !size.is_error()) {
file_size = size.value();
}
auto offset = seek_to.value();
total_bytes_read = offset < 0 ? file_size + offset : offset;
TRY(file->seek(total_bytes_read, SeekMode::SetPosition));
}
while (is_input_remaining) {
auto bytes_to_read = max_read_size - bytes.size();
bytes = contents.span().slice(0, bytes_to_read);
bytes = TRY(file->read_some(bytes));
if (bytes.size() < bytes_to_read && file->fd() != STDIN_FILENO) {
is_input_remaining = false;
}
while (bytes.size() > 0) {
auto line_length = bytes.size() > line_length_config ? line_length_config : bytes.size();
if (max_bytes.has_value()) {
auto bytes_remaining = max_bytes.value() - total_bytes_read;
if (bytes_remaining < line_length) {
line_length = bytes_remaining;
}
}
auto current_line = bytes.slice(0, line_length);
bytes = bytes.slice(line_length);
if (autoskip && all_of(bytes, [](auto& b) { return b == 0x0; })) {
outln("*");
continue;
}
if (display_style != DisplayStyle::CStyle && display_style != DisplayStyle::PlainHex) {
auto offset = 0;
if (position_offset.has_value()) {
offset = position_offset.value();
}
if (offset_in_decimal) {
out("{:08}: ", total_bytes_read + offset);
} else {
out("{:08x}: ", total_bytes_read + offset);
}
}
switch (display_style) {
case DisplayStyle::Hex:
print_line_hex(current_line, line_length_config, group_size, uppercase_hex);
print_ascii(current_line);
break;
case DisplayStyle::PlainHex:
print_line_hex(current_line, line_length_config, group_size, uppercase_hex);
break;
case DisplayStyle::HexLittleEndian:
print_line_little_endian_hex(current_line, line_length_config, group_size, uppercase_hex);
print_ascii(current_line);
break;
case DisplayStyle::Bits:
print_line_bits(current_line, line_length_config, group_size);
print_ascii(current_line);
break;
case DisplayStyle::CStyle:
print_line_c_style(current_line);
break;
}
putchar('\n');
total_bytes_read += line_length;
if (max_bytes.has_value() && total_bytes_read >= max_bytes.value()) {
is_input_remaining = false;
break;
}
}
}
if (display_style == DisplayStyle::CStyle) {
outln("}};");
auto postfix = capitalize_c_include_file_style ? "LEN" : "len";
outln("unsigned int {}_{} = {};", c_include_file_style_variable_name, postfix, total_bytes_read);
}
return 0;
}