ladybird/Userland/Utilities/strings.cpp
Tim Schumacher d5871f5717 AK: Rename Stream::{read,write} to Stream::{read_some,write_some}
Similar to POSIX read, the basic read and write functions of AK::Stream
do not have a lower limit of how much data they read or write (apart
from "none at all").

Rename the functions to "read some [data]" and "write some [data]" (with
"data" being omitted, since everything here is reading and writing data)
to make them sufficiently distinct from the functions that ensure the
entire buffer is used (which should be the go-to functions for most
usages).

No functional changes, just a lot of new FIXMEs.
2023-03-13 15:16:20 +00:00

139 lines
4.5 KiB
C++

/*
* Copyright (c) 2022, the SerenityOS developers.
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <AK/CharacterTypes.h>
#include <AK/Forward.h>
#include <LibCore/ArgsParser.h>
#include <LibCore/File.h>
#include <LibCore/System.h>
#include <LibMain/Main.h>
#include <unistd.h>
// Radix in which a string's offset is printed in front of it.
// None means no offset column is printed at all.
enum class StringOffsetFormat {
    None = 0,
    Decimal = 1,
    Octal = 2,
    Hexadecimal = 3,
};
// NOTE: As in the cat utility, input is gathered through a single 32 KiB
//       buffer; this is the capacity of that buffer in bytes.
static constexpr size_t buffer_read_size = 32 * 1024;
// Returns true when `characters` holds at least one byte that is printable
// ASCII and not ASCII whitespace. A candidate in which every byte is either
// non-printable or whitespace is rejected.
static bool should_print_characters(Vector<u8> const& characters)
{
    for (size_t index = 0; index < characters.size(); ++index) {
        u8 const byte = characters[index];
        if (!is_ascii_printable(byte))
            continue;
        if (is_ascii_space(byte))
            continue;
        // One visible character is enough to accept the whole candidate.
        return true;
    }
    return false;
}
// Writes one string to standard output, followed by a newline.
// When an offset format is selected, the string is preceded by
// `string_offset_position` right-aligned in a 7-character field in the chosen
// radix, plus one space. StringOffsetFormat::None (or any other value) prints
// no offset column.
static void print_characters(Vector<u8> const& characters, StringOffsetFormat string_offset_format, size_t string_offset_position)
{
    if (string_offset_format == StringOffsetFormat::Decimal) {
        out("{:>7d} ", string_offset_position);
    } else if (string_offset_format == StringOffsetFormat::Octal) {
        out("{:>7o} ", string_offset_position);
    } else if (string_offset_format == StringOffsetFormat::Hexadecimal) {
        out("{:>7x} ", string_offset_position);
    }

    // The bytes are formatted as text via the 's' format specifier.
    outln("{:s}", characters.span());
}
// Appends the leading run of printable-ASCII or tab bytes of `span` to
// `characters`.
//
// Returns the number of bytes consumed from `span`. The first byte that ends
// the run is counted as consumed but is not appended; if the span runs out
// before such a byte is seen, the whole span is consumed.
static int process_characters_in_span(Vector<u8>& characters, ReadonlyBytes span)
{
    int consumed = 0;
    for (size_t index = 0; index < span.size(); ++index) {
        u8 const byte = span[index];
        consumed++;

        bool const belongs_to_run = is_ascii_printable(byte) || byte == '\t';
        if (!belongs_to_run)
            return consumed;

        characters.append(byte);
    }
    return consumed;
}
// Scans the input opened from `path` for runs of printable characters and
// prints each run that is at least `minimum_string_length` bytes long and is
// accepted by should_print_characters().
//
// - path: passed to Core::File::open_file_or_standard_stream(); presumably
//   "-" selects standard input (the caller passes "-" when no path is given).
// - show_paths: when true, a "path <path>:" header line is printed once, as
//   soon as the first bytes of the input are processed.
// - string_offset_format: how (and whether) the offset of each string from
//   the start of the input is printed in front of it.
//
// Returns an error if the input cannot be opened or a read fails.
//
// read_some() may return fewer bytes than the buffer holds, and a run can
// straddle two reads. The pending run is therefore kept across reads and only
// flushed once a terminating byte (or end of input) is seen. Flushing at the
// end of every read would split such a run in two, and either half could fall
// below the minimum length and be lost. The trade-off is that a run is
// buffered in full before it is printed, so memory use grows with the longest
// run in the input.
static ErrorOr<void> process_strings_in_file(StringView path, bool show_paths, StringOffsetFormat string_offset_format, size_t minimum_string_length)
{
    Array<u8, buffer_read_size> buffer;
    Vector<u8> output_characters;
    auto file = TRY(Core::File::open_file_or_standard_stream(path, Core::File::OpenMode::Read));

    // Offset, from the start of the input, of the next byte to be scanned.
    size_t input_offset = 0;
    // Offset of the first byte of the run currently held in output_characters.
    size_t string_offset_position = 0;
    bool did_show_path = false;

    // Prints the pending run if it qualifies, then resets it.
    auto emit_pending_string = [&] {
        if (output_characters.size() >= minimum_string_length && should_print_characters(output_characters))
            print_characters(output_characters, string_offset_format, string_offset_position);
        output_characters.clear();
    };

    while (!file->is_eof()) {
        auto buffer_span = TRY(file->read_some(buffer));
        while (!buffer_span.is_empty()) {
            if (show_paths && !did_show_path) {
                outln("path {}:", path);
                did_show_path = true;
            }

            // An empty pending run means a new run (if any) starts right here;
            // otherwise we are continuing a run carried over from the last read.
            if (output_characters.is_empty())
                string_offset_position = input_offset;

            size_t const length_before = output_characters.size();
            // Always consumes at least one byte of a non-empty span, so this
            // loop makes progress on every iteration.
            auto const consumed = static_cast<size_t>(process_characters_in_span(output_characters, buffer_span));
            size_t const appended = output_characters.size() - length_before;

            input_offset += consumed;
            buffer_span = buffer_span.slice(consumed);

            // Fewer bytes appended than consumed means a terminating byte was
            // hit, so the run is complete. Otherwise the span simply ran out
            // and the run may continue in the next read.
            if (appended < consumed)
                emit_pending_string();
        }
    }

    // The input may end in the middle of a run.
    emit_pending_string();
    return {};
}
// Program entry point.
//
// Restricts the process with pledge("stdio rpath"), parses the command line
// (-n minimum length, -p show paths, -t offset format, optional paths) and
// then scans every given path in order. When no path is given, "-" is used.
//
// Returns 1 after printing a warning if the minimum string length is below 1,
// and 0 once all paths were processed. An error from pledge() or from
// processing a path is propagated to the caller.
ErrorOr<int> serenity_main(Main::Arguments arguments)
{
    TRY(Core::System::pledge("stdio rpath"));

    // Option targets with their defaults.
    size_t minimum_string_length = 4;
    bool show_paths = false;
    StringOffsetFormat string_offset_format { StringOffsetFormat::None };
    Vector<StringView> paths;

    Core::ArgsParser args_parser;
    args_parser.set_general_help("Write the sequences of printable characters in files or pipes to stdout.");
    args_parser.add_option(minimum_string_length, "Specify the minimum string length.", nullptr, 'n', "number");
    args_parser.add_option(show_paths, "Display the path for each matched file.", nullptr, 'p');
    args_parser.add_option({ Core::ArgsParser::OptionArgumentMode::Required,
        "Write offset relative to start of each file in (d)ec, (o)ct, or he(x) format.",
        nullptr,
        't',
        "format",
        // Accepts exactly "d", "o" or "x"; anything else is rejected.
        [&string_offset_format](StringView value) {
            if (value == "d") {
                string_offset_format = StringOffsetFormat::Decimal;
                return true;
            }
            if (value == "o") {
                string_offset_format = StringOffsetFormat::Octal;
                return true;
            }
            if (value == "x") {
                string_offset_format = StringOffsetFormat::Hexadecimal;
                return true;
            }
            return false;
        } });
    args_parser.add_positional_argument(paths, "File path", "path", Core::ArgsParser::Required::No);
    args_parser.parse(arguments);

    if (minimum_string_length == 0) {
        warnln("Invalid minimum string length {}", minimum_string_length);
        return 1;
    }

    // With no explicit path, fall back to "-".
    if (paths.is_empty()) {
        paths.append("-"sv);
    }

    for (auto const& path : paths) {
        TRY(process_strings_in_file(path, show_paths, string_offset_format, minimum_string_length));
    }

    return 0;
}