/*
 * Copyright (c) 2020, Matthew L. Curry <matthew.curry@gmail.com>
 *
 * SPDX-License-Identifier: BSD-2-Clause
 */
|
|
|
|
|
2022-07-24 10:57:32 +00:00
|
|
|
#include <AK/CharacterTypes.h>
|
2020-10-12 01:03:37 +00:00
|
|
|
#include <AK/RefPtr.h>
|
2022-07-24 10:57:32 +00:00
|
|
|
#include <AK/StringView.h>
|
2020-10-12 01:03:37 +00:00
|
|
|
#include <LibCore/ArgsParser.h>
|
2023-02-09 02:02:46 +00:00
|
|
|
#include <LibCore/File.h>
|
2022-01-04 01:59:53 +00:00
|
|
|
#include <LibCore/System.h>
|
2021-03-12 16:29:37 +00:00
|
|
|
#include <unistd.h>
|
2020-10-12 01:03:37 +00:00
|
|
|
|
2023-02-09 02:02:46 +00:00
|
|
|
// Emits one group of adjacent equal lines to `outfile`.
//
// `line` is the representative text of the group and `count` is how many
// times it occurred. When `duplicates_only` is set, groups that occurred at
// most once are silently dropped. When `print_count` is set, the line is
// prefixed with its occurrence count and a single space.
// Any error reported by the write is propagated to the caller.
static ErrorOr<void> write_line_content(StringView line, size_t count, bool duplicates_only, bool print_count, Core::File& outfile)
{
    bool const is_repeated = count > 1;
    if (duplicates_only && !is_repeated)
        return {};

    // Build the output text first so there is a single write call site.
    auto const formatted_line = print_count
        ? DeprecatedString::formatted("{} {}\n", count, line)
        : DeprecatedString::formatted("{}\n", line);

    TRY(outfile.write_until_depleted(formatted_line.bytes()));
    return {};
}
|
2020-10-12 01:03:37 +00:00
|
|
|
|
2022-07-24 10:57:32 +00:00
|
|
|
// Returns the part of `line` that takes part in the comparison.
//
// Newlines are trimmed from both ends first. Then `field_skip_count` fields
// are dropped from the front, followed by up to `char_skip_count` characters.
// If the line has fewer fields or characters than requested, the result is an
// empty view.
//
// A field is a (possibly empty) run of whitespace followed by a run of
// non-whitespace characters, so consecutive separators belong to the same
// field and the whitespace preceding the next field is kept in the result.
static StringView skip(StringView line, unsigned char_skip_count, unsigned field_skip_count)
{
    line = line.trim("\n"sv);

    size_t offset = 0;
    for (unsigned skipped_fields = 0; skipped_fields < field_skip_count && offset < line.length(); ++skipped_fields) {
        // Leading separators of this field.
        while (offset < line.length() && is_ascii_space(line[offset]))
            ++offset;
        // The field's own characters.
        while (offset < line.length() && !is_ascii_space(line[offset]))
            ++offset;
    }
    line = line.substring_view(offset);

    // Clamp so that skipping more characters than remain yields an empty view.
    char_skip_count = min(char_skip_count, line.length());
    return line.substring_view(char_skip_count);
}
|
|
|
|
|
2022-01-04 01:59:53 +00:00
|
|
|
// Entry point of the uniq utility.
//
// Reads lines from the input file (or standard input), groups adjacent lines
// that compare equal after applying the skip/ignore-case options, and writes
// one line per group to the output file (or standard output).
// Returns 0 on success; any I/O or pledge error is propagated.
ErrorOr<int> serenity_main(Main::Arguments arguments)
{
    TRY(Core::System::pledge("stdio rpath wpath cpath"));

    StringView inpath;
    StringView outpath;
    bool duplicates_only = false;
    bool unique_only = false;
    bool ignore_case = false;
    bool print_count = false;
    unsigned skip_chars = 0;
    unsigned skip_fields = 0;

    Core::ArgsParser args_parser;
    args_parser.add_option(duplicates_only, "Only print duplicated lines", "repeated", 'd');
    args_parser.add_option(unique_only, "Only print unique lines (default)", "unique", 'u');
    args_parser.add_option(ignore_case, "Ignore case when comparing lines", "ignore-case", 'i');
    args_parser.add_option(print_count, "Prefix each line by its number of occurrences", "count", 'c');
    args_parser.add_option(skip_chars, "Skip N chars", "skip-chars", 's', "N");
    args_parser.add_option(skip_fields, "Skip N fields", "skip-fields", 'f', "N");
    args_parser.add_positional_argument(inpath, "Input file", "input", Core::ArgsParser::Required::No);
    args_parser.add_positional_argument(outpath, "Output file", "output", Core::ArgsParser::Required::No);
    args_parser.parse(arguments);

    if (!unique_only && !duplicates_only) {
        unique_only = true;
    } else if (unique_only && duplicates_only) {
        // Printing duplicated and unique lines shouldn't print anything
        return 0;
    }

    auto infile = TRY(Core::InputBufferedFile::create(TRY(Core::File::open_file_or_standard_stream(inpath, Core::File::OpenMode::Read))));
    auto outfile = TRY(Core::File::open_file_or_standard_stream(outpath, Core::File::OpenMode::Write));

    // If no line is available at all, emit nothing rather than a stray empty line.
    if (!TRY(infile->can_read_line()))
        return 0;

    ByteBuffer previous_buf = TRY(ByteBuffer::create_uninitialized(1024));
    ByteBuffer current_buf = TRY(ByteBuffer::create_uninitialized(1024));

    StringView previous = TRY(infile->read_line(previous_buf));
    StringView previous_to_compare = skip(previous, skip_chars, skip_fields);
    // The first line has already been seen once, so its group starts at 1.
    size_t count = 1;

    while (TRY(infile->can_read_line())) {
        // FIXME: The buffer does not automatically resize,
        //        and this will return EMSGSIZE if the read line
        //        is more than 1024 bytes.
        StringView current = TRY(infile->read_line(current_buf));

        StringView current_to_compare = skip(current, skip_chars, skip_fields);
        bool lines_equal = ignore_case ? current_to_compare.equals_ignoring_ascii_case(previous_to_compare) : current_to_compare == previous_to_compare;
        if (!lines_equal) {
            // A new group begins: flush the finished one and restart the tally.
            TRY(write_line_content(previous, count, duplicates_only, print_count, *outfile));
            count = 1;
        } else {
            count++;
        }

        // The views point into the buffers, so views and buffers are swapped
        // together to keep `previous` backed by `previous_buf`.
        swap(current_to_compare, previous_to_compare);
        swap(current_buf, previous_buf);
        swap(current, previous);
    }

    // Flush the last group, which the loop never gets to write.
    TRY(write_line_content(previous, count, duplicates_only, print_count, *outfile));

    return 0;
}
|