2020-01-18 08:38:21 +00:00
|
|
|
/*
 * Copyright (c) 2019-2020, Marios Prokopakis <mariosprokopakis@gmail.com>
 *
 * SPDX-License-Identifier: BSD-2-Clause
 */
|
|
|
|
|
2022-12-04 18:02:33 +00:00
|
|
|
#include <AK/DeprecatedString.h>
|
2019-08-18 15:04:05 +00:00
|
|
|
#include <AK/QuickSort.h>
|
2020-03-02 12:08:23 +00:00
|
|
|
#include <AK/StdLibExtras.h>
|
2019-08-18 15:04:05 +00:00
|
|
|
#include <AK/Vector.h>
|
2021-11-09 00:54:31 +00:00
|
|
|
#include <LibCore/ArgsParser.h>
|
2023-06-07 04:29:42 +00:00
|
|
|
#include <LibCore/File.h>
|
2021-11-27 20:33:08 +00:00
|
|
|
#include <LibMain/Main.h>
|
2019-08-18 15:04:05 +00:00
|
|
|
#include <stdio.h>
|
|
|
|
#include <stdlib.h>
|
|
|
|
|
2021-11-09 02:07:42 +00:00
|
|
|
struct Range {
|
|
|
|
size_t m_from { 1 };
|
|
|
|
size_t m_to { SIZE_MAX };
|
2020-03-02 12:08:23 +00:00
|
|
|
|
2022-04-01 17:58:27 +00:00
|
|
|
[[nodiscard]] bool intersects(Range const& other) const
|
2021-11-09 02:07:42 +00:00
|
|
|
{
|
2020-03-02 12:08:23 +00:00
|
|
|
return !(other.m_from > m_to || other.m_to < m_from);
|
|
|
|
}
|
2019-08-18 15:04:05 +00:00
|
|
|
|
2022-04-01 17:58:27 +00:00
|
|
|
void merge(Range const& other)
|
2021-11-09 02:07:42 +00:00
|
|
|
{
|
|
|
|
// Can't merge two ranges that are disjoint.
|
|
|
|
VERIFY(intersects(other));
|
2020-03-02 12:08:23 +00:00
|
|
|
|
2021-11-09 02:07:42 +00:00
|
|
|
m_from = min(m_from, other.m_from);
|
|
|
|
m_to = max(m_to, other.m_to);
|
2019-08-18 15:04:05 +00:00
|
|
|
}
|
2021-11-09 02:07:42 +00:00
|
|
|
};
|
2019-08-18 15:04:05 +00:00
|
|
|
|
2022-12-04 18:02:33 +00:00
|
|
|
// Parses a comma-separated list of positions and appends one Range per entry
// to `ranges`. Accepted entry forms are "N", "N-M", "-M" and "N-".
//
// Returns true when every entry was valid. On the first invalid entry a
// diagnostic is written with warnln() and false is returned; ranges appended
// for earlier entries are left in `ranges`.
static bool expand_list(DeprecatedString& list, Vector<Range>& ranges)
{
    // Shared diagnostics; each one reports the problem and yields the failure result.
    auto const reject_zero_position = [] {
        warnln("cut: byte/character positions are numbered from 1");
        return false;
    };
    auto const reject_invalid_position = [](DeprecatedString const& text) {
        warnln("cut: invalid byte/character position '{}'", text);
        return false;
    };

    Vector<DeprecatedString> entries = list.split(',', SplitBehavior::KeepEmpty);

    for (auto& entry : entries) {
        if (entry.length() == 0)
            return reject_zero_position();

        if (entry == "-") {
            warnln("cut: invalid range with no endpoint: {}", entry);
            return false;
        }

        auto const last_index = entry.length() - 1;

        // "-M": from the first position up to and including M.
        if (entry[0] == '-') {
            auto upper = entry.substring(1, last_index).to_uint();
            if (!upper.has_value())
                return reject_invalid_position(entry);
            if (upper.value() == 0)
                return reject_zero_position();

            ranges.append({ 1, upper.value() });
            continue;
        }

        // "N-": open-ended range starting at N (upper bound is SIZE_MAX).
        if (entry[last_index] == '-') {
            auto lower = entry.substring(0, last_index).to_uint();
            if (!lower.has_value())
                return reject_invalid_position(entry);
            if (lower.value() == 0)
                return reject_zero_position();

            ranges.append({ lower.value(), SIZE_MAX });
            continue;
        }

        auto parts = entry.split('-', SplitBehavior::KeepEmpty);
        switch (parts.size()) {
        case 2: {
            // "N-M": both endpoints given explicitly.
            auto lower = parts[0].to_uint();
            if (!lower.has_value())
                return reject_invalid_position(parts[0]);

            auto upper = parts[1].to_uint();
            if (!upper.has_value())
                return reject_invalid_position(parts[1]);

            // The decreasing-range check intentionally runs before the zero check.
            if (lower.value() > upper.value()) {
                warnln("cut: invalid decreasing range");
                return false;
            }
            if (lower.value() == 0 || upper.value() == 0)
                return reject_zero_position();

            ranges.append({ lower.value(), upper.value() });
            break;
        }
        case 1: {
            // "N": a single position.
            auto position = parts[0].to_uint();
            if (!position.has_value())
                return reject_invalid_position(parts[0]);
            if (position.value() == 0)
                return reject_zero_position();

            ranges.append({ position.value(), position.value() });
            break;
        }
        default:
            // More than one '-' inside the entry.
            warnln("cut: invalid byte or character range");
            return false;
        }
    }

    return true;
}
|
|
|
|
|
2023-06-07 04:29:42 +00:00
|
|
|
// Writes the bytes of `line` selected by `ranges` to standard output, in the
// order the ranges are given, followed by a newline.
//
// Range positions are 1-based and inclusive. A range that starts past the end
// of the line contributes nothing; a range that extends past the end is
// clamped to the line length.
static void process_line_bytes(StringView line, Vector<Range> const& ranges)
{
    for (auto const& range : ranges) {
        // m_from is 1-based, so a range starting exactly at line.length()
        // still selects the final byte. Only ranges starting beyond the last
        // byte are empty (the previous `>=` test dropped the last byte).
        if (range.m_from > line.length())
            continue;

        auto const to = min(range.m_to, line.length());
        // Slice the view directly; no need to copy the whole line per range.
        out("{}", line.substring_view(range.m_from - 1, to - range.m_from + 1));
    }
    outln();
}
|
|
|
|
|
2023-06-24 19:55:38 +00:00
|
|
|
// Writes the fields of `line` selected by `ranges` to standard output, joined
// by `delimiter` and followed by a newline.
//
// A line that does not contain `delimiter` at all is printed unchanged, or
// suppressed entirely when `only_print_delimited_lines` is set. Range
// positions are 1-based and inclusive and are clamped to the number of fields.
static void process_line_fields(StringView line, Vector<Range> const& ranges, char delimiter, bool only_print_delimited_lines)
{
    // Decide "undelimited" from the line itself rather than from the number of
    // split parts, so that an empty line is treated as undelimited as well
    // instead of depending on what split() returns for an empty string.
    if (!line.contains(delimiter)) {
        if (!only_print_delimited_lines)
            outln("{}", line);
        return;
    }

    auto const fields = DeprecatedString(line).split(delimiter, SplitBehavior::KeepEmpty);

    Vector<DeprecatedString> output_fields;
    for (auto const& range : ranges) {
        auto const end = min(range.m_to, fields.size());
        for (size_t i = range.m_from - 1; i < end; i++)
            output_fields.append(fields[i]);
    }

    outln("{}", DeprecatedString::join(delimiter, output_fields));
}
|
|
|
|
|
2021-11-27 20:33:08 +00:00
|
|
|
// Entry point of `cut`.
//
// Parses the command line, validates that exactly one of the byte list (-b)
// or field list (-f) was given, turns that list into a sorted set of
// non-overlapping ranges, and then filters every line of each input file.
// When no file is given, a single empty file name is processed instead.
//
// Returns 1 on a usage error or when at least one input could not be opened,
// 0 otherwise. Errors from creating the buffered reader or from reading lines
// are propagated through TRY().
ErrorOr<int> serenity_main(Main::Arguments arguments)
{
    DeprecatedString byte_list = "";
    DeprecatedString fields_list = "";
    DeprecatedString delimiter = "\t";
    bool only_print_delimited_lines = false;

    Vector<StringView> files;

    Core::ArgsParser args_parser;
    args_parser.add_positional_argument(files, "file(s) to cut", "file", Core::ArgsParser::Required::No);
    args_parser.add_option(byte_list, "select only these bytes", "bytes", 'b', "list");
    args_parser.add_option(fields_list, "select only these fields", "fields", 'f', "list");
    args_parser.add_option(delimiter, "set a custom delimiter", "delimiter", 'd', "delimiter");
    args_parser.add_option(only_print_delimited_lines, "suppress lines which don't contain any field delimiter characters", "only-delimited", 's');
    args_parser.parse(arguments);

    bool const selected_bytes = (byte_list != "");
    bool const selected_fields = (fields_list != "");

    int const selected_options_count = (selected_bytes ? 1 : 0) + (selected_fields ? 1 : 0);

    if (selected_options_count == 0) {
        warnln("cut: you must specify a list of bytes, or fields");
        args_parser.print_usage(stderr, arguments.strings[0]);
        return 1;
    }

    if (selected_options_count > 1) {
        warnln("cut: you must specify only one of bytes, or fields");
        args_parser.print_usage(stderr, arguments.strings[0]);
        return 1;
    }

    if (delimiter.length() != 1) {
        warnln("cut: the delimiter must be a single character");
        args_parser.print_usage(stderr, arguments.strings[0]);
        return 1;
    }

    // Exactly one of the two lists is set at this point, so "not bytes" means fields.
    auto& ranges_list = selected_bytes ? byte_list : fields_list;

    Vector<Range> ranges_vector;
    if (!expand_list(ranges_list, ranges_vector)) {
        args_parser.print_usage(stderr, arguments.strings[0]);
        return 1;
    }

    // Sort by start position so that overlapping ranges become neighbours,
    // then fold each range into its predecessor whenever the two intersect.
    quick_sort(ranges_vector, [](auto& a, auto& b) { return a.m_from < b.m_from; });

    Vector<Range> disjoint_ranges;
    for (auto& range : ranges_vector) {
        if (!disjoint_ranges.is_empty() && disjoint_ranges.last().intersects(range)) {
            disjoint_ranges.last().merge(range);
            continue;
        }
        disjoint_ranges.append(range);
    }

    // An empty file name is reported as "stdin" below; presumably
    // open_file_or_standard_stream() maps it to standard input.
    if (files.is_empty())
        files.append(""sv);

    // Remember open failures so they are reflected in the exit status
    // instead of being reported and then forgotten.
    bool failed_to_open_a_file = false;

    /* Process each file */
    for (auto const filename : files) {
        auto maybe_file = Core::File::open_file_or_standard_stream(filename, Core::File::OpenMode::Read);
        if (maybe_file.is_error()) {
            warnln("cut: Could not open file '{}'", filename.is_empty() ? "stdin"sv : filename);
            failed_to_open_a_file = true;
            continue;
        }
        auto file = TRY(Core::InputBufferedFile::create(maybe_file.release_value()));

        // Line buffer handed to read_line(); its size is PAGE_SIZE bytes.
        Array<u8, PAGE_SIZE> buffer;
        while (TRY(file->can_read_line())) {
            auto line = TRY(file->read_line(buffer));
            // NOTE(review): kept exactly as in the original. This stops
            // processing the current file when a line equal to "\n" is read
            // while more input is available; confirm this is intended and
            // whether read_line() can ever return the trailing newline.
            if (line == "\n" && TRY(file->can_read_line()))
                break;

            if (selected_bytes)
                process_line_bytes(line, disjoint_ranges);
            else
                process_line_fields(line, disjoint_ranges, delimiter[0], only_print_delimited_lines);
        }
    }

    return failed_to_open_a_file ? 1 : 0;
}
|