cut.cpp 8.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264
  1. /*
  2. * Copyright (c) 2019-2020, Marios Prokopakis <mariosprokopakis@gmail.com>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include <AK/DeprecatedString.h>
  7. #include <AK/QuickSort.h>
  8. #include <AK/StdLibExtras.h>
  9. #include <AK/Vector.h>
  10. #include <LibCore/ArgsParser.h>
  11. #include <LibCore/File.h>
  12. #include <LibMain/Main.h>
  13. #include <stdio.h>
  14. #include <stdlib.h>
  15. struct Range {
  16. size_t m_from { 1 };
  17. size_t m_to { SIZE_MAX };
  18. [[nodiscard]] bool intersects(Range const& other) const
  19. {
  20. return !(other.m_from > m_to || other.m_to < m_from);
  21. }
  22. void merge(Range const& other)
  23. {
  24. // Can't merge two ranges that are disjoint.
  25. VERIFY(intersects(other));
  26. m_from = min(m_from, other.m_from);
  27. m_to = max(m_to, other.m_to);
  28. }
  29. };
  30. static bool expand_list(DeprecatedString& list, Vector<Range>& ranges)
  31. {
  32. Vector<DeprecatedString> tokens = list.split(',', SplitBehavior::KeepEmpty);
  33. for (auto& token : tokens) {
  34. if (token.length() == 0) {
  35. warnln("cut: byte/character positions are numbered from 1");
  36. return false;
  37. }
  38. if (token == "-") {
  39. warnln("cut: invalid range with no endpoint: {}", token);
  40. return false;
  41. }
  42. if (token[0] == '-') {
  43. auto index = token.substring(1, token.length() - 1).to_uint();
  44. if (!index.has_value()) {
  45. warnln("cut: invalid byte/character position '{}'", token);
  46. return false;
  47. }
  48. if (index.value() == 0) {
  49. warnln("cut: byte/character positions are numbered from 1");
  50. return false;
  51. }
  52. ranges.append({ 1, index.value() });
  53. } else if (token[token.length() - 1] == '-') {
  54. auto index = token.substring(0, token.length() - 1).to_uint();
  55. if (!index.has_value()) {
  56. warnln("cut: invalid byte/character position '{}'", token);
  57. return false;
  58. }
  59. if (index.value() == 0) {
  60. warnln("cut: byte/character positions are numbered from 1");
  61. return false;
  62. }
  63. ranges.append({ index.value(), SIZE_MAX });
  64. } else {
  65. auto range = token.split('-', SplitBehavior::KeepEmpty);
  66. if (range.size() == 2) {
  67. auto index1 = range[0].to_uint();
  68. if (!index1.has_value()) {
  69. warnln("cut: invalid byte/character position '{}'", range[0]);
  70. return false;
  71. }
  72. auto index2 = range[1].to_uint();
  73. if (!index2.has_value()) {
  74. warnln("cut: invalid byte/character position '{}'", range[1]);
  75. return false;
  76. }
  77. if (index1.value() > index2.value()) {
  78. warnln("cut: invalid decreasing range");
  79. return false;
  80. } else if (index1.value() == 0 || index2.value() == 0) {
  81. warnln("cut: byte/character positions are numbered from 1");
  82. return false;
  83. }
  84. ranges.append({ index1.value(), index2.value() });
  85. } else if (range.size() == 1) {
  86. auto index = range[0].to_uint();
  87. if (!index.has_value()) {
  88. warnln("cut: invalid byte/character position '{}'", range[0]);
  89. return false;
  90. }
  91. if (index.value() == 0) {
  92. warnln("cut: byte/character positions are numbered from 1");
  93. return false;
  94. }
  95. ranges.append({ index.value(), index.value() });
  96. } else {
  97. warnln("cut: invalid byte or character range");
  98. return false;
  99. }
  100. }
  101. }
  102. return true;
  103. }
  104. static void process_line_bytes(StringView line, Vector<Range> const& ranges)
  105. {
  106. for (auto& i : ranges) {
  107. if (i.m_from >= line.length())
  108. continue;
  109. auto to = min(i.m_to, line.length());
  110. auto sub_string = DeprecatedString(line).substring(i.m_from - 1, to - i.m_from + 1);
  111. out("{}", sub_string);
  112. }
  113. outln();
  114. }
  115. static void process_line_fields(StringView line, Vector<Range> const& ranges, char delimiter, bool only_print_delimited_lines)
  116. {
  117. auto string_split = DeprecatedString(line).split(delimiter, SplitBehavior::KeepEmpty);
  118. if (string_split.size() == 1) {
  119. if (!only_print_delimited_lines)
  120. outln("{}", line);
  121. return;
  122. }
  123. Vector<DeprecatedString> output_fields;
  124. for (auto& range : ranges) {
  125. for (size_t i = range.m_from - 1; i < min(range.m_to, string_split.size()); i++) {
  126. output_fields.append(string_split[i]);
  127. }
  128. }
  129. outln("{}", DeprecatedString::join(delimiter, output_fields));
  130. }
  131. ErrorOr<int> serenity_main(Main::Arguments arguments)
  132. {
  133. DeprecatedString byte_list = "";
  134. DeprecatedString fields_list = "";
  135. DeprecatedString delimiter = "\t";
  136. bool only_print_delimited_lines = false;
  137. Vector<StringView> files;
  138. Core::ArgsParser args_parser;
  139. args_parser.add_positional_argument(files, "file(s) to cut", "file", Core::ArgsParser::Required::No);
  140. args_parser.add_option(byte_list, "select only these bytes", "bytes", 'b', "list");
  141. args_parser.add_option(fields_list, "select only these fields", "fields", 'f', "list");
  142. args_parser.add_option(delimiter, "set a custom delimiter", "delimiter", 'd', "delimiter");
  143. args_parser.add_option(only_print_delimited_lines, "suppress lines which don't contain any field delimiter characters", "only-delimited", 's');
  144. args_parser.parse(arguments);
  145. bool selected_bytes = (byte_list != "");
  146. bool selected_fields = (fields_list != "");
  147. int selected_options_count = (selected_bytes ? 1 : 0) + (selected_fields ? 1 : 0);
  148. if (selected_options_count == 0) {
  149. warnln("cut: you must specify a list of bytes, or fields");
  150. args_parser.print_usage(stderr, arguments.strings[0]);
  151. return 1;
  152. }
  153. if (selected_options_count > 1) {
  154. warnln("cut: you must specify only one of bytes, or fields");
  155. args_parser.print_usage(stderr, arguments.strings[0]);
  156. return 1;
  157. }
  158. if (delimiter.length() != 1) {
  159. warnln("cut: the delimiter must be a single character");
  160. args_parser.print_usage(stderr, arguments.strings[0]);
  161. return 1;
  162. }
  163. DeprecatedString ranges_list;
  164. Vector<Range> ranges_vector;
  165. if (selected_bytes) {
  166. ranges_list = byte_list;
  167. } else if (selected_fields) {
  168. ranges_list = fields_list;
  169. } else {
  170. // This should never happen, since we already checked the options count above.
  171. VERIFY_NOT_REACHED();
  172. }
  173. auto expansion_successful = expand_list(ranges_list, ranges_vector);
  174. if (!expansion_successful) {
  175. args_parser.print_usage(stderr, arguments.strings[0]);
  176. return 1;
  177. }
  178. quick_sort(ranges_vector, [](auto& a, auto& b) { return a.m_from < b.m_from; });
  179. Vector<Range> disjoint_ranges;
  180. for (auto& range : ranges_vector) {
  181. if (disjoint_ranges.is_empty()) {
  182. disjoint_ranges.append(range);
  183. continue;
  184. }
  185. Range& last_range = disjoint_ranges.last();
  186. if (!last_range.intersects(range)) {
  187. disjoint_ranges.append(range);
  188. continue;
  189. }
  190. last_range.merge(range);
  191. }
  192. if (files.is_empty())
  193. files.append(""sv);
  194. /* Process each file */
  195. for (auto const filename : files) {
  196. auto maybe_file = Core::File::open_file_or_standard_stream(filename, Core::File::OpenMode::Read);
  197. if (maybe_file.is_error()) {
  198. warnln("cut: Could not open file '{}'", filename.is_empty() ? "stdin"sv : filename);
  199. continue;
  200. }
  201. auto file = TRY(Core::InputBufferedFile::create(maybe_file.release_value()));
  202. Array<u8, PAGE_SIZE> buffer;
  203. while (TRY(file->can_read_line())) {
  204. auto line = TRY(file->read_line(buffer));
  205. if (line == "\n" && TRY(file->can_read_line()))
  206. break;
  207. if (selected_bytes) {
  208. process_line_bytes(line, disjoint_ranges);
  209. } else if (selected_fields) {
  210. process_line_fields(line, disjoint_ranges, delimiter[0], only_print_delimited_lines);
  211. } else {
  212. VERIFY_NOT_REACHED();
  213. }
  214. }
  215. }
  216. return 0;
  217. }