uniq.cpp 4.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114
  1. /*
  2. * Copyright (c) 2020, Matthew L. Curry <matthew.curry@gmail.com>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include <AK/CharacterTypes.h>
  7. #include <AK/RefPtr.h>
  8. #include <AK/StringView.h>
  9. #include <LibCore/ArgsParser.h>
  10. #include <LibCore/File.h>
  11. #include <LibCore/System.h>
  12. #include <unistd.h>
  13. static ErrorOr<void> write_line_content(StringView line, size_t count, bool duplicates_only, bool print_count, Core::File& outfile)
  14. {
  15. if (duplicates_only && count <= 1)
  16. return {};
  17. if (print_count)
  18. TRY(outfile.write_until_depleted(ByteString::formatted("{} {}\n", count, line)));
  19. else
  20. TRY(outfile.write_until_depleted(ByteString::formatted("{}\n", line)));
  21. return {};
  22. }
  23. static StringView skip(StringView line, unsigned char_skip_count, unsigned field_skip_count)
  24. {
  25. line = line.trim("\n"sv);
  26. if (field_skip_count) {
  27. bool in_field = false;
  28. int field_index = 0;
  29. unsigned current_field = 0;
  30. for (size_t i = 0; i < line.length(); i++) {
  31. char c = line[i];
  32. if (is_ascii_space(c)) {
  33. in_field = false;
  34. field_index = i;
  35. if (++current_field > field_skip_count)
  36. break;
  37. } else if (!in_field) {
  38. in_field = true;
  39. }
  40. }
  41. line = line.substring_view(field_index);
  42. }
  43. char_skip_count = min(char_skip_count, line.length());
  44. return line.substring_view(char_skip_count);
  45. }
  46. ErrorOr<int> serenity_main(Main::Arguments arguments)
  47. {
  48. TRY(Core::System::pledge("stdio rpath wpath cpath"));
  49. StringView inpath;
  50. StringView outpath;
  51. bool duplicates_only = false;
  52. bool unique_only = false;
  53. bool ignore_case = false;
  54. bool print_count = false;
  55. unsigned skip_chars = 0;
  56. unsigned skip_fields = 0;
  57. Core::ArgsParser args_parser;
  58. args_parser.add_option(duplicates_only, "Only print duplicated lines", "repeated", 'd');
  59. args_parser.add_option(unique_only, "Only print unique lines (default)", "unique", 'u');
  60. args_parser.add_option(ignore_case, "Ignore case when comparing lines", "ignore-case", 'i');
  61. args_parser.add_option(print_count, "Prefix each line by its number of occurrences", "count", 'c');
  62. args_parser.add_option(skip_chars, "Skip N chars", "skip-chars", 's', "N");
  63. args_parser.add_option(skip_fields, "Skip N fields", "skip-fields", 'f', "N");
  64. args_parser.add_positional_argument(inpath, "Input file", "input", Core::ArgsParser::Required::No);
  65. args_parser.add_positional_argument(outpath, "Output file", "output", Core::ArgsParser::Required::No);
  66. args_parser.parse(arguments);
  67. if (!unique_only && !duplicates_only) {
  68. unique_only = true;
  69. } else if (unique_only && duplicates_only) {
  70. // Printing duplicated and unique lines shouldn't print anything
  71. return 0;
  72. }
  73. auto infile = TRY(Core::InputBufferedFile::create(TRY(Core::File::open_file_or_standard_stream(inpath, Core::File::OpenMode::Read))));
  74. auto outfile = TRY(Core::File::open_file_or_standard_stream(outpath, Core::File::OpenMode::Write));
  75. size_t count = 0;
  76. ByteBuffer previous_buf = TRY(ByteBuffer::create_uninitialized(1024));
  77. ByteBuffer current_buf = TRY(ByteBuffer::create_uninitialized(1024));
  78. StringView previous = TRY(infile->read_line(previous_buf));
  79. StringView previous_to_compare = skip(previous, skip_chars, skip_fields);
  80. while (TRY(infile->can_read_line())) {
  81. // FIXME: The buffer does not automatically resize,
  82. // and this will return EMSGSIZE if the read line
  83. // is more than 1024 bytes.
  84. StringView current = TRY(infile->read_line(current_buf));
  85. StringView current_to_compare = skip(current, skip_chars, skip_fields);
  86. bool lines_equal = ignore_case ? current_to_compare.equals_ignoring_ascii_case(previous_to_compare) : current_to_compare == previous_to_compare;
  87. if (!lines_equal) {
  88. TRY(write_line_content(previous, count, duplicates_only, print_count, *outfile));
  89. count = 1;
  90. } else {
  91. count++;
  92. }
  93. swap(current_to_compare, previous_to_compare);
  94. swap(current_buf, previous_buf);
  95. swap(current, previous);
  96. }
  97. TRY(write_line_content(previous, count, duplicates_only, print_count, *outfile));
  98. return 0;
  99. }