Explorar el Código

Utilities: Make uniq behavior consistent with coreutils

The main difference was that our implementation was writing
the final line of a series of repeated lines, whereas the
spec says "The second and succeeding copies of repeated adjacent
input lines shall not be written."

Additionally, there was a mistake in the -f flag implementation
causing the number of fields skipped to be one greater than
required.
dgaston hace 1 año
padre
commit
342b358341
Se han modificado 2 ficheros con 54 adiciones y 12 borrados
  1. 38 0
      Tests/Utilities/TestUniq.cpp
  2. 16 12
      Userland/Utilities/uniq.cpp

+ 38 - 0
Tests/Utilities/TestUniq.cpp

@@ -51,12 +51,50 @@ TEST_CASE(long_line)
     run_uniq({}, StringView { input }, StringView { expected_output });
 }
 
+TEST_CASE(line_longer_than_internal_stream_buffer)
+{
+    auto input = Array<u8, 131072> {};
+    auto expected_output = Array<u8, 65536> {};
+    // Create two lines of 65535 A's and a newline.
+    input.fill('A');
+    input[65535] = '\n';
+    input[131071] = '\n';
+
+    expected_output.fill('A');
+    expected_output[65535] = '\n';
+
+    run_uniq({}, StringView { input }, StringView { expected_output });
+}
+
+TEST_CASE(ignore_case_flag)
+{
+    run_uniq({ "-i" }, "AAA\nAaA\n"sv, "AAA\n"sv);
+    run_uniq({ "-i" }, "AAA\naaa\nAaA\n"sv, "AAA\n"sv);
+}
+
 TEST_CASE(duplicate_flag)
 {
     run_uniq({ "-d" }, "AAA\nAAA\nBBB\n"sv, "AAA\n"sv);
+    run_uniq({ "-d" }, "AAA\nAAA\nBBB\nBBB\nCCC\n"sv, "AAA\nBBB\n"sv);
+}
+
+TEST_CASE(skip_chars_flag)
+{
+    run_uniq({ "-s1" }, "AAA\nAaA\n"sv, "AAA\nAaA\n"sv);
+    run_uniq({ "-s2" }, "AAA\nAaA\n"sv, "AAA\n"sv);
+    run_uniq({ "-s200" }, "AAA\nAaA\n"sv, "AAA\n"sv);
+}
+
+TEST_CASE(skip_fields_flag)
+{
+    run_uniq({ "-f1" }, "1 AA\n2 AA\n"sv, "1 AA\n"sv);
+    run_uniq({ "-f1" }, "1 a AA\n2 b AA\n"sv, "1 a AA\n2 b AA\n"sv);
+    run_uniq({ "-f2" }, "1 a AA\n2 b AA\n"sv, "1 a AA\n"sv);
+    run_uniq({ "-f200" }, "1 AA\n2 AA\n"sv, "1 AA\n"sv);
 }
 
 TEST_CASE(count_flag)
 {
     run_uniq({ "-c" }, "AAA\nAAA\n"sv, "2 AAA\n"sv);
+    run_uniq({ "-c" }, "AAA\nAAA\nBBB\n"sv, "2 AAA\n1 BBB\n"sv);
 }

+ 16 - 12
Userland/Utilities/uniq.cpp

@@ -37,7 +37,7 @@ static StringView skip(StringView line, unsigned char_skip_count, unsigned field
             if (is_ascii_space(c)) {
                 in_field = false;
                 field_index = i;
-                if (++current_field > field_skip_count)
+                if (++current_field >= field_skip_count)
                     break;
             } else if (!in_field) {
                 in_field = true;
@@ -83,8 +83,8 @@ ErrorOr<int> serenity_main(Main::Arguments arguments)
     auto infile = TRY(Core::InputBufferedFile::create(TRY(Core::File::open_file_or_standard_stream(inpath, Core::File::OpenMode::Read))));
     auto outfile = TRY(Core::File::open_file_or_standard_stream(outpath, Core::File::OpenMode::Write));
 
-    // The count starts at 1 since each line will appear at least once.
-    // Otherwise the -d and -c flags do not work as expected.
+    // The count starts at 1 since each line appears at least once.
+    // Otherwise the -d and -c flags are off by one.
     size_t count = 1;
     ByteBuffer previous_buf = TRY(ByteBuffer::create_uninitialized(1024));
     ByteBuffer current_buf = TRY(ByteBuffer::create_uninitialized(1024));
@@ -92,28 +92,32 @@ ErrorOr<int> serenity_main(Main::Arguments arguments)
     StringView previous = TRY(infile->read_line_with_resize(previous_buf));
     StringView previous_to_compare = skip(previous, skip_chars, skip_fields);
 
-    while (TRY(infile->can_read_line())) {
-
+    while (!infile->is_eof()) {
         StringView current = TRY(infile->read_line_with_resize(current_buf));
 
         StringView current_to_compare = skip(current, skip_chars, skip_fields);
         bool lines_equal = ignore_case ? current_to_compare.equals_ignoring_ascii_case(previous_to_compare) : current_to_compare == previous_to_compare;
-        if (!lines_equal) {
-            TRY(write_line_content(previous, count, duplicates_only, print_count, *outfile));
-            count = 1;
-        } else {
+
+        while (lines_equal && current.length() > 0) {
+            // The docs say "The second and succeeding copies of repeated adjacent input
+            // lines shall not be written", therefore keep reading lines while they match the previous line.
+            // See https://pubs.opengroup.org/onlinepubs/9699919799/utilities/uniq.html
+            current = TRY(infile->read_line_with_resize(current_buf));
+            current_to_compare = skip(current, skip_chars, skip_fields);
+            lines_equal = ignore_case ? current_to_compare.equals_ignoring_ascii_case(previous_to_compare) : current_to_compare == previous_to_compare;
             count++;
         }
 
+        TRY(write_line_content(previous, count, duplicates_only, print_count, *outfile));
+        count = 1;
+
         swap(current_buf, previous_buf);
-        // The StringViews cannot be swapped since read_line_with_resize
+        // The StringViews can't be swapped since read_line_with_resize
         // potentially changes the location of the buffers due to reallocation.
         // Instead create a new StringView of what was most recently read in.
         previous = StringView { previous_buf.span().trim(current.length()) };
         previous_to_compare = skip(previous, skip_chars, skip_fields);
     }
 
-    TRY(write_line_content(previous, count, duplicates_only, print_count, *outfile));
-
     return 0;
 }