// grep.cpp (7.8 KB)
  1. /*
  2. * Copyright (c) 2020, Emanuel Sprung <emanuel.sprung@gmail.com>
  3. * All rights reserved.
  4. *
  5. * Redistribution and use in source and binary forms, with or without
  6. * modification, are permitted provided that the following conditions are met:
  7. *
  8. * 1. Redistributions of source code must retain the above copyright notice, this
  9. * list of conditions and the following disclaimer.
  10. *
  11. * 2. Redistributions in binary form must reproduce the above copyright notice,
  12. * this list of conditions and the following disclaimer in the documentation
  13. * and/or other materials provided with the distribution.
  14. *
  15. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  16. * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  17. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  18. * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
  19. * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  20. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  21. * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  22. * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  23. * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  24. * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  25. */
  26. #include <AK/ByteBuffer.h>
  27. #include <AK/String.h>
  28. #include <AK/Utf8View.h>
  29. #include <AK/Vector.h>
  30. #include <LibCore/ArgsParser.h>
  31. #include <LibCore/DirIterator.h>
  32. #include <LibCore/File.h>
  33. #include <LibRegex/Regex.h>
  34. #include <stdio.h>
  35. #include <unistd.h>
  // Selects how input containing NUL bytes ("binary" input) is handled.
  enum class BinaryFileMode {
      // Report only that the binary input matches, without printing the line.
      Binary,
      // Treat binary input like text and print matching lines.
      Text,
      // Ignore binary input entirely.
      Skip,
  };
  41. template<typename... Ts>
  42. void fail(StringView format, Ts... args)
  43. {
  44. fprintf(stderr, "\x1b[31m");
  45. warnln(format, forward<Ts>(args)...);
  46. fprintf(stderr, "\x1b[0m");
  47. abort();
  48. }
  49. int main(int argc, char** argv)
  50. {
  51. if (pledge("stdio rpath", nullptr) < 0) {
  52. perror("pledge");
  53. return 1;
  54. }
  55. Vector<const char*> files;
  56. bool recursive { false };
  57. bool use_ere { true };
  58. const char* pattern = nullptr;
  59. BinaryFileMode binary_mode { BinaryFileMode::Binary };
  60. bool case_insensitive = false;
  61. Core::ArgsParser args_parser;
  62. args_parser.add_option(recursive, "Recursively scan files starting in working directory", "recursive", 'r');
  63. args_parser.add_option(use_ere, "Extended regular expressions (default)", "extended-regexp", 'E');
  64. args_parser.add_option(pattern, "Pattern", "regexp", 'e', "Pattern");
  65. args_parser.add_option(case_insensitive, "Make matches case-insensitive", nullptr, 'i');
  66. args_parser.add_option(Core::ArgsParser::Option {
  67. .requires_argument = true,
  68. .help_string = "Action to take for binary files ([binary], text, skip)",
  69. .long_name = "binary-mode",
  70. .accept_value = [&](auto* str) {
  71. if (StringView { "text" } == str)
  72. binary_mode = BinaryFileMode::Text;
  73. else if (StringView { "binary" } == str)
  74. binary_mode = BinaryFileMode::Binary;
  75. else if (StringView { "skip" } == str)
  76. binary_mode = BinaryFileMode::Skip;
  77. else
  78. return false;
  79. return true;
  80. },
  81. });
  82. args_parser.add_option(Core::ArgsParser::Option {
  83. .requires_argument = false,
  84. .help_string = "Treat binary files as text (same as --binary-mode text)",
  85. .long_name = "text",
  86. .short_name = 'a',
  87. .accept_value = [&](auto) {
  88. binary_mode = BinaryFileMode::Text;
  89. return true;
  90. },
  91. });
  92. args_parser.add_option(Core::ArgsParser::Option {
  93. .requires_argument = false,
  94. .help_string = "Ignore binary files (same as --binary-mode skip)",
  95. .long_name = nullptr,
  96. .short_name = 'I',
  97. .accept_value = [&](auto) {
  98. binary_mode = BinaryFileMode::Skip;
  99. return true;
  100. },
  101. });
  102. args_parser.add_positional_argument(files, "File(s) to process", "file", Core::ArgsParser::Required::No);
  103. args_parser.parse(argc, argv);
  104. if (!use_ere)
  105. return 0;
  106. // mock grep behaviour: if -e is omitted, use first positional argument as pattern
  107. if (pattern == nullptr && files.size())
  108. pattern = files.take_first();
  109. PosixOptions options {};
  110. if (case_insensitive)
  111. options |= PosixFlags::Insensitive;
  112. Regex<PosixExtended> re(pattern, options);
  113. if (re.parser_result.error != Error::NoError) {
  114. return 1;
  115. }
  116. auto matches = [&](StringView str, StringView filename = "", bool print_filename = false, bool is_binary = false) {
  117. size_t last_printed_char_pos { 0 };
  118. if (is_binary && binary_mode == BinaryFileMode::Skip)
  119. return false;
  120. auto result = re.match(str, PosixFlags::Global);
  121. if (result.success) {
  122. if (is_binary && binary_mode == BinaryFileMode::Binary) {
  123. outln("binary file \x1B[34m{}\x1B[0m matches", filename);
  124. } else {
  125. if (result.matches.size() && print_filename) {
  126. out("\x1B[34m{}:\x1B[0m", filename);
  127. }
  128. for (auto& match : result.matches) {
  129. out("{}\x1B[32m{}\x1B[0m",
  130. StringView(&str[last_printed_char_pos], match.global_offset - last_printed_char_pos),
  131. match.view.to_string());
  132. last_printed_char_pos = match.global_offset + match.view.length();
  133. }
  134. out("{}", StringView(&str[last_printed_char_pos], str.length() - last_printed_char_pos));
  135. }
  136. return true;
  137. }
  138. return false;
  139. };
  140. auto handle_file = [&matches, binary_mode](StringView filename, bool print_filename) -> bool {
  141. auto file = Core::File::construct(filename);
  142. if (!file->open(Core::IODevice::ReadOnly)) {
  143. warnln("Failed to open {}: {}", filename, file->error_string());
  144. return false;
  145. }
  146. while (file->can_read_line()) {
  147. auto line = file->read_line(1024);
  148. auto is_binary = memchr(line.data(), 0, line.size()) != nullptr;
  149. StringView str { reinterpret_cast<const char*>(line.data()), line.size() };
  150. if (matches(str, filename, print_filename, is_binary) && is_binary && binary_mode == BinaryFileMode::Binary)
  151. return true;
  152. }
  153. return true;
  154. };
  155. auto add_directory = [&handle_file](String base, Optional<String> recursive, auto handle_directory) -> void {
  156. Core::DirIterator it(recursive.value_or(base), Core::DirIterator::Flags::SkipDots);
  157. while (it.has_next()) {
  158. auto path = it.next_full_path();
  159. if (!Core::File::is_directory(path)) {
  160. auto key = path.substring_view(base.length() + 1, path.length() - base.length() - 1);
  161. handle_file(key, true);
  162. } else {
  163. handle_directory(base, path, handle_directory);
  164. }
  165. }
  166. };
  167. if (!files.size() && !recursive) {
  168. auto stdin_file = Core::File::stdin();
  169. for (;;) {
  170. auto line = stdin_file->read_line(4096);
  171. StringView str { line.data(), line.size() };
  172. bool is_binary = str.bytes().contains_slow(0);
  173. if (is_binary && binary_mode == BinaryFileMode::Skip)
  174. return 1;
  175. if (matches(str, "stdin", false, is_binary) && is_binary && binary_mode == BinaryFileMode::Binary)
  176. return 0;
  177. }
  178. } else {
  179. if (recursive) {
  180. add_directory(".", {}, add_directory);
  181. } else {
  182. bool print_filename { files.size() > 1 };
  183. for (auto& filename : files) {
  184. if (!handle_file(filename, print_filename))
  185. return 1;
  186. }
  187. }
  188. }
  189. return 0;
  190. }