RegexMatcher.h 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287
  1. /*
  2. * Copyright (c) 2020, Emanuel Sprung <emanuel.sprung@gmail.com>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #pragma once
  7. #include "RegexByteCode.h"
  8. #include "RegexMatch.h"
  9. #include "RegexOptions.h"
  10. #include "RegexParser.h"
  11. #include <AK/Forward.h>
  12. #include <AK/GenericLexer.h>
  13. #include <AK/HashMap.h>
  14. #include <AK/NonnullOwnPtrVector.h>
  15. #include <AK/Types.h>
  16. #include <AK/Utf32View.h>
  17. #include <AK/Vector.h>
  18. #include <ctype.h>
  19. #include <stdio.h>
  20. namespace regex {
  // Compile-time tuning constants for the matcher. `constexpr` already implies
  // `const`, so the qualifier is not repeated.
  static constexpr size_t c_max_recursion = 5000;
  static constexpr size_t c_match_preallocation_count = 0;
  23. struct RegexResult final {
  24. bool success { false };
  25. size_t count { 0 };
  26. Vector<Match> matches;
  27. Vector<Vector<Match>> capture_group_matches;
  28. size_t n_operations { 0 };
  29. size_t n_capture_groups { 0 };
  30. size_t n_named_capture_groups { 0 };
  31. };
  32. template<class Parser>
  33. class Regex;
  34. template<class Parser>
  35. class Matcher final {
  36. public:
  37. Matcher(Regex<Parser> const* pattern, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {})
  38. : m_pattern(pattern)
  39. , m_regex_options(regex_options.value_or({}))
  40. {
  41. }
  42. ~Matcher() = default;
  43. RegexResult match(RegexStringView const&, Optional<typename ParserTraits<Parser>::OptionsType> = {}) const;
  44. RegexResult match(Vector<RegexStringView> const&, Optional<typename ParserTraits<Parser>::OptionsType> = {}) const;
  45. typename ParserTraits<Parser>::OptionsType options() const
  46. {
  47. return m_regex_options;
  48. }
  49. void reset_pattern(Badge<Regex<Parser>>, Regex<Parser> const* pattern)
  50. {
  51. m_pattern = pattern;
  52. }
  53. private:
  54. Optional<bool> execute(MatchInput const& input, MatchState& state, size_t& operations) const;
  55. Regex<Parser> const* m_pattern;
  56. typename ParserTraits<Parser>::OptionsType const m_regex_options;
  57. };
  58. template<class Parser>
  59. class Regex final {
  60. public:
  61. String pattern_value;
  62. regex::Parser::Result parser_result;
  63. OwnPtr<Matcher<Parser>> matcher { nullptr };
  64. mutable size_t start_offset { 0 };
  65. static regex::Parser::Result parse_pattern(StringView pattern, typename ParserTraits<Parser>::OptionsType regex_options = {});
  66. explicit Regex(String pattern, typename ParserTraits<Parser>::OptionsType regex_options = {});
  67. Regex(regex::Parser::Result parse_result, String pattern, typename ParserTraits<Parser>::OptionsType regex_options = {});
  68. ~Regex() = default;
  69. Regex(Regex&&);
  70. Regex& operator=(Regex&&);
  71. typename ParserTraits<Parser>::OptionsType options() const;
  72. void print_bytecode(FILE* f = stdout) const;
  73. String error_string(Optional<String> message = {}) const;
  74. RegexResult match(RegexStringView const view, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {}) const
  75. {
  76. if (!matcher || parser_result.error != Error::NoError)
  77. return {};
  78. return matcher->match(view, regex_options);
  79. }
  80. RegexResult match(Vector<RegexStringView> const views, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {}) const
  81. {
  82. if (!matcher || parser_result.error != Error::NoError)
  83. return {};
  84. return matcher->match(views, regex_options);
  85. }
  86. String replace(RegexStringView const view, StringView const& replacement_pattern, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {}) const
  87. {
  88. if (!matcher || parser_result.error != Error::NoError)
  89. return {};
  90. StringBuilder builder;
  91. size_t start_offset = 0;
  92. RegexResult result = matcher->match(view, regex_options);
  93. if (!result.success)
  94. return view.to_string();
  95. for (size_t i = 0; i < result.matches.size(); ++i) {
  96. auto& match = result.matches[i];
  97. builder.append(view.substring_view(start_offset, match.global_offset - start_offset).to_string());
  98. start_offset = match.global_offset + match.view.length();
  99. GenericLexer lexer(replacement_pattern);
  100. while (!lexer.is_eof()) {
  101. if (lexer.consume_specific('\\')) {
  102. if (lexer.consume_specific('\\')) {
  103. builder.append('\\');
  104. continue;
  105. }
  106. auto number = lexer.consume_while(isdigit);
  107. if (auto index = number.to_uint(); index.has_value() && result.n_capture_groups >= index.value()) {
  108. builder.append(result.capture_group_matches[i][index.value() - 1].view.to_string());
  109. } else {
  110. builder.appendff("\\{}", number);
  111. }
  112. } else {
  113. builder.append(lexer.consume_while([](auto ch) { return ch != '\\'; }));
  114. }
  115. }
  116. }
  117. builder.append(view.substring_view(start_offset, view.length() - start_offset).to_string());
  118. return builder.to_string();
  119. }
  120. // FIXME: replace(Vector<RegexStringView> const , ...)
  121. RegexResult search(RegexStringView const view, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {}) const
  122. {
  123. if (!matcher || parser_result.error != Error::NoError)
  124. return {};
  125. AllOptions options = (AllOptions)regex_options.value_or({});
  126. if ((options & AllFlags::MatchNotBeginOfLine) && (options & AllFlags::MatchNotEndOfLine)) {
  127. options.reset_flag(AllFlags::MatchNotEndOfLine);
  128. options.reset_flag(AllFlags::MatchNotBeginOfLine);
  129. }
  130. options.reset_flag(AllFlags::Internal_Stateful);
  131. options |= AllFlags::Global;
  132. return matcher->match(view, options);
  133. }
  134. RegexResult search(Vector<RegexStringView> const views, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {}) const
  135. {
  136. if (!matcher || parser_result.error != Error::NoError)
  137. return {};
  138. AllOptions options = (AllOptions)regex_options.value_or({});
  139. if ((options & AllFlags::MatchNotBeginOfLine) && (options & AllFlags::MatchNotEndOfLine)) {
  140. options.reset_flag(AllFlags::MatchNotEndOfLine);
  141. options.reset_flag(AllFlags::MatchNotBeginOfLine);
  142. }
  143. options.reset_flag(AllFlags::Internal_Stateful);
  144. options |= AllFlags::Global;
  145. return matcher->match(views, options);
  146. }
  147. bool match(RegexStringView const view, RegexResult& m, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {}) const
  148. {
  149. m = match(view, regex_options);
  150. return m.success;
  151. }
  152. bool match(Vector<RegexStringView> const views, RegexResult& m, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {}) const
  153. {
  154. m = match(views, regex_options);
  155. return m.success;
  156. }
  157. bool search(RegexStringView const view, RegexResult& m, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {}) const
  158. {
  159. m = search(view, regex_options);
  160. return m.success;
  161. }
  162. bool search(Vector<RegexStringView> const views, RegexResult& m, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {}) const
  163. {
  164. m = search(views, regex_options);
  165. return m.success;
  166. }
  167. bool has_match(RegexStringView const view, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {}) const
  168. {
  169. if (!matcher || parser_result.error != Error::NoError)
  170. return false;
  171. RegexResult result = matcher->match(view, AllOptions { regex_options.value_or({}) } | AllFlags::SkipSubExprResults);
  172. return result.success;
  173. }
  174. bool has_match(Vector<RegexStringView> const views, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {}) const
  175. {
  176. if (!matcher || parser_result.error != Error::NoError)
  177. return false;
  178. RegexResult result = matcher->match(views, AllOptions { regex_options.value_or({}) } | AllFlags::SkipSubExprResults);
  179. return result.success;
  180. }
  181. };
  182. // free standing functions for match, search and has_match
  183. template<class Parser>
  184. RegexResult match(RegexStringView const view, Regex<Parser>& pattern, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {})
  185. {
  186. return pattern.match(view, regex_options);
  187. }
  188. template<class Parser>
  189. RegexResult match(Vector<RegexStringView> const view, Regex<Parser>& pattern, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {})
  190. {
  191. return pattern.match(view, regex_options);
  192. }
  193. template<class Parser>
  194. bool match(RegexStringView const view, Regex<Parser>& pattern, RegexResult&, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {})
  195. {
  196. return pattern.match(view, regex_options);
  197. }
  198. template<class Parser>
  199. bool match(Vector<RegexStringView> const view, Regex<Parser>& pattern, RegexResult&, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {})
  200. {
  201. return pattern.match(view, regex_options);
  202. }
  203. template<class Parser>
  204. RegexResult search(RegexStringView const view, Regex<Parser>& pattern, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {})
  205. {
  206. return pattern.search(view, regex_options);
  207. }
  208. template<class Parser>
  209. RegexResult search(Vector<RegexStringView> const views, Regex<Parser>& pattern, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {})
  210. {
  211. return pattern.search(views, regex_options);
  212. }
  213. template<class Parser>
  214. bool search(RegexStringView const view, Regex<Parser>& pattern, RegexResult&, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {})
  215. {
  216. return pattern.search(view, regex_options);
  217. }
  218. template<class Parser>
  219. bool search(Vector<RegexStringView> const views, Regex<Parser>& pattern, RegexResult&, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {})
  220. {
  221. return pattern.search(views, regex_options);
  222. }
  223. template<class Parser>
  224. bool has_match(RegexStringView const view, Regex<Parser>& pattern, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {})
  225. {
  226. return pattern.has_match(view, regex_options);
  227. }
  228. template<class Parser>
  229. bool has_match(Vector<RegexStringView> const views, Regex<Parser>& pattern, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {})
  230. {
  231. return pattern.has_match(views, regex_options);
  232. }
  233. }
  234. using regex::has_match;
  235. using regex::match;
  236. using regex::Regex;
  237. using regex::RegexResult;