Regex.cpp 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597
  1. /*
  2. * Copyright (c) 2020, Emanuel Sprung <emanuel.sprung@gmail.com>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include <LibTest/TestCase.h> // import first, to prevent warning of VERIFY* redefinition
  7. #include <AK/StringBuilder.h>
  8. #include <LibRegex/Regex.h>
  9. #include <LibRegex/RegexDebug.h>
  10. #include <stdio.h>
  11. static ECMAScriptOptions match_test_api_options(const ECMAScriptOptions options)
  12. {
  13. return options;
  14. }
  15. static PosixOptions match_test_api_options(const PosixOptions options)
  16. {
  17. return options;
  18. }
  19. TEST_CASE(regex_options_ecmascript)
  20. {
  21. ECMAScriptOptions eo;
  22. eo |= ECMAScriptFlags::Global;
  23. EXPECT(eo & ECMAScriptFlags::Global);
  24. EXPECT(!(eo & ECMAScriptFlags::Insensitive));
  25. eo = match_test_api_options(ECMAScriptFlags::Global | ECMAScriptFlags::Insensitive | ECMAScriptFlags::Sticky);
  26. EXPECT(eo & ECMAScriptFlags::Global);
  27. EXPECT(eo & ECMAScriptFlags::Insensitive);
  28. EXPECT(eo & ECMAScriptFlags::Sticky);
  29. EXPECT(!(eo & ECMAScriptFlags::Unicode));
  30. EXPECT(!(eo & ECMAScriptFlags::Multiline));
  31. EXPECT(!(eo & ECMAScriptFlags::SingleLine));
  32. eo &= ECMAScriptFlags::Insensitive;
  33. EXPECT(!(eo & ECMAScriptFlags::Global));
  34. EXPECT(eo & ECMAScriptFlags::Insensitive);
  35. EXPECT(!(eo & ECMAScriptFlags::Multiline));
  36. eo &= ECMAScriptFlags::Sticky;
  37. EXPECT(!(eo & ECMAScriptFlags::Global));
  38. EXPECT(!(eo & ECMAScriptFlags::Insensitive));
  39. EXPECT(!(eo & ECMAScriptFlags::Multiline));
  40. EXPECT(!(eo & ECMAScriptFlags::Sticky));
  41. eo = ~ECMAScriptFlags::Insensitive;
  42. EXPECT(eo & ECMAScriptFlags::Global);
  43. EXPECT(!(eo & ECMAScriptFlags::Insensitive));
  44. EXPECT(eo & ECMAScriptFlags::Multiline);
  45. EXPECT(eo & ECMAScriptFlags::Sticky);
  46. }
  47. TEST_CASE(regex_options_posix)
  48. {
  49. PosixOptions eo;
  50. eo |= PosixFlags::Global;
  51. EXPECT(eo & PosixFlags::Global);
  52. EXPECT(!(eo & PosixFlags::Insensitive));
  53. eo = match_test_api_options(PosixFlags::Global | PosixFlags::Insensitive | PosixFlags::MatchNotBeginOfLine);
  54. EXPECT(eo & PosixFlags::Global);
  55. EXPECT(eo & PosixFlags::Insensitive);
  56. EXPECT(eo & PosixFlags::MatchNotBeginOfLine);
  57. EXPECT(!(eo & PosixFlags::Unicode));
  58. EXPECT(!(eo & PosixFlags::Multiline));
  59. eo &= PosixFlags::Insensitive;
  60. EXPECT(!(eo & PosixFlags::Global));
  61. EXPECT(eo & PosixFlags::Insensitive);
  62. EXPECT(!(eo & PosixFlags::Multiline));
  63. eo &= PosixFlags::MatchNotBeginOfLine;
  64. EXPECT(!(eo & PosixFlags::Global));
  65. EXPECT(!(eo & PosixFlags::Insensitive));
  66. EXPECT(!(eo & PosixFlags::Multiline));
  67. eo = ~PosixFlags::Insensitive;
  68. EXPECT(eo & PosixFlags::Global);
  69. EXPECT(!(eo & PosixFlags::Insensitive));
  70. EXPECT(eo & PosixFlags::Multiline);
  71. }
  72. TEST_CASE(regex_lexer)
  73. {
  74. Lexer l("/[.*+?^${}()|[\\]\\\\]/g");
  75. EXPECT(l.next().type() == regex::TokenType::Slash);
  76. EXPECT(l.next().type() == regex::TokenType::LeftBracket);
  77. EXPECT(l.next().type() == regex::TokenType::Period);
  78. EXPECT(l.next().type() == regex::TokenType::Asterisk);
  79. EXPECT(l.next().type() == regex::TokenType::Plus);
  80. EXPECT(l.next().type() == regex::TokenType::Questionmark);
  81. EXPECT(l.next().type() == regex::TokenType::Circumflex);
  82. EXPECT(l.next().type() == regex::TokenType::Dollar);
  83. EXPECT(l.next().type() == regex::TokenType::LeftCurly);
  84. EXPECT(l.next().type() == regex::TokenType::RightCurly);
  85. EXPECT(l.next().type() == regex::TokenType::LeftParen);
  86. EXPECT(l.next().type() == regex::TokenType::RightParen);
  87. EXPECT(l.next().type() == regex::TokenType::Pipe);
  88. EXPECT(l.next().type() == regex::TokenType::LeftBracket);
  89. EXPECT(l.next().type() == regex::TokenType::EscapeSequence);
  90. EXPECT(l.next().type() == regex::TokenType::EscapeSequence);
  91. EXPECT(l.next().type() == regex::TokenType::RightBracket);
  92. EXPECT(l.next().type() == regex::TokenType::Slash);
  93. EXPECT(l.next().type() == regex::TokenType::Char);
  94. }
  95. TEST_CASE(parser_error_parens)
  96. {
  97. String pattern = "test()test";
  98. Lexer l(pattern);
  99. PosixExtendedParser p(l);
  100. p.parse();
  101. EXPECT(p.has_error());
  102. EXPECT(p.error() == Error::EmptySubExpression);
  103. }
  104. TEST_CASE(parser_error_special_characters_used_at_wrong_place)
  105. {
  106. String pattern;
  107. Vector<char, 5> chars = { '*', '+', '?', '{' };
  108. StringBuilder b;
  109. Lexer l;
  110. PosixExtended p(l);
  111. for (auto& ch : chars) {
  112. // First in ere
  113. b.clear();
  114. b.append(ch);
  115. pattern = b.build();
  116. l.set_source(pattern);
  117. p.parse();
  118. EXPECT(p.has_error());
  119. EXPECT(p.error() == Error::InvalidRepetitionMarker);
  120. // After vertical line
  121. b.clear();
  122. b.append("a|");
  123. b.append(ch);
  124. pattern = b.build();
  125. l.set_source(pattern);
  126. p.parse();
  127. EXPECT(p.has_error());
  128. EXPECT(p.error() == Error::InvalidRepetitionMarker);
  129. // After circumflex
  130. b.clear();
  131. b.append("^");
  132. b.append(ch);
  133. pattern = b.build();
  134. l.set_source(pattern);
  135. p.parse();
  136. EXPECT(p.has_error());
  137. EXPECT(p.error() == Error::InvalidRepetitionMarker);
  138. // After dollar
  139. b.clear();
  140. b.append("$");
  141. b.append(ch);
  142. pattern = b.build();
  143. l.set_source(pattern);
  144. p.parse();
  145. EXPECT(p.has_error());
  146. EXPECT(p.error() == Error::InvalidRepetitionMarker);
  147. // After left parens
  148. b.clear();
  149. b.append("(");
  150. b.append(ch);
  151. b.append(")");
  152. pattern = b.build();
  153. l.set_source(pattern);
  154. p.parse();
  155. EXPECT(p.has_error());
  156. EXPECT(p.error() == Error::InvalidRepetitionMarker);
  157. }
  158. }
  159. TEST_CASE(parser_error_vertical_line_used_at_wrong_place)
  160. {
  161. Lexer l;
  162. PosixExtended p(l);
  163. // First in ere
  164. l.set_source("|asdf");
  165. p.parse();
  166. EXPECT(p.has_error());
  167. EXPECT(p.error() == Error::EmptySubExpression);
  168. // Last in ere
  169. l.set_source("asdf|");
  170. p.parse();
  171. EXPECT(p.has_error());
  172. EXPECT(p.error() == Error::EmptySubExpression);
  173. // After left parens
  174. l.set_source("(|asdf)");
  175. p.parse();
  176. EXPECT(p.has_error());
  177. EXPECT(p.error() == Error::EmptySubExpression);
  178. // Proceed right parens
  179. l.set_source("(asdf)|");
  180. p.parse();
  181. EXPECT(p.has_error());
  182. EXPECT(p.error() == Error::EmptySubExpression);
  183. }
  184. TEST_CASE(catch_all_first)
  185. {
  186. Regex<PosixExtended> re("^.*$");
  187. RegexResult m;
  188. re.match("Hello World", m);
  189. EXPECT(m.count == 1);
  190. EXPECT(re.match("Hello World", m));
  191. }
  192. TEST_CASE(catch_all)
  193. {
  194. Regex<PosixExtended> re("^.*$", PosixFlags::Global);
  195. EXPECT(re.has_match("Hello World"));
  196. EXPECT(re.match("Hello World").success);
  197. EXPECT(re.match("Hello World").count == 1);
  198. EXPECT(has_match("Hello World", re));
  199. auto res = match("Hello World", re);
  200. EXPECT(res.success);
  201. EXPECT(res.count == 1);
  202. EXPECT(res.matches.size() == 1);
  203. EXPECT(res.matches.first().view == "Hello World");
  204. }
  205. TEST_CASE(catch_all_again)
  206. {
  207. Regex<PosixExtended> re("^.*$", PosixFlags::Extra);
  208. EXPECT_EQ(has_match("Hello World", re), true);
  209. }
  210. TEST_CASE(char_utf8)
  211. {
  212. Regex<PosixExtended> re("😀");
  213. RegexResult result;
  214. EXPECT_EQ((result = match("Привет, мир! 😀 γειά σου κόσμος 😀 こんにちは世界", re, PosixFlags::Global)).success, true);
  215. EXPECT_EQ(result.count, 2u);
  216. }
  217. TEST_CASE(catch_all_newline)
  218. {
  219. Regex<PosixExtended> re("^.*$", PosixFlags::Multiline | PosixFlags::StringCopyMatches);
  220. RegexResult result;
  221. auto lambda = [&result, &re]() {
  222. String aaa = "Hello World\nTest\n1234\n";
  223. result = match(aaa, re);
  224. EXPECT_EQ(result.success, true);
  225. };
  226. lambda();
  227. EXPECT_EQ(result.count, 3u);
  228. EXPECT_EQ(result.matches.at(0).view, "Hello World");
  229. EXPECT_EQ(result.matches.at(1).view, "Test");
  230. EXPECT_EQ(result.matches.at(2).view, "1234");
  231. }
  232. TEST_CASE(catch_all_newline_view)
  233. {
  234. Regex<PosixExtended> re("^.*$", PosixFlags::Multiline);
  235. RegexResult result;
  236. String aaa = "Hello World\nTest\n1234\n";
  237. result = match(aaa, re);
  238. EXPECT_EQ(result.success, true);
  239. EXPECT_EQ(result.count, 3u);
  240. String str = "Hello World";
  241. EXPECT_EQ(result.matches.at(0).view, str.view());
  242. EXPECT_EQ(result.matches.at(1).view, "Test");
  243. EXPECT_EQ(result.matches.at(2).view, "1234");
  244. }
  245. TEST_CASE(catch_all_newline_2)
  246. {
  247. Regex<PosixExtended> re("^.*$");
  248. RegexResult result;
  249. result = match("Hello World\nTest\n1234\n", re, PosixFlags::Multiline | PosixFlags::StringCopyMatches);
  250. EXPECT_EQ(result.success, true);
  251. EXPECT_EQ(result.count, 3u);
  252. EXPECT_EQ(result.matches.at(0).view, "Hello World");
  253. EXPECT_EQ(result.matches.at(1).view, "Test");
  254. EXPECT_EQ(result.matches.at(2).view, "1234");
  255. result = match("Hello World\nTest\n1234\n", re);
  256. EXPECT_EQ(result.success, true);
  257. EXPECT_EQ(result.count, 1u);
  258. EXPECT_EQ(result.matches.at(0).view, "Hello World\nTest\n1234\n");
  259. }
  260. TEST_CASE(match_all_character_class)
  261. {
  262. Regex<PosixExtended> re("[[:alpha:]]");
  263. String str = "[Window]\nOpacity=255\nAudibleBeep=0\n";
  264. RegexResult result = match(str, re, PosixFlags::Global | PosixFlags::StringCopyMatches);
  265. EXPECT_EQ(result.success, true);
  266. EXPECT_EQ(result.count, 24u);
  267. EXPECT_EQ(result.matches.at(0).view, "W");
  268. EXPECT_EQ(result.matches.at(1).view, "i");
  269. EXPECT_EQ(result.matches.at(2).view, "n");
  270. EXPECT(&result.matches.at(0).view.characters_without_null_termination()[0] != &str.view().characters_without_null_termination()[1]);
  271. }
  272. TEST_CASE(match_character_class_with_assertion)
  273. {
  274. Regex<PosixExtended> re("[[:alpha:]]+$");
  275. String str = "abcdef";
  276. RegexResult result = match(str, re);
  277. EXPECT_EQ(result.success, true);
  278. EXPECT_EQ(result.count, 1u);
  279. }
  280. TEST_CASE(example_for_git_commit)
  281. {
  282. Regex<PosixExtended> re("^.*$");
  283. auto result = re.match("Well, hello friends!\nHello World!");
  284. EXPECT(result.success);
  285. EXPECT(result.count == 1);
  286. EXPECT(result.matches.at(0).view.starts_with("Well"));
  287. EXPECT(result.matches.at(0).view.length() == 33);
  288. EXPECT(re.has_match("Well,...."));
  289. result = re.match("Well, hello friends!\nHello World!", PosixFlags::Multiline);
  290. EXPECT(result.success);
  291. EXPECT(result.count == 2);
  292. EXPECT(result.matches.at(0).view == "Well, hello friends!");
  293. EXPECT(result.matches.at(1).view == "Hello World!");
  294. }
  295. TEST_CASE(email_address)
  296. {
  297. Regex<PosixExtended> re("^[A-Z0-9a-z._%+-]{1,64}@([A-Za-z0-9-]{1,63}\\.){1,125}[A-Za-z]{2,63}$");
  298. EXPECT(re.has_match("hello.world@domain.tld"));
  299. EXPECT(re.has_match("this.is.a.very_long_email_address@world.wide.web"));
  300. }
  301. TEST_CASE(ini_file_entries)
  302. {
  303. Regex<PosixExtended> re("[[:alpha:]]*=([[:digit:]]*)|\\[(.*)\\]");
  304. RegexResult result;
  305. if constexpr (REGEX_DEBUG) {
  306. RegexDebug regex_dbg(stderr);
  307. regex_dbg.print_raw_bytecode(re);
  308. regex_dbg.print_header();
  309. regex_dbg.print_bytecode(re);
  310. }
  311. String haystack = "[Window]\nOpacity=255\nAudibleBeep=0\n";
  312. EXPECT_EQ(re.search(haystack.view(), result, PosixFlags::Multiline), true);
  313. EXPECT_EQ(result.count, 3u);
  314. if constexpr (REGEX_DEBUG) {
  315. for (auto& v : result.matches)
  316. fprintf(stderr, "%s\n", v.view.to_string().characters());
  317. }
  318. EXPECT_EQ(result.matches.at(0).view, "[Window]");
  319. EXPECT_EQ(result.capture_group_matches.at(0).at(0).view, "Window");
  320. EXPECT_EQ(result.matches.at(1).view, "Opacity=255");
  321. EXPECT_EQ(result.matches.at(1).line, 1u);
  322. EXPECT_EQ(result.matches.at(1).column, 0u);
  323. EXPECT_EQ(result.capture_group_matches.at(1).at(0).view, "255");
  324. EXPECT_EQ(result.capture_group_matches.at(1).at(0).line, 1u);
  325. EXPECT_EQ(result.capture_group_matches.at(1).at(0).column, 8u);
  326. EXPECT_EQ(result.matches.at(2).view, "AudibleBeep=0");
  327. EXPECT_EQ(result.capture_group_matches.at(2).at(0).view, "0");
  328. EXPECT_EQ(result.capture_group_matches.at(2).at(0).line, 2u);
  329. EXPECT_EQ(result.capture_group_matches.at(2).at(0).column, 12u);
  330. }
  331. TEST_CASE(ini_file_entries2)
  332. {
  333. Regex<PosixExtended> re("[[:alpha:]]*=([[:digit:]]*)");
  334. RegexResult result;
  335. String haystack = "ViewMode=Icon";
  336. EXPECT_EQ(re.match(haystack.view(), result), false);
  337. EXPECT_EQ(result.count, 0u);
  338. EXPECT_EQ(re.search(haystack.view(), result), true);
  339. EXPECT_EQ(result.count, 1u);
  340. }
  341. TEST_CASE(named_capture_group)
  342. {
  343. Regex<PosixExtended> re("[[:alpha:]]*=(?<Test>[[:digit:]]*)");
  344. RegexResult result;
  345. if constexpr (REGEX_DEBUG) {
  346. RegexDebug regex_dbg(stderr);
  347. regex_dbg.print_raw_bytecode(re);
  348. regex_dbg.print_header();
  349. regex_dbg.print_bytecode(re);
  350. }
  351. String haystack = "[Window]\nOpacity=255\nAudibleBeep=0\n";
  352. EXPECT_EQ(re.search(haystack, result, PosixFlags::Multiline), true);
  353. EXPECT_EQ(result.count, 2u);
  354. EXPECT_EQ(result.matches.at(0).view, "Opacity=255");
  355. EXPECT_EQ(result.named_capture_group_matches.at(0).ensure("Test").view, "255");
  356. EXPECT_EQ(result.matches.at(1).view, "AudibleBeep=0");
  357. EXPECT_EQ(result.named_capture_group_matches.at(1).ensure("Test").view, "0");
  358. }
  359. TEST_CASE(a_star)
  360. {
  361. Regex<PosixExtended> re("a*");
  362. RegexResult result;
  363. if constexpr (REGEX_DEBUG) {
  364. RegexDebug regex_dbg(stderr);
  365. regex_dbg.print_raw_bytecode(re);
  366. regex_dbg.print_header();
  367. regex_dbg.print_bytecode(re);
  368. }
  369. String haystack = "[Window]\nOpacity=255\nAudibleBeep=0\n";
  370. EXPECT_EQ(re.search(haystack.view(), result, PosixFlags::Multiline), true);
  371. EXPECT_EQ(result.count, 32u);
  372. EXPECT_EQ(result.matches.at(0).view.length(), 0u);
  373. EXPECT_EQ(result.matches.at(10).view.length(), 1u);
  374. EXPECT_EQ(result.matches.at(10).view, "a");
  375. EXPECT_EQ(result.matches.at(31).view.length(), 0u);
  376. }
  377. TEST_CASE(simple_period_end_benchmark)
  378. {
  379. Regex<PosixExtended> re("hello.$");
  380. RegexResult m;
  381. EXPECT_EQ(re.search("Hello1", m), false);
  382. EXPECT_EQ(re.search("hello1hello1", m), true);
  383. EXPECT_EQ(re.search("hello2hell", m), false);
  384. EXPECT_EQ(re.search("hello?", m), true);
  385. }
  386. TEST_CASE(ECMA262_parse)
  387. {
  388. struct _test {
  389. const char* pattern;
  390. regex::Error expected_error { regex::Error::NoError };
  391. regex::ECMAScriptFlags flags {};
  392. };
  393. constexpr _test tests[] {
  394. { "^hello.$" },
  395. { "^(hello.)$" },
  396. { "^h{0,1}ello.$" },
  397. { "^hello\\W$" },
  398. { "^hell\\w.$" },
  399. { "^hell\\x6f1$" }, // ^hello1$
  400. { "^hel(?:l\\w).$" },
  401. { "^hel(?<LO>l\\w).$" },
  402. { "^[-a-zA-Z\\w\\s]+$" },
  403. { "\\bhello\\B" },
  404. { "^[\\w+/_-]+[=]{0,2}$" }, // #4189
  405. { "^(?:[^<]*(<[\\w\\W]+>)[^>]*$|#([\\w\\-]*)$)" }, // #4189
  406. { "\\/" }, // #4189
  407. { ",/=-:" }, // #4243
  408. { "\\x" }, // Even invalid escapes are allowed if ~unicode.
  409. { "\\", regex::Error::InvalidTrailingEscape },
  410. { "(?", regex::Error::InvalidCaptureGroup },
  411. { "\\u1234", regex::Error::NoError, regex::ECMAScriptFlags::Unicode },
  412. { "[\\u1234]", regex::Error::NoError, regex::ECMAScriptFlags::Unicode },
  413. { ",(?", regex::Error::InvalidCaptureGroup }, // #4583
  414. { "{1}", regex::Error::InvalidPattern },
  415. { "{1,2}", regex::Error::InvalidPattern },
  416. };
  417. for (auto& test : tests) {
  418. Regex<ECMA262> re(test.pattern);
  419. EXPECT_EQ(re.parser_result.error, test.expected_error);
  420. if constexpr (REGEX_DEBUG) {
  421. dbgln("\n");
  422. RegexDebug regex_dbg(stderr);
  423. regex_dbg.print_raw_bytecode(re);
  424. regex_dbg.print_header();
  425. regex_dbg.print_bytecode(re);
  426. dbgln("\n");
  427. }
  428. }
  429. }
  430. TEST_CASE(ECMA262_match)
  431. {
  432. struct _test {
  433. const char* pattern;
  434. const char* subject;
  435. bool matches { true };
  436. ECMAScriptFlags options {};
  437. };
  438. // clang-format off
  439. constexpr _test tests[] {
  440. { "^hello.$", "hello1" },
  441. { "^(hello.)$", "hello1" },
  442. { "^h{0,1}ello.$", "ello1" },
  443. { "^hello\\W$", "hello!" },
  444. { "^hell\\w.$", "hellx!" },
  445. { "^hell\\x6f1$", "hello1" },
  446. { "^hel(?<LO>l.)1$", "hello1" },
  447. { "^hel(?<LO>l.)1*\\k<LO>.$", "hello1lo1" },
  448. { "^[-a-z1-3\\s]+$", "hell2 o1" },
  449. { "^[\\0-\\x1f]$", "\n" },
  450. { .pattern = "\\bhello\\B", .subject = "hello1", .options = ECMAScriptFlags::Global },
  451. { "\\b.*\\b", "hello1" },
  452. { "[^\\D\\S]{2}", "1 " },
  453. { "bar(?=f.)foo", "barfoo" },
  454. { "bar(?=foo)bar", "barbar", false },
  455. { "bar(?!foo)bar", "barbar", true },
  456. { "bar(?!bar)bar", "barbar", false },
  457. { "bar.*(?<=foo)", "barbar", false },
  458. { "bar.*(?<!foo)", "barbar", true },
  459. { "((...)X)+", "fooXbarXbazX", true },
  460. { "(?:)", "", true },
  461. { "\\^", "^" },
  462. // ECMA262, B.1.4. Regular Expression Pattern extensions for browsers
  463. { "{", "{", true, ECMAScriptFlags::BrowserExtended },
  464. { "\\5", "\5", true, ECMAScriptFlags::BrowserExtended },
  465. { "\\05", "\5", true, ECMAScriptFlags::BrowserExtended },
  466. { "\\455", "\45""5", true, ECMAScriptFlags::BrowserExtended },
  467. { "\\314", "\314", true, ECMAScriptFlags::BrowserExtended },
  468. { "\\cf", "\06", true, ECMAScriptFlags::BrowserExtended },
  469. { "\\c1", "\\c1", true, ECMAScriptFlags::BrowserExtended },
  470. { "[\\c1]", "\x11", true, ECMAScriptFlags::BrowserExtended },
  471. { "[\\w-\\d]", "-", true, ECMAScriptFlags::BrowserExtended },
  472. { "^(?:^^\\.?|[!+-]|!=|!==|#|%|%=|&|&&|&&=|&=|\\(|\\*|\\*=|\\+=|,|-=|->|\\/|\\/=|:|::|;|<|<<|<<=|<=|=|==|===|>|>=|>>|>>=|>>>|>>>=|[?@[^]|\\^=|\\^\\^|\\^\\^=|{|\\||\\|=|\\|\\||\\|\\|=|~|break|case|continue|delete|do|else|finally|instanceof|return|throw|try|typeof)\\s*(\\/(?=[^*/])(?:[^/[\\\\]|\\\\[\\S\\s]|\\[(?:[^\\\\\\]]|\\\\[\\S\\s])*(?:]|$))+\\/)",
  473. "return /xx/", true, ECMAScriptFlags::BrowserExtended
  474. }, // #5517, appears to be matching JS expressions that involve regular expressions...
  475. { "a{2,}", "aaaa" }, // #5518
  476. };
  477. // clang-format on
  478. for (auto& test : tests) {
  479. Regex<ECMA262> re(test.pattern, test.options);
  480. if constexpr (REGEX_DEBUG) {
  481. dbgln("\n");
  482. RegexDebug regex_dbg(stderr);
  483. regex_dbg.print_raw_bytecode(re);
  484. regex_dbg.print_header();
  485. regex_dbg.print_bytecode(re);
  486. dbgln("\n");
  487. }
  488. EXPECT_EQ(re.parser_result.error, Error::NoError);
  489. EXPECT_EQ(re.match(test.subject).success, test.matches);
  490. }
  491. }
  492. TEST_CASE(replace)
  493. {
  494. struct _test {
  495. const char* pattern;
  496. const char* replacement;
  497. const char* subject;
  498. const char* expected;
  499. ECMAScriptFlags options {};
  500. };
  501. constexpr _test tests[] {
  502. { "foo(.+)", "aaa", "test", "test" },
  503. { "foo(.+)", "test\\1", "foobar", "testbar" },
  504. { "foo(.+)", "\\2\\1", "foobar", "\\2bar" },
  505. { "foo(.+)", "\\\\\\1", "foobar", "\\bar" },
  506. { "foo(.)", "a\\1", "fooxfooy", "axay", ECMAScriptFlags::Multiline },
  507. };
  508. for (auto& test : tests) {
  509. Regex<ECMA262> re(test.pattern, test.options);
  510. if constexpr (REGEX_DEBUG) {
  511. dbgln("\n");
  512. RegexDebug regex_dbg(stderr);
  513. regex_dbg.print_raw_bytecode(re);
  514. regex_dbg.print_header();
  515. regex_dbg.print_bytecode(re);
  516. dbgln("\n");
  517. }
  518. EXPECT_EQ(re.parser_result.error, Error::NoError);
  519. EXPECT_EQ(re.replace(test.subject, test.replacement), test.expected);
  520. }
  521. }