Regex.cpp 52 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230
  1. /*
  2. * Copyright (c) 2020, Emanuel Sprung <emanuel.sprung@gmail.com>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include <LibTest/TestCase.h> // import first, to prevent warning of VERIFY* redefinition
  7. #include <AK/Debug.h>
  8. #include <AK/StringBuilder.h>
  9. #include <AK/Tuple.h>
  10. #include <LibRegex/Regex.h>
  11. #include <LibRegex/RegexDebug.h>
  12. #include <LibRegex/RegexMatcher.h>
  13. #include <stdio.h>
  14. static ECMAScriptOptions match_test_api_options(ECMAScriptOptions const options)
  15. {
  16. return options;
  17. }
  18. static PosixOptions match_test_api_options(PosixOptions const options)
  19. {
  20. return options;
  21. }
  22. template<typename... Flags>
  23. static constexpr ECMAScriptFlags combine_flags(Flags&&... flags)
  24. requires((IsSame<Flags, ECMAScriptFlags> && ...))
  25. {
  26. return static_cast<ECMAScriptFlags>((static_cast<regex::FlagsUnderlyingType>(flags) | ...));
  27. }
  28. TEST_CASE(regex_options_ecmascript)
  29. {
  30. ECMAScriptOptions eo;
  31. eo |= ECMAScriptFlags::Global;
  32. EXPECT(eo.has_flag_set(ECMAScriptFlags::Global));
  33. EXPECT(!eo.has_flag_set(ECMAScriptFlags::Insensitive));
  34. eo = match_test_api_options(ECMAScriptFlags::Global | ECMAScriptFlags::Insensitive | ECMAScriptFlags::Sticky);
  35. EXPECT(eo.has_flag_set(ECMAScriptFlags::Global));
  36. EXPECT(eo.has_flag_set(ECMAScriptFlags::Insensitive));
  37. EXPECT(eo.has_flag_set(ECMAScriptFlags::Sticky));
  38. EXPECT(!eo.has_flag_set(ECMAScriptFlags::Unicode));
  39. EXPECT(!eo.has_flag_set(ECMAScriptFlags::Multiline));
  40. EXPECT(!eo.has_flag_set(ECMAScriptFlags::SingleLine));
  41. eo &= ECMAScriptFlags::Insensitive;
  42. EXPECT(!eo.has_flag_set(ECMAScriptFlags::Global));
  43. EXPECT(eo.has_flag_set(ECMAScriptFlags::Insensitive));
  44. EXPECT(!eo.has_flag_set(ECMAScriptFlags::Multiline));
  45. eo &= ECMAScriptFlags::Sticky;
  46. EXPECT(!eo.has_flag_set(ECMAScriptFlags::Global));
  47. EXPECT(!eo.has_flag_set(ECMAScriptFlags::Insensitive));
  48. EXPECT(!eo.has_flag_set(ECMAScriptFlags::Multiline));
  49. EXPECT(!eo.has_flag_set(ECMAScriptFlags::Sticky));
  50. eo = ~ECMAScriptFlags::Insensitive;
  51. EXPECT(eo.has_flag_set(ECMAScriptFlags::Global));
  52. EXPECT(!eo.has_flag_set(ECMAScriptFlags::Insensitive));
  53. EXPECT(eo.has_flag_set(ECMAScriptFlags::Multiline));
  54. EXPECT(eo.has_flag_set(ECMAScriptFlags::Sticky));
  55. }
  56. TEST_CASE(regex_options_posix)
  57. {
  58. PosixOptions eo;
  59. eo |= PosixFlags::Global;
  60. EXPECT(eo.has_flag_set(PosixFlags::Global));
  61. EXPECT(!eo.has_flag_set(PosixFlags::Insensitive));
  62. eo = match_test_api_options(PosixFlags::Global | PosixFlags::Insensitive | PosixFlags::MatchNotBeginOfLine);
  63. EXPECT(eo.has_flag_set(PosixFlags::Global));
  64. EXPECT(eo.has_flag_set(PosixFlags::Insensitive));
  65. EXPECT(eo.has_flag_set(PosixFlags::MatchNotBeginOfLine));
  66. EXPECT(!eo.has_flag_set(PosixFlags::Unicode));
  67. EXPECT(!eo.has_flag_set(PosixFlags::Multiline));
  68. eo &= PosixFlags::Insensitive;
  69. EXPECT(!eo.has_flag_set(PosixFlags::Global));
  70. EXPECT(eo.has_flag_set(PosixFlags::Insensitive));
  71. EXPECT(!eo.has_flag_set(PosixFlags::Multiline));
  72. eo &= PosixFlags::MatchNotBeginOfLine;
  73. EXPECT(!eo.has_flag_set(PosixFlags::Global));
  74. EXPECT(!eo.has_flag_set(PosixFlags::Insensitive));
  75. EXPECT(!eo.has_flag_set(PosixFlags::Multiline));
  76. eo = ~PosixFlags::Insensitive;
  77. EXPECT(eo.has_flag_set(PosixFlags::Global));
  78. EXPECT(!eo.has_flag_set(PosixFlags::Insensitive));
  79. EXPECT(eo.has_flag_set(PosixFlags::Multiline));
  80. }
  81. TEST_CASE(regex_lexer)
  82. {
  83. Lexer l("/[.*+?^${}()|[\\]\\\\]/g"sv);
  84. EXPECT(l.next().type() == regex::TokenType::Slash);
  85. EXPECT(l.next().type() == regex::TokenType::LeftBracket);
  86. EXPECT(l.next().type() == regex::TokenType::Period);
  87. EXPECT(l.next().type() == regex::TokenType::Asterisk);
  88. EXPECT(l.next().type() == regex::TokenType::Plus);
  89. EXPECT(l.next().type() == regex::TokenType::Questionmark);
  90. EXPECT(l.next().type() == regex::TokenType::Circumflex);
  91. EXPECT(l.next().type() == regex::TokenType::Dollar);
  92. EXPECT(l.next().type() == regex::TokenType::LeftCurly);
  93. EXPECT(l.next().type() == regex::TokenType::RightCurly);
  94. EXPECT(l.next().type() == regex::TokenType::LeftParen);
  95. EXPECT(l.next().type() == regex::TokenType::RightParen);
  96. EXPECT(l.next().type() == regex::TokenType::Pipe);
  97. EXPECT(l.next().type() == regex::TokenType::LeftBracket);
  98. EXPECT(l.next().type() == regex::TokenType::EscapeSequence);
  99. EXPECT(l.next().type() == regex::TokenType::EscapeSequence);
  100. EXPECT(l.next().type() == regex::TokenType::RightBracket);
  101. EXPECT(l.next().type() == regex::TokenType::Slash);
  102. EXPECT(l.next().type() == regex::TokenType::Char);
  103. }
  104. TEST_CASE(parser_error_parens)
  105. {
  106. ByteString pattern = "test()test";
  107. Lexer l(pattern);
  108. PosixExtendedParser p(l);
  109. p.parse();
  110. EXPECT(p.has_error());
  111. EXPECT(p.error() == regex::Error::EmptySubExpression);
  112. }
  113. TEST_CASE(parser_error_special_characters_used_at_wrong_place)
  114. {
  115. ByteString pattern;
  116. Vector<char, 5> chars = { '*', '+', '?', '{' };
  117. StringBuilder b;
  118. Lexer l;
  119. PosixExtended p(l);
  120. for (auto& ch : chars) {
  121. // First in ere
  122. b.clear();
  123. b.append(ch);
  124. pattern = b.to_byte_string();
  125. l.set_source(pattern);
  126. p.parse();
  127. EXPECT(p.has_error());
  128. EXPECT(p.error() == regex::Error::InvalidRepetitionMarker);
  129. // After vertical line
  130. b.clear();
  131. b.append("a|"sv);
  132. b.append(ch);
  133. pattern = b.to_byte_string();
  134. l.set_source(pattern);
  135. p.parse();
  136. EXPECT(p.has_error());
  137. EXPECT(p.error() == regex::Error::InvalidRepetitionMarker);
  138. // After circumflex
  139. b.clear();
  140. b.append('^');
  141. b.append(ch);
  142. pattern = b.to_byte_string();
  143. l.set_source(pattern);
  144. p.parse();
  145. EXPECT(p.has_error());
  146. EXPECT(p.error() == regex::Error::InvalidRepetitionMarker);
  147. // After dollar
  148. b.clear();
  149. b.append('$');
  150. b.append(ch);
  151. pattern = b.to_byte_string();
  152. l.set_source(pattern);
  153. p.parse();
  154. EXPECT(p.has_error());
  155. EXPECT(p.error() == regex::Error::InvalidRepetitionMarker);
  156. // After left parens
  157. b.clear();
  158. b.append('(');
  159. b.append(ch);
  160. b.append(')');
  161. pattern = b.to_byte_string();
  162. l.set_source(pattern);
  163. p.parse();
  164. EXPECT(p.has_error());
  165. EXPECT(p.error() == regex::Error::InvalidRepetitionMarker);
  166. }
  167. }
  168. TEST_CASE(parser_error_vertical_line_used_at_wrong_place)
  169. {
  170. Lexer l;
  171. PosixExtended p(l);
  172. // First in ere
  173. l.set_source("|asdf"sv);
  174. p.parse();
  175. EXPECT(p.has_error());
  176. EXPECT(p.error() == regex::Error::EmptySubExpression);
  177. // Last in ere
  178. l.set_source("asdf|"sv);
  179. p.parse();
  180. EXPECT(p.has_error());
  181. EXPECT(p.error() == regex::Error::EmptySubExpression);
  182. // After left parens
  183. l.set_source("(|asdf)"sv);
  184. p.parse();
  185. EXPECT(p.has_error());
  186. EXPECT(p.error() == regex::Error::EmptySubExpression);
  187. // Proceed right parens
  188. l.set_source("(asdf)|"sv);
  189. p.parse();
  190. EXPECT(p.has_error());
  191. EXPECT(p.error() == regex::Error::EmptySubExpression);
  192. }
  193. TEST_CASE(catch_all_first)
  194. {
  195. Regex<PosixExtended> re("^.*$");
  196. RegexResult m;
  197. re.match("Hello World"sv, m);
  198. EXPECT(m.count == 1);
  199. EXPECT(re.match("Hello World"sv, m));
  200. }
  201. TEST_CASE(catch_all)
  202. {
  203. Regex<PosixExtended> re("^.*$", PosixFlags::Global);
  204. EXPECT(re.has_match("Hello World"sv));
  205. EXPECT(re.match("Hello World"sv).success);
  206. EXPECT(re.match("Hello World"sv).count == 1);
  207. EXPECT(has_match("Hello World"sv, re));
  208. auto res = match("Hello World"sv, re);
  209. EXPECT(res.success);
  210. EXPECT(res.count == 1);
  211. EXPECT(res.matches.size() == 1);
  212. EXPECT(res.matches.first().view == "Hello World");
  213. }
  214. TEST_CASE(catch_all_again)
  215. {
  216. Regex<PosixExtended> re("^.*$", PosixFlags::Extra);
  217. EXPECT_EQ(has_match("Hello World"sv, re), true);
  218. }
  219. TEST_CASE(char_utf8)
  220. {
  221. Regex<PosixExtended> re("😀");
  222. RegexResult result;
  223. EXPECT_EQ((result = match(Utf8View { "Привет, мир! 😀 γειά σου κόσμος 😀 こんにちは世界"sv }, re, PosixFlags::Global)).success, true);
  224. EXPECT_EQ(result.count, 2u);
  225. }
  226. TEST_CASE(catch_all_newline)
  227. {
  228. Regex<PosixExtended> re("^.*$", PosixFlags::Multiline | PosixFlags::StringCopyMatches);
  229. RegexResult result;
  230. auto lambda = [&result, &re]() {
  231. ByteString aaa = "Hello World\nTest\n1234\n";
  232. result = match(aaa, re);
  233. EXPECT_EQ(result.success, true);
  234. };
  235. lambda();
  236. EXPECT_EQ(result.count, 3u);
  237. EXPECT_EQ(result.matches.at(0).view, "Hello World");
  238. EXPECT_EQ(result.matches.at(1).view, "Test");
  239. EXPECT_EQ(result.matches.at(2).view, "1234");
  240. }
  241. TEST_CASE(catch_all_newline_view)
  242. {
  243. Regex<PosixExtended> re("^.*$", PosixFlags::Multiline);
  244. RegexResult result;
  245. ByteString aaa = "Hello World\nTest\n1234\n";
  246. result = match(aaa, re);
  247. EXPECT_EQ(result.success, true);
  248. EXPECT_EQ(result.count, 3u);
  249. ByteString str = "Hello World";
  250. EXPECT_EQ(result.matches.at(0).view, str.view());
  251. EXPECT_EQ(result.matches.at(1).view, "Test");
  252. EXPECT_EQ(result.matches.at(2).view, "1234");
  253. }
  254. TEST_CASE(catch_all_newline_2)
  255. {
  256. Regex<PosixExtended> re("^.*$");
  257. RegexResult result;
  258. result = match("Hello World\nTest\n1234\n"sv, re, PosixFlags::Multiline | PosixFlags::StringCopyMatches);
  259. EXPECT_EQ(result.success, true);
  260. EXPECT_EQ(result.count, 3u);
  261. EXPECT_EQ(result.matches.at(0).view, "Hello World");
  262. EXPECT_EQ(result.matches.at(1).view, "Test");
  263. EXPECT_EQ(result.matches.at(2).view, "1234");
  264. result = match("Hello World\nTest\n1234\n"sv, re);
  265. EXPECT_EQ(result.success, true);
  266. EXPECT_EQ(result.count, 1u);
  267. EXPECT_EQ(result.matches.at(0).view, "Hello World\nTest\n1234\n");
  268. }
  269. TEST_CASE(match_all_character_class)
  270. {
  271. Regex<PosixExtended> re("[[:alpha:]]");
  272. ByteString str = "[Window]\nOpacity=255\nAudibleBeep=0\n";
  273. RegexResult result = match(str, re, PosixFlags::Global | PosixFlags::StringCopyMatches);
  274. EXPECT_EQ(result.success, true);
  275. EXPECT_EQ(result.count, 24u);
  276. EXPECT_EQ(result.matches.at(0).view, "W");
  277. EXPECT_EQ(result.matches.at(1).view, "i");
  278. EXPECT_EQ(result.matches.at(2).view, "n");
  279. }
  280. TEST_CASE(match_character_class_with_assertion)
  281. {
  282. Regex<PosixExtended> re("[[:alpha:]]+$");
  283. ByteString str = "abcdef";
  284. RegexResult result = match(str, re);
  285. EXPECT_EQ(result.success, true);
  286. EXPECT_EQ(result.count, 1u);
  287. }
  288. TEST_CASE(example_for_git_commit)
  289. {
  290. Regex<PosixExtended> re("^.*$");
  291. auto result = re.match("Well, hello friends!\nHello World!"sv);
  292. EXPECT(result.success);
  293. EXPECT(result.count == 1);
  294. EXPECT(result.matches.at(0).view.starts_with("Well"sv));
  295. EXPECT(result.matches.at(0).view.length() == 33);
  296. EXPECT(re.has_match("Well,...."sv));
  297. result = re.match("Well, hello friends!\nHello World!"sv, PosixFlags::Multiline);
  298. EXPECT(result.success);
  299. EXPECT(result.count == 2);
  300. EXPECT(result.matches.at(0).view == "Well, hello friends!");
  301. EXPECT(result.matches.at(1).view == "Hello World!");
  302. }
  303. TEST_CASE(email_address)
  304. {
  305. Regex<PosixExtended> re("^[A-Z0-9a-z._%+-]{1,64}@([A-Za-z0-9-]{1,63}\\.){1,125}[A-Za-z]{2,63}$");
  306. EXPECT(re.has_match("hello.world@domain.tld"sv));
  307. EXPECT(re.has_match("this.is.a.very_long_email_address@world.wide.web"sv));
  308. }
  309. TEST_CASE(ini_file_entries)
  310. {
  311. Regex<PosixExtended> re("[[:alpha:]]*=([[:digit:]]*)|\\[(.*)\\]");
  312. RegexResult result;
  313. if constexpr (REGEX_DEBUG) {
  314. RegexDebug regex_dbg(stderr);
  315. regex_dbg.print_raw_bytecode(re);
  316. regex_dbg.print_header();
  317. regex_dbg.print_bytecode(re);
  318. }
  319. ByteString haystack = "[Window]\nOpacity=255\nAudibleBeep=0\n";
  320. EXPECT_EQ(re.search(haystack.view(), result, PosixFlags::Multiline), true);
  321. EXPECT_EQ(result.count, 3u);
  322. if constexpr (REGEX_DEBUG) {
  323. for (auto& v : result.matches)
  324. fprintf(stderr, "%s\n", v.view.to_byte_string().characters());
  325. }
  326. EXPECT_EQ(result.matches.at(0).view, "[Window]");
  327. EXPECT_EQ(result.capture_group_matches.at(0).at(0).view, "Window");
  328. EXPECT_EQ(result.matches.at(1).view, "Opacity=255");
  329. EXPECT_EQ(result.matches.at(1).line, 1u);
  330. EXPECT_EQ(result.matches.at(1).column, 0u);
  331. EXPECT_EQ(result.capture_group_matches.at(1).at(0).view, "255");
  332. EXPECT_EQ(result.capture_group_matches.at(1).at(0).line, 1u);
  333. EXPECT_EQ(result.capture_group_matches.at(1).at(0).column, 8u);
  334. EXPECT_EQ(result.matches.at(2).view, "AudibleBeep=0");
  335. EXPECT_EQ(result.capture_group_matches.at(2).at(0).view, "0");
  336. EXPECT_EQ(result.capture_group_matches.at(2).at(0).line, 2u);
  337. EXPECT_EQ(result.capture_group_matches.at(2).at(0).column, 12u);
  338. }
  339. TEST_CASE(ini_file_entries2)
  340. {
  341. Regex<PosixExtended> re("[[:alpha:]]*=([[:digit:]]*)");
  342. RegexResult result;
  343. ByteString haystack = "ViewMode=Icon";
  344. EXPECT_EQ(re.match(haystack.view(), result), false);
  345. EXPECT_EQ(result.count, 0u);
  346. EXPECT_EQ(re.search(haystack.view(), result), true);
  347. EXPECT_EQ(result.count, 1u);
  348. }
  349. TEST_CASE(named_capture_group)
  350. {
  351. Regex<PosixExtended> re("[[:alpha:]]*=(?<Test>[[:digit:]]*)");
  352. RegexResult result;
  353. if constexpr (REGEX_DEBUG) {
  354. RegexDebug regex_dbg(stderr);
  355. regex_dbg.print_raw_bytecode(re);
  356. regex_dbg.print_header();
  357. regex_dbg.print_bytecode(re);
  358. }
  359. ByteString haystack = "[Window]\nOpacity=255\nAudibleBeep=0\n";
  360. EXPECT_EQ(re.search(haystack, result, PosixFlags::Multiline), true);
  361. EXPECT_EQ(result.count, 2u);
  362. EXPECT_EQ(result.matches.at(0).view, "Opacity=255");
  363. EXPECT_EQ(result.capture_group_matches.at(0).at(0).view, "255");
  364. EXPECT_EQ(result.capture_group_matches.at(0).at(0).capture_group_name, "Test");
  365. EXPECT_EQ(result.matches.at(1).view, "AudibleBeep=0");
  366. EXPECT_EQ(result.capture_group_matches.at(1).at(0).view, "0");
  367. EXPECT_EQ(result.capture_group_matches.at(1).at(0).capture_group_name, "Test");
  368. }
  369. TEST_CASE(ecma262_named_capture_group_with_dollar_sign)
  370. {
  371. Regex<ECMA262> re("[a-zA-Z]*=(?<$Test$>[0-9]*)");
  372. RegexResult result;
  373. if constexpr (REGEX_DEBUG) {
  374. RegexDebug regex_dbg(stderr);
  375. regex_dbg.print_raw_bytecode(re);
  376. regex_dbg.print_header();
  377. regex_dbg.print_bytecode(re);
  378. }
  379. ByteString haystack = "[Window]\nOpacity=255\nAudibleBeep=0\n";
  380. EXPECT_EQ(re.search(haystack, result, ECMAScriptFlags::Multiline), true);
  381. EXPECT_EQ(result.count, 2u);
  382. EXPECT_EQ(result.matches.at(0).view, "Opacity=255");
  383. EXPECT_EQ(result.capture_group_matches.at(0).at(0).view, "255");
  384. EXPECT_EQ(result.capture_group_matches.at(0).at(0).capture_group_name, "$Test$");
  385. EXPECT_EQ(result.matches.at(1).view, "AudibleBeep=0");
  386. EXPECT_EQ(result.capture_group_matches.at(1).at(0).view, "0");
  387. EXPECT_EQ(result.capture_group_matches.at(1).at(0).capture_group_name, "$Test$");
  388. }
  389. TEST_CASE(a_star)
  390. {
  391. Regex<PosixExtended> re("a*");
  392. RegexResult result;
  393. if constexpr (REGEX_DEBUG) {
  394. RegexDebug regex_dbg(stderr);
  395. regex_dbg.print_raw_bytecode(re);
  396. regex_dbg.print_header();
  397. regex_dbg.print_bytecode(re);
  398. }
  399. ByteString haystack = "[Window]\nOpacity=255\nAudibleBeep=0\n";
  400. EXPECT_EQ(re.search(haystack.view(), result, PosixFlags::Multiline), true);
  401. EXPECT_EQ(result.count, 32u);
  402. if (result.count == 32u) {
  403. EXPECT_EQ(result.matches.at(0).view.length(), 0u);
  404. EXPECT_EQ(result.matches.at(10).view.length(), 1u);
  405. EXPECT_EQ(result.matches.at(10).view, "a");
  406. EXPECT_EQ(result.matches.at(31).view.length(), 0u);
  407. }
  408. }
  409. TEST_CASE(simple_period_end_benchmark)
  410. {
  411. Regex<PosixExtended> re("hello.$");
  412. RegexResult m;
  413. EXPECT_EQ(re.search("Hello1"sv, m), false);
  414. EXPECT_EQ(re.search("hello1hello1"sv, m), true);
  415. EXPECT_EQ(re.search("hello2hell"sv, m), false);
  416. EXPECT_EQ(re.search("hello?"sv, m), true);
  417. }
  418. TEST_CASE(posix_extended_nested_capture_group)
  419. {
  420. Regex<PosixExtended> re("(h(e(?<llo>llo)))"); // group 0 -> "hello", group 1 -> "ello", group 2/"llo" -> "llo"
  421. auto result = re.match("hello"sv);
  422. EXPECT(result.success);
  423. EXPECT_EQ(result.capture_group_matches.size(), 1u);
  424. EXPECT_EQ(result.capture_group_matches[0].size(), 3u);
  425. EXPECT_EQ(result.capture_group_matches[0][0].view, "hello"sv);
  426. EXPECT_EQ(result.capture_group_matches[0][1].view, "ello"sv);
  427. EXPECT_EQ(result.capture_group_matches[0][2].view, "llo"sv);
  428. }
  429. auto parse_test_case_long_disjunction_chain = ByteString::repeated("a|"sv, 100000);
  430. TEST_CASE(ECMA262_parse)
  431. {
  432. struct _test {
  433. StringView pattern;
  434. regex::Error expected_error { regex::Error::NoError };
  435. regex::ECMAScriptFlags flags {};
  436. };
  437. _test const tests[] {
  438. { "^hello.$"sv },
  439. { "^(hello.)$"sv },
  440. { "^h{0,1}ello.$"sv },
  441. { "^hello\\W$"sv },
  442. { "^hell\\w.$"sv },
  443. { "^hell\\x6f1$"sv }, // ^hello1$
  444. { "^hel(?:l\\w).$"sv },
  445. { "^hel(?<LO>l\\w).$"sv },
  446. { "^[-a-zA-Z\\w\\s]+$"sv },
  447. { "\\bhello\\B"sv },
  448. { "^[\\w+/_-]+[=]{0,2}$"sv }, // #4189
  449. { "^(?:[^<]*(<[\\w\\W]+>)[^>]*$|#([\\w\\-]*)$)"sv }, // #4189
  450. { "\\/"sv }, // #4189
  451. { ",/=-:"sv }, // #4243
  452. { "\\x"sv }, // Even invalid escapes are allowed if ~unicode.
  453. { "\\x1"sv }, // Even invalid escapes are allowed if ~unicode.
  454. { "\\x1"sv, regex::Error::InvalidPattern, regex::ECMAScriptFlags::Unicode },
  455. { "\\x11"sv },
  456. { "\\x11"sv, regex::Error::NoError, regex::ECMAScriptFlags::Unicode },
  457. { "\\"sv, regex::Error::InvalidTrailingEscape },
  458. { "(?"sv, regex::Error::InvalidCaptureGroup },
  459. { "\\u1234"sv, regex::Error::NoError, regex::ECMAScriptFlags::Unicode },
  460. { "[\\u1234]"sv, regex::Error::NoError, regex::ECMAScriptFlags::Unicode },
  461. { "\\u1"sv, regex::Error::InvalidPattern, regex::ECMAScriptFlags::Unicode },
  462. { "[\\u1]"sv, regex::Error::InvalidPattern, regex::ECMAScriptFlags::Unicode },
  463. { ",(?"sv, regex::Error::InvalidCaptureGroup }, // #4583
  464. { "{1}"sv, regex::Error::InvalidPattern },
  465. { "{1,2}"sv, regex::Error::InvalidPattern },
  466. { "\\uxxxx"sv, regex::Error::NoError },
  467. { "\\uxxxx"sv, regex::Error::InvalidPattern, ECMAScriptFlags::Unicode },
  468. { "\\ud83d"sv, regex::Error::NoError, ECMAScriptFlags::Unicode },
  469. { "\\ud83d\\uxxxx"sv, regex::Error::InvalidPattern, ECMAScriptFlags::Unicode },
  470. { "\\u{0}"sv },
  471. { "\\u{0}"sv, regex::Error::NoError, ECMAScriptFlags::Unicode },
  472. { "\\u{10ffff}"sv, regex::Error::NoError, ECMAScriptFlags::Unicode },
  473. { "\\u{10ffff"sv, regex::Error::InvalidPattern, ECMAScriptFlags::Unicode },
  474. { "\\u{10ffffx"sv, regex::Error::InvalidPattern, ECMAScriptFlags::Unicode },
  475. { "\\u{110000}"sv, regex::Error::InvalidPattern, ECMAScriptFlags::Unicode },
  476. { "\\p"sv, regex::Error::InvalidPattern, ECMAScriptFlags::Unicode },
  477. { "\\p{"sv, regex::Error::InvalidPattern, ECMAScriptFlags::Unicode },
  478. { "\\p{}"sv, regex::Error::InvalidNameForProperty, ECMAScriptFlags::Unicode },
  479. { "\\p{AsCiI}"sv, regex::Error::InvalidNameForProperty, ECMAScriptFlags::Unicode },
  480. { "\\p{hello friends}"sv, regex::Error::InvalidNameForProperty, ECMAScriptFlags::Unicode },
  481. { "\\p{Prepended_Concatenation_Mark}"sv, regex::Error::InvalidNameForProperty, ECMAScriptFlags::Unicode },
  482. { "\\p{ASCII}"sv, regex::Error::NoError, ECMAScriptFlags::Unicode },
  483. { "\\\\p{1}"sv, regex::Error::NoError, ECMAScriptFlags::Unicode },
  484. { "\\\\p{AsCiI}"sv, regex::Error::InvalidPattern, ECMAScriptFlags::Unicode },
  485. { "\\\\p{ASCII}"sv, regex::Error::InvalidPattern, ECMAScriptFlags::Unicode },
  486. { "\\c"sv, regex::Error::NoError, ECMAScriptFlags::BrowserExtended },
  487. { "\\c"sv, regex::Error::InvalidPattern, ECMAScriptFlags::Unicode },
  488. { "[\\c]"sv, regex::Error::NoError, ECMAScriptFlags::BrowserExtended },
  489. { "[\\c]"sv, regex::Error::InvalidPattern, ECMAScriptFlags::Unicode },
  490. { "\\c`"sv, regex::Error::NoError, ECMAScriptFlags::BrowserExtended },
  491. { "\\c`"sv, regex::Error::InvalidPattern, ECMAScriptFlags::Unicode },
  492. { "[\\c`]"sv, regex::Error::NoError, ECMAScriptFlags::BrowserExtended },
  493. { "[\\c`]"sv, regex::Error::InvalidPattern, ECMAScriptFlags::Unicode },
  494. { "\\A"sv, regex::Error::NoError, ECMAScriptFlags::BrowserExtended },
  495. { "\\A"sv, regex::Error::InvalidCharacterClass, ECMAScriptFlags::Unicode },
  496. { "[\\A]"sv, regex::Error::NoError, ECMAScriptFlags::BrowserExtended },
  497. { "[\\A]"sv, regex::Error::InvalidPattern, ECMAScriptFlags::Unicode },
  498. { "\\0"sv, regex::Error::NoError, ECMAScriptFlags::BrowserExtended },
  499. { "\\0"sv, regex::Error::NoError, combine_flags(ECMAScriptFlags::Unicode, ECMAScriptFlags::BrowserExtended) },
  500. { "\\00"sv, regex::Error::NoError, ECMAScriptFlags::BrowserExtended },
  501. { "\\00"sv, regex::Error::InvalidCharacterClass, combine_flags(ECMAScriptFlags::Unicode, ECMAScriptFlags::BrowserExtended) },
  502. { "[\\0]"sv, regex::Error::NoError, ECMAScriptFlags::BrowserExtended },
  503. { "[\\0]"sv, regex::Error::NoError, combine_flags(ECMAScriptFlags::Unicode, ECMAScriptFlags::BrowserExtended) },
  504. { "[\\00]"sv, regex::Error::NoError, ECMAScriptFlags::BrowserExtended },
  505. { "[\\00]"sv, regex::Error::InvalidPattern, combine_flags(ECMAScriptFlags::Unicode, ECMAScriptFlags::BrowserExtended) },
  506. { "\\^\\$\\\\\\.\\*\\+\\?\\(\\)\\[\\]\\{\\}\\|\\/"sv, regex::Error::NoError, ECMAScriptFlags::Unicode },
  507. { "[\\^\\$\\\\\\.\\*\\+\\?\\(\\)\\[\\]\\{\\}\\|\\/]"sv, regex::Error::NoError, ECMAScriptFlags::Unicode },
  508. { "]"sv, regex::Error::NoError, ECMAScriptFlags::BrowserExtended },
  509. { "]"sv, regex::Error::InvalidPattern, ECMAScriptFlags::Unicode },
  510. { "\\]"sv, regex::Error::NoError, ECMAScriptFlags::Unicode },
  511. { "}"sv, regex::Error::NoError, ECMAScriptFlags::BrowserExtended },
  512. { "}"sv, regex::Error::InvalidPattern, ECMAScriptFlags::Unicode },
  513. { "\\}"sv, regex::Error::NoError, ECMAScriptFlags::Unicode },
  514. { "a{9007199254740991}"sv }, // 2^53 - 1
  515. { "a{9007199254740991,}"sv },
  516. { "a{9007199254740991,9007199254740991}"sv },
  517. { "a{9007199254740992}"sv, regex::Error::InvalidBraceContent },
  518. { "a{9007199254740992,}"sv, regex::Error::InvalidBraceContent },
  519. { "a{9007199254740991,9007199254740992}"sv, regex::Error::InvalidBraceContent },
  520. { "a{9007199254740992,9007199254740991}"sv, regex::Error::InvalidBraceContent },
  521. { "a{9007199254740992,9007199254740992}"sv, regex::Error::InvalidBraceContent },
  522. { "(?<a>a)(?<a>b)"sv, regex::Error::DuplicateNamedCapture },
  523. { "(?<a>a)(?<b>b)(?<a>c)"sv, regex::Error::DuplicateNamedCapture },
  524. { "(?<1a>a)"sv, regex::Error::InvalidNameForCaptureGroup },
  525. { "(?<\\a>a)"sv, regex::Error::InvalidNameForCaptureGroup },
  526. { "(?<\ta>a)"sv, regex::Error::InvalidNameForCaptureGroup },
  527. { "(?<$$_$$>a)"sv },
  528. { "(?<ÿ>a)"sv },
  529. { "(?<𝓑𝓻𝓸𝔀𝓷>a)"sv },
  530. { "((?=lg)?[vl]k\\-?\\d{3}) bui| 3\\.[-\\w; ]{10}lg?-([06cv9]{3,4})"sv, regex::Error::NoError, ECMAScriptFlags::BrowserExtended }, // #12373, quantifiable assertions.
  531. { parse_test_case_long_disjunction_chain.view() }, // A whole lot of disjunctions, should not overflow the stack.
  532. { "(\"|')(?:(?!\\2)[^\\\\\\r\\n]|\\\\.)*\\2"sv, regex::Error::NoError, ECMAScriptFlags::BrowserExtended }, // LegacyOctalEscapeSequence should not consume too many chars (and should not crash)
  533. // #18324, Capture group counter skipped past EOF.
  534. { "\\1[\\"sv, regex::Error::InvalidNumber },
  535. };
  536. for (auto& test : tests) {
  537. Regex<ECMA262> re(test.pattern, test.flags);
  538. EXPECT_EQ(re.parser_result.error, test.expected_error);
  539. if constexpr (REGEX_DEBUG) {
  540. dbgln("\n");
  541. RegexDebug regex_dbg(stderr);
  542. regex_dbg.print_raw_bytecode(re);
  543. regex_dbg.print_header();
  544. regex_dbg.print_bytecode(re);
  545. dbgln("\n");
  546. }
  547. }
  548. }
  549. TEST_CASE(ECMA262_match)
  550. {
  551. constexpr auto global_multiline = ECMAScriptFlags::Global | ECMAScriptFlags::Multiline;
  552. struct _test {
  553. StringView pattern;
  554. StringView subject;
  555. bool matches { true };
  556. ECMAScriptFlags options {};
  557. };
  558. // clang-format off
  559. constexpr _test tests[] {
  560. { "^hello.$"sv, "hello1"sv },
  561. { "^(hello.)$"sv, "hello1"sv },
  562. { "^h{0,1}ello.$"sv, "ello1"sv },
  563. { "^hello\\W$"sv, "hello!"sv },
  564. { "^hell\\w.$"sv, "hellx!"sv },
  565. { "^hell\\x6f1$"sv, "hello1"sv },
  566. { "^hel(?<LO>l.)1$"sv, "hello1"sv },
  567. { "^hel(?<LO>l.)1*\\k<LO>.$"sv, "hello1lo1"sv },
  568. { "^[-a-z1-3\\s]+$"sv, "hell2 o1"sv },
  569. { "^[\\0-\\x1f]$"sv, "\n"sv },
  570. { .pattern = "\\bhello\\B"sv, .subject = "hello1"sv, .options = ECMAScriptFlags::Global },
  571. { "\\b.*\\b"sv, "hello1"sv },
  572. { "[^\\D\\S]{2}"sv, "1 "sv, false },
  573. { "bar(?=f.)foo"sv, "barfoo"sv },
  574. { "bar(?=foo)bar"sv, "barbar"sv, false },
  575. { "bar(?!foo)bar"sv, "barbar"sv, true },
  576. { "bar(?!bar)bar"sv, "barbar"sv, false },
  577. { "bar.*(?<=foo)"sv, "barbar"sv, false },
  578. { "bar.*(?<!foo)"sv, "barbar"sv, true },
  579. { "((...)X)+"sv, "fooXbarXbazX"sv, true },
  580. { "(?:)"sv, ""sv, true },
  581. { "\\^"sv, "^"sv },
  582. { "\\^\\$\\\\\\.\\*\\+\\?\\(\\)\\[\\]\\{\\}\\|\\/"sv, "^$\\.*+?()[]{}|/"sv, true, ECMAScriptFlags::Unicode },
  583. { "[\\^\\$\\\\\\.\\*\\+\\?\\(\\)\\[\\]\\{\\}\\|\\/]{15}"sv, "^$\\.*+?()[]{}|/"sv, true, ECMAScriptFlags::Unicode },
  584. { "(a{2}){3}"sv, "aaaaaa"sv },
  585. { "(a{2}){3}"sv, "aaaabaa"sv, false },
  586. { "(a{2}){4}"sv, "aaaaaaaa"sv },
  587. { "(a{2}){4}"sv, "aaaaaabaa"sv, false },
  588. { "(a{3}){2}"sv, "aaaaaa"sv },
  589. { "(a{3}){2}"sv, "aaaabaa"sv, false },
  590. { "(a{4}){2}"sv, "aaaaaaaa"sv },
  591. { "(a{4}){2}"sv, "aaaaaabaa"sv, false },
  592. { "\\u{4}"sv, "uuuu"sv },
  593. { "(?<=.{3})f"sv, "abcdef"sv, true, (ECMAScriptFlags)regex::AllFlags::Global },
  594. { "(?<=.{3})f"sv, "abc😀ef"sv, true, (ECMAScriptFlags)regex::AllFlags::Global },
  595. // ECMA262, B.1.4. Regular Expression Pattern extensions for browsers
  596. { "{"sv, "{"sv, true, ECMAScriptFlags::BrowserExtended },
  597. { "\\5"sv, "\5"sv, true, ECMAScriptFlags::BrowserExtended },
  598. { "\\05"sv, "\5"sv, true, ECMAScriptFlags::BrowserExtended },
  599. { "\\455"sv, "\45""5"sv, true, ECMAScriptFlags::BrowserExtended },
  600. { "\\314"sv, "\314"sv, true, ECMAScriptFlags::BrowserExtended },
  601. { "\\c"sv, "\\c"sv, true, ECMAScriptFlags::BrowserExtended },
  602. { "\\cf"sv, "\06"sv, true, ECMAScriptFlags::BrowserExtended },
  603. { "\\c1"sv, "\\c1"sv, true, ECMAScriptFlags::BrowserExtended },
  604. { "[\\c1]"sv, "\x11"sv, true, ECMAScriptFlags::BrowserExtended },
  605. { "[\\w-\\d]"sv, "-"sv, true, ECMAScriptFlags::BrowserExtended },
  606. { "^(?:^^\\.?|[!+-]|!=|!==|#|%|%=|&|&&|&&=|&=|\\(|\\*|\\*=|\\+=|,|-=|->|\\/|\\/=|:|::|;|<|<<|<<=|<=|=|==|===|>|>=|>>|>>=|>>>|>>>=|[?@[^]|\\^=|\\^\\^|\\^\\^=|{|\\||\\|=|\\|\\||\\|\\|=|~|break|case|continue|delete|do|else|finally|instanceof|return|throw|try|typeof)\\s*(\\/(?=[^*/])(?:[^/[\\\\]|\\\\[\\S\\s]|\\[(?:[^\\\\\\]]|\\\\[\\S\\s])*(?:]|$))+\\/)"sv,
  607. "return /xx/"sv, true, ECMAScriptFlags::BrowserExtended
  608. }, // #5517, appears to be matching JS expressions that involve regular expressions...
  609. { "a{2,}"sv, "aaaa"sv }, // #5518
  610. { "\\0"sv, "\0"sv, true, ECMAScriptFlags::BrowserExtended },
  611. { "\\0"sv, "\0"sv, true, combine_flags(ECMAScriptFlags::Unicode, ECMAScriptFlags::BrowserExtended) },
  612. { "\\01"sv, "\1"sv, true, ECMAScriptFlags::BrowserExtended },
  613. { "[\\0]"sv, "\0"sv, true, ECMAScriptFlags::BrowserExtended },
  614. { "[\\0]"sv, "\0"sv, true, combine_flags(ECMAScriptFlags::Unicode, ECMAScriptFlags::BrowserExtended) },
  615. { "[\\01]"sv, "\1"sv, true, ECMAScriptFlags::BrowserExtended },
  616. { "(\0|a)"sv, "a"sv, true }, // #9686, Should allow null bytes in pattern
  617. { "(.*?)a(?!(a+)b\\2c)\\2(.*)"sv, "baaabaac"sv, true }, // #6042, Groups inside lookarounds may be referenced outside, but their contents appear empty if the pattern in the lookaround fails.
  618. { "a|$"sv, "x"sv, true, (ECMAScriptFlags)regex::AllFlags::Global }, // #11940, Global (not the 'g' flag) regexps should attempt to match the zero-length end of the string too.
  619. { "foo\nbar"sv, "foo\nbar"sv, true }, // #12126, ECMA262 regexp should match literal newlines without the 's' flag.
  620. { "foo[^]bar"sv, "foo\nbar"sv, true }, // #12126, ECMA262 regexp should match newline with [^].
  621. { "^[_A-Z]+$"sv, "_aA"sv, true, ECMAScriptFlags::Insensitive }, // Insensitive lookup table: characters in a range do not necessarily lie in the same range after being converted to lowercase.
  622. { "^[a-sy-z]$"sv, "b"sv, true, ECMAScriptFlags::Insensitive },
  623. { "^[a-sy-z]$"sv, "y"sv, true, ECMAScriptFlags::Insensitive },
  624. { "^[a-sy-z]$"sv, "u"sv, false, ECMAScriptFlags::Insensitive },
  625. { "."sv, "\n\r\u2028\u2029"sv, false }, // Dot should not match any of CR/LF/LS/PS in ECMA262 mode without DotAll.
  626. { "a$"sv, "a\r\n"sv, true, global_multiline.value() }, // $ should accept all LineTerminators in ECMA262 mode with Multiline.
  627. { "^a"sv, "\ra"sv, true, global_multiline.value() },
  628. { "^(.*?):[ \\t]*([^\\r\\n]*)$"sv, "content-length: 488\r\ncontent-type: application/json; charset=utf-8\r\n"sv, true, global_multiline.value() },
  629. { "^\\?((&?category=[0-9]+)?(&?shippable=1)?(&?ad_type=demand)?(&?page=[0-9]+)?(&?locations=(r|d)_[0-9]+)?)+$"sv,
  630. "?category=54&shippable=1&baby_age=p,0,1,3"sv, false }, // ladybird#968, ?+ should not loop forever.
  631. { "([^\\s]+):\\s*([^;]+);"sv, "font-family: 'Inter';"sv, true }, // optimizer bug, blindly accepting inverted char classes [^x] as atomic rewrite opportunities.
  632. { "(a)(?=a*\\1)"sv, "aaaa"sv, true, global_multiline.value() }, // Optimizer bug, ignoring references that weren't bound in the current or past block, ladybird#2281
  633. };
  634. // clang-format on
  635. for (auto& test : tests) {
  636. Regex<ECMA262> re(test.pattern, test.options);
  637. if constexpr (REGEX_DEBUG) {
  638. dbgln("\n");
  639. RegexDebug regex_dbg(stderr);
  640. regex_dbg.print_raw_bytecode(re);
  641. regex_dbg.print_header();
  642. regex_dbg.print_bytecode(re);
  643. dbgln("\n");
  644. }
  645. EXPECT_EQ(re.parser_result.error, regex::Error::NoError);
  646. EXPECT_EQ(re.match(test.subject).success, test.matches);
  647. }
  648. }
  649. TEST_CASE(ECMA262_unicode_match)
  650. {
  651. constexpr auto space_and_line_terminator_code_points = Array { 0x0009, 0x000A, 0x000B, 0x000C, 0x000D, 0x0020, 0x00A0, 0x1680, 0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005, 0x2006, 0x2007, 0x2008, 0x2009, 0x200A, 0x2028, 0x2029, 0x202F, 0x205F, 0x3000, 0xFEFF };
  652. StringBuilder builder;
  653. for (u32 code_point : space_and_line_terminator_code_points)
  654. builder.append_code_point(code_point);
  655. auto space_and_line_terminators = builder.to_byte_string();
  656. struct _test {
  657. StringView pattern;
  658. StringView subject;
  659. bool matches { true };
  660. ECMAScriptFlags options {};
  661. };
  662. _test tests[] {
  663. { "\xf0\x9d\x8c\x86"sv, "abcdef"sv, false, ECMAScriptFlags::Unicode },
  664. { "[\xf0\x9d\x8c\x86]"sv, "abcdef"sv, false, ECMAScriptFlags::Unicode },
  665. { "\\ud83d"sv, "😀"sv, true },
  666. { "\\ud83d"sv, "😀"sv, false, ECMAScriptFlags::Unicode },
  667. { "\\ude00"sv, "😀"sv, true },
  668. { "\\ude00"sv, "😀"sv, false, ECMAScriptFlags::Unicode },
  669. { "\\ud83d\\ude00"sv, "😀"sv, true },
  670. { "\\ud83d\\ude00"sv, "😀"sv, true, ECMAScriptFlags::Unicode },
  671. { "\\u{1f600}"sv, "😀"sv, true, ECMAScriptFlags::Unicode },
  672. { "\\ud83d\\ud83d"sv, "\xed\xa0\xbd\xed\xa0\xbd"sv, true },
  673. { "\\ud83d\\ud83d"sv, "\xed\xa0\xbd\xed\xa0\xbd"sv, true, ECMAScriptFlags::Unicode },
  674. { "(?<=.{3})f"sv, "abcdef"sv, true, ECMAScriptFlags::Unicode },
  675. { "(?<=.{3})f"sv, "abc😀ef"sv, true, ECMAScriptFlags::Unicode },
  676. { "(?<𝓑𝓻𝓸𝔀𝓷>brown)"sv, "brown"sv, true, ECMAScriptFlags::Unicode },
  677. { "(?<\\u{1d4d1}\\u{1d4fb}\\u{1d4f8}\\u{1d500}\\u{1d4f7}>brown)"sv, "brown"sv, true, ECMAScriptFlags::Unicode },
  678. { "(?<\\ud835\\udcd1\\ud835\\udcfb\\ud835\\udcf8\\ud835\\udd00\\ud835\\udcf7>brown)"sv, "brown"sv, true, ECMAScriptFlags::Unicode },
  679. { "^\\s+$"sv, space_and_line_terminators },
  680. { "^\\s+$"sv, space_and_line_terminators, true, ECMAScriptFlags::Unicode },
  681. { "[\\u0390]"sv, "\u1fd3"sv, false, ECMAScriptFlags::Unicode },
  682. { "[\\u1fd3]"sv, "\u0390"sv, false, ECMAScriptFlags::Unicode },
  683. { "[\\u0390]"sv, "\u1fd3"sv, true, combine_flags(ECMAScriptFlags::Unicode, ECMAScriptFlags::Insensitive) },
  684. { "[\\u1fd3]"sv, "\u0390"sv, true, combine_flags(ECMAScriptFlags::Unicode, ECMAScriptFlags::Insensitive) },
  685. { "[\\u03b0]"sv, "\u1fe3"sv, false, ECMAScriptFlags::Unicode },
  686. { "[\\u1fe3]"sv, "\u03b0"sv, false, ECMAScriptFlags::Unicode },
  687. { "[\\u03b0]"sv, "\u1fe3"sv, true, combine_flags(ECMAScriptFlags::Unicode, ECMAScriptFlags::Insensitive) },
  688. { "[\\u1fe3]"sv, "\u03b0"sv, true, combine_flags(ECMAScriptFlags::Unicode, ECMAScriptFlags::Insensitive) },
  689. { "[\\ufb05]"sv, "\ufb06"sv, false, ECMAScriptFlags::Unicode },
  690. { "[\\ufb06]"sv, "\ufb05"sv, false, ECMAScriptFlags::Unicode },
  691. { "[\\ufb05]"sv, "\ufb06"sv, true, combine_flags(ECMAScriptFlags::Unicode, ECMAScriptFlags::Insensitive) },
  692. { "[\\ufb06]"sv, "\ufb05"sv, true, combine_flags(ECMAScriptFlags::Unicode, ECMAScriptFlags::Insensitive) },
  693. };
  694. for (auto& test : tests) {
  695. Regex<ECMA262> re(test.pattern, (ECMAScriptFlags)regex::AllFlags::Global | test.options);
  696. auto subject = MUST(AK::utf8_to_utf16(test.subject));
  697. Utf16View view { subject };
  698. if constexpr (REGEX_DEBUG) {
  699. dbgln("\n");
  700. RegexDebug regex_dbg(stderr);
  701. regex_dbg.print_raw_bytecode(re);
  702. regex_dbg.print_header();
  703. regex_dbg.print_bytecode(re);
  704. dbgln("\n");
  705. }
  706. EXPECT_EQ(re.parser_result.error, regex::Error::NoError);
  707. EXPECT_EQ(re.match(view).success, test.matches);
  708. }
  709. }
  710. TEST_CASE(ECMA262_unicode_sets_parser_error)
  711. {
  712. struct _test {
  713. StringView pattern;
  714. regex::Error error;
  715. };
  716. constexpr _test tests[] {
  717. { "[[]"sv, regex::Error::InvalidPattern },
  718. { "[[x[]]]"sv, regex::Error::NoError }, // #23691, should not crash on empty charclass within AndOr.
  719. };
  720. for (auto test : tests) {
  721. Regex<ECMA262> re(test.pattern, (ECMAScriptFlags)regex::AllFlags::UnicodeSets);
  722. EXPECT_EQ(re.parser_result.error, test.error);
  723. }
  724. }
  725. TEST_CASE(ECMA262_unicode_sets_match)
  726. {
  727. struct _test {
  728. StringView pattern;
  729. StringView subject;
  730. bool matches { true };
  731. ECMAScriptFlags options {};
  732. };
  733. constexpr _test tests[] {
  734. { "[\\w--x]"sv, "x"sv, false },
  735. { "[\\w&&x]"sv, "y"sv, false },
  736. { "[\\w--x]"sv, "y"sv, true },
  737. { "[\\w&&x]"sv, "x"sv, true },
  738. { "[[0-9\\w]--x--6]"sv, "6"sv, false },
  739. { "[[0-9\\w]--x--6]"sv, "x"sv, false },
  740. { "[[0-9\\w]--x--6]"sv, "y"sv, true },
  741. { "[[0-9\\w]--x--6]"sv, "9"sv, true },
  742. { "[\\w&&\\d]"sv, "a"sv, false },
  743. { "[\\w&&\\d]"sv, "4"sv, true },
  744. };
  745. for (auto& test : tests) {
  746. Regex<ECMA262> re(test.pattern, (ECMAScriptFlags)regex::AllFlags::UnicodeSets | test.options);
  747. if constexpr (REGEX_DEBUG) {
  748. dbgln("\n");
  749. RegexDebug regex_dbg(stderr);
  750. regex_dbg.print_raw_bytecode(re);
  751. regex_dbg.print_header();
  752. regex_dbg.print_bytecode(re);
  753. dbgln("\n");
  754. }
  755. EXPECT_EQ(re.parser_result.error, regex::Error::NoError);
  756. auto result = re.match(test.subject).success;
  757. EXPECT_EQ(result, test.matches);
  758. }
  759. }
  760. TEST_CASE(ECMA262_property_match)
  761. {
  762. struct _test {
  763. StringView pattern;
  764. StringView subject;
  765. bool matches { true };
  766. ECMAScriptFlags options {};
  767. };
  768. constexpr _test tests[] {
  769. { "\\p{ASCII}"sv, "a"sv, false },
  770. { "\\p{ASCII}"sv, "p{ASCII}"sv, true },
  771. { "\\p{ASCII}"sv, "a"sv, true, ECMAScriptFlags::Unicode },
  772. { "\\p{ASCII}"sv, "😀"sv, false, ECMAScriptFlags::Unicode },
  773. { "\\P{ASCII}"sv, "a"sv, false, ECMAScriptFlags::Unicode },
  774. { "\\P{ASCII}"sv, "😀"sv, true, ECMAScriptFlags::Unicode },
  775. { "\\p{ASCII_Hex_Digit}"sv, "1"sv, true, ECMAScriptFlags::Unicode },
  776. { "\\p{ASCII_Hex_Digit}"sv, "a"sv, true, ECMAScriptFlags::Unicode },
  777. { "\\p{ASCII_Hex_Digit}"sv, "x"sv, false, ECMAScriptFlags::Unicode },
  778. { "\\P{ASCII_Hex_Digit}"sv, "1"sv, false, ECMAScriptFlags::Unicode },
  779. { "\\P{ASCII_Hex_Digit}"sv, "a"sv, false, ECMAScriptFlags::Unicode },
  780. { "\\P{ASCII_Hex_Digit}"sv, "x"sv, true, ECMAScriptFlags::Unicode },
  781. { "\\p{Any}"sv, "\xcd\xb8"sv, true, ECMAScriptFlags::Unicode }, // U+0378, which is an unassigned code point.
  782. { "\\P{Any}"sv, "\xcd\xb8"sv, false, ECMAScriptFlags::Unicode }, // U+0378, which is an unassigned code point.
  783. { "\\p{Assigned}"sv, "\xcd\xb8"sv, false, ECMAScriptFlags::Unicode }, // U+0378, which is an unassigned code point.
  784. { "\\P{Assigned}"sv, "\xcd\xb8"sv, true, ECMAScriptFlags::Unicode }, // U+0378, which is an unassigned code point.
  785. { "\\p{Lu}"sv, "a"sv, false, ECMAScriptFlags::Unicode },
  786. { "\\p{Lu}"sv, "A"sv, true, ECMAScriptFlags::Unicode },
  787. { "\\p{Lu}"sv, "9"sv, false, ECMAScriptFlags::Unicode },
  788. { "\\p{Cased_Letter}"sv, "a"sv, true, ECMAScriptFlags::Unicode },
  789. { "\\p{Cased_Letter}"sv, "A"sv, true, ECMAScriptFlags::Unicode },
  790. { "\\p{Cased_Letter}"sv, "9"sv, false, ECMAScriptFlags::Unicode },
  791. { "\\P{Cased_Letter}"sv, "a"sv, false, ECMAScriptFlags::Unicode },
  792. { "\\P{Cased_Letter}"sv, "A"sv, false, ECMAScriptFlags::Unicode },
  793. { "\\P{Cased_Letter}"sv, "9"sv, true, ECMAScriptFlags::Unicode },
  794. { "\\p{General_Category=Cased_Letter}"sv, "a"sv, true, ECMAScriptFlags::Unicode },
  795. { "\\p{General_Category=Cased_Letter}"sv, "A"sv, true, ECMAScriptFlags::Unicode },
  796. { "\\p{General_Category=Cased_Letter}"sv, "9"sv, false, ECMAScriptFlags::Unicode },
  797. { "\\p{gc=Cased_Letter}"sv, "a"sv, true, ECMAScriptFlags::Unicode },
  798. { "\\p{gc=Cased_Letter}"sv, "A"sv, true, ECMAScriptFlags::Unicode },
  799. { "\\p{gc=Cased_Letter}"sv, "9"sv, false, ECMAScriptFlags::Unicode },
  800. { "\\p{Script=Latin}"sv, "a"sv, true, ECMAScriptFlags::Unicode },
  801. { "\\p{Script=Latin}"sv, "A"sv, true, ECMAScriptFlags::Unicode },
  802. { "\\p{Script=Latin}"sv, "9"sv, false, ECMAScriptFlags::Unicode },
  803. { "\\p{sc=Latin}"sv, "a"sv, true, ECMAScriptFlags::Unicode },
  804. { "\\p{sc=Latin}"sv, "A"sv, true, ECMAScriptFlags::Unicode },
  805. { "\\p{sc=Latin}"sv, "9"sv, false, ECMAScriptFlags::Unicode },
  806. { "\\p{Script_Extensions=Deva}"sv, "a"sv, false, ECMAScriptFlags::Unicode },
  807. { "\\p{Script_Extensions=Beng}"sv, "\xe1\xb3\x95"sv, true, ECMAScriptFlags::Unicode }, // U+01CD5
  808. { "\\p{Script_Extensions=Deva}"sv, "\xe1\xb3\x95"sv, true, ECMAScriptFlags::Unicode }, // U+01CD5
  809. { "\\p{scx=Deva}"sv, "a"sv, false, ECMAScriptFlags::Unicode },
  810. { "\\p{scx=Beng}"sv, "\xe1\xb3\x95"sv, true, ECMAScriptFlags::Unicode }, // U+01CD5
  811. { "\\p{scx=Deva}"sv, "\xe1\xb3\x95"sv, true, ECMAScriptFlags::Unicode }, // U+01CD5
  812. };
  813. for (auto& test : tests) {
  814. Regex<ECMA262> re(test.pattern, (ECMAScriptFlags)regex::AllFlags::Global | regex::ECMAScriptFlags::BrowserExtended | test.options);
  815. auto subject = MUST(AK::utf8_to_utf16(test.subject));
  816. Utf16View view { subject };
  817. if constexpr (REGEX_DEBUG) {
  818. dbgln("\n");
  819. RegexDebug regex_dbg(stderr);
  820. regex_dbg.print_raw_bytecode(re);
  821. regex_dbg.print_header();
  822. regex_dbg.print_bytecode(re);
  823. dbgln("\n");
  824. }
  825. EXPECT_EQ(re.parser_result.error, regex::Error::NoError);
  826. EXPECT_EQ(re.match(view).success, test.matches);
  827. }
  828. }
  829. TEST_CASE(replace)
  830. {
  831. struct _test {
  832. StringView pattern;
  833. StringView replacement;
  834. StringView subject;
  835. StringView expected;
  836. ECMAScriptFlags options {};
  837. };
  838. constexpr _test tests[] {
  839. { "foo(.+)"sv, "aaa"sv, "test"sv, "test"sv },
  840. { "foo(.+)"sv, "test\\1"sv, "foobar"sv, "testbar"sv },
  841. { "foo(.+)"sv, "\\2\\1"sv, "foobar"sv, "\\2bar"sv },
  842. { "foo(.+)"sv, "\\\\\\1"sv, "foobar"sv, "\\bar"sv },
  843. { "foo(.)"sv, "a\\1"sv, "fooxfooy"sv, "axay"sv, ECMAScriptFlags::Multiline },
  844. };
  845. for (auto& test : tests) {
  846. Regex<ECMA262> re(test.pattern, test.options);
  847. if constexpr (REGEX_DEBUG) {
  848. dbgln("\n");
  849. RegexDebug regex_dbg(stderr);
  850. regex_dbg.print_raw_bytecode(re);
  851. regex_dbg.print_header();
  852. regex_dbg.print_bytecode(re);
  853. dbgln("\n");
  854. }
  855. EXPECT_EQ(re.parser_result.error, regex::Error::NoError);
  856. EXPECT_EQ(re.replace(test.subject, test.replacement), test.expected);
  857. }
  858. }
  859. TEST_CASE(case_insensitive_match)
  860. {
  861. Regex<PosixExtended> re("cd", PosixFlags::Insensitive | PosixFlags::Global);
  862. auto result = re.match("AEKFCD"sv);
  863. EXPECT_EQ(result.success, true);
  864. if (result.success) {
  865. EXPECT_EQ(result.matches.at(0).column, 4ul);
  866. }
  867. }
  868. TEST_CASE(extremely_long_fork_chain)
  869. {
  870. Regex<ECMA262> re("(?:aa)*");
  871. auto result = re.match(ByteString::repeated('a', 1000));
  872. EXPECT_EQ(result.success, true);
  873. }
  874. TEST_CASE(theoretically_infinite_loop)
  875. {
  876. Array patterns {
  877. "(a*)*"sv, // Infinitely matching empty substrings, the outer loop should short-circuit.
  878. "(a*?)*"sv, // Infinitely matching empty substrings, the outer loop should short-circuit.
  879. "(a*)*?"sv, // Should match exactly nothing.
  880. "(?:)*?"sv, // Should not generate an infinite fork loop.
  881. "(a?)+$"sv, // Infinitely matching empty strings, but with '+' instead of '*'.
  882. };
  883. for (auto& pattern : patterns) {
  884. Regex<ECMA262> re(pattern);
  885. auto result = re.match(""sv);
  886. EXPECT_EQ(result.success, true);
  887. }
  888. }
  889. static auto g_lots_of_a_s = ByteString::repeated('a', 10'000'000);
  890. BENCHMARK_CASE(fork_performance)
  891. {
  892. Regex<ECMA262> re("(?:aa)*");
  893. auto result = re.match(g_lots_of_a_s);
  894. EXPECT_EQ(result.success, true);
  895. }
  896. BENCHMARK_CASE(anchor_performance)
  897. {
  898. Regex<ECMA262> re("^b");
  899. for (auto i = 0; i < 100'000; i++) {
  900. auto result = re.match(g_lots_of_a_s);
  901. EXPECT_EQ(result.success, false);
  902. }
  903. }
  904. TEST_CASE(optimizer_atomic_groups)
  905. {
  906. Array tests {
  907. // Fork -> ForkReplace
  908. Tuple { "a*b"sv, "aaaaa"sv, false },
  909. Tuple { "a+b"sv, "aaaaa"sv, false },
  910. Tuple { "\\\\(\\d+)"sv, "\\\\"sv, false }, // Rewrite bug turning a+ to a*, see #10952.
  911. Tuple { "[a-z.]+\\."sv, "..."sv, true }, // Rewrite bug, incorrect interpretation of Compare.
  912. Tuple { "[.-]+\\."sv, ".-."sv, true },
  913. // Alternative fuse
  914. Tuple { "(abcfoo|abcbar|abcbaz).*x"sv, "abcbarx"sv, true },
  915. Tuple { "(a|a)"sv, "a"sv, true },
  916. Tuple { "(a|)"sv, ""sv, true }, // Ensure that empty alternatives are not outright removed
  917. Tuple { "a{2,3}|a{5,8}"sv, "abc"sv, false }, // Optimizer should not mess up the instruction stream by ignoring inter-insn dependencies, see #11247.
  918. Tuple { "^(a{2,3}|a{5,8})$"sv, "aaaa"sv, false }, // Optimizer should not mess up the instruction stream by ignoring inter-insn dependencies, see #11247.
  919. // Optimizer should not chop off *half* of an instruction when fusing instructions.
  920. Tuple { "cubic-bezier\\(\\s*(-?\\d+\\.?\\d*|-?\\.\\d+)\\s*,\\s*(-?\\d+\\.?\\d*|-?\\.\\d+)\\s*,\\s*(-?\\d+\\.?\\d*|-?\\.\\d+)\\s*,\\s*(-?\\d+\\.?\\d*|-?\\.\\d+)\\s*\\)"sv, "cubic-bezier(.05, 0, 0, 1)"sv, true },
  921. // ForkReplace shouldn't be applied where it would change the semantics
  922. Tuple { "(1+)\\1"sv, "11"sv, true },
  923. Tuple { "(1+)1"sv, "11"sv, true },
  924. Tuple { "(1+)0"sv, "10"sv, true },
  925. // Rewrite should not skip over first required iteration of <x>+.
  926. Tuple { "a+"sv, ""sv, false },
  927. // 'y' and [^x] have an overlap ('y'), the loop should not be rewritten here.
  928. Tuple { "[^x]+y"sv, "ay"sv, true },
  929. // .+ should not be rewritten here, as it's followed by something that would be matched by `.`.
  930. Tuple { ".+(a|b|c)"sv, "xxa"sv, true },
  931. // (b+)(b+) produces an intermediate block with no matching ops, the optimiser should ignore that block when looking for following matches and correctly detect the overlap between (b+) and (b+).
  932. // note that the second loop may be rewritten to a ForkReplace, but the first loop should not be rewritten.
  933. Tuple { "(b+)(b+)"sv, "bbb"sv, true },
  934. // Don't treat [\S] as [\s]; see ladybird#2296.
  935. Tuple { "([^\\s]+?)\\(([\\s\\S]*)\\)"sv, "a(b)"sv, true },
  936. };
  937. for (auto& test : tests) {
  938. Regex<ECMA262> re(test.get<0>());
  939. auto result = re.match(test.get<1>());
  940. EXPECT_EQ(result.success, test.get<2>());
  941. }
  942. }
  943. TEST_CASE(optimizer_char_class_lut)
  944. {
  945. Regex<ECMA262> re(R"([\f\n\r\t\v\u00a0\u1680\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u2028\u2029\u202f\u205f\u3000\ufeff]+$)");
  946. if constexpr (REGEX_DEBUG) {
  947. dbgln("\n");
  948. RegexDebug regex_dbg(stderr);
  949. regex_dbg.print_raw_bytecode(re);
  950. regex_dbg.print_header();
  951. regex_dbg.print_bytecode(re);
  952. dbgln("\n");
  953. }
  954. // This will go through _all_ alternatives in the character class, and then fail.
  955. for (size_t i = 0; i < 1'000'000; ++i)
  956. EXPECT_EQ(re.match("1635488940000"sv).success, false);
  957. }
  958. TEST_CASE(optimizer_alternation)
  959. {
  960. Array tests {
  961. // Pattern, Subject, Expected length
  962. Tuple { "a|"sv, "a"sv, 1u },
  963. Tuple { "a|a|a|a|a|a|a|a|a|b"sv, "a"sv, 1u },
  964. Tuple { "ab|ac|ad|bc"sv, "bc"sv, 2u },
  965. // Should not crash on backwards jumps introduced by '.*'.
  966. Tuple { "\\bDroid\\b.*Build|XT912|XT928|XT926|XT915|XT919|XT925|XT1021|\\bMoto E\\b|XT1068|XT1092|XT1052"sv, "XT1068"sv, 6u },
  967. // Backwards jumps to IP 0 are normal jumps too.
  968. Tuple { "^(\\d+|x)"sv, "42"sv, 2u },
  969. // `Repeat' does not add its insn size to the jump target.
  970. Tuple { "[0-9]{2}|[0-9]"sv, "92"sv, 2u },
  971. };
  972. for (auto& test : tests) {
  973. Regex<ECMA262> re(test.get<0>());
  974. auto result = re.match(test.get<1>());
  975. EXPECT(result.success);
  976. EXPECT_EQ(result.matches.first().view.length(), test.get<2>());
  977. }
  978. }
  979. TEST_CASE(start_anchor)
  980. {
  981. // Ensure that a circumflex at the start only matches the start of the line.
  982. {
  983. Regex<PosixBasic> re("^abc");
  984. EXPECT_EQ(re.match("123abcdef"sv, PosixFlags::Global).success, false);
  985. EXPECT_EQ(re.match("abc123"sv, PosixFlags::Global).success, true);
  986. EXPECT_EQ(re.match("123^abcdef"sv, PosixFlags::Global).success, false);
  987. EXPECT_EQ(re.match("^abc123"sv, PosixFlags::Global).success, false);
  988. // Multiple lines
  989. EXPECT_EQ(re.match("123\nabc"sv, PosixFlags::Multiline).success, true);
  990. }
  991. }
  992. TEST_CASE(posix_basic_dollar_is_end_anchor)
  993. {
  994. // Ensure that a dollar sign at the end only matches the end of the line.
  995. {
  996. Regex<PosixBasic> re("abc$");
  997. EXPECT_EQ(re.match("123abcdef"sv, PosixFlags::Global).success, false);
  998. EXPECT_EQ(re.match("123abc"sv, PosixFlags::Global).success, true);
  999. EXPECT_EQ(re.match("123abc$def"sv, PosixFlags::Global).success, false);
  1000. EXPECT_EQ(re.match("123abc$"sv, PosixFlags::Global).success, false);
  1001. }
  1002. }
  1003. TEST_CASE(posix_basic_dollar_is_literal)
  1004. {
  1005. // Ensure that a dollar sign in the middle is treated as a literal.
  1006. {
  1007. Regex<PosixBasic> re("abc$d");
  1008. EXPECT_EQ(re.match("123abcdef"sv, PosixFlags::Global).success, false);
  1009. EXPECT_EQ(re.match("123abc"sv, PosixFlags::Global).success, false);
  1010. EXPECT_EQ(re.match("123abc$def"sv, PosixFlags::Global).success, true);
  1011. EXPECT_EQ(re.match("123abc$"sv, PosixFlags::Global).success, false);
  1012. }
  1013. // Ensure that a dollar sign is always treated as a literal if escaped, even if at the end of the pattern.
  1014. {
  1015. Regex<PosixBasic> re("abc\\$");
  1016. EXPECT_EQ(re.match("123abcdef"sv, PosixFlags::Global).success, false);
  1017. EXPECT_EQ(re.match("123abc"sv, PosixFlags::Global).success, false);
  1018. EXPECT_EQ(re.match("123abc$def"sv, PosixFlags::Global).success, true);
  1019. EXPECT_EQ(re.match("123abc$"sv, PosixFlags::Global).success, true);
  1020. }
  1021. }
  1022. TEST_CASE(negative_lookahead)
  1023. {
  1024. {
  1025. // Negative lookahead with more than 2 forks difference between lookahead init and finish.
  1026. auto options = ECMAScriptOptions { ECMAScriptFlags::Global };
  1027. options.reset_flag((ECMAScriptFlags)regex::AllFlags::Internal_Stateful);
  1028. Regex<ECMA262> re(":(?!\\^\\)|1)", options);
  1029. EXPECT_EQ(re.match(":^)"sv).success, false);
  1030. EXPECT_EQ(re.match(":1"sv).success, false);
  1031. EXPECT_EQ(re.match(":foobar"sv).success, true);
  1032. }
  1033. {
  1034. // Correctly count forks with nested groups and optimised loops
  1035. Regex<ECMA262> re("^((?:[^\\n]|\\n(?! *\\n))+)(?:\\n *)+\\n");
  1036. EXPECT_EQ(re.match("foo\n\n"sv).success, true);
  1037. EXPECT_EQ(re.match("foo\n"sv).success, false);
  1038. }
  1039. }
  1040. TEST_CASE(single_match_flag)
  1041. {
  1042. {
  1043. // Ensure that only a single match is produced and nothing past that.
  1044. Regex<ECMA262> re("[\\u0008-\\uffff]"sv, ECMAScriptFlags::Global | (ECMAScriptFlags)regex::AllFlags::SingleMatch);
  1045. auto result = re.match("ABC"sv);
  1046. EXPECT_EQ(result.success, true);
  1047. EXPECT_EQ(result.matches.size(), 1u);
  1048. EXPECT_EQ(result.matches.first().view.to_byte_string(), "A"sv);
  1049. }
  1050. }
  1051. TEST_CASE(empty_string_wildcard_match)
  1052. {
  1053. {
  1054. // Ensure that the wildcard ".*" matches the empty string exactly once
  1055. Regex<ECMA262> re(".*"sv, ECMAScriptFlags::Global);
  1056. auto result = re.match(""sv);
  1057. EXPECT_EQ(result.success, true);
  1058. EXPECT_EQ(result.matches.size(), 1u);
  1059. EXPECT_EQ(result.matches.first().view.to_byte_string(), ""sv);
  1060. }
  1061. }
  1062. TEST_CASE(inversion_state_in_char_class)
  1063. {
  1064. {
  1065. // #13755, /[\S\s]/.exec("hello") should be [ "h" ], not null.
  1066. Regex<ECMA262> re("[\\S\\s]", ECMAScriptFlags::Global | (ECMAScriptFlags)regex::AllFlags::SingleMatch);
  1067. auto result = re.match("hello"sv);
  1068. EXPECT_EQ(result.success, true);
  1069. EXPECT_EQ(result.matches.size(), 1u);
  1070. EXPECT_EQ(result.matches.first().view.to_byte_string(), "h"sv);
  1071. }
  1072. {
  1073. Regex<ECMA262> re("^(?:([^\\s!\"#%-,\\./;->@\\[-\\^`\\{-~]+(?=([=~}\\s/.)|]))))"sv, ECMAScriptFlags::Global);
  1074. auto result = re.match("slideNumbers}}"sv);
  1075. EXPECT_EQ(result.success, true);
  1076. EXPECT_EQ(result.matches.size(), 1u);
  1077. EXPECT_EQ(result.matches.first().view.to_byte_string(), "slideNumbers"sv);
  1078. EXPECT_EQ(result.capture_group_matches.first()[0].view.to_byte_string(), "slideNumbers"sv);
  1079. EXPECT_EQ(result.capture_group_matches.first()[1].view.to_byte_string(), "}"sv);
  1080. }
  1081. {
  1082. // #21786, /[^\S\n]/.exec("\n") should be null, not [ "\n" ].
  1083. // This was a general confusion between the inversion state and the negation state (temp inverse).
  1084. Regex<ECMA262> re("[^\\S\\n]", ECMAScriptFlags::Global | (ECMAScriptFlags)regex::AllFlags::SingleMatch);
  1085. auto result = re.match("\n"sv);
  1086. EXPECT_EQ(result.success, false);
  1087. }
  1088. }
  1089. TEST_CASE(mismatching_brackets)
  1090. {
  1091. auto const test_cases = Array {
  1092. "["sv,
  1093. "[ -"sv,
  1094. };
  1095. for (auto const& test_case : test_cases) {
  1096. Regex<ECMA262> re(test_case);
  1097. EXPECT_EQ(re.parser_result.error, regex::Error::MismatchingBracket);
  1098. }
  1099. }