StrUtils.js 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590
  1. import Utils from "../Utils.js";
  /**
   * String utility operations.
   *
   * Every `run*` member takes the operation input string and an array of
   * argument values and returns the operation output. `Utils` (imported at the
   * top of the file) supplies HTML/regex escaping, escape-sequence parsing and
   * the delimiter-name lookup table `Utils.charRep`.
   *
   * @author n1474335 [n1474335@gmail.com]
   * @copyright Crown Copyright 2016
   * @license Apache-2.0
   *
   * @namespace
   */
  const StrUtils = {

      /**
       * Named regular expression sources. Each `value` is a pattern string that
       * can be handed to `new RegExp()`.
       *
       * @constant
       * @default
       */
      REGEX_PRE_POPULATE: [
          {
              name: "User defined",
              value: ""
          },
          {
              name: "IPv4 address",
              value: "(?:(?:\\d|[01]?\\d\\d|2[0-4]\\d|25[0-5])\\.){3}(?:25[0-5]|2[0-4]\\d|[01]?\\d\\d|\\d)(?:\\/\\d{1,2})?"
          },
          {
              name: "IPv6 address",
              value: "((?=.*::)(?!.*::.+::)(::)?([\\dA-Fa-f]{1,4}:(:|\\b)|){5}|([\\dA-Fa-f]{1,4}:){6})((([\\dA-Fa-f]{1,4}((?!\\3)::|:\\b|(?![\\dA-Fa-f])))|(?!\\2\\3)){2}|(((2[0-4]|1\\d|[1-9])?\\d|25[0-5])\\.?\\b){4})"
          },
          {
              name: "Email address",
              value: "(\\w[-.\\w]*)@([-\\w]+(?:\\.[-\\w]+)*)\\.([A-Za-z]{2,4})"
          },
          {
              name: "URL",
              value: "([A-Za-z]+://)([-\\w]+(?:\\.\\w[-\\w]*)+)(:\\d+)?(/[^.!,?\"<>\\[\\]{}\\s\\x7F-\\xFF]*(?:[.!,?]+[^.!,?\"<>\\[\\]{}\\s\\x7F-\\xFF]+)*)?"
          },
          {
              name: "Domain",
              value: "\\b((?=[a-z0-9-]{1,63}\\.)(xn--)?[a-z0-9]+(-[a-z0-9]+)*\\.)+[a-z]{2,63}\\b"
          },
          {
              name: "Windows file path",
              value: "([A-Za-z]):\\\\((?:[A-Za-z\\d][A-Za-z\\d\\- \\x27_\\(\\)]{0,61}\\\\?)*[A-Za-z\\d][A-Za-z\\d\\- \\x27_\\(\\)]{0,61})(\\.[A-Za-z\\d]{1,6})?"
          },
          {
              name: "UNIX file path",
              value: "(?:/[A-Za-z\\d.][A-Za-z\\d\\-.]{0,61})+"
          },
          {
              name: "MAC address",
              value: "[A-Fa-f\\d]{2}(?:[:-][A-Fa-f\\d]{2}){5}"
          },
          {
              name: "Date (yyyy-mm-dd)",
              value: "((?:19|20)\\d\\d)[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])"
          },
          {
              name: "Date (dd/mm/yyyy)",
              value: "(0[1-9]|[12][0-9]|3[01])[- /.](0[1-9]|1[012])[- /.]((?:19|20)\\d\\d)"
          },
          {
              name: "Date (mm/dd/yyyy)",
              value: "(0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])[- /.]((?:19|20)\\d\\d)"
          },
          {
              name: "Strings",
              value: "[A-Za-z\\d/\\-:.,_$%\\x27\"()<>= !\\[\\]{}@]{4,}"
          },
      ],

      /**
       * @constant
       * @default
       */
      REGEX_CASE_INSENSITIVE: true,

      /**
       * @constant
       * @default
       */
      REGEX_MULTILINE_MATCHING: true,

      /**
       * Output formats understood by runRegex.
       *
       * @constant
       * @default
       */
      OUTPUT_FORMAT: ["Highlight matches", "List matches", "List capture groups", "List matches with capture groups"],

      /**
       * @constant
       * @default
       */
      DISPLAY_TOTAL: false,

      /**
       * Regular expression operation.
       *
       * Compiles args[1] with the `g` flag (plus `i` / `m` when requested) and
       * renders the matches in the chosen output format. An empty pattern, or
       * the bare anchors "^" and "$", skip matching and return the HTML-escaped
       * input. A pattern that fails to compile yields an "Invalid regex" message
       * instead of an exception.
       *
       * @param {string} input
       * @param {Object[]} args - [unused here, pattern, case insensitive,
       *   multiline, display total, output format]
       * @returns {html}
       */
      runRegex: function(input, args) {
          // args[0] is not read by this function.
          const userRegex = args[1];
          const caseInsensitive = args[2];
          const multiline = args[3];
          const displayTotal = args[4];
          const outputFormat = args[5];

          let modifiers = "g";
          if (caseInsensitive) modifiers += "i";
          if (multiline) modifiers += "m";

          if (!userRegex || userRegex === "^" || userRegex === "$") {
              return Utils.escapeHtml(input);
          }

          try {
              const regex = new RegExp(userRegex, modifiers);

              switch (outputFormat) {
                  case "Highlight matches":
                      return StrUtils._regexHighlight(input, regex, displayTotal);
                  case "List matches":
                      return Utils.escapeHtml(StrUtils._regexList(input, regex, displayTotal, true, false));
                  case "List capture groups":
                      return Utils.escapeHtml(StrUtils._regexList(input, regex, displayTotal, false, true));
                  case "List matches with capture groups":
                      return Utils.escapeHtml(StrUtils._regexList(input, regex, displayTotal, true, true));
                  default:
                      return "Error: Invalid output format";
              }
          } catch (err) {
              // The message echoes the user-supplied pattern and this function
              // returns HTML, so it has to be escaped like any other user text.
              return "Invalid regex. Details: " + Utils.escapeHtml(err.message);
          }
      },

      /**
       * Scopes understood by runUpper.
       *
       * @constant
       * @default
       */
      CASE_SCOPE: ["All", "Word", "Sentence", "Paragraph"],

      /**
       * To Upper case operation.
       *
       * Upper-cases the whole input, or only the first word character of each
       * word / sentence / paragraph depending on args[0]. A sentence start is
       * the start of the input or a "."; a paragraph start is the start of the
       * input or a line feed. Any unrecognised scope behaves like "All".
       *
       * @param {string} input
       * @param {Object[]} args - [scope]
       * @returns {string}
       */
      runUpper: function (input, args) {
          const scope = args[0];
          // Upper-casing the whole match is safe: everything matched before the
          // captured letter is punctuation or whitespace.
          const upperCaseMatch = match => match.toUpperCase();

          switch (scope) {
              case "Word":
                  return input.replace(/(\b\w)/gi, upperCaseMatch);
              case "Sentence":
                  return input.replace(/(?:\.|^)\s*(\b\w)/gi, upperCaseMatch);
              case "Paragraph":
                  return input.replace(/(?:\n|^)\s*(\b\w)/gi, upperCaseMatch);
              case "All":
                  /* falls through */
              default:
                  return input.toUpperCase();
          }
      },

      /**
       * To Lower case operation.
       *
       * @param {string} input
       * @param {Object[]} args - not used
       * @returns {string}
       */
      runLower: function (input, args) {
          return input.toLowerCase();
      },

      /**
       * Search types understood by runFindReplace.
       *
       * @constant
       * @default
       */
      SEARCH_TYPE: ["Regex", "Extended (\\n, \\t, \\x...)", "Simple string"],

      /**
       * @constant
       * @default
       */
      FIND_REPLACE_GLOBAL: true,

      /**
       * @constant
       * @default
       */
      FIND_REPLACE_CASE: false,

      /**
       * @constant
       * @default
       */
      FIND_REPLACE_MULTILINE: true,

      /**
       * Find / Replace operation.
       *
       * With the "Regex" type the search string is compiled as a pattern. With
       * any other type it is matched literally (after Utils.parseEscapedChars
       * when the type starts with "Extended"). In every case the replacement
       * goes through String.prototype.replace, so "$&", "$1" etc. in the
       * replacement string keep their special meaning.
       *
       * @param {string} input
       * @param {Object[]} args - [{string, option}, replacement, global,
       *   case insensitive, multiline]
       * @returns {string}
       * @throws {SyntaxError} if the "Regex" search string is not a valid pattern
       */
      runFindReplace: function(input, args) {
          const type = args[0].option;
          const replace = args[1];
          const g = args[2];
          const i = args[3];
          const m = args[4];
          let find = args[0].string;

          let modifiers = "";
          if (g) modifiers += "g";
          if (i) modifiers += "i";
          if (m) modifiers += "m";

          if (type === "Regex") {
              return input.replace(new RegExp(find, modifiers), replace);
          }

          if (type.indexOf("Extended") === 0) {
              find = Utils.parseEscapedChars(find);
          }

          // Literal search: neutralise regex metacharacters before compiling.
          return input.replace(new RegExp(Utils.escapeRegex(find), modifiers), replace);
      },

      /**
       * @constant
       * @default
       */
      SPLIT_DELIM: ",",

      /**
       * Delimiter names; used below as keys into Utils.charRep.
       *
       * @constant
       * @default
       */
      DELIMITER_OPTIONS: ["Line feed", "CRLF", "Space", "Comma", "Semi-colon", "Colon", "Nothing (separate chars)"],

      /**
       * Split operation.
       *
       * Splits the input on args[0] (SPLIT_DELIM when that is empty) and joins
       * the pieces with the delimiter named by args[1].
       *
       * @param {string} input
       * @param {Object[]} args - [split delimiter, join delimiter name]
       * @returns {string}
       */
      runSplit: function(input, args) {
          const splitDelim = args[0] || StrUtils.SPLIT_DELIM;
          const joinDelim = Utils.charRep[args[1]];

          return input.split(splitDelim).join(joinDelim);
      },

      /**
       * Filter operation.
       *
       * Splits the input on the named delimiter and keeps the sections that
       * match the pattern in args[1], or those that do not match when args[2]
       * is truthy.
       *
       * @author Mikescher (https://github.com/Mikescher | https://mikescher.com)
       * @param {string} input
       * @param {Object[]} args - [delimiter name, pattern, invert]
       * @returns {string}
       */
      runFilter: function(input, args) {
          const delim = Utils.charRep[args[0]];
          const reverse = Boolean(args[2]);
          let regex;

          try {
              regex = new RegExp(args[1]);
          } catch (err) {
              return "Invalid regex. Details: " + err.message;
          }

          // The regex has no `g` flag, so test() keeps no state between sections.
          return input
              .split(delim)
              .filter(value => regex.test(value) !== reverse)
              .join(delim);
      },

      /**
       * @constant
       * @default
       */
      OFF_CHK_SAMPLE_DELIMITER: "\\n\\n",

      /**
       * Offset checker operation.
       *
       * Splits the input into samples on args[0] and, in every sample, wraps in
       * `<span class='hl5'>` the runs of offsets at which all samples hold the
       * same character. Only offsets inside the first sample are compared; any
       * text of a longer sample beyond that length is appended unhighlighted.
       *
       * @param {string} input
       * @param {Object[]} args - [sample delimiter]
       * @returns {html}
       */
      runOffsetChecker: function(input, args) {
          const sampleDelim = args[0];
          const samples = input.split(sampleDelim);

          if (samples.length < 2) {
              return "Not enough samples, perhaps you need to modify the sample delimiter or add more data?";
          }

          const reference = samples[0];

          // agrees[i] is true when every sample has the reference character at
          // offset i. A sample that is too short yields undefined there and so
          // never agrees.
          const agrees = [];
          for (let i = 0; i < reference.length; i++) {
              agrees.push(samples.every(sample => sample[i] === reference[i]));
          }

          const outputs = samples.map(sample => {
              const compared = Math.min(sample.length, reference.length);
              let out = "";
              let inMatch = false;

              for (let i = 0; i < compared; i++) {
                  // Open or close the highlight only where the state changes, so
                  // every opening tag gets exactly one closing tag.
                  if (agrees[i] !== inMatch) {
                      out += agrees[i] ? "<span class='hl5'>" : "</span>";
                      inMatch = agrees[i];
                  }
                  out += Utils.escapeHtml(sample[i]);
              }

              if (inMatch) out += "</span>";

              return out + Utils.escapeHtml(sample.substring(reference.length));
          });

          return outputs.join(sampleDelim);
      },

      /**
       * Pairs of escaped / unescaped forms used by runEscape and runUnescape.
       * The backslash entry has to stay first so that, when escaping, the
       * backslashes introduced by later entries are not themselves doubled.
       *
       * @constant
       * @default
       */
      ESCAPE_REPLACEMENTS: [
          {"escaped": "\\\\", "unescaped": "\\"}, // Must be first
          {"escaped": "\\'",  "unescaped": "'"},
          {"escaped": "\\\"", "unescaped": "\""},
          {"escaped": "\\n",  "unescaped": "\n"},
          {"escaped": "\\r",  "unescaped": "\r"},
          {"escaped": "\\t",  "unescaped": "\t"},
          {"escaped": "\\b",  "unescaped": "\b"},
          {"escaped": "\\f",  "unescaped": "\f"},
      ],

      /**
       * Escape string operation.
       *
       * @author Vel0x [dalemy@microsoft.com]
       *
       * @param {string} input
       * @param {Object[]} args - not used
       * @returns {string}
       *
       * @example
       * StrUtils.runEscape("Don't do that", [])
       * > "Don\'t do that"
       * StrUtils.runEscape(`Hello
       * World`, [])
       * > "Hello\nWorld"
       */
      runEscape: function(input, args) {
          return StrUtils._replaceByKeys(input, "unescaped", "escaped");
      },

      /**
       * Unescape string operation.
       *
       * @author Vel0x [dalemy@microsoft.com]
       *
       * @param {string} input
       * @param {Object[]} args - not used
       * @returns {string}
       *
       * @example
       * StrUtils.runUnescape("Don\'t do that", [])
       * > "Don't do that"
       * StrUtils.runUnescape("Hello\nWorld", [])
       * > `Hello
       * World`
       */
      runUnescape: function(input, args) {
          return StrUtils._replaceByKeys(input, "escaped", "unescaped");
      },

      /**
       * Replaces all matching tokens in ESCAPE_REPLACEMENTS with the correction. The
       * ordering is determined by the patternKey and the replacementKey.
       *
       * NOTE(review): the table is applied one entry after another, so text
       * produced by an earlier entry can be matched again by a later one. When
       * unescaping, a doubled backslash followed by "n" may therefore end up as
       * a line feed, depending on what Utils.parseEscapedChars leaves behind.
       * Confirm whether that is intended.
       *
       * @author Vel0x [dalemy@microsoft.com]
       * @author Matt C [matt@artemisbot.uk]
       *
       * @param {string} input
       * @param {string} patternKey - property of each table entry to search for
       * @param {string} replacementKey - property of each table entry to substitute
       * @returns {string}
       */
      _replaceByKeys: function(input, patternKey, replacementKey) {
          // Catch the \\x encoded characters
          let output = patternKey === "escaped" ? Utils.parseEscapedChars(input) : input;

          for (const replacement of StrUtils.ESCAPE_REPLACEMENTS) {
              output = output.split(replacement[patternKey]).join(replacement[replacementKey]);
          }

          return output;
      },

      /**
       * Head lines operation.
       *
       * Keeps the first args[1] sections of the input; a negative number keeps
       * everything except the last |args[1]| sections.
       *
       * @param {string} input
       * @param {Object[]} args - [delimiter name, number]
       * @returns {string}
       */
      runHead: function(input, args) {
          const delimiter = Utils.charRep[args[0]];
          const number = args[1];
          const splitInput = input.split(delimiter);

          return splitInput
              .filter((line, lineIndex) => {
                  const position = lineIndex + 1; // 1-based position of the section
                  return number < 0 ?
                      position <= splitInput.length + number :
                      position <= number;
              })
              .join(delimiter);
      },

      /**
       * Tail lines operation.
       *
       * Keeps the last args[1] sections of the input; a negative number keeps
       * everything except the first |args[1]| sections.
       *
       * @param {string} input
       * @param {Object[]} args - [delimiter name, number]
       * @returns {string}
       */
      runTail: function(input, args) {
          const delimiter = Utils.charRep[args[0]];
          const number = args[1];
          const splitInput = input.split(delimiter);

          return splitInput
              .filter((line, lineIndex) => {
                  const position = lineIndex + 1; // 1-based position of the section
                  return number < 0 ?
                      position > -number :
                      position > splitInput.length - number;
              })
              .join(delimiter);
      },

      /**
       * Adds HTML highlights to matches within a string.
       *
       * Matches are wrapped alternately in `hl1` and `hl2` spans; all text is
       * HTML-escaped. Zero-length matches are counted and rendered as empty
       * spans.
       *
       * @private
       * @param {string} input
       * @param {RegExp} regex - must carry the `g` flag (runRegex always adds it)
       * @param {boolean} displayTotal
       * @returns {string}
       */
      _regexHighlight: function(input, regex, displayTotal) {
          let output = "",
              m,
              hl = 1,
              i = 0,
              total = 0;

          while ((m = regex.exec(input))) {
              // A zero-length match leaves lastIndex where it was, so exec() would
              // return the same match forever. Step past it.
              if (m.index === regex.lastIndex) regex.lastIndex++;

              // Add up to match
              output += Utils.escapeHtml(input.slice(i, m.index));

              // Add match with highlighting
              output += "<span class='hl" + hl + "'>" + Utils.escapeHtml(m[0]) + "</span>";

              // Switch highlight
              hl = hl === 1 ? 2 : 1;

              // Resume from the real end of the match, not lastIndex, which may
              // have been bumped above.
              i = m.index + m[0].length;
              total++;
          }

          // Add all after final match
          output += Utils.escapeHtml(input.slice(i, input.length));

          if (displayTotal)
              output = "Total found: " + total + "\n\n" + output;

          return output;
      },

      /**
       * Creates a string listing the matches within a string.
       *
       * Each match and/or capture group is written on its own line. A capture
       * group that did not participate in the match is written as "undefined".
       *
       * @private
       * @param {string} input
       * @param {RegExp} regex - must carry the `g` flag (runRegex always adds it)
       * @param {boolean} displayTotal
       * @param {boolean} matches - Display full match
       * @param {boolean} captureGroups - Display each of the capture groups separately
       * @returns {string}
       */
      _regexList: function(input, regex, displayTotal, matches, captureGroups) {
          let output = "",
              total = 0,
              match;

          while ((match = regex.exec(input))) {
              // A zero-length match leaves lastIndex where it was, so exec() would
              // return the same match forever. Step past it.
              if (match.index === regex.lastIndex) regex.lastIndex++;

              total++;

              if (matches) {
                  output += match[0] + "\n";
              }

              if (captureGroups) {
                  for (let i = 1; i < match.length; i++) {
                      if (matches) {
                          output += " Group " + i + ": ";
                      }
                      output += match[i] + "\n";
                  }
              }
          }

          if (displayTotal)
              output = "Total found: " + total + "\n\n" + output;

          return output;
      },

  };
  515. export default StrUtils;