StrUtils.js 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542
  1. import Utils from "../Utils.js";
  2. import JsDiff from "diff";
  /**
   * String utility operations.
   *
   * @author n1474335 [n1474335@gmail.com]
   * @copyright Crown Copyright 2016
   * @license Apache-2.0
   *
   * @namespace
   */
  const StrUtils = {

    /**
     * Named regular expression presets. Each entry pairs a display name with
     * the regex source text; "User defined" carries an empty pattern.
     *
     * @constant
     * @default
     */
    REGEX_PRE_POPULATE: [
      {
        name: "User defined",
        value: ""
      },
      {
        name: "IPv4 address",
        value: "(?:(?:\\d|[01]?\\d\\d|2[0-4]\\d|25[0-5])\\.){3}(?:25[0-5]|2[0-4]\\d|[01]?\\d\\d|\\d)(?:\\/\\d{1,2})?"
      },
      {
        name: "IPv6 address",
        value: "((?=.*::)(?!.*::.+::)(::)?([\\dA-Fa-f]{1,4}:(:|\\b)|){5}|([\\dA-Fa-f]{1,4}:){6})((([\\dA-Fa-f]{1,4}((?!\\3)::|:\\b|(?![\\dA-Fa-f])))|(?!\\2\\3)){2}|(((2[0-4]|1\\d|[1-9])?\\d|25[0-5])\\.?\\b){4})"
      },
      {
        name: "Email address",
        value: "(\\w[-.\\w]*)@([-\\w]+(?:\\.[-\\w]+)*)\\.([A-Za-z]{2,4})"
      },
      {
        name: "URL",
        value: "([A-Za-z]+://)([-\\w]+(?:\\.\\w[-\\w]*)+)(:\\d+)?(/[^.!,?;\"\\x27<>()\\[\\]{}\\s\\x7F-\\xFF]*(?:[.!,?]+[^.!,?;\"\\x27<>()\\[\\]{}\\s\\x7F-\\xFF]+)*)?"
      },
      {
        name: "Domain",
        value: "(?:(https?):\\/\\/)?([-\\w.]+)\\.(com|net|org|biz|info|co|uk|onion|int|mobi|name|edu|gov|mil|eu|ac|ae|af|de|ca|ch|cn|cy|es|gb|hk|il|in|io|tv|me|nl|no|nz|ro|ru|tr|us|az|ir|kz|uz|pk)+"
      },
      {
        name: "Windows file path",
        value: "([A-Za-z]):\\\\((?:[A-Za-z\\d][A-Za-z\\d\\- \\x27_\\(\\)]{0,61}\\\\?)*[A-Za-z\\d][A-Za-z\\d\\- \\x27_\\(\\)]{0,61})(\\.[A-Za-z\\d]{1,6})?"
      },
      {
        name: "UNIX file path",
        value: "(?:/[A-Za-z\\d.][A-Za-z\\d\\-.]{0,61})+"
      },
      {
        name: "MAC address",
        value: "[A-Fa-f\\d]{2}(?:[:-][A-Fa-f\\d]{2}){5}"
      },
      {
        name: "Date (yyyy-mm-dd)",
        value: "((?:19|20)\\d\\d)[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])"
      },
      {
        name: "Date (dd/mm/yyyy)",
        value: "(0[1-9]|[12][0-9]|3[01])[- /.](0[1-9]|1[012])[- /.]((?:19|20)\\d\\d)"
      },
      {
        name: "Date (mm/dd/yyyy)",
        value: "(0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])[- /.]((?:19|20)\\d\\d)"
      },
      {
        name: "Strings",
        value: "[A-Za-z\\d/\\-:.,_$%\\x27\"()<>= !\\[\\]{}@]{4,}"
      },
    ],

    /**
     * Default for the case-insensitive ("i") regex option.
     *
     * @constant
     * @default
     */
    REGEX_CASE_INSENSITIVE: true,

    /**
     * Default for the multiline ("m") regex option.
     *
     * @constant
     * @default
     */
    REGEX_MULTILINE_MATCHING: true,

    /**
     * Output formats understood by {@link StrUtils.runRegex}.
     *
     * @constant
     * @default
     */
    OUTPUT_FORMAT: ["Highlight matches", "List matches", "List capture groups", "List matches with capture groups"],

    /**
     * Default for the "display total" option.
     *
     * @constant
     * @default
     */
    DISPLAY_TOTAL: false,

    /**
     * Regular expression operation.
     *
     * Runs a global regex over the input and renders the result in the
     * requested output format. When no usable pattern is supplied (empty,
     * "^" or "$") the HTML-escaped input is returned unchanged.
     *
     * @param {string} input
     * @param {Object[]} args - args[1]: regex source, args[2]: case
     *   insensitive, args[3]: multiline, args[4]: display total,
     *   args[5]: one of OUTPUT_FORMAT. args[0] is not read here
     *   (presumably the preset selector - confirm against the caller).
     * @returns {html}
     */
    runRegex: function(input, args) {
      const userRegex = args[1];
      const caseInsensitive = args[2];
      const multiline = args[3];
      const displayTotal = args[4];
      const outputFormat = args[5];

      if (!userRegex || userRegex === "^" || userRegex === "$") {
        return Utils.escapeHtml(input);
      }

      let modifiers = "g";
      if (caseInsensitive) modifiers += "i";
      if (multiline) modifiers += "m";

      try {
        const regex = new RegExp(userRegex, modifiers);

        switch (outputFormat) {
          case "Highlight matches":
            return StrUtils._regexHighlight(input, regex, displayTotal);
          case "List matches":
            return Utils.escapeHtml(StrUtils._regexList(input, regex, displayTotal, true, false));
          case "List capture groups":
            return Utils.escapeHtml(StrUtils._regexList(input, regex, displayTotal, false, true));
          case "List matches with capture groups":
            return Utils.escapeHtml(StrUtils._regexList(input, regex, displayTotal, true, true));
          default:
            return "Error: Invalid output format";
        }
      } catch (err) {
        // The message echoes the user's pattern and this function returns
        // HTML, so it has to be escaped like every other piece of output.
        return "Invalid regex. Details: " + Utils.escapeHtml(err.message);
      }
    },

    /**
     * Scopes understood by {@link StrUtils.runUpper}.
     *
     * @constant
     * @default
     */
    CASE_SCOPE: ["All", "Word", "Sentence", "Paragraph"],

    /**
     * To Upper case operation.
     *
     * Upper-cases the whole input, or only the first word character of each
     * word, sentence or paragraph depending on the scope. Unknown scopes
     * behave like "All".
     *
     * @param {string} input
     * @param {Object[]} args - args[0]: one of CASE_SCOPE
     * @returns {string}
     */
    runUpper: function (input, args) {
      const scope = args[0];
      const upperCaseMatch = function(matched) {
        return matched.toUpperCase();
      };

      switch (scope) {
        case "Word":
          return input.replace(/(\b\w)/gi, upperCaseMatch);
        case "Sentence":
          return input.replace(/(?:\.|^)\s*(\b\w)/gi, upperCaseMatch);
        case "Paragraph":
          return input.replace(/(?:\n|^)\s*(\b\w)/gi, upperCaseMatch);
        case "All":
          /* falls through */
        default:
          return input.toUpperCase();
      }
    },

    /**
     * To Lower case operation.
     *
     * @param {string} input
     * @param {Object[]} args - unused
     * @returns {string}
     */
    runLower: function (input, args) {
      return input.toLowerCase();
    },

    /**
     * Search types understood by {@link StrUtils.runFindReplace}.
     *
     * @constant
     * @default
     */
    SEARCH_TYPE: ["Regex", "Extended (\\n, \\t, \\x...)", "Simple string"],

    /**
     * Default for the global-match option of Find / Replace.
     *
     * @constant
     * @default
     */
    FIND_REPLACE_GLOBAL : true,

    /**
     * Default for the case-insensitive option of Find / Replace.
     *
     * @constant
     * @default
     */
    FIND_REPLACE_CASE : false,

    /**
     * Default for the multiline option of Find / Replace.
     *
     * @constant
     * @default
     */
    FIND_REPLACE_MULTILINE : true,

    /**
     * Find / Replace operation.
     *
     * The search term is always turned into a RegExp so that the global,
     * case-insensitive and multiline options work in every engine. "Regex"
     * terms are compiled as given; "Extended" terms are first unescaped with
     * Utils.parseEscapedChars; every non-regex term is matched literally.
     *
     * @param {string} input
     * @param {Object[]} args - args[0]: {string, option} search term and one
     *   of SEARCH_TYPE, args[1]: replacement, args[2]: global, args[3]: case
     *   insensitive, args[4]: multiline
     * @returns {string}
     * @throws {SyntaxError} if a "Regex" search term is not a valid pattern
     */
    runFindReplace: function(input, args) {
      const type = args[0].option;
      const replace = args[1];
      let find = args[0].string;

      let modifiers = "";
      if (args[2]) modifiers += "g";
      if (args[3]) modifiers += "i";
      if (args[4]) modifiers += "m";

      if (type !== "Regex") {
        if (type.indexOf("Extended") === 0) {
          find = Utils.parseEscapedChars(find);
        }
        // String.prototype.replace has no portable `flags` argument, so a
        // literal search term is escaped and compiled instead. Without this
        // only the first occurrence of a plain string would be replaced.
        find = String(find).replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
      }

      return input.replace(new RegExp(find, modifiers), replace);
    },

    /**
     * Default split delimiter, used when none is supplied.
     *
     * @constant
     * @default
     */
    SPLIT_DELIM: ",",

    /**
     * Delimiter names offered to the user; looked up in Utils.charRep.
     *
     * @constant
     * @default
     */
    DELIMITER_OPTIONS: ["Line feed", "CRLF", "Space", "Comma", "Semi-colon", "Colon", "Nothing (separate chars)"],

    /**
     * Split operation.
     *
     * Splits the input on one delimiter and re-joins it with another.
     *
     * @param {string} input
     * @param {Object[]} args - args[0]: split delimiter (falls back to
     *   SPLIT_DELIM when empty), args[1]: join delimiter name, resolved
     *   through Utils.charRep
     * @returns {string}
     */
    runSplit: function(input, args) {
      const splitDelim = args[0] || StrUtils.SPLIT_DELIM;
      const joinDelim = Utils.charRep[args[1]];

      return input.split(splitDelim).join(joinDelim);
    },

    /**
     * Filter operation.
     *
     * Splits the input on a delimiter and keeps only the sections that match
     * the regex (or only those that do not, when inverted).
     *
     * @author Mikescher (https://github.com/Mikescher | https://mikescher.com)
     * @param {string} input
     * @param {Object[]} args - args[0]: delimiter name, resolved through
     *   Utils.charRep, args[1]: regex source, args[2]: invert the condition
     * @returns {string}
     */
    runFilter: function(input, args) {
      const delim = Utils.charRep[args[0]];
      const reverse = Boolean(args[2]);
      let regex;

      try {
        regex = new RegExp(args[1]);
      } catch (err) {
        return "Invalid regex. Details: " + err.message;
      }

      // Keep a section when "matches" and "inverted" disagree.
      const regexFilter = function(value) {
        return regex.test(value) !== reverse;
      };

      return input.split(delim).filter(regexFilter).join(delim);
    },

    /**
     * Default sample delimiter for Diff, in escaped form.
     *
     * @constant
     * @default
     */
    DIFF_SAMPLE_DELIMITER: "\\n\\n",

    /**
     * Granularities understood by {@link StrUtils.runDiff}.
     *
     * @constant
     * @default
     */
    DIFF_BY: ["Character", "Word", "Line", "Sentence", "CSS", "JSON"],

    /**
     * Diff operation.
     *
     * Splits the input into exactly two samples and renders their
     * differences: additions in an `hlgreen` span, removals in an `hlred`
     * span, unchanged text as-is. All text is HTML-escaped.
     *
     * @param {string} input
     * @param {Object[]} args - args[0]: sample delimiter, args[1]: one of
     *   DIFF_BY, args[2]: show added, args[3]: show removed, args[4]: ignore
     *   whitespace (only consulted for "Word" and "Line")
     * @returns {html}
     */
    runDiff: function(input, args) {
      const sampleDelim = args[0];
      const diffBy = args[1];
      const showAdded = args[2];
      const showRemoved = args[3];
      const ignoreWhitespace = args[4];
      const samples = input.split(sampleDelim);

      if (!samples || samples.length !== 2) {
        return "Incorrect number of samples, perhaps you need to modify the sample delimiter or add more samples?";
      }

      const before = samples[0];
      const after = samples[1];
      let diff;

      switch (diffBy) {
        case "Character":
          diff = JsDiff.diffChars(before, after);
          break;
        case "Word":
          diff = ignoreWhitespace ?
            JsDiff.diffWords(before, after) :
            JsDiff.diffWordsWithSpace(before, after);
          break;
        case "Line":
          diff = ignoreWhitespace ?
            JsDiff.diffTrimmedLines(before, after) :
            JsDiff.diffLines(before, after);
          break;
        case "Sentence":
          diff = JsDiff.diffSentences(before, after);
          break;
        case "CSS":
          diff = JsDiff.diffCss(before, after);
          break;
        case "JSON":
          diff = JsDiff.diffJson(before, after);
          break;
        default:
          return "Invalid 'Diff by' option.";
      }

      let output = "";
      diff.forEach(function(part) {
        const text = Utils.escapeHtml(part.value);

        if (part.added) {
          if (showAdded) output += "<span class='hlgreen'>" + text + "</span>";
        } else if (part.removed) {
          if (showRemoved) output += "<span class='hlred'>" + text + "</span>";
        } else {
          output += text;
        }
      });

      return output;
    },

    /**
     * Default sample delimiter for Offset checker, in escaped form.
     *
     * @constant
     * @default
     */
    OFF_CHK_SAMPLE_DELIMITER: "\\n\\n",

    /**
     * Offset checker operation.
     *
     * Splits the input into samples and highlights, in every sample, the
     * offsets at which all samples hold the same character. Each run of
     * matching offsets is wrapped in exactly one `hlgreen` span; everything
     * else is emitted HTML-escaped and unhighlighted.
     *
     * @param {string} input
     * @param {Object[]} args - args[0]: sample delimiter
     * @returns {html}
     */
    runOffsetChecker: function(input, args) {
      const sampleDelim = args[0];
      const samples = input.split(sampleDelim);

      if (!samples || samples.length < 2) {
        return "Not enough samples, perhaps you need to modify the sample delimiter or add more data?";
      }

      // An offset can only match if every sample is long enough to have it.
      const shortest = samples.reduce(function(min, sample) {
        return Math.min(min, sample.length);
      }, Infinity);

      // Decide once per offset whether all samples agree on the character.
      const first = samples[0];
      const matches = [];
      for (let offset = 0; offset < shortest; offset++) {
        const chr = first[offset];
        matches.push(samples.every(function(sample) {
          return sample[offset] === chr;
        }));
      }

      const outputs = samples.map(function(sample) {
        let out = "";
        let inMatch = false;

        for (let offset = 0; offset < sample.length; offset++) {
          const match = offset < shortest && matches[offset];

          if (match && !inMatch) {
            out += "<span class='hlgreen'>";
          } else if (!match && inMatch) {
            out += "</span>";
          }
          inMatch = match;

          out += Utils.escapeHtml(sample[offset]);
        }

        // Close a run that extends to the end of the sample.
        if (inMatch) out += "</span>";
        return out;
      });

      return outputs.join(sampleDelim);
    },

    /**
     * Parse escaped string operation.
     *
     * @param {string} input
     * @param {Object[]} args - unused
     * @returns {string}
     */
    runParseEscapedString: function(input, args) {
      return Utils.parseEscapedChars(input);
    },

    /**
     * Adds HTML highlights to matches within a string.
     *
     * Matches are wrapped alternately in `hl1` and `hl2` spans so adjacent
     * matches stay distinguishable. All text is HTML-escaped.
     *
     * @private
     * @param {string} input
     * @param {RegExp} regex - expected to carry the global flag
     * @param {boolean} displayTotal - prefix the output with the match count
     * @returns {string}
     */
    _regexHighlight: function(input, regex, displayTotal) {
      let output = "";
      let hl = 1;
      let cursor = 0;
      let total = 0;
      let m;

      while ((m = regex.exec(input))) {
        // Add up to match
        output += Utils.escapeHtml(input.slice(cursor, m.index));

        // Add match with highlighting
        output += "<span class='hl" + hl + "'>" + Utils.escapeHtml(m[0]) + "</span>";

        // Switch highlight
        hl = hl === 1 ? 2 : 1;

        cursor = m.index + m[0].length;
        total++;

        // exec() does not move lastIndex past an empty match, which would
        // make this loop spin forever on patterns such as `x*` or `\b`.
        if (m[0].length === 0) regex.lastIndex++;

        // Without the global/sticky flag exec() keeps returning the same match.
        if (!regex.global && !regex.sticky) break;
      }

      // Add all after final match
      output += Utils.escapeHtml(input.slice(cursor, input.length));

      if (displayTotal)
        output = "Total found: " + total + "\n\n" + output;

      return output;
    },

    /**
     * Creates a string listing the matches within a string.
     *
     * Every match and/or capture group is written on its own line. The
     * result is NOT HTML-escaped; the caller is responsible for that.
     *
     * @private
     * @param {string} input
     * @param {RegExp} regex - expected to carry the global flag
     * @param {boolean} displayTotal - prefix the output with the match count
     * @param {boolean} matches - Display full match
     * @param {boolean} captureGroups - Display each of the capture groups separately
     * @returns {string}
     */
    _regexList: function(input, regex, displayTotal, matches, captureGroups) {
      let output = "";
      let total = 0;
      let match;

      while ((match = regex.exec(input))) {
        total++;

        if (matches) {
          output += match[0] + "\n";
        }

        if (captureGroups) {
          for (let group = 1; group < match.length; group++) {
            // Groups are only labelled when listed beneath their full match.
            if (matches) {
              output += " Group " + group + ": ";
            }
            output += match[group] + "\n";
          }
        }

        // exec() does not move lastIndex past an empty match, which would
        // make this loop spin forever on patterns such as `x*` or `\b`.
        if (match[0].length === 0) regex.lastIndex++;

        // Without the global/sticky flag exec() keeps returning the same match.
        if (!regex.global && !regex.sticky) break;
      }

      if (displayTotal)
        output = "Total found: " + total + "\n\n" + output;

      return output;
    },

  };

  export default StrUtils;