Extract.mjs 1.6 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859
  1. /**
  2. * Identifier extraction functions
  3. *
  4. * @author n1474335 [n1474335@gmail.com]
  5. * @copyright Crown Copyright 2016
  6. * @license Apache-2.0
  7. *
  8. */
  /**
   * Runs search operations across the input data using regular expressions and
   * returns every match on its own line.
   *
   * The scan always starts at the beginning of the input: searchRegex.lastIndex is
   * reset before searching, so a shared global regex left part-way through a
   * previous scan cannot cause early matches to be skipped.
   *
   * @param {string} input - The text to search.
   * @param {RegExp} searchRegex - Pattern whose matches are collected. If it has neither
   *     the "g" nor the "y" flag, a global copy is used so that all matches are still found.
   * @param {RegExp} removeRegex - A regular expression defining results to remove from the
   *     final list. May be falsy, in which case nothing is removed.
   * @param {boolean} includeTotal - Whether or not to include the total number of results
   * @returns {string} Each kept match followed by "\n", optionally preceded by a
   *     "Total found: N" header and a blank line.
   */
  export function search (input, searchRegex, removeRegex, includeTotal) {
      // exec() only advances on global or sticky regexes; anything else would return the
      // same first match forever, so scan with a global copy instead.
      const scanner = (searchRegex.global || searchRegex.sticky) ?
          searchRegex :
          new RegExp(searchRegex.source, searchRegex.flags + "g");
      // Start from the beginning even if a previous user left lastIndex part-way through.
      scanner.lastIndex = 0;

      let output = "";
      let total = 0;
      let match;

      while ((match = scanner.exec(input))) {
          // Moves pointer when an empty string is matched (prevents infinite loop)
          if (match.index === scanner.lastIndex) {
              scanner.lastIndex++;
          }

          if (removeRegex) {
              // test() on a global/sticky regex resumes from lastIndex, which would carry
              // over from the previous (unrelated) match string and give wrong answers.
              removeRegex.lastIndex = 0;
              if (removeRegex.test(match[0])) {
                  continue;
              }
          }

          total++;
          output += match[0] + "\n";
      }

      if (includeTotal) {
          output = "Total found: " + total + "\n\n" + output;
      }

      return output;
  }
  /**
   * URL regular expression, assembled from its component patterns.
   *
   * The pattern is: protocol, hostname, then an optional port and an optional path.
   * It is case-insensitive and global.
   */
  // One or more letters followed by "://"
  const protocol = "[A-Z]+://";
  // A run of word characters/hyphens followed by one or more dot-prefixed labels
  const hostname = "[-\\w]+(?:\\.\\w[-\\w]*)+";
  // A colon followed by digits
  const port = ":\\d+";
  // A slash and any run of permitted characters, then further segments introduced by
  // . ! , or ? only when more permitted characters follow, so that punctuation trailing
  // the URL is not captured.
  const path = [
      "/[^.!,?\"<>\\[\\]{}\\s\\x7F-\\xFF]*",
      "(?:[.!,?]+[^.!,?\"<>\\[\\]{}\\s\\x7F-\\xFF]+)*"
  ].join("");
  export const URL_REGEX = new RegExp(`${protocol}${hostname}(?:${port})?(?:${path})?`, "ig");
  /**
   * Domain name regular expression.
   *
   * Matches one or more dot-terminated labels followed by a final label of 2 to 63
   * letters, bounded by word boundaries. It is case-insensitive and global.
   */
  export const DOMAIN_REGEX = new RegExp([
      "\\b",
      "(",
      "(?=[a-z0-9-]{1,63}\\.)",     // lookahead: 1-63 label characters, then a dot
      "(xn--)?",                    // optional literal "xn--" prefix
      "[a-z0-9]+(-[a-z0-9]+)*",     // alphanumeric runs joined by single hyphens
      "\\.",
      ")+",
      "[a-z]{2,63}",                // final label: letters only
      "\\b"
  ].join(""), "ig");