// Unicode.js (2.2 KB)
  import Utils from "../Utils.js";
  /**
   * Unicode operations.
   *
   * @author n1474335 [n1474335@gmail.com]
   * @copyright Crown Copyright 2016
   * @license Apache-2.0
   *
   * @namespace
   */
  const Unicode = {

      /**
       * Escape prefixes that have a ready-made regex fragment in `_prefixToRegex`.
       *
       * @constant
       * @default
       */
      PREFIXES: ["\\u", "%u", "U+"],


      /**
       * Unescape Unicode Characters operation.
       *
       * Every occurrence of the prefix followed by exactly four hex digits is
       * replaced with whatever `Utils.chr` returns for that numeric value. The
       * match is case-insensitive for both the prefix and the digits. Text that
       * is not part of a match is copied through unchanged.
       *
       * @param {string} input
       * @param {Object[]} args - `args[0]` is the prefix to search for. Prefixes
       *     listed in `_prefixToRegex` use their prepared pattern; any other
       *     prefix is matched as literal text.
       * @returns {string}
       */
      runUnescape: function(input, args) {
          const prefix = args[0];
          const isKnownPrefix = Object.prototype.hasOwnProperty.call(Unicode._prefixToRegex, prefix);

          // Without this fallback an unlisted prefix would be looked up as
          // `undefined` and end up in the pattern as the literal text "undefined".
          const prefixPattern = isKnownPrefix ?
              Unicode._prefixToRegex[prefix] :
              String(prefix).replace(/[.*+?^${}()|[\]\\]/g, "\\$&");

          const regex = new RegExp(prefixPattern + "([a-f\\d]{4})", "ig");

          // A replacer callback is used so the substituted text is never
          // interpreted for `$` replacement patterns.
          return input.replace(regex, (match, hex) => Utils.chr(parseInt(hex, 16)));
      },


      /**
       * Escape Unicode Characters operation.
       *
       * The input is processed one UTF-16 code unit at a time, so a character
       * outside the Basic Multilingual Plane is emitted as two escapes (one per
       * surrogate). This matches the four-hex-digit form `runUnescape` reads.
       *
       * @param {string} input
       * @param {Object[]} args - `args[0]` prefix written before each hex value;
       *     `args[1]` when truthy, printable ASCII (space to `~`) is escaped too;
       *     `args[2]` minimum number of hex digits (left-padded with "0");
       *     `args[3]` when truthy, hex digits are upper-cased.
       * @returns {string}
       */
      runEscape: function(input, args) {
          const prefix = args[0];
          const encodeAll = args[1];
          const padding = args[2];
          const uppercaseHex = args[3];
          const printableAscii = /[ -~]/;
          let output = "";

          for (let i = 0; i < input.length; i++) {
              const character = input[i];

              if (!encodeAll && printableAscii.test(character)) {
                  // Printable ASCII is passed through untouched unless encodeAll is set.
                  output += character;
                  continue;
              }

              let hex = character.codePointAt(0).toString(16);
              if (uppercaseHex) hex = hex.toUpperCase();
              output += prefix + hex.padStart(padding, "0");
          }

          return output;
      },


      /**
       * Lookup table to add prefixes to unicode delimiters so that they can be used in a regex.
       *
       * @private
       * @constant
       */
      _prefixToRegex: {
          "\\u": "\\\\u",
          "%u": "%u",
          "U+": "U\\+"
      },

  };

  export default Unicode;