Entropy.js 5.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166
  /**
   * Entropy operations.
   *
   * @author n1474335 [n1474335@gmail.com]
   * @copyright Crown Copyright 2016
   * @license Apache-2.0
   *
   * @namespace
   */
  var Entropy = {

      /**
       * Chunk size constant. It is not read anywhere in this namespace;
       * presumably it is the default offered for the first argument of
       * runEntropy - confirm against the operation configuration.
       *
       * @constant
       * @default
       */
      CHUNK_SIZE: 1000,

      /**
       * Entropy operation.
       *
       * Builds a report containing the Shannon entropy of the whole input, a
       * canvas plus an inline script that calls CanvasComponents.drawScaleBar
       * (defined outside this namespace), and the entropy of each consecutive
       * chunk of the input.
       *
       * @param {byteArray} input
       * @param {Object[]} args - args[0] is the chunk size in bytes. It is coerced
       *     with Number(); zero, negative or non-numeric values produce an error
       *     line instead of the per-chunk listing.
       * @returns {html}
       */
      runEntropy: function(input, args) {
          // Coerce once so that a numeric string cannot turn `i += chunkSize`
          // into string concatenation further down.
          var chunkSize = Number(args[0]),
              output = "",
              entropy = Entropy._calcEntropy(input);

          output += "Shannon entropy: " + entropy + "\n" +
              "<br><canvas id='chart-area'></canvas><br>\n" +
              "- 0 represents no randomness (i.e. all the bytes in the data have the same value) whereas 8, the maximum, represents a completely random string.\n" +
              "- Standard English text usually falls somewhere between 3.5 and 5.\n" +
              "- Properly encrypted or compressed data of a reasonable length should have an entropy of over 7.5.\n\n" +
              "The following results show the entropy of chunks of the input data. Chunks with particularly high entropy could suggest encrypted or compressed sections.\n\n" +
              "<br><script>\
                  var canvas = document.getElementById('chart-area'),\
                      parentRect = canvas.parentNode.getBoundingClientRect(),\
                      entropy = " + entropy + ",\
                      height = parentRect.height * 0.25;\
                  \
                  canvas.width = parentRect.width * 0.95;\
                  canvas.height = height > 150 ? 150 : height;\
                  \
                  CanvasComponents.drawScaleBar(canvas, entropy, 8, [\
                      {\
                          label: 'English text',\
                          min: 3.5,\
                          max: 5\
                      },{\
                          label: 'Encrypted/compressed',\
                          min: 7.5,\
                          max: 8\
                      }\
                  ]);\
              </script>";

          if (chunkSize === 0) {
              output += "Chunk size cannot be 0.";
              return output;
          }

          // A negative or NaN step would never advance `i` past the end of the
          // input, so the loop below would not terminate.
          if (!(chunkSize > 0)) {
              output += "Chunk size must be a positive number.";
              return output;
          }

          var chunkEntropy = 0,
              chunkEnd;

          for (var i = 0; i < input.length; i += chunkSize) {
              // Clamp so the final (short) chunk is labelled with its real end.
              chunkEnd = Math.min(i + chunkSize, input.length);
              chunkEntropy = Entropy._calcEntropy(input.slice(i, chunkEnd));
              output += "Bytes " + i + " to " + chunkEnd + ": " + chunkEntropy + "\n";
          }

          return output;
      },

      /**
       * Show-zeroes constant. It is not read anywhere in this namespace;
       * presumably it is the default offered for the first argument of
       * runFreqDistrib - confirm against the operation configuration.
       *
       * @constant
       * @default
       */
      FREQ_ZEROS: false,

      /**
       * Frequency distribution operation.
       *
       * Counts how often each of the 256 possible byte values occurs and
       * returns a report: a canvas plus an inline script that calls
       * CanvasComponents.drawBarChart (defined outside this namespace),
       * summary totals, and one text row per byte value with a bar of "|"
       * characters, one per started percentage point.
       *
       * @param {byteArray} input
       * @param {Object[]} args - args[0], when truthy, also lists byte values
       *     that never occur in the input.
       * @returns {html} The report, or "No data" for empty input.
       */
      runFreqDistrib: function (input, args) {
          if (!input.length) {
              return "No data";
          }

          var distrib = new Array(256),
              percentages = new Array(256),
              len = input.length,
              showZeroes = args[0],
              i;

          // Initialise distrib to 0
          for (i = 0; i < 256; i++) {
              distrib[i] = 0;
          }

          // Count bytes
          for (i = 0; i < len; i++) {
              distrib[input[i]]++;
          }

          // Calculate percentages and count how many distinct values occur
          var repr = 0;
          for (i = 0; i < 256; i++) {
              if (distrib[i] > 0) {
                  repr++;
              }
              percentages[i] = distrib[i] / len * 100;
          }

          // Print
          var output = "<canvas id='chart-area'></canvas><br>" +
              "Total data length: " + len +
              "\nNumber of bytes represented: " + repr +
              "\nNumber of bytes not represented: " + (256-repr) +
              "\n\nByte Percentage\n" +
              "<script>\
                  var canvas = document.getElementById('chart-area'),\
                      parentRect = canvas.parentNode.getBoundingClientRect(),\
                      scores = " + JSON.stringify(percentages) + ";\
                  \
                  canvas.width = parentRect.width * 0.95;\
                  canvas.height = parentRect.height * 0.9;\
                  \
                  CanvasComponents.drawBarChart(canvas, scores, 'Byte', 'Frequency %', 16, 6);\
              </script>";

          for (i = 0; i < 256; i++) {
              if (distrib[i] || showZeroes) {
                  output += " " + Utils.hex(i, 2) + " (" +
                      Utils.padRight(percentages[i].toFixed(2).replace(".00", "") + "%)", 8) +
                      // Joining n+1 empty slots yields n "|" characters.
                      Array(Math.ceil(percentages[i])+1).join("|") + "\n";
              }
          }

          return output;
      },

      /**
       * Calculates the Shannon entropy, in bits per symbol, for a given chunk
       * of data.
       *
       * Symbol frequencies are gathered in a single pass over the data, so
       * only indexed access and a length are needed (plain arrays and typed
       * arrays both work).
       *
       * @private
       * @param {byteArray} data
       * @returns {number} 0 for empty data or data made of a single repeated
       *     value.
       */
      _calcEntropy: function(data) {
          var len = data.length,
              counts = Object.create(null),
              seen = [],
              entropy = 0,
              i, value, p;

          for (i = 0; i < len; i++) {
              value = data[i];
              if (counts[value] === undefined) {
                  counts[value] = 0;
                  // Remember first-seen order so the terms below are always
                  // summed in the same, input-determined order.
                  seen.push(value);
              }
              counts[value]++;
          }

          for (i = 0; i < seen.length; i++) {
              p = counts[seen[i]] / len;
              // Subtracting each term (rather than negating the final sum)
              // keeps the zero-entropy result as +0 instead of -0.
              entropy -= p * Math.log(p) / Math.log(2);
          }

          return entropy;
      }
  };