Forráskód Böngészése

Merge branch 'feature-unicode-strings'

n1474335 7 éve
szülő
commit
192d0ed8a6

+ 5 - 0
package-lock.json

@@ -10256,6 +10256,11 @@
       "resolved": "https://registry.npmjs.org/xpath/-/xpath-0.0.27.tgz",
       "resolved": "https://registry.npmjs.org/xpath/-/xpath-0.0.27.tgz",
       "integrity": "sha512-fg03WRxtkCV6ohClePNAECYsmpKKTv5L8y/X3Dn1hQrec3POx2jHZ/0P2qQ6HvsrU1BmeqXcof3NGGueG6LxwQ=="
       "integrity": "sha512-fg03WRxtkCV6ohClePNAECYsmpKKTv5L8y/X3Dn1hQrec3POx2jHZ/0P2qQ6HvsrU1BmeqXcof3NGGueG6LxwQ=="
     },
     },
+    "xregexp": {
+      "version": "4.0.0",
+      "resolved": "https://registry.npmjs.org/xregexp/-/xregexp-4.0.0.tgz",
+      "integrity": "sha512-PHyM+sQouu7xspQQwELlGwwd05mXUFqwFYfqPO0cC7x4fxyHnnuetmQr6CjJiafIDoH4MogHb9dOoJzR/Y4rFg=="
+    },
     "xtend": {
     "xtend": {
       "version": "4.0.1",
       "version": "4.0.1",
       "resolved": "https://registry.npmjs.org/xtend/-/xtend-4.0.1.tgz",
       "resolved": "https://registry.npmjs.org/xtend/-/xtend-4.0.1.tgz",

+ 1 - 0
package.json

@@ -103,6 +103,7 @@
     "vkbeautify": "^0.99.3",
     "vkbeautify": "^0.99.3",
     "xmldom": "^0.1.27",
     "xmldom": "^0.1.27",
     "xpath": "0.0.27",
     "xpath": "0.0.27",
+    "xregexp": "^4.0.0",
     "zlibjs": "^0.3.1"
     "zlibjs": "^0.3.1"
   },
   },
   "scripts": {
   "scripts": {

+ 47 - 24
src/core/config/OperationConfig.js

@@ -30,6 +30,7 @@ import NetBIOS from "../operations/NetBIOS.js";
 import PHP from "../operations/PHP.js";
 import PHP from "../operations/PHP.js";
 import PublicKey from "../operations/PublicKey.js";
 import PublicKey from "../operations/PublicKey.js";
 import Punycode from "../operations/Punycode.js";
 import Punycode from "../operations/Punycode.js";
+import Regex from "../operations/Regex.js";
 import Rotate from "../operations/Rotate.js";
 import Rotate from "../operations/Rotate.js";
 import SeqUtils from "../operations/SeqUtils.js";
 import SeqUtils from "../operations/SeqUtils.js";
 import Shellcode from "../operations/Shellcode.js";
 import Shellcode from "../operations/Shellcode.js";
@@ -2058,9 +2059,8 @@ const OperationConfig = {
         args: []
         args: []
     },
     },
     "Find / Replace": {
     "Find / Replace": {
-        module: "Default",
+        module: "Regex",
         description: "Replaces all occurrences of the first string with the second.<br><br> Includes support for regular expressions (regex), simple strings and extended strings (which support \\n, \\r, \\t, \\b, \\f and escaped hex bytes using \\x notation, e.g. \\x00 for a null byte).",
         description: "Replaces all occurrences of the first string with the second.<br><br> Includes support for regular expressions (regex), simple strings and extended strings (which support \\n, \\r, \\t, \\b, \\f and escaped hex bytes using \\x notation, e.g. \\x00 for a null byte).",
-        manualBake: true,
         inputType: "string",
         inputType: "string",
         outputType: "string",
         outputType: "string",
         args: [
         args: [
@@ -2068,7 +2068,7 @@ const OperationConfig = {
                 name: "Find",
                 name: "Find",
                 type: "toggleString",
                 type: "toggleString",
                 value: "",
                 value: "",
-                toggleValues: StrUtils.SEARCH_TYPE
+                toggleValues: Regex.SEARCH_TYPE
             },
             },
             {
             {
                 name: "Replace",
                 name: "Replace",
@@ -2078,17 +2078,17 @@ const OperationConfig = {
             {
             {
                 name: "Global match",
                 name: "Global match",
                 type: "boolean",
                 type: "boolean",
-                value: StrUtils.FIND_REPLACE_GLOBAL,
+                value: Regex.FIND_REPLACE_GLOBAL,
             },
             },
             {
             {
                 name: "Case insensitive",
                 name: "Case insensitive",
                 type: "boolean",
                 type: "boolean",
-                value: StrUtils.FIND_REPLACE_CASE,
+                value: Regex.FIND_REPLACE_CASE,
             },
             },
             {
             {
                 name: "Multiline matching",
                 name: "Multiline matching",
                 type: "boolean",
                 type: "boolean",
-                value: StrUtils.FIND_REPLACE_MULTILINE,
+                value: Regex.FIND_REPLACE_MULTILINE,
             },
             },
 
 
         ]
         ]
@@ -2138,7 +2138,6 @@ const OperationConfig = {
     "Filter": {
     "Filter": {
         module: "Default",
         module: "Default",
         description: "Splits up the input using the specified delimiter and then filters each branch based on a regular expression.",
         description: "Splits up the input using the specified delimiter and then filters each branch based on a regular expression.",
-        manualBake: true,
         inputType: "string",
         inputType: "string",
         outputType: "string",
         outputType: "string",
         args: [
         args: [
@@ -2160,16 +2159,26 @@ const OperationConfig = {
         ]
         ]
     },
     },
     "Strings": {
     "Strings": {
-        module: "Default",
+        module: "Regex",
         description: "Extracts all strings from the input.",
         description: "Extracts all strings from the input.",
         inputType: "string",
         inputType: "string",
         outputType: "string",
         outputType: "string",
         args: [
         args: [
+            {
+                name: "Encoding",
+                type: "option",
+                value: Extract.ENCODING_LIST
+            },
             {
             {
                 name: "Minimum length",
                 name: "Minimum length",
                 type: "number",
                 type: "number",
                 value: Extract.MIN_STRING_LEN
                 value: Extract.MIN_STRING_LEN
             },
             },
+            {
+                name: "Match",
+                type: "option",
+                value: Extract.STRING_MATCH_TYPE
+            },
             {
             {
                 name: "Display total",
                 name: "Display total",
                 type: "boolean",
                 type: "boolean",
@@ -2178,7 +2187,7 @@ const OperationConfig = {
         ]
         ]
     },
     },
     "Extract IP addresses": {
     "Extract IP addresses": {
-        module: "Default",
+        module: "Regex",
         description: "Extracts all IPv4 and IPv6 addresses.<br><br>Warning: Given a string <code>710.65.0.456</code>, this will match <code>10.65.0.45</code> so always check the original input!",
         description: "Extracts all IPv4 and IPv6 addresses.<br><br>Warning: Given a string <code>710.65.0.456</code>, this will match <code>10.65.0.45</code> so always check the original input!",
         inputType: "string",
         inputType: "string",
         outputType: "string",
         outputType: "string",
@@ -2206,7 +2215,7 @@ const OperationConfig = {
         ]
         ]
     },
     },
     "Extract email addresses": {
     "Extract email addresses": {
-        module: "Default",
+        module: "Regex",
         description: "Extracts all email addresses from the input.",
         description: "Extracts all email addresses from the input.",
         inputType: "string",
         inputType: "string",
         outputType: "string",
         outputType: "string",
@@ -2219,7 +2228,7 @@ const OperationConfig = {
         ]
         ]
     },
     },
     "Extract MAC addresses": {
     "Extract MAC addresses": {
-        module: "Default",
+        module: "Regex",
         description: "Extracts all Media Access Control (MAC) addresses from the input.",
         description: "Extracts all Media Access Control (MAC) addresses from the input.",
         inputType: "string",
         inputType: "string",
         outputType: "string",
         outputType: "string",
@@ -2232,7 +2241,7 @@ const OperationConfig = {
         ]
         ]
     },
     },
     "Extract URLs": {
     "Extract URLs": {
-        module: "Default",
+        module: "Regex",
         description: "Extracts Uniform Resource Locators (URLs) from the input. The protocol (http, ftp etc.) is required otherwise there will be far too many false positives.",
         description: "Extracts Uniform Resource Locators (URLs) from the input. The protocol (http, ftp etc.) is required otherwise there will be far too many false positives.",
         inputType: "string",
         inputType: "string",
         outputType: "string",
         outputType: "string",
@@ -2245,7 +2254,7 @@ const OperationConfig = {
         ]
         ]
     },
     },
     "Extract domains": {
     "Extract domains": {
-        module: "Default",
+        module: "Regex",
         description: "Extracts domain names.<br>Note that this will not include paths. Use <strong>Extract URLs</strong> to find entire URLs.",
         description: "Extracts domain names.<br>Note that this will not include paths. Use <strong>Extract URLs</strong> to find entire URLs.",
         inputType: "string",
         inputType: "string",
         outputType: "string",
         outputType: "string",
@@ -2258,7 +2267,7 @@ const OperationConfig = {
         ]
         ]
     },
     },
     "Extract file paths": {
     "Extract file paths": {
-        module: "Default",
+        module: "Regex",
         description: "Extracts anything that looks like a Windows or UNIX file path.<br><br>Note that if UNIX is selected, there will likely be a lot of false positives.",
         description: "Extracts anything that looks like a Windows or UNIX file path.<br><br>Note that if UNIX is selected, there will likely be a lot of false positives.",
         inputType: "string",
         inputType: "string",
         outputType: "string",
         outputType: "string",
@@ -2281,7 +2290,7 @@ const OperationConfig = {
         ]
         ]
     },
     },
     "Extract dates": {
     "Extract dates": {
-        module: "Default",
+        module: "Regex",
         description: "Extracts dates in the following formats<ul><li><code>yyyy-mm-dd</code></li><li><code>dd/mm/yyyy</code></li><li><code>mm/dd/yyyy</code></li></ul>Dividers can be any of /, -, . or space",
         description: "Extracts dates in the following formats<ul><li><code>yyyy-mm-dd</code></li><li><code>dd/mm/yyyy</code></li><li><code>mm/dd/yyyy</code></li></ul>Dividers can be any of /, -, . or space",
         inputType: "string",
         inputType: "string",
         outputType: "string",
         outputType: "string",
@@ -2294,16 +2303,15 @@ const OperationConfig = {
         ]
         ]
     },
     },
     "Regular expression": {
     "Regular expression": {
-        module: "Default",
-        description: "Define your own regular expression (regex) to search the input data with, optionally choosing from a list of pre-defined patterns.",
-        manualBake: true,
+        module: "Regex",
+        description: "Define your own regular expression (regex) to search the input data with, optionally choosing from a list of pre-defined patterns.<br><br>Supports extended regex syntax including the 'dot matches all' flag, named capture groups, full unicode coverage (including <code>\\p{}</code> categories and scripts as well as astral codes) and recursive matching.",
         inputType: "string",
         inputType: "string",
         outputType: "html",
         outputType: "html",
         args: [
         args: [
             {
             {
                 name: "Built in regexes",
                 name: "Built in regexes",
                 type: "populateOption",
                 type: "populateOption",
-                value: StrUtils.REGEX_PRE_POPULATE,
+                value: Regex.REGEX_PRE_POPULATE,
                 target: 1,
                 target: 1,
             },
             },
             {
             {
@@ -2314,22 +2322,37 @@ const OperationConfig = {
             {
             {
                 name: "Case insensitive",
                 name: "Case insensitive",
                 type: "boolean",
                 type: "boolean",
-                value: StrUtils.REGEX_CASE_INSENSITIVE
+                value: true
             },
             },
             {
             {
-                name: "Multiline matching",
+                name: "^ and $ match at newlines",
                 type: "boolean",
                 type: "boolean",
-                value: StrUtils.REGEX_MULTILINE_MATCHING
+                value: true
+            },
+            {
+                name: "Dot matches all",
+                type: "boolean",
+                value: false
+            },
+            {
+                name: "Unicode support",
+                type: "boolean",
+                value: false
+            },
+            {
+                name: "Astral support",
+                type: "boolean",
+                value: false
             },
             },
             {
             {
                 name: "Display total",
                 name: "Display total",
                 type: "boolean",
                 type: "boolean",
-                value: StrUtils.DISPLAY_TOTAL
+                value: Regex.DISPLAY_TOTAL
             },
             },
             {
             {
                 name: "Output format",
                 name: "Output format",
                 type: "option",
                 type: "option",
-                value: StrUtils.OUTPUT_FORMAT
+                value: Regex.OUTPUT_FORMAT
             },
             },
         ]
         ]
     },
     },

+ 0 - 11
src/core/config/modules/Default.js

@@ -10,7 +10,6 @@ import Convert from "../../operations/Convert.js";
 import DateTime from "../../operations/DateTime.js";
 import DateTime from "../../operations/DateTime.js";
 import Endian from "../../operations/Endian.js";
 import Endian from "../../operations/Endian.js";
 import Entropy from "../../operations/Entropy.js";
 import Entropy from "../../operations/Entropy.js";
-import Extract from "../../operations/Extract.js";
 import FileType from "../../operations/FileType.js";
 import FileType from "../../operations/FileType.js";
 import Hexdump from "../../operations/Hexdump.js";
 import Hexdump from "../../operations/Hexdump.js";
 import HTML from "../../operations/HTML.js";
 import HTML from "../../operations/HTML.js";
@@ -99,11 +98,9 @@ OpModules.Default = {
     "Format MAC addresses": MAC.runFormat,
     "Format MAC addresses": MAC.runFormat,
     "Encode NetBIOS Name":  NetBIOS.runEncodeName,
     "Encode NetBIOS Name":  NetBIOS.runEncodeName,
     "Decode NetBIOS Name":  NetBIOS.runDecodeName,
     "Decode NetBIOS Name":  NetBIOS.runDecodeName,
-    "Regular expression":   StrUtils.runRegex,
     "Offset checker":       StrUtils.runOffsetChecker,
     "Offset checker":       StrUtils.runOffsetChecker,
     "To Upper case":        StrUtils.runUpper,
     "To Upper case":        StrUtils.runUpper,
     "To Lower case":        StrUtils.runLower,
     "To Lower case":        StrUtils.runLower,
-    "Find / Replace":       StrUtils.runFindReplace,
     "Split":                StrUtils.runSplit,
     "Split":                StrUtils.runSplit,
     "Filter":               StrUtils.runFilter,
     "Filter":               StrUtils.runFilter,
     "Escape string":        StrUtils.runEscape,
     "Escape string":        StrUtils.runEscape,
@@ -133,14 +130,6 @@ OpModules.Default = {
     "Translate DateTime Format": DateTime.runTranslateFormat,
     "Translate DateTime Format": DateTime.runTranslateFormat,
     "From UNIX Timestamp":  DateTime.runFromUnixTimestamp,
     "From UNIX Timestamp":  DateTime.runFromUnixTimestamp,
     "To UNIX Timestamp":    DateTime.runToUnixTimestamp,
     "To UNIX Timestamp":    DateTime.runToUnixTimestamp,
-    "Strings":              Extract.runStrings,
-    "Extract IP addresses": Extract.runIp,
-    "Extract email addresses": Extract.runEmail,
-    "Extract MAC addresses": Extract.runMac,
-    "Extract URLs":         Extract.runUrls,
-    "Extract domains":      Extract.runDomains,
-    "Extract file paths":   Extract.runFilePaths,
-    "Extract dates":        Extract.runDates,
     "Microsoft Script Decoder": MS.runDecodeScript,
     "Microsoft Script Decoder": MS.runDecodeScript,
     "Entropy":              Entropy.runEntropy,
     "Entropy":              Entropy.runEntropy,
     "Frequency distribution": Entropy.runFreqDistrib,
     "Frequency distribution": Entropy.runFreqDistrib,

+ 2 - 0
src/core/config/modules/OpModules.js

@@ -18,6 +18,7 @@ import HTTPModule from "./HTTP.js";
 import ImageModule from "./Image.js";
 import ImageModule from "./Image.js";
 import JSBNModule from "./JSBN.js";
 import JSBNModule from "./JSBN.js";
 import PublicKeyModule from "./PublicKey.js";
 import PublicKeyModule from "./PublicKey.js";
+import RegexModule from "./Regex.js";
 import ShellcodeModule from "./Shellcode.js";
 import ShellcodeModule from "./Shellcode.js";
 import URLModule from "./URL.js";
 import URLModule from "./URL.js";
 
 
@@ -34,6 +35,7 @@ Object.assign(
     ImageModule,
     ImageModule,
     JSBNModule,
     JSBNModule,
     PublicKeyModule,
     PublicKeyModule,
+    RegexModule,
     ShellcodeModule,
     ShellcodeModule,
     URLModule
     URLModule
 );
 );

+ 30 - 0
src/core/config/modules/Regex.js

@@ -0,0 +1,30 @@
+import Extract from "../../operations/Extract.js";
+import Regex from "../../operations/Regex.js";
+
+
+/**
+ * Regex module.
+ *
+ * Registers the operation-name to function table for this module as OpModules.Regex.
+ * The entries point at functions on the Regex and Extract operation objects, and the
+ * (possibly pre-existing) OpModules object is the default export.
+ *
+ * Libraries:
+ *  - XRegExp
+ *
+ * @author n1474335 [n1474335@gmail.com]
+ * @copyright Crown Copyright 2018
+ * @license Apache-2.0
+ */
+let OpModules = typeof self === "undefined" ? {} : self.OpModules || {}; // reuse self.OpModules when a global `self` provides one, otherwise start from an empty object
+
+OpModules.Regex = {
+    "Regular expression":   Regex.runRegex,
+    "Find / Replace":       Regex.runFindReplace,
+    "Strings":              Extract.runStrings,
+    "Extract IP addresses": Extract.runIp,
+    "Extract email addresses": Extract.runEmail,
+    "Extract MAC addresses": Extract.runMac,
+    "Extract URLs":         Extract.runUrls,
+    "Extract domains":      Extract.runDomains,
+    "Extract file paths":   Extract.runFilePaths,
+    "Extract dates":        Extract.runDates,
+};
+
+export default OpModules;

+ 75 - 5
src/core/operations/Extract.js

@@ -1,3 +1,6 @@
+import XRegExp from "xregexp";
+
+
 /**
 /**
  * Identifier extraction operations.
  * Identifier extraction operations.
  *
  *
@@ -26,6 +29,11 @@ const Extract = {
             match;
             match;
 
 
         while ((match = searchRegex.exec(input))) {
         while ((match = searchRegex.exec(input))) {
+            // Moves pointer when an empty string is matched (prevents infinite loop)
+            if (match.index === searchRegex.lastIndex) {
+                searchRegex.lastIndex++;
+            }
+
             if (removeRegex && removeRegex.test(match[0]))
             if (removeRegex && removeRegex.test(match[0]))
                 continue;
                 continue;
             total++;
             total++;
@@ -43,7 +51,20 @@ const Extract = {
      * @constant
      * @constant
      * @default
      * @default
      */
      */
-    MIN_STRING_LEN: 3,
+    MIN_STRING_LEN: 4,
+    /**
+     * @constant
+     * @default
+     */
+    STRING_MATCH_TYPE: [
+        "[ASCII]", "Alphanumeric + punctuation (A)", "All printable chars (A)", "Null-terminated strings (A)",
+        "[Unicode]", "Alphanumeric + punctuation (U)", "All printable chars (U)", "Null-terminated strings (U)"
+    ],
+    /**
+     * @constant
+     * @default
+     */
+    ENCODING_LIST: ["Single byte", "16-bit littleendian", "16-bit bigendian", "All"],
     /**
     /**
      * @constant
      * @constant
      * @default
      * @default
@@ -58,10 +79,59 @@ const Extract = {
      * @returns {string}
      * @returns {string}
      */
      */
     runStrings: function(input, args) {
     runStrings: function(input, args) {
-        let minLen = args[0] || Extract.MIN_STRING_LEN,
-            displayTotal = args[1],
-            strings = "[A-Z\\d/\\-:.,_$%'\"()<>= !\\[\\]{}@]",
-            regex = new RegExp(strings + "{" + minLen + ",}", "ig");
+        const encoding = args[0],
+            minLen = args[1],
+            matchType = args[2],
+            displayTotal = args[3],
+            alphanumeric = "A-Z\\d",
+            punctuation = "/\\-:.,_$%'\"()<>= !\\[\\]{}@",
+            printable = "\x20-\x7e",
+            uniAlphanumeric = "\\pL\\pN",
+            uniPunctuation = "\\pP\\pZ",
+            uniPrintable = "\\pL\\pM\\pZ\\pS\\pN\\pP";
+
+        let strings = "";
+
+        switch (matchType) {
+            case "Alphanumeric + punctuation (A)":
+                strings = `[${alphanumeric + punctuation}]`;
+                break;
+            case "All printable chars (A)":
+            case "Null-terminated strings (A)":
+                strings = `[${printable}]`;
+                break;
+            case "Alphanumeric + punctuation (U)":
+                strings = `[${uniAlphanumeric + uniPunctuation}]`;
+                break;
+            case "All printable chars (U)":
+            case "Null-terminated strings (U)":
+                strings = `[${uniPrintable}]`;
+                break;
+        }
+
+        // UTF-16 support is hacked in by allowing null bytes on either side of the matched chars
+        switch (encoding) {
+            case "All":
+                strings = `(\x00?${strings}\x00?)`;
+                break;
+            case "16-bit littleendian":
+                strings = `(${strings}\x00)`;
+                break;
+            case "16-bit bigendian":
+                strings = `(\x00${strings})`;
+                break;
+            case "Single byte":
+            default:
+                break;
+        }
+
+        strings = `${strings}{${minLen},}`;
+
+        if (matchType.includes("Null-terminated")) {
+            strings += "\x00";
+        }
+
+        const regex = new XRegExp(strings, "ig");
 
 
         return Extract._search(input, regex, null, displayTotal);
         return Extract._search(input, regex, null, displayTotal);
     },
     },

+ 278 - 0
src/core/operations/Regex.js

@@ -0,0 +1,278 @@
+import XRegExp from "xregexp";
+import Utils from "../Utils.js";
+
+
+/**
+ * Regex operations.
+ *
+ * @author n1474335 [n1474335@gmail.com]
+ * @copyright Crown Copyright 2018
+ * @license Apache-2.0
+ *
+ * @namespace
+ */
+const Regex = {
+
+    /**
+     * Preset patterns offered by the "Built in regexes" argument of the
+     * "Regular expression" operation. Each entry pairs a display name with a regex
+     * source string; the "User defined" entry has an empty value.
+     *
+     * @constant
+     * @default
+     */
+    REGEX_PRE_POPULATE: [
+        {
+            name: "User defined",
+            value: ""
+        },
+        {
+            name: "IPv4 address",
+            value: "(?:(?:\\d|[01]?\\d\\d|2[0-4]\\d|25[0-5])\\.){3}(?:25[0-5]|2[0-4]\\d|[01]?\\d\\d|\\d)(?:\\/\\d{1,2})?"
+        },
+        {
+            name: "IPv6 address",
+            value: "((?=.*::)(?!.*::.+::)(::)?([\\dA-Fa-f]{1,4}:(:|\\b)|){5}|([\\dA-Fa-f]{1,4}:){6})((([\\dA-Fa-f]{1,4}((?!\\3)::|:\\b|(?![\\dA-Fa-f])))|(?!\\2\\3)){2}|(((2[0-4]|1\\d|[1-9])?\\d|25[0-5])\\.?\\b){4})"
+        },
+        {
+            name: "Email address",
+            value: "(\\w[-.\\w]*)@([-\\w]+(?:\\.[-\\w]+)*)\\.([A-Za-z]{2,4})"
+        },
+        {
+            name: "URL",
+            value: "([A-Za-z]+://)([-\\w]+(?:\\.\\w[-\\w]*)+)(:\\d+)?(/[^.!,?\"<>\\[\\]{}\\s\\x7F-\\xFF]*(?:[.!,?]+[^.!,?\"<>\\[\\]{}\\s\\x7F-\\xFF]+)*)?"
+        },
+        {
+            name: "Domain",
+            value: "\\b((?=[a-z0-9-]{1,63}\\.)(xn--)?[a-z0-9]+(-[a-z0-9]+)*\\.)+[a-z]{2,63}\\b"
+        },
+        {
+            name: "Windows file path",
+            value: "([A-Za-z]):\\\\((?:[A-Za-z\\d][A-Za-z\\d\\- \\x27_\\(\\)]{0,61}\\\\?)*[A-Za-z\\d][A-Za-z\\d\\- \\x27_\\(\\)]{0,61})(\\.[A-Za-z\\d]{1,6})?"
+        },
+        {
+            name: "UNIX file path",
+            value: "(?:/[A-Za-z\\d.][A-Za-z\\d\\-.]{0,61})+"
+        },
+        {
+            name: "MAC address",
+            value: "[A-Fa-f\\d]{2}(?:[:-][A-Fa-f\\d]{2}){5}"
+        },
+        {
+            name: "Date (yyyy-mm-dd)",
+            value: "((?:19|20)\\d\\d)[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])"
+        },
+        {
+            name: "Date (dd/mm/yyyy)",
+            value: "(0[1-9]|[12][0-9]|3[01])[- /.](0[1-9]|1[012])[- /.]((?:19|20)\\d\\d)"
+        },
+        {
+            name: "Date (mm/dd/yyyy)",
+            value: "(0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])[- /.]((?:19|20)\\d\\d)"
+        },
+        {
+            name: "Strings",
+            value: "[A-Za-z\\d/\\-:.,_$%\\x27\"()<>= !\\[\\]{}@]{4,}"
+        },
+    ],
+    /**
+     * Names of the output formats understood by runRegex, which picks the renderer
+     * by comparing its last argument against these exact strings.
+     *
+     * @constant
+     * @default
+     */
+    OUTPUT_FORMAT: ["Highlight matches", "List matches", "List capture groups", "List matches with capture groups"],
+    /**
+     * Default value of the "Display total" argument of the "Regular expression"
+     * operation.
+     *
+     * @constant
+     * @default
+     */
+    DISPLAY_TOTAL: false,
+
+    /**
+     * Regular expression operation.
+     *
+     * @param {string} input
+     * @param {Object[]} args
+     * @returns {html}
+     */
+    runRegex: function(input, args) {
+        const userRegex = args[1],
+            i = args[2],
+            m = args[3],
+            s = args[4],
+            u = args[5],
+            a = args[6],
+            displayTotal = args[7],
+            outputFormat = args[8];
+        let modifiers = "g";
+
+        if (i) modifiers += "i";
+        if (m) modifiers += "m";
+        if (s) modifiers += "s";
+        if (u) modifiers += "u";
+        if (a) modifiers += "A";
+
+        if (userRegex && userRegex !== "^" && userRegex !== "$") {
+            try {
+                const regex = new XRegExp(userRegex, modifiers);
+
+                switch (outputFormat) {
+                    case "Highlight matches":
+                        return Regex._regexHighlight(input, regex, displayTotal);
+                    case "List matches":
+                        return Utils.escapeHtml(Regex._regexList(input, regex, displayTotal, true, false));
+                    case "List capture groups":
+                        return Utils.escapeHtml(Regex._regexList(input, regex, displayTotal, false, true));
+                    case "List matches with capture groups":
+                        return Utils.escapeHtml(Regex._regexList(input, regex, displayTotal, true, true));
+                    default:
+                        return "Error: Invalid output format";
+                }
+            } catch (err) {
+                return "Invalid regex. Details: " + err.message;
+            }
+        } else {
+            return Utils.escapeHtml(input);
+        }
+    },
+
+
+    /**
+     * Option labels for the "Find" argument of "Find / Replace". runFindReplace
+     * compares the selected label against "Regex" and checks for the "Extended" prefix;
+     * any other label is treated as a plain string.
+     *
+     * @constant
+     * @default
+     */
+    SEARCH_TYPE: ["Regex", "Extended (\\n, \\t, \\x...)", "Simple string"],
+    /**
+     * Default value of the "Global match" argument of "Find / Replace".
+     *
+     * @constant
+     * @default
+     */
+    FIND_REPLACE_GLOBAL: true,
+    /**
+     * Default value of the "Case insensitive" argument of "Find / Replace".
+     *
+     * @constant
+     * @default
+     */
+    FIND_REPLACE_CASE: false,
+    /**
+     * Default value of the "Multiline matching" argument of "Find / Replace".
+     *
+     * @constant
+     * @default
+     */
+    FIND_REPLACE_MULTILINE: true,
+
+    /**
+     * Find / Replace operation.
+     *
+     * Builds a regular expression from the "Find" argument and returns the result of
+     * input.replace() with it. In "Regex" mode the find text is used as the pattern
+     * directly; in the other modes it is passed through Utils.escapeRegex first
+     * (presumably so that it matches literally - confirm in Utils). The g/i/m flags are
+     * applied in every mode, and without the global flag only the first match is replaced.
+     * The pattern is compiled with the native RegExp constructor, not XRegExp.
+     *
+     * @param {string} input
+     * @param {Object[]} args - [0] object with `string` (find text) and `option` (search
+     *     type label); [1] replacement; [2] global match; [3] case insensitive;
+     *     [4] multiline matching
+     * @returns {string}
+     */
+    runFindReplace: function(input, args) {
+        let find = args[0].string,
+            type = args[0].option,
+            replace = args[1],
+            g = args[2],
+            i = args[3],
+            m = args[4],
+            modifiers = "";
+
+        if (g) modifiers += "g";
+        if (i) modifiers += "i";
+        if (m) modifiers += "m";
+
+        if (type === "Regex") {
+            find = new RegExp(find, modifiers); // an invalid pattern throws here; the error is not caught in this function
+            return input.replace(find, replace);
+        }
+
+        if (type.indexOf("Extended") === 0) { // label starts with "Extended"
+            find = Utils.parseEscapedChars(find); // presumably expands the escape sequences named in the label - confirm in Utils
+        }
+
+        find = new RegExp(Utils.escapeRegex(find), modifiers);
+
+        return input.replace(find, replace);
+    },
+
+
+    /**
+     * Adds HTML highlights to matches within a string.
+     *
+     * @private
+     * @param {string} input
+     * @param {RegExp} regex
+     * @param {boolean} displayTotal
+     * @returns {string}
+     */
+    _regexHighlight: function(input, regex, displayTotal) {
+        let output = "",
+            m,
+            hl = 1,
+            i = 0,
+            total = 0;
+
+        while ((m = regex.exec(input))) {
+            // Moves pointer when an empty string is matched (prevents infinite loop)
+            if (m.index === regex.lastIndex) {
+                regex.lastIndex++;
+            }
+
+            // Add up to match
+            output += Utils.escapeHtml(input.slice(i, m.index));
+
+            // Add match with highlighting
+            output += "<span class='hl"+hl+"'>" + Utils.escapeHtml(m[0]) + "</span>";
+
+            // Switch highlight
+            hl = hl === 1 ? 2 : 1;
+
+            i = regex.lastIndex;
+            total++;
+        }
+
+        // Add all after final match
+        output += Utils.escapeHtml(input.slice(i, input.length));
+
+        if (displayTotal)
+            output = "Total found: " + total + "\n\n" + output;
+
+        return output;
+    },
+
+
+    /**
+     * Creates a string listing the matches within a string.
+     *
+     * Each match and/or each of its capture groups is written on its own line. When
+     * both are listed, group lines are prefixed with "  Group N: ". A capture group
+     * that did not take part in a match is written as the text "undefined". The
+     * result is not HTML-escaped here; runRegex escapes it.
+     *
+     * @private
+     * @param {string} input
+     * @param {RegExp} regex
+     * @param {boolean} displayTotal
+     * @param {boolean} matches - Display full match
+     * @param {boolean} captureGroups - Display each of the capture groups separately
+     * @returns {string} The listing with its final character removed
+     */
+    _regexList: function(input, regex, displayTotal, matches, captureGroups) {
+        let output = "",
+            total = 0,
+            match;
+
+        while ((match = regex.exec(input))) {
+            // Moves pointer when an empty string is matched (prevents infinite loop)
+            if (match.index === regex.lastIndex) {
+                regex.lastIndex++;
+            }
+
+            total++;
+            if (matches) {
+                output += match[0] + "\n";
+            }
+            if (captureGroups) {
+                for (let i = 1; i < match.length; i++) { // index 0 is the full match, groups start at 1
+                    if (matches) {
+                        output += "  Group " + i + ": ";
+                    }
+                    output += match[i] + "\n";
+                }
+            }
+        }
+
+        if (displayTotal)
+            output = "Total found: " + total + "\n\n" + output;
+
+        return output.slice(0, -1); // drops the trailing newline (no effect on an empty string)
+    },
+};
+
+export default Regex;

+ 0 - 256
src/core/operations/StrUtils.js

@@ -12,128 +12,6 @@ import Utils from "../Utils.js";
  */
  */
 const StrUtils = {
 const StrUtils = {
 
 
-    /**
-     * @constant
-     * @default
-     */
-    REGEX_PRE_POPULATE: [
-        {
-            name: "User defined",
-            value: ""
-        },
-        {
-            name: "IPv4 address",
-            value: "(?:(?:\\d|[01]?\\d\\d|2[0-4]\\d|25[0-5])\\.){3}(?:25[0-5]|2[0-4]\\d|[01]?\\d\\d|\\d)(?:\\/\\d{1,2})?"
-        },
-        {
-            name: "IPv6 address",
-            value: "((?=.*::)(?!.*::.+::)(::)?([\\dA-Fa-f]{1,4}:(:|\\b)|){5}|([\\dA-Fa-f]{1,4}:){6})((([\\dA-Fa-f]{1,4}((?!\\3)::|:\\b|(?![\\dA-Fa-f])))|(?!\\2\\3)){2}|(((2[0-4]|1\\d|[1-9])?\\d|25[0-5])\\.?\\b){4})"
-        },
-        {
-            name: "Email address",
-            value: "(\\w[-.\\w]*)@([-\\w]+(?:\\.[-\\w]+)*)\\.([A-Za-z]{2,4})"
-        },
-        {
-            name: "URL",
-            value: "([A-Za-z]+://)([-\\w]+(?:\\.\\w[-\\w]*)+)(:\\d+)?(/[^.!,?\"<>\\[\\]{}\\s\\x7F-\\xFF]*(?:[.!,?]+[^.!,?\"<>\\[\\]{}\\s\\x7F-\\xFF]+)*)?"
-        },
-        {
-            name: "Domain",
-            value: "\\b((?=[a-z0-9-]{1,63}\\.)(xn--)?[a-z0-9]+(-[a-z0-9]+)*\\.)+[a-z]{2,63}\\b"
-        },
-        {
-            name: "Windows file path",
-            value: "([A-Za-z]):\\\\((?:[A-Za-z\\d][A-Za-z\\d\\- \\x27_\\(\\)]{0,61}\\\\?)*[A-Za-z\\d][A-Za-z\\d\\- \\x27_\\(\\)]{0,61})(\\.[A-Za-z\\d]{1,6})?"
-        },
-        {
-            name: "UNIX file path",
-            value: "(?:/[A-Za-z\\d.][A-Za-z\\d\\-.]{0,61})+"
-        },
-        {
-            name: "MAC address",
-            value: "[A-Fa-f\\d]{2}(?:[:-][A-Fa-f\\d]{2}){5}"
-        },
-        {
-            name: "Date (yyyy-mm-dd)",
-            value: "((?:19|20)\\d\\d)[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])"
-        },
-        {
-            name: "Date (dd/mm/yyyy)",
-            value: "(0[1-9]|[12][0-9]|3[01])[- /.](0[1-9]|1[012])[- /.]((?:19|20)\\d\\d)"
-        },
-        {
-            name: "Date (mm/dd/yyyy)",
-            value: "(0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])[- /.]((?:19|20)\\d\\d)"
-        },
-        {
-            name: "Strings",
-            value: "[A-Za-z\\d/\\-:.,_$%\\x27\"()<>= !\\[\\]{}@]{4,}"
-        },
-    ],
-    /**
-     * @constant
-     * @default
-     */
-    REGEX_CASE_INSENSITIVE: true,
-    /**
-     * @constant
-     * @default
-     */
-    REGEX_MULTILINE_MATCHING: true,
-    /**
-     * @constant
-     * @default
-     */
-    OUTPUT_FORMAT: ["Highlight matches", "List matches", "List capture groups", "List matches with capture groups"],
-    /**
-     * @constant
-     * @default
-     */
-    DISPLAY_TOTAL: false,
-
-    /**
-     * Regular expression operation.
-     *
-     * @param {string} input
-     * @param {Object[]} args
-     * @returns {html}
-     */
-    runRegex: function(input, args) {
-        let userRegex = args[1],
-            i = args[2],
-            m = args[3],
-            displayTotal = args[4],
-            outputFormat = args[5],
-            modifiers = "g";
-
-        if (i) modifiers += "i";
-        if (m) modifiers += "m";
-
-        if (userRegex && userRegex !== "^" && userRegex !== "$") {
-            try {
-                const regex = new RegExp(userRegex, modifiers);
-
-                switch (outputFormat) {
-                    case "Highlight matches":
-                        return StrUtils._regexHighlight(input, regex, displayTotal);
-                    case "List matches":
-                        return Utils.escapeHtml(StrUtils._regexList(input, regex, displayTotal, true, false));
-                    case "List capture groups":
-                        return Utils.escapeHtml(StrUtils._regexList(input, regex, displayTotal, false, true));
-                    case "List matches with capture groups":
-                        return Utils.escapeHtml(StrUtils._regexList(input, regex, displayTotal, true, true));
-                    default:
-                        return "Error: Invalid output format";
-                }
-            } catch (err) {
-                return "Invalid regex. Details: " + err.message;
-            }
-        } else {
-            return Utils.escapeHtml(input);
-        }
-    },
-
-
     /**
     /**
      * @constant
      * @constant
      * @default
      * @default
@@ -183,62 +61,6 @@ const StrUtils = {
     },
     },
 
 
 
 
-    /**
-     * @constant
-     * @default
-     */
-    SEARCH_TYPE: ["Regex", "Extended (\\n, \\t, \\x...)", "Simple string"],
-    /**
-     * @constant
-     * @default
-     */
-    FIND_REPLACE_GLOBAL: true,
-    /**
-     * @constant
-     * @default
-     */
-    FIND_REPLACE_CASE: false,
-    /**
-     * @constant
-     * @default
-     */
-    FIND_REPLACE_MULTILINE: true,
-
-    /**
-     * Find / Replace operation.
-     *
-     * @param {string} input
-     * @param {Object[]} args
-     * @returns {string}
-     */
-    runFindReplace: function(input, args) {
-        let find = args[0].string,
-            type = args[0].option,
-            replace = args[1],
-            g = args[2],
-            i = args[3],
-            m = args[4],
-            modifiers = "";
-
-        if (g) modifiers += "g";
-        if (i) modifiers += "i";
-        if (m) modifiers += "m";
-
-        if (type === "Regex") {
-            find = new RegExp(find, modifiers);
-            return input.replace(find, replace);
-        }
-
-        if (type.indexOf("Extended") === 0) {
-            find = Utils.parseEscapedChars(find);
-        }
-
-        find = new RegExp(Utils.escapeRegex(find), modifiers);
-
-        return input.replace(find, replace);
-    },
-
-
     /**
     /**
      * @constant
      * @constant
      * @default
      * @default
@@ -576,84 +398,6 @@ const StrUtils = {
 
 
         return dist.toString();
         return dist.toString();
     },
     },
-
-
-    /**
-     * Adds HTML highlights to matches within a string.
-     *
-     * @private
-     * @param {string} input
-     * @param {RegExp} regex
-     * @param {boolean} displayTotal
-     * @returns {string}
-     */
-    _regexHighlight: function(input, regex, displayTotal) {
-        let output = "",
-            m,
-            hl = 1,
-            i = 0,
-            total = 0;
-
-        while ((m = regex.exec(input))) {
-            // Add up to match
-            output += Utils.escapeHtml(input.slice(i, m.index));
-
-            // Add match with highlighting
-            output += "<span class='hl"+hl+"'>" + Utils.escapeHtml(m[0]) + "</span>";
-
-            // Switch highlight
-            hl = hl === 1 ? 2 : 1;
-
-            i = regex.lastIndex;
-            total++;
-        }
-
-        // Add all after final match
-        output += Utils.escapeHtml(input.slice(i, input.length));
-
-        if (displayTotal)
-            output = "Total found: " + total + "\n\n" + output;
-
-        return output;
-    },
-
-
-    /**
-     * Creates a string listing the matches within a string.
-     *
-     * @private
-     * @param {string} input
-     * @param {RegExp} regex
-     * @param {boolean} displayTotal
-     * @param {boolean} matches - Display full match
-     * @param {boolean} captureGroups - Display each of the capture groups separately
-     * @returns {string}
-     */
-    _regexList: function(input, regex, displayTotal, matches, captureGroups) {
-        let output = "",
-            total = 0,
-            match;
-
-        while ((match = regex.exec(input))) {
-            total++;
-            if (matches) {
-                output += match[0] + "\n";
-            }
-            if (captureGroups) {
-                for (let i = 1; i < match.length; i++) {
-                    if (matches) {
-                        output += "  Group " + i + ": ";
-                    }
-                    output += match[i] + "\n";
-                }
-            }
-        }
-
-        if (displayTotal)
-            output = "Total found: " + total + "\n\n" + output;
-
-        return output;
-    },
 };
 };
 
 
 export default StrUtils;
 export default StrUtils;

+ 1 - 0
test/index.js

@@ -30,6 +30,7 @@ import "./tests/operations/MS.js";
 import "./tests/operations/PHP.js";
 import "./tests/operations/PHP.js";
 import "./tests/operations/NetBIOS.js";
 import "./tests/operations/NetBIOS.js";
 import "./tests/operations/OTP.js";
 import "./tests/operations/OTP.js";
+import "./tests/operations/Regex.js";
 import "./tests/operations/StrUtils.js";
 import "./tests/operations/StrUtils.js";
 import "./tests/operations/SeqUtils.js";
 import "./tests/operations/SeqUtils.js";
 
 

+ 59 - 0
test/tests/operations/Regex.js

@@ -0,0 +1,59 @@
+/**
+ * Regex tests.
+ *
+ * @author n1474335 [n1474335@gmail.com]
+ * @copyright Crown Copyright 2017
+ * @license Apache-2.0
+ */
+import TestRegister from "../../TestRegister.js";
+
+TestRegister.addTests([
+    {
+        name: "Regex: non-HTML op",
+        input: "/<>",
+        expectedOutput: "/<>",
+        recipeConfig: [
+            {
+                "op": "Regular expression",
+                "args": ["User defined", "", true, true, false, false, false, false, "Highlight matches"]
+            },
+            {
+                "op": "Remove whitespace",
+                "args": [true, true, true, true, true, false]
+            }
+        ],
+    },
+    {
+        name: "Regex: Dot matches all",
+        input: "Hello\nWorld",
+        expectedOutput: "Hello\nWorld",
+        recipeConfig: [
+            {
+                "op": "Regular expression",
+                "args": ["User defined", ".+", true, true, true, false, false, false, "List matches"]
+            }
+        ],
+    },
+    {
+        name: "Regex: Astral off",
+        input: "𝌆😆",
+        expectedOutput: "",
+        recipeConfig: [
+            {
+                "op": "Regular expression",
+                "args": ["User defined", "\\pS", true, true, false, false, false, false, "List matches"]
+            }
+        ],
+    },
+    {
+        name: "Regex: Astral on",
+        input: "𝌆😆",
+        expectedOutput: "𝌆\n😆",
+        recipeConfig: [
+            {
+                "op": "Regular expression",
+                "args": ["User defined", "\\pS", true, true, false, false, true, false, "List matches"]
+            }
+        ],
+    }
+]);

+ 0 - 15
test/tests/operations/StrUtils.js

@@ -8,21 +8,6 @@
 import TestRegister from "../../TestRegister.js";
 import TestRegister from "../../TestRegister.js";
 
 
 TestRegister.addTests([
 TestRegister.addTests([
-    {
-        name: "Regex, non-HTML op",
-        input: "/<>",
-        expectedOutput: "/<>",
-        recipeConfig: [
-            {
-                "op": "Regular expression",
-                "args": ["User defined", "", true, true, false, "Highlight matches"]
-            },
-            {
-                "op": "Remove whitespace",
-                "args": [true, true, true, true, true, false]
-            }
-        ],
-    },
     {
     {
         name: "Diff, basic usage",
         name: "Diff, basic usage",
         input: "testing23\n\ntesting123",
         input: "testing23\n\ntesting123",