Browse Source

'Strings' now supports several match types in both ASCII and Unicode

n1474335 7 years ago
parent
commit
f2c073798b
2 changed files with 61 additions and 16 deletions
  1. src/core/config/OperationConfig.js (10 additions, 5 deletions)
  2. src/core/operations/Extract.js (51 additions, 11 deletions)

+ 10 - 5
src/core/config/OperationConfig.js

@@ -2164,20 +2164,25 @@ const OperationConfig = {
         inputType: "string",
         inputType: "string",
         outputType: "string",
         outputType: "string",
         args: [
         args: [
+            {
+                name: "Encoding",
+                type: "option",
+                value: Extract.ENCODING_LIST
+            },
             {
             {
                 name: "Minimum length",
                 name: "Minimum length",
                 type: "number",
                 type: "number",
                 value: Extract.MIN_STRING_LEN
                 value: Extract.MIN_STRING_LEN
             },
             },
+            {
+                name: "Match",
+                type: "option",
+                value: Extract.STRING_MATCH_TYPE
+            },
             {
             {
                 name: "Display total",
                 name: "Display total",
                 type: "boolean",
                 type: "boolean",
                 value: Extract.DISPLAY_TOTAL
                 value: Extract.DISPLAY_TOTAL
-            },
-            {
-                name: "Encoding",
-                type: "option",
-                value: Extract.ENCODING_LIST
             }
             }
         ]
         ]
     },
     },

+ 51 - 11
src/core/operations/Extract.js

@@ -51,17 +51,25 @@ const Extract = {
      * @constant
      * @constant
      * @default
      * @default
      */
      */
-    MIN_STRING_LEN: 3,
+    MIN_STRING_LEN: 4,
     /**
     /**
      * @constant
      * @constant
      * @default
      * @default
      */
      */
-    DISPLAY_TOTAL: false,
+    STRING_MATCH_TYPE: [
+        "[ASCII]", "Alphanumeric + punctuation (A)", "All printable chars (A)", "Null-terminated strings (A)",
+        "[Unicode]", "Alphanumeric + punctuation (U)", "All printable chars (U)", "Null-terminated strings (U)"
+    ],
+    /**
+     * @constant
+     * @default
+     */
+    ENCODING_LIST: ["Single byte", "16-bit littleendian", "16-bit bigendian", "All"],
     /**
     /**
      * @constant
      * @constant
      * @default
      * @default
      */
      */
-    ENCODING_LIST: ["All", "Single byte", "16-bit littleendian", "16-bit bigendian"],
+    DISPLAY_TOTAL: false,
 
 
     /**
     /**
      * Strings operation.
      * Strings operation.
@@ -71,27 +79,59 @@ const Extract = {
      * @returns {string}
      * @returns {string}
      */
      */
     runStrings: function(input, args) {
     runStrings: function(input, args) {
-        const minLen = args[0] || Extract.MIN_STRING_LEN,
-            displayTotal = args[1],
-            encoding = args[2];
-        let strings = "[A-Z\\d/\\-:.,_$%'\"()<>= !\\[\\]{}@]";
+        const encoding = args[0],
+            minLen = args[1],
+            matchType = args[2],
+            displayTotal = args[3],
+            alphanumeric = "A-Z\\d",
+            punctuation = "/\\-:.,_$%'\"()<>= !\\[\\]{}@",
+            printable = "\x20-\x7e",
+            uniAlphanumeric = "\\pL\\pN",
+            uniPunctuation = "\\pP\\pZ",
+            uniPrintable = "\\pL\\pM\\pZ\\pS\\pN\\pP";
+
+        let strings = "";
+
+        switch (matchType) {
+            case "Alphanumeric + punctuation (A)":
+                strings = `[${alphanumeric + punctuation}]`;
+                break;
+            case "All printable chars (A)":
+            case "Null-terminated strings (A)":
+                strings = `[${printable}]`;
+                break;
+            case "Alphanumeric + punctuation (U)":
+                strings = `[${uniAlphanumeric + uniPunctuation}]`;
+                break;
+            case "All printable chars (U)":
+            case "Null-terminated strings (U)":
+                strings = `[${uniPrintable}]`;
+                break;
+        }
 
 
+        // UTF-16 support is hacked in by allowing null bytes on either side of the matched chars
         switch (encoding) {
         switch (encoding) {
             case "All":
             case "All":
-                strings = "(\x00?" + strings + "\x00?)";
+                strings = `(\x00?${strings}\x00?)`;
                 break;
                 break;
             case "16-bit littleendian":
             case "16-bit littleendian":
-                strings = "(" + strings + "\x00)";
+                strings = `(${strings}\x00)`;
                 break;
                 break;
             case "16-bit bigendian":
             case "16-bit bigendian":
-                strings = "(\x00" + strings + ")";
+                strings = `(\x00${strings})`;
                 break;
                 break;
             case "Single byte":
             case "Single byte":
             default:
             default:
                 break;
                 break;
         }
         }
 
 
-        const regex = new XRegExp(strings + "{" + minLen + ",}", "ig");
+        strings = `${strings}{${minLen},}`;
+
+        if (matchType.includes("Null-terminated")) {
+            strings += "\x00";
+        }
+
+        const regex = new XRegExp(strings, "ig");
 
 
         return Extract._search(input, regex, null, displayTotal);
         return Extract._search(input, regex, null, displayTotal);
     },
     },