瀏覽代碼

Converted all previous file signatures to the new format.

n1474335 6 年之前
父節點
當前提交
729307336e
共有 2 個文件被更改,包括 1141 次插入和 806 次删除
  1. 1136 0
      src/core/lib/FileSignatures.mjs
  2. 5 806
      src/core/lib/FileType.mjs

+ 1136 - 0
src/core/lib/FileSignatures.mjs

@@ -0,0 +1,1136 @@
+/**
+ * File signatures and extractor functions
+ *
+ * @author n1474335 [n1474335@gmail.com]
+ * @copyright Crown Copyright 2018
+ * @license Apache-2.0
+ *
+ */
+import Stream from "./Stream";
+
+/**
+ * A categorised table of file types, including signatures to identify them and functions
+ * to extract them where possible.
+ *
+ * Each top-level key is a category name that maps to an array of file type entries. Every
+ * entry carries the same fields:
+ *   - name: human-readable name of the file type
+ *   - extension: a file extension, or a comma-separated list of extensions
+ *   - mime: a MIME type, or a comma-separated list of MIME types
+ *   - description: free-text notes (empty for most entries)
+ *   - signature: a single object, or an array of alternative objects. The keys read as
+ *     byte offsets into the file and the values as the byte expected at that offset; an
+ *     array value appears to list several acceptable bytes for one offset. The matching
+ *     code is not part of this file, so confirm the exact semantics there.
+ *   - extractor: the function used to carve this file type out of a larger buffer, or
+ *     null when no extractor is available
+ *
+ * Entry order matters for some types: entries commented "Place before ..." have a
+ * signature that overlaps a more general entry listed after them, so they presumably
+ * need to be tested first.
+ */
+export const FILE_SIGNATURES = {
+    "Images": [
+        {
+            name: "Joint Photographic Experts Group image",
+            extension: "jpg",
+            mime: "image/jpeg",
+            description: "",
+            signature: {
+                0: 0xff, // Start of Image marker (ff d8) followed by the first byte of the next marker
+                1: 0xd8,
+                2: 0xff
+            },
+            extractor: extractJPEG
+        },
+        {
+            name: "Graphics Interchange Format image",
+            extension: "gif",
+            mime: "image/gif",
+            description: "",
+            signature: {
+                0: 0x47, // "GIF"
+                1: 0x49,
+                2: 0x46
+            },
+            extractor: null
+        },
+        {
+            name: "Portable Network Graphics image",
+            extension: "png",
+            mime: "image/png",
+            description: "",
+            signature: {
+                0: 0x89, // 0x89 followed by "PNG"
+                1: 0x50,
+                2: 0x4e,
+                3: 0x47
+            },
+            extractor: null
+        },
+        {
+            name: "WEBP Image",
+            extension: "webp",
+            mime: "image/webp",
+            description: "",
+            signature: {
+                8: 0x57, // "WEBP" at offset 8
+                9: 0x45,
+                10: 0x42,
+                11: 0x50
+            },
+            extractor: null
+        },
+        { // Place before tiff check: bytes 0-3 are identical to the TIFF signatures below
+            name: "Canon CR2 raw image",
+            extension: "cr2",
+            mime: "image/x-canon-cr2",
+            description: "",
+            signature: [
+                {
+                    0: 0x49, // "II*\0"
+                    1: 0x49,
+                    2: 0x2a,
+                    3: 0x0,
+                    8: 0x43, // "CR"
+                    9: 0x52
+                },
+                {
+                    0: 0x4d, // "MM\0*"
+                    1: 0x4d,
+                    2: 0x0,
+                    3: 0x2a,
+                    8: 0x43, // "CR"
+                    9: 0x52
+                }
+            ],
+            extractor: null
+        },
+        {
+            name: "Tagged Image File Format image",
+            extension: "tif",
+            mime: "image/tiff",
+            description: "",
+            signature: [
+                {
+                    0: 0x49, // "II*\0"
+                    1: 0x49,
+                    2: 0x2a,
+                    3: 0x0
+                },
+                {
+                    0: 0x4d, // "MM\0*"
+                    1: 0x4d,
+                    2: 0x0,
+                    3: 0x2a
+                }
+            ],
+            extractor: null
+        },
+        {
+            name: "Bitmap image",
+            extension: "bmp",
+            mime: "image/bmp",
+            description: "",
+            signature: {
+                0: 0x42, // "BM"
+                1: 0x4d
+            },
+            extractor: null
+        },
+        {
+            name: "JPEG Extended Range image",
+            extension: "jxr",
+            mime: "image/vnd.ms-photo",
+            description: "",
+            signature: {
+                0: 0x49, // "II" followed by 0xbc
+                1: 0x49,
+                2: 0xbc
+            },
+            extractor: null
+        },
+        {
+            name: "Photoshop image",
+            extension: "psd",
+            mime: "image/vnd.adobe.photoshop",
+            description: "",
+            signature: {
+                0: 0x38, // "8BPS"
+                1: 0x42,
+                2: 0x50,
+                3: 0x53
+            },
+            extractor: null
+        },
+        {
+            name: "Icon image",
+            extension: "ico",
+            mime: "image/x-icon",
+            description: "",
+            signature: {
+                0: 0x0,
+                1: 0x0,
+                2: 0x1,
+                3: 0x0
+            },
+            extractor: null
+        }
+    ],
+    "Video": [
+        { // Place before webm
+            name: "Matroska Multimedia Container",
+            extension: "mkv",
+            mime: "video/x-matroska",
+            description: "",
+            signature: {
+                31: 0x6d, // "matroska" at offset 31
+                32: 0x61,
+                33: 0x74,
+                34: 0x72,
+                35: 0x6f,
+                36: 0x73,
+                37: 0x6b,
+                38: 0x61
+            },
+            extractor: null
+        },
+        {
+            name: "WEBM video",
+            extension: "webm",
+            mime: "video/webm",
+            description: "",
+            signature: {
+                0: 0x1a,
+                1: 0x45,
+                2: 0xdf,
+                3: 0xa3
+            },
+            extractor: null
+        },
+        {
+            name: "MPEG-4 video",
+            extension: "mp4",
+            mime: "video/mp4",
+            description: "",
+            signature: [
+                {
+                    0: 0x0,
+                    1: 0x0,
+                    2: 0x0,
+                    3: [0x18, 0x20],
+                    4: 0x66, // "ftyp"
+                    5: 0x74,
+                    6: 0x79,
+                    7: 0x70
+                },
+                {
+                    0: 0x33, // 3gp5
+                    1: 0x67,
+                    2: 0x70,
+                    3: 0x35
+                },
+                {
+                    0: 0x0,
+                    1: 0x0,
+                    2: 0x0,
+                    3: 0x1c,
+                    4: 0x66, // "ftyp"
+                    5: 0x74,
+                    6: 0x79,
+                    7: 0x70,
+                    8: 0x6d, // "mp42"
+                    9: 0x70,
+                    10: 0x34,
+                    11: 0x32,
+                    16: 0x6d, // mp41mp42isom
+                    17: 0x70,
+                    18: 0x34,
+                    19: 0x31,
+                    20: 0x6d,
+                    21: 0x70,
+                    22: 0x34,
+                    23: 0x32,
+                    24: 0x69,
+                    25: 0x73,
+                    26: 0x6f,
+                    27: 0x6d
+                }
+            ],
+            extractor: null
+        },
+        {
+            name: "M4V video",
+            extension: "m4v",
+            mime: "video/x-m4v",
+            description: "",
+            signature: {
+                0: 0x0,
+                1: 0x0,
+                2: 0x0,
+                3: 0x1c,
+                4: 0x66, // "ftyp"
+                5: 0x74,
+                6: 0x79,
+                7: 0x70,
+                8: 0x4d, // "M4V"
+                9: 0x34,
+                10: 0x56
+            },
+            extractor: null
+        },
+        {
+            name: "Quicktime video",
+            extension: "mov",
+            mime: "video/quicktime",
+            description: "",
+            signature: {
+                0: 0x0,
+                1: 0x0,
+                2: 0x0,
+                3: 0x14,
+                4: 0x66, // "ftyp"
+                5: 0x74,
+                6: 0x79,
+                7: 0x70
+            },
+            extractor: null
+        },
+        {
+            name: "Audio Video Interleave",
+            extension: "avi",
+            mime: "video/x-msvideo",
+            description: "",
+            signature: {
+                0: 0x52, // "RIFF"
+                1: 0x49,
+                2: 0x46,
+                3: 0x46,
+                8: 0x41, // "AVI"
+                9: 0x56,
+                10: 0x49
+            },
+            extractor: null
+        },
+        {
+            name: "Windows Media Video",
+            extension: "wmv",
+            mime: "video/x-ms-wmv",
+            description: "",
+            signature: {
+                0: 0x30,
+                1: 0x26,
+                2: 0xb2,
+                3: 0x75,
+                4: 0x8e,
+                5: 0x66,
+                6: 0xcf,
+                7: 0x11,
+                8: 0xa6,
+                9: 0xd9
+            },
+            extractor: null
+        },
+        {
+            name: "MPEG video",
+            extension: "mpg",
+            mime: "video/mpeg",
+            description: "",
+            signature: {
+                0: 0x0,
+                1: 0x0,
+                2: 0x1,
+                3: 0xba
+            },
+            extractor: null
+        },
+        {
+            name: "Flash Video",
+            extension: "flv",
+            mime: "video/x-flv",
+            description: "",
+            signature: {
+                0: 0x46, // "FLV" followed by 0x01
+                1: 0x4c,
+                2: 0x56,
+                3: 0x1
+            },
+            extractor: null
+        },
+    ],
+    "Audio": [
+        {
+            name: "Waveform Audio",
+            extension: "wav",
+            mime: "audio/x-wav",
+            description: "",
+            signature: {
+                0: 0x52, // "RIFF"
+                1: 0x49,
+                2: 0x46,
+                3: 0x46,
+                8: 0x57, // "WAVE"
+                9: 0x41,
+                10: 0x56,
+                11: 0x45
+            },
+            extractor: null
+        },
+        {
+            name: "OGG audio",
+            extension: "ogg",
+            mime: "audio/ogg",
+            description: "",
+            signature: {
+                0: 0x4f, // "OggS"
+                1: 0x67,
+                2: 0x67,
+                3: 0x53
+            },
+            extractor: null
+        },
+        {
+            name: "Musical Instrument Digital Interface audio",
+            extension: "midi",
+            mime: "audio/midi",
+            description: "",
+            signature: {
+                0: 0x4d, // "MThd"
+                1: 0x54,
+                2: 0x68,
+                3: 0x64
+            },
+            extractor: null
+        },
+        {
+            name: "MPEG-3 audio",
+            extension: "mp3",
+            mime: "audio/mpeg",
+            description: "",
+            signature: [
+                {
+                    0: 0x49, // "ID3"
+                    1: 0x44,
+                    2: 0x33
+                },
+                {
+                    0: 0xff,
+                    1: 0xfb
+                }
+            ],
+            extractor: null
+        },
+        {
+            name: "MPEG-4 Part 14 audio",
+            extension: "m4a",
+            mime: "audio/m4a",
+            description: "",
+            signature: [
+                {
+                    4: 0x66, // "ftyp"
+                    5: 0x74,
+                    6: 0x79,
+                    7: 0x70,
+                    8: 0x4d, // "M4A"
+                    9: 0x34,
+                    10: 0x41
+                },
+                {
+                    0: 0x4d, // "M4A "
+                    1: 0x34,
+                    2: 0x41,
+                    3: 0x20
+                }
+            ],
+            extractor: null
+        },
+        {
+            name: "Free Lossless Audio Codec",
+            extension: "flac",
+            mime: "audio/x-flac",
+            description: "",
+            signature: {
+                0: 0x66, // "fLaC"
+                1: 0x4c,
+                2: 0x61,
+                3: 0x43
+            },
+            extractor: null
+        },
+        {
+            name: "Adaptive Multi-Rate audio codec",
+            extension: "amr",
+            mime: "audio/amr",
+            description: "",
+            signature: {
+                0: 0x23, // "#!AMR\n"
+                1: 0x21,
+                2: 0x41,
+                3: 0x4d,
+                4: 0x52,
+                5: 0x0a
+            },
+            extractor: null
+        },
+    ],
+    "Documents": [
+        {
+            name: "Portable Document Format",
+            extension: "pdf",
+            mime: "application/pdf",
+            description: "",
+            signature: {
+                0: 0x25, // "%PDF"
+                1: 0x50,
+                2: 0x44,
+                3: 0x46
+            },
+            extractor: extractPDF
+        },
+        {
+            name: "PostScript",
+            extension: "ps",
+            mime: "application/postscript",
+            description: "",
+            signature: {
+                0: 0x25, // "%!"
+                1: 0x21
+            },
+            extractor: null
+        },
+        {
+            name: "Rich Text Format",
+            extension: "rtf",
+            mime: "application/rtf",
+            description: "",
+            signature: {
+                0: 0x7b, // "{\rtf"
+                1: 0x5c,
+                2: 0x72,
+                3: 0x74,
+                4: 0x66
+            },
+            extractor: null
+        },
+        {
+            name: "Microsoft Office documents/OLE2",
+            extension: "ole2,doc,xls,dot,ppt,xla,ppa,pps,pot,msi,sdw,db,vsd,msg",
+            mime: "application/msword,application/vnd.ms-excel,application/vnd.ms-powerpoint",
+            description: "Microsoft Office documents",
+            signature: {
+                0: 0xd0,
+                1: 0xcf,
+                2: 0x11,
+                3: 0xe0,
+                4: 0xa1,
+                5: 0xb1,
+                6: 0x1a,
+                7: 0xe1
+            },
+            extractor: null
+        },
+        {
+            name: "EPUB e-book",
+            extension: "epub",
+            mime: "application/epub+zip",
+            description: "",
+            signature: {
+                0: 0x50, // "PK" followed by 0x03 0x04, the same leading bytes as the PKZIP entry
+                1: 0x4b,
+                2: 0x3,
+                3: 0x4,
+                30: 0x6d, // mimetypeapplication/epub+zip
+                31: 0x69,
+                32: 0x6d,
+                33: 0x65,
+                34: 0x74,
+                35: 0x79,
+                36: 0x70,
+                37: 0x65,
+                38: 0x61,
+                39: 0x70,
+                40: 0x70,
+                41: 0x6c,
+                42: 0x69,
+                43: 0x63,
+                44: 0x61,
+                45: 0x74,
+                46: 0x69,
+                47: 0x6f,
+                48: 0x6e,
+                49: 0x2f,
+                50: 0x65,
+                51: 0x70,
+                52: 0x75,
+                53: 0x62,
+                54: 0x2b,
+                55: 0x7a,
+                56: 0x69,
+                57: 0x70
+            },
+            extractor: null
+        },
+    ],
+    "Applications": [
+        {
+            name: "Windows Portable Executable",
+            extension: "exe,dll,drv,vxd,sys,ocx,vbx,com,fon,scr",
+            mime: "application/x-msdownload",
+            description: "",
+            signature: {
+                0: 0x4d, // "MZ"
+                1: 0x5a,
+                3: [0x0, 0x1, 0x2],
+                5: [0x0, 0x1, 0x2]
+            },
+            extractor: extractMZPE
+        },
+        {
+            name: "Executable and Linkable Format file",
+            extension: "elf,bin,axf,o,prx,so",
+            mime: "application/x-executable",
+            description: "Executable and Linkable Format file. No standard file extension.",
+            signature: {
+                0: 0x7f, // 0x7f followed by "ELF"
+                1: 0x45,
+                2: 0x4c,
+                3: 0x46
+            },
+            extractor: null
+        },
+        {
+            name: "Adobe Flash",
+            extension: "swf",
+            mime: "application/x-shockwave-flash",
+            description: "",
+            signature: {
+                0: [0x43, 0x46], // "CWS" or "FWS"
+                1: 0x57,
+                2: 0x53
+            },
+            extractor: null
+        },
+        {
+            name: "Java Class",
+            extension: "class",
+            mime: "application/java-vm",
+            description: "",
+            signature: {
+                0: 0xca, // 0xcafebabe
+                1: 0xfe,
+                2: 0xba,
+                3: 0xbe
+            },
+            extractor: null
+        },
+        {
+            name: "Dalvik Executable",
+            extension: "dex",
+            mime: "application/octet-stream",
+            description: "Dalvik Executable as used by Android",
+            signature: {
+                0: 0x64, // "dex\n035\0"
+                1: 0x65,
+                2: 0x78,
+                3: 0x0a,
+                4: 0x30,
+                5: 0x33,
+                6: 0x35,
+                7: 0x0
+            },
+            extractor: null
+        },
+        {
+            name: "Google Chrome Extension",
+            extension: "crx",
+            mime: "application/crx",
+            description: "Google Chrome extension or packaged app",
+            signature: {
+                0: 0x43, // "Cr24"
+                1: 0x72,
+                2: 0x32,
+                3: 0x34
+            },
+            extractor: null
+        },
+    ],
+    "Archives": [
+        {
+            name: "PKZIP archive",
+            extension: "zip",
+            mime: "application/zip",
+            description: "",
+            signature: {
+                0: 0x50, // "PK"
+                1: 0x4b,
+                2: [0x3, 0x5, 0x7],
+                3: [0x4, 0x6, 0x8]
+            },
+            extractor: extractZIP
+        },
+        {
+            name: "TAR archive",
+            extension: "tar",
+            mime: "application/x-tar",
+            description: "",
+            signature: {
+                257: 0x75, // "ustar" at offset 257
+                258: 0x73,
+                259: 0x74,
+                260: 0x61,
+                261: 0x72
+            },
+            extractor: null
+        },
+        {
+            name: "Roshal Archive",
+            extension: "rar",
+            mime: "application/x-rar-compressed",
+            description: "",
+            signature: {
+                0: 0x52, // "Rar!"
+                1: 0x61,
+                2: 0x72,
+                3: 0x21,
+                4: 0x1a,
+                5: 0x7,
+                6: [0x0, 0x1]
+            },
+            extractor: null
+        },
+        {
+            name: "Gzip",
+            extension: "gz",
+            mime: "application/gzip",
+            description: "",
+            signature: {
+                0: 0x1f,
+                1: 0x8b,
+                2: 0x8
+            },
+            extractor: null
+        },
+        {
+            name: "Bzip2",
+            extension: "bz2",
+            mime: "application/x-bzip2",
+            description: "",
+            signature: {
+                0: 0x42, // "BZh"
+                1: 0x5a,
+                2: 0x68
+            },
+            extractor: null
+        },
+        {
+            name: "7zip",
+            extension: "7z",
+            mime: "application/x-7z-compressed",
+            description: "",
+            signature: {
+                0: 0x37, // "7z"
+                1: 0x7a,
+                2: 0xbc,
+                3: 0xaf,
+                4: 0x27,
+                5: 0x1c
+            },
+            extractor: null
+        },
+        {
+            name: "Zlib Deflate",
+            extension: "zlib",
+            mime: "application/x-deflate",
+            description: "",
+            signature: {
+                0: 0x78,
+                1: [0x1, 0x9c, 0xda, 0x5e]
+            },
+            extractor: null
+        },
+        {
+            name: "xz compression",
+            extension: "xz",
+            mime: "application/x-xz",
+            description: "",
+            signature: {
+                0: 0xfd, // 0xfd followed by "7zXZ\0"
+                1: 0x37,
+                2: 0x7a,
+                3: 0x58,
+                4: 0x5a,
+                5: 0x0
+            },
+            extractor: null
+        },
+        {
+            name: "Tarball",
+            extension: "tar.z",
+            mime: "application/x-gtar",
+            description: "",
+            signature: {
+                0: 0x1f,
+                1: [0x9d, 0xa0]
+            },
+            extractor: null
+        },
+        {
+            name: "ISO disk image",
+            extension: "iso",
+            mime: "application/octet-stream",
+            description: "ISO 9660 CD/DVD image file",
+            signature: [
+                {
+                    0x8001: 0x43, // "CD001"
+                    0x8002: 0x44,
+                    0x8003: 0x30,
+                    0x8004: 0x30,
+                    0x8005: 0x31
+                },
+                {
+                    0x8801: 0x43, // "CD001"
+                    0x8802: 0x44,
+                    0x8803: 0x30,
+                    0x8804: 0x30,
+                    0x8805: 0x31
+                },
+                {
+                    0x9001: 0x43, // "CD001"
+                    0x9002: 0x44,
+                    0x9003: 0x30,
+                    0x9004: 0x30,
+                    0x9005: 0x31
+                }
+            ],
+            extractor: null
+        },
+        {
+            name: "Virtual Machine Disk",
+            extension: "vmdk",
+            mime: "application/vmdk,application/x-virtualbox-vmdk",
+            description: "",
+            signature: {
+                0: 0x4b, // "KDM"
+                1: 0x44,
+                2: 0x4d
+            },
+            extractor: null
+        },
+    ],
+    "Miscellaneous": [
+        {
+            name: "UTF-8 text file",
+            extension: "txt",
+            mime: "text/plain",
+            description: "UTF-8 encoded Unicode byte order mark, commonly but not exclusively seen in text files.",
+            signature: {
+                0: 0xef,
+                1: 0xbb,
+                2: 0xbf
+            },
+            extractor: null
+        },
+        { // Place before UTF-16 LE file: the first two bytes equal the UTF-16 LE signature below
+            name: "UTF-32 LE file",
+            extension: "utf32le",
+            mime: "charset/utf32le",
+            description: "Little-endian UTF-32 encoded Unicode byte order mark.",
+            signature: {
+                0: 0xff,
+                1: 0xfe,
+                2: 0x00,
+                3: 0x00
+            },
+            extractor: null
+        },
+        {
+            name: "UTF-16 LE file",
+            extension: "utf16le",
+            mime: "charset/utf16le",
+            description: "Little-endian UTF-16 encoded Unicode byte order mark.",
+            signature: {
+                0: 0xff,
+                1: 0xfe
+            },
+            extractor: null
+        },
+        {
+            name: "Web Open Font Format",
+            extension: "woff",
+            mime: "application/font-woff",
+            description: "",
+            signature: {
+                0: 0x77, // "wOFF"
+                1: 0x4f,
+                2: 0x46,
+                3: 0x46,
+                4: 0x0,
+                5: 0x1,
+                6: 0x0,
+                7: 0x0
+            },
+            extractor: null
+        },
+        {
+            name: "Web Open Font Format 2",
+            extension: "woff2",
+            mime: "application/font-woff",
+            description: "",
+            signature: {
+                0: 0x77, // "wOF2"
+                1: 0x4f,
+                2: 0x46,
+                3: 0x32,
+                4: 0x0,
+                5: 0x1,
+                6: 0x0,
+                7: 0x0
+            },
+            extractor: null
+        },
+        {
+            name: "Embedded OpenType font",
+            extension: "eot",
+            mime: "application/octet-stream",
+            description: "",
+            signature: [
+                {
+                    8: 0x2,
+                    9: 0x0,
+                    10: 0x1,
+                    34: 0x4c, // "LP"
+                    35: 0x50
+                },
+                {
+                    8: 0x1,
+                    9: 0x0,
+                    10: 0x0,
+                    34: 0x4c, // "LP"
+                    35: 0x50
+                },
+                {
+                    8: 0x2,
+                    9: 0x0,
+                    10: 0x2,
+                    34: 0x4c, // "LP"
+                    35: 0x50
+                },
+            ],
+            extractor: null
+        },
+        {
+            name: "TrueType Font",
+            extension: "ttf",
+            mime: "application/font-sfnt",
+            description: "",
+            signature: {
+                0: 0x0,
+                1: 0x1,
+                2: 0x0,
+                3: 0x0,
+                4: 0x0
+            },
+            extractor: null
+        },
+        {
+            name: "OpenType Font",
+            extension: "otf",
+            mime: "application/font-sfnt",
+            description: "",
+            signature: {
+                0: 0x4f, // "OTTO"
+                1: 0x54,
+                2: 0x54,
+                3: 0x4f,
+                4: 0x0
+            },
+            extractor: null
+        },
+        {
+            name: "SQLite",
+            extension: "sqlite",
+            mime: "application/x-sqlite3",
+            description: "",
+            signature: {
+                0: 0x53, // "SQLi"
+                1: 0x51,
+                2: 0x4c,
+                3: 0x69
+            },
+            extractor: null
+        },
+    ]
+};
+
+
+/**
+ * JPEG extractor.
+ *
+ * @param {Uint8Array} bytes
+ * @param {number} offset
+ * @returns {Uint8Array}
+ */
+export function extractJPEG(bytes, offset) {
+    const stream = new Stream(bytes.slice(offset));
+
+    while (stream.hasMore()) {
+        const marker = stream.getBytes(2);
+        if (marker[0] !== 0xff) throw new Error("Invalid JPEG marker: " + marker);
+
+        let segmentSize = 0;
+        switch (marker[1]) {
+            // No length
+            case 0xd8: // Start of Image
+            case 0x01: // For temporary use in arithmetic coding
+                break;
+            case 0xd9: // End found
+                return stream.carve();
+
+            // Variable size segment
+            case 0xc0: // Start of frame (Baseline DCT)
+            case 0xc1: // Start of frame (Extended sequential DCT)
+            case 0xc2: // Start of frame (Progressive DCT)
+            case 0xc3: // Start of frame (Lossless sequential)
+            case 0xc4: // Define Huffman Table
+            case 0xc5: // Start of frame (Differential sequential DCT)
+            case 0xc6: // Start of frame (Differential progressive DCT)
+            case 0xc7: // Start of frame (Differential lossless)
+            case 0xc8: // Reserved for JPEG extensions
+            case 0xc9: // Start of frame (Extended sequential DCT)
+            case 0xca: // Start of frame (Progressive DCT)
+            case 0xcb: // Start of frame (Lossless sequential)
+            case 0xcc: // Define arithmetic conditioning table
+            case 0xcd: // Start of frame (Differential sequential DCT)
+            case 0xce: // Start of frame (Differential progressive DCT)
+            case 0xcf: // Start of frame (Differential lossless)
+            case 0xdb: // Define Quantization Table
+            case 0xde: // Define hierarchical progression
+            case 0xe0: // Application-specific
+            case 0xe1: // Application-specific
+            case 0xe2: // Application-specific
+            case 0xe3: // Application-specific
+            case 0xe4: // Application-specific
+            case 0xe5: // Application-specific
+            case 0xe6: // Application-specific
+            case 0xe7: // Application-specific
+            case 0xe8: // Application-specific
+            case 0xe9: // Application-specific
+            case 0xea: // Application-specific
+            case 0xeb: // Application-specific
+            case 0xec: // Application-specific
+            case 0xed: // Application-specific
+            case 0xee: // Application-specific
+            case 0xef: // Application-specific
+            case 0xfe: // Comment
+                segmentSize = stream.readInt(2, "be");
+                stream.position += segmentSize - 2;
+                break;
+
+            // 1 byte
+            case 0xdf: // Expand reference image
+                stream.position++;
+                break;
+
+            // 2 bytes
+            case 0xdc: // Define number of lines
+            case 0xdd: // Define restart interval
+                stream.position += 2;
+                break;
+
+            // Start scan
+            case 0xda: // Start of scan
+                segmentSize = stream.readInt(2, "be");
+                stream.position += segmentSize - 2;
+                stream.continueUntil(0xff);
+                break;
+
+            // Continue through encoded data
+            case 0x00: // Byte stuffing
+            case 0xd0: // Restart
+            case 0xd1: // Restart
+            case 0xd2: // Restart
+            case 0xd3: // Restart
+            case 0xd4: // Restart
+            case 0xd5: // Restart
+            case 0xd6: // Restart
+            case 0xd7: // Restart
+                stream.continueUntil(0xff);
+                break;
+
+            default:
+                stream.continueUntil(0xff);
+                break;
+        }
+    }
+
+    throw new Error("Unable to parse JPEG successfully");
+}
+
+
+/**
+ * Portable executable extractor.
+ * Assumes that the offset refers to an MZ header.
+ *
+ * @param {Uint8Array} bytes
+ * @param {number} offset
+ * @returns {Uint8Array}
+ */
+export function extractMZPE(bytes, offset) {
+    const stream = new Stream(bytes.slice(offset));
+
+    // Move to PE header pointer
+    stream.moveTo(0x3c);
+    const peAddress = stream.readInt(4, "le");
+
+    // Move to PE header
+    stream.moveTo(peAddress);
+
+    // Get number of sections
+    stream.moveForwardsBy(6);
+    const numSections = stream.readInt(2, "le");
+
+    // Get optional header size
+    stream.moveForwardsBy(12);
+    const optionalHeaderSize = stream.readInt(2, "le");
+
+    // Move past optional header to section header
+    stream.moveForwardsBy(2 + optionalHeaderSize);
+
+    // Move to final section header
+    stream.moveForwardsBy((numSections - 1) * 0x28);
+
+    // Get raw data info
+    stream.moveForwardsBy(16);
+    const rawDataSize = stream.readInt(4, "le");
+    const rawDataAddress = stream.readInt(4, "le");
+
+    // Move to end of final section
+    stream.moveTo(rawDataAddress + rawDataSize);
+
+    return stream.carve();
+}
+
+
+/**
+ * PDF extractor.
+ *
+ * @param {Uint8Array} bytes
+ * @param {number} offset
+ * @returns {Uint8Array}
+ */
+export function extractPDF(bytes, offset) {
+    const stream = new Stream(bytes.slice(offset));
+
+    // Find end-of-file marker (%%EOF)
+    stream.continueUntil([0x25, 0x25, 0x45, 0x4f, 0x46]);
+    stream.moveForwardsBy(5);
+    stream.consumeIf(0x0d);
+    stream.consumeIf(0x0a);
+
+    return stream.carve();
+}
+
+
+/**
+ * ZIP extractor.
+ *
+ * @param {Uint8Array} bytes
+ * @param {number} offset
+ * @returns {Uint8Array}
+ */
+export function extractZIP(bytes, offset) {
+    const stream = new Stream(bytes.slice(offset));
+
+    // Find End of central directory record
+    stream.continueUntil([0x50, 0x4b, 0x05, 0x06]);
+
+    // Get comment length and consume
+    stream.moveForwardsBy(20);
+    const commentLength = stream.readInt(2, "le");
+    stream.moveForwardsBy(commentLength);
+
+    return stream.carve();
+}

+ 5 - 806
src/core/lib/FileType.mjs

@@ -6,256 +6,7 @@
  * @license Apache-2.0
  * @license Apache-2.0
  *
  *
  */
  */
-import Stream from "./Stream";
-
-/**
- * A categorised table of file types, including signatures to identifying them and functions
- * to extract them where possible.
- */
-const FILE_SIGNATURES = {
-    "Images": [
-        {
-            name: "JPEG Image",
-            extension: "jpg",
-            mime: "image/jpeg",
-            description: "",
-            signature: {
-                0: 0xff,
-                1: 0xd8,
-                2: 0xff
-            },
-            extractor: extractJPEG
-        },
-        {
-            name: "GIF Image",
-            extension: "gif",
-            mime: "image/gif",
-            description: "",
-            signature: {
-                0: 0x47,
-                1: 0x49,
-                2: 0x46
-            },
-            extractor: null
-        },
-        {
-            name: "PNG Image",
-            extension: "png",
-            mime: "image/png",
-            description: "",
-            signature: {
-                0: 0x89,
-                1: 0x50,
-                2: 0x4e,
-                3: 0x47
-            },
-            extractor: null
-        },
-        {
-            name: "WEBP Image",
-            extension: "webp",
-            mime: "image/webp",
-            description: "",
-            signature: {
-                8: 0x57,
-                9: 0x45,
-                10: 0x42,
-                11: 0x50
-            },
-            extractor: null
-        },
-        {
-            name: "TIFF Image",
-            extension: "tif",
-            mime: "image/tiff",
-            description: "",
-            signature: [
-                {
-                    0: 0x49,
-                    1: 0x49,
-                    2: 0x2a,
-                    3: 0x0
-                },
-                {
-                    0: 0x4d,
-                    1: 0x4d,
-                    2: 0x0,
-                    3: 0x2a
-                }
-            ],
-            extractor: null
-        }, /*
-        {
-            name: " Image",
-            extension: "",
-            mime: "image/",
-            description: "",
-            signature: {
-                0: 0x,
-                1: 0x,
-                2: 0x,
-                3: 0x
-            },
-            extractor: null
-        },
-        {
-            name: " Image",
-            extension: "",
-            mime: "image/",
-            description: "",
-            signature: {
-                0: 0x,
-                1: 0x,
-                2: 0x,
-                3: 0x
-            },
-            extractor: null
-        },
-        {
-            name: " Image",
-            extension: "",
-            mime: "image/",
-            description: "",
-            signature: {
-                0: 0x,
-                1: 0x,
-                2: 0x,
-                3: 0x
-            },
-            extractor: null
-        },
-        {
-            name: " Image",
-            extension: "",
-            mime: "image/",
-            description: "",
-            signature: {
-                0: 0x,
-                1: 0x,
-                2: 0x,
-                3: 0x
-            },
-            extractor: null
-        },
-        {
-            name: " Image",
-            extension: "",
-            mime: "image/",
-            description: "",
-            signature: {
-                0: 0x,
-                1: 0x,
-                2: 0x,
-                3: 0x
-            },
-            extractor: null
-        },
-        {
-            name: " Image",
-            extension: "",
-            mime: "image/",
-            description: "",
-            signature: {
-                0: 0x,
-                1: 0x,
-                2: 0x,
-                3: 0x
-            },
-            extractor: null
-        },*/
-    ],
-    "Video": [
-        {
-            name: "WEBM",
-            extension: "webm",
-            mime: "video/webm",
-            description: "",
-            signature: {
-                0: 0x1a,
-                1: 0x45,
-                2: 0xdf,
-                3: 0xa3
-            },
-            extractor: null
-        },
-    ],
-    "Audio": [
-        {
-            name: "WAV",
-            extension: "wav",
-            mime: "audio/x-wav",
-            description: "",
-            signature: {
-                0: 0x52,
-                1: 0x49,
-                2: 0x46,
-                3: 0x46,
-                8: 0x57,
-                9: 0x41,
-                10: 0x56,
-                11: 0x45
-            },
-            extractor: null
-        },
-        {
-            name: "OGG",
-            extension: "ogg",
-            mime: "audio/ogg",
-            description: "",
-            signature: {
-                0: 0x4f,
-                1: 0x67,
-                2: 0x67,
-                3: 0x53
-            },
-            extractor: null
-        },
-    ],
-    "Documents": [
-        {
-            name: "Portable Document Format",
-            extension: "pdf",
-            mime: "application/pdf",
-            description: "",
-            signature: {
-                0: 0x25,
-                1: 0x50,
-                2: 0x44,
-                3: 0x46
-            },
-            extractor: extractPDF
-        },
-    ],
-    "Applications": [
-        {
-            name: "Windows Portable Executable",
-            extension: "exe",
-            mime: "application/x-msdownload",
-            description: "",
-            signature: {
-                0: 0x4d,
-                1: 0x5a
-            },
-            extractor: extractMZPE
-        },
-    ],
-    "Archives": [
-        {
-            name: "ZIP",
-            extension: "zip",
-            mime: "application/zip",
-            description: "",
-            signature: {
-                0: 0x50,
-                1: 0x4b,
-                2: [0x3, 0x5, 0x7],
-                3: [0x4, 0x6, 0x8]
-            },
-            extractor: extractZIP
-        },
-
-    ],
-};
+import {FILE_SIGNATURES} from "./FileSignatures";
 
 
 
 
 /**
 /**
@@ -313,7 +64,8 @@ function bytesMatch(sig, buf) {
  * extension and mime type.
  * extension and mime type.
  *
  *
  * @param {Uint8Array} buf
  * @param {Uint8Array} buf
- * @returns {Object[]} type
+ * @returns {Object[]} types
+ * @returns {string} type.name - Name of file type
  * @returns {string} type.ext - File extension
  * @returns {string} type.ext - File extension
  * @returns {string} type.mime - Mime type
  * @returns {string} type.mime - Mime type
  * @returns {string} [type.desc] - Description
  * @returns {string} [type.desc] - Description
@@ -336,370 +88,6 @@ export function detectFileType(buf) {
         });
         });
     }
     }
     return matchingFiles;
     return matchingFiles;
-
-    // Delete all below this line once implemented in FILE_SIGNATURES above.
-
-
-    /*
-    // needs to be before `tif` check
-    if (((buf[0] === 0x49 && buf[1] === 0x49 && buf[2] === 0x2A && buf[3] === 0x0) || (buf[0] === 0x4D && buf[1] === 0x4D && buf[2] === 0x0 && buf[3] === 0x2A)) && buf[8] === 0x43 && buf[9] === 0x52) {
-        return {
-            ext: "cr2",
-            mime: "image/x-canon-cr2"
-        };
-    }
-
-    if ((buf[0] === 0x49 && buf[1] === 0x49 && buf[2] === 0x2A && buf[3] === 0x0) || (buf[0] === 0x4D && buf[1] === 0x4D && buf[2] === 0x0 && buf[3] === 0x2A)) {
-        return {
-            ext: "tif",
-            mime: "image/tiff"
-        };
-    }
-
-    if (buf[0] === 0x42 && buf[1] === 0x4D) {
-        return {
-            ext: "bmp",
-            mime: "image/bmp"
-        };
-    }
-
-    if (buf[0] === 0x49 && buf[1] === 0x49 && buf[2] === 0xBC) {
-        return {
-            ext: "jxr",
-            mime: "image/vnd.ms-photo"
-        };
-    }
-
-    if (buf[0] === 0x38 && buf[1] === 0x42 && buf[2] === 0x50 && buf[3] === 0x53) {
-        return {
-            ext: "psd",
-            mime: "image/vnd.adobe.photoshop"
-        };
-    }
-
-    // needs to be before `zip` check
-    if (buf[0] === 0x50 && buf[1] === 0x4B && buf[2] === 0x3 && buf[3] === 0x4 && buf[30] === 0x6D && buf[31] === 0x69 && buf[32] === 0x6D && buf[33] === 0x65 && buf[34] === 0x74 && buf[35] === 0x79 && buf[36] === 0x70 && buf[37] === 0x65 && buf[38] === 0x61 && buf[39] === 0x70 && buf[40] === 0x70 && buf[41] === 0x6C && buf[42] === 0x69 && buf[43] === 0x63 && buf[44] === 0x61 && buf[45] === 0x74 && buf[46] === 0x69 && buf[47] === 0x6F && buf[48] === 0x6E && buf[49] === 0x2F && buf[50] === 0x65 && buf[51] === 0x70 && buf[52] === 0x75 && buf[53] === 0x62 && buf[54] === 0x2B && buf[55] === 0x7A && buf[56] === 0x69 && buf[57] === 0x70) {
-        return {
-            ext: "epub",
-            mime: "application/epub+zip"
-        };
-    }
-
-    if (buf[257] === 0x75 && buf[258] === 0x73 && buf[259] === 0x74 && buf[260] === 0x61 && buf[261] === 0x72) {
-        return {
-            ext: "tar",
-            mime: "application/x-tar"
-        };
-    }
-
-    if (buf[0] === 0x52 && buf[1] === 0x61 && buf[2] === 0x72 && buf[3] === 0x21 && buf[4] === 0x1A && buf[5] === 0x7 && (buf[6] === 0x0 || buf[6] === 0x1)) {
-        return {
-            ext: "rar",
-            mime: "application/x-rar-compressed"
-        };
-    }
-
-    if (buf[0] === 0x1F && buf[1] === 0x8B && buf[2] === 0x8) {
-        return {
-            ext: "gz",
-            mime: "application/gzip"
-        };
-    }
-
-    if (buf[0] === 0x42 && buf[1] === 0x5A && buf[2] === 0x68) {
-        return {
-            ext: "bz2",
-            mime: "application/x-bzip2"
-        };
-    }
-
-    if (buf[0] === 0x37 && buf[1] === 0x7A && buf[2] === 0xBC && buf[3] === 0xAF && buf[4] === 0x27 && buf[5] === 0x1C) {
-        return {
-            ext: "7z",
-            mime: "application/x-7z-compressed"
-        };
-    }
-
-    if (buf[0] === 0x78 && buf[1] === 0x01) {
-        return {
-            ext: "dmg, zlib",
-            mime: "application/x-apple-diskimage, application/x-deflate"
-        };
-    }
-
-    if ((buf[0] === 0x0 && buf[1] === 0x0 && buf[2] === 0x0 && (buf[3] === 0x18 || buf[3] === 0x20) && buf[4] === 0x66 && buf[5] === 0x74 && buf[6] === 0x79 && buf[7] === 0x70) || (buf[0] === 0x33 && buf[1] === 0x67 && buf[2] === 0x70 && buf[3] === 0x35) || (buf[0] === 0x0 && buf[1] === 0x0 && buf[2] === 0x0 && buf[3] === 0x1C && buf[4] === 0x66 && buf[5] === 0x74 && buf[6] === 0x79 && buf[7] === 0x70 && buf[8] === 0x6D && buf[9] === 0x70 && buf[10] === 0x34 && buf[11] === 0x32 && buf[16] === 0x6D && buf[17] === 0x70 && buf[18] === 0x34 && buf[19] === 0x31 && buf[20] === 0x6D && buf[21] === 0x70 && buf[22] === 0x34 && buf[23] === 0x32 && buf[24] === 0x69 && buf[25] === 0x73 && buf[26] === 0x6F && buf[27] === 0x6D)) {
-        return {
-            ext: "mp4",
-            mime: "video/mp4"
-        };
-    }
-
-    if ((buf[0] === 0x0 && buf[1] === 0x0 && buf[2] === 0x0 && buf[3] === 0x1C && buf[4] === 0x66 && buf[5] === 0x74 && buf[6] === 0x79 && buf[7] === 0x70 && buf[8] === 0x4D && buf[9] === 0x34 && buf[10] === 0x56)) {
-        return {
-            ext: "m4v",
-            mime: "video/x-m4v"
-        };
-    }
-
-    if (buf[0] === 0x4D && buf[1] === 0x54 && buf[2] === 0x68 && buf[3] === 0x64) {
-        return {
-            ext: "mid",
-            mime: "audio/midi"
-        };
-    }
-
-    // needs to be before the `webm` check
-    if (buf[31] === 0x6D && buf[32] === 0x61 && buf[33] === 0x74 && buf[34] === 0x72 && buf[35] === 0x6f && buf[36] === 0x73 && buf[37] === 0x6B && buf[38] === 0x61) {
-        return {
-            ext: "mkv",
-            mime: "video/x-matroska"
-        };
-    }
-
-    if (buf[0] === 0x0 && buf[1] === 0x0 && buf[2] === 0x0 && buf[3] === 0x14 && buf[4] === 0x66 && buf[5] === 0x74 && buf[6] === 0x79 && buf[7] === 0x70) {
-        return {
-            ext: "mov",
-            mime: "video/quicktime"
-        };
-    }
-
-    if (buf[0] === 0x52 && buf[1] === 0x49 && buf[2] === 0x46 && buf[3] === 0x46 && buf[8] === 0x41 && buf[9] === 0x56 && buf[10] === 0x49) {
-        return {
-            ext: "avi",
-            mime: "video/x-msvideo"
-        };
-    }
-
-    if (buf[0] === 0x30 && buf[1] === 0x26 && buf[2] === 0xB2 && buf[3] === 0x75 && buf[4] === 0x8E && buf[5] === 0x66 && buf[6] === 0xCF && buf[7] === 0x11 && buf[8] === 0xA6 && buf[9] === 0xD9) {
-        return {
-            ext: "wmv",
-            mime: "video/x-ms-wmv"
-        };
-    }
-
-    if (buf[0] === 0x0 && buf[1] === 0x0 && buf[2] === 0x1 && buf[3].toString(16)[0] === "b") {
-        return {
-            ext: "mpg",
-            mime: "video/mpeg"
-        };
-    }
-
-    if ((buf[0] === 0x49 && buf[1] === 0x44 && buf[2] === 0x33) || (buf[0] === 0xFF && buf[1] === 0xfb)) {
-        return {
-            ext: "mp3",
-            mime: "audio/mpeg"
-        };
-    }
-
-    if ((buf[4] === 0x66 && buf[5] === 0x74 && buf[6] === 0x79 && buf[7] === 0x70 && buf[8] === 0x4D && buf[9] === 0x34 && buf[10] === 0x41) || (buf[0] === 0x4D && buf[1] === 0x34 && buf[2] === 0x41 && buf[3] === 0x20)) {
-        return {
-            ext: "m4a",
-            mime: "audio/m4a"
-        };
-    }
-
-    if (buf[0] === 0x66 && buf[1] === 0x4C && buf[2] === 0x61 && buf[3] === 0x43) {
-        return {
-            ext: "flac",
-            mime: "audio/x-flac"
-        };
-    }
-
-    if (buf[0] === 0x23 && buf[1] === 0x21 && buf[2] === 0x41 && buf[3] === 0x4D && buf[4] === 0x52 && buf[5] === 0x0A) {
-        return {
-            ext: "amr",
-            mime: "audio/amr"
-        };
-    }
-
-    if ((buf[0] === 0x43 || buf[0] === 0x46) && buf[1] === 0x57 && buf[2] === 0x53) {
-        return {
-            ext: "swf",
-            mime: "application/x-shockwave-flash"
-        };
-    }
-
-    if (buf[0] === 0x7B && buf[1] === 0x5C && buf[2] === 0x72 && buf[3] === 0x74 && buf[4] === 0x66) {
-        return {
-            ext: "rtf",
-            mime: "application/rtf"
-        };
-    }
-
-    if (buf[0] === 0x77 && buf[1] === 0x4F && buf[2] === 0x46 && buf[3] === 0x46 && buf[4] === 0x00 && buf[5] === 0x01 && buf[6] === 0x00 && buf[7] === 0x00) {
-        return {
-            ext: "woff",
-            mime: "application/font-woff"
-        };
-    }
-
-    if (buf[0] === 0x77 && buf[1] === 0x4F && buf[2] === 0x46 && buf[3] === 0x32 && buf[4] === 0x00 && buf[5] === 0x01 && buf[6] === 0x00 && buf[7] === 0x00) {
-        return {
-            ext: "woff2",
-            mime: "application/font-woff"
-        };
-    }
-
-    if (buf[34] === 0x4C && buf[35] === 0x50 && ((buf[8] === 0x02 && buf[9] === 0x00 && buf[10] === 0x01) || (buf[8] === 0x01 && buf[9] === 0x00 && buf[10] === 0x00) || (buf[8] === 0x02 && buf[9] === 0x00 && buf[10] === 0x02))) {
-        return {
-            ext: "eot",
-            mime: "application/octet-stream"
-        };
-    }
-
-    if (buf[0] === 0x00 && buf[1] === 0x01 && buf[2] === 0x00 && buf[3] === 0x00 && buf[4] === 0x00) {
-        return {
-            ext: "ttf",
-            mime: "application/font-sfnt"
-        };
-    }
-
-    if (buf[0] === 0x4F && buf[1] === 0x54 && buf[2] === 0x54 && buf[3] === 0x4F && buf[4] === 0x00) {
-        return {
-            ext: "otf",
-            mime: "application/font-sfnt"
-        };
-    }
-
-    if (buf[0] === 0x00 && buf[1] === 0x00 && buf[2] === 0x01 && buf[3] === 0x00) {
-        return {
-            ext: "ico",
-            mime: "image/x-icon"
-        };
-    }
-
-    if (buf[0] === 0x46 && buf[1] === 0x4C && buf[2] === 0x56 && buf[3] === 0x01) {
-        return {
-            ext: "flv",
-            mime: "video/x-flv"
-        };
-    }
-
-    if (buf[0] === 0x25 && buf[1] === 0x21) {
-        return {
-            ext: "ps",
-            mime: "application/postscript"
-        };
-    }
-
-    if (buf[0] === 0xFD && buf[1] === 0x37 && buf[2] === 0x7A && buf[3] === 0x58 && buf[4] === 0x5A && buf[5] === 0x00) {
-        return {
-            ext: "xz",
-            mime: "application/x-xz"
-        };
-    }
-
-    if (buf[0] === 0x53 && buf[1] === 0x51 && buf[2] === 0x4C && buf[3] === 0x69) {
-        return {
-            ext: "sqlite",
-            mime: "application/x-sqlite3"
-        };
-    }
-    */
-
-    /**
-     *
-     * Added by n1474335 [n1474335@gmail.com] from here on
-     *
-     */
-    /*
-    if ((buf[0] === 0x1F && buf[1] === 0x9D) || (buf[0] === 0x1F && buf[1] === 0xA0)) {
-        return {
-            ext: "z, tar.z",
-            mime: "application/x-gtar"
-        };
-    }
-
-    if (buf[0] === 0x7F && buf[1] === 0x45 && buf[2] === 0x4C && buf[3] === 0x46) {
-        return {
-            ext: "none, axf, bin, elf, o, prx, puff, so",
-            mime: "application/x-executable",
-            desc: "Executable and Linkable Format file. No standard file extension."
-        };
-    }
-
-    if (buf[0] === 0xCA && buf[1] === 0xFE && buf[2] === 0xBA && buf[3] === 0xBE) {
-        return {
-            ext: "class",
-            mime: "application/java-vm"
-        };
-    }
-
-    if (buf[0] === 0xEF && buf[1] === 0xBB && buf[2] === 0xBF) {
-        return {
-            ext: "txt",
-            mime: "text/plain",
-            desc: "UTF-8 encoded Unicode byte order mark detected, commonly but not exclusively seen in text files."
-        };
-    }
-
-    // Must be before Little-endian UTF-16 BOM
-    if (buf[0] === 0xFF && buf[1] === 0xFE && buf[2] === 0x00 && buf[3] === 0x00) {
-        return {
-            ext: "UTF32LE",
-            mime: "charset/utf32le",
-            desc: "Little-endian UTF-32 encoded Unicode byte order mark detected."
-        };
-    }
-
-    if (buf[0] === 0xFF && buf[1] === 0xFE) {
-        return {
-            ext: "UTF16LE",
-            mime: "charset/utf16le",
-            desc: "Little-endian UTF-16 encoded Unicode byte order mark detected."
-        };
-    }
-
-    if ((buf[0x8001] === 0x43 && buf[0x8002] === 0x44 && buf[0x8003] === 0x30 && buf[0x8004] === 0x30 && buf[0x8005] === 0x31) ||
-        (buf[0x8801] === 0x43 && buf[0x8802] === 0x44 && buf[0x8803] === 0x30 && buf[0x8804] === 0x30 && buf[0x8805] === 0x31) ||
-        (buf[0x9001] === 0x43 && buf[0x9002] === 0x44 && buf[0x9003] === 0x30 && buf[0x9004] === 0x30 && buf[0x9005] === 0x31)) {
-        return {
-            ext: "iso",
-            mime: "application/octet-stream",
-            desc: "ISO 9660 CD/DVD image file"
-        };
-    }
-
-    if (buf[0] === 0xD0 && buf[1] === 0xCF && buf[2] === 0x11 && buf[3] === 0xE0 && buf[4] === 0xA1 && buf[5] === 0xB1 && buf[6] === 0x1A && buf[7] === 0xE1) {
-        return {
-            ext: "doc, xls, ppt",
-            mime: "application/msword, application/vnd.ms-excel, application/vnd.ms-powerpoint",
-            desc: "Microsoft Office documents"
-        };
-    }
-
-    if (buf[0] === 0x64 && buf[1] === 0x65 && buf[2] === 0x78 && buf[3] === 0x0A && buf[4] === 0x30 && buf[5] === 0x33 && buf[6] === 0x35 && buf[7] === 0x00) {
-        return {
-            ext: "dex",
-            mime: "application/octet-stream",
-            desc: "Dalvik Executable (Android)"
-        };
-    }
-
-    if (buf[0] === 0x4B && buf[1] === 0x44 && buf[2] === 0x4D) {
-        return {
-            ext: "vmdk",
-            mime: "application/vmdk, application/x-virtualbox-vmdk"
-        };
-    }
-
-    if (buf[0] === 0x43 && buf[1] === 0x72 && buf[2] === 0x32 && buf[3] === 0x34) {
-        return {
-            ext: "crx",
-            mime: "application/crx",
-            desc: "Google Chrome extension or packaged app"
-        };
-    }
-
-    if (buf[0] === 0x78 && (buf[1] === 0x01 || buf[1] === 0x9C || buf[1] === 0xDA || buf[1] === 0x5e)) {
-        return {
-            ext: "zlib",
-            mime: "application/x-deflate"
-        };
-    }
-
-    return null;
-    */
 }
 }
 
 
 
 
@@ -750,198 +138,9 @@ export function isImage(buf) {
 export function extractFile(bytes, fileDetail, offset) {
 export function extractFile(bytes, fileDetail, offset) {
     if (fileDetail.extractor) {
     if (fileDetail.extractor) {
         const fileData = fileDetail.extractor(bytes, offset);
         const fileData = fileDetail.extractor(bytes, offset);
-        return new File([fileData], `extracted_at_0x${offset.toString(16)}.${fileDetail.extension}`);
+        const ext = fileDetail.extension.split(",")[0];
+        return new File([fileData], `extracted_at_0x${offset.toString(16)}.${ext}`);
     }
     }
 
 
     throw new Error(`No extraction algorithm available for "${fileDetail.mime}" files`);
     throw new Error(`No extraction algorithm available for "${fileDetail.mime}" files`);
 }
 }
-
-
-/**
- * JPEG extractor.
- *
- * @param {Uint8Array} bytes
- * @param {number} offset
- * @returns {Uint8Array}
- */
-export function extractJPEG(bytes, offset) {
-    const stream = new Stream(bytes.slice(offset));
-
-    while (stream.hasMore()) {
-        const marker = stream.getBytes(2);
-        if (marker[0] !== 0xff) throw new Error("Invalid JPEG marker: " + marker);
-
-        let segmentSize = 0;
-        switch (marker[1]) {
-            // No length
-            case 0xd8: // Start of Image
-            case 0x01: // For temporary use in arithmetic coding
-                break;
-            case 0xd9: // End found
-                return stream.carve();
-
-            // Variable size segment
-            case 0xc0: // Start of frame (Baseline DCT)
-            case 0xc1: // Start of frame (Extended sequential DCT)
-            case 0xc2: // Start of frame (Progressive DCT)
-            case 0xc3: // Start of frame (Lossless sequential)
-            case 0xc4: // Define Huffman Table
-            case 0xc5: // Start of frame (Differential sequential DCT)
-            case 0xc6: // Start of frame (Differential progressive DCT)
-            case 0xc7: // Start of frame (Differential lossless)
-            case 0xc8: // Reserved for JPEG extensions
-            case 0xc9: // Start of frame (Extended sequential DCT)
-            case 0xca: // Start of frame (Progressive DCT)
-            case 0xcb: // Start of frame (Lossless sequential)
-            case 0xcc: // Define arithmetic conditioning table
-            case 0xcd: // Start of frame (Differential sequential DCT)
-            case 0xce: // Start of frame (Differential progressive DCT)
-            case 0xcf: // Start of frame (Differential lossless)
-            case 0xdb: // Define Quantization Table
-            case 0xde: // Define hierarchical progression
-            case 0xe0: // Application-specific
-            case 0xe1: // Application-specific
-            case 0xe2: // Application-specific
-            case 0xe3: // Application-specific
-            case 0xe4: // Application-specific
-            case 0xe5: // Application-specific
-            case 0xe6: // Application-specific
-            case 0xe7: // Application-specific
-            case 0xe8: // Application-specific
-            case 0xe9: // Application-specific
-            case 0xea: // Application-specific
-            case 0xeb: // Application-specific
-            case 0xec: // Application-specific
-            case 0xed: // Application-specific
-            case 0xee: // Application-specific
-            case 0xef: // Application-specific
-            case 0xfe: // Comment
-                segmentSize = stream.readInt(2, "be");
-                stream.position += segmentSize - 2;
-                break;
-
-            // 1 byte
-            case 0xdf: // Expand reference image
-                stream.position++;
-                break;
-
-            // 2 bytes
-            case 0xdc: // Define number of lines
-            case 0xdd: // Define restart interval
-                stream.position += 2;
-                break;
-
-            // Start scan
-            case 0xda: // Start of scan
-                segmentSize = stream.readInt(2, "be");
-                stream.position += segmentSize - 2;
-                stream.continueUntil(0xff);
-                break;
-
-            // Continue through encoded data
-            case 0x00: // Byte stuffing
-            case 0xd0: // Restart
-            case 0xd1: // Restart
-            case 0xd2: // Restart
-            case 0xd3: // Restart
-            case 0xd4: // Restart
-            case 0xd5: // Restart
-            case 0xd6: // Restart
-            case 0xd7: // Restart
-                stream.continueUntil(0xff);
-                break;
-
-            default:
-                stream.continueUntil(0xff);
-                break;
-        }
-    }
-
-    throw new Error("Unable to parse JPEG successfully");
-}
-
-
-/**
- * Portable executable extractor.
- * Assumes that the offset refers to an MZ header.
- *
- * @param {Uint8Array} bytes
- * @param {number} offset
- * @returns {Uint8Array}
- */
-export function extractMZPE(bytes, offset) {
-    const stream = new Stream(bytes.slice(offset));
-
-    // Move to PE header pointer
-    stream.moveTo(0x3c);
-    const peAddress = stream.readInt(4, "le");
-
-    // Move to PE header
-    stream.moveTo(peAddress);
-
-    // Get number of sections
-    stream.moveForwardsBy(6);
-    const numSections = stream.readInt(2, "le");
-
-    // Get optional header size
-    stream.moveForwardsBy(12);
-    const optionalHeaderSize = stream.readInt(2, "le");
-
-    // Move past optional header to section header
-    stream.moveForwardsBy(2 + optionalHeaderSize);
-
-    // Move to final section header
-    stream.moveForwardsBy((numSections - 1) * 0x28);
-
-    // Get raw data info
-    stream.moveForwardsBy(16);
-    const rawDataSize = stream.readInt(4, "le");
-    const rawDataAddress = stream.readInt(4, "le");
-
-    // Move to end of final section
-    stream.moveTo(rawDataAddress + rawDataSize);
-
-    return stream.carve();
-}
-
-
-/**
- * PDF extractor.
- *
- * @param {Uint8Array} bytes
- * @param {number} offset
- * @returns {Uint8Array}
- */
-export function extractPDF(bytes, offset) {
-    const stream = new Stream(bytes.slice(offset));
-
-    // Find end-of-file marker (%%EOF)
-    stream.continueUntil([0x25, 0x25, 0x45, 0x4f, 0x46]);
-    stream.moveForwardsBy(5);
-    stream.consumeIf(0x0d);
-    stream.consumeIf(0x0a);
-
-    return stream.carve();
-}
-
-
-/**
- * ZIP extractor.
- *
- * @param {Uint8Array} bytes
- * @param {number} offset
- * @returns {Uint8Array}
- */
-export function extractZIP(bytes, offset) {
-    const stream = new Stream(bytes.slice(offset));
-
-    // Find End of central directory record
-    stream.continueUntil([0x50, 0x4b, 0x05, 0x06]);
-
-    // Get comment length and consume
-    stream.moveForwardsBy(20);
-    const commentLength = stream.readInt(2, "le");
-    stream.moveForwardsBy(commentLength);
-
-    return stream.carve();
-}