Browse Source

Began implementing GZIP/DEFLATE extraction. Unfinished.

n1474335 6 years ago
parent
commit
2a6db47aeb
2 changed files with 167 additions and 7 deletions
  1. 104 5
      src/core/lib/FileSignatures.mjs
  2. 63 2
      src/core/lib/Stream.mjs

+ 104 - 5
src/core/lib/FileSignatures.mjs

@@ -650,7 +650,7 @@ export const FILE_SIGNATURES = {
                 56: 0x69,
                 57: 0x70
             },
-            extractor: null
+            extractor: extractZIP
         },
     ],
     "Applications": [
@@ -790,7 +790,7 @@ export const FILE_SIGNATURES = {
                 1: 0x8b,
                 2: 0x8
             },
-            extractor: null
+            extractor: extractGZIP
         },
         {
             name: "Bzip2",
@@ -1309,7 +1309,7 @@ export function extractFLV(bytes, offset) {
     let tagSize = -11; // Fake size of previous tag header
     while (stream.hasMore()) {
         const prevTagSize = stream.readInt(4, "be");
-        const tagType = stream.readInt(1, "be");
+        const tagType = stream.readInt(1);
 
         if ([8, 9, 18].indexOf(tagType) < 0) {
             // This tag is not valid
@@ -1346,14 +1346,14 @@ export function extractRTF(bytes, offset) {
 
     let openTags = 0;
 
-    if (stream.readInt(1, "be") !== 0x7b) { // {
+    if (stream.readInt(1) !== 0x7b) { // {
         throw new Error("Not a valid RTF file");
     } else {
         openTags++;
     }
 
     while (openTags > 0 && stream.hasMore()) {
-        switch (stream.readInt(1, "be")) {
+        switch (stream.readInt(1)) {
             case 0x7b: // {
                 openTags++;
                 break;
@@ -1372,3 +1372,102 @@ export function extractRTF(bytes, offset) {
 
     return stream.carve();
 }
+
+
+/**
+ * GZIP extractor.
+ *
+ * @param {Uint8Array} bytes
+ * @param {number} offset
+ * @returns {Uint8Array}
+ */
+export function extractGZIP(bytes, offset) {
+    const stream = new Stream(bytes.slice(offset));
+
+    /* HEADER */
+
+    // Skip over signature and compression method
+    stream.moveForwardsBy(3);
+
+    // Read flags
+    const flags = stream.readInt(1);
+
+    // Skip over last modification time
+    stream.moveForwardsBy(4);
+
+    // Read compression flags
+    const compressionFlags = stream.readInt(1);
+
+    // Skip over OS
+    stream.moveForwardsBy(1);
+
+
+    /* OPTIONAL HEADERS */
+
+    // Extra fields
+    if (flags & 0x4) {
+        console.log("Extra fields");
+        const extraFieldsSize = stream.readInt(2, "le");
+        stream.moveForwardsby(extraFieldsSize);
+    }
+
+    // Original filename
+    if (flags & 0x8) {
+        console.log("Filename");
+        stream.continueUntil(0x00);
+        stream.moveForwardsBy(1);
+    }
+
+    // Comment
+    if (flags & 0x10) {
+        console.log("Comment");
+        stream.continueUntil(0x00);
+        stream.moveForwardsBy(1);
+    }
+
+    // Checksum
+    if (flags & 0x2) {
+        console.log("Checksum");
+        stream.moveForwardsBy(2);
+    }
+
+
+    /* DEFLATE DATA */
+
+    let finalBlock = 0;
+
+    while (!finalBlock) {
+        // Read header
+        const blockHeader = stream.readBits(3);
+
+        finalBlock = blockHeader & 0x1;
+        const blockType = blockHeader & 0x6;
+
+        if (blockType === 0) {
+            // No compression
+            stream.moveForwardsBy(1);
+            const blockLength = stream.readInt(2, "le");
+            console.log("No compression. Length: " + blockLength);
+            stream.moveForwardsBy(2 + blockLength);
+        } else if (blockType === 1) {
+            // Fixed Huffman
+
+        } else if (blockType === 2) {
+            // Dynamic Huffman
+
+        } else {
+            throw new Error("Invalid block type");
+            break;
+        }
+    }
+
+
+    /* FOOTER */
+
+    // Skip over checksum and size of original uncompressed input
+    stream.moveForwardsBy(8);
+
+    console.log(stream.position);
+
+    return stream.carve();
+}

+ 63 - 2
src/core/lib/Stream.mjs

@@ -21,8 +21,9 @@ export default class Stream {
      */
     constructor(input) {
         this.bytes = input;
-        this.position = 0;
         this.length = this.bytes.length;
+        this.position = 0;
+        this.bitPos = 0;
     }
 
     /**
@@ -37,6 +38,7 @@ export default class Stream {
         const newPosition = this.position + numBytes;
         const bytes = this.bytes.slice(this.position, newPosition);
         this.position = newPosition;
+        this.bitPos = 0;
         return bytes;
     }
 
@@ -57,6 +59,7 @@ export default class Stream {
             result += String.fromCharCode(currentByte);
         }
         this.position += numBytes;
+        this.bitPos = 0;
         return result;
     }
 
@@ -83,9 +86,59 @@ export default class Stream {
             }
         }
         this.position += numBytes;
+        this.bitPos = 0;
         return val;
     }
 
+
+    /**
+     * Reads a number of bits from the buffer.
+     *
+     * @TODO Add endianness
+     *
+     * @param {number} numBits
+     * @returns {number}
+     */
+    readBits(numBits) {
+        if (this.position > this.length) return undefined;
+
+        let bitBuf = 0,
+            bitBufLen = 0;
+
+        // Add remaining bits from current byte
+        bitBuf = this.bytes[this.position++] & bitMask(this.bitPos);
+        bitBufLen = 8 - this.bitPos;
+        this.bitPos = 0;
+
+        // Not enough bits yet
+        while (bitBufLen < numBits) {
+            bitBuf |= this.bytes[this.position++] << bitBufLen;
+            bitBufLen += 8;
+        }
+
+        // Reverse back to numBits
+        if (bitBufLen > numBits) {
+            const excess = bitBufLen - numBits;
+            bitBuf >>>= excess;
+            bitBufLen -= excess;
+            this.position--;
+            this.bitPos = 8 - excess;
+        }
+
+        return bitBuf;
+
+        /**
+         * Calculates the bit mask based on the current bit position.
+         *
+         * @param {number} bitPos
+         * @returns {number} The bit mask
+         */
+        function bitMask(bitPos) {
+            return (1 << (8 - bitPos)) - 1;
+        }
+    }
+
+
     /**
      * Consume the stream until we reach the specified byte or sequence of bytes.
      *
@@ -94,6 +147,8 @@ export default class Stream {
     continueUntil(val) {
         if (this.position > this.length) return;
 
+        this.bitPos = 0;
+
         if (typeof val === "number") {
             while (++this.position < this.length && this.bytes[this.position] !== val) {
                 continue;
@@ -121,8 +176,10 @@ export default class Stream {
      * @param {number} val
      */
     consumeIf(val) {
-        if (this.bytes[this.position] === val)
+        if (this.bytes[this.position] === val) {
             this.position++;
+            this.bitPos = 0;
+        }
     }
 
     /**
@@ -135,6 +192,7 @@ export default class Stream {
         if (pos < 0 || pos > this.length)
             throw new Error("Cannot move to position " + pos + " in stream. Out of bounds.");
         this.position = pos;
+        this.bitPos = 0;
     }
 
 
@@ -148,6 +206,7 @@ export default class Stream {
         if (pos < 0 || pos > this.length)
             throw new Error("Cannot move to position " + pos + " in stream. Out of bounds.");
         this.position = pos;
+        this.bitPos = 0;
     }
 
     /**
@@ -159,6 +218,7 @@ export default class Stream {
         if (pos < 0 || pos > this.length)
             throw new Error("Cannot move to position " + pos + " in stream. Out of bounds.");
         this.position = pos;
+        this.bitPos = 0;
     }
 
     /**
@@ -176,6 +236,7 @@ export default class Stream {
      * @returns {Uint8Array}
      */
     carve() {
+        if (this.bitPos > 0) this.position++;
         return this.bytes.slice(0, this.position);
     }