Переглянути джерело

LibCompress: Implement GZip compression

This commit implements a stream compressor for the gzip
specification (RFC 1952), which is essentially a thin
wrapper around the DEFLATE compression format.
Idan Horowitz 4 роки тому
батько
коміт
135751c3a2

+ 57 - 2
Userland/Libraries/LibCompress/Gzip.cpp

@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2020, the SerenityOS developers.
+ * Copyright (c) 2021, Idan Horowitz <idan.horowitz@gmail.com>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -36,12 +37,12 @@ bool GzipDecompressor::is_likely_compressed(ReadonlyBytes bytes)
     return bytes.size() >= 2 && bytes[0] == gzip_magic_1 && bytes[1] == gzip_magic_2;
 }
 
-bool GzipDecompressor::BlockHeader::valid_magic_number() const
+bool BlockHeader::valid_magic_number() const // true only when both identification bytes equal the gzip magic pair
 {
     return identification_1 == gzip_magic_1 && identification_2 == gzip_magic_2;
 }
 
-bool GzipDecompressor::BlockHeader::supported_by_implementation() const
+bool BlockHeader::supported_by_implementation() const
 {
     if (compression_method != 0x08) {
         // RFC 1952 does not define any compression methods other than deflate.
@@ -187,4 +188,58 @@ Optional<ByteBuffer> GzipDecompressor::decompress_all(ReadonlyBytes bytes)
 
 bool GzipDecompressor::unreliable_eof() const { return m_eof; }
 
+// Binds the compressor to `stream`. Only a reference is stored, so `stream`
+// has to stay alive for as long as this compressor is used.
+GzipCompressor::GzipCompressor(OutputStream& stream)
+    : m_output_stream { stream }
+{
+}
+
+// Nothing to release: the wrapped stream is referenced, not owned.
+GzipCompressor::~GzipCompressor() = default;
+
+// Writes `bytes` to the wrapped stream as one complete gzip member (RFC 1952):
+// the fixed header, the DEFLATE-compressed payload, then the CRC-32 and the
+// length of the uncompressed input as little-endian 32-bit values.
+//
+// Every call emits a self-contained member, so several calls produce several
+// concatenated members rather than one continuous compressed stream.
+//
+// Returns the number of input bytes consumed, which is always bytes.size();
+// a failing DEFLATE write trips the VERIFY below instead of being reported.
+size_t GzipCompressor::write(ReadonlyBytes bytes)
+{
+    BlockHeader header;
+    // Use the shared magic constants so compressor and decompressor cannot drift apart.
+    header.identification_1 = gzip_magic_1;
+    header.identification_2 = gzip_magic_2;
+    header.compression_method = 0x08; // deflate, the only method supported_by_implementation() accepts
+    header.flags = 0;                 // none of the Flags bits are set
+    header.modification_time = 0;
+    // NOTE(review): 3 is neither of the two values named in the comment below - confirm this is intended.
+    header.extra_flags = 3;      // DEFLATE sets 2 for maximum compression and 4 for minimum compression
+    header.operating_system = 3; // unix
+    m_output_stream << Bytes { &header, sizeof(header) };
+
+    DeflateCompressor compressed_stream { m_output_stream };
+    VERIFY(compressed_stream.write_or_error(bytes));
+    compressed_stream.final_flush();
+
+    // Trailer: checksum and size both describe the uncompressed input.
+    Crypto::Checksum::CRC32 crc32;
+    crc32.update(bytes);
+    LittleEndian<u32> digest = crc32.digest();
+    // The size field is 32 bits wide, so the narrowing is made explicit here.
+    LittleEndian<u32> size = static_cast<u32>(bytes.size());
+    m_output_stream << digest << size;
+    return bytes.size();
+}
+
+bool GzipCompressor::write_or_error(ReadonlyBytes bytes)
+{
+    if (write(bytes) < bytes.size()) {
+        set_fatal_error();
+        return false;
+    }
+
+    return true;
+}
+
+// One-shot helper: gzip-compresses `bytes` into an in-memory stream and returns
+// the result as a contiguous buffer, or an empty Optional when the compressor
+// reports an error.
+Optional<ByteBuffer> GzipCompressor::compress_all(const ReadonlyBytes& bytes)
+{
+    DuplexMemoryStream memory_stream;
+    GzipCompressor compressor { memory_stream };
+
+    // The result is not inspected here; failure is picked up through handle_any_error().
+    compressor.write_or_error(bytes);
+
+    const bool failed = compressor.handle_any_error();
+    if (failed)
+        return {};
+
+    return memory_stream.copy_into_contiguous_buffer();
+}
+
 }

+ 37 - 23
Userland/Libraries/LibCompress/Gzip.h

@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2020, the SerenityOS developers.
+ * Copyright (c) 2021, Idan Horowitz <idan.horowitz@gmail.com>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -33,6 +34,28 @@ namespace Compress {
 
 constexpr u8 gzip_magic_1 = 0x1f;
 constexpr u8 gzip_magic_2 = 0x8b;
+struct [[gnu::packed]] BlockHeader { // Leading header fields of a gzip member; packed so no padding sits between the fields
+    u8 identification_1; // checked against gzip_magic_1
+    u8 identification_2; // checked against gzip_magic_2
+    u8 compression_method;
+    u8 flags; // presumably a bitwise OR of the Flags constants - TODO confirm
+    LittleEndian<u32> modification_time;
+    u8 extra_flags;
+    u8 operating_system;
+
+    bool valid_magic_number() const; // true when both identification bytes equal the gzip magic constants
+    bool supported_by_implementation() const;
+};
+
+struct Flags { // Single-bit masks for bit positions 0 through 4, plus their union
+    static constexpr u8 FTEXT = 1 << 0; // NOTE(review): names mirror the flag bits of RFC 1952; presumably applied to BlockHeader::flags - confirm
+    static constexpr u8 FHCRC = 1 << 1;
+    static constexpr u8 FEXTRA = 1 << 2;
+    static constexpr u8 FNAME = 1 << 3;
+    static constexpr u8 FCOMMENT = 1 << 4;
+
+    static constexpr u8 MAX = FTEXT | FHCRC | FEXTRA | FNAME | FCOMMENT; // all five masks combined (0x1f)
+};
 
 class GzipDecompressor final : public InputStream {
 public:
@@ -49,29 +72,6 @@ public:
     static bool is_likely_compressed(ReadonlyBytes bytes);
 
 private:
-    struct [[gnu::packed]] BlockHeader {
-        u8 identification_1;
-        u8 identification_2;
-        u8 compression_method;
-        u8 flags;
-        LittleEndian<u32> modification_time;
-        u8 extra_flags;
-        u8 operating_system;
-
-        bool valid_magic_number() const;
-        bool supported_by_implementation() const;
-    };
-
-    struct Flags {
-        static constexpr u8 FTEXT = 1 << 0;
-        static constexpr u8 FHCRC = 1 << 1;
-        static constexpr u8 FEXTRA = 1 << 2;
-        static constexpr u8 FNAME = 1 << 3;
-        static constexpr u8 FCOMMENT = 1 << 4;
-
-        static constexpr u8 MAX = FTEXT | FHCRC | FEXTRA | FNAME | FCOMMENT;
-    };
-
     class Member {
     public:
         Member(BlockHeader header, InputStream& stream)
@@ -95,4 +95,18 @@ private:
     bool m_eof { false };
 };
 
+class GzipCompressor final : public OutputStream { // Output stream that gzip-wraps whatever is written and forwards it to another OutputStream
+public:
+    GzipCompressor(OutputStream&); // the stream is held by reference, not copied
+    ~GzipCompressor();
+
+    size_t write(ReadonlyBytes) override;
+    bool write_or_error(ReadonlyBytes) override;
+
+    static Optional<ByteBuffer> compress_all(const ReadonlyBytes& bytes); // one-shot: compress a whole buffer in memory
+
+private:
+    OutputStream& m_output_stream; // destination of the gzip output; must outlive this object
+};
+
 }