Use gzip to compress files uploaded to swift

We've discovered that rackspace swift seems to always want to gzip encode files when clients request their contents. When our files are deflate encoded this results in files that are first deflate encoded then gzip encoded. Not all browsers or layer 7 firewalls can handle this (despite being perfectly valid according to the HTTP RFCs). We'll use gzip to see if that causes rackspace to not double encode the files. To do this memory efficiently we vendor a tool from pypi called gzip-stream which allows us to read chunks of the compressed data at a time without loading the entire file into memory or writing multiple gzip headers in a single file. Change-Id: I9483cfdbd8e7d0683eeb24d28dd6d8b0c0e772fa
2019-10-11 08:46:20 -07:00 · 2019-10-11 08:46:20 -07:00 · 7755ef1b8d
commit 7755ef1b8d
parent 61c05a180f
1 changed files with 113 additions and 2 deletions
--- a/roles/upload-logs-swift/library/zuul_swift_upload.py
+++ b/roles/upload-logs-swift/library/zuul_swift_upload.py
@ -25,6 +25,8 @@ Utility to upload files to swift
 """
 import argparse
 import gzip
 import io
 import logging
 import mimetypes
 import os
@ -131,6 +133,95 @@ ICON_IMAGES = {
                   'AupSdoFsAAAAAElFTkSuQmCC'}
 # Begin vendored code
 # This code is licensed under the Public Domain/CC0 and comes from
 # https://github.com/leenr/gzip-stream/blob/master/gzip_stream.py
 # Code was modified:
 #   removed type annotations to support python2.
 #   removed use of *, somearg for positional anonymous args.
 #   Default compression level to 9.
 class GZIPCompressedStream(io.RawIOBase):
    """Raw readable stream producing the gzip-compressed bytes of ``stream``.

    The source is pulled through a ``gzip.GzipFile`` piece by piece as the
    reader asks for data, with the compressed output staged in an
    in-memory ``io.BytesIO`` between reads.
    """

    def __init__(self, stream, compression_level=9):
        assert 1 <= compression_level <= 9

        self._stream = stream
        self._compression_level = compression_level
        # Staging area that the compressor writes into and readers drain.
        self._compressed_stream = io.BytesIO()
        self._compressor = gzip.GzipFile(
            fileobj=self._compressed_stream,
            mode='wb',
            compresslevel=compression_level,
        )
        # `GzipFile.__init__` already wrote the GZIP header into the
        # staging area; rewind so that the first read returns it.
        self._compressed_stream.seek(0)

    @property
    def compression_level(self):
        """Compression level this stream was created with."""
        return self._compression_level

    @property
    def stream(self):
        """The uncompressed source stream being wrapped."""
        return self._stream

    def readable(self):
        """Report that this stream supports reading."""
        return True

    def _read_compressed_into(self, target):
        """Copy staged compressed bytes into ``target``; return the count."""
        chunk = self._compressed_stream.read(len(target))
        count = len(chunk)
        target[:count] = chunk
        return count

    def readinto(self, b):
        """Fill ``b`` with compressed bytes and return how many were written.

        Fewer bytes than ``len(b)`` are returned only once the compressor
        has been closed and the staging area is drained.
        """
        view = memoryview(b)
        wanted = len(view)
        filled = 0
        while filled < wanted:
            filled += self._read_compressed_into(view[filled:])
            if filled >= wanted:
                break
            # The staging area is drained at this point.
            if self._compressor.closed:
                # Source exhausted and trailer already handed out.
                break
            # Feed the compressor with the next piece of the source.
            self._read_n_compress(wanted)
        return filled

    def _read_n_compress(self, size):
        """Read up to ``size`` source bytes and stage their compressed form."""
        assert size > 0

        raw = self._stream.read(size)

        # Everything previously staged has been handed to the reader, so
        # the staging area can be emptied to keep memory use bounded.
        staging = self._compressed_stream
        staging.seek(0)
        staging.truncate(0)

        if not raw:
            # End of source: closing the compressor writes the final data
            # (it flushes zlib with Z_FINISH).
            self._compressor.close()
        else:
            self._compressor.write(raw)

        # Position the staging area for the next read.
        staging.seek(0)

    def __repr__(self):
        template = (
            '{self.__class__.__name__}({self.stream!r}, '
            'compression_level={self.compression_level!r})'
        )
        return template.format(self=self)
 # End vendored code
 def get_mime_icon(mime, filename=''):
    icon = (APACHE_FILE_ICON_MAP.get(filename) or
            APACHE_MIME_ICON_MAP.get(mime) or
@ -463,6 +554,26 @@ class Indexer():
        self.file_list.file_list = new_list
 class GzipFilter():
    """Iterator over the gzip-compressed contents of a file object.

    ``infile`` is wrapped in a ``GZIPCompressedStream`` and its compressed
    bytes are handed out in pieces of at most ``chunk_size`` bytes, so the
    whole file never has to be held in memory.

    Only non-empty chunks are yielded. A zero-length chunk is the
    end-of-body marker of HTTP chunked transfer coding, so emitting one
    could end an upload early.

    Once the compressed data is exhausted, both the compressing stream and
    ``infile`` are closed; the filter takes ownership of ``infile``.
    """
    # Maximum number of compressed bytes returned per iteration step.
    chunk_size = 16384

    def __init__(self, infile):
        # Kept so the source can be closed when iteration finishes; the
        # compressing stream does not close it on its own.
        self.infile = infile
        self.gzipfile = GZIPCompressedStream(infile)
        self.done = False

    def __iter__(self):
        return self

    def __next__(self):
        """Return the next non-empty compressed chunk.

        Raises:
            StopIteration: when the compressed data is exhausted (and on
                every later call).
        """
        if not self.done:
            data = self.gzipfile.read(self.chunk_size)
            if data:
                return data
            # Mark completion before closing so that a failing close()
            # cannot lead to a later read from a closed stream.
            self.done = True
            self._close()
        raise StopIteration()

    # Python 2 looks up ``next`` rather than ``__next__``.
    next = __next__

    def _close(self):
        """Release the compressing stream and the wrapped input file."""
        try:
            self.gzipfile.close()
        finally:
            # Tolerate read-only sources that expose no close() method.
            close_infile = getattr(self.infile, 'close', None)
            if close_infile is not None:
                close_infile()
 class DeflateFilter():
    chunk_size = 16384
@ -622,8 +733,8 @@ class Uploader():
        if not file_detail.folder:
            if (file_detail.encoding is None and
                self._is_text_type(file_detail.mimetype)):
-                headers['content-encoding'] = 'deflate'
+                headers['content-encoding'] = 'gzip'
-                data = DeflateFilter(open(file_detail.full_path, 'rb'))
+                data = GzipFilter(open(file_detail.full_path, 'rb'))
            else:
                if file_detail.encoding:
                    headers['content-encoding'] = file_detail.encoding