diff --git a/swift/obj/diskfile.py b/swift/obj/diskfile.py index fe5fc73804..fda7a96142 100644 --- a/swift/obj/diskfile.py +++ b/swift/obj/diskfile.py @@ -114,6 +114,20 @@ def _get_filename(fd): return fd +def _encode_metadata(metadata): + """ + UTF8 encode any unicode keys or values in given metadata dict. + + :param metadata: a dict + """ + def encode_str(item): + if isinstance(item, six.text_type): + return item.encode('utf8') + return item + + return dict(((encode_str(k), encode_str(v)) for k, v in metadata.items())) + + def read_metadata(fd): """ Helper function to read the pickled metadata from an object file. @@ -140,7 +154,10 @@ def read_metadata(fd): raise DiskFileNotExist() # TODO: we might want to re-raise errors that don't denote a missing # xattr here. Seems to be ENODATA on linux and ENOATTR on BSD/OSX. - return pickle.loads(metadata) + # strings are utf-8 encoded when written, but have not always been + # (see https://bugs.launchpad.net/swift/+bug/1678018) so encode them again + # when read + return _encode_metadata(pickle.loads(metadata)) def write_metadata(fd, metadata, xattr_size=65536): @@ -150,7 +167,7 @@ def write_metadata(fd, metadata, xattr_size=65536): :param fd: file descriptor or filename to write the metadata :param metadata: metadata to write """ - metastr = pickle.dumps(metadata, PICKLE_PROTOCOL) + metastr = pickle.dumps(_encode_metadata(metadata), PICKLE_PROTOCOL) key = 0 while metastr: try: diff --git a/test/probe/test_reconstructor_rebuild.py b/test/probe/test_reconstructor_rebuild.py index 24c5eb9047..cd4ee1cf0d 100644 --- a/test/probe/test_reconstructor_rebuild.py +++ b/test/probe/test_reconstructor_rebuild.py @@ -81,8 +81,13 @@ class TestReconstructorRebuild(ECProbeTest): # PUT object and POST some metadata contents = Body() - headers = {'x-object-meta-foo': 'meta-foo'} - self.headers_post = {'x-object-meta-bar': 'meta-bar'} + headers = { + self._make_name('x-object-meta-').decode('utf8'): + self._make_name('meta-foo-').decode('utf8'), + } + self.headers_post = { + self._make_name('x-object-meta-').decode('utf8'): + self._make_name('meta-bar-').decode('utf8')} self.etag = client.put_object(self.url, self.token, self.container_name, diff --git a/test/unit/obj/test_diskfile.py b/test/unit/obj/test_diskfile.py index fe7069c91c..a541101a4c 100644 --- a/test/unit/obj/test_diskfile.py +++ b/test/unit/obj/test_diskfile.py @@ -286,6 +286,58 @@ class TestDiskFileModuleMethods(unittest.TestCase): exp_dir = '/srv/node/sdb5/objects-1/123' self.assertEqual(part_dir, exp_dir) + def test_write_read_metadata(self): + path = os.path.join(self.testdir, str(uuid.uuid4())) + metadata = {'name': '/a/c/o', + 'Content-Length': 99, + u'X-Object-Sysmeta-Ec-Frag-Index': 4, + u'X-Object-Meta-Strange': u'should be bytes', + b'X-Object-Meta-x\xff': b'not utf8 \xff', + u'X-Object-Meta-y\xe8': u'not ascii \xe8'} + expected = {b'name': b'/a/c/o', + b'Content-Length': 99, + b'X-Object-Sysmeta-Ec-Frag-Index': 4, + b'X-Object-Meta-Strange': b'should be bytes', + b'X-Object-Meta-x\xff': b'not utf8 \xff', + b'X-Object-Meta-y\xc3\xa8': b'not ascii \xc3\xa8'} + + def check_metadata(): + with open(path, 'rb') as fd: + actual = diskfile.read_metadata(fd) + self.assertEqual(expected, actual) + for k in actual.keys(): + self.assertIsInstance(k, six.binary_type) + for k in (b'name', + b'X-Object-Meta-Strange', + b'X-Object-Meta-x\xff', + b'X-Object-Meta-y\xc3\xa8'): + self.assertIsInstance(actual[k], six.binary_type) + + with open(path, 'wb') as fd: + diskfile.write_metadata(fd, metadata) + check_metadata() + + # mock the read path to check the write path encoded persisted metadata + with mock.patch.object(diskfile, '_encode_metadata', lambda x: x): + check_metadata() + + # simulate a legacy diskfile that might have persisted unicode metadata + with mock.patch.object(diskfile, '_encode_metadata', lambda x: x): + with open(path, 'wb') as fd: + diskfile.write_metadata(fd, metadata) + # sanity check, while still mocked, that we did persist unicode + with open(path, 'rb') as fd: + actual = diskfile.read_metadata(fd) + for k, v in actual.items(): + if k == u'X-Object-Meta-Strange': + self.assertIsInstance(k, six.text_type) + self.assertIsInstance(v, six.text_type) + break + else: + self.fail('Did not find X-Object-Meta-Strange') + # check that read_metadata converts binary_type + check_metadata() + @patch_policies class TestObjectAuditLocationGenerator(unittest.TestCase): diff --git a/test/unit/obj/test_server.py b/test/unit/obj/test_server.py index 3ab677e98f..7e41d806f2 100644 --- a/test/unit/obj/test_server.py +++ b/test/unit/obj/test_server.py @@ -7255,6 +7255,19 @@ class TestObjectServer(unittest.TestCase): obj_datafile = found_files['.data'][0] self.assertEqual("%s#2#d.data" % put_timestamp.internal, os.path.basename(obj_datafile)) + + with open(obj_datafile) as fd: + actual_meta = diskfile.read_metadata(fd) + expected_meta = {'Content-Length': '82', + 'name': '/a/c/o', + 'X-Object-Sysmeta-Ec-Frag-Index': '2', + 'X-Timestamp': put_timestamp.normal, + 'Content-Type': 'text/plain'} + for k, v in actual_meta.items(): + self.assertIsInstance(k, six.binary_type) + self.assertIsInstance(v, six.binary_type) + self.assertIsNotNone(actual_meta.pop('ETag', None)) + self.assertEqual(expected_meta, actual_meta) # but no other files self.assertFalse(found_files['.data'][1:]) found_files.pop('.data') diff --git a/test/unit/obj/test_ssync_sender.py b/test/unit/obj/test_ssync_sender.py index fe718a2370..401cef6395 100644 --- a/test/unit/obj/test_ssync_sender.py +++ b/test/unit/obj/test_ssync_sender.py @@ -1452,62 +1452,86 @@ class TestSender(BaseTest): exc = err self.assertEqual(str(exc), '0.01 seconds: send_put chunk') - def test_send_put(self): + def _check_send_put(self, obj_name, meta_value): ts_iter = make_timestamp_iter() t1 = next(ts_iter) body = 'test' - extra_metadata = {'Some-Other-Header': 'value'} - df = self._make_open_diskfile(body=body, timestamp=t1, + extra_metadata = {'Some-Other-Header': 'value', + u'Unicode-Meta-Name': meta_value} + df = self._make_open_diskfile(obj=obj_name, body=body, + timestamp=t1, extra_metadata=extra_metadata) expected = dict(df.get_metadata()) expected['body'] = body expected['chunk_size'] = len(body) + expected['meta'] = meta_value + path = six.moves.urllib.parse.quote(expected['name']) + expected['path'] = path + expected['length'] = format(145 + len(path) + len(meta_value), 'x') # .meta file metadata is not included in expected for data only PUT t2 = next(ts_iter) metadata = {'X-Timestamp': t2.internal, 'X-Object-Meta-Fruit': 'kiwi'} df.write_metadata(metadata) df.open() self.sender.connection = FakeConnection() - self.sender.send_put('/a/c/o', df) + self.sender.send_put(path, df) self.assertEqual( ''.join(self.sender.connection.sent), - '82\r\n' - 'PUT /a/c/o\r\n' + '%(length)s\r\n' + 'PUT %(path)s\r\n' 'Content-Length: %(Content-Length)s\r\n' 'ETag: %(ETag)s\r\n' 'Some-Other-Header: value\r\n' + 'Unicode-Meta-Name: %(meta)s\r\n' 'X-Timestamp: %(X-Timestamp)s\r\n' '\r\n' '\r\n' '%(chunk_size)s\r\n' '%(body)s\r\n' % expected) - def test_send_post(self): + def test_send_put(self): + self._check_send_put('o', 'meta') + + def test_send_put_unicode(self): + self._check_send_put( + 'o_with_caract\xc3\xa8res_like_in_french', 'm\xc3\xa8ta') + + def _check_send_post(self, obj_name, meta_value): ts_iter = make_timestamp_iter() # create .data file extra_metadata = {'X-Object-Meta-Foo': 'old_value', 'X-Object-Sysmeta-Test': 'test_sysmeta', 'Content-Type': 'test_content_type'} ts_0 = next(ts_iter) - df = self._make_open_diskfile(extra_metadata=extra_metadata, + df = self._make_open_diskfile(obj=obj_name, + extra_metadata=extra_metadata, timestamp=ts_0) # create .meta file ts_1 = next(ts_iter) - newer_metadata = {'X-Object-Meta-Foo': 'new_value', + newer_metadata = {u'X-Object-Meta-Foo': meta_value, 'X-Timestamp': ts_1.internal} df.write_metadata(newer_metadata) + path = six.moves.urllib.parse.quote(df.read_metadata()['name']) + length = format(61 + len(path) + len(meta_value), 'x') self.sender.connection = FakeConnection() with df.open(): - self.sender.send_post('/a/c/o', df) + self.sender.send_post(path, df) self.assertEqual( ''.join(self.sender.connection.sent), - '4c\r\n' - 'POST /a/c/o\r\n' - 'X-Object-Meta-Foo: new_value\r\n' + '%s\r\n' + 'POST %s\r\n' + 'X-Object-Meta-Foo: %s\r\n' 'X-Timestamp: %s\r\n' '\r\n' - '\r\n' % ts_1.internal) + '\r\n' % (length, path, meta_value, ts_1.internal)) + + def test_send_post(self): + self._check_send_post('o', 'meta') + + def test_send_post_unicode(self): + self._check_send_post( + 'o_with_caract\xc3\xa8res_like_in_french', 'm\xc3\xa8ta') def test_disconnect_timeout(self): self.sender.connection = FakeConnection()