# Copyright (c) 2010-2012 OpenStack Foundation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
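
"""Tests for swift.container.sync."""
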
import re
import unittest
from contextlib import nested

import mock

from test.unit import FakeLogger
from swift.container import sync
from swift.common import utils
from swiftclient import ClientException
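
# Pin the hashing suffix/prefix so hash_path() results are deterministic
# across test runs ('endcap' is just an arbitrary test constant).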
utils.HASH_PATH_SUFFIX = 'endcap'
utils.HASH_PATH_PREFIX = 'endcap'


class FakeRing(object):
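    # Minimal stand-in for the real ring: three fixed devices, with
    # get_nodes() always reporting partition 1, so tests can match
    # against known ips/ports (10.0.0.0:1000 through 10.0.0.2:1002).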
def __init__(self):
self.devs = [{'ip': '10.0.0.%s' % x, 'port': 1000 + x, 'device': 'sda'}
for x in xrange(3)]

    def get_nodes(self, account, container=None, obj=None):
return 1, list(self.devs)


class FakeContainerBroker(object):
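    # Test double for the container DB broker: serves canned info,
    # metadata, deletion state and rows, and records the sync points
    # that ContainerSync stores via set_x_container_sync_points().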
def __init__(self, path, metadata=None, info=None, deleted=False,
items_since=None):
self.db_file = path
self.metadata = metadata if metadata else {}
self.info = info if info else {}
self.deleted = deleted
self.items_since = items_since if items_since else []
self.sync_point1 = -1
self.sync_point2 = -1

    def get_info(self):
return self.info

    def is_deleted(self):
return self.deleted

    def get_items_since(self, sync_point, limit):
if sync_point < 0:
sync_point = 0
return self.items_since[sync_point:sync_point + limit]

    def set_x_container_sync_points(self, sync_point1, sync_point2):
self.sync_point1 = sync_point1
self.sync_point2 = sync_point2


class TestContainerSync(unittest.TestCase):

    def test_FileLikeIter(self):
# Retained test to show new FileLikeIter acts just like the removed
# _Iter2FileLikeObject did.
flo = sync.FileLikeIter(iter(['123', '4567', '89', '0']))
expect = '1234567890'
got = flo.read(2)
self.assertTrue(len(got) <= 2)
self.assertEquals(got, expect[:len(got)])
expect = expect[len(got):]
got = flo.read(5)
self.assertTrue(len(got) <= 5)
self.assertEquals(got, expect[:len(got)])
expect = expect[len(got):]
self.assertEquals(flo.read(), expect)
self.assertEquals(flo.read(), '')
self.assertEquals(flo.read(2), '')

        flo = sync.FileLikeIter(iter(['123', '4567', '89', '0']))
self.assertEquals(flo.read(), '1234567890')
self.assertEquals(flo.read(), '')
self.assertEquals(flo.read(2), '')

    def test_init(self):
cring = FakeRing()
oring = FakeRing()
cs = sync.ContainerSync({}, container_ring=cring, object_ring=oring)
self.assertTrue(cs.container_ring is cring)
self.assertTrue(cs.object_ring is oring)

    def test_run_forever(self):
        # This runs run_forever with fakes so that it succeeds for two
        # loops: the first causes a report but no interval sleep, the
        # second no report but an interval sleep.
time_calls = [0]
sleep_calls = []
audit_location_generator_calls = [0]

        def fake_time():
time_calls[0] += 1
returns = [1, # Initialized reported time
1, # Start time
3602, # Is it report time (yes)
3602, # Report time
3602, # Elapsed time for "under interval" (no)
3602, # Start time
3603, # Is it report time (no)
3603] # Elapsed time for "under interval" (yes)
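            # Once the canned times are exhausted, raise to break out of
            # run_forever's otherwise infinite loop; the test expects this
            # exception below.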
if time_calls[0] == len(returns) + 1:
raise Exception('we are now done')
return returns[time_calls[0] - 1]

        def fake_sleep(amount):
sleep_calls.append(amount)

        def fake_audit_location_generator(*args, **kwargs):
audit_location_generator_calls[0] += 1
# Makes .container_sync() short-circuit
yield 'container.db', 'device', 'partition'
return

        orig_time = sync.time
orig_sleep = sync.sleep
orig_ContainerBroker = sync.ContainerBroker
orig_audit_location_generator = sync.audit_location_generator
try:
sync.ContainerBroker = lambda p: FakeContainerBroker(
p, info={'account': 'a', 'container': 'c'})
sync.time = fake_time
sync.sleep = fake_sleep
cs = sync.ContainerSync({}, container_ring=FakeRing(),
object_ring=FakeRing())
sync.audit_location_generator = fake_audit_location_generator
cs.run_forever()
except Exception as err:
if str(err) != 'we are now done':
raise
finally:
sync.time = orig_time
sync.sleep = orig_sleep
sync.audit_location_generator = orig_audit_location_generator
sync.ContainerBroker = orig_ContainerBroker

        self.assertEquals(time_calls, [9])
self.assertEquals(len(sleep_calls), 2)
self.assertTrue(sleep_calls[0] <= cs.interval)
self.assertTrue(sleep_calls[1] == cs.interval - 1)
self.assertEquals(audit_location_generator_calls, [2])
self.assertEquals(cs.reported, 3602)

    def test_run_once(self):
        # This runs run_once with fakes twice: the first run causes an
        # interim report, the second does not.
time_calls = [0]
audit_location_generator_calls = [0]

        def fake_time():
time_calls[0] += 1
returns = [1, # Initialized reported time
1, # Start time
3602, # Is it report time (yes)
3602, # Report time
3602, # End report time
3602, # For elapsed
3602, # Start time
3603, # Is it report time (no)
3604, # End report time
3605] # For elapsed
if time_calls[0] == len(returns) + 1:
raise Exception('we are now done')
return returns[time_calls[0] - 1]

        def fake_audit_location_generator(*args, **kwargs):
audit_location_generator_calls[0] += 1
# Makes .container_sync() short-circuit
yield 'container.db', 'device', 'partition'
return

        orig_time = sync.time
orig_audit_location_generator = sync.audit_location_generator
orig_ContainerBroker = sync.ContainerBroker
try:
sync.ContainerBroker = lambda p: FakeContainerBroker(
p, info={'account': 'a', 'container': 'c'})
sync.time = fake_time
cs = sync.ContainerSync({}, container_ring=FakeRing(),
object_ring=FakeRing())
sync.audit_location_generator = fake_audit_location_generator
cs.run_once()
self.assertEquals(time_calls, [6])
self.assertEquals(audit_location_generator_calls, [1])
self.assertEquals(cs.reported, 3602)
cs.run_once()
except Exception as err:
if str(err) != 'we are now done':
raise
finally:
sync.time = orig_time
sync.audit_location_generator = orig_audit_location_generator
sync.ContainerBroker = orig_ContainerBroker

        self.assertEquals(time_calls, [10])
self.assertEquals(audit_location_generator_calls, [2])
self.assertEquals(cs.reported, 3604)

    def test_container_sync_not_db(self):
cring = FakeRing()
oring = FakeRing()
cs = sync.ContainerSync({}, container_ring=cring, object_ring=oring)
self.assertEquals(cs.container_failures, 0)

    def test_container_sync_missing_db(self):
cring = FakeRing()
oring = FakeRing()
cs = sync.ContainerSync({}, container_ring=cring, object_ring=oring)
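        # Syncing a db path that does not exist should simply be counted
        # as one container failure.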
cs.container_sync('isa.db')
self.assertEquals(cs.container_failures, 1)

    def test_container_sync_not_my_db(self):
        # The db could be there due to handoff replication, so test that
        # we ignore those.
cring = FakeRing()
oring = FakeRing()
cs = sync.ContainerSync({}, container_ring=cring, object_ring=oring)
orig_ContainerBroker = sync.ContainerBroker
try:
sync.ContainerBroker = lambda p: FakeContainerBroker(
p, info={'account': 'a', 'container': 'c'})
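            # container_sync() only processes a db when both this
            # daemon's ip and port match one of the container's nodes.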
cs._myips = ['127.0.0.1'] # No match
cs._myport = 1 # No match
cs.container_sync('isa.db')
self.assertEquals(cs.container_failures, 0)
cs._myips = ['10.0.0.0'] # Match
cs._myport = 1 # No match
cs.container_sync('isa.db')
self.assertEquals(cs.container_failures, 0)
cs._myips = ['127.0.0.1'] # No match
cs._myport = 1000 # Match
cs.container_sync('isa.db')
self.assertEquals(cs.container_failures, 0)
cs._myips = ['10.0.0.0'] # Match
cs._myport = 1000 # Match
            # This complete match will cause the one container failure
            # since the broker's info doesn't contain sync point keys.
cs.container_sync('isa.db')
self.assertEquals(cs.container_failures, 1)
finally:
sync.ContainerBroker = orig_ContainerBroker

    def test_container_sync_deleted(self):
cring = FakeRing()
oring = FakeRing()
cs = sync.ContainerSync({}, container_ring=cring, object_ring=oring)
orig_ContainerBroker = sync.ContainerBroker
try:
sync.ContainerBroker = lambda p: FakeContainerBroker(
p, info={'account': 'a', 'container': 'c'}, deleted=False)
cs._myips = ['10.0.0.0'] # Match
cs._myport = 1000 # Match
            # This complete match will cause the one container failure
            # since the broker's info doesn't contain sync point keys.
cs.container_sync('isa.db')
self.assertEquals(cs.container_failures, 1)
sync.ContainerBroker = lambda p: FakeContainerBroker(
p, info={'account': 'a', 'container': 'c'}, deleted=True)
# This complete match will not cause any more container failures
# since the broker indicates deletion
cs.container_sync('isa.db')
self.assertEquals(cs.container_failures, 1)
finally:
sync.ContainerBroker = orig_ContainerBroker

    def test_container_sync_no_to_or_key(self):
cring = FakeRing()
oring = FakeRing()
cs = sync.ContainerSync({}, container_ring=cring, object_ring=oring)
orig_ContainerBroker = sync.ContainerBroker
try:
sync.ContainerBroker = lambda p: FakeContainerBroker(
p, info={'account': 'a', 'container': 'c',
'x_container_sync_point1': -1,
'x_container_sync_point2': -1})
cs._myips = ['10.0.0.0'] # Match
cs._myport = 1000 # Match
# This complete match will be skipped since the broker's metadata
# has no x-container-sync-to or x-container-sync-key
cs.container_sync('isa.db')
self.assertEquals(cs.container_failures, 0)
self.assertEquals(cs.container_skips, 1)
sync.ContainerBroker = lambda p: FakeContainerBroker(
p, info={'account': 'a', 'container': 'c',
'x_container_sync_point1': -1,
'x_container_sync_point2': -1},
metadata={'x-container-sync-to': ('http://127.0.0.1/a/c', 1)})
cs._myips = ['10.0.0.0'] # Match
cs._myport = 1000 # Match
# This complete match will be skipped since the broker's metadata
# has no x-container-sync-key
cs.container_sync('isa.db')
self.assertEquals(cs.container_failures, 0)
self.assertEquals(cs.container_skips, 2)
sync.ContainerBroker = lambda p: FakeContainerBroker(
p, info={'account': 'a', 'container': 'c',
'x_container_sync_point1': -1,
'x_container_sync_point2': -1},
metadata={'x-container-sync-key': ('key', 1)})
cs._myips = ['10.0.0.0'] # Match
cs._myport = 1000 # Match
# This complete match will be skipped since the broker's metadata
# has no x-container-sync-to
cs.container_sync('isa.db')
self.assertEquals(cs.container_failures, 0)
self.assertEquals(cs.container_skips, 3)
sync.ContainerBroker = lambda p: FakeContainerBroker(
p, info={'account': 'a', 'container': 'c',
'x_container_sync_point1': -1,
'x_container_sync_point2': -1},
metadata={'x-container-sync-to': ('http://127.0.0.1/a/c', 1),
'x-container-sync-key': ('key', 1)})
cs._myips = ['10.0.0.0'] # Match
cs._myport = 1000 # Match
cs.allowed_sync_hosts = []
# This complete match will cause a container failure since the
# sync-to won't validate as allowed.
cs.container_sync('isa.db')
self.assertEquals(cs.container_failures, 1)
self.assertEquals(cs.container_skips, 3)
sync.ContainerBroker = lambda p: FakeContainerBroker(
p, info={'account': 'a', 'container': 'c',
'x_container_sync_point1': -1,
'x_container_sync_point2': -1},
metadata={'x-container-sync-to': ('http://127.0.0.1/a/c', 1),
'x-container-sync-key': ('key', 1)})
cs._myips = ['10.0.0.0'] # Match
cs._myport = 1000 # Match
cs.allowed_sync_hosts = ['127.0.0.1']
            # This complete match will succeed completely since the
            # broker's get_items_since will return no new rows.
cs.container_sync('isa.db')
self.assertEquals(cs.container_failures, 1)
self.assertEquals(cs.container_skips, 3)
finally:
sync.ContainerBroker = orig_ContainerBroker

    def test_container_stop_at(self):
cring = FakeRing()
oring = FakeRing()
cs = sync.ContainerSync({}, container_ring=cring, object_ring=oring)
orig_ContainerBroker = sync.ContainerBroker
orig_time = sync.time
try:
sync.ContainerBroker = lambda p: FakeContainerBroker(
p, info={'account': 'a', 'container': 'c',
'x_container_sync_point1': -1,
'x_container_sync_point2': -1},
metadata={'x-container-sync-to': ('http://127.0.0.1/a/c', 1),
'x-container-sync-key': ('key', 1)},
items_since=['erroneous data'])
cs._myips = ['10.0.0.0'] # Match
cs._myport = 1000 # Match
cs.allowed_sync_hosts = ['127.0.0.1']
# This sync will fail since the items_since data is bad.
cs.container_sync('isa.db')
self.assertEquals(cs.container_failures, 1)
self.assertEquals(cs.container_skips, 0)
# Set up fake times to make the sync short-circuit as having taken
# too long
fake_times = [
1.0, # Compute the time to move on
100000.0, # Compute if it's time to move on from first loop
100000.0] # Compute if it's time to move on from second loop

            def fake_time():
return fake_times.pop(0)
sync.time = fake_time
            # This same sync won't fail since it will look like it took
            # so long that it's time to move on (before it ever actually
            # tries to do anything).
cs.container_sync('isa.db')
self.assertEquals(cs.container_failures, 1)
self.assertEquals(cs.container_skips, 0)
finally:
sync.ContainerBroker = orig_ContainerBroker
sync.time = orig_time

    def test_container_first_loop(self):
cring = FakeRing()
oring = FakeRing()
cs = sync.ContainerSync({}, container_ring=cring, object_ring=oring)

        def fake_hash_path(account, container, obj, raw_digest=False):
# Ensures that no rows match for full syncing, ordinal is 0 and
# all hashes are 0
return '\x00' * 16
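
        # hash_path is patched below, so the row-to-node assignment that
        # container sync derives from the hash never selects this node.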
fcb = FakeContainerBroker(
'path',
info={'account': 'a', 'container': 'c',
'x_container_sync_point1': 2,
'x_container_sync_point2': -1},
metadata={'x-container-sync-to': ('http://127.0.0.1/a/c', 1),
'x-container-sync-key': ('key', 1)},
items_since=[{'ROWID': 1, 'name': 'o'}])
with nested(
mock.patch('swift.container.sync.ContainerBroker',
lambda p: fcb),
mock.patch('swift.container.sync.hash_path', fake_hash_path)):
cs._myips = ['10.0.0.0'] # Match
cs._myport = 1000 # Match
cs.allowed_sync_hosts = ['127.0.0.1']
cs.container_sync('isa.db')
# Succeeds because no rows match
self.assertEquals(cs.container_failures, 1)
self.assertEquals(cs.container_skips, 0)
self.assertEquals(fcb.sync_point1, None)
self.assertEquals(fcb.sync_point2, -1)

        def fake_hash_path(account, container, obj, raw_digest=False):
# Ensures that all rows match for full syncing, ordinal is 0
# and all hashes are 1
return '\x01' * 16
fcb = FakeContainerBroker('path', info={'account': 'a',
'container': 'c',
'x_container_sync_point1': 1,
'x_container_sync_point2': 1},
metadata={'x-container-sync-to':
('http://127.0.0.1/a/c', 1),
'x-container-sync-key':
('key', 1)},
items_since=[{'ROWID': 1, 'name': 'o'}])
with nested(
mock.patch('swift.container.sync.ContainerBroker',
lambda p: fcb),
mock.patch('swift.container.sync.hash_path', fake_hash_path)):
cs._myips = ['10.0.0.0'] # Match
cs._myport = 1000 # Match
cs.allowed_sync_hosts = ['127.0.0.1']
cs.container_sync('isa.db')
# Succeeds because the two sync points haven't deviated yet
self.assertEquals(cs.container_failures, 1)
self.assertEquals(cs.container_skips, 0)
self.assertEquals(fcb.sync_point1, -1)
self.assertEquals(fcb.sync_point2, -1)
fcb = FakeContainerBroker(
'path',
info={'account': 'a', 'container': 'c',
'x_container_sync_point1': 2,
'x_container_sync_point2': -1},
metadata={'x-container-sync-to': ('http://127.0.0.1/a/c', 1),
'x-container-sync-key': ('key', 1)},
items_since=[{'ROWID': 1, 'name': 'o'}])
with mock.patch('swift.container.sync.ContainerBroker', lambda p: fcb):
cs._myips = ['10.0.0.0'] # Match
cs._myport = 1000 # Match
cs.allowed_sync_hosts = ['127.0.0.1']
cs.container_sync('isa.db')
# Fails because container_sync_row will fail since the row has no
# 'deleted' key
self.assertEquals(cs.container_failures, 2)
self.assertEquals(cs.container_skips, 0)
self.assertEquals(fcb.sync_point1, None)
self.assertEquals(fcb.sync_point2, -1)

        def fake_delete_object(*args, **kwargs):
raise ClientException
fcb = FakeContainerBroker(
'path',
info={'account': 'a', 'container': 'c',
'x_container_sync_point1': 2,
'x_container_sync_point2': -1},
metadata={'x-container-sync-to': ('http://127.0.0.1/a/c', 1),
'x-container-sync-key': ('key', 1)},
items_since=[{'ROWID': 1, 'name': 'o', 'created_at': '1.2',
'deleted': True}])
with nested(
mock.patch('swift.container.sync.ContainerBroker',
lambda p: fcb),
mock.patch('swift.container.sync.delete_object',
fake_delete_object)):
cs._myips = ['10.0.0.0'] # Match
cs._myport = 1000 # Match
cs.allowed_sync_hosts = ['127.0.0.1']
cs.container_sync('isa.db')
# Fails because delete_object fails
self.assertEquals(cs.container_failures, 3)
self.assertEquals(cs.container_skips, 0)
self.assertEquals(fcb.sync_point1, None)
self.assertEquals(fcb.sync_point2, -1)
fcb = FakeContainerBroker(
'path',
info={'account': 'a', 'container': 'c',
'x_container_sync_point1': 2,
'x_container_sync_point2': -1},
metadata={'x-container-sync-to': ('http://127.0.0.1/a/c', 1),
'x-container-sync-key': ('key', 1)},
items_since=[{'ROWID': 1, 'name': 'o', 'created_at': '1.2',
'deleted': True}])
with nested(
mock.patch('swift.container.sync.ContainerBroker',
lambda p: fcb),
mock.patch('swift.container.sync.delete_object',
lambda *x, **y: None)):
cs._myips = ['10.0.0.0'] # Match
cs._myport = 1000 # Match
cs.allowed_sync_hosts = ['127.0.0.1']
cs.container_sync('isa.db')
# Succeeds because delete_object succeeds
self.assertEquals(cs.container_failures, 3)
self.assertEquals(cs.container_skips, 0)
self.assertEquals(fcb.sync_point1, None)
self.assertEquals(fcb.sync_point2, 1)

    def test_container_second_loop(self):
cring = FakeRing()
oring = FakeRing()
cs = sync.ContainerSync({}, container_ring=cring, object_ring=oring)
orig_ContainerBroker = sync.ContainerBroker
orig_hash_path = sync.hash_path
orig_delete_object = sync.delete_object
try:
# We'll ensure the first loop is always skipped by keeping the two
# sync points equal
def fake_hash_path(account, container, obj, raw_digest=False):
# Ensures that no rows match for second loop, ordinal is 0 and
# all hashes are 1
return '\x01' * 16
sync.hash_path = fake_hash_path
fcb = FakeContainerBroker(
'path',
info={'account': 'a', 'container': 'c',
'x_container_sync_point1': -1,
'x_container_sync_point2': -1},
metadata={'x-container-sync-to': ('http://127.0.0.1/a/c', 1),
'x-container-sync-key': ('key', 1)},
items_since=[{'ROWID': 1, 'name': 'o'}])
sync.ContainerBroker = lambda p: fcb
cs._myips = ['10.0.0.0'] # Match
cs._myport = 1000 # Match
cs.allowed_sync_hosts = ['127.0.0.1']
cs.container_sync('isa.db')
# Succeeds because no rows match
self.assertEquals(cs.container_failures, 0)
self.assertEquals(cs.container_skips, 0)
self.assertEquals(fcb.sync_point1, 1)
self.assertEquals(fcb.sync_point2, None)

            def fake_hash_path(account, container, obj, raw_digest=False):
# Ensures that all rows match for second loop, ordinal is 0 and
# all hashes are 0
return '\x00' * 16

            def fake_delete_object(*args, **kwargs):
pass
sync.hash_path = fake_hash_path
sync.delete_object = fake_delete_object
fcb = FakeContainerBroker(
'path',
info={'account': 'a', 'container': 'c',
'x_container_sync_point1': -1,
'x_container_sync_point2': -1},
metadata={'x-container-sync-to': ('http://127.0.0.1/a/c', 1),
'x-container-sync-key': ('key', 1)},
items_since=[{'ROWID': 1, 'name': 'o'}])
sync.ContainerBroker = lambda p: fcb
cs._myips = ['10.0.0.0'] # Match
cs._myport = 1000 # Match
cs.allowed_sync_hosts = ['127.0.0.1']
cs.container_sync('isa.db')
            # Fails because the row is missing the 'deleted' key;
            # nevertheless, the fault is skipped over.
self.assertEquals(cs.container_failures, 1)
self.assertEquals(cs.container_skips, 0)
self.assertEquals(fcb.sync_point1, 1)
self.assertEquals(fcb.sync_point2, None)
fcb = FakeContainerBroker(
'path',
info={'account': 'a', 'container': 'c',
'x_container_sync_point1': -1,
'x_container_sync_point2': -1},
metadata={'x-container-sync-to': ('http://127.0.0.1/a/c', 1),
'x-container-sync-key': ('key', 1)},
items_since=[{'ROWID': 1, 'name': 'o', 'created_at': '1.2',
'deleted': True}])
sync.ContainerBroker = lambda p: fcb
cs._myips = ['10.0.0.0'] # Match
cs._myport = 1000 # Match
cs.allowed_sync_hosts = ['127.0.0.1']
cs.container_sync('isa.db')
# Succeeds because row now has 'deleted' key and delete_object
# succeeds
self.assertEquals(cs.container_failures, 1)
self.assertEquals(cs.container_skips, 0)
self.assertEquals(fcb.sync_point1, 1)
self.assertEquals(fcb.sync_point2, None)
finally:
sync.ContainerBroker = orig_ContainerBroker
sync.hash_path = orig_hash_path
sync.delete_object = orig_delete_object

    def test_container_sync_row_delete(self):
orig_delete_object = sync.delete_object
try:
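            # The fake checks the exact arguments that container_sync_row
            # passes through to delete_object.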
def fake_delete_object(path, name=None, headers=None, proxy=None):
self.assertEquals(path, 'http://sync/to/path')
self.assertEquals(name, 'object')
self.assertEquals(
headers,
{'x-container-sync-key': 'key', 'x-timestamp': '1.2'})
self.assertEquals(proxy, 'http://proxy')
sync.delete_object = fake_delete_object
cs = sync.ContainerSync({}, container_ring=FakeRing(),
object_ring=FakeRing())
cs.proxy = 'http://proxy'
# Success
self.assertTrue(cs.container_sync_row(
{'deleted': True,
'name': 'object',
'created_at': '1.2'}, 'http://sync/to/path',
'key', FakeContainerBroker('broker'), 'info'))
self.assertEquals(cs.container_deletes, 1)
exc = []

            def fake_delete_object(path, name=None, headers=None, proxy=None):
exc.append(Exception('test exception'))
raise exc[-1]
sync.delete_object = fake_delete_object
# Failure because of delete_object exception
self.assertFalse(cs.container_sync_row(
{'deleted': True,
'name': 'object',
'created_at': '1.2'}, 'http://sync/to/path',
'key', FakeContainerBroker('broker'), 'info'))
self.assertEquals(cs.container_deletes, 1)
self.assertEquals(len(exc), 1)
self.assertEquals(str(exc[-1]), 'test exception')

            def fake_delete_object(path, name=None, headers=None, proxy=None):
exc.append(ClientException('test client exception'))
raise exc[-1]
sync.delete_object = fake_delete_object
# Failure because of delete_object exception
self.assertFalse(cs.container_sync_row(
{'deleted': True,
'name': 'object',
'created_at': '1.2'}, 'http://sync/to/path',
'key', FakeContainerBroker('broker'), 'info'))
self.assertEquals(cs.container_deletes, 1)
self.assertEquals(len(exc), 2)
self.assertEquals(str(exc[-1]), 'test client exception')

            def fake_delete_object(path, name=None, headers=None, proxy=None):
exc.append(ClientException('test client exception',
http_status=404))
raise exc[-1]
sync.delete_object = fake_delete_object
# Success because the object wasn't even found
self.assertTrue(cs.container_sync_row(
{'deleted': True,
'name': 'object',
'created_at': '1.2'}, 'http://sync/to/path',
'key', FakeContainerBroker('broker'), 'info'))
self.assertEquals(cs.container_deletes, 2)
self.assertEquals(len(exc), 3)
self.assertEquals(str(exc[-1]), 'test client exception: 404')
finally:
sync.delete_object = orig_delete_object

    def test_container_sync_row_put(self):
orig_shuffle = sync.shuffle
orig_put_object = sync.put_object
orig_direct_get_object = sync.direct_get_object
try:
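            # Neutralize shuffle so the three ring nodes are tried in a
            # deterministic order by container_sync_row.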
sync.shuffle = lambda x: x

            def fake_put_object(sync_to, name=None, headers=None,
contents=None, proxy=None):
self.assertEquals(sync_to, 'http://sync/to/path')
self.assertEquals(name, 'object')
self.assertEquals(headers, {
'x-container-sync-key': 'key',
'x-timestamp': '1.2',
'other-header': 'other header value',
'etag': 'etagvalue'})
self.assertEquals(contents.read(), 'contents')
self.assertEquals(proxy, 'http://proxy')
sync.put_object = fake_put_object
cs = sync.ContainerSync({}, container_ring=FakeRing(),
object_ring=FakeRing())
cs.proxy = 'http://proxy'
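
            # direct_get_object normally returns (headers, chunk iterator);
            # the fake mimics that shape with canned values.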
def fake_direct_get_object(node, part, account, container, obj,
resp_chunk_size=1):
return ({'other-header': 'other header value',
'etag': '"etagvalue"', 'x-timestamp': '1.2'},
iter('contents'))
sync.direct_get_object = fake_direct_get_object
# Success as everything says it worked
self.assertTrue(cs.container_sync_row(
{'deleted': False,
'name': 'object',
'created_at': '1.2'}, 'http://sync/to/path',
'key', FakeContainerBroker('broker'), {
'account': 'a',
'container': 'c'}))
self.assertEquals(cs.container_puts, 1)

            def fake_direct_get_object(node, part, account, container, obj,
resp_chunk_size=1):
return ({'date': 'date value',
'last-modified': 'last modified value',
'x-timestamp': '1.2',
'other-header': 'other header value',
'etag': '"etagvalue"'},
iter('contents'))
sync.direct_get_object = fake_direct_get_object
            # Success as everything says it worked; also checks that the
            # 'date' and 'last-modified' headers are removed and that the
            # 'etag' header is stripped of its double quotes.
self.assertTrue(cs.container_sync_row(
{'deleted': False,
'name': 'object',
'created_at': '1.2'}, 'http://sync/to/path',
'key', FakeContainerBroker('broker'), {
'account': 'a',
'container': 'c'}))
self.assertEquals(cs.container_puts, 2)
exc = []

            def fake_direct_get_object(node, part, account, container, obj,
resp_chunk_size=1):
exc.append(Exception('test exception'))
raise exc[-1]
sync.direct_get_object = fake_direct_get_object
# Fail due to completely unexpected exception
self.assertFalse(cs.container_sync_row(
{'deleted': False,
'name': 'object',
'created_at': '1.2'}, 'http://sync/to/path',
'key', FakeContainerBroker('broker'), {
'account': 'a',
'container': 'c'}))
self.assertEquals(cs.container_puts, 2)
self.assertEquals(len(exc), 3)
self.assertEquals(str(exc[-1]), 'test exception')
exc = []

            def fake_direct_get_object(node, part, account, container, obj,
resp_chunk_size=1):
exc.append(ClientException('test client exception'))
raise exc[-1]
sync.direct_get_object = fake_direct_get_object
# Fail due to all direct_get_object calls failing
self.assertFalse(cs.container_sync_row(
{'deleted': False,
'name': 'object',
'created_at': '1.2'}, 'http://sync/to/path',
'key', FakeContainerBroker('broker'), {
'account': 'a',
'container': 'c'}))
self.assertEquals(cs.container_puts, 2)
self.assertEquals(len(exc), 3)
self.assertEquals(str(exc[-1]), 'test client exception')

            def fake_direct_get_object(node, part, account, container, obj,
resp_chunk_size=1):
return ({'other-header': 'other header value',
'x-timestamp': '1.2', 'etag': '"etagvalue"'},
iter('contents'))

            def fake_put_object(sync_to, name=None, headers=None,
contents=None, proxy=None):
raise ClientException('test client exception', http_status=401)
sync.direct_get_object = fake_direct_get_object
sync.put_object = fake_put_object
cs.logger = FakeLogger()
# Fail due to 401
self.assertFalse(cs.container_sync_row(
{'deleted': False,
'name': 'object',
'created_at': '1.2'}, 'http://sync/to/path',
'key', FakeContainerBroker('broker'), {
'account': 'a',
'container': 'c'}))
self.assertEquals(cs.container_puts, 2)
self.assert_(re.match('Unauth ',
cs.logger.log_dict['info'][0][0][0]))

            def fake_put_object(sync_to, name=None, headers=None,
contents=None, proxy=None):
raise ClientException('test client exception', http_status=404)
sync.put_object = fake_put_object
# Fail due to 404
cs.logger = FakeLogger()
self.assertFalse(cs.container_sync_row(
{'deleted': False,
'name': 'object',
'created_at': '1.2'}, 'http://sync/to/path',
'key', FakeContainerBroker('broker'), {
'account': 'a',
'container': 'c'}))
self.assertEquals(cs.container_puts, 2)
self.assert_(re.match('Not found ',
cs.logger.log_dict['info'][0][0][0]))

            def fake_put_object(sync_to, name=None, headers=None,
contents=None, proxy=None):
raise ClientException('test client exception', http_status=503)
sync.put_object = fake_put_object
# Fail due to 503
self.assertFalse(cs.container_sync_row(
{'deleted': False,
'name': 'object',
'created_at': '1.2'}, 'http://sync/to/path',
'key', FakeContainerBroker('broker'), {
'account': 'a',
'container': 'c'}))
self.assertEquals(cs.container_puts, 2)
self.assertTrue(
cs.logger.log_dict['exception'][0][0][0].startswith(
'ERROR Syncing '))
finally:
sync.shuffle = orig_shuffle
sync.put_object = orig_put_object
sync.direct_get_object = orig_direct_get_object


if __name__ == '__main__':
unittest.main()