# Copyright (c) 2010-2012 OpenStack Foundation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
|
2011-02-10 11:57:51 -06:00
|
|
|
from test import unit
import unittest
import mock
import os
import time
from shutil import rmtree
from hashlib import md5
from tempfile import mkdtemp
from test.unit import FakeLogger
from swift.obj import auditor
from swift.obj.diskfile import DiskFile, write_metadata, invalidate_hash, \
    DATADIR, DiskFileManager, AuditLocation
from swift.common.utils import hash_path, mkdirs, normalize_timestamp, \
    storage_directory
|
2010-07-12 17:03:45 -05:00
|
|
|
|
2010-12-28 14:54:00 -08:00
|
|
|
|
2010-07-12 17:03:45 -05:00
|
|
|
class TestAuditor(unittest.TestCase):
|
|
|
|
|
2010-12-16 16:20:57 -08:00
|
|
|
def setUp(self):
|
2011-03-15 22:12:03 -07:00
|
|
|
self.testdir = os.path.join(mkdtemp(), 'tmp_test_object_auditor')
|
2010-12-16 16:20:57 -08:00
|
|
|
self.devices = os.path.join(self.testdir, 'node')
|
2011-03-15 22:12:03 -07:00
|
|
|
self.logger = FakeLogger()
|
2010-12-16 16:20:57 -08:00
|
|
|
rmtree(self.testdir, ignore_errors=1)
|
2011-03-15 22:12:03 -07:00
|
|
|
mkdirs(os.path.join(self.devices, 'sda'))
|
2010-12-16 16:20:57 -08:00
|
|
|
self.objects = os.path.join(self.devices, 'sda', 'objects')
|
2010-12-17 00:27:08 -08:00
|
|
|
|
|
|
|
os.mkdir(os.path.join(self.devices, 'sdb'))
|
|
|
|
self.objects_2 = os.path.join(self.devices, 'sdb', 'objects')
|
|
|
|
|
2010-12-16 16:20:57 -08:00
|
|
|
os.mkdir(self.objects)
|
|
|
|
self.parts = {}
|
|
|
|
for part in ['0', '1', '2', '3']:
|
|
|
|
self.parts[part] = os.path.join(self.objects, part)
|
|
|
|
os.mkdir(os.path.join(self.objects, part))
|
|
|
|
|
|
|
|
self.conf = dict(
|
|
|
|
devices=self.devices,
|
2013-07-01 14:58:35 -07:00
|
|
|
mount_check='false',
|
|
|
|
object_size_stats='10,100,1024,10240')
|
DiskFile API, with reference implementation
Refactor on-disk knowledge out of the object server by pushing the
async update pickle creation to the new DiskFileManager class (name is
not the best, so suggestions welcome), along with the REPLICATOR
method logic. We also move the mount checking and thread pool storage
to the new ondisk.Devices object, which then also becomes the new home
of the audit_location_generator method.
For the object server, a new setup() method is now called at the end
of the controller's construction, and the _diskfile() method has been
renamed to get_diskfile(), to allow implementation specific behavior.
We then hide the need for the REST API layer to know how and where
quarantining needs to be performed. There are now two places it is
checked internally, on open() where we verify the content-length,
name, and x-timestamp metadata, and in the reader on close where the
etag metadata is checked if the entire file was read.
We add a reader class to allow implementations to isolate the WSGI
handling code for that specific environment (it is used no-where else
in the REST APIs). This simplifies the caller's code to just use a
"with" statement once open to avoid multiple points where close needs
to be called.
For a full historical comparison, including the usage patterns see:
https://gist.github.com/portante/5488238
(as of master, 2b639f5, Merge
"Fix 500 from account-quota This Commit
middleware")
--------------------------------+------------------------------------
DiskFileManager(conf)
Methods:
.pickle_async_update()
.get_diskfile()
.get_hashes()
Attributes:
.devices
.logger
.disk_chunk_size
.keep_cache_size
.bytes_per_sync
DiskFile(a,c,o,keep_data_fp=) DiskFile(a,c,o)
Methods: Methods:
*.__iter__()
.close(verify_file=)
.is_deleted()
.is_expired()
.quarantine()
.get_data_file_size()
.open()
.read_metadata()
.create() .create()
.write_metadata()
.delete() .delete()
Attributes: Attributes:
.quarantined_dir
.keep_cache
.metadata
*DiskFileReader()
Methods:
.__iter__()
.close()
Attributes:
+.was_quarantined
DiskWriter() DiskFileWriter()
Methods: Methods:
.write() .write()
.put() .put()
* Note that the DiskFile class * Note that the DiskReader() object
implements all the methods returned by the
necessary for a WSGI app DiskFileOpened.reader() method
iterator implements all the methods
necessary for a WSGI app iterator
+ Note that if the auditor is
refactored to not use the DiskFile
class, see
https://review.openstack.org/44787
then we don't need the
was_quarantined attribute
A reference "in-memory" object server implementation of a backend
DiskFile class in swift/obj/mem_server.py and
swift/obj/mem_diskfile.py.
One can also reference
https://github.com/portante/gluster-swift/commits/diskfile for the
proposed integration with the gluster-swift code based on these
changes.
Change-Id: I44e153fdb405a5743e9c05349008f94136764916
Signed-off-by: Peter Portante <peter.portante@redhat.com>
2013-09-12 19:51:18 -04:00
|
|
|
self.df_mgr = DiskFileManager(self.conf, self.logger)
|
|
|
|
self.disk_file = self.df_mgr.get_diskfile('sda', '0', 'a', 'c', 'o')
|
2010-12-16 16:20:57 -08:00
|
|
|
|
|
|
|
def tearDown(self):
|
2011-01-24 17:12:38 -08:00
|
|
|
rmtree(os.path.dirname(self.testdir), ignore_errors=1)
|
2011-02-10 11:57:51 -06:00
|
|
|
unit.xattr_data = {}
|
2010-12-16 16:20:57 -08:00
|
|
|
|
2010-12-17 00:27:08 -08:00
|
|
|
def test_object_audit_extra_data(self):
|
2013-09-13 13:55:10 -06:00
|
|
|
auditor_worker = auditor.AuditorWorker(self.conf, self.logger)
|
2010-12-16 16:20:57 -08:00
|
|
|
data = '0' * 1024
|
|
|
|
etag = md5()
|
2013-09-03 10:26:39 -04:00
|
|
|
with self.disk_file.create() as writer:
|
2013-04-18 20:42:36 -04:00
|
|
|
writer.write(data)
|
2010-12-16 16:20:57 -08:00
|
|
|
etag.update(data)
|
|
|
|
etag = etag.hexdigest()
|
|
|
|
timestamp = str(normalize_timestamp(time.time()))
|
|
|
|
metadata = {
|
|
|
|
'ETag': etag,
|
|
|
|
'X-Timestamp': timestamp,
|
DiskFile API, with reference implementation
Refactor on-disk knowledge out of the object server by pushing the
async update pickle creation to the new DiskFileManager class (name is
not the best, so suggestions welcome), along with the REPLICATOR
method logic. We also move the mount checking and thread pool storage
to the new ondisk.Devices object, which then also becomes the new home
of the audit_location_generator method.
For the object server, a new setup() method is now called at the end
of the controller's construction, and the _diskfile() method has been
renamed to get_diskfile(), to allow implementation specific behavior.
We then hide the need for the REST API layer to know how and where
quarantining needs to be performed. There are now two places it is
checked internally, on open() where we verify the content-length,
name, and x-timestamp metadata, and in the reader on close where the
etag metadata is checked if the entire file was read.
We add a reader class to allow implementations to isolate the WSGI
handling code for that specific environment (it is used no-where else
in the REST APIs). This simplifies the caller's code to just use a
"with" statement once open to avoid multiple points where close needs
to be called.
For a full historical comparison, including the usage patterns see:
https://gist.github.com/portante/5488238
(as of master, 2b639f5, Merge
"Fix 500 from account-quota This Commit
middleware")
--------------------------------+------------------------------------
DiskFileManager(conf)
Methods:
.pickle_async_update()
.get_diskfile()
.get_hashes()
Attributes:
.devices
.logger
.disk_chunk_size
.keep_cache_size
.bytes_per_sync
DiskFile(a,c,o,keep_data_fp=) DiskFile(a,c,o)
Methods: Methods:
*.__iter__()
.close(verify_file=)
.is_deleted()
.is_expired()
.quarantine()
.get_data_file_size()
.open()
.read_metadata()
.create() .create()
.write_metadata()
.delete() .delete()
Attributes: Attributes:
.quarantined_dir
.keep_cache
.metadata
*DiskFileReader()
Methods:
.__iter__()
.close()
Attributes:
+.was_quarantined
DiskWriter() DiskFileWriter()
Methods: Methods:
.write() .write()
.put() .put()
* Note that the DiskFile class * Note that the DiskReader() object
implements all the methods returned by the
necessary for a WSGI app DiskFileOpened.reader() method
iterator implements all the methods
necessary for a WSGI app iterator
+ Note that if the auditor is
refactored to not use the DiskFile
class, see
https://review.openstack.org/44787
then we don't need the
was_quarantined attribute
A reference "in-memory" object server implementation of a backend
DiskFile class in swift/obj/mem_server.py and
swift/obj/mem_diskfile.py.
One can also reference
https://github.com/portante/gluster-swift/commits/diskfile for the
proposed integration with the gluster-swift code based on these
changes.
Change-Id: I44e153fdb405a5743e9c05349008f94136764916
Signed-off-by: Peter Portante <peter.portante@redhat.com>
2013-09-12 19:51:18 -04:00
|
|
|
'Content-Length': str(os.fstat(writer._fd).st_size),
|
2010-12-16 16:20:57 -08:00
|
|
|
}
|
2013-04-18 20:42:36 -04:00
|
|
|
writer.put(metadata)
|
2013-09-13 13:55:10 -06:00
|
|
|
pre_quarantines = auditor_worker.quarantines
|
2010-12-16 16:20:57 -08:00
|
|
|
|
2013-09-13 13:55:10 -06:00
|
|
|
auditor_worker.object_audit(
|
Alternate DiskFile constructor for efficient auditing.
Before, to audit an object, the auditor:
- calls listdir(object-hash-dir)
- picks out the .data file from the listing
- pulls out all N of its user.swift.metadata* xattrs
- unpickles them
- pulls out the value for 'name'
- splits the name into a/c/o
- then instantiates and opens a DiskFile(a, c, o),
which does the following
- joins a/c/o back into a name
- hashes the name
- calls listdir(object-hash-dir) (AGAIN)
- picks out the .data file (and maybe .meta) from the listing (AGAIN)
- pulls out all N of its user.swift.metadata* xattrs (AGAIN)
- unpickles them (AGAIN)
- starts reading object's contents off disk
Now, the auditor simply locates the hash dir on the filesystem (saving
one listdir) and then hands it off to
DiskFileManager.get_diskfile_from_audit_location, which then
instantiates a DiskFile in a way that lazy-loads the name later
(saving one xattr reading).
As part of this, DiskFile.open() will now quarantine a hash
"directory" that's actually a file. Before, the audit location
generator would skip those, but now they make it clear into
DiskFile(). It's better to quarantine them anyway, as they're not
doing any good the way they are.
Also, removed the was_quarantined attribute on DiskFileReader. Now you
can pass in a quarantine_hook callable to DiskFile.reader() that gets
called if the file was quarantined. Default is to log quarantines, but
otherwise do nothing.
Change-Id: I04fc14569982a17fcc89e00832725ae71009335a
2013-10-28 14:57:18 -07:00
|
|
|
AuditLocation(self.disk_file._datadir, 'sda', '0'))
|
2013-09-13 13:55:10 -06:00
|
|
|
self.assertEquals(auditor_worker.quarantines, pre_quarantines)
|
2010-12-16 16:20:57 -08:00
|
|
|
|
DiskFile API, with reference implementation
Refactor on-disk knowledge out of the object server by pushing the
async update pickle creation to the new DiskFileManager class (name is
not the best, so suggestions welcome), along with the REPLICATOR
method logic. We also move the mount checking and thread pool storage
to the new ondisk.Devices object, which then also becomes the new home
of the audit_location_generator method.
For the object server, a new setup() method is now called at the end
of the controller's construction, and the _diskfile() method has been
renamed to get_diskfile(), to allow implementation specific behavior.
We then hide the need for the REST API layer to know how and where
quarantining needs to be performed. There are now two places it is
checked internally, on open() where we verify the content-length,
name, and x-timestamp metadata, and in the reader on close where the
etag metadata is checked if the entire file was read.
We add a reader class to allow implementations to isolate the WSGI
handling code for that specific environment (it is used no-where else
in the REST APIs). This simplifies the caller's code to just use a
"with" statement once open to avoid multiple points where close needs
to be called.
For a full historical comparison, including the usage patterns see:
https://gist.github.com/portante/5488238
(as of master, 2b639f5, Merge
"Fix 500 from account-quota This Commit
middleware")
--------------------------------+------------------------------------
DiskFileManager(conf)
Methods:
.pickle_async_update()
.get_diskfile()
.get_hashes()
Attributes:
.devices
.logger
.disk_chunk_size
.keep_cache_size
.bytes_per_sync
DiskFile(a,c,o,keep_data_fp=) DiskFile(a,c,o)
Methods: Methods:
*.__iter__()
.close(verify_file=)
.is_deleted()
.is_expired()
.quarantine()
.get_data_file_size()
.open()
.read_metadata()
.create() .create()
.write_metadata()
.delete() .delete()
Attributes: Attributes:
.quarantined_dir
.keep_cache
.metadata
*DiskFileReader()
Methods:
.__iter__()
.close()
Attributes:
+.was_quarantined
DiskWriter() DiskFileWriter()
Methods: Methods:
.write() .write()
.put() .put()
* Note that the DiskFile class * Note that the DiskReader() object
implements all the methods returned by the
necessary for a WSGI app DiskFileOpened.reader() method
iterator implements all the methods
necessary for a WSGI app iterator
+ Note that if the auditor is
refactored to not use the DiskFile
class, see
https://review.openstack.org/44787
then we don't need the
was_quarantined attribute
A reference "in-memory" object server implementation of a backend
DiskFile class in swift/obj/mem_server.py and
swift/obj/mem_diskfile.py.
One can also reference
https://github.com/portante/gluster-swift/commits/diskfile for the
proposed integration with the gluster-swift code based on these
changes.
Change-Id: I44e153fdb405a5743e9c05349008f94136764916
Signed-off-by: Peter Portante <peter.portante@redhat.com>
2013-09-12 19:51:18 -04:00
|
|
|
os.write(writer._fd, 'extra_data')
|
2013-09-13 13:55:10 -06:00
|
|
|
auditor_worker.object_audit(
|
Alternate DiskFile constructor for efficient auditing.
Before, to audit an object, the auditor:
- calls listdir(object-hash-dir)
- picks out the .data file from the listing
- pulls out all N of its user.swift.metadata* xattrs
- unpickles them
- pulls out the value for 'name'
- splits the name into a/c/o
- then instantiates and opens a DiskFile(a, c, o),
which does the following
- joins a/c/o back into a name
- hashes the name
- calls listdir(object-hash-dir) (AGAIN)
- picks out the .data file (and maybe .meta) from the listing (AGAIN)
- pulls out all N of its user.swift.metadata* xattrs (AGAIN)
- unpickles them (AGAIN)
- starts reading object's contents off disk
Now, the auditor simply locates the hash dir on the filesystem (saving
one listdir) and then hands it off to
DiskFileManager.get_diskfile_from_audit_location, which then
instantiates a DiskFile in a way that lazy-loads the name later
(saving one xattr reading).
As part of this, DiskFile.open() will now quarantine a hash
"directory" that's actually a file. Before, the audit location
generator would skip those, but now they make it clear into
DiskFile(). It's better to quarantine them anyway, as they're not
doing any good the way they are.
Also, removed the was_quarantined attribute on DiskFileReader. Now you
can pass in a quarantine_hook callable to DiskFile.reader() that gets
called if the file was quarantined. Default is to log quarantines, but
otherwise do nothing.
Change-Id: I04fc14569982a17fcc89e00832725ae71009335a
2013-10-28 14:57:18 -07:00
|
|
|
AuditLocation(self.disk_file._datadir, 'sda', '0'))
|
2013-09-13 13:55:10 -06:00
|
|
|
self.assertEquals(auditor_worker.quarantines, pre_quarantines + 1)
|
2010-12-16 16:20:57 -08:00
|
|
|
|
2010-12-17 00:27:08 -08:00
|
|
|
def test_object_audit_diff_data(self):
|
2013-09-13 13:55:10 -06:00
|
|
|
auditor_worker = auditor.AuditorWorker(self.conf, self.logger)
|
2010-12-17 00:27:08 -08:00
|
|
|
data = '0' * 1024
|
|
|
|
etag = md5()
|
|
|
|
timestamp = str(normalize_timestamp(time.time()))
|
2013-09-03 10:26:39 -04:00
|
|
|
with self.disk_file.create() as writer:
|
2013-04-18 20:42:36 -04:00
|
|
|
writer.write(data)
|
2010-12-17 00:27:08 -08:00
|
|
|
etag.update(data)
|
|
|
|
etag = etag.hexdigest()
|
|
|
|
metadata = {
|
|
|
|
'ETag': etag,
|
|
|
|
'X-Timestamp': timestamp,
|
DiskFile API, with reference implementation
Refactor on-disk knowledge out of the object server by pushing the
async update pickle creation to the new DiskFileManager class (name is
not the best, so suggestions welcome), along with the REPLICATOR
method logic. We also move the mount checking and thread pool storage
to the new ondisk.Devices object, which then also becomes the new home
of the audit_location_generator method.
For the object server, a new setup() method is now called at the end
of the controller's construction, and the _diskfile() method has been
renamed to get_diskfile(), to allow implementation specific behavior.
We then hide the need for the REST API layer to know how and where
quarantining needs to be performed. There are now two places it is
checked internally, on open() where we verify the content-length,
name, and x-timestamp metadata, and in the reader on close where the
etag metadata is checked if the entire file was read.
We add a reader class to allow implementations to isolate the WSGI
handling code for that specific environment (it is used no-where else
in the REST APIs). This simplifies the caller's code to just use a
"with" statement once open to avoid multiple points where close needs
to be called.
For a full historical comparison, including the usage patterns see:
https://gist.github.com/portante/5488238
(as of master, 2b639f5, Merge
"Fix 500 from account-quota This Commit
middleware")
--------------------------------+------------------------------------
DiskFileManager(conf)
Methods:
.pickle_async_update()
.get_diskfile()
.get_hashes()
Attributes:
.devices
.logger
.disk_chunk_size
.keep_cache_size
.bytes_per_sync
DiskFile(a,c,o,keep_data_fp=) DiskFile(a,c,o)
Methods: Methods:
*.__iter__()
.close(verify_file=)
.is_deleted()
.is_expired()
.quarantine()
.get_data_file_size()
.open()
.read_metadata()
.create() .create()
.write_metadata()
.delete() .delete()
Attributes: Attributes:
.quarantined_dir
.keep_cache
.metadata
*DiskFileReader()
Methods:
.__iter__()
.close()
Attributes:
+.was_quarantined
DiskWriter() DiskFileWriter()
Methods: Methods:
.write() .write()
.put() .put()
* Note that the DiskFile class * Note that the DiskReader() object
implements all the methods returned by the
necessary for a WSGI app DiskFileOpened.reader() method
iterator implements all the methods
necessary for a WSGI app iterator
+ Note that if the auditor is
refactored to not use the DiskFile
class, see
https://review.openstack.org/44787
then we don't need the
was_quarantined attribute
A reference "in-memory" object server implementation of a backend
DiskFile class in swift/obj/mem_server.py and
swift/obj/mem_diskfile.py.
One can also reference
https://github.com/portante/gluster-swift/commits/diskfile for the
proposed integration with the gluster-swift code based on these
changes.
Change-Id: I44e153fdb405a5743e9c05349008f94136764916
Signed-off-by: Peter Portante <peter.portante@redhat.com>
2013-09-12 19:51:18 -04:00
|
|
|
'Content-Length': str(os.fstat(writer._fd).st_size),
|
2010-12-17 00:27:08 -08:00
|
|
|
}
|
2013-04-18 20:42:36 -04:00
|
|
|
writer.put(metadata)
|
2013-09-13 13:55:10 -06:00
|
|
|
pre_quarantines = auditor_worker.quarantines
|
2010-12-17 00:27:08 -08:00
|
|
|
|
2013-04-18 20:42:36 -04:00
|
|
|
# remake so it will have metadata
|
DiskFile API, with reference implementation
Refactor on-disk knowledge out of the object server by pushing the
async update pickle creation to the new DiskFileManager class (name is
not the best, so suggestions welcome), along with the REPLICATOR
method logic. We also move the mount checking and thread pool storage
to the new ondisk.Devices object, which then also becomes the new home
of the audit_location_generator method.
For the object server, a new setup() method is now called at the end
of the controller's construction, and the _diskfile() method has been
renamed to get_diskfile(), to allow implementation specific behavior.
We then hide the need for the REST API layer to know how and where
quarantining needs to be performed. There are now two places it is
checked internally, on open() where we verify the content-length,
name, and x-timestamp metadata, and in the reader on close where the
etag metadata is checked if the entire file was read.
We add a reader class to allow implementations to isolate the WSGI
handling code for that specific environment (it is used no-where else
in the REST APIs). This simplifies the caller's code to just use a
"with" statement once open to avoid multiple points where close needs
to be called.
For a full historical comparison, including the usage patterns see:
https://gist.github.com/portante/5488238
(as of master, 2b639f5, Merge
"Fix 500 from account-quota This Commit
middleware")
--------------------------------+------------------------------------
DiskFileManager(conf)
Methods:
.pickle_async_update()
.get_diskfile()
.get_hashes()
Attributes:
.devices
.logger
.disk_chunk_size
.keep_cache_size
.bytes_per_sync
DiskFile(a,c,o,keep_data_fp=) DiskFile(a,c,o)
Methods: Methods:
*.__iter__()
.close(verify_file=)
.is_deleted()
.is_expired()
.quarantine()
.get_data_file_size()
.open()
.read_metadata()
.create() .create()
.write_metadata()
.delete() .delete()
Attributes: Attributes:
.quarantined_dir
.keep_cache
.metadata
*DiskFileReader()
Methods:
.__iter__()
.close()
Attributes:
+.was_quarantined
DiskWriter() DiskFileWriter()
Methods: Methods:
.write() .write()
.put() .put()
* Note that the DiskFile class * Note that the DiskReader() object
implements all the methods returned by the
necessary for a WSGI app DiskFileOpened.reader() method
iterator implements all the methods
necessary for a WSGI app iterator
+ Note that if the auditor is
refactored to not use the DiskFile
class, see
https://review.openstack.org/44787
then we don't need the
was_quarantined attribute
A reference "in-memory" object server implementation of a backend
DiskFile class in swift/obj/mem_server.py and
swift/obj/mem_diskfile.py.
One can also reference
https://github.com/portante/gluster-swift/commits/diskfile for the
proposed integration with the gluster-swift code based on these
changes.
Change-Id: I44e153fdb405a5743e9c05349008f94136764916
Signed-off-by: Peter Portante <peter.portante@redhat.com>
2013-09-12 19:51:18 -04:00
|
|
|
self.disk_file = self.df_mgr.get_diskfile('sda', '0', 'a', 'c', 'o')
|
2010-12-16 16:20:57 -08:00
|
|
|
|
2013-09-13 13:55:10 -06:00
|
|
|
auditor_worker.object_audit(
|
Alternate DiskFile constructor for efficient auditing.
Before, to audit an object, the auditor:
- calls listdir(object-hash-dir)
- picks out the .data file from the listing
- pulls out all N of its user.swift.metadata* xattrs
- unpickles them
- pulls out the value for 'name'
- splits the name into a/c/o
- then instantiates and opens a DiskFile(a, c, o),
which does the following
- joins a/c/o back into a name
- hashes the name
- calls listdir(object-hash-dir) (AGAIN)
- picks out the .data file (and maybe .meta) from the listing (AGAIN)
- pulls out all N of its user.swift.metadata* xattrs (AGAIN)
- unpickles them (AGAIN)
- starts reading object's contents off disk
Now, the auditor simply locates the hash dir on the filesystem (saving
one listdir) and then hands it off to
DiskFileManager.get_diskfile_from_audit_location, which then
instantiates a DiskFile in a way that lazy-loads the name later
(saving one xattr reading).
As part of this, DiskFile.open() will now quarantine a hash
"directory" that's actually a file. Before, the audit location
generator would skip those, but now they make it clear into
DiskFile(). It's better to quarantine them anyway, as they're not
doing any good the way they are.
Also, removed the was_quarantined attribute on DiskFileReader. Now you
can pass in a quarantine_hook callable to DiskFile.reader() that gets
called if the file was quarantined. Default is to log quarantines, but
otherwise do nothing.
Change-Id: I04fc14569982a17fcc89e00832725ae71009335a
2013-10-28 14:57:18 -07:00
|
|
|
AuditLocation(self.disk_file._datadir, 'sda', '0'))
|
2013-09-13 13:55:10 -06:00
|
|
|
self.assertEquals(auditor_worker.quarantines, pre_quarantines)
|
2013-04-18 20:42:36 -04:00
|
|
|
etag = md5()
|
|
|
|
etag.update('1' + '0' * 1023)
|
|
|
|
etag = etag.hexdigest()
|
|
|
|
metadata['ETag'] = etag
|
|
|
|
|
2013-09-03 10:26:39 -04:00
|
|
|
with self.disk_file.create() as writer:
|
2013-04-18 20:42:36 -04:00
|
|
|
writer.write(data)
|
|
|
|
writer.put(metadata)
|
|
|
|
|
2013-09-13 13:55:10 -06:00
|
|
|
auditor_worker.object_audit(
|
Alternate DiskFile constructor for efficient auditing.
Before, to audit an object, the auditor:
- calls listdir(object-hash-dir)
- picks out the .data file from the listing
- pulls out all N of its user.swift.metadata* xattrs
- unpickles them
- pulls out the value for 'name'
- splits the name into a/c/o
- then instantiates and opens a DiskFile(a, c, o),
which does the following
- joins a/c/o back into a name
- hashes the name
- calls listdir(object-hash-dir) (AGAIN)
- picks out the .data file (and maybe .meta) from the listing (AGAIN)
- pulls out all N of its user.swift.metadata* xattrs (AGAIN)
- unpickles them (AGAIN)
- starts reading object's contents off disk
Now, the auditor simply locates the hash dir on the filesystem (saving
one listdir) and then hands it off to
DiskFileManager.get_diskfile_from_audit_location, which then
instantiates a DiskFile in a way that lazy-loads the name later
(saving one xattr reading).
As part of this, DiskFile.open() will now quarantine a hash
"directory" that's actually a file. Before, the audit location
generator would skip those, but now they make it clear into
DiskFile(). It's better to quarantine them anyway, as they're not
doing any good the way they are.
Also, removed the was_quarantined attribute on DiskFileReader. Now you
can pass in a quarantine_hook callable to DiskFile.reader() that gets
called if the file was quarantined. Default is to log quarantines, but
otherwise do nothing.
Change-Id: I04fc14569982a17fcc89e00832725ae71009335a
2013-10-28 14:57:18 -07:00
|
|
|
AuditLocation(self.disk_file._datadir, 'sda', '0'))
|
2013-09-13 13:55:10 -06:00
|
|
|
self.assertEquals(auditor_worker.quarantines, pre_quarantines + 1)
|
2010-07-12 17:03:45 -05:00
|
|
|
|
2010-12-28 14:54:00 -08:00
|
|
|
def test_object_audit_no_meta(self):
|
2011-01-24 17:12:38 -08:00
|
|
|
timestamp = str(normalize_timestamp(time.time()))
|
DiskFile API, with reference implementation
Refactor on-disk knowledge out of the object server by pushing the
async update pickle creation to the new DiskFileManager class (name is
not the best, so suggestions welcome), along with the REPLICATOR
method logic. We also move the mount checking and thread pool storage
to the new ondisk.Devices object, which then also becomes the new home
of the audit_location_generator method.
For the object server, a new setup() method is now called at the end
of the controller's construction, and the _diskfile() method has been
renamed to get_diskfile(), to allow implementation specific behavior.
We then hide the need for the REST API layer to know how and where
quarantining needs to be performed. There are now two places it is
checked internally, on open() where we verify the content-length,
name, and x-timestamp metadata, and in the reader on close where the
etag metadata is checked if the entire file was read.
We add a reader class to allow implementations to isolate the WSGI
handling code for that specific environment (it is used no-where else
in the REST APIs). This simplifies the caller's code to just use a
"with" statement once open to avoid multiple points where close needs
to be called.
For a full historical comparison, including the usage patterns see:
https://gist.github.com/portante/5488238
(as of master, 2b639f5, Merge
"Fix 500 from account-quota This Commit
middleware")
--------------------------------+------------------------------------
DiskFileManager(conf)
Methods:
.pickle_async_update()
.get_diskfile()
.get_hashes()
Attributes:
.devices
.logger
.disk_chunk_size
.keep_cache_size
.bytes_per_sync
DiskFile(a,c,o,keep_data_fp=) DiskFile(a,c,o)
Methods: Methods:
*.__iter__()
.close(verify_file=)
.is_deleted()
.is_expired()
.quarantine()
.get_data_file_size()
.open()
.read_metadata()
.create() .create()
.write_metadata()
.delete() .delete()
Attributes: Attributes:
.quarantined_dir
.keep_cache
.metadata
*DiskFileReader()
Methods:
.__iter__()
.close()
Attributes:
+.was_quarantined
DiskWriter() DiskFileWriter()
Methods: Methods:
.write() .write()
.put() .put()
* Note that the DiskFile class * Note that the DiskReader() object
implements all the methods returned by the
necessary for a WSGI app DiskFileOpened.reader() method
iterator implements all the methods
necessary for a WSGI app iterator
+ Note that if the auditor is
refactored to not use the DiskFile
class, see
https://review.openstack.org/44787
then we don't need the
was_quarantined attribute
A reference "in-memory" object server implementation of a backend
DiskFile class in swift/obj/mem_server.py and
swift/obj/mem_diskfile.py.
One can also reference
https://github.com/portante/gluster-swift/commits/diskfile for the
proposed integration with the gluster-swift code based on these
changes.
Change-Id: I44e153fdb405a5743e9c05349008f94136764916
Signed-off-by: Peter Portante <peter.portante@redhat.com>
2013-09-12 19:51:18 -04:00
|
|
|
path = os.path.join(self.disk_file._datadir, timestamp + '.data')
|
|
|
|
mkdirs(self.disk_file._datadir)
|
2011-01-24 17:12:38 -08:00
|
|
|
fp = open(path, 'w')
|
|
|
|
fp.write('0' * 1024)
|
|
|
|
fp.close()
|
DiskFile API, with reference implementation
Refactor on-disk knowledge out of the object server by pushing the
async update pickle creation to the new DiskFileManager class (name is
not the best, so suggestions welcome), along with the REPLICATOR
method logic. We also move the mount checking and thread pool storage
to the new ondisk.Devices object, which then also becomes the new home
of the audit_location_generator method.
For the object server, a new setup() method is now called at the end
of the controller's construction, and the _diskfile() method has been
renamed to get_diskfile(), to allow implementation specific behavior.
We then hide the need for the REST API layer to know how and where
quarantining needs to be performed. There are now two places it is
checked internally, on open() where we verify the content-length,
name, and x-timestamp metadata, and in the reader on close where the
etag metadata is checked if the entire file was read.
We add a reader class to allow implementations to isolate the WSGI
handling code for that specific environment (it is used no-where else
in the REST APIs). This simplifies the caller's code to just use a
"with" statement once open to avoid multiple points where close needs
to be called.
For a full historical comparison, including the usage patterns see:
https://gist.github.com/portante/5488238
(as of master, 2b639f5, Merge
"Fix 500 from account-quota This Commit
middleware")
--------------------------------+------------------------------------
DiskFileManager(conf)
Methods:
.pickle_async_update()
.get_diskfile()
.get_hashes()
Attributes:
.devices
.logger
.disk_chunk_size
.keep_cache_size
.bytes_per_sync
DiskFile(a,c,o,keep_data_fp=) DiskFile(a,c,o)
Methods: Methods:
*.__iter__()
.close(verify_file=)
.is_deleted()
.is_expired()
.quarantine()
.get_data_file_size()
.open()
.read_metadata()
.create() .create()
.write_metadata()
.delete() .delete()
Attributes: Attributes:
.quarantined_dir
.keep_cache
.metadata
*DiskFileReader()
Methods:
.__iter__()
.close()
Attributes:
+.was_quarantined
DiskWriter() DiskFileWriter()
Methods: Methods:
.write() .write()
.put() .put()
* Note that the DiskFile class * Note that the DiskReader() object
implements all the methods returned by the
necessary for a WSGI app DiskFileOpened.reader() method
iterator implements all the methods
necessary for a WSGI app iterator
+ Note that if the auditor is
refactored to not use the DiskFile
class, see
https://review.openstack.org/44787
then we don't need the
was_quarantined attribute
A reference "in-memory" object server implementation of a backend
DiskFile class in swift/obj/mem_server.py and
swift/obj/mem_diskfile.py.
One can also reference
https://github.com/portante/gluster-swift/commits/diskfile for the
proposed integration with the gluster-swift code based on these
changes.
Change-Id: I44e153fdb405a5743e9c05349008f94136764916
Signed-off-by: Peter Portante <peter.portante@redhat.com>
2013-09-12 19:51:18 -04:00
|
|
|
invalidate_hash(os.path.dirname(self.disk_file._datadir))
|
2013-09-13 13:55:10 -06:00
|
|
|
auditor_worker = auditor.AuditorWorker(self.conf, self.logger)
|
|
|
|
pre_quarantines = auditor_worker.quarantines
|
|
|
|
auditor_worker.object_audit(
|
Alternate DiskFile constructor for efficient auditing.
Before, to audit an object, the auditor:
- calls listdir(object-hash-dir)
- picks out the .data file from the listing
- pulls out all N of its user.swift.metadata* xattrs
- unpickles them
- pulls out the value for 'name'
- splits the name into a/c/o
- then instantiates and opens a DiskFile(a, c, o),
which does the following
- joins a/c/o back into a name
- hashes the name
- calls listdir(object-hash-dir) (AGAIN)
- picks out the .data file (and maybe .meta) from the listing (AGAIN)
- pulls out all N of its user.swift.metadata* xattrs (AGAIN)
- unpickles them (AGAIN)
- starts reading object's contents off disk
Now, the auditor simply locates the hash dir on the filesystem (saving
one listdir) and then hands it off to
DiskFileManager.get_diskfile_from_audit_location, which then
instantiates a DiskFile in a way that lazy-loads the name later
(saving one xattr reading).
As part of this, DiskFile.open() will now quarantine a hash
"directory" that's actually a file. Before, the audit location
generator would skip those, but now they make it clear into
DiskFile(). It's better to quarantine them anyway, as they're not
doing any good the way they are.
Also, removed the was_quarantined attribute on DiskFileReader. Now you
can pass in a quarantine_hook callable to DiskFile.reader() that gets
called if the file was quarantined. Default is to log quarantines, but
otherwise do nothing.
Change-Id: I04fc14569982a17fcc89e00832725ae71009335a
2013-10-28 14:57:18 -07:00
|
|
|
AuditLocation(self.disk_file._datadir, 'sda', '0'))
|
2013-09-13 13:55:10 -06:00
|
|
|
self.assertEquals(auditor_worker.quarantines, pre_quarantines + 1)
|
2010-12-28 14:54:00 -08:00
|
|
|
|
2013-09-11 22:42:19 -07:00
|
|
|
def test_object_audit_will_not_swallow_errors_in_tests(self):
|
|
|
|
timestamp = str(normalize_timestamp(time.time()))
|
DiskFile API, with reference implementation
Refactor on-disk knowledge out of the object server by pushing the
async update pickle creation to the new DiskFileManager class (name is
not the best, so suggestions welcome), along with the REPLICATOR
method logic. We also move the mount checking and thread pool storage
to the new ondisk.Devices object, which then also becomes the new home
of the audit_location_generator method.
For the object server, a new setup() method is now called at the end
of the controller's construction, and the _diskfile() method has been
renamed to get_diskfile(), to allow implementation specific behavior.
We then hide the need for the REST API layer to know how and where
quarantining needs to be performed. There are now two places it is
checked internally, on open() where we verify the content-length,
name, and x-timestamp metadata, and in the reader on close where the
etag metadata is checked if the entire file was read.
We add a reader class to allow implementations to isolate the WSGI
handling code for that specific environment (it is used no-where else
in the REST APIs). This simplifies the caller's code to just use a
"with" statement once open to avoid multiple points where close needs
to be called.
For a full historical comparison, including the usage patterns see:
https://gist.github.com/portante/5488238
(as of master, 2b639f5, Merge
"Fix 500 from account-quota This Commit
middleware")
--------------------------------+------------------------------------
DiskFileManager(conf)
Methods:
.pickle_async_update()
.get_diskfile()
.get_hashes()
Attributes:
.devices
.logger
.disk_chunk_size
.keep_cache_size
.bytes_per_sync
DiskFile(a,c,o,keep_data_fp=) DiskFile(a,c,o)
Methods: Methods:
*.__iter__()
.close(verify_file=)
.is_deleted()
.is_expired()
.quarantine()
.get_data_file_size()
.open()
.read_metadata()
.create() .create()
.write_metadata()
.delete() .delete()
Attributes: Attributes:
.quarantined_dir
.keep_cache
.metadata
*DiskFileReader()
Methods:
.__iter__()
.close()
Attributes:
+.was_quarantined
DiskWriter() DiskFileWriter()
Methods: Methods:
.write() .write()
.put() .put()
* Note that the DiskFile class * Note that the DiskReader() object
implements all the methods returned by the
necessary for a WSGI app DiskFileOpened.reader() method
iterator implements all the methods
necessary for a WSGI app iterator
+ Note that if the auditor is
refactored to not use the DiskFile
class, see
https://review.openstack.org/44787
then we don't need the
was_quarantined attribute
A reference "in-memory" object server implementation of a backend
DiskFile class in swift/obj/mem_server.py and
swift/obj/mem_diskfile.py.
One can also reference
https://github.com/portante/gluster-swift/commits/diskfile for the
proposed integration with the gluster-swift code based on these
changes.
Change-Id: I44e153fdb405a5743e9c05349008f94136764916
Signed-off-by: Peter Portante <peter.portante@redhat.com>
2013-09-12 19:51:18 -04:00
|
|
|
path = os.path.join(self.disk_file._datadir, timestamp + '.data')
|
|
|
|
mkdirs(self.disk_file._datadir)
|
2013-09-11 22:42:19 -07:00
|
|
|
with open(path, 'w') as f:
|
|
|
|
write_metadata(f, {'name': '/a/c/o'})
|
2013-09-13 13:55:10 -06:00
|
|
|
auditor_worker = auditor.AuditorWorker(self.conf, self.logger)
|
2013-09-11 22:42:19 -07:00
|
|
|
|
|
|
|
def blowup(*args):
|
|
|
|
raise NameError('tpyo')
|
Alternate DiskFile constructor for efficient auditing.
Before, to audit an object, the auditor:
- calls listdir(object-hash-dir)
- picks out the .data file from the listing
- pulls out all N of its user.swift.metadata* xattrs
- unpickles them
- pulls out the value for 'name'
- splits the name into a/c/o
- then instantiates and opens a DiskFile(a, c, o),
which does the following
- joins a/c/o back into a name
- hashes the name
- calls listdir(object-hash-dir) (AGAIN)
- picks out the .data file (and maybe .meta) from the listing (AGAIN)
- pulls out all N of its user.swift.metadata* xattrs (AGAIN)
- unpickles them (AGAIN)
- starts reading object's contents off disk
Now, the auditor simply locates the hash dir on the filesystem (saving
one listdir) and then hands it off to
DiskFileManager.get_diskfile_from_audit_location, which then
instantiates a DiskFile in a way that lazy-loads the name later
(saving one xattr reading).
As part of this, DiskFile.open() will now quarantine a hash
"directory" that's actually a file. Before, the audit location
generator would skip those, but now they make it clear into
DiskFile(). It's better to quarantine them anyway, as they're not
doing any good the way they are.
Also, removed the was_quarantined attribute on DiskFileReader. Now you
can pass in a quarantine_hook callable to DiskFile.reader() that gets
called if the file was quarantined. Default is to log quarantines, but
otherwise do nothing.
Change-Id: I04fc14569982a17fcc89e00832725ae71009335a
2013-10-28 14:57:18 -07:00
|
|
|
with mock.patch.object(DiskFileManager,
|
|
|
|
'get_diskfile_from_audit_location', blowup):
|
2013-09-13 13:55:10 -06:00
|
|
|
self.assertRaises(NameError, auditor_worker.object_audit,
|
Alternate DiskFile constructor for efficient auditing.
Before, to audit an object, the auditor:
- calls listdir(object-hash-dir)
- picks out the .data file from the listing
- pulls out all N of its user.swift.metadata* xattrs
- unpickles them
- pulls out the value for 'name'
- splits the name into a/c/o
- then instantiates and opens a DiskFile(a, c, o),
which does the following
- joins a/c/o back into a name
- hashes the name
- calls listdir(object-hash-dir) (AGAIN)
- picks out the .data file (and maybe .meta) from the listing (AGAIN)
- pulls out all N of its user.swift.metadata* xattrs (AGAIN)
- unpickles them (AGAIN)
- starts reading object's contents off disk
Now, the auditor simply locates the hash dir on the filesystem (saving
one listdir) and then hands it off to
DiskFileManager.get_diskfile_from_audit_location, which then
instantiates a DiskFile in a way that lazy-loads the name later
(saving one xattr reading).
As part of this, DiskFile.open() will now quarantine a hash
"directory" that's actually a file. Before, the audit location
generator would skip those, but now they make it clear into
DiskFile(). It's better to quarantine them anyway, as they're not
doing any good the way they are.
Also, removed the was_quarantined attribute on DiskFileReader. Now you
can pass in a quarantine_hook callable to DiskFile.reader() that gets
called if the file was quarantined. Default is to log quarantines, but
otherwise do nothing.
Change-Id: I04fc14569982a17fcc89e00832725ae71009335a
2013-10-28 14:57:18 -07:00
|
|
|
AuditLocation(os.path.dirname(path), 'sda', '0'))
|
2013-09-11 22:42:19 -07:00
|
|
|
|
|
|
|
def test_failsafe_object_audit_will_swallow_errors_in_tests(self):
|
|
|
|
timestamp = str(normalize_timestamp(time.time()))
|
DiskFile API, with reference implementation
Refactor on-disk knowledge out of the object server by pushing the
async update pickle creation to the new DiskFileManager class (name is
not the best, so suggestions welcome), along with the REPLICATOR
method logic. We also move the mount checking and thread pool storage
to the new ondisk.Devices object, which then also becomes the new home
of the audit_location_generator method.
For the object server, a new setup() method is now called at the end
of the controller's construction, and the _diskfile() method has been
renamed to get_diskfile(), to allow implementation specific behavior.
We then hide the need for the REST API layer to know how and where
quarantining needs to be performed. There are now two places it is
checked internally, on open() where we verify the content-length,
name, and x-timestamp metadata, and in the reader on close where the
etag metadata is checked if the entire file was read.
We add a reader class to allow implementations to isolate the WSGI
handling code for that specific environment (it is used no-where else
in the REST APIs). This simplifies the caller's code to just use a
"with" statement once open to avoid multiple points where close needs
to be called.
For a full historical comparison, including the usage patterns see:
https://gist.github.com/portante/5488238
(as of master, 2b639f5, Merge
"Fix 500 from account-quota This Commit
middleware")
--------------------------------+------------------------------------
DiskFileManager(conf)
Methods:
.pickle_async_update()
.get_diskfile()
.get_hashes()
Attributes:
.devices
.logger
.disk_chunk_size
.keep_cache_size
.bytes_per_sync
DiskFile(a,c,o,keep_data_fp=) DiskFile(a,c,o)
Methods: Methods:
*.__iter__()
.close(verify_file=)
.is_deleted()
.is_expired()
.quarantine()
.get_data_file_size()
.open()
.read_metadata()
.create() .create()
.write_metadata()
.delete() .delete()
Attributes: Attributes:
.quarantined_dir
.keep_cache
.metadata
*DiskFileReader()
Methods:
.__iter__()
.close()
Attributes:
+.was_quarantined
DiskWriter() DiskFileWriter()
Methods: Methods:
.write() .write()
.put() .put()
* Note that the DiskFile class * Note that the DiskReader() object
implements all the methods returned by the
necessary for a WSGI app DiskFileOpened.reader() method
iterator implements all the methods
necessary for a WSGI app iterator
+ Note that if the auditor is
refactored to not use the DiskFile
class, see
https://review.openstack.org/44787
then we don't need the
was_quarantined attribute
A reference "in-memory" object server implementation of a backend
DiskFile class in swift/obj/mem_server.py and
swift/obj/mem_diskfile.py.
One can also reference
https://github.com/portante/gluster-swift/commits/diskfile for the
proposed integration with the gluster-swift code based on these
changes.
Change-Id: I44e153fdb405a5743e9c05349008f94136764916
Signed-off-by: Peter Portante <peter.portante@redhat.com>
2013-09-12 19:51:18 -04:00
|
|
|
path = os.path.join(self.disk_file._datadir, timestamp + '.data')
|
|
|
|
mkdirs(self.disk_file._datadir)
|
2013-09-11 22:42:19 -07:00
|
|
|
with open(path, 'w') as f:
|
|
|
|
write_metadata(f, {'name': '/a/c/o'})
|
2013-09-13 13:55:10 -06:00
|
|
|
auditor_worker = auditor.AuditorWorker(self.conf, self.logger)
|
2013-09-11 22:42:19 -07:00
|
|
|
|
|
|
|
def blowup(*args):
|
|
|
|
raise NameError('tpyo')
|
|
|
|
with mock.patch('swift.obj.diskfile.DiskFile',
|
|
|
|
blowup):
|
Alternate DiskFile constructor for efficient auditing.
Before, to audit an object, the auditor:
- calls listdir(object-hash-dir)
- picks out the .data file from the listing
- pulls out all N of its user.swift.metadata* xattrs
- unpickles them
- pulls out the value for 'name'
- splits the name into a/c/o
- then instantiates and opens a DiskFile(a, c, o),
which does the following
- joins a/c/o back into a name
- hashes the name
- calls listdir(object-hash-dir) (AGAIN)
- picks out the .data file (and maybe .meta) from the listing (AGAIN)
- pulls out all N of its user.swift.metadata* xattrs (AGAIN)
- unpickles them (AGAIN)
- starts reading object's contents off disk
Now, the auditor simply locates the hash dir on the filesystem (saving
one listdir) and then hands it off to
DiskFileManager.get_diskfile_from_audit_location, which then
instantiates a DiskFile in a way that lazy-loads the name later
(saving one xattr reading).
As part of this, DiskFile.open() will now quarantine a hash
"directory" that's actually a file. Before, the audit location
generator would skip those, but now they make it clear into
DiskFile(). It's better to quarantine them anyway, as they're not
doing any good the way they are.
Also, removed the was_quarantined attribute on DiskFileReader. Now you
can pass in a quarantine_hook callable to DiskFile.reader() that gets
called if the file was quarantined. Default is to log quarantines, but
otherwise do nothing.
Change-Id: I04fc14569982a17fcc89e00832725ae71009335a
2013-10-28 14:57:18 -07:00
|
|
|
auditor_worker.failsafe_object_audit(
|
|
|
|
AuditLocation(os.path.dirname(path), 'sda', '0'))
|
2013-09-13 13:55:10 -06:00
|
|
|
self.assertEquals(auditor_worker.errors, 1)
|
2013-09-11 22:42:19 -07:00
|
|
|
|
2013-06-25 15:16:35 -04:00
|
|
|
def test_generic_exception_handling(self):
|
2013-09-13 13:55:10 -06:00
|
|
|
auditor_worker = auditor.AuditorWorker(self.conf, self.logger)
|
2013-06-25 15:16:35 -04:00
|
|
|
timestamp = str(normalize_timestamp(time.time()))
|
2013-09-13 13:55:10 -06:00
|
|
|
pre_errors = auditor_worker.errors
|
2013-06-25 15:16:35 -04:00
|
|
|
data = '0' * 1024
|
|
|
|
etag = md5()
|
2013-09-03 10:26:39 -04:00
|
|
|
with self.disk_file.create() as writer:
|
2013-06-25 15:16:35 -04:00
|
|
|
writer.write(data)
|
|
|
|
etag.update(data)
|
|
|
|
etag = etag.hexdigest()
|
|
|
|
metadata = {
|
|
|
|
'ETag': etag,
|
|
|
|
'X-Timestamp': timestamp,
|
DiskFile API, with reference implementation
Refactor on-disk knowledge out of the object server by pushing the
async update pickle creation to the new DiskFileManager class (name is
not the best, so suggestions welcome), along with the REPLICATOR
method logic. We also move the mount checking and thread pool storage
to the new ondisk.Devices object, which then also becomes the new home
of the audit_location_generator method.
For the object server, a new setup() method is now called at the end
of the controller's construction, and the _diskfile() method has been
renamed to get_diskfile(), to allow implementation specific behavior.
We then hide the need for the REST API layer to know how and where
quarantining needs to be performed. There are now two places it is
checked internally, on open() where we verify the content-length,
name, and x-timestamp metadata, and in the reader on close where the
etag metadata is checked if the entire file was read.
We add a reader class to allow implementations to isolate the WSGI
handling code for that specific environment (it is used no-where else
in the REST APIs). This simplifies the caller's code to just use a
"with" statement once open to avoid multiple points where close needs
to be called.
For a full historical comparison, including the usage patterns see:
https://gist.github.com/portante/5488238
(as of master, 2b639f5, Merge
"Fix 500 from account-quota This Commit
middleware")
--------------------------------+------------------------------------
DiskFileManager(conf)
Methods:
.pickle_async_update()
.get_diskfile()
.get_hashes()
Attributes:
.devices
.logger
.disk_chunk_size
.keep_cache_size
.bytes_per_sync
DiskFile(a,c,o,keep_data_fp=) DiskFile(a,c,o)
Methods: Methods:
*.__iter__()
.close(verify_file=)
.is_deleted()
.is_expired()
.quarantine()
.get_data_file_size()
.open()
.read_metadata()
.create() .create()
.write_metadata()
.delete() .delete()
Attributes: Attributes:
.quarantined_dir
.keep_cache
.metadata
*DiskFileReader()
Methods:
.__iter__()
.close()
Attributes:
+.was_quarantined
DiskWriter() DiskFileWriter()
Methods: Methods:
.write() .write()
.put() .put()
* Note that the DiskFile class * Note that the DiskReader() object
implements all the methods returned by the
necessary for a WSGI app DiskFileOpened.reader() method
iterator implements all the methods
necessary for a WSGI app iterator
+ Note that if the auditor is
refactored to not use the DiskFile
class, see
https://review.openstack.org/44787
then we don't need the
was_quarantined attribute
A reference "in-memory" object server implementation of a backend
DiskFile class in swift/obj/mem_server.py and
swift/obj/mem_diskfile.py.
One can also reference
https://github.com/portante/gluster-swift/commits/diskfile for the
proposed integration with the gluster-swift code based on these
changes.
Change-Id: I44e153fdb405a5743e9c05349008f94136764916
Signed-off-by: Peter Portante <peter.portante@redhat.com>
2013-09-12 19:51:18 -04:00
|
|
|
'Content-Length': str(os.fstat(writer._fd).st_size),
|
2013-06-25 15:16:35 -04:00
|
|
|
}
|
|
|
|
writer.put(metadata)
|
2013-07-17 16:32:35 -07:00
|
|
|
with mock.patch('swift.obj.diskfile.DiskFile',
|
2013-06-25 15:16:35 -04:00
|
|
|
lambda *_: 1 / 0):
|
2013-09-13 13:55:10 -06:00
|
|
|
auditor_worker.audit_all_objects()
|
|
|
|
self.assertEquals(auditor_worker.errors, pre_errors + 1)
|
2010-12-28 14:54:00 -08:00
|
|
|
|
2010-12-17 00:27:08 -08:00
|
|
|
def test_object_run_once_pass(self):
|
2013-09-13 13:55:10 -06:00
|
|
|
auditor_worker = auditor.AuditorWorker(self.conf, self.logger)
|
|
|
|
auditor_worker.log_time = 0
|
2010-12-17 00:27:08 -08:00
|
|
|
timestamp = str(normalize_timestamp(time.time()))
|
2013-09-13 13:55:10 -06:00
|
|
|
pre_quarantines = auditor_worker.quarantines
|
2010-12-17 00:27:08 -08:00
|
|
|
data = '0' * 1024
|
|
|
|
etag = md5()
|
2013-09-03 10:26:39 -04:00
|
|
|
with self.disk_file.create() as writer:
|
2013-04-18 20:42:36 -04:00
|
|
|
writer.write(data)
|
2010-12-17 00:27:08 -08:00
|
|
|
etag.update(data)
|
|
|
|
etag = etag.hexdigest()
|
|
|
|
metadata = {
|
|
|
|
'ETag': etag,
|
|
|
|
'X-Timestamp': timestamp,
|
DiskFile API, with reference implementation
Refactor on-disk knowledge out of the object server by pushing the
async update pickle creation to the new DiskFileManager class (name is
not the best, so suggestions welcome), along with the REPLICATOR
method logic. We also move the mount checking and thread pool storage
to the new ondisk.Devices object, which then also becomes the new home
of the audit_location_generator method.
For the object server, a new setup() method is now called at the end
of the controller's construction, and the _diskfile() method has been
renamed to get_diskfile(), to allow implementation specific behavior.
We then hide the need for the REST API layer to know how and where
quarantining needs to be performed. There are now two places it is
checked internally, on open() where we verify the content-length,
name, and x-timestamp metadata, and in the reader on close where the
etag metadata is checked if the entire file was read.
We add a reader class to allow implementations to isolate the WSGI
handling code for that specific environment (it is used no-where else
in the REST APIs). This simplifies the caller's code to just use a
"with" statement once open to avoid multiple points where close needs
to be called.
For a full historical comparison, including the usage patterns see:
https://gist.github.com/portante/5488238
(as of master, 2b639f5, Merge
"Fix 500 from account-quota This Commit
middleware")
--------------------------------+------------------------------------
DiskFileManager(conf)
Methods:
.pickle_async_update()
.get_diskfile()
.get_hashes()
Attributes:
.devices
.logger
.disk_chunk_size
.keep_cache_size
.bytes_per_sync
DiskFile(a,c,o,keep_data_fp=) DiskFile(a,c,o)
Methods: Methods:
*.__iter__()
.close(verify_file=)
.is_deleted()
.is_expired()
.quarantine()
.get_data_file_size()
.open()
.read_metadata()
.create() .create()
.write_metadata()
.delete() .delete()
Attributes: Attributes:
.quarantined_dir
.keep_cache
.metadata
*DiskFileReader()
Methods:
.__iter__()
.close()
Attributes:
+.was_quarantined
DiskWriter() DiskFileWriter()
Methods: Methods:
.write() .write()
.put() .put()
* Note that the DiskFile class * Note that the DiskReader() object
implements all the methods returned by the
necessary for a WSGI app DiskFileOpened.reader() method
iterator implements all the methods
necessary for a WSGI app iterator
+ Note that if the auditor is
refactored to not use the DiskFile
class, see
https://review.openstack.org/44787
then we don't need the
was_quarantined attribute
A reference "in-memory" object server implementation of a backend
DiskFile class in swift/obj/mem_server.py and
swift/obj/mem_diskfile.py.
One can also reference
https://github.com/portante/gluster-swift/commits/diskfile for the
proposed integration with the gluster-swift code based on these
changes.
Change-Id: I44e153fdb405a5743e9c05349008f94136764916
Signed-off-by: Peter Portante <peter.portante@redhat.com>
2013-09-12 19:51:18 -04:00
|
|
|
'Content-Length': str(os.fstat(writer._fd).st_size),
|
2010-12-17 00:27:08 -08:00
|
|
|
}
|
2013-04-18 20:42:36 -04:00
|
|
|
writer.put(metadata)
|
2013-09-13 13:55:10 -06:00
|
|
|
auditor_worker.audit_all_objects()
|
|
|
|
self.assertEquals(auditor_worker.quarantines, pre_quarantines)
|
|
|
|
self.assertEquals(auditor_worker.stats_buckets[1024], 1)
|
|
|
|
self.assertEquals(auditor_worker.stats_buckets[10240], 0)
|
2010-12-17 00:27:08 -08:00
|
|
|
|
2010-12-28 14:54:00 -08:00
|
|
|
def test_object_run_once_no_sda(self):
|
2013-09-13 13:55:10 -06:00
|
|
|
auditor_worker = auditor.AuditorWorker(self.conf, self.logger)
|
2010-12-17 00:27:08 -08:00
|
|
|
timestamp = str(normalize_timestamp(time.time()))
|
2013-09-13 13:55:10 -06:00
|
|
|
pre_quarantines = auditor_worker.quarantines
|
2010-12-17 00:27:08 -08:00
|
|
|
data = '0' * 1024
|
|
|
|
etag = md5()
|
2013-09-03 10:26:39 -04:00
|
|
|
with self.disk_file.create() as writer:
|
2013-04-18 20:42:36 -04:00
|
|
|
writer.write(data)
|
2010-12-17 00:27:08 -08:00
|
|
|
etag.update(data)
|
|
|
|
etag = etag.hexdigest()
|
|
|
|
metadata = {
|
|
|
|
'ETag': etag,
|
|
|
|
'X-Timestamp': timestamp,
|
DiskFile API, with reference implementation
Refactor on-disk knowledge out of the object server by pushing the
async update pickle creation to the new DiskFileManager class (name is
not the best, so suggestions welcome), along with the REPLICATOR
method logic. We also move the mount checking and thread pool storage
to the new ondisk.Devices object, which then also becomes the new home
of the audit_location_generator method.
For the object server, a new setup() method is now called at the end
of the controller's construction, and the _diskfile() method has been
renamed to get_diskfile(), to allow implementation specific behavior.
We then hide the need for the REST API layer to know how and where
quarantining needs to be performed. There are now two places it is
checked internally, on open() where we verify the content-length,
name, and x-timestamp metadata, and in the reader on close where the
etag metadata is checked if the entire file was read.
We add a reader class to allow implementations to isolate the WSGI
handling code for that specific environment (it is used no-where else
in the REST APIs). This simplifies the caller's code to just use a
"with" statement once open to avoid multiple points where close needs
to be called.
For a full historical comparison, including the usage patterns see:
https://gist.github.com/portante/5488238
(as of master, 2b639f5, Merge
"Fix 500 from account-quota This Commit
middleware")
--------------------------------+------------------------------------
DiskFileManager(conf)
Methods:
.pickle_async_update()
.get_diskfile()
.get_hashes()
Attributes:
.devices
.logger
.disk_chunk_size
.keep_cache_size
.bytes_per_sync
DiskFile(a,c,o,keep_data_fp=) DiskFile(a,c,o)
Methods: Methods:
*.__iter__()
.close(verify_file=)
.is_deleted()
.is_expired()
.quarantine()
.get_data_file_size()
.open()
.read_metadata()
.create() .create()
.write_metadata()
.delete() .delete()
Attributes: Attributes:
.quarantined_dir
.keep_cache
.metadata
*DiskFileReader()
Methods:
.__iter__()
.close()
Attributes:
+.was_quarantined
DiskWriter() DiskFileWriter()
Methods: Methods:
.write() .write()
.put() .put()
* Note that the DiskFile class * Note that the DiskReader() object
implements all the methods returned by the
necessary for a WSGI app DiskFileOpened.reader() method
iterator implements all the methods
necessary for a WSGI app iterator
+ Note that if the auditor is
refactored to not use the DiskFile
class, see
https://review.openstack.org/44787
then we don't need the
was_quarantined attribute
A reference "in-memory" object server implementation of a backend
DiskFile class in swift/obj/mem_server.py and
swift/obj/mem_diskfile.py.
One can also reference
https://github.com/portante/gluster-swift/commits/diskfile for the
proposed integration with the gluster-swift code based on these
changes.
Change-Id: I44e153fdb405a5743e9c05349008f94136764916
Signed-off-by: Peter Portante <peter.portante@redhat.com>
2013-09-12 19:51:18 -04:00
|
|
|
'Content-Length': str(os.fstat(writer._fd).st_size),
|
2010-12-17 00:27:08 -08:00
|
|
|
}
|
2013-04-18 20:42:36 -04:00
|
|
|
writer.put(metadata)
|
DiskFile API, with reference implementation
Refactor on-disk knowledge out of the object server by pushing the
async update pickle creation to the new DiskFileManager class (name is
not the best, so suggestions welcome), along with the REPLICATOR
method logic. We also move the mount checking and thread pool storage
to the new ondisk.Devices object, which then also becomes the new home
of the audit_location_generator method.
For the object server, a new setup() method is now called at the end
of the controller's construction, and the _diskfile() method has been
renamed to get_diskfile(), to allow implementation specific behavior.
We then hide the need for the REST API layer to know how and where
quarantining needs to be performed. There are now two places it is
checked internally, on open() where we verify the content-length,
name, and x-timestamp metadata, and in the reader on close where the
etag metadata is checked if the entire file was read.
We add a reader class to allow implementations to isolate the WSGI
handling code for that specific environment (it is used no-where else
in the REST APIs). This simplifies the caller's code to just use a
"with" statement once open to avoid multiple points where close needs
to be called.
For a full historical comparison, including the usage patterns see:
https://gist.github.com/portante/5488238
(as of master, 2b639f5, Merge
"Fix 500 from account-quota This Commit
middleware")
--------------------------------+------------------------------------
DiskFileManager(conf)
Methods:
.pickle_async_update()
.get_diskfile()
.get_hashes()
Attributes:
.devices
.logger
.disk_chunk_size
.keep_cache_size
.bytes_per_sync
DiskFile(a,c,o,keep_data_fp=) DiskFile(a,c,o)
Methods: Methods:
*.__iter__()
.close(verify_file=)
.is_deleted()
.is_expired()
.quarantine()
.get_data_file_size()
.open()
.read_metadata()
.create() .create()
.write_metadata()
.delete() .delete()
Attributes: Attributes:
.quarantined_dir
.keep_cache
.metadata
*DiskFileReader()
Methods:
.__iter__()
.close()
Attributes:
+.was_quarantined
DiskWriter() DiskFileWriter()
Methods: Methods:
.write() .write()
.put() .put()
* Note that the DiskFile class * Note that the DiskReader() object
implements all the methods returned by the
necessary for a WSGI app DiskFileOpened.reader() method
iterator implements all the methods
necessary for a WSGI app iterator
+ Note that if the auditor is
refactored to not use the DiskFile
class, see
https://review.openstack.org/44787
then we don't need the
was_quarantined attribute
A reference "in-memory" object server implementation of a backend
DiskFile class in swift/obj/mem_server.py and
swift/obj/mem_diskfile.py.
One can also reference
https://github.com/portante/gluster-swift/commits/diskfile for the
proposed integration with the gluster-swift code based on these
changes.
Change-Id: I44e153fdb405a5743e9c05349008f94136764916
Signed-off-by: Peter Portante <peter.portante@redhat.com>
2013-09-12 19:51:18 -04:00
|
|
|
os.write(writer._fd, 'extra_data')
|
2013-09-13 13:55:10 -06:00
|
|
|
auditor_worker.audit_all_objects()
|
|
|
|
self.assertEquals(auditor_worker.quarantines, pre_quarantines + 1)
|
2010-12-17 00:27:08 -08:00
|
|
|
|
2010-12-28 14:54:00 -08:00
|
|
|
def test_object_run_once_multi_devices(self):
|
2013-09-13 13:55:10 -06:00
|
|
|
auditor_worker = auditor.AuditorWorker(self.conf, self.logger)
|
2010-12-28 14:54:00 -08:00
|
|
|
timestamp = str(normalize_timestamp(time.time()))
|
2013-09-13 13:55:10 -06:00
|
|
|
pre_quarantines = auditor_worker.quarantines
|
2010-12-28 14:54:00 -08:00
|
|
|
data = '0' * 10
|
|
|
|
etag = md5()
|
2013-09-03 10:26:39 -04:00
|
|
|
with self.disk_file.create() as writer:
|
2013-04-18 20:42:36 -04:00
|
|
|
writer.write(data)
|
2010-12-28 14:54:00 -08:00
|
|
|
etag.update(data)
|
|
|
|
etag = etag.hexdigest()
|
|
|
|
metadata = {
|
|
|
|
'ETag': etag,
|
|
|
|
'X-Timestamp': timestamp,
|
DiskFile API, with reference implementation
Refactor on-disk knowledge out of the object server by pushing the
async update pickle creation to the new DiskFileManager class (name is
not the best, so suggestions welcome), along with the REPLICATOR
method logic. We also move the mount checking and thread pool storage
to the new ondisk.Devices object, which then also becomes the new home
of the audit_location_generator method.
For the object server, a new setup() method is now called at the end
of the controller's construction, and the _diskfile() method has been
renamed to get_diskfile(), to allow implementation specific behavior.
We then hide the need for the REST API layer to know how and where
quarantining needs to be performed. There are now two places it is
checked internally, on open() where we verify the content-length,
name, and x-timestamp metadata, and in the reader on close where the
etag metadata is checked if the entire file was read.
We add a reader class to allow implementations to isolate the WSGI
handling code for that specific environment (it is used no-where else
in the REST APIs). This simplifies the caller's code to just use a
"with" statement once open to avoid multiple points where close needs
to be called.
For a full historical comparison, including the usage patterns see:
https://gist.github.com/portante/5488238
(as of master, 2b639f5, Merge
"Fix 500 from account-quota This Commit
middleware")
--------------------------------+------------------------------------
DiskFileManager(conf)
Methods:
.pickle_async_update()
.get_diskfile()
.get_hashes()
Attributes:
.devices
.logger
.disk_chunk_size
.keep_cache_size
.bytes_per_sync
DiskFile(a,c,o,keep_data_fp=) DiskFile(a,c,o)
Methods: Methods:
*.__iter__()
.close(verify_file=)
.is_deleted()
.is_expired()
.quarantine()
.get_data_file_size()
.open()
.read_metadata()
.create() .create()
.write_metadata()
.delete() .delete()
Attributes: Attributes:
.quarantined_dir
.keep_cache
.metadata
*DiskFileReader()
Methods:
.__iter__()
.close()
Attributes:
+.was_quarantined
DiskWriter() DiskFileWriter()
Methods: Methods:
.write() .write()
.put() .put()
* Note that the DiskFile class * Note that the DiskReader() object
implements all the methods returned by the
necessary for a WSGI app DiskFileOpened.reader() method
iterator implements all the methods
necessary for a WSGI app iterator
+ Note that if the auditor is
refactored to not use the DiskFile
class, see
https://review.openstack.org/44787
then we don't need the
was_quarantined attribute
A reference "in-memory" object server implementation of a backend
DiskFile class in swift/obj/mem_server.py and
swift/obj/mem_diskfile.py.
One can also reference
https://github.com/portante/gluster-swift/commits/diskfile for the
proposed integration with the gluster-swift code based on these
changes.
Change-Id: I44e153fdb405a5743e9c05349008f94136764916
Signed-off-by: Peter Portante <peter.portante@redhat.com>
2013-09-12 19:51:18 -04:00
|
|
|
'Content-Length': str(os.fstat(writer._fd).st_size),
|
2010-12-28 14:54:00 -08:00
|
|
|
}
|
2013-04-18 20:42:36 -04:00
|
|
|
writer.put(metadata)
|
2013-09-13 13:55:10 -06:00
|
|
|
auditor_worker.audit_all_objects()
|
DiskFile API, with reference implementation
Refactor on-disk knowledge out of the object server by pushing the
async update pickle creation to the new DiskFileManager class (name is
not the best, so suggestions welcome), along with the REPLICATOR
method logic. We also move the mount checking and thread pool storage
to the new ondisk.Devices object, which then also becomes the new home
of the audit_location_generator method.
For the object server, a new setup() method is now called at the end
of the controller's construction, and the _diskfile() method has been
renamed to get_diskfile(), to allow implementation specific behavior.
We then hide the need for the REST API layer to know how and where
quarantining needs to be performed. There are now two places it is
checked internally, on open() where we verify the content-length,
name, and x-timestamp metadata, and in the reader on close where the
etag metadata is checked if the entire file was read.
We add a reader class to allow implementations to isolate the WSGI
handling code for that specific environment (it is used no-where else
in the REST APIs). This simplifies the caller's code to just use a
"with" statement once open to avoid multiple points where close needs
to be called.
For a full historical comparison, including the usage patterns see:
https://gist.github.com/portante/5488238
(as of master, 2b639f5, Merge
"Fix 500 from account-quota This Commit
middleware")
--------------------------------+------------------------------------
DiskFileManager(conf)
Methods:
.pickle_async_update()
.get_diskfile()
.get_hashes()
Attributes:
.devices
.logger
.disk_chunk_size
.keep_cache_size
.bytes_per_sync
DiskFile(a,c,o,keep_data_fp=) DiskFile(a,c,o)
Methods: Methods:
*.__iter__()
.close(verify_file=)
.is_deleted()
.is_expired()
.quarantine()
.get_data_file_size()
.open()
.read_metadata()
.create() .create()
.write_metadata()
.delete() .delete()
Attributes: Attributes:
.quarantined_dir
.keep_cache
.metadata
*DiskFileReader()
Methods:
.__iter__()
.close()
Attributes:
+.was_quarantined
DiskWriter() DiskFileWriter()
Methods: Methods:
.write() .write()
.put() .put()
* Note that the DiskFile class * Note that the DiskReader() object
implements all the methods returned by the
necessary for a WSGI app DiskFileOpened.reader() method
iterator implements all the methods
necessary for a WSGI app iterator
+ Note that if the auditor is
refactored to not use the DiskFile
class, see
https://review.openstack.org/44787
then we don't need the
was_quarantined attribute
A reference "in-memory" object server implementation of a backend
DiskFile class in swift/obj/mem_server.py and
swift/obj/mem_diskfile.py.
One can also reference
https://github.com/portante/gluster-swift/commits/diskfile for the
proposed integration with the gluster-swift code based on these
changes.
Change-Id: I44e153fdb405a5743e9c05349008f94136764916
Signed-off-by: Peter Portante <peter.portante@redhat.com>
2013-09-12 19:51:18 -04:00
|
|
|
self.disk_file = self.df_mgr.get_diskfile('sda', '0', 'a', 'c', 'ob')
|
2010-12-28 14:54:00 -08:00
|
|
|
data = '1' * 10
|
|
|
|
etag = md5()
|
2013-09-03 10:26:39 -04:00
|
|
|
with self.disk_file.create() as writer:
|
2013-04-18 20:42:36 -04:00
|
|
|
writer.write(data)
|
2010-12-28 14:54:00 -08:00
|
|
|
etag.update(data)
|
|
|
|
etag = etag.hexdigest()
|
|
|
|
metadata = {
|
|
|
|
'ETag': etag,
|
|
|
|
'X-Timestamp': timestamp,
|
DiskFile API, with reference implementation
Refactor on-disk knowledge out of the object server by pushing the
async update pickle creation to the new DiskFileManager class (name is
not the best, so suggestions welcome), along with the REPLICATOR
method logic. We also move the mount checking and thread pool storage
to the new ondisk.Devices object, which then also becomes the new home
of the audit_location_generator method.
For the object server, a new setup() method is now called at the end
of the controller's construction, and the _diskfile() method has been
renamed to get_diskfile(), to allow implementation specific behavior.
We then hide the need for the REST API layer to know how and where
quarantining needs to be performed. There are now two places it is
checked internally, on open() where we verify the content-length,
name, and x-timestamp metadata, and in the reader on close where the
etag metadata is checked if the entire file was read.
We add a reader class to allow implementations to isolate the WSGI
handling code for that specific environment (it is used no-where else
in the REST APIs). This simplifies the caller's code to just use a
"with" statement once open to avoid multiple points where close needs
to be called.
For a full historical comparison, including the usage patterns see:
https://gist.github.com/portante/5488238
(as of master, 2b639f5, Merge
"Fix 500 from account-quota This Commit
middleware")
--------------------------------+------------------------------------
DiskFileManager(conf)
Methods:
.pickle_async_update()
.get_diskfile()
.get_hashes()
Attributes:
.devices
.logger
.disk_chunk_size
.keep_cache_size
.bytes_per_sync
DiskFile(a,c,o,keep_data_fp=) DiskFile(a,c,o)
Methods: Methods:
*.__iter__()
.close(verify_file=)
.is_deleted()
.is_expired()
.quarantine()
.get_data_file_size()
.open()
.read_metadata()
.create() .create()
.write_metadata()
.delete() .delete()
Attributes: Attributes:
.quarantined_dir
.keep_cache
.metadata
*DiskFileReader()
Methods:
.__iter__()
.close()
Attributes:
+.was_quarantined
DiskWriter() DiskFileWriter()
Methods: Methods:
.write() .write()
.put() .put()
* Note that the DiskFile class * Note that the DiskReader() object
implements all the methods returned by the
necessary for a WSGI app DiskFileOpened.reader() method
iterator implements all the methods
necessary for a WSGI app iterator
+ Note that if the auditor is
refactored to not use the DiskFile
class, see
https://review.openstack.org/44787
then we don't need the
was_quarantined attribute
A reference "in-memory" object server implementation of a backend
DiskFile class in swift/obj/mem_server.py and
swift/obj/mem_diskfile.py.
One can also reference
https://github.com/portante/gluster-swift/commits/diskfile for the
proposed integration with the gluster-swift code based on these
changes.
Change-Id: I44e153fdb405a5743e9c05349008f94136764916
Signed-off-by: Peter Portante <peter.portante@redhat.com>
2013-09-12 19:51:18 -04:00
|
|
|
'Content-Length': str(os.fstat(writer._fd).st_size),
|
2010-12-28 14:54:00 -08:00
|
|
|
}
|
2013-04-18 20:42:36 -04:00
|
|
|
writer.put(metadata)
|
DiskFile API, with reference implementation
Refactor on-disk knowledge out of the object server by pushing the
async update pickle creation to the new DiskFileManager class (name is
not the best, so suggestions welcome), along with the REPLICATOR
method logic. We also move the mount checking and thread pool storage
to the new ondisk.Devices object, which then also becomes the new home
of the audit_location_generator method.
For the object server, a new setup() method is now called at the end
of the controller's construction, and the _diskfile() method has been
renamed to get_diskfile(), to allow implementation specific behavior.
We then hide the need for the REST API layer to know how and where
quarantining needs to be performed. There are now two places it is
checked internally, on open() where we verify the content-length,
name, and x-timestamp metadata, and in the reader on close where the
etag metadata is checked if the entire file was read.
We add a reader class to allow implementations to isolate the WSGI
handling code for that specific environment (it is used no-where else
in the REST APIs). This simplifies the caller's code to just use a
"with" statement once open to avoid multiple points where close needs
to be called.
For a full historical comparison, including the usage patterns see:
https://gist.github.com/portante/5488238
(as of master, 2b639f5, Merge
"Fix 500 from account-quota This Commit
middleware")
--------------------------------+------------------------------------
DiskFileManager(conf)
Methods:
.pickle_async_update()
.get_diskfile()
.get_hashes()
Attributes:
.devices
.logger
.disk_chunk_size
.keep_cache_size
.bytes_per_sync
DiskFile(a,c,o,keep_data_fp=) DiskFile(a,c,o)
Methods: Methods:
*.__iter__()
.close(verify_file=)
.is_deleted()
.is_expired()
.quarantine()
.get_data_file_size()
.open()
.read_metadata()
.create() .create()
.write_metadata()
.delete() .delete()
Attributes: Attributes:
.quarantined_dir
.keep_cache
.metadata
*DiskFileReader()
Methods:
.__iter__()
.close()
Attributes:
+.was_quarantined
DiskWriter() DiskFileWriter()
Methods: Methods:
.write() .write()
.put() .put()
* Note that the DiskFile class * Note that the DiskReader() object
implements all the methods returned by the
necessary for a WSGI app DiskFileOpened.reader() method
iterator implements all the methods
necessary for a WSGI app iterator
+ Note that if the auditor is
refactored to not use the DiskFile
class, see
https://review.openstack.org/44787
then we don't need the
was_quarantined attribute
A reference "in-memory" object server implementation of a backend
DiskFile class in swift/obj/mem_server.py and
swift/obj/mem_diskfile.py.
One can also reference
https://github.com/portante/gluster-swift/commits/diskfile for the
proposed integration with the gluster-swift code based on these
changes.
Change-Id: I44e153fdb405a5743e9c05349008f94136764916
Signed-off-by: Peter Portante <peter.portante@redhat.com>
2013-09-12 19:51:18 -04:00
|
|
|
os.write(writer._fd, 'extra_data')
|
2013-09-13 13:55:10 -06:00
|
|
|
auditor_worker.audit_all_objects()
|
|
|
|
self.assertEquals(auditor_worker.quarantines, pre_quarantines + 1)
|
2010-12-28 14:54:00 -08:00
|
|
|
|
2011-02-14 20:25:40 +00:00
|
|
|
def test_object_run_fast_track_non_zero(self):
|
|
|
|
self.auditor = auditor.ObjectAuditor(self.conf)
|
|
|
|
self.auditor.log_time = 0
|
|
|
|
data = '0' * 1024
|
|
|
|
etag = md5()
|
2013-09-03 10:26:39 -04:00
|
|
|
with self.disk_file.create() as writer:
|
2013-04-18 20:42:36 -04:00
|
|
|
writer.write(data)
|
2011-02-14 20:25:40 +00:00
|
|
|
etag.update(data)
|
|
|
|
etag = etag.hexdigest()
|
|
|
|
metadata = {
|
|
|
|
'ETag': etag,
|
|
|
|
'X-Timestamp': str(normalize_timestamp(time.time())),
|
DiskFile API, with reference implementation
Refactor on-disk knowledge out of the object server by pushing the
async update pickle creation to the new DiskFileManager class (name is
not the best, so suggestions welcome), along with the REPLICATOR
method logic. We also move the mount checking and thread pool storage
to the new ondisk.Devices object, which then also becomes the new home
of the audit_location_generator method.
For the object server, a new setup() method is now called at the end
of the controller's construction, and the _diskfile() method has been
renamed to get_diskfile(), to allow implementation specific behavior.
We then hide the need for the REST API layer to know how and where
quarantining needs to be performed. There are now two places it is
checked internally, on open() where we verify the content-length,
name, and x-timestamp metadata, and in the reader on close where the
etag metadata is checked if the entire file was read.
We add a reader class to allow implementations to isolate the WSGI
handling code for that specific environment (it is used no-where else
in the REST APIs). This simplifies the caller's code to just use a
"with" statement once open to avoid multiple points where close needs
to be called.
For a full historical comparison, including the usage patterns see:
https://gist.github.com/portante/5488238
(as of master, 2b639f5, Merge
"Fix 500 from account-quota This Commit
middleware")
--------------------------------+------------------------------------
DiskFileManager(conf)
Methods:
.pickle_async_update()
.get_diskfile()
.get_hashes()
Attributes:
.devices
.logger
.disk_chunk_size
.keep_cache_size
.bytes_per_sync
DiskFile(a,c,o,keep_data_fp=) DiskFile(a,c,o)
Methods: Methods:
*.__iter__()
.close(verify_file=)
.is_deleted()
.is_expired()
.quarantine()
.get_data_file_size()
.open()
.read_metadata()
.create() .create()
.write_metadata()
.delete() .delete()
Attributes: Attributes:
.quarantined_dir
.keep_cache
.metadata
*DiskFileReader()
Methods:
.__iter__()
.close()
Attributes:
+.was_quarantined
DiskWriter() DiskFileWriter()
Methods: Methods:
.write() .write()
.put() .put()
* Note that the DiskFile class * Note that the DiskReader() object
implements all the methods returned by the
necessary for a WSGI app DiskFileOpened.reader() method
iterator implements all the methods
necessary for a WSGI app iterator
+ Note that if the auditor is
refactored to not use the DiskFile
class, see
https://review.openstack.org/44787
then we don't need the
was_quarantined attribute
A reference "in-memory" object server implementation of a backend
DiskFile class in swift/obj/mem_server.py and
swift/obj/mem_diskfile.py.
One can also reference
https://github.com/portante/gluster-swift/commits/diskfile for the
proposed integration with the gluster-swift code based on these
changes.
Change-Id: I44e153fdb405a5743e9c05349008f94136764916
Signed-off-by: Peter Portante <peter.portante@redhat.com>
2013-09-12 19:51:18 -04:00
|
|
|
'Content-Length': str(os.fstat(writer._fd).st_size),
|
2011-02-14 20:25:40 +00:00
|
|
|
}
|
2013-04-18 20:42:36 -04:00
|
|
|
writer.put(metadata)
|
2011-02-14 20:25:40 +00:00
|
|
|
etag = md5()
|
|
|
|
etag.update('1' + '0' * 1023)
|
|
|
|
etag = etag.hexdigest()
|
|
|
|
metadata['ETag'] = etag
|
DiskFile API, with reference implementation
Refactor on-disk knowledge out of the object server by pushing the
async update pickle creation to the new DiskFileManager class (name is
not the best, so suggestions welcome), along with the REPLICATOR
method logic. We also move the mount checking and thread pool storage
to the new ondisk.Devices object, which then also becomes the new home
of the audit_location_generator method.
For the object server, a new setup() method is now called at the end
of the controller's construction, and the _diskfile() method has been
renamed to get_diskfile(), to allow implementation specific behavior.
We then hide the need for the REST API layer to know how and where
quarantining needs to be performed. There are now two places it is
checked internally, on open() where we verify the content-length,
name, and x-timestamp metadata, and in the reader on close where the
etag metadata is checked if the entire file was read.
We add a reader class to allow implementations to isolate the WSGI
handling code for that specific environment (it is used no-where else
in the REST APIs). This simplifies the caller's code to just use a
"with" statement once open to avoid multiple points where close needs
to be called.
For a full historical comparison, including the usage patterns see:
https://gist.github.com/portante/5488238
(as of master, 2b639f5, Merge
"Fix 500 from account-quota This Commit
middleware")
--------------------------------+------------------------------------
DiskFileManager(conf)
Methods:
.pickle_async_update()
.get_diskfile()
.get_hashes()
Attributes:
.devices
.logger
.disk_chunk_size
.keep_cache_size
.bytes_per_sync
DiskFile(a,c,o,keep_data_fp=) DiskFile(a,c,o)
Methods: Methods:
*.__iter__()
.close(verify_file=)
.is_deleted()
.is_expired()
.quarantine()
.get_data_file_size()
.open()
.read_metadata()
.create() .create()
.write_metadata()
.delete() .delete()
Attributes: Attributes:
.quarantined_dir
.keep_cache
.metadata
*DiskFileReader()
Methods:
.__iter__()
.close()
Attributes:
+.was_quarantined
DiskWriter() DiskFileWriter()
Methods: Methods:
.write() .write()
.put() .put()
* Note that the DiskFile class * Note that the DiskReader() object
implements all the methods returned by the
necessary for a WSGI app DiskFileOpened.reader() method
iterator implements all the methods
necessary for a WSGI app iterator
+ Note that if the auditor is
refactored to not use the DiskFile
class, see
https://review.openstack.org/44787
then we don't need the
was_quarantined attribute
A reference "in-memory" object server implementation of a backend
DiskFile class in swift/obj/mem_server.py and
swift/obj/mem_diskfile.py.
One can also reference
https://github.com/portante/gluster-swift/commits/diskfile for the
proposed integration with the gluster-swift code based on these
changes.
Change-Id: I44e153fdb405a5743e9c05349008f94136764916
Signed-off-by: Peter Portante <peter.portante@redhat.com>
2013-09-12 19:51:18 -04:00
|
|
|
write_metadata(writer._fd, metadata)
|
2011-02-14 20:25:40 +00:00
|
|
|
|
|
|
|
quarantine_path = os.path.join(self.devices,
|
|
|
|
'sda', 'quarantined', 'objects')
|
2011-02-21 16:37:12 -08:00
|
|
|
self.auditor.run_once(zero_byte_fps=50)
|
2011-02-14 20:25:40 +00:00
|
|
|
self.assertFalse(os.path.isdir(quarantine_path))
|
|
|
|
self.auditor.run_once()
|
|
|
|
self.assertTrue(os.path.isdir(quarantine_path))
|
|
|
|
|
2011-02-24 12:27:20 -08:00
|
|
|
def setup_bad_zero_byte(self, with_ts=False):
|
2011-02-14 20:25:40 +00:00
|
|
|
self.auditor = auditor.ObjectAuditor(self.conf)
|
|
|
|
self.auditor.log_time = 0
|
2011-02-24 12:27:20 -08:00
|
|
|
ts_file_path = ''
|
|
|
|
if with_ts:
|
|
|
|
name_hash = hash_path('a', 'c', 'o')
|
2013-08-31 22:36:58 -04:00
|
|
|
dir_path = os.path.join(
|
|
|
|
self.devices, 'sda',
|
|
|
|
storage_directory(DATADIR, '0', name_hash))
|
2011-02-24 12:27:20 -08:00
|
|
|
ts_file_path = os.path.join(dir_path, '99999.ts')
|
|
|
|
if not os.path.exists(dir_path):
|
|
|
|
mkdirs(dir_path)
|
|
|
|
fp = open(ts_file_path, 'w')
|
Alternate DiskFile constructor for efficient auditing.
Before, to audit an object, the auditor:
- calls listdir(object-hash-dir)
- picks out the .data file from the listing
- pulls out all N of its user.swift.metadata* xattrs
- unpickles them
- pulls out the value for 'name'
- splits the name into a/c/o
- then instantiates and opens a DiskFile(a, c, o),
which does the following
- joins a/c/o back into a name
- hashes the name
- calls listdir(object-hash-dir) (AGAIN)
- picks out the .data file (and maybe .meta) from the listing (AGAIN)
- pulls out all N of its user.swift.metadata* xattrs (AGAIN)
- unpickles them (AGAIN)
- starts reading object's contents off disk
Now, the auditor simply locates the hash dir on the filesystem (saving
one listdir) and then hands it off to
DiskFileManager.get_diskfile_from_audit_location, which then
instantiates a DiskFile in a way that lazy-loads the name later
(saving one xattr reading).
As part of this, DiskFile.open() will now quarantine a hash
"directory" that's actually a file. Before, the audit location
generator would skip those, but now they make it clear into
DiskFile(). It's better to quarantine them anyway, as they're not
doing any good the way they are.
Also, removed the was_quarantined attribute on DiskFileReader. Now you
can pass in a quarantine_hook callable to DiskFile.reader() that gets
called if the file was quarantined. Default is to log quarantines, but
otherwise do nothing.
Change-Id: I04fc14569982a17fcc89e00832725ae71009335a
2013-10-28 14:57:18 -07:00
|
|
|
write_metadata(fp, {'X-Timestamp': '99999', 'name': '/a/c/o'})
|
2011-02-24 12:27:20 -08:00
|
|
|
fp.close()
|
|
|
|
|
2011-02-14 20:25:40 +00:00
|
|
|
etag = md5()
|
2013-09-03 10:26:39 -04:00
|
|
|
with self.disk_file.create() as writer:
|
2011-02-14 20:25:40 +00:00
|
|
|
etag = etag.hexdigest()
|
|
|
|
metadata = {
|
|
|
|
'ETag': etag,
|
|
|
|
'X-Timestamp': str(normalize_timestamp(time.time())),
|
|
|
|
'Content-Length': 10,
|
|
|
|
}
|
2013-04-18 20:42:36 -04:00
|
|
|
writer.put(metadata)
|
2011-02-14 20:25:40 +00:00
|
|
|
etag = md5()
|
|
|
|
etag = etag.hexdigest()
|
|
|
|
metadata['ETag'] = etag
|
DiskFile API, with reference implementation
Refactor on-disk knowledge out of the object server by pushing the
async update pickle creation to the new DiskFileManager class (name is
not the best, so suggestions welcome), along with the REPLICATOR
method logic. We also move the mount checking and thread pool storage
to the new ondisk.Devices object, which then also becomes the new home
of the audit_location_generator method.
For the object server, a new setup() method is now called at the end
of the controller's construction, and the _diskfile() method has been
renamed to get_diskfile(), to allow implementation specific behavior.
We then hide the need for the REST API layer to know how and where
quarantining needs to be performed. There are now two places it is
checked internally, on open() where we verify the content-length,
name, and x-timestamp metadata, and in the reader on close where the
etag metadata is checked if the entire file was read.
We add a reader class to allow implementations to isolate the WSGI
handling code for that specific environment (it is used no-where else
in the REST APIs). This simplifies the caller's code to just use a
"with" statement once open to avoid multiple points where close needs
to be called.
For a full historical comparison, including the usage patterns see:
https://gist.github.com/portante/5488238
(as of master, 2b639f5, Merge
"Fix 500 from account-quota This Commit
middleware")
--------------------------------+------------------------------------
DiskFileManager(conf)
Methods:
.pickle_async_update()
.get_diskfile()
.get_hashes()
Attributes:
.devices
.logger
.disk_chunk_size
.keep_cache_size
.bytes_per_sync
DiskFile(a,c,o,keep_data_fp=) DiskFile(a,c,o)
Methods: Methods:
*.__iter__()
.close(verify_file=)
.is_deleted()
.is_expired()
.quarantine()
.get_data_file_size()
.open()
.read_metadata()
.create() .create()
.write_metadata()
.delete() .delete()
Attributes: Attributes:
.quarantined_dir
.keep_cache
.metadata
*DiskFileReader()
Methods:
.__iter__()
.close()
Attributes:
+.was_quarantined
DiskWriter() DiskFileWriter()
Methods: Methods:
.write() .write()
.put() .put()
* Note that the DiskFile class * Note that the DiskReader() object
implements all the methods returned by the
necessary for a WSGI app DiskFileOpened.reader() method
iterator implements all the methods
necessary for a WSGI app iterator
+ Note that if the auditor is
refactored to not use the DiskFile
class, see
https://review.openstack.org/44787
then we don't need the
was_quarantined attribute
A reference "in-memory" object server implementation of a backend
DiskFile class in swift/obj/mem_server.py and
swift/obj/mem_diskfile.py.
One can also reference
https://github.com/portante/gluster-swift/commits/diskfile for the
proposed integration with the gluster-swift code based on these
changes.
Change-Id: I44e153fdb405a5743e9c05349008f94136764916
Signed-off-by: Peter Portante <peter.portante@redhat.com>
2013-09-12 19:51:18 -04:00
|
|
|
write_metadata(writer._fd, metadata)
|
2011-02-24 12:27:20 -08:00
|
|
|
return ts_file_path
|
2011-02-21 16:37:12 -08:00
|
|
|
|
|
|
|
def test_object_run_fast_track_all(self):
|
|
|
|
self.setup_bad_zero_byte()
|
|
|
|
self.auditor.run_once()
|
2011-02-14 20:25:40 +00:00
|
|
|
quarantine_path = os.path.join(self.devices,
|
|
|
|
'sda', 'quarantined', 'objects')
|
|
|
|
self.assertTrue(os.path.isdir(quarantine_path))
|
|
|
|
|
2011-02-21 16:37:12 -08:00
|
|
|
def test_object_run_fast_track_zero(self):
|
|
|
|
self.setup_bad_zero_byte()
|
|
|
|
self.auditor.run_once(zero_byte_fps=50)
|
|
|
|
quarantine_path = os.path.join(self.devices,
|
|
|
|
'sda', 'quarantined', 'objects')
|
|
|
|
self.assertTrue(os.path.isdir(quarantine_path))
|
2010-07-12 17:03:45 -05:00
|
|
|
|
2011-08-30 14:29:19 -07:00
|
|
|
def test_object_run_fast_track_zero_check_closed(self):
|
|
|
|
rat = [False]
|
|
|
|
|
|
|
|
class FakeFile(DiskFile):
|
|
|
|
|
DiskFile API, with reference implementation
Refactor on-disk knowledge out of the object server by pushing the
async update pickle creation to the new DiskFileManager class (name is
not the best, so suggestions welcome), along with the REPLICATOR
method logic. We also move the mount checking and thread pool storage
to the new ondisk.Devices object, which then also becomes the new home
of the audit_location_generator method.
For the object server, a new setup() method is now called at the end
of the controller's construction, and the _diskfile() method has been
renamed to get_diskfile(), to allow implementation specific behavior.
We then hide the need for the REST API layer to know how and where
quarantining needs to be performed. There are now two places it is
checked internally, on open() where we verify the content-length,
name, and x-timestamp metadata, and in the reader on close where the
etag metadata is checked if the entire file was read.
We add a reader class to allow implementations to isolate the WSGI
handling code for that specific environment (it is used no-where else
in the REST APIs). This simplifies the caller's code to just use a
"with" statement once open to avoid multiple points where close needs
to be called.
For a full historical comparison, including the usage patterns see:
https://gist.github.com/portante/5488238
(as of master, 2b639f5, Merge
"Fix 500 from account-quota This Commit
middleware")
--------------------------------+------------------------------------
DiskFileManager(conf)
Methods:
.pickle_async_update()
.get_diskfile()
.get_hashes()
Attributes:
.devices
.logger
.disk_chunk_size
.keep_cache_size
.bytes_per_sync
DiskFile(a,c,o,keep_data_fp=) DiskFile(a,c,o)
Methods: Methods:
*.__iter__()
.close(verify_file=)
.is_deleted()
.is_expired()
.quarantine()
.get_data_file_size()
.open()
.read_metadata()
.create() .create()
.write_metadata()
.delete() .delete()
Attributes: Attributes:
.quarantined_dir
.keep_cache
.metadata
*DiskFileReader()
Methods:
.__iter__()
.close()
Attributes:
+.was_quarantined
DiskWriter() DiskFileWriter()
Methods: Methods:
.write() .write()
.put() .put()
* Note that the DiskFile class * Note that the DiskReader() object
implements all the methods returned by the
necessary for a WSGI app DiskFileOpened.reader() method
iterator implements all the methods
necessary for a WSGI app iterator
+ Note that if the auditor is
refactored to not use the DiskFile
class, see
https://review.openstack.org/44787
then we don't need the
was_quarantined attribute
A reference "in-memory" object server implementation of a backend
DiskFile class in swift/obj/mem_server.py and
swift/obj/mem_diskfile.py.
One can also reference
https://github.com/portante/gluster-swift/commits/diskfile for the
proposed integration with the gluster-swift code based on these
changes.
Change-Id: I44e153fdb405a5743e9c05349008f94136764916
Signed-off-by: Peter Portante <peter.portante@redhat.com>
2013-09-12 19:51:18 -04:00
|
|
|
def _quarantine(self, data_file, msg):
|
2011-08-30 14:29:19 -07:00
|
|
|
rat[0] = True
|
DiskFile API, with reference implementation
Refactor on-disk knowledge out of the object server by pushing the
async update pickle creation to the new DiskFileManager class (name is
not the best, so suggestions welcome), along with the REPLICATOR
method logic. We also move the mount checking and thread pool storage
to the new ondisk.Devices object, which then also becomes the new home
of the audit_location_generator method.
For the object server, a new setup() method is now called at the end
of the controller's construction, and the _diskfile() method has been
renamed to get_diskfile(), to allow implementation specific behavior.
We then hide the need for the REST API layer to know how and where
quarantining needs to be performed. There are now two places it is
checked internally, on open() where we verify the content-length,
name, and x-timestamp metadata, and in the reader on close where the
etag metadata is checked if the entire file was read.
We add a reader class to allow implementations to isolate the WSGI
handling code for that specific environment (it is used no-where else
in the REST APIs). This simplifies the caller's code to just use a
"with" statement once open to avoid multiple points where close needs
to be called.
For a full historical comparison, including the usage patterns see:
https://gist.github.com/portante/5488238
(as of master, 2b639f5, Merge
"Fix 500 from account-quota This Commit
middleware")
--------------------------------+------------------------------------
DiskFileManager(conf)
Methods:
.pickle_async_update()
.get_diskfile()
.get_hashes()
Attributes:
.devices
.logger
.disk_chunk_size
.keep_cache_size
.bytes_per_sync
DiskFile(a,c,o,keep_data_fp=) DiskFile(a,c,o)
Methods: Methods:
*.__iter__()
.close(verify_file=)
.is_deleted()
.is_expired()
.quarantine()
.get_data_file_size()
.open()
.read_metadata()
.create() .create()
.write_metadata()
.delete() .delete()
Attributes: Attributes:
.quarantined_dir
.keep_cache
.metadata
*DiskFileReader()
Methods:
.__iter__()
.close()
Attributes:
+.was_quarantined
DiskWriter() DiskFileWriter()
Methods: Methods:
.write() .write()
.put() .put()
* Note that the DiskFile class * Note that the DiskReader() object
implements all the methods returned by the
necessary for a WSGI app DiskFileOpened.reader() method
iterator implements all the methods
necessary for a WSGI app iterator
+ Note that if the auditor is
refactored to not use the DiskFile
class, see
https://review.openstack.org/44787
then we don't need the
was_quarantined attribute
A reference "in-memory" object server implementation of a backend
DiskFile class in swift/obj/mem_server.py and
swift/obj/mem_diskfile.py.
One can also reference
https://github.com/portante/gluster-swift/commits/diskfile for the
proposed integration with the gluster-swift code based on these
changes.
Change-Id: I44e153fdb405a5743e9c05349008f94136764916
Signed-off-by: Peter Portante <peter.portante@redhat.com>
2013-09-12 19:51:18 -04:00
|
|
|
DiskFile._quarantine(self, data_file, msg)
|
|
|
|
|
2011-08-30 14:29:19 -07:00
|
|
|
self.setup_bad_zero_byte()
|
2013-07-17 16:32:35 -07:00
|
|
|
was_df = auditor.diskfile.DiskFile
|
2011-08-31 07:28:36 -07:00
|
|
|
try:
|
2013-07-17 16:32:35 -07:00
|
|
|
auditor.diskfile.DiskFile = FakeFile
|
2011-08-31 07:28:36 -07:00
|
|
|
self.auditor.run_once(zero_byte_fps=50)
|
|
|
|
quarantine_path = os.path.join(self.devices,
|
|
|
|
'sda', 'quarantined', 'objects')
|
|
|
|
self.assertTrue(os.path.isdir(quarantine_path))
|
|
|
|
self.assertTrue(rat[0])
|
|
|
|
finally:
|
2013-07-17 16:32:35 -07:00
|
|
|
auditor.diskfile.DiskFile = was_df
|
2011-08-30 14:29:19 -07:00
|
|
|
|
DiskFile API, with reference implementation
Refactor on-disk knowledge out of the object server by pushing the
async update pickle creation to the new DiskFileManager class (name is
not the best, so suggestions welcome), along with the REPLICATOR
method logic. We also move the mount checking and thread pool storage
to the new ondisk.Devices object, which then also becomes the new home
of the audit_location_generator method.
For the object server, a new setup() method is now called at the end
of the controller's construction, and the _diskfile() method has been
renamed to get_diskfile(), to allow implementation specific behavior.
We then hide the need for the REST API layer to know how and where
quarantining needs to be performed. There are now two places it is
checked internally, on open() where we verify the content-length,
name, and x-timestamp metadata, and in the reader on close where the
etag metadata is checked if the entire file was read.
We add a reader class to allow implementations to isolate the WSGI
handling code for that specific environment (it is used no-where else
in the REST APIs). This simplifies the caller's code to just use a
"with" statement once open to avoid multiple points where close needs
to be called.
For a full historical comparison, including the usage patterns see:
https://gist.github.com/portante/5488238
(as of master, 2b639f5, Merge
"Fix 500 from account-quota This Commit
middleware")
--------------------------------+------------------------------------
DiskFileManager(conf)
Methods:
.pickle_async_update()
.get_diskfile()
.get_hashes()
Attributes:
.devices
.logger
.disk_chunk_size
.keep_cache_size
.bytes_per_sync
DiskFile(a,c,o,keep_data_fp=) DiskFile(a,c,o)
Methods: Methods:
*.__iter__()
.close(verify_file=)
.is_deleted()
.is_expired()
.quarantine()
.get_data_file_size()
.open()
.read_metadata()
.create() .create()
.write_metadata()
.delete() .delete()
Attributes: Attributes:
.quarantined_dir
.keep_cache
.metadata
*DiskFileReader()
Methods:
.__iter__()
.close()
Attributes:
+.was_quarantined
DiskWriter() DiskFileWriter()
Methods: Methods:
.write() .write()
.put() .put()
* Note that the DiskFile class * Note that the DiskReader() object
implements all the methods returned by the
necessary for a WSGI app DiskFileOpened.reader() method
iterator implements all the methods
necessary for a WSGI app iterator
+ Note that if the auditor is
refactored to not use the DiskFile
class, see
https://review.openstack.org/44787
then we don't need the
was_quarantined attribute
A reference "in-memory" object server implementation of a backend
DiskFile class in swift/obj/mem_server.py and
swift/obj/mem_diskfile.py.
One can also reference
https://github.com/portante/gluster-swift/commits/diskfile for the
proposed integration with the gluster-swift code based on these
changes.
Change-Id: I44e153fdb405a5743e9c05349008f94136764916
Signed-off-by: Peter Portante <peter.portante@redhat.com>
2013-09-12 19:51:18 -04:00
|
|
|
def test_with_tombstone(self):
|
|
|
|
ts_file_path = self.setup_bad_zero_byte(with_ts=True)
|
|
|
|
self.assertTrue(ts_file_path.endswith('ts'))
|
Alternate DiskFile constructor for efficient auditing.
Before, to audit an object, the auditor:
- calls listdir(object-hash-dir)
- picks out the .data file from the listing
- pulls out all N of its user.swift.metadata* xattrs
- unpickles them
- pulls out the value for 'name'
- splits the name into a/c/o
- then instantiates and opens a DiskFile(a, c, o),
which does the following
- joins a/c/o back into a name
- hashes the name
- calls listdir(object-hash-dir) (AGAIN)
- picks out the .data file (and maybe .meta) from the listing (AGAIN)
- pulls out all N of its user.swift.metadata* xattrs (AGAIN)
- unpickles them (AGAIN)
- starts reading object's contents off disk
Now, the auditor simply locates the hash dir on the filesystem (saving
one listdir) and then hands it off to
DiskFileManager.get_diskfile_from_audit_location, which then
instantiates a DiskFile in a way that lazy-loads the name later
(saving one xattr reading).
As part of this, DiskFile.open() will now quarantine a hash
"directory" that's actually a file. Before, the audit location
generator would skip those, but now they make it clear into
DiskFile(). It's better to quarantine them anyway, as they're not
doing any good the way they are.
Also, removed the was_quarantined attribute on DiskFileReader. Now you
can pass in a quarantine_hook callable to DiskFile.reader() that gets
called if the file was quarantined. Default is to log quarantines, but
otherwise do nothing.
Change-Id: I04fc14569982a17fcc89e00832725ae71009335a
2013-10-28 14:57:18 -07:00
|
|
|
self.auditor.run_once()
|
DiskFile API, with reference implementation
Refactor on-disk knowledge out of the object server by pushing the
async update pickle creation to the new DiskFileManager class (name is
not the best, so suggestions welcome), along with the REPLICATOR
method logic. We also move the mount checking and thread pool storage
to the new ondisk.Devices object, which then also becomes the new home
of the audit_location_generator method.
For the object server, a new setup() method is now called at the end
of the controller's construction, and the _diskfile() method has been
renamed to get_diskfile(), to allow implementation specific behavior.
We then hide the need for the REST API layer to know how and where
quarantining needs to be performed. There are now two places it is
checked internally, on open() where we verify the content-length,
name, and x-timestamp metadata, and in the reader on close where the
etag metadata is checked if the entire file was read.
We add a reader class to allow implementations to isolate the WSGI
handling code for that specific environment (it is used no-where else
in the REST APIs). This simplifies the caller's code to just use a
"with" statement once open to avoid multiple points where close needs
to be called.
For a full historical comparison, including the usage patterns see:
https://gist.github.com/portante/5488238
(as of master, 2b639f5, Merge
"Fix 500 from account-quota This Commit
middleware")
--------------------------------+------------------------------------
DiskFileManager(conf)
Methods:
.pickle_async_update()
.get_diskfile()
.get_hashes()
Attributes:
.devices
.logger
.disk_chunk_size
.keep_cache_size
.bytes_per_sync
DiskFile(a,c,o,keep_data_fp=) DiskFile(a,c,o)
Methods: Methods:
*.__iter__()
.close(verify_file=)
.is_deleted()
.is_expired()
.quarantine()
.get_data_file_size()
.open()
.read_metadata()
.create() .create()
.write_metadata()
.delete() .delete()
Attributes: Attributes:
.quarantined_dir
.keep_cache
.metadata
*DiskFileReader()
Methods:
.__iter__()
.close()
Attributes:
+.was_quarantined
DiskWriter() DiskFileWriter()
Methods: Methods:
.write() .write()
.put() .put()
* Note that the DiskFile class * Note that the DiskReader() object
implements all the methods returned by the
necessary for a WSGI app DiskFileOpened.reader() method
iterator implements all the methods
necessary for a WSGI app iterator
+ Note that if the auditor is
refactored to not use the DiskFile
class, see
https://review.openstack.org/44787
then we don't need the
was_quarantined attribute
A reference "in-memory" object server implementation of a backend
DiskFile class in swift/obj/mem_server.py and
swift/obj/mem_diskfile.py.
One can also reference
https://github.com/portante/gluster-swift/commits/diskfile for the
proposed integration with the gluster-swift code based on these
changes.
Change-Id: I44e153fdb405a5743e9c05349008f94136764916
Signed-off-by: Peter Portante <peter.portante@redhat.com>
2013-09-12 19:51:18 -04:00
|
|
|
self.assertTrue(os.path.exists(ts_file_path))
|
|
|
|
|
|
|
|
def test_sleeper(self):
|
|
|
|
auditor.SLEEP_BETWEEN_AUDITS = 0.10
|
|
|
|
my_auditor = auditor.ObjectAuditor(self.conf)
|
|
|
|
start = time.time()
|
|
|
|
my_auditor._sleep()
|
|
|
|
delta_t = time.time() - start
|
|
|
|
self.assert_(delta_t > 0.08)
|
|
|
|
self.assert_(delta_t < 0.12)
|
|
|
|
|
2011-02-24 12:27:20 -08:00
|
|
|
def test_run_forever(self):
|
|
|
|
|
|
|
|
class StopForever(Exception):
|
|
|
|
pass
|
|
|
|
|
|
|
|
class ObjectAuditorMock(object):
|
|
|
|
check_args = ()
|
|
|
|
check_kwargs = {}
|
|
|
|
fork_called = 0
|
|
|
|
fork_res = 0
|
|
|
|
|
|
|
|
def mock_run(self, *args, **kwargs):
|
|
|
|
self.check_args = args
|
|
|
|
self.check_kwargs = kwargs
|
|
|
|
|
|
|
|
def mock_sleep(self):
|
|
|
|
raise StopForever('stop')
|
|
|
|
|
|
|
|
def mock_fork(self):
|
|
|
|
self.fork_called += 1
|
|
|
|
return self.fork_res
|
|
|
|
|
|
|
|
my_auditor = auditor.ObjectAuditor(dict(devices=self.devices,
|
|
|
|
mount_check='false',
|
|
|
|
zero_byte_files_per_second=89))
|
|
|
|
mocker = ObjectAuditorMock()
|
|
|
|
my_auditor.run_once = mocker.mock_run
|
|
|
|
my_auditor._sleep = mocker.mock_sleep
|
|
|
|
was_fork = os.fork
|
|
|
|
try:
|
|
|
|
os.fork = mocker.mock_fork
|
|
|
|
self.assertRaises(StopForever,
|
|
|
|
my_auditor.run_forever, zero_byte_fps=50)
|
|
|
|
self.assertEquals(mocker.check_kwargs['zero_byte_fps'], 50)
|
|
|
|
self.assertEquals(mocker.fork_called, 0)
|
|
|
|
|
|
|
|
self.assertRaises(StopForever, my_auditor.run_forever)
|
|
|
|
self.assertEquals(mocker.fork_called, 1)
|
|
|
|
self.assertEquals(mocker.check_args, ())
|
|
|
|
|
|
|
|
mocker.fork_res = 1
|
|
|
|
self.assertRaises(StopForever, my_auditor.run_forever)
|
|
|
|
self.assertEquals(mocker.fork_called, 2)
|
|
|
|
self.assertEquals(mocker.check_kwargs['zero_byte_fps'], 89)
|
|
|
|
|
|
|
|
finally:
|
|
|
|
os.fork = was_fork
|
|
|
|
|
2010-07-12 17:03:45 -05:00
|
|
|
# Allow running this test module directly with the unittest runner.
if __name__ == '__main__':
    unittest.main()
|