From 65917f3ec06c0c5c615518b7072c3a31c19d6001 Mon Sep 17 00:00:00 2001 From: Mariam John Date: Wed, 20 Jan 2016 12:16:29 -0600 Subject: [PATCH] Implement Backup and Restore for CouchDB The recommended method for doing full backups in CouchDB has been a simple filesystem copy of the data files. This is because CouchDB stores data in wholly contained append only files. For example, when a user creates a database, a corresponding .couch file is created in the database directory. The backup functionality has been implemented by compressing the database directory and then encrypting it and sending it over to store in Swift. Similarly, the restore functionality has been implemented by fetching the files from Swift and uncompressing them into the database directory. After this, the ownership of the directory needs to be updated. To test the changes, follow the steps: - Create a CouchDB instance - Access the admin console called Futon using the following url: http://10.0.0.5:5984/_utils/ - Create a database from there and create one or more documents - Create a backup of this CouchDB instance - Create another CouchDB instance from the backup created above - Access the admin console for this new instance and verify that the database created above is there couchdb client library for the integration tests has been added to global-requirements: https://review.openstack.org/#/c/285191/ Change-Id: Iad6d69bf60ace73825819081964a43ad53d6c6fc Implements: blueprint couchdb-backup-restore --- etc/trove/trove-guestagent.conf.sample | 6 + test-requirements.txt | 1 + trove/common/cfg.py | 8 +- .../datastore/experimental/couchdb/manager.py | 26 ++++- .../backup/experimental/couchdb_impl.py | 35 ++++++ .../restore/experimental/couchdb_impl.py | 41 +++++++ trove/tests/int_tests.py | 2 +- .../tests/scenario/helpers/couchdb_helper.py | 87 ++++++++++++++ .../unittests/guestagent/test_backups.py | 110 +++++++++++++++++- .../guestagent/test_couchdb_manager.py | 22 +++- 10 files changed, 329 
insertions(+), 9 deletions(-) create mode 100644 trove/guestagent/strategies/backup/experimental/couchdb_impl.py create mode 100644 trove/guestagent/strategies/restore/experimental/couchdb_impl.py create mode 100644 trove/tests/scenario/helpers/couchdb_helper.py diff --git a/etc/trove/trove-guestagent.conf.sample b/etc/trove/trove-guestagent.conf.sample index dbd3069be9..b4b2063a39 100644 --- a/etc/trove/trove-guestagent.conf.sample +++ b/etc/trove/trove-guestagent.conf.sample @@ -161,3 +161,9 @@ restore_namespace = trove.guestagent.strategies.restore.experimental.cassandra_i # backup_strategy = DB2Backup # backup_namespace = trove.guestagent.strategies.backup.experimental.db2_impl # restore_namespace = trove.guestagent.strategies.restore.experimental.db2_impl + +[couchdb] +#For CouchDB, the following are the defaults for backup and restore: +# backup_strategy = CouchDBBackup +# backup_namespace = trove.guestagent.strategies.backup.experimental.couchdb_impl +# restore_namespace = trove.guestagent.strategies.restore.experimental.couchdb_impl diff --git a/test-requirements.txt b/test-requirements.txt index 1f3193ca51..166f71b78f 100644 --- a/test-requirements.txt +++ b/test-requirements.txt @@ -24,3 +24,4 @@ redis>=2.10.0 # MIT psycopg2>=2.5 # LGPL/ZPL cassandra-driver>=2.1.4 # Apache-2.0 pycrypto>=2.6 # Public Domain +couchdb>=0.8 # Apache-2.0 diff --git a/trove/common/cfg.py b/trove/common/cfg.py index 046f7bd72e..c92789dc03 100644 --- a/trove/common/cfg.py +++ b/trove/common/cfg.py @@ -1070,13 +1070,15 @@ couchdb_opts = [ help='Whether to provision a Cinder volume for datadir.'), cfg.StrOpt('device_path', default='/dev/vdb', help='Device path for volume if volume support is enabled.'), - cfg.StrOpt('backup_strategy', default=None, + cfg.StrOpt('backup_strategy', default='CouchDBBackup', help='Default strategy to perform backups.'), cfg.StrOpt('replication_strategy', default=None, help='Default strategy for replication.'), - cfg.StrOpt('backup_namespace', 
def _perform_restore(self, backup_info, context, restore_location):
    """Restore all CouchDB databases and their documents from a backup.

    The work is delegated to ``backup.restore``.  If it raises, the
    failure is logged with its traceback, the service status is set to
    ``FAILED`` and the original exception is re-raised to the caller.

    :param backup_info: backup descriptor; its ``'id'`` entry is used in
                        the log messages and the whole mapping is handed
                        to ``backup.restore``.
    :param context: request context forwarded to ``backup.restore``.
    :param restore_location: restore target forwarded to
                             ``backup.restore``.
    :raises Exception: whatever ``backup.restore`` raised.
    """
    backup_id = backup_info['id']
    # Hand the value to the logger as an argument rather than
    # interpolating with '%': formatting is then deferred until the
    # record is actually emitted.
    LOG.info(_("Restoring database from backup %s"), backup_id)
    try:
        backup.restore(context, backup_info, restore_location)
    except Exception:
        LOG.exception(_("Error performing restore from backup %s"),
                      backup_id)
        self.status.set_status(rd_instance.ServiceStatuses.FAILED)
        raise
    LOG.info(_("Restored database successfully"))


def create_backup(self, context, backup_info):
    """Create a backup of this CouchDB instance via ``backup.backup``.

    :param context: request context forwarded to ``backup.backup``.
    :param backup_info: backup descriptor forwarded to ``backup.backup``.
    """
    LOG.debug("Creating backup for CouchDB.")
    backup.backup(context, backup_info)
class CouchDBBackup(base.RestoreRunner):
    """Restore runner for CouchDB backups taken as a tar of the data dir.

    The archive is unpacked with ``tar`` reading from stdin and keeping
    absolute member names; afterwards ownership of the CouchDB library
    directory is reset and the service is restarted.
    """

    __strategy_name__ = 'couchdbbackup'
    base_restore_cmd = 'sudo tar xPf -'

    def __init__(self, *args, **kwargs):
        # The app/status helpers are created before the base-class
        # initialiser runs, exactly as in the original ordering.
        app_status = service.CouchDBAppStatus()
        self.appStatus = app_status
        self.app = service.CouchDBApp(app_status)
        super(CouchDBBackup, self).__init__(*args, **kwargs)

    def post_restore(self):
        """Finish a restore once the archive has been unpacked.

        Hands the CouchDB library directory back to the ``couchdb``
        user and group (as root) and then restarts the service.
        """
        lib_dir = service.COUCHDB_LIB_DIR
        operating_system.chown(lib_dir, 'couchdb', 'couchdb', as_root=True)
        self.app.restart()
class CouchdbHelper(TestHelper):
    """Scenario-test helper that writes, checks and removes CouchDB data.

    Every data label maps to its own database named
    ``<self.database>_<data_label>`` holding a single document whose
    fields are ``ff-<n>: n`` for ``n`` in ``1..data_size``.
    """

    def __init__(self, expected_override_name):
        super(CouchdbHelper, self).__init__(expected_override_name)
        # Generated datasets, keyed by str(data_size).
        self._data_cache = dict()
        self.field_name = 'ff-%s'
        self.database = 'foodb'

    def _database_name(self, data_label):
        """Return the name of the database that holds *data_label*."""
        return self.database + '_' + data_label

    def create_client(self, host, *args, **kwargs):
        """Return a ``couchdb.Server`` for ``http://<host>:5984/``.

        Extra positional and keyword arguments are accepted and ignored.
        """
        url = 'http://' + host + ':5984/'
        return couchdb.Server(url)

    def add_actual_data(self, data_label, data_start, data_size, host,
                        *args, **kwargs):
        """Create the label's database and store the dataset in one doc.

        ``data_start`` is not used; the values always run from 1 to
        ``data_size``.
        """
        client = self.get_client(host, *args, **kwargs)
        db = client.create(self._database_name(data_label))
        # Save an empty document first, then read it back so the copy
        # being filled in is the one returned by the server.
        doc_id, _doc_rev = db.save({})
        doc = db.get(doc_id)
        for value in self._get_dataset(data_size):
            doc[self.field_name % value] = value
        db.save(doc)

    def _get_dataset(self, data_size):
        """Return the (cached) dataset for *data_size*."""
        cache_key = str(data_size)
        if cache_key not in self._data_cache:
            self._data_cache[cache_key] = self._generate_dataset(data_size)
        return self._data_cache[cache_key]

    def _generate_dataset(self, data_size):
        """Return the integers ``1..data_size`` inclusive."""
        return range(1, data_size + 1)

    def remove_actual_data(self, data_label, data_start, data_size, host,
                           *args, **kwargs):
        """Delete the database that belongs to *data_label*.

        ``data_start`` and ``data_size`` are not used.
        """
        # Forward the extra arguments like the other data methods do.
        client = self.get_client(host, *args, **kwargs)
        # Server.delete() is documented to take the database *name*,
        # not a Database object.
        client.delete(self._database_name(data_label))

    def verify_actual_data(self, data_label, data_start, data_size, host,
                           *args, **kwargs):
        """Assert the label's database holds exactly the expected values.

        The database must contain one document; its fields, excluding
        ``_id`` and ``_rev``, must match the dataset for ``data_size``
        in count and content.  ``data_start`` is not used.
        """
        expected_data = self._get_dataset(data_size)
        client = self.get_client(host, *args, **kwargs)
        db = client[self._database_name(data_label)]

        TestRunner.assert_equal(len(db), 1)

        actual_data = []
        for doc_id in db:
            actual_data = [value for key, value in db[doc_id].items()
                           if key not in ('_id', '_rev')]

        TestRunner.assert_equal(len(expected_data),
                                len(actual_data),
                                "Unexpected number of result rows.")

        for expected_row in expected_data:
            TestRunner.assert_true(expected_row in actual_data,
                                   "Row not found in the result set: %s"
                                   % expected_row)
class CouchDBBackupTests(trove_testtools.TestCase):
    """Checks which backup hooks the CouchDB backup runner invokes.

    ``_run``, ``_run_pre_backup`` and ``_run_post_backup`` are replaced
    with mocks by a ``patch.multiple`` patcher that each test starts
    itself; ``tearDown`` stops it again.
    """

    def setUp(self):
        super(CouchDBBackupTests, self).setUp()
        runner_cls = utils.import_class(BACKUP_COUCHDB_BACKUP_CLS)
        self.backup_runner = runner_cls
        self.backup_runner_patch = patch.multiple(
            runner_cls,
            _run=DEFAULT,
            _run_pre_backup=DEFAULT,
            _run_post_backup=DEFAULT)

    def tearDown(self):
        super(CouchDBBackupTests, self).tearDown()
        self.backup_runner_patch.stop()

    def test_backup_success(self):
        """All three hooks run exactly once when nothing fails."""
        mocks = self.backup_runner_patch.start()
        with self.backup_runner(12345):
            pass

        for hook in ('_run_pre_backup', '_run', '_run_post_backup'):
            mocks[hook].assert_called_once_with()

    def test_backup_failed_due_to_run_backup(self):
        """A failure in ``_run`` propagates and skips the post hook."""
        mocks = self.backup_runner_patch.start()
        mocks['_run'].side_effect = exception.TroveError('test')

        with ExpectedException(exception.TroveError, 'test'), \
                self.backup_runner(12345):
            pass

        mocks['_run_pre_backup'].assert_called_once_with()
        mocks['_run'].assert_called_once_with()
        self.assertEqual(0, mocks['_run_post_backup'].call_count)
@@ class GuestAgentCouchDBManagerTest(trove_testtools.TestCase): self.original_get_ip = netutils.get_my_ipv4 self.orig_make_host_reachable = ( couchdb_service.CouchDBApp.make_host_reachable) + self.orig_backup_restore = backup.restore def tearDown(self): super(GuestAgentCouchDBManagerTest, self).tearDown() @@ -71,6 +73,7 @@ class GuestAgentCouchDBManagerTest(trove_testtools.TestCase): netutils.get_my_ipv4 = self.original_get_ip couchdb_service.CouchDBApp.make_host_reachable = ( self.orig_make_host_reachable) + backup.restore = self.orig_backup_restore def test_update_status(self): mock_status = MagicMock() @@ -85,6 +88,7 @@ class GuestAgentCouchDBManagerTest(trove_testtools.TestCase): mock_app = MagicMock() self.manager.appStatus = mock_status self.manager.app = mock_app + mount_point = '/var/lib/couchdb' mock_status.begin_install = MagicMock(return_value=None) mock_app.install_if_needed = MagicMock(return_value=None) @@ -97,6 +101,12 @@ class GuestAgentCouchDBManagerTest(trove_testtools.TestCase): volume.VolumeDevice.migrate_data = MagicMock(return_value=None) volume.VolumeDevice.mount = MagicMock(return_value=None) volume.VolumeDevice.mount_points = MagicMock(return_value=[]) + backup.restore = MagicMock(return_value=None) + + backup_info = {'id': backup_id, + 'location': 'fake-location', + 'type': 'CouchDBBackup', + 'checksum': 'fake-checksum'} if backup_id else None with patch.object(pkg.Package, 'pkg_is_installed', return_value=MagicMock( @@ -106,16 +116,19 @@ class GuestAgentCouchDBManagerTest(trove_testtools.TestCase): databases=None, memory_mb='2048', users=None, device_path=device_path, - mount_point="/var/lib/couchdb", - backup_info=None, + mount_point=mount_point, + backup_info=backup_info, overrides=None, cluster_config=None) - # verification/assertion mock_status.begin_install.assert_any_call() mock_app.install_if_needed.assert_any_call(packages) mock_app.make_host_reachable.assert_any_call() mock_app.change_permissions.assert_any_call() + if 
backup_id: + backup.restore.assert_any_call(self.context, + backup_info, + mount_point) def test_prepare_pkg(self): self._prepare_dynamic(['couchdb']) @@ -123,6 +136,9 @@ class GuestAgentCouchDBManagerTest(trove_testtools.TestCase): def test_prepare_no_pkg(self): self._prepare_dynamic([]) + def test_prepare_from_backup(self): + self._prepare_dynamic(['couchdb'], backup_id='123abc456') + def test_restart(self): mock_status = MagicMock() self.manager.appStatus = mock_status