[OVN] Remove SB "Chassis"/"Chassis_Private" duplicated registers
A new OVN maintenance method is added. This method lists all existing OVN SB Chassis registers and checks if any of them has the same hostname. In case of having duplicated "Chassis"/"Chassis_Private" registers, the maintenance method will remove those with older (lower) timestamp, that is stored in "Chassis_Private.nb_cfg_timestamp", leaving only the newer one. Closes-Bug: #2016158 Change-Id: Ib3c6f0dc01efd31430691e720ba23ccb4ede65fa
This commit is contained in:
		 Rodolfo Alonso Hernandez
					Rodolfo Alonso Hernandez
				
			
				
					committed by
					
						 Rodolfo Alonso
						Rodolfo Alonso
					
				
			
			
				
	
			
			
			 Rodolfo Alonso
						Rodolfo Alonso
					
				
			
						parent
						
							8887cdf5d3
						
					
				
				
					commit
					9d9f47c20c
				
			| @@ -1030,6 +1030,63 @@ class DBInconsistenciesPeriodics(SchemaAwarePeriodicsBase): | ||||
|                                                    from_reload=True) | ||||
|         raise periodics.NeverAgain() | ||||
|  | ||||
|     @periodics.periodic(spacing=300, run_immediately=True) | ||||
|     def remove_duplicated_chassis_registers(self): | ||||
|         """Remove the "Chassis" and "Chassis_Private" duplicated registers. | ||||
|  | ||||
|         When the ovn-controller service of a node is updated and the system-id | ||||
|         is changed, if the old service is not stopped gracefully, it will leave | ||||
|         a "Chassis" and a "Chassis_Private" registers on the OVN SB database. | ||||
|         These leftovers must be removed. | ||||
|  | ||||
|         NOTE: this method is executed every 5 minutes. If a new chassis is | ||||
|         added, this method will perform again the clean-up process. | ||||
|  | ||||
|         NOTE: this method can be executed only if the OVN SB has the | ||||
|         "Chassis_Private" table. Otherwise, is not possible to find out which | ||||
|         register is newer and thus must be kept in the database. | ||||
|         """ | ||||
|         if not self._sb_idl.is_table_present('Chassis_Private'): | ||||
|             raise periodics.NeverAgain() | ||||
|  | ||||
|         if not self.has_lock: | ||||
|             return | ||||
|  | ||||
|         # dup_chassis_port_host = {host_name: [(ch1, ch_private1), | ||||
|         #                                      (ch2, ch_private2), ... ]} | ||||
|         dup_chassis_port_host = {} | ||||
|         chassis = self._sb_idl.chassis_list().execute(check_error=True) | ||||
|         chassis_hostnames = {ch.hostname for ch in chassis} | ||||
|         # Find the duplicated "Chassis" and "Chassis_Private" registers, | ||||
|         # comparing the hostname. | ||||
|         for hostname in chassis_hostnames: | ||||
|             ch_list = [] | ||||
|             # Find these chassis matching the hostname and create a list. | ||||
|             for ch in (ch for ch in chassis if ch.hostname == hostname): | ||||
|                 ch_private = self._sb_idl.lookup('Chassis_Private', ch.name, | ||||
|                                                  default=None) | ||||
|                 if ch_private: | ||||
|                     ch_list.append((ch, ch_private)) | ||||
|  | ||||
|             # If the chassis list > 1, then we have duplicated chassis. | ||||
|             if len(ch_list) > 1: | ||||
|                 # Order ch_list by Chassis_Private.nb_cfg_timestamp, from newer | ||||
|                 # (greater value) to older. | ||||
|                 ch_list.sort(key=lambda x: x[1].nb_cfg_timestamp, reverse=True) | ||||
|                 dup_chassis_port_host[hostname] = ch_list | ||||
|  | ||||
|         if not dup_chassis_port_host: | ||||
|             return | ||||
|  | ||||
|         # Remove the "Chassis" and "Chassis_Private" registers with the | ||||
|         # older Chassis_Private.nb_cfg_timestamp. | ||||
|         with self._sb_idl.transaction(check_error=True) as txn: | ||||
|             for ch_list in dup_chassis_port_host.values(): | ||||
|                 # The first item is skipped, this is the newest element. | ||||
|                 for ch, ch_private in ch_list[1:]: | ||||
|                     for table in ('Chassis_Private', 'Chassis'): | ||||
|                         txn.add(self._sb_idl.db_destroy(table, ch.name)) | ||||
|  | ||||
|  | ||||
| class HashRingHealthCheckPeriodics(object): | ||||
|  | ||||
|   | ||||
| @@ -972,6 +972,64 @@ class TestMaintenance(_TestMaintenanceHelper): | ||||
|         lsp = self.nb_api.lookup('Logical_Switch_Port', p1['id']) | ||||
|         self.assertEqual(hcg_uuid, lsp.ha_chassis_group[0].uuid) | ||||
|  | ||||
|     def test_remove_duplicated_chassis_registers(self): | ||||
|         hostnames = ['host1', 'host2'] | ||||
|         for hostname in hostnames: | ||||
|             for _ in range(3): | ||||
|                 self.add_fake_chassis(hostname) | ||||
|  | ||||
|         chassis = self.sb_api.chassis_list().execute(check_error=True) | ||||
|         self.assertEqual(6, len(chassis)) | ||||
|         # Make the chassis private timestamp different | ||||
|         for idx, ch in enumerate(chassis): | ||||
|             self.sb_api.db_set('Chassis_Private', ch.name, | ||||
|                                ('nb_cfg_timestamp', idx)).execute() | ||||
|  | ||||
|         ch_private_dict = {}  # host: [ch_private1, ch_private2, ...] | ||||
|         for hostname in hostnames: | ||||
|             ch_private_list = [] | ||||
|             for ch in (ch for ch in chassis if ch.hostname == hostname): | ||||
|                 ch_private = self.sb_api.lookup('Chassis_Private', ch.name, | ||||
|                                                 default=None) | ||||
|                 if ch_private: | ||||
|                     # One of the "Chassis_Private" has been deleted on purpose | ||||
|                     # in this test. | ||||
|                     ch_private_list.append(ch_private) | ||||
|             ch_private_list.sort(key=lambda x: x.nb_cfg_timestamp, | ||||
|                                  reverse=True) | ||||
|             ch_private_dict[hostname] = ch_private_list | ||||
|  | ||||
|         self.maint.remove_duplicated_chassis_registers() | ||||
|         chassis_result = self.sb_api.chassis_list().execute(check_error=True) | ||||
|         self.assertEqual(2, len(chassis_result)) | ||||
|         for ch in chassis_result: | ||||
|             self.assertIn(ch.hostname, hostnames) | ||||
|             hostnames.remove(ch.hostname) | ||||
|             # From ch_private_dict[ch.hostname], we retrieve the first | ||||
|             # "Chassis_Private" register because these are ordered by | ||||
|             # timestamp. The newer one (bigger timestamp) should remain in the | ||||
|             # system. | ||||
|             ch_expected = ch_private_dict[ch.hostname][0].chassis[0] | ||||
|             self.assertEqual(ch_expected.name, ch.name) | ||||
|  | ||||
|     def test_remove_duplicated_chassis_registers_no_ch_private_register(self): | ||||
|         for _ in range(2): | ||||
|             self.add_fake_chassis('host1') | ||||
|  | ||||
|         chassis = self.sb_api.chassis_list().execute(check_error=True) | ||||
|         self.assertEqual(2, len(chassis)) | ||||
|         # Make the chassis private timestamp different | ||||
|         # Delete on of the "Chassis_Private" registers. | ||||
|         self.sb_api.db_destroy('Chassis_Private', chassis[0].name).execute() | ||||
|         self.sb_api.db_set('Chassis_Private', chassis[1].name, | ||||
|                            ('nb_cfg_timestamp', 1)).execute() | ||||
|  | ||||
|         self.maint.remove_duplicated_chassis_registers() | ||||
|         chassis_result = self.sb_api.chassis_list().execute(check_error=True) | ||||
|         # Both "Chassis" registers are still in the DB because one | ||||
|         # "Chassis_Private" register was missing. | ||||
|         self.assertEqual(2, len(chassis_result)) | ||||
|  | ||||
|  | ||||
| class TestLogMaintenance(_TestMaintenanceHelper, | ||||
|                          test_log_driver.LogApiTestCaseBase): | ||||
|   | ||||
| @@ -0,0 +1,17 @@ | ||||
| --- | ||||
| issues: | ||||
|   - | | ||||
|     When using ML2/OVN, during an upgrade procedure, the OVS system-id stored | ||||
|     value can be changed. The ovn-controller service will create the "Chassis" | ||||
|     and "Chassis_Private" registers based on this OVS system-id. If the | ||||
|     ovn-controller process is not gracefully stopped, that could lead to the | ||||
|     existence of duplicated "Chassis" and "Chassis_Private" registers in the | ||||
|     OVN Southbound database. | ||||
| fixes: | ||||
|   - | | ||||
|     A new OVN maintenance method ``remove_duplicated_chassis_registers`` is | ||||
|     added. This method will periodically check the OVN Southbound "Chassis" | ||||
|     and "Chassis_Private" tables looking for duplicated registers. The older | ||||
|     ones (based on the "Chassis_Private.nb_cfg_timestamp" value) will be | ||||
|     removed when more than one register has the same hostname, that should | ||||
|     be unique. | ||||
		Reference in New Issue
	
	Block a user