Fix instance_group/autoscaling_group intermittent test failures
With convergence there is a chance that some leaf resources won't be
processed by workers if the stack is marked as failed due to the
failure of one resource and the stack traversal is set to the empty
string, i.e. the traversal is cancelled. Also uses TestResource to
simplify the tests.

Change-Id: I1a04853d42f519d9a14dd345ac8cb441b08c4d77
Signed-off-by: rabi <ramishra@redhat.com>
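To picture the failure mode described above: under convergence each worker re-checks that its traversal is still the current one before processing a resource; when one resource fails, the stack is marked FAILED and its current traversal is reset to the empty string, so every in-flight worker bails out and the remaining leaf resources are never processed. A minimal standalone sketch of that interaction (illustrative only; Stack, check_resource and the traversal IDs are simplified stand-ins, not Heat's actual internals):

    # Illustrative sketch, not Heat code: a worker skips a resource once
    # the stack's current traversal no longer matches its own.
    from dataclasses import dataclass, field

    @dataclass
    class Stack:
        current_traversal: str = 'traversal-1'   # '' means cancelled
        processed: list = field(default_factory=list)

    def check_resource(stack, resource, traversal_id):
        # Bail out if the traversal was cancelled in the meantime,
        # leaving the resource's state untouched.
        if stack.current_traversal != traversal_id:
            return
        stack.processed.append(resource)

    stack = Stack()
    check_resource(stack, 'leaf-1', 'traversal-1')  # processed normally
    stack.current_traversal = ''                    # a sibling resource failed
    check_resource(stack, 'leaf-2', 'traversal-1')  # worker bails out
    assert stack.processed == ['leaf-1']

This is why the tests below can only assert that at least one resource reached a *_FAILED state, rather than asserting the exact state of every instance.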
@@ -103,7 +103,10 @@ function _run_heat_integrationtests {
 function create {
     if [ "${RUN_HEAT_INTEGRATION_TESTS}" == "True" ]; then
         # run heat integration tests instead of tempest smoke before create
-        _run_heat_integrationtests $BASE_DEVSTACK_DIR
+        # TODO(ramishra) switch to run tests from $BASE_DEVSTACK_DIR once
+        # https://review.opendev.org/c/openstack/heat/+/954938
+        # has been backported
+        _run_heat_integrationtests $TARGET_DEVSTACK_DIR
     fi
 
     source $TOP_DIR/openrc admin admin
@@ -77,11 +77,9 @@ parameters:
 
 resources:
   random1:
-    type: OS::Heat::RandomString
-    properties:
-      salt: {get_param: UserData}
+    type: OS::Heat::TestResource
 outputs:
-  PublicIp: {value: {get_attr: [random1, value]}}
+  PublicIp: {value: {get_attr: [random1, output]}}
   AvailabilityZone: {value: 'not-used11'}
   PrivateDnsName: {value: 'not-used12'}
   PublicDnsName: {value: 'not-used13'}
@@ -100,18 +98,12 @@ parameters:
 
 resources:
   random1:
-    type: OS::Heat::RandomString
-    depends_on: waiter
-  ready_poster:
-    type: AWS::CloudFormation::WaitConditionHandle
-  waiter:
-    type: AWS::CloudFormation::WaitCondition
+    type: OS::Heat::TestResource
     properties:
-      Handle: {get_resource: ready_poster}
-      Timeout: 1
+      fail: true
 outputs:
   PublicIp:
-    value: {get_attr: [random1, value]}
+    value: {get_attr: [random1, output]}
 '''
 
     def setUp(self):
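A note on the template changes above: OS::Heat::TestResource with fail: true fails the create deterministically, so the test no longer has to wait out an AWS::CloudFormation::WaitCondition timeout, and TestResource exposes an output attribute where RandomString exposed value, hence the get_attr change in the outputs. Condensed, the failing provider template now amounts to roughly the following (a sketch; heat_template_version and the other parameters and outputs of the real test templates are assumed, not shown in the hunks):

    # Hypothetical condensed provider template; the real ones in the
    # tests carry additional parameters and outputs.
    bad_instance_template = '''
    heat_template_version: 2013-05-23
    resources:
      random1:
        type: OS::Heat::TestResource
        properties:
          fail: true
    outputs:
      PublicIp:
        value: {get_attr: [random1, output]}
    '''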
@@ -252,7 +244,16 @@ class AutoscalingGroupBasicTest(AutoscalingGroupTest):
 
         nested_ident = self.assert_resource_is_a_stack(stack_identifier,
                                                        'JobServerGroup')
-        self._assert_instance_state(nested_ident, 0, 2)
+        # Check at least one resource is in *_FAILED as there is a
+        # chance that before other leaf resources are processed, stack
+        # is marked as failed and traversal is set to empty string,
+        # so that all other workers processing resources bail out
+        # and the traversal gets cancelled.
+        for res in self.client.resources.list(nested_ident):
+            if res.resource_status.endswith('CREATE_FAILED'):
+                break
+        else:
+            self.fail('No resource in CREATE_FAILED')
 
     def test_update_instance_error_causes_group_error(self):
         """Test update failing a resource in the instance group.
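The loop added above relies on Python's for-else: the else block runs only when the loop completes without hitting break, i.e. when no resource was found in CREATE_FAILED. A self-contained illustration of the idiom (the statuses list is made-up sample data):

    # for-else: 'else' runs only if the loop was never broken out of.
    statuses = ['CREATE_COMPLETE', 'CREATE_FAILED']
    for status in statuses:
        if status.endswith('CREATE_FAILED'):
            break  # a failed resource was found; 'else' is skipped
    else:
        raise AssertionError('No resource in CREATE_FAILED')

The same pattern, with the broader '_FAILED' suffix, covers the update case further down, where pre-existing instances end in UPDATE_FAILED and the newly added one in CREATE_FAILED.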
@@ -281,8 +282,6 @@ class AutoscalingGroupBasicTest(AutoscalingGroupTest):
         nested_ident = self.assert_resource_is_a_stack(stack_identifier,
                                                        'JobServerGroup')
         self._assert_instance_state(nested_ident, 2, 0)
-        initial_list = [res.resource_name
-                        for res in self.client.resources.list(nested_ident)]
 
         env['parameters']['size'] = 3
         files2 = {'provider.yaml': self.bad_instance_template}
@@ -296,20 +295,18 @@ class AutoscalingGroupBasicTest(AutoscalingGroupTest):
         )
         self._wait_for_stack_status(stack_identifier, 'UPDATE_FAILED')
 
-        # assert that there are 3 bad instances
         nested_ident = self.assert_resource_is_a_stack(stack_identifier,
                                                        'JobServerGroup')
-
-        # 2 resources should be in update failed, and one create failed.
+        # Check at least one resource is in *_FAILED as there is a
+        # chance that before other leaf resources are processed, stack
+        # is marked as failed and traversal is set to empty string,
+        # so that all other workers processing resources bail out
+        # and the traversal gets cancelled.
         for res in self.client.resources.list(nested_ident):
-            if res.resource_name in initial_list:
-                self._wait_for_resource_status(nested_ident,
-                                               res.resource_name,
-                                               'UPDATE_FAILED')
-            else:
-                self._wait_for_resource_status(nested_ident,
-                                               res.resource_name,
-                                               'CREATE_FAILED')
+            if res.resource_status.endswith('_FAILED'):
+                break
+        else:
+            self.fail('No resource in *_FAILED')
 
     def test_group_suspend_resume(self):
@@ -70,12 +70,10 @@ parameters:
 
 resources:
   random1:
-    type: OS::Heat::RandomString
-    properties:
-      salt: {get_param: UserData}
+    type: OS::Heat::TestResource
 outputs:
   PublicIp:
-    value: {get_attr: [random1, value]}
+    value: {get_attr: [random1, output]}
 '''
 
     # This is designed to fail.
@@ -90,18 +88,12 @@ parameters:
 
 resources:
   random1:
-    type: OS::Heat::RandomString
-    depends_on: waiter
-  ready_poster:
-    type: AWS::CloudFormation::WaitConditionHandle
-  waiter:
-    type: AWS::CloudFormation::WaitCondition
+    type: OS::Heat::TestResource
     properties:
-      Handle: {Ref: ready_poster}
-      Timeout: 1
+      fail: true
 outputs:
   PublicIp:
-    value: {get_attr: [random1, value]}
+    value: {get_attr: [random1, output]}
 '''
 
     def setUp(self):
@@ -242,7 +234,16 @@ class InstanceGroupBasicTest(InstanceGroupTest):
 
         nested_ident = self.assert_resource_is_a_stack(stack_identifier,
                                                        'JobServerGroup')
-        self._assert_instance_state(nested_ident, 0, 2)
+        # Check at least one resource is in *_FAILED as there is a
+        # chance that before other leaf resources are processed, stack
+        # is marked as failed and traversal is set to empty string,
+        # so that all other workers processing resources bail out
+        # and the traversal gets cancelled.
+        for res in self.client.resources.list(nested_ident):
+            if res.resource_status.endswith('CREATE_FAILED'):
+                break
+        else:
+            self.fail('No resource in CREATE_FAILED')
 
     def test_update_instance_error_causes_group_error(self):
         """Test update failing a resource in the instance group.
@@ -271,8 +272,6 @@ class InstanceGroupBasicTest(InstanceGroupTest):
|
||||
nested_ident = self.assert_resource_is_a_stack(stack_identifier,
|
||||
'JobServerGroup')
|
||||
self._assert_instance_state(nested_ident, 2, 0)
|
||||
initial_list = [res.resource_name
|
||||
for res in self.client.resources.list(nested_ident)]
|
||||
|
||||
env['parameters']['size'] = 3
|
||||
files2 = {'provider.yaml': self.bad_instance_template}
|
||||
@@ -285,20 +284,18 @@ class InstanceGroupBasicTest(InstanceGroupTest):
             environment=env
         )
         self._wait_for_stack_status(stack_identifier, 'UPDATE_FAILED')
 
         nested_ident = self.assert_resource_is_a_stack(stack_identifier,
                                                        'JobServerGroup')
-        # assert that there are 3 bad instances
-        # 2 resources should be in update failed, and one create failed.
+        # Check at least one resource is in *_FAILED as there is a
+        # chance that before other leaf resources are processed, stack
+        # is marked as failed and traversal is set to empty string,
+        # so that all other workers processing resources bail out
+        # and the traversal gets cancelled.
         for res in self.client.resources.list(nested_ident):
-            if res.resource_name in initial_list:
-                self._wait_for_resource_status(nested_ident,
-                                               res.resource_name,
-                                               'UPDATE_FAILED')
-            else:
-                self._wait_for_resource_status(nested_ident,
-                                               res.resource_name,
-                                               'CREATE_FAILED')
+            if res.resource_status.endswith('_FAILED'):
+                break
+        else:
+            self.fail('No resource in *_FAILED')
 
 
 class InstanceGroupUpdatePolicyTest(InstanceGroupTest):