From a16e1f55a742f33bdfda55d38d6962a785fe5c97 Mon Sep 17 00:00:00 2001
From: Alistair Coles <alistairncoles@gmail.com>
Date: Thu, 2 Nov 2023 17:32:17 +0000
Subject: [PATCH] Improve unit tests for proxy GET ChunkReadTimeouts

Unit test changes only:

- Add tests for some resuming replicated GET scenarios.

- Add test to cover resuming GET fast_forward "failing" when range
  read is complete.

- Add test to verify different node_timeout for account and container
  vs object controller getters.

- Refactor proxy.test_server.py tests to split out different
  scenarios.

Drive-by: remove some ring device manipulation setup that's not needed.

Change-Id: I38c7fa648492c9bd2173ecf92f89e423bee4abf3
Co-Authored-By: Clay Gerrard <clay.gerrard@gmail.com>
---
 test/unit/proxy/controllers/test_base.py |  20 ++
 test/unit/proxy/controllers/test_obj.py  |  99 +++++++++-
 test/unit/proxy/test_server.py           | 227 ++++++++++++++---------
 3 files changed, 251 insertions(+), 95 deletions(-)

diff --git a/test/unit/proxy/controllers/test_base.py b/test/unit/proxy/controllers/test_base.py
index ba75ed472e..44b6f804ed 100644
--- a/test/unit/proxy/controllers/test_base.py
+++ b/test/unit/proxy/controllers/test_base.py
@@ -1672,6 +1672,26 @@ class TestGetterSource(unittest.TestCase):
 
 @patch_policies([StoragePolicy(0, 'zero', True, object_ring=FakeRing())])
 class TestGetOrHeadHandler(BaseTest):
+    def test_init_node_timeout(self):
+        conf = {'node_timeout': 2, 'recoverable_node_timeout': 3}
+        app = proxy_server.Application(conf,
+                                       logger=self.logger,
+                                       account_ring=self.account_ring,
+                                       container_ring=self.container_ring)
+        req = Request.blank('/v1/a/c/o')
+        node_iter = Namespace(num_primary_nodes=3)
+        # app.recoverable_node_timeout
+        getter = GetOrHeadHandler(
+            app, req, 'Object', node_iter, None, None, {})
+        self.assertEqual(3, getter.node_timeout)
+        # app.node_timeout
+        getter = GetOrHeadHandler(
+            app, req, 'Account', node_iter, None, None, {})
+        self.assertEqual(2, getter.node_timeout)
+        getter = GetOrHeadHandler(
+            app, req, 'Container', node_iter, None, None, {})
+        self.assertEqual(2, getter.node_timeout)
+
     def test_disconnected_logging(self):
         req = Request.blank('/v1/a/c/o')
         headers = {'content-type': 'text/plain'}
diff --git a/test/unit/proxy/controllers/test_obj.py b/test/unit/proxy/controllers/test_obj.py
index 9b17d0de77..474fb160d9 100644
--- a/test/unit/proxy/controllers/test_obj.py
+++ b/test/unit/proxy/controllers/test_obj.py
@@ -40,7 +40,7 @@ else:
 import swift
 from swift.common import utils, swob, exceptions
 from swift.common.exceptions import ChunkWriteTimeout, ShortReadError, \
-    ChunkReadTimeout
+    ChunkReadTimeout, RangeAlreadyComplete
 from swift.common.utils import Timestamp, list_from_csv, md5, FileLikeIter, \
     ShardRange, Namespace, NamespaceBoundList
 from swift.proxy import server as proxy_server
@@ -1865,6 +1865,73 @@ class TestReplicatedObjController(CommonObjectControllerMixin,
         for line in error_lines:
             self.assertIn('Trying to read object during GET ', line)
 
+    def test_GET_unable_to_resume(self):
+        self.app.recoverable_node_timeout = 0.01
+        self.app.client_timeout = 0.1
+        self.app.object_chunk_size = 10
+        resp_body = b'length 8'
+        etag = md5(resp_body, usedforsecurity=False).hexdigest()
+        headers = {
+            'Etag': etag,
+            'Content-Type': b'plain/text',
+            'Content-Length': len(resp_body),
+            'X-Timestamp': Timestamp(self.ts()).normal,
+        }
+
+        # make all responses slow...
+        responses = [
+            StubResponse(200, resp_body, headers, slowdown=0.1),
+            StubResponse(200, resp_body, headers, slowdown=0.1),
+            StubResponse(200, resp_body, headers, slowdown=0.1),
+        ]
+
+        def get_response(req):
+            return responses.pop(0) if responses else StubResponse(404)
+
+        req = swob.Request.blank('/v1/a/c/o')
+        with capture_http_requests(get_response):
+            resp = req.get_response(self.app)
+            with self.assertRaises(ChunkReadTimeout):
+                _ = resp.body
+        self.assertEqual(resp.status_int, 200)
+        self.assertEqual(etag, resp.headers.get('ETag'))
+
+        error_lines = self.app.logger.get_lines_for_level('error')
+        self.assertEqual(3, len(error_lines))
+        for line in error_lines[:3]:
+            self.assertIn('Trying to read object during GET', line)
+
+    def test_GET_newest_will_not_resume(self):
+        self.app.recoverable_node_timeout = 0.01
+        self.app.client_timeout = 0.1
+        self.app.object_chunk_size = 10
+        resp_body = b'length 8'
+        etag = md5(resp_body, usedforsecurity=False).hexdigest()
+        headers = {
+            'Etag': etag,
+            'Content-Type': b'plain/text',
+            'Content-Length': len(resp_body),
+            'X-Timestamp': Timestamp(self.ts()).normal,
+        }
+
+        # make all responses slow...
+        responses = [
+            StubResponse(200, resp_body, headers, slowdown=0.1),
+        ]
+
+        def get_response(req):
+            return responses.pop(0) if responses else StubResponse(404)
+
+        req = swob.Request.blank('/v1/a/c/o', headers={'X-Newest': 'true'})
+        with capture_http_requests(get_response):
+            resp = req.get_response(self.app)
+            with self.assertRaises(ChunkReadTimeout):
+                _ = resp.body
+        self.assertEqual(resp.status_int, 200)
+        self.assertEqual(etag, resp.headers.get('ETag'))
+        error_lines = self.app.logger.get_lines_for_level('error')
+        self.assertEqual(0, len(error_lines))
+
     def test_GET_resuming_ignores_416(self):
         # verify that a resuming getter will not try to use the content of a
         # 416 response (because it's etag will mismatch that from the first
@@ -5511,7 +5578,7 @@ class TestECObjController(ECObjectControllerMixin, unittest.TestCase):
         for line in self.logger.logger.records['ERROR']:
             self.assertIn(req.headers['x-trans-id'], line)
 
-    def test_GET_read_timeout_fails(self):
+    def _do_test_GET_read_timeout_fast_forward_fails(self, error):
         segment_size = self.policy.ec_segment_size
         test_data = (b'test' * segment_size)[:-333]
         etag = md5(test_data).hexdigest()
@@ -5538,10 +5605,18 @@ class TestECObjController(ECObjectControllerMixin, unittest.TestCase):
                               headers=headers), \
                 mock.patch(
                     'swift.proxy.controllers.obj.ECFragGetter.fast_forward',
-                    side_effect=ValueError()):
+                    side_effect=error):
             resp = req.get_response(self.app)
             self.assertEqual(resp.status_int, 200)
             self.assertNotEqual(md5(resp.body).hexdigest(), etag)
+
+        for line in self.logger.logger.records['ERROR'] + \
+                self.logger.logger.records['WARNING']:
+            self.assertIn(req.headers['x-trans-id'], line)
+
+    def test_GET_read_timeout_fast_forward_fails(self):
+        self._do_test_GET_read_timeout_fast_forward_fails(ValueError())
+
         error_lines = self.logger.get_lines_for_level('error')
         self.assertEqual(2, len(error_lines))
         self.assertIn('Unable to fast forward', error_lines[0])
@@ -5551,9 +5626,21 @@ class TestECObjController(ECObjectControllerMixin, unittest.TestCase):
         self.assertIn(
             'Un-recoverable fragment rebuild. Only received 9/10 fragments',
             warning_lines[0])
-        for line in self.logger.logger.records['ERROR'] + \
-                self.logger.logger.records['WARNING']:
-            self.assertIn(req.headers['x-trans-id'], line)
+
+    def test_GET_read_timeout_fast_forward_range_complete(self):
+        self._do_test_GET_read_timeout_fast_forward_fails(
+            RangeAlreadyComplete())
+
+        error_lines = self.logger.get_lines_for_level('error')
+        self.assertEqual(0, len(error_lines))
+        # the test is a little bogus - presumably if the range was complete
+        # then the fragment would be ok to rebuild. But the test pretends range
+        # was complete without actually feeding the bytes to the getter...
+        warning_lines = self.logger.get_lines_for_level('warning')
+        self.assertEqual(1, len(warning_lines))
+        self.assertIn(
+            'Un-recoverable fragment rebuild. Only received 9/10 fragments',
+            warning_lines[0])
 
     def test_GET_one_short_fragment_archive(self):
         # verify that a warning is logged when one fragment archive returns
diff --git a/test/unit/proxy/test_server.py b/test/unit/proxy/test_server.py
index c61c1f10e4..4602f146c2 100644
--- a/test/unit/proxy/test_server.py
+++ b/test/unit/proxy/test_server.py
@@ -5425,99 +5425,148 @@ class TestReplicatedObjectController(
             resp = req.get_response(self.app)
             self.assertEqual(resp.status_int, 201)
 
-    def test_node_read_timeout(self):
+    def _do_test_node_read_timeout(self, slow=None, etags=None):
+        # this helper gets as far as making the first backend request,
+        # returning 200, but the response body isn't read - callers read the
+        # body and trigger any resuming GETs.
+        object_ring = self.app.get_object_ring(None)
+        # there are only 3 devices so no handoff requests expected
+        self.assertEqual(3, len(object_ring.devs))
+        self.app.recoverable_node_timeout = 0.2
+
+        self.logger.clear()
+        req = Request.blank('/v1/a/c/o', environ={'REQUEST_METHOD': 'GET'})
+        self.app.update_request(req)
+
+        request_log = []
+
+        # we don't use mocked_http_conn because we return before the code_iter
+        # is empty and would get a "left over status" AssertionError
+        def capture_req(ipaddr, port, device, partition, method, path,
+                        headers=None, query_string=None):
+            request_log.append((method, path))
+
+        # account HEAD, container HEAD, obj GET x 3
+        set_http_connect(200, 200, 200, 200, 200, body=b'lalala',
+                         slow=slow, etags=etags, give_connect=capture_req)
+        resp = req.get_response(self.app)
+        self.assertEqual(200, resp.status_int)
+        # at this point we've only made the request to the first object
+        self.assertEqual([
+            ('HEAD', '/a'),
+            ('HEAD', '/a/c'),
+            ('GET', '/a/c/o'),
+        ], request_log)
+        return resp, request_log
+
+    def test_node_read_timeout_retry_three_time_out(self):
         with save_globals():
-            self.app.account_ring.get_nodes('account')
-            for dev in self.app.account_ring.devs:
-                dev['ip'] = '127.0.0.1'
-                dev['port'] = 1
-            self.app.container_ring.get_nodes('account')
-            for dev in self.app.container_ring.devs:
-                dev['ip'] = '127.0.0.1'
-                dev['port'] = 1
-            object_ring = self.app.get_object_ring(None)
-            object_ring.get_nodes('account')
-            for dev in object_ring.devs:
-                dev['ip'] = '127.0.0.1'
-                dev['port'] = 1
-            req = Request.blank('/v1/a/c/o', environ={'REQUEST_METHOD': 'GET'})
-            self.app.update_request(req)
-            set_http_connect(200, 200, 200, slow=0.1)
-            req.sent_size = 0
-            resp = req.get_response(self.app)
-            got_exc = False
-            try:
-                resp.body
-            except ChunkReadTimeout:
-                got_exc = True
-            self.assertFalse(got_exc)
-            self.app.recoverable_node_timeout = 0.1
-            set_http_connect(200, 200, 200, slow=1.0)
-            resp = req.get_response(self.app)
+            # all obj nodes time out
+            resp, req_log = self._do_test_node_read_timeout(
+                slow=[0.0, 0.0, 1.0, 1.0, 1.0])
             with self.assertRaises(ChunkReadTimeout):
                 resp.body
+            # two nodes left to attempt resume, both timeout reading body
+            self.assertEqual([
+                ('GET', '/a/c/o'),
+                ('GET', '/a/c/o'),
+            ], req_log[3:])
+            error_lines = self.logger.get_lines_for_level('error')
+            self.assertEqual(3, len(error_lines))
+            for line in error_lines[:3]:
+                self.assertIn('Trying to read object during GET', line)
 
-    def test_node_read_timeout_retry(self):
+    def test_node_read_timeout_retry_only_first_time_out(self):
         with save_globals():
-            self.app.account_ring.get_nodes('account')
-            for dev in self.app.account_ring.devs:
-                dev['ip'] = '127.0.0.1'
-                dev['port'] = 1
-            self.app.container_ring.get_nodes('account')
-            for dev in self.app.container_ring.devs:
-                dev['ip'] = '127.0.0.1'
-                dev['port'] = 1
-            object_ring = self.app.get_object_ring(None)
-            object_ring.get_nodes('account')
-            for dev in object_ring.devs:
-                dev['ip'] = '127.0.0.1'
-                dev['port'] = 1
-            req = Request.blank('/v1/a/c/o', environ={
-                'REQUEST_METHOD': 'GET', 'swift.cache': FakeMemcache()})
-            self.app.update_request(req)
+            resp, req_log = self._do_test_node_read_timeout(
+                slow=[0, 0, 1.0])
+            self.assertEqual([], req_log[3:])  # sanity
+            # we get the body
+            self.assertEqual(resp.body, b'lalala')
+            # only one retry
+            self.assertEqual([
+                ('GET', '/a/c/o'),
+            ], req_log[3:])
+            error_lines = self.logger.get_lines_for_level('error')
+            self.assertEqual(1, len(error_lines))
+            self.assertIn('Trying to read object during GET', error_lines[0])
 
-            self.app.recoverable_node_timeout = 0.1
-            set_http_connect(200, 200, 200, slow=[1.0, 1.0, 1.0])
-            resp = req.get_response(self.app)
-            with self.assertRaises(ChunkReadTimeout):
-                resp.body
-
-            set_http_connect(200, 200, 200, body=b'lalala',
-                             slow=[1.0, 1.0])
-            resp = req.get_response(self.app)
-            self.assertEqual(resp.body, b'lalala')
-
-            set_http_connect(200, 200, 200, body=b'lalala',
-                             slow=[1.0, 1.0], etags=['a', 'a', 'a'])
-            resp = req.get_response(self.app)
-            self.assertEqual(resp.body, b'lalala')
-
-            set_http_connect(200, 200, 200, body=b'lalala',
-                             slow=[1.0, 1.0], etags=['a', 'b', 'a'])
-            resp = req.get_response(self.app)
-            self.assertEqual(resp.body, b'lalala')
-
-            set_http_connect(200, 200, 200, body=b'lalala',
-                             slow=[1.0, 1.0], etags=['a', 'b', 'b'])
-            resp = req.get_response(self.app)
+    def test_node_read_timeout_retry_two_time_out(self):
+        with save_globals():
+            resp, req_log = self._do_test_node_read_timeout(
+                slow=[0, 0, 1.0, 1.0])
+            self.assertEqual([], req_log[3:])  # sanity
+            # first 2 obj nodes time out
+            self.assertEqual(resp.body, b'lalala')
+            # ... so two retries
+            self.assertEqual([
+                ('GET', '/a/c/o'),
+                ('GET', '/a/c/o'),
+            ], req_log[3:])
+            error_lines = self.logger.get_lines_for_level('error')
+            self.assertEqual(2, len(error_lines))
+            for line in error_lines[:2]:
+                self.assertIn('Trying to read object during GET', line)
+
+    def test_node_read_timeout_retry_first_two_time_out_etags_match(self):
+        with save_globals():
+            resp, req_log = self._do_test_node_read_timeout(
+                slow=[0, 0, 1.0, 1.0],
+                etags=['account', 'container', 'a', 'a', 'a'])
+            self.assertEqual([], req_log[3:])  # sanity
+            # we get the body
+            self.assertEqual(resp.body, b'lalala')
+            # this is the same as above test, but explicit etag match
+            self.assertEqual([
+                ('GET', '/a/c/o'),
+                ('GET', '/a/c/o'),
+            ], req_log[3:])
+            error_lines = self.logger.get_lines_for_level('error')
+            self.assertEqual(2, len(error_lines))
+            for line in error_lines[:2]:
+                self.assertIn('Trying to read object during GET', line)
+
+    def test_node_read_timeout_retry_one_time_out_one_etag_mismatch(self):
+        with save_globals():
+            resp, req_log = self._do_test_node_read_timeout(
+                slow=[0, 0, 1.0],
+                etags=['account', 'container', 'a', 'b', 'a'])
+            self.assertEqual([], req_log[3:])  # sanity
+            self.assertEqual(resp.body, b'lalala')
+            # N.B. even if you break the proxy to return second response, this
+            # header was sent with the original 200 ok
+            self.assertEqual(resp.etag, 'a')
+            # but we can know we got the right response because we had to retry
+            # twice because of etag
+            self.assertEqual([
+                ('GET', '/a/c/o'),
+                ('GET', '/a/c/o'),
+            ], req_log[3:])
+            error_lines = self.logger.get_lines_for_level('error')
+            # only one timeout error for the first source because the second
+            # source with wrong etag is not read
+            self.assertEqual(1, len(error_lines))
+            self.assertIn('Trying to read object during GET', error_lines[0])
+
+    def test_node_read_timeout_retry_one_time_out_two_etag_mismatch(self):
+        with save_globals():
+            resp, req_log = self._do_test_node_read_timeout(
+                slow=[0, 0, 1.0],
+                etags=['account', 'container', 'a', 'b', 'b'])
             with self.assertRaises(ChunkReadTimeout):
                 resp.body
+            self.assertEqual([
+                ('GET', '/a/c/o'),
+                ('GET', '/a/c/o'),
+            ], req_log[3:])
+            error_lines = self.logger.get_lines_for_level('error')
+            # only one timeout error for the first source because the second
+            # and third sources with wrong etag are not read
+            self.assertEqual(1, len(error_lines))
+            self.assertIn('Trying to read object during GET', error_lines[0])
 
     def test_node_write_timeout(self):
         with save_globals():
-            self.app.account_ring.get_nodes('account')
-            for dev in self.app.account_ring.devs:
-                dev['ip'] = '127.0.0.1'
-                dev['port'] = 1
-            self.app.container_ring.get_nodes('account')
-            for dev in self.app.container_ring.devs:
-                dev['ip'] = '127.0.0.1'
-                dev['port'] = 1
-            object_ring = self.app.get_object_ring(None)
-            object_ring.get_nodes('account')
-            for dev in object_ring.devs:
-                dev['ip'] = '127.0.0.1'
-                dev['port'] = 1
             req = Request.blank('/v1/a/c/o',
                                 environ={'REQUEST_METHOD': 'PUT'},
                                 headers={'Content-Length': '4',
@@ -11131,18 +11180,18 @@ class TestContainerController(unittest.TestCase):
             self.assertEqual(timestamp, timestamps[0])
             self.assertTrue(re.match(r'[0-9]{10}\.[0-9]{5}', timestamp))
 
-    def test_node_read_timeout_retry_to_container(self):
+    def test_node_read_timeout_no_retry_to_container(self):
         with save_globals():
             req = Request.blank('/v1/a/c', environ={'REQUEST_METHOD': 'GET'})
             self.app.node_timeout = 0.1
-            set_http_connect(200, 200, 200, body='abcdef', slow=[1.0, 1.0])
+            # account HEAD, container GET
+            set_http_connect(200, 200, body='abcdef', slow=[0.0, 1.0])
             resp = req.get_response(self.app)
-            got_exc = False
-            try:
+            self.assertEqual(200, resp.status_int)
+            with self.assertRaises(ChunkReadTimeout):
                 resp.body
-            except ChunkReadTimeout:
-                got_exc = True
-            self.assertTrue(got_exc)
+            error_lines = self.app.logger.get_lines_for_level('error')
+            self.assertEqual(0, len(error_lines))
 
 
 @patch_policies([StoragePolicy(0, 'zero', True, object_ring=FakeRing())])