diff --git a/doc/source/misc.rst b/doc/source/misc.rst index 64549570bd..9640c36754 100644 --- a/doc/source/misc.rst +++ b/doc/source/misc.rst @@ -202,3 +202,11 @@ Static Large Objects .. automodule:: swift.common.middleware.slo :members: :show-inheritance: + + +List Endpoints +============== + +.. automodule:: swift.common.middleware.list_endpoints + :members: + :show-inheritance: diff --git a/etc/proxy-server.conf-sample b/etc/proxy-server.conf-sample index 86d58c48a3..7deddad0b4 100644 --- a/etc/proxy-server.conf-sample +++ b/etc/proxy-server.conf-sample @@ -316,6 +316,10 @@ use = egg:swift#name_check # maximum_length = 255 # forbidden_regexp = /\./|/\.\./|/\.$|/\.\.$ +[filter:list-endpoints] +use = egg:swift#list_endpoints +# list_endpoints_path = /endpoints/ + [filter:proxy-logging] use = egg:swift#proxy_logging # If not set, logging directives from [DEFAULT] without "access_" will be used diff --git a/setup.py b/setup.py index cbe2e5cd39..6b4230a18d 100644 --- a/setup.py +++ b/setup.py @@ -107,6 +107,8 @@ setup( 'proxy_logging=swift.common.middleware.proxy_logging:' 'filter_factory', 'slo=swift.common.middleware.slo:filter_factory', + 'list_endpoints=swift.common.middleware.list_endpoints:' + 'filter_factory', ], }, ) diff --git a/swift/common/middleware/list_endpoints.py b/swift/common/middleware/list_endpoints.py new file mode 100644 index 0000000000..3fa24dcdf5 --- /dev/null +++ b/swift/common/middleware/list_endpoints.py @@ -0,0 +1,153 @@ +# Copyright (c) 2012 OpenStack, LLC. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +List endpoints for an object, account or container. + +This middleware makes it possible to integrate swift with software +that relies on data locality information to avoid network overhead, +such as Hadoop. + +Answers requests of the form:: + + /endpoints/{account}/{container}/{object} + /endpoints/{account}/{container} + /endpoints/{account} + +with a JSON-encoded list of endpoints of the form:: + + http://{server}:{port}/{dev}/{part}/{acc}/{cont}/{obj} + http://{server}:{port}/{dev}/{part}/{acc}/{cont} + http://{server}:{port}/{dev}/{part}/{acc} + +correspondingly, e.g.:: + + http://10.1.1.1:6000/sda1/2/a/c2/o1 + http://10.1.1.1:6000/sda1/2/a/c2 + http://10.1.1.1:6000/sda1/2/a + +The '/endpoints/' path is customizable ('list_endpoints_path' +configuration parameter). + +Intended for consumption by third-party services living inside the +cluster (as the endpoints make sense only inside the cluster behind +the firewall); potentially written in a different language. + +This is why it's provided as a REST API and not just a Python API: +to avoid requiring clients to write their own ring parsers in their +languages, and to avoid the necessity to distribute the ring file +to clients and keep it up-to-date. + +Note that the call is not authenticated, which means that a proxy +with this middleware enabled should not be open to an untrusted +environment (everyone can query the locality data using this middleware). +""" + +from urllib import quote, unquote + +from swift.common.ring import Ring +from swift.common.utils import json, get_logger, split_path +from swift.common.swob import Request, Response +from swift.common.swob import HTTPBadRequest, HTTPMethodNotAllowed + + +class ListEndpointsMiddleware(object): + """ + List endpoints for an object, account or container. + + See above for a full description. + + Uses configuration parameter `swift_dir` (default `/etc/swift`). + + :param app: The next WSGI filter or app in the paste.deploy + chain. + :param conf: The configuration dict for the middleware. + """ + + def __init__(self, app, conf): + self.app = app + self.logger = get_logger(conf, log_route='endpoints') + swift_dir = conf.get('swift_dir', '/etc/swift') + self.account_ring = Ring(swift_dir, ring_name='account') + self.container_ring = Ring(swift_dir, ring_name='container') + self.object_ring = Ring(swift_dir, ring_name='object') + self.endpoints_path = conf.get('list_endpoints_path', '/endpoints/') + if not self.endpoints_path.endswith('/'): + self.endpoints_path += '/' + + def __call__(self, env, start_response): + request = Request(env) + + if not request.path.startswith(self.endpoints_path): + return self.app(env, start_response) + + if request.method != 'GET': + return HTTPMethodNotAllowed( + req=request, headers={"Allow": "GET"})(env, start_response) + + try: + clean_path = request.path[len(self.endpoints_path) - 1:] + account, container, obj = \ + split_path(clean_path, 1, 3, True) + except ValueError: + return HTTPBadRequest('No account specified')(env, start_response) + + if account is not None: + account = unquote(account) + if container is not None: + container = unquote(container) + if obj is not None: + obj = unquote(obj) + + if obj is not None: + partition, nodes = self.object_ring.get_nodes( + account, container, obj) + endpoint_template = 'http://{ip}:{port}/{device}/{partition}/' + \ + '{account}/{container}/{obj}' + elif container is not None: + partition, nodes = self.container_ring.get_nodes( + account, container) + endpoint_template = 'http://{ip}:{port}/{device}/{partition}/' + \ + '{account}/{container}' + else: + partition, nodes = self.account_ring.get_nodes( + account) + endpoint_template = 'http://{ip}:{port}/{device}/{partition}/' + \ + '{account}' + + endpoints = [] + for node in nodes: + endpoint = endpoint_template.format( + ip=node['ip'], + port=node['port'], + device=node['device'], + partition=partition, + account=quote(account), + container=quote(container or ''), + obj=quote(obj or '')) + endpoints.append(endpoint) + + return Response(json.dumps(endpoints), + content_type='application/json')(env, start_response) + + +def filter_factory(global_conf, **local_conf): + conf = global_conf.copy() + conf.update(local_conf) + + def list_endpoints_filter(app): + return ListEndpointsMiddleware(app, conf) + + return list_endpoints_filter diff --git a/test/unit/common/middleware/test_list_endpoints.py b/test/unit/common/middleware/test_list_endpoints.py new file mode 100644 index 0000000000..642c01f9d1 --- /dev/null +++ b/test/unit/common/middleware/test_list_endpoints.py @@ -0,0 +1,200 @@ +# Copyright (c) 2012 OpenStack, LLC. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import array +import unittest +from shutil import rmtree + +import os +from swift.common import ring, utils +from swift.common.utils import json +from swift.common.swob import Request, Response +from swift.common.middleware import list_endpoints + + +class FakeApp(object): + def __call__(self, env, start_response): + return Response(body="FakeApp")(env, start_response) + + +def start_response(*args): + pass + + +class TestListEndpoints(unittest.TestCase): + def setUp(self): + utils.HASH_PATH_SUFFIX = 'endcap' + self.testdir = os.path.join(os.path.dirname(__file__), 'ring') + rmtree(self.testdir, ignore_errors=1) + os.mkdir(self.testdir) + + accountgz = os.path.join(self.testdir, 'account.ring.gz') + containergz = os.path.join(self.testdir, 'container.ring.gz') + objectgz = os.path.join(self.testdir, 'object.ring.gz') + + # Let's make the rings slightly different so we can test + # that the correct ring is consulted (e.g. we don't consult + # the object ring to get nodes for a container) + intended_replica2part2dev_id_a = [ + array.array('H', [3, 1, 3, 1]), + array.array('H', [0, 3, 1, 4]), + array.array('H', [1, 4, 0, 3])] + intended_replica2part2dev_id_c = [ + array.array('H', [4, 3, 0, 1]), + array.array('H', [0, 1, 3, 4]), + array.array('H', [3, 4, 0, 1])] + intended_replica2part2dev_id_o = [ + array.array('H', [0, 1, 0, 1]), + array.array('H', [0, 1, 0, 1]), + array.array('H', [3, 4, 3, 4])] + intended_devs = [{'id': 0, 'zone': 0, 'weight': 1.0, + 'ip': '10.1.1.1', 'port': 6000, + 'device': 'sda1'}, + {'id': 1, 'zone': 0, 'weight': 1.0, + 'ip': '10.1.1.1', 'port': 6000, + 'device': 'sdb1'}, + None, + {'id': 3, 'zone': 2, 'weight': 1.0, + 'ip': '10.1.2.1', 'port': 6000, + 'device': 'sdc1'}, + {'id': 4, 'zone': 2, 'weight': 1.0, + 'ip': '10.1.2.2', 'port': 6000, + 'device': 'sdd1'}] + intended_part_shift = 30 + intended_reload_time = 15 + ring.RingData(intended_replica2part2dev_id_a, + intended_devs, intended_part_shift).save(accountgz) + ring.RingData(intended_replica2part2dev_id_c, + intended_devs, intended_part_shift).save(containergz) + ring.RingData(intended_replica2part2dev_id_o, + intended_devs, intended_part_shift).save(objectgz) + + self.app = FakeApp() + self.list_endpoints = list_endpoints.filter_factory( + {'swift_dir': self.testdir})(self.app) + + def tearDown(self): + rmtree(self.testdir, ignore_errors=1) + + def test_get_endpoint(self): + # Expected results for objects taken from test_ring + # Expected results for others computed by manually invoking + # ring.get_nodes(). + resp = Request.blank('/endpoints/a/c/o1').get_response( + self.list_endpoints) + self.assertEquals(resp.status_int, 200) + self.assertEquals(resp.content_type, 'application/json') + self.assertEquals(json.loads(resp.body), [ + "http://10.1.1.1:6000/sdb1/1/a/c/o1", + "http://10.1.2.2:6000/sdd1/1/a/c/o1" + ]) + + # Here, 'o1/' is the object name. + resp = Request.blank('/endpoints/a/c/o1/').get_response( + self.list_endpoints) + self.assertEquals(resp.status_int, 200) + self.assertEquals(json.loads(resp.body), [ + "http://10.1.1.1:6000/sdb1/3/a/c/o1/", + "http://10.1.2.2:6000/sdd1/3/a/c/o1/" + ]) + + resp = Request.blank('/endpoints/a/c2').get_response( + self.list_endpoints) + self.assertEquals(resp.status_int, 200) + self.assertEquals(json.loads(resp.body), [ + "http://10.1.1.1:6000/sda1/2/a/c2", + "http://10.1.2.1:6000/sdc1/2/a/c2" + ]) + + resp = Request.blank('/endpoints/a1').get_response( + self.list_endpoints) + self.assertEquals(resp.status_int, 200) + self.assertEquals(json.loads(resp.body), [ + "http://10.1.2.1:6000/sdc1/0/a1", + "http://10.1.1.1:6000/sda1/0/a1", + "http://10.1.1.1:6000/sdb1/0/a1" + ]) + + resp = Request.blank('/endpoints/').get_response( + self.list_endpoints) + self.assertEquals(resp.status_int, 400) + + resp = Request.blank('/endpoints/a/c 2').get_response( + self.list_endpoints) + self.assertEquals(resp.status_int, 200) + self.assertEquals(json.loads(resp.body), [ + "http://10.1.1.1:6000/sdb1/3/a/c%202", + "http://10.1.2.2:6000/sdd1/3/a/c%202" + ]) + + resp = Request.blank('/endpoints/a/c%202').get_response( + self.list_endpoints) + self.assertEquals(resp.status_int, 200) + self.assertEquals(json.loads(resp.body), [ + "http://10.1.1.1:6000/sdb1/3/a/c%202", + "http://10.1.2.2:6000/sdd1/3/a/c%202" + ]) + + resp = Request.blank('/endpoints/ac%20count/con%20tainer/ob%20ject') \ + .get_response(self.list_endpoints) + self.assertEquals(resp.status_int, 200) + self.assertEquals(json.loads(resp.body), [ + "http://10.1.1.1:6000/sdb1/3/ac%20count/con%20tainer/ob%20ject", + "http://10.1.2.2:6000/sdd1/3/ac%20count/con%20tainer/ob%20ject" + ]) + + resp = Request.blank('/endpoints/a/c/o1', {'REQUEST_METHOD': 'POST'}) \ + .get_response(self.list_endpoints) + self.assertEquals(resp.status_int, 405) + self.assertEquals(resp.status, '405 Method Not Allowed') + self.assertEquals(resp.headers['allow'], 'GET') + + resp = Request.blank('/not-endpoints').get_response( + self.list_endpoints) + self.assertEquals(resp.status_int, 200) + self.assertEquals(resp.status, '200 OK') + self.assertEquals(resp.body, 'FakeApp') + + # test custom path with trailing slash + custom_path_le = list_endpoints.filter_factory({ + 'swift_dir': self.testdir, + 'list_endpoints_path': '/some/another/path/' + })(self.app) + resp = Request.blank('/some/another/path/a/c/o1') \ + .get_response(custom_path_le) + self.assertEquals(resp.status_int, 200) + self.assertEquals(resp.content_type, 'application/json') + self.assertEquals(json.loads(resp.body), [ + "http://10.1.1.1:6000/sdb1/1/a/c/o1", + "http://10.1.2.2:6000/sdd1/1/a/c/o1" + ]) + + # test ustom path without trailing slash + custom_path_le = list_endpoints.filter_factory({ + 'swift_dir': self.testdir, + 'list_endpoints_path': '/some/another/path' + })(self.app) + resp = Request.blank('/some/another/path/a/c/o1') \ + .get_response(custom_path_le) + self.assertEquals(resp.status_int, 200) + self.assertEquals(resp.content_type, 'application/json') + self.assertEquals(json.loads(resp.body), [ + "http://10.1.1.1:6000/sdb1/1/a/c/o1", + "http://10.1.2.2:6000/sdd1/1/a/c/o1" + ]) + + +if __name__ == '__main__': + unittest.main()