From a8b4bc6ff3eddbc1c38fdaa1e2bc5c061b256069 Mon Sep 17 00:00:00 2001 From: "James E. Blair" Date: Mon, 24 Jan 2022 13:45:25 -0800 Subject: [PATCH] Add upload-logs-ibm role This role uploads logs to IBM Cloud object storage. Change-Id: Ibe1131f863a64051b427fcb03b126b1577c4843a --- doc/source/log-roles.rst | 1 + .../library/test_zuul_ibm_upload.py | 68 ++++ .../library/zuul_ibm_upload.py | 368 ++++++++++++++++++ roles/upload-logs-ibm/README.rst | 91 +++++ roles/upload-logs-ibm/defaults/main.yaml | 3 + roles/upload-logs-ibm/meta/main.yaml | 2 + roles/upload-logs-ibm/tasks/main.yaml | 42 ++ test-requirements.txt | 3 + 8 files changed, 578 insertions(+) create mode 100644 roles/upload-logs-base/library/test_zuul_ibm_upload.py create mode 100755 roles/upload-logs-base/library/zuul_ibm_upload.py create mode 100644 roles/upload-logs-ibm/README.rst create mode 100644 roles/upload-logs-ibm/defaults/main.yaml create mode 100644 roles/upload-logs-ibm/meta/main.yaml create mode 100644 roles/upload-logs-ibm/tasks/main.yaml diff --git a/doc/source/log-roles.rst b/doc/source/log-roles.rst index de7782b22..986e22405 100644 --- a/doc/source/log-roles.rst +++ b/doc/source/log-roles.rst @@ -15,5 +15,6 @@ Log Roles .. zuul:autorole:: upload-logs .. zuul:autorole:: upload-logs-azure .. zuul:autorole:: upload-logs-gcs +.. zuul:autorole:: upload-logs-ibm .. zuul:autorole:: upload-logs-s3 .. zuul:autorole:: upload-logs-swift diff --git a/roles/upload-logs-base/library/test_zuul_ibm_upload.py b/roles/upload-logs-base/library/test_zuul_ibm_upload.py new file mode 100644 index 000000000..24821eb22 --- /dev/null +++ b/roles/upload-logs-base/library/test_zuul_ibm_upload.py @@ -0,0 +1,68 @@ +# Copyright (C) 2018-2019 Red Hat, Inc. +# Copyright (C) 2021-2022 Acme Gating, LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# +# See the License for the specific language governing permissions and +# limitations under the License. 
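# These tests exercise Uploader against a mocked ibm_boto3 client:
# they check that upload() applies the expected CORS configuration
# (GET/HEAD from any origin) and that each fixture file is handed to
# upload_fileobj.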
+ +# Make coding more python3-ish +from __future__ import (absolute_import, division, print_function) +__metaclass__ = type + +import os +import testtools +try: + from unittest import mock +except ImportError: + import mock + +from .zuul_ibm_upload import Uploader +from ..module_utils.zuul_jobs.upload_utils import FileDetail + + +FIXTURE_DIR = os.path.join(os.path.dirname(__file__), + 'test-fixtures') + + +class TestUpload(testtools.TestCase): + + def test_upload_result(self): + client = mock.Mock() + uploader = Uploader(client=client, bucket="bucket", + endpoint_url='http://example.com') + + # Get some test files to upload + files = [ + FileDetail( + os.path.join(FIXTURE_DIR, "logs/job-output.json"), + "job-output.json", + ), + FileDetail( + os.path.join(FIXTURE_DIR, "logs/zuul-info/inventory.yaml"), + "inventory.yaml", + ), + ] + + uploader.upload(files) + client.put_bucket_cors.assert_called_with( + Bucket='bucket', + CORSConfiguration={ + 'CORSRules': [{ + 'AllowedMethods': ['GET', 'HEAD'], + 'AllowedOrigins': ['*']}] + }) + + upload_calls = uploader.client.upload_fileobj.mock_calls + upload_call_filenames = [x[1][2] for x in upload_calls] + self.assertIn('job-output.json', upload_call_filenames) + self.assertIn('inventory.yaml', upload_call_filenames) diff --git a/roles/upload-logs-base/library/zuul_ibm_upload.py b/roles/upload-logs-base/library/zuul_ibm_upload.py new file mode 100755 index 000000000..07215fd30 --- /dev/null +++ b/roles/upload-logs-base/library/zuul_ibm_upload.py @@ -0,0 +1,368 @@ +#!/usr/bin/env python3 +# +# Copyright 2014 Rackspace Australia +# Copyright 2018-2019 Red Hat, Inc +# Copyright 2021-2022 Acme Gating, LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
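# This file doubles as an Ansible module and a stand-alone CLI tool;
# the __main__ block at the bottom dispatches to cli_main() when stdin
# is a TTY (interactive use) and to ansible_main() otherwise.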

# Make coding more python3-ish
from __future__ import (absolute_import, division, print_function)
__metaclass__ = type


"""
Utility to upload files to IBM Cloud

Run this from the CLI, in the zuul-jobs/roles directory, with:

    python -m upload-logs-base.library.zuul_ibm_upload
"""

import argparse
import logging
import os
try:
    import queue as queuelib
except ImportError:
    import Queue as queuelib
import sys
import threading

from ibm_botocore.client import Config
import ibm_boto3
import ibm_boto3.s3.transfer

from ansible.module_utils.basic import AnsibleModule

try:
    # Ansible context
    from ansible.module_utils.zuul_jobs.upload_utils import (
        FileList,
        GZIPCompressedStream,
        Indexer,
        retry_function,
    )
except ImportError:
    # Test context
    from ..module_utils.zuul_jobs.upload_utils import (
        FileList,
        GZIPCompressedStream,
        Indexer,
        retry_function,
    )

MAX_UPLOAD_THREADS = 24


class Uploader():
    def __init__(self, client, bucket, prefix=None, public=True,
                 dry_run=False, endpoint_url=None,
                 bucket_location=None):
        self.dry_run = dry_run
        self.public = public
        if dry_run:
            self.url = 'https://example.com/a/path/'
            return

        self.client = client
        self.prefix = prefix or ''
        self.bucket = bucket

        self.url = os.path.join(endpoint_url,
                                bucket, self.prefix)

        try:
            self._set_cors(bucket)
        except self.client.exceptions.NoSuchBucket:
            if not bucket_location:
                raise Exception("Bucket location must be specified")
            if public:
                acl = 'public-read'
            else:
                acl = 'private'
            self.client.create_bucket(
                ACL=acl,
                Bucket=bucket,
                CreateBucketConfiguration={
                    'LocationConstraint': bucket_location
                }
            )
            self._set_cors(bucket)

    def _set_cors(self, bucket):
        self.client.put_bucket_cors(
            Bucket=bucket,
            CORSConfiguration={
                'CORSRules': [{
                    'AllowedMethods': [
                        'GET',
                        'HEAD',
                    ],
                    'AllowedOrigins': [
                        '*',
                    ],
                }],
            },
        )

    def upload(self, file_list):
        """Spin up thread pool to upload to storage"""

        if self.dry_run:
            return

        num_threads = min(len(file_list), MAX_UPLOAD_THREADS)
        threads = []
        queue = queuelib.Queue()
        # Add items to the queue
        for f in file_list:
            queue.put(f)

        for x in range(num_threads):
            t = threading.Thread(target=self.post_thread, args=(queue,))
            threads.append(t)
            t.start()

        for t in threads:
            t.join()

    def post_thread(self, queue):
        while True:
            try:
                file_detail = queue.get_nowait()
                logging.debug("%s: processing job %s",
                              threading.current_thread(),
                              file_detail)
                retry_function(lambda: self._post_file(file_detail))
            except IOError:
                # Do our best to attempt to upload all the files
                logging.exception("Error opening file")
                continue
            except queuelib.Empty:
                # No more work to do
                return

    @staticmethod
    def _is_text_type(mimetype):
        # We want to compress all text types.
        if mimetype.startswith('text/'):
            return True

        # Further compress types that typically contain text but are not
        # a text subtype.
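        # (Files that already carry a content encoding are not
        # re-compressed; _post_file below passes them through as-is.)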
+ compress_types = [ + 'application/json', + 'image/svg+xml', + ] + if mimetype in compress_types: + return True + return False + + def _post_file(self, file_detail): + relative_path = os.path.join(self.prefix, file_detail.relative_path) + content_encoding = None + + if file_detail.folder: + # We don't need to upload folders to IBM + return + + if (file_detail.encoding is None and + self._is_text_type(file_detail.mimetype)): + content_encoding = 'gzip' + data = GZIPCompressedStream(open(file_detail.full_path, 'rb')) + else: + if (not file_detail.filename.endswith(".gz") and + file_detail.encoding): + # Don't apply gzip encoding to files that we receive as + # already gzipped. The reason for this is storage will + # serve this back to users as an uncompressed file if they + # don't set an accept-encoding that includes gzip. This + # can cause problems when the desired file state is + # compressed as with .tar.gz tarballs. + content_encoding = file_detail.encoding + data = open(file_detail.full_path, 'rb') + + extra_args = dict( + ContentType=file_detail.mimetype, + ) + if content_encoding: + extra_args['ContentEncoding'] = content_encoding + + if self.public: + extra_args['ACL'] = 'public-read' + + self.client.upload_fileobj( + data, + self.bucket, + relative_path, + ExtraArgs=extra_args + ) + + +def run(bucket, files, + indexes=True, parent_links=True, topdir_parent_link=False, + partition=False, footer='index_footer.html', + prefix=None, public=True, dry_run=False, api_key=None, + instance_id=None, endpoint_url=None, bucket_location=None): + + client = ibm_boto3.client( + "s3", + ibm_api_key_id=api_key, + ibm_service_instance_id=instance_id, + config=Config(signature_version="oauth"), + endpoint_url=endpoint_url, + ) + + if prefix: + prefix = prefix.lstrip('/') + if partition and prefix: + parts = prefix.split('/') + if len(parts) > 1: + bucket += '_' + parts[0] + prefix = '/'.join(parts[1:]) + + # Create the objects to make sure the arguments are sound. + with FileList() as file_list: + # Scan the files. + for file_path in files: + file_list.add(file_path) + + indexer = Indexer(file_list) + + # (Possibly) make indexes. + if indexes: + indexer.make_indexes(create_parent_links=parent_links, + create_topdir_parent_link=topdir_parent_link, + append_footer=footer) + + logging.debug("List of files prepared to upload:") + for x in file_list: + logging.debug(x) + + # Upload. 
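        # Constructing the Uploader ensures the bucket exists (creating
        # it with the given location constraint if needed) and applies
        # the CORS rules; upload() then fans the files out across up to
        # MAX_UPLOAD_THREADS worker threads.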
+ uploader = Uploader(client, bucket, prefix, public, dry_run, + endpoint_url, bucket_location) + uploader.upload(file_list) + return uploader.url + + +def ansible_main(): + module = AnsibleModule( + argument_spec=dict( + bucket=dict(required=True, type='str'), + files=dict(required=True, type='list'), + partition=dict(type='bool', default=False), + indexes=dict(type='bool', default=True), + parent_links=dict(type='bool', default=True), + topdir_parent_link=dict(type='bool', default=False), + public=dict(type='bool', default=True), + footer=dict(type='str'), + prefix=dict(type='str'), + api_key=dict(type='str'), + instance_id=dict(type='str'), + endpoint_url=dict(type='str'), + bucket_location=dict(type='str'), + ) + ) + + p = module.params + url = run(p.get('bucket'), p.get('files'), + indexes=p.get('indexes'), + parent_links=p.get('parent_links'), + topdir_parent_link=p.get('topdir_parent_link'), + partition=p.get('partition'), + footer=p.get('footer'), + prefix=p.get('prefix'), + public=p.get('public'), + api_key=p.get('api_key'), + instance_id=p.get('instance_id'), + endpoint_url=p.get('endpoint_url'), + bucket_location=p.get('bucket_location')) + module.exit_json(changed=True, + url=url) + + +def cli_main(): + parser = argparse.ArgumentParser( + description="Upload files to IBM Cloud Storage" + ) + parser.add_argument('--verbose', action='store_true', + help='show debug information') + parser.add_argument('--no-indexes', action='store_true', + help='do not generate any indexes at all') + parser.add_argument('--no-parent-links', action='store_true', + help='do not include links back to a parent dir') + parser.add_argument('--create-topdir-parent-link', action='store_true', + help='include a link in the root directory of the ' + 'files to the parent directory which may be the ' + 'index of all results') + parser.add_argument('--no-public', action='store_true', + help='do not create the bucket as public') + parser.add_argument('--partition', action='store_true', + help='partition the prefix into multiple buckets') + parser.add_argument('--append-footer', default='index_footer.html', + help='when generating an index, if the given file is ' + 'present in a directory, append it to the index ' + '(set to "none" to disable)') + parser.add_argument('--prefix', + help='Prepend this path to the object names when ' + 'uploading') + parser.add_argument('--dry-run', action='store_true', + help='do not attempt to create buckets or upload, ' + 'useful with --verbose for debugging') + parser.add_argument('--api-key', + help='An IBM Cloud API key') + parser.add_argument('--instance-id', + help='An IBM Cloud Object Storage instance ID') + parser.add_argument('--endpoint-url', + help='An IBM Cloud Object Storage endpoint URL') + parser.add_argument('--bucket-location', + help='The location constraint for the bucket') + parser.add_argument('bucket', + help='Name of the bucket to use when uploading') + parser.add_argument('files', nargs='+', + help='the file(s) to upload with recursive glob ' + 'matching when supplied as a string') + + args = parser.parse_args() + + if args.verbose: + logging.basicConfig(level=logging.DEBUG) + logging.captureWarnings(True) + + append_footer = args.append_footer + if append_footer.lower() == 'none': + append_footer = None + + url = run(args.bucket, args.files, + indexes=not args.no_indexes, + parent_links=not args.no_parent_links, + topdir_parent_link=args.create_topdir_parent_link, + partition=args.partition, + footer=append_footer, + prefix=args.prefix, + public=not 
args.no_public,
              dry_run=args.dry_run,
              api_key=args.api_key,
              instance_id=args.instance_id,
              endpoint_url=args.endpoint_url,
              bucket_location=args.bucket_location)
    print(url)


if __name__ == '__main__':
    if sys.stdin.isatty():
        cli_main()
    else:
        ansible_main()
diff --git a/roles/upload-logs-ibm/README.rst b/roles/upload-logs-ibm/README.rst
new file mode 100644
index 000000000..508625e10
--- /dev/null
+++ b/roles/upload-logs-ibm/README.rst
@@ -0,0 +1,91 @@
Upload logs to IBM Cloud Storage

Before using this role, create a cloud object storage `service instance`_
and a `service credential`_.

You may create a bucket within the instance, or allow this role to
create the bucket (or buckets) for you.

**Role Variables**

.. zuul:rolevar:: zuul_site_upload_logs
   :default: true

   Controls when logs are uploaded. ``true``, the default, means always
   upload logs. ``false`` means never upload logs. ``'failure'`` means
   upload logs only when the job has failed.

   .. note:: Intended to be set by admins via site-variables.

.. zuul:rolevar:: zuul_log_partition
   :default: false

   If set to true, then the first component of the log path will be
   removed from the object name and added to the bucket name, so
   that logs for different changes are distributed across a large
   number of buckets.

.. zuul:rolevar:: zuul_log_bucket

   If partitioning is not enabled, this is the name of the bucket
   which will be used. If partitioning is enabled, then this will be
   used as the prefix for the bucket name, which will be separated
   from the partition name by an underscore. For example, "logs_42"
   would be the bucket name for partition 42.

.. zuul:rolevar:: zuul_log_bucket_public
   :default: true

   If the bucket is created, this indicates whether it should be
   created with global read ACLs. If the bucket already exists, it
   will not be modified.

.. zuul:rolevar:: zuul_log_bucket_location

   If the bucket is created, this `storage location`_ will be used as
   the location constraint.

.. zuul:rolevar:: zuul_log_path
   :default: Generated by the role `set-zuul-log-path-fact`

   Prepend this path to the object names when uploading.

.. zuul:rolevar:: zuul_log_create_indexes
   :default: true

   Whether to create ``index.html`` files with directory indexes.

.. zuul:rolevar:: zuul_log_path_shard_build
   :default: false

   This variable is consumed by the `set-zuul-log-path-fact` role,
   which upload-logs-ibm calls into. If set to true, log paths are
   prefixed with the first three characters of the build UUID, which
   improves log file sharding.

   More details can be found at
   :zuul:rolevar:`set-zuul-log-path-fact.zuul_log_path_shard_build`.

.. zuul:rolevar:: zuul_log_api_key

   The API key that was created as part of the `service credential`_.
   This is required.

.. zuul:rolevar:: zuul_log_instance_id

   The instance ID that appears in the `service credential`_.
   This is required.

.. zuul:rolevar:: zuul_log_endpoint_url

   The cloud storage `endpoint`_.
   This is required.


.. _service instance: https://cloud.ibm.com/docs/cloud-object-storage/iam/service-credentials.html

.. _service credential: https://cloud.ibm.com/docs/cloud-object-storage/iam?topic=cloud-object-storage-service-credentials

.. _endpoint: https://cloud.ibm.com/docs/cloud-object-storage?topic=cloud-object-storage-endpoints#endpoints

..
_storage location: https://cloud.ibm.com/docs/cloud-object-storage?topic=cloud-object-storage-classes#classes diff --git a/roles/upload-logs-ibm/defaults/main.yaml b/roles/upload-logs-ibm/defaults/main.yaml new file mode 100644 index 000000000..35ca84b6f --- /dev/null +++ b/roles/upload-logs-ibm/defaults/main.yaml @@ -0,0 +1,3 @@ +zuul_log_partition: false +zuul_log_bucket_public: true +zuul_log_create_indexes: true diff --git a/roles/upload-logs-ibm/meta/main.yaml b/roles/upload-logs-ibm/meta/main.yaml new file mode 100644 index 000000000..3a4055855 --- /dev/null +++ b/roles/upload-logs-ibm/meta/main.yaml @@ -0,0 +1,2 @@ +dependencies: + - role: upload-logs-base diff --git a/roles/upload-logs-ibm/tasks/main.yaml b/roles/upload-logs-ibm/tasks/main.yaml new file mode 100644 index 000000000..3b1b1ed20 --- /dev/null +++ b/roles/upload-logs-ibm/tasks/main.yaml @@ -0,0 +1,42 @@ +- name: Set zuul-log-path fact + include_role: + name: set-zuul-log-path-fact + when: zuul_log_path is not defined + +# Always upload (true), never upload (false) or only on failure ('failure') +- when: zuul_site_upload_logs | default(true) | bool or + (zuul_site_upload_logs == 'failure' and not zuul_success | bool) + block: + # Use chmod instead of file because ansible 2.5 file with recurse and + # follow can't really handle symlinks to . + - name: Ensure logs are readable before uploading + delegate_to: localhost + command: "chmod -R u=rwX,g=rX,o=rX {{ zuul.executor.log_root }}/" + # ANSIBLE0007 chmod used in place of argument mode to file + tags: + - skip_ansible_lint + + - name: Upload logs to IBM Cloud + delegate_to: localhost + no_log: true + zuul_ibm_upload: + partition: "{{ zuul_log_partition }}" + bucket: "{{ zuul_log_bucket }}" + bucket_location: "{{ zuul_log_bucket_location }}" + public: "{{ zuul_log_bucket_public }}" + prefix: "{{ zuul_log_path }}" + indexes: "{{ zuul_log_create_indexes }}" + api_key: "{{ zuul_log_api_key }}" + instance_id: "{{ zuul_log_instance_id }}" + endpoint_url: "{{ zuul_log_endpoint_url }}" + files: + - "{{ zuul.executor.log_root }}/" + register: upload_results + +- name: Return log URL to Zuul + delegate_to: localhost + zuul_return: + data: + zuul: + log_url: "{{ upload_results.url }}/" + when: upload_results is defined diff --git a/test-requirements.txt b/test-requirements.txt index 6b7cbda8c..50bcaa58d 100644 --- a/test-requirements.txt +++ b/test-requirements.txt @@ -42,5 +42,8 @@ boto3 # For upload-logs-azure azure-storage-blob +# For upload-logs-ibm +ibm-cos-sdk + # unittest.mock compatibility package for Python < 3.3 mock;python_version<'3.3'
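
For reference, a minimal sketch of how a deployment might wire this role
into a base job's post-run playbook (not part of this change; the bucket
name, location constraint, endpoint and secret layout below are
hypothetical and would normally come from a Zuul secret):

    - hosts: localhost
      roles:
        - role: upload-logs-ibm
          vars:
            zuul_log_bucket: zuul-logs
            zuul_log_bucket_location: us-south-smart
            zuul_log_api_key: "{{ ibm_cos_credential.api_key }}"
            zuul_log_instance_id: "{{ ibm_cos_credential.instance_id }}"
            zuul_log_endpoint_url: "https://s3.us-south.cloud-object-storage.appdomain.cloud"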