Rewrite maintainers.py functionality
Base it on maintainer-only votes cast within the requisite timeframe rather than attempting to parse Gerrit ACLs and query groups. Change-Id: I982cb2e422f267b2834b4b20b11f1fd011516548
This commit is contained in:
@@ -1,6 +1,4 @@
|
|||||||
#!/usr/bin/env python
|
# Copyright OpenDev Contributors
|
||||||
|
|
||||||
# Copyright (c) 2015 OpenStack Foundation
|
|
||||||
#
|
#
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
# you may not use this file except in compliance with the License.
|
# you may not use this file except in compliance with the License.
|
||||||
@@ -8,154 +6,169 @@
|
|||||||
#
|
#
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
#
|
#
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
# Unless required by applicable law or agreed to in writing,
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
# software distributed under the License is distributed on an "AS
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
# IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
|
||||||
# implied.
|
# express or implied. See the License for the specific language
|
||||||
# See the License for the specific language governing permissions and
|
# governing permissions and limitations under the License.
|
||||||
# limitations under the License.
|
|
||||||
|
|
||||||
# Description: When run using OpenStack's Gerrit server, this builds
|
import datetime
|
||||||
# JSON and YAML representations of repos with information on the
|
import os
|
||||||
# official owning project team if any, deliverable tags, and groups
|
|
||||||
# with approve rights listing the members of each along with their
|
|
||||||
# Gerrit preferred E-mail addresses and usernames when available.
|
|
||||||
|
|
||||||
# Rationale: It was done as a demonstration to a representative of a
|
|
||||||
# foundation member company who requested a list of the "core
|
|
||||||
# reviewers" for official projects, optionally broken down by
|
|
||||||
# integrated vs. other. I'm attempting to show that this data is
|
|
||||||
# already publicly available and can be extracted/analyzed by anyone
|
|
||||||
# without needing to request it.
|
|
||||||
|
|
||||||
# Use: This needs your Gerrit username passed as the command-line
|
|
||||||
# parameter, found at https://review.opendev.org/#/settings/ when
|
|
||||||
# authenticated in the WebUI. It also prompts for an HTTP password
|
|
||||||
# which https://review.opendev.org/#/settings/http-password will
|
|
||||||
# allow you to generate. The results end up in files named
|
|
||||||
# approvers.json and approvers.yaml. At the time of writing, it
|
|
||||||
# takes approximately 6.5 minutes to run on a well-connected machine
|
|
||||||
# with 70-80ms round-trip latency to review.opendev.org.
|
|
||||||
|
|
||||||
# Example:
|
|
||||||
#
|
|
||||||
# $ virtualenv approvers
|
|
||||||
# [...]
|
|
||||||
# $ ./approvers/bin/pip install pyyaml requests
|
|
||||||
# [...]
|
|
||||||
# $ ./approvers/bin/python tools/who-approves.py fungi
|
|
||||||
# Password:
|
|
||||||
# [wait for completion]
|
|
||||||
# $ ./approvers/bin/python
|
|
||||||
# >>> import yaml
|
|
||||||
# >>>
|
|
||||||
# >>> def get_approvers(repos):
|
|
||||||
# ... approvers = set()
|
|
||||||
# ... for repo in repos:
|
|
||||||
# ... for group in repos[repo]['approvers']:
|
|
||||||
# ... for approver in repos[repo]['approvers'][group]:
|
|
||||||
# ... approvers.add(approver)
|
|
||||||
# ... return(approvers)
|
|
||||||
# ...
|
|
||||||
# >>> p = yaml.safe_load(open('approvers.yaml'))
|
|
||||||
# >>> print('Total repos: %s' % len(p))
|
|
||||||
# Total repos: 751
|
|
||||||
# >>> print('Total approvers: %s' % len(get_approvers(p)))
|
|
||||||
# Total approvers: 849
|
|
||||||
# >>>
|
|
||||||
# >>> o = {k: v for k, v in p.iteritems() if 'team' in v}
|
|
||||||
# >>> print('Repos for official teams: %s' % len(o))
|
|
||||||
# Repos for official teams: 380
|
|
||||||
# >>> print('OpenStack repo approvers: %s' % len(get_approvers(o)))
|
|
||||||
# OpenStack repo approvers: 456
|
|
||||||
# >>>
|
|
||||||
# >>> i = {k: v for k, v in p.iteritems() if 'tags' in v
|
|
||||||
# ... and 'release:managed' in v['tags']}
|
|
||||||
# >>> print('Repos under release management: %s' % len(i))
|
|
||||||
# Repos under release management: 77
|
|
||||||
# >>> print('Managed release repo approvers: %s' % len(get_approvers(i)))
|
|
||||||
# Managed release repo approvers: 245
|
|
||||||
|
|
||||||
import getpass
|
|
||||||
import json
|
|
||||||
import re
|
import re
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
import requests
|
from engagement.stats import (
|
||||||
|
from_gerrit_time,
|
||||||
|
get_projects,
|
||||||
|
query_gerrit,
|
||||||
|
report_times,
|
||||||
|
to_gerrit_time,
|
||||||
|
)
|
||||||
|
|
||||||
import yaml
|
import yaml
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def usage_error():
    """Describe the accepted report period formats on stderr and exit 1"""

    # Assemble the whole message first so that it goes out in one write
    message_lines = (
        'ERROR: specify report period like YEAR, YEAR-H[1-2], YEAR-Q[1-4],\n',
        ' YEAR-[01-12], or YYYY-MM-DD..YYYY-MM-DD for a date range\n',
        ' (start date is inclusive, end date is exclusive)\n',
    )
    sys.stderr.write(''.join(message_lines))
    raise SystemExit(1)
|
||||||
acl_path = (
|
|
||||||
'gitweb?p=%s.git;a=blob_plain;f=project.config;hb=refs/meta/config')
|
|
||||||
group_path = 'a/groups/%s/members/?recursive&pp=0'
|
def parse_report_period(when):
    """Parse a supplied report period string, returning a tuple of
    after and before datetime objects

    Accepted forms are YEAR, YEAR-H[1-2], YEAR-Q[1-4], YEAR-MM and
    YYYY-MM-DD..YYYY-MM-DD (the H and Q markers are matched without
    regard to case). The after value is the first instant of the
    period and the before value is the first instant following it.

    Strings which match none of the forms, or which name a month or
    calendar date that does not exist, are reported via usage_error()
    rather than surfacing as a ValueError traceback."""

    # TODO: merge this functionality into engagement.stats.parse_report_period
    daterange = re.match(
        r'^(\d{4})-(\d{2})-(\d{2})\.\.(\d{4})-(\d{2})-(\d{2})$', when)
    if daterange:
        fields = [int(field) for field in daterange.groups()]
        try:
            return (datetime.datetime(*fields[:3]),
                    datetime.datetime(*fields[3:]))
        except ValueError:
            # Well-formed but impossible date such as 2023-02-30
            usage_error()
            return None

    monthly = re.match(r'^(\d{4})-(\d{2})$', when)
    quarterly = re.match(r'^(\d{4})-q([1-4])$', when, re.IGNORECASE)
    # Only two halves exist in a year; accepting 3 or 4 here used to
    # yield a start month of 13 or 19
    halfyearly = re.match(r'^(\d{4})-h([1-2])$', when, re.IGNORECASE)
    yearly = re.match(r'^\d{4}$', when)

    # Reduce every form to a starting year/month and a length in months
    if monthly:
        start_year = int(monthly.group(1))
        start_month = int(monthly.group(2))
        length = 1
    elif quarterly:
        start_year = int(quarterly.group(1))
        start_month = 1 + 3 * (int(quarterly.group(2)) - 1)
        length = 3
    elif halfyearly:
        start_year = int(halfyearly.group(1))
        start_month = 1 + 6 * (int(halfyearly.group(2)) - 1)
        length = 6
    elif yearly:
        start_year = int(yearly.group())
        start_month = 1
        length = 12
    else:
        usage_error()
        return None

    # Zero-based month index of the first month after the period, which
    # may spill over into the following year
    end_index = start_month - 1 + length
    end_year = start_year + end_index // 12
    end_month = 1 + end_index % 12

    try:
        after = datetime.datetime(start_year, start_month, 1)
        before = datetime.datetime(end_year, end_month, 1)
    except ValueError:
        # Month 00 or 13-99, or a year outside what datetime supports
        usage_error()
        return None
    return after, before
|
||||||
if 'name' in approver:
|
|
||||||
approver_details = '"%s"' % approver['name']
|
|
||||||
|
def parse_command_line():
    """Return the report period passed as the sole command-line argument

    Any other number of arguments is handed off to usage_error()."""

    arguments = sys.argv[1:]
    if len(arguments) != 1:
        usage_error()
        return None
    return arguments[0]
|
||||||
if 'email' in approver:
|
|
||||||
if approver_details:
|
|
||||||
approver_details += ' '
|
def main(verbose=0):
    """Utility entry point

    Takes the report period from the command line, queries Gerrit for
    the changes of every project updated since the start of that
    period, and collects the name and E-mail address of everyone who
    cast a Code-Review -2/+2 or Workflow +1 vote within the period,
    grouped by the first component of the project name (namespace).

    Results are written to maintainers/PERIOD.yaml as structured data
    and to one maintainers/PERIOD_NAMESPACE.txt list per namespace.

    A verbose value of 1 or more prints each project as it is queried;
    the value is also passed through to get_projects() and
    query_gerrit()."""

    argument = parse_command_line()
    after, before = parse_report_period(argument)
    changes = dict()

    # TODO: deduplicate this and the similar version in stats.main
    # Shard querying by project, to help with the inherent instability of
    # result pagination from the Gerrit API
    for project in get_projects(verbose=verbose):
        if verbose >= 1:
            print("Checking project: %s" % project)
        offset = 0
        # Loop due to unavoidable query result pagination
        while offset >= 0:
            # We only constrain the query by the after date, as changes created
            # between the before and after date may have been updated more
            # recently with a new revision or comment
            new_changes = query_gerrit("changes/", params={
                "q": "project:%s after:{%s}" % (
                    project, to_gerrit_time(after)),
                "no-limit": "1",
                "start": offset,
                "o": ["DETAILED_ACCOUNTS", "DETAILED_LABELS", "SKIP_DIFFSTAT"],
                }, verbose=verbose)
            # Since we redundantly query ranges with offsets to help combat
            # pagination instability, we must deduplicate results
            for change in new_changes:
                changes.setdefault(change["id"], change)
            # Offset additional pages by half the returned entry count to help
            # avoid missing changes due to pagination instability
            if new_changes and new_changes[-1].get("_more_changes", False):
                # Always advance by at least one entry, otherwise a page
                # holding a single change would be requested forever
                offset += max(1, len(new_changes) // 2)
            else:
                offset = -1

    report = {"namespaces": dict()}
    report_times(report, after, before)

    # Vote values on each label which only maintainers are able to cast
    maintainer_votes = {"Code-Review": (-2, 2), "Workflow": (1,)}
    maintainers = dict()
    for change in changes.values():
        namespace = change["project"].split("/")[0]
        if "labels" not in change:
            continue
        for label, maintvotes in maintainer_votes.items():
            if label not in change["labels"]:
                continue
            for vote in change["labels"][label].get("all", []):
                when = vote.get("date")
                # The start of the period is inclusive and the end is
                # exclusive, as promised by the usage message
                if ("name" in vote and "email" in vote
                        and vote.get("value", 0) in maintvotes and when
                        and after <= from_gerrit_time(when) < before):
                    maintainers.setdefault(namespace, set()).add(
                        '"%s" <%s>' % (vote["name"], vote["email"]))

    # Only namespaces with at least one qualifying vote have an entry, so
    # inactive namespaces never make it into the report
    for namespace, voters in maintainers.items():
        report["namespaces"][namespace] = sorted(voters)

    # Write the full YAML structured data report
    os.makedirs("maintainers", exist_ok=True)
    with open("maintainers/%s.yaml" % argument, "w") as reportfile:
        reportfile.write(yaml.dump(report))

    # Write per-namespace text dumps of names/addresses
    for namespace, maintlist in report["namespaces"].items():
        with open("maintainers/%s_%s.txt" % (
                argument, namespace), "w", encoding="utf-8") as dumpfile:
            dumpfile.writelines(
                maintainer + "\n" for maintainer in maintlist)
|
||||||
|
Reference in New Issue
Block a user