diff --git a/elastic_recheck/cmd/check_success.py b/elastic_recheck/cmd/check_success.py
index b4e6bb0c..6e7b3762 100755
--- a/elastic_recheck/cmd/check_success.py
+++ b/elastic_recheck/cmd/check_success.py
@@ -15,6 +15,7 @@
 # under the License.
 
 import argparse
+import collections
 import operator
 import os
 import re
@@ -63,7 +64,15 @@ def all_fails(classifier):
     return all_fails
 
 
-def classifying_rate(classifier, data):
+def num_fails_per_build_name(all_jobs):
+    counts = collections.defaultdict(int)
+    for f in all_jobs:
+        build, job = f.split('.', 1)
+        counts[job] += 1
+    return counts
+
+
+def classifying_rate(fails, data):
     """Builds and prints the classification rate.
 
     It's important to know how good a job we are doing, so this
@@ -71,24 +80,20 @@ def classifying_rate(classifier, data):
     classification rate. For every failure in the gate queue did
     we find a match for it.
     """
-    fails = all_fails(classifier)
     for bugnum in data:
         bug = data[bugnum]
         for job in bug['failed_jobs']:
             fails[job] = True
 
     total = len(fails.keys())
-    bad_jobs = {}
+    bad_jobs = collections.defaultdict(int)
     count = 0
     for f in fails:
         if fails[f] is True:
             count += 1
         else:
             build, job = f.split('.', 1)
-            if job in bad_jobs:
-                bad_jobs[job] += 1
-            else:
-                bad_jobs[job] = 1
+            bad_jobs[job] += 1
 
     print("Classification percentage: %2.2f%%"
           % ((float(count) / float(total)) * 100.0))
@@ -101,28 +106,73 @@ def classifying_rate(classifier, data):
         print "  %3s : %s" % (s[1], s[0])
 
 
-def collect_metrics(classifier):
+def _status_count(results):
+    counts = {}
+    facets = er_results.FacetSet()
+    facets.detect_facets(
+        results,
+        ["build_status", "build_uuid"])
+
+    for key in facets:
+        counts[key] = len(facets[key])
+    return counts
+
+
+def _failure_count(hits):
+    if "FAILURE" in hits:
+        return hits["FAILURE"]
+    else:
+        return 0
+
+
+def _failed_jobs(results):
+    failed_jobs = []
+    facets = er_results.FacetSet()
+    facets.detect_facets(
+        results,
+        ["build_status", "build_uuid"])
+    if "FAILURE" in facets:
+        for build in facets["FAILURE"]:
+            for result in facets["FAILURE"][build]:
+                failed_jobs.append("%s.%s" % (build, result.build_name))
+    return failed_jobs
+
+
+def _count_fails_per_build_name(hits):
+    facets = er_results.FacetSet()
+    counts = collections.defaultdict(int)
+    facets.detect_facets(
+        hits,
+        ["build_status", "build_name", "build_uuid"])
+    if "FAILURE" in facets:
+        for build_name in facets["FAILURE"]:
+            counts[build_name] += 1
+    return counts
+
+
+def _failure_percentage(hits, fails):
+    total_fails_per_build_name = num_fails_per_build_name(fails)
+    fails_per_build_name = _count_fails_per_build_name(hits)
+    per = {}
+    for build in fails_per_build_name:
+        this_job = fails_per_build_name[build]
+        if build in total_fails_per_build_name:
+            total = total_fails_per_build_name[build]
+            per[build] = (float(this_job) / float(total)) * 100.0
+    return per
+
+
+def collect_metrics(classifier, fails):
     data = {}
     for q in classifier.queries:
         results = classifier.hits_by_query(q['query'], size=30000)
-        facets = er_results.FacetSet()
-        facets.detect_facets(
-            results,
-            ["build_status", "build_uuid"])
-
-        num_fails = 0
-        failed_jobs = []
-        if "FAILURE" in facets:
-            num_fails = len(facets["FAILURE"])
-            for build in facets["FAILURE"]:
-                for result in facets["FAILURE"][build]:
-                    failed_jobs.append("%s.%s" % (build, result.build_name))
-
+        hits = _status_count(results)
         data[q['bug']] = {
-            'fails': num_fails,
-            'hits': facets,
+            'fails': _failure_count(hits),
+            'hits': hits,
+            'percentages': _failure_percentage(results, fails),
             'query': q['query'],
-            'failed_jobs': failed_jobs
+            'failed_jobs': _failed_jobs(results)
         }
     return data
 
@@ -135,13 +185,18 @@ def print_metrics(data, with_lp=False):
     sorted_data = sorted(data.iteritems(),
                          key=lambda x: -x[1]['fails'])
     for d in sorted_data:
+        bug = d[0]
+        data = d[1]
         print("Bug: https://bugs.launchpad.net/bugs/%s => %s"
-              % (d[0], d[1]['query'].rstrip()))
+              % (bug, data['query'].rstrip()))
         if with_lp:
             get_launchpad_bug(d[0])
         print "Hits"
-        for s in d[1]['hits'].keys():
-            print "  %s: %s" % (s, len(d[1]['hits'][s]))
+        for s in data['hits']:
+            print "  %s: %s" % (s, data['hits'][s])
+        print "Percentage of Gate Queue Job failures triggered by this bug"
+        for s in data['percentages']:
+            print "  %s: %2.2f%%" % (s, data['percentages'][s])
         print
 
 
@@ -160,10 +215,11 @@ def get_launchpad_bug(bug):
 def main():
     opts = get_options()
     classifier = er.Classifier(opts.dir)
-    data = collect_metrics(classifier)
+    fails = all_fails(classifier)
+    data = collect_metrics(classifier, fails)
    print_metrics(data, with_lp=opts.lp)
     if opts.rate:
-        classifying_rate(classifier, data)
+        classifying_rate(fails, data)
 
 
 if __name__ == "__main__":
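
Reviewer note (illustration only, not part of the patch): the sketch below mirrors the per-job percentage math this change introduces via num_fails_per_build_name() and _failure_percentage(), using hypothetical "build_uuid.build_name" failure keys in place of the er_results.FacetSet facets the real code walks.

# Illustration only: hypothetical failure keys; the patch derives them from
# ElasticSearch facets, not from hard-coded lists like these.
import collections


def fails_per_job(failure_keys):
    # Keys look like "<build_uuid>.<build_name>"; count failures per job name,
    # the same split('.', 1) that num_fails_per_build_name() performs.
    counts = collections.defaultdict(int)
    for key in failure_keys:
        _build, job = key.split('.', 1)
        counts[job] += 1
    return counts


# Every failure seen in the gate queue, and the subset matched by one bug's query.
all_gate_failures = ['abc123.tempest-full', 'def456.tempest-full',
                     'ghi789.grenade']
bug_failures = ['abc123.tempest-full']

total = fails_per_job(all_gate_failures)
matched = fails_per_job(bug_failures)

for job, hits in matched.items():
    # Share of that job's gate failures attributed to this bug.
    print("%s: %2.2f%%" % (job, (float(hits) / float(total[job])) * 100.0))
    # -> tempest-full: 50.00%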