Make uncategorized_fails.py configurable

There are a few hardcoded strings and numbers in uncategorized_fails. Make these configurable so that it is easier to reuse this tool. Also add some debug logging. Change-Id: Ie62ce83bb43dcc8d9b382fe6719fe57eacc5727b
2015-11-25 15:20:16 -08:00 · 2015-11-25 15:20:16 -08:00 · 5261615d24
commit 5261615d24
parent 49999256f4
3 changed files with 96 additions and 40 deletions
--- a/elasticRecheck.conf.sample
+++ b/elasticRecheck.conf.sample
@ -22,4 +22,4 @@ key=/home/mtreinish/.ssh/id_rsa
 es_url=http://logstash.openstack.org:80/elasticsearch
 ls_url=http://logstash.openstack.org
 db_uri=mysql+pymysql://query:query@logstash.openstack.org/subunit2sql
-index_format=logstash-%Y.%m.%d
+index_format=logstash-%Y.%m.%d
--- a/elastic_recheck/cmd/uncategorized_fails.py
+++ b/elastic_recheck/cmd/uncategorized_fails.py
@ -31,23 +31,8 @@ import elastic_recheck.elasticRecheck as er
 import elastic_recheck.query_builder as qb
 import elastic_recheck.results as er_results
 # Not all teams actively use elastic recheck for categorizing their
 # work, so to keep the uncategorized page more meaningful, we exclude
 # jobs from teams that don't use this toolchain.
 EXCLUDED_JOBS = (
    # Docs team
    "api-site",
    "operations-guide",
    "openstack-manuals",
    # Ansible
    "ansible",
    # Puppet
    "puppet",
 )
 EXCLUDED_JOBS_REGEX = re.compile('(' + '|'.join(EXCLUDED_JOBS) + ')')
 LOG = logging.getLogger('eruncategorized')
 logging.basicConfig()
 def get_options():
@ -68,6 +53,24 @@ def get_options():
                        "file to use for data_source options such as "
                        "elastic search url, logstash url, and database "
                        "uri.")
    parser.add_argument('--search-size',
                        help="Max search results elastic search should return",
                        default=er_config.UNCAT_MAX_SEARCH_SIZE)
    parser.add_argument('--all-fails-query',
                        help="Query to find all failures in elastic search",
                        default=er_config.ALL_FAILS_QUERY)
    parser.add_argument('--excluded-jobs-regex',
                        help="Regular express to exclude jobs from results",
                        default=er_config.EXCLUDED_JOBS_REGEX)
    parser.add_argument('--included-projects-regex',
                        help="Regular express to include only certain projects"
                             " in results",
                        default=er_config.INCLUDED_PROJECTS_REGEX)
    return parser.parse_args()
@ -87,27 +90,25 @@ def setup_template_engine(directory, group='integrated_gate'):
    return env.get_template(filename)
-def all_fails(classifier):
+def all_fails(classifier, config=None):
    """Find all the fails in the integrated gate.
    This attempts to find all the build jobs in the integrated gate
    so we can figure out how well we are doing on total classification.
    """
    config = config or er_config.Config()
    integrated_fails = {}
    other_fails = {}
    all_fails = {}
-    query = ('filename:"console.html" '
+    results = classifier.hits_by_query(config.all_fails_query,
-             'AND (message:"Finished: FAILURE" '
+                                       size=config.uncat_search_size)
             'OR message:"[Zuul] Job complete, result: FAILURE") '
             'AND build_queue:"gate" '
             'AND voting:"1"')
    results = classifier.hits_by_query(query, size=30000)
    facets = er_results.FacetSet()
    facets.detect_facets(results, ["build_uuid"])
    for build in facets:
        for result in facets[build]:
            # If the job is on the exclude list, skip
-            if re.search(EXCLUDED_JOBS_REGEX, result.build_name):
+            if re.search(config.excluded_jobs_regex, result.build_name):
                continue
            integrated_gate_projects = [
@ -132,14 +133,21 @@ def all_fails(classifier):
                    'build_uuid': result.build_uuid
                }
            else:
-                name = result.build_name
+                # not perfect, but basically an attempt to show the integrated
-                timestamp = dp.parse(result.timestamp)
+                # gate. Would be nice if there was a zuul attr for this in es.
-                log = result.log_url.split("console.html")[0]
+                if re.search(config.included_projects_regex, result.project):
-                other_fails["%s.%s" % (build, name)] = {
+                    name = result.build_name
-                    'log': log,
+                    timestamp = dp.parse(result.timestamp)
-                    'timestamp': timestamp,
+                    log = result.log_url.split("console.html")[0]
-                    'build_uuid': result.build_uuid
+                    other_fails["%s.%s" % (build, name)] = {
-                }
+                        'log': log,
                        'timestamp': timestamp,
                        'build_uuid': result.build_uuid
                    }
            LOG.debug("Found failure: %s build_uuid: %s project %s",
                      len(all_fails), result.build_uuid, result.project)
    all_fails = {
        'integrated_gate': integrated_fails,
        'others': other_fails
@ -200,9 +208,12 @@ def classifying_rate(fails, data, engine, classifier, ls_url):
            logstash_query = qb.encode_logstash_query(query)
            logstash_url = ('%s/#/dashboard/file/logstash.json?%s'
                            % (ls_url, logstash_query))
            LOG.debug("looking up hits for job %s query %s", job, query)
            results = classifier.hits_by_query(query, size=1)
            if results:
                url['crm114'] = logstash_url
                LOG.debug("Hits found. Using logstash url %s",
                          logstash_url)
    classifying_rate = collections.defaultdict(int)
    rate = 0
@ -211,7 +222,7 @@ def classifying_rate(fails, data, engine, classifier, ls_url):
        rate = (float(count) / float(total)) * 100.0
    classifying_rate['overall'] = "%.1f" % rate
-
+    LOG.debug("overall classifying_rate is %s", classifying_rate['overall'])
    for job in bad_jobs:
        if bad_jobs[job] == 0 and total_job_failures[job] == 0:
            classifying_rate[job] = 0
@ -294,12 +305,16 @@ def _failure_percentage(hits, fails):
    return per
-def collect_metrics(classifier, fails):
+def collect_metrics(classifier, fails, config=None):
    config = config or er_config.Config()
    data = {}
    for q in classifier.queries:
        try:
-            results = classifier.hits_by_query(q['query'], size=30000)
+            results = classifier.hits_by_query(q['query'],
                                               size=config.uncat_search_size)
            hits = _status_count(results)
            LOG.debug("Collected metrics for query %s, hits %s", q['query'],
                      hits)
            data[q['bug']] = {
                'fails': _failure_count(hits),
                'hits': hits,
@ -316,15 +331,20 @@ def collect_metrics(classifier, fails):
 def main():
    opts = get_options()
-    config = er_config.Config(config_file=opts.conf)
+    config = er_config.Config(
        config_file=opts.conf,
        uncat_search_size=opts.search_size,
        all_fails_query=opts.all_fails_query,
        excluded_jobs_regex=opts.excluded_jobs_regex,
        included_projects_regex=opts.included_projects_regex)
    classifier = er.Classifier(opts.dir, config=config)
-    all_gate_fails = all_fails(classifier)
+    all_gate_fails = all_fails(classifier, config=config)
    for group in all_gate_fails:
        fails = all_gate_fails[group]
        if not fails:
            continue
-        data = collect_metrics(classifier, fails)
+        data = collect_metrics(classifier, fails, config=config)
        engine = setup_template_engine(opts.templatedir, group=group)
        html = classifying_rate(fails, data, engine, classifier, config.ls_url)
        if opts.output:
--- a/elastic_recheck/config.py
+++ b/elastic_recheck/config.py
@ -14,6 +14,7 @@
 import ConfigParser
 import os
 import re
 DEFAULT_INDEX_FORMAT = 'logstash-%Y.%m.%d'
@ -26,6 +27,32 @@ CI_USERNAME = 'jenkins'
 PID_FN = '/var/run/elastic-recheck/elastic-recheck.pid'
 # Not all teams actively use elastic recheck for categorizing their
 # work, so to keep the uncategorized page more meaningful, we exclude
 # jobs from teams that don't use this toolchain.
 EXCLUDED_JOBS = (
    # Docs team
    "api-site",
    "operations-guide",
    "openstack-manuals",
    # Ansible
    "ansible",
    # Puppet
    "puppet",
 )
 EXCLUDED_JOBS_REGEX = re.compile('(' + '|'.join(EXCLUDED_JOBS) + ')')
 INCLUDED_PROJECTS_REGEX = "(^openstack/|devstack|grenade)"
 ALL_FAILS_QUERY = ('filename:"console.html" '
                   'AND (message:"Finished: FAILURE" '
                   'OR message:"[Zuul] Job complete, result: FAILURE") '
                   'AND build_queue:"gate" '
                   'AND voting:"1"')
 UNCAT_MAX_SEARCH_SIZE = 30000
 class Config(object):
@ -38,7 +65,11 @@ class Config(object):
                 jobs_re=None,
                 ci_username=None,
                 pid_fn=None,
-                 es_index_format=None):
+                 es_index_format=None,
                 all_fails_query=None,
                 excluded_jobs_regex=None,
                 included_projects_regex=None,
                 uncat_search_size=None):
        self.es_url = es_url or ES_URL
        self.ls_url = ls_url or LS_URL
@ -49,6 +80,11 @@ class Config(object):
        self.pid_fn = pid_fn or PID_FN
        self.ircbot_channel_config = None
        self.irc_log_config = None
        self.all_fails_query = all_fails_query or ALL_FAILS_QUERY
        self.excluded_jobs_regex = excluded_jobs_regex or EXCLUDED_JOBS_REGEX
        self.included_projects_regex = \
            included_projects_regex or INCLUDED_PROJECTS_REGEX
        self.uncat_search_size = uncat_search_size or UNCAT_MAX_SEARCH_SIZE
        if config_file or config_obj:
            if config_obj: