diff --git a/elastic_recheck/bot.py b/elastic_recheck/bot.py
index f531f5db..2c967662 100755
--- a/elastic_recheck/bot.py
+++ b/elastic_recheck/bot.py
@@ -196,7 +196,10 @@ class RecheckWatch(threading.Thread):
 
                 for job in event.failed_jobs:
                     job.bugs = set(classifier.classify(
-                        event.change, event.rev, job.build_short_uuid))
+                        event.change,
+                        event.rev,
+                        job.build_short_uuid,
+                        recent=True))
                 if not event.get_all_bugs():
                     self._read(event)
                 else:
diff --git a/elastic_recheck/elasticRecheck.py b/elastic_recheck/elasticRecheck.py
index 8f114f08..7e6c1e4f 100644
--- a/elastic_recheck/elasticRecheck.py
+++ b/elastic_recheck/elasticRecheck.py
@@ -215,7 +215,7 @@ class Stream(object):
 
     def _job_console_uploaded(self, change, patch, name, build_short_uuid):
         query = qb.result_ready(change, patch, name, build_short_uuid)
-        r = self.es.search(query, size='10')
+        r = self.es.search(query, size='10', recent=True)
         if len(r) == 0:
             msg = ("Console logs not ready for %s %s,%s,%s" %
                    (name, change, patch, build_short_uuid))
@@ -226,7 +226,7 @@ class Stream(object):
 
     def _has_required_files(self, change, patch, name, build_short_uuid):
         query = qb.files_ready(change, patch, name, build_short_uuid)
-        r = self.es.search(query, size='80')
+        r = self.es.search(query, size='80', recent=True)
         files = [x['term'] for x in r.terms]
         required = required_files(name)
         missing_files = [x for x in required if x not in files]
@@ -371,7 +371,8 @@ class Classifier():
         es_query = qb.generic(query, facet=facet)
         return self.es.search(es_query, size=size)
 
-    def classify(self, change_number, patch_number, build_short_uuid):
+    def classify(self, change_number, patch_number,
+                 build_short_uuid, recent=False):
         """Returns either empty list or list with matched bugs."""
         self.log.debug("Entering classify")
         #Reload each time
@@ -383,7 +384,7 @@ class Classifier():
                 % x['bug'])
             query = qb.single_patch(x['query'], change_number,
                                     patch_number, build_short_uuid)
-            results = self.es.search(query, size='10')
+            results = self.es.search(query, size='10', recent=recent)
             if len(results) > 0:
                 bug_matches.append(x['bug'])
         return bug_matches
diff --git a/elastic_recheck/results.py b/elastic_recheck/results.py
index 17576c73..e17c85c2 100644
--- a/elastic_recheck/results.py
+++ b/elastic_recheck/results.py
@@ -32,7 +32,7 @@ class SearchEngine(object):
     def __init__(self, url):
         self._url = url
 
-    def search(self, query, size=1000):
+    def search(self, query, size=1000, recent=False):
         """Search an elasticsearch server.
 
         `query` parameter is the complicated query structure that
@@ -43,10 +43,26 @@
         For certain classes of queries (like faceted ones), this can
         actually be set very low, as it won't impact the facet counts.
 
+        `recent` restricts the search to only the most recent index(es),
+        assuming a real time query where you only care about the last hour.
+        Using recent dramatically reduces the load on the ES cluster.
+
         The returned result is a ResultSet query.
+
         """
         es = pyelasticsearch.ElasticSearch(self._url)
-        results = es.search(query, size=size)
+        args = {'size': size}
+        if recent:
+            # today's index
+            datefmt = 'logstash-%Y.%m.%d'
+            now = datetime.datetime.utcnow()
+            lasthr = now - datetime.timedelta(hours=1)
+            indexes = [now.strftime(datefmt)]
+            if (lasthr.strftime(datefmt) != now.strftime(datefmt)):
+                indexes.append(lasthr.strftime(datefmt))
+            args['index'] = indexes
+
+        results = es.search(query, **args)
         return ResultSet(results)
 
 
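
For reference, here is a minimal standalone sketch of the index-selection behaviour the patch adds to SearchEngine.search(). The recent_indexes() helper is illustrative only and not part of the patch; it shows how the logstash-%Y.%m.%d names for "now" and for one hour earlier collapse to a single daily index except in the hour right after UTC midnight:

import datetime


def recent_indexes(now=None):
    """Return the logstash index names that cover the last hour.

    Mirrors the logic added to SearchEngine.search() above: always use
    today's index, and also include the previous day's index when the
    last hour straddles the UTC midnight boundary.
    """
    datefmt = 'logstash-%Y.%m.%d'
    now = now or datetime.datetime.utcnow()
    lasthr = now - datetime.timedelta(hours=1)
    indexes = [now.strftime(datefmt)]
    if lasthr.strftime(datefmt) != now.strftime(datefmt):
        indexes.append(lasthr.strftime(datefmt))
    return indexes


# Shortly after midnight UTC the last hour spans two daily indexes:
print(recent_indexes(datetime.datetime(2014, 1, 2, 0, 30)))
# ['logstash-2014.01.02', 'logstash-2014.01.01']

# For the rest of the day only the current index is needed:
print(recent_indexes(datetime.datetime(2014, 1, 2, 12, 0)))
# ['logstash-2014.01.02']

Either way a recent=True query touches at most two daily indexes rather than every logstash-* index in the cluster, which is where the load reduction described in the docstring comes from.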