Improved timestamp parsing

Use dateutil to be more flexible in parsing timestamps. A recent upgrade to ElasticSearch changed the timestamp format to use '+00:00' to denote the timezone instead of 'Z'. Co-Authored-By: Joe Gordon <joe.gordon0@gmail.com> Change-Id: I11f441ba3bf7ba46c55921352fcc87eb5d1ce3ae
2014-02-14 18:55:11 -05:00 · 2014-02-14 18:55:11 -05:00 · 4ea5d02a70
commit 4ea5d02a70
parent 9b04b4a851
3 changed files with 12 additions and 22 deletions
--- a/elastic_recheck/cmd/uncategorized_fails.py
+++ b/elastic_recheck/cmd/uncategorized_fails.py
@ -19,8 +19,8 @@ import collections
 import datetime
 import operator
 import re
 import time
 import dateutil.parser as dp
 import jinja2
 import elastic_recheck.elasticRecheck as er
@ -68,15 +68,7 @@ def all_fails(classifier):
            # gate. Would be nice if there was a zuul attr for this in es.
            if re.search("(^openstack/|devstack|grenade)", result.project):
                name = result.build_name
-                if "+00:00" in result.timestamp:
+                timestamp = dp.parse(result.timestamp)
                    # Newer ES adds timezone into the timestamp, and it will
                    # always be +00:00
                    timestamp = time.strptime(result.timestamp,
                                              "%Y-%m-%dT%H:%M:%S.%f+00:00")
                else:
                    timestamp = time.strptime(result.timestamp,
                                              "%Y-%m-%dT%H:%M:%S.%fZ")
                log = result.log_url.split("console.html")[0]
                all_fails["%s.%s" % (build, name)] = {
                    'log': log,
@ -129,9 +121,8 @@ def classifying_rate(fails, data, engine):
                                   key=lambda v: v['timestamp'], reverse=True)
        # Convert timestamp into string
        for url in bad_job_urls[job]:
-            url['timestamp'] = time.strftime(
+            url['timestamp'] = url['timestamp'].strftime(
-                "%Y-%m-%dT%H:%M",
+                "%Y-%m-%dT%H:%M")
                url['timestamp'])
    classifying_rate = collections.defaultdict(int)
    classifying_rate['overall'] = "%.1f" % (
--- a/elastic_recheck/results.py
+++ b/elastic_recheck/results.py
@ -14,12 +14,14 @@
 """Elastic search wrapper to make handling results easier."""
 import calendar
 import copy
 import datetime
 import pprint
 import time
 import dateutil.parser as dp
 import pyelasticsearch
 import pytz
 pp = pprint.PrettyPrinter()
@ -109,19 +111,14 @@ class FacetSet(dict):
    def _histogram(self, data, facet, res=3600):
        """A preprocessor for data should we want to bucket it."""
        if facet == "timestamp":
-            if "+00:00" in data:
+            ts = dp.parse(data)
-                ts = datetime.datetime.strptime(data,
+            tsepoch = int(calendar.timegm(ts.timetuple()))
                                                "%Y-%m-%dT%H:%M:%S.%f+00:00")
            else:
                ts = datetime.datetime.strptime(data, "%Y-%m-%dT%H:%M:%S.%fZ")
            tsepoch = int(time.mktime(ts.timetuple()))
            # take the floor based on resolution
            ts -= datetime.timedelta(
                seconds=(tsepoch % res),
                microseconds=ts.microsecond)
            # ms since epoch
-            epoch = datetime.datetime.utcfromtimestamp(0)
+            epoch = datetime.datetime.fromtimestamp(0, pytz.UTC)
            pos = int(((ts - epoch).total_seconds()) * 1000)
            return pos
        else:
--- a/requirements.txt
+++ b/requirements.txt
@ -1,3 +1,5 @@
 python-dateutil>=2.0
 pytz
 pyelasticsearch
 gerritlib
 python-daemon