From 4ea5d02a70998f41644357b303e9551fa82ab7a0 Mon Sep 17 00:00:00 2001 From: Sean Dague Date: Fri, 14 Feb 2014 18:55:11 -0500 Subject: [PATCH] Improved timestamp parsing Use dateutil to be more flexible in parsing timestamps. A recent upgrade to ElasticSearch changed the timestamp format to use '+00:00' to note the timezone instead of 'Z'. Co-Authored-By: Joe Gordon Change-Id: I11f441ba3bf7ba46c55921352fcc87eb5d1ce3ae --- elastic_recheck/cmd/uncategorized_fails.py | 17 ++++------------- elastic_recheck/results.py | 15 ++++++--------- requirements.txt | 2 ++ 3 files changed, 12 insertions(+), 22 deletions(-) diff --git a/elastic_recheck/cmd/uncategorized_fails.py b/elastic_recheck/cmd/uncategorized_fails.py index f9468f7d..64afde1e 100755 --- a/elastic_recheck/cmd/uncategorized_fails.py +++ b/elastic_recheck/cmd/uncategorized_fails.py @@ -19,8 +19,8 @@ import collections import datetime import operator import re -import time +import dateutil.parser as dp import jinja2 import elastic_recheck.elasticRecheck as er @@ -68,15 +68,7 @@ def all_fails(classifier): # gate. Would be nice if there was a zuul attr for this in es. 
if re.search("(^openstack/|devstack|grenade)", result.project): name = result.build_name - if "+00:00" in result.timestamp: - # Newer ES adds timezone into the timestamp, and it will - # always be +00:00 - timestamp = time.strptime(result.timestamp, - "%Y-%m-%dT%H:%M:%S.%f+00:00") - else: - timestamp = time.strptime(result.timestamp, - "%Y-%m-%dT%H:%M:%S.%fZ") - + timestamp = dp.parse(result.timestamp) log = result.log_url.split("console.html")[0] all_fails["%s.%s" % (build, name)] = { 'log': log, @@ -129,9 +121,8 @@ def classifying_rate(fails, data, engine): key=lambda v: v['timestamp'], reverse=True) # Convert timestamp into string for url in bad_job_urls[job]: - url['timestamp'] = time.strftime( - "%Y-%m-%dT%H:%M", - url['timestamp']) + url['timestamp'] = url['timestamp'].strftime( + "%Y-%m-%dT%H:%M") classifying_rate = collections.defaultdict(int) classifying_rate['overall'] = "%.1f" % ( diff --git a/elastic_recheck/results.py b/elastic_recheck/results.py index 46198efd..17576c73 100644 --- a/elastic_recheck/results.py +++ b/elastic_recheck/results.py @@ -14,12 +14,14 @@ """Elastic search wrapper to make handling results easier.""" +import calendar import copy import datetime import pprint -import time +import dateutil.parser as dp import pyelasticsearch +import pytz pp = pprint.PrettyPrinter() @@ -109,19 +111,14 @@ class FacetSet(dict): def _histogram(self, data, facet, res=3600): """A preprocessor for data should we want to bucket it.""" if facet == "timestamp": - if "+00:00" in data: - ts = datetime.datetime.strptime(data, - "%Y-%m-%dT%H:%M:%S.%f+00:00") - else: - ts = datetime.datetime.strptime(data, "%Y-%m-%dT%H:%M:%S.%fZ") - - tsepoch = int(time.mktime(ts.timetuple())) + ts = dp.parse(data) + tsepoch = int(calendar.timegm(ts.timetuple())) # take the floor based on resolution ts -= datetime.timedelta( seconds=(tsepoch % res), microseconds=ts.microsecond) # ms since epoch - epoch = datetime.datetime.utcfromtimestamp(0) + epoch = 
datetime.datetime.fromtimestamp(0, pytz.UTC) pos = int(((ts - epoch).total_seconds()) * 1000) return pos else: diff --git a/requirements.txt b/requirements.txt index de1bafc3..43e7eba7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,5 @@ +python-dateutil>=2.0 +pytz pyelasticsearch gerritlib python-daemon