Improved timestamp parsing

Use dateutil to be more flexible in parsing timestamps. A recent
upgrade to ElasticSearch changed the timestamp format to use '+00:00'
to denote the timezone instead of 'Z'.

Co-Authored-By: Joe Gordon <joe.gordon0@gmail.com>
Change-Id: I11f441ba3bf7ba46c55921352fcc87eb5d1ce3ae
Sean Dague 2014-02-14 18:55:11 -05:00
parent 9b04b4a851
commit 4ea5d02a70
3 changed files with 12 additions and 22 deletions
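
As a quick illustration (not part of the commit itself), dateutil's parser accepts both timestamp suffixes with a single call, so the caller no longer has to pick a strptime format string per format:

    import dateutil.parser as dp

    # Both forms parse to the same UTC-aware datetime; with strptime each
    # suffix needed its own explicit format string.
    print(dp.parse("2014-02-14T18:55:11.000000Z"))
    print(dp.parse("2014-02-14T18:55:11.000000+00:00"))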


@@ -19,8 +19,8 @@ import collections
 import datetime
 import operator
 import re
-import time
 
+import dateutil.parser as dp
 import jinja2
 
 import elastic_recheck.elasticRecheck as er
@@ -68,15 +68,7 @@ def all_fails(classifier):
         # gate. Would be nice if there was a zuul attr for this in es.
         if re.search("(^openstack/|devstack|grenade)", result.project):
             name = result.build_name
-            if "+00:00" in result.timestamp:
-                # Newer ES adds timezone into the timestamp, and it will
-                # always be +00:00
-                timestamp = time.strptime(result.timestamp,
-                                          "%Y-%m-%dT%H:%M:%S.%f+00:00")
-            else:
-                timestamp = time.strptime(result.timestamp,
-                                          "%Y-%m-%dT%H:%M:%S.%fZ")
+            timestamp = dp.parse(result.timestamp)
             log = result.log_url.split("console.html")[0]
             all_fails["%s.%s" % (build, name)] = {
                 'log': log,
@@ -129,9 +121,8 @@ def classifying_rate(fails, data, engine):
                                key=lambda v: v['timestamp'], reverse=True)
         # Convert timestamp into string
         for url in bad_job_urls[job]:
-            url['timestamp'] = time.strftime(
-                "%Y-%m-%dT%H:%M",
-                url['timestamp'])
+            url['timestamp'] = url['timestamp'].strftime(
+                "%Y-%m-%dT%H:%M")
 
     classifying_rate = collections.defaultdict(int)
     classifying_rate['overall'] = "%.1f" % (
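
Worth noting about the hunk above: time.strptime() returns a time.struct_time, which is why the old code formatted it with the module-level time.strftime(); dateutil.parser.parse() returns a datetime.datetime, so the value stored in bad_job_urls can format itself. A rough sketch of the difference, using made-up example values:

    import time

    import dateutil.parser as dp

    # old style: struct_time has no strftime() method of its own
    old = time.strptime("2014-02-14T18:55:11.000000Z", "%Y-%m-%dT%H:%M:%S.%fZ")
    print(time.strftime("%Y-%m-%dT%H:%M", old))    # 2014-02-14T18:55

    # new style: a datetime formats itself directly
    new = dp.parse("2014-02-14T18:55:11.000000+00:00")
    print(new.strftime("%Y-%m-%dT%H:%M"))          # 2014-02-14T18:55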


@@ -14,12 +14,14 @@
 """Elastic search wrapper to make handling results easier."""
 
+import calendar
 import copy
 import datetime
 import pprint
-import time
 
+import dateutil.parser as dp
 import pyelasticsearch
+import pytz
 
 pp = pprint.PrettyPrinter()
@@ -109,19 +111,14 @@ class FacetSet(dict):
     def _histogram(self, data, facet, res=3600):
         """A preprocessor for data should we want to bucket it."""
         if facet == "timestamp":
-            if "+00:00" in data:
-                ts = datetime.datetime.strptime(data,
-                                                "%Y-%m-%dT%H:%M:%S.%f+00:00")
-            else:
-                ts = datetime.datetime.strptime(data, "%Y-%m-%dT%H:%M:%S.%fZ")
-            tsepoch = int(time.mktime(ts.timetuple()))
+            ts = dp.parse(data)
+            tsepoch = int(calendar.timegm(ts.timetuple()))
             # take the floor based on resolution
             ts -= datetime.timedelta(
                 seconds=(tsepoch % res),
                 microseconds=ts.microsecond)
             # ms since epoch
-            epoch = datetime.datetime.utcfromtimestamp(0)
+            epoch = datetime.datetime.fromtimestamp(0, pytz.UTC)
             pos = int(((ts - epoch).total_seconds()) * 1000)
             return pos
         else:
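
For reference, a standalone sketch of the bucketing _histogram() now performs, assuming the default hourly resolution. The move from time.mktime() to calendar.timegm() matters because timegm() interprets the timetuple as UTC rather than local time, and the epoch is built timezone-aware so it can be subtracted from the aware datetime that dateutil returns:

    import calendar
    import datetime

    import dateutil.parser as dp
    import pytz

    res = 3600  # bucket size in seconds
    ts = dp.parse("2014-02-14T18:55:11.123456+00:00")

    # seconds since the epoch, treating the timetuple as UTC
    tsepoch = int(calendar.timegm(ts.timetuple()))

    # floor to the start of the bucket and drop sub-second precision
    ts -= datetime.timedelta(seconds=(tsepoch % res),
                             microseconds=ts.microsecond)

    # ms since the epoch for the bucket start (18:00:00 UTC here)
    epoch = datetime.datetime.fromtimestamp(0, pytz.UTC)
    print(int((ts - epoch).total_seconds() * 1000))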


@@ -1,3 +1,5 @@
+python-dateutil>=2.0
+pytz
 pyelasticsearch
 gerritlib
 python-daemon