From 8b982ed886d5caf085d9028b2c627362f7df90c9 Mon Sep 17 00:00:00 2001 From: Clark Boylan Date: Tue, 26 Aug 2014 15:25:42 -0700 Subject: [PATCH] Handle utf8 in JJB JJB didn't actually handle unicode data very well for a couple reasons. First the local yaml loader was loading files into yaml as strings instead of unicode which we should just go ahead and do because yaml's built int loader loads utf-8 by default (and we don't override the default). Second we need to do parameter substitution on unicode and regular strings so change the substitution typecheck to use basestring instead of str. Finally we need to use UTF-8 as the encoding when emitting XML so do that. Add tests to actually test this in the yamlparser tests. The addition of these new tests comes with a little bit of cleanup in the test classes to make sure we load unicode files as utf8 more consistently. Change-Id: I2169e19aae2cdc7ddbd1e7217ef7584c786a039a Fixes-bug: 1361090 --- jenkins_jobs/builder.py | 4 +-- jenkins_jobs/local_yaml.py | 3 ++- tests/base.py | 25 ++++++++----------- .../fixtures/include-rawunicode001-cool.sh | 2 ++ .../fixtures/include-rawunicode001.xml | 25 +++++++++++++++++++ .../fixtures/include-rawunicode001.yaml | 11 ++++++++ tests/yamlparser/fixtures/unicode001.xml | 25 +++++++++++++++++++ tests/yamlparser/fixtures/unicode001.yaml | 12 +++++++++ 8 files changed, 90 insertions(+), 17 deletions(-) create mode 100644 tests/yamlparser/fixtures/include-rawunicode001-cool.sh create mode 100644 tests/yamlparser/fixtures/include-rawunicode001.xml create mode 100644 tests/yamlparser/fixtures/include-rawunicode001.yaml create mode 100644 tests/yamlparser/fixtures/unicode001.xml create mode 100644 tests/yamlparser/fixtures/unicode001.yaml diff --git a/jenkins_jobs/builder.py b/jenkins_jobs/builder.py index 66de84acc..463c03951 100644 --- a/jenkins_jobs/builder.py +++ b/jenkins_jobs/builder.py @@ -82,7 +82,7 @@ def deep_format(obj, paramdict): # limitations on the values in paramdict - the post-format result must # still be valid YAML (so substituting-in a string containing quotes, for # example, is problematic). - if isinstance(obj, str): + if isinstance(obj, basestring): try: result = re.match('^{obj:(?P\w+)}$', obj) if result is not None: @@ -464,7 +464,7 @@ class XmlJob(object): return hashlib.md5(self.output()).hexdigest() def output(self): - out = minidom.parseString(XML.tostring(self.xml)) + out = minidom.parseString(XML.tostring(self.xml, encoding='UTF-8')) return out.toprettyxml(indent=' ', encoding='utf-8') diff --git a/jenkins_jobs/local_yaml.py b/jenkins_jobs/local_yaml.py index f67e52bdd..27df904aa 100644 --- a/jenkins_jobs/local_yaml.py +++ b/jenkins_jobs/local_yaml.py @@ -53,6 +53,7 @@ Example: """ +import codecs import functools import logging import re @@ -140,7 +141,7 @@ class LocalLoader(yaml.Loader): def _include_raw_tag(self, loader, node): filename = self._find_file(loader.construct_yaml_str(node)) try: - with open(filename, 'r') as f: + with codecs.open(filename, 'r', 'utf-8') as f: data = f.read() except: logger.error("Failed to include file using search path: '{0}'" diff --git a/tests/base.py b/tests/base.py index 364889b46..d7e71817e 100644 --- a/tests/base.py +++ b/tests/base.py @@ -78,22 +78,24 @@ class BaseTestCase(object): logging.basicConfig() - def _read_content(self): + def _read_utf8_content(self): # Read XML content, assuming it is unicode encoded xml_filepath = os.path.join(self.fixtures_path, self.out_filename) xml_content = u"%s" % codecs.open(xml_filepath, 'r', 'utf-8').read() + return xml_content + def _read_yaml_content(self): yaml_filepath = os.path.join(self.fixtures_path, self.in_filename) with file(yaml_filepath, 'r') as yaml_file: yaml_content = yaml.load(yaml_file) - - return (yaml_content, xml_content) + return yaml_content def test_yaml_snippet(self): if not self.out_filename or not self.in_filename: return - yaml_content, expected_xml = self._read_content() + expected_xml = self._read_utf8_content() + yaml_content = self._read_yaml_content() project = None if ('project-type' in yaml_content): if (yaml_content['project-type'] == "maven"): @@ -130,11 +132,7 @@ class BaseTestCase(object): class SingleJobTestCase(BaseTestCase): def test_yaml_snippet(self): - if not self.out_filename or not self.in_filename: - return - - xml_filepath = os.path.join(self.fixtures_path, self.out_filename) - expected_xml = u"%s" % open(xml_filepath, 'r').read() + expected_xml = self._read_utf8_content() yaml_filepath = os.path.join(self.fixtures_path, self.in_filename) @@ -154,7 +152,8 @@ class SingleJobTestCase(BaseTestCase): parser.jobs.sort(key=operator.attrgetter('name')) # Prettify generated XML - pretty_xml = "\n".join(job.output() for job in parser.jobs) + pretty_xml = unicode("\n".join(job.output() for job in parser.jobs), + 'utf-8') self.assertThat( pretty_xml, @@ -168,10 +167,8 @@ class SingleJobTestCase(BaseTestCase): class JsonTestCase(BaseTestCase): def test_yaml_snippet(self): - if not self.out_filename or not self.in_filename: - return - - yaml_content, expected_json = self._read_content() + expected_json = self._read_utf8_content() + yaml_content = self._read_yaml_content() pretty_json = json.dumps(yaml_content, indent=4, separators=(',', ': ')) diff --git a/tests/yamlparser/fixtures/include-rawunicode001-cool.sh b/tests/yamlparser/fixtures/include-rawunicode001-cool.sh new file mode 100644 index 000000000..2cf43d421 --- /dev/null +++ b/tests/yamlparser/fixtures/include-rawunicode001-cool.sh @@ -0,0 +1,2 @@ +#!/bin/bash +echo "Unicode! ☃" diff --git a/tests/yamlparser/fixtures/include-rawunicode001.xml b/tests/yamlparser/fixtures/include-rawunicode001.xml new file mode 100644 index 000000000..f06dfb72d --- /dev/null +++ b/tests/yamlparser/fixtures/include-rawunicode001.xml @@ -0,0 +1,25 @@ + + + + <!-- Managed by Jenkins Job Builder --> + false + false + false + false + true + + + + + + + + + #!/bin/bash +echo "Unicode! ☃" + + + + + + diff --git a/tests/yamlparser/fixtures/include-rawunicode001.yaml b/tests/yamlparser/fixtures/include-rawunicode001.yaml new file mode 100644 index 000000000..e9c303660 --- /dev/null +++ b/tests/yamlparser/fixtures/include-rawunicode001.yaml @@ -0,0 +1,11 @@ +- wrapper: + name: unicode-raw-include-wrapper + wrappers: + - pre-scm-buildstep: + - shell: + !include-raw include-rawunicode001-cool.sh + +- job: + name: test-unicode-raw-include-wrapper + wrappers: + - unicode-raw-include-wrapper diff --git a/tests/yamlparser/fixtures/unicode001.xml b/tests/yamlparser/fixtures/unicode001.xml new file mode 100644 index 000000000..f06dfb72d --- /dev/null +++ b/tests/yamlparser/fixtures/unicode001.xml @@ -0,0 +1,25 @@ + + + + <!-- Managed by Jenkins Job Builder --> + false + false + false + false + true + + + + + + + + + #!/bin/bash +echo "Unicode! ☃" + + + + + + diff --git a/tests/yamlparser/fixtures/unicode001.yaml b/tests/yamlparser/fixtures/unicode001.yaml new file mode 100644 index 000000000..a2bee1639 --- /dev/null +++ b/tests/yamlparser/fixtures/unicode001.yaml @@ -0,0 +1,12 @@ +- wrapper: + name: unicode-wrapper + wrappers: + - pre-scm-buildstep: + - shell: | + #!/bin/bash + echo "Unicode! ☃" + +- job: + name: test-unicode-wrapper + wrappers: + - unicode-wrapper