From ea4a56dcecac34c6411ae274f5f5d07523c21d3f Mon Sep 17 00:00:00 2001
From: Henrique Nakashima
Date: Wed, 29 Nov 2017 19:34:19 +0000
Subject: Read Skia gold json and compare to locally generated pngs.

Change-Id: I73b6a477c1a66beec2b73072f23faab7db7d3342
Reviewed-on: https://pdfium-review.googlesource.com/18513
Commit-Queue: Henrique Nakashima
Reviewed-by: Lei Zhang
---
 testing/tools/gold.py        | 136 ++++++++++++++++++++++++++++++++++++++++---
 testing/tools/test_runner.py |  30 ++++++----
 2 files changed, 147 insertions(+), 19 deletions(-)

diff --git a/testing/tools/gold.py b/testing/tools/gold.py
index bdae9f4195..a8222378e8 100644
--- a/testing/tools/gold.py
+++ b/testing/tools/gold.py
@@ -7,6 +7,132 @@ import json
 import os
 import shlex
 import shutil
+import urllib2
+
+
+def _ParseKeyValuePairs(kv_str):
+  """
+  Parses a string of the type 'key1 value1 key2 value2' into a dict.
+  """
+  kv_pairs = shlex.split(kv_str)
+  if len(kv_pairs) % 2:
+    raise ValueError('Uneven number of key/value pairs. Got %s' % kv_str)
+  return { kv_pairs[i]:kv_pairs[i + 1] for i in xrange(0, len(kv_pairs), 2) }
+
+
+# This module downloads a json provided by Skia Gold with the expected baselines
+# for each test file.
+#
+# The expected format for the json is:
+# {
+#   "commit": {
+#     "author": "John Doe (jdoe@chromium.org)",
+#     "commit_time": 1510598123,
+#     "hash": "cee39e6e90c219cc91f2c94a912a06977f4461a0"
+#   },
+#   "master": {
+#     "abc.pdf.1": {
+#       "0ec3d86f545052acd7c9a16fde8ca9d4": 1,
+#       "80455b71673becc9fbc100d6da56ca65": 1,
+#       "b68e2ecb80090b4502ec89ad1be2322c": 1
+#     },
+#     "defgh.pdf.0": {
+#       "01e020cd4cd05c6738e479a46a506044": 1,
+#       "b68e2ecb80090b4502ec89ad1be2322c": 1
+#     }
+#   },
+#   "changeLists": {
+#     "18499" : {
+#       "abc.pdf.1": {
+#         "d5dd649124cf1779152253dc8fb239c5": 1,
+#         "42a270581930579cdb0f28674972fb1a": 1,
+#       }
+#     }
+#   }
+# }
+class GoldBaseline(object):
+
+  def __init__(self, properties_str):
+    """
+    properties_str is a string with space separated key/value pairs that
+    is used to find the cl number for which to baseline
+    """
+    self._properties = _ParseKeyValuePairs(properties_str)
+    self._baselines = self._LoadSkiaGoldBaselines()
+
+  def _LoadSkiaGoldBaselines(self):
+    """
+    Download the baseline json and return the list of baselines to match
+    hashes against (cl# first if available, then master), or None on error.
+    """
+    GOLD_BASELINE_URL = ('https://storage.googleapis.com/skia-infra-gm/'
+                         'hash_files/gold-pdfium-baseline.json')
+    try:
+      response = urllib2.urlopen(GOLD_BASELINE_URL)
+      json_data = response.read()
+    except (urllib2.HTTPError, urllib2.URLError) as e:
+      print ('Error: Unable to read skia gold json from %s: %s'
+             % (GOLD_BASELINE_URL, e))
+      return None
+
+    try:
+      data = json.loads(json_data)
+    except ValueError as e:
+      print 'Error: Malformed json read from %s: %s' % (GOLD_BASELINE_URL, e)
+      return None
+
+    try:
+      master_baseline = data['master']
+    except (KeyError, TypeError) as e:
+      print ('Error: "master" key not in json read from %s: %s'
+             % (GOLD_BASELINE_URL, e))
+      return None
+
+    cl_number_str = self._properties.get('issue')
+    if cl_number_str is None:
+      return [master_baseline]
+
+    try:
+      cl_baseline = data['changeLists'][cl_number_str]
+    except (KeyError, TypeError):
+      return [master_baseline]
+
+    return [cl_baseline, master_baseline]
+
+  # Return values for MatchLocalResult().
+  MATCH = 'match'
+  MISMATCH = 'mismatch'
+  NO_BASELINE = 'no_baseline'
+  BASELINE_DOWNLOAD_FAILED = 'baseline_download_failed'
+
+  def MatchLocalResult(self, test_name, md5_hash):
+    """
+    Match a locally generated hash of a test case's rendered image with the
+    expected hashes downloaded in the baselines json.
+
+    Each baseline is a dict mapping the test case name to a dict with the
+    expected hashes as keys. The baselines are searched in order, and the
+    first one that lists the hash under the test case name yields MATCH. If
+    the test case name is found in some baseline but the hash is in none of
+    them, the result is MISMATCH.
+
+    Returns MATCH if the md5 provided matches the ones in the baseline json,
+    MISMATCH if it does not, NO_BASELINE if the test case has no baseline, or
+    BASELINE_DOWNLOAD_FAILED if the baseline could not be downloaded and parsed.
+    """
+    if self._baselines is None:
+      return GoldBaseline.BASELINE_DOWNLOAD_FAILED
+
+    found_test_case = False
+    for baseline in self._baselines:
+      if test_name in baseline:
+        found_test_case = True
+        if md5_hash in baseline[test_name]:
+          return GoldBaseline.MATCH
+
+    return (GoldBaseline.MISMATCH if found_test_case
+            else GoldBaseline.NO_BASELINE)
+
 
 # This module collects and writes output in a format expected by the
 # Gold baseline tool. Based on meta data provided explicitly and by
@@ -66,8 +192,8 @@ class GoldResults(object):
     that should be ignored.
     """
     self._source_type = source_type
-    self._properties = self._parseKeyValuePairs(propertiesStr)
-    self._properties["key"] = self._parseKeyValuePairs(keyStr)
+    self._properties = _ParseKeyValuePairs(propertiesStr)
+    self._properties["key"] = _ParseKeyValuePairs(keyStr)
     self._results = []
     self._outputDir = outputDir
 
@@ -106,12 +232,6 @@ class GoldResults(object):
       }
     })
 
-  def _parseKeyValuePairs(self, kvStr):
-    kvPairs = shlex.split(kvStr)
-    if len(kvPairs) % 2:
-      raise ValueError("Uneven number of key/value pairs. Got %s" % kvStr)
-    return { kvPairs[i]:kvPairs[i+1] for i in range(0, len(kvPairs), 2) }
-
   def WriteResults(self):
     self._properties.update({
       "results": self._results
diff --git a/testing/tools/test_runner.py b/testing/tools/test_runner.py
index 8921b2f005..e895552bc8 100644
--- a/testing/tools/test_runner.py
+++ b/testing/tools/test_runner.py
@@ -116,7 +116,6 @@ class TestRunner:
         [sys.executable, self.fixup_path, '--output-dir=' + self.working_dir,
             input_path])
 
-
   def TestText(self, input_root, expected_txt_path, pdf_path):
     txt_path = os.path.join(self.working_dir, input_root + '.txt')
 
@@ -127,11 +126,8 @@ class TestRunner:
     cmd = [sys.executable, self.text_diff_path, expected_txt_path, txt_path]
     return common.RunCommand(cmd)
 
-
   def TestPixel(self, input_root, pdf_path):
-    cmd_to_run = [self.pdfium_test_path, '--send-events', '--png']
-    if self.gold_results:
-      cmd_to_run.append('--md5')
+    cmd_to_run = [self.pdfium_test_path, '--send-events', '--png', '--md5']
     if self.oneshot_renderer:
       cmd_to_run.append('--render-oneshot')
     cmd_to_run.append(pdf_path)
@@ -139,11 +135,22 @@ class TestRunner:
 
   def HandleResult(self, input_filename, input_path, result):
     success, image_paths = result
-    if self.gold_results:
-      if image_paths:
-        for img_path, md5_hash in image_paths:
-          # the output filename (without extension becomes the test name)
-          test_name = os.path.splitext(os.path.split(img_path)[1])[0]
+
+    if image_paths:
+      for img_path, md5_hash in image_paths:
+        # The output filename without image extension becomes the test name.
+        # For example, "/path/to/.../testing/corpus/example_005.pdf.0.png"
+        # becomes "example_005.pdf.0".
+        test_name = os.path.splitext(os.path.split(img_path)[1])[0]
+
+        if not self.test_suppressor.IsResultSuppressed(input_filename):
+          matched = self.gold_baseline.MatchLocalResult(test_name, md5_hash)
+          if matched == gold.GoldBaseline.MISMATCH:
+            print 'Skia Gold hash mismatch for test case: %s' % test_name
+          elif matched == gold.GoldBaseline.NO_BASELINE:
+            print 'No Skia Gold baseline found for test case: %s' % test_name
+
+        if self.gold_results:
           self.gold_results.AddTestResult(test_name, md5_hash, img_path)
 
     if self.test_suppressor.IsResultSuppressed(input_filename):
@@ -154,7 +161,6 @@ class TestRunner:
       if not success:
         self.failures.append(input_path)
 
-
   def Run(self):
     parser = optparse.OptionParser()
 
@@ -225,6 +231,8 @@ class TestRunner:
     self.test_suppressor = suppressor.Suppressor(finder, self.feature_string)
     self.image_differ = pngdiffer.PNGDiffer(finder)
 
+    self.gold_baseline = gold.GoldBaseline(self.options.gold_properties)
+
     walk_from_dir = finder.TestingDir(test_dir);
 
     self.test_cases = []
-- 
cgit v1.2.3