From ea4a56dcecac34c6411ae274f5f5d07523c21d3f Mon Sep 17 00:00:00 2001
From: Henrique Nakashima <hnakashima@chromium.org>
Date: Wed, 29 Nov 2017 19:34:19 +0000
Subject: Read Skia gold json and compare to locally generated pngs.

Change-Id: I73b6a477c1a66beec2b73072f23faab7db7d3342
Reviewed-on: https://pdfium-review.googlesource.com/18513
Commit-Queue: Henrique Nakashima <hnakashima@chromium.org>
Reviewed-by: Lei Zhang <thestig@chromium.org>
---
 testing/tools/gold.py        | 136 ++++++++++++++++++++++++++++++++++++++++---
 testing/tools/test_runner.py |  30 ++++++----
 2 files changed, 147 insertions(+), 19 deletions(-)

diff --git a/testing/tools/gold.py b/testing/tools/gold.py
index bdae9f4195..a8222378e8 100644
--- a/testing/tools/gold.py
+++ b/testing/tools/gold.py
@@ -7,6 +7,132 @@ import json
 import os
 import shlex
 import shutil
+import urllib2
+
+
+def _ParseKeyValuePairs(kv_str):
+  """
+  Builds a dict from a shlex-tokenized 'key1 value1 key2 value2' string.
+  """
+  tokens = shlex.split(kv_str)
+  if len(tokens) % 2 != 0:
+    raise ValueError('Uneven number of key/value pairs. Got %s' % kv_str)
+  return dict(zip(tokens[0::2], tokens[1::2]))
+
+
+# This module downloads a json provided by Skia Gold with the expected baselines
+# for each test file.
+#
+# The expected format for the json is:
+# {
+#   "commit": {
+#     "author": "John Doe (jdoe@chromium.org)",
+#     "commit_time": 1510598123,
+#     "hash": "cee39e6e90c219cc91f2c94a912a06977f4461a0"
+#   },
+#   "master": {
+#     "abc.pdf.1": {
+#       "0ec3d86f545052acd7c9a16fde8ca9d4": 1,
+#       "80455b71673becc9fbc100d6da56ca65": 1,
+#       "b68e2ecb80090b4502ec89ad1be2322c": 1
+#     },
+#     "defgh.pdf.0": {
+#       "01e020cd4cd05c6738e479a46a506044": 1,
+#       "b68e2ecb80090b4502ec89ad1be2322c": 1
+#     }
+#   },
+#   "changeLists": {
+#     "18499" : {
+#       "abc.pdf.1": {
+#         "d5dd649124cf1779152253dc8fb239c5": 1,
+#         "42a270581930579cdb0f28674972fb1a": 1
+#       }
+#     }
+#   }
+# }
+class GoldBaseline(object):
+
+  def __init__(self, properties_str):
+    """
+    properties_str is a string with space separated key/value pairs whose
+    'issue' value (if any) is the cl number; the baselines are downloaded here.
+    """
+    self._properties = _ParseKeyValuePairs(properties_str)
+    self._baselines = self._LoadSkiaGoldBaselines()
+
+  def _LoadSkiaGoldBaselines(self):
+    """
+    Download the baseline json and return the list of baselines to match
+    hashes against: [cl#, master] or [master]. Returns None on any failure.
+    """
+    GOLD_BASELINE_URL = ('https://storage.googleapis.com/skia-infra-gm/'
+                         'hash_files/gold-pdfium-baseline.json')
+    try:
+      response = urllib2.urlopen(GOLD_BASELINE_URL)
+      json_data = response.read()
+    except (urllib2.HTTPError, urllib2.URLError) as e:
+      print ('Error: Unable to read skia gold json from %s: %s'
+             % (GOLD_BASELINE_URL, e))
+      return None
+
+    try:
+      data = json.loads(json_data)
+    except ValueError as e:
+      print 'Error: Malformed json read from %s: %s' % (GOLD_BASELINE_URL, e)
+      return None
+
+    try:
+      master_baseline = data['master']
+    except (KeyError, TypeError) as e:
+      print ('Error: "master" key not in json read from %s: %s'
+             % (GOLD_BASELINE_URL, e))
+      return None
+
+    cl_number_str = self._properties.get('issue')
+    if cl_number_str is None:
+      return [master_baseline]
+
+    try:
+      cl_baseline = data['changeLists'][cl_number_str]
+    except (KeyError, TypeError):  # No usable CL baseline: use master only.
+      return [master_baseline]
+
+    return [cl_baseline, master_baseline]
+
+  # Return values for MatchLocalResult().
+  MATCH = 'match'
+  MISMATCH = 'mismatch'
+  NO_BASELINE = 'no_baseline'
+  BASELINE_DOWNLOAD_FAILED = 'baseline_download_failed'
+
+  def MatchLocalResult(self, test_name, md5_hash):
+    """
+    Match a locally generated hash of a test case's rendered image with the
+    expected hashes downloaded in the baselines json.
+
+    Each baseline is a dict mapping the test case name to a dict with the
+    expected hashes as keys. Every baseline that contains the test case name
+    is checked, in list order, and the first one that lists the hash yields a
+    match. A mismatch is only reported after all baselines were searched and
+    none of those containing the test case listed the hash.
+
+    Returns MATCH if the md5 provided matches the ones in the baseline json,
+    MISMATCH if it does not, NO_BASELINE if the test case has no baseline, or
+    BASELINE_DOWNLOAD_FAILED if the baseline could not be downloaded and parsed.
+    """
+    if self._baselines is None:
+      return GoldBaseline.BASELINE_DOWNLOAD_FAILED
+
+    found_test_case = False
+    for baseline in self._baselines:
+      if test_name in baseline:
+        found_test_case = True
+        if md5_hash in baseline[test_name]:
+          return GoldBaseline.MATCH
+
+    return (GoldBaseline.MISMATCH if found_test_case
+            else GoldBaseline.NO_BASELINE)
+
 
 # This module collects and writes output in a format expected by the
 # Gold baseline tool. Based on meta data provided explicitly and by
@@ -66,8 +192,8 @@ class GoldResults(object):
                that should be ignored.
     """
     self._source_type = source_type
-    self._properties = self._parseKeyValuePairs(propertiesStr)
-    self._properties["key"] = self._parseKeyValuePairs(keyStr)
+    self._properties = _ParseKeyValuePairs(propertiesStr)
+    self._properties["key"] = _ParseKeyValuePairs(keyStr)
     self._results =  []
     self._outputDir = outputDir
 
@@ -106,12 +232,6 @@ class GoldResults(object):
       }
     })
 
-  def _parseKeyValuePairs(self, kvStr):
-    kvPairs = shlex.split(kvStr)
-    if len(kvPairs) % 2:
-      raise ValueError("Uneven number of key/value pairs. Got %s" % kvStr)
-    return { kvPairs[i]:kvPairs[i+1] for i in range(0, len(kvPairs), 2) }
-
   def WriteResults(self):
     self._properties.update({
       "results": self._results
diff --git a/testing/tools/test_runner.py b/testing/tools/test_runner.py
index 8921b2f005..e895552bc8 100644
--- a/testing/tools/test_runner.py
+++ b/testing/tools/test_runner.py
@@ -116,7 +116,6 @@ class TestRunner:
         [sys.executable, self.fixup_path, '--output-dir=' + self.working_dir,
             input_path])
 
-
   def TestText(self, input_root, expected_txt_path, pdf_path):
     txt_path = os.path.join(self.working_dir, input_root + '.txt')
 
@@ -127,11 +126,8 @@ class TestRunner:
     cmd = [sys.executable, self.text_diff_path, expected_txt_path, txt_path]
     return common.RunCommand(cmd)
 
-
   def TestPixel(self, input_root, pdf_path):
-    cmd_to_run = [self.pdfium_test_path, '--send-events', '--png']
-    if self.gold_results:
-      cmd_to_run.append('--md5')
+    cmd_to_run = [self.pdfium_test_path, '--send-events', '--png', '--md5']
     if self.oneshot_renderer:
       cmd_to_run.append('--render-oneshot')
     cmd_to_run.append(pdf_path)
@@ -139,11 +135,22 @@ class TestRunner:
 
   def HandleResult(self, input_filename, input_path, result):
     success, image_paths = result
-    if self.gold_results:
-      if image_paths:
-        for img_path, md5_hash in image_paths:
-          # the output filename (without extension becomes the test name)
-          test_name = os.path.splitext(os.path.split(img_path)[1])[0]
+
+    if image_paths:
+      for img_path, md5_hash in image_paths:
+        # The output filename without image extension becomes the test name.
+        # For example, "/path/to/.../testing/corpus/example_005.pdf.0.png"
+        # becomes "example_005.pdf.0".
+        test_name = os.path.splitext(os.path.split(img_path)[1])[0]
+
+        if not self.test_suppressor.IsResultSuppressed(input_filename):
+          matched = self.gold_baseline.MatchLocalResult(test_name, md5_hash)
+          if matched == gold.GoldBaseline.MISMATCH:
+            print 'Skia Gold hash mismatch for test case: %s' % test_name
+          elif matched ==  gold.GoldBaseline.NO_BASELINE:
+            print 'No Skia Gold baseline found for test case: %s' % test_name
+
+        if self.gold_results:
           self.gold_results.AddTestResult(test_name, md5_hash, img_path)
 
     if self.test_suppressor.IsResultSuppressed(input_filename):
@@ -154,7 +161,6 @@ class TestRunner:
       if not success:
         self.failures.append(input_path)
 
-
   def Run(self):
     parser = optparse.OptionParser()
 
@@ -225,6 +231,8 @@ class TestRunner:
     self.test_suppressor = suppressor.Suppressor(finder, self.feature_string)
     self.image_differ = pngdiffer.PNGDiffer(finder)
 
+    self.gold_baseline = gold.GoldBaseline(self.options.gold_properties)
+
     walk_from_dir = finder.TestingDir(test_dir);
 
     self.test_cases = []
-- 
cgit v1.2.3