diff options
author | stephana <stephana@google.com> | 2017-01-02 06:19:41 -0800 |
---|---|---|
committer | Commit bot <commit-bot@chromium.org> | 2017-01-02 06:19:41 -0800 |
commit | fa05e97fc4d796bd12e21c32634d9614f8edf607 (patch) | |
tree | 51ce6dd564bf9129bac591813da6ccfc8dc9edb3 | |
parent | aea77059d309820dbcea9ec3e583fa673960a0b9 (diff) | |
download | pdfium-fa05e97fc4d796bd12e21c32634d9614f8edf607.tar.xz |
Gold support in PDFium (chromium/2970)
Extends the PDFium tests to collect images and metadata to be uploaded
to Gold. This feature is triggered by adding the --gold_* flags.
It extends pdfium_test to output the MD5 hash of the underlying pixel
buffer for each page it renders.
That output is then processed by test_runner.py to generate the
Gold metadata.
This behavior is modeled after the 'dm' tool in skia. See
https://skia.googlesource.com/skia/+/master/dm/DM.cpp#1090
This should not cause any change in the current behavior of the tests;
it will be triggered once we update the buildbot recipe.
BUG=skia:5973
Review-Url: https://codereview.chromium.org/2578893004
-rw-r--r-- | samples/DEPS | 1 | ||||
-rw-r--r-- | samples/pdfium_test.cc | 87 | ||||
-rwxr-xr-x | testing/tools/common.py | 18 | ||||
-rw-r--r-- | testing/tools/gold.py | 126 | ||||
-rw-r--r-- | testing/tools/test_runner.py | 64 |
5 files changed, 262 insertions, 34 deletions
diff --git a/samples/DEPS b/samples/DEPS index f38daab263..4a926a7cf2 100644 --- a/samples/DEPS +++ b/samples/DEPS @@ -4,4 +4,5 @@ include_rules = [ '+third_party/skia/include', '+third_party/zlib_v128', '+v8', + '+core/fdrm/crypto/fx_crypt.h', ] diff --git a/samples/pdfium_test.cc b/samples/pdfium_test.cc index ac6073aa2e..1e0a5be50e 100644 --- a/samples/pdfium_test.cc +++ b/samples/pdfium_test.cc @@ -17,6 +17,7 @@ #define _SKIA_SUPPORT_ #endif +#include "core/fdrm/crypto/fx_crypt.h" #include "public/fpdf_dataavail.h" #include "public/fpdf_edit.h" #include "public/fpdf_ext.h" @@ -67,11 +68,13 @@ struct Options { : show_config(false), send_events(false), pages(false), + md5(false), output_format(OUTPUT_NONE) {} bool show_config; bool send_events; bool pages; + bool md5; OutputFormat output_format; std::string scale_factor_as_string; std::string exe_path; @@ -109,23 +112,37 @@ static bool CheckDimensions(int stride, int width, int height) { return true; } -static void WritePpm(const char* pdf_name, int num, const void* buffer_void, - int stride, int width, int height) { +static void OutputMD5Hash(const char* file_name, const char* buffer, int len) { + // Get the MD5 hash and write it to stdout. 
+ uint8_t digest[16]; + CRYPT_MD5Generate(reinterpret_cast<const uint8_t*>(buffer), len, digest); + printf("MD5:%s:", file_name); + for (int i = 0; i < 16; i++) + printf("%02x", digest[i]); + printf("\n"); +} + +static std::string WritePpm(const char* pdf_name, + int num, + const void* buffer_void, + int stride, + int width, + int height) { const char* buffer = reinterpret_cast<const char*>(buffer_void); if (!CheckDimensions(stride, width, height)) - return; + return ""; int out_len = width * height; if (out_len > INT_MAX / 3) - return; + return ""; out_len *= 3; char filename[256]; snprintf(filename, sizeof(filename), "%s.%d.ppm", pdf_name, num); FILE* fp = fopen(filename, "wb"); if (!fp) - return; + return ""; fprintf(fp, "P6\n# PDF test render\n%d %d\n255\n", width, height); // Source data is B, G, R, unused. // Dest data is R, G, B. @@ -144,6 +161,7 @@ static void WritePpm(const char* pdf_name, int num, const void* buffer_void, } fwrite(result.data(), out_len, 1, fp); fclose(fp); + return std::string(filename); } void WriteText(FPDF_PAGE page, const char* pdf_name, int num) { @@ -177,17 +195,21 @@ void WriteText(FPDF_PAGE page, const char* pdf_name, int num) { (void)fclose(fp); } -static void WritePng(const char* pdf_name, int num, const void* buffer_void, - int stride, int width, int height) { +static std::string WritePng(const char* pdf_name, + int num, + const void* buffer_void, + int stride, + int width, + int height) { if (!CheckDimensions(stride, width, height)) - return; + return ""; std::vector<unsigned char> png_encoding; const unsigned char* buffer = static_cast<const unsigned char*>(buffer_void); if (!image_diff_png::EncodeBGRAPNG( buffer, width, height, stride, false, &png_encoding)) { fprintf(stderr, "Failed to convert bitmap to PNG\n"); - return; + return ""; } char filename[256]; @@ -196,13 +218,13 @@ static void WritePng(const char* pdf_name, int num, const void* buffer_void, if (chars_formatted < 0 || static_cast<size_t>(chars_formatted) >= 
sizeof(filename)) { fprintf(stderr, "Filename %s is too long\n", filename); - return; + return ""; } FILE* fp = fopen(filename, "wb"); if (!fp) { fprintf(stderr, "Failed to open %s for output\n", filename); - return; + return ""; } size_t bytes_written = fwrite( @@ -211,23 +233,28 @@ static void WritePng(const char* pdf_name, int num, const void* buffer_void, fprintf(stderr, "Failed to write to %s\n", filename); (void)fclose(fp); + return std::string(filename); } #ifdef _WIN32 -static void WriteBmp(const char* pdf_name, int num, const void* buffer, - int stride, int width, int height) { +static std::string WriteBmp(const char* pdf_name, + int num, + const void* buffer, + int stride, + int width, + int height) { if (!CheckDimensions(stride, width, height)) - return; + return ""; int out_len = stride * height; if (out_len > INT_MAX / 3) - return; + return ""; char filename[256]; snprintf(filename, sizeof(filename), "%s.%d.bmp", pdf_name, num); FILE* fp = fopen(filename, "wb"); if (!fp) - return; + return ""; BITMAPINFO bmi = {}; bmi.bmiHeader.biSize = sizeof(bmi) - sizeof(RGBQUAD); @@ -247,6 +274,7 @@ static void WriteBmp(const char* pdf_name, int num, const void* buffer, fwrite(&bmi, bmi.bmiHeader.biSize, 1, fp); fwrite(buffer, out_len, 1, fp); fclose(fp); + return std::string(filename); } void WriteEmf(FPDF_PAGE page, const char* pdf_name, int num) { @@ -275,7 +303,9 @@ void WriteEmf(FPDF_PAGE page, const char* pdf_name, int num) { #endif #ifdef PDF_ENABLE_SKIA -void WriteSkp(const char* pdf_name, int num, SkPictureRecorder* recorder) { +static std::string WriteSkp(const char* pdf_name, + int num, + SkPictureRecorder* recorder) { char filename[256]; int chars_formatted = snprintf(filename, sizeof(filename), "%s.%d.skp", pdf_name, num); @@ -283,12 +313,13 @@ void WriteSkp(const char* pdf_name, int num, SkPictureRecorder* recorder) { if (chars_formatted < 0 || static_cast<size_t>(chars_formatted) >= sizeof(filename)) { fprintf(stderr, "Filename %s is too long\n", 
filename); - return; + return ""; } sk_sp<SkPicture> picture(recorder->finishRecordingAsPicture()); SkFILEWStream wStream(filename); picture->serialize(&wStream); + return std::string(filename); } #endif @@ -490,6 +521,8 @@ bool ParseCommandLine(const std::vector<std::string>& args, std::stringstream(pages_string.substr(first_dash + 1)) >> options->last_page; } + } else if (cur_arg == "--md5") { + options->md5 = true; } else if (cur_arg.size() >= 2 && cur_arg[0] == '-' && cur_arg[1] == '-') { fprintf(stderr, "Unrecognized argument %s\n", cur_arg.c_str()); return false; @@ -628,10 +661,12 @@ bool RenderPage(const std::string& name, const char* buffer = reinterpret_cast<const char*>(FPDFBitmap_GetBuffer(bitmap)); + std::string&& image_file_name = ""; switch (options.output_format) { #ifdef _WIN32 case OUTPUT_BMP: - WriteBmp(name.c_str(), page_index, buffer, stride, width, height); + image_file_name = + WriteBmp(name.c_str(), page_index, buffer, stride, width, height); break; case OUTPUT_EMF: @@ -643,11 +678,13 @@ bool RenderPage(const std::string& name, break; case OUTPUT_PNG: - WritePng(name.c_str(), page_index, buffer, stride, width, height); + image_file_name = + WritePng(name.c_str(), page_index, buffer, stride, width, height); break; case OUTPUT_PPM: - WritePpm(name.c_str(), page_index, buffer, stride, width, height); + image_file_name = + WritePpm(name.c_str(), page_index, buffer, stride, width, height); break; #ifdef PDF_ENABLE_SKIA @@ -656,13 +693,18 @@ bool RenderPage(const std::string& name, reinterpret_cast<SkPictureRecorder*>( FPDF_RenderPageSkp(page, width, height))); FPDF_FFLRecord(form, recorder.get(), page, 0, 0, width, height, 0, 0); - WriteSkp(name.c_str(), page_index, recorder.get()); + image_file_name = WriteSkp(name.c_str(), page_index, recorder.get()); } break; #endif default: break; } + // Write the filename and the MD5 of the buffer to stdout if we wrote a + // file. 
+ if (options.md5 && image_file_name != "") + OutputMD5Hash(image_file_name.c_str(), buffer, stride * height); + FPDFBitmap_Destroy(bitmap); } else { fprintf(stderr, "Page was too large to be rendered.\n"); @@ -872,6 +914,7 @@ static const char kUsageString[] = #ifdef PDF_ENABLE_SKIA " --skp - write page images <pdf-name>.<page-number>.skp\n" #endif + " --md5 - write output image paths and their md5 hashes to stdout.\n" ""; int main(int argc, const char* argv[]) { diff --git a/testing/tools/common.py b/testing/tools/common.py index 1e1d257f48..a0cc946f1a 100755 --- a/testing/tools/common.py +++ b/testing/tools/common.py @@ -25,6 +25,24 @@ def RunCommand(cmd): except subprocess.CalledProcessError as e: return e +# RunCommandExtractHashedFiles returns a tuple: (raised_exception, hashed_files) +# It runs the given command. If it fails it will return an exception and None. +# If it succeeds it will return None and the list of processed files extracted +# from the output of the command. It expects lines in this format: +# MD5:<path_to_image_file>:<md5_hash_in_hex> +# The returned hashed_files is a list of (file_path, MD5-hash) pairs. +def RunCommandExtractHashedFiles(cmd): + try: + output = subprocess.check_output(cmd, universal_newlines=True) + ret = [] + for line in output.split('\n'): + line = line.strip() + if line.startswith("MD5:"): + ret.append([x.strip() for x in line.lstrip("MD5:").rsplit(":", 1)]) + return None, ret + except subprocess.CalledProcessError as e: + return e, None + # Adjust Dr. Memory wrapper to have separate log directory for each test # for better error reporting. def DrMemoryWrapper(wrapper, pdf_name): diff --git a/testing/tools/gold.py b/testing/tools/gold.py new file mode 100644 index 0000000000..fda63b6deb --- /dev/null +++ b/testing/tools/gold.py @@ -0,0 +1,126 @@ +# Copyright 2015 The PDFium Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. 
+ + +import json +import os +import shlex +import shutil + +# This module collects and writes output in a format expected by the +# Gold baseline tool. Based on meta data provided explicitly and by +# adding a series of test results it can be used to produce +# a JSON file that is uploaded to Google Storage and ingested by Gold. +# +# The output will look similar this: +# +# { +# "build_number" : "2", +# "gitHash" : "a4a338179013b029d6dd55e737b5bd648a9fb68c", +# "key" : { +# "arch" : "arm64", +# "compiler" : "Clang", +# }, +# "results" : [ +# { +# "key" : { +# "config" : "vk", +# "name" : "yuv_nv12_to_rgb_effect", +# "source_type" : "gm" +# }, +# "md5" : "7db34da246868d50ab9ddd776ce6d779", +# "options" : { +# "ext" : "png", +# "gamma_correct" : "no" +# } +# }, +# { +# "key" : { +# "config" : "vk", +# "name" : "yuv_to_rgb_effect", +# "source_type" : "gm" +# }, +# "md5" : "0b955f387740c66eb23bf0e253c80d64", +# "options" : { +# "ext" : "png", +# "gamma_correct" : "no" +# } +# } +# ], +# } +# +class GoldResults(object): + def __init__(self, source_type, outputDir, propertiesStr, keyStr): + """ + source_type is the source_type (=corpus) field used for all results. + output_dir is the directory where the resulting images are copied and + the dm.json file is written. + propertiesStr is a string with space separated key/value pairs that + is used to set the top level fields in the output JSON file. + keyStr is a string with space separated key/value pairs that + is used to set the 'key' field in the output JSON file. 
+ """ + self._source_type = source_type + self._properties = self._parseKeyValuePairs(propertiesStr) + self._properties["key"] = self._parseKeyValuePairs(keyStr) + self._results = [] + self._outputDir = outputDir + + def AddTestResult(self, testName, md5Hash, outputImagePath): + # Copy the image to <output_dir>/<md5Hash>.<image_extension> + imgExt = os.path.splitext(outputImagePath)[1].lstrip(".") + if not imgExt: + raise ValueError("File %s does not have an extension" % outputImagePath) + newFilePath = os.path.join(self._outputDir, md5Hash + '.' + imgExt) + shutil.copy2(outputImagePath, newFilePath) + + # Add an entry to the list of test results + self._results.append({ + "key": { + "name": testName, + "source_type": self._source_type, + }, + "md5": md5Hash, + "options": { + "ext": imgExt, + "gamma_correct": "no" + } + }) + + def _parseKeyValuePairs(self, kvStr): + kvPairs = shlex.split(kvStr) + if len(kvPairs) % 2: + raise ValueError("Uneven number of key/value pairs. Got %s" % kvStr) + return { kvPairs[i]:kvPairs[i+1] for i in range(0, len(kvPairs), 2) } + + def WriteResults(self): + self._properties.update({ + "results": self._results + }) + + outputFileName = os.path.join(self._outputDir, "dm.json") + with open(outputFileName, 'wb') as outfile: + json.dump(self._properties, outfile, indent=1) + outfile.write("\n") + +# Produce example output for manual testing. +if __name__ == "__main__": + # Create a test directory with three empty 'image' files. + testDir = "./testdirectory" + if not os.path.exists(testDir): + os.makedirs(testDir) + open(os.path.join(testDir, "image1.png"), 'wb').close() + open(os.path.join(testDir, "image2.png"), 'wb').close() + open(os.path.join(testDir, "image3.png"), 'wb').close() + + # Create an instance and add results. 
+ propStr = """build_number 2 "builder name" Builder-Name gitHash a4a338179013b029d6dd55e737b5bd648a9fb68c""" + + keyStr = "arch arm64 compiler Clang configuration Debug" + + gr = GoldResults("pdfium", testDir, propStr, keyStr) + gr.AddTestResult("test-1", "hash-1", os.path.join(testDir, "image1.png")) + gr.AddTestResult("test-2", "hash-2", os.path.join(testDir, "image2.png")) + gr.AddTestResult("test-3", "hash-3", os.path.join(testDir, "image3.png")) + gr.WriteResults() diff --git a/testing/tools/test_runner.py b/testing/tools/test_runner.py index 5c377067d0..fad7a9c66a 100644 --- a/testing/tools/test_runner.py +++ b/testing/tools/test_runner.py @@ -14,6 +14,7 @@ import subprocess import sys import common +import gold import pngdiffer import suppressor @@ -39,6 +40,10 @@ class TestRunner: def __init__(self, dirname): self.test_dir = dirname + # GenerateAndTest returns a tuple <success, outputfiles> where + # success is a boolean indicating whether the tests passed comparison + # tests and outputfiles is a list tuples: + # (path_to_image, md5_hash_of_pixelbuffer) def GenerateAndTest(self, input_filename, source_dir): input_root, _ = os.path.splitext(input_filename) expected_txt_path = os.path.join(source_dir, input_root + '_expected.txt') @@ -59,23 +64,23 @@ class TestRunner: if raised_exception != None: print "FAILURE: " + input_filename + "; " + str(raised_exception) - return False + return False, [] + results = [] if os.path.exists(expected_txt_path): raised_exception = self.TestText(input_root, expected_txt_path, pdf_path) else: - raised_exception = self.TestPixel(input_root, pdf_path) + raised_exception, results = self.TestPixel(input_root, pdf_path) if raised_exception != None: print "FAILURE: " + input_filename + "; " + str(raised_exception) - return False + return False, results if len(actual_images): if self.image_differ.HasDifferences(input_filename, source_dir, self.working_dir): - return False - - return True + return False, results + return True, 
results def Generate(self, source_dir, input_filename, input_root, pdf_path): original_path = os.path.join(source_dir, input_filename) @@ -113,12 +118,20 @@ class TestRunner: def TestPixel(self, input_root, pdf_path): cmd_to_run = common.DrMemoryWrapper(self.drmem_wrapper, input_root) - cmd_to_run.extend([self.pdfium_test_path, '--send-events', '--png', - pdf_path]) - return common.RunCommand(cmd_to_run) - + cmd_to_run.extend([self.pdfium_test_path, '--send-events', '--png']) + if self.gold_results: + cmd_to_run.append('--md5') + cmd_to_run.append(pdf_path) + return common.RunCommandExtractHashedFiles(cmd_to_run) def HandleResult(self, input_filename, input_path, result): + if self.gold_results: + success, image_paths = result + for img_path, md5_hash in image_paths: + # the output filename (without extension becomes the test name) + test_name = os.path.splitext(os.path.split(img_path)[1])[0] + self.gold_results.AddTestResult(test_name, md5_hash, img_path) + if self.test_suppressor.IsResultSuppressed(input_filename): if result: self.surprises.append(input_path) @@ -129,13 +142,29 @@ class TestRunner: def Run(self): parser = optparse.OptionParser() + parser.add_option('--build-dir', default=os.path.join('out', 'Debug'), help='relative path from the base source directory') + parser.add_option('-j', default=multiprocessing.cpu_count(), dest='num_workers', type='int', help='run NUM_WORKERS jobs in parallel') + parser.add_option('--wrapper', default='', dest="wrapper", help='wrapper for running test under Dr. 
Memory') + + parser.add_option('--gold_properties', default='', dest="gold_properties", + help='Key value pairs that are written to the top level of the JSON file that is ingested by Gold.') + + parser.add_option('--gold_key', default='', dest="gold_key", + help='Key value pairs that are added to the "key" field of the JSON file that is ingested by Gold.') + + parser.add_option('--gold_output_dir', default='', dest="gold_output_dir", + help='Path of where to write the JSON output to be uploaded to Gold.') + + parser.add_option('--ignore_errors', action="store_true", dest="ignore_errors", + help='Prevents the return value from being non-zero when image comparison fails.') + options, args = parser.parse_args() finder = common.DirectoryFinder(options.build_dir) @@ -191,6 +220,14 @@ class TestRunner: self.failures = [] self.surprises = [] + # Collect Gold results if an output directory was named. + self.gold_results = None + if options.gold_output_dir: + self.gold_results = gold.GoldResults("pdfium", + options.gold_output_dir, + options.gold_properties, + options.gold_key) + if options.num_workers > 1 and len(test_cases) > 1: try: pool = multiprocessing.Pool(options.num_workers) @@ -215,6 +252,9 @@ class TestRunner: self.HandleResult(input_filename, os.path.join(input_file_dir, input_filename), result) + if self.gold_results: + self.gold_results.WriteResults() + if self.surprises: self.surprises.sort() print '\n\nUnexpected Successes:' @@ -226,6 +266,6 @@ class TestRunner: print '\n\nSummary of Failures:' for failure in self.failures: print failure - return 1 - + if not options.ignore_errors: + return 1 return 0 |