diff options
author | Henrique Nakashima <hnakashima@chromium.org> | 2017-08-03 13:29:22 -0400 |
---|---|---|
committer | Chromium commit bot <commit-bot@chromium.org> | 2017-08-03 22:00:23 +0000 |
commit | f24fc1e69d77db16527de99bff192693878f4080 (patch) | |
tree | be79d47e1367cdee5ea7eef363addb5233d3b514 /testing/tools/safetynet_compare.py | |
parent | b35dbadce21f684619377ce545e066de6494a441 (diff) | |
download | pdfium-f24fc1e69d77db16527de99bff192693878f4080.tar.xz |
Add script to compare performance of two versions of pdfium.
Run from the pdfium root:
$ testing/tools/safetynet_compare.py testing
This compares the current branch with and without local changes.
$ testing/tools/safetynet_compare.py testing/corpus --branch-before x
This compares the current branch + local changes against branch x.
It runs only the corpus tests.
$ testing/tools/safetynet_compare.py testing --branch-before x
--branch-after y --build-dir=~/output_compare
This compares branch x and branch y. x and y can be revision hashes.
The callgrind.out files of cases with significant changes will be
created in ~/output_compare.
$ testing/tools/safetynet_compare.py -h
Print all options.
Change-Id: I43aaf5fe890745db611fb3bc00a656ef799fdfef
Reviewed-on: https://pdfium-review.googlesource.com/7390
Reviewed-by: Lei Zhang <thestig@chromium.org>
Commit-Queue: Henrique Nakashima <hnakashima@chromium.org>
Diffstat (limited to 'testing/tools/safetynet_compare.py')
-rwxr-xr-x | testing/tools/safetynet_compare.py | 680 |
1 files changed, 680 insertions, 0 deletions
diff --git a/testing/tools/safetynet_compare.py b/testing/tools/safetynet_compare.py new file mode 100755 index 0000000000..e54bec5366 --- /dev/null +++ b/testing/tools/safetynet_compare.py @@ -0,0 +1,680 @@ +#!/usr/bin/env python +# Copyright 2017 The PDFium Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + +"""Compares the performance of two versions of the pdfium code.""" + +import argparse +import functools +import json +import multiprocessing +import os +import re +import shutil +import subprocess +import sys +import tempfile + +from common import GetBooleanGnArg +from githelper import GitHelper +from safetynet_conclusions import ComparisonConclusions +from safetynet_conclusions import PrintConclusionsDictHumanReadable +from safetynet_conclusions import RATING_IMPROVEMENT +from safetynet_conclusions import RATING_REGRESSION + + +def PrintErr(s): + """Prints s to stderr.""" + print >> sys.stderr, s + + +def RunSingleTestCaseParallel(this, run_label, build_dir, test_case): + result = this.RunSingleTestCase(run_label, build_dir, test_case) + return (test_case, result) + + +class CompareRun(object): + """A comparison between two branches of pdfium.""" + + def __init__(self, args): + self.git = GitHelper() + self.args = args + self._InitPaths() + + def _InitPaths(self): + if self.args.this_repo: + measure_script_path = os.path.join(self.args.build_dir, + 'safetynet_measure_current.py') + else: + measure_script_path = 'testing/tools/safetynet_measure.py' + self.safe_measure_script_path = os.path.abspath(measure_script_path) + + input_file_re = re.compile('^.+[.]pdf$') + self.test_cases = [] + for input_path in self.args.input_paths: + if os.path.isfile(input_path): + self.test_cases.append(input_path) + elif os.path.isdir(input_path): + for file_dir, _, filename_list in os.walk(input_path): + for input_filename in filename_list: + if input_file_re.match(input_filename): + file_path = os.path.join(file_dir, input_filename) + if os.path.isfile(file_path): + self.test_cases.append(file_path) + + self.after_build_dir = self.args.build_dir + if self.args.build_dir_before: + self.before_build_dir = self.args.build_dir_before + else: + self.before_build_dir = self.after_build_dir + + def Run(self): + """Runs comparison by checking out branches, building and measuring them. + + Returns: + Exit code for the script. + """ + if self.args.this_repo: + self._FreezeMeasureScript() + + if self.args.branch_after: + if self.args.this_repo: + before, after = self._ProfileTwoOtherBranchesInThisRepo( + self.args.branch_before, + self.args.branch_after) + else: + before, after = self._ProfileTwoOtherBranches( + self.args.branch_before, + self.args.branch_after) + elif self.args.branch_before: + if self.args.this_repo: + before, after = self._ProfileCurrentAndOtherBranchInThisRepo( + self.args.branch_before) + else: + before, after = self._ProfileCurrentAndOtherBranch( + self.args.branch_before) + else: + if self.args.this_repo: + before, after = self._ProfileLocalChangesAndCurrentBranchInThisRepo() + else: + before, after = self._ProfileLocalChangesAndCurrentBranch() + + conclusions = self._DrawConclusions(before, after) + conclusions_dict = conclusions.GetOutputDict() + + self._PrintConclusions(conclusions_dict) + + self._CleanUp(conclusions) + + return 0 + + def _FreezeMeasureScript(self): + """Freezes a version of the measuring script. + + This is needed to make sure we are comparing the pdfium library changes and + not script changes that may happen between the two branches. + """ + subprocess.check_output(['cp', 'testing/tools/safetynet_measure.py', + self.safe_measure_script_path]) + + def _ProfileTwoOtherBranchesInThisRepo(self, before_branch, after_branch): + """Profiles two branches that are not the current branch. + + This is done in the local repository and changes may not be restored if the + script fails or is interrupted. + + after_branch does not need to descend from before_branch, they will be + measured the same way + + Args: + before_branch: One branch to profile. + after_branch: Other branch to profile. + + Returns: + A tuple (before, after), where each of before and after is a dict + mapping a test case name to the the profiling values for that test case + in the given branch. + """ + branch_to_restore = self.git.GetCurrentBranchName() + + self._StashLocalChanges() + + self._CheckoutBranch(after_branch) + self._BuildCurrentBranch(self.after_build_dir) + after = self._MeasureCurrentBranch('after', self.after_build_dir) + + self._CheckoutBranch(before_branch) + self._BuildCurrentBranch(self.before_build_dir) + before = self._MeasureCurrentBranch('before', self.before_build_dir) + + self._CheckoutBranch(branch_to_restore) + self._RestoreLocalChanges() + + return before, after + + def _ProfileTwoOtherBranches(self, before_branch, after_branch): + """Profiles two branches that are not the current branch. + + This is done in new, cloned repositories, therefore it is safer but slower + and requires downloads. + + after_branch does not need to descend from before_branch, they will be + measured the same way + + Args: + before_branch: One branch to profile. + after_branch: Other branch to profile. + + Returns: + A tuple (before, after), where each of before and after is a dict + mapping a test case name to the the profiling values for that test case + in the given branch. + """ + after = self._ProfileSeparateRepo('after', + self.after_build_dir, + after_branch) + before = self._ProfileSeparateRepo('before', + self.before_build_dir, + before_branch) + return before, after + + def _ProfileCurrentAndOtherBranchInThisRepo(self, other_branch): + """Profiles the current branch (with uncommitted changes) and another one. + + This is done in the local repository and changes may not be restored if the + script fails or is interrupted. + + The current branch does not need to descend from other_branch. + + Args: + other_branch: Other branch to profile that is not the current. + + Returns: + A tuple (before, after), where each of before and after is a dict + mapping a test case name to the the profiling values for that test case + in the given branch. The current branch is considered to be "after" and + the other branch is considered to be "before". + """ + branch_to_restore = self.git.GetCurrentBranchName() + + self._BuildCurrentBranch(self.after_build_dir) + after = self._MeasureCurrentBranch('after', self.after_build_dir) + + self._StashLocalChanges() + + self._CheckoutBranch(other_branch) + self._BuildCurrentBranch(self.before_build_dir) + before = self._MeasureCurrentBranch('before', self.before_build_dir) + + self._CheckoutBranch(branch_to_restore) + self._RestoreLocalChanges() + + return before, after + + def _ProfileCurrentAndOtherBranch(self, other_branch): + """Profiles the current branch (with uncommitted changes) and another one. + + This is done in new, cloned repositories, therefore it is safer but slower + and requires downloads. + + The current branch does not need to descend from other_branch. + + Args: + other_branch: Other branch to profile that is not the current. None will + compare to the same branch. + + Returns: + A tuple (before, after), where each of before and after is a dict + mapping a test case name to the the profiling values for that test case + in the given branch. The current branch is considered to be "after" and + the other branch is considered to be "before". + """ + self._BuildCurrentBranch(self.after_build_dir) + after = self._MeasureCurrentBranch('after', self.after_build_dir) + + before = self._ProfileSeparateRepo('before', + self.before_build_dir, + other_branch) + + return before, after + + def _ProfileLocalChangesAndCurrentBranchInThisRepo(self): + """Profiles the current branch with and without uncommitted changes. + + This is done in the local repository and changes may not be restored if the + script fails or is interrupted. + + Returns: + A tuple (before, after), where each of before and after is a dict + mapping a test case name to the the profiling values for that test case + using the given version. The current branch without uncommitted changes is + considered to be "before" and with uncommitted changes is considered to be + "after". + """ + self._BuildCurrentBranch(self.after_build_dir) + after = self._MeasureCurrentBranch('after', self.after_build_dir) + + pushed = self._StashLocalChanges() + if not pushed and not self.args.build_dir_before: + PrintErr('Warning: No local changes to compare') + + before_build_dir = self.before_build_dir + + self._BuildCurrentBranch(before_build_dir) + before = self._MeasureCurrentBranch('before', before_build_dir) + + self._RestoreLocalChanges() + + return before, after + + def _ProfileLocalChangesAndCurrentBranch(self): + """Profiles the current branch with and without uncommitted changes. + + This is done in new, cloned repositories, therefore it is safer but slower + and requires downloads. + + Returns: + A tuple (before, after), where each of before and after is a dict + mapping a test case name to the the profiling values for that test case + using the given version. The current branch without uncommitted changes is + considered to be "before" and with uncommitted changes is considered to be + "after". + """ + return self._ProfileCurrentAndOtherBranch(other_branch=None) + + def _ProfileSeparateRepo(self, run_label, relative_build_dir, branch): + """Profiles a branch in a a temporary git repository. + + Args: + run_label: String to differentiate this version of the code in output + files from other versions. + relative_build_dir: Path to the build dir in the current working dir to + clone build args from. + branch: Branch to checkout in the new repository. None will + profile the same branch checked out in the original repo. + Returns: + A dict mapping each test case name to the the profiling values for that + test case. + """ + build_dir = self._CreateTempRepo('repo_%s' % run_label, + relative_build_dir, + branch) + + self._BuildCurrentBranch(build_dir) + return self._MeasureCurrentBranch(run_label, build_dir) + + def _CreateTempRepo(self, dir_name, relative_build_dir, branch): + """Clones a temporary git repository out of the current working dir. + + Args: + dir_name: Name for the temporary repository directory + relative_build_dir: Path to the build dir in the current working dir to + clone build args from. + branch: Branch to checkout in the new repository. None will keep checked + out the same branch as the local repo. + Returns: + Path to the build directory of the new repository. + """ + cwd = os.getcwd() + + repo_dir = tempfile.mkdtemp(suffix='-%s' % dir_name) + src_dir = os.path.join(repo_dir, 'pdfium') + + self.git.CloneLocal(os.getcwd(), src_dir) + + if branch is not None: + os.chdir(src_dir) + self.git.Checkout(branch) + + os.chdir(repo_dir) + PrintErr('Syncing...') + + cmd = ['gclient', 'config', '--unmanaged', + 'https://pdfium.googlesource.com/pdfium.git'] + if self.args.cache_dir: + cmd.append('--cache-dir=%s' % self.args.cache_dir) + subprocess.check_output(cmd) + + subprocess.check_output(['gclient', 'sync']) + PrintErr('Done.') + + build_dir = os.path.join(src_dir, relative_build_dir) + os.makedirs(build_dir) + os.chdir(src_dir) + + source_gn_args = os.path.join(cwd, relative_build_dir, 'args.gn') + dest_gn_args = os.path.join(build_dir, 'args.gn') + shutil.copy(source_gn_args, dest_gn_args) + + subprocess.check_output(['gn', 'gen', relative_build_dir]) + + os.chdir(cwd) + + return build_dir + + + def _CheckoutBranch(self, branch): + PrintErr("Checking out branch '%s'" % branch) + self.git.Checkout(branch) + + def _StashLocalChanges(self): + PrintErr('Stashing local changes') + return self.git.StashPush() + + def _RestoreLocalChanges(self): + PrintErr('Restoring local changes') + self.git.StashPopAll() + + def _BuildCurrentBranch(self, build_dir): + """Synchronizes and builds the current version of pdfium. + + Args: + build_dir: String with path to build directory + """ + PrintErr('Syncing...') + subprocess.check_output(['gclient', 'sync']) + PrintErr('Done.') + + cmd = ['ninja', '-C', build_dir, 'pdfium_test'] + + if GetBooleanGnArg('use_goma', build_dir): + cmd.extend(['-j', '250']) + + PrintErr('Building...') + subprocess.check_output(cmd) + PrintErr('Done.') + + def _MeasureCurrentBranch(self, run_label, build_dir): + PrintErr('Measuring...') + if self.args.num_workers > 1 and len(self.test_cases) > 1: + results = self._RunAsync(run_label, build_dir) + else: + results = self._RunSync(run_label, build_dir) + PrintErr('Done.') + + return results + + def _RunSync(self, run_label, build_dir): + """Profiles the test cases synchronously. + + Args: + run_label: String to differentiate this version of the code in output + files from other versions. + build_dir: String with path to build directory + + Returns: + A dict mapping each test case name to the the profiling values for that + test case. + """ + results = {} + + for test_case in self.test_cases: + result = self.RunSingleTestCase(run_label, build_dir, test_case) + if result is not None: + results[test_case] = result + + return results + + def _RunAsync(self, run_label, build_dir): + """Profiles the test cases asynchronously. + + Uses as many workers as configured by --num-workers. + + Args: + run_label: String to differentiate this version of the code in output + files from other versions. + build_dir: String with path to build directory + + Returns: + A dict mapping each test case name to the the profiling values for that + test case. + """ + results = {} + pool = multiprocessing.Pool(self.args.num_workers) + worker_func = functools.partial( + RunSingleTestCaseParallel, self, run_label, build_dir) + + try: + # The timeout is a workaround for http://bugs.python.org/issue8296 + # which prevents KeyboardInterrupt from working. + one_year_in_seconds = 3600 * 24 * 365 + worker_results = (pool.map_async(worker_func, self.test_cases) + .get(one_year_in_seconds)) + for worker_result in worker_results: + test_case, result = worker_result + if result is not None: + results[test_case] = result + except KeyboardInterrupt: + pool.terminate() + sys.exit(1) + else: + pool.close() + + pool.join() + + return results + + def RunSingleTestCase(self, run_label, build_dir, test_case): + """Profiles a single test case. + + Args: + run_label: String to differentiate this version of the code in output + files from other versions. + build_dir: String with path to build directory + test_case: Path to the test case. + + Returns: + The measured profiling value for that test case. + """ + command = [self.safe_measure_script_path, test_case, + '--build-dir=%s' % build_dir] + + if self.args.interesting_section: + command.append('--interesting-section') + + if self.args.profiler: + command.append('--profiler=%s' % self.args.profiler) + + profile_file_path = self._GetProfileFilePath(run_label, test_case) + if profile_file_path: + command.append('--output-path=%s' % profile_file_path) + + try: + output = subprocess.check_output(command) + except subprocess.CalledProcessError as e: + PrintErr(e) + PrintErr(35 * '=' + ' Output: ' + 34 * '=') + PrintErr(e.output) + PrintErr(80 * '=') + return None + + # Get the time number as output, making sure it's just a number + output = output.strip() + if re.match('^[0-9]+$', output): + return int(output) + + return None + + def _GetProfileFilePath(self, run_label, test_case): + if self.args.output_dir: + output_filename = ('callgrind.out.%s.%s' + % (test_case.replace('/', '_'), + run_label)) + return os.path.join(self.args.output_dir, output_filename) + else: + return None + + def _DrawConclusions(self, times_before_branch, times_after_branch): + """Draws conclusions comparing results of test runs in two branches. + + Args: + times_before_branch: A dict mapping each test case name to the the + profiling values for that test case in the branch to be considered + as the baseline. + times_after_branch: A dict mapping each test case name to the the + profiling values for that test case in the branch to be considered + as the new version. + + Returns: + ComparisonConclusions with all test cases processed. + """ + conclusions = ComparisonConclusions(self.args.threshold_significant) + + for test_case in sorted(self.test_cases): + before = times_before_branch.get(test_case) + after = times_after_branch.get(test_case) + conclusions.ProcessCase(test_case, before, after) + + return conclusions + + def _PrintConclusions(self, conclusions_dict): + """Prints the conclusions as the script output. + + Depending on the script args, this can output a human or a machine-readable + version of the conclusions. + + Args: + conclusions_dict: Dict to print returned from + ComparisonConclusions.GetOutputDict(). + """ + if self.args.machine_readable: + print json.dumps(conclusions_dict) + else: + PrintConclusionsDictHumanReadable( + conclusions_dict, colored=True, key=self.args.case_order) + + def _CleanUp(self, conclusions): + """Removes profile output files for uninteresting cases. + + Cases without significant regressions or improvements and considered + uninteresting. + + Args: + conclusions: A ComparisonConclusions. + """ + if not self.args.output_dir: + return + + if self.args.profiler != 'callgrind': + return + + for case_result in conclusions.GetCaseResults().values(): + if case_result.rating not in [RATING_REGRESSION, RATING_IMPROVEMENT]: + self._CleanUpOutputFile('before', case_result.case_name) + self._CleanUpOutputFile('after', case_result.case_name) + + def _CleanUpOutputFile(self, run_label, case_name): + """Removes one profile output file. + + If the output file does not exist, fails silently. + + Args: + run_label: String to differentiate a version of the code in output + files from other versions. + case_name: String identifying test case for which to remove the output + file. + """ + try: + os.remove(self._GetProfileFilePath(run_label, case_name)) + except OSError: + pass + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument('input_paths', nargs='+', + help='pdf files or directories to search for pdf files ' + 'to run as test cases') + parser.add_argument('--branch-before', + help='git branch to use as "before" for comparison. ' + 'Omitting this will use the current branch ' + 'without uncommitted changes as the baseline.') + parser.add_argument('--branch-after', + help='git branch to use as "after" for comparison. ' + 'Omitting this will use the current branch ' + 'with uncommitted changes.') + parser.add_argument('--build-dir', default=os.path.join('out', 'Release'), + help='relative path from the base source directory ' + 'to the build directory') + parser.add_argument('--build-dir-before', + help='relative path from the base source directory ' + 'to the build directory for the "before" branch, if ' + 'different from the build directory for the ' + '"after" branch') + parser.add_argument('--cache-dir', default=None, + help='directory with a new or preexisting cache for ' + 'downloads. Default is to not use a cache.') + parser.add_argument('--this-repo', action='store_true', + help='use the repository where the script is instead of ' + 'checking out a temporary one. This is faster and ' + 'does not require downloads, but although it ' + 'restores the state of the local repo, if the ' + 'script is killed or crashes the changes can remain ' + 'stashed and you may be on another branch.') + parser.add_argument('--profiler', default='callgrind', + help='which profiler to use. Supports callgrind and ' + 'perfstat for now. Default is callgrind.') + parser.add_argument('--interesting-section', action='store_true', + help='whether to measure just the interesting section or ' + 'the whole test harness. Limiting to only the ' + 'interesting section does not work on Release since ' + 'the delimiters are optimized out') + parser.add_argument('--num-workers', default=multiprocessing.cpu_count(), + type=int, help='run NUM_WORKERS jobs in parallel') + parser.add_argument('--output-dir', + help='directory to write the profile data output files') + parser.add_argument('--threshold-significant', default=0.02, type=float, + help='variations in performance above this factor are ' + 'considered significant') + parser.add_argument('--machine-readable', action='store_true', + help='whether to get output for machines. If enabled the ' + 'output will be a json with the format specified in ' + 'ComparisonConclusions.GetOutputDict(). Default is ' + 'human-readable.') + parser.add_argument('--case-order', default=None, + help='what key to use when sorting test cases in the ' + 'output. Accepted values are "after", "before", ' + '"ratio" and "rating". Default is sorting by test ' + 'case path.') + + args = parser.parse_args() + + # Always start at the pdfium src dir, which is assumed to be two level above + # this script. + pdfium_src_dir = os.path.join( + os.path.dirname(__file__), + os.path.pardir, + os.path.pardir) + os.chdir(pdfium_src_dir) + + git = GitHelper() + + if args.branch_after and not args.branch_before: + PrintErr('--branch-after requires --branch-before to be specified.') + return 1 + + if args.branch_after and not git.BranchExists(args.branch_after): + PrintErr('Branch "%s" does not exist' % args.branch_after) + return 1 + + if args.branch_before and not git.BranchExists(args.branch_before): + PrintErr('Branch "%s" does not exist' % args.branch_before) + return 1 + + if args.output_dir: + args.output_dir = os.path.expanduser(args.output_dir) + if not os.path.isdir(args.output_dir): + PrintErr('"%s" is not a directory' % args.output_dir) + return 1 + + if args.threshold_significant <= 0.0: + PrintErr('--threshold-significant should receive a positive float') + return 1 + + run = CompareRun(args) + return run.Run() + + +if __name__ == '__main__': + sys.exit(main()) |