From 574366b637c1e937efc7b1becb1d151c3599f7af Mon Sep 17 00:00:00 2001
From: Ryan Harrison <rharrison@chromium.org>
Date: Tue, 18 Jul 2017 10:18:55 -0400
Subject: Add tools for generating coverage information

Adds script coverage_report.py for generating code coverage
reports. This script depends on the llvm-gcov wrapper script and the
lcovrc config file that are checked in with it.

BUG=pdfium:797

Change-Id: I0416349186835c0e4dd7f4907d2b472a4406e027
Reviewed-on: https://pdfium-review.googlesource.com/7810
Commit-Queue: Ryan Harrison <rharrison@chromium.org>
Reviewed-by: Henrique Nakashima <hnakashima@chromium.org>
Reviewed-by: Lei Zhang <thestig@chromium.org>
---
 tools/coverage/coverage_report.py | 393 ++++++++++++++++++++++++++++++++++++++
 tools/coverage/lcovrc             | 156 +++++++++++++++
 tools/coverage/llvm-gcov          |  15 ++
 3 files changed, 564 insertions(+)
 create mode 100755 tools/coverage/coverage_report.py
 create mode 100644 tools/coverage/lcovrc
 create mode 100755 tools/coverage/llvm-gcov

diff --git a/tools/coverage/coverage_report.py b/tools/coverage/coverage_report.py
new file mode 100755
index 0000000000..9d6a95c088
--- /dev/null
+++ b/tools/coverage/coverage_report.py
@@ -0,0 +1,393 @@
+#!/usr/bin/env python
+# Copyright 2017 The PDFium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Generates a coverage report for given binaries using llvm-gcov & lcov.
+
+Requires llvm-cov 3.5 or later.
+Requires lcov 1.11 or later.
+Requires that 'use_coverage = true' is set in args.gn.
+"""
+
+import argparse
+from collections import namedtuple
+import pprint
+import os
+import re
+import subprocess
+import sys
+
+
+# 'binary' is the file that is to be run for the test.
+# 'use_test_runner' indicates if 'binary' depends on test_runner.py and thus
+# requires special handling.
+TestSpec = namedtuple('TestSpec', 'binary, use_test_runner')
+
+# All of the coverage tests that the script knows how to run.
+COVERAGE_TESTS = {
+    'pdfium_unittests': TestSpec('pdfium_unittests', False),
+    'pdfium_embeddertests': TestSpec('pdfium_embeddertests', False),
+    'corpus_tests': TestSpec('run_corpus_tests.py', True),
+    'javascript_tests': TestSpec('run_javascript_tests.py', True),
+    'pixel_tests': TestSpec('run_pixel_tests.py', True),
+}
+
+# Coverage tests that are known to take a long time to run, so are not in the
+# default set. The user must either explicitly invoke these tests or pass in
+# --slow.
+SLOW_TESTS = ['corpus_tests', 'javascript_tests', 'pixel_tests']
+
+class CoverageExecutor(object):
+
+  def __init__(self, parser, args):
+    """Initialize executor based on the current script environment
+
+    Args:
+        parser: argparse.ArgumentParser for handling improper inputs.
+        args: Dictionary of arguments passed into the calling script.
+    """
+    self.dry_run = args['dry_run']
+    self.verbose = args['verbose']
+
+    llvm_cov = self.determine_proper_llvm_cov()
+    if not llvm_cov:
+      print 'Unable to find appropriate llvm-cov to use'
+      sys.exit(1)
+    self.lcov_env = os.environ.copy()
+    self.lcov_env['LLVM_COV_BIN'] = llvm_cov
+
+    self.lcov = self.determine_proper_lcov()
+    if not self.lcov:
+      print 'Unable to find appropriate lcov to use'
+      sys.exit(1)
+
+    self.coverage_files = set()
+    self.source_directory = args['source_directory']
+    if not os.path.isdir(self.source_directory):
+      parser.error("'%s' needs to be a directory" % self.source_directory)
+
+    self.build_directory = args['build_directory']
+    if not os.path.isdir(self.build_directory):
+      parser.error("'%s' needs to be a directory" % self.build_directory)
+
+    self.coverage_tests = self.calculate_coverage_tests(args)
+    if not self.coverage_tests:
+      parser.error(
+          'No valid tests in set to be run. This is likely due to bad command '
+          'line arguments')
+
+    if not self.boolean_gn_arg('use_coverage'):
+      parser.error(
+          'use_coverage does not appear to be set to true for build, but is '
+          'needed')
+
+    self.use_goma = self.boolean_gn_arg('use_goma')
+
+    self.output_directory = args['output_directory']
+    if not os.path.exists(self.output_directory):
+      if not self.dry_run:
+        os.makedirs(self.output_directory)
+    elif not os.path.isdir(self.output_directory):
+      parser.error('%s exists, but is not a directory' % self.output_directory)
+    self.coverage_totals_path = os.path.join(self.output_directory,
+                                             'pdfium_totals.info')
+
+  def boolean_gn_arg(self, arg):
+    """Extract the value of a boolean flag in args.gn; False if not found."""
+    cwd = os.getcwd()
+    os.chdir(self.build_directory)
+    gn_args_output = self.check_output(
+        ['gn', 'args', '.', '--list=%s' % arg, '--short'])
+    os.chdir(cwd)
+    matcher = re.match('%s = (.*)' % arg, gn_args_output)
+    if self.verbose:
+      print "Found '%s' for value of %s" % (matcher and matcher.group(1), arg)
+    return bool(matcher) and matcher.group(1) == 'true'
+
+  def check_output(self, args, dry_run=False, env=None):
+    """Dry run aware subprocess.check_output(); a dry run returns ''."""
+    if dry_run:
+      print "Would have run '%s'" % ' '.join(args)
+      return ''
+
+    output = subprocess.check_output(args, env=env)
+
+    if self.verbose:
+      print "check_output(%s) returned '%s'" % (args, output)
+    return output
+
+  def call(self, args, dry_run=False, env=None):
+    """Dry run aware subprocess.call(); a dry run returns 0 (success)."""
+    if dry_run:
+      print "Would have run '%s'" % ' '.join(args)
+      return 0
+
+    output = subprocess.call(args, env=env)
+
+    if self.verbose:
+      print 'call(%s) returned %s' % (args, output)
+    return output
+
+  def call_lcov(self, args, dry_run=False, needs_directory=True):
+    """Run lcov with the source tree's lcovrc and llvm-gcov; returns status."""
+    lcov_args = [
+        self.lcov, '--config-file',
+        os.path.join(self.source_directory, 'tools', 'coverage', 'lcovrc'),
+        '--gcov-tool',
+        os.path.join(self.source_directory, 'tools', 'coverage', 'llvm-gcov')
+    ]
+    if needs_directory:
+      lcov_args.extend(['--directory', self.source_directory])
+    if not self.verbose:
+      lcov_args.append('--quiet')
+    lcov_args.extend(args)
+    return self.call(lcov_args, dry_run=dry_run, env=self.lcov_env)
+
+  def calculate_coverage_tests(self, args):
+    """Return dict of test name to TestSpec for the tests that should run."""
+    testing_tools_directory = os.path.join(self.source_directory, 'testing',
+                                           'tools')
+    coverage_tests = {}
+    for name in COVERAGE_TESTS.keys():
+      (binary, uses_test_runner) = COVERAGE_TESTS[name]
+      if uses_test_runner:
+        binary_path = os.path.join(testing_tools_directory, binary)
+      else:
+        binary_path = os.path.join(self.build_directory, binary)
+      coverage_tests[name] = TestSpec(binary_path, uses_test_runner)
+
+    if args['tests']:
+      return {name: spec
+        for name, spec in coverage_tests.iteritems() if name in args['tests']}
+    elif not args['slow']:
+      return {name: spec
+        for name, spec in coverage_tests.iteritems() if name not in SLOW_TESTS}
+    else:
+      return coverage_tests
+
+  def find_acceptable_binary(self, binary_name, version_regex,
+                             min_major_version, min_minor_version):
+    """Return the newest binary_name* that meets the min version, or None."""
+    min_version = (min_major_version, min_minor_version)
+    parsed_versions = {}
+    # When calling Bash builtins like this the command and arguments must be
+    # passed in as a single string instead of as separate list members.
+    potential_binaries = self.check_output(
+        ['bash', '-c', 'compgen -abck %s || true' % binary_name]).splitlines()
+    for binary in potential_binaries:
+      if self.verbose:
+        print 'Testing %s binary, %s' % (binary_name, binary)
+      # Assuming that scripts that don't respond to --version correctly are not
+      # valid binaries and just happened to get globbed in. This is true for
+      # lcov and llvm-cov
+      try:
+        version_output = self.check_output([binary, '--version']).splitlines()
+      except (subprocess.CalledProcessError, OSError):
+        if self.verbose:
+          print "'%s --version' failed, so ignoring" % binary
+        continue
+
+      for line in version_output:
+        matcher = re.match(version_regex, line)
+        if matcher:
+          parsed_version = (int(matcher.group(1)), int(matcher.group(2)))
+          if parsed_version >= min_version:
+            parsed_versions[parsed_version] = binary
+          break
+
+    if not parsed_versions:
+      return None
+    return parsed_versions[max(parsed_versions)]
+
+  def determine_proper_llvm_cov(self):
+    """Find a version of llvm-cov (3.5 or later) that works with the script."""
+    version_regex = re.compile(r'.*LLVM version ([\d]+)\.([\d]+).*')
+    return self.find_acceptable_binary('llvm-cov', version_regex, 3, 5)
+
+  def determine_proper_lcov(self):
+    """Find a version of lcov (1.11 or later) that works with the script."""
+    version_regex = re.compile(r'.*LCOV version ([\d]+)\.([\d]+).*')
+    return self.find_acceptable_binary('lcov', version_regex, 1, 11)
+
+  def build_binaries(self):
+    """Build everything in the build directory with ninja, as coverage
+    generation needs the binaries. Returns True if ninja exited with 0."""
+    call_args = ['ninja']
+    if self.use_goma:
+      call_args.extend(['-j', '250'])
+    call_args.extend(['-C', self.build_directory])
+    return self.call(call_args, dry_run=self.dry_run) == 0
+
+  def generate_coverage(self, name, spec):
+    """Generate the coverage data for a test
+
+    Args:
+        name: Label for the test in the coverage data and prefix of its output
+              files, so should be unique across all of the tests being run.
+        spec: Must expose the path to run as .binary and a .use_test_runner flag.
+    Returns:
+        True if the binary exists and every lcov step succeeded, False otherwise.
+    """
+    if self.verbose:
+      print "Generating coverage for test '%s', using data '%s'" % (name, spec)
+    if not os.path.exists(spec.binary):
+      print('Unable to generate coverage for %s, since it appears to not exist'
+            ' @ %s') % (name, spec.binary)
+      return False
+
+    if self.call_lcov(['--zerocounters'], dry_run=self.dry_run):
+      print 'Unable to clear counters for %s' % name
+      return False
+
+    binary_args = [spec.binary]
+    if spec.use_test_runner:
+      # Test runner performs multi-threading in the wrapper script, not the test
+      # binary, so need -j 1, otherwise multiple processes will be writing to
+      # the code coverage files, invalidating results.
+      # TODO(pdfium:811): Rewrite how test runner tests work, so that they can
+      # be run in multi-threaded mode.
+      binary_args.extend(['-j', '1', '--build-dir', self.build_directory])
+    if self.call(binary_args, dry_run=self.dry_run) and self.verbose:
+      print('Running %s appears to have failed, which might affect '
+            'results') % spec.binary
+
+    output_raw_path = os.path.join(self.output_directory, '%s_raw.info' % name)
+    if self.call_lcov(
+        ['--capture', '--test-name', name, '--output-file', output_raw_path],
+        dry_run=self.dry_run):
+      print 'Unable to capture coverage data for %s' % name
+      return False
+
+    output_filtered_path = os.path.join(self.output_directory,
+                                        '%s_filtered.info' % name)
+    output_filters = [
+        '/usr/include/*', '*third_party*', '*testing*', '*_unittest.cpp',
+        '*_embeddertest.cpp'
+    ]
+    if self.call_lcov(
+        ['--remove', output_raw_path] + output_filters +
+        ['--output-file', output_filtered_path],
+        dry_run=self.dry_run,
+        needs_directory=False):
+      print 'Unable to filter coverage data for %s' % name
+      return False
+
+    self.coverage_files.add(output_filtered_path)
+    return True
+
+  def merge_coverage(self):
+    """Merge the per-test data into pdfium_totals.info; True on success."""
+    merge_args = []
+    for coverage_file in self.coverage_files:
+      merge_args.extend(['--add-tracefile', coverage_file])
+
+    merge_args.extend(['--output-file', self.coverage_totals_path])
+    return self.call_lcov(
+        merge_args, dry_run=self.dry_run, needs_directory=False) == 0
+
+  def generate_report(self):
+    """Run genhtml on the combined coverage data set; True on success."""
+    config_file = os.path.join(
+        self.source_directory, 'tools', 'coverage', 'lcovrc')
+
+    lcov_args = ['genhtml',
+      '--config-file', config_file,
+      '--legend',
+      '--demangle-cpp',
+      '--show-details',
+      '--prefix', self.source_directory,
+      '--ignore-errors',
+      'source', self.coverage_totals_path,
+      '--output-directory', self.output_directory]
+    return self.call(lcov_args, dry_run=self.dry_run) == 0
+
+  def run(self):
+    """Build, run each test, merge the data and make the report; True if ok."""
+    if not self.build_binaries():
+      print 'Failed to successfully build binaries'
+      return False
+
+    for name in self.coverage_tests.keys():
+      if not self.generate_coverage(name, self.coverage_tests[name]):
+        print 'Failed to successfully generate coverage data'
+        return False
+
+    if not self.merge_coverage():
+      print 'Failed to successfully merge generated coverage data'
+      return False
+
+    if not self.generate_report():
+      print 'Failed to successfully generate coverage report'
+      return False
+
+    return True
+
+
+def main():
+  """Parse the command line and run the executor; returns the exit status."""
+  parser = argparse.ArgumentParser()
+  parser.formatter_class = argparse.RawDescriptionHelpFormatter
+  parser.description = ('Generates a coverage report for given binaries using '
+                        'llvm-cov & lcov.\n\n'
+                        'Requires llvm-cov 3.5 or later.\n'
+                        'Requires lcov 1.11 or later.\n\n'
+                        'By default runs pdfium_unittests and '
+                        'pdfium_embeddertests. If --slow is passed in then all '
+                        'tests will be run. If any of the tests are specified '
+                        'on the command line, then only those will be run.')
+  parser.add_argument(
+      '-s',
+      '--source_directory',
+      metavar='DIR',
+      help='Location of PDFium source directory, defaults to CWD',
+      default=os.getcwd())
+  build_default = os.path.join('out', 'Coverage')
+  parser.add_argument(
+      '-b',
+      '--build_directory',
+      metavar='DIR',
+      help='Location of PDFium build directory with coverage enabled, '
+      'defaults to %s under CWD' % build_default,
+      default=os.path.join(os.getcwd(), build_default))
+  output_default = 'coverage_report'
+  parser.add_argument(
+      '-o',
+      '--output_directory',
+      metavar='DIR',
+      help='Location to write out coverage report to, defaults to %s under CWD '
+      % output_default,
+      default=os.path.join(os.getcwd(), output_default))
+  parser.add_argument(
+      '-n',
+      '--dry-run',
+      help='Output commands instead of executing them',
+      action='store_true')
+  parser.add_argument(
+      '-v',
+      '--verbose',
+      help='Output additional diagnostic information',
+      action='store_true')
+  parser.add_argument(
+      '--slow',
+      help='Run all tests, even those known to take a long time. Ignored if '
+      'specific tests are passed in.',
+      action='store_true')
+  parser.add_argument(
+      'tests',
+      help='Tests to be run, defaults to the non-slow ones. Valid entries are '
+      '%s' % COVERAGE_TESTS.keys(),
+      nargs='*')
+
+  args = vars(parser.parse_args())
+  if args['verbose']:
+    pprint.pprint(args)
+
+  executor = CoverageExecutor(parser, args)
+  if executor.run():
+    return 0
+  return 1
+
+
+if __name__ == '__main__':
+  sys.exit(main())
diff --git a/tools/coverage/lcovrc b/tools/coverage/lcovrc
new file mode 100644
index 0000000000..d2d39d75da
--- /dev/null
+++ b/tools/coverage/lcovrc
@@ -0,0 +1,156 @@
+# Specify an external style sheet file (same as --css-file option of genhtml)
+#genhtml_css_file = gcov.css
+
+# Specify coverage rate limits (in %) for classifying file entries
+# HI:   hi_limit <= rate <= 100         graph color: green
+# MED: med_limit <= rate <  hi_limit    graph color: orange
+# LO:         0  <= rate <  med_limit   graph color: red
+genhtml_hi_limit = 80
+genhtml_med_limit = 50
+
+# Width of line coverage field in source code view
+genhtml_line_field_width = 12
+
+# Width of branch coverage field in source code view
+genhtml_branch_field_width = 16
+
+# Width of overview image (used by --frames option of genhtml)
+genhtml_overview_width = 80
+
+# Resolution of overview navigation: this number specifies the maximum
+# difference in lines between the position a user selected from the overview
+# and the position the source code window is scrolled to (used by --frames
+# option of genhtml)
+genhtml_nav_resolution = 4
+
+# Clicking a line in the overview image should show the source code view at
+# a position a bit further up so that the requested line is not the first
+# line in the window. This number specifies that offset in lines (used by
+# --frames option of genhtml)
+genhtml_nav_offset = 10
+
+# Do not remove unused test descriptions if non-zero (same as
+# --keep-descriptions option of genhtml)
+genhtml_keep_descriptions = 0
+
+# Do not remove prefix from directory names if non-zero (same as --no-prefix
+# option of genhtml)
+genhtml_no_prefix = 0
+
+# Do not create source code view if non-zero (same as --no-source option of
+# genhtml)
+genhtml_no_source = 0
+
+# Replace tabs with number of spaces in source view (same as --num-spaces
+# option of genhtml)
+genhtml_num_spaces = 8
+
+# Highlight lines with converted-only data if non-zero (same as --highlight
+# option of genhtml)
+genhtml_highlight = 0
+
+# Include color legend in HTML output if non-zero (same as --legend option of
+# genhtml)
+genhtml_legend = 0
+
+# Use FILE as HTML prolog for generated pages (same as --html-prolog option of
+# genhtml)
+#genhtml_html_prolog = FILE
+
+# Use FILE as HTML epilog for generated pages (same as --html-epilog option of
+# genhtml)
+#genhtml_html_epilog = FILE
+
+# Use custom filename extension for pages (same as --html-extension option of
+# genhtml)
+#genhtml_html_extension = html
+
+# Compress all generated html files with gzip.
+#genhtml_html_gzip = 1
+
+# Include sorted overview pages (can be disabled by the --no-sort option of
+# genhtml)
+genhtml_sort = 1
+
+# Include function coverage data display (can be disabled by the
+# --no-func-coverage option of genhtml)
+genhtml_function_coverage = 1
+
+# Include branch coverage data display (can be disabled by the
+# --no-branch-coverage option of genhtml)
+genhtml_branch_coverage = 1
+
+# Specify the character set of all generated HTML pages
+genhtml_charset=UTF-8
+
+# Allow HTML markup in test case description text if non-zero
+genhtml_desc_html=0
+
+# Specify the precision for coverage rates
+#genhtml_precision=1
+
+# Location of the gcov tool (same as --gcov-tool option of geninfo)
+#geninfo_gcov_tool = llvm-gcov
+
+# Adjust test names to include operating system information if non-zero
+#geninfo_adjust_testname = 0
+
+# Calculate checksum for each source code line if non-zero (same as --checksum
+# option of geninfo if non-zero, same as --no-checksum if zero)
+#geninfo_checksum = 1
+
+# Specify whether to capture coverage data for external source files (can
+# be overridden by the --external and --no-external options of geninfo/lcov)
+geninfo_external = 1
+
+# Enable libtool compatibility mode if non-zero (same as --compat-libtool option
+# of geninfo if non-zero, same as --no-compat-libtool if zero)
+#geninfo_compat_libtool = 0
+
+# Use gcov's --all-blocks option if non-zero
+#geninfo_gcov_all_blocks = 1
+
+# Specify compatibility modes (same as --compat option of geninfo).
+#geninfo_compat = libtool=on, hammer=auto, split_crc=auto
+
+# Adjust path to source files by removing or changing path components that
+# match the specified pattern (Perl regular expression format)
+#geninfo_adjust_src_path = /tmp/build => /usr/src
+
+# Specify if geninfo should try to automatically determine the base-directory
+# when collecting coverage data.
+geninfo_auto_base = 1
+
+# Directory containing gcov kernel files
+# lcov_gcov_dir = /proc/gcov
+
+# Location of the insmod tool
+lcov_insmod_tool = /sbin/insmod
+
+# Location of the modprobe tool
+lcov_modprobe_tool = /sbin/modprobe
+
+# Location of the rmmod tool
+lcov_rmmod_tool = /sbin/rmmod
+
+# Location for temporary directories
+lcov_tmp_dir = /tmp
+
+# Show full paths during list operation if non-zero (same as --list-full-path
+# option of lcov)
+lcov_list_full_path = 0
+
+# Specify the maximum width for list output. This value is ignored when
+# lcov_list_full_path is non-zero.
+lcov_list_width = 80
+
+# Specify the maximum percentage of file names which may be truncated when
+# choosing a directory prefix in list output. This value is ignored when
+# lcov_list_full_path is non-zero.
+lcov_list_truncate_max = 20
+
+# Specify if function coverage data should be collected and processed.
+lcov_function_coverage = 1
+
+# Specify if branch coverage data should be collected and processed.
+lcov_branch_coverage = 1
diff --git a/tools/coverage/llvm-gcov b/tools/coverage/llvm-gcov
new file mode 100755
index 0000000000..8141e7ef27
--- /dev/null
+++ b/tools/coverage/llvm-gcov
@@ -0,0 +1,15 @@
+#!/bin/bash
+# Copyright 2017 The PDFium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+# Wrapper script to make llvm-cov behave like gcov, so it can be passed in as the --gcov-tool
+# parameter when using lcov. Specifically adds the keyword 'gcov' to the arguments being passed in,
+# to tell llvm-cov to operate in gcov compatibility mode.
+#
+# LLVM_COV_BIN needs to be set by caller and should be the path to
+# a llvm-cov binary with a version of 3.5 or greater.
+
+set -e
+
+exec "${LLVM_COV_BIN:?LLVM_COV_BIN must be set}" gcov "$@"
-- 
cgit v1.2.3