tools/coverage/coverage_report.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390

#!/usr/bin/env python
# Copyright 2017 The PDFium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Generates a coverage report for given binaries using llvm-gcov & lcov.

Requires llvm-cov 3.5 or later.
Requires lcov 1.11 or later.
Requires that 'use_coverage = true' is set in args.gn.
"""

import argparse
from collections import namedtuple
import pprint
import os
import re
import subprocess
import sys


# Description of a single runnable test.
#   binary: the file that is executed to run the test.
#   use_test_runner: whether 'binary' depends on test_runner.py, in which case
#       it requires special handling when invoked.
TestSpec = namedtuple('TestSpec', 'binary, use_test_runner')

# Every coverage test the script knows how to run, keyed by test name. Each
# row below is (test name, binary file name, uses test_runner.py).
COVERAGE_TESTS = {
    name: TestSpec(binary=binary, use_test_runner=use_test_runner)
    for name, binary, use_test_runner in (
        ('pdfium_unittests', 'pdfium_unittests', False),
        ('pdfium_embeddertests', 'pdfium_embeddertests', False),
        ('corpus_tests', 'run_corpus_tests.py', True),
        ('javascript_tests', 'run_javascript_tests.py', True),
        ('pixel_tests', 'run_pixel_tests.py', True),
    )
}

# Names of coverage tests that are known to be slow. They are left out of the
# default set, so the user has to either name them explicitly or pass --slow.
SLOW_TESTS = [
    'corpus_tests',
    'javascript_tests',
    'pixel_tests',
]

class CoverageExecutor(object):
  """Builds the test binaries, runs them and produces an lcov HTML report.

  The constructor validates the environment (llvm-cov/lcov availability,
  directories, gn arguments) and run() then performs the build, per-test
  coverage capture, merge and HTML report generation steps in that order.

  The code is written so that it runs unchanged on Python 2.7 and Python 3.
  """

  def __init__(self, parser, args):
    """Initialize executor based on the current script environment.

    Exits the process with status 1 if no suitable llvm-cov or lcov can be
    found, and reports every other invalid input through parser.error().

    Args:
        parser: argparse.ArgumentParser for handling improper inputs.
        args: Dictionary of arguments passed into the calling script.
    """
    self.dry_run = args['dry_run']
    self.verbose = args['verbose']

    llvm_cov = self.determine_proper_llvm_cov()
    if not llvm_cov:
      print('Unable to find appropriate llvm-cov to use')
      sys.exit(1)
    # Use a private copy of the environment so that setting LLVM_COV_BIN for
    # the lcov invocations does not modify the environment of this process.
    self.lcov_env = os.environ.copy()
    self.lcov_env['LLVM_COV_BIN'] = llvm_cov

    self.lcov = self.determine_proper_lcov()
    if not self.lcov:
      print('Unable to find appropriate lcov to use')
      sys.exit(1)

    # Filtered .info files produced by generate_coverage(), later merged.
    self.coverage_files = set()
    self.source_directory = args['source_directory']
    if not os.path.isdir(self.source_directory):
      parser.error("'%s' needs to be a directory" % self.source_directory)

    self.build_directory = args['build_directory']
    if not os.path.isdir(self.build_directory):
      parser.error("'%s' needs to be a directory" % self.build_directory)

    self.coverage_tests = self.calculate_coverage_tests(args)
    if not self.coverage_tests:
      parser.error(
          'No valid tests in set to be run. This is likely due to bad command '
          'line arguments')

    if not self.boolean_gn_arg('use_coverage'):
      parser.error(
          'use_coverage does not appear to be set to true for build, but is '
          'needed')

    self.use_goma = self.boolean_gn_arg('use_goma')

    self.output_directory = args['output_directory']
    if not os.path.exists(self.output_directory):
      # A dry run must not touch the file system.
      if not self.dry_run:
        os.makedirs(self.output_directory)
    elif not os.path.isdir(self.output_directory):
      parser.error('%s exists, but is not a directory' % self.output_directory)
    self.coverage_totals_path = os.path.join(self.output_directory,
                                             'pdfium_totals.info')

  def boolean_gn_arg(self, arg):
    """Extract the value of a boolean flag in args.gn.

    Runs 'gn args . --list=<arg> --short' from inside the build directory.

    Args:
        arg: Name of the gn argument to look up.

    Returns:
        True if gn reports the value of the argument as 'true'. False for any
        other value, or if the value cannot be found in the gn output.
    """
    cwd = os.getcwd()
    os.chdir(self.build_directory)
    try:
      gn_args_output = self.check_output(
          ['gn', 'args', '.', '--list=%s' % arg, '--short'])
    finally:
      # Restore the working directory even when gn fails, so that a caller
      # handling the exception is not left inside the build directory.
      os.chdir(cwd)

    matcher = re.match('%s = (.*)' % arg, gn_args_output)
    if not matcher:
      if self.verbose:
        print("Unable to find a value for %s in '%s'" % (arg, gn_args_output))
      return False

    arg_match_output = matcher.group(1)
    if self.verbose:
      print("Found '%s' for value of %s" % (arg_match_output, arg))
    return arg_match_output == 'true'

  def check_output(self, args, dry_run=False, env=None):
    """Dry run aware wrapper of subprocess.check_output().

    Args:
        args: Command, as a list of strings, to execute.
        dry_run: If True the command is only printed and '' is returned.
        env: Optional environment mapping for the child process.

    Returns:
        The output of the command as text.

    Raises:
        subprocess.CalledProcessError: The command exited with non-zero status.
        OSError: The command could not be started.
    """
    if dry_run:
      print("Would have run '%s'" % ' '.join(args))
      return ''

    # universal_newlines=True makes the output text rather than bytes on
    # Python 3, which the regex matching done by the callers relies on.
    output = subprocess.check_output(args, env=env, universal_newlines=True)

    if self.verbose:
      print("check_output(%s) returned '%s'" % (args, output))
    return output

  def call(self, args, dry_run=False, env=None):
    """Dry run aware wrapper of subprocess.call().

    Args:
        args: Command, as a list of strings, to execute.
        dry_run: If True the command is only printed and 0 is returned.
        env: Optional environment mapping for the child process.

    Returns:
        The exit status of the command.
    """
    if dry_run:
      print("Would have run '%s'" % ' '.join(args))
      return 0

    output = subprocess.call(args, env=env)

    if self.verbose:
      print('call(%s) returned %s' % (args, output))
    return output

  def call_lcov(self, args, dry_run=False, needs_directory=True):
    """Wrapper to call lcov that adds appropriate arguments as needed.

    Args:
        args: lcov arguments specific to this invocation.
        dry_run: If True the command is only printed.
        needs_directory: If True, '--directory <source dir>' is passed.

    Returns:
        The exit status of lcov, 0 meaning success.
    """
    coverage_tools_directory = os.path.join(self.source_directory, 'tools',
                                            'coverage')
    lcov_args = [
        self.lcov, '--config-file',
        os.path.join(coverage_tools_directory, 'lcovrc'),
        '--gcov-tool',
        os.path.join(coverage_tools_directory, 'llvm-gcov')
    ]
    if needs_directory:
      lcov_args.extend(['--directory', self.source_directory])
    if not self.verbose:
      lcov_args.append('--quiet')
    lcov_args.extend(args)
    return self.call(lcov_args, dry_run=dry_run, env=self.lcov_env)

  def calculate_coverage_tests(self, args):
    """Determine which tests should be run.

    Args:
        args: Dictionary of script arguments; 'tests' and 'slow' are read.

    Returns:
        Dictionary mapping test name to a TestSpec whose binary is a full path.
        Explicitly requested tests win; otherwise the slow tests are left out
        unless 'slow' is set.
    """
    testing_tools_directory = os.path.join(self.source_directory, 'testing',
                                           'tools')
    coverage_tests = {}
    for name, test_spec in COVERAGE_TESTS.items():
      # Test runner scripts live in the source tree, real binaries are build
      # outputs.
      if test_spec.use_test_runner:
        directory = testing_tools_directory
      else:
        directory = self.build_directory
      coverage_tests[name] = TestSpec(
          os.path.join(directory, test_spec.binary), test_spec.use_test_runner)

    requested = args['tests']
    if requested:
      for name in requested:
        if name not in coverage_tests:
          # Make typos visible instead of silently running fewer tests.
          print("Ignoring unknown test '%s'" % name)
      return {
          name: spec
          for name, spec in coverage_tests.items()
          if name in requested
      }
    if not args['slow']:
      return {
          name: spec
          for name, spec in coverage_tests.items()
          if name not in SLOW_TESTS
      }
    return coverage_tests

  def find_acceptable_binary(self, binary_name, version_regex,
                             min_major_version, min_minor_version):
    """Find the newest version of binary that meets the min version.

    Args:
        binary_name: Prefix of the command names to consider.
        version_regex: Pattern with two groups, major and minor version, that
            is matched against each line of the '--version' output.
        min_major_version: Minimum acceptable major version.
        min_minor_version: Minimum acceptable minor version.

    Returns:
        The name of the candidate with the highest acceptable version, or None
        if there is no acceptable candidate.
    """
    min_version = (min_major_version, min_minor_version)
    parsed_versions = {}
    # When calling Bash builtins like this the command and arguments must be
    # passed in as a single string instead of as separate list members.
    potential_binaries = self.check_output(
        ['bash', '-c', 'compgen -abck %s' % binary_name]).splitlines()
    for binary in potential_binaries:
      if self.verbose:
        print('Testing %s binary, %s' % (binary_name, binary))
      # Assuming that scripts that don't respond to --version correctly are not
      # valid binaries and just happened to get globbed in. This is true for
      # lcov and llvm-cov
      try:
        version_output = self.check_output([binary, '--version']).splitlines()
      except subprocess.CalledProcessError:
        if self.verbose:
          print('--version returned failure status 1, so ignoring')
        continue
      except OSError:
        # compgen also lists aliases, builtins and keywords, none of which
        # can be executed as a program.
        if self.verbose:
          print('%s could not be executed, so ignoring' % binary)
        continue

      for line in version_output:
        matcher = re.match(version_regex, line)
        if matcher:
          parsed_version = (int(matcher.group(1)), int(matcher.group(2)))
          if parsed_version >= min_version:
            parsed_versions[parsed_version] = binary
          # Only the first line carrying a version is considered.
          break

    if not parsed_versions:
      return None
    return parsed_versions[max(parsed_versions)]

  def determine_proper_llvm_cov(self):
    """Find a version of llvm_cov that will work with the script."""
    version_regex = re.compile(r'.*LLVM version ([\d]+)\.([\d]+).*')
    return self.find_acceptable_binary('llvm-cov', version_regex, 3, 5)

  def determine_proper_lcov(self):
    """Find a version of lcov that will work with the script."""
    version_regex = re.compile(r'.*LCOV version ([\d]+)\.([\d]+).*')
    return self.find_acceptable_binary('lcov', version_regex, 1, 11)

  def build_binaries(self):
    """Build all the binaries that are going to be needed for coverage
    generation.

    Returns:
        True if ninja exited with status 0, False otherwise.
    """
    call_args = ['ninja']
    if self.use_goma:
      call_args.extend(['-j', '250'])
    call_args.extend(['-C', self.build_directory])
    return self.call(call_args, dry_run=self.dry_run) == 0

  def generate_coverage(self, name, spec):
    """Generate the coverage data for a test.

    Args:
        name: Name associated with the test to be run. This is used as a label
              in the coverage data, so should be unique across all of the tests
              being run.
        spec: Tuple containing the path to the binary to run, and if this test
              uses test_runner.py.

    Returns:
        True if filtered coverage data was produced for the test, False if the
        binary is missing or any lcov step failed.
    """
    if self.verbose:
      print("Generating coverage for test '%s', using data '%s'" % (name, spec))
    if not os.path.exists(spec.binary):
      print('Unable to generate coverage for %s, since it appears to not exist'
            ' @ %s' % (name, spec.binary))
      return False

    if self.call_lcov(['--zerocounters'], dry_run=self.dry_run):
      print('Unable to clear counters for %s' % name)
      return False

    binary_args = [spec.binary]
    if spec.use_test_runner:
      # Test runner performs multi-threading in the wrapper script, not the test
      # binary, so need -j 1, otherwise multiple processes will be writing to
      # the code coverage files, invalidating results.
      # TODO(pdfium:811): Rewrite how test runner tests work, so that they can
      # be run in multi-threaded mode.
      binary_args.extend(['-j', '1', '--build-dir', self.build_directory])
    # A failing test run is not fatal; coverage is still captured afterwards.
    if self.call(binary_args, dry_run=self.dry_run) and self.verbose:
      print('Running %s appears to have failed, which might affect '
            'results' % spec.binary)

    output_raw_path = os.path.join(self.output_directory, '%s_raw.info' % name)
    if self.call_lcov(
        ['--capture', '--test-name', name, '--output-file', output_raw_path],
        dry_run=self.dry_run):
      print('Unable to capture coverage data for %s' % name)
      return False

    output_filtered_path = os.path.join(self.output_directory,
                                        '%s_filtered.info' % name)
    # Patterns of files whose coverage is removed from the captured data.
    output_filters = [
        '/usr/include/*', '*third_party*', '*testing*', '*_unittest.cpp',
        '*_embeddertest.cpp'
    ]
    if self.call_lcov(
        ['--remove', output_raw_path] + output_filters +
        ['--output-file', output_filtered_path],
        dry_run=self.dry_run,
        needs_directory=False):
      print('Unable to filter coverage data for %s' % name)
      return False

    self.coverage_files.add(output_filtered_path)
    return True

  def merge_coverage(self):
    """Merge all of the coverage data sets into one for report generation.

    Returns:
        True if lcov exited with status 0, False otherwise.
    """
    merge_args = []
    for coverage_file in self.coverage_files:
      merge_args.extend(['--add-tracefile', coverage_file])

    merge_args.extend(['--output-file', self.coverage_totals_path])
    return self.call_lcov(
        merge_args, dry_run=self.dry_run, needs_directory=False) == 0

  def generate_report(self):
    """Produce HTML coverage report based on combined coverage data set.

    Returns:
        True if genhtml exited with status 0, False otherwise.
    """
    config_file = os.path.join(
        self.source_directory, 'tools', 'coverage', 'lcovrc')

    genhtml_args = [
        'genhtml',
        '--config-file', config_file,
        '--legend',
        '--demangle-cpp',
        '--show-details',
        '--prefix', self.source_directory,
        '--ignore-errors', 'source',
        self.coverage_totals_path,
        '--output-directory', self.output_directory,
    ]
    return self.call(genhtml_args, dry_run=self.dry_run) == 0

  def run(self):
    """Setup environment, execute the tests and generate coverage report.

    Returns:
        True if every step succeeded, False as soon as one of them fails.
    """
    if not self.build_binaries():
      print('Failed to successfully build binaries')
      return False

    for name, spec in self.coverage_tests.items():
      if not self.generate_coverage(name, spec):
        print('Failed to successfully generate coverage data')
        return False

    if not self.merge_coverage():
      print('Failed to successfully merge generated coverage data')
      return False

    if not self.generate_report():
      print('Failed to successfully generated coverage report')
      return False

    return True


def main():
  """Parse the command line and run the coverage pipeline.

  Returns:
      0 if the coverage report was generated successfully, 1 otherwise.
  """
  parser = argparse.ArgumentParser()
  # Raw formatter keeps the explicit line breaks in the description below.
  parser.formatter_class = argparse.RawDescriptionHelpFormatter
  parser.description = ('Generates a coverage report for given binaries using '
                        'llvm-cov & lcov.\n\n'
                        'Requires llvm-cov 3.5 or later.\n'
                        'Requires lcov 1.11 or later.\n\n'
                        'By default runs pdfium_unittests and '
                        'pdfium_embeddertests. If --slow is passed in then all '
                        'tests will be run. If any of the tests are specified '
                        'on the command line, then only those will be run.')
  parser.add_argument(
      '-s',
      '--source_directory',
      help='Location of PDFium source directory, defaults to CWD',
      default=os.getcwd())
  build_default = os.path.join('out', 'Coverage')
  parser.add_argument(
      '-b',
      '--build_directory',
      help=
      'Location of PDFium build directory with coverage enabled, defaults to '
      '%s under CWD' % build_default,
      default=os.path.join(os.getcwd(), build_default))
  output_default = 'coverage_report'
  parser.add_argument(
      '-o',
      '--output_directory',
      help='Location to write out coverage report to, defaults to %s under CWD '
      % output_default,
      default=os.path.join(os.getcwd(), output_default))
  parser.add_argument(
      '-n',
      '--dry-run',
      help='Output commands instead of executing them',
      action='store_true')
  parser.add_argument(
      '-v',
      '--verbose',
      help='Output additional diagnostic information',
      action='store_true')
  parser.add_argument(
      '--slow',
      help='Run all tests, even those known to take a long time. Ignored if '
      'specific tests are passed in.',
      action='store_true')
  # The default set excludes the slow tests, so the help must not claim that
  # everything is run. The names are sorted to give a stable, readable list.
  parser.add_argument(
      'tests',
      help='Tests to be run, defaults to all tests that are not known to be '
      'slow (see --slow). Valid entries are %s' %
      ', '.join(sorted(COVERAGE_TESTS)),
      nargs='*')

  # The executor consumes the arguments as a plain dictionary.
  args = vars(parser.parse_args())
  if args['verbose']:
    pprint.pprint(args)

  executor = CoverageExecutor(parser, args)
  if executor.run():
    return 0
  return 1


if __name__ == '__main__':
  sys.exit(main())