From 56f7117d3fd5799d7c1d13a9c3723e67f508d553 Mon Sep 17 00:00:00 2001 From: Lei Zhang Date: Tue, 13 Feb 2018 21:50:53 +0000 Subject: Teach fixup_pdf_template.py to automatically calculate stream length. BUG=pdfium:1008 Change-Id: I5136d57bd401d44b56ac19e5cfb52702afa32200 Reviewed-on: https://pdfium-review.googlesource.com/26651 Commit-Queue: Lei Zhang Reviewed-by: Ryan Harrison --- testing/resources/bug_921.in | 2 +- testing/resources/combobox_form.in | 2 +- testing/resources/text_form.in | 2 +- testing/resources/text_form_multiple.in | 2 +- testing/tools/fixup_pdf_template.py | 39 +++++++++++++++++++++++++++++++++ 5 files changed, 43 insertions(+), 4 deletions(-) diff --git a/testing/resources/bug_921.in b/testing/resources/bug_921.in index 9c48b2004e..a0c2e73cc8 100644 --- a/testing/resources/bug_921.in +++ b/testing/resources/bug_921.in @@ -43,7 +43,7 @@ endobj >> endobj {{object 6 0}} << -/Length 2784 +{{streamlen}} >> stream BT diff --git a/testing/resources/combobox_form.in b/testing/resources/combobox_form.in index 6c9f914023..77085f2861 100644 --- a/testing/resources/combobox_form.in +++ b/testing/resources/combobox_form.in @@ -33,7 +33,7 @@ endobj >> endobj {{object 7 0}} -<< /Length 51 >> +<< {{streamlen}} >> stream BT 0 0 0 rg diff --git a/testing/resources/text_form.in b/testing/resources/text_form.in index c30f0bac4e..12ea13a3e2 100644 --- a/testing/resources/text_form.in +++ b/testing/resources/text_form.in @@ -42,7 +42,7 @@ endobj >> endobj {{object 8 0}} -<< /Length 51 >> +<< {{streamlen}} >> stream BT 0 0 0 rg diff --git a/testing/resources/text_form_multiple.in b/testing/resources/text_form_multiple.in index 576e15597c..066ab202df 100644 --- a/testing/resources/text_form_multiple.in +++ b/testing/resources/text_form_multiple.in @@ -42,7 +42,7 @@ endobj >> endobj {{object 8 0}} -<< /Length 51 >> +<< {{streamlen}} >> stream BT 0 0 0 rg diff --git a/testing/tools/fixup_pdf_template.py b/testing/tools/fixup_pdf_template.py index 0f31eccada..19f75e0263 100755 --- a/testing/tools/fixup_pdf_template.py +++ b/testing/tools/fixup_pdf_template.py @@ -13,13 +13,20 @@ script replaces {{name}}-style variables in the input with calculated results {{trailer}} - expands to a standard trailer with "1 0 R" as the /Root. {{startxref} - expands to a startxref directive followed by correct offset. {{object x y}} - expands to |x y obj| declaration, noting the offset. + {{streamlen}} - expands to |/Length n|. """ +import cStringIO import optparse import os import re import sys +class StreamLenState: + START = 1 + FIND_STREAM = 2 + FIND_ENDSTREAM = 3 + class TemplateProcessor: HEADER_TOKEN = '{{header}}' HEADER_REPLACEMENT = '%PDF-1.7\n%\xa0\xf2\xa4\xf4' @@ -41,7 +48,12 @@ class TemplateProcessor: OBJECT_PATTERN = r'\{\{object\s+(\d+)\s+(\d+)\}\}' OBJECT_REPLACEMENT = r'\1 \2 obj' + STREAMLEN_TOKEN = '{{streamlen}}' + STREAMLEN_REPLACEMENT = '/Length %d' + def __init__(self): + self.streamlen_state = StreamLenState.START + self.streamlens = [] self.offset = 0 self.xref_offset = 0 self.max_object_number = 0 @@ -60,9 +72,30 @@ class TemplateProcessor: result += self.XREF_REPLACEMENT_F return result + def preprocess_line(self, line): + if self.STREAMLEN_TOKEN in line: + assert(self.streamlen_state == StreamLenState.START) + self.streamlen_state = StreamLenState.FIND_STREAM + self.streamlens.append(0) + return + + if (self.streamlen_state == StreamLenState.FIND_STREAM and + line.rstrip() == 'stream'): + self.streamlen_state = StreamLenState.FIND_ENDSTREAM + return + + if self.streamlen_state == StreamLenState.FIND_ENDSTREAM: + if line.rstrip() == 'endstream': + self.streamlen_state = StreamLenState.START + else: + self.streamlens[-1] += len(line) + def process_line(self, line): if self.HEADER_TOKEN in line: line = line.replace(self.HEADER_TOKEN, self.HEADER_REPLACEMENT) + if self.STREAMLEN_TOKEN in line: + sub = self.STREAMLEN_REPLACEMENT % self.streamlens.pop(0) + line = re.sub(self.STREAMLEN_TOKEN, sub, line) if self.XREF_TOKEN in line: self.xref_offset = self.offset line = self.generate_xref_table() @@ -76,6 +109,7 @@ class TemplateProcessor: if match: self.insert_xref_entry(int(match.group(1)), int(match.group(2))) line = re.sub(self.OBJECT_PATTERN, self.OBJECT_REPLACEMENT, line) + self.offset += len(line) return line @@ -85,7 +119,12 @@ def expand_file(input_path, output_path): try: with open(input_path, 'rb') as infile: with open(output_path, 'wb') as outfile: + preprocessed = cStringIO.StringIO() for line in infile: + preprocessed.write(line) + processor.preprocess_line(line) + preprocessed.seek(0) + for line in preprocessed: outfile.write(processor.process_line(line)) except IOError: print >> sys.stderr, 'failed to process %s' % input_path -- cgit v1.2.3