From 9bd18183ba8210c91d71c3060146235750a4c71c Mon Sep 17 00:00:00 2001 From: Jun Fang Date: Fri, 25 Sep 2015 20:32:46 -0700 Subject: Fix the issue that pdfium swallows 'fi' or 'ff' in some pdf files Pdfium swallows 'fi' or 'ff' in some tested files because it doesn't load the embedded font file correctly. The root cause is that there is an incorrect keyword like 'ngendstream' in the stream of the embedded font file. Pdfium tries to find another correct keyword but uses the wrong offset rather than the accumulated offset. BUG=524043 R=thestig@chromium.org, tsepez@chromium.org Review URL: https://codereview.chromium.org/1307353005 . --- testing/resources/pixel/bug_524043_1.in | 61 ++++++++++++++++++++ .../pixel/bug_524043_1_expected.pdf.0.png | Bin 0 -> 5433 bytes testing/resources/pixel/bug_524043_2.in | 62 +++++++++++++++++++++ .../pixel/bug_524043_2_expected.pdf.0.png | Bin 0 -> 2237 bytes testing/resources/pixel/bug_524043_3.in | 62 +++++++++++++++++++++ .../pixel/bug_524043_3_expected.pdf.0.png | Bin 0 -> 5433 bytes testing/resources/pixel/bug_524043_4.in | 61 ++++++++++++++++++++ .../pixel/bug_524043_4_expected.pdf.0.png | Bin 0 -> 5433 bytes testing/resources/pixel/bug_524043_5.in | 61 ++++++++++++++++++++ .../pixel/bug_524043_5_expected.pdf.0.png | Bin 0 -> 5433 bytes testing/resources/pixel/bug_524043_6.in | 60 ++++++++++++++++++++ .../pixel/bug_524043_6_expected.pdf.0.png | Bin 0 -> 590 bytes testing/resources/pixel/bug_524043_7.in | 61 ++++++++++++++++++++ .../pixel/bug_524043_7_expected.pdf.0.png | Bin 0 -> 2237 bytes 14 files changed, 428 insertions(+) create mode 100644 testing/resources/pixel/bug_524043_1.in create mode 100644 testing/resources/pixel/bug_524043_1_expected.pdf.0.png create mode 100644 testing/resources/pixel/bug_524043_2.in create mode 100644 testing/resources/pixel/bug_524043_2_expected.pdf.0.png create mode 100644 testing/resources/pixel/bug_524043_3.in create mode 100644 testing/resources/pixel/bug_524043_3_expected.pdf.0.png create mode 100644 
testing/resources/pixel/bug_524043_4.in create mode 100644 testing/resources/pixel/bug_524043_4_expected.pdf.0.png create mode 100644 testing/resources/pixel/bug_524043_5.in create mode 100644 testing/resources/pixel/bug_524043_5_expected.pdf.0.png create mode 100644 testing/resources/pixel/bug_524043_6.in create mode 100644 testing/resources/pixel/bug_524043_6_expected.pdf.0.png create mode 100644 testing/resources/pixel/bug_524043_7.in create mode 100644 testing/resources/pixel/bug_524043_7_expected.pdf.0.png (limited to 'testing') diff --git a/testing/resources/pixel/bug_524043_1.in b/testing/resources/pixel/bug_524043_1.in new file mode 100644 index 0000000000..61ac6f4bb2 --- /dev/null +++ b/testing/resources/pixel/bug_524043_1.in @@ -0,0 +1,61 @@ +{{header}} +{{object 1 0}} << + /Type /Catalog + /Pages 2 0 R +>> +{{object 2 0}} << + /Type /Pages + /MediaBox [ 0 0 200 200 ] + /Count 1 + /Kids [ 3 0 R ] +>> +endobj +{{object 3 0}} << + /Type /Page + /Parent 2 0 R + /Resources << + /Font << + /F1 4 0 R + /F2 5 0 R + >> + >> + /Contents 6 0 R +>> +endobj +{{object 4 0}} << + /Type /Font + /Subtype /Type1 + /BaseFont /Times-Roman +>> +endobj +{{object 5 0}} << + /Type /Font + /Subtype /Type1 + /BaseFont /Helvetica +>> +endobj +% Case 1: +% /Length identifies intended end of stream, despite embedded keywords. +% Both should render as text. 
+{{object 6 0}} << + /Length 107 +>> +stream +BT +20 50 Td +/F1 12 Tf +(endobj is text) Tj +endstream +0 50 Td +/F2 13 Tf +(endstream is text per /Length) Tj +ET +endstream +endobj +{{xref}} +trailer << + /Size 6 + /Root 1 0 R +>> +{{startxref}} +%%EOF diff --git a/testing/resources/pixel/bug_524043_1_expected.pdf.0.png b/testing/resources/pixel/bug_524043_1_expected.pdf.0.png new file mode 100644 index 0000000000..a044bb0c37 Binary files /dev/null and b/testing/resources/pixel/bug_524043_1_expected.pdf.0.png differ diff --git a/testing/resources/pixel/bug_524043_2.in b/testing/resources/pixel/bug_524043_2.in new file mode 100644 index 0000000000..0ee43da106 --- /dev/null +++ b/testing/resources/pixel/bug_524043_2.in @@ -0,0 +1,62 @@ +{{header}} +{{object 1 0}} << + /Type /Catalog + /Pages 2 0 R +>> +{{object 2 0}} << + /Type /Pages + /MediaBox [ 0 0 200 200 ] + /Count 1 + /Kids [ 3 0 R ] +>> +endobj +{{object 3 0}} << + /Type /Page + /Parent 2 0 R + /Resources << + /Font << + /F1 4 0 R + /F2 5 0 R + >> + >> + /Contents 6 0 R +>> +endobj +{{object 4 0}} << + /Type /Font + /Subtype /Type1 + /BaseFont /Times-Roman +>> +endobj +{{object 5 0}} << + /Type /Font + /Subtype /Type1 + /BaseFont /Helvetica +>> +endobj +% Case 2: +% /Length incorrectly identifies middle of stream. +% The stream is blocked by the keyword "endstream" +% Only the text before the keyword can be rendered. +{{object 6 0}} << + /Length 87 +>> +stream +BT +20 50 Td +/F1 12 Tf +(endobj is text) Tj +endstream +0 50 Td +/F2 13 Tf +(It's wrong when you see the text!) 
Tj +ET +endstream +endobj +{{xref}} +trailer << + /Size 6 + /Root 1 0 R +>> +{{startxref}} +%%EOF diff --git a/testing/resources/pixel/bug_524043_2_expected.pdf.0.png b/testing/resources/pixel/bug_524043_2_expected.pdf.0.png new file mode 100644 index 0000000000..19de52a360 Binary files /dev/null and b/testing/resources/pixel/bug_524043_2_expected.pdf.0.png differ diff --git a/testing/resources/pixel/bug_524043_3.in b/testing/resources/pixel/bug_524043_3.in new file mode 100644 index 0000000000..716345ede1 --- /dev/null +++ b/testing/resources/pixel/bug_524043_3.in @@ -0,0 +1,62 @@ +{{header}} +{{object 1 0}} << + /Type /Catalog + /Pages 2 0 R +>> +{{object 2 0}} << + /Type /Pages + /MediaBox [ 0 0 200 200 ] + /Count 1 + /Kids [ 3 0 R ] +>> +endobj +{{object 3 0}} << + /Type /Page + /Parent 2 0 R + /Resources << + /Font << + /F1 4 0 R + /F2 5 0 R + >> + >> + /Contents 6 0 R +>> +endobj +{{object 4 0}} << + /Type /Font + /Subtype /Type1 + /BaseFont /Times-Roman +>> +endobj +{{object 5 0}} << + /Type /Font + /Subtype /Type1 + /BaseFont /Helvetica +>> +endobj +% Case 3: +% /Length incorrectly identifies middle of stream. +% "eendstream" shouldn't block the stream. +% Both should render as text. 
+{{object 6 0}} << + /Length 87 +>> +stream +BT +20 50 Td +/F1 12 Tf +(endobj is text) Tj +eendstream +0 50 Td +/F2 13 Tf +(endstream is text per /Length) Tj +ET +endstream +endobj +{{xref}} +trailer << + /Size 6 + /Root 1 0 R +>> +{{startxref}} +%%EOF diff --git a/testing/resources/pixel/bug_524043_3_expected.pdf.0.png b/testing/resources/pixel/bug_524043_3_expected.pdf.0.png new file mode 100644 index 0000000000..a044bb0c37 Binary files /dev/null and b/testing/resources/pixel/bug_524043_3_expected.pdf.0.png differ diff --git a/testing/resources/pixel/bug_524043_4.in b/testing/resources/pixel/bug_524043_4.in new file mode 100644 index 0000000000..6cdb3d18a7 --- /dev/null +++ b/testing/resources/pixel/bug_524043_4.in @@ -0,0 +1,61 @@ +{{header}} +{{object 1 0}} << + /Type /Catalog + /Pages 2 0 R +>> +{{object 2 0}} << + /Type /Pages + /MediaBox [ 0 0 200 200 ] + /Count 1 + /Kids [ 3 0 R ] +>> +endobj +{{object 3 0}} << + /Type /Page + /Parent 2 0 R + /Resources << + /Font << + /F1 4 0 R + /F2 5 0 R + >> + >> + /Contents 6 0 R +>> +endobj +{{object 4 0}} << + /Type /Font + /Subtype /Type1 + /BaseFont /Times-Roman +>> +endobj +{{object 5 0}} << + /Type /Font + /Subtype /Type1 + /BaseFont /Helvetica +>> +endobj +% Case 4: +% /Length incorrectly identifies middle of stream. +% "endstream." shouldn't block the stream. +% Both should render as text even if "endstream" is missing. +{{object 6 0}} << + /Length 87 +>> +stream +BT +20 50 Td +/F1 12 Tf +(endobj is text) Tj +endstream. 
+0 50 Td +/F2 13 Tf +(endstream is text per /Length) Tj +ET +endobj +{{xref}} +trailer << + /Size 6 + /Root 1 0 R +>> +{{startxref}} +%%EOF diff --git a/testing/resources/pixel/bug_524043_4_expected.pdf.0.png b/testing/resources/pixel/bug_524043_4_expected.pdf.0.png new file mode 100644 index 0000000000..a044bb0c37 Binary files /dev/null and b/testing/resources/pixel/bug_524043_4_expected.pdf.0.png differ diff --git a/testing/resources/pixel/bug_524043_5.in b/testing/resources/pixel/bug_524043_5.in new file mode 100644 index 0000000000..799674628d --- /dev/null +++ b/testing/resources/pixel/bug_524043_5.in @@ -0,0 +1,61 @@ +{{header}} +{{object 1 0}} << + /Type /Catalog + /Pages 2 0 R +>> +{{object 2 0}} << + /Type /Pages + /MediaBox [ 0 0 200 200 ] + /Count 1 + /Kids [ 3 0 R ] +>> +endobj +{{object 3 0}} << + /Type /Page + /Parent 2 0 R + /Resources << + /Font << + /F1 4 0 R + /F2 5 0 R + >> + >> + /Contents 6 0 R +>> +endobj +{{object 4 0}} << + /Type /Font + /Subtype /Type1 + /BaseFont /Times-Roman +>> +endobj +{{object 5 0}} << + /Type /Font + /Subtype /Type1 + /BaseFont /Helvetica +>> +endobj +% Case 5: +% /Length incorrectly identifies middle of stream. +% "endstream%" shouldn't block the stream. +% Both should render as text even if "endobj" is missing. 
+{{object 6 0}} << + /Length 87 +>> +stream +BT +20 50 Td +/F1 12 Tf +(endobj is text) Tj +endstream% +0 50 Td +/F2 13 Tf +(endstream is text per /Length) Tj +ET +endstream +{{xref}} +trailer << + /Size 6 + /Root 1 0 R +>> +{{startxref}} +%%EOF diff --git a/testing/resources/pixel/bug_524043_5_expected.pdf.0.png b/testing/resources/pixel/bug_524043_5_expected.pdf.0.png new file mode 100644 index 0000000000..a044bb0c37 Binary files /dev/null and b/testing/resources/pixel/bug_524043_5_expected.pdf.0.png differ diff --git a/testing/resources/pixel/bug_524043_6.in b/testing/resources/pixel/bug_524043_6.in new file mode 100644 index 0000000000..318b20750d --- /dev/null +++ b/testing/resources/pixel/bug_524043_6.in @@ -0,0 +1,60 @@ +{{header}} +{{object 1 0}} << + /Type /Catalog + /Pages 2 0 R +>> +{{object 2 0}} << + /Type /Pages + /MediaBox [ 0 0 200 200 ] + /Count 1 + /Kids [ 3 0 R ] +>> +endobj +{{object 3 0}} << + /Type /Page + /Parent 2 0 R + /Resources << + /Font << + /F1 4 0 R + /F2 5 0 R + >> + >> + /Contents 6 0 R +>> +endobj +{{object 4 0}} << + /Type /Font + /Subtype /Type1 + /BaseFont /Times-Roman +>> +endobj +{{object 5 0}} << + /Type /Font + /Subtype /Type1 + /BaseFont /Helvetica +>> +endobj +% Case 6: +% /Length incorrectly identifies middle of stream. +% "endstream+" shouldn't block the stream. +% Nothing will be rendered when both "endstream" and "endobj" are missing. 
+{{object 6 0}} << + /Length 87 +>> +stream +BT +20 50 Td +/F1 12 Tf +(endobj is text) Tj +endstream+ +0 50 Td +/F2 13 Tf +(endstream is text per /Length) Tj +ET +{{xref}} +trailer << + /Size 6 + /Root 1 0 R +>> +{{startxref}} +%%EOF diff --git a/testing/resources/pixel/bug_524043_6_expected.pdf.0.png b/testing/resources/pixel/bug_524043_6_expected.pdf.0.png new file mode 100644 index 0000000000..3edcc2db89 Binary files /dev/null and b/testing/resources/pixel/bug_524043_6_expected.pdf.0.png differ diff --git a/testing/resources/pixel/bug_524043_7.in b/testing/resources/pixel/bug_524043_7.in new file mode 100644 index 0000000000..c95f2d94e1 --- /dev/null +++ b/testing/resources/pixel/bug_524043_7.in @@ -0,0 +1,61 @@ +{{header}} +{{object 1 0}} << + /Type /Catalog + /Pages 2 0 R +>> +{{object 2 0}} << + /Type /Pages + /MediaBox [ 0 0 200 200 ] + /Count 1 + /Kids [ 3 0 R ] +>> +endobj +{{object 3 0}} << + /Type /Page + /Parent 2 0 R + /Resources << + /Font << + /F1 4 0 R + /F2 5 0 R + >> + >> + /Contents 6 0 R +>> +endobj +{{object 4 0}} << + /Type /Font + /Subtype /Type1 + /BaseFont /Times-Roman +>> +endobj +{{object 5 0}} << + /Type /Font + /Subtype /Type1 + /BaseFont /Helvetica +>> +endobj +% Case 7: +% /Length incorrectly identifies middle of stream. +% "endstream" will block the stream. +% The text before the keyword "endstream" can be rendered although +% both "endstream" and "endobj" are missing at the end of stream. +{{object 6 0}} << + /Length 87 +>> +stream +BT +20 50 Td +/F1 12 Tf +(endobj is text) Tj +endstream +0 50 Td +/F2 13 Tf +(endstream is text per /Length) Tj +ET +{{xref}} +trailer << + /Size 6 + /Root 1 0 R +>> +{{startxref}} +%%EOF diff --git a/testing/resources/pixel/bug_524043_7_expected.pdf.0.png b/testing/resources/pixel/bug_524043_7_expected.pdf.0.png new file mode 100644 index 0000000000..19de52a360 Binary files /dev/null and b/testing/resources/pixel/bug_524043_7_expected.pdf.0.png differ -- cgit v1.2.3