summary | refs | log | tree | commit | diff
path: root/testing
diff options
context:
space:
mode:
author: Ryan Harrison <rharrison@chromium.org> 2018-08-28 20:22:32 +0000
committer: Chromium commit bot <commit-bot@chromium.org> 2018-08-28 20:22:32 +0000
commit: 4d92af5ace491a2e87a6c73e5afa9ed53ac86bd0 (patch)
tree: 16bd5583a1d4650d4e757f0f8abe02ed045a70a1 /testing
parent: 3321f15291b3fff3ebd6ef41bf48d883c8b78352 (diff)
download: pdfium-4d92af5ace491a2e87a6c73e5afa9ed53ac86bd0.tar.xz
Handle non-printing characters at beginning of extraction region
Currently, if a text extraction region begins on a non-printing character, then "" is returned. This behaviour is incorrect; instead, the call should scan ahead until a printing character is found and start extracting from there. This change also proactively adds a similar check and scan for the end of the extraction region.

BUG=pdfium:1139
Change-Id: Ia2001ac89740f3d31d2bb69e8000773f8b01091b
Reviewed-on: https://pdfium-review.googlesource.com/41532
Reviewed-by: Henrique Nakashima <hnakashima@chromium.org>
Commit-Queue: Ryan Harrison <rharrison@chromium.org>
Diffstat (limited to 'testing')
-rw-r--r-- testing/resources/bug_1139.in | 55
-rw-r--r-- testing/resources/bug_1139.pdf | 68
2 files changed, 123 insertions, 0 deletions
diff --git a/testing/resources/bug_1139.in b/testing/resources/bug_1139.in
new file mode 100644
index 0000000000..d5603f08aa
--- /dev/null
+++ b/testing/resources/bug_1139.in
@@ -0,0 +1,55 @@
+{{header}}
+{{object 1 0}} <<
+ /Type /Catalog
+ /Pages 2 0 R
+>>
+endobj
+{{object 2 0}} <<
+ /Type /Pages
+ /MediaBox [ 0 0 200 200 ]
+ /Count 1
+ /Kids [ 3 0 R ]
+>>
+endobj
+{{object 3 0}} <<
+ /Type /Page
+ /Parent 2 0 R
+ /Resources <<
+ /Font <<
+ /F1 4 0 R
+ /F2 5 0 R
+ >>
+ >>
+ /Contents 6 0 R
+>>
+endobj
+{{object 4 0}} <<
+ /Type /Font
+ /Subtype /Type1
+ /BaseFont /Times-Roman
+>>
+endobj
+{{object 5 0}} <<
+ /Type /Font
+ /Subtype /Type1
+ /BaseFont /Helvetica
+>>
+endobj
+{{object 6 0}} <<
+>>
+stream
+stream
+BT
+20 50 Td
+/F1 12 Tf
+(\003Hello, world!) Tj
+0 50 Td
+/F2 16 Tf
+(Goodbye, world!) Tj
+ET
+endstream
+endobj
+{{xref}}
+{{trailer}}
+{{startxref}}
+%%EOF
diff --git a/testing/resources/bug_1139.pdf b/testing/resources/bug_1139.pdf
new file mode 100644
index 0000000000..d89695e82b
--- /dev/null
+++ b/testing/resources/bug_1139.pdf
@@ -0,0 +1,68 @@
+%PDF-1.7
+% ò¤ô
+1 0 obj <<
+ /Type /Catalog
+ /Pages 2 0 R
+>>
+endobj
+2 0 obj <<
+ /Type /Pages
+ /MediaBox [ 0 0 200 200 ]
+ /Count 1
+ /Kids [ 3 0 R ]
+>>
+endobj
+3 0 obj <<
+ /Type /Page
+ /Parent 2 0 R
+ /Resources <<
+ /Font <<
+ /F1 4 0 R
+ /F2 5 0 R
+ >>
+ >>
+ /Contents 6 0 R
+>>
+endobj
+4 0 obj <<
+ /Type /Font
+ /Subtype /Type1
+ /BaseFont /Times-Roman
+>>
+endobj
+5 0 obj <<
+ /Type /Font
+ /Subtype /Type1
+ /BaseFont /Helvetica
+>>
+endobj
+6 0 obj <<
+>>
+stream
+stream
+BT
+20 50 Td
+/F1 12 Tf
+(\003Hello, world!) Tj
+0 50 Td
+/F2 16 Tf
+(Goodbye, world!) Tj
+ET
+endstream
+endobj
+xref
+0 7
+0000000000 65535 f
+0000000015 00000 n
+0000000068 00000 n
+0000000161 00000 n
+0000000303 00000 n
+0000000381 00000 n
+0000000457 00000 n
+trailer <<
+ /Root 1 0 R
+ /Size 7
+>>
+startxref
+589
+%%EOF