From 7630907c7ecbb700e4de287550dbed06f36fbe9e Mon Sep 17 00:00:00 2001 From: Wei Li Date: Thu, 16 Mar 2017 17:31:03 -0700 Subject: Handle web links across lines When a web link has a hyphen at the end of line, we consider it to be continued to the next line. For example, "http://www.abc.com/my-\r\ntest" should be extracted as "http://www.abc.com/my-test". BUG=pdfium:650 Change-Id: I64a93d9c66faf2be0abdaf8cfe8ee496c435d0ca Reviewed-on: https://pdfium-review.googlesource.com/3092 Commit-Queue: Wei Li Reviewed-by: Lei Zhang --- testing/resources/bug_650.pdf | Bin 0 -> 85296 bytes testing/resources/weblinks_across_lines.in | 74 ++++++++++++++++++++++++ testing/resources/weblinks_across_lines.pdf | 84 ++++++++++++++++++++++++++++ 3 files changed, 158 insertions(+) create mode 100644 testing/resources/bug_650.pdf create mode 100644 testing/resources/weblinks_across_lines.in create mode 100644 testing/resources/weblinks_across_lines.pdf (limited to 'testing') diff --git a/testing/resources/bug_650.pdf b/testing/resources/bug_650.pdf new file mode 100644 index 0000000000..5e46032f6c Binary files /dev/null and b/testing/resources/bug_650.pdf differ diff --git a/testing/resources/weblinks_across_lines.in b/testing/resources/weblinks_across_lines.in new file mode 100644 index 0000000000..bb04b5e9cf --- /dev/null +++ b/testing/resources/weblinks_across_lines.in @@ -0,0 +1,74 @@ +{{header}} +{{object 1 0}} << + /Type /Catalog + /Pages 2 0 R +>> +{{object 2 0}} << + /Type /Pages + /MediaBox [ 0 0 600 600 ] + /Count 1 + /Kids [ 3 0 R ] +>> +endobj +{{object 3 0}} << + /Type /Page + /Parent 2 0 R + /Resources << + /Font << + /F1 4 0 R + /F2 5 0 R + >> + >> + /Contents 6 0 R +>> +endobj +{{object 4 0}} << + /Type /Font + /Subtype /Type1 + /BaseFont /Times-Roman +>> +endobj +{{object 5 0}} << + /Type /Font + /Subtype /Type1 + /BaseFont /Helvetica +>> +endobj +{{object 6 0}} << +>> +stream +BT +/F1 12 Tf +50 50 Td +(Hello, world! This is not a link.) Tj +0 50 Td +(Is this http://example.com?) Tj +0 50 Td +(foo a link?) Tj +/F2 14 Tf +0 50 Td +(How about this http://example.com/) Tj +0 50 Td +(foo a link?) Tj +0 50 Td +(Is this http://example.com/test-) Tj +0 50 Td +(foo a link?) Tj +(Is this http://abc.com/test-) Tj +0 50 Td +0 50 Td +(foo a link?) Tj +0 50 Td +(And this http://example.com/) Tj +0 50 Td +(http://www.abc.com a link?) Tj +ET +endstream +endobj +{{xref}} +trailer << + /Size 6 + /Root 1 0 R +>> +{{startxref}} +%%EOF diff --git a/testing/resources/weblinks_across_lines.pdf b/testing/resources/weblinks_across_lines.pdf new file mode 100644 index 0000000000..e9327c4b34 --- /dev/null +++ b/testing/resources/weblinks_across_lines.pdf @@ -0,0 +1,84 @@ +%PDF-1.7 +% ò¤ô +1 0 obj << + /Type /Catalog + /Pages 2 0 R +>> +2 0 obj << + /Type /Pages + /MediaBox [ 0 0 600 600 ] + /Count 1 + /Kids [ 3 0 R ] +>> +endobj +3 0 obj << + /Type /Page + /Parent 2 0 R + /Resources << + /Font << + /F1 4 0 R + /F2 5 0 R + >> + >> + /Contents 6 0 R +>> +endobj +4 0 obj << + /Type /Font + /Subtype /Type1 + /BaseFont /Times-Roman +>> +endobj +5 0 obj << + /Type /Font + /Subtype /Type1 + /BaseFont /Helvetica +>> +endobj +6 0 obj << +>> +stream +BT +/F1 12 Tf +50 50 Td +(Hello, world! This is not a link.) Tj +0 50 Td +(Is this http://example.com?) Tj +0 50 Td +(foo a link?) Tj +/F2 14 Tf +0 50 Td +(How about this http://example.com/) Tj +0 50 Td +(foo a link?) Tj +0 50 Td +(Is this http://example.com/test-) Tj +0 50 Td +(foo a link?) Tj +(Is this http://abc.com/test-) Tj +0 50 Td +0 50 Td +(foo a link?) Tj +0 50 Td +(And this http://example.com/) Tj +0 50 Td +(http://www.abc.com a link?) Tj +ET +endstream +endobj +xref +0 7 +0000000000 65535 f +0000000015 00000 n +0000000061 00000 n +0000000154 00000 n +0000000296 00000 n +0000000374 00000 n +0000000450 00000 n +trailer << + /Size 6 + /Root 1 0 R +>> +startxref +921 +%%EOF -- cgit v1.2.3