From 7630907c7ecbb700e4de287550dbed06f36fbe9e Mon Sep 17 00:00:00 2001
From: Wei Li
Date: Thu, 16 Mar 2017 17:31:03 -0700
Subject: Handle web links across lines

When a web link has a hyphen at the end of line, we consider it to be
continued to the next line. For example, "http://www.abc.com/my-\r\ntest"
should be extracted as "http://www.abc.com/my-test".

BUG=pdfium:650

Change-Id: I64a93d9c66faf2be0abdaf8cfe8ee496c435d0ca
Reviewed-on: https://pdfium-review.googlesource.com/3092
Commit-Queue: Wei Li
Reviewed-by: Lei Zhang
---
 fpdfsdk/fpdftext_embeddertest.cpp | 68 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 68 insertions(+)

(limited to 'fpdfsdk/fpdftext_embeddertest.cpp')

diff --git a/fpdfsdk/fpdftext_embeddertest.cpp b/fpdfsdk/fpdftext_embeddertest.cpp
index 198ef8a7f2..3d496bc06f 100644
--- a/fpdfsdk/fpdftext_embeddertest.cpp
+++ b/fpdfsdk/fpdftext_embeddertest.cpp
@@ -370,6 +370,74 @@ TEST_F(FPDFTextEmbeddertest, WebLinks) {
   UnloadPage(page);
 }
 
+TEST_F(FPDFTextEmbeddertest, WebLinksAcrossLines) {
+  EXPECT_TRUE(OpenDocument("weblinks_across_lines.pdf"));
+  FPDF_PAGE page = LoadPage(0);
+  EXPECT_TRUE(page);
+
+  FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page);
+  EXPECT_TRUE(textpage);
+
+  FPDF_PAGELINK pagelink = FPDFLink_LoadWebLinks(textpage);
+  EXPECT_TRUE(pagelink);
+
+  static const char* const kExpectedUrls[] = {
+      "http://example.com?",          // from "http://www.example.com?\r\nfoo"
+      "http://example.com/",          // from "http://www.example.com/\r\nfoo"
+      "http://example.com/test-foo",  // from "http://example.com/test-\r\nfoo"
+      "http://abc.com/test-foo",      // from "http://abc.com/test-\r\n\r\nfoo"
+      // Next two links from "http://www.example.com/\r\nhttp://www.abc.com/"
+      "http://example.com/", "http://www.abc.com",
+  };
+  static const int kNumLinks = static_cast<int>(FX_ArraySize(kExpectedUrls));
+
+  EXPECT_EQ(kNumLinks, FPDFLink_CountWebLinks(pagelink));
+
+  unsigned short fixed_buffer[128];
+  for (int i = 0; i < kNumLinks; i++) {
+    const size_t expected_len = strlen(kExpectedUrls[i]) + 1;
+    memset(fixed_buffer, 0, FX_ArraySize(fixed_buffer));
+    EXPECT_EQ(static_cast<int>(expected_len),
+              FPDFLink_GetURL(pagelink, i, nullptr, 0));
+    EXPECT_EQ(
+        static_cast<int>(expected_len),
+        FPDFLink_GetURL(pagelink, i, fixed_buffer, FX_ArraySize(fixed_buffer)));
+    EXPECT_TRUE(
+        check_unsigned_shorts(kExpectedUrls[i], fixed_buffer, expected_len));
+  }
+
+  FPDFLink_CloseWebLinks(pagelink);
+  FPDFText_ClosePage(textpage);
+  UnloadPage(page);
+}
+
+TEST_F(FPDFTextEmbeddertest, WebLinksAcrossLinesBug) {
+  EXPECT_TRUE(OpenDocument("bug_650.pdf"));
+  FPDF_PAGE page = LoadPage(0);
+  EXPECT_TRUE(page);
+
+  FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page);
+  EXPECT_TRUE(textpage);
+
+  FPDF_PAGELINK pagelink = FPDFLink_LoadWebLinks(textpage);
+  EXPECT_TRUE(pagelink);
+
+  EXPECT_EQ(2, FPDFLink_CountWebLinks(pagelink));
+  unsigned short fixed_buffer[128] = {0};
+  static const char kExpectedUrl[] =
+      "http://tutorial45.com/learn-autocad-basics-day-166/";
+  static const int kUrlSize = static_cast<int>(sizeof(kExpectedUrl));
+
+  EXPECT_EQ(kUrlSize, FPDFLink_GetURL(pagelink, 1, nullptr, 0));
+  EXPECT_EQ(kUrlSize, FPDFLink_GetURL(pagelink, 1, fixed_buffer,
+                                      FX_ArraySize(fixed_buffer)));
+  EXPECT_TRUE(check_unsigned_shorts(kExpectedUrl, fixed_buffer, kUrlSize));
+
+  FPDFLink_CloseWebLinks(pagelink);
+  FPDFText_ClosePage(textpage);
+  UnloadPage(page);
+}
+
 TEST_F(FPDFTextEmbeddertest, GetFontSize) {
   EXPECT_TRUE(OpenDocument("hello_world.pdf"));
   FPDF_PAGE page = LoadPage(0);
-- 
cgit v1.2.3