From defe54dbf60459972ff1c295af332ccdbbe15a70 Mon Sep 17 00:00:00 2001 From: Henrique Nakashima Date: Thu, 8 Jun 2017 11:57:57 -0400 Subject: Fixing click area for links with a URL with prepended text. Bug: pdfium:655 Change-Id: Idd90be487d390f066a76140800096feead6b9e55 Reviewed-on: https://pdfium-review.googlesource.com/6310 Reviewed-by: dsinclair Commit-Queue: Henrique Nakashima --- core/fpdftext/cpdf_linkextract_unittest.cpp | 129 +++++++++++++++++----------- 1 file changed, 79 insertions(+), 50 deletions(-) (limited to 'core/fpdftext/cpdf_linkextract_unittest.cpp') diff --git a/core/fpdftext/cpdf_linkextract_unittest.cpp b/core/fpdftext/cpdf_linkextract_unittest.cpp index bd059862fd..de870b0cc1 100644 --- a/core/fpdftext/cpdf_linkextract_unittest.cpp +++ b/core/fpdftext/cpdf_linkextract_unittest.cpp @@ -32,7 +32,7 @@ TEST(CPDF_LinkExtractTest, CheckMailLink) { }; for (size_t i = 0; i < FX_ArraySize(invalid_strs); ++i) { CFX_WideString text_str(invalid_strs[i]); - EXPECT_FALSE(extractor.CheckMailLink(text_str)) << text_str.c_str(); + EXPECT_FALSE(extractor.CheckMailLink(&text_str)) << text_str.c_str(); } // Check cases that can extract valid mail link. @@ -54,7 +54,7 @@ TEST(CPDF_LinkExtractTest, CheckMailLink) { CFX_WideString text_str(valid_strs[i][0]); CFX_WideString expected_str(L"mailto:"); expected_str += valid_strs[i][1]; - EXPECT_TRUE(extractor.CheckMailLink(text_str)) << text_str.c_str(); + EXPECT_TRUE(extractor.CheckMailLink(&text_str)) << text_str.c_str(); EXPECT_STREQ(expected_str.c_str(), text_str.c_str()); } } @@ -75,64 +75,93 @@ TEST(CPDF_LinkExtractTest, CheckWebLink) { // Web addresses that in correct format that we don't handle. L"abc.example.com", // URL without scheme. }; + const int32_t DEFAULT_VALUE = -42; for (size_t i = 0; i < FX_ArraySize(invalid_cases); ++i) { CFX_WideString text_str(invalid_cases[i]); - EXPECT_FALSE(extractor.CheckWebLink(text_str)) << text_str.c_str(); + int32_t start_offset = DEFAULT_VALUE; + int32_t count = DEFAULT_VALUE; + EXPECT_FALSE(extractor.CheckWebLink(&text_str, &start_offset, &count)) + << text_str.c_str(); + EXPECT_EQ(DEFAULT_VALUE, start_offset) << text_str.c_str(); + EXPECT_EQ(DEFAULT_VALUE, count) << text_str.c_str(); } // Check cases that can extract valid web link. // An array of {input_string, expected_extracted_web_link}. - const wchar_t* const valid_cases[][2] = { - {L"http://www.example.com", L"http://www.example.com"}, // standard URL. - {L"http://www.example.com:88", - L"http://www.example.com:88"}, // URL with port number. - {L"http://test@www.example.com", - L"http://test@www.example.com"}, // URL with username. - {L"http://test:test@example.com", - L"http://test:test@example.com"}, // URL with username and password. - {L"http://example", L"http://example"}, // URL with short domain name. - {L"http////www.server", L"http://www.server"}, // URL starts with "www.". - {L"http:/www.abc.com", L"http://www.abc.com"}, // URL starts with "www.". - {L"www.a.b.c", L"http://www.a.b.c"}, // URL starts with "www.". - {L"https://a.us", L"https://a.us"}, // Secure http URL. - {L"https://www.t.us", L"https://www.t.us"}, // Secure http URL. - {L"www.example-test.com", - L"http://www.example-test.com"}, // '-' in host is ok. - {L"www.example.com,", - L"http://www.example.com"}, // Trim ending invalid chars. - {L"www.example.com;(", - L"http://www.example.com"}, // Trim ending invalid chars. - {L"test:www.abc.com", L"http://www.abc.com"}, // Trim chars before URL. - {L"www.g.com..", L"http://www.g.com.."}, // Leave ending periods. - // Web link can contain IP address too. - {L"http://192.168.0.1", L"http://192.168.0.1"}, // IPv4 address. - {L"http://192.168.0.1:80", - L"http://192.168.0.1:80"}, // IPv4 address with port. - {L"http://[aa::00:bb::00:cc:00]", - L"http://[aa::00:bb::00:cc:00]"}, // IPv6 reference. - {L"http://[aa::00:bb::00:cc:00]:12", - L"http://[aa::00:bb::00:cc:00]:12"}, // IPv6 reference with port. - {L"http://[aa]:12", L"http://[aa]:12"}, // Not validate IP address. - {L"http://[aa]:12abc", L"http://[aa]:12"}, // Trim for IPv6 address. - {L"http://[aa]:", L"http://[aa]"}, // Trim for IPv6 address. + struct ValidCase { + const wchar_t* const input_string; + const wchar_t* const url_extracted; + const int32_t start_offset; + const int32_t count; + }; + const ValidCase valid_cases[] = { + {L"http://www.example.com", L"http://www.example.com", 0, + 22}, // standard URL. + {L"http://www.example.com:88", L"http://www.example.com:88", 0, + 25}, // URL with port number. + {L"http://test@www.example.com", L"http://test@www.example.com", 0, + 27}, // URL with username. + {L"http://test:test@example.com", L"http://test:test@example.com", 0, + 28}, // URL with username and password. + {L"http://example", L"http://example", 0, + 14}, // URL with short domain name. + {L"http////www.server", L"http://www.server", 8, + 10}, // URL starts with "www.". + {L"http:/www.abc.com", L"http://www.abc.com", 6, + 11}, // URL starts with "www.". + {L"www.a.b.c", L"http://www.a.b.c", 0, 9}, // URL starts with "www.". + {L"https://a.us", L"https://a.us", 0, 12}, // Secure http URL. + {L"https://www.t.us", L"https://www.t.us", 0, 16}, // Secure http URL. + {L"www.example-test.com", L"http://www.example-test.com", 0, + 20}, // '-' in host is ok. + {L"www.example.com,", L"http://www.example.com", 0, + 15}, // Trim ending invalid chars. + {L"www.example.com;(", L"http://www.example.com", 0, + 15}, // Trim ending invalid chars. + {L"test:www.abc.com", L"http://www.abc.com", 5, + 11}, // Trim chars before URL. + {L"www.g.com..", L"http://www.g.com..", 0, 11}, // Leave ending periods. + + // Web links can contain IP addresses too. + {L"http://192.168.0.1", L"http://192.168.0.1", 0, 18}, // IPv4 address. + {L"http://192.168.0.1:80", L"http://192.168.0.1:80", 0, + 21}, // IPv4 address with port. + {L"http://[aa::00:bb::00:cc:00]", L"http://[aa::00:bb::00:cc:00]", 0, + 28}, // IPv6 reference. + {L"http://[aa::00:bb::00:cc:00]:12", L"http://[aa::00:bb::00:cc:00]:12", + 0, 31}, // IPv6 reference with port. + {L"http://[aa]:12", L"http://[aa]:12", 0, + 14}, // Not validate IP address. + {L"http://[aa]:12abc", L"http://[aa]:12", 0, + 14}, // Trim for IPv6 address. + {L"http://[aa]:", L"http://[aa]", 0, 11}, // Trim for IPv6 address. + // Path and query parts can be anything. - {L"www.abc.com/#%%^&&*(", L"http://www.abc.com/#%%^&&*("}, - {L"www.a.com/#a=@?q=rr&r=y", L"http://www.a.com/#a=@?q=rr&r=y"}, - {L"http://a.com/1/2/3/4\5\6", L"http://a.com/1/2/3/4\5\6"}, - {L"http://www.example.com/foo;bar", L"http://www.example.com/foo;bar"}, + {L"www.abc.com/#%%^&&*(", L"http://www.abc.com/#%%^&&*(", 0, 20}, + {L"www.a.com/#a=@?q=rr&r=y", L"http://www.a.com/#a=@?q=rr&r=y", 0, 23}, + {L"http://a.com/1/2/3/4\5\6", L"http://a.com/1/2/3/4\5\6", 0, 22}, + {L"http://www.example.com/foo;bar", L"http://www.example.com/foo;bar", 0, + 30}, + // Invalid chars inside host name are ok as we don't validate them. - {L"http://ex[am]ple", L"http://ex[am]ple"}, - {L"http://:example.com", L"http://:example.com"}, - {L"http://((())/path?", L"http://((())/path?"}, - {L"http:////abc.server", L"http:////abc.server"}, + {L"http://ex[am]ple", L"http://ex[am]ple", 0, 16}, + {L"http://:example.com", L"http://:example.com", 0, 19}, + {L"http://((())/path?", L"http://((())/path?", 0, 18}, + {L"http:////abc.server", L"http:////abc.server", 0, 19}, + // Non-ASCII chars are not validated either. - {L"www.测试.net", L"http://www.测试.net"}, - {L"www.测试。net。", L"http://www.测试。net。"}, - {L"www.测试.net;", L"http://www.测试.net;"}, + {L"www.测试.net", L"http://www.测试.net", 0, 10}, + {L"www.测试。net。", L"http://www.测试。net。", 0, 11}, + {L"www.测试.net;", L"http://www.测试.net;", 0, 11}, }; for (size_t i = 0; i < FX_ArraySize(valid_cases); ++i) { - CFX_WideString text_str(valid_cases[i][0]); - EXPECT_TRUE(extractor.CheckWebLink(text_str)) << text_str.c_str(); - EXPECT_STREQ(valid_cases[i][1], text_str.c_str()); + CFX_WideString text_str(valid_cases[i].input_string); + int32_t start_offset = DEFAULT_VALUE; + int32_t count = DEFAULT_VALUE; + EXPECT_TRUE(extractor.CheckWebLink(&text_str, &start_offset, &count)) + << text_str.c_str(); + EXPECT_STREQ(valid_cases[i].url_extracted, text_str.c_str()); + EXPECT_EQ(valid_cases[i].start_offset, start_offset) << text_str.c_str(); + EXPECT_EQ(valid_cases[i].count, count) << text_str.c_str(); } } -- cgit v1.2.3