summaryrefslogtreecommitdiff
path: root/core/fpdftext/cpdf_linkextract_unittest.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'core/fpdftext/cpdf_linkextract_unittest.cpp')
-rw-r--r--core/fpdftext/cpdf_linkextract_unittest.cpp129
1 files changed, 79 insertions, 50 deletions
diff --git a/core/fpdftext/cpdf_linkextract_unittest.cpp b/core/fpdftext/cpdf_linkextract_unittest.cpp
index bd059862fd..de870b0cc1 100644
--- a/core/fpdftext/cpdf_linkextract_unittest.cpp
+++ b/core/fpdftext/cpdf_linkextract_unittest.cpp
@@ -32,7 +32,7 @@ TEST(CPDF_LinkExtractTest, CheckMailLink) {
};
for (size_t i = 0; i < FX_ArraySize(invalid_strs); ++i) {
CFX_WideString text_str(invalid_strs[i]);
- EXPECT_FALSE(extractor.CheckMailLink(text_str)) << text_str.c_str();
+ EXPECT_FALSE(extractor.CheckMailLink(&text_str)) << text_str.c_str();
}
// Check cases that can extract valid mail link.
@@ -54,7 +54,7 @@ TEST(CPDF_LinkExtractTest, CheckMailLink) {
CFX_WideString text_str(valid_strs[i][0]);
CFX_WideString expected_str(L"mailto:");
expected_str += valid_strs[i][1];
- EXPECT_TRUE(extractor.CheckMailLink(text_str)) << text_str.c_str();
+ EXPECT_TRUE(extractor.CheckMailLink(&text_str)) << text_str.c_str();
EXPECT_STREQ(expected_str.c_str(), text_str.c_str());
}
}
@@ -75,64 +75,93 @@ TEST(CPDF_LinkExtractTest, CheckWebLink) {
// Web addresses that in correct format that we don't handle.
L"abc.example.com", // URL without scheme.
};
+ const int32_t DEFAULT_VALUE = -42;
for (size_t i = 0; i < FX_ArraySize(invalid_cases); ++i) {
CFX_WideString text_str(invalid_cases[i]);
- EXPECT_FALSE(extractor.CheckWebLink(text_str)) << text_str.c_str();
+ int32_t start_offset = DEFAULT_VALUE;
+ int32_t count = DEFAULT_VALUE;
+ EXPECT_FALSE(extractor.CheckWebLink(&text_str, &start_offset, &count))
+ << text_str.c_str();
+ EXPECT_EQ(DEFAULT_VALUE, start_offset) << text_str.c_str();
+ EXPECT_EQ(DEFAULT_VALUE, count) << text_str.c_str();
}
// Check cases that can extract valid web link.
// An array of {input_string, expected_extracted_web_link}.
- const wchar_t* const valid_cases[][2] = {
- {L"http://www.example.com", L"http://www.example.com"}, // standard URL.
- {L"http://www.example.com:88",
- L"http://www.example.com:88"}, // URL with port number.
- {L"http://test@www.example.com",
- L"http://test@www.example.com"}, // URL with username.
- {L"http://test:test@example.com",
- L"http://test:test@example.com"}, // URL with username and password.
- {L"http://example", L"http://example"}, // URL with short domain name.
- {L"http////www.server", L"http://www.server"}, // URL starts with "www.".
- {L"http:/www.abc.com", L"http://www.abc.com"}, // URL starts with "www.".
- {L"www.a.b.c", L"http://www.a.b.c"}, // URL starts with "www.".
- {L"https://a.us", L"https://a.us"}, // Secure http URL.
- {L"https://www.t.us", L"https://www.t.us"}, // Secure http URL.
- {L"www.example-test.com",
- L"http://www.example-test.com"}, // '-' in host is ok.
- {L"www.example.com,",
- L"http://www.example.com"}, // Trim ending invalid chars.
- {L"www.example.com;(",
- L"http://www.example.com"}, // Trim ending invalid chars.
- {L"test:www.abc.com", L"http://www.abc.com"}, // Trim chars before URL.
- {L"www.g.com..", L"http://www.g.com.."}, // Leave ending periods.
- // Web link can contain IP address too.
- {L"http://192.168.0.1", L"http://192.168.0.1"}, // IPv4 address.
- {L"http://192.168.0.1:80",
- L"http://192.168.0.1:80"}, // IPv4 address with port.
- {L"http://[aa::00:bb::00:cc:00]",
- L"http://[aa::00:bb::00:cc:00]"}, // IPv6 reference.
- {L"http://[aa::00:bb::00:cc:00]:12",
- L"http://[aa::00:bb::00:cc:00]:12"}, // IPv6 reference with port.
- {L"http://[aa]:12", L"http://[aa]:12"}, // Not validate IP address.
- {L"http://[aa]:12abc", L"http://[aa]:12"}, // Trim for IPv6 address.
- {L"http://[aa]:", L"http://[aa]"}, // Trim for IPv6 address.
+ struct ValidCase {
+ const wchar_t* const input_string;
+ const wchar_t* const url_extracted;
+ const int32_t start_offset;
+ const int32_t count;
+ };
+ const ValidCase valid_cases[] = {
+ {L"http://www.example.com", L"http://www.example.com", 0,
+ 22}, // standard URL.
+ {L"http://www.example.com:88", L"http://www.example.com:88", 0,
+ 25}, // URL with port number.
+ {L"http://test@www.example.com", L"http://test@www.example.com", 0,
+ 27}, // URL with username.
+ {L"http://test:test@example.com", L"http://test:test@example.com", 0,
+ 28}, // URL with username and password.
+ {L"http://example", L"http://example", 0,
+ 14}, // URL with short domain name.
+ {L"http////www.server", L"http://www.server", 8,
+ 10}, // URL starts with "www.".
+ {L"http:/www.abc.com", L"http://www.abc.com", 6,
+ 11}, // URL starts with "www.".
+ {L"www.a.b.c", L"http://www.a.b.c", 0, 9}, // URL starts with "www.".
+ {L"https://a.us", L"https://a.us", 0, 12}, // Secure http URL.
+ {L"https://www.t.us", L"https://www.t.us", 0, 16}, // Secure http URL.
+ {L"www.example-test.com", L"http://www.example-test.com", 0,
+ 20}, // '-' in host is ok.
+ {L"www.example.com,", L"http://www.example.com", 0,
+ 15}, // Trim ending invalid chars.
+ {L"www.example.com;(", L"http://www.example.com", 0,
+ 15}, // Trim ending invalid chars.
+ {L"test:www.abc.com", L"http://www.abc.com", 5,
+ 11}, // Trim chars before URL.
+ {L"www.g.com..", L"http://www.g.com..", 0, 11}, // Leave ending periods.
+
+ // Web links can contain IP addresses too.
+ {L"http://192.168.0.1", L"http://192.168.0.1", 0, 18}, // IPv4 address.
+ {L"http://192.168.0.1:80", L"http://192.168.0.1:80", 0,
+ 21}, // IPv4 address with port.
+ {L"http://[aa::00:bb::00:cc:00]", L"http://[aa::00:bb::00:cc:00]", 0,
+ 28}, // IPv6 reference.
+ {L"http://[aa::00:bb::00:cc:00]:12", L"http://[aa::00:bb::00:cc:00]:12",
+ 0, 31}, // IPv6 reference with port.
+ {L"http://[aa]:12", L"http://[aa]:12", 0,
+ 14}, // Not validate IP address.
+ {L"http://[aa]:12abc", L"http://[aa]:12", 0,
+ 14}, // Trim for IPv6 address.
+ {L"http://[aa]:", L"http://[aa]", 0, 11}, // Trim for IPv6 address.
+
// Path and query parts can be anything.
- {L"www.abc.com/#%%^&&*(", L"http://www.abc.com/#%%^&&*("},
- {L"www.a.com/#a=@?q=rr&r=y", L"http://www.a.com/#a=@?q=rr&r=y"},
- {L"http://a.com/1/2/3/4\5\6", L"http://a.com/1/2/3/4\5\6"},
- {L"http://www.example.com/foo;bar", L"http://www.example.com/foo;bar"},
+ {L"www.abc.com/#%%^&&*(", L"http://www.abc.com/#%%^&&*(", 0, 20},
+ {L"www.a.com/#a=@?q=rr&r=y", L"http://www.a.com/#a=@?q=rr&r=y", 0, 23},
+ {L"http://a.com/1/2/3/4\5\6", L"http://a.com/1/2/3/4\5\6", 0, 22},
+ {L"http://www.example.com/foo;bar", L"http://www.example.com/foo;bar", 0,
+ 30},
+
// Invalid chars inside host name are ok as we don't validate them.
- {L"http://ex[am]ple", L"http://ex[am]ple"},
- {L"http://:example.com", L"http://:example.com"},
- {L"http://((())/path?", L"http://((())/path?"},
- {L"http:////abc.server", L"http:////abc.server"},
+ {L"http://ex[am]ple", L"http://ex[am]ple", 0, 16},
+ {L"http://:example.com", L"http://:example.com", 0, 19},
+ {L"http://((())/path?", L"http://((())/path?", 0, 18},
+ {L"http:////abc.server", L"http:////abc.server", 0, 19},
+
// Non-ASCII chars are not validated either.
- {L"www.测试.net", L"http://www.测试.net"},
- {L"www.测试。net。", L"http://www.测试。net。"},
- {L"www.测试.net;", L"http://www.测试.net;"},
+ {L"www.测试.net", L"http://www.测试.net", 0, 10},
+ {L"www.测试。net。", L"http://www.测试。net。", 0, 11},
+ {L"www.测试.net;", L"http://www.测试.net;", 0, 11},
};
for (size_t i = 0; i < FX_ArraySize(valid_cases); ++i) {
- CFX_WideString text_str(valid_cases[i][0]);
- EXPECT_TRUE(extractor.CheckWebLink(text_str)) << text_str.c_str();
- EXPECT_STREQ(valid_cases[i][1], text_str.c_str());
+ CFX_WideString text_str(valid_cases[i].input_string);
+ int32_t start_offset = DEFAULT_VALUE;
+ int32_t count = DEFAULT_VALUE;
+ EXPECT_TRUE(extractor.CheckWebLink(&text_str, &start_offset, &count))
+ << text_str.c_str();
+ EXPECT_STREQ(valid_cases[i].url_extracted, text_str.c_str());
+ EXPECT_EQ(valid_cases[i].start_offset, start_offset) << text_str.c_str();
+ EXPECT_EQ(valid_cases[i].count, count) << text_str.c_str();
}
}