Rename CPDF_LinkExtract test file to match class

Change-Id: I6200968b0c72d2de32d51a741ac821084ad84f8a
Reviewed-on: https://pdfium-review.googlesource.com/5952
Reviewed-by: Nicolás Peña <npm@chromium.org>
Commit-Queue: dsinclair <dsinclair@chromium.org>
author: Dan Sinclair <dsinclair@chromium.org> 2017-05-25 14:21:20 -0400
committer: Chromium commit bot <commit-bot@chromium.org> 2017-05-25 18:36:22 +0000
commit: 3a4c408554f2f2ffb5a143f6dadcdd528fcf106e (patch)
tree: 41c32a9df644fbe0751e337bcc6a04f4689dff11 /core/fpdftext/cpdf_linkextract_unittest.cpp
parent: 5acacd361b15b82a8b30cdd5cb92abb8a2104ecf (diff)
download: pdfium-3a4c408554f2f2ffb5a143f6dadcdd528fcf106e.tar.xz
1 files changed, 138 insertions, 0 deletions
diff --git a/core/fpdftext/cpdf_linkextract_unittest.cpp b/core/fpdftext/cpdf_linkextract_unittest.cpp
new file mode 100644
index 0000000000..bd059862fd
--- /dev/null
+++ b/core/fpdftext/cpdf_linkextract_unittest.cpp
@@ -0,0 +1,138 @@
+// Copyright 2015 PDFium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "core/fpdftext/cpdf_linkextract.h"
+
+#include "testing/gtest/include/gtest/gtest.h"
+
+// Test-only subclass of CPDF_LinkExtract, constructed with a null argument.
+class CPDF_TestLinkExtract : public CPDF_LinkExtract {
+ public:
+  CPDF_TestLinkExtract() : CPDF_LinkExtract(nullptr) {}
+
+ private:
+  // Befriend the tests below so that they can call the protected member
+  // functions CheckMailLink() and CheckWebLink() on this object.
+  FRIEND_TEST(CPDF_LinkExtractTest, CheckMailLink);
+  FRIEND_TEST(CPDF_LinkExtractTest, CheckWebLink);
+};
+
+TEST(CPDF_LinkExtractTest, CheckMailLink) {
+  CPDF_TestLinkExtract extractor;
+  // Inputs for which CheckMailLink() must return false.
+  const wchar_t* const invalid_strs[] = {
+      L"",
+      L"peter.pan",       // '@' is required.
+      L"abc@server",      // Domain name needs at least one '.'.
+      L"abc.@gmail.com",  // '.' cannot immediately precede '@'.
+      L"abc@xyz&q.org",   // Domain name should not contain '&'.
+      L"abc@.xyz.org",    // Domain name should not start with '.'.
+      L"fan@g..com"       // Domain name should not have consecutive '.'.
+  };
+  for (size_t i = 0; i < FX_ArraySize(invalid_strs); ++i) {
+    CFX_WideString text_str(invalid_strs[i]);
+    EXPECT_FALSE(extractor.CheckMailLink(text_str)) << text_str.c_str();
+  }
+
+  // Inputs for which CheckMailLink() must return true and rewrite the string.
+  // Each row is {input_string, expected_address}; "mailto:" is prepended below.
+  const wchar_t* const valid_strs[][2] = {
+      {L"peter@abc.d", L"peter@abc.d"},
+      {L"red.teddy.b@abc.com", L"red.teddy.b@abc.com"},
+      {L"abc_@gmail.com", L"abc_@gmail.com"},  // '_' is ok before '@'.
+      {L"dummy-hi@gmail.com",
+       L"dummy-hi@gmail.com"},                  // '-' is ok in user name.
+      {L"a..df@gmail.com", L"df@gmail.com"},    // Stop at consecutive '.'.
+      {L".john@yahoo.com", L"john@yahoo.com"},  // Remove leading '.'.
+      {L"abc@xyz.org?/", L"abc@xyz.org"},       // Trim ending invalid chars.
+      {L"fan{abc@xyz.org", L"abc@xyz.org"},     // Trim beginning invalid chars.
+      {L"fan@g.com..", L"fan@g.com"},           // Trim the ending periods.
+      {L"CAP.cap@Gmail.Com", L"CAP.cap@Gmail.Com"},  // Keep the original case.
+  };
+  for (size_t i = 0; i < FX_ArraySize(valid_strs); ++i) {
+    CFX_WideString text_str(valid_strs[i][0]);
+    CFX_WideString expected_str(L"mailto:");
+    expected_str += valid_strs[i][1];
+    EXPECT_TRUE(extractor.CheckMailLink(text_str)) << text_str.c_str();
+    EXPECT_STREQ(expected_str.c_str(), text_str.c_str());
+  }
+}
+
+TEST(CPDF_LinkExtractTest, CheckWebLink) {
+  CPDF_TestLinkExtract extractor;
+  // Inputs CheckWebLink() must reject. NOTE(review): the "http//[" entries have
+  // no ':' after the scheme; confirm they really exercise the IPv6 checks.
+  const wchar_t* const invalid_cases[] = {
+      L"", L"http", L"www.", L"https-and-www",
+      L"http:/abc.com",             // Missing slash.
+      L"http://((()),",             // Only invalid chars in host name.
+      L"ftp://example.com",         // Ftp scheme is not supported.
+      L"http:example.com",          // Missing slashes.
+      L"http//[example.com",        // Invalid IPv6 address.
+      L"http//[00:00:00:00:00:00",  // Invalid IPv6 address.
+      L"http//[]",                  // Empty IPv6 address.
+      // Web addresses in a correct format that we don't handle.
+      L"abc.example.com",  // URL without scheme.
+  };
+  for (size_t i = 0; i < FX_ArraySize(invalid_cases); ++i) {
+    CFX_WideString text_str(invalid_cases[i]);
+    EXPECT_FALSE(extractor.CheckWebLink(text_str)) << text_str.c_str();
+  }
+
+  // Inputs CheckWebLink() must accept, rewriting the string in place.
+  // Each row is {input_string, expected_extracted_web_link}.
+  const wchar_t* const valid_cases[][2] = {
+      {L"http://www.example.com", L"http://www.example.com"},  // Standard URL.
+      {L"http://www.example.com:88",
+       L"http://www.example.com:88"},  // URL with port number.
+      {L"http://test@www.example.com",
+       L"http://test@www.example.com"},  // URL with username.
+      {L"http://test:test@example.com",
+       L"http://test:test@example.com"},  // URL with username and password.
+      {L"http://example", L"http://example"},  // URL with short domain name.
+      {L"http////www.server", L"http://www.server"},  // URL starts with "www.".
+      {L"http:/www.abc.com", L"http://www.abc.com"},  // URL starts with "www.".
+      {L"www.a.b.c", L"http://www.a.b.c"},            // URL starts with "www.".
+      {L"https://a.us", L"https://a.us"},             // Secure http URL.
+      {L"https://www.t.us", L"https://www.t.us"},     // Secure http URL.
+      {L"www.example-test.com",
+       L"http://www.example-test.com"},  // '-' in host is ok.
+      {L"www.example.com,",
+       L"http://www.example.com"},  // Trim ending invalid chars.
+      {L"www.example.com;(",
+       L"http://www.example.com"},  // Trim ending invalid chars.
+      {L"test:www.abc.com", L"http://www.abc.com"},  // Trim chars before URL.
+      {L"www.g.com..", L"http://www.g.com.."},       // Leave ending periods.
+      // Web link can contain IP address too.
+      {L"http://192.168.0.1", L"http://192.168.0.1"},  // IPv4 address.
+      {L"http://192.168.0.1:80",
+       L"http://192.168.0.1:80"},  // IPv4 address with port.
+      {L"http://[aa::00:bb::00:cc:00]",
+       L"http://[aa::00:bb::00:cc:00]"},  // IPv6 reference.
+      {L"http://[aa::00:bb::00:cc:00]:12",
+       L"http://[aa::00:bb::00:cc:00]:12"},       // IPv6 reference with port.
+      {L"http://[aa]:12", L"http://[aa]:12"},     // IP is not validated.
+      {L"http://[aa]:12abc", L"http://[aa]:12"},  // Trim for IPv6 address.
+      {L"http://[aa]:", L"http://[aa]"},          // Trim for IPv6 address.
+      // Path and query parts can be anything. NOTE: "\5\6" are octal escapes.
+      {L"www.abc.com/#%%^&&*(", L"http://www.abc.com/#%%^&&*("},
+      {L"www.a.com/#a=@?q=rr&r=y", L"http://www.a.com/#a=@?q=rr&r=y"},
+      {L"http://a.com/1/2/3/4\5\6", L"http://a.com/1/2/3/4\5\6"},
+      {L"http://www.example.com/foo;bar", L"http://www.example.com/foo;bar"},
+      // Invalid chars inside host name are ok as we don't validate them.
+      {L"http://ex[am]ple", L"http://ex[am]ple"},
+      {L"http://:example.com", L"http://:example.com"},
+      {L"http://((())/path?", L"http://((())/path?"},
+      {L"http:////abc.server", L"http:////abc.server"},
+      // Non-ASCII chars are not validated either.
+      {L"www.测试.net", L"http://www.测试.net"},
+      {L"www.测试。net。", L"http://www.测试。net。"},
+      {L"www.测试.net；", L"http://www.测试.net；"},
+  };
+  for (size_t i = 0; i < FX_ArraySize(valid_cases); ++i) {
+    CFX_WideString text_str(valid_cases[i][0]);
+    EXPECT_TRUE(extractor.CheckWebLink(text_str)) << text_str.c_str();
+    EXPECT_STREQ(valid_cases[i][1], text_str.c_str());
+  }
+}
author	Dan Sinclair <dsinclair@chromium.org>	2017-05-25 14:21:20 -0400
committer	Chromium commit bot <commit-bot@chromium.org>	2017-05-25 18:36:22 +0000
commit	3a4c408554f2f2ffb5a143f6dadcdd528fcf106e (patch)
tree	41c32a9df644fbe0751e337bcc6a04f4689dff11 /core/fpdftext/cpdf_linkextract_unittest.cpp
parent	5acacd361b15b82a8b30cdd5cb92abb8a2104ecf (diff)
download	pdfium-3a4c408554f2f2ffb5a143f6dadcdd528fcf106e.tar.xz