From 15c0fccfeff5bf389b494a828c89adb3207c73d6 Mon Sep 17 00:00:00 2001 From: Ryan Harrison Date: Mon, 12 Mar 2018 15:20:04 +0000 Subject: Remove all usages of FXSYS_iswASCIIalnum Instances are either replaced with FXSYS_iswalnum, which calls out to the ICU library to do the proper Unicode operations, or have been converted to an isascii && isalnum pair, if ASCII alnum is actually what was wanted. BUG=pdfium:1035 Change-Id: I959ec8739a4d020e61562180393ab8113a81577c Reviewed-on: https://pdfium-review.googlesource.com/28430 Reviewed-by: dsinclair Commit-Queue: Ryan Harrison --- BUILD.gn | 1 + core/fpdftext/cpdf_linkextract.cpp | 4 ++-- core/fpdftext/cpdf_textpage.cpp | 2 +- core/fxcrt/DEPS | 3 +++ core/fxcrt/css/cfx_cssselector.cpp | 4 ++-- core/fxcrt/fx_extension.h | 5 +++-- fxjs/cjs_publicmethods.cpp | 2 +- fxjs/cjs_util.cpp | 2 +- 8 files changed, 14 insertions(+), 9 deletions(-) create mode 100644 core/fxcrt/DEPS diff --git a/BUILD.gn b/BUILD.gn index 485afe28bf..d4e2d46a31 100644 --- a/BUILD.gn +++ b/BUILD.gn @@ -883,6 +883,7 @@ jumbo_static_library("fxcrt") { public_deps = [ ":freetype_common", "third_party:pdfium_base", + "//third_party/icu:icuuc", ] if (pdf_enable_xfa) { diff --git a/core/fpdftext/cpdf_linkextract.cpp b/core/fpdftext/cpdf_linkextract.cpp index f8144a171b..c3cf4fc1ef 100644 --- a/core/fpdftext/cpdf_linkextract.cpp +++ b/core/fpdftext/cpdf_linkextract.cpp @@ -246,7 +246,7 @@ bool CPDF_LinkExtract::CheckMailLink(WideString* str) { size_t pPos = aPos.value(); // Used to track the position of '@' or '.'. for (size_t i = aPos.value(); i > 0; i--) { wchar_t ch = (*str)[i - 1]; - if (ch == L'_' || ch == L'-' || FXSYS_iswASCIIalnum(ch)) + if (ch == L'_' || ch == L'-' || FXSYS_iswalnum(ch)) continue; if (ch != L'.' || i == pPos || i == 1) { @@ -282,7 +282,7 @@ bool CPDF_LinkExtract::CheckMailLink(WideString* str) { pPos = 0; // Used to track the position of '.'. 
for (size_t i = aPos.value() + 1; i < nLen; i++) { wchar_t wch = (*str)[i]; - if (wch == L'-' || FXSYS_iswASCIIalnum(wch)) + if (wch == L'-' || FXSYS_iswalnum(wch)) continue; if (wch != L'.' || i == pPos + 1) { diff --git a/core/fpdftext/cpdf_textpage.cpp b/core/fpdftext/cpdf_textpage.cpp index 5019eabaf5..7541dae6a7 100644 --- a/core/fpdftext/cpdf_textpage.cpp +++ b/core/fpdftext/cpdf_textpage.cpp @@ -1210,7 +1210,7 @@ bool CPDF_TextPage::IsHyphen(wchar_t curChar) const { if ((iter + 1) != curText.rend()) { iter++; - if (FXSYS_iswASCIIalpha(*iter) && FXSYS_iswASCIIalnum(curChar)) + if (FXSYS_iswASCIIalpha(*iter) && FXSYS_iswalnum(curChar)) return true; } diff --git a/core/fxcrt/DEPS b/core/fxcrt/DEPS new file mode 100644 index 0000000000..2be03524b6 --- /dev/null +++ b/core/fxcrt/DEPS @@ -0,0 +1,3 @@ +include_rules = [ + '+third_party/icu', +] diff --git a/core/fxcrt/css/cfx_cssselector.cpp b/core/fxcrt/css/cfx_cssselector.cpp index 76cb846890..8a22b12287 100644 --- a/core/fxcrt/css/cfx_cssselector.cpp +++ b/core/fxcrt/css/cfx_cssselector.cpp @@ -16,9 +16,9 @@ namespace { int32_t GetCSSNameLen(const wchar_t* psz, const wchar_t* pEnd) { const wchar_t* pStart = psz; while (psz < pEnd) { - wchar_t wch = *psz; - if (!FXSYS_iswASCIIalnum(wch) && wch != '_' && wch != '-') + if (!isascii(*psz) || (!isalnum(*psz) && *psz != '_' && *psz != '-')) { break; + } ++psz; } return psz - pStart; diff --git a/core/fxcrt/fx_extension.h b/core/fxcrt/fx_extension.h index 491d4b29b7..bd0ac8b303 100644 --- a/core/fxcrt/fx_extension.h +++ b/core/fxcrt/fx_extension.h @@ -12,6 +12,7 @@ #include #include "core/fxcrt/fx_string.h" +#include "third_party/icu/source/common/unicode/uchar.h" #define FX_INVALID_OFFSET static_cast(-1) @@ -45,8 +46,8 @@ inline bool FXSYS_iswASCIIalpha(wchar_t wch) { return FXSYS_isASCIIupper(wch) || FXSYS_isASCIIlower(wch); } -inline bool FXSYS_iswASCIIalnum(wchar_t wch) { - return FXSYS_iswASCIIalpha(wch) || std::iswdigit(wch); +inline bool FXSYS_iswalnum(wchar_t 
c) { + return u_isalnum(c); } inline bool FXSYS_iswASCIIspace(wchar_t c) { diff --git a/fxjs/cjs_publicmethods.cpp b/fxjs/cjs_publicmethods.cpp index 28df0a305b..81a84d286b 100644 --- a/fxjs/cjs_publicmethods.cpp +++ b/fxjs/cjs_publicmethods.cpp @@ -285,7 +285,7 @@ bool CJS_PublicMethods::MaskSatisfied(wchar_t c_Change, wchar_t c_Mask) { case L'A': return FXSYS_iswASCIIalpha(c_Change); case L'O': - return FXSYS_iswASCIIalnum(c_Change); + return isascii(c_Change) && isalnum(c_Change); case L'X': return true; default: diff --git a/fxjs/cjs_util.cpp b/fxjs/cjs_util.cpp index d552fcdf34..d73c238488 100644 --- a/fxjs/cjs_util.cpp +++ b/fxjs/cjs_util.cpp @@ -311,7 +311,7 @@ WideString CJS_Util::printx(const WideString& wsFormat, } break; case 'X': { if (iSourceIdx < wsSource.GetLength()) { - if (FXSYS_iswASCIIalnum(wsSource[iSourceIdx])) { + if (isascii(wsSource[iSourceIdx]) && isalnum(wsSource[iSourceIdx])) { wsResult += TranslateCase(wsSource[iSourceIdx], eCaseMode); ++iFormatIdx; } -- cgit v1.2.3