summaryrefslogtreecommitdiff
path: root/core/src/fpdftext/unicodenormalization.cpp
blob: b92787b95200e8034a57e37c79057e3889f2182b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
// Copyright 2014 PDFium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
 
// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com

#include "../../include/fpdftext/fpdf_text.h"
extern const FX_WCHAR g_UnicodeData_Normalization[65536];
extern const FX_WCHAR g_UnicodeData_Normalization_Map1[5376];
extern const FX_WCHAR g_UnicodeData_Normalization_Map2[1734];
extern const FX_WCHAR g_UnicodeData_Normalization_Map3[1164];
extern const FX_WCHAR g_UnicodeData_Normalization_Map4[488];
FX_LPCWSTR g_UnicodeData_Normalization_Maps[5] = {
    NULL,
    g_UnicodeData_Normalization_Map1,
    g_UnicodeData_Normalization_Map2,
    g_UnicodeData_Normalization_Map3,
    g_UnicodeData_Normalization_Map4
};
FX_STRSIZE FX_Unicode_GetNormalization(FX_WCHAR wch, FX_LPWSTR pDst)
{
    wch = wch & 0xFFFF;
    FX_WCHAR wFind = g_UnicodeData_Normalization[wch];
    if (!wFind) {
        if (pDst) {
            *pDst = wch;
        }
        return 1;
    }
    if(wFind >= 0x8000) {
        wch = wFind - 0x8000;
        wFind = 1;
    } else {
        wch = wFind & 0x0FFF;
        wFind >>= 12;
    }
    FX_LPCWSTR pMap = g_UnicodeData_Normalization_Maps[wFind];
    if (pMap == g_UnicodeData_Normalization_Map4) {
        pMap = g_UnicodeData_Normalization_Map4 + wch;
        wFind = (FX_WCHAR)(*pMap ++);
    } else {
        pMap += wch;
    }
    if (pDst) {
        FX_WCHAR n = wFind;
        while (n --) {
            *pDst ++ = *pMap ++;
        }
    }
    return (FX_STRSIZE)wFind;
}
FX_STRSIZE FX_WideString_GetNormalization(FX_WSTR wsSrc, FX_LPWSTR pDst)
{
    FX_STRSIZE nCount = 0;
    for (FX_STRSIZE len = 0; len < wsSrc.GetLength(); len ++) {
        FX_WCHAR wch = wsSrc.GetAt(len);
        if(pDst) {
            nCount += FX_Unicode_GetNormalization(wch, pDst + nCount);
        } else {
            nCount += FX_Unicode_GetNormalization(wch, pDst);
        }
    }
    return nCount;
}
FX_STRSIZE FX_WideString_GetNormalization(FX_WSTR wsSrc, CFX_WideString &wsDst)
{
    FX_STRSIZE nLen = FX_WideString_GetNormalization(wsSrc, (FX_LPWSTR)NULL);
    if (!nLen) {
        return 0;
    }
    FX_LPWSTR pBuf = wsDst.GetBuffer(nLen);
    FX_WideString_GetNormalization(wsSrc, pBuf);
    wsDst.ReleaseBuffer(nLen);
    return nLen;
}