summaryrefslogtreecommitdiff
path: root/core/src/fpdftext/unicodenormalization.cpp
blob: 7b0183058278accacaf7e64b9739d1598b65a4e9 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
// Copyright 2014 PDFium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com

#include "../../include/fpdftext/fpdf_text.h"
// Lookup tables used by FX_Unicode_GetNormalization(). They are only declared
// here; their definitions are not in the visible part of this file.
//
// g_UnicodeData_Normalization is indexed by a character masked to 16 bits.
// An entry of 0 means "no mapping"; any other entry is a packed value that
// selects one of the maps below plus an offset into it:
//   entry >= 0x8000 : selector 1, offset = entry - 0x8000
//   otherwise       : selector = entry >> 12, offset = entry & 0x0FFF
extern const FX_WCHAR g_UnicodeData_Normalization[];
extern const FX_WCHAR g_UnicodeData_Normalization_Map1[];
extern const FX_WCHAR g_UnicodeData_Normalization_Map2[];
extern const FX_WCHAR g_UnicodeData_Normalization_Map3[];
extern const FX_WCHAR g_UnicodeData_Normalization_Map4[];
// Map pointers indexed by the selector decoded from a packed entry. Slot 0 is
// a NULL placeholder so that selectors 1..4 index Map1..Map4 directly.
// For every map except Map4 the selector is also used as the number of
// characters read at the offset; a Map4 record instead begins with its own
// length, followed by that many characters.
const FX_WCHAR* g_UnicodeData_Normalization_Maps[5] = {
    NULL,
    g_UnicodeData_Normalization_Map1,
    g_UnicodeData_Normalization_Map2,
    g_UnicodeData_Normalization_Map3,
    g_UnicodeData_Normalization_Map4
};
// Looks up the normalized replacement for a single character.
//
// wch  - the character to look up; only its low 16 bits are used.
// pDst - optional output buffer. When non-null, the replacement characters
//        are written to it; the caller must provide enough room (call once
//        with a null pDst to obtain the required count). When null, nothing
//        is written.
//
// Returns the number of characters in the replacement. A character with no
// table entry maps to itself (masked to 16 bits) and yields 1.
FX_STRSIZE FX_Unicode_GetNormalization(FX_WCHAR wch, FX_WCHAR* pDst)
{
    const FX_WCHAR wCode = wch & 0xFFFF;
    const FX_WCHAR wEntry = g_UnicodeData_Normalization[wCode];

    // A zero entry means there is no mapping: the character stands for itself.
    if (wEntry == 0) {
        if (pDst) {
            pDst[0] = wCode;
        }
        return 1;
    }

    // Unpack the entry into a map selector (which doubles as the replacement
    // length for every map except Map4) and an offset into that map.
    FX_WCHAR wCount;
    FX_WCHAR wOffset;
    if (wEntry < 0x8000) {
        wCount = wEntry >> 12;
        wOffset = wEntry & 0x0FFF;
    } else {
        wCount = 1;
        wOffset = wEntry - 0x8000;
    }

    // NOTE(review): the selector is not range-checked against the map table;
    // this presumably relies on the table data only encoding valid selectors
    // -- confirm against the table definitions.
    const FX_WCHAR* pSrc = g_UnicodeData_Normalization_Maps[wCount];
    const bool bLengthPrefixed = (pSrc == g_UnicodeData_Normalization_Map4);
    pSrc += wOffset;
    if (bLengthPrefixed) {
        // Map4 records carry their own length in the first element.
        wCount = *pSrc;
        ++pSrc;
    }

    if (pDst) {
        for (FX_WCHAR wLeft = wCount; wLeft != 0; --wLeft) {
            *pDst = *pSrc;
            ++pDst;
            ++pSrc;
        }
    }
    return static_cast<FX_STRSIZE>(wCount);
}
// Normalizes every character of wsSrc in order via
// FX_Unicode_GetNormalization().
//
// wsSrc - the source string.
// pDst  - optional output buffer. When non-null, the replacement characters
//         are written to it back to back; no capacity check is performed, so
//         the caller must size it beforehand (a call with a null pDst returns
//         the required count). When null, the characters are only counted.
//
// Returns the total number of characters in the normalized result.
FX_STRSIZE FX_WideString_GetNormalization(FX_WSTR wsSrc, FX_WCHAR* pDst)
{
    FX_STRSIZE nWritten = 0;
    FX_STRSIZE iSrc = 0;
    while (iSrc < wsSrc.GetLength()) {
        // Advance the write position only when there is a buffer to write to;
        // a null destination is passed through unchanged.
        FX_WCHAR* pOut = pDst ? pDst + nWritten : pDst;
        nWritten += FX_Unicode_GetNormalization(wsSrc.GetAt(iSrc), pOut);
        ++iSrc;
    }
    return nWritten;
}
// Normalizes wsSrc into wsDst using two passes: the first only measures the
// result, the second writes it into a buffer obtained from wsDst.
//
// wsSrc - the source string.
// wsDst - receives the normalized text. It is left untouched when the
//         normalized result is empty.
//
// Returns the number of characters in the normalized result.
FX_STRSIZE FX_WideString_GetNormalization(FX_WSTR wsSrc, CFX_WideString &wsDst)
{
    // A typed null pointer selects the raw-buffer overload in counting mode.
    FX_WCHAR* const pCountOnly = NULL;
    const FX_STRSIZE nTotal = FX_WideString_GetNormalization(wsSrc, pCountOnly);
    if (nTotal) {
        FX_WideString_GetNormalization(wsSrc, wsDst.GetBuffer(nTotal));
        wsDst.ReleaseBuffer(nTotal);
    }
    return nTotal;
}