diff options
Diffstat (limited to 'pdf')
38 files changed, 25526 insertions, 0 deletions
diff --git a/pdf/COPYING b/pdf/COPYING new file mode 100644 index 00000000..94a9ed02 --- /dev/null +++ b/pdf/COPYING @@ -0,0 +1,674 @@ + GNU GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/> + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The GNU General Public License is a free, copyleft license for +software and other kinds of works. + + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. By contrast, +the GNU General Public License is intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. We, the Free Software Foundation, use the +GNU General Public License for most of our software; it applies also to +any other work released this way by its authors. You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + To protect your rights, we need to prevent others from denying you +these rights or asking you to surrender the rights. Therefore, you have +certain responsibilities if you distribute copies of the software, or if +you modify it: responsibilities to respect the freedom of others. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must pass on to the recipients the same +freedoms that you received. You must make sure that they, too, receive +or can get the source code. 
And you must show them these terms so they +know their rights. + + Developers that use the GNU GPL protect your rights with two steps: +(1) assert copyright on the software, and (2) offer you this License +giving you legal permission to copy, distribute and/or modify it. + + For the developers' and authors' protection, the GPL clearly explains +that there is no warranty for this free software. For both users' and +authors' sake, the GPL requires that modified versions be marked as +changed, so that their problems will not be attributed erroneously to +authors of previous versions. + + Some devices are designed to deny users access to install or run +modified versions of the software inside them, although the manufacturer +can do so. This is fundamentally incompatible with the aim of +protecting users' freedom to change the software. The systematic +pattern of such abuse occurs in the area of products for individuals to +use, which is precisely where it is most unacceptable. Therefore, we +have designed this version of the GPL to prohibit the practice for those +products. If such problems arise substantially in other domains, we +stand ready to extend this provision to those domains in future versions +of the GPL, as needed to protect the freedom of users. + + Finally, every program is threatened constantly by software patents. +States should not allow patents to restrict development and use of +software on general-purpose computers, but in those that do, we wish to +avoid the special danger that patents applied to a free program could +make it effectively proprietary. To prevent this, the GPL assures that +patents cannot be used to render the program non-free. + + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU General Public License. 
+ + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. + + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. 
+ + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. + + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. + + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. 
Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. 
+ + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. + + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. + + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. 
This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. + + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. + + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. 
+ + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. 
+ + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. + + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. 
+ + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. + + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. 
If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. + + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. 
If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. + + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. + + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. 
+ + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. 
For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. + + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. 
+ + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. 
You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Use with the GNU Affero General Public License. + + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU Affero General Public License into a single +combined work, and to convey the resulting work. 
The terms of this +License will continue to apply to the part which is the covered work, +but the special requirements of the GNU Affero General Public License, +section 13, concerning interaction through a network will apply to the +combination as such. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU General Public License, you may choose any version ever published +by the Free Software Foundation. + + If the Program specifies that a proxy can decide which future +versions of the GNU General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. 
THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. + + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. 
+ + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + +Also add information on how to contact you by electronic and paper mail. + + If the program does terminal interaction, make it output a short +notice like this when it starts in an interactive mode: + + <program> Copyright (C) <year> <name of author> + This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, your program's commands +might be different; for a GUI interface, you would use an "about box". + + You should also get your employer (if you work as a programmer) or school, +if any, to sign a "copyright disclaimer" for the program, if necessary. +For more information on this, and how to apply and follow the GNU GPL, see +<http://www.gnu.org/licenses/>. + + The GNU General Public License does not permit incorporating your program +into proprietary programs. If your program is a subroutine library, you +may consider it more useful to permit linking proprietary applications with +the library. 
If this is what you want to do, use the GNU Lesser General +Public License instead of this License. But first, please read +<http://www.gnu.org/philosophy/why-not-lgpl.html>. diff --git a/pdf/cmapdump.c b/pdf/cmapdump.c new file mode 100644 index 00000000..5c39b647 --- /dev/null +++ b/pdf/cmapdump.c @@ -0,0 +1,161 @@ +/* cmapdump.c -- parse a CMap file and dump it as a c-struct */ + +#include <stdio.h> +#include <string.h> + +#include "fitz.h" +#include "mupdf.h" + +#include "../fitz/base_error.c" +#include "../fitz/base_memory.c" +#include "../fitz/base_string.c" +#include "../fitz/stm_buffer.c" +#include "../fitz/stm_open.c" +#include "../fitz/stm_read.c" + +#include "../pdf/pdf_lex.c" +#include "../pdf/pdf_cmap.c" +#include "../pdf/pdf_cmap_parse.c" + +static void +clean(char *p) +{ + while (*p) + { + if ((*p == '/') || (*p == '.') || (*p == '\\') || (*p == '-')) + *p = '_'; + p ++; + } +} + +int +main(int argc, char **argv) +{ + pdf_cmap *cmap; + fz_error error; + fz_stream *fi; + FILE *fo; + char name[256]; + char *realname; + int i, k; + + if (argc < 3) + { + fprintf(stderr, "usage: cmapdump output.c lots of cmap files\n"); + return 1; + } + + fo = fopen(argv[1], "wb"); + if (!fo) + { + fprintf(stderr, "cmapdump: could not open output file '%s'\n", argv[1]); + return 1; + } + + fprintf(fo, "#include \"fitz.h\"\n"); + fprintf(fo, "#include \"mupdf.h\"\n"); + fprintf(fo, "\n"); + + for (i = 2; i < argc; i++) + { + realname = strrchr(argv[i], '/'); + if (!realname) + realname = strrchr(argv[i], '\\'); + if (realname) + realname ++; + else + realname = argv[i]; + + if (strlen(realname) > (sizeof name - 1)) + { + fprintf(stderr, "cmapdump: file name too long\n"); + return 1; + } + + strcpy(name, realname); + clean(name); + + fi = fz_openfile(argv[i]); + if (!fi) + fz_throw("cmapdump: could not open input file '%s'\n", argv[i]); + + error = pdf_parsecmap(&cmap, fi); + if (error) + { + fz_catch(error, "cmapdump: could not parse input cmap '%s'\n", argv[i]); + return 
1; + } + + fprintf(fo, "/* %s */\n\n", cmap->cmapname); + + fprintf(fo, "static const pdf_range pdf_cmap_%s_ranges[] =\n{\n", name); + if (cmap->rlen == 0) + { + fprintf(fo, "\t/* dummy entry for non-c99 compilers */\n"); + fprintf(fo, "\t{ 0x0, %d, 0 }\n", PDF_CMAP_RANGE); + } + for (k = 0; k < cmap->rlen; k++) + { + fprintf(fo, "\t{ 0x%04x, 0x%04x, %d },\n", + cmap->ranges[k].low, cmap->ranges[k].extentflags, cmap->ranges[k].offset); + } + fprintf(fo, "};\n\n"); + + if (cmap->tlen == 0) + { + fprintf(fo, "static const unsigned short pdf_cmap_%s_table[1] = { 0 };\n\n", name); + } + else + { + fprintf(fo, "static const unsigned short pdf_cmap_%s_table[%d] =\n{", + name, cmap->tlen); + for (k = 0; k < cmap->tlen; k++) + { + if (k % 8 == 0) + fprintf(fo, "\n\t"); + fprintf(fo, "%d,", cmap->table[k]); + } + fprintf(fo, "\n};\n\n"); + } + + fprintf(fo, "pdf_cmap pdf_cmap_%s =\n", name); + fprintf(fo, "{\n"); + fprintf(fo, "\t-1, "); + fprintf(fo, "\"%s\", ", cmap->cmapname); + fprintf(fo, "\"%s\", nil, ", cmap->usecmapname); + fprintf(fo, "%d,\n", cmap->wmode); + + fprintf(fo, "\t%d, /* codespace table */\n", cmap->ncspace); + fprintf(fo, "\t{\n"); + + if (cmap->ncspace == 0) + { + fprintf(fo, "\t/* dummy entry for non-c99 compilers */\n"); + fprintf(fo, "\t{ 0, 0x0, 0x0 },\n"); + } + for (k = 0; k < cmap->ncspace; k++) + { + fprintf(fo, "\t\t{ %d, 0x%04x, 0x%04x },\n", + cmap->cspace[k].n, cmap->cspace[k].low, cmap->cspace[k].high); + } + fprintf(fo, "\t},\n"); + + fprintf(fo, "\t%d, %d, (pdf_range*) pdf_cmap_%s_ranges,\n", + cmap->rlen, cmap->rlen, name); + + fprintf(fo, "\t%d, %d, (unsigned short*) pdf_cmap_%s_table,\n", + cmap->tlen, cmap->tlen, name); + + fprintf(fo, "};\n\n"); + + fz_close(fi); + } + + if (fclose(fo)) + { + fprintf(stderr, "cmapdump: could not close output file '%s'\n", argv[1]); + return 1; + } + + return 0; +} diff --git a/pdf/fontdump.c b/pdf/fontdump.c new file mode 100644 index 00000000..efda0b01 --- /dev/null +++ b/pdf/fontdump.c @@ -0,0 
/* fontdump.c -- an "xxd -i" workalike for dumping binary fonts as source code */

#include <stdio.h>
#include <string.h>

/*
 * Copy the rest of fi to fo as "0x%02x," tokens, breaking the line after
 * every 16th byte. Returns the number of bytes dumped.
 */
static int
hexdump(FILE *fo, FILE *fi)
{
	int c, n;

	n = 0;
	/* Compare against EOF rather than a literal -1. */
	while ((c = fgetc(fi)) != EOF)
	{
		fprintf(fo, "0x%02x,", c);
		if (n % 16 == 15)
			fprintf(fo, "\n");
		n ++;
	}

	return n;
}

/*
 * usage: fontdump output.c input.dat...
 *
 * For every input file, append to output.c a pdf_font_<name>_len constant
 * and a pdf_font_<name>_buf symbol holding the file's bytes: via an
 * assembler .incbin when HAVE_INCBIN is defined, otherwise as a hex-dumped
 * array. <name> is the input's base name with '/', '.', '\\' and '-'
 * replaced by '_'.
 *
 * Returns 0 on success and 1 on any failure.
 */
int
main(int argc, char **argv)
{
	FILE *fo;
	FILE *fi;
	char name[256];
	char *basename;
	char *p;
	long size;
	int i, len;

	if (argc < 3)
	{
		fprintf(stderr, "usage: fontdump output.c input.dat\n");
		return 1;
	}

	fo = fopen(argv[1], "wb");
	if (!fo)
	{
		fprintf(stderr, "fontdump: could not open output file '%s'\n", argv[1]);
		return 1;
	}

	fprintf(fo, "#ifndef __STRICT_ANSI__\n");
	fprintf(fo, "#if defined(__linux__) || defined(__FreeBSD__)\n");
	fprintf(fo, "#define HAVE_INCBIN\n");
	fprintf(fo, "#endif\n");
	fprintf(fo, "#endif\n\n");

	for (i = 2; i < argc; i++)
	{
		fi = fopen(argv[i], "rb");
		if (!fi)
		{
			fclose(fo);
			fprintf(stderr, "fontdump: could not open input file '%s'\n", argv[i]);
			return 1;
		}

		/* Strip the directory part; accept both '/' and '\\' separators. */
		basename = strrchr(argv[i], '/');
		if (!basename)
			basename = strrchr(argv[i], '\\');
		if (basename)
			basename++;
		else
			basename = argv[i];

		/* The copy below is unbounded, so reject names that cannot fit. */
		if (strlen(basename) > (sizeof name - 1))
		{
			fclose(fi);
			fclose(fo);
			fprintf(stderr, "fontdump: file name too long\n");
			return 1;
		}

		strcpy(name, basename);
		for (p = name; *p; p++)
		{
			if ((*p == '/') || (*p == '.') || (*p == '\\') || (*p == '-'))
				*p = '_';
		}

		/* Measure the file by seeking to its end, then rewind for the dump. */
		size = -1;
		if (fseek(fi, 0, SEEK_END) == 0)
			size = ftell(fi);
		if (size < 0 || fseek(fi, 0, SEEK_SET) != 0)
		{
			fclose(fi);
			fclose(fo);
			fprintf(stderr, "fontdump: could not determine size of input file '%s'\n", argv[i]);
			return 1;
		}
		/* len stays an int to match the %d conversions below. */
		len = (int) size;

		fprintf(fo, "const unsigned int pdf_font_%s_len = %d;\n", name, len);

		fprintf(fo, "#ifdef HAVE_INCBIN\n");
		fprintf(fo, "asm(\".globl pdf_font_%s_buf\");\n", name);
		fprintf(fo, "asm(\".balign 8\");\n");
		fprintf(fo, "asm(\"pdf_font_%s_buf:\");\n", name);
		fprintf(fo, "asm(\".incbin \\\"%s\\\"\");\n", argv[i]);
		fprintf(fo, "#else\n");
		fprintf(fo, "const unsigned char pdf_font_%s_buf[%d] = {\n", name, len);
		hexdump(fo, fi);
		fprintf(fo, "};\n");
		fprintf(fo, "#endif\n");

		fclose(fi);
	}

	/* fclose flushes; a failure here means the output may be incomplete. */
	if (fclose(fo))
	{
		fprintf(stderr, "fontdump: could not close output file '%s'\n", argv[1]);
		return 1;
	}

	return 0;
}
(fclose(fo)) + { + fprintf(stderr, "fontdump: could not close output file '%s'\n", argv[1]); + return 1; + } + + return 0; +} diff --git a/pdf/glyphdump.py b/pdf/glyphdump.py new file mode 100755 index 00000000..25b2afcc --- /dev/null +++ b/pdf/glyphdump.py @@ -0,0 +1,126 @@ +#!/usr/bin/python + +import sys + +agl = [] +comments = [] +agltab = [] +aglmap = {} +aglnames = [] + +f = open("glyphlist.txt", "r") +for line in f.readlines(): + if line[0] == '#': + comments.append(line.strip()); + continue + line = line[:-1] + name, list = line.split(';') + list = map(lambda x: int(x, 16), list.split(' ')) + agl.append((name, list)) + +for name, ucslist in agl: + num = len(ucslist) + ucs = ucslist[0] + agltab.append((name, ucs)) + if ucs not in aglmap: + aglmap[ucs] = [] + aglmap[ucs].append(name) + +print "/*" +for line in comments: + print line +print "*/" +print + +agltab.sort() +print "static const struct { char *name; int ucs; }" +print "aglcodes[] = {" +for name, ucs in agltab: + print "{\"%s\", 0x%04X}," % (name, ucs) +print "};" +print + +keys = aglmap.keys() +keys.sort() +print "static const struct { int ucs; int ofs; }" +print "agldupcodes[] = {" +for ucs in keys: + namelist = aglmap[ucs] + ofs = len(aglnames) + if len(namelist) > 1: + print "{0x%04X, %d}," % (ucs, ofs) + for name in namelist: + aglnames.append(name) + aglnames.append(0) +print "};" +print + +print "static char *agldupnames[] = {" +for name in aglnames: + if name: + print ("\"%s\"," % name), + else: + print "0," +print "};" +print + +print """ +#include "fitz.h" +#include "mupdf.h" + +int pdf_lookupagl(char *name) +{ + char buf[64]; + char *p; + int l = 0; + int r = nelem(aglcodes) - 1; + + fz_strlcpy(buf, name, sizeof buf); + + /* kill anything after first period and underscore */ + p = strchr(buf, '.'); + if (p) p[0] = 0; + p = strchr(buf, '_'); + if (p) p[0] = 0; + + while (l <= r) + { + int m = (l + r) >> 1; + int c = strcmp(buf, aglcodes[m].name); + if (c < 0) + r = m - 1; + else if (c > 0) 
+ l = m + 1; + else + return aglcodes[m].ucs; + } + + if (strstr(buf, "uni") == buf) + return strtol(buf + 3, nil, 16); + else if (strstr(buf, "u") == buf) + return strtol(buf + 1, nil, 16); + else if (strstr(buf, "a") == buf && strlen(buf) >= 3) + return strtol(buf + 1, nil, 10); + + return 0; +} + +static char *aglnoname[1] = { 0 }; + +char **pdf_lookupaglnames(int ucs) +{ + int l = 0; + int r = nelem(agldupcodes) - 1; + while (l <= r) + { + int m = (l + r) >> 1; + if (ucs < agldupcodes[m].ucs) + r = m - 1; + else if (ucs > agldupcodes[m].ucs) + l = m + 1; + else + return agldupnames + agldupcodes[m].ofs; + } + return aglnoname; +} +""" diff --git a/pdf/glyphlist.txt b/pdf/glyphlist.txt new file mode 100644 index 00000000..b21e977d --- /dev/null +++ b/pdf/glyphlist.txt @@ -0,0 +1,4291 @@ +# Name: Adobe Glyph List +# Table version: 2.0 +# Date: September 20, 2002 +# +# See http://partners.adobe.com/asn/developer/typeforum/unicodegn.html +# +# Format: Semicolon-delimited fields: +# (1) glyph name +# (2) Unicode scalar value +A;0041 +AE;00C6 +AEacute;01FC +AEmacron;01E2 +AEsmall;F7E6 +Aacute;00C1 +Aacutesmall;F7E1 +Abreve;0102 +Abreveacute;1EAE +Abrevecyrillic;04D0 +Abrevedotbelow;1EB6 +Abrevegrave;1EB0 +Abrevehookabove;1EB2 +Abrevetilde;1EB4 +Acaron;01CD +Acircle;24B6 +Acircumflex;00C2 +Acircumflexacute;1EA4 +Acircumflexdotbelow;1EAC +Acircumflexgrave;1EA6 +Acircumflexhookabove;1EA8 +Acircumflexsmall;F7E2 +Acircumflextilde;1EAA +Acute;F6C9 +Acutesmall;F7B4 +Acyrillic;0410 +Adblgrave;0200 +Adieresis;00C4 +Adieresiscyrillic;04D2 +Adieresismacron;01DE +Adieresissmall;F7E4 +Adotbelow;1EA0 +Adotmacron;01E0 +Agrave;00C0 +Agravesmall;F7E0 +Ahookabove;1EA2 +Aiecyrillic;04D4 +Ainvertedbreve;0202 +Alpha;0391 +Alphatonos;0386 +Amacron;0100 +Amonospace;FF21 +Aogonek;0104 +Aring;00C5 +Aringacute;01FA +Aringbelow;1E00 +Aringsmall;F7E5 +Asmall;F761 +Atilde;00C3 +Atildesmall;F7E3 +Aybarmenian;0531 +B;0042 +Bcircle;24B7 +Bdotaccent;1E02 +Bdotbelow;1E04 +Becyrillic;0411 
+Benarmenian;0532 +Beta;0392 +Bhook;0181 +Blinebelow;1E06 +Bmonospace;FF22 +Brevesmall;F6F4 +Bsmall;F762 +Btopbar;0182 +C;0043 +Caarmenian;053E +Cacute;0106 +Caron;F6CA +Caronsmall;F6F5 +Ccaron;010C +Ccedilla;00C7 +Ccedillaacute;1E08 +Ccedillasmall;F7E7 +Ccircle;24B8 +Ccircumflex;0108 +Cdot;010A +Cdotaccent;010A +Cedillasmall;F7B8 +Chaarmenian;0549 +Cheabkhasiancyrillic;04BC +Checyrillic;0427 +Chedescenderabkhasiancyrillic;04BE +Chedescendercyrillic;04B6 +Chedieresiscyrillic;04F4 +Cheharmenian;0543 +Chekhakassiancyrillic;04CB +Cheverticalstrokecyrillic;04B8 +Chi;03A7 +Chook;0187 +Circumflexsmall;F6F6 +Cmonospace;FF23 +Coarmenian;0551 +Csmall;F763 +D;0044 +DZ;01F1 +DZcaron;01C4 +Daarmenian;0534 +Dafrican;0189 +Dcaron;010E +Dcedilla;1E10 +Dcircle;24B9 +Dcircumflexbelow;1E12 +Dcroat;0110 +Ddotaccent;1E0A +Ddotbelow;1E0C +Decyrillic;0414 +Deicoptic;03EE +Delta;2206 +Deltagreek;0394 +Dhook;018A +Dieresis;F6CB +DieresisAcute;F6CC +DieresisGrave;F6CD +Dieresissmall;F7A8 +Digammagreek;03DC +Djecyrillic;0402 +Dlinebelow;1E0E +Dmonospace;FF24 +Dotaccentsmall;F6F7 +Dslash;0110 +Dsmall;F764 +Dtopbar;018B +Dz;01F2 +Dzcaron;01C5 +Dzeabkhasiancyrillic;04E0 +Dzecyrillic;0405 +Dzhecyrillic;040F +E;0045 +Eacute;00C9 +Eacutesmall;F7E9 +Ebreve;0114 +Ecaron;011A +Ecedillabreve;1E1C +Echarmenian;0535 +Ecircle;24BA +Ecircumflex;00CA +Ecircumflexacute;1EBE +Ecircumflexbelow;1E18 +Ecircumflexdotbelow;1EC6 +Ecircumflexgrave;1EC0 +Ecircumflexhookabove;1EC2 +Ecircumflexsmall;F7EA +Ecircumflextilde;1EC4 +Ecyrillic;0404 +Edblgrave;0204 +Edieresis;00CB +Edieresissmall;F7EB +Edot;0116 +Edotaccent;0116 +Edotbelow;1EB8 +Efcyrillic;0424 +Egrave;00C8 +Egravesmall;F7E8 +Eharmenian;0537 +Ehookabove;1EBA +Eightroman;2167 +Einvertedbreve;0206 +Eiotifiedcyrillic;0464 +Elcyrillic;041B +Elevenroman;216A +Emacron;0112 +Emacronacute;1E16 +Emacrongrave;1E14 +Emcyrillic;041C +Emonospace;FF25 +Encyrillic;041D +Endescendercyrillic;04A2 +Eng;014A +Enghecyrillic;04A4 +Enhookcyrillic;04C7 +Eogonek;0118 +Eopen;0190 
+Epsilon;0395 +Epsilontonos;0388 +Ercyrillic;0420 +Ereversed;018E +Ereversedcyrillic;042D +Escyrillic;0421 +Esdescendercyrillic;04AA +Esh;01A9 +Esmall;F765 +Eta;0397 +Etarmenian;0538 +Etatonos;0389 +Eth;00D0 +Ethsmall;F7F0 +Etilde;1EBC +Etildebelow;1E1A +Euro;20AC +Ezh;01B7 +Ezhcaron;01EE +Ezhreversed;01B8 +F;0046 +Fcircle;24BB +Fdotaccent;1E1E +Feharmenian;0556 +Feicoptic;03E4 +Fhook;0191 +Fitacyrillic;0472 +Fiveroman;2164 +Fmonospace;FF26 +Fourroman;2163 +Fsmall;F766 +G;0047 +GBsquare;3387 +Gacute;01F4 +Gamma;0393 +Gammaafrican;0194 +Gangiacoptic;03EA +Gbreve;011E +Gcaron;01E6 +Gcedilla;0122 +Gcircle;24BC +Gcircumflex;011C +Gcommaaccent;0122 +Gdot;0120 +Gdotaccent;0120 +Gecyrillic;0413 +Ghadarmenian;0542 +Ghemiddlehookcyrillic;0494 +Ghestrokecyrillic;0492 +Gheupturncyrillic;0490 +Ghook;0193 +Gimarmenian;0533 +Gjecyrillic;0403 +Gmacron;1E20 +Gmonospace;FF27 +Grave;F6CE +Gravesmall;F760 +Gsmall;F767 +Gsmallhook;029B +Gstroke;01E4 +H;0048 +H18533;25CF +H18543;25AA +H18551;25AB +H22073;25A1 +HPsquare;33CB +Haabkhasiancyrillic;04A8 +Hadescendercyrillic;04B2 +Hardsigncyrillic;042A +Hbar;0126 +Hbrevebelow;1E2A +Hcedilla;1E28 +Hcircle;24BD +Hcircumflex;0124 +Hdieresis;1E26 +Hdotaccent;1E22 +Hdotbelow;1E24 +Hmonospace;FF28 +Hoarmenian;0540 +Horicoptic;03E8 +Hsmall;F768 +Hungarumlaut;F6CF +Hungarumlautsmall;F6F8 +Hzsquare;3390 +I;0049 +IAcyrillic;042F +IJ;0132 +IUcyrillic;042E +Iacute;00CD +Iacutesmall;F7ED +Ibreve;012C +Icaron;01CF +Icircle;24BE +Icircumflex;00CE +Icircumflexsmall;F7EE +Icyrillic;0406 +Idblgrave;0208 +Idieresis;00CF +Idieresisacute;1E2E +Idieresiscyrillic;04E4 +Idieresissmall;F7EF +Idot;0130 +Idotaccent;0130 +Idotbelow;1ECA +Iebrevecyrillic;04D6 +Iecyrillic;0415 +Ifraktur;2111 +Igrave;00CC +Igravesmall;F7EC +Ihookabove;1EC8 +Iicyrillic;0418 +Iinvertedbreve;020A +Iishortcyrillic;0419 +Imacron;012A +Imacroncyrillic;04E2 +Imonospace;FF29 +Iniarmenian;053B +Iocyrillic;0401 +Iogonek;012E +Iota;0399 +Iotaafrican;0196 +Iotadieresis;03AA +Iotatonos;038A 
+Ismall;F769 +Istroke;0197 +Itilde;0128 +Itildebelow;1E2C +Izhitsacyrillic;0474 +Izhitsadblgravecyrillic;0476 +J;004A +Jaarmenian;0541 +Jcircle;24BF +Jcircumflex;0134 +Jecyrillic;0408 +Jheharmenian;054B +Jmonospace;FF2A +Jsmall;F76A +K;004B +KBsquare;3385 +KKsquare;33CD +Kabashkircyrillic;04A0 +Kacute;1E30 +Kacyrillic;041A +Kadescendercyrillic;049A +Kahookcyrillic;04C3 +Kappa;039A +Kastrokecyrillic;049E +Kaverticalstrokecyrillic;049C +Kcaron;01E8 +Kcedilla;0136 +Kcircle;24C0 +Kcommaaccent;0136 +Kdotbelow;1E32 +Keharmenian;0554 +Kenarmenian;053F +Khacyrillic;0425 +Kheicoptic;03E6 +Khook;0198 +Kjecyrillic;040C +Klinebelow;1E34 +Kmonospace;FF2B +Koppacyrillic;0480 +Koppagreek;03DE +Ksicyrillic;046E +Ksmall;F76B +L;004C +LJ;01C7 +LL;F6BF +Lacute;0139 +Lambda;039B +Lcaron;013D +Lcedilla;013B +Lcircle;24C1 +Lcircumflexbelow;1E3C +Lcommaaccent;013B +Ldot;013F +Ldotaccent;013F +Ldotbelow;1E36 +Ldotbelowmacron;1E38 +Liwnarmenian;053C +Lj;01C8 +Ljecyrillic;0409 +Llinebelow;1E3A +Lmonospace;FF2C +Lslash;0141 +Lslashsmall;F6F9 +Lsmall;F76C +M;004D +MBsquare;3386 +Macron;F6D0 +Macronsmall;F7AF +Macute;1E3E +Mcircle;24C2 +Mdotaccent;1E40 +Mdotbelow;1E42 +Menarmenian;0544 +Mmonospace;FF2D +Msmall;F76D +Mturned;019C +Mu;039C +N;004E +NJ;01CA +Nacute;0143 +Ncaron;0147 +Ncedilla;0145 +Ncircle;24C3 +Ncircumflexbelow;1E4A +Ncommaaccent;0145 +Ndotaccent;1E44 +Ndotbelow;1E46 +Nhookleft;019D +Nineroman;2168 +Nj;01CB +Njecyrillic;040A +Nlinebelow;1E48 +Nmonospace;FF2E +Nowarmenian;0546 +Nsmall;F76E +Ntilde;00D1 +Ntildesmall;F7F1 +Nu;039D +O;004F +OE;0152 +OEsmall;F6FA +Oacute;00D3 +Oacutesmall;F7F3 +Obarredcyrillic;04E8 +Obarreddieresiscyrillic;04EA +Obreve;014E +Ocaron;01D1 +Ocenteredtilde;019F +Ocircle;24C4 +Ocircumflex;00D4 +Ocircumflexacute;1ED0 +Ocircumflexdotbelow;1ED8 +Ocircumflexgrave;1ED2 +Ocircumflexhookabove;1ED4 +Ocircumflexsmall;F7F4 +Ocircumflextilde;1ED6 +Ocyrillic;041E +Odblacute;0150 +Odblgrave;020C +Odieresis;00D6 +Odieresiscyrillic;04E6 +Odieresissmall;F7F6 
+Odotbelow;1ECC +Ogoneksmall;F6FB +Ograve;00D2 +Ogravesmall;F7F2 +Oharmenian;0555 +Ohm;2126 +Ohookabove;1ECE +Ohorn;01A0 +Ohornacute;1EDA +Ohorndotbelow;1EE2 +Ohorngrave;1EDC +Ohornhookabove;1EDE +Ohorntilde;1EE0 +Ohungarumlaut;0150 +Oi;01A2 +Oinvertedbreve;020E +Omacron;014C +Omacronacute;1E52 +Omacrongrave;1E50 +Omega;2126 +Omegacyrillic;0460 +Omegagreek;03A9 +Omegaroundcyrillic;047A +Omegatitlocyrillic;047C +Omegatonos;038F +Omicron;039F +Omicrontonos;038C +Omonospace;FF2F +Oneroman;2160 +Oogonek;01EA +Oogonekmacron;01EC +Oopen;0186 +Oslash;00D8 +Oslashacute;01FE +Oslashsmall;F7F8 +Osmall;F76F +Ostrokeacute;01FE +Otcyrillic;047E +Otilde;00D5 +Otildeacute;1E4C +Otildedieresis;1E4E +Otildesmall;F7F5 +P;0050 +Pacute;1E54 +Pcircle;24C5 +Pdotaccent;1E56 +Pecyrillic;041F +Peharmenian;054A +Pemiddlehookcyrillic;04A6 +Phi;03A6 +Phook;01A4 +Pi;03A0 +Piwrarmenian;0553 +Pmonospace;FF30 +Psi;03A8 +Psicyrillic;0470 +Psmall;F770 +Q;0051 +Qcircle;24C6 +Qmonospace;FF31 +Qsmall;F771 +R;0052 +Raarmenian;054C +Racute;0154 +Rcaron;0158 +Rcedilla;0156 +Rcircle;24C7 +Rcommaaccent;0156 +Rdblgrave;0210 +Rdotaccent;1E58 +Rdotbelow;1E5A +Rdotbelowmacron;1E5C +Reharmenian;0550 +Rfraktur;211C +Rho;03A1 +Ringsmall;F6FC +Rinvertedbreve;0212 +Rlinebelow;1E5E +Rmonospace;FF32 +Rsmall;F772 +Rsmallinverted;0281 +Rsmallinvertedsuperior;02B6 +S;0053 +SF010000;250C +SF020000;2514 +SF030000;2510 +SF040000;2518 +SF050000;253C +SF060000;252C +SF070000;2534 +SF080000;251C +SF090000;2524 +SF100000;2500 +SF110000;2502 +SF190000;2561 +SF200000;2562 +SF210000;2556 +SF220000;2555 +SF230000;2563 +SF240000;2551 +SF250000;2557 +SF260000;255D +SF270000;255C +SF280000;255B +SF360000;255E +SF370000;255F +SF380000;255A +SF390000;2554 +SF400000;2569 +SF410000;2566 +SF420000;2560 +SF430000;2550 +SF440000;256C +SF450000;2567 +SF460000;2568 +SF470000;2564 +SF480000;2565 +SF490000;2559 +SF500000;2558 +SF510000;2552 +SF520000;2553 +SF530000;256B +SF540000;256A +Sacute;015A +Sacutedotaccent;1E64 +Sampigreek;03E0 
+Scaron;0160 +Scarondotaccent;1E66 +Scaronsmall;F6FD +Scedilla;015E +Schwa;018F +Schwacyrillic;04D8 +Schwadieresiscyrillic;04DA +Scircle;24C8 +Scircumflex;015C +Scommaaccent;0218 +Sdotaccent;1E60 +Sdotbelow;1E62 +Sdotbelowdotaccent;1E68 +Seharmenian;054D +Sevenroman;2166 +Shaarmenian;0547 +Shacyrillic;0428 +Shchacyrillic;0429 +Sheicoptic;03E2 +Shhacyrillic;04BA +Shimacoptic;03EC +Sigma;03A3 +Sixroman;2165 +Smonospace;FF33 +Softsigncyrillic;042C +Ssmall;F773 +Stigmagreek;03DA +T;0054 +Tau;03A4 +Tbar;0166 +Tcaron;0164 +Tcedilla;0162 +Tcircle;24C9 +Tcircumflexbelow;1E70 +Tcommaaccent;0162 +Tdotaccent;1E6A +Tdotbelow;1E6C +Tecyrillic;0422 +Tedescendercyrillic;04AC +Tenroman;2169 +Tetsecyrillic;04B4 +Theta;0398 +Thook;01AC +Thorn;00DE +Thornsmall;F7FE +Threeroman;2162 +Tildesmall;F6FE +Tiwnarmenian;054F +Tlinebelow;1E6E +Tmonospace;FF34 +Toarmenian;0539 +Tonefive;01BC +Tonesix;0184 +Tonetwo;01A7 +Tretroflexhook;01AE +Tsecyrillic;0426 +Tshecyrillic;040B +Tsmall;F774 +Twelveroman;216B +Tworoman;2161 +U;0055 +Uacute;00DA +Uacutesmall;F7FA +Ubreve;016C +Ucaron;01D3 +Ucircle;24CA +Ucircumflex;00DB +Ucircumflexbelow;1E76 +Ucircumflexsmall;F7FB +Ucyrillic;0423 +Udblacute;0170 +Udblgrave;0214 +Udieresis;00DC +Udieresisacute;01D7 +Udieresisbelow;1E72 +Udieresiscaron;01D9 +Udieresiscyrillic;04F0 +Udieresisgrave;01DB +Udieresismacron;01D5 +Udieresissmall;F7FC +Udotbelow;1EE4 +Ugrave;00D9 +Ugravesmall;F7F9 +Uhookabove;1EE6 +Uhorn;01AF +Uhornacute;1EE8 +Uhorndotbelow;1EF0 +Uhorngrave;1EEA +Uhornhookabove;1EEC +Uhorntilde;1EEE +Uhungarumlaut;0170 +Uhungarumlautcyrillic;04F2 +Uinvertedbreve;0216 +Ukcyrillic;0478 +Umacron;016A +Umacroncyrillic;04EE +Umacrondieresis;1E7A +Umonospace;FF35 +Uogonek;0172 +Upsilon;03A5 +Upsilon1;03D2 +Upsilonacutehooksymbolgreek;03D3 +Upsilonafrican;01B1 +Upsilondieresis;03AB +Upsilondieresishooksymbolgreek;03D4 +Upsilonhooksymbol;03D2 +Upsilontonos;038E +Uring;016E +Ushortcyrillic;040E +Usmall;F775 +Ustraightcyrillic;04AE +Ustraightstrokecyrillic;04B0 
+Utilde;0168 +Utildeacute;1E78 +Utildebelow;1E74 +V;0056 +Vcircle;24CB +Vdotbelow;1E7E +Vecyrillic;0412 +Vewarmenian;054E +Vhook;01B2 +Vmonospace;FF36 +Voarmenian;0548 +Vsmall;F776 +Vtilde;1E7C +W;0057 +Wacute;1E82 +Wcircle;24CC +Wcircumflex;0174 +Wdieresis;1E84 +Wdotaccent;1E86 +Wdotbelow;1E88 +Wgrave;1E80 +Wmonospace;FF37 +Wsmall;F777 +X;0058 +Xcircle;24CD +Xdieresis;1E8C +Xdotaccent;1E8A +Xeharmenian;053D +Xi;039E +Xmonospace;FF38 +Xsmall;F778 +Y;0059 +Yacute;00DD +Yacutesmall;F7FD +Yatcyrillic;0462 +Ycircle;24CE +Ycircumflex;0176 +Ydieresis;0178 +Ydieresissmall;F7FF +Ydotaccent;1E8E +Ydotbelow;1EF4 +Yericyrillic;042B +Yerudieresiscyrillic;04F8 +Ygrave;1EF2 +Yhook;01B3 +Yhookabove;1EF6 +Yiarmenian;0545 +Yicyrillic;0407 +Yiwnarmenian;0552 +Ymonospace;FF39 +Ysmall;F779 +Ytilde;1EF8 +Yusbigcyrillic;046A +Yusbigiotifiedcyrillic;046C +Yuslittlecyrillic;0466 +Yuslittleiotifiedcyrillic;0468 +Z;005A +Zaarmenian;0536 +Zacute;0179 +Zcaron;017D +Zcaronsmall;F6FF +Zcircle;24CF +Zcircumflex;1E90 +Zdot;017B +Zdotaccent;017B +Zdotbelow;1E92 +Zecyrillic;0417 +Zedescendercyrillic;0498 +Zedieresiscyrillic;04DE +Zeta;0396 +Zhearmenian;053A +Zhebrevecyrillic;04C1 +Zhecyrillic;0416 +Zhedescendercyrillic;0496 +Zhedieresiscyrillic;04DC +Zlinebelow;1E94 +Zmonospace;FF3A +Zsmall;F77A +Zstroke;01B5 +a;0061 +aabengali;0986 +aacute;00E1 +aadeva;0906 +aagujarati;0A86 +aagurmukhi;0A06 +aamatragurmukhi;0A3E +aarusquare;3303 +aavowelsignbengali;09BE +aavowelsigndeva;093E +aavowelsigngujarati;0ABE +abbreviationmarkarmenian;055F +abbreviationsigndeva;0970 +abengali;0985 +abopomofo;311A +abreve;0103 +abreveacute;1EAF +abrevecyrillic;04D1 +abrevedotbelow;1EB7 +abrevegrave;1EB1 +abrevehookabove;1EB3 +abrevetilde;1EB5 +acaron;01CE +acircle;24D0 +acircumflex;00E2 +acircumflexacute;1EA5 +acircumflexdotbelow;1EAD +acircumflexgrave;1EA7 +acircumflexhookabove;1EA9 +acircumflextilde;1EAB +acute;00B4 +acutebelowcmb;0317 +acutecmb;0301 +acutecomb;0301 +acutedeva;0954 +acutelowmod;02CF +acutetonecmb;0341 
+acyrillic;0430 +adblgrave;0201 +addakgurmukhi;0A71 +adeva;0905 +adieresis;00E4 +adieresiscyrillic;04D3 +adieresismacron;01DF +adotbelow;1EA1 +adotmacron;01E1 +ae;00E6 +aeacute;01FD +aekorean;3150 +aemacron;01E3 +afii00208;2015 +afii08941;20A4 +afii10017;0410 +afii10018;0411 +afii10019;0412 +afii10020;0413 +afii10021;0414 +afii10022;0415 +afii10023;0401 +afii10024;0416 +afii10025;0417 +afii10026;0418 +afii10027;0419 +afii10028;041A +afii10029;041B +afii10030;041C +afii10031;041D +afii10032;041E +afii10033;041F +afii10034;0420 +afii10035;0421 +afii10036;0422 +afii10037;0423 +afii10038;0424 +afii10039;0425 +afii10040;0426 +afii10041;0427 +afii10042;0428 +afii10043;0429 +afii10044;042A +afii10045;042B +afii10046;042C +afii10047;042D +afii10048;042E +afii10049;042F +afii10050;0490 +afii10051;0402 +afii10052;0403 +afii10053;0404 +afii10054;0405 +afii10055;0406 +afii10056;0407 +afii10057;0408 +afii10058;0409 +afii10059;040A +afii10060;040B +afii10061;040C +afii10062;040E +afii10063;F6C4 +afii10064;F6C5 +afii10065;0430 +afii10066;0431 +afii10067;0432 +afii10068;0433 +afii10069;0434 +afii10070;0435 +afii10071;0451 +afii10072;0436 +afii10073;0437 +afii10074;0438 +afii10075;0439 +afii10076;043A +afii10077;043B +afii10078;043C +afii10079;043D +afii10080;043E +afii10081;043F +afii10082;0440 +afii10083;0441 +afii10084;0442 +afii10085;0443 +afii10086;0444 +afii10087;0445 +afii10088;0446 +afii10089;0447 +afii10090;0448 +afii10091;0449 +afii10092;044A +afii10093;044B +afii10094;044C +afii10095;044D +afii10096;044E +afii10097;044F +afii10098;0491 +afii10099;0452 +afii10100;0453 +afii10101;0454 +afii10102;0455 +afii10103;0456 +afii10104;0457 +afii10105;0458 +afii10106;0459 +afii10107;045A +afii10108;045B +afii10109;045C +afii10110;045E +afii10145;040F +afii10146;0462 +afii10147;0472 +afii10148;0474 +afii10192;F6C6 +afii10193;045F +afii10194;0463 +afii10195;0473 +afii10196;0475 +afii10831;F6C7 +afii10832;F6C8 +afii10846;04D9 +afii299;200E +afii300;200F +afii301;200D +afii57381;066A 
+afii57388;060C +afii57392;0660 +afii57393;0661 +afii57394;0662 +afii57395;0663 +afii57396;0664 +afii57397;0665 +afii57398;0666 +afii57399;0667 +afii57400;0668 +afii57401;0669 +afii57403;061B +afii57407;061F +afii57409;0621 +afii57410;0622 +afii57411;0623 +afii57412;0624 +afii57413;0625 +afii57414;0626 +afii57415;0627 +afii57416;0628 +afii57417;0629 +afii57418;062A +afii57419;062B +afii57420;062C +afii57421;062D +afii57422;062E +afii57423;062F +afii57424;0630 +afii57425;0631 +afii57426;0632 +afii57427;0633 +afii57428;0634 +afii57429;0635 +afii57430;0636 +afii57431;0637 +afii57432;0638 +afii57433;0639 +afii57434;063A +afii57440;0640 +afii57441;0641 +afii57442;0642 +afii57443;0643 +afii57444;0644 +afii57445;0645 +afii57446;0646 +afii57448;0648 +afii57449;0649 +afii57450;064A +afii57451;064B +afii57452;064C +afii57453;064D +afii57454;064E +afii57455;064F +afii57456;0650 +afii57457;0651 +afii57458;0652 +afii57470;0647 +afii57505;06A4 +afii57506;067E +afii57507;0686 +afii57508;0698 +afii57509;06AF +afii57511;0679 +afii57512;0688 +afii57513;0691 +afii57514;06BA +afii57519;06D2 +afii57534;06D5 +afii57636;20AA +afii57645;05BE +afii57658;05C3 +afii57664;05D0 +afii57665;05D1 +afii57666;05D2 +afii57667;05D3 +afii57668;05D4 +afii57669;05D5 +afii57670;05D6 +afii57671;05D7 +afii57672;05D8 +afii57673;05D9 +afii57674;05DA +afii57675;05DB +afii57676;05DC +afii57677;05DD +afii57678;05DE +afii57679;05DF +afii57680;05E0 +afii57681;05E1 +afii57682;05E2 +afii57683;05E3 +afii57684;05E4 +afii57685;05E5 +afii57686;05E6 +afii57687;05E7 +afii57688;05E8 +afii57689;05E9 +afii57690;05EA +afii57694;FB2A +afii57695;FB2B +afii57700;FB4B +afii57705;FB1F +afii57716;05F0 +afii57717;05F1 +afii57718;05F2 +afii57723;FB35 +afii57793;05B4 +afii57794;05B5 +afii57795;05B6 +afii57796;05BB +afii57797;05B8 +afii57798;05B7 +afii57799;05B0 +afii57800;05B2 +afii57801;05B1 +afii57802;05B3 +afii57803;05C2 +afii57804;05C1 +afii57806;05B9 +afii57807;05BC +afii57839;05BD +afii57841;05BF +afii57842;05C0 +afii57929;02BC 
+afii61248;2105 +afii61289;2113 +afii61352;2116 +afii61573;202C +afii61574;202D +afii61575;202E +afii61664;200C +afii63167;066D +afii64937;02BD +agrave;00E0 +agujarati;0A85 +agurmukhi;0A05 +ahiragana;3042 +ahookabove;1EA3 +aibengali;0990 +aibopomofo;311E +aideva;0910 +aiecyrillic;04D5 +aigujarati;0A90 +aigurmukhi;0A10 +aimatragurmukhi;0A48 +ainarabic;0639 +ainfinalarabic;FECA +aininitialarabic;FECB +ainmedialarabic;FECC +ainvertedbreve;0203 +aivowelsignbengali;09C8 +aivowelsigndeva;0948 +aivowelsigngujarati;0AC8 +akatakana;30A2 +akatakanahalfwidth;FF71 +akorean;314F +alef;05D0 +alefarabic;0627 +alefdageshhebrew;FB30 +aleffinalarabic;FE8E +alefhamzaabovearabic;0623 +alefhamzaabovefinalarabic;FE84 +alefhamzabelowarabic;0625 +alefhamzabelowfinalarabic;FE88 +alefhebrew;05D0 +aleflamedhebrew;FB4F +alefmaddaabovearabic;0622 +alefmaddaabovefinalarabic;FE82 +alefmaksuraarabic;0649 +alefmaksurafinalarabic;FEF0 +alefmaksurainitialarabic;FEF3 +alefmaksuramedialarabic;FEF4 +alefpatahhebrew;FB2E +alefqamatshebrew;FB2F +aleph;2135 +allequal;224C +alpha;03B1 +alphatonos;03AC +amacron;0101 +amonospace;FF41 +ampersand;0026 +ampersandmonospace;FF06 +ampersandsmall;F726 +amsquare;33C2 +anbopomofo;3122 +angbopomofo;3124 +angkhankhuthai;0E5A +angle;2220 +anglebracketleft;3008 +anglebracketleftvertical;FE3F +anglebracketright;3009 +anglebracketrightvertical;FE40 +angleleft;2329 +angleright;232A +angstrom;212B +anoteleia;0387 +anudattadeva;0952 +anusvarabengali;0982 +anusvaradeva;0902 +anusvaragujarati;0A82 +aogonek;0105 +apaatosquare;3300 +aparen;249C +apostrophearmenian;055A +apostrophemod;02BC +apple;F8FF +approaches;2250 +approxequal;2248 +approxequalorimage;2252 +approximatelyequal;2245 +araeaekorean;318E +araeakorean;318D +arc;2312 +arighthalfring;1E9A +aring;00E5 +aringacute;01FB +aringbelow;1E01 +arrowboth;2194 +arrowdashdown;21E3 +arrowdashleft;21E0 +arrowdashright;21E2 +arrowdashup;21E1 +arrowdblboth;21D4 +arrowdbldown;21D3 +arrowdblleft;21D0 +arrowdblright;21D2 
+arrowdblup;21D1 +arrowdown;2193 +arrowdownleft;2199 +arrowdownright;2198 +arrowdownwhite;21E9 +arrowheaddownmod;02C5 +arrowheadleftmod;02C2 +arrowheadrightmod;02C3 +arrowheadupmod;02C4 +arrowhorizex;F8E7 +arrowleft;2190 +arrowleftdbl;21D0 +arrowleftdblstroke;21CD +arrowleftoverright;21C6 +arrowleftwhite;21E6 +arrowright;2192 +arrowrightdblstroke;21CF +arrowrightheavy;279E +arrowrightoverleft;21C4 +arrowrightwhite;21E8 +arrowtableft;21E4 +arrowtabright;21E5 +arrowup;2191 +arrowupdn;2195 +arrowupdnbse;21A8 +arrowupdownbase;21A8 +arrowupleft;2196 +arrowupleftofdown;21C5 +arrowupright;2197 +arrowupwhite;21E7 +arrowvertex;F8E6 +asciicircum;005E +asciicircummonospace;FF3E +asciitilde;007E +asciitildemonospace;FF5E +ascript;0251 +ascriptturned;0252 +asmallhiragana;3041 +asmallkatakana;30A1 +asmallkatakanahalfwidth;FF67 +asterisk;002A +asteriskaltonearabic;066D +asteriskarabic;066D +asteriskmath;2217 +asteriskmonospace;FF0A +asterisksmall;FE61 +asterism;2042 +asuperior;F6E9 +asymptoticallyequal;2243 +at;0040 +atilde;00E3 +atmonospace;FF20 +atsmall;FE6B +aturned;0250 +aubengali;0994 +aubopomofo;3120 +audeva;0914 +augujarati;0A94 +augurmukhi;0A14 +aulengthmarkbengali;09D7 +aumatragurmukhi;0A4C +auvowelsignbengali;09CC +auvowelsigndeva;094C +auvowelsigngujarati;0ACC +avagrahadeva;093D +aybarmenian;0561 +ayin;05E2 +ayinaltonehebrew;FB20 +ayinhebrew;05E2 +b;0062 +babengali;09AC +backslash;005C +backslashmonospace;FF3C +badeva;092C +bagujarati;0AAC +bagurmukhi;0A2C +bahiragana;3070 +bahtthai;0E3F +bakatakana;30D0 +bar;007C +barmonospace;FF5C +bbopomofo;3105 +bcircle;24D1 +bdotaccent;1E03 +bdotbelow;1E05 +beamedsixteenthnotes;266C +because;2235 +becyrillic;0431 +beharabic;0628 +behfinalarabic;FE90 +behinitialarabic;FE91 +behiragana;3079 +behmedialarabic;FE92 +behmeeminitialarabic;FC9F +behmeemisolatedarabic;FC08 +behnoonfinalarabic;FC6D +bekatakana;30D9 +benarmenian;0562 +bet;05D1 +beta;03B2 +betasymbolgreek;03D0 +betdagesh;FB31 +betdageshhebrew;FB31 +bethebrew;05D1 
+betrafehebrew;FB4C +bhabengali;09AD +bhadeva;092D +bhagujarati;0AAD +bhagurmukhi;0A2D +bhook;0253 +bihiragana;3073 +bikatakana;30D3 +bilabialclick;0298 +bindigurmukhi;0A02 +birusquare;3331 +blackcircle;25CF +blackdiamond;25C6 +blackdownpointingtriangle;25BC +blackleftpointingpointer;25C4 +blackleftpointingtriangle;25C0 +blacklenticularbracketleft;3010 +blacklenticularbracketleftvertical;FE3B +blacklenticularbracketright;3011 +blacklenticularbracketrightvertical;FE3C +blacklowerlefttriangle;25E3 +blacklowerrighttriangle;25E2 +blackrectangle;25AC +blackrightpointingpointer;25BA +blackrightpointingtriangle;25B6 +blacksmallsquare;25AA +blacksmilingface;263B +blacksquare;25A0 +blackstar;2605 +blackupperlefttriangle;25E4 +blackupperrighttriangle;25E5 +blackuppointingsmalltriangle;25B4 +blackuppointingtriangle;25B2 +blank;2423 +blinebelow;1E07 +block;2588 +bmonospace;FF42 +bobaimaithai;0E1A +bohiragana;307C +bokatakana;30DC +bparen;249D +bqsquare;33C3 +braceex;F8F4 +braceleft;007B +braceleftbt;F8F3 +braceleftmid;F8F2 +braceleftmonospace;FF5B +braceleftsmall;FE5B +bracelefttp;F8F1 +braceleftvertical;FE37 +braceright;007D +bracerightbt;F8FE +bracerightmid;F8FD +bracerightmonospace;FF5D +bracerightsmall;FE5C +bracerighttp;F8FC +bracerightvertical;FE38 +bracketleft;005B +bracketleftbt;F8F0 +bracketleftex;F8EF +bracketleftmonospace;FF3B +bracketlefttp;F8EE +bracketright;005D +bracketrightbt;F8FB +bracketrightex;F8FA +bracketrightmonospace;FF3D +bracketrighttp;F8F9 +breve;02D8 +brevebelowcmb;032E +brevecmb;0306 +breveinvertedbelowcmb;032F +breveinvertedcmb;0311 +breveinverteddoublecmb;0361 +bridgebelowcmb;032A +bridgeinvertedbelowcmb;033A +brokenbar;00A6 +bstroke;0180 +bsuperior;F6EA +btopbar;0183 +buhiragana;3076 +bukatakana;30D6 +bullet;2022 +bulletinverse;25D8 +bulletoperator;2219 +bullseye;25CE +c;0063 +caarmenian;056E +cabengali;099A +cacute;0107 +cadeva;091A +cagujarati;0A9A +cagurmukhi;0A1A +calsquare;3388 +candrabindubengali;0981 +candrabinducmb;0310 
+candrabindudeva;0901 +candrabindugujarati;0A81 +capslock;21EA +careof;2105 +caron;02C7 +caronbelowcmb;032C +caroncmb;030C +carriagereturn;21B5 +cbopomofo;3118 +ccaron;010D +ccedilla;00E7 +ccedillaacute;1E09 +ccircle;24D2 +ccircumflex;0109 +ccurl;0255 +cdot;010B +cdotaccent;010B +cdsquare;33C5 +cedilla;00B8 +cedillacmb;0327 +cent;00A2 +centigrade;2103 +centinferior;F6DF +centmonospace;FFE0 +centoldstyle;F7A2 +centsuperior;F6E0 +chaarmenian;0579 +chabengali;099B +chadeva;091B +chagujarati;0A9B +chagurmukhi;0A1B +chbopomofo;3114 +cheabkhasiancyrillic;04BD +checkmark;2713 +checyrillic;0447 +chedescenderabkhasiancyrillic;04BF +chedescendercyrillic;04B7 +chedieresiscyrillic;04F5 +cheharmenian;0573 +chekhakassiancyrillic;04CC +cheverticalstrokecyrillic;04B9 +chi;03C7 +chieuchacirclekorean;3277 +chieuchaparenkorean;3217 +chieuchcirclekorean;3269 +chieuchkorean;314A +chieuchparenkorean;3209 +chochangthai;0E0A +chochanthai;0E08 +chochingthai;0E09 +chochoethai;0E0C +chook;0188 +cieucacirclekorean;3276 +cieucaparenkorean;3216 +cieuccirclekorean;3268 +cieuckorean;3148 +cieucparenkorean;3208 +cieucuparenkorean;321C +circle;25CB +circlemultiply;2297 +circleot;2299 +circleplus;2295 +circlepostalmark;3036 +circlewithlefthalfblack;25D0 +circlewithrighthalfblack;25D1 +circumflex;02C6 +circumflexbelowcmb;032D +circumflexcmb;0302 +clear;2327 +clickalveolar;01C2 +clickdental;01C0 +clicklateral;01C1 +clickretroflex;01C3 +club;2663 +clubsuitblack;2663 +clubsuitwhite;2667 +cmcubedsquare;33A4 +cmonospace;FF43 +cmsquaredsquare;33A0 +coarmenian;0581 +colon;003A +colonmonetary;20A1 +colonmonospace;FF1A +colonsign;20A1 +colonsmall;FE55 +colontriangularhalfmod;02D1 +colontriangularmod;02D0 +comma;002C +commaabovecmb;0313 +commaaboverightcmb;0315 +commaaccent;F6C3 +commaarabic;060C +commaarmenian;055D +commainferior;F6E1 +commamonospace;FF0C +commareversedabovecmb;0314 +commareversedmod;02BD +commasmall;FE50 +commasuperior;F6E2 +commaturnedabovecmb;0312 +commaturnedmod;02BB +compass;263C 
+congruent;2245 +contourintegral;222E +control;2303 +controlACK;0006 +controlBEL;0007 +controlBS;0008 +controlCAN;0018 +controlCR;000D +controlDC1;0011 +controlDC2;0012 +controlDC3;0013 +controlDC4;0014 +controlDEL;007F +controlDLE;0010 +controlEM;0019 +controlENQ;0005 +controlEOT;0004 +controlESC;001B +controlETB;0017 +controlETX;0003 +controlFF;000C +controlFS;001C +controlGS;001D +controlHT;0009 +controlLF;000A +controlNAK;0015 +controlRS;001E +controlSI;000F +controlSO;000E +controlSOT;0002 +controlSTX;0001 +controlSUB;001A +controlSYN;0016 +controlUS;001F +controlVT;000B +copyright;00A9 +copyrightsans;F8E9 +copyrightserif;F6D9 +cornerbracketleft;300C +cornerbracketlefthalfwidth;FF62 +cornerbracketleftvertical;FE41 +cornerbracketright;300D +cornerbracketrighthalfwidth;FF63 +cornerbracketrightvertical;FE42 +corporationsquare;337F +cosquare;33C7 +coverkgsquare;33C6 +cparen;249E +cruzeiro;20A2 +cstretched;0297 +curlyand;22CF +curlyor;22CE +currency;00A4 +cyrBreve;F6D1 +cyrFlex;F6D2 +cyrbreve;F6D4 +cyrflex;F6D5 +d;0064 +daarmenian;0564 +dabengali;09A6 +dadarabic;0636 +dadeva;0926 +dadfinalarabic;FEBE +dadinitialarabic;FEBF +dadmedialarabic;FEC0 +dagesh;05BC +dageshhebrew;05BC +dagger;2020 +daggerdbl;2021 +dagujarati;0AA6 +dagurmukhi;0A26 +dahiragana;3060 +dakatakana;30C0 +dalarabic;062F +dalet;05D3 +daletdagesh;FB33 +daletdageshhebrew;FB33 +dalethatafpatah;05D3 05B2 +dalethatafpatahhebrew;05D3 05B2 +dalethatafsegol;05D3 05B1 +dalethatafsegolhebrew;05D3 05B1 +dalethebrew;05D3 +dalethiriq;05D3 05B4 +dalethiriqhebrew;05D3 05B4 +daletholam;05D3 05B9 +daletholamhebrew;05D3 05B9 +daletpatah;05D3 05B7 +daletpatahhebrew;05D3 05B7 +daletqamats;05D3 05B8 +daletqamatshebrew;05D3 05B8 +daletqubuts;05D3 05BB +daletqubutshebrew;05D3 05BB +daletsegol;05D3 05B6 +daletsegolhebrew;05D3 05B6 +daletsheva;05D3 05B0 +daletshevahebrew;05D3 05B0 +dalettsere;05D3 05B5 +dalettserehebrew;05D3 05B5 +dalfinalarabic;FEAA +dammaarabic;064F +dammalowarabic;064F +dammatanaltonearabic;064C 
+dammatanarabic;064C +danda;0964 +dargahebrew;05A7 +dargalefthebrew;05A7 +dasiapneumatacyrilliccmb;0485 +dblGrave;F6D3 +dblanglebracketleft;300A +dblanglebracketleftvertical;FE3D +dblanglebracketright;300B +dblanglebracketrightvertical;FE3E +dblarchinvertedbelowcmb;032B +dblarrowleft;21D4 +dblarrowright;21D2 +dbldanda;0965 +dblgrave;F6D6 +dblgravecmb;030F +dblintegral;222C +dbllowline;2017 +dbllowlinecmb;0333 +dbloverlinecmb;033F +dblprimemod;02BA +dblverticalbar;2016 +dblverticallineabovecmb;030E +dbopomofo;3109 +dbsquare;33C8 +dcaron;010F +dcedilla;1E11 +dcircle;24D3 +dcircumflexbelow;1E13 +dcroat;0111 +ddabengali;09A1 +ddadeva;0921 +ddagujarati;0AA1 +ddagurmukhi;0A21 +ddalarabic;0688 +ddalfinalarabic;FB89 +dddhadeva;095C +ddhabengali;09A2 +ddhadeva;0922 +ddhagujarati;0AA2 +ddhagurmukhi;0A22 +ddotaccent;1E0B +ddotbelow;1E0D +decimalseparatorarabic;066B +decimalseparatorpersian;066B +decyrillic;0434 +degree;00B0 +dehihebrew;05AD +dehiragana;3067 +deicoptic;03EF +dekatakana;30C7 +deleteleft;232B +deleteright;2326 +delta;03B4 +deltaturned;018D +denominatorminusonenumeratorbengali;09F8 +dezh;02A4 +dhabengali;09A7 +dhadeva;0927 +dhagujarati;0AA7 +dhagurmukhi;0A27 +dhook;0257 +dialytikatonos;0385 +dialytikatonoscmb;0344 +diamond;2666 +diamondsuitwhite;2662 +dieresis;00A8 +dieresisacute;F6D7 +dieresisbelowcmb;0324 +dieresiscmb;0308 +dieresisgrave;F6D8 +dieresistonos;0385 +dihiragana;3062 +dikatakana;30C2 +dittomark;3003 +divide;00F7 +divides;2223 +divisionslash;2215 +djecyrillic;0452 +dkshade;2593 +dlinebelow;1E0F +dlsquare;3397 +dmacron;0111 +dmonospace;FF44 +dnblock;2584 +dochadathai;0E0E +dodekthai;0E14 +dohiragana;3069 +dokatakana;30C9 +dollar;0024 +dollarinferior;F6E3 +dollarmonospace;FF04 +dollaroldstyle;F724 +dollarsmall;FE69 +dollarsuperior;F6E4 +dong;20AB +dorusquare;3326 +dotaccent;02D9 +dotaccentcmb;0307 +dotbelowcmb;0323 +dotbelowcomb;0323 +dotkatakana;30FB +dotlessi;0131 +dotlessj;F6BE +dotlessjstrokehook;0284 +dotmath;22C5 +dottedcircle;25CC 
+doubleyodpatah;FB1F +doubleyodpatahhebrew;FB1F +downtackbelowcmb;031E +downtackmod;02D5 +dparen;249F +dsuperior;F6EB +dtail;0256 +dtopbar;018C +duhiragana;3065 +dukatakana;30C5 +dz;01F3 +dzaltone;02A3 +dzcaron;01C6 +dzcurl;02A5 +dzeabkhasiancyrillic;04E1 +dzecyrillic;0455 +dzhecyrillic;045F +e;0065 +eacute;00E9 +earth;2641 +ebengali;098F +ebopomofo;311C +ebreve;0115 +ecandradeva;090D +ecandragujarati;0A8D +ecandravowelsigndeva;0945 +ecandravowelsigngujarati;0AC5 +ecaron;011B +ecedillabreve;1E1D +echarmenian;0565 +echyiwnarmenian;0587 +ecircle;24D4 +ecircumflex;00EA +ecircumflexacute;1EBF +ecircumflexbelow;1E19 +ecircumflexdotbelow;1EC7 +ecircumflexgrave;1EC1 +ecircumflexhookabove;1EC3 +ecircumflextilde;1EC5 +ecyrillic;0454 +edblgrave;0205 +edeva;090F +edieresis;00EB +edot;0117 +edotaccent;0117 +edotbelow;1EB9 +eegurmukhi;0A0F +eematragurmukhi;0A47 +efcyrillic;0444 +egrave;00E8 +egujarati;0A8F +eharmenian;0567 +ehbopomofo;311D +ehiragana;3048 +ehookabove;1EBB +eibopomofo;311F +eight;0038 +eightarabic;0668 +eightbengali;09EE +eightcircle;2467 +eightcircleinversesansserif;2791 +eightdeva;096E +eighteencircle;2471 +eighteenparen;2485 +eighteenperiod;2499 +eightgujarati;0AEE +eightgurmukhi;0A6E +eighthackarabic;0668 +eighthangzhou;3028 +eighthnotebeamed;266B +eightideographicparen;3227 +eightinferior;2088 +eightmonospace;FF18 +eightoldstyle;F738 +eightparen;247B +eightperiod;248F +eightpersian;06F8 +eightroman;2177 +eightsuperior;2078 +eightthai;0E58 +einvertedbreve;0207 +eiotifiedcyrillic;0465 +ekatakana;30A8 +ekatakanahalfwidth;FF74 +ekonkargurmukhi;0A74 +ekorean;3154 +elcyrillic;043B +element;2208 +elevencircle;246A +elevenparen;247E +elevenperiod;2492 +elevenroman;217A +ellipsis;2026 +ellipsisvertical;22EE +emacron;0113 +emacronacute;1E17 +emacrongrave;1E15 +emcyrillic;043C +emdash;2014 +emdashvertical;FE31 +emonospace;FF45 +emphasismarkarmenian;055B +emptyset;2205 +enbopomofo;3123 +encyrillic;043D +endash;2013 +endashvertical;FE32 +endescendercyrillic;04A3 
+eng;014B +engbopomofo;3125 +enghecyrillic;04A5 +enhookcyrillic;04C8 +enspace;2002 +eogonek;0119 +eokorean;3153 +eopen;025B +eopenclosed;029A +eopenreversed;025C +eopenreversedclosed;025E +eopenreversedhook;025D +eparen;24A0 +epsilon;03B5 +epsilontonos;03AD +equal;003D +equalmonospace;FF1D +equalsmall;FE66 +equalsuperior;207C +equivalence;2261 +erbopomofo;3126 +ercyrillic;0440 +ereversed;0258 +ereversedcyrillic;044D +escyrillic;0441 +esdescendercyrillic;04AB +esh;0283 +eshcurl;0286 +eshortdeva;090E +eshortvowelsigndeva;0946 +eshreversedloop;01AA +eshsquatreversed;0285 +esmallhiragana;3047 +esmallkatakana;30A7 +esmallkatakanahalfwidth;FF6A +estimated;212E +esuperior;F6EC +eta;03B7 +etarmenian;0568 +etatonos;03AE +eth;00F0 +etilde;1EBD +etildebelow;1E1B +etnahtafoukhhebrew;0591 +etnahtafoukhlefthebrew;0591 +etnahtahebrew;0591 +etnahtalefthebrew;0591 +eturned;01DD +eukorean;3161 +euro;20AC +evowelsignbengali;09C7 +evowelsigndeva;0947 +evowelsigngujarati;0AC7 +exclam;0021 +exclamarmenian;055C +exclamdbl;203C +exclamdown;00A1 +exclamdownsmall;F7A1 +exclammonospace;FF01 +exclamsmall;F721 +existential;2203 +ezh;0292 +ezhcaron;01EF +ezhcurl;0293 +ezhreversed;01B9 +ezhtail;01BA +f;0066 +fadeva;095E +fagurmukhi;0A5E +fahrenheit;2109 +fathaarabic;064E +fathalowarabic;064E +fathatanarabic;064B +fbopomofo;3108 +fcircle;24D5 +fdotaccent;1E1F +feharabic;0641 +feharmenian;0586 +fehfinalarabic;FED2 +fehinitialarabic;FED3 +fehmedialarabic;FED4 +feicoptic;03E5 +female;2640 +ff;FB00 +ffi;FB03 +ffl;FB04 +fi;FB01 +fifteencircle;246E +fifteenparen;2482 +fifteenperiod;2496 +figuredash;2012 +filledbox;25A0 +filledrect;25AC +finalkaf;05DA +finalkafdagesh;FB3A +finalkafdageshhebrew;FB3A +finalkafhebrew;05DA +finalkafqamats;05DA 05B8 +finalkafqamatshebrew;05DA 05B8 +finalkafsheva;05DA 05B0 +finalkafshevahebrew;05DA 05B0 +finalmem;05DD +finalmemhebrew;05DD +finalnun;05DF +finalnunhebrew;05DF +finalpe;05E3 +finalpehebrew;05E3 +finaltsadi;05E5 +finaltsadihebrew;05E5 +firsttonechinese;02C9 
+fisheye;25C9 +fitacyrillic;0473 +five;0035 +fivearabic;0665 +fivebengali;09EB +fivecircle;2464 +fivecircleinversesansserif;278E +fivedeva;096B +fiveeighths;215D +fivegujarati;0AEB +fivegurmukhi;0A6B +fivehackarabic;0665 +fivehangzhou;3025 +fiveideographicparen;3224 +fiveinferior;2085 +fivemonospace;FF15 +fiveoldstyle;F735 +fiveparen;2478 +fiveperiod;248C +fivepersian;06F5 +fiveroman;2174 +fivesuperior;2075 +fivethai;0E55 +fl;FB02 +florin;0192 +fmonospace;FF46 +fmsquare;3399 +fofanthai;0E1F +fofathai;0E1D +fongmanthai;0E4F +forall;2200 +four;0034 +fourarabic;0664 +fourbengali;09EA +fourcircle;2463 +fourcircleinversesansserif;278D +fourdeva;096A +fourgujarati;0AEA +fourgurmukhi;0A6A +fourhackarabic;0664 +fourhangzhou;3024 +fourideographicparen;3223 +fourinferior;2084 +fourmonospace;FF14 +fournumeratorbengali;09F7 +fouroldstyle;F734 +fourparen;2477 +fourperiod;248B +fourpersian;06F4 +fourroman;2173 +foursuperior;2074 +fourteencircle;246D +fourteenparen;2481 +fourteenperiod;2495 +fourthai;0E54 +fourthtonechinese;02CB +fparen;24A1 +fraction;2044 +franc;20A3 +g;0067 +gabengali;0997 +gacute;01F5 +gadeva;0917 +gafarabic;06AF +gaffinalarabic;FB93 +gafinitialarabic;FB94 +gafmedialarabic;FB95 +gagujarati;0A97 +gagurmukhi;0A17 +gahiragana;304C +gakatakana;30AC +gamma;03B3 +gammalatinsmall;0263 +gammasuperior;02E0 +gangiacoptic;03EB +gbopomofo;310D +gbreve;011F +gcaron;01E7 +gcedilla;0123 +gcircle;24D6 +gcircumflex;011D +gcommaaccent;0123 +gdot;0121 +gdotaccent;0121 +gecyrillic;0433 +gehiragana;3052 +gekatakana;30B2 +geometricallyequal;2251 +gereshaccenthebrew;059C +gereshhebrew;05F3 +gereshmuqdamhebrew;059D +germandbls;00DF +gershayimaccenthebrew;059E +gershayimhebrew;05F4 +getamark;3013 +ghabengali;0998 +ghadarmenian;0572 +ghadeva;0918 +ghagujarati;0A98 +ghagurmukhi;0A18 +ghainarabic;063A +ghainfinalarabic;FECE +ghaininitialarabic;FECF +ghainmedialarabic;FED0 +ghemiddlehookcyrillic;0495 +ghestrokecyrillic;0493 +gheupturncyrillic;0491 +ghhadeva;095A +ghhagurmukhi;0A5A 
+ghook;0260 +ghzsquare;3393 +gihiragana;304E +gikatakana;30AE +gimarmenian;0563 +gimel;05D2 +gimeldagesh;FB32 +gimeldageshhebrew;FB32 +gimelhebrew;05D2 +gjecyrillic;0453 +glottalinvertedstroke;01BE +glottalstop;0294 +glottalstopinverted;0296 +glottalstopmod;02C0 +glottalstopreversed;0295 +glottalstopreversedmod;02C1 +glottalstopreversedsuperior;02E4 +glottalstopstroke;02A1 +glottalstopstrokereversed;02A2 +gmacron;1E21 +gmonospace;FF47 +gohiragana;3054 +gokatakana;30B4 +gparen;24A2 +gpasquare;33AC +gradient;2207 +grave;0060 +gravebelowcmb;0316 +gravecmb;0300 +gravecomb;0300 +gravedeva;0953 +gravelowmod;02CE +gravemonospace;FF40 +gravetonecmb;0340 +greater;003E +greaterequal;2265 +greaterequalorless;22DB +greatermonospace;FF1E +greaterorequivalent;2273 +greaterorless;2277 +greateroverequal;2267 +greatersmall;FE65 +gscript;0261 +gstroke;01E5 +guhiragana;3050 +guillemotleft;00AB +guillemotright;00BB +guilsinglleft;2039 +guilsinglright;203A +gukatakana;30B0 +guramusquare;3318 +gysquare;33C9 +h;0068 +haabkhasiancyrillic;04A9 +haaltonearabic;06C1 +habengali;09B9 +hadescendercyrillic;04B3 +hadeva;0939 +hagujarati;0AB9 +hagurmukhi;0A39 +haharabic;062D +hahfinalarabic;FEA2 +hahinitialarabic;FEA3 +hahiragana;306F +hahmedialarabic;FEA4 +haitusquare;332A +hakatakana;30CF +hakatakanahalfwidth;FF8A +halantgurmukhi;0A4D +hamzaarabic;0621 +hamzadammaarabic;0621 064F +hamzadammatanarabic;0621 064C +hamzafathaarabic;0621 064E +hamzafathatanarabic;0621 064B +hamzalowarabic;0621 +hamzalowkasraarabic;0621 0650 +hamzalowkasratanarabic;0621 064D +hamzasukunarabic;0621 0652 +hangulfiller;3164 +hardsigncyrillic;044A +harpoonleftbarbup;21BC +harpoonrightbarbup;21C0 +hasquare;33CA +hatafpatah;05B2 +hatafpatah16;05B2 +hatafpatah23;05B2 +hatafpatah2f;05B2 +hatafpatahhebrew;05B2 +hatafpatahnarrowhebrew;05B2 +hatafpatahquarterhebrew;05B2 +hatafpatahwidehebrew;05B2 +hatafqamats;05B3 +hatafqamats1b;05B3 +hatafqamats28;05B3 +hatafqamats34;05B3 +hatafqamatshebrew;05B3 +hatafqamatsnarrowhebrew;05B3 
+hatafqamatsquarterhebrew;05B3 +hatafqamatswidehebrew;05B3 +hatafsegol;05B1 +hatafsegol17;05B1 +hatafsegol24;05B1 +hatafsegol30;05B1 +hatafsegolhebrew;05B1 +hatafsegolnarrowhebrew;05B1 +hatafsegolquarterhebrew;05B1 +hatafsegolwidehebrew;05B1 +hbar;0127 +hbopomofo;310F +hbrevebelow;1E2B +hcedilla;1E29 +hcircle;24D7 +hcircumflex;0125 +hdieresis;1E27 +hdotaccent;1E23 +hdotbelow;1E25 +he;05D4 +heart;2665 +heartsuitblack;2665 +heartsuitwhite;2661 +hedagesh;FB34 +hedageshhebrew;FB34 +hehaltonearabic;06C1 +heharabic;0647 +hehebrew;05D4 +hehfinalaltonearabic;FBA7 +hehfinalalttwoarabic;FEEA +hehfinalarabic;FEEA +hehhamzaabovefinalarabic;FBA5 +hehhamzaaboveisolatedarabic;FBA4 +hehinitialaltonearabic;FBA8 +hehinitialarabic;FEEB +hehiragana;3078 +hehmedialaltonearabic;FBA9 +hehmedialarabic;FEEC +heiseierasquare;337B +hekatakana;30D8 +hekatakanahalfwidth;FF8D +hekutaarusquare;3336 +henghook;0267 +herutusquare;3339 +het;05D7 +hethebrew;05D7 +hhook;0266 +hhooksuperior;02B1 +hieuhacirclekorean;327B +hieuhaparenkorean;321B +hieuhcirclekorean;326D +hieuhkorean;314E +hieuhparenkorean;320D +hihiragana;3072 +hikatakana;30D2 +hikatakanahalfwidth;FF8B +hiriq;05B4 +hiriq14;05B4 +hiriq21;05B4 +hiriq2d;05B4 +hiriqhebrew;05B4 +hiriqnarrowhebrew;05B4 +hiriqquarterhebrew;05B4 +hiriqwidehebrew;05B4 +hlinebelow;1E96 +hmonospace;FF48 +hoarmenian;0570 +hohipthai;0E2B +hohiragana;307B +hokatakana;30DB +hokatakanahalfwidth;FF8E +holam;05B9 +holam19;05B9 +holam26;05B9 +holam32;05B9 +holamhebrew;05B9 +holamnarrowhebrew;05B9 +holamquarterhebrew;05B9 +holamwidehebrew;05B9 +honokhukthai;0E2E +hookabovecomb;0309 +hookcmb;0309 +hookpalatalizedbelowcmb;0321 +hookretroflexbelowcmb;0322 +hoonsquare;3342 +horicoptic;03E9 +horizontalbar;2015 +horncmb;031B +hotsprings;2668 +house;2302 +hparen;24A3 +hsuperior;02B0 +hturned;0265 +huhiragana;3075 +huiitosquare;3333 +hukatakana;30D5 +hukatakanahalfwidth;FF8C +hungarumlaut;02DD +hungarumlautcmb;030B +hv;0195 +hyphen;002D +hypheninferior;F6E5 +hyphenmonospace;FF0D 
+hyphensmall;FE63 +hyphensuperior;F6E6 +hyphentwo;2010 +i;0069 +iacute;00ED +iacyrillic;044F +ibengali;0987 +ibopomofo;3127 +ibreve;012D +icaron;01D0 +icircle;24D8 +icircumflex;00EE +icyrillic;0456 +idblgrave;0209 +ideographearthcircle;328F +ideographfirecircle;328B +ideographicallianceparen;323F +ideographiccallparen;323A +ideographiccentrecircle;32A5 +ideographicclose;3006 +ideographiccomma;3001 +ideographiccommaleft;FF64 +ideographiccongratulationparen;3237 +ideographiccorrectcircle;32A3 +ideographicearthparen;322F +ideographicenterpriseparen;323D +ideographicexcellentcircle;329D +ideographicfestivalparen;3240 +ideographicfinancialcircle;3296 +ideographicfinancialparen;3236 +ideographicfireparen;322B +ideographichaveparen;3232 +ideographichighcircle;32A4 +ideographiciterationmark;3005 +ideographiclaborcircle;3298 +ideographiclaborparen;3238 +ideographicleftcircle;32A7 +ideographiclowcircle;32A6 +ideographicmedicinecircle;32A9 +ideographicmetalparen;322E +ideographicmoonparen;322A +ideographicnameparen;3234 +ideographicperiod;3002 +ideographicprintcircle;329E +ideographicreachparen;3243 +ideographicrepresentparen;3239 +ideographicresourceparen;323E +ideographicrightcircle;32A8 +ideographicsecretcircle;3299 +ideographicselfparen;3242 +ideographicsocietyparen;3233 +ideographicspace;3000 +ideographicspecialparen;3235 +ideographicstockparen;3231 +ideographicstudyparen;323B +ideographicsunparen;3230 +ideographicsuperviseparen;323C +ideographicwaterparen;322C +ideographicwoodparen;322D +ideographiczero;3007 +ideographmetalcircle;328E +ideographmooncircle;328A +ideographnamecircle;3294 +ideographsuncircle;3290 +ideographwatercircle;328C +ideographwoodcircle;328D +ideva;0907 +idieresis;00EF +idieresisacute;1E2F +idieresiscyrillic;04E5 +idotbelow;1ECB +iebrevecyrillic;04D7 +iecyrillic;0435 +ieungacirclekorean;3275 +ieungaparenkorean;3215 +ieungcirclekorean;3267 +ieungkorean;3147 +ieungparenkorean;3207 +igrave;00EC +igujarati;0A87 +igurmukhi;0A07 +ihiragana;3044 
+ihookabove;1EC9 +iibengali;0988 +iicyrillic;0438 +iideva;0908 +iigujarati;0A88 +iigurmukhi;0A08 +iimatragurmukhi;0A40 +iinvertedbreve;020B +iishortcyrillic;0439 +iivowelsignbengali;09C0 +iivowelsigndeva;0940 +iivowelsigngujarati;0AC0 +ij;0133 +ikatakana;30A4 +ikatakanahalfwidth;FF72 +ikorean;3163 +ilde;02DC +iluyhebrew;05AC +imacron;012B +imacroncyrillic;04E3 +imageorapproximatelyequal;2253 +imatragurmukhi;0A3F +imonospace;FF49 +increment;2206 +infinity;221E +iniarmenian;056B +integral;222B +integralbottom;2321 +integralbt;2321 +integralex;F8F5 +integraltop;2320 +integraltp;2320 +intersection;2229 +intisquare;3305 +invbullet;25D8 +invcircle;25D9 +invsmileface;263B +iocyrillic;0451 +iogonek;012F +iota;03B9 +iotadieresis;03CA +iotadieresistonos;0390 +iotalatin;0269 +iotatonos;03AF +iparen;24A4 +irigurmukhi;0A72 +ismallhiragana;3043 +ismallkatakana;30A3 +ismallkatakanahalfwidth;FF68 +issharbengali;09FA +istroke;0268 +isuperior;F6ED +iterationhiragana;309D +iterationkatakana;30FD +itilde;0129 +itildebelow;1E2D +iubopomofo;3129 +iucyrillic;044E +ivowelsignbengali;09BF +ivowelsigndeva;093F +ivowelsigngujarati;0ABF +izhitsacyrillic;0475 +izhitsadblgravecyrillic;0477 +j;006A +jaarmenian;0571 +jabengali;099C +jadeva;091C +jagujarati;0A9C +jagurmukhi;0A1C +jbopomofo;3110 +jcaron;01F0 +jcircle;24D9 +jcircumflex;0135 +jcrossedtail;029D +jdotlessstroke;025F +jecyrillic;0458 +jeemarabic;062C +jeemfinalarabic;FE9E +jeeminitialarabic;FE9F +jeemmedialarabic;FEA0 +jeharabic;0698 +jehfinalarabic;FB8B +jhabengali;099D +jhadeva;091D +jhagujarati;0A9D +jhagurmukhi;0A1D +jheharmenian;057B +jis;3004 +jmonospace;FF4A +jparen;24A5 +jsuperior;02B2 +k;006B +kabashkircyrillic;04A1 +kabengali;0995 +kacute;1E31 +kacyrillic;043A +kadescendercyrillic;049B +kadeva;0915 +kaf;05DB +kafarabic;0643 +kafdagesh;FB3B +kafdageshhebrew;FB3B +kaffinalarabic;FEDA +kafhebrew;05DB +kafinitialarabic;FEDB +kafmedialarabic;FEDC +kafrafehebrew;FB4D +kagujarati;0A95 +kagurmukhi;0A15 +kahiragana;304B 
+kahookcyrillic;04C4 +kakatakana;30AB +kakatakanahalfwidth;FF76 +kappa;03BA +kappasymbolgreek;03F0 +kapyeounmieumkorean;3171 +kapyeounphieuphkorean;3184 +kapyeounpieupkorean;3178 +kapyeounssangpieupkorean;3179 +karoriisquare;330D +kashidaautoarabic;0640 +kashidaautonosidebearingarabic;0640 +kasmallkatakana;30F5 +kasquare;3384 +kasraarabic;0650 +kasratanarabic;064D +kastrokecyrillic;049F +katahiraprolongmarkhalfwidth;FF70 +kaverticalstrokecyrillic;049D +kbopomofo;310E +kcalsquare;3389 +kcaron;01E9 +kcedilla;0137 +kcircle;24DA +kcommaaccent;0137 +kdotbelow;1E33 +keharmenian;0584 +kehiragana;3051 +kekatakana;30B1 +kekatakanahalfwidth;FF79 +kenarmenian;056F +kesmallkatakana;30F6 +kgreenlandic;0138 +khabengali;0996 +khacyrillic;0445 +khadeva;0916 +khagujarati;0A96 +khagurmukhi;0A16 +khaharabic;062E +khahfinalarabic;FEA6 +khahinitialarabic;FEA7 +khahmedialarabic;FEA8 +kheicoptic;03E7 +khhadeva;0959 +khhagurmukhi;0A59 +khieukhacirclekorean;3278 +khieukhaparenkorean;3218 +khieukhcirclekorean;326A +khieukhkorean;314B +khieukhparenkorean;320A +khokhaithai;0E02 +khokhonthai;0E05 +khokhuatthai;0E03 +khokhwaithai;0E04 +khomutthai;0E5B +khook;0199 +khorakhangthai;0E06 +khzsquare;3391 +kihiragana;304D +kikatakana;30AD +kikatakanahalfwidth;FF77 +kiroguramusquare;3315 +kiromeetorusquare;3316 +kirosquare;3314 +kiyeokacirclekorean;326E +kiyeokaparenkorean;320E +kiyeokcirclekorean;3260 +kiyeokkorean;3131 +kiyeokparenkorean;3200 +kiyeoksioskorean;3133 +kjecyrillic;045C +klinebelow;1E35 +klsquare;3398 +kmcubedsquare;33A6 +kmonospace;FF4B +kmsquaredsquare;33A2 +kohiragana;3053 +kohmsquare;33C0 +kokaithai;0E01 +kokatakana;30B3 +kokatakanahalfwidth;FF7A +kooposquare;331E +koppacyrillic;0481 +koreanstandardsymbol;327F +koroniscmb;0343 +kparen;24A6 +kpasquare;33AA +ksicyrillic;046F +ktsquare;33CF +kturned;029E +kuhiragana;304F +kukatakana;30AF +kukatakanahalfwidth;FF78 +kvsquare;33B8 +kwsquare;33BE +l;006C +labengali;09B2 +lacute;013A +ladeva;0932 +lagujarati;0AB2 +lagurmukhi;0A32 
+lakkhangyaothai;0E45 +lamaleffinalarabic;FEFC +lamalefhamzaabovefinalarabic;FEF8 +lamalefhamzaaboveisolatedarabic;FEF7 +lamalefhamzabelowfinalarabic;FEFA +lamalefhamzabelowisolatedarabic;FEF9 +lamalefisolatedarabic;FEFB +lamalefmaddaabovefinalarabic;FEF6 +lamalefmaddaaboveisolatedarabic;FEF5 +lamarabic;0644 +lambda;03BB +lambdastroke;019B +lamed;05DC +lameddagesh;FB3C +lameddageshhebrew;FB3C +lamedhebrew;05DC +lamedholam;05DC 05B9 +lamedholamdagesh;05DC 05B9 05BC +lamedholamdageshhebrew;05DC 05B9 05BC +lamedholamhebrew;05DC 05B9 +lamfinalarabic;FEDE +lamhahinitialarabic;FCCA +laminitialarabic;FEDF +lamjeeminitialarabic;FCC9 +lamkhahinitialarabic;FCCB +lamlamhehisolatedarabic;FDF2 +lammedialarabic;FEE0 +lammeemhahinitialarabic;FD88 +lammeeminitialarabic;FCCC +lammeemjeeminitialarabic;FEDF FEE4 FEA0 +lammeemkhahinitialarabic;FEDF FEE4 FEA8 +largecircle;25EF +lbar;019A +lbelt;026C +lbopomofo;310C +lcaron;013E +lcedilla;013C +lcircle;24DB +lcircumflexbelow;1E3D +lcommaaccent;013C +ldot;0140 +ldotaccent;0140 +ldotbelow;1E37 +ldotbelowmacron;1E39 +leftangleabovecmb;031A +lefttackbelowcmb;0318 +less;003C +lessequal;2264 +lessequalorgreater;22DA +lessmonospace;FF1C +lessorequivalent;2272 +lessorgreater;2276 +lessoverequal;2266 +lesssmall;FE64 +lezh;026E +lfblock;258C +lhookretroflex;026D +lira;20A4 +liwnarmenian;056C +lj;01C9 +ljecyrillic;0459 +ll;F6C0 +lladeva;0933 +llagujarati;0AB3 +llinebelow;1E3B +llladeva;0934 +llvocalicbengali;09E1 +llvocalicdeva;0961 +llvocalicvowelsignbengali;09E3 +llvocalicvowelsigndeva;0963 +lmiddletilde;026B +lmonospace;FF4C +lmsquare;33D0 +lochulathai;0E2C +logicaland;2227 +logicalnot;00AC +logicalnotreversed;2310 +logicalor;2228 +lolingthai;0E25 +longs;017F +lowlinecenterline;FE4E +lowlinecmb;0332 +lowlinedashed;FE4D +lozenge;25CA +lparen;24A7 +lslash;0142 +lsquare;2113 +lsuperior;F6EE +ltshade;2591 +luthai;0E26 +lvocalicbengali;098C +lvocalicdeva;090C +lvocalicvowelsignbengali;09E2 +lvocalicvowelsigndeva;0962 +lxsquare;33D3 +m;006D 
+mabengali;09AE +macron;00AF +macronbelowcmb;0331 +macroncmb;0304 +macronlowmod;02CD +macronmonospace;FFE3 +macute;1E3F +madeva;092E +magujarati;0AAE +magurmukhi;0A2E +mahapakhhebrew;05A4 +mahapakhlefthebrew;05A4 +mahiragana;307E +maichattawalowleftthai;F895 +maichattawalowrightthai;F894 +maichattawathai;0E4B +maichattawaupperleftthai;F893 +maieklowleftthai;F88C +maieklowrightthai;F88B +maiekthai;0E48 +maiekupperleftthai;F88A +maihanakatleftthai;F884 +maihanakatthai;0E31 +maitaikhuleftthai;F889 +maitaikhuthai;0E47 +maitholowleftthai;F88F +maitholowrightthai;F88E +maithothai;0E49 +maithoupperleftthai;F88D +maitrilowleftthai;F892 +maitrilowrightthai;F891 +maitrithai;0E4A +maitriupperleftthai;F890 +maiyamokthai;0E46 +makatakana;30DE +makatakanahalfwidth;FF8F +male;2642 +mansyonsquare;3347 +maqafhebrew;05BE +mars;2642 +masoracirclehebrew;05AF +masquare;3383 +mbopomofo;3107 +mbsquare;33D4 +mcircle;24DC +mcubedsquare;33A5 +mdotaccent;1E41 +mdotbelow;1E43 +meemarabic;0645 +meemfinalarabic;FEE2 +meeminitialarabic;FEE3 +meemmedialarabic;FEE4 +meemmeeminitialarabic;FCD1 +meemmeemisolatedarabic;FC48 +meetorusquare;334D +mehiragana;3081 +meizierasquare;337E +mekatakana;30E1 +mekatakanahalfwidth;FF92 +mem;05DE +memdagesh;FB3E +memdageshhebrew;FB3E +memhebrew;05DE +menarmenian;0574 +merkhahebrew;05A5 +merkhakefulahebrew;05A6 +merkhakefulalefthebrew;05A6 +merkhalefthebrew;05A5 +mhook;0271 +mhzsquare;3392 +middledotkatakanahalfwidth;FF65 +middot;00B7 +mieumacirclekorean;3272 +mieumaparenkorean;3212 +mieumcirclekorean;3264 +mieumkorean;3141 +mieumpansioskorean;3170 +mieumparenkorean;3204 +mieumpieupkorean;316E +mieumsioskorean;316F +mihiragana;307F +mikatakana;30DF +mikatakanahalfwidth;FF90 +minus;2212 +minusbelowcmb;0320 +minuscircle;2296 +minusmod;02D7 +minusplus;2213 +minute;2032 +miribaarusquare;334A +mirisquare;3349 +mlonglegturned;0270 +mlsquare;3396 +mmcubedsquare;33A3 +mmonospace;FF4D +mmsquaredsquare;339F +mohiragana;3082 +mohmsquare;33C1 +mokatakana;30E2 
+mokatakanahalfwidth;FF93 +molsquare;33D6 +momathai;0E21 +moverssquare;33A7 +moverssquaredsquare;33A8 +mparen;24A8 +mpasquare;33AB +mssquare;33B3 +msuperior;F6EF +mturned;026F +mu;00B5 +mu1;00B5 +muasquare;3382 +muchgreater;226B +muchless;226A +mufsquare;338C +mugreek;03BC +mugsquare;338D +muhiragana;3080 +mukatakana;30E0 +mukatakanahalfwidth;FF91 +mulsquare;3395 +multiply;00D7 +mumsquare;339B +munahhebrew;05A3 +munahlefthebrew;05A3 +musicalnote;266A +musicalnotedbl;266B +musicflatsign;266D +musicsharpsign;266F +mussquare;33B2 +muvsquare;33B6 +muwsquare;33BC +mvmegasquare;33B9 +mvsquare;33B7 +mwmegasquare;33BF +mwsquare;33BD +n;006E +nabengali;09A8 +nabla;2207 +nacute;0144 +nadeva;0928 +nagujarati;0AA8 +nagurmukhi;0A28 +nahiragana;306A +nakatakana;30CA +nakatakanahalfwidth;FF85 +napostrophe;0149 +nasquare;3381 +nbopomofo;310B +nbspace;00A0 +ncaron;0148 +ncedilla;0146 +ncircle;24DD +ncircumflexbelow;1E4B +ncommaaccent;0146 +ndotaccent;1E45 +ndotbelow;1E47 +nehiragana;306D +nekatakana;30CD +nekatakanahalfwidth;FF88 +newsheqelsign;20AA +nfsquare;338B +ngabengali;0999 +ngadeva;0919 +ngagujarati;0A99 +ngagurmukhi;0A19 +ngonguthai;0E07 +nhiragana;3093 +nhookleft;0272 +nhookretroflex;0273 +nieunacirclekorean;326F +nieunaparenkorean;320F +nieuncieuckorean;3135 +nieuncirclekorean;3261 +nieunhieuhkorean;3136 +nieunkorean;3134 +nieunpansioskorean;3168 +nieunparenkorean;3201 +nieunsioskorean;3167 +nieuntikeutkorean;3166 +nihiragana;306B +nikatakana;30CB +nikatakanahalfwidth;FF86 +nikhahitleftthai;F899 +nikhahitthai;0E4D +nine;0039 +ninearabic;0669 +ninebengali;09EF +ninecircle;2468 +ninecircleinversesansserif;2792 +ninedeva;096F +ninegujarati;0AEF +ninegurmukhi;0A6F +ninehackarabic;0669 +ninehangzhou;3029 +nineideographicparen;3228 +nineinferior;2089 +ninemonospace;FF19 +nineoldstyle;F739 +nineparen;247C +nineperiod;2490 +ninepersian;06F9 +nineroman;2178 +ninesuperior;2079 +nineteencircle;2472 +nineteenparen;2486 +nineteenperiod;249A +ninethai;0E59 +nj;01CC +njecyrillic;045A 
+nkatakana;30F3 +nkatakanahalfwidth;FF9D +nlegrightlong;019E +nlinebelow;1E49 +nmonospace;FF4E +nmsquare;339A +nnabengali;09A3 +nnadeva;0923 +nnagujarati;0AA3 +nnagurmukhi;0A23 +nnnadeva;0929 +nohiragana;306E +nokatakana;30CE +nokatakanahalfwidth;FF89 +nonbreakingspace;00A0 +nonenthai;0E13 +nonuthai;0E19 +noonarabic;0646 +noonfinalarabic;FEE6 +noonghunnaarabic;06BA +noonghunnafinalarabic;FB9F +noonhehinitialarabic;FEE7 FEEC +nooninitialarabic;FEE7 +noonjeeminitialarabic;FCD2 +noonjeemisolatedarabic;FC4B +noonmedialarabic;FEE8 +noonmeeminitialarabic;FCD5 +noonmeemisolatedarabic;FC4E +noonnoonfinalarabic;FC8D +notcontains;220C +notelement;2209 +notelementof;2209 +notequal;2260 +notgreater;226F +notgreaternorequal;2271 +notgreaternorless;2279 +notidentical;2262 +notless;226E +notlessnorequal;2270 +notparallel;2226 +notprecedes;2280 +notsubset;2284 +notsucceeds;2281 +notsuperset;2285 +nowarmenian;0576 +nparen;24A9 +nssquare;33B1 +nsuperior;207F +ntilde;00F1 +nu;03BD +nuhiragana;306C +nukatakana;30CC +nukatakanahalfwidth;FF87 +nuktabengali;09BC +nuktadeva;093C +nuktagujarati;0ABC +nuktagurmukhi;0A3C +numbersign;0023 +numbersignmonospace;FF03 +numbersignsmall;FE5F +numeralsigngreek;0374 +numeralsignlowergreek;0375 +numero;2116 +nun;05E0 +nundagesh;FB40 +nundageshhebrew;FB40 +nunhebrew;05E0 +nvsquare;33B5 +nwsquare;33BB +nyabengali;099E +nyadeva;091E +nyagujarati;0A9E +nyagurmukhi;0A1E +o;006F +oacute;00F3 +oangthai;0E2D +obarred;0275 +obarredcyrillic;04E9 +obarreddieresiscyrillic;04EB +obengali;0993 +obopomofo;311B +obreve;014F +ocandradeva;0911 +ocandragujarati;0A91 +ocandravowelsigndeva;0949 +ocandravowelsigngujarati;0AC9 +ocaron;01D2 +ocircle;24DE +ocircumflex;00F4 +ocircumflexacute;1ED1 +ocircumflexdotbelow;1ED9 +ocircumflexgrave;1ED3 +ocircumflexhookabove;1ED5 +ocircumflextilde;1ED7 +ocyrillic;043E +odblacute;0151 +odblgrave;020D +odeva;0913 +odieresis;00F6 +odieresiscyrillic;04E7 +odotbelow;1ECD +oe;0153 +oekorean;315A +ogonek;02DB +ogonekcmb;0328 +ograve;00F2 
+ogujarati;0A93 +oharmenian;0585 +ohiragana;304A +ohookabove;1ECF +ohorn;01A1 +ohornacute;1EDB +ohorndotbelow;1EE3 +ohorngrave;1EDD +ohornhookabove;1EDF +ohorntilde;1EE1 +ohungarumlaut;0151 +oi;01A3 +oinvertedbreve;020F +okatakana;30AA +okatakanahalfwidth;FF75 +okorean;3157 +olehebrew;05AB +omacron;014D +omacronacute;1E53 +omacrongrave;1E51 +omdeva;0950 +omega;03C9 +omega1;03D6 +omegacyrillic;0461 +omegalatinclosed;0277 +omegaroundcyrillic;047B +omegatitlocyrillic;047D +omegatonos;03CE +omgujarati;0AD0 +omicron;03BF +omicrontonos;03CC +omonospace;FF4F +one;0031 +onearabic;0661 +onebengali;09E7 +onecircle;2460 +onecircleinversesansserif;278A +onedeva;0967 +onedotenleader;2024 +oneeighth;215B +onefitted;F6DC +onegujarati;0AE7 +onegurmukhi;0A67 +onehackarabic;0661 +onehalf;00BD +onehangzhou;3021 +oneideographicparen;3220 +oneinferior;2081 +onemonospace;FF11 +onenumeratorbengali;09F4 +oneoldstyle;F731 +oneparen;2474 +oneperiod;2488 +onepersian;06F1 +onequarter;00BC +oneroman;2170 +onesuperior;00B9 +onethai;0E51 +onethird;2153 +oogonek;01EB +oogonekmacron;01ED +oogurmukhi;0A13 +oomatragurmukhi;0A4B +oopen;0254 +oparen;24AA +openbullet;25E6 +option;2325 +ordfeminine;00AA +ordmasculine;00BA +orthogonal;221F +oshortdeva;0912 +oshortvowelsigndeva;094A +oslash;00F8 +oslashacute;01FF +osmallhiragana;3049 +osmallkatakana;30A9 +osmallkatakanahalfwidth;FF6B +ostrokeacute;01FF +osuperior;F6F0 +otcyrillic;047F +otilde;00F5 +otildeacute;1E4D +otildedieresis;1E4F +oubopomofo;3121 +overline;203E +overlinecenterline;FE4A +overlinecmb;0305 +overlinedashed;FE49 +overlinedblwavy;FE4C +overlinewavy;FE4B +overscore;00AF +ovowelsignbengali;09CB +ovowelsigndeva;094B +ovowelsigngujarati;0ACB +p;0070 +paampssquare;3380 +paasentosquare;332B +pabengali;09AA +pacute;1E55 +padeva;092A +pagedown;21DF +pageup;21DE +pagujarati;0AAA +pagurmukhi;0A2A +pahiragana;3071 +paiyannoithai;0E2F +pakatakana;30D1 +palatalizationcyrilliccmb;0484 +palochkacyrillic;04C0 +pansioskorean;317F +paragraph;00B6 
+parallel;2225 +parenleft;0028 +parenleftaltonearabic;FD3E +parenleftbt;F8ED +parenleftex;F8EC +parenleftinferior;208D +parenleftmonospace;FF08 +parenleftsmall;FE59 +parenleftsuperior;207D +parenlefttp;F8EB +parenleftvertical;FE35 +parenright;0029 +parenrightaltonearabic;FD3F +parenrightbt;F8F8 +parenrightex;F8F7 +parenrightinferior;208E +parenrightmonospace;FF09 +parenrightsmall;FE5A +parenrightsuperior;207E +parenrighttp;F8F6 +parenrightvertical;FE36 +partialdiff;2202 +paseqhebrew;05C0 +pashtahebrew;0599 +pasquare;33A9 +patah;05B7 +patah11;05B7 +patah1d;05B7 +patah2a;05B7 +patahhebrew;05B7 +patahnarrowhebrew;05B7 +patahquarterhebrew;05B7 +patahwidehebrew;05B7 +pazerhebrew;05A1 +pbopomofo;3106 +pcircle;24DF +pdotaccent;1E57 +pe;05E4 +pecyrillic;043F +pedagesh;FB44 +pedageshhebrew;FB44 +peezisquare;333B +pefinaldageshhebrew;FB43 +peharabic;067E +peharmenian;057A +pehebrew;05E4 +pehfinalarabic;FB57 +pehinitialarabic;FB58 +pehiragana;307A +pehmedialarabic;FB59 +pekatakana;30DA +pemiddlehookcyrillic;04A7 +perafehebrew;FB4E +percent;0025 +percentarabic;066A +percentmonospace;FF05 +percentsmall;FE6A +period;002E +periodarmenian;0589 +periodcentered;00B7 +periodhalfwidth;FF61 +periodinferior;F6E7 +periodmonospace;FF0E +periodsmall;FE52 +periodsuperior;F6E8 +perispomenigreekcmb;0342 +perpendicular;22A5 +perthousand;2030 +peseta;20A7 +pfsquare;338A +phabengali;09AB +phadeva;092B +phagujarati;0AAB +phagurmukhi;0A2B +phi;03C6 +phi1;03D5 +phieuphacirclekorean;327A +phieuphaparenkorean;321A +phieuphcirclekorean;326C +phieuphkorean;314D +phieuphparenkorean;320C +philatin;0278 +phinthuthai;0E3A +phisymbolgreek;03D5 +phook;01A5 +phophanthai;0E1E +phophungthai;0E1C +phosamphaothai;0E20 +pi;03C0 +pieupacirclekorean;3273 +pieupaparenkorean;3213 +pieupcieuckorean;3176 +pieupcirclekorean;3265 +pieupkiyeokkorean;3172 +pieupkorean;3142 +pieupparenkorean;3205 +pieupsioskiyeokkorean;3174 +pieupsioskorean;3144 +pieupsiostikeutkorean;3175 +pieupthieuthkorean;3177 +pieuptikeutkorean;3173 
+pihiragana;3074 +pikatakana;30D4 +pisymbolgreek;03D6 +piwrarmenian;0583 +plus;002B +plusbelowcmb;031F +pluscircle;2295 +plusminus;00B1 +plusmod;02D6 +plusmonospace;FF0B +plussmall;FE62 +plussuperior;207A +pmonospace;FF50 +pmsquare;33D8 +pohiragana;307D +pointingindexdownwhite;261F +pointingindexleftwhite;261C +pointingindexrightwhite;261E +pointingindexupwhite;261D +pokatakana;30DD +poplathai;0E1B +postalmark;3012 +postalmarkface;3020 +pparen;24AB +precedes;227A +prescription;211E +primemod;02B9 +primereversed;2035 +product;220F +projective;2305 +prolongedkana;30FC +propellor;2318 +propersubset;2282 +propersuperset;2283 +proportion;2237 +proportional;221D +psi;03C8 +psicyrillic;0471 +psilipneumatacyrilliccmb;0486 +pssquare;33B0 +puhiragana;3077 +pukatakana;30D7 +pvsquare;33B4 +pwsquare;33BA +q;0071 +qadeva;0958 +qadmahebrew;05A8 +qafarabic;0642 +qaffinalarabic;FED6 +qafinitialarabic;FED7 +qafmedialarabic;FED8 +qamats;05B8 +qamats10;05B8 +qamats1a;05B8 +qamats1c;05B8 +qamats27;05B8 +qamats29;05B8 +qamats33;05B8 +qamatsde;05B8 +qamatshebrew;05B8 +qamatsnarrowhebrew;05B8 +qamatsqatanhebrew;05B8 +qamatsqatannarrowhebrew;05B8 +qamatsqatanquarterhebrew;05B8 +qamatsqatanwidehebrew;05B8 +qamatsquarterhebrew;05B8 +qamatswidehebrew;05B8 +qarneyparahebrew;059F +qbopomofo;3111 +qcircle;24E0 +qhook;02A0 +qmonospace;FF51 +qof;05E7 +qofdagesh;FB47 +qofdageshhebrew;FB47 +qofhatafpatah;05E7 05B2 +qofhatafpatahhebrew;05E7 05B2 +qofhatafsegol;05E7 05B1 +qofhatafsegolhebrew;05E7 05B1 +qofhebrew;05E7 +qofhiriq;05E7 05B4 +qofhiriqhebrew;05E7 05B4 +qofholam;05E7 05B9 +qofholamhebrew;05E7 05B9 +qofpatah;05E7 05B7 +qofpatahhebrew;05E7 05B7 +qofqamats;05E7 05B8 +qofqamatshebrew;05E7 05B8 +qofqubuts;05E7 05BB +qofqubutshebrew;05E7 05BB +qofsegol;05E7 05B6 +qofsegolhebrew;05E7 05B6 +qofsheva;05E7 05B0 +qofshevahebrew;05E7 05B0 +qoftsere;05E7 05B5 +qoftserehebrew;05E7 05B5 +qparen;24AC +quarternote;2669 +qubuts;05BB +qubuts18;05BB +qubuts25;05BB +qubuts31;05BB +qubutshebrew;05BB 
+qubutsnarrowhebrew;05BB +qubutsquarterhebrew;05BB +qubutswidehebrew;05BB +question;003F +questionarabic;061F +questionarmenian;055E +questiondown;00BF +questiondownsmall;F7BF +questiongreek;037E +questionmonospace;FF1F +questionsmall;F73F +quotedbl;0022 +quotedblbase;201E +quotedblleft;201C +quotedblmonospace;FF02 +quotedblprime;301E +quotedblprimereversed;301D +quotedblright;201D +quoteleft;2018 +quoteleftreversed;201B +quotereversed;201B +quoteright;2019 +quoterightn;0149 +quotesinglbase;201A +quotesingle;0027 +quotesinglemonospace;FF07 +r;0072 +raarmenian;057C +rabengali;09B0 +racute;0155 +radeva;0930 +radical;221A +radicalex;F8E5 +radoverssquare;33AE +radoverssquaredsquare;33AF +radsquare;33AD +rafe;05BF +rafehebrew;05BF +ragujarati;0AB0 +ragurmukhi;0A30 +rahiragana;3089 +rakatakana;30E9 +rakatakanahalfwidth;FF97 +ralowerdiagonalbengali;09F1 +ramiddlediagonalbengali;09F0 +ramshorn;0264 +ratio;2236 +rbopomofo;3116 +rcaron;0159 +rcedilla;0157 +rcircle;24E1 +rcommaaccent;0157 +rdblgrave;0211 +rdotaccent;1E59 +rdotbelow;1E5B +rdotbelowmacron;1E5D +referencemark;203B +reflexsubset;2286 +reflexsuperset;2287 +registered;00AE +registersans;F8E8 +registerserif;F6DA +reharabic;0631 +reharmenian;0580 +rehfinalarabic;FEAE +rehiragana;308C +rehyehaleflamarabic;0631 FEF3 FE8E 0644 +rekatakana;30EC +rekatakanahalfwidth;FF9A +resh;05E8 +reshdageshhebrew;FB48 +reshhatafpatah;05E8 05B2 +reshhatafpatahhebrew;05E8 05B2 +reshhatafsegol;05E8 05B1 +reshhatafsegolhebrew;05E8 05B1 +reshhebrew;05E8 +reshhiriq;05E8 05B4 +reshhiriqhebrew;05E8 05B4 +reshholam;05E8 05B9 +reshholamhebrew;05E8 05B9 +reshpatah;05E8 05B7 +reshpatahhebrew;05E8 05B7 +reshqamats;05E8 05B8 +reshqamatshebrew;05E8 05B8 +reshqubuts;05E8 05BB +reshqubutshebrew;05E8 05BB +reshsegol;05E8 05B6 +reshsegolhebrew;05E8 05B6 +reshsheva;05E8 05B0 +reshshevahebrew;05E8 05B0 +reshtsere;05E8 05B5 +reshtserehebrew;05E8 05B5 +reversedtilde;223D +reviahebrew;0597 +reviamugrashhebrew;0597 +revlogicalnot;2310 +rfishhook;027E 
+rfishhookreversed;027F +rhabengali;09DD +rhadeva;095D +rho;03C1 +rhook;027D +rhookturned;027B +rhookturnedsuperior;02B5 +rhosymbolgreek;03F1 +rhotichookmod;02DE +rieulacirclekorean;3271 +rieulaparenkorean;3211 +rieulcirclekorean;3263 +rieulhieuhkorean;3140 +rieulkiyeokkorean;313A +rieulkiyeoksioskorean;3169 +rieulkorean;3139 +rieulmieumkorean;313B +rieulpansioskorean;316C +rieulparenkorean;3203 +rieulphieuphkorean;313F +rieulpieupkorean;313C +rieulpieupsioskorean;316B +rieulsioskorean;313D +rieulthieuthkorean;313E +rieultikeutkorean;316A +rieulyeorinhieuhkorean;316D +rightangle;221F +righttackbelowcmb;0319 +righttriangle;22BF +rihiragana;308A +rikatakana;30EA +rikatakanahalfwidth;FF98 +ring;02DA +ringbelowcmb;0325 +ringcmb;030A +ringhalfleft;02BF +ringhalfleftarmenian;0559 +ringhalfleftbelowcmb;031C +ringhalfleftcentered;02D3 +ringhalfright;02BE +ringhalfrightbelowcmb;0339 +ringhalfrightcentered;02D2 +rinvertedbreve;0213 +rittorusquare;3351 +rlinebelow;1E5F +rlongleg;027C +rlonglegturned;027A +rmonospace;FF52 +rohiragana;308D +rokatakana;30ED +rokatakanahalfwidth;FF9B +roruathai;0E23 +rparen;24AD +rrabengali;09DC +rradeva;0931 +rragurmukhi;0A5C +rreharabic;0691 +rrehfinalarabic;FB8D +rrvocalicbengali;09E0 +rrvocalicdeva;0960 +rrvocalicgujarati;0AE0 +rrvocalicvowelsignbengali;09C4 +rrvocalicvowelsigndeva;0944 +rrvocalicvowelsigngujarati;0AC4 +rsuperior;F6F1 +rtblock;2590 +rturned;0279 +rturnedsuperior;02B4 +ruhiragana;308B +rukatakana;30EB +rukatakanahalfwidth;FF99 +rupeemarkbengali;09F2 +rupeesignbengali;09F3 +rupiah;F6DD +ruthai;0E24 +rvocalicbengali;098B +rvocalicdeva;090B +rvocalicgujarati;0A8B +rvocalicvowelsignbengali;09C3 +rvocalicvowelsigndeva;0943 +rvocalicvowelsigngujarati;0AC3 +s;0073 +sabengali;09B8 +sacute;015B +sacutedotaccent;1E65 +sadarabic;0635 +sadeva;0938 +sadfinalarabic;FEBA +sadinitialarabic;FEBB +sadmedialarabic;FEBC +sagujarati;0AB8 +sagurmukhi;0A38 +sahiragana;3055 +sakatakana;30B5 +sakatakanahalfwidth;FF7B 
+sallallahoualayhewasallamarabic;FDFA +samekh;05E1 +samekhdagesh;FB41 +samekhdageshhebrew;FB41 +samekhhebrew;05E1 +saraaathai;0E32 +saraaethai;0E41 +saraaimaimalaithai;0E44 +saraaimaimuanthai;0E43 +saraamthai;0E33 +saraathai;0E30 +saraethai;0E40 +saraiileftthai;F886 +saraiithai;0E35 +saraileftthai;F885 +saraithai;0E34 +saraothai;0E42 +saraueeleftthai;F888 +saraueethai;0E37 +saraueleftthai;F887 +sarauethai;0E36 +sarauthai;0E38 +sarauuthai;0E39 +sbopomofo;3119 +scaron;0161 +scarondotaccent;1E67 +scedilla;015F +schwa;0259 +schwacyrillic;04D9 +schwadieresiscyrillic;04DB +schwahook;025A +scircle;24E2 +scircumflex;015D +scommaaccent;0219 +sdotaccent;1E61 +sdotbelow;1E63 +sdotbelowdotaccent;1E69 +seagullbelowcmb;033C +second;2033 +secondtonechinese;02CA +section;00A7 +seenarabic;0633 +seenfinalarabic;FEB2 +seeninitialarabic;FEB3 +seenmedialarabic;FEB4 +segol;05B6 +segol13;05B6 +segol1f;05B6 +segol2c;05B6 +segolhebrew;05B6 +segolnarrowhebrew;05B6 +segolquarterhebrew;05B6 +segoltahebrew;0592 +segolwidehebrew;05B6 +seharmenian;057D +sehiragana;305B +sekatakana;30BB +sekatakanahalfwidth;FF7E +semicolon;003B +semicolonarabic;061B +semicolonmonospace;FF1B +semicolonsmall;FE54 +semivoicedmarkkana;309C +semivoicedmarkkanahalfwidth;FF9F +sentisquare;3322 +sentosquare;3323 +seven;0037 +sevenarabic;0667 +sevenbengali;09ED +sevencircle;2466 +sevencircleinversesansserif;2790 +sevendeva;096D +seveneighths;215E +sevengujarati;0AED +sevengurmukhi;0A6D +sevenhackarabic;0667 +sevenhangzhou;3027 +sevenideographicparen;3226 +seveninferior;2087 +sevenmonospace;FF17 +sevenoldstyle;F737 +sevenparen;247A +sevenperiod;248E +sevenpersian;06F7 +sevenroman;2176 +sevensuperior;2077 +seventeencircle;2470 +seventeenparen;2484 +seventeenperiod;2498 +seventhai;0E57 +sfthyphen;00AD +shaarmenian;0577 +shabengali;09B6 +shacyrillic;0448 +shaddaarabic;0651 +shaddadammaarabic;FC61 +shaddadammatanarabic;FC5E +shaddafathaarabic;FC60 +shaddafathatanarabic;0651 064B +shaddakasraarabic;FC62 
+shaddakasratanarabic;FC5F +shade;2592 +shadedark;2593 +shadelight;2591 +shademedium;2592 +shadeva;0936 +shagujarati;0AB6 +shagurmukhi;0A36 +shalshelethebrew;0593 +shbopomofo;3115 +shchacyrillic;0449 +sheenarabic;0634 +sheenfinalarabic;FEB6 +sheeninitialarabic;FEB7 +sheenmedialarabic;FEB8 +sheicoptic;03E3 +sheqel;20AA +sheqelhebrew;20AA +sheva;05B0 +sheva115;05B0 +sheva15;05B0 +sheva22;05B0 +sheva2e;05B0 +shevahebrew;05B0 +shevanarrowhebrew;05B0 +shevaquarterhebrew;05B0 +shevawidehebrew;05B0 +shhacyrillic;04BB +shimacoptic;03ED +shin;05E9 +shindagesh;FB49 +shindageshhebrew;FB49 +shindageshshindot;FB2C +shindageshshindothebrew;FB2C +shindageshsindot;FB2D +shindageshsindothebrew;FB2D +shindothebrew;05C1 +shinhebrew;05E9 +shinshindot;FB2A +shinshindothebrew;FB2A +shinsindot;FB2B +shinsindothebrew;FB2B +shook;0282 +sigma;03C3 +sigma1;03C2 +sigmafinal;03C2 +sigmalunatesymbolgreek;03F2 +sihiragana;3057 +sikatakana;30B7 +sikatakanahalfwidth;FF7C +siluqhebrew;05BD +siluqlefthebrew;05BD +similar;223C +sindothebrew;05C2 +siosacirclekorean;3274 +siosaparenkorean;3214 +sioscieuckorean;317E +sioscirclekorean;3266 +sioskiyeokkorean;317A +sioskorean;3145 +siosnieunkorean;317B +siosparenkorean;3206 +siospieupkorean;317D +siostikeutkorean;317C +six;0036 +sixarabic;0666 +sixbengali;09EC +sixcircle;2465 +sixcircleinversesansserif;278F +sixdeva;096C +sixgujarati;0AEC +sixgurmukhi;0A6C +sixhackarabic;0666 +sixhangzhou;3026 +sixideographicparen;3225 +sixinferior;2086 +sixmonospace;FF16 +sixoldstyle;F736 +sixparen;2479 +sixperiod;248D +sixpersian;06F6 +sixroman;2175 +sixsuperior;2076 +sixteencircle;246F +sixteencurrencydenominatorbengali;09F9 +sixteenparen;2483 +sixteenperiod;2497 +sixthai;0E56 +slash;002F +slashmonospace;FF0F +slong;017F +slongdotaccent;1E9B +smileface;263A +smonospace;FF53 +sofpasuqhebrew;05C3 +softhyphen;00AD +softsigncyrillic;044C +sohiragana;305D +sokatakana;30BD +sokatakanahalfwidth;FF7F +soliduslongoverlaycmb;0338 +solidusshortoverlaycmb;0337 +sorusithai;0E29 
+sosalathai;0E28 +sosothai;0E0B +sosuathai;0E2A +space;0020 +spacehackarabic;0020 +spade;2660 +spadesuitblack;2660 +spadesuitwhite;2664 +sparen;24AE +squarebelowcmb;033B +squarecc;33C4 +squarecm;339D +squarediagonalcrosshatchfill;25A9 +squarehorizontalfill;25A4 +squarekg;338F +squarekm;339E +squarekmcapital;33CE +squareln;33D1 +squarelog;33D2 +squaremg;338E +squaremil;33D5 +squaremm;339C +squaremsquared;33A1 +squareorthogonalcrosshatchfill;25A6 +squareupperlefttolowerrightfill;25A7 +squareupperrighttolowerleftfill;25A8 +squareverticalfill;25A5 +squarewhitewithsmallblack;25A3 +srsquare;33DB +ssabengali;09B7 +ssadeva;0937 +ssagujarati;0AB7 +ssangcieuckorean;3149 +ssanghieuhkorean;3185 +ssangieungkorean;3180 +ssangkiyeokkorean;3132 +ssangnieunkorean;3165 +ssangpieupkorean;3143 +ssangsioskorean;3146 +ssangtikeutkorean;3138 +ssuperior;F6F2 +sterling;00A3 +sterlingmonospace;FFE1 +strokelongoverlaycmb;0336 +strokeshortoverlaycmb;0335 +subset;2282 +subsetnotequal;228A +subsetorequal;2286 +succeeds;227B +suchthat;220B +suhiragana;3059 +sukatakana;30B9 +sukatakanahalfwidth;FF7D +sukunarabic;0652 +summation;2211 +sun;263C +superset;2283 +supersetnotequal;228B +supersetorequal;2287 +svsquare;33DC +syouwaerasquare;337C +t;0074 +tabengali;09A4 +tackdown;22A4 +tackleft;22A3 +tadeva;0924 +tagujarati;0AA4 +tagurmukhi;0A24 +taharabic;0637 +tahfinalarabic;FEC2 +tahinitialarabic;FEC3 +tahiragana;305F +tahmedialarabic;FEC4 +taisyouerasquare;337D +takatakana;30BF +takatakanahalfwidth;FF80 +tatweelarabic;0640 +tau;03C4 +tav;05EA +tavdages;FB4A +tavdagesh;FB4A +tavdageshhebrew;FB4A +tavhebrew;05EA +tbar;0167 +tbopomofo;310A +tcaron;0165 +tccurl;02A8 +tcedilla;0163 +tcheharabic;0686 +tchehfinalarabic;FB7B +tchehinitialarabic;FB7C +tchehmedialarabic;FB7D +tchehmeeminitialarabic;FB7C FEE4 +tcircle;24E3 +tcircumflexbelow;1E71 +tcommaaccent;0163 +tdieresis;1E97 +tdotaccent;1E6B +tdotbelow;1E6D +tecyrillic;0442 +tedescendercyrillic;04AD +teharabic;062A +tehfinalarabic;FE96 
+tehhahinitialarabic;FCA2 +tehhahisolatedarabic;FC0C +tehinitialarabic;FE97 +tehiragana;3066 +tehjeeminitialarabic;FCA1 +tehjeemisolatedarabic;FC0B +tehmarbutaarabic;0629 +tehmarbutafinalarabic;FE94 +tehmedialarabic;FE98 +tehmeeminitialarabic;FCA4 +tehmeemisolatedarabic;FC0E +tehnoonfinalarabic;FC73 +tekatakana;30C6 +tekatakanahalfwidth;FF83 +telephone;2121 +telephoneblack;260E +telishagedolahebrew;05A0 +telishaqetanahebrew;05A9 +tencircle;2469 +tenideographicparen;3229 +tenparen;247D +tenperiod;2491 +tenroman;2179 +tesh;02A7 +tet;05D8 +tetdagesh;FB38 +tetdageshhebrew;FB38 +tethebrew;05D8 +tetsecyrillic;04B5 +tevirhebrew;059B +tevirlefthebrew;059B +thabengali;09A5 +thadeva;0925 +thagujarati;0AA5 +thagurmukhi;0A25 +thalarabic;0630 +thalfinalarabic;FEAC +thanthakhatlowleftthai;F898 +thanthakhatlowrightthai;F897 +thanthakhatthai;0E4C +thanthakhatupperleftthai;F896 +theharabic;062B +thehfinalarabic;FE9A +thehinitialarabic;FE9B +thehmedialarabic;FE9C +thereexists;2203 +therefore;2234 +theta;03B8 +theta1;03D1 +thetasymbolgreek;03D1 +thieuthacirclekorean;3279 +thieuthaparenkorean;3219 +thieuthcirclekorean;326B +thieuthkorean;314C +thieuthparenkorean;320B +thirteencircle;246C +thirteenparen;2480 +thirteenperiod;2494 +thonangmonthothai;0E11 +thook;01AD +thophuthaothai;0E12 +thorn;00FE +thothahanthai;0E17 +thothanthai;0E10 +thothongthai;0E18 +thothungthai;0E16 +thousandcyrillic;0482 +thousandsseparatorarabic;066C +thousandsseparatorpersian;066C +three;0033 +threearabic;0663 +threebengali;09E9 +threecircle;2462 +threecircleinversesansserif;278C +threedeva;0969 +threeeighths;215C +threegujarati;0AE9 +threegurmukhi;0A69 +threehackarabic;0663 +threehangzhou;3023 +threeideographicparen;3222 +threeinferior;2083 +threemonospace;FF13 +threenumeratorbengali;09F6 +threeoldstyle;F733 +threeparen;2476 +threeperiod;248A +threepersian;06F3 +threequarters;00BE +threequartersemdash;F6DE +threeroman;2172 +threesuperior;00B3 +threethai;0E53 +thzsquare;3394 +tihiragana;3061 +tikatakana;30C1 
+tikatakanahalfwidth;FF81 +tikeutacirclekorean;3270 +tikeutaparenkorean;3210 +tikeutcirclekorean;3262 +tikeutkorean;3137 +tikeutparenkorean;3202 +tilde;02DC +tildebelowcmb;0330 +tildecmb;0303 +tildecomb;0303 +tildedoublecmb;0360 +tildeoperator;223C +tildeoverlaycmb;0334 +tildeverticalcmb;033E +timescircle;2297 +tipehahebrew;0596 +tipehalefthebrew;0596 +tippigurmukhi;0A70 +titlocyrilliccmb;0483 +tiwnarmenian;057F +tlinebelow;1E6F +tmonospace;FF54 +toarmenian;0569 +tohiragana;3068 +tokatakana;30C8 +tokatakanahalfwidth;FF84 +tonebarextrahighmod;02E5 +tonebarextralowmod;02E9 +tonebarhighmod;02E6 +tonebarlowmod;02E8 +tonebarmidmod;02E7 +tonefive;01BD +tonesix;0185 +tonetwo;01A8 +tonos;0384 +tonsquare;3327 +topatakthai;0E0F +tortoiseshellbracketleft;3014 +tortoiseshellbracketleftsmall;FE5D +tortoiseshellbracketleftvertical;FE39 +tortoiseshellbracketright;3015 +tortoiseshellbracketrightsmall;FE5E +tortoiseshellbracketrightvertical;FE3A +totaothai;0E15 +tpalatalhook;01AB +tparen;24AF +trademark;2122 +trademarksans;F8EA +trademarkserif;F6DB +tretroflexhook;0288 +triagdn;25BC +triaglf;25C4 +triagrt;25BA +triagup;25B2 +ts;02A6 +tsadi;05E6 +tsadidagesh;FB46 +tsadidageshhebrew;FB46 +tsadihebrew;05E6 +tsecyrillic;0446 +tsere;05B5 +tsere12;05B5 +tsere1e;05B5 +tsere2b;05B5 +tserehebrew;05B5 +tserenarrowhebrew;05B5 +tserequarterhebrew;05B5 +tserewidehebrew;05B5 +tshecyrillic;045B +tsuperior;F6F3 +ttabengali;099F +ttadeva;091F +ttagujarati;0A9F +ttagurmukhi;0A1F +tteharabic;0679 +ttehfinalarabic;FB67 +ttehinitialarabic;FB68 +ttehmedialarabic;FB69 +tthabengali;09A0 +tthadeva;0920 +tthagujarati;0AA0 +tthagurmukhi;0A20 +tturned;0287 +tuhiragana;3064 +tukatakana;30C4 +tukatakanahalfwidth;FF82 +tusmallhiragana;3063 +tusmallkatakana;30C3 +tusmallkatakanahalfwidth;FF6F +twelvecircle;246B +twelveparen;247F +twelveperiod;2493 +twelveroman;217B +twentycircle;2473 +twentyhangzhou;5344 +twentyparen;2487 +twentyperiod;249B +two;0032 +twoarabic;0662 +twobengali;09E8 +twocircle;2461 
+twocircleinversesansserif;278B +twodeva;0968 +twodotenleader;2025 +twodotleader;2025 +twodotleadervertical;FE30 +twogujarati;0AE8 +twogurmukhi;0A68 +twohackarabic;0662 +twohangzhou;3022 +twoideographicparen;3221 +twoinferior;2082 +twomonospace;FF12 +twonumeratorbengali;09F5 +twooldstyle;F732 +twoparen;2475 +twoperiod;2489 +twopersian;06F2 +tworoman;2171 +twostroke;01BB +twosuperior;00B2 +twothai;0E52 +twothirds;2154 +u;0075 +uacute;00FA +ubar;0289 +ubengali;0989 +ubopomofo;3128 +ubreve;016D +ucaron;01D4 +ucircle;24E4 +ucircumflex;00FB +ucircumflexbelow;1E77 +ucyrillic;0443 +udattadeva;0951 +udblacute;0171 +udblgrave;0215 +udeva;0909 +udieresis;00FC +udieresisacute;01D8 +udieresisbelow;1E73 +udieresiscaron;01DA +udieresiscyrillic;04F1 +udieresisgrave;01DC +udieresismacron;01D6 +udotbelow;1EE5 +ugrave;00F9 +ugujarati;0A89 +ugurmukhi;0A09 +uhiragana;3046 +uhookabove;1EE7 +uhorn;01B0 +uhornacute;1EE9 +uhorndotbelow;1EF1 +uhorngrave;1EEB +uhornhookabove;1EED +uhorntilde;1EEF +uhungarumlaut;0171 +uhungarumlautcyrillic;04F3 +uinvertedbreve;0217 +ukatakana;30A6 +ukatakanahalfwidth;FF73 +ukcyrillic;0479 +ukorean;315C +umacron;016B +umacroncyrillic;04EF +umacrondieresis;1E7B +umatragurmukhi;0A41 +umonospace;FF55 +underscore;005F +underscoredbl;2017 +underscoremonospace;FF3F +underscorevertical;FE33 +underscorewavy;FE4F +union;222A +universal;2200 +uogonek;0173 +uparen;24B0 +upblock;2580 +upperdothebrew;05C4 +upsilon;03C5 +upsilondieresis;03CB +upsilondieresistonos;03B0 +upsilonlatin;028A +upsilontonos;03CD +uptackbelowcmb;031D +uptackmod;02D4 +uragurmukhi;0A73 +uring;016F +ushortcyrillic;045E +usmallhiragana;3045 +usmallkatakana;30A5 +usmallkatakanahalfwidth;FF69 +ustraightcyrillic;04AF +ustraightstrokecyrillic;04B1 +utilde;0169 +utildeacute;1E79 +utildebelow;1E75 +uubengali;098A +uudeva;090A +uugujarati;0A8A +uugurmukhi;0A0A +uumatragurmukhi;0A42 +uuvowelsignbengali;09C2 +uuvowelsigndeva;0942 +uuvowelsigngujarati;0AC2 +uvowelsignbengali;09C1 +uvowelsigndeva;0941 
+uvowelsigngujarati;0AC1 +v;0076 +vadeva;0935 +vagujarati;0AB5 +vagurmukhi;0A35 +vakatakana;30F7 +vav;05D5 +vavdagesh;FB35 +vavdagesh65;FB35 +vavdageshhebrew;FB35 +vavhebrew;05D5 +vavholam;FB4B +vavholamhebrew;FB4B +vavvavhebrew;05F0 +vavyodhebrew;05F1 +vcircle;24E5 +vdotbelow;1E7F +vecyrillic;0432 +veharabic;06A4 +vehfinalarabic;FB6B +vehinitialarabic;FB6C +vehmedialarabic;FB6D +vekatakana;30F9 +venus;2640 +verticalbar;007C +verticallineabovecmb;030D +verticallinebelowcmb;0329 +verticallinelowmod;02CC +verticallinemod;02C8 +vewarmenian;057E +vhook;028B +vikatakana;30F8 +viramabengali;09CD +viramadeva;094D +viramagujarati;0ACD +visargabengali;0983 +visargadeva;0903 +visargagujarati;0A83 +vmonospace;FF56 +voarmenian;0578 +voicediterationhiragana;309E +voicediterationkatakana;30FE +voicedmarkkana;309B +voicedmarkkanahalfwidth;FF9E +vokatakana;30FA +vparen;24B1 +vtilde;1E7D +vturned;028C +vuhiragana;3094 +vukatakana;30F4 +w;0077 +wacute;1E83 +waekorean;3159 +wahiragana;308F +wakatakana;30EF +wakatakanahalfwidth;FF9C +wakorean;3158 +wasmallhiragana;308E +wasmallkatakana;30EE +wattosquare;3357 +wavedash;301C +wavyunderscorevertical;FE34 +wawarabic;0648 +wawfinalarabic;FEEE +wawhamzaabovearabic;0624 +wawhamzaabovefinalarabic;FE86 +wbsquare;33DD +wcircle;24E6 +wcircumflex;0175 +wdieresis;1E85 +wdotaccent;1E87 +wdotbelow;1E89 +wehiragana;3091 +weierstrass;2118 +wekatakana;30F1 +wekorean;315E +weokorean;315D +wgrave;1E81 +whitebullet;25E6 +whitecircle;25CB +whitecircleinverse;25D9 +whitecornerbracketleft;300E +whitecornerbracketleftvertical;FE43 +whitecornerbracketright;300F +whitecornerbracketrightvertical;FE44 +whitediamond;25C7 +whitediamondcontainingblacksmalldiamond;25C8 +whitedownpointingsmalltriangle;25BF +whitedownpointingtriangle;25BD +whiteleftpointingsmalltriangle;25C3 +whiteleftpointingtriangle;25C1 +whitelenticularbracketleft;3016 +whitelenticularbracketright;3017 +whiterightpointingsmalltriangle;25B9 +whiterightpointingtriangle;25B7 +whitesmallsquare;25AB 
+whitesmilingface;263A +whitesquare;25A1 +whitestar;2606 +whitetelephone;260F +whitetortoiseshellbracketleft;3018 +whitetortoiseshellbracketright;3019 +whiteuppointingsmalltriangle;25B5 +whiteuppointingtriangle;25B3 +wihiragana;3090 +wikatakana;30F0 +wikorean;315F +wmonospace;FF57 +wohiragana;3092 +wokatakana;30F2 +wokatakanahalfwidth;FF66 +won;20A9 +wonmonospace;FFE6 +wowaenthai;0E27 +wparen;24B2 +wring;1E98 +wsuperior;02B7 +wturned;028D +wynn;01BF +x;0078 +xabovecmb;033D +xbopomofo;3112 +xcircle;24E7 +xdieresis;1E8D +xdotaccent;1E8B +xeharmenian;056D +xi;03BE +xmonospace;FF58 +xparen;24B3 +xsuperior;02E3 +y;0079 +yaadosquare;334E +yabengali;09AF +yacute;00FD +yadeva;092F +yaekorean;3152 +yagujarati;0AAF +yagurmukhi;0A2F +yahiragana;3084 +yakatakana;30E4 +yakatakanahalfwidth;FF94 +yakorean;3151 +yamakkanthai;0E4E +yasmallhiragana;3083 +yasmallkatakana;30E3 +yasmallkatakanahalfwidth;FF6C +yatcyrillic;0463 +ycircle;24E8 +ycircumflex;0177 +ydieresis;00FF +ydotaccent;1E8F +ydotbelow;1EF5 +yeharabic;064A +yehbarreearabic;06D2 +yehbarreefinalarabic;FBAF +yehfinalarabic;FEF2 +yehhamzaabovearabic;0626 +yehhamzaabovefinalarabic;FE8A +yehhamzaaboveinitialarabic;FE8B +yehhamzaabovemedialarabic;FE8C +yehinitialarabic;FEF3 +yehmedialarabic;FEF4 +yehmeeminitialarabic;FCDD +yehmeemisolatedarabic;FC58 +yehnoonfinalarabic;FC94 +yehthreedotsbelowarabic;06D1 +yekorean;3156 +yen;00A5 +yenmonospace;FFE5 +yeokorean;3155 +yeorinhieuhkorean;3186 +yerahbenyomohebrew;05AA +yerahbenyomolefthebrew;05AA +yericyrillic;044B +yerudieresiscyrillic;04F9 +yesieungkorean;3181 +yesieungpansioskorean;3183 +yesieungsioskorean;3182 +yetivhebrew;059A +ygrave;1EF3 +yhook;01B4 +yhookabove;1EF7 +yiarmenian;0575 +yicyrillic;0457 +yikorean;3162 +yinyang;262F +yiwnarmenian;0582 +ymonospace;FF59 +yod;05D9 +yoddagesh;FB39 +yoddageshhebrew;FB39 +yodhebrew;05D9 +yodyodhebrew;05F2 +yodyodpatahhebrew;FB1F +yohiragana;3088 +yoikorean;3189 +yokatakana;30E8 +yokatakanahalfwidth;FF96 +yokorean;315B +yosmallhiragana;3087 
+yosmallkatakana;30E7 +yosmallkatakanahalfwidth;FF6E +yotgreek;03F3 +yoyaekorean;3188 +yoyakorean;3187 +yoyakthai;0E22 +yoyingthai;0E0D +yparen;24B4 +ypogegrammeni;037A +ypogegrammenigreekcmb;0345 +yr;01A6 +yring;1E99 +ysuperior;02B8 +ytilde;1EF9 +yturned;028E +yuhiragana;3086 +yuikorean;318C +yukatakana;30E6 +yukatakanahalfwidth;FF95 +yukorean;3160 +yusbigcyrillic;046B +yusbigiotifiedcyrillic;046D +yuslittlecyrillic;0467 +yuslittleiotifiedcyrillic;0469 +yusmallhiragana;3085 +yusmallkatakana;30E5 +yusmallkatakanahalfwidth;FF6D +yuyekorean;318B +yuyeokorean;318A +yyabengali;09DF +yyadeva;095F +z;007A +zaarmenian;0566 +zacute;017A +zadeva;095B +zagurmukhi;0A5B +zaharabic;0638 +zahfinalarabic;FEC6 +zahinitialarabic;FEC7 +zahiragana;3056 +zahmedialarabic;FEC8 +zainarabic;0632 +zainfinalarabic;FEB0 +zakatakana;30B6 +zaqefgadolhebrew;0595 +zaqefqatanhebrew;0594 +zarqahebrew;0598 +zayin;05D6 +zayindagesh;FB36 +zayindageshhebrew;FB36 +zayinhebrew;05D6 +zbopomofo;3117 +zcaron;017E +zcircle;24E9 +zcircumflex;1E91 +zcurl;0291 +zdot;017C +zdotaccent;017C +zdotbelow;1E93 +zecyrillic;0437 +zedescendercyrillic;0499 +zedieresiscyrillic;04DF +zehiragana;305C +zekatakana;30BC +zero;0030 +zeroarabic;0660 +zerobengali;09E6 +zerodeva;0966 +zerogujarati;0AE6 +zerogurmukhi;0A66 +zerohackarabic;0660 +zeroinferior;2080 +zeromonospace;FF10 +zerooldstyle;F730 +zeropersian;06F0 +zerosuperior;2070 +zerothai;0E50 +zerowidthjoiner;FEFF +zerowidthnonjoiner;200C +zerowidthspace;200B +zeta;03B6 +zhbopomofo;3113 +zhearmenian;056A +zhebrevecyrillic;04C2 +zhecyrillic;0436 +zhedescendercyrillic;0497 +zhedieresiscyrillic;04DD +zihiragana;3058 +zikatakana;30B8 +zinorhebrew;05AE +zlinebelow;1E95 +zmonospace;FF5A +zohiragana;305E +zokatakana;30BE +zparen;24B5 +zretroflexhook;0290 +zstroke;01B6 +zuhiragana;305A +zukatakana;30BA +#--end diff --git a/pdf/mupdf.h b/pdf/mupdf.h new file mode 100644 index 00000000..4184a59f --- /dev/null +++ b/pdf/mupdf.h @@ -0,0 +1,660 @@ +#ifndef _MUPDF_H_ +#define _MUPDF_H_ + 
+#ifndef _FITZ_H_ +#error "fitz.h must be included before mupdf.h" +#endif + +typedef struct pdf_xref_s pdf_xref; + +void pdf_logxref(char *fmt, ...); +void pdf_logrsrc(char *fmt, ...); +void pdf_logfont(char *fmt, ...); +void pdf_logimage(char *fmt, ...); +void pdf_logshade(char *fmt, ...); +void pdf_logpage(char *fmt, ...); + +/* + * tokenizer and low-level object parser + */ + +enum +{ + PDF_TERROR, PDF_TEOF, + PDF_TOARRAY, PDF_TCARRAY, + PDF_TODICT, PDF_TCDICT, + PDF_TOBRACE, PDF_TCBRACE, + PDF_TNAME, PDF_TINT, PDF_TREAL, PDF_TSTRING, PDF_TKEYWORD, + PDF_TR, PDF_TTRUE, PDF_TFALSE, PDF_TNULL, + PDF_TOBJ, PDF_TENDOBJ, + PDF_TSTREAM, PDF_TENDSTREAM, + PDF_TXREF, PDF_TTRAILER, PDF_TSTARTXREF, + PDF_NTOKENS +}; + +/* lex.c */ +fz_error pdf_lex(int *tok, fz_stream *f, char *buf, int n, int *len); + +/* parse.c */ +fz_error pdf_parsearray(fz_obj **op, pdf_xref *xref, fz_stream *f, char *buf, int cap); +fz_error pdf_parsedict(fz_obj **op, pdf_xref *xref, fz_stream *f, char *buf, int cap); +fz_error pdf_parsestmobj(fz_obj **op, pdf_xref *xref, fz_stream *f, char *buf, int cap); +fz_error pdf_parseindobj(fz_obj **op, pdf_xref *xref, fz_stream *f, char *buf, int cap, int *num, int *gen, int *stmofs); + +fz_rect pdf_torect(fz_obj *array); +fz_matrix pdf_tomatrix(fz_obj *array); +char *pdf_toutf8(fz_obj *src); +unsigned short *pdf_toucs2(fz_obj *src); +fz_obj *pdf_toutf8name(fz_obj *src); + +/* + * Encryption + */ + +/* Permission flag bits */ +#define PDF_PERM_PRINT (1<<2) +#define PDF_PERM_CHANGE (1<<3) +#define PDF_PERM_COPY (1<<4) +#define PDF_PERM_NOTES (1<<5) +#define PDF_PERM_FILL_FORM (1<<8) +#define PDF_PERM_ACCESSIBILITY (1<<9) +#define PDF_PERM_ASSEMBLE (1<<10) +#define PDF_PERM_HIGH_RES_PRINT (1<<11) +#define PDF_DEFAULT_PERM_FLAGS 0xfffc + +typedef struct pdf_crypt_s pdf_crypt; +typedef struct pdf_cryptfilter_s pdf_cryptfilter; + +enum +{ + PDF_CRYPT_NONE, + PDF_CRYPT_RC4, + PDF_CRYPT_AESV2, + PDF_CRYPT_AESV3, + PDF_CRYPT_UNKNOWN, +}; + +struct 
pdf_cryptfilter_s +{ + int method; + int length; +}; + +struct pdf_crypt_s +{ + unsigned char idstring[32]; + int idlength; + + int v; + int length; + fz_obj *cf; + pdf_cryptfilter stmf; + pdf_cryptfilter strf; + + int r; + unsigned char o[48]; + unsigned char u[48]; + unsigned char oe[32]; + unsigned char ue[32]; + int p; + int encryptmetadata; + + unsigned char key[32]; /* decryption key generated from password */ +}; + +/* crypt.c */ +fz_error pdf_newcrypt(pdf_crypt **cp, fz_obj *enc, fz_obj *id); +void pdf_freecrypt(pdf_crypt *crypt); + +fz_error pdf_parsecryptfilter(pdf_cryptfilter *cf, fz_obj *dict, int defaultlength); +fz_stream *pdf_opencrypt(fz_stream *chain, pdf_crypt *crypt, pdf_cryptfilter *cf, int num, int gen); +void pdf_cryptobj(pdf_crypt *crypt, fz_obj *obj, int num, int gen); + +int pdf_needspassword(pdf_xref *xref); +int pdf_authenticatepassword(pdf_xref *xref, char *pw); + +void pdf_debugcrypt(pdf_crypt *crypt); + +/* + * xref and object / stream api + */ + +typedef struct pdf_xrefentry_s pdf_xrefentry; + +struct pdf_xrefentry_s +{ + int ofs; /* file offset / objstm object number */ + int gen; /* generation / objstm index */ + int stmofs; /* on-disk stream */ + fz_obj *obj; /* stored/cached object */ + int type; /* 0=unset (f)ree i(n)use (o)bjstm */ +}; + +struct pdf_xref_s +{ + fz_stream *file; + int version; + int startxref; + int filesize; + pdf_crypt *crypt; + fz_obj *trailer; + + int len; + pdf_xrefentry *table; + + int pagelen; + int pagecap; + fz_obj **pageobjs; + fz_obj **pagerefs; + + struct pdf_store_s *store; + + char scratch[65536]; +}; + +fz_obj *pdf_resolveindirect(fz_obj *ref); +fz_error pdf_cacheobject(pdf_xref *, int num, int gen); +fz_error pdf_loadobject(fz_obj **objp, pdf_xref *, int num, int gen); +void pdf_updateobject( pdf_xref *xref, int num, int gen, fz_obj *newobj); + +int pdf_isstream(pdf_xref *xref, int num, int gen); +fz_stream *pdf_openinlinestream(fz_stream *chain, pdf_xref *xref, fz_obj *stmobj, int length); 
+fz_error pdf_loadrawstream(fz_buffer **bufp, pdf_xref *xref, int num, int gen); +fz_error pdf_loadstream(fz_buffer **bufp, pdf_xref *xref, int num, int gen); +fz_error pdf_openrawstream(fz_stream **stmp, pdf_xref *, int num, int gen); +fz_error pdf_openstream(fz_stream **stmp, pdf_xref *, int num, int gen); +fz_error pdf_openstreamat(fz_stream **stmp, pdf_xref *xref, int num, int gen, fz_obj *dict, int stmofs); + +fz_error pdf_openxrefwithstream(pdf_xref **xrefp, fz_stream *file, char *password); +fz_error pdf_openxref(pdf_xref **xrefp, char *filename, char *password); +void pdf_freexref(pdf_xref *); + +/* private */ +fz_error pdf_repairxref(pdf_xref *xref, char *buf, int bufsize); +fz_error pdf_repairobjstms(pdf_xref *xref); +void pdf_debugxref(pdf_xref *); +void pdf_resizexref(pdf_xref *xref, int newcap); + +/* + * Resource store + */ + +typedef struct pdf_store_s pdf_store; + +pdf_store *pdf_newstore(void); +void pdf_freestore(pdf_store *store); +void pdf_debugstore(pdf_store *store); + +void pdf_storeitem(pdf_store *store, void *keepfn, void *dropfn, fz_obj *key, void *val); +void *pdf_finditem(pdf_store *store, void *dropfn, fz_obj *key); +void pdf_removeitem(pdf_store *store, void *dropfn, fz_obj *key); +void pdf_agestore(pdf_store *store, int maxage); + +/* + * Functions + */ + +typedef struct pdf_function_s pdf_function; + +fz_error pdf_loadfunction(pdf_function **func, pdf_xref *xref, fz_obj *ref); +void pdf_evalfunction(pdf_function *func, float *in, int inlen, float *out, int outlen); +pdf_function *pdf_keepfunction(pdf_function *func); +void pdf_dropfunction(pdf_function *func); + +/* + * Colorspace + */ + +fz_error pdf_loadcolorspace(fz_colorspace **csp, pdf_xref *xref, fz_obj *obj); +fz_pixmap *pdf_expandindexedpixmap(fz_pixmap *src); + +/* + * Pattern + */ + +typedef struct pdf_pattern_s pdf_pattern; + +struct pdf_pattern_s +{ + int refs; + int ismask; + float xstep; + float ystep; + fz_matrix matrix; + fz_rect bbox; + fz_obj *resources; + fz_buffer 
*contents; +}; + +fz_error pdf_loadpattern(pdf_pattern **patp, pdf_xref *xref, fz_obj *obj); +pdf_pattern *pdf_keeppattern(pdf_pattern *pat); +void pdf_droppattern(pdf_pattern *pat); + +/* + * Shading + */ + +fz_error pdf_loadshading(fz_shade **shadep, pdf_xref *xref, fz_obj *obj); + +/* + * XObject + */ + +typedef struct pdf_xobject_s pdf_xobject; + +struct pdf_xobject_s +{ + int refs; + fz_matrix matrix; + fz_rect bbox; + int isolated; + int knockout; + int transparency; + fz_colorspace *colorspace; + fz_obj *resources; + fz_buffer *contents; +}; + +fz_error pdf_loadxobject(pdf_xobject **xobjp, pdf_xref *xref, fz_obj *obj); +pdf_xobject *pdf_keepxobject(pdf_xobject *xobj); +void pdf_dropxobject(pdf_xobject *xobj); + +/* + * Image + */ + +fz_error pdf_loadinlineimage(fz_pixmap **imgp, pdf_xref *xref, fz_obj *rdb, fz_obj *dict, fz_stream *file); +fz_error pdf_loadimage(fz_pixmap **imgp, pdf_xref *xref, fz_obj *obj); +int pdf_isjpximage(fz_obj *dict); + +/* + * CMap + */ + +typedef struct pdf_cmap_s pdf_cmap; +typedef struct pdf_range_s pdf_range; + +enum { PDF_CMAP_SINGLE, PDF_CMAP_RANGE, PDF_CMAP_TABLE, PDF_CMAP_MULTI }; + +struct pdf_range_s +{ + unsigned short low; + /* Next, we pack 2 fields into the same unsigned short. 
Top 14 bits + * are the extent, bottom 2 bits are flags: single, range, table, + * multi */ + unsigned short extentflags; + unsigned short offset; /* range-delta or table-index */ +}; + +struct pdf_cmap_s +{ + int refs; + char cmapname[32]; + + char usecmapname[32]; + pdf_cmap *usecmap; + + int wmode; + + int ncspace; + struct + { + unsigned short n; + unsigned short low; + unsigned short high; + } cspace[40]; + + int rlen, rcap; + pdf_range *ranges; + + int tlen, tcap; + unsigned short *table; +}; + +extern pdf_cmap *pdf_cmaptable[]; /* list of builtin system cmaps */ + +pdf_cmap *pdf_newcmap(void); +pdf_cmap *pdf_keepcmap(pdf_cmap *cmap); +void pdf_dropcmap(pdf_cmap *cmap); + +void pdf_debugcmap(pdf_cmap *cmap); +int pdf_getwmode(pdf_cmap *cmap); +void pdf_setwmode(pdf_cmap *cmap, int wmode); +void pdf_setusecmap(pdf_cmap *cmap, pdf_cmap *usecmap); + +void pdf_addcodespace(pdf_cmap *cmap, int low, int high, int n); +void pdf_maprangetotable(pdf_cmap *cmap, int low, int *map, int len); +void pdf_maprangetorange(pdf_cmap *cmap, int srclo, int srchi, int dstlo); +void pdf_maponetomany(pdf_cmap *cmap, int one, int *many, int len); +void pdf_sortcmap(pdf_cmap *cmap); + +int pdf_lookupcmap(pdf_cmap *cmap, int cpt); +int pdf_lookupcmapfull(pdf_cmap *cmap, int cpt, int *out); +unsigned char *pdf_decodecmap(pdf_cmap *cmap, unsigned char *s, int *cpt); + +pdf_cmap *pdf_newidentitycmap(int wmode, int bytes); +fz_error pdf_parsecmap(pdf_cmap **cmapp, fz_stream *file); +fz_error pdf_loadembeddedcmap(pdf_cmap **cmapp, pdf_xref *xref, fz_obj *ref); +fz_error pdf_loadsystemcmap(pdf_cmap **cmapp, char *name); + +/* + * Font + */ + +void pdf_loadencoding(char **estrings, char *encoding); +int pdf_lookupagl(char *name); +char **pdf_lookupaglnames(int ucs); + +extern const unsigned short pdf_docencoding[256]; +extern const char * const pdf_macroman[256]; +extern const char * const pdf_macexpert[256]; +extern const char * const pdf_winansi[256]; +extern const char * const 
/*
 * Vertical glyph metrics for a run of CIDs, as returned by pdf_getvmtx.
 * pdf_showglyph scales x, y and w by 0.001 and by the font size.
 */
struct pdf_vmtx_s
{
	unsigned short lo; /* presumably first CID of the run -- TODO confirm */
	unsigned short hi; /* presumably last CID of the run -- TODO confirm */
	short x; /* subtracted from the glyph's x position in vertical mode */
	short y; /* subtracted from the glyph's y position in vertical mode */
	short w; /* advance applied along y after the glyph is shown */
};
/*
 * One annotation that has a usable appearance stream; a singly linked
 * list of these is built by pdf_loadannots and freed by pdf_freeannot.
 */
struct pdf_annot_s
{
	fz_obj *obj; /* the annotation object; kept reference, dropped by pdf_freeannot */
	fz_rect rect; /* value of the annotation's "Rect" entry */
	pdf_xobject *ap; /* loaded appearance stream; dropped by pdf_freeannot */
	fz_matrix matrix; /* computed by pdf_transformannot from ap and rect */
	pdf_annot *next; /* next annotation in the list, or nil */
};
/*
 * What to paint with when filling or stroking.
 */
struct pdf_material_s
{
	int kind; /* one of PDF_MNONE, PDF_MCOLOR, PDF_MPATTERN, PDF_MSHADE */
	fz_colorspace *colorspace; /* kept reference */
	pdf_pattern *pattern; /* kept reference or nil; used when kind is PDF_MPATTERN */
	fz_shade *shade; /* kept reference or nil; used when kind is PDF_MSHADE */
	float alpha; /* passed to the device together with the color */
	float v[32]; /* color components; pdf_setcolor writes colorspace->n of them */
};
*gs, fz_matrix ctm); +void pdf_setcolorspace(pdf_csi *csi, int what, fz_colorspace *cs); +void pdf_setcolor(pdf_csi *csi, int what, float *v); +void pdf_setpattern(pdf_csi *csi, int what, pdf_pattern *pat, float *v); +void pdf_setshade(pdf_csi *csi, int what, fz_shade *shade); +void pdf_showpath(pdf_csi*, int close, int fill, int stroke, int evenodd); +void pdf_showspace(pdf_csi *csi, float tadj); +void pdf_showstring(pdf_csi *csi, unsigned char *buf, int len); +void pdf_showtext(pdf_csi*, fz_obj *text); +void pdf_flushtext(pdf_csi*); +void pdf_showimage(pdf_csi*, fz_pixmap *image); +void pdf_showshade(pdf_csi*, fz_shade *shade); + +/* interpret.c */ +void pdf_gsave(pdf_csi *csi); +void pdf_grestore(pdf_csi *csi); +fz_error pdf_runcsibuffer(pdf_csi *csi, fz_obj *rdb, fz_buffer *contents); +fz_error pdf_runxobject(pdf_csi *csi, fz_obj *resources, pdf_xobject *xobj, fz_matrix transform); +fz_error pdf_runpagewithtarget(pdf_xref *xref, pdf_page *page, fz_device *dev, fz_matrix ctm, char *target); +fz_error pdf_runpage(pdf_xref *xref, pdf_page *page, fz_device *dev, fz_matrix ctm); +fz_error pdf_runglyph(pdf_xref *xref, fz_obj *resources, fz_buffer *contents, fz_device *dev, fz_matrix ctm); + +pdf_material *pdf_keepmaterial(pdf_material *mat); +pdf_material *pdf_dropmaterial(pdf_material *mat); + +#endif diff --git a/pdf/pdf_annot.c b/pdf/pdf_annot.c new file mode 100644 index 00000000..c2c67b62 --- /dev/null +++ b/pdf/pdf_annot.c @@ -0,0 +1,252 @@ +#include "fitz.h" +#include "mupdf.h" + +void +pdf_freelink(pdf_link *link) +{ + if (link->next) + pdf_freelink(link->next); + if (link->dest) + fz_dropobj(link->dest); + fz_free(link); +} + +static fz_obj * +resolvedest(pdf_xref *xref, fz_obj *dest) +{ + if (fz_isname(dest) || fz_isstring(dest)) + { + dest = pdf_lookupdest(xref, dest); + return resolvedest(xref, dest); + } + + else if (fz_isarray(dest)) + { + return dest; + } + + else if (fz_isdict(dest)) + { + dest = fz_dictgets(dest, "D"); + return resolvedest(xref, 
dest); + } + + else if (fz_isindirect(dest)) + return dest; + + return nil; +} + +pdf_link * +pdf_loadlink(pdf_xref *xref, fz_obj *dict) +{ + fz_obj *dest; + fz_obj *action; + fz_obj *obj; + fz_rect bbox; + pdf_linkkind kind; + + pdf_logpage("load link {\n"); + + dest = nil; + + obj = fz_dictgets(dict, "Rect"); + if (obj) + { + bbox = pdf_torect(obj); + pdf_logpage("rect [%g %g %g %g]\n", + bbox.x0, bbox.y0, + bbox.x1, bbox.y1); + } + else + bbox = fz_emptyrect; + + obj = fz_dictgets(dict, "Dest"); + if (obj) + { + kind = PDF_LGOTO; + dest = resolvedest(xref, obj); + pdf_logpage("dest (%d %d R)\n", fz_tonum(dest), fz_togen(dest)); + } + + action = fz_dictgets(dict, "A"); + if (action) + { + obj = fz_dictgets(action, "S"); + if (fz_isname(obj) && !strcmp(fz_toname(obj), "GoTo")) + { + kind = PDF_LGOTO; + dest = resolvedest(xref, fz_dictgets(action, "D")); + pdf_logpage("action goto (%d %d R)\n", fz_tonum(dest), fz_togen(dest)); + } + else if (fz_isname(obj) && !strcmp(fz_toname(obj), "URI")) + { + kind = PDF_LURI; + dest = fz_dictgets(action, "URI"); + pdf_logpage("action uri %s\n", fz_tostrbuf(dest)); + } + else if (fz_isname(obj) && !strcmp(fz_toname(obj), "Launch")) + { + kind = PDF_LLAUNCH; + dest = fz_dictgets(action, "F"); + pdf_logpage("action %s (%d %d R)\n", fz_toname(obj), fz_tonum(dest), fz_togen(dest)); + } + else if (fz_isname(obj) && !strcmp(fz_toname(obj), "Named")) + { + kind = PDF_LNAMED; + dest = fz_dictgets(action, "N"); + pdf_logpage("action %s (%d %d R)\n", fz_toname(obj), fz_tonum(dest), fz_togen(dest)); + } + else if (fz_isname(obj) && (!strcmp(fz_toname(obj), "GoToR"))) + { + kind = PDF_LACTION; + dest = action; + pdf_logpage("action %s (%d %d R)\n", fz_toname(obj), fz_tonum(dest), fz_togen(dest)); + } + else + { + pdf_logpage("unhandled link action, ignoring link\n"); + dest = nil; + } + } + + pdf_logpage("}\n"); + + if (dest) + { + pdf_link *link = fz_malloc(sizeof(pdf_link)); + link->kind = kind; + link->rect = bbox; + link->dest = 
fz_keepobj(dest); + link->next = nil; + return link; + } + + return nil; +} + +void +pdf_loadlinks(pdf_link **linkp, pdf_xref *xref, fz_obj *annots) +{ + pdf_link *link, *head, *tail; + fz_obj *obj; + int i; + + head = tail = nil; + link = nil; + + pdf_logpage("load link annotations {\n"); + + for (i = 0; i < fz_arraylen(annots); i++) + { + obj = fz_arrayget(annots, i); + link = pdf_loadlink(xref, obj); + if (link) + { + if (!head) + head = tail = link; + else + { + tail->next = link; + tail = link; + } + } + } + + pdf_logpage("}\n"); + + *linkp = head; +} + +void +pdf_freeannot(pdf_annot *annot) +{ + if (annot->next) + pdf_freeannot(annot->next); + if (annot->ap) + pdf_dropxobject(annot->ap); + if (annot->obj) + fz_dropobj(annot->obj); + fz_free(annot); +} + +static void +pdf_transformannot(pdf_annot *annot) +{ + fz_matrix matrix = annot->ap->matrix; + fz_rect bbox = annot->ap->bbox; + fz_rect rect = annot->rect; + float w, h, x, y; + + bbox = fz_transformrect(matrix, bbox); + w = (rect.x1 - rect.x0) / (bbox.x1 - bbox.x0); + h = (rect.y1 - rect.y0) / (bbox.y1 - bbox.y0); + x = rect.x0 - bbox.x0; + y = rect.y0 - bbox.y0; + annot->matrix = fz_concat(fz_scale(w, h), fz_translate(x, y)); +} + +void +pdf_loadannots(pdf_annot **annotp, pdf_xref *xref, fz_obj *annots) +{ + pdf_annot *annot, *head, *tail; + fz_obj *obj, *ap, *as, *n, *rect; + pdf_xobject *form; + fz_error error; + int i; + + head = tail = nil; + annot = nil; + + pdf_logpage("load appearance annotations {\n"); + + for (i = 0; i < fz_arraylen(annots); i++) + { + obj = fz_arrayget(annots, i); + + rect = fz_dictgets(obj, "Rect"); + ap = fz_dictgets(obj, "AP"); + as = fz_dictgets(obj, "AS"); + if (fz_isdict(ap)) + { + n = fz_dictgets(ap, "N"); /* normal state */ + + /* lookup current state in sub-dictionary */ + if (!pdf_isstream(xref, fz_tonum(n), fz_togen(n))) + n = fz_dictget(n, as); + + if (pdf_isstream(xref, fz_tonum(n), fz_togen(n))) + { + error = pdf_loadxobject(&form, xref, n); + if (error) + { + 
fz_catch(error, "ignoring broken annotation"); + continue; + } + + annot = fz_malloc(sizeof(pdf_annot)); + annot->obj = fz_keepobj(obj); + annot->rect = pdf_torect(rect); + annot->ap = form; + annot->next = nil; + + pdf_transformannot(annot); + + if (annot) + { + if (!head) + head = tail = annot; + else + { + tail->next = annot; + tail = annot; + } + } + } + } + } + + pdf_logpage("}\n"); + + *annotp = head; +} diff --git a/pdf/pdf_build.c b/pdf/pdf_build.c new file mode 100644 index 00000000..616f1c86 --- /dev/null +++ b/pdf/pdf_build.c @@ -0,0 +1,718 @@ +#include "fitz.h" +#include "mupdf.h" + +#define TILE + +void +pdf_initgstate(pdf_gstate *gs, fz_matrix ctm) +{ + gs->ctm = ctm; + gs->clipdepth = 0; + + gs->strokestate.linewidth = 1; + gs->strokestate.linecap = 0; + gs->strokestate.linejoin = 0; + gs->strokestate.miterlimit = 10; + gs->strokestate.dashphase = 0; + gs->strokestate.dashlen = 0; + memset(gs->strokestate.dashlist, 0, sizeof(gs->strokestate.dashlist)); + + gs->stroke.kind = PDF_MCOLOR; + gs->stroke.colorspace = fz_keepcolorspace(fz_devicegray); + gs->stroke.v[0] = 0; + gs->stroke.pattern = nil; + gs->stroke.shade = nil; + gs->stroke.alpha = 1; + + gs->fill.kind = PDF_MCOLOR; + gs->fill.colorspace = fz_keepcolorspace(fz_devicegray); + gs->fill.v[0] = 0; + gs->fill.pattern = nil; + gs->fill.shade = nil; + gs->fill.alpha = 1; + + gs->charspace = 0; + gs->wordspace = 0; + gs->scale = 1; + gs->leading = 0; + gs->font = nil; + gs->size = -1; + gs->render = 0; + gs->rise = 0; + + gs->blendmode = FZ_BNORMAL; + gs->softmask = nil; + gs->softmaskctm = fz_identity; + gs->luminosity = 0; +} + +void +pdf_setcolorspace(pdf_csi *csi, int what, fz_colorspace *colorspace) +{ + pdf_gstate *gs = csi->gstate + csi->gtop; + pdf_material *mat; + + pdf_flushtext(csi); + + mat = what == PDF_MFILL ? 
&gs->fill : &gs->stroke; + + fz_dropcolorspace(mat->colorspace); + + mat->kind = PDF_MCOLOR; + mat->colorspace = fz_keepcolorspace(colorspace); + + mat->v[0] = 0; + mat->v[1] = 0; + mat->v[2] = 0; + mat->v[3] = 1; +} + +void +pdf_setcolor(pdf_csi *csi, int what, float *v) +{ + pdf_gstate *gs = csi->gstate + csi->gtop; + pdf_material *mat; + int i; + + pdf_flushtext(csi); + + mat = what == PDF_MFILL ? &gs->fill : &gs->stroke; + + switch (mat->kind) + { + case PDF_MPATTERN: + case PDF_MCOLOR: + if (!strcmp(mat->colorspace->name, "Lab")) + { + mat->v[0] = v[0] / 100; + mat->v[1] = (v[1] + 100) / 200; + mat->v[2] = (v[2] + 100) / 200; + } + for (i = 0; i < mat->colorspace->n; i++) + mat->v[i] = v[i]; + break; + default: + fz_warn("color incompatible with material"); + } +} + +static void +pdf_unsetpattern(pdf_csi *csi, int what) +{ + pdf_gstate *gs = csi->gstate + csi->gtop; + pdf_material *mat; + mat = what == PDF_MFILL ? &gs->fill : &gs->stroke; + if (mat->kind == PDF_MPATTERN) + { + if (mat->pattern) + pdf_droppattern(mat->pattern); + mat->pattern = nil; + mat->kind = PDF_MCOLOR; + } +} + +void +pdf_setpattern(pdf_csi *csi, int what, pdf_pattern *pat, float *v) +{ + pdf_gstate *gs = csi->gstate + csi->gtop; + pdf_material *mat; + + pdf_flushtext(csi); + + mat = what == PDF_MFILL ? &gs->fill : &gs->stroke; + + if (mat->pattern) + pdf_droppattern(mat->pattern); + + mat->kind = PDF_MPATTERN; + if (pat) + mat->pattern = pdf_keeppattern(pat); + else + mat->pattern = nil; + + if (v) + pdf_setcolor(csi, what, v); +} + +void +pdf_setshade(pdf_csi *csi, int what, fz_shade *shade) +{ + pdf_gstate *gs = csi->gstate + csi->gtop; + pdf_material *mat; + + pdf_flushtext(csi); + + mat = what == PDF_MFILL ? 
&gs->fill : &gs->stroke; + + if (mat->shade) + fz_dropshade(mat->shade); + + mat->kind = PDF_MSHADE; + mat->shade = fz_keepshade(shade); +} + +static void +pdf_showpattern(pdf_csi *csi, pdf_pattern *pat, fz_rect area, int what) +{ + pdf_gstate *gstate; + fz_matrix ptm, invptm; + fz_matrix oldtopctm; + fz_error error; + int x0, y0, x1, y1; + int oldtop; + + pdf_gsave(csi); + gstate = csi->gstate + csi->gtop; + + if (pat->ismask) + { + pdf_unsetpattern(csi, PDF_MFILL); + pdf_unsetpattern(csi, PDF_MSTROKE); + if (what == PDF_MFILL) + { + pdf_dropmaterial(&gstate->stroke); + pdf_keepmaterial(&gstate->fill); + gstate->stroke = gstate->fill; + } + if (what == PDF_MSTROKE) + { + pdf_dropmaterial(&gstate->fill); + pdf_keepmaterial(&gstate->stroke); + gstate->fill = gstate->stroke; + } + } + else + { + // TODO: unset only the current fill/stroke or both? + pdf_unsetpattern(csi, what); + } + + /* don't apply softmasks to objects in the pattern as well */ + if (gstate->softmask) + { + pdf_dropxobject(gstate->softmask); + gstate->softmask = nil; + } + + ptm = fz_concat(pat->matrix, csi->topctm); + invptm = fz_invertmatrix(ptm); + + /* patterns are painted using the ctm in effect at the beginning of the content stream */ + /* get bbox of shape in pattern space for stamping */ + area = fz_transformrect(invptm, area); + x0 = floorf(area.x0 / pat->xstep); + y0 = floorf(area.y0 / pat->ystep); + x1 = ceilf(area.x1 / pat->xstep); + y1 = ceilf(area.y1 / pat->ystep); + + oldtopctm = csi->topctm; + oldtop = csi->gtop; + +#ifdef TILE + if ((x1 - x0) * (y1 - y0) > 0) + { + csi->dev->begintile(csi->dev->user, area, pat->bbox, pat->xstep, pat->ystep, ptm); + gstate->ctm = ptm; + csi->topctm = gstate->ctm; + error = pdf_runcsibuffer(csi, pat->resources, pat->contents); + if (error) + fz_catch(error, "cannot render pattern tile"); + while (oldtop < csi->gtop) + pdf_grestore(csi); + csi->dev->endtile(csi->dev->user); + } +#else + { + int x, y; + for (y = y0; y < y1; y++) + { + for (x = x0; x < 
x1; x++) + { + gstate->ctm = fz_concat(fz_translate(x * pat->xstep, y * pat->ystep), ptm); + csi->topctm = gstate->ctm; + error = pdf_runcsibuffer(csi, pat->resources, pat->contents); + while (oldtop < csi->gtop) + pdf_grestore(csi); + if (error) + { + fz_catch(error, "cannot render pattern tile"); + goto cleanup; + } + } + } + } +cleanup: +#endif + + csi->topctm = oldtopctm; + + pdf_grestore(csi); +} + +static void +pdf_begingroup(pdf_csi *csi, fz_rect bbox) +{ + pdf_gstate *gstate = csi->gstate + csi->gtop; + fz_error error; + + if (gstate->softmask) + { + pdf_xobject *softmask = gstate->softmask; + fz_rect bbox = fz_transformrect(gstate->ctm, softmask->bbox); + + gstate->softmask = nil; + + csi->dev->beginmask(csi->dev->user, bbox, gstate->luminosity, + softmask->colorspace, gstate->softmaskbc); + error = pdf_runxobject(csi, nil, softmask, fz_identity); + if (error) + fz_catch(error, "cannot run softmask"); + csi->dev->endmask(csi->dev->user); + + gstate->softmask = softmask; + } + + if (gstate->blendmode != FZ_BNORMAL) + csi->dev->begingroup(csi->dev->user, bbox, 0, 0, gstate->blendmode, 1); +} + +static void +pdf_endgroup(pdf_csi *csi) +{ + pdf_gstate *gstate = csi->gstate + csi->gtop; + + if (gstate->blendmode != FZ_BNORMAL) + csi->dev->endgroup(csi->dev->user); + + if (gstate->softmask) + csi->dev->popclip(csi->dev->user); +} + +void +pdf_showshade(pdf_csi *csi, fz_shade *shd) +{ + pdf_gstate *gstate = csi->gstate + csi->gtop; + fz_rect bbox; + + bbox = fz_boundshade(shd, gstate->ctm); + + pdf_begingroup(csi, bbox); + + csi->dev->fillshade(csi->dev->user, shd, gstate->ctm, gstate->fill.alpha); + + pdf_endgroup(csi); +} + +void +pdf_showimage(pdf_csi *csi, fz_pixmap *image) +{ + pdf_gstate *gstate = csi->gstate + csi->gtop; + fz_rect bbox; + + bbox = fz_transformrect(gstate->ctm, fz_unitrect); + + if (image->mask) + { + /* apply blend group even though we skip the softmask */ + if (gstate->blendmode != FZ_BNORMAL) + csi->dev->begingroup(csi->dev->user, bbox, 
0, 0, gstate->blendmode, 1); + csi->dev->clipimagemask(csi->dev->user, image->mask, gstate->ctm); + } + else + pdf_begingroup(csi, bbox); + + if (!image->colorspace) + { + + switch (gstate->fill.kind) + { + case PDF_MNONE: + break; + case PDF_MCOLOR: + csi->dev->fillimagemask(csi->dev->user, image, gstate->ctm, + gstate->fill.colorspace, gstate->fill.v, gstate->fill.alpha); + break; + case PDF_MPATTERN: + if (gstate->fill.pattern) + { + csi->dev->clipimagemask(csi->dev->user, image, gstate->ctm); + pdf_showpattern(csi, gstate->fill.pattern, bbox, PDF_MFILL); + csi->dev->popclip(csi->dev->user); + } + break; + case PDF_MSHADE: + if (gstate->fill.shade) + { + csi->dev->clipimagemask(csi->dev->user, image, gstate->ctm); + csi->dev->fillshade(csi->dev->user, gstate->fill.shade, gstate->ctm, gstate->fill.alpha); + csi->dev->popclip(csi->dev->user); + } + break; + } + } + else + { + csi->dev->fillimage(csi->dev->user, image, gstate->ctm, gstate->fill.alpha); + } + + if (image->mask) + { + csi->dev->popclip(csi->dev->user); + if (gstate->blendmode != FZ_BNORMAL) + csi->dev->endgroup(csi->dev->user); + } + else + pdf_endgroup(csi); +} + +void +pdf_showpath(pdf_csi *csi, int doclose, int dofill, int dostroke, int evenodd) +{ + pdf_gstate *gstate = csi->gstate + csi->gtop; + fz_path *path; + fz_rect bbox; + + path = csi->path; + csi->path = fz_newpath(); + + if (doclose) + fz_closepath(path); + + if (dostroke) + bbox = fz_boundpath(path, &gstate->strokestate, gstate->ctm); + else + bbox = fz_boundpath(path, nil, gstate->ctm); + + if (csi->clip) + { + gstate->clipdepth++; + csi->dev->clippath(csi->dev->user, path, evenodd, gstate->ctm); + csi->clip = 0; + } + + pdf_begingroup(csi, bbox); + + if (dofill) + { + switch (gstate->fill.kind) + { + case PDF_MNONE: + break; + case PDF_MCOLOR: + csi->dev->fillpath(csi->dev->user, path, evenodd, gstate->ctm, + gstate->fill.colorspace, gstate->fill.v, gstate->fill.alpha); + break; + case PDF_MPATTERN: + if (gstate->fill.pattern) + { + 
csi->dev->clippath(csi->dev->user, path, evenodd, gstate->ctm); + pdf_showpattern(csi, gstate->fill.pattern, bbox, PDF_MFILL); + csi->dev->popclip(csi->dev->user); + } + break; + case PDF_MSHADE: + if (gstate->fill.shade) + { + csi->dev->clippath(csi->dev->user, path, evenodd, gstate->ctm); + csi->dev->fillshade(csi->dev->user, gstate->fill.shade, csi->topctm, gstate->fill.alpha); + csi->dev->popclip(csi->dev->user); + } + break; + } + } + + if (dostroke) + { + switch (gstate->stroke.kind) + { + case PDF_MNONE: + break; + case PDF_MCOLOR: + csi->dev->strokepath(csi->dev->user, path, &gstate->strokestate, gstate->ctm, + gstate->stroke.colorspace, gstate->stroke.v, gstate->stroke.alpha); + break; + case PDF_MPATTERN: + if (gstate->stroke.pattern) + { + csi->dev->clipstrokepath(csi->dev->user, path, &gstate->strokestate, gstate->ctm); + pdf_showpattern(csi, gstate->stroke.pattern, bbox, PDF_MFILL); + csi->dev->popclip(csi->dev->user); + } + break; + case PDF_MSHADE: + if (gstate->stroke.shade) + { + csi->dev->clipstrokepath(csi->dev->user, path, &gstate->strokestate, gstate->ctm); + csi->dev->fillshade(csi->dev->user, gstate->stroke.shade, csi->topctm, gstate->stroke.alpha); + csi->dev->popclip(csi->dev->user); + } + break; + } + } + + pdf_endgroup(csi); + + fz_freepath(path); +} + +void +pdf_flushtext(pdf_csi *csi) +{ + pdf_gstate *gstate = csi->gstate + csi->gtop; + fz_text *text; + int dofill = 0; + int dostroke = 0; + int doclip = 0; + int doinvisible = 0; + fz_rect bbox; + + if (!csi->text) + return; + text = csi->text; + csi->text = nil; + + dofill = dostroke = doclip = doinvisible = 0; + switch (csi->textmode) + { + case 0: dofill = 1; break; + case 1: dostroke = 1; break; + case 2: dofill = dostroke = 1; break; + case 3: doinvisible = 1; break; + case 4: dofill = doclip = 1; break; + case 5: dostroke = doclip = 1; break; + case 6: dofill = dostroke = doclip = 1; break; + case 7: doclip = 1; break; + } + + bbox = fz_boundtext(text, gstate->ctm); + + 
pdf_begingroup(csi, bbox); + + if (doinvisible) + csi->dev->ignoretext(csi->dev->user, text, gstate->ctm); + + if (doclip) + { + if (csi->accumulate < 2) + gstate->clipdepth++; + csi->dev->cliptext(csi->dev->user, text, gstate->ctm, csi->accumulate); + csi->accumulate = 2; + } + + if (dofill) + { + switch (gstate->fill.kind) + { + case PDF_MNONE: + break; + case PDF_MCOLOR: + csi->dev->filltext(csi->dev->user, text, gstate->ctm, + gstate->fill.colorspace, gstate->fill.v, gstate->fill.alpha); + break; + case PDF_MPATTERN: + if (gstate->fill.pattern) + { + csi->dev->cliptext(csi->dev->user, text, gstate->ctm, 0); + pdf_showpattern(csi, gstate->fill.pattern, bbox, PDF_MFILL); + csi->dev->popclip(csi->dev->user); + } + break; + case PDF_MSHADE: + if (gstate->fill.shade) + { + csi->dev->cliptext(csi->dev->user, text, gstate->ctm, 0); + csi->dev->fillshade(csi->dev->user, gstate->fill.shade, csi->topctm, gstate->fill.alpha); + csi->dev->popclip(csi->dev->user); + } + break; + } + } + + if (dostroke) + { + switch (gstate->stroke.kind) + { + case PDF_MNONE: + break; + case PDF_MCOLOR: + csi->dev->stroketext(csi->dev->user, text, &gstate->strokestate, gstate->ctm, + gstate->stroke.colorspace, gstate->stroke.v, gstate->stroke.alpha); + break; + case PDF_MPATTERN: + if (gstate->stroke.pattern) + { + csi->dev->clipstroketext(csi->dev->user, text, &gstate->strokestate, gstate->ctm); + pdf_showpattern(csi, gstate->stroke.pattern, bbox, PDF_MFILL); + csi->dev->popclip(csi->dev->user); + } + break; + case PDF_MSHADE: + if (gstate->stroke.shade) + { + csi->dev->clipstroketext(csi->dev->user, text, &gstate->strokestate, gstate->ctm); + csi->dev->fillshade(csi->dev->user, gstate->stroke.shade, csi->topctm, gstate->stroke.alpha); + csi->dev->popclip(csi->dev->user); + } + break; + } + } + + pdf_endgroup(csi); + + fz_freetext(text); +} + +static void +pdf_showglyph(pdf_csi *csi, int cid) +{ + pdf_gstate *gstate = csi->gstate + csi->gtop; + pdf_fontdesc *fontdesc = gstate->font; + 
fz_matrix tsm, trm; + float w0, w1, tx, ty; + pdf_hmtx h; + pdf_vmtx v; + int gid; + int ucsbuf[8]; + int ucslen; + int i; + + tsm.a = gstate->size * gstate->scale; + tsm.b = 0; + tsm.c = 0; + tsm.d = gstate->size; + tsm.e = 0; + tsm.f = gstate->rise; + + ucslen = 0; + if (fontdesc->tounicode) + ucslen = pdf_lookupcmapfull(fontdesc->tounicode, cid, ucsbuf); + if (ucslen == 0 && cid < fontdesc->ncidtoucs) + { + ucsbuf[0] = fontdesc->cidtoucs[cid]; + ucslen = 1; + } + if (ucslen == 0 || (ucslen == 1 && ucsbuf[0] == 0)) + { + ucsbuf[0] = '?'; + ucslen = 1; + } + + gid = pdf_fontcidtogid(fontdesc, cid); + + if (fontdesc->wmode == 1) + { + v = pdf_getvmtx(fontdesc, cid); + tsm.e -= v.x * gstate->size * 0.001f; + tsm.f -= v.y * gstate->size * 0.001f; + } + + trm = fz_concat(tsm, csi->tm); + + /* flush buffered text if face or matrix or rendermode has changed */ + if (!csi->text || + fontdesc->font != csi->text->font || + fontdesc->wmode != csi->text->wmode || + fabsf(trm.a - csi->text->trm.a) > FLT_EPSILON || + fabsf(trm.b - csi->text->trm.b) > FLT_EPSILON || + fabsf(trm.c - csi->text->trm.c) > FLT_EPSILON || + fabsf(trm.d - csi->text->trm.d) > FLT_EPSILON || + gstate->render != csi->textmode) + { + pdf_flushtext(csi); + + csi->text = fz_newtext(fontdesc->font, trm, fontdesc->wmode); + csi->text->trm.e = 0; + csi->text->trm.f = 0; + csi->textmode = gstate->render; + } + + /* add glyph to textobject */ + fz_addtext(csi->text, gid, ucsbuf[0], trm.e, trm.f); + + /* add filler glyphs for one-to-many unicode mapping */ + for (i = 1; i < ucslen; i++) + fz_addtext(csi->text, -1, ucsbuf[i], trm.e, trm.f); + + if (fontdesc->wmode == 0) + { + h = pdf_gethmtx(fontdesc, cid); + w0 = h.w * 0.001f; + tx = (w0 * gstate->size + gstate->charspace) * gstate->scale; + csi->tm = fz_concat(fz_translate(tx, 0), csi->tm); + } + + if (fontdesc->wmode == 1) + { + w1 = v.w * 0.001f; + ty = w1 * gstate->size + gstate->charspace; + csi->tm = fz_concat(fz_translate(0, ty), csi->tm); + } +} + +void 
+pdf_showspace(pdf_csi *csi, float tadj) +{ + pdf_gstate *gstate = csi->gstate + csi->gtop; + pdf_fontdesc *fontdesc = gstate->font; + + if (!fontdesc) + { + fz_warn("cannot draw text since font and size not set"); + return; + } + + if (fontdesc->wmode == 0) + csi->tm = fz_concat(fz_translate(tadj * gstate->scale, 0), csi->tm); + else + csi->tm = fz_concat(fz_translate(0, tadj), csi->tm); +} + +void +pdf_showstring(pdf_csi *csi, unsigned char *buf, int len) +{ + pdf_gstate *gstate = csi->gstate + csi->gtop; + pdf_fontdesc *fontdesc = gstate->font; + unsigned char *end = buf + len; + int cpt, cid; + + if (!fontdesc) + { + fz_warn("cannot draw text since font and size not set"); + return; + } + + while (buf < end) + { + buf = pdf_decodecmap(fontdesc->encoding, buf, &cpt); + cid = pdf_lookupcmap(fontdesc->encoding, cpt); + if (cid >= 0) + pdf_showglyph(csi, cid); + else + fz_warn("cannot encode character with code point %#x", cpt); + if (cpt == 32) + pdf_showspace(csi, gstate->wordspace); + } +} + +void +pdf_showtext(pdf_csi *csi, fz_obj *text) +{ + pdf_gstate *gstate = csi->gstate + csi->gtop; + int i; + + if (fz_isarray(text)) + { + for (i = 0; i < fz_arraylen(text); i++) + { + fz_obj *item = fz_arrayget(text, i); + if (fz_isstring(item)) + pdf_showstring(csi, (unsigned char *)fz_tostrbuf(item), fz_tostrlen(item)); + else + pdf_showspace(csi, - fz_toreal(item) * gstate->size * 0.001f); + } + } + else if (fz_isstring(text)) + { + pdf_showstring(csi, (unsigned char *)fz_tostrbuf(text), fz_tostrlen(text)); + } +} diff --git a/pdf/pdf_cmap.c b/pdf/pdf_cmap.c new file mode 100644 index 00000000..aab6bd60 --- /dev/null +++ b/pdf/pdf_cmap.c @@ -0,0 +1,475 @@ +/* + * The CMap data structure here is constructed on the fly by + * adding simple range-to-range mappings. Then the data structure + * is optimized to contain both range-to-range and range-to-table + * lookups. 
+ * + * Any one-to-many mappings are inserted as one-to-table + * lookups in the beginning, and are not affected by the optimization + * stage. + * + * There is a special function to add a 256-length range-to-table mapping. + * The ranges do not have to be added in order. + * + * This code can be a lot simpler if we don't care about wasting memory, + * or can trust the parser to give us optimal mappings. + */ + +#include "fitz.h" +#include "mupdf.h" + +/* Macros for accessing the combined extentflags field */ +#define pdf_range_high(r) ((r)->low + ((r)->extentflags >> 2)) +#define pdf_range_flags(r) ((r)->extentflags & 3) +#define pdf_range_set_high(r, h) ((r)->extentflags = (((r)->extentflags & 3) | ((h - (r)->low) << 2))) +#define pdf_range_set_flags(r, f) ((r)->extentflags = (((r)->extentflags & ~3) | f)) + +/* + * Allocate, destroy and simple parameters. + */ + +pdf_cmap * +pdf_newcmap(void) +{ + pdf_cmap *cmap; + + cmap = fz_malloc(sizeof(pdf_cmap)); + cmap->refs = 1; + + strcpy(cmap->cmapname, ""); + strcpy(cmap->usecmapname, ""); + cmap->usecmap = nil; + cmap->wmode = 0; + cmap->ncspace = 0; + + cmap->rlen = 0; + cmap->rcap = 0; + cmap->ranges = nil; + + cmap->tlen = 0; + cmap->tcap = 0; + cmap->table = nil; + + return cmap; +} + +pdf_cmap * +pdf_keepcmap(pdf_cmap *cmap) +{ + if (cmap->refs >= 0) + cmap->refs ++; + return cmap; +} + +void +pdf_dropcmap(pdf_cmap *cmap) +{ + if (cmap->refs >= 0) + { + if (--cmap->refs == 0) + { + if (cmap->usecmap) + pdf_dropcmap(cmap->usecmap); + fz_free(cmap->ranges); + fz_free(cmap->table); + fz_free(cmap); + } + } +} + +void +pdf_setusecmap(pdf_cmap *cmap, pdf_cmap *usecmap) +{ + int i; + + if (cmap->usecmap) + pdf_dropcmap(cmap->usecmap); + cmap->usecmap = pdf_keepcmap(usecmap); + + if (cmap->ncspace == 0) + { + cmap->ncspace = usecmap->ncspace; + for (i = 0; i < usecmap->ncspace; i++) + cmap->cspace[i] = usecmap->cspace[i]; + } +} + +int +pdf_getwmode(pdf_cmap *cmap) +{ + return cmap->wmode; +} + +void 
+pdf_setwmode(pdf_cmap *cmap, int wmode) +{ + cmap->wmode = wmode; +} + +void +pdf_debugcmap(pdf_cmap *cmap) +{ + int i, k, n; + + printf("cmap $%p /%s {\n", (void *) cmap, cmap->cmapname); + + if (cmap->usecmapname[0]) + printf("\tusecmap /%s\n", cmap->usecmapname); + if (cmap->usecmap) + printf("\tusecmap $%p\n", (void *) cmap->usecmap); + + printf("\twmode %d\n", cmap->wmode); + + printf("\tcodespaces {\n"); + for (i = 0; i < cmap->ncspace; i++) + { + printf("\t\t<%x> <%x>\n", cmap->cspace[i].low, cmap->cspace[i].high); + } + printf("\t}\n"); + + printf("\tranges (%d,%d) {\n", cmap->rlen, cmap->tlen); + for (i = 0; i < cmap->rlen; i++) + { + pdf_range *r = &cmap->ranges[i]; + printf("\t\t<%04x> <%04x> ", r->low, pdf_range_high(r)); + if (pdf_range_flags(r) == PDF_CMAP_TABLE) + { + printf("[ "); + for (k = 0; k < pdf_range_high(r) - r->low + 1; k++) + printf("%d ", cmap->table[r->offset + k]); + printf("]\n"); + } + else if (pdf_range_flags(r) == PDF_CMAP_MULTI) + { + printf("< "); + n = cmap->table[r->offset]; + for (k = 0; k < n; k++) + printf("%04x ", cmap->table[r->offset + 1 + k]); + printf(">\n"); + } + else + printf("%d\n", r->offset); + } + printf("\t}\n}\n"); +} + +/* + * Add a codespacerange section. + * These ranges are used by pdf_decodecmap to decode + * multi-byte encoded strings. + */ +void +pdf_addcodespace(pdf_cmap *cmap, int low, int high, int n) +{ + if (cmap->ncspace + 1 == nelem(cmap->cspace)) + { + fz_warn("assert: too many code space ranges"); + return; + } + + cmap->cspace[cmap->ncspace].n = n; + cmap->cspace[cmap->ncspace].low = low; + cmap->cspace[cmap->ncspace].high = high; + cmap->ncspace ++; +} + +/* + * Add an integer to the table. + */ +static void +addtable(pdf_cmap *cmap, int value) +{ + if (cmap->tlen + 1 > cmap->tcap) + { + cmap->tcap = cmap->tcap > 1 ? (cmap->tcap * 3) / 2 : 256; + cmap->table = fz_realloc(cmap->table, cmap->tcap, sizeof(unsigned short)); + } + cmap->table[cmap->tlen++] = value; +} + +/* + * Add a range. 
+ */ +static void +addrange(pdf_cmap *cmap, int low, int high, int flag, int offset) +{ + /* If the range is too large to be represented, split it */ + if (high - low > 0x3fff) + { + addrange(cmap, low, low+0x3fff, flag, offset); + addrange(cmap, low+0x3fff, high, flag, offset+0x3fff); + return; + } + if (cmap->rlen + 1 > cmap->rcap) + { + cmap->rcap = cmap->rcap > 1 ? (cmap->rcap * 3) / 2 : 256; + cmap->ranges = fz_realloc(cmap->ranges, cmap->rcap, sizeof(pdf_range)); + } + cmap->ranges[cmap->rlen].low = low; + pdf_range_set_high(&cmap->ranges[cmap->rlen], high); + pdf_range_set_flags(&cmap->ranges[cmap->rlen], flag); + cmap->ranges[cmap->rlen].offset = offset; + cmap->rlen ++; +} + +/* + * Add a range-to-table mapping. + */ +void +pdf_maprangetotable(pdf_cmap *cmap, int low, int *table, int len) +{ + int i; + int high = low + len; + int offset = cmap->tlen; + for (i = 0; i < len; i++) + addtable(cmap, table[i]); + addrange(cmap, low, high, PDF_CMAP_TABLE, offset); +} + +/* + * Add a range of contiguous one-to-one mappings (ie 1..5 maps to 21..25) + */ +void +pdf_maprangetorange(pdf_cmap *cmap, int low, int high, int offset) +{ + addrange(cmap, low, high, high - low == 0 ? PDF_CMAP_SINGLE : PDF_CMAP_RANGE, offset); +} + +/* + * Add a single one-to-many mapping. + */ +void +pdf_maponetomany(pdf_cmap *cmap, int low, int *values, int len) +{ + int offset, i; + + if (len == 1) + { + addrange(cmap, low, low, PDF_CMAP_SINGLE, values[0]); + return; + } + + if (len > 8) + { + fz_warn("one to many mapping is too large (%d); truncating", len); + len = 8; + } + + offset = cmap->tlen; + addtable(cmap, len); + for (i = 0; i < len; i++) + addtable(cmap, values[i]); + addrange(cmap, low, low, PDF_CMAP_MULTI, offset); +} + +/* + * Sort the input ranges. + * Merge contiguous input ranges to range-to-range if the output is contiguous. + * Merge contiguous input ranges to range-to-table if the output is random. 
+ */ + +static int cmprange(const void *va, const void *vb) +{ + return ((const pdf_range*)va)->low - ((const pdf_range*)vb)->low; +} + +void +pdf_sortcmap(pdf_cmap *cmap) +{ + pdf_range *a; /* last written range on output */ + pdf_range *b; /* current range examined on input */ + + if (cmap->rlen == 0) + return; + + qsort(cmap->ranges, cmap->rlen, sizeof(pdf_range), cmprange); + + a = cmap->ranges; + b = cmap->ranges + 1; + + while (b < cmap->ranges + cmap->rlen) + { + /* ignore one-to-many mappings */ + if (pdf_range_flags(b) == PDF_CMAP_MULTI) + { + *(++a) = *b; + } + + /* input contiguous */ + else if (pdf_range_high(a) + 1 == b->low) + { + /* output contiguous */ + if (pdf_range_high(a) - a->low + a->offset + 1 == b->offset) + { + /* SR -> R and SS -> R and RR -> R and RS -> R */ + if (pdf_range_flags(a) == PDF_CMAP_SINGLE || pdf_range_flags(a) == PDF_CMAP_RANGE) + { + pdf_range_set_flags(a, PDF_CMAP_RANGE); + pdf_range_set_high(a, pdf_range_high(b)); + } + + /* LS -> L */ + else if (pdf_range_flags(a) == PDF_CMAP_TABLE && pdf_range_flags(b) == PDF_CMAP_SINGLE) + { + pdf_range_set_high(a, pdf_range_high(b)); + addtable(cmap, b->offset); + } + + /* LR -> LR */ + else if (pdf_range_flags(a) == PDF_CMAP_TABLE && pdf_range_flags(b) == PDF_CMAP_RANGE) + { + *(++a) = *b; + } + + /* XX -> XX */ + else + { + *(++a) = *b; + } + } + + /* output separated */ + else + { + /* SS -> L */ + if (pdf_range_flags(a) == PDF_CMAP_SINGLE && pdf_range_flags(b) == PDF_CMAP_SINGLE) + { + pdf_range_set_flags(a, PDF_CMAP_TABLE); + pdf_range_set_high(a, pdf_range_high(b)); + addtable(cmap, a->offset); + addtable(cmap, b->offset); + a->offset = cmap->tlen - 2; + } + + /* LS -> L */ + else if (pdf_range_flags(a) == PDF_CMAP_TABLE && pdf_range_flags(b) == PDF_CMAP_SINGLE) + { + pdf_range_set_high(a, pdf_range_high(b)); + addtable(cmap, b->offset); + } + + /* XX -> XX */ + else + { + *(++a) = *b; + } + } + } + + /* input separated: XX -> XX */ + else + { + *(++a) = *b; + } + + b ++; + } +} 
+ +/* + * Lookup the mapping of a codepoint. + */ +int +pdf_lookupcmap(pdf_cmap *cmap, int cpt) +{ + int l = 0; + int r = cmap->rlen - 1; + int m; + + while (l <= r) + { + m = (l + r) >> 1; + if (cpt < cmap->ranges[m].low) + r = m - 1; + else if (cpt > pdf_range_high(&cmap->ranges[m])) + l = m + 1; + else + { + int i = cpt - cmap->ranges[m].low + cmap->ranges[m].offset; + if (pdf_range_flags(&cmap->ranges[m]) == PDF_CMAP_TABLE) + return cmap->table[i]; + if (pdf_range_flags(&cmap->ranges[m]) == PDF_CMAP_MULTI) + return cmap->table[cmap->ranges[m].offset + 1]; /* first char */ + return i; + } + } + + if (cmap->usecmap) + return pdf_lookupcmap(cmap->usecmap, cpt); + + return -1; +} + +int +pdf_lookupcmapfull(pdf_cmap *cmap, int cpt, int *out) +{ + int i, k, n; + int l = 0; + int r = cmap->rlen - 1; + int m; + + while (l <= r) + { + m = (l + r) >> 1; + if (cpt < cmap->ranges[m].low) + r = m - 1; + else if (cpt > pdf_range_high(&cmap->ranges[m])) + l = m + 1; + else + { + k = cpt - cmap->ranges[m].low + cmap->ranges[m].offset; + if (pdf_range_flags(&cmap->ranges[m]) == PDF_CMAP_TABLE) + { + out[0] = cmap->table[k]; + return 1; + } + else if (pdf_range_flags(&cmap->ranges[m]) == PDF_CMAP_MULTI) + { + n = cmap->ranges[m].offset; + for (i = 0; i < cmap->table[n]; i++) + out[i] = cmap->table[n + i + 1]; + return cmap->table[n]; + } + else + { + out[0] = k; + return 1; + } + } + } + + if (cmap->usecmap) + return pdf_lookupcmapfull(cmap->usecmap, cpt, out); + + return 0; +} + +/* + * Use the codespace ranges to extract a codepoint from a + * multi-byte encoded string. 
+ */ +unsigned char * +pdf_decodecmap(pdf_cmap *cmap, unsigned char *buf, int *cpt) +{ + int k, n, c; + + c = 0; + for (n = 0; n < 4; n++) + { + c = (c << 8) | buf[n]; + for (k = 0; k < cmap->ncspace; k++) + { + if (cmap->cspace[k].n == n + 1) + { + if (c >= cmap->cspace[k].low && c <= cmap->cspace[k].high) + { + *cpt = c; + return buf + n + 1; + } + } + } + } + + *cpt = 0; + return buf + 1; +} diff --git a/pdf/pdf_cmap_load.c b/pdf/pdf_cmap_load.c new file mode 100644 index 00000000..eae52d8d --- /dev/null +++ b/pdf/pdf_cmap_load.c @@ -0,0 +1,135 @@ +#include "fitz.h" +#include "mupdf.h" + +/* + * Load CMap stream in PDF file + */ +fz_error +pdf_loadembeddedcmap(pdf_cmap **cmapp, pdf_xref *xref, fz_obj *stmobj) +{ + fz_error error = fz_okay; + fz_stream *file = nil; + pdf_cmap *cmap = nil; + pdf_cmap *usecmap; + fz_obj *wmode; + fz_obj *obj; + + if ((*cmapp = pdf_finditem(xref->store, pdf_dropcmap, stmobj))) + { + pdf_keepcmap(*cmapp); + return fz_okay; + } + + pdf_logfont("load embedded cmap (%d %d R) {\n", fz_tonum(stmobj), fz_togen(stmobj)); + + error = pdf_openstream(&file, xref, fz_tonum(stmobj), fz_togen(stmobj)); + if (error) + { + error = fz_rethrow(error, "cannot open cmap stream (%d %d R)", fz_tonum(stmobj), fz_togen(stmobj)); + goto cleanup; + } + + error = pdf_parsecmap(&cmap, file); + if (error) + { + error = fz_rethrow(error, "cannot parse cmap stream (%d %d R)", fz_tonum(stmobj), fz_togen(stmobj)); + goto cleanup; + } + + fz_close(file); + + wmode = fz_dictgets(stmobj, "WMode"); + if (fz_isint(wmode)) + { + pdf_logfont("wmode %d\n", wmode); + pdf_setwmode(cmap, fz_toint(wmode)); + } + + obj = fz_dictgets(stmobj, "UseCMap"); + if (fz_isname(obj)) + { + pdf_logfont("usecmap /%s\n", fz_toname(obj)); + error = pdf_loadsystemcmap(&usecmap, fz_toname(obj)); + if (error) + { + error = fz_rethrow(error, "cannot load system usecmap '%s'", fz_toname(obj)); + goto cleanup; + } + pdf_setusecmap(cmap, usecmap); + pdf_dropcmap(usecmap); + } + else if 
(fz_isindirect(obj)) + { + pdf_logfont("usecmap (%d %d R)\n", fz_tonum(obj), fz_togen(obj)); + error = pdf_loadembeddedcmap(&usecmap, xref, obj); + if (error) + { + error = fz_rethrow(error, "cannot load embedded usecmap (%d %d R)", fz_tonum(obj), fz_togen(obj)); + goto cleanup; + } + pdf_setusecmap(cmap, usecmap); + pdf_dropcmap(usecmap); + } + + pdf_logfont("}\n"); + + pdf_storeitem(xref->store, pdf_keepcmap, pdf_dropcmap, stmobj, cmap); + + *cmapp = cmap; + return fz_okay; + +cleanup: + if (file) + fz_close(file); + if (cmap) + pdf_dropcmap(cmap); + return error; /* already rethrown */ +} + +/* + * Create an Identity-* CMap (for both 1 and 2-byte encodings) + */ +pdf_cmap * +pdf_newidentitycmap(int wmode, int bytes) +{ + pdf_cmap *cmap = pdf_newcmap(); + sprintf(cmap->cmapname, "Identity-%c", wmode ? 'V' : 'H'); + pdf_addcodespace(cmap, 0x0000, 0xffff, bytes); + pdf_maprangetorange(cmap, 0x0000, 0xffff, 0); + pdf_sortcmap(cmap); + pdf_setwmode(cmap, wmode); + return cmap; +} + +/* + * Load predefined CMap from system. 
+ */ +fz_error +pdf_loadsystemcmap(pdf_cmap **cmapp, char *cmapname) +{ + fz_error error; + pdf_cmap *usecmap; + pdf_cmap *cmap; + int i; + + pdf_logfont("loading system cmap %s\n", cmapname); + + for (i = 0; pdf_cmaptable[i]; i++) + { + if (!strcmp(cmapname, pdf_cmaptable[i]->cmapname)) + { + cmap = pdf_cmaptable[i]; + if (cmap->usecmapname[0] && !cmap->usecmap) + { + error = pdf_loadsystemcmap(&usecmap, cmap->usecmapname); + if (error) + return fz_rethrow(error, "cannot load usecmap: %s", cmap->usecmapname); + pdf_setusecmap(cmap, usecmap); + } + *cmapp = cmap; + return fz_okay; + } + } + + return fz_throw("no builtin cmap file: %s", cmapname); +} diff --git a/pdf/pdf_cmap_parse.c b/pdf/pdf_cmap_parse.c new file mode 100644 index 00000000..d899af0e --- /dev/null +++ b/pdf/pdf_cmap_parse.c @@ -0,0 +1,490 @@ +#include "fitz.h" +#include "mupdf.h" + +/* + * CMap parser + */ + +enum +{ + TUSECMAP = PDF_NTOKENS, + TBEGINCODESPACERANGE, + TENDCODESPACERANGE, + TBEGINBFCHAR, + TENDBFCHAR, + TBEGINBFRANGE, + TENDBFRANGE, + TBEGINCIDCHAR, + TENDCIDCHAR, + TBEGINCIDRANGE, + TENDCIDRANGE, + TENDCMAP +}; + +static int +pdf_cmaptokenfromkeyword(char *key) +{ + if (!strcmp(key, "usecmap")) return TUSECMAP; + if (!strcmp(key, "begincodespacerange")) return TBEGINCODESPACERANGE; + if (!strcmp(key, "endcodespacerange")) return TENDCODESPACERANGE; + if (!strcmp(key, "beginbfchar")) return TBEGINBFCHAR; + if (!strcmp(key, "endbfchar")) return TENDBFCHAR; + if (!strcmp(key, "beginbfrange")) return TBEGINBFRANGE; + if (!strcmp(key, "endbfrange")) return TENDBFRANGE; + if (!strcmp(key, "begincidchar")) return TBEGINCIDCHAR; + if (!strcmp(key, "endcidchar")) return TENDCIDCHAR; + if (!strcmp(key, "begincidrange")) return TBEGINCIDRANGE; + if (!strcmp(key, "endcidrange")) return TENDCIDRANGE; + if (!strcmp(key, "endcmap")) return TENDCMAP; + return PDF_TKEYWORD; +} + +static int +pdf_codefromstring(char *buf, int len) +{ + int a = 0; + while (len--) + a = (a << 8) | *(unsigned char 
*)buf++; + return a; +} + +static fz_error +pdf_lexcmap(int *tok, fz_stream *file, char *buf, int n, int *sl) +{ + fz_error error; + + error = pdf_lex(tok, file, buf, n, sl); + if (error) + return fz_rethrow(error, "cannot parse cmap token"); + + if (*tok == PDF_TKEYWORD) + *tok = pdf_cmaptokenfromkeyword(buf); + + return fz_okay; +} + +static fz_error +pdf_parsecmapname(pdf_cmap *cmap, fz_stream *file) +{ + fz_error error; + char buf[256]; + int tok; + int len; + + error = pdf_lexcmap(&tok, file, buf, sizeof buf, &len); + if (error) + return fz_rethrow(error, "syntaxerror in cmap"); + + if (tok == PDF_TNAME) + fz_strlcpy(cmap->cmapname, buf, sizeof(cmap->cmapname)); + else + fz_warn("expected name after CMapName in cmap"); + + return fz_okay; +} + +static fz_error +pdf_parsewmode(pdf_cmap *cmap, fz_stream *file) +{ + fz_error error; + char buf[256]; + int tok; + int len; + + error = pdf_lexcmap(&tok, file, buf, sizeof buf, &len); + if (error) + return fz_rethrow(error, "syntaxerror in cmap"); + + if (tok == PDF_TINT) + pdf_setwmode(cmap, atoi(buf)); + else + fz_warn("expected integer after WMode in cmap"); + + return fz_okay; +} + +static fz_error +pdf_parsecodespacerange(pdf_cmap *cmap, fz_stream *file) +{ + fz_error error; + char buf[256]; + int tok; + int len; + int lo, hi; + + while (1) + { + error = pdf_lexcmap(&tok, file, buf, sizeof buf, &len); + if (error) + return fz_rethrow(error, "syntaxerror in cmap"); + + if (tok == TENDCODESPACERANGE) + return fz_okay; + + else if (tok == PDF_TSTRING) + { + lo = pdf_codefromstring(buf, len); + error = pdf_lexcmap(&tok, file, buf, sizeof buf, &len); + if (error) + return fz_rethrow(error, "syntaxerror in cmap"); + if (tok == PDF_TSTRING) + { + hi = pdf_codefromstring(buf, len); + pdf_addcodespace(cmap, lo, hi, len); + } + else break; + } + + else break; + } + + return fz_throw("expected string or endcodespacerange"); +} + +static fz_error +pdf_parsecidrange(pdf_cmap *cmap, fz_stream *file) +{ + fz_error error; + char 
buf[256]; + int tok; + int len; + int lo, hi, dst; + + while (1) + { + error = pdf_lexcmap(&tok, file, buf, sizeof buf, &len); + if (error) + return fz_rethrow(error, "syntaxerror in cmap"); + + if (tok == TENDCIDRANGE) + return fz_okay; + + else if (tok != PDF_TSTRING) + return fz_throw("expected string or endcidrange"); + + lo = pdf_codefromstring(buf, len); + + error = pdf_lexcmap(&tok, file, buf, sizeof buf, &len); + if (error) + return fz_rethrow(error, "syntaxerror in cmap"); + if (tok != PDF_TSTRING) + return fz_throw("expected string"); + + hi = pdf_codefromstring(buf, len); + + error = pdf_lexcmap(&tok, file, buf, sizeof buf, &len); + if (error) + return fz_rethrow(error, "syntaxerror in cmap"); + if (tok != PDF_TINT) + return fz_throw("expected integer"); + + dst = atoi(buf); + + pdf_maprangetorange(cmap, lo, hi, dst); + } +} + +static fz_error +pdf_parsecidchar(pdf_cmap *cmap, fz_stream *file) +{ + fz_error error; + char buf[256]; + int tok; + int len; + int src, dst; + + while (1) + { + error = pdf_lexcmap(&tok, file, buf, sizeof buf, &len); + if (error) + return fz_rethrow(error, "syntaxerror in cmap"); + + if (tok == TENDCIDCHAR) + return fz_okay; + + else if (tok != PDF_TSTRING) + return fz_throw("expected string or endcidchar"); + + src = pdf_codefromstring(buf, len); + + error = pdf_lexcmap(&tok, file, buf, sizeof buf, &len); + if (error) + return fz_rethrow(error, "syntaxerror in cmap"); + if (tok != PDF_TINT) + return fz_throw("expected integer"); + + dst = atoi(buf); + + pdf_maprangetorange(cmap, src, src, dst); + } +} + +static fz_error +pdf_parsebfrangearray(pdf_cmap *cmap, fz_stream *file, int lo, int hi) +{ + fz_error error; + char buf[256]; + int tok; + int len; + int dst[256]; + int i; + + while (1) + { + error = pdf_lexcmap(&tok, file, buf, sizeof buf, &len); + if (error) + return fz_rethrow(error, "syntaxerror in cmap"); + + if (tok == PDF_TCARRAY) + return fz_okay; + + /* Note: does not handle [ /Name /Name ... 
] */ + else if (tok != PDF_TSTRING) + return fz_throw("expected string or ]"); + + if (len / 2) + { + for (i = 0; i < len / 2; i++) + dst[i] = pdf_codefromstring(buf + i * 2, 2); + + pdf_maponetomany(cmap, lo, dst, len / 2); + } + + lo ++; + } +} + +static fz_error +pdf_parsebfrange(pdf_cmap *cmap, fz_stream *file) +{ + fz_error error; + char buf[256]; + int tok; + int len; + int lo, hi, dst; + + while (1) + { + error = pdf_lexcmap(&tok, file, buf, sizeof buf, &len); + if (error) + return fz_rethrow(error, "syntaxerror in cmap"); + + if (tok == TENDBFRANGE) + return fz_okay; + + else if (tok != PDF_TSTRING) + return fz_throw("expected string or endbfrange"); + + lo = pdf_codefromstring(buf, len); + + error = pdf_lexcmap(&tok, file, buf, sizeof buf, &len); + if (error) + return fz_rethrow(error, "syntaxerror in cmap"); + if (tok != PDF_TSTRING) + return fz_throw("expected string"); + + hi = pdf_codefromstring(buf, len); + + error = pdf_lexcmap(&tok, file, buf, sizeof buf, &len); + if (error) + return fz_rethrow(error, "syntaxerror in cmap"); + + if (tok == PDF_TSTRING) + { + if (len == 2) + { + dst = pdf_codefromstring(buf, len); + pdf_maprangetorange(cmap, lo, hi, dst); + } + else + { + int dststr[256]; + int i; + + if (len / 2) + { + for (i = 0; i < len / 2; i++) + dststr[i] = pdf_codefromstring(buf + i * 2, 2); + + while (lo <= hi) + { + dststr[i-1] ++; + pdf_maponetomany(cmap, lo, dststr, i); + lo ++; + } + } + } + } + + else if (tok == PDF_TOARRAY) + { + error = pdf_parsebfrangearray(cmap, file, lo, hi); + if (error) + return fz_rethrow(error, "cannot map bfrange"); + } + + else + { + return fz_throw("expected string or array or endbfrange"); + } + } +} + +static fz_error +pdf_parsebfchar(pdf_cmap *cmap, fz_stream *file) +{ + fz_error error; + char buf[256]; + int tok; + int len; + int dst[256]; + int src; + int i; + + while (1) + { + error = pdf_lexcmap(&tok, file, buf, sizeof buf, &len); + if (error) + return fz_rethrow(error, "syntaxerror in cmap"); + + if 
(tok == TENDBFCHAR) + return fz_okay; + + else if (tok != PDF_TSTRING) + return fz_throw("expected string or endbfchar"); + + src = pdf_codefromstring(buf, len); + + error = pdf_lexcmap(&tok, file, buf, sizeof buf, &len); + if (error) + return fz_rethrow(error, "syntaxerror in cmap"); + /* Note: does not handle /dstName */ + if (tok != PDF_TSTRING) + return fz_throw("expected string"); + + if (len / 2) + { + for (i = 0; i < len / 2; i++) + dst[i] = pdf_codefromstring(buf + i * 2, 2); + pdf_maponetomany(cmap, src, dst, i); + } + } +} + +fz_error +pdf_parsecmap(pdf_cmap **cmapp, fz_stream *file) +{ + fz_error error; + pdf_cmap *cmap; + char key[64]; + char buf[256]; + int tok; + int len; + + cmap = pdf_newcmap(); + + strcpy(key, ".notdef"); + + while (1) + { + error = pdf_lexcmap(&tok, file, buf, sizeof buf, &len); + if (error) + { + error = fz_rethrow(error, "syntaxerror in cmap"); + goto cleanup; + } + + if (tok == PDF_TEOF || tok == TENDCMAP) + break; + + else if (tok == PDF_TNAME) + { + if (!strcmp(buf, "CMapName")) + { + error = pdf_parsecmapname(cmap, file); + if (error) + { + error = fz_rethrow(error, "syntaxerror in cmap after CMapName"); + goto cleanup; + } + } + else if (!strcmp(buf, "WMode")) + { + error = pdf_parsewmode(cmap, file); + if (error) + { + error = fz_rethrow(error, "syntaxerror in cmap after WMode"); + goto cleanup; + } + } + else + fz_strlcpy(key, buf, sizeof key); + } + + else if (tok == TUSECMAP) + { + fz_strlcpy(cmap->usecmapname, key, sizeof(cmap->usecmapname)); + } + + else if (tok == TBEGINCODESPACERANGE) + { + error = pdf_parsecodespacerange(cmap, file); + if (error) + { + error = fz_rethrow(error, "syntaxerror in cmap codespacerange"); + goto cleanup; + } + } + + else if (tok == TBEGINBFCHAR) + { + error = pdf_parsebfchar(cmap, file); + if (error) + { + error = fz_rethrow(error, "syntaxerror in cmap bfchar"); + goto cleanup; + } + } + + else if (tok == TBEGINCIDCHAR) + { + error = pdf_parsecidchar(cmap, file); + if (error) + { + error 
= fz_rethrow(error, "syntaxerror in cmap cidchar"); + goto cleanup; + } + } + + else if (tok == TBEGINBFRANGE) + { + error = pdf_parsebfrange(cmap, file); + if (error) + { + error = fz_rethrow(error, "syntaxerror in cmap bfrange"); + goto cleanup; + } + } + + else if (tok == TBEGINCIDRANGE) + { + error = pdf_parsecidrange(cmap, file); + if (error) + { + error = fz_rethrow(error, "syntaxerror in cmap cidrange"); + goto cleanup; + } + } + + /* ignore everything else */ + } + + pdf_sortcmap(cmap); + + *cmapp = cmap; + return fz_okay; + +cleanup: + pdf_dropcmap(cmap); + return error; /* already rethrown */ +} diff --git a/pdf/pdf_cmap_table.c b/pdf/pdf_cmap_table.c new file mode 100644 index 00000000..a1f61365 --- /dev/null +++ b/pdf/pdf_cmap_table.c @@ -0,0 +1,316 @@ +/* + * :r !grep -h '^pdf_cmap' build/macosx-x86-debug/cmap_*.c + * :.,'as/\(pdf_cmap.*\) =/extern \1;/ + * :.,'as/pdf_cmap \(pdf_cmap.*\) =/\&\1;/ + */ + +#include "fitz.h" +#include "mupdf.h" + +extern pdf_cmap pdf_cmap_Adobe_CNS1_0; +extern pdf_cmap pdf_cmap_Adobe_CNS1_1; +extern pdf_cmap pdf_cmap_Adobe_CNS1_2; +extern pdf_cmap pdf_cmap_Adobe_CNS1_3; +extern pdf_cmap pdf_cmap_Adobe_CNS1_4; +extern pdf_cmap pdf_cmap_Adobe_CNS1_5; +extern pdf_cmap pdf_cmap_Adobe_CNS1_6; +extern pdf_cmap pdf_cmap_B5_H; +extern pdf_cmap pdf_cmap_B5_V; +extern pdf_cmap pdf_cmap_B5pc_H; +extern pdf_cmap pdf_cmap_B5pc_V; +extern pdf_cmap pdf_cmap_CNS_EUC_H; +extern pdf_cmap pdf_cmap_CNS_EUC_V; +extern pdf_cmap pdf_cmap_CNS1_H; +extern pdf_cmap pdf_cmap_CNS1_V; +extern pdf_cmap pdf_cmap_CNS2_H; +extern pdf_cmap pdf_cmap_CNS2_V; +extern pdf_cmap pdf_cmap_ETen_B5_H; +extern pdf_cmap pdf_cmap_ETen_B5_V; +extern pdf_cmap pdf_cmap_ETenms_B5_H; +extern pdf_cmap pdf_cmap_ETenms_B5_V; +extern pdf_cmap pdf_cmap_ETHK_B5_H; +extern pdf_cmap pdf_cmap_ETHK_B5_V; +extern pdf_cmap pdf_cmap_HKdla_B5_H; +extern pdf_cmap pdf_cmap_HKdla_B5_V; +extern pdf_cmap pdf_cmap_HKdlb_B5_H; +extern pdf_cmap pdf_cmap_HKdlb_B5_V; +extern pdf_cmap 
pdf_cmap_HKgccs_B5_H; +extern pdf_cmap pdf_cmap_HKgccs_B5_V; +extern pdf_cmap pdf_cmap_HKm314_B5_H; +extern pdf_cmap pdf_cmap_HKm314_B5_V; +extern pdf_cmap pdf_cmap_HKm471_B5_H; +extern pdf_cmap pdf_cmap_HKm471_B5_V; +extern pdf_cmap pdf_cmap_HKscs_B5_H; +extern pdf_cmap pdf_cmap_HKscs_B5_V; +extern pdf_cmap pdf_cmap_UniCNS_UCS2_H; +extern pdf_cmap pdf_cmap_UniCNS_UCS2_V; +extern pdf_cmap pdf_cmap_UniCNS_UTF16_H; +extern pdf_cmap pdf_cmap_UniCNS_UTF16_V; +extern pdf_cmap pdf_cmap_Adobe_GB1_0; +extern pdf_cmap pdf_cmap_Adobe_GB1_1; +extern pdf_cmap pdf_cmap_Adobe_GB1_2; +extern pdf_cmap pdf_cmap_Adobe_GB1_3; +extern pdf_cmap pdf_cmap_Adobe_GB1_4; +extern pdf_cmap pdf_cmap_Adobe_GB1_5; +extern pdf_cmap pdf_cmap_GB_EUC_H; +extern pdf_cmap pdf_cmap_GB_EUC_V; +extern pdf_cmap pdf_cmap_GB_H; +extern pdf_cmap pdf_cmap_GB_V; +extern pdf_cmap pdf_cmap_GBK_EUC_H; +extern pdf_cmap pdf_cmap_GBK_EUC_V; +extern pdf_cmap pdf_cmap_GBK2K_H; +extern pdf_cmap pdf_cmap_GBK2K_V; +extern pdf_cmap pdf_cmap_GBKp_EUC_H; +extern pdf_cmap pdf_cmap_GBKp_EUC_V; +extern pdf_cmap pdf_cmap_GBpc_EUC_H; +extern pdf_cmap pdf_cmap_GBpc_EUC_V; +extern pdf_cmap pdf_cmap_GBT_EUC_H; +extern pdf_cmap pdf_cmap_GBT_EUC_V; +extern pdf_cmap pdf_cmap_GBT_H; +extern pdf_cmap pdf_cmap_GBT_V; +extern pdf_cmap pdf_cmap_GBTpc_EUC_H; +extern pdf_cmap pdf_cmap_GBTpc_EUC_V; +extern pdf_cmap pdf_cmap_UniGB_UCS2_H; +extern pdf_cmap pdf_cmap_UniGB_UCS2_V; +extern pdf_cmap pdf_cmap_UniGB_UTF16_H; +extern pdf_cmap pdf_cmap_UniGB_UTF16_V; +extern pdf_cmap pdf_cmap_78_EUC_H; +extern pdf_cmap pdf_cmap_78_EUC_V; +extern pdf_cmap pdf_cmap_78_H; +extern pdf_cmap pdf_cmap_78_RKSJ_H; +extern pdf_cmap pdf_cmap_78_RKSJ_V; +extern pdf_cmap pdf_cmap_78_V; +extern pdf_cmap pdf_cmap_78ms_RKSJ_H; +extern pdf_cmap pdf_cmap_78ms_RKSJ_V; +extern pdf_cmap pdf_cmap_83pv_RKSJ_H; +extern pdf_cmap pdf_cmap_90ms_RKSJ_H; +extern pdf_cmap pdf_cmap_90ms_RKSJ_V; +extern pdf_cmap pdf_cmap_90msp_RKSJ_H; +extern pdf_cmap pdf_cmap_90msp_RKSJ_V; +extern 
pdf_cmap pdf_cmap_90pv_RKSJ_H; +extern pdf_cmap pdf_cmap_90pv_RKSJ_V; +extern pdf_cmap pdf_cmap_Add_H; +extern pdf_cmap pdf_cmap_Add_RKSJ_H; +extern pdf_cmap pdf_cmap_Add_RKSJ_V; +extern pdf_cmap pdf_cmap_Add_V; +extern pdf_cmap pdf_cmap_Adobe_Japan1_0; +extern pdf_cmap pdf_cmap_Adobe_Japan1_1; +extern pdf_cmap pdf_cmap_Adobe_Japan1_2; +extern pdf_cmap pdf_cmap_Adobe_Japan1_3; +extern pdf_cmap pdf_cmap_Adobe_Japan1_4; +extern pdf_cmap pdf_cmap_Adobe_Japan1_5; +extern pdf_cmap pdf_cmap_Adobe_Japan1_6; +extern pdf_cmap pdf_cmap_EUC_H; +extern pdf_cmap pdf_cmap_EUC_V; +extern pdf_cmap pdf_cmap_Ext_H; +extern pdf_cmap pdf_cmap_Ext_RKSJ_H; +extern pdf_cmap pdf_cmap_Ext_RKSJ_V; +extern pdf_cmap pdf_cmap_Ext_V; +extern pdf_cmap pdf_cmap_H; +extern pdf_cmap pdf_cmap_Hankaku; +extern pdf_cmap pdf_cmap_Hiragana; +extern pdf_cmap pdf_cmap_Katakana; +extern pdf_cmap pdf_cmap_NWP_H; +extern pdf_cmap pdf_cmap_NWP_V; +extern pdf_cmap pdf_cmap_RKSJ_H; +extern pdf_cmap pdf_cmap_RKSJ_V; +extern pdf_cmap pdf_cmap_Roman; +extern pdf_cmap pdf_cmap_UniJIS_UCS2_H; +extern pdf_cmap pdf_cmap_UniJIS_UCS2_HW_H; +extern pdf_cmap pdf_cmap_UniJIS_UCS2_HW_V; +extern pdf_cmap pdf_cmap_UniJIS_UCS2_V; +extern pdf_cmap pdf_cmap_UniJISPro_UCS2_HW_V; +extern pdf_cmap pdf_cmap_UniJISPro_UCS2_V; +extern pdf_cmap pdf_cmap_V; +extern pdf_cmap pdf_cmap_WP_Symbol; +extern pdf_cmap pdf_cmap_Adobe_Japan2_0; +extern pdf_cmap pdf_cmap_Hojo_EUC_H; +extern pdf_cmap pdf_cmap_Hojo_EUC_V; +extern pdf_cmap pdf_cmap_Hojo_H; +extern pdf_cmap pdf_cmap_Hojo_V; +extern pdf_cmap pdf_cmap_UniHojo_UCS2_H; +extern pdf_cmap pdf_cmap_UniHojo_UCS2_V; +extern pdf_cmap pdf_cmap_UniHojo_UTF16_H; +extern pdf_cmap pdf_cmap_UniHojo_UTF16_V; +extern pdf_cmap pdf_cmap_UniJIS_UTF16_H; +extern pdf_cmap pdf_cmap_UniJIS_UTF16_V; +extern pdf_cmap pdf_cmap_Adobe_Korea1_0; +extern pdf_cmap pdf_cmap_Adobe_Korea1_1; +extern pdf_cmap pdf_cmap_Adobe_Korea1_2; +extern pdf_cmap pdf_cmap_KSC_EUC_H; +extern pdf_cmap pdf_cmap_KSC_EUC_V; +extern 
pdf_cmap pdf_cmap_KSC_H; +extern pdf_cmap pdf_cmap_KSC_Johab_H; +extern pdf_cmap pdf_cmap_KSC_Johab_V; +extern pdf_cmap pdf_cmap_KSC_V; +extern pdf_cmap pdf_cmap_KSCms_UHC_H; +extern pdf_cmap pdf_cmap_KSCms_UHC_HW_H; +extern pdf_cmap pdf_cmap_KSCms_UHC_HW_V; +extern pdf_cmap pdf_cmap_KSCms_UHC_V; +extern pdf_cmap pdf_cmap_KSCpc_EUC_H; +extern pdf_cmap pdf_cmap_KSCpc_EUC_V; +extern pdf_cmap pdf_cmap_UniKS_UCS2_H; +extern pdf_cmap pdf_cmap_UniKS_UCS2_V; +extern pdf_cmap pdf_cmap_UniKS_UTF16_H; +extern pdf_cmap pdf_cmap_UniKS_UTF16_V; +extern pdf_cmap pdf_cmap_Adobe_CNS1_UCS2; +extern pdf_cmap pdf_cmap_Adobe_GB1_UCS2; +extern pdf_cmap pdf_cmap_Adobe_Japan1_UCS2; +extern pdf_cmap pdf_cmap_Adobe_Korea1_UCS2; + +pdf_cmap *pdf_cmaptable[] = +{ +#ifndef NOCJK + &pdf_cmap_Adobe_CNS1_0, + &pdf_cmap_Adobe_CNS1_1, + &pdf_cmap_Adobe_CNS1_2, + &pdf_cmap_Adobe_CNS1_3, + &pdf_cmap_Adobe_CNS1_4, + &pdf_cmap_Adobe_CNS1_5, + &pdf_cmap_Adobe_CNS1_6, + &pdf_cmap_B5_H, + &pdf_cmap_B5_V, + &pdf_cmap_B5pc_H, + &pdf_cmap_B5pc_V, + &pdf_cmap_CNS_EUC_H, + &pdf_cmap_CNS_EUC_V, + &pdf_cmap_CNS1_H, + &pdf_cmap_CNS1_V, + &pdf_cmap_CNS2_H, + &pdf_cmap_CNS2_V, + &pdf_cmap_ETen_B5_H, + &pdf_cmap_ETen_B5_V, + &pdf_cmap_ETenms_B5_H, + &pdf_cmap_ETenms_B5_V, + &pdf_cmap_ETHK_B5_H, + &pdf_cmap_ETHK_B5_V, + &pdf_cmap_HKdla_B5_H, + &pdf_cmap_HKdla_B5_V, + &pdf_cmap_HKdlb_B5_H, + &pdf_cmap_HKdlb_B5_V, + &pdf_cmap_HKgccs_B5_H, + &pdf_cmap_HKgccs_B5_V, + &pdf_cmap_HKm314_B5_H, + &pdf_cmap_HKm314_B5_V, + &pdf_cmap_HKm471_B5_H, + &pdf_cmap_HKm471_B5_V, + &pdf_cmap_HKscs_B5_H, + &pdf_cmap_HKscs_B5_V, + &pdf_cmap_UniCNS_UCS2_H, + &pdf_cmap_UniCNS_UCS2_V, + &pdf_cmap_UniCNS_UTF16_H, + &pdf_cmap_UniCNS_UTF16_V, + &pdf_cmap_Adobe_GB1_0, + &pdf_cmap_Adobe_GB1_1, + &pdf_cmap_Adobe_GB1_2, + &pdf_cmap_Adobe_GB1_3, + &pdf_cmap_Adobe_GB1_4, + &pdf_cmap_Adobe_GB1_5, + &pdf_cmap_GB_EUC_H, + &pdf_cmap_GB_EUC_V, + &pdf_cmap_GB_H, + &pdf_cmap_GB_V, + &pdf_cmap_GBK_EUC_H, + &pdf_cmap_GBK_EUC_V, + &pdf_cmap_GBK2K_H, + 
&pdf_cmap_GBK2K_V, + &pdf_cmap_GBKp_EUC_H, + &pdf_cmap_GBKp_EUC_V, + &pdf_cmap_GBpc_EUC_H, + &pdf_cmap_GBpc_EUC_V, + &pdf_cmap_GBT_EUC_H, + &pdf_cmap_GBT_EUC_V, + &pdf_cmap_GBT_H, + &pdf_cmap_GBT_V, + &pdf_cmap_GBTpc_EUC_H, + &pdf_cmap_GBTpc_EUC_V, + &pdf_cmap_UniGB_UCS2_H, + &pdf_cmap_UniGB_UCS2_V, + &pdf_cmap_UniGB_UTF16_H, + &pdf_cmap_UniGB_UTF16_V, + &pdf_cmap_78_EUC_H, + &pdf_cmap_78_EUC_V, + &pdf_cmap_78_H, + &pdf_cmap_78_RKSJ_H, + &pdf_cmap_78_RKSJ_V, + &pdf_cmap_78_V, + &pdf_cmap_78ms_RKSJ_H, + &pdf_cmap_78ms_RKSJ_V, + &pdf_cmap_83pv_RKSJ_H, + &pdf_cmap_90ms_RKSJ_H, + &pdf_cmap_90ms_RKSJ_V, + &pdf_cmap_90msp_RKSJ_H, + &pdf_cmap_90msp_RKSJ_V, + &pdf_cmap_90pv_RKSJ_H, + &pdf_cmap_90pv_RKSJ_V, + &pdf_cmap_Add_H, + &pdf_cmap_Add_RKSJ_H, + &pdf_cmap_Add_RKSJ_V, + &pdf_cmap_Add_V, + &pdf_cmap_Adobe_Japan1_0, + &pdf_cmap_Adobe_Japan1_1, + &pdf_cmap_Adobe_Japan1_2, + &pdf_cmap_Adobe_Japan1_3, + &pdf_cmap_Adobe_Japan1_4, + &pdf_cmap_Adobe_Japan1_5, + &pdf_cmap_Adobe_Japan1_6, + &pdf_cmap_EUC_H, + &pdf_cmap_EUC_V, + &pdf_cmap_Ext_H, + &pdf_cmap_Ext_RKSJ_H, + &pdf_cmap_Ext_RKSJ_V, + &pdf_cmap_Ext_V, + &pdf_cmap_H, + &pdf_cmap_Hankaku, + &pdf_cmap_Hiragana, + &pdf_cmap_Katakana, + &pdf_cmap_NWP_H, + &pdf_cmap_NWP_V, + &pdf_cmap_RKSJ_H, + &pdf_cmap_RKSJ_V, + &pdf_cmap_Roman, + &pdf_cmap_UniJIS_UCS2_H, + &pdf_cmap_UniJIS_UCS2_HW_H, + &pdf_cmap_UniJIS_UCS2_HW_V, + &pdf_cmap_UniJIS_UCS2_V, + &pdf_cmap_UniJISPro_UCS2_HW_V, + &pdf_cmap_UniJISPro_UCS2_V, + &pdf_cmap_V, + &pdf_cmap_WP_Symbol, + &pdf_cmap_Adobe_Japan2_0, + &pdf_cmap_Hojo_EUC_H, + &pdf_cmap_Hojo_EUC_V, + &pdf_cmap_Hojo_H, + &pdf_cmap_Hojo_V, + &pdf_cmap_UniHojo_UCS2_H, + &pdf_cmap_UniHojo_UCS2_V, + &pdf_cmap_UniHojo_UTF16_H, + &pdf_cmap_UniHojo_UTF16_V, + &pdf_cmap_UniJIS_UTF16_H, + &pdf_cmap_UniJIS_UTF16_V, + &pdf_cmap_Adobe_Korea1_0, + &pdf_cmap_Adobe_Korea1_1, + &pdf_cmap_Adobe_Korea1_2, + &pdf_cmap_KSC_EUC_H, + &pdf_cmap_KSC_EUC_V, + &pdf_cmap_KSC_H, + &pdf_cmap_KSC_Johab_H, + &pdf_cmap_KSC_Johab_V, + 
&pdf_cmap_KSC_V, + &pdf_cmap_KSCms_UHC_H, + &pdf_cmap_KSCms_UHC_HW_H, + &pdf_cmap_KSCms_UHC_HW_V, + &pdf_cmap_KSCms_UHC_V, + &pdf_cmap_KSCpc_EUC_H, + &pdf_cmap_KSCpc_EUC_V, + &pdf_cmap_UniKS_UCS2_H, + &pdf_cmap_UniKS_UCS2_V, + &pdf_cmap_UniKS_UTF16_H, + &pdf_cmap_UniKS_UTF16_V, + &pdf_cmap_Adobe_CNS1_UCS2, + &pdf_cmap_Adobe_GB1_UCS2, + &pdf_cmap_Adobe_Japan1_UCS2, + &pdf_cmap_Adobe_Korea1_UCS2, +#endif + 0 +}; diff --git a/pdf/pdf_colorspace.c b/pdf/pdf_colorspace.c new file mode 100644 index 00000000..6203e1bb --- /dev/null +++ b/pdf/pdf_colorspace.c @@ -0,0 +1,415 @@ +#include "fitz.h" +#include "mupdf.h" + +/* ICCBased */ + +static fz_error +loadiccbased(fz_colorspace **csp, pdf_xref *xref, fz_obj *dict) +{ + int n; + + pdf_logrsrc("load ICCBased\n"); + + n = fz_toint(fz_dictgets(dict, "N")); + + switch (n) + { + case 1: *csp = fz_devicegray; return fz_okay; + case 3: *csp = fz_devicergb; return fz_okay; + case 4: *csp = fz_devicecmyk; return fz_okay; + } + + return fz_throw("syntaxerror: ICCBased must have 1, 3 or 4 components"); +} + +/* Lab */ + +static inline float fung(float x) +{ + if (x >= 6.0f / 29.0f) + return x * x * x; + return (108.0f / 841.0f) * (x - (4.0f / 29.0f)); +} + +static inline float invg(float x) +{ + if (x > 0.008856f) + return powf(x, 1.0f / 3.0f); + return (7.787f * x) + (16.0f / 116.0f); +} + +static void +labtoxyz(fz_colorspace *cs, float *lab, float *xyz) +{ + /* input is in range (0..100, -128..127, -128..127) not (0..1, 0..1, 0..1) */ + float lstar, astar, bstar, l, m, n; + lstar = lab[0]; + astar = lab[1]; + bstar = lab[2]; + m = (lstar + 16) / 116; + l = m + astar / 500; + n = m - bstar / 200; + xyz[0] = fung(l); + xyz[1] = fung(m); + xyz[2] = fung(n); +} + +static void +xyztolab(fz_colorspace *cs, float *xyz, float *lab) +{ + float lstar, astar, bstar; + float yyn = xyz[1]; + if (yyn < 0.008856f) + lstar = 116.0f * yyn * (1.0f / 3.0f) - 16.0f; + else + lstar = 903.3f * yyn; + astar = 500 * (invg(xyz[0]) - invg(xyz[1])); + bstar 
= 200 * (invg(xyz[1]) - invg(xyz[2])); + lab[0] = lstar; + lab[1] = astar; + lab[2] = bstar; +} + +static fz_colorspace kdevicelab = { -1, "Lab", 3, labtoxyz, xyztolab }; +static fz_colorspace *fz_devicelab = &kdevicelab; + +/* Separation and DeviceN */ + +struct separation +{ + fz_colorspace *base; + pdf_function *tint; +}; + +static void +separationtoxyz(fz_colorspace *cs, float *color, float *xyz) +{ + struct separation *sep = cs->data; + float alt[FZ_MAXCOLORS]; + pdf_evalfunction(sep->tint, color, cs->n, alt, sep->base->n); + sep->base->toxyz(sep->base, alt, xyz); +} + +static void +freeseparation(fz_colorspace *cs) +{ + struct separation *sep = cs->data; + fz_dropcolorspace(sep->base); + pdf_dropfunction(sep->tint); + fz_free(sep); +} + +static fz_error +loadseparation(fz_colorspace **csp, pdf_xref *xref, fz_obj *array) +{ + fz_error error; + fz_colorspace *cs; + struct separation *sep; + fz_obj *nameobj = fz_arrayget(array, 1); + fz_obj *baseobj = fz_arrayget(array, 2); + fz_obj *tintobj = fz_arrayget(array, 3); + fz_colorspace *base; + pdf_function *tint; + int n; + + pdf_logrsrc("load Separation {\n"); + + if (fz_isarray(nameobj)) + n = fz_arraylen(nameobj); + else + n = 1; + + if (n > FZ_MAXCOLORS) + return fz_throw("too many components in colorspace"); + + pdf_logrsrc("n = %d\n", n); + + error = pdf_loadcolorspace(&base, xref, baseobj); + if (error) + return fz_rethrow(error, "cannot load base colorspace (%d %d R)", fz_tonum(baseobj), fz_togen(baseobj)); + + error = pdf_loadfunction(&tint, xref, tintobj); + if (error) + { + fz_dropcolorspace(base); + return fz_rethrow(error, "cannot load tint function (%d %d R)", fz_tonum(tintobj), fz_togen(tintobj)); + } + + sep = fz_malloc(sizeof(struct separation)); + sep->base = base; + sep->tint = tint; + + cs = fz_newcolorspace(n == 1 ? 
"Separation" : "DeviceN", n); + cs->toxyz = separationtoxyz; + cs->freedata = freeseparation; + cs->data = sep; + + pdf_logrsrc("}\n"); + + *csp = cs; + return fz_okay; +} + +/* Indexed */ + +struct indexed +{ + fz_colorspace *base; + int high; + unsigned char *lookup; +}; + +static void +indexedtoxyz(fz_colorspace *cs, float *color, float *xyz) +{ + struct indexed *idx = cs->data; + float alt[FZ_MAXCOLORS]; + int i, k; + i = color[0] * 255; + i = CLAMP(i, 0, idx->high); + for (k = 0; k < idx->base->n; k++) + alt[k] = idx->lookup[i * idx->base->n + k] / 255.0f; + idx->base->toxyz(idx->base, alt, xyz); +} + +static void +freeindexed(fz_colorspace *cs) +{ + struct indexed *idx = cs->data; + if (idx->base) + fz_dropcolorspace(idx->base); + fz_free(idx->lookup); + fz_free(idx); +} + +fz_pixmap * +pdf_expandindexedpixmap(fz_pixmap *src) +{ + struct indexed *idx; + fz_pixmap *dst; + unsigned char *s, *d; + int y, x, k, n, high; + unsigned char *lookup; + + assert(src->colorspace->toxyz == indexedtoxyz); + assert(src->n == 2); + + idx = src->colorspace->data; + high = idx->high; + lookup = idx->lookup; + n = idx->base->n; + + dst = fz_newpixmap(idx->base, src->x, src->y, src->w, src->h); + s = src->samples; + d = dst->samples; + + for (y = 0; y < src->h; y++) + { + for (x = 0; x < src->w; x++) + { + int v = *s++; + int a = *s++; + v = MIN(v, high); + for (k = 0; k < n; k++) + *d++ = fz_mul255(lookup[v * n + k], a); + *d++ = a; + } + } + + if (src->mask) + dst->mask = fz_keeppixmap(src->mask); + dst->interpolate = src->interpolate; + + return dst; +} + +static fz_error +loadindexed(fz_colorspace **csp, pdf_xref *xref, fz_obj *array) +{ + fz_error error; + fz_colorspace *cs; + struct indexed *idx; + fz_obj *baseobj = fz_arrayget(array, 1); + fz_obj *highobj = fz_arrayget(array, 2); + fz_obj *lookup = fz_arrayget(array, 3); + fz_colorspace *base; + int i, n; + + pdf_logrsrc("load Indexed {\n"); + + error = pdf_loadcolorspace(&base, xref, baseobj); + if (error) + return 
fz_rethrow(error, "cannot load base colorspace (%d %d R)", fz_tonum(baseobj), fz_togen(baseobj)); + + pdf_logrsrc("base %s\n", base->name); + + idx = fz_malloc(sizeof(struct indexed)); + idx->base = base; + idx->high = fz_toint(highobj); + idx->high = CLAMP(idx->high, 0, 255); + n = base->n * (idx->high + 1); + idx->lookup = fz_malloc(n); + memset(idx->lookup, 0, n); + + cs = fz_newcolorspace("Indexed", 1); + cs->toxyz = indexedtoxyz; + cs->freedata = freeindexed; + cs->data = idx; + + if (fz_isstring(lookup) && fz_tostrlen(lookup) == n) + { + unsigned char *buf; + + pdf_logrsrc("string lookup\n"); + + buf = (unsigned char *) fz_tostrbuf(lookup); + for (i = 0; i < n; i++) + idx->lookup[i] = buf[i]; + } + else if (fz_isindirect(lookup)) + { + fz_stream *file; + + pdf_logrsrc("stream lookup\n"); + + error = pdf_openstream(&file, xref, fz_tonum(lookup), fz_togen(lookup)); + if (error) + { + fz_dropcolorspace(cs); + return fz_rethrow(error, "cannot open colorspace lookup table (%d 0 R)", fz_tonum(lookup)); + } + + i = fz_read(file, idx->lookup, n); + if (i < 0) + { + fz_dropcolorspace(cs); + return fz_throw("cannot read colorspace lookup table (%d 0 R)", fz_tonum(lookup)); + } + + fz_close(file); + } + else + { + fz_dropcolorspace(cs); + return fz_throw("cannot parse colorspace lookup table"); + } + + pdf_logrsrc("}\n"); + + *csp = cs; + return fz_okay; +} + +/* Parse and create colorspace from PDF object */ + +static fz_error +pdf_loadcolorspaceimp(fz_colorspace **csp, pdf_xref *xref, fz_obj *obj) +{ + if (fz_isname(obj)) + { + if (!strcmp(fz_toname(obj), "Pattern")) + *csp = fz_devicegray; + else if (!strcmp(fz_toname(obj), "G")) + *csp = fz_devicegray; + else if (!strcmp(fz_toname(obj), "RGB")) + *csp = fz_devicergb; + else if (!strcmp(fz_toname(obj), "CMYK")) + *csp = fz_devicecmyk; + else if (!strcmp(fz_toname(obj), "DeviceGray")) + *csp = fz_devicegray; + else if (!strcmp(fz_toname(obj), "DeviceRGB")) + *csp = fz_devicergb; + else if (!strcmp(fz_toname(obj), 
"DeviceCMYK")) + *csp = fz_devicecmyk; + else + return fz_throw("unknown colorspace: %s", fz_toname(obj)); + return fz_okay; + } + + else if (fz_isarray(obj)) + { + fz_obj *name = fz_arrayget(obj, 0); + + if (fz_isname(name)) + { + /* load base colorspace instead */ + if (!strcmp(fz_toname(name), "Pattern")) + { + fz_error error; + + obj = fz_arrayget(obj, 1); + if (!obj) + { + *csp = fz_devicegray; + return fz_okay; + } + + error = pdf_loadcolorspace(csp, xref, obj); + if (error) + return fz_rethrow(error, "cannot load pattern (%d %d R)", fz_tonum(obj), fz_togen(obj)); + } + + else if (!strcmp(fz_toname(name), "G")) + *csp = fz_devicegray; + else if (!strcmp(fz_toname(name), "RGB")) + *csp = fz_devicergb; + else if (!strcmp(fz_toname(name), "CMYK")) + *csp = fz_devicecmyk; + else if (!strcmp(fz_toname(name), "DeviceGray")) + *csp = fz_devicegray; + else if (!strcmp(fz_toname(name), "DeviceRGB")) + *csp = fz_devicergb; + else if (!strcmp(fz_toname(name), "DeviceCMYK")) + *csp = fz_devicecmyk; + else if (!strcmp(fz_toname(name), "CalGray")) + *csp = fz_devicegray; + else if (!strcmp(fz_toname(name), "CalRGB")) + *csp = fz_devicergb; + else if (!strcmp(fz_toname(name), "CalCMYK")) + *csp = fz_devicecmyk; + else if (!strcmp(fz_toname(name), "Lab")) + *csp = fz_devicelab; + + else if (!strcmp(fz_toname(name), "ICCBased")) + return loadiccbased(csp, xref, fz_arrayget(obj, 1)); + + else if (!strcmp(fz_toname(name), "Indexed")) + return loadindexed(csp, xref, obj); + else if (!strcmp(fz_toname(name), "I")) + return loadindexed(csp, xref, obj); + + else if (!strcmp(fz_toname(name), "Separation")) + return loadseparation(csp, xref, obj); + + else if (!strcmp(fz_toname(name), "DeviceN")) + return loadseparation(csp, xref, obj); + + else + return fz_throw("syntaxerror: unknown colorspace %s", fz_toname(name)); + + return fz_okay; + } + } + + return fz_throw("syntaxerror: could not parse color space (%d %d R)", fz_tonum(obj), fz_togen(obj)); +} + +fz_error 
+pdf_loadcolorspace(fz_colorspace **csp, pdf_xref *xref, fz_obj *obj) +{ + fz_error error; + + if ((*csp = pdf_finditem(xref->store, fz_dropcolorspace, obj))) + { + fz_keepcolorspace(*csp); + return fz_okay; + } + + error = pdf_loadcolorspaceimp(csp, xref, obj); + if (error) + return fz_rethrow(error, "cannot load colorspace (%d %d R)", fz_tonum(obj), fz_togen(obj)); + + pdf_storeitem(xref->store, fz_keepcolorspace, fz_dropcolorspace, obj, *csp); + + return fz_okay; +} diff --git a/pdf/pdf_crypt.c b/pdf/pdf_crypt.c new file mode 100644 index 00000000..6266e188 --- /dev/null +++ b/pdf/pdf_crypt.c @@ -0,0 +1,721 @@ +#include "fitz.h" +#include "mupdf.h" + +/* + * Create crypt object for decrypting strings and streams + * given the Encryption and ID objects. + */ + +fz_error +pdf_newcrypt(pdf_crypt **cryptp, fz_obj *dict, fz_obj *id) +{ + pdf_crypt *crypt; + fz_error error; + fz_obj *obj; + + crypt = fz_malloc(sizeof(pdf_crypt)); + memset(crypt, 0x00, sizeof(pdf_crypt)); + crypt->cf = nil; + + /* Common to all security handlers (PDF 1.7 table 3.18) */ + + obj = fz_dictgets(dict, "Filter"); + if (!fz_isname(obj)) + { + pdf_freecrypt(crypt); + return fz_throw("unspecified encryption handler"); + } + if (strcmp(fz_toname(obj), "Standard") != 0) + { + pdf_freecrypt(crypt); + return fz_throw("unknown encryption handler: '%s'", fz_toname(obj)); + } + + crypt->v = 0; + obj = fz_dictgets(dict, "V"); + if (fz_isint(obj)) + crypt->v = fz_toint(obj); + if (crypt->v != 1 && crypt->v != 2 && crypt->v != 4 && crypt->v != 5) + { + pdf_freecrypt(crypt); + return fz_throw("unknown encryption version"); + } + + crypt->length = 40; + if (crypt->v == 2 || crypt->v == 4) + { + obj = fz_dictgets(dict, "Length"); + if (fz_isint(obj)) + crypt->length = fz_toint(obj); + + /* work-around for pdf generators that assume length is in bytes */ + if (crypt->length < 40) + crypt->length = crypt->length * 8; + + if (crypt->length % 8 != 0) + { + pdf_freecrypt(crypt); + return fz_throw("invalid 
encryption key length"); + } + if (crypt->length > 256) + { + pdf_freecrypt(crypt); + return fz_throw("invalid encryption key length"); + } + } + + if (crypt->v == 5) + crypt->length = 256; + + if (crypt->v == 1 || crypt->v == 2) + { + crypt->stmf.method = PDF_CRYPT_RC4; + crypt->stmf.length = crypt->length; + + crypt->strf.method = PDF_CRYPT_RC4; + crypt->strf.length = crypt->length; + } + + if (crypt->v == 4 || crypt->v == 5) + { + crypt->stmf.method = PDF_CRYPT_NONE; + crypt->stmf.length = crypt->length; + + crypt->strf.method = PDF_CRYPT_NONE; + crypt->strf.length = crypt->length; + + obj = fz_dictgets(dict, "CF"); + if (fz_isdict(obj)) + { + crypt->cf = fz_keepobj(obj); + + obj = fz_dictgets(dict, "StmF"); + if (fz_isname(obj)) + { + /* should verify that it is either Identity or StdCF */ + obj = fz_dictgets(crypt->cf, fz_toname(obj)); + if (fz_isdict(obj)) + { + error = pdf_parsecryptfilter(&crypt->stmf, obj, crypt->length); + if (error) + { + pdf_freecrypt(crypt); + return fz_rethrow(error, "cannot parse stream crypt filter (%d %d R)", fz_tonum(obj), fz_togen(obj)); + } + } + } + + obj = fz_dictgets(dict, "StrF"); + if (fz_isname(obj)) + { + /* should verify that it is either Identity or StdCF */ + obj = fz_dictgets(crypt->cf, fz_toname(obj)); + if (fz_isdict(obj)) + { + error = pdf_parsecryptfilter(&crypt->strf, obj, crypt->length); + if (error) + { + pdf_freecrypt(crypt); + return fz_rethrow(error, "cannot parse string crypt filter (%d %d R)", fz_tonum(obj), fz_togen(obj)); + } + } + } + + /* in crypt revision 4, the crypt filter determines the key length */ + if (crypt->strf.method != PDF_CRYPT_NONE) + crypt->length = crypt->stmf.length; + } + } + + /* Standard security handler (PDF 1.7 table 3.19) */ + + obj = fz_dictgets(dict, "R"); + if (fz_isint(obj)) + crypt->r = fz_toint(obj); + else + { + pdf_freecrypt(crypt); + return fz_throw("encryption dictionary missing revision value"); + } + + obj = fz_dictgets(dict, "O"); + if (fz_isstring(obj) && 
fz_tostrlen(obj) == 32) + memcpy(crypt->o, fz_tostrbuf(obj), 32); + /* /O and /U are supposed to be 48 bytes long for revision 5, they're often longer, though */ + else if (crypt->r == 5 && fz_isstring(obj) && fz_tostrlen(obj) >= 48) + memcpy(crypt->o, fz_tostrbuf(obj), 48); + else + { + pdf_freecrypt(crypt); + return fz_throw("encryption dictionary missing owner password"); + } + + obj = fz_dictgets(dict, "U"); + if (fz_isstring(obj) && fz_tostrlen(obj) == 32) + memcpy(crypt->u, fz_tostrbuf(obj), 32); + else if (fz_isstring(obj) && fz_tostrlen(obj) >= 48 && crypt->r == 5) + memcpy(crypt->u, fz_tostrbuf(obj), 48); + else + { + pdf_freecrypt(crypt); + return fz_throw("encryption dictionary missing user password"); + } + + obj = fz_dictgets(dict, "P"); + if (fz_isint(obj)) + crypt->p = fz_toint(obj); + else + { + pdf_freecrypt(crypt); + return fz_throw("encryption dictionary missing permissions value"); + } + + if (crypt->r == 5) + { + obj = fz_dictgets(dict, "OE"); + if (!fz_isstring(obj) || fz_tostrlen(obj) != 32) + { + pdf_freecrypt(crypt); + return fz_throw("encryption dictionary missing owner encryption key"); + } + memcpy(crypt->oe, fz_tostrbuf(obj), 32); + + obj = fz_dictgets(dict, "UE"); + if (!fz_isstring(obj) || fz_tostrlen(obj) != 32) + { + pdf_freecrypt(crypt); + return fz_throw("encryption dictionary missing user encryption key"); + } + memcpy(crypt->ue, fz_tostrbuf(obj), 32); + } + + crypt->encryptmetadata = 1; + obj = fz_dictgets(dict, "EncryptMetadata"); + if (fz_isbool(obj)) + crypt->encryptmetadata = fz_tobool(obj); + + /* Extract file identifier string */ + + crypt->idlength = 0; + + if (fz_isarray(id) && fz_arraylen(id) == 2) + { + obj = fz_arrayget(id, 0); + if (fz_isstring(obj)) + { + if (fz_tostrlen(obj) <= sizeof(crypt->idstring)) + { + memcpy(crypt->idstring, fz_tostrbuf(obj), fz_tostrlen(obj)); + crypt->idlength = fz_tostrlen(obj); + } + } + } + else + fz_warn("missing file identifier, may not be able to do decryption"); + + *cryptp = crypt; 
+ return fz_okay; +} + +void +pdf_freecrypt(pdf_crypt *crypt) +{ + if (crypt->cf) fz_dropobj(crypt->cf); + fz_free(crypt); +} + +/* + * Parse a CF dictionary entry (PDF 1.7 table 3.22) + */ + +fz_error +pdf_parsecryptfilter(pdf_cryptfilter *cf, fz_obj *dict, int defaultlength) +{ + fz_obj *obj; + + cf->method = PDF_CRYPT_NONE; + cf->length = defaultlength; + + obj = fz_dictgets(dict, "CFM"); + if (fz_isname(obj)) + { + if (!strcmp(fz_toname(obj), "None")) + cf->method = PDF_CRYPT_NONE; + else if (!strcmp(fz_toname(obj), "V2")) + cf->method = PDF_CRYPT_RC4; + else if (!strcmp(fz_toname(obj), "AESV2")) + cf->method = PDF_CRYPT_AESV2; + else if (!strcmp(fz_toname(obj), "AESV3")) + cf->method = PDF_CRYPT_AESV3; + else + fz_throw("unknown encryption method: %s", fz_toname(obj)); + } + + obj = fz_dictgets(dict, "Length"); + if (fz_isint(obj)) + cf->length = fz_toint(obj); + + /* the length for crypt filters is supposed to be in bytes not bits */ + if (cf->length < 40) + cf->length = cf->length * 8; + + if ((cf->length % 8) != 0) + return fz_throw("invalid key length: %d", cf->length); + + return fz_okay; +} + +/* + * Compute an encryption key (PDF 1.7 algorithm 3.2) + */ + +static const unsigned char padding[32] = +{ + 0x28, 0xbf, 0x4e, 0x5e, 0x4e, 0x75, 0x8a, 0x41, + 0x64, 0x00, 0x4e, 0x56, 0xff, 0xfa, 0x01, 0x08, + 0x2e, 0x2e, 0x00, 0xb6, 0xd0, 0x68, 0x3e, 0x80, + 0x2f, 0x0c, 0xa9, 0xfe, 0x64, 0x53, 0x69, 0x7a +}; + +static void +pdf_computeencryptionkey(pdf_crypt *crypt, unsigned char *password, int pwlen, unsigned char *key) +{ + unsigned char buf[32]; + unsigned int p; + int i, n; + fz_md5 md5; + + n = crypt->length / 8; + + /* Step 1 - copy and pad password string */ + if (pwlen > 32) + pwlen = 32; + memcpy(buf, password, pwlen); + memcpy(buf + pwlen, padding, 32 - pwlen); + + /* Step 2 - init md5 and pass value of step 1 */ + fz_md5init(&md5); + fz_md5update(&md5, buf, 32); + + /* Step 3 - pass O value */ + fz_md5update(&md5, crypt->o, 32); + + /* Step 4 - pass P 
value as unsigned int, low-order byte first */ + p = (unsigned int) crypt->p; + buf[0] = (p) & 0xFF; + buf[1] = (p >> 8) & 0xFF; + buf[2] = (p >> 16) & 0xFF; + buf[3] = (p >> 24) & 0xFF; + fz_md5update(&md5, buf, 4); + + /* Step 5 - pass first element of ID array */ + fz_md5update(&md5, crypt->idstring, crypt->idlength); + + /* Step 6 (revision 4 or greater) - if metadata is not encrypted pass 0xFFFFFFFF */ + if (crypt->r >= 4) + { + if (!crypt->encryptmetadata) + { + buf[0] = 0xFF; + buf[1] = 0xFF; + buf[2] = 0xFF; + buf[3] = 0xFF; + fz_md5update(&md5, buf, 4); + } + } + + /* Step 7 - finish the hash */ + fz_md5final(&md5, buf); + + /* Step 8 (revision 3 or greater) - do some voodoo 50 times */ + if (crypt->r >= 3) + { + for (i = 0; i < 50; i++) + { + fz_md5init(&md5); + fz_md5update(&md5, buf, n); + fz_md5final(&md5, buf); + } + } + + /* Step 9 - the key is the first 'n' bytes of the result */ + memcpy(key, buf, n); +} + +/* + * Compute an encryption key (PDF 1.7 ExtensionLevel 3 algorithm 3.2a) + */ + +static void +pdf_computeencryptionkey_r5(pdf_crypt *crypt, unsigned char *password, int pwlen, int ownerkey, unsigned char *validationkey) +{ + unsigned char buffer[128 + 8 + 48]; + fz_sha256 sha256; + fz_aes aes; + + /* Step 2 - truncate UTF-8 password to 127 characters */ + + if (pwlen > 127) + pwlen = 127; + + /* Step 3/4 - test password against owner/user key and compute encryption key */ + + memcpy(buffer, password, pwlen); + if (ownerkey) + { + memcpy(buffer + pwlen, crypt->o + 32, 8); + memcpy(buffer + pwlen + 8, crypt->u, 48); + } + else + memcpy(buffer + pwlen, crypt->u + 32, 8); + + fz_sha256init(&sha256); + fz_sha256update(&sha256, buffer, pwlen + 8 + (ownerkey ? 
48 : 0)); + fz_sha256final(&sha256, validationkey); + + /* Step 3.5/4.5 - compute file encryption key from OE/UE */ + + memcpy(buffer + pwlen, crypt->u + 40, 8); + + fz_sha256init(&sha256); + fz_sha256update(&sha256, buffer, pwlen + 8); + fz_sha256final(&sha256, buffer); + + // clear password buffer and use it as iv + memset(buffer + 32, 0, sizeof(buffer) - 32); + aes_setkey_dec(&aes, buffer, crypt->length); + aes_crypt_cbc(&aes, AES_DECRYPT, 32, buffer + 32, ownerkey ? crypt->oe : crypt->ue, crypt->key); +} + +/* + * Computing the user password (PDF 1.7 algorithm 3.4 and 3.5) + * Also save the generated key for decrypting objects and streams in crypt->key. + */ + +static void +pdf_computeuserpassword(pdf_crypt *crypt, unsigned char *password, int pwlen, unsigned char *output) +{ + if (crypt->r == 2) + { + fz_arc4 arc4; + + pdf_computeencryptionkey(crypt, password, pwlen, crypt->key); + fz_arc4init(&arc4, crypt->key, crypt->length / 8); + fz_arc4encrypt(&arc4, output, padding, 32); + } + + if (crypt->r == 3 || crypt->r == 4) + { + unsigned char xor[32]; + unsigned char digest[16]; + fz_md5 md5; + fz_arc4 arc4; + int i, x, n; + + n = crypt->length / 8; + + pdf_computeencryptionkey(crypt, password, pwlen, crypt->key); + + fz_md5init(&md5); + fz_md5update(&md5, padding, 32); + fz_md5update(&md5, crypt->idstring, crypt->idlength); + fz_md5final(&md5, digest); + + fz_arc4init(&arc4, crypt->key, n); + fz_arc4encrypt(&arc4, output, digest, 16); + + for (x = 1; x <= 19; x++) + { + for (i = 0; i < n; i++) + xor[i] = crypt->key[i] ^ x; + fz_arc4init(&arc4, xor, n); + fz_arc4encrypt(&arc4, output, output, 16); + } + + memcpy(output + 16, padding, 16); + } + + if (crypt->r == 5) + { + pdf_computeencryptionkey_r5(crypt, password, pwlen, 0, output); + } +} + +/* + * Authenticating the user password (PDF 1.7 algorithm 3.6 + * and ExtensionLevel 3 algorithm 3.11) + * This also has the side effect of saving a key generated + * from the password for decrypting objects and streams. 
+ */ + +static int +pdf_authenticateuserpassword(pdf_crypt *crypt, unsigned char *password, int pwlen) +{ + unsigned char output[32]; + pdf_computeuserpassword(crypt, password, pwlen, output); + if (crypt->r == 2 || crypt->r == 5) + return memcmp(output, crypt->u, 32) == 0; + if (crypt->r == 3 || crypt->r == 4) + return memcmp(output, crypt->u, 16) == 0; + return 0; +} + +/* + * Authenticating the owner password (PDF 1.7 algorithm 3.7 + * and ExtensionLevel 3 algorithm 3.12) + * Generates the user password from the owner password + * and calls pdf_authenticateuserpassword. + */ + +static int +pdf_authenticateownerpassword(pdf_crypt *crypt, unsigned char *ownerpass, int pwlen) +{ + unsigned char pwbuf[32]; + unsigned char key[32]; + unsigned char xor[32]; + unsigned char userpass[32]; + int i, n, x; + fz_md5 md5; + fz_arc4 arc4; + + if (crypt->r == 5) + { + /* PDF 1.7 ExtensionLevel 3 algorithm 3.12 */ + + pdf_computeencryptionkey_r5(crypt, ownerpass, pwlen, 1, key); + + return !memcmp(key, crypt->o, 32); + } + + n = crypt->length / 8; + + /* Step 1 -- steps 1 to 4 of PDF 1.7 algorithm 3.3 */ + + /* copy and pad password string */ + if (pwlen > 32) + pwlen = 32; + memcpy(pwbuf, ownerpass, pwlen); + memcpy(pwbuf + pwlen, padding, 32 - pwlen); + + /* take md5 hash of padded password */ + fz_md5init(&md5); + fz_md5update(&md5, pwbuf, 32); + fz_md5final(&md5, key); + + /* do some voodoo 50 times (Revision 3 or greater) */ + if (crypt->r >= 3) + { + for (i = 0; i < 50; i++) + { + fz_md5init(&md5); + fz_md5update(&md5, key, 16); + fz_md5final(&md5, key); + } + } + + /* Step 2 (Revision 2) */ + if (crypt->r == 2) + { + fz_arc4init(&arc4, key, n); + fz_arc4encrypt(&arc4, userpass, crypt->o, 32); + } + + /* Step 2 (Revision 3 or greater) */ + if (crypt->r >= 3) + { + memcpy(userpass, crypt->o, 32); + for (x = 0; x < 20; x++) + { + for (i = 0; i < n; i++) + xor[i] = key[i] ^ (19 - x); + fz_arc4init(&arc4, xor, n); + fz_arc4encrypt(&arc4, userpass, userpass, 32); + } + } + + 
return pdf_authenticateuserpassword(crypt, userpass, 32); +} + +int +pdf_authenticatepassword(pdf_xref *xref, char *password) +{ + if (xref->crypt) + { + if (pdf_authenticateuserpassword(xref->crypt, (unsigned char *)password, strlen(password))) + return 1; + if (pdf_authenticateownerpassword(xref->crypt, (unsigned char *)password, strlen(password))) + return 1; + return 0; + } + return 1; +} + +int +pdf_needspassword(pdf_xref *xref) +{ + if (!xref->crypt) + return 0; + if (pdf_authenticatepassword(xref, "")) + return 0; + return 1; +} + +/* + * PDF 1.7 algorithm 3.1 and ExtensionLevel 3 algorithm 3.1a + * + * Using the global encryption key that was generated from the + * password, create a new key that is used to decrypt indivual + * objects and streams. This key is based on the object and + * generation numbers. + */ + +static int +pdf_computeobjectkey(pdf_crypt *crypt, pdf_cryptfilter *cf, int num, int gen, unsigned char *key) +{ + fz_md5 md5; + unsigned char message[5]; + + if (cf->method == PDF_CRYPT_AESV3) + { + memcpy(key, crypt->key, crypt->length / 8); + return crypt->length / 8; + } + + fz_md5init(&md5); + fz_md5update(&md5, crypt->key, crypt->length / 8); + message[0] = (num) & 0xFF; + message[1] = (num >> 8) & 0xFF; + message[2] = (num >> 16) & 0xFF; + message[3] = (gen) & 0xFF; + message[4] = (gen >> 8) & 0xFF; + fz_md5update(&md5, message, 5); + + if (cf->method == PDF_CRYPT_AESV2) + fz_md5update(&md5, (unsigned char *)"sAlT", 4); + + fz_md5final(&md5, key); + + if (crypt->length / 8 + 5 > 16) + return 16; + return crypt->length / 8 + 5; +} + +/* + * PDF 1.7 algorithm 3.1 and ExtensionLevel 3 algorithm 3.1a + * + * Decrypt all strings in obj modifying the data in-place. + * Recurse through arrays and dictionaries, but do not follow + * indirect references. 
+ */ + +static void +pdf_cryptobjimp(pdf_crypt *crypt, fz_obj *obj, unsigned char *key, int keylen) +{ + unsigned char *s; + int i, n; + + if (fz_isindirect(obj)) + return; + + if (fz_isstring(obj)) + { + s = (unsigned char *) fz_tostrbuf(obj); + n = fz_tostrlen(obj); + + if (crypt->strf.method == PDF_CRYPT_RC4) + { + fz_arc4 arc4; + fz_arc4init(&arc4, key, keylen); + fz_arc4encrypt(&arc4, s, s, n); + } + + if (crypt->strf.method == PDF_CRYPT_AESV2 || crypt->strf.method == PDF_CRYPT_AESV3) + { + if (n >= 32) + { + unsigned char iv[16]; + fz_aes aes; + memcpy(iv, s, 16); + aes_setkey_dec(&aes, key, keylen * 8); + aes_crypt_cbc(&aes, AES_DECRYPT, n - 16, iv, s + 16, s); + obj->u.s.len -= 16; /* delete space used for iv */ + obj->u.s.len -= s[n - 17]; /* delete padding bytes at end */ + } + } + } + + else if (fz_isarray(obj)) + { + n = fz_arraylen(obj); + for (i = 0; i < n; i++) + { + pdf_cryptobjimp(crypt, fz_arrayget(obj, i), key, keylen); + } + } + + else if (fz_isdict(obj)) + { + n = fz_dictlen(obj); + for (i = 0; i < n; i++) + { + pdf_cryptobjimp(crypt, fz_dictgetval(obj, i), key, keylen); + } + } +} + +void +pdf_cryptobj(pdf_crypt *crypt, fz_obj *obj, int num, int gen) +{ + unsigned char key[32]; + int len; + + len = pdf_computeobjectkey(crypt, &crypt->strf, num, gen, key); + + pdf_cryptobjimp(crypt, obj, key, len); +} + +/* + * PDF 1.7 algorithm 3.1 and ExtensionLevel 3 algorithm 3.1a + * + * Create filter suitable for de/encrypting a stream. 
+ */ +fz_stream * +pdf_opencrypt(fz_stream *chain, pdf_crypt *crypt, pdf_cryptfilter *stmf, int num, int gen) +{ + unsigned char key[32]; + int len; + + len = pdf_computeobjectkey(crypt, stmf, num, gen, key); + + if (stmf->method == PDF_CRYPT_RC4) + return fz_openarc4(chain, key, len); + + if (stmf->method == PDF_CRYPT_AESV2 || stmf->method == PDF_CRYPT_AESV3) + return fz_openaesd(chain, key, len); + + return fz_opencopy(chain); +} + +void pdf_debugcrypt(pdf_crypt *crypt) +{ + int i; + + printf("crypt {\n"); + + printf("\tv=%d length=%d\n", crypt->v, crypt->length); + printf("\tstmf method=%d length=%d\n", crypt->stmf.method, crypt->stmf.length); + printf("\tstrf method=%d length=%d\n", crypt->strf.method, crypt->strf.length); + printf("\tr=%d\n", crypt->r); + + printf("\to=<"); + for (i = 0; i < 32; i++) + printf("%02X", crypt->o[i]); + printf(">\n"); + + printf("\tu=<"); + for (i = 0; i < 32; i++) + printf("%02X", crypt->u[i]); + printf(">\n"); + + printf("}\n"); +} diff --git a/pdf/pdf_debug.c b/pdf/pdf_debug.c new file mode 100644 index 00000000..9d7f1f56 --- /dev/null +++ b/pdf/pdf_debug.c @@ -0,0 +1,91 @@ +#include "fitz.h" +#include "mupdf.h" + +/* + * Enable logging by setting environment variable MULOG to: + * (a)ll or a combination of + * (x)ref (r)src (f)ont (i)mage (s)hade (p)age + * + * eg. 
MULOG=fis ./x11pdf mytestfile.pdf + */ + +enum +{ + PDF_LXREF = 1, + PDF_LRSRC = 2, + PDF_LFONT = 4, + PDF_LIMAGE = 8, + PDF_LSHADE = 16, + PDF_LPAGE = 32 +}; + +static inline void pdflog(int tag, char *name, char *fmt, va_list ap) +{ + static int flags = 128; + static int level = 0; + static int push = 1; + int i; + + if (flags == 128) + { + char *s = getenv("MULOG"); + flags = 0; + if (s) + { + if (strstr(s, "a")) + flags |= 0xffff; + if (strstr(s, "x")) + flags |= PDF_LXREF; + if (strstr(s, "r")) + flags |= PDF_LRSRC; + if (strstr(s, "f")) + flags |= PDF_LFONT; + if (strstr(s, "i")) + flags |= PDF_LIMAGE; + if (strstr(s, "s")) + flags |= PDF_LSHADE; + if (strstr(s, "p")) + flags |= PDF_LPAGE; + } + } + + if (!(flags & tag)) + return; + + if (strchr(fmt, '}')) + level --; + + if (push) + { + printf("%s: ", name); + for (i = 0; i < level; i++) + printf("\t"); + } + + vprintf(fmt, ap); + + if (strchr(fmt, '{')) + level ++; + + push = !!strchr(fmt, '\n'); + + fflush(stdout); +} + +void pdf_logxref(char *fmt, ...) +{va_list ap;va_start(ap,fmt);pdflog(PDF_LXREF,"xref",fmt,ap);va_end(ap);} + +void pdf_logrsrc(char *fmt, ...) +{va_list ap;va_start(ap,fmt);pdflog(PDF_LRSRC,"rsrc",fmt,ap);va_end(ap);} + +void pdf_logfont(char *fmt, ...) +{va_list ap;va_start(ap,fmt);pdflog(PDF_LFONT,"font",fmt,ap);va_end(ap);} + +void pdf_logimage(char *fmt, ...) +{va_list ap;va_start(ap,fmt);pdflog(PDF_LIMAGE,"imag",fmt,ap);va_end(ap);} + +void pdf_logshade(char *fmt, ...) +{va_list ap;va_start(ap,fmt);pdflog(PDF_LSHADE,"shad",fmt,ap);va_end(ap);} + +void pdf_logpage(char *fmt, ...) 
+{va_list ap;va_start(ap,fmt);pdflog(PDF_LPAGE,"page",fmt,ap);va_end(ap);} diff --git a/pdf/pdf_font.c b/pdf/pdf_font.c new file mode 100644 index 00000000..75bc141f --- /dev/null +++ b/pdf/pdf_font.c @@ -0,0 +1,1052 @@ +#include "fitz.h" +#include "mupdf.h" + +#include <ft2build.h> +#include FT_FREETYPE_H +#include FT_XFREE86_H + +static char *basefontnames[14][7] = +{ + { "Courier", "CourierNew", "CourierNewPSMT", nil }, + { "Courier-Bold", "CourierNew,Bold", "Courier,Bold", + "CourierNewPS-BoldMT", "CourierNew-Bold", nil }, + { "Courier-Oblique", "CourierNew,Italic", "Courier,Italic", + "CourierNewPS-ItalicMT", "CourierNew-Italic", nil }, + { "Courier-BoldOblique", "CourierNew,BoldItalic", "Courier,BoldItalic", + "CourierNewPS-BoldItalicMT", "CourierNew-BoldItalic", nil }, + { "Helvetica", "ArialMT", "Arial", nil }, + { "Helvetica-Bold", "Arial-BoldMT", "Arial,Bold", "Arial-Bold", + "Helvetica,Bold", nil }, + { "Helvetica-Oblique", "Arial-ItalicMT", "Arial,Italic", "Arial-Italic", + "Helvetica,Italic", "Helvetica-Italic", nil }, + { "Helvetica-BoldOblique", "Arial-BoldItalicMT", + "Arial,BoldItalic", "Arial-BoldItalic", + "Helvetica,BoldItalic", "Helvetica-BoldItalic", nil }, + { "Times-Roman", "TimesNewRomanPSMT", "TimesNewRoman", + "TimesNewRomanPS", nil }, + { "Times-Bold", "TimesNewRomanPS-BoldMT", "TimesNewRoman,Bold", + "TimesNewRomanPS-Bold", "TimesNewRoman-Bold", nil }, + { "Times-Italic", "TimesNewRomanPS-ItalicMT", "TimesNewRoman,Italic", + "TimesNewRomanPS-Italic", "TimesNewRoman-Italic", nil }, + { "Times-BoldItalic", "TimesNewRomanPS-BoldItalicMT", + "TimesNewRoman,BoldItalic", "TimesNewRomanPS-BoldItalic", + "TimesNewRoman-BoldItalic", nil }, + { "Symbol", nil }, + { "ZapfDingbats", nil } +}; + +static int isdynalab(char *name) +{ + if (strstr(name, "HuaTian")) + return 1; + if (strstr(name, "MingLi")) + return 1; + if ((strstr(name, "DF") == name) || strstr(name, "+DF")) + return 1; + if ((strstr(name, "DLC") == name) || strstr(name, "+DLC")) + 
return 1; + return 0; +} + +static int strcmpignorespace(char *a, char *b) +{ + while (1) + { + while (*a == ' ') + a++; + while (*b == ' ') + b++; + if (*a != *b) + return 1; + if (*a == 0) + return *a != *b; + if (*b == 0) + return *a != *b; + a++; + b++; + } +} + +static char *cleanfontname(char *fontname) +{ + int i, k; + for (i = 0; i < 14; i++) + for (k = 0; basefontnames[i][k]; k++) + if (!strcmpignorespace(basefontnames[i][k], fontname)) + return basefontnames[i][0]; + return fontname; +} + +/* + * FreeType and Rendering glue + */ + +enum { UNKNOWN, TYPE1, TRUETYPE }; + +static int ftkind(FT_Face face) +{ + const char *kind = FT_Get_X11_Font_Format(face); + pdf_logfont("ft font format %s\n", kind); + if (!strcmp(kind, "TrueType")) + return TRUETYPE; + if (!strcmp(kind, "Type 1")) + return TYPE1; + if (!strcmp(kind, "CFF")) + return TYPE1; + if (!strcmp(kind, "CID Type 1")) + return TYPE1; + return UNKNOWN; +} + +static int ftcharindex(FT_Face face, int cid) +{ + int gid = FT_Get_Char_Index(face, cid); + if (gid == 0) + gid = FT_Get_Char_Index(face, 0xf000 + cid); + return gid; +} + +static inline int ftcidtogid(pdf_fontdesc *fontdesc, int cid) +{ + if (fontdesc->tottfcmap) + { + cid = pdf_lookupcmap(fontdesc->tottfcmap, cid); + return ftcharindex(fontdesc->font->ftface, cid); + } + + if (fontdesc->cidtogid) + return fontdesc->cidtogid[cid]; + + return cid; +} + +int +pdf_fontcidtogid(pdf_fontdesc *fontdesc, int cid) +{ + if (fontdesc->font->ftface) + return ftcidtogid(fontdesc, cid); + return cid; +} + +static int ftwidth(pdf_fontdesc *fontdesc, int cid) +{ + int gid, fterr; + + gid = ftcidtogid(fontdesc, cid); + + fterr = FT_Load_Glyph(fontdesc->font->ftface, gid, + FT_LOAD_NO_HINTING | FT_LOAD_NO_BITMAP | FT_LOAD_IGNORE_TRANSFORM); + if (fterr) + { + fz_warn("freetype load glyph (gid %d): %s", gid, ft_errorstring(fterr)); + return 0; + } + return ((FT_Face)fontdesc->font->ftface)->glyph->advance.x; +} + +/* + * Basic encoding tables + */ + +static int 
mrecode(char *name) +{ + int i; + for (i = 0; i < 256; i++) + if (pdf_macroman[i] && !strcmp(name, pdf_macroman[i])) + return i; + return -1; +} + +/* + * Create and destroy + */ + +pdf_fontdesc * +pdf_keepfont(pdf_fontdesc *fontdesc) +{ + fontdesc->refs ++; + return fontdesc; +} + +void +pdf_dropfont(pdf_fontdesc *fontdesc) +{ + if (fontdesc && --fontdesc->refs == 0) + { + if (fontdesc->font) + fz_dropfont(fontdesc->font); + if (fontdesc->encoding) + pdf_dropcmap(fontdesc->encoding); + if (fontdesc->tottfcmap) + pdf_dropcmap(fontdesc->tottfcmap); + if (fontdesc->tounicode) + pdf_dropcmap(fontdesc->tounicode); + fz_free(fontdesc->cidtogid); + fz_free(fontdesc->cidtoucs); + fz_free(fontdesc->hmtx); + fz_free(fontdesc->vmtx); + fz_free(fontdesc); + } +} + +pdf_fontdesc * +pdf_newfontdesc(void) +{ + pdf_fontdesc *fontdesc; + + fontdesc = fz_malloc(sizeof(pdf_fontdesc)); + fontdesc->refs = 1; + + fontdesc->font = nil; + + fontdesc->flags = 0; + fontdesc->italicangle = 0; + fontdesc->ascent = 0; + fontdesc->descent = 0; + fontdesc->capheight = 0; + fontdesc->xheight = 0; + fontdesc->missingwidth = 0; + + fontdesc->encoding = nil; + fontdesc->tottfcmap = nil; + fontdesc->ncidtogid = 0; + fontdesc->cidtogid = nil; + + fontdesc->tounicode = nil; + fontdesc->ncidtoucs = 0; + fontdesc->cidtoucs = nil; + + fontdesc->wmode = 0; + + fontdesc->hmtxcap = 0; + fontdesc->vmtxcap = 0; + fontdesc->nhmtx = 0; + fontdesc->nvmtx = 0; + fontdesc->hmtx = nil; + fontdesc->vmtx = nil; + + fontdesc->dhmtx.lo = 0x0000; + fontdesc->dhmtx.hi = 0xFFFF; + fontdesc->dhmtx.w = 1000; + + fontdesc->dvmtx.lo = 0x0000; + fontdesc->dvmtx.hi = 0xFFFF; + fontdesc->dvmtx.x = 0; + fontdesc->dvmtx.y = 880; + fontdesc->dvmtx.w = -1000; + + fontdesc->isembedded = 0; + + return fontdesc; +} + +/* + * Simple fonts (Type1 and TrueType) + */ + +static fz_error +loadsimplefont(pdf_fontdesc **fontdescp, pdf_xref *xref, fz_obj *dict) +{ + fz_error error; + fz_obj *descriptor; + fz_obj *encoding; + fz_obj *widths; + 
unsigned short *etable = nil; + pdf_fontdesc *fontdesc; + FT_Face face; + FT_CharMap cmap; + int kind; + int symbolic; + + char *basefont; + char *fontname; + char *estrings[256]; + char ebuffer[256][32]; + int i, k, n; + int fterr; + + basefont = fz_toname(fz_dictgets(dict, "BaseFont")); + fontname = cleanfontname(basefont); + + /* Load font file */ + + fontdesc = pdf_newfontdesc(); + + pdf_logfont("load simple font (%d %d R) ptr=%p {\n", fz_tonum(dict), fz_togen(dict), fontdesc); + pdf_logfont("basefont %s -> %s\n", basefont, fontname); + + descriptor = fz_dictgets(dict, "FontDescriptor"); + if (descriptor) + error = pdf_loadfontdescriptor(fontdesc, xref, descriptor, nil, basefont); + else + error = pdf_loadbuiltinfont(fontdesc, fontname); + if (error) + goto cleanup; + + /* Some chinese documents mistakenly consider WinAnsiEncoding to be codepage 936 */ + if (!*fontdesc->font->name && + !fz_dictgets(dict, "ToUnicode") && + !strcmp(fz_toname(fz_dictgets(dict, "Encoding")), "WinAnsiEncoding") && + fz_toint(fz_dictgets(descriptor, "Flags")) == 4) + { + /* note: without the comma, pdf_loadfontdescriptor would prefer /FontName over /BaseFont */ + char *cp936fonts[] = { + "\xCB\xCE\xCC\xE5", "SimSun,Regular", + "\xBA\xDA\xCC\xE5", "SimHei,Regular", + "\xBF\xAC\xCC\xE5_GB2312", "SimKai,Regular", + "\xB7\xC2\xCB\xCE_GB2312", "SimFang,Regular", + "\xC1\xA5\xCA\xE9", "SimLi,Regular", + NULL + }; + for (i = 0; cp936fonts[i]; i += 2) + if (!strcmp(basefont, cp936fonts[i])) + break; + if (cp936fonts[i]) + { + fz_warn("workaround for S22PDF lying about chinese font encodings"); + pdf_dropfont(fontdesc); + fontdesc = pdf_newfontdesc(); + error = pdf_loadfontdescriptor(fontdesc, xref, descriptor, "Adobe-GB1", cp936fonts[i+1]); + error |= pdf_loadsystemcmap(&fontdesc->encoding, "GBK-EUC-H"); + error |= pdf_loadsystemcmap(&fontdesc->tounicode, "Adobe-GB1-UCS2"); + error |= pdf_loadsystemcmap(&fontdesc->tottfcmap, "Adobe-GB1-UCS2"); + if (error) + return fz_rethrow(error, "cannot 
load font"); + + face = fontdesc->font->ftface; + kind = ftkind(face); + goto skip_encoding; + } + } + + face = fontdesc->font->ftface; + kind = ftkind(face); + + /* Encoding */ + + pdf_logfont("ft name '%s' '%s'\n", face->family_name, face->style_name); + + symbolic = fontdesc->flags & 4; + + if (face->num_charmaps > 0) + cmap = face->charmaps[0]; + else + cmap = nil; + + for (i = 0; i < face->num_charmaps; i++) + { + FT_CharMap test = face->charmaps[i]; + + if (kind == TYPE1) + { + if (test->platform_id == 7) + cmap = test; + } + + if (kind == TRUETYPE) + { + if (test->platform_id == 1 && test->encoding_id == 0) + cmap = test; + if (test->platform_id == 3 && test->encoding_id == 1) + cmap = test; + } + } + + if (cmap) + { + fterr = FT_Set_Charmap(face, cmap); + if (fterr) + fz_warn("freetype could not set cmap: %s", ft_errorstring(fterr)); + } + else + fz_warn("freetype could not find any cmaps"); + + etable = fz_calloc(256, sizeof(unsigned short)); + for (i = 0; i < 256; i++) + { + estrings[i] = nil; + etable[i] = 0; + } + + encoding = fz_dictgets(dict, "Encoding"); + if (encoding) + { + if (fz_isname(encoding)) + pdf_loadencoding(estrings, fz_toname(encoding)); + + if (fz_isdict(encoding)) + { + fz_obj *base, *diff, *item; + + base = fz_dictgets(encoding, "BaseEncoding"); + if (fz_isname(base)) + pdf_loadencoding(estrings, fz_toname(base)); + else if (!fontdesc->isembedded && !symbolic) + pdf_loadencoding(estrings, "StandardEncoding"); + + diff = fz_dictgets(encoding, "Differences"); + if (fz_isarray(diff)) + { + n = fz_arraylen(diff); + k = 0; + for (i = 0; i < n; i++) + { + item = fz_arrayget(diff, i); + if (fz_isint(item)) + k = fz_toint(item); + if (fz_isname(item)) + estrings[k++] = fz_toname(item); + if (k < 0) k = 0; + if (k > 255) k = 255; + } + } + } + } + + /* start with the builtin encoding */ + for (i = 0; i < 256; i++) + etable[i] = ftcharindex(face, i); + + /* encode by glyph name where we can */ + if (kind == TYPE1) + { + pdf_logfont("encode 
type1/cff by strings\n"); + for (i = 0; i < 256; i++) + { + if (estrings[i]) + { + etable[i] = FT_Get_Name_Index(face, estrings[i]); + if (etable[i] == 0) + { + int aglcode = pdf_lookupagl(estrings[i]); + char **aglnames = pdf_lookupaglnames(aglcode); + while (*aglnames) + { + etable[i] = FT_Get_Name_Index(face, *aglnames); + if (etable[i]) + break; + aglnames++; + } + } + } + } + } + + /* encode by glyph name where we can */ + if (kind == TRUETYPE) + { + /* Unicode cmap */ + if (!symbolic && face->charmap && face->charmap->platform_id == 3) + { + pdf_logfont("encode truetype via unicode\n"); + for (i = 0; i < 256; i++) + { + if (estrings[i]) + { + int aglcode = pdf_lookupagl(estrings[i]); + if (!aglcode) + etable[i] = FT_Get_Name_Index(face, estrings[i]); + else + etable[i] = ftcharindex(face, aglcode); + } + } + } + + /* MacRoman cmap */ + else if (!symbolic && face->charmap && face->charmap->platform_id == 1) + { + pdf_logfont("encode truetype via macroman\n"); + for (i = 0; i < 256; i++) + { + if (estrings[i]) + { + k = mrecode(estrings[i]); + if (k <= 0) + etable[i] = FT_Get_Name_Index(face, estrings[i]); + else + etable[i] = ftcharindex(face, k); + } + } + } + + /* Symbolic cmap */ + else + { + pdf_logfont("encode truetype symbolic\n"); + for (i = 0; i < 256; i++) + { + if (estrings[i]) + { + etable[i] = FT_Get_Name_Index(face, estrings[i]); + if (etable[i] == 0) + etable[i] = ftcharindex(face, i); + } + } + } + } + + /* try to reverse the glyph names from the builtin encoding */ + for (i = 0; i < 256; i++) + { + if (etable[i] && !estrings[i]) + { + if (FT_HAS_GLYPH_NAMES(face)) + { + fterr = FT_Get_Glyph_Name(face, etable[i], ebuffer[i], 32); + if (fterr) + fz_warn("freetype get glyph name (gid %d): %s", etable[i], ft_errorstring(fterr)); + if (ebuffer[i][0]) + estrings[i] = ebuffer[i]; + } + else + { + estrings[i] = (char*) pdf_winansi[i]; /* discard const */ + } + } + } + + fontdesc->encoding = pdf_newidentitycmap(0, 1); + fontdesc->ncidtogid = 256; + 
fontdesc->cidtogid = etable; + + error = pdf_loadtounicode(fontdesc, xref, estrings, nil, fz_dictgets(dict, "ToUnicode")); + if (error) + fz_catch(error, "cannot load tounicode"); + +skip_encoding: + + /* Widths */ + + pdf_setdefaulthmtx(fontdesc, fontdesc->missingwidth); + + widths = fz_dictgets(dict, "Widths"); + if (widths) + { + int first, last; + + first = fz_toint(fz_dictgets(dict, "FirstChar")); + last = fz_toint(fz_dictgets(dict, "LastChar")); + + if (first < 0 || last > 255 || first > last) + first = last = 0; + + for (i = 0; i < last - first + 1; i++) + { + int wid = fz_toint(fz_arrayget(widths, i)); + pdf_addhmtx(fontdesc, i + first, i + first, wid); + } + } + else + { + fterr = FT_Set_Char_Size(face, 1000, 1000, 72, 72); + if (fterr) + fz_warn("freetype set character size: %s", ft_errorstring(fterr)); + for (i = 0; i < 256; i++) + { + pdf_addhmtx(fontdesc, i, i, ftwidth(fontdesc, i)); + } + } + + pdf_endhmtx(fontdesc); + + pdf_logfont("}\n"); + + *fontdescp = fontdesc; + return fz_okay; + +cleanup: + if (etable != fontdesc->cidtogid) + fz_free(etable); + pdf_dropfont(fontdesc); + return fz_rethrow(error, "cannot load simple font (%d %d R)", fz_tonum(dict), fz_togen(dict)); +} + +/* + * CID Fonts + */ + +static fz_error +loadcidfont(pdf_fontdesc **fontdescp, pdf_xref *xref, fz_obj *dict, fz_obj *encoding, fz_obj *tounicode) +{ + fz_error error; + fz_obj *widths; + fz_obj *descriptor; + pdf_fontdesc *fontdesc; + FT_Face face; + int kind; + char collection[256]; + char *basefont; + int i, k, fterr; + fz_obj *obj; + int dw; + + /* Get font name and CID collection */ + + basefont = fz_toname(fz_dictgets(dict, "BaseFont")); + + { + fz_obj *cidinfo; + char tmpstr[64]; + int tmplen; + + cidinfo = fz_dictgets(dict, "CIDSystemInfo"); + if (!cidinfo) + return fz_throw("cid font is missing info"); + + obj = fz_dictgets(cidinfo, "Registry"); + tmplen = MIN(sizeof tmpstr - 1, fz_tostrlen(obj)); + memcpy(tmpstr, fz_tostrbuf(obj), tmplen); + tmpstr[tmplen] = '\0'; + 
fz_strlcpy(collection, tmpstr, sizeof collection); + + fz_strlcat(collection, "-", sizeof collection); + + obj = fz_dictgets(cidinfo, "Ordering"); + tmplen = MIN(sizeof tmpstr - 1, fz_tostrlen(obj)); + memcpy(tmpstr, fz_tostrbuf(obj), tmplen); + tmpstr[tmplen] = '\0'; + fz_strlcat(collection, tmpstr, sizeof collection); + } + + /* Load font file */ + + fontdesc = pdf_newfontdesc(); + + pdf_logfont("load cid font (%d %d R) ptr=%p {\n", fz_tonum(dict), fz_togen(dict), fontdesc); + pdf_logfont("basefont %s\n", basefont); + pdf_logfont("collection %s\n", collection); + + descriptor = fz_dictgets(dict, "FontDescriptor"); + if (descriptor) + error = pdf_loadfontdescriptor(fontdesc, xref, descriptor, collection, basefont); + else + error = fz_throw("syntaxerror: missing font descriptor"); + if (error) + goto cleanup; + + face = fontdesc->font->ftface; + kind = ftkind(face); + + /* Check for DynaLab fonts that must use hinting */ + if (kind == TRUETYPE) + { + if (FT_IS_TRICKY(face) || isdynalab(fontdesc->font->name)) + { + fontdesc->font->fthint = 1; + pdf_logfont("forced hinting for dynalab font\n"); + } + } + + /* Encoding */ + + error = fz_okay; + if (fz_isname(encoding)) + { + pdf_logfont("encoding /%s\n", fz_toname(encoding)); + if (!strcmp(fz_toname(encoding), "Identity-H")) + fontdesc->encoding = pdf_newidentitycmap(0, 2); + else if (!strcmp(fz_toname(encoding), "Identity-V")) + fontdesc->encoding = pdf_newidentitycmap(1, 2); + else + error = pdf_loadsystemcmap(&fontdesc->encoding, fz_toname(encoding)); + } + else if (fz_isindirect(encoding)) + { + pdf_logfont("encoding %d %d R\n", fz_tonum(encoding), fz_togen(encoding)); + error = pdf_loadembeddedcmap(&fontdesc->encoding, xref, encoding); + } + else + { + error = fz_throw("syntaxerror: font missing encoding"); + } + if (error) + goto cleanup; + + pdf_setfontwmode(fontdesc, pdf_getwmode(fontdesc->encoding)); + pdf_logfont("wmode %d\n", pdf_getwmode(fontdesc->encoding)); + + if (kind == TRUETYPE) + { + fz_obj 
*cidtogidmap; + + cidtogidmap = fz_dictgets(dict, "CIDToGIDMap"); + if (fz_isindirect(cidtogidmap)) + { + fz_buffer *buf; + + pdf_logfont("cidtogidmap stream\n"); + + error = pdf_loadstream(&buf, xref, fz_tonum(cidtogidmap), fz_togen(cidtogidmap)); + if (error) + goto cleanup; + + fontdesc->ncidtogid = (buf->len) / 2; + fontdesc->cidtogid = fz_calloc(fontdesc->ncidtogid, sizeof(unsigned short)); + for (i = 0; i < fontdesc->ncidtogid; i++) + fontdesc->cidtogid[i] = (buf->data[i * 2] << 8) + buf->data[i * 2 + 1]; + + fz_dropbuffer(buf); + } + + /* if truetype font is external, cidtogidmap should not be identity */ + /* so we map from cid to unicode and then map that through the (3 1) */ + /* unicode cmap to get a glyph id */ + else if (fontdesc->font->ftsubstitute) + { + pdf_logfont("emulate ttf cidfont\n"); + + fterr = FT_Select_Charmap(face, ft_encoding_unicode); + if (fterr) + { + error = fz_throw("fonterror: no unicode cmap when emulating CID font: %s", ft_errorstring(fterr)); + goto cleanup; + } + + if (!strcmp(collection, "Adobe-CNS1")) + error = pdf_loadsystemcmap(&fontdesc->tottfcmap, "Adobe-CNS1-UCS2"); + else if (!strcmp(collection, "Adobe-GB1")) + error = pdf_loadsystemcmap(&fontdesc->tottfcmap, "Adobe-GB1-UCS2"); + else if (!strcmp(collection, "Adobe-Japan1")) + error = pdf_loadsystemcmap(&fontdesc->tottfcmap, "Adobe-Japan1-UCS2"); + else if (!strcmp(collection, "Adobe-Japan2")) + error = pdf_loadsystemcmap(&fontdesc->tottfcmap, "Adobe-Japan2-UCS2"); + else if (!strcmp(collection, "Adobe-Korea1")) + error = pdf_loadsystemcmap(&fontdesc->tottfcmap, "Adobe-Korea1-UCS2"); + else + error = fz_okay; + + if (error) + { + error = fz_rethrow(error, "cannot load system cmap %s", collection); + goto cleanup; + } + } + } + + error = pdf_loadtounicode(fontdesc, xref, nil, collection, tounicode); + if (error) + fz_catch(error, "cannot load tounicode"); + + /* Horizontal */ + + dw = 1000; + obj = fz_dictgets(dict, "DW"); + if (obj) + dw = fz_toint(obj); + 
pdf_setdefaulthmtx(fontdesc, dw); + + widths = fz_dictgets(dict, "W"); + if (widths) + { + int c0, c1, w; + + for (i = 0; i < fz_arraylen(widths); ) + { + c0 = fz_toint(fz_arrayget(widths, i)); + obj = fz_arrayget(widths, i + 1); + if (fz_isarray(obj)) + { + for (k = 0; k < fz_arraylen(obj); k++) + { + w = fz_toint(fz_arrayget(obj, k)); + pdf_addhmtx(fontdesc, c0 + k, c0 + k, w); + } + i += 2; + } + else + { + c1 = fz_toint(obj); + w = fz_toint(fz_arrayget(widths, i + 2)); + pdf_addhmtx(fontdesc, c0, c1, w); + i += 3; + } + } + } + + pdf_endhmtx(fontdesc); + + /* Vertical */ + + if (pdf_getwmode(fontdesc->encoding) == 1) + { + int dw2y = 880; + int dw2w = -1000; + + obj = fz_dictgets(dict, "DW2"); + if (obj) + { + dw2y = fz_toint(fz_arrayget(obj, 0)); + dw2w = fz_toint(fz_arrayget(obj, 1)); + } + + pdf_setdefaultvmtx(fontdesc, dw2y, dw2w); + + widths = fz_dictgets(dict, "W2"); + if (widths) + { + int c0, c1, w, x, y; + + for (i = 0; i < fz_arraylen(widths); ) + { + c0 = fz_toint(fz_arrayget(widths, i)); + obj = fz_arrayget(widths, i + 1); + if (fz_isarray(obj)) + { + for (k = 0; k * 3 < fz_arraylen(obj); k ++) + { + w = fz_toint(fz_arrayget(obj, k * 3 + 0)); + x = fz_toint(fz_arrayget(obj, k * 3 + 1)); + y = fz_toint(fz_arrayget(obj, k * 3 + 2)); + pdf_addvmtx(fontdesc, c0 + k, c0 + k, x, y, w); + } + i += 2; + } + else + { + c1 = fz_toint(obj); + w = fz_toint(fz_arrayget(widths, i + 2)); + x = fz_toint(fz_arrayget(widths, i + 3)); + y = fz_toint(fz_arrayget(widths, i + 4)); + pdf_addvmtx(fontdesc, c0, c1, x, y, w); + i += 5; + } + } + } + + pdf_endvmtx(fontdesc); + } + + pdf_logfont("}\n"); + + *fontdescp = fontdesc; + return fz_okay; + +cleanup: + pdf_dropfont(fontdesc); + return fz_rethrow(error, "cannot load cid font (%d %d R)", fz_tonum(dict), fz_togen(dict)); +} + +static fz_error +loadtype0(pdf_fontdesc **fontdescp, pdf_xref *xref, fz_obj *dict) +{ + fz_error error; + fz_obj *dfonts; + fz_obj *dfont; + fz_obj *subtype; + fz_obj *encoding; + fz_obj 
*tounicode; + + dfonts = fz_dictgets(dict, "DescendantFonts"); + if (!dfonts) + return fz_throw("cid font is missing descendant fonts"); + + dfont = fz_arrayget(dfonts, 0); + + subtype = fz_dictgets(dfont, "Subtype"); + encoding = fz_dictgets(dict, "Encoding"); + tounicode = fz_dictgets(dict, "ToUnicode"); + + if (fz_isname(subtype) && !strcmp(fz_toname(subtype), "CIDFontType0")) + error = loadcidfont(fontdescp, xref, dfont, encoding, tounicode); + else if (fz_isname(subtype) && !strcmp(fz_toname(subtype), "CIDFontType2")) + error = loadcidfont(fontdescp, xref, dfont, encoding, tounicode); + else + error = fz_throw("syntaxerror: unknown cid font type"); + if (error) + return fz_rethrow(error, "cannot load descendant font (%d %d R)", fz_tonum(dfont), fz_togen(dfont)); + + return fz_okay; +} + +/* + * FontDescriptor + */ + +fz_error +pdf_loadfontdescriptor(pdf_fontdesc *fontdesc, pdf_xref *xref, fz_obj *dict, char *collection, char *basefont) +{ + fz_error error; + fz_obj *obj1, *obj2, *obj3, *obj; + char *fontname; + char *origname; + + pdf_logfont("load fontdescriptor {\n"); + + if (!strchr(basefont, ',') || strchr(basefont, '+')) + origname = fz_toname(fz_dictgets(dict, "FontName")); + else + origname = basefont; + fontname = cleanfontname(origname); + + pdf_logfont("fontname %s -> %s\n", origname, fontname); + + fontdesc->flags = fz_toint(fz_dictgets(dict, "Flags")); + fontdesc->italicangle = fz_toreal(fz_dictgets(dict, "ItalicAngle")); + fontdesc->ascent = fz_toreal(fz_dictgets(dict, "Ascent")); + fontdesc->descent = fz_toreal(fz_dictgets(dict, "Descent")); + fontdesc->capheight = fz_toreal(fz_dictgets(dict, "CapHeight")); + fontdesc->xheight = fz_toreal(fz_dictgets(dict, "XHeight")); + fontdesc->missingwidth = fz_toreal(fz_dictgets(dict, "MissingWidth")); + + pdf_logfont("flags %d\n", fontdesc->flags); + + obj1 = fz_dictgets(dict, "FontFile"); + obj2 = fz_dictgets(dict, "FontFile2"); + obj3 = fz_dictgets(dict, "FontFile3"); + obj = obj1 ? obj1 : obj2 ? 
obj2 : obj3; + + if (getenv("NOFONT")) + obj = nil; + + if (fz_isindirect(obj)) + { + error = pdf_loadembeddedfont(fontdesc, xref, obj); + if (error) + { + fz_catch(error, "ignored error when loading embedded font, attempting to load system font"); + if (origname != fontname) + error = pdf_loadbuiltinfont(fontdesc, fontname); + else + error = pdf_loadsystemfont(fontdesc, fontname, collection); + if (error) + return fz_rethrow(error, "cannot load font descriptor (%d %d R)", fz_tonum(dict), fz_togen(dict)); + } + } + else + { + if (origname != fontname) + error = pdf_loadbuiltinfont(fontdesc, fontname); + else + error = pdf_loadsystemfont(fontdesc, fontname, collection); + if (error) + return fz_rethrow(error, "cannot load font descriptor (%d %d R)", fz_tonum(dict), fz_togen(dict)); + } + + fz_strlcpy(fontdesc->font->name, fontname, sizeof fontdesc->font->name); + + pdf_logfont("}\n"); + + return fz_okay; + +} + +static void +pdf_makewidthtable(pdf_fontdesc *fontdesc) +{ + fz_font *font = fontdesc->font; + int i, k, cid, gid; + + font->widthcount = 0; + for (i = 0; i < fontdesc->nhmtx; i++) + { + for (k = fontdesc->hmtx[i].lo; k <= fontdesc->hmtx[i].hi; k++) + { + cid = pdf_lookupcmap(fontdesc->encoding, k); + gid = pdf_fontcidtogid(fontdesc, cid); + if (gid > font->widthcount) + font->widthcount = gid; + } + } + font->widthcount ++; + + font->widthtable = fz_calloc(font->widthcount, sizeof(int)); + memset(font->widthtable, 0, sizeof(int) * font->widthcount); + + for (i = 0; i < fontdesc->nhmtx; i++) + { + for (k = fontdesc->hmtx[i].lo; k <= fontdesc->hmtx[i].hi; k++) + { + cid = pdf_lookupcmap(fontdesc->encoding, k); + gid = pdf_fontcidtogid(fontdesc, cid); + if (gid >= 0 && gid < font->widthcount) + font->widthtable[gid] = fontdesc->hmtx[i].w; + } + } +} + +fz_error +pdf_loadfont(pdf_fontdesc **fontdescp, pdf_xref *xref, fz_obj *rdb, fz_obj *dict) +{ + fz_error error; + char *subtype; + fz_obj *dfonts; + fz_obj *charprocs; + + if ((*fontdescp = 
pdf_finditem(xref->store, pdf_dropfont, dict))) + { + pdf_keepfont(*fontdescp); + return fz_okay; + } + + subtype = fz_toname(fz_dictgets(dict, "Subtype")); + dfonts = fz_dictgets(dict, "DescendantFonts"); + charprocs = fz_dictgets(dict, "CharProcs"); + + if (subtype && !strcmp(subtype, "Type0")) + error = loadtype0(fontdescp, xref, dict); + else if (subtype && !strcmp(subtype, "Type1")) + error = loadsimplefont(fontdescp, xref, dict); + else if (subtype && !strcmp(subtype, "MMType1")) + error = loadsimplefont(fontdescp, xref, dict); + else if (subtype && !strcmp(subtype, "TrueType")) + error = loadsimplefont(fontdescp, xref, dict); + else if (subtype && !strcmp(subtype, "Type3")) + error = pdf_loadtype3font(fontdescp, xref, rdb, dict); + else if (charprocs) + { + fz_warn("unknown font format, guessing type3."); + error = pdf_loadtype3font(fontdescp, xref, rdb, dict); + } + else if (dfonts) + { + fz_warn("unknown font format, guessing type0."); + error = loadtype0(fontdescp, xref, dict); + } + else + { + fz_warn("unknown font format, guessing type1 or truetype."); + error = loadsimplefont(fontdescp, xref, dict); + } + if (error) + return fz_rethrow(error, "cannot load font (%d %d R)", fz_tonum(dict), fz_togen(dict)); + + /* Save the widths to stretch non-CJK substitute fonts */ + if ((*fontdescp)->font->ftsubstitute && !(*fontdescp)->tottfcmap) + pdf_makewidthtable(*fontdescp); + + pdf_storeitem(xref->store, pdf_keepfont, pdf_dropfont, dict, *fontdescp); + + return fz_okay; +} + +void +pdf_debugfont(pdf_fontdesc *fontdesc) +{ + int i; + + printf("fontdesc {\n"); + + if (fontdesc->font->ftface) + printf("\tfreetype font\n"); + if (fontdesc->font->t3procs) + printf("\ttype3 font\n"); + + printf("\twmode %d\n", fontdesc->wmode); + printf("\tDW %d\n", fontdesc->dhmtx.w); + + printf("\tW {\n"); + for (i = 0; i < fontdesc->nhmtx; i++) + printf("\t\t<%04x> <%04x> %d\n", + fontdesc->hmtx[i].lo, fontdesc->hmtx[i].hi, fontdesc->hmtx[i].w); + printf("\t}\n"); + + if 
(fontdesc->wmode) + { + printf("\tDW2 [%d %d]\n", fontdesc->dvmtx.y, fontdesc->dvmtx.w); + printf("\tW2 {\n"); + for (i = 0; i < fontdesc->nvmtx; i++) + printf("\t\t<%04x> <%04x> %d %d %d\n", fontdesc->vmtx[i].lo, fontdesc->vmtx[i].hi, + fontdesc->vmtx[i].x, fontdesc->vmtx[i].y, fontdesc->vmtx[i].w); + printf("\t}\n"); + } +} diff --git a/pdf/pdf_fontagl.c b/pdf/pdf_fontagl.c new file mode 100644 index 00000000..e310848b --- /dev/null +++ b/pdf/pdf_fontagl.c @@ -0,0 +1,5119 @@ +/* +# Name: Adobe Glyph List +# Table version: 2.0 +# Date: September 20, 2002 +# +# See http://partners.adobe.com/asn/developer/typeforum/unicodegn.html +# +# Format: Semicolon-delimited fields: +# (1) glyph name +# (2) Unicode scalar value +#--end +*/ + +static const struct { char *name; int ucs; } +aglcodes[] = { +{"A", 0x0041}, +{"AE", 0x00C6}, +{"AEacute", 0x01FC}, +{"AEmacron", 0x01E2}, +{"AEsmall", 0xF7E6}, +{"Aacute", 0x00C1}, +{"Aacutesmall", 0xF7E1}, +{"Abreve", 0x0102}, +{"Abreveacute", 0x1EAE}, +{"Abrevecyrillic", 0x04D0}, +{"Abrevedotbelow", 0x1EB6}, +{"Abrevegrave", 0x1EB0}, +{"Abrevehookabove", 0x1EB2}, +{"Abrevetilde", 0x1EB4}, +{"Acaron", 0x01CD}, +{"Acircle", 0x24B6}, +{"Acircumflex", 0x00C2}, +{"Acircumflexacute", 0x1EA4}, +{"Acircumflexdotbelow", 0x1EAC}, +{"Acircumflexgrave", 0x1EA6}, +{"Acircumflexhookabove", 0x1EA8}, +{"Acircumflexsmall", 0xF7E2}, +{"Acircumflextilde", 0x1EAA}, +{"Acute", 0xF6C9}, +{"Acutesmall", 0xF7B4}, +{"Acyrillic", 0x0410}, +{"Adblgrave", 0x0200}, +{"Adieresis", 0x00C4}, +{"Adieresiscyrillic", 0x04D2}, +{"Adieresismacron", 0x01DE}, +{"Adieresissmall", 0xF7E4}, +{"Adotbelow", 0x1EA0}, +{"Adotmacron", 0x01E0}, +{"Agrave", 0x00C0}, +{"Agravesmall", 0xF7E0}, +{"Ahookabove", 0x1EA2}, +{"Aiecyrillic", 0x04D4}, +{"Ainvertedbreve", 0x0202}, +{"Alpha", 0x0391}, +{"Alphatonos", 0x0386}, +{"Amacron", 0x0100}, +{"Amonospace", 0xFF21}, +{"Aogonek", 0x0104}, +{"Aring", 0x00C5}, +{"Aringacute", 0x01FA}, +{"Aringbelow", 0x1E00}, +{"Aringsmall", 0xF7E5}, 
+{"Asmall", 0xF761}, +{"Atilde", 0x00C3}, +{"Atildesmall", 0xF7E3}, +{"Aybarmenian", 0x0531}, +{"B", 0x0042}, +{"Bcircle", 0x24B7}, +{"Bdotaccent", 0x1E02}, +{"Bdotbelow", 0x1E04}, +{"Becyrillic", 0x0411}, +{"Benarmenian", 0x0532}, +{"Beta", 0x0392}, +{"Bhook", 0x0181}, +{"Blinebelow", 0x1E06}, +{"Bmonospace", 0xFF22}, +{"Brevesmall", 0xF6F4}, +{"Bsmall", 0xF762}, +{"Btopbar", 0x0182}, +{"C", 0x0043}, +{"Caarmenian", 0x053E}, +{"Cacute", 0x0106}, +{"Caron", 0xF6CA}, +{"Caronsmall", 0xF6F5}, +{"Ccaron", 0x010C}, +{"Ccedilla", 0x00C7}, +{"Ccedillaacute", 0x1E08}, +{"Ccedillasmall", 0xF7E7}, +{"Ccircle", 0x24B8}, +{"Ccircumflex", 0x0108}, +{"Cdot", 0x010A}, +{"Cdotaccent", 0x010A}, +{"Cedillasmall", 0xF7B8}, +{"Chaarmenian", 0x0549}, +{"Cheabkhasiancyrillic", 0x04BC}, +{"Checyrillic", 0x0427}, +{"Chedescenderabkhasiancyrillic", 0x04BE}, +{"Chedescendercyrillic", 0x04B6}, +{"Chedieresiscyrillic", 0x04F4}, +{"Cheharmenian", 0x0543}, +{"Chekhakassiancyrillic", 0x04CB}, +{"Cheverticalstrokecyrillic", 0x04B8}, +{"Chi", 0x03A7}, +{"Chook", 0x0187}, +{"Circumflexsmall", 0xF6F6}, +{"Cmonospace", 0xFF23}, +{"Coarmenian", 0x0551}, +{"Csmall", 0xF763}, +{"D", 0x0044}, +{"DZ", 0x01F1}, +{"DZcaron", 0x01C4}, +{"Daarmenian", 0x0534}, +{"Dafrican", 0x0189}, +{"Dcaron", 0x010E}, +{"Dcedilla", 0x1E10}, +{"Dcircle", 0x24B9}, +{"Dcircumflexbelow", 0x1E12}, +{"Dcroat", 0x0110}, +{"Ddotaccent", 0x1E0A}, +{"Ddotbelow", 0x1E0C}, +{"Decyrillic", 0x0414}, +{"Deicoptic", 0x03EE}, +{"Delta", 0x2206}, +{"Deltagreek", 0x0394}, +{"Dhook", 0x018A}, +{"Dieresis", 0xF6CB}, +{"DieresisAcute", 0xF6CC}, +{"DieresisGrave", 0xF6CD}, +{"Dieresissmall", 0xF7A8}, +{"Digammagreek", 0x03DC}, +{"Djecyrillic", 0x0402}, +{"Dlinebelow", 0x1E0E}, +{"Dmonospace", 0xFF24}, +{"Dotaccentsmall", 0xF6F7}, +{"Dslash", 0x0110}, +{"Dsmall", 0xF764}, +{"Dtopbar", 0x018B}, +{"Dz", 0x01F2}, +{"Dzcaron", 0x01C5}, +{"Dzeabkhasiancyrillic", 0x04E0}, +{"Dzecyrillic", 0x0405}, +{"Dzhecyrillic", 0x040F}, +{"E", 0x0045}, +{"Eacute", 
0x00C9}, +{"Eacutesmall", 0xF7E9}, +{"Ebreve", 0x0114}, +{"Ecaron", 0x011A}, +{"Ecedillabreve", 0x1E1C}, +{"Echarmenian", 0x0535}, +{"Ecircle", 0x24BA}, +{"Ecircumflex", 0x00CA}, +{"Ecircumflexacute", 0x1EBE}, +{"Ecircumflexbelow", 0x1E18}, +{"Ecircumflexdotbelow", 0x1EC6}, +{"Ecircumflexgrave", 0x1EC0}, +{"Ecircumflexhookabove", 0x1EC2}, +{"Ecircumflexsmall", 0xF7EA}, +{"Ecircumflextilde", 0x1EC4}, +{"Ecyrillic", 0x0404}, +{"Edblgrave", 0x0204}, +{"Edieresis", 0x00CB}, +{"Edieresissmall", 0xF7EB}, +{"Edot", 0x0116}, +{"Edotaccent", 0x0116}, +{"Edotbelow", 0x1EB8}, +{"Efcyrillic", 0x0424}, +{"Egrave", 0x00C8}, +{"Egravesmall", 0xF7E8}, +{"Eharmenian", 0x0537}, +{"Ehookabove", 0x1EBA}, +{"Eightroman", 0x2167}, +{"Einvertedbreve", 0x0206}, +{"Eiotifiedcyrillic", 0x0464}, +{"Elcyrillic", 0x041B}, +{"Elevenroman", 0x216A}, +{"Emacron", 0x0112}, +{"Emacronacute", 0x1E16}, +{"Emacrongrave", 0x1E14}, +{"Emcyrillic", 0x041C}, +{"Emonospace", 0xFF25}, +{"Encyrillic", 0x041D}, +{"Endescendercyrillic", 0x04A2}, +{"Eng", 0x014A}, +{"Enghecyrillic", 0x04A4}, +{"Enhookcyrillic", 0x04C7}, +{"Eogonek", 0x0118}, +{"Eopen", 0x0190}, +{"Epsilon", 0x0395}, +{"Epsilontonos", 0x0388}, +{"Ercyrillic", 0x0420}, +{"Ereversed", 0x018E}, +{"Ereversedcyrillic", 0x042D}, +{"Escyrillic", 0x0421}, +{"Esdescendercyrillic", 0x04AA}, +{"Esh", 0x01A9}, +{"Esmall", 0xF765}, +{"Eta", 0x0397}, +{"Etarmenian", 0x0538}, +{"Etatonos", 0x0389}, +{"Eth", 0x00D0}, +{"Ethsmall", 0xF7F0}, +{"Etilde", 0x1EBC}, +{"Etildebelow", 0x1E1A}, +{"Euro", 0x20AC}, +{"Ezh", 0x01B7}, +{"Ezhcaron", 0x01EE}, +{"Ezhreversed", 0x01B8}, +{"F", 0x0046}, +{"Fcircle", 0x24BB}, +{"Fdotaccent", 0x1E1E}, +{"Feharmenian", 0x0556}, +{"Feicoptic", 0x03E4}, +{"Fhook", 0x0191}, +{"Fitacyrillic", 0x0472}, +{"Fiveroman", 0x2164}, +{"Fmonospace", 0xFF26}, +{"Fourroman", 0x2163}, +{"Fsmall", 0xF766}, +{"G", 0x0047}, +{"GBsquare", 0x3387}, +{"Gacute", 0x01F4}, +{"Gamma", 0x0393}, +{"Gammaafrican", 0x0194}, +{"Gangiacoptic", 0x03EA}, 
+{"Gbreve", 0x011E}, +{"Gcaron", 0x01E6}, +{"Gcedilla", 0x0122}, +{"Gcircle", 0x24BC}, +{"Gcircumflex", 0x011C}, +{"Gcommaaccent", 0x0122}, +{"Gdot", 0x0120}, +{"Gdotaccent", 0x0120}, +{"Gecyrillic", 0x0413}, +{"Ghadarmenian", 0x0542}, +{"Ghemiddlehookcyrillic", 0x0494}, +{"Ghestrokecyrillic", 0x0492}, +{"Gheupturncyrillic", 0x0490}, +{"Ghook", 0x0193}, +{"Gimarmenian", 0x0533}, +{"Gjecyrillic", 0x0403}, +{"Gmacron", 0x1E20}, +{"Gmonospace", 0xFF27}, +{"Grave", 0xF6CE}, +{"Gravesmall", 0xF760}, +{"Gsmall", 0xF767}, +{"Gsmallhook", 0x029B}, +{"Gstroke", 0x01E4}, +{"H", 0x0048}, +{"H18533", 0x25CF}, +{"H18543", 0x25AA}, +{"H18551", 0x25AB}, +{"H22073", 0x25A1}, +{"HPsquare", 0x33CB}, +{"Haabkhasiancyrillic", 0x04A8}, +{"Hadescendercyrillic", 0x04B2}, +{"Hardsigncyrillic", 0x042A}, +{"Hbar", 0x0126}, +{"Hbrevebelow", 0x1E2A}, +{"Hcedilla", 0x1E28}, +{"Hcircle", 0x24BD}, +{"Hcircumflex", 0x0124}, +{"Hdieresis", 0x1E26}, +{"Hdotaccent", 0x1E22}, +{"Hdotbelow", 0x1E24}, +{"Hmonospace", 0xFF28}, +{"Hoarmenian", 0x0540}, +{"Horicoptic", 0x03E8}, +{"Hsmall", 0xF768}, +{"Hungarumlaut", 0xF6CF}, +{"Hungarumlautsmall", 0xF6F8}, +{"Hzsquare", 0x3390}, +{"I", 0x0049}, +{"IAcyrillic", 0x042F}, +{"IJ", 0x0132}, +{"IUcyrillic", 0x042E}, +{"Iacute", 0x00CD}, +{"Iacutesmall", 0xF7ED}, +{"Ibreve", 0x012C}, +{"Icaron", 0x01CF}, +{"Icircle", 0x24BE}, +{"Icircumflex", 0x00CE}, +{"Icircumflexsmall", 0xF7EE}, +{"Icyrillic", 0x0406}, +{"Idblgrave", 0x0208}, +{"Idieresis", 0x00CF}, +{"Idieresisacute", 0x1E2E}, +{"Idieresiscyrillic", 0x04E4}, +{"Idieresissmall", 0xF7EF}, +{"Idot", 0x0130}, +{"Idotaccent", 0x0130}, +{"Idotbelow", 0x1ECA}, +{"Iebrevecyrillic", 0x04D6}, +{"Iecyrillic", 0x0415}, +{"Ifraktur", 0x2111}, +{"Igrave", 0x00CC}, +{"Igravesmall", 0xF7EC}, +{"Ihookabove", 0x1EC8}, +{"Iicyrillic", 0x0418}, +{"Iinvertedbreve", 0x020A}, +{"Iishortcyrillic", 0x0419}, +{"Imacron", 0x012A}, +{"Imacroncyrillic", 0x04E2}, +{"Imonospace", 0xFF29}, +{"Iniarmenian", 0x053B}, +{"Iocyrillic", 0x0401}, 
+{"Iogonek", 0x012E}, +{"Iota", 0x0399}, +{"Iotaafrican", 0x0196}, +{"Iotadieresis", 0x03AA}, +{"Iotatonos", 0x038A}, +{"Ismall", 0xF769}, +{"Istroke", 0x0197}, +{"Itilde", 0x0128}, +{"Itildebelow", 0x1E2C}, +{"Izhitsacyrillic", 0x0474}, +{"Izhitsadblgravecyrillic", 0x0476}, +{"J", 0x004A}, +{"Jaarmenian", 0x0541}, +{"Jcircle", 0x24BF}, +{"Jcircumflex", 0x0134}, +{"Jecyrillic", 0x0408}, +{"Jheharmenian", 0x054B}, +{"Jmonospace", 0xFF2A}, +{"Jsmall", 0xF76A}, +{"K", 0x004B}, +{"KBsquare", 0x3385}, +{"KKsquare", 0x33CD}, +{"Kabashkircyrillic", 0x04A0}, +{"Kacute", 0x1E30}, +{"Kacyrillic", 0x041A}, +{"Kadescendercyrillic", 0x049A}, +{"Kahookcyrillic", 0x04C3}, +{"Kappa", 0x039A}, +{"Kastrokecyrillic", 0x049E}, +{"Kaverticalstrokecyrillic", 0x049C}, +{"Kcaron", 0x01E8}, +{"Kcedilla", 0x0136}, +{"Kcircle", 0x24C0}, +{"Kcommaaccent", 0x0136}, +{"Kdotbelow", 0x1E32}, +{"Keharmenian", 0x0554}, +{"Kenarmenian", 0x053F}, +{"Khacyrillic", 0x0425}, +{"Kheicoptic", 0x03E6}, +{"Khook", 0x0198}, +{"Kjecyrillic", 0x040C}, +{"Klinebelow", 0x1E34}, +{"Kmonospace", 0xFF2B}, +{"Koppacyrillic", 0x0480}, +{"Koppagreek", 0x03DE}, +{"Ksicyrillic", 0x046E}, +{"Ksmall", 0xF76B}, +{"L", 0x004C}, +{"LJ", 0x01C7}, +{"LL", 0xF6BF}, +{"Lacute", 0x0139}, +{"Lambda", 0x039B}, +{"Lcaron", 0x013D}, +{"Lcedilla", 0x013B}, +{"Lcircle", 0x24C1}, +{"Lcircumflexbelow", 0x1E3C}, +{"Lcommaaccent", 0x013B}, +{"Ldot", 0x013F}, +{"Ldotaccent", 0x013F}, +{"Ldotbelow", 0x1E36}, +{"Ldotbelowmacron", 0x1E38}, +{"Liwnarmenian", 0x053C}, +{"Lj", 0x01C8}, +{"Ljecyrillic", 0x0409}, +{"Llinebelow", 0x1E3A}, +{"Lmonospace", 0xFF2C}, +{"Lslash", 0x0141}, +{"Lslashsmall", 0xF6F9}, +{"Lsmall", 0xF76C}, +{"M", 0x004D}, +{"MBsquare", 0x3386}, +{"Macron", 0xF6D0}, +{"Macronsmall", 0xF7AF}, +{"Macute", 0x1E3E}, +{"Mcircle", 0x24C2}, +{"Mdotaccent", 0x1E40}, +{"Mdotbelow", 0x1E42}, +{"Menarmenian", 0x0544}, +{"Mmonospace", 0xFF2D}, +{"Msmall", 0xF76D}, +{"Mturned", 0x019C}, +{"Mu", 0x039C}, +{"N", 0x004E}, +{"NJ", 0x01CA}, 
+{"Nacute", 0x0143}, +{"Ncaron", 0x0147}, +{"Ncedilla", 0x0145}, +{"Ncircle", 0x24C3}, +{"Ncircumflexbelow", 0x1E4A}, +{"Ncommaaccent", 0x0145}, +{"Ndotaccent", 0x1E44}, +{"Ndotbelow", 0x1E46}, +{"Nhookleft", 0x019D}, +{"Nineroman", 0x2168}, +{"Nj", 0x01CB}, +{"Njecyrillic", 0x040A}, +{"Nlinebelow", 0x1E48}, +{"Nmonospace", 0xFF2E}, +{"Nowarmenian", 0x0546}, +{"Nsmall", 0xF76E}, +{"Ntilde", 0x00D1}, +{"Ntildesmall", 0xF7F1}, +{"Nu", 0x039D}, +{"O", 0x004F}, +{"OE", 0x0152}, +{"OEsmall", 0xF6FA}, +{"Oacute", 0x00D3}, +{"Oacutesmall", 0xF7F3}, +{"Obarredcyrillic", 0x04E8}, +{"Obarreddieresiscyrillic", 0x04EA}, +{"Obreve", 0x014E}, +{"Ocaron", 0x01D1}, +{"Ocenteredtilde", 0x019F}, +{"Ocircle", 0x24C4}, +{"Ocircumflex", 0x00D4}, +{"Ocircumflexacute", 0x1ED0}, +{"Ocircumflexdotbelow", 0x1ED8}, +{"Ocircumflexgrave", 0x1ED2}, +{"Ocircumflexhookabove", 0x1ED4}, +{"Ocircumflexsmall", 0xF7F4}, +{"Ocircumflextilde", 0x1ED6}, +{"Ocyrillic", 0x041E}, +{"Odblacute", 0x0150}, +{"Odblgrave", 0x020C}, +{"Odieresis", 0x00D6}, +{"Odieresiscyrillic", 0x04E6}, +{"Odieresissmall", 0xF7F6}, +{"Odotbelow", 0x1ECC}, +{"Ogoneksmall", 0xF6FB}, +{"Ograve", 0x00D2}, +{"Ogravesmall", 0xF7F2}, +{"Oharmenian", 0x0555}, +{"Ohm", 0x2126}, +{"Ohookabove", 0x1ECE}, +{"Ohorn", 0x01A0}, +{"Ohornacute", 0x1EDA}, +{"Ohorndotbelow", 0x1EE2}, +{"Ohorngrave", 0x1EDC}, +{"Ohornhookabove", 0x1EDE}, +{"Ohorntilde", 0x1EE0}, +{"Ohungarumlaut", 0x0150}, +{"Oi", 0x01A2}, +{"Oinvertedbreve", 0x020E}, +{"Omacron", 0x014C}, +{"Omacronacute", 0x1E52}, +{"Omacrongrave", 0x1E50}, +{"Omega", 0x2126}, +{"Omegacyrillic", 0x0460}, +{"Omegagreek", 0x03A9}, +{"Omegaroundcyrillic", 0x047A}, +{"Omegatitlocyrillic", 0x047C}, +{"Omegatonos", 0x038F}, +{"Omicron", 0x039F}, +{"Omicrontonos", 0x038C}, +{"Omonospace", 0xFF2F}, +{"Oneroman", 0x2160}, +{"Oogonek", 0x01EA}, +{"Oogonekmacron", 0x01EC}, +{"Oopen", 0x0186}, +{"Oslash", 0x00D8}, +{"Oslashacute", 0x01FE}, +{"Oslashsmall", 0xF7F8}, +{"Osmall", 0xF76F}, +{"Ostrokeacute", 
0x01FE}, +{"Otcyrillic", 0x047E}, +{"Otilde", 0x00D5}, +{"Otildeacute", 0x1E4C}, +{"Otildedieresis", 0x1E4E}, +{"Otildesmall", 0xF7F5}, +{"P", 0x0050}, +{"Pacute", 0x1E54}, +{"Pcircle", 0x24C5}, +{"Pdotaccent", 0x1E56}, +{"Pecyrillic", 0x041F}, +{"Peharmenian", 0x054A}, +{"Pemiddlehookcyrillic", 0x04A6}, +{"Phi", 0x03A6}, +{"Phook", 0x01A4}, +{"Pi", 0x03A0}, +{"Piwrarmenian", 0x0553}, +{"Pmonospace", 0xFF30}, +{"Psi", 0x03A8}, +{"Psicyrillic", 0x0470}, +{"Psmall", 0xF770}, +{"Q", 0x0051}, +{"Qcircle", 0x24C6}, +{"Qmonospace", 0xFF31}, +{"Qsmall", 0xF771}, +{"R", 0x0052}, +{"Raarmenian", 0x054C}, +{"Racute", 0x0154}, +{"Rcaron", 0x0158}, +{"Rcedilla", 0x0156}, +{"Rcircle", 0x24C7}, +{"Rcommaaccent", 0x0156}, +{"Rdblgrave", 0x0210}, +{"Rdotaccent", 0x1E58}, +{"Rdotbelow", 0x1E5A}, +{"Rdotbelowmacron", 0x1E5C}, +{"Reharmenian", 0x0550}, +{"Rfraktur", 0x211C}, +{"Rho", 0x03A1}, +{"Ringsmall", 0xF6FC}, +{"Rinvertedbreve", 0x0212}, +{"Rlinebelow", 0x1E5E}, +{"Rmonospace", 0xFF32}, +{"Rsmall", 0xF772}, +{"Rsmallinverted", 0x0281}, +{"Rsmallinvertedsuperior", 0x02B6}, +{"S", 0x0053}, +{"SF010000", 0x250C}, +{"SF020000", 0x2514}, +{"SF030000", 0x2510}, +{"SF040000", 0x2518}, +{"SF050000", 0x253C}, +{"SF060000", 0x252C}, +{"SF070000", 0x2534}, +{"SF080000", 0x251C}, +{"SF090000", 0x2524}, +{"SF100000", 0x2500}, +{"SF110000", 0x2502}, +{"SF190000", 0x2561}, +{"SF200000", 0x2562}, +{"SF210000", 0x2556}, +{"SF220000", 0x2555}, +{"SF230000", 0x2563}, +{"SF240000", 0x2551}, +{"SF250000", 0x2557}, +{"SF260000", 0x255D}, +{"SF270000", 0x255C}, +{"SF280000", 0x255B}, +{"SF360000", 0x255E}, +{"SF370000", 0x255F}, +{"SF380000", 0x255A}, +{"SF390000", 0x2554}, +{"SF400000", 0x2569}, +{"SF410000", 0x2566}, +{"SF420000", 0x2560}, +{"SF430000", 0x2550}, +{"SF440000", 0x256C}, +{"SF450000", 0x2567}, +{"SF460000", 0x2568}, +{"SF470000", 0x2564}, +{"SF480000", 0x2565}, +{"SF490000", 0x2559}, +{"SF500000", 0x2558}, +{"SF510000", 0x2552}, +{"SF520000", 0x2553}, +{"SF530000", 0x256B}, 
+{"SF540000", 0x256A}, +{"Sacute", 0x015A}, +{"Sacutedotaccent", 0x1E64}, +{"Sampigreek", 0x03E0}, +{"Scaron", 0x0160}, +{"Scarondotaccent", 0x1E66}, +{"Scaronsmall", 0xF6FD}, +{"Scedilla", 0x015E}, +{"Schwa", 0x018F}, +{"Schwacyrillic", 0x04D8}, +{"Schwadieresiscyrillic", 0x04DA}, +{"Scircle", 0x24C8}, +{"Scircumflex", 0x015C}, +{"Scommaaccent", 0x0218}, +{"Sdotaccent", 0x1E60}, +{"Sdotbelow", 0x1E62}, +{"Sdotbelowdotaccent", 0x1E68}, +{"Seharmenian", 0x054D}, +{"Sevenroman", 0x2166}, +{"Shaarmenian", 0x0547}, +{"Shacyrillic", 0x0428}, +{"Shchacyrillic", 0x0429}, +{"Sheicoptic", 0x03E2}, +{"Shhacyrillic", 0x04BA}, +{"Shimacoptic", 0x03EC}, +{"Sigma", 0x03A3}, +{"Sixroman", 0x2165}, +{"Smonospace", 0xFF33}, +{"Softsigncyrillic", 0x042C}, +{"Ssmall", 0xF773}, +{"Stigmagreek", 0x03DA}, +{"T", 0x0054}, +{"Tau", 0x03A4}, +{"Tbar", 0x0166}, +{"Tcaron", 0x0164}, +{"Tcedilla", 0x0162}, +{"Tcircle", 0x24C9}, +{"Tcircumflexbelow", 0x1E70}, +{"Tcommaaccent", 0x0162}, +{"Tdotaccent", 0x1E6A}, +{"Tdotbelow", 0x1E6C}, +{"Tecyrillic", 0x0422}, +{"Tedescendercyrillic", 0x04AC}, +{"Tenroman", 0x2169}, +{"Tetsecyrillic", 0x04B4}, +{"Theta", 0x0398}, +{"Thook", 0x01AC}, +{"Thorn", 0x00DE}, +{"Thornsmall", 0xF7FE}, +{"Threeroman", 0x2162}, +{"Tildesmall", 0xF6FE}, +{"Tiwnarmenian", 0x054F}, +{"Tlinebelow", 0x1E6E}, +{"Tmonospace", 0xFF34}, +{"Toarmenian", 0x0539}, +{"Tonefive", 0x01BC}, +{"Tonesix", 0x0184}, +{"Tonetwo", 0x01A7}, +{"Tretroflexhook", 0x01AE}, +{"Tsecyrillic", 0x0426}, +{"Tshecyrillic", 0x040B}, +{"Tsmall", 0xF774}, +{"Twelveroman", 0x216B}, +{"Tworoman", 0x2161}, +{"U", 0x0055}, +{"Uacute", 0x00DA}, +{"Uacutesmall", 0xF7FA}, +{"Ubreve", 0x016C}, +{"Ucaron", 0x01D3}, +{"Ucircle", 0x24CA}, +{"Ucircumflex", 0x00DB}, +{"Ucircumflexbelow", 0x1E76}, +{"Ucircumflexsmall", 0xF7FB}, +{"Ucyrillic", 0x0423}, +{"Udblacute", 0x0170}, +{"Udblgrave", 0x0214}, +{"Udieresis", 0x00DC}, +{"Udieresisacute", 0x01D7}, +{"Udieresisbelow", 0x1E72}, +{"Udieresiscaron", 0x01D9}, 
+{"Udieresiscyrillic", 0x04F0}, +{"Udieresisgrave", 0x01DB}, +{"Udieresismacron", 0x01D5}, +{"Udieresissmall", 0xF7FC}, +{"Udotbelow", 0x1EE4}, +{"Ugrave", 0x00D9}, +{"Ugravesmall", 0xF7F9}, +{"Uhookabove", 0x1EE6}, +{"Uhorn", 0x01AF}, +{"Uhornacute", 0x1EE8}, +{"Uhorndotbelow", 0x1EF0}, +{"Uhorngrave", 0x1EEA}, +{"Uhornhookabove", 0x1EEC}, +{"Uhorntilde", 0x1EEE}, +{"Uhungarumlaut", 0x0170}, +{"Uhungarumlautcyrillic", 0x04F2}, +{"Uinvertedbreve", 0x0216}, +{"Ukcyrillic", 0x0478}, +{"Umacron", 0x016A}, +{"Umacroncyrillic", 0x04EE}, +{"Umacrondieresis", 0x1E7A}, +{"Umonospace", 0xFF35}, +{"Uogonek", 0x0172}, +{"Upsilon", 0x03A5}, +{"Upsilon1", 0x03D2}, +{"Upsilonacutehooksymbolgreek", 0x03D3}, +{"Upsilonafrican", 0x01B1}, +{"Upsilondieresis", 0x03AB}, +{"Upsilondieresishooksymbolgreek", 0x03D4}, +{"Upsilonhooksymbol", 0x03D2}, +{"Upsilontonos", 0x038E}, +{"Uring", 0x016E}, +{"Ushortcyrillic", 0x040E}, +{"Usmall", 0xF775}, +{"Ustraightcyrillic", 0x04AE}, +{"Ustraightstrokecyrillic", 0x04B0}, +{"Utilde", 0x0168}, +{"Utildeacute", 0x1E78}, +{"Utildebelow", 0x1E74}, +{"V", 0x0056}, +{"Vcircle", 0x24CB}, +{"Vdotbelow", 0x1E7E}, +{"Vecyrillic", 0x0412}, +{"Vewarmenian", 0x054E}, +{"Vhook", 0x01B2}, +{"Vmonospace", 0xFF36}, +{"Voarmenian", 0x0548}, +{"Vsmall", 0xF776}, +{"Vtilde", 0x1E7C}, +{"W", 0x0057}, +{"Wacute", 0x1E82}, +{"Wcircle", 0x24CC}, +{"Wcircumflex", 0x0174}, +{"Wdieresis", 0x1E84}, +{"Wdotaccent", 0x1E86}, +{"Wdotbelow", 0x1E88}, +{"Wgrave", 0x1E80}, +{"Wmonospace", 0xFF37}, +{"Wsmall", 0xF777}, +{"X", 0x0058}, +{"Xcircle", 0x24CD}, +{"Xdieresis", 0x1E8C}, +{"Xdotaccent", 0x1E8A}, +{"Xeharmenian", 0x053D}, +{"Xi", 0x039E}, +{"Xmonospace", 0xFF38}, +{"Xsmall", 0xF778}, +{"Y", 0x0059}, +{"Yacute", 0x00DD}, +{"Yacutesmall", 0xF7FD}, +{"Yatcyrillic", 0x0462}, +{"Ycircle", 0x24CE}, +{"Ycircumflex", 0x0176}, +{"Ydieresis", 0x0178}, +{"Ydieresissmall", 0xF7FF}, +{"Ydotaccent", 0x1E8E}, +{"Ydotbelow", 0x1EF4}, +{"Yericyrillic", 0x042B}, +{"Yerudieresiscyrillic", 
0x04F8}, +{"Ygrave", 0x1EF2}, +{"Yhook", 0x01B3}, +{"Yhookabove", 0x1EF6}, +{"Yiarmenian", 0x0545}, +{"Yicyrillic", 0x0407}, +{"Yiwnarmenian", 0x0552}, +{"Ymonospace", 0xFF39}, +{"Ysmall", 0xF779}, +{"Ytilde", 0x1EF8}, +{"Yusbigcyrillic", 0x046A}, +{"Yusbigiotifiedcyrillic", 0x046C}, +{"Yuslittlecyrillic", 0x0466}, +{"Yuslittleiotifiedcyrillic", 0x0468}, +{"Z", 0x005A}, +{"Zaarmenian", 0x0536}, +{"Zacute", 0x0179}, +{"Zcaron", 0x017D}, +{"Zcaronsmall", 0xF6FF}, +{"Zcircle", 0x24CF}, +{"Zcircumflex", 0x1E90}, +{"Zdot", 0x017B}, +{"Zdotaccent", 0x017B}, +{"Zdotbelow", 0x1E92}, +{"Zecyrillic", 0x0417}, +{"Zedescendercyrillic", 0x0498}, +{"Zedieresiscyrillic", 0x04DE}, +{"Zeta", 0x0396}, +{"Zhearmenian", 0x053A}, +{"Zhebrevecyrillic", 0x04C1}, +{"Zhecyrillic", 0x0416}, +{"Zhedescendercyrillic", 0x0496}, +{"Zhedieresiscyrillic", 0x04DC}, +{"Zlinebelow", 0x1E94}, +{"Zmonospace", 0xFF3A}, +{"Zsmall", 0xF77A}, +{"Zstroke", 0x01B5}, +{"a", 0x0061}, +{"aabengali", 0x0986}, +{"aacute", 0x00E1}, +{"aadeva", 0x0906}, +{"aagujarati", 0x0A86}, +{"aagurmukhi", 0x0A06}, +{"aamatragurmukhi", 0x0A3E}, +{"aarusquare", 0x3303}, +{"aavowelsignbengali", 0x09BE}, +{"aavowelsigndeva", 0x093E}, +{"aavowelsigngujarati", 0x0ABE}, +{"abbreviationmarkarmenian", 0x055F}, +{"abbreviationsigndeva", 0x0970}, +{"abengali", 0x0985}, +{"abopomofo", 0x311A}, +{"abreve", 0x0103}, +{"abreveacute", 0x1EAF}, +{"abrevecyrillic", 0x04D1}, +{"abrevedotbelow", 0x1EB7}, +{"abrevegrave", 0x1EB1}, +{"abrevehookabove", 0x1EB3}, +{"abrevetilde", 0x1EB5}, +{"acaron", 0x01CE}, +{"acircle", 0x24D0}, +{"acircumflex", 0x00E2}, +{"acircumflexacute", 0x1EA5}, +{"acircumflexdotbelow", 0x1EAD}, +{"acircumflexgrave", 0x1EA7}, +{"acircumflexhookabove", 0x1EA9}, +{"acircumflextilde", 0x1EAB}, +{"acute", 0x00B4}, +{"acutebelowcmb", 0x0317}, +{"acutecmb", 0x0301}, +{"acutecomb", 0x0301}, +{"acutedeva", 0x0954}, +{"acutelowmod", 0x02CF}, +{"acutetonecmb", 0x0341}, +{"acyrillic", 0x0430}, +{"adblgrave", 0x0201}, +{"addakgurmukhi", 
0x0A71}, +{"adeva", 0x0905}, +{"adieresis", 0x00E4}, +{"adieresiscyrillic", 0x04D3}, +{"adieresismacron", 0x01DF}, +{"adotbelow", 0x1EA1}, +{"adotmacron", 0x01E1}, +{"ae", 0x00E6}, +{"aeacute", 0x01FD}, +{"aekorean", 0x3150}, +{"aemacron", 0x01E3}, +{"afii00208", 0x2015}, +{"afii08941", 0x20A4}, +{"afii10017", 0x0410}, +{"afii10018", 0x0411}, +{"afii10019", 0x0412}, +{"afii10020", 0x0413}, +{"afii10021", 0x0414}, +{"afii10022", 0x0415}, +{"afii10023", 0x0401}, +{"afii10024", 0x0416}, +{"afii10025", 0x0417}, +{"afii10026", 0x0418}, +{"afii10027", 0x0419}, +{"afii10028", 0x041A}, +{"afii10029", 0x041B}, +{"afii10030", 0x041C}, +{"afii10031", 0x041D}, +{"afii10032", 0x041E}, +{"afii10033", 0x041F}, +{"afii10034", 0x0420}, +{"afii10035", 0x0421}, +{"afii10036", 0x0422}, +{"afii10037", 0x0423}, +{"afii10038", 0x0424}, +{"afii10039", 0x0425}, +{"afii10040", 0x0426}, +{"afii10041", 0x0427}, +{"afii10042", 0x0428}, +{"afii10043", 0x0429}, +{"afii10044", 0x042A}, +{"afii10045", 0x042B}, +{"afii10046", 0x042C}, +{"afii10047", 0x042D}, +{"afii10048", 0x042E}, +{"afii10049", 0x042F}, +{"afii10050", 0x0490}, +{"afii10051", 0x0402}, +{"afii10052", 0x0403}, +{"afii10053", 0x0404}, +{"afii10054", 0x0405}, +{"afii10055", 0x0406}, +{"afii10056", 0x0407}, +{"afii10057", 0x0408}, +{"afii10058", 0x0409}, +{"afii10059", 0x040A}, +{"afii10060", 0x040B}, +{"afii10061", 0x040C}, +{"afii10062", 0x040E}, +{"afii10063", 0xF6C4}, +{"afii10064", 0xF6C5}, +{"afii10065", 0x0430}, +{"afii10066", 0x0431}, +{"afii10067", 0x0432}, +{"afii10068", 0x0433}, +{"afii10069", 0x0434}, +{"afii10070", 0x0435}, +{"afii10071", 0x0451}, +{"afii10072", 0x0436}, +{"afii10073", 0x0437}, +{"afii10074", 0x0438}, +{"afii10075", 0x0439}, +{"afii10076", 0x043A}, +{"afii10077", 0x043B}, +{"afii10078", 0x043C}, +{"afii10079", 0x043D}, +{"afii10080", 0x043E}, +{"afii10081", 0x043F}, +{"afii10082", 0x0440}, +{"afii10083", 0x0441}, +{"afii10084", 0x0442}, +{"afii10085", 0x0443}, +{"afii10086", 0x0444}, +{"afii10087", 
0x0445}, +{"afii10088", 0x0446}, +{"afii10089", 0x0447}, +{"afii10090", 0x0448}, +{"afii10091", 0x0449}, +{"afii10092", 0x044A}, +{"afii10093", 0x044B}, +{"afii10094", 0x044C}, +{"afii10095", 0x044D}, +{"afii10096", 0x044E}, +{"afii10097", 0x044F}, +{"afii10098", 0x0491}, +{"afii10099", 0x0452}, +{"afii10100", 0x0453}, +{"afii10101", 0x0454}, +{"afii10102", 0x0455}, +{"afii10103", 0x0456}, +{"afii10104", 0x0457}, +{"afii10105", 0x0458}, +{"afii10106", 0x0459}, +{"afii10107", 0x045A}, +{"afii10108", 0x045B}, +{"afii10109", 0x045C}, +{"afii10110", 0x045E}, +{"afii10145", 0x040F}, +{"afii10146", 0x0462}, +{"afii10147", 0x0472}, +{"afii10148", 0x0474}, +{"afii10192", 0xF6C6}, +{"afii10193", 0x045F}, +{"afii10194", 0x0463}, +{"afii10195", 0x0473}, +{"afii10196", 0x0475}, +{"afii10831", 0xF6C7}, +{"afii10832", 0xF6C8}, +{"afii10846", 0x04D9}, +{"afii299", 0x200E}, +{"afii300", 0x200F}, +{"afii301", 0x200D}, +{"afii57381", 0x066A}, +{"afii57388", 0x060C}, +{"afii57392", 0x0660}, +{"afii57393", 0x0661}, +{"afii57394", 0x0662}, +{"afii57395", 0x0663}, +{"afii57396", 0x0664}, +{"afii57397", 0x0665}, +{"afii57398", 0x0666}, +{"afii57399", 0x0667}, +{"afii57400", 0x0668}, +{"afii57401", 0x0669}, +{"afii57403", 0x061B}, +{"afii57407", 0x061F}, +{"afii57409", 0x0621}, +{"afii57410", 0x0622}, +{"afii57411", 0x0623}, +{"afii57412", 0x0624}, +{"afii57413", 0x0625}, +{"afii57414", 0x0626}, +{"afii57415", 0x0627}, +{"afii57416", 0x0628}, +{"afii57417", 0x0629}, +{"afii57418", 0x062A}, +{"afii57419", 0x062B}, +{"afii57420", 0x062C}, +{"afii57421", 0x062D}, +{"afii57422", 0x062E}, +{"afii57423", 0x062F}, +{"afii57424", 0x0630}, +{"afii57425", 0x0631}, +{"afii57426", 0x0632}, +{"afii57427", 0x0633}, +{"afii57428", 0x0634}, +{"afii57429", 0x0635}, +{"afii57430", 0x0636}, +{"afii57431", 0x0637}, +{"afii57432", 0x0638}, +{"afii57433", 0x0639}, +{"afii57434", 0x063A}, +{"afii57440", 0x0640}, +{"afii57441", 0x0641}, +{"afii57442", 0x0642}, +{"afii57443", 0x0643}, +{"afii57444", 0x0644}, 
+{"afii57445", 0x0645}, +{"afii57446", 0x0646}, +{"afii57448", 0x0648}, +{"afii57449", 0x0649}, +{"afii57450", 0x064A}, +{"afii57451", 0x064B}, +{"afii57452", 0x064C}, +{"afii57453", 0x064D}, +{"afii57454", 0x064E}, +{"afii57455", 0x064F}, +{"afii57456", 0x0650}, +{"afii57457", 0x0651}, +{"afii57458", 0x0652}, +{"afii57470", 0x0647}, +{"afii57505", 0x06A4}, +{"afii57506", 0x067E}, +{"afii57507", 0x0686}, +{"afii57508", 0x0698}, +{"afii57509", 0x06AF}, +{"afii57511", 0x0679}, +{"afii57512", 0x0688}, +{"afii57513", 0x0691}, +{"afii57514", 0x06BA}, +{"afii57519", 0x06D2}, +{"afii57534", 0x06D5}, +{"afii57636", 0x20AA}, +{"afii57645", 0x05BE}, +{"afii57658", 0x05C3}, +{"afii57664", 0x05D0}, +{"afii57665", 0x05D1}, +{"afii57666", 0x05D2}, +{"afii57667", 0x05D3}, +{"afii57668", 0x05D4}, +{"afii57669", 0x05D5}, +{"afii57670", 0x05D6}, +{"afii57671", 0x05D7}, +{"afii57672", 0x05D8}, +{"afii57673", 0x05D9}, +{"afii57674", 0x05DA}, +{"afii57675", 0x05DB}, +{"afii57676", 0x05DC}, +{"afii57677", 0x05DD}, +{"afii57678", 0x05DE}, +{"afii57679", 0x05DF}, +{"afii57680", 0x05E0}, +{"afii57681", 0x05E1}, +{"afii57682", 0x05E2}, +{"afii57683", 0x05E3}, +{"afii57684", 0x05E4}, +{"afii57685", 0x05E5}, +{"afii57686", 0x05E6}, +{"afii57687", 0x05E7}, +{"afii57688", 0x05E8}, +{"afii57689", 0x05E9}, +{"afii57690", 0x05EA}, +{"afii57694", 0xFB2A}, +{"afii57695", 0xFB2B}, +{"afii57700", 0xFB4B}, +{"afii57705", 0xFB1F}, +{"afii57716", 0x05F0}, +{"afii57717", 0x05F1}, +{"afii57718", 0x05F2}, +{"afii57723", 0xFB35}, +{"afii57793", 0x05B4}, +{"afii57794", 0x05B5}, +{"afii57795", 0x05B6}, +{"afii57796", 0x05BB}, +{"afii57797", 0x05B8}, +{"afii57798", 0x05B7}, +{"afii57799", 0x05B0}, +{"afii57800", 0x05B2}, +{"afii57801", 0x05B1}, +{"afii57802", 0x05B3}, +{"afii57803", 0x05C2}, +{"afii57804", 0x05C1}, +{"afii57806", 0x05B9}, +{"afii57807", 0x05BC}, +{"afii57839", 0x05BD}, +{"afii57841", 0x05BF}, +{"afii57842", 0x05C0}, +{"afii57929", 0x02BC}, +{"afii61248", 0x2105}, +{"afii61289", 0x2113}, 
+{"afii61352", 0x2116}, +{"afii61573", 0x202C}, +{"afii61574", 0x202D}, +{"afii61575", 0x202E}, +{"afii61664", 0x200C}, +{"afii63167", 0x066D}, +{"afii64937", 0x02BD}, +{"agrave", 0x00E0}, +{"agujarati", 0x0A85}, +{"agurmukhi", 0x0A05}, +{"ahiragana", 0x3042}, +{"ahookabove", 0x1EA3}, +{"aibengali", 0x0990}, +{"aibopomofo", 0x311E}, +{"aideva", 0x0910}, +{"aiecyrillic", 0x04D5}, +{"aigujarati", 0x0A90}, +{"aigurmukhi", 0x0A10}, +{"aimatragurmukhi", 0x0A48}, +{"ainarabic", 0x0639}, +{"ainfinalarabic", 0xFECA}, +{"aininitialarabic", 0xFECB}, +{"ainmedialarabic", 0xFECC}, +{"ainvertedbreve", 0x0203}, +{"aivowelsignbengali", 0x09C8}, +{"aivowelsigndeva", 0x0948}, +{"aivowelsigngujarati", 0x0AC8}, +{"akatakana", 0x30A2}, +{"akatakanahalfwidth", 0xFF71}, +{"akorean", 0x314F}, +{"alef", 0x05D0}, +{"alefarabic", 0x0627}, +{"alefdageshhebrew", 0xFB30}, +{"aleffinalarabic", 0xFE8E}, +{"alefhamzaabovearabic", 0x0623}, +{"alefhamzaabovefinalarabic", 0xFE84}, +{"alefhamzabelowarabic", 0x0625}, +{"alefhamzabelowfinalarabic", 0xFE88}, +{"alefhebrew", 0x05D0}, +{"aleflamedhebrew", 0xFB4F}, +{"alefmaddaabovearabic", 0x0622}, +{"alefmaddaabovefinalarabic", 0xFE82}, +{"alefmaksuraarabic", 0x0649}, +{"alefmaksurafinalarabic", 0xFEF0}, +{"alefmaksurainitialarabic", 0xFEF3}, +{"alefmaksuramedialarabic", 0xFEF4}, +{"alefpatahhebrew", 0xFB2E}, +{"alefqamatshebrew", 0xFB2F}, +{"aleph", 0x2135}, +{"allequal", 0x224C}, +{"alpha", 0x03B1}, +{"alphatonos", 0x03AC}, +{"amacron", 0x0101}, +{"amonospace", 0xFF41}, +{"ampersand", 0x0026}, +{"ampersandmonospace", 0xFF06}, +{"ampersandsmall", 0xF726}, +{"amsquare", 0x33C2}, +{"anbopomofo", 0x3122}, +{"angbopomofo", 0x3124}, +{"angkhankhuthai", 0x0E5A}, +{"angle", 0x2220}, +{"anglebracketleft", 0x3008}, +{"anglebracketleftvertical", 0xFE3F}, +{"anglebracketright", 0x3009}, +{"anglebracketrightvertical", 0xFE40}, +{"angleleft", 0x2329}, +{"angleright", 0x232A}, +{"angstrom", 0x212B}, +{"anoteleia", 0x0387}, +{"anudattadeva", 0x0952}, 
+{"anusvarabengali", 0x0982}, +{"anusvaradeva", 0x0902}, +{"anusvaragujarati", 0x0A82}, +{"aogonek", 0x0105}, +{"apaatosquare", 0x3300}, +{"aparen", 0x249C}, +{"apostrophearmenian", 0x055A}, +{"apostrophemod", 0x02BC}, +{"apple", 0xF8FF}, +{"approaches", 0x2250}, +{"approxequal", 0x2248}, +{"approxequalorimage", 0x2252}, +{"approximatelyequal", 0x2245}, +{"araeaekorean", 0x318E}, +{"araeakorean", 0x318D}, +{"arc", 0x2312}, +{"arighthalfring", 0x1E9A}, +{"aring", 0x00E5}, +{"aringacute", 0x01FB}, +{"aringbelow", 0x1E01}, +{"arrowboth", 0x2194}, +{"arrowdashdown", 0x21E3}, +{"arrowdashleft", 0x21E0}, +{"arrowdashright", 0x21E2}, +{"arrowdashup", 0x21E1}, +{"arrowdblboth", 0x21D4}, +{"arrowdbldown", 0x21D3}, +{"arrowdblleft", 0x21D0}, +{"arrowdblright", 0x21D2}, +{"arrowdblup", 0x21D1}, +{"arrowdown", 0x2193}, +{"arrowdownleft", 0x2199}, +{"arrowdownright", 0x2198}, +{"arrowdownwhite", 0x21E9}, +{"arrowheaddownmod", 0x02C5}, +{"arrowheadleftmod", 0x02C2}, +{"arrowheadrightmod", 0x02C3}, +{"arrowheadupmod", 0x02C4}, +{"arrowhorizex", 0xF8E7}, +{"arrowleft", 0x2190}, +{"arrowleftdbl", 0x21D0}, +{"arrowleftdblstroke", 0x21CD}, +{"arrowleftoverright", 0x21C6}, +{"arrowleftwhite", 0x21E6}, +{"arrowright", 0x2192}, +{"arrowrightdblstroke", 0x21CF}, +{"arrowrightheavy", 0x279E}, +{"arrowrightoverleft", 0x21C4}, +{"arrowrightwhite", 0x21E8}, +{"arrowtableft", 0x21E4}, +{"arrowtabright", 0x21E5}, +{"arrowup", 0x2191}, +{"arrowupdn", 0x2195}, +{"arrowupdnbse", 0x21A8}, +{"arrowupdownbase", 0x21A8}, +{"arrowupleft", 0x2196}, +{"arrowupleftofdown", 0x21C5}, +{"arrowupright", 0x2197}, +{"arrowupwhite", 0x21E7}, +{"arrowvertex", 0xF8E6}, +{"asciicircum", 0x005E}, +{"asciicircummonospace", 0xFF3E}, +{"asciitilde", 0x007E}, +{"asciitildemonospace", 0xFF5E}, +{"ascript", 0x0251}, +{"ascriptturned", 0x0252}, +{"asmallhiragana", 0x3041}, +{"asmallkatakana", 0x30A1}, +{"asmallkatakanahalfwidth", 0xFF67}, +{"asterisk", 0x002A}, +{"asteriskaltonearabic", 0x066D}, +{"asteriskarabic", 
0x066D}, +{"asteriskmath", 0x2217}, +{"asteriskmonospace", 0xFF0A}, +{"asterisksmall", 0xFE61}, +{"asterism", 0x2042}, +{"asuperior", 0xF6E9}, +{"asymptoticallyequal", 0x2243}, +{"at", 0x0040}, +{"atilde", 0x00E3}, +{"atmonospace", 0xFF20}, +{"atsmall", 0xFE6B}, +{"aturned", 0x0250}, +{"aubengali", 0x0994}, +{"aubopomofo", 0x3120}, +{"audeva", 0x0914}, +{"augujarati", 0x0A94}, +{"augurmukhi", 0x0A14}, +{"aulengthmarkbengali", 0x09D7}, +{"aumatragurmukhi", 0x0A4C}, +{"auvowelsignbengali", 0x09CC}, +{"auvowelsigndeva", 0x094C}, +{"auvowelsigngujarati", 0x0ACC}, +{"avagrahadeva", 0x093D}, +{"aybarmenian", 0x0561}, +{"ayin", 0x05E2}, +{"ayinaltonehebrew", 0xFB20}, +{"ayinhebrew", 0x05E2}, +{"b", 0x0062}, +{"babengali", 0x09AC}, +{"backslash", 0x005C}, +{"backslashmonospace", 0xFF3C}, +{"badeva", 0x092C}, +{"bagujarati", 0x0AAC}, +{"bagurmukhi", 0x0A2C}, +{"bahiragana", 0x3070}, +{"bahtthai", 0x0E3F}, +{"bakatakana", 0x30D0}, +{"bar", 0x007C}, +{"barmonospace", 0xFF5C}, +{"bbopomofo", 0x3105}, +{"bcircle", 0x24D1}, +{"bdotaccent", 0x1E03}, +{"bdotbelow", 0x1E05}, +{"beamedsixteenthnotes", 0x266C}, +{"because", 0x2235}, +{"becyrillic", 0x0431}, +{"beharabic", 0x0628}, +{"behfinalarabic", 0xFE90}, +{"behinitialarabic", 0xFE91}, +{"behiragana", 0x3079}, +{"behmedialarabic", 0xFE92}, +{"behmeeminitialarabic", 0xFC9F}, +{"behmeemisolatedarabic", 0xFC08}, +{"behnoonfinalarabic", 0xFC6D}, +{"bekatakana", 0x30D9}, +{"benarmenian", 0x0562}, +{"bet", 0x05D1}, +{"beta", 0x03B2}, +{"betasymbolgreek", 0x03D0}, +{"betdagesh", 0xFB31}, +{"betdageshhebrew", 0xFB31}, +{"bethebrew", 0x05D1}, +{"betrafehebrew", 0xFB4C}, +{"bhabengali", 0x09AD}, +{"bhadeva", 0x092D}, +{"bhagujarati", 0x0AAD}, +{"bhagurmukhi", 0x0A2D}, +{"bhook", 0x0253}, +{"bihiragana", 0x3073}, +{"bikatakana", 0x30D3}, +{"bilabialclick", 0x0298}, +{"bindigurmukhi", 0x0A02}, +{"birusquare", 0x3331}, +{"blackcircle", 0x25CF}, +{"blackdiamond", 0x25C6}, +{"blackdownpointingtriangle", 0x25BC}, +{"blackleftpointingpointer", 
0x25C4}, +{"blackleftpointingtriangle", 0x25C0}, +{"blacklenticularbracketleft", 0x3010}, +{"blacklenticularbracketleftvertical", 0xFE3B}, +{"blacklenticularbracketright", 0x3011}, +{"blacklenticularbracketrightvertical", 0xFE3C}, +{"blacklowerlefttriangle", 0x25E3}, +{"blacklowerrighttriangle", 0x25E2}, +{"blackrectangle", 0x25AC}, +{"blackrightpointingpointer", 0x25BA}, +{"blackrightpointingtriangle", 0x25B6}, +{"blacksmallsquare", 0x25AA}, +{"blacksmilingface", 0x263B}, +{"blacksquare", 0x25A0}, +{"blackstar", 0x2605}, +{"blackupperlefttriangle", 0x25E4}, +{"blackupperrighttriangle", 0x25E5}, +{"blackuppointingsmalltriangle", 0x25B4}, +{"blackuppointingtriangle", 0x25B2}, +{"blank", 0x2423}, +{"blinebelow", 0x1E07}, +{"block", 0x2588}, +{"bmonospace", 0xFF42}, +{"bobaimaithai", 0x0E1A}, +{"bohiragana", 0x307C}, +{"bokatakana", 0x30DC}, +{"bparen", 0x249D}, +{"bqsquare", 0x33C3}, +{"braceex", 0xF8F4}, +{"braceleft", 0x007B}, +{"braceleftbt", 0xF8F3}, +{"braceleftmid", 0xF8F2}, +{"braceleftmonospace", 0xFF5B}, +{"braceleftsmall", 0xFE5B}, +{"bracelefttp", 0xF8F1}, +{"braceleftvertical", 0xFE37}, +{"braceright", 0x007D}, +{"bracerightbt", 0xF8FE}, +{"bracerightmid", 0xF8FD}, +{"bracerightmonospace", 0xFF5D}, +{"bracerightsmall", 0xFE5C}, +{"bracerighttp", 0xF8FC}, +{"bracerightvertical", 0xFE38}, +{"bracketleft", 0x005B}, +{"bracketleftbt", 0xF8F0}, +{"bracketleftex", 0xF8EF}, +{"bracketleftmonospace", 0xFF3B}, +{"bracketlefttp", 0xF8EE}, +{"bracketright", 0x005D}, +{"bracketrightbt", 0xF8FB}, +{"bracketrightex", 0xF8FA}, +{"bracketrightmonospace", 0xFF3D}, +{"bracketrighttp", 0xF8F9}, +{"breve", 0x02D8}, +{"brevebelowcmb", 0x032E}, +{"brevecmb", 0x0306}, +{"breveinvertedbelowcmb", 0x032F}, +{"breveinvertedcmb", 0x0311}, +{"breveinverteddoublecmb", 0x0361}, +{"bridgebelowcmb", 0x032A}, +{"bridgeinvertedbelowcmb", 0x033A}, +{"brokenbar", 0x00A6}, +{"bstroke", 0x0180}, +{"bsuperior", 0xF6EA}, +{"btopbar", 0x0183}, +{"buhiragana", 0x3076}, +{"bukatakana", 0x30D6}, 
+{"bullet", 0x2022}, +{"bulletinverse", 0x25D8}, +{"bulletoperator", 0x2219}, +{"bullseye", 0x25CE}, +{"c", 0x0063}, +{"caarmenian", 0x056E}, +{"cabengali", 0x099A}, +{"cacute", 0x0107}, +{"cadeva", 0x091A}, +{"cagujarati", 0x0A9A}, +{"cagurmukhi", 0x0A1A}, +{"calsquare", 0x3388}, +{"candrabindubengali", 0x0981}, +{"candrabinducmb", 0x0310}, +{"candrabindudeva", 0x0901}, +{"candrabindugujarati", 0x0A81}, +{"capslock", 0x21EA}, +{"careof", 0x2105}, +{"caron", 0x02C7}, +{"caronbelowcmb", 0x032C}, +{"caroncmb", 0x030C}, +{"carriagereturn", 0x21B5}, +{"cbopomofo", 0x3118}, +{"ccaron", 0x010D}, +{"ccedilla", 0x00E7}, +{"ccedillaacute", 0x1E09}, +{"ccircle", 0x24D2}, +{"ccircumflex", 0x0109}, +{"ccurl", 0x0255}, +{"cdot", 0x010B}, +{"cdotaccent", 0x010B}, +{"cdsquare", 0x33C5}, +{"cedilla", 0x00B8}, +{"cedillacmb", 0x0327}, +{"cent", 0x00A2}, +{"centigrade", 0x2103}, +{"centinferior", 0xF6DF}, +{"centmonospace", 0xFFE0}, +{"centoldstyle", 0xF7A2}, +{"centsuperior", 0xF6E0}, +{"chaarmenian", 0x0579}, +{"chabengali", 0x099B}, +{"chadeva", 0x091B}, +{"chagujarati", 0x0A9B}, +{"chagurmukhi", 0x0A1B}, +{"chbopomofo", 0x3114}, +{"cheabkhasiancyrillic", 0x04BD}, +{"checkmark", 0x2713}, +{"checyrillic", 0x0447}, +{"chedescenderabkhasiancyrillic", 0x04BF}, +{"chedescendercyrillic", 0x04B7}, +{"chedieresiscyrillic", 0x04F5}, +{"cheharmenian", 0x0573}, +{"chekhakassiancyrillic", 0x04CC}, +{"cheverticalstrokecyrillic", 0x04B9}, +{"chi", 0x03C7}, +{"chieuchacirclekorean", 0x3277}, +{"chieuchaparenkorean", 0x3217}, +{"chieuchcirclekorean", 0x3269}, +{"chieuchkorean", 0x314A}, +{"chieuchparenkorean", 0x3209}, +{"chochangthai", 0x0E0A}, +{"chochanthai", 0x0E08}, +{"chochingthai", 0x0E09}, +{"chochoethai", 0x0E0C}, +{"chook", 0x0188}, +{"cieucacirclekorean", 0x3276}, +{"cieucaparenkorean", 0x3216}, +{"cieuccirclekorean", 0x3268}, +{"cieuckorean", 0x3148}, +{"cieucparenkorean", 0x3208}, +{"cieucuparenkorean", 0x321C}, +{"circle", 0x25CB}, +{"circlemultiply", 0x2297}, +{"circleot", 
0x2299}, +{"circleplus", 0x2295}, +{"circlepostalmark", 0x3036}, +{"circlewithlefthalfblack", 0x25D0}, +{"circlewithrighthalfblack", 0x25D1}, +{"circumflex", 0x02C6}, +{"circumflexbelowcmb", 0x032D}, +{"circumflexcmb", 0x0302}, +{"clear", 0x2327}, +{"clickalveolar", 0x01C2}, +{"clickdental", 0x01C0}, +{"clicklateral", 0x01C1}, +{"clickretroflex", 0x01C3}, +{"club", 0x2663}, +{"clubsuitblack", 0x2663}, +{"clubsuitwhite", 0x2667}, +{"cmcubedsquare", 0x33A4}, +{"cmonospace", 0xFF43}, +{"cmsquaredsquare", 0x33A0}, +{"coarmenian", 0x0581}, +{"colon", 0x003A}, +{"colonmonetary", 0x20A1}, +{"colonmonospace", 0xFF1A}, +{"colonsign", 0x20A1}, +{"colonsmall", 0xFE55}, +{"colontriangularhalfmod", 0x02D1}, +{"colontriangularmod", 0x02D0}, +{"comma", 0x002C}, +{"commaabovecmb", 0x0313}, +{"commaaboverightcmb", 0x0315}, +{"commaaccent", 0xF6C3}, +{"commaarabic", 0x060C}, +{"commaarmenian", 0x055D}, +{"commainferior", 0xF6E1}, +{"commamonospace", 0xFF0C}, +{"commareversedabovecmb", 0x0314}, +{"commareversedmod", 0x02BD}, +{"commasmall", 0xFE50}, +{"commasuperior", 0xF6E2}, +{"commaturnedabovecmb", 0x0312}, +{"commaturnedmod", 0x02BB}, +{"compass", 0x263C}, +{"congruent", 0x2245}, +{"contourintegral", 0x222E}, +{"control", 0x2303}, +{"controlACK", 0x0006}, +{"controlBEL", 0x0007}, +{"controlBS", 0x0008}, +{"controlCAN", 0x0018}, +{"controlCR", 0x000D}, +{"controlDC1", 0x0011}, +{"controlDC2", 0x0012}, +{"controlDC3", 0x0013}, +{"controlDC4", 0x0014}, +{"controlDEL", 0x007F}, +{"controlDLE", 0x0010}, +{"controlEM", 0x0019}, +{"controlENQ", 0x0005}, +{"controlEOT", 0x0004}, +{"controlESC", 0x001B}, +{"controlETB", 0x0017}, +{"controlETX", 0x0003}, +{"controlFF", 0x000C}, +{"controlFS", 0x001C}, +{"controlGS", 0x001D}, +{"controlHT", 0x0009}, +{"controlLF", 0x000A}, +{"controlNAK", 0x0015}, +{"controlRS", 0x001E}, +{"controlSI", 0x000F}, +{"controlSO", 0x000E}, +{"controlSOT", 0x0002}, +{"controlSTX", 0x0001}, +{"controlSUB", 0x001A}, +{"controlSYN", 0x0016}, +{"controlUS", 0x001F}, 
+{"controlVT", 0x000B}, +{"copyright", 0x00A9}, +{"copyrightsans", 0xF8E9}, +{"copyrightserif", 0xF6D9}, +{"cornerbracketleft", 0x300C}, +{"cornerbracketlefthalfwidth", 0xFF62}, +{"cornerbracketleftvertical", 0xFE41}, +{"cornerbracketright", 0x300D}, +{"cornerbracketrighthalfwidth", 0xFF63}, +{"cornerbracketrightvertical", 0xFE42}, +{"corporationsquare", 0x337F}, +{"cosquare", 0x33C7}, +{"coverkgsquare", 0x33C6}, +{"cparen", 0x249E}, +{"cruzeiro", 0x20A2}, +{"cstretched", 0x0297}, +{"curlyand", 0x22CF}, +{"curlyor", 0x22CE}, +{"currency", 0x00A4}, +{"cyrBreve", 0xF6D1}, +{"cyrFlex", 0xF6D2}, +{"cyrbreve", 0xF6D4}, +{"cyrflex", 0xF6D5}, +{"d", 0x0064}, +{"daarmenian", 0x0564}, +{"dabengali", 0x09A6}, +{"dadarabic", 0x0636}, +{"dadeva", 0x0926}, +{"dadfinalarabic", 0xFEBE}, +{"dadinitialarabic", 0xFEBF}, +{"dadmedialarabic", 0xFEC0}, +{"dagesh", 0x05BC}, +{"dageshhebrew", 0x05BC}, +{"dagger", 0x2020}, +{"daggerdbl", 0x2021}, +{"dagujarati", 0x0AA6}, +{"dagurmukhi", 0x0A26}, +{"dahiragana", 0x3060}, +{"dakatakana", 0x30C0}, +{"dalarabic", 0x062F}, +{"dalet", 0x05D3}, +{"daletdagesh", 0xFB33}, +{"daletdageshhebrew", 0xFB33}, +{"dalethatafpatah", 0x05D3}, +{"dalethatafpatahhebrew", 0x05D3}, +{"dalethatafsegol", 0x05D3}, +{"dalethatafsegolhebrew", 0x05D3}, +{"dalethebrew", 0x05D3}, +{"dalethiriq", 0x05D3}, +{"dalethiriqhebrew", 0x05D3}, +{"daletholam", 0x05D3}, +{"daletholamhebrew", 0x05D3}, +{"daletpatah", 0x05D3}, +{"daletpatahhebrew", 0x05D3}, +{"daletqamats", 0x05D3}, +{"daletqamatshebrew", 0x05D3}, +{"daletqubuts", 0x05D3}, +{"daletqubutshebrew", 0x05D3}, +{"daletsegol", 0x05D3}, +{"daletsegolhebrew", 0x05D3}, +{"daletsheva", 0x05D3}, +{"daletshevahebrew", 0x05D3}, +{"dalettsere", 0x05D3}, +{"dalettserehebrew", 0x05D3}, +{"dalfinalarabic", 0xFEAA}, +{"dammaarabic", 0x064F}, +{"dammalowarabic", 0x064F}, +{"dammatanaltonearabic", 0x064C}, +{"dammatanarabic", 0x064C}, +{"danda", 0x0964}, +{"dargahebrew", 0x05A7}, +{"dargalefthebrew", 0x05A7}, 
+{"dasiapneumatacyrilliccmb", 0x0485}, +{"dblGrave", 0xF6D3}, +{"dblanglebracketleft", 0x300A}, +{"dblanglebracketleftvertical", 0xFE3D}, +{"dblanglebracketright", 0x300B}, +{"dblanglebracketrightvertical", 0xFE3E}, +{"dblarchinvertedbelowcmb", 0x032B}, +{"dblarrowleft", 0x21D4}, +{"dblarrowright", 0x21D2}, +{"dbldanda", 0x0965}, +{"dblgrave", 0xF6D6}, +{"dblgravecmb", 0x030F}, +{"dblintegral", 0x222C}, +{"dbllowline", 0x2017}, +{"dbllowlinecmb", 0x0333}, +{"dbloverlinecmb", 0x033F}, +{"dblprimemod", 0x02BA}, +{"dblverticalbar", 0x2016}, +{"dblverticallineabovecmb", 0x030E}, +{"dbopomofo", 0x3109}, +{"dbsquare", 0x33C8}, +{"dcaron", 0x010F}, +{"dcedilla", 0x1E11}, +{"dcircle", 0x24D3}, +{"dcircumflexbelow", 0x1E13}, +{"dcroat", 0x0111}, +{"ddabengali", 0x09A1}, +{"ddadeva", 0x0921}, +{"ddagujarati", 0x0AA1}, +{"ddagurmukhi", 0x0A21}, +{"ddalarabic", 0x0688}, +{"ddalfinalarabic", 0xFB89}, +{"dddhadeva", 0x095C}, +{"ddhabengali", 0x09A2}, +{"ddhadeva", 0x0922}, +{"ddhagujarati", 0x0AA2}, +{"ddhagurmukhi", 0x0A22}, +{"ddotaccent", 0x1E0B}, +{"ddotbelow", 0x1E0D}, +{"decimalseparatorarabic", 0x066B}, +{"decimalseparatorpersian", 0x066B}, +{"decyrillic", 0x0434}, +{"degree", 0x00B0}, +{"dehihebrew", 0x05AD}, +{"dehiragana", 0x3067}, +{"deicoptic", 0x03EF}, +{"dekatakana", 0x30C7}, +{"deleteleft", 0x232B}, +{"deleteright", 0x2326}, +{"delta", 0x03B4}, +{"deltaturned", 0x018D}, +{"denominatorminusonenumeratorbengali", 0x09F8}, +{"dezh", 0x02A4}, +{"dhabengali", 0x09A7}, +{"dhadeva", 0x0927}, +{"dhagujarati", 0x0AA7}, +{"dhagurmukhi", 0x0A27}, +{"dhook", 0x0257}, +{"dialytikatonos", 0x0385}, +{"dialytikatonoscmb", 0x0344}, +{"diamond", 0x2666}, +{"diamondsuitwhite", 0x2662}, +{"dieresis", 0x00A8}, +{"dieresisacute", 0xF6D7}, +{"dieresisbelowcmb", 0x0324}, +{"dieresiscmb", 0x0308}, +{"dieresisgrave", 0xF6D8}, +{"dieresistonos", 0x0385}, +{"dihiragana", 0x3062}, +{"dikatakana", 0x30C2}, +{"dittomark", 0x3003}, +{"divide", 0x00F7}, +{"divides", 0x2223}, +{"divisionslash", 
0x2215}, +{"djecyrillic", 0x0452}, +{"dkshade", 0x2593}, +{"dlinebelow", 0x1E0F}, +{"dlsquare", 0x3397}, +{"dmacron", 0x0111}, +{"dmonospace", 0xFF44}, +{"dnblock", 0x2584}, +{"dochadathai", 0x0E0E}, +{"dodekthai", 0x0E14}, +{"dohiragana", 0x3069}, +{"dokatakana", 0x30C9}, +{"dollar", 0x0024}, +{"dollarinferior", 0xF6E3}, +{"dollarmonospace", 0xFF04}, +{"dollaroldstyle", 0xF724}, +{"dollarsmall", 0xFE69}, +{"dollarsuperior", 0xF6E4}, +{"dong", 0x20AB}, +{"dorusquare", 0x3326}, +{"dotaccent", 0x02D9}, +{"dotaccentcmb", 0x0307}, +{"dotbelowcmb", 0x0323}, +{"dotbelowcomb", 0x0323}, +{"dotkatakana", 0x30FB}, +{"dotlessi", 0x0131}, +{"dotlessj", 0xF6BE}, +{"dotlessjstrokehook", 0x0284}, +{"dotmath", 0x22C5}, +{"dottedcircle", 0x25CC}, +{"doubleyodpatah", 0xFB1F}, +{"doubleyodpatahhebrew", 0xFB1F}, +{"downtackbelowcmb", 0x031E}, +{"downtackmod", 0x02D5}, +{"dparen", 0x249F}, +{"dsuperior", 0xF6EB}, +{"dtail", 0x0256}, +{"dtopbar", 0x018C}, +{"duhiragana", 0x3065}, +{"dukatakana", 0x30C5}, +{"dz", 0x01F3}, +{"dzaltone", 0x02A3}, +{"dzcaron", 0x01C6}, +{"dzcurl", 0x02A5}, +{"dzeabkhasiancyrillic", 0x04E1}, +{"dzecyrillic", 0x0455}, +{"dzhecyrillic", 0x045F}, +{"e", 0x0065}, +{"eacute", 0x00E9}, +{"earth", 0x2641}, +{"ebengali", 0x098F}, +{"ebopomofo", 0x311C}, +{"ebreve", 0x0115}, +{"ecandradeva", 0x090D}, +{"ecandragujarati", 0x0A8D}, +{"ecandravowelsigndeva", 0x0945}, +{"ecandravowelsigngujarati", 0x0AC5}, +{"ecaron", 0x011B}, +{"ecedillabreve", 0x1E1D}, +{"echarmenian", 0x0565}, +{"echyiwnarmenian", 0x0587}, +{"ecircle", 0x24D4}, +{"ecircumflex", 0x00EA}, +{"ecircumflexacute", 0x1EBF}, +{"ecircumflexbelow", 0x1E19}, +{"ecircumflexdotbelow", 0x1EC7}, +{"ecircumflexgrave", 0x1EC1}, +{"ecircumflexhookabove", 0x1EC3}, +{"ecircumflextilde", 0x1EC5}, +{"ecyrillic", 0x0454}, +{"edblgrave", 0x0205}, +{"edeva", 0x090F}, +{"edieresis", 0x00EB}, +{"edot", 0x0117}, +{"edotaccent", 0x0117}, +{"edotbelow", 0x1EB9}, +{"eegurmukhi", 0x0A0F}, +{"eematragurmukhi", 0x0A47}, 
+{"efcyrillic", 0x0444}, +{"egrave", 0x00E8}, +{"egujarati", 0x0A8F}, +{"eharmenian", 0x0567}, +{"ehbopomofo", 0x311D}, +{"ehiragana", 0x3048}, +{"ehookabove", 0x1EBB}, +{"eibopomofo", 0x311F}, +{"eight", 0x0038}, +{"eightarabic", 0x0668}, +{"eightbengali", 0x09EE}, +{"eightcircle", 0x2467}, +{"eightcircleinversesansserif", 0x2791}, +{"eightdeva", 0x096E}, +{"eighteencircle", 0x2471}, +{"eighteenparen", 0x2485}, +{"eighteenperiod", 0x2499}, +{"eightgujarati", 0x0AEE}, +{"eightgurmukhi", 0x0A6E}, +{"eighthackarabic", 0x0668}, +{"eighthangzhou", 0x3028}, +{"eighthnotebeamed", 0x266B}, +{"eightideographicparen", 0x3227}, +{"eightinferior", 0x2088}, +{"eightmonospace", 0xFF18}, +{"eightoldstyle", 0xF738}, +{"eightparen", 0x247B}, +{"eightperiod", 0x248F}, +{"eightpersian", 0x06F8}, +{"eightroman", 0x2177}, +{"eightsuperior", 0x2078}, +{"eightthai", 0x0E58}, +{"einvertedbreve", 0x0207}, +{"eiotifiedcyrillic", 0x0465}, +{"ekatakana", 0x30A8}, +{"ekatakanahalfwidth", 0xFF74}, +{"ekonkargurmukhi", 0x0A74}, +{"ekorean", 0x3154}, +{"elcyrillic", 0x043B}, +{"element", 0x2208}, +{"elevencircle", 0x246A}, +{"elevenparen", 0x247E}, +{"elevenperiod", 0x2492}, +{"elevenroman", 0x217A}, +{"ellipsis", 0x2026}, +{"ellipsisvertical", 0x22EE}, +{"emacron", 0x0113}, +{"emacronacute", 0x1E17}, +{"emacrongrave", 0x1E15}, +{"emcyrillic", 0x043C}, +{"emdash", 0x2014}, +{"emdashvertical", 0xFE31}, +{"emonospace", 0xFF45}, +{"emphasismarkarmenian", 0x055B}, +{"emptyset", 0x2205}, +{"enbopomofo", 0x3123}, +{"encyrillic", 0x043D}, +{"endash", 0x2013}, +{"endashvertical", 0xFE32}, +{"endescendercyrillic", 0x04A3}, +{"eng", 0x014B}, +{"engbopomofo", 0x3125}, +{"enghecyrillic", 0x04A5}, +{"enhookcyrillic", 0x04C8}, +{"enspace", 0x2002}, +{"eogonek", 0x0119}, +{"eokorean", 0x3153}, +{"eopen", 0x025B}, +{"eopenclosed", 0x029A}, +{"eopenreversed", 0x025C}, +{"eopenreversedclosed", 0x025E}, +{"eopenreversedhook", 0x025D}, +{"eparen", 0x24A0}, +{"epsilon", 0x03B5}, +{"epsilontonos", 0x03AD}, +{"equal", 
0x003D}, +{"equalmonospace", 0xFF1D}, +{"equalsmall", 0xFE66}, +{"equalsuperior", 0x207C}, +{"equivalence", 0x2261}, +{"erbopomofo", 0x3126}, +{"ercyrillic", 0x0440}, +{"ereversed", 0x0258}, +{"ereversedcyrillic", 0x044D}, +{"escyrillic", 0x0441}, +{"esdescendercyrillic", 0x04AB}, +{"esh", 0x0283}, +{"eshcurl", 0x0286}, +{"eshortdeva", 0x090E}, +{"eshortvowelsigndeva", 0x0946}, +{"eshreversedloop", 0x01AA}, +{"eshsquatreversed", 0x0285}, +{"esmallhiragana", 0x3047}, +{"esmallkatakana", 0x30A7}, +{"esmallkatakanahalfwidth", 0xFF6A}, +{"estimated", 0x212E}, +{"esuperior", 0xF6EC}, +{"eta", 0x03B7}, +{"etarmenian", 0x0568}, +{"etatonos", 0x03AE}, +{"eth", 0x00F0}, +{"etilde", 0x1EBD}, +{"etildebelow", 0x1E1B}, +{"etnahtafoukhhebrew", 0x0591}, +{"etnahtafoukhlefthebrew", 0x0591}, +{"etnahtahebrew", 0x0591}, +{"etnahtalefthebrew", 0x0591}, +{"eturned", 0x01DD}, +{"eukorean", 0x3161}, +{"euro", 0x20AC}, +{"evowelsignbengali", 0x09C7}, +{"evowelsigndeva", 0x0947}, +{"evowelsigngujarati", 0x0AC7}, +{"exclam", 0x0021}, +{"exclamarmenian", 0x055C}, +{"exclamdbl", 0x203C}, +{"exclamdown", 0x00A1}, +{"exclamdownsmall", 0xF7A1}, +{"exclammonospace", 0xFF01}, +{"exclamsmall", 0xF721}, +{"existential", 0x2203}, +{"ezh", 0x0292}, +{"ezhcaron", 0x01EF}, +{"ezhcurl", 0x0293}, +{"ezhreversed", 0x01B9}, +{"ezhtail", 0x01BA}, +{"f", 0x0066}, +{"fadeva", 0x095E}, +{"fagurmukhi", 0x0A5E}, +{"fahrenheit", 0x2109}, +{"fathaarabic", 0x064E}, +{"fathalowarabic", 0x064E}, +{"fathatanarabic", 0x064B}, +{"fbopomofo", 0x3108}, +{"fcircle", 0x24D5}, +{"fdotaccent", 0x1E1F}, +{"feharabic", 0x0641}, +{"feharmenian", 0x0586}, +{"fehfinalarabic", 0xFED2}, +{"fehinitialarabic", 0xFED3}, +{"fehmedialarabic", 0xFED4}, +{"feicoptic", 0x03E5}, +{"female", 0x2640}, +{"ff", 0xFB00}, +{"ffi", 0xFB03}, +{"ffl", 0xFB04}, +{"fi", 0xFB01}, +{"fifteencircle", 0x246E}, +{"fifteenparen", 0x2482}, +{"fifteenperiod", 0x2496}, +{"figuredash", 0x2012}, +{"filledbox", 0x25A0}, +{"filledrect", 0x25AC}, +{"finalkaf", 
0x05DA}, +{"finalkafdagesh", 0xFB3A}, +{"finalkafdageshhebrew", 0xFB3A}, +{"finalkafhebrew", 0x05DA}, +{"finalkafqamats", 0x05DA}, +{"finalkafqamatshebrew", 0x05DA}, +{"finalkafsheva", 0x05DA}, +{"finalkafshevahebrew", 0x05DA}, +{"finalmem", 0x05DD}, +{"finalmemhebrew", 0x05DD}, +{"finalnun", 0x05DF}, +{"finalnunhebrew", 0x05DF}, +{"finalpe", 0x05E3}, +{"finalpehebrew", 0x05E3}, +{"finaltsadi", 0x05E5}, +{"finaltsadihebrew", 0x05E5}, +{"firsttonechinese", 0x02C9}, +{"fisheye", 0x25C9}, +{"fitacyrillic", 0x0473}, +{"five", 0x0035}, +{"fivearabic", 0x0665}, +{"fivebengali", 0x09EB}, +{"fivecircle", 0x2464}, +{"fivecircleinversesansserif", 0x278E}, +{"fivedeva", 0x096B}, +{"fiveeighths", 0x215D}, +{"fivegujarati", 0x0AEB}, +{"fivegurmukhi", 0x0A6B}, +{"fivehackarabic", 0x0665}, +{"fivehangzhou", 0x3025}, +{"fiveideographicparen", 0x3224}, +{"fiveinferior", 0x2085}, +{"fivemonospace", 0xFF15}, +{"fiveoldstyle", 0xF735}, +{"fiveparen", 0x2478}, +{"fiveperiod", 0x248C}, +{"fivepersian", 0x06F5}, +{"fiveroman", 0x2174}, +{"fivesuperior", 0x2075}, +{"fivethai", 0x0E55}, +{"fl", 0xFB02}, +{"florin", 0x0192}, +{"fmonospace", 0xFF46}, +{"fmsquare", 0x3399}, +{"fofanthai", 0x0E1F}, +{"fofathai", 0x0E1D}, +{"fongmanthai", 0x0E4F}, +{"forall", 0x2200}, +{"four", 0x0034}, +{"fourarabic", 0x0664}, +{"fourbengali", 0x09EA}, +{"fourcircle", 0x2463}, +{"fourcircleinversesansserif", 0x278D}, +{"fourdeva", 0x096A}, +{"fourgujarati", 0x0AEA}, +{"fourgurmukhi", 0x0A6A}, +{"fourhackarabic", 0x0664}, +{"fourhangzhou", 0x3024}, +{"fourideographicparen", 0x3223}, +{"fourinferior", 0x2084}, +{"fourmonospace", 0xFF14}, +{"fournumeratorbengali", 0x09F7}, +{"fouroldstyle", 0xF734}, +{"fourparen", 0x2477}, +{"fourperiod", 0x248B}, +{"fourpersian", 0x06F4}, +{"fourroman", 0x2173}, +{"foursuperior", 0x2074}, +{"fourteencircle", 0x246D}, +{"fourteenparen", 0x2481}, +{"fourteenperiod", 0x2495}, +{"fourthai", 0x0E54}, +{"fourthtonechinese", 0x02CB}, +{"fparen", 0x24A1}, +{"fraction", 0x2044}, 
+{"franc", 0x20A3}, +{"g", 0x0067}, +{"gabengali", 0x0997}, +{"gacute", 0x01F5}, +{"gadeva", 0x0917}, +{"gafarabic", 0x06AF}, +{"gaffinalarabic", 0xFB93}, +{"gafinitialarabic", 0xFB94}, +{"gafmedialarabic", 0xFB95}, +{"gagujarati", 0x0A97}, +{"gagurmukhi", 0x0A17}, +{"gahiragana", 0x304C}, +{"gakatakana", 0x30AC}, +{"gamma", 0x03B3}, +{"gammalatinsmall", 0x0263}, +{"gammasuperior", 0x02E0}, +{"gangiacoptic", 0x03EB}, +{"gbopomofo", 0x310D}, +{"gbreve", 0x011F}, +{"gcaron", 0x01E7}, +{"gcedilla", 0x0123}, +{"gcircle", 0x24D6}, +{"gcircumflex", 0x011D}, +{"gcommaaccent", 0x0123}, +{"gdot", 0x0121}, +{"gdotaccent", 0x0121}, +{"gecyrillic", 0x0433}, +{"gehiragana", 0x3052}, +{"gekatakana", 0x30B2}, +{"geometricallyequal", 0x2251}, +{"gereshaccenthebrew", 0x059C}, +{"gereshhebrew", 0x05F3}, +{"gereshmuqdamhebrew", 0x059D}, +{"germandbls", 0x00DF}, +{"gershayimaccenthebrew", 0x059E}, +{"gershayimhebrew", 0x05F4}, +{"getamark", 0x3013}, +{"ghabengali", 0x0998}, +{"ghadarmenian", 0x0572}, +{"ghadeva", 0x0918}, +{"ghagujarati", 0x0A98}, +{"ghagurmukhi", 0x0A18}, +{"ghainarabic", 0x063A}, +{"ghainfinalarabic", 0xFECE}, +{"ghaininitialarabic", 0xFECF}, +{"ghainmedialarabic", 0xFED0}, +{"ghemiddlehookcyrillic", 0x0495}, +{"ghestrokecyrillic", 0x0493}, +{"gheupturncyrillic", 0x0491}, +{"ghhadeva", 0x095A}, +{"ghhagurmukhi", 0x0A5A}, +{"ghook", 0x0260}, +{"ghzsquare", 0x3393}, +{"gihiragana", 0x304E}, +{"gikatakana", 0x30AE}, +{"gimarmenian", 0x0563}, +{"gimel", 0x05D2}, +{"gimeldagesh", 0xFB32}, +{"gimeldageshhebrew", 0xFB32}, +{"gimelhebrew", 0x05D2}, +{"gjecyrillic", 0x0453}, +{"glottalinvertedstroke", 0x01BE}, +{"glottalstop", 0x0294}, +{"glottalstopinverted", 0x0296}, +{"glottalstopmod", 0x02C0}, +{"glottalstopreversed", 0x0295}, +{"glottalstopreversedmod", 0x02C1}, +{"glottalstopreversedsuperior", 0x02E4}, +{"glottalstopstroke", 0x02A1}, +{"glottalstopstrokereversed", 0x02A2}, +{"gmacron", 0x1E21}, +{"gmonospace", 0xFF47}, +{"gohiragana", 0x3054}, +{"gokatakana", 0x30B4}, 
+{"gparen", 0x24A2}, +{"gpasquare", 0x33AC}, +{"gradient", 0x2207}, +{"grave", 0x0060}, +{"gravebelowcmb", 0x0316}, +{"gravecmb", 0x0300}, +{"gravecomb", 0x0300}, +{"gravedeva", 0x0953}, +{"gravelowmod", 0x02CE}, +{"gravemonospace", 0xFF40}, +{"gravetonecmb", 0x0340}, +{"greater", 0x003E}, +{"greaterequal", 0x2265}, +{"greaterequalorless", 0x22DB}, +{"greatermonospace", 0xFF1E}, +{"greaterorequivalent", 0x2273}, +{"greaterorless", 0x2277}, +{"greateroverequal", 0x2267}, +{"greatersmall", 0xFE65}, +{"gscript", 0x0261}, +{"gstroke", 0x01E5}, +{"guhiragana", 0x3050}, +{"guillemotleft", 0x00AB}, +{"guillemotright", 0x00BB}, +{"guilsinglleft", 0x2039}, +{"guilsinglright", 0x203A}, +{"gukatakana", 0x30B0}, +{"guramusquare", 0x3318}, +{"gysquare", 0x33C9}, +{"h", 0x0068}, +{"haabkhasiancyrillic", 0x04A9}, +{"haaltonearabic", 0x06C1}, +{"habengali", 0x09B9}, +{"hadescendercyrillic", 0x04B3}, +{"hadeva", 0x0939}, +{"hagujarati", 0x0AB9}, +{"hagurmukhi", 0x0A39}, +{"haharabic", 0x062D}, +{"hahfinalarabic", 0xFEA2}, +{"hahinitialarabic", 0xFEA3}, +{"hahiragana", 0x306F}, +{"hahmedialarabic", 0xFEA4}, +{"haitusquare", 0x332A}, +{"hakatakana", 0x30CF}, +{"hakatakanahalfwidth", 0xFF8A}, +{"halantgurmukhi", 0x0A4D}, +{"hamzaarabic", 0x0621}, +{"hamzadammaarabic", 0x0621}, +{"hamzadammatanarabic", 0x0621}, +{"hamzafathaarabic", 0x0621}, +{"hamzafathatanarabic", 0x0621}, +{"hamzalowarabic", 0x0621}, +{"hamzalowkasraarabic", 0x0621}, +{"hamzalowkasratanarabic", 0x0621}, +{"hamzasukunarabic", 0x0621}, +{"hangulfiller", 0x3164}, +{"hardsigncyrillic", 0x044A}, +{"harpoonleftbarbup", 0x21BC}, +{"harpoonrightbarbup", 0x21C0}, +{"hasquare", 0x33CA}, +{"hatafpatah", 0x05B2}, +{"hatafpatah16", 0x05B2}, +{"hatafpatah23", 0x05B2}, +{"hatafpatah2f", 0x05B2}, +{"hatafpatahhebrew", 0x05B2}, +{"hatafpatahnarrowhebrew", 0x05B2}, +{"hatafpatahquarterhebrew", 0x05B2}, +{"hatafpatahwidehebrew", 0x05B2}, +{"hatafqamats", 0x05B3}, +{"hatafqamats1b", 0x05B3}, +{"hatafqamats28", 0x05B3}, 
+{"hatafqamats34", 0x05B3}, +{"hatafqamatshebrew", 0x05B3}, +{"hatafqamatsnarrowhebrew", 0x05B3}, +{"hatafqamatsquarterhebrew", 0x05B3}, +{"hatafqamatswidehebrew", 0x05B3}, +{"hatafsegol", 0x05B1}, +{"hatafsegol17", 0x05B1}, +{"hatafsegol24", 0x05B1}, +{"hatafsegol30", 0x05B1}, +{"hatafsegolhebrew", 0x05B1}, +{"hatafsegolnarrowhebrew", 0x05B1}, +{"hatafsegolquarterhebrew", 0x05B1}, +{"hatafsegolwidehebrew", 0x05B1}, +{"hbar", 0x0127}, +{"hbopomofo", 0x310F}, +{"hbrevebelow", 0x1E2B}, +{"hcedilla", 0x1E29}, +{"hcircle", 0x24D7}, +{"hcircumflex", 0x0125}, +{"hdieresis", 0x1E27}, +{"hdotaccent", 0x1E23}, +{"hdotbelow", 0x1E25}, +{"he", 0x05D4}, +{"heart", 0x2665}, +{"heartsuitblack", 0x2665}, +{"heartsuitwhite", 0x2661}, +{"hedagesh", 0xFB34}, +{"hedageshhebrew", 0xFB34}, +{"hehaltonearabic", 0x06C1}, +{"heharabic", 0x0647}, +{"hehebrew", 0x05D4}, +{"hehfinalaltonearabic", 0xFBA7}, +{"hehfinalalttwoarabic", 0xFEEA}, +{"hehfinalarabic", 0xFEEA}, +{"hehhamzaabovefinalarabic", 0xFBA5}, +{"hehhamzaaboveisolatedarabic", 0xFBA4}, +{"hehinitialaltonearabic", 0xFBA8}, +{"hehinitialarabic", 0xFEEB}, +{"hehiragana", 0x3078}, +{"hehmedialaltonearabic", 0xFBA9}, +{"hehmedialarabic", 0xFEEC}, +{"heiseierasquare", 0x337B}, +{"hekatakana", 0x30D8}, +{"hekatakanahalfwidth", 0xFF8D}, +{"hekutaarusquare", 0x3336}, +{"henghook", 0x0267}, +{"herutusquare", 0x3339}, +{"het", 0x05D7}, +{"hethebrew", 0x05D7}, +{"hhook", 0x0266}, +{"hhooksuperior", 0x02B1}, +{"hieuhacirclekorean", 0x327B}, +{"hieuhaparenkorean", 0x321B}, +{"hieuhcirclekorean", 0x326D}, +{"hieuhkorean", 0x314E}, +{"hieuhparenkorean", 0x320D}, +{"hihiragana", 0x3072}, +{"hikatakana", 0x30D2}, +{"hikatakanahalfwidth", 0xFF8B}, +{"hiriq", 0x05B4}, +{"hiriq14", 0x05B4}, +{"hiriq21", 0x05B4}, +{"hiriq2d", 0x05B4}, +{"hiriqhebrew", 0x05B4}, +{"hiriqnarrowhebrew", 0x05B4}, +{"hiriqquarterhebrew", 0x05B4}, +{"hiriqwidehebrew", 0x05B4}, +{"hlinebelow", 0x1E96}, +{"hmonospace", 0xFF48}, +{"hoarmenian", 0x0570}, +{"hohipthai", 0x0E2B}, 
+{"hohiragana", 0x307B}, +{"hokatakana", 0x30DB}, +{"hokatakanahalfwidth", 0xFF8E}, +{"holam", 0x05B9}, +{"holam19", 0x05B9}, +{"holam26", 0x05B9}, +{"holam32", 0x05B9}, +{"holamhebrew", 0x05B9}, +{"holamnarrowhebrew", 0x05B9}, +{"holamquarterhebrew", 0x05B9}, +{"holamwidehebrew", 0x05B9}, +{"honokhukthai", 0x0E2E}, +{"hookabovecomb", 0x0309}, +{"hookcmb", 0x0309}, +{"hookpalatalizedbelowcmb", 0x0321}, +{"hookretroflexbelowcmb", 0x0322}, +{"hoonsquare", 0x3342}, +{"horicoptic", 0x03E9}, +{"horizontalbar", 0x2015}, +{"horncmb", 0x031B}, +{"hotsprings", 0x2668}, +{"house", 0x2302}, +{"hparen", 0x24A3}, +{"hsuperior", 0x02B0}, +{"hturned", 0x0265}, +{"huhiragana", 0x3075}, +{"huiitosquare", 0x3333}, +{"hukatakana", 0x30D5}, +{"hukatakanahalfwidth", 0xFF8C}, +{"hungarumlaut", 0x02DD}, +{"hungarumlautcmb", 0x030B}, +{"hv", 0x0195}, +{"hyphen", 0x002D}, +{"hypheninferior", 0xF6E5}, +{"hyphenmonospace", 0xFF0D}, +{"hyphensmall", 0xFE63}, +{"hyphensuperior", 0xF6E6}, +{"hyphentwo", 0x2010}, +{"i", 0x0069}, +{"iacute", 0x00ED}, +{"iacyrillic", 0x044F}, +{"ibengali", 0x0987}, +{"ibopomofo", 0x3127}, +{"ibreve", 0x012D}, +{"icaron", 0x01D0}, +{"icircle", 0x24D8}, +{"icircumflex", 0x00EE}, +{"icyrillic", 0x0456}, +{"idblgrave", 0x0209}, +{"ideographearthcircle", 0x328F}, +{"ideographfirecircle", 0x328B}, +{"ideographicallianceparen", 0x323F}, +{"ideographiccallparen", 0x323A}, +{"ideographiccentrecircle", 0x32A5}, +{"ideographicclose", 0x3006}, +{"ideographiccomma", 0x3001}, +{"ideographiccommaleft", 0xFF64}, +{"ideographiccongratulationparen", 0x3237}, +{"ideographiccorrectcircle", 0x32A3}, +{"ideographicearthparen", 0x322F}, +{"ideographicenterpriseparen", 0x323D}, +{"ideographicexcellentcircle", 0x329D}, +{"ideographicfestivalparen", 0x3240}, +{"ideographicfinancialcircle", 0x3296}, +{"ideographicfinancialparen", 0x3236}, +{"ideographicfireparen", 0x322B}, +{"ideographichaveparen", 0x3232}, +{"ideographichighcircle", 0x32A4}, +{"ideographiciterationmark", 0x3005}, 
+{"ideographiclaborcircle", 0x3298}, +{"ideographiclaborparen", 0x3238}, +{"ideographicleftcircle", 0x32A7}, +{"ideographiclowcircle", 0x32A6}, +{"ideographicmedicinecircle", 0x32A9}, +{"ideographicmetalparen", 0x322E}, +{"ideographicmoonparen", 0x322A}, +{"ideographicnameparen", 0x3234}, +{"ideographicperiod", 0x3002}, +{"ideographicprintcircle", 0x329E}, +{"ideographicreachparen", 0x3243}, +{"ideographicrepresentparen", 0x3239}, +{"ideographicresourceparen", 0x323E}, +{"ideographicrightcircle", 0x32A8}, +{"ideographicsecretcircle", 0x3299}, +{"ideographicselfparen", 0x3242}, +{"ideographicsocietyparen", 0x3233}, +{"ideographicspace", 0x3000}, +{"ideographicspecialparen", 0x3235}, +{"ideographicstockparen", 0x3231}, +{"ideographicstudyparen", 0x323B}, +{"ideographicsunparen", 0x3230}, +{"ideographicsuperviseparen", 0x323C}, +{"ideographicwaterparen", 0x322C}, +{"ideographicwoodparen", 0x322D}, +{"ideographiczero", 0x3007}, +{"ideographmetalcircle", 0x328E}, +{"ideographmooncircle", 0x328A}, +{"ideographnamecircle", 0x3294}, +{"ideographsuncircle", 0x3290}, +{"ideographwatercircle", 0x328C}, +{"ideographwoodcircle", 0x328D}, +{"ideva", 0x0907}, +{"idieresis", 0x00EF}, +{"idieresisacute", 0x1E2F}, +{"idieresiscyrillic", 0x04E5}, +{"idotbelow", 0x1ECB}, +{"iebrevecyrillic", 0x04D7}, +{"iecyrillic", 0x0435}, +{"ieungacirclekorean", 0x3275}, +{"ieungaparenkorean", 0x3215}, +{"ieungcirclekorean", 0x3267}, +{"ieungkorean", 0x3147}, +{"ieungparenkorean", 0x3207}, +{"igrave", 0x00EC}, +{"igujarati", 0x0A87}, +{"igurmukhi", 0x0A07}, +{"ihiragana", 0x3044}, +{"ihookabove", 0x1EC9}, +{"iibengali", 0x0988}, +{"iicyrillic", 0x0438}, +{"iideva", 0x0908}, +{"iigujarati", 0x0A88}, +{"iigurmukhi", 0x0A08}, +{"iimatragurmukhi", 0x0A40}, +{"iinvertedbreve", 0x020B}, +{"iishortcyrillic", 0x0439}, +{"iivowelsignbengali", 0x09C0}, +{"iivowelsigndeva", 0x0940}, +{"iivowelsigngujarati", 0x0AC0}, +{"ij", 0x0133}, +{"ikatakana", 0x30A4}, +{"ikatakanahalfwidth", 0xFF72}, +{"ikorean", 
0x3163}, +{"ilde", 0x02DC}, +{"iluyhebrew", 0x05AC}, +{"imacron", 0x012B}, +{"imacroncyrillic", 0x04E3}, +{"imageorapproximatelyequal", 0x2253}, +{"imatragurmukhi", 0x0A3F}, +{"imonospace", 0xFF49}, +{"increment", 0x2206}, +{"infinity", 0x221E}, +{"iniarmenian", 0x056B}, +{"integral", 0x222B}, +{"integralbottom", 0x2321}, +{"integralbt", 0x2321}, +{"integralex", 0xF8F5}, +{"integraltop", 0x2320}, +{"integraltp", 0x2320}, +{"intersection", 0x2229}, +{"intisquare", 0x3305}, +{"invbullet", 0x25D8}, +{"invcircle", 0x25D9}, +{"invsmileface", 0x263B}, +{"iocyrillic", 0x0451}, +{"iogonek", 0x012F}, +{"iota", 0x03B9}, +{"iotadieresis", 0x03CA}, +{"iotadieresistonos", 0x0390}, +{"iotalatin", 0x0269}, +{"iotatonos", 0x03AF}, +{"iparen", 0x24A4}, +{"irigurmukhi", 0x0A72}, +{"ismallhiragana", 0x3043}, +{"ismallkatakana", 0x30A3}, +{"ismallkatakanahalfwidth", 0xFF68}, +{"issharbengali", 0x09FA}, +{"istroke", 0x0268}, +{"isuperior", 0xF6ED}, +{"iterationhiragana", 0x309D}, +{"iterationkatakana", 0x30FD}, +{"itilde", 0x0129}, +{"itildebelow", 0x1E2D}, +{"iubopomofo", 0x3129}, +{"iucyrillic", 0x044E}, +{"ivowelsignbengali", 0x09BF}, +{"ivowelsigndeva", 0x093F}, +{"ivowelsigngujarati", 0x0ABF}, +{"izhitsacyrillic", 0x0475}, +{"izhitsadblgravecyrillic", 0x0477}, +{"j", 0x006A}, +{"jaarmenian", 0x0571}, +{"jabengali", 0x099C}, +{"jadeva", 0x091C}, +{"jagujarati", 0x0A9C}, +{"jagurmukhi", 0x0A1C}, +{"jbopomofo", 0x3110}, +{"jcaron", 0x01F0}, +{"jcircle", 0x24D9}, +{"jcircumflex", 0x0135}, +{"jcrossedtail", 0x029D}, +{"jdotlessstroke", 0x025F}, +{"jecyrillic", 0x0458}, +{"jeemarabic", 0x062C}, +{"jeemfinalarabic", 0xFE9E}, +{"jeeminitialarabic", 0xFE9F}, +{"jeemmedialarabic", 0xFEA0}, +{"jeharabic", 0x0698}, +{"jehfinalarabic", 0xFB8B}, +{"jhabengali", 0x099D}, +{"jhadeva", 0x091D}, +{"jhagujarati", 0x0A9D}, +{"jhagurmukhi", 0x0A1D}, +{"jheharmenian", 0x057B}, +{"jis", 0x3004}, +{"jmonospace", 0xFF4A}, +{"jparen", 0x24A5}, +{"jsuperior", 0x02B2}, +{"k", 0x006B}, +{"kabashkircyrillic", 
0x04A1}, +{"kabengali", 0x0995}, +{"kacute", 0x1E31}, +{"kacyrillic", 0x043A}, +{"kadescendercyrillic", 0x049B}, +{"kadeva", 0x0915}, +{"kaf", 0x05DB}, +{"kafarabic", 0x0643}, +{"kafdagesh", 0xFB3B}, +{"kafdageshhebrew", 0xFB3B}, +{"kaffinalarabic", 0xFEDA}, +{"kafhebrew", 0x05DB}, +{"kafinitialarabic", 0xFEDB}, +{"kafmedialarabic", 0xFEDC}, +{"kafrafehebrew", 0xFB4D}, +{"kagujarati", 0x0A95}, +{"kagurmukhi", 0x0A15}, +{"kahiragana", 0x304B}, +{"kahookcyrillic", 0x04C4}, +{"kakatakana", 0x30AB}, +{"kakatakanahalfwidth", 0xFF76}, +{"kappa", 0x03BA}, +{"kappasymbolgreek", 0x03F0}, +{"kapyeounmieumkorean", 0x3171}, +{"kapyeounphieuphkorean", 0x3184}, +{"kapyeounpieupkorean", 0x3178}, +{"kapyeounssangpieupkorean", 0x3179}, +{"karoriisquare", 0x330D}, +{"kashidaautoarabic", 0x0640}, +{"kashidaautonosidebearingarabic", 0x0640}, +{"kasmallkatakana", 0x30F5}, +{"kasquare", 0x3384}, +{"kasraarabic", 0x0650}, +{"kasratanarabic", 0x064D}, +{"kastrokecyrillic", 0x049F}, +{"katahiraprolongmarkhalfwidth", 0xFF70}, +{"kaverticalstrokecyrillic", 0x049D}, +{"kbopomofo", 0x310E}, +{"kcalsquare", 0x3389}, +{"kcaron", 0x01E9}, +{"kcedilla", 0x0137}, +{"kcircle", 0x24DA}, +{"kcommaaccent", 0x0137}, +{"kdotbelow", 0x1E33}, +{"keharmenian", 0x0584}, +{"kehiragana", 0x3051}, +{"kekatakana", 0x30B1}, +{"kekatakanahalfwidth", 0xFF79}, +{"kenarmenian", 0x056F}, +{"kesmallkatakana", 0x30F6}, +{"kgreenlandic", 0x0138}, +{"khabengali", 0x0996}, +{"khacyrillic", 0x0445}, +{"khadeva", 0x0916}, +{"khagujarati", 0x0A96}, +{"khagurmukhi", 0x0A16}, +{"khaharabic", 0x062E}, +{"khahfinalarabic", 0xFEA6}, +{"khahinitialarabic", 0xFEA7}, +{"khahmedialarabic", 0xFEA8}, +{"kheicoptic", 0x03E7}, +{"khhadeva", 0x0959}, +{"khhagurmukhi", 0x0A59}, +{"khieukhacirclekorean", 0x3278}, +{"khieukhaparenkorean", 0x3218}, +{"khieukhcirclekorean", 0x326A}, +{"khieukhkorean", 0x314B}, +{"khieukhparenkorean", 0x320A}, +{"khokhaithai", 0x0E02}, +{"khokhonthai", 0x0E05}, +{"khokhuatthai", 0x0E03}, +{"khokhwaithai", 
0x0E04}, +{"khomutthai", 0x0E5B}, +{"khook", 0x0199}, +{"khorakhangthai", 0x0E06}, +{"khzsquare", 0x3391}, +{"kihiragana", 0x304D}, +{"kikatakana", 0x30AD}, +{"kikatakanahalfwidth", 0xFF77}, +{"kiroguramusquare", 0x3315}, +{"kiromeetorusquare", 0x3316}, +{"kirosquare", 0x3314}, +{"kiyeokacirclekorean", 0x326E}, +{"kiyeokaparenkorean", 0x320E}, +{"kiyeokcirclekorean", 0x3260}, +{"kiyeokkorean", 0x3131}, +{"kiyeokparenkorean", 0x3200}, +{"kiyeoksioskorean", 0x3133}, +{"kjecyrillic", 0x045C}, +{"klinebelow", 0x1E35}, +{"klsquare", 0x3398}, +{"kmcubedsquare", 0x33A6}, +{"kmonospace", 0xFF4B}, +{"kmsquaredsquare", 0x33A2}, +{"kohiragana", 0x3053}, +{"kohmsquare", 0x33C0}, +{"kokaithai", 0x0E01}, +{"kokatakana", 0x30B3}, +{"kokatakanahalfwidth", 0xFF7A}, +{"kooposquare", 0x331E}, +{"koppacyrillic", 0x0481}, +{"koreanstandardsymbol", 0x327F}, +{"koroniscmb", 0x0343}, +{"kparen", 0x24A6}, +{"kpasquare", 0x33AA}, +{"ksicyrillic", 0x046F}, +{"ktsquare", 0x33CF}, +{"kturned", 0x029E}, +{"kuhiragana", 0x304F}, +{"kukatakana", 0x30AF}, +{"kukatakanahalfwidth", 0xFF78}, +{"kvsquare", 0x33B8}, +{"kwsquare", 0x33BE}, +{"l", 0x006C}, +{"labengali", 0x09B2}, +{"lacute", 0x013A}, +{"ladeva", 0x0932}, +{"lagujarati", 0x0AB2}, +{"lagurmukhi", 0x0A32}, +{"lakkhangyaothai", 0x0E45}, +{"lamaleffinalarabic", 0xFEFC}, +{"lamalefhamzaabovefinalarabic", 0xFEF8}, +{"lamalefhamzaaboveisolatedarabic", 0xFEF7}, +{"lamalefhamzabelowfinalarabic", 0xFEFA}, +{"lamalefhamzabelowisolatedarabic", 0xFEF9}, +{"lamalefisolatedarabic", 0xFEFB}, +{"lamalefmaddaabovefinalarabic", 0xFEF6}, +{"lamalefmaddaaboveisolatedarabic", 0xFEF5}, +{"lamarabic", 0x0644}, +{"lambda", 0x03BB}, +{"lambdastroke", 0x019B}, +{"lamed", 0x05DC}, +{"lameddagesh", 0xFB3C}, +{"lameddageshhebrew", 0xFB3C}, +{"lamedhebrew", 0x05DC}, +{"lamedholam", 0x05DC}, +{"lamedholamdagesh", 0x05DC}, +{"lamedholamdageshhebrew", 0x05DC}, +{"lamedholamhebrew", 0x05DC}, +{"lamfinalarabic", 0xFEDE}, +{"lamhahinitialarabic", 0xFCCA}, 
+{"laminitialarabic", 0xFEDF}, +{"lamjeeminitialarabic", 0xFCC9}, +{"lamkhahinitialarabic", 0xFCCB}, +{"lamlamhehisolatedarabic", 0xFDF2}, +{"lammedialarabic", 0xFEE0}, +{"lammeemhahinitialarabic", 0xFD88}, +{"lammeeminitialarabic", 0xFCCC}, +{"lammeemjeeminitialarabic", 0xFEDF}, +{"lammeemkhahinitialarabic", 0xFEDF}, +{"largecircle", 0x25EF}, +{"lbar", 0x019A}, +{"lbelt", 0x026C}, +{"lbopomofo", 0x310C}, +{"lcaron", 0x013E}, +{"lcedilla", 0x013C}, +{"lcircle", 0x24DB}, +{"lcircumflexbelow", 0x1E3D}, +{"lcommaaccent", 0x013C}, +{"ldot", 0x0140}, +{"ldotaccent", 0x0140}, +{"ldotbelow", 0x1E37}, +{"ldotbelowmacron", 0x1E39}, +{"leftangleabovecmb", 0x031A}, +{"lefttackbelowcmb", 0x0318}, +{"less", 0x003C}, +{"lessequal", 0x2264}, +{"lessequalorgreater", 0x22DA}, +{"lessmonospace", 0xFF1C}, +{"lessorequivalent", 0x2272}, +{"lessorgreater", 0x2276}, +{"lessoverequal", 0x2266}, +{"lesssmall", 0xFE64}, +{"lezh", 0x026E}, +{"lfblock", 0x258C}, +{"lhookretroflex", 0x026D}, +{"lira", 0x20A4}, +{"liwnarmenian", 0x056C}, +{"lj", 0x01C9}, +{"ljecyrillic", 0x0459}, +{"ll", 0xF6C0}, +{"lladeva", 0x0933}, +{"llagujarati", 0x0AB3}, +{"llinebelow", 0x1E3B}, +{"llladeva", 0x0934}, +{"llvocalicbengali", 0x09E1}, +{"llvocalicdeva", 0x0961}, +{"llvocalicvowelsignbengali", 0x09E3}, +{"llvocalicvowelsigndeva", 0x0963}, +{"lmiddletilde", 0x026B}, +{"lmonospace", 0xFF4C}, +{"lmsquare", 0x33D0}, +{"lochulathai", 0x0E2C}, +{"logicaland", 0x2227}, +{"logicalnot", 0x00AC}, +{"logicalnotreversed", 0x2310}, +{"logicalor", 0x2228}, +{"lolingthai", 0x0E25}, +{"longs", 0x017F}, +{"lowlinecenterline", 0xFE4E}, +{"lowlinecmb", 0x0332}, +{"lowlinedashed", 0xFE4D}, +{"lozenge", 0x25CA}, +{"lparen", 0x24A7}, +{"lslash", 0x0142}, +{"lsquare", 0x2113}, +{"lsuperior", 0xF6EE}, +{"ltshade", 0x2591}, +{"luthai", 0x0E26}, +{"lvocalicbengali", 0x098C}, +{"lvocalicdeva", 0x090C}, +{"lvocalicvowelsignbengali", 0x09E2}, +{"lvocalicvowelsigndeva", 0x0962}, +{"lxsquare", 0x33D3}, +{"m", 0x006D}, +{"mabengali", 
0x09AE}, +{"macron", 0x00AF}, +{"macronbelowcmb", 0x0331}, +{"macroncmb", 0x0304}, +{"macronlowmod", 0x02CD}, +{"macronmonospace", 0xFFE3}, +{"macute", 0x1E3F}, +{"madeva", 0x092E}, +{"magujarati", 0x0AAE}, +{"magurmukhi", 0x0A2E}, +{"mahapakhhebrew", 0x05A4}, +{"mahapakhlefthebrew", 0x05A4}, +{"mahiragana", 0x307E}, +{"maichattawalowleftthai", 0xF895}, +{"maichattawalowrightthai", 0xF894}, +{"maichattawathai", 0x0E4B}, +{"maichattawaupperleftthai", 0xF893}, +{"maieklowleftthai", 0xF88C}, +{"maieklowrightthai", 0xF88B}, +{"maiekthai", 0x0E48}, +{"maiekupperleftthai", 0xF88A}, +{"maihanakatleftthai", 0xF884}, +{"maihanakatthai", 0x0E31}, +{"maitaikhuleftthai", 0xF889}, +{"maitaikhuthai", 0x0E47}, +{"maitholowleftthai", 0xF88F}, +{"maitholowrightthai", 0xF88E}, +{"maithothai", 0x0E49}, +{"maithoupperleftthai", 0xF88D}, +{"maitrilowleftthai", 0xF892}, +{"maitrilowrightthai", 0xF891}, +{"maitrithai", 0x0E4A}, +{"maitriupperleftthai", 0xF890}, +{"maiyamokthai", 0x0E46}, +{"makatakana", 0x30DE}, +{"makatakanahalfwidth", 0xFF8F}, +{"male", 0x2642}, +{"mansyonsquare", 0x3347}, +{"maqafhebrew", 0x05BE}, +{"mars", 0x2642}, +{"masoracirclehebrew", 0x05AF}, +{"masquare", 0x3383}, +{"mbopomofo", 0x3107}, +{"mbsquare", 0x33D4}, +{"mcircle", 0x24DC}, +{"mcubedsquare", 0x33A5}, +{"mdotaccent", 0x1E41}, +{"mdotbelow", 0x1E43}, +{"meemarabic", 0x0645}, +{"meemfinalarabic", 0xFEE2}, +{"meeminitialarabic", 0xFEE3}, +{"meemmedialarabic", 0xFEE4}, +{"meemmeeminitialarabic", 0xFCD1}, +{"meemmeemisolatedarabic", 0xFC48}, +{"meetorusquare", 0x334D}, +{"mehiragana", 0x3081}, +{"meizierasquare", 0x337E}, +{"mekatakana", 0x30E1}, +{"mekatakanahalfwidth", 0xFF92}, +{"mem", 0x05DE}, +{"memdagesh", 0xFB3E}, +{"memdageshhebrew", 0xFB3E}, +{"memhebrew", 0x05DE}, +{"menarmenian", 0x0574}, +{"merkhahebrew", 0x05A5}, +{"merkhakefulahebrew", 0x05A6}, +{"merkhakefulalefthebrew", 0x05A6}, +{"merkhalefthebrew", 0x05A5}, +{"mhook", 0x0271}, +{"mhzsquare", 0x3392}, +{"middledotkatakanahalfwidth", 0xFF65}, 
+{"middot", 0x00B7}, +{"mieumacirclekorean", 0x3272}, +{"mieumaparenkorean", 0x3212}, +{"mieumcirclekorean", 0x3264}, +{"mieumkorean", 0x3141}, +{"mieumpansioskorean", 0x3170}, +{"mieumparenkorean", 0x3204}, +{"mieumpieupkorean", 0x316E}, +{"mieumsioskorean", 0x316F}, +{"mihiragana", 0x307F}, +{"mikatakana", 0x30DF}, +{"mikatakanahalfwidth", 0xFF90}, +{"minus", 0x2212}, +{"minusbelowcmb", 0x0320}, +{"minuscircle", 0x2296}, +{"minusmod", 0x02D7}, +{"minusplus", 0x2213}, +{"minute", 0x2032}, +{"miribaarusquare", 0x334A}, +{"mirisquare", 0x3349}, +{"mlonglegturned", 0x0270}, +{"mlsquare", 0x3396}, +{"mmcubedsquare", 0x33A3}, +{"mmonospace", 0xFF4D}, +{"mmsquaredsquare", 0x339F}, +{"mohiragana", 0x3082}, +{"mohmsquare", 0x33C1}, +{"mokatakana", 0x30E2}, +{"mokatakanahalfwidth", 0xFF93}, +{"molsquare", 0x33D6}, +{"momathai", 0x0E21}, +{"moverssquare", 0x33A7}, +{"moverssquaredsquare", 0x33A8}, +{"mparen", 0x24A8}, +{"mpasquare", 0x33AB}, +{"mssquare", 0x33B3}, +{"msuperior", 0xF6EF}, +{"mturned", 0x026F}, +{"mu", 0x00B5}, +{"mu1", 0x00B5}, +{"muasquare", 0x3382}, +{"muchgreater", 0x226B}, +{"muchless", 0x226A}, +{"mufsquare", 0x338C}, +{"mugreek", 0x03BC}, +{"mugsquare", 0x338D}, +{"muhiragana", 0x3080}, +{"mukatakana", 0x30E0}, +{"mukatakanahalfwidth", 0xFF91}, +{"mulsquare", 0x3395}, +{"multiply", 0x00D7}, +{"mumsquare", 0x339B}, +{"munahhebrew", 0x05A3}, +{"munahlefthebrew", 0x05A3}, +{"musicalnote", 0x266A}, +{"musicalnotedbl", 0x266B}, +{"musicflatsign", 0x266D}, +{"musicsharpsign", 0x266F}, +{"mussquare", 0x33B2}, +{"muvsquare", 0x33B6}, +{"muwsquare", 0x33BC}, +{"mvmegasquare", 0x33B9}, +{"mvsquare", 0x33B7}, +{"mwmegasquare", 0x33BF}, +{"mwsquare", 0x33BD}, +{"n", 0x006E}, +{"nabengali", 0x09A8}, +{"nabla", 0x2207}, +{"nacute", 0x0144}, +{"nadeva", 0x0928}, +{"nagujarati", 0x0AA8}, +{"nagurmukhi", 0x0A28}, +{"nahiragana", 0x306A}, +{"nakatakana", 0x30CA}, +{"nakatakanahalfwidth", 0xFF85}, +{"napostrophe", 0x0149}, +{"nasquare", 0x3381}, +{"nbopomofo", 0x310B}, 
+{"nbspace", 0x00A0}, +{"ncaron", 0x0148}, +{"ncedilla", 0x0146}, +{"ncircle", 0x24DD}, +{"ncircumflexbelow", 0x1E4B}, +{"ncommaaccent", 0x0146}, +{"ndotaccent", 0x1E45}, +{"ndotbelow", 0x1E47}, +{"nehiragana", 0x306D}, +{"nekatakana", 0x30CD}, +{"nekatakanahalfwidth", 0xFF88}, +{"newsheqelsign", 0x20AA}, +{"nfsquare", 0x338B}, +{"ngabengali", 0x0999}, +{"ngadeva", 0x0919}, +{"ngagujarati", 0x0A99}, +{"ngagurmukhi", 0x0A19}, +{"ngonguthai", 0x0E07}, +{"nhiragana", 0x3093}, +{"nhookleft", 0x0272}, +{"nhookretroflex", 0x0273}, +{"nieunacirclekorean", 0x326F}, +{"nieunaparenkorean", 0x320F}, +{"nieuncieuckorean", 0x3135}, +{"nieuncirclekorean", 0x3261}, +{"nieunhieuhkorean", 0x3136}, +{"nieunkorean", 0x3134}, +{"nieunpansioskorean", 0x3168}, +{"nieunparenkorean", 0x3201}, +{"nieunsioskorean", 0x3167}, +{"nieuntikeutkorean", 0x3166}, +{"nihiragana", 0x306B}, +{"nikatakana", 0x30CB}, +{"nikatakanahalfwidth", 0xFF86}, +{"nikhahitleftthai", 0xF899}, +{"nikhahitthai", 0x0E4D}, +{"nine", 0x0039}, +{"ninearabic", 0x0669}, +{"ninebengali", 0x09EF}, +{"ninecircle", 0x2468}, +{"ninecircleinversesansserif", 0x2792}, +{"ninedeva", 0x096F}, +{"ninegujarati", 0x0AEF}, +{"ninegurmukhi", 0x0A6F}, +{"ninehackarabic", 0x0669}, +{"ninehangzhou", 0x3029}, +{"nineideographicparen", 0x3228}, +{"nineinferior", 0x2089}, +{"ninemonospace", 0xFF19}, +{"nineoldstyle", 0xF739}, +{"nineparen", 0x247C}, +{"nineperiod", 0x2490}, +{"ninepersian", 0x06F9}, +{"nineroman", 0x2178}, +{"ninesuperior", 0x2079}, +{"nineteencircle", 0x2472}, +{"nineteenparen", 0x2486}, +{"nineteenperiod", 0x249A}, +{"ninethai", 0x0E59}, +{"nj", 0x01CC}, +{"njecyrillic", 0x045A}, +{"nkatakana", 0x30F3}, +{"nkatakanahalfwidth", 0xFF9D}, +{"nlegrightlong", 0x019E}, +{"nlinebelow", 0x1E49}, +{"nmonospace", 0xFF4E}, +{"nmsquare", 0x339A}, +{"nnabengali", 0x09A3}, +{"nnadeva", 0x0923}, +{"nnagujarati", 0x0AA3}, +{"nnagurmukhi", 0x0A23}, +{"nnnadeva", 0x0929}, +{"nohiragana", 0x306E}, +{"nokatakana", 0x30CE}, 
+{"nokatakanahalfwidth", 0xFF89}, +{"nonbreakingspace", 0x00A0}, +{"nonenthai", 0x0E13}, +{"nonuthai", 0x0E19}, +{"noonarabic", 0x0646}, +{"noonfinalarabic", 0xFEE6}, +{"noonghunnaarabic", 0x06BA}, +{"noonghunnafinalarabic", 0xFB9F}, +{"noonhehinitialarabic", 0xFEE7}, +{"nooninitialarabic", 0xFEE7}, +{"noonjeeminitialarabic", 0xFCD2}, +{"noonjeemisolatedarabic", 0xFC4B}, +{"noonmedialarabic", 0xFEE8}, +{"noonmeeminitialarabic", 0xFCD5}, +{"noonmeemisolatedarabic", 0xFC4E}, +{"noonnoonfinalarabic", 0xFC8D}, +{"notcontains", 0x220C}, +{"notelement", 0x2209}, +{"notelementof", 0x2209}, +{"notequal", 0x2260}, +{"notgreater", 0x226F}, +{"notgreaternorequal", 0x2271}, +{"notgreaternorless", 0x2279}, +{"notidentical", 0x2262}, +{"notless", 0x226E}, +{"notlessnorequal", 0x2270}, +{"notparallel", 0x2226}, +{"notprecedes", 0x2280}, +{"notsubset", 0x2284}, +{"notsucceeds", 0x2281}, +{"notsuperset", 0x2285}, +{"nowarmenian", 0x0576}, +{"nparen", 0x24A9}, +{"nssquare", 0x33B1}, +{"nsuperior", 0x207F}, +{"ntilde", 0x00F1}, +{"nu", 0x03BD}, +{"nuhiragana", 0x306C}, +{"nukatakana", 0x30CC}, +{"nukatakanahalfwidth", 0xFF87}, +{"nuktabengali", 0x09BC}, +{"nuktadeva", 0x093C}, +{"nuktagujarati", 0x0ABC}, +{"nuktagurmukhi", 0x0A3C}, +{"numbersign", 0x0023}, +{"numbersignmonospace", 0xFF03}, +{"numbersignsmall", 0xFE5F}, +{"numeralsigngreek", 0x0374}, +{"numeralsignlowergreek", 0x0375}, +{"numero", 0x2116}, +{"nun", 0x05E0}, +{"nundagesh", 0xFB40}, +{"nundageshhebrew", 0xFB40}, +{"nunhebrew", 0x05E0}, +{"nvsquare", 0x33B5}, +{"nwsquare", 0x33BB}, +{"nyabengali", 0x099E}, +{"nyadeva", 0x091E}, +{"nyagujarati", 0x0A9E}, +{"nyagurmukhi", 0x0A1E}, +{"o", 0x006F}, +{"oacute", 0x00F3}, +{"oangthai", 0x0E2D}, +{"obarred", 0x0275}, +{"obarredcyrillic", 0x04E9}, +{"obarreddieresiscyrillic", 0x04EB}, +{"obengali", 0x0993}, +{"obopomofo", 0x311B}, +{"obreve", 0x014F}, +{"ocandradeva", 0x0911}, +{"ocandragujarati", 0x0A91}, +{"ocandravowelsigndeva", 0x0949}, +{"ocandravowelsigngujarati", 0x0AC9}, 
+{"ocaron", 0x01D2}, +{"ocircle", 0x24DE}, +{"ocircumflex", 0x00F4}, +{"ocircumflexacute", 0x1ED1}, +{"ocircumflexdotbelow", 0x1ED9}, +{"ocircumflexgrave", 0x1ED3}, +{"ocircumflexhookabove", 0x1ED5}, +{"ocircumflextilde", 0x1ED7}, +{"ocyrillic", 0x043E}, +{"odblacute", 0x0151}, +{"odblgrave", 0x020D}, +{"odeva", 0x0913}, +{"odieresis", 0x00F6}, +{"odieresiscyrillic", 0x04E7}, +{"odotbelow", 0x1ECD}, +{"oe", 0x0153}, +{"oekorean", 0x315A}, +{"ogonek", 0x02DB}, +{"ogonekcmb", 0x0328}, +{"ograve", 0x00F2}, +{"ogujarati", 0x0A93}, +{"oharmenian", 0x0585}, +{"ohiragana", 0x304A}, +{"ohookabove", 0x1ECF}, +{"ohorn", 0x01A1}, +{"ohornacute", 0x1EDB}, +{"ohorndotbelow", 0x1EE3}, +{"ohorngrave", 0x1EDD}, +{"ohornhookabove", 0x1EDF}, +{"ohorntilde", 0x1EE1}, +{"ohungarumlaut", 0x0151}, +{"oi", 0x01A3}, +{"oinvertedbreve", 0x020F}, +{"okatakana", 0x30AA}, +{"okatakanahalfwidth", 0xFF75}, +{"okorean", 0x3157}, +{"olehebrew", 0x05AB}, +{"omacron", 0x014D}, +{"omacronacute", 0x1E53}, +{"omacrongrave", 0x1E51}, +{"omdeva", 0x0950}, +{"omega", 0x03C9}, +{"omega1", 0x03D6}, +{"omegacyrillic", 0x0461}, +{"omegalatinclosed", 0x0277}, +{"omegaroundcyrillic", 0x047B}, +{"omegatitlocyrillic", 0x047D}, +{"omegatonos", 0x03CE}, +{"omgujarati", 0x0AD0}, +{"omicron", 0x03BF}, +{"omicrontonos", 0x03CC}, +{"omonospace", 0xFF4F}, +{"one", 0x0031}, +{"onearabic", 0x0661}, +{"onebengali", 0x09E7}, +{"onecircle", 0x2460}, +{"onecircleinversesansserif", 0x278A}, +{"onedeva", 0x0967}, +{"onedotenleader", 0x2024}, +{"oneeighth", 0x215B}, +{"onefitted", 0xF6DC}, +{"onegujarati", 0x0AE7}, +{"onegurmukhi", 0x0A67}, +{"onehackarabic", 0x0661}, +{"onehalf", 0x00BD}, +{"onehangzhou", 0x3021}, +{"oneideographicparen", 0x3220}, +{"oneinferior", 0x2081}, +{"onemonospace", 0xFF11}, +{"onenumeratorbengali", 0x09F4}, +{"oneoldstyle", 0xF731}, +{"oneparen", 0x2474}, +{"oneperiod", 0x2488}, +{"onepersian", 0x06F1}, +{"onequarter", 0x00BC}, +{"oneroman", 0x2170}, +{"onesuperior", 0x00B9}, +{"onethai", 0x0E51}, 
+{"onethird", 0x2153}, +{"oogonek", 0x01EB}, +{"oogonekmacron", 0x01ED}, +{"oogurmukhi", 0x0A13}, +{"oomatragurmukhi", 0x0A4B}, +{"oopen", 0x0254}, +{"oparen", 0x24AA}, +{"openbullet", 0x25E6}, +{"option", 0x2325}, +{"ordfeminine", 0x00AA}, +{"ordmasculine", 0x00BA}, +{"orthogonal", 0x221F}, +{"oshortdeva", 0x0912}, +{"oshortvowelsigndeva", 0x094A}, +{"oslash", 0x00F8}, +{"oslashacute", 0x01FF}, +{"osmallhiragana", 0x3049}, +{"osmallkatakana", 0x30A9}, +{"osmallkatakanahalfwidth", 0xFF6B}, +{"ostrokeacute", 0x01FF}, +{"osuperior", 0xF6F0}, +{"otcyrillic", 0x047F}, +{"otilde", 0x00F5}, +{"otildeacute", 0x1E4D}, +{"otildedieresis", 0x1E4F}, +{"oubopomofo", 0x3121}, +{"overline", 0x203E}, +{"overlinecenterline", 0xFE4A}, +{"overlinecmb", 0x0305}, +{"overlinedashed", 0xFE49}, +{"overlinedblwavy", 0xFE4C}, +{"overlinewavy", 0xFE4B}, +{"overscore", 0x00AF}, +{"ovowelsignbengali", 0x09CB}, +{"ovowelsigndeva", 0x094B}, +{"ovowelsigngujarati", 0x0ACB}, +{"p", 0x0070}, +{"paampssquare", 0x3380}, +{"paasentosquare", 0x332B}, +{"pabengali", 0x09AA}, +{"pacute", 0x1E55}, +{"padeva", 0x092A}, +{"pagedown", 0x21DF}, +{"pageup", 0x21DE}, +{"pagujarati", 0x0AAA}, +{"pagurmukhi", 0x0A2A}, +{"pahiragana", 0x3071}, +{"paiyannoithai", 0x0E2F}, +{"pakatakana", 0x30D1}, +{"palatalizationcyrilliccmb", 0x0484}, +{"palochkacyrillic", 0x04C0}, +{"pansioskorean", 0x317F}, +{"paragraph", 0x00B6}, +{"parallel", 0x2225}, +{"parenleft", 0x0028}, +{"parenleftaltonearabic", 0xFD3E}, +{"parenleftbt", 0xF8ED}, +{"parenleftex", 0xF8EC}, +{"parenleftinferior", 0x208D}, +{"parenleftmonospace", 0xFF08}, +{"parenleftsmall", 0xFE59}, +{"parenleftsuperior", 0x207D}, +{"parenlefttp", 0xF8EB}, +{"parenleftvertical", 0xFE35}, +{"parenright", 0x0029}, +{"parenrightaltonearabic", 0xFD3F}, +{"parenrightbt", 0xF8F8}, +{"parenrightex", 0xF8F7}, +{"parenrightinferior", 0x208E}, +{"parenrightmonospace", 0xFF09}, +{"parenrightsmall", 0xFE5A}, +{"parenrightsuperior", 0x207E}, +{"parenrighttp", 0xF8F6}, 
+{"parenrightvertical", 0xFE36}, +{"partialdiff", 0x2202}, +{"paseqhebrew", 0x05C0}, +{"pashtahebrew", 0x0599}, +{"pasquare", 0x33A9}, +{"patah", 0x05B7}, +{"patah11", 0x05B7}, +{"patah1d", 0x05B7}, +{"patah2a", 0x05B7}, +{"patahhebrew", 0x05B7}, +{"patahnarrowhebrew", 0x05B7}, +{"patahquarterhebrew", 0x05B7}, +{"patahwidehebrew", 0x05B7}, +{"pazerhebrew", 0x05A1}, +{"pbopomofo", 0x3106}, +{"pcircle", 0x24DF}, +{"pdotaccent", 0x1E57}, +{"pe", 0x05E4}, +{"pecyrillic", 0x043F}, +{"pedagesh", 0xFB44}, +{"pedageshhebrew", 0xFB44}, +{"peezisquare", 0x333B}, +{"pefinaldageshhebrew", 0xFB43}, +{"peharabic", 0x067E}, +{"peharmenian", 0x057A}, +{"pehebrew", 0x05E4}, +{"pehfinalarabic", 0xFB57}, +{"pehinitialarabic", 0xFB58}, +{"pehiragana", 0x307A}, +{"pehmedialarabic", 0xFB59}, +{"pekatakana", 0x30DA}, +{"pemiddlehookcyrillic", 0x04A7}, +{"perafehebrew", 0xFB4E}, +{"percent", 0x0025}, +{"percentarabic", 0x066A}, +{"percentmonospace", 0xFF05}, +{"percentsmall", 0xFE6A}, +{"period", 0x002E}, +{"periodarmenian", 0x0589}, +{"periodcentered", 0x00B7}, +{"periodhalfwidth", 0xFF61}, +{"periodinferior", 0xF6E7}, +{"periodmonospace", 0xFF0E}, +{"periodsmall", 0xFE52}, +{"periodsuperior", 0xF6E8}, +{"perispomenigreekcmb", 0x0342}, +{"perpendicular", 0x22A5}, +{"perthousand", 0x2030}, +{"peseta", 0x20A7}, +{"pfsquare", 0x338A}, +{"phabengali", 0x09AB}, +{"phadeva", 0x092B}, +{"phagujarati", 0x0AAB}, +{"phagurmukhi", 0x0A2B}, +{"phi", 0x03C6}, +{"phi1", 0x03D5}, +{"phieuphacirclekorean", 0x327A}, +{"phieuphaparenkorean", 0x321A}, +{"phieuphcirclekorean", 0x326C}, +{"phieuphkorean", 0x314D}, +{"phieuphparenkorean", 0x320C}, +{"philatin", 0x0278}, +{"phinthuthai", 0x0E3A}, +{"phisymbolgreek", 0x03D5}, +{"phook", 0x01A5}, +{"phophanthai", 0x0E1E}, +{"phophungthai", 0x0E1C}, +{"phosamphaothai", 0x0E20}, +{"pi", 0x03C0}, +{"pieupacirclekorean", 0x3273}, +{"pieupaparenkorean", 0x3213}, +{"pieupcieuckorean", 0x3176}, +{"pieupcirclekorean", 0x3265}, +{"pieupkiyeokkorean", 0x3172}, 
+{"pieupkorean", 0x3142}, +{"pieupparenkorean", 0x3205}, +{"pieupsioskiyeokkorean", 0x3174}, +{"pieupsioskorean", 0x3144}, +{"pieupsiostikeutkorean", 0x3175}, +{"pieupthieuthkorean", 0x3177}, +{"pieuptikeutkorean", 0x3173}, +{"pihiragana", 0x3074}, +{"pikatakana", 0x30D4}, +{"pisymbolgreek", 0x03D6}, +{"piwrarmenian", 0x0583}, +{"plus", 0x002B}, +{"plusbelowcmb", 0x031F}, +{"pluscircle", 0x2295}, +{"plusminus", 0x00B1}, +{"plusmod", 0x02D6}, +{"plusmonospace", 0xFF0B}, +{"plussmall", 0xFE62}, +{"plussuperior", 0x207A}, +{"pmonospace", 0xFF50}, +{"pmsquare", 0x33D8}, +{"pohiragana", 0x307D}, +{"pointingindexdownwhite", 0x261F}, +{"pointingindexleftwhite", 0x261C}, +{"pointingindexrightwhite", 0x261E}, +{"pointingindexupwhite", 0x261D}, +{"pokatakana", 0x30DD}, +{"poplathai", 0x0E1B}, +{"postalmark", 0x3012}, +{"postalmarkface", 0x3020}, +{"pparen", 0x24AB}, +{"precedes", 0x227A}, +{"prescription", 0x211E}, +{"primemod", 0x02B9}, +{"primereversed", 0x2035}, +{"product", 0x220F}, +{"projective", 0x2305}, +{"prolongedkana", 0x30FC}, +{"propellor", 0x2318}, +{"propersubset", 0x2282}, +{"propersuperset", 0x2283}, +{"proportion", 0x2237}, +{"proportional", 0x221D}, +{"psi", 0x03C8}, +{"psicyrillic", 0x0471}, +{"psilipneumatacyrilliccmb", 0x0486}, +{"pssquare", 0x33B0}, +{"puhiragana", 0x3077}, +{"pukatakana", 0x30D7}, +{"pvsquare", 0x33B4}, +{"pwsquare", 0x33BA}, +{"q", 0x0071}, +{"qadeva", 0x0958}, +{"qadmahebrew", 0x05A8}, +{"qafarabic", 0x0642}, +{"qaffinalarabic", 0xFED6}, +{"qafinitialarabic", 0xFED7}, +{"qafmedialarabic", 0xFED8}, +{"qamats", 0x05B8}, +{"qamats10", 0x05B8}, +{"qamats1a", 0x05B8}, +{"qamats1c", 0x05B8}, +{"qamats27", 0x05B8}, +{"qamats29", 0x05B8}, +{"qamats33", 0x05B8}, +{"qamatsde", 0x05B8}, +{"qamatshebrew", 0x05B8}, +{"qamatsnarrowhebrew", 0x05B8}, +{"qamatsqatanhebrew", 0x05B8}, +{"qamatsqatannarrowhebrew", 0x05B8}, +{"qamatsqatanquarterhebrew", 0x05B8}, +{"qamatsqatanwidehebrew", 0x05B8}, +{"qamatsquarterhebrew", 0x05B8}, +{"qamatswidehebrew", 
0x05B8}, +{"qarneyparahebrew", 0x059F}, +{"qbopomofo", 0x3111}, +{"qcircle", 0x24E0}, +{"qhook", 0x02A0}, +{"qmonospace", 0xFF51}, +{"qof", 0x05E7}, +{"qofdagesh", 0xFB47}, +{"qofdageshhebrew", 0xFB47}, +{"qofhatafpatah", 0x05E7}, +{"qofhatafpatahhebrew", 0x05E7}, +{"qofhatafsegol", 0x05E7}, +{"qofhatafsegolhebrew", 0x05E7}, +{"qofhebrew", 0x05E7}, +{"qofhiriq", 0x05E7}, +{"qofhiriqhebrew", 0x05E7}, +{"qofholam", 0x05E7}, +{"qofholamhebrew", 0x05E7}, +{"qofpatah", 0x05E7}, +{"qofpatahhebrew", 0x05E7}, +{"qofqamats", 0x05E7}, +{"qofqamatshebrew", 0x05E7}, +{"qofqubuts", 0x05E7}, +{"qofqubutshebrew", 0x05E7}, +{"qofsegol", 0x05E7}, +{"qofsegolhebrew", 0x05E7}, +{"qofsheva", 0x05E7}, +{"qofshevahebrew", 0x05E7}, +{"qoftsere", 0x05E7}, +{"qoftserehebrew", 0x05E7}, +{"qparen", 0x24AC}, +{"quarternote", 0x2669}, +{"qubuts", 0x05BB}, +{"qubuts18", 0x05BB}, +{"qubuts25", 0x05BB}, +{"qubuts31", 0x05BB}, +{"qubutshebrew", 0x05BB}, +{"qubutsnarrowhebrew", 0x05BB}, +{"qubutsquarterhebrew", 0x05BB}, +{"qubutswidehebrew", 0x05BB}, +{"question", 0x003F}, +{"questionarabic", 0x061F}, +{"questionarmenian", 0x055E}, +{"questiondown", 0x00BF}, +{"questiondownsmall", 0xF7BF}, +{"questiongreek", 0x037E}, +{"questionmonospace", 0xFF1F}, +{"questionsmall", 0xF73F}, +{"quotedbl", 0x0022}, +{"quotedblbase", 0x201E}, +{"quotedblleft", 0x201C}, +{"quotedblmonospace", 0xFF02}, +{"quotedblprime", 0x301E}, +{"quotedblprimereversed", 0x301D}, +{"quotedblright", 0x201D}, +{"quoteleft", 0x2018}, +{"quoteleftreversed", 0x201B}, +{"quotereversed", 0x201B}, +{"quoteright", 0x2019}, +{"quoterightn", 0x0149}, +{"quotesinglbase", 0x201A}, +{"quotesingle", 0x0027}, +{"quotesinglemonospace", 0xFF07}, +{"r", 0x0072}, +{"raarmenian", 0x057C}, +{"rabengali", 0x09B0}, +{"racute", 0x0155}, +{"radeva", 0x0930}, +{"radical", 0x221A}, +{"radicalex", 0xF8E5}, +{"radoverssquare", 0x33AE}, +{"radoverssquaredsquare", 0x33AF}, +{"radsquare", 0x33AD}, +{"rafe", 0x05BF}, +{"rafehebrew", 0x05BF}, +{"ragujarati", 0x0AB0}, 
+{"ragurmukhi", 0x0A30}, +{"rahiragana", 0x3089}, +{"rakatakana", 0x30E9}, +{"rakatakanahalfwidth", 0xFF97}, +{"ralowerdiagonalbengali", 0x09F1}, +{"ramiddlediagonalbengali", 0x09F0}, +{"ramshorn", 0x0264}, +{"ratio", 0x2236}, +{"rbopomofo", 0x3116}, +{"rcaron", 0x0159}, +{"rcedilla", 0x0157}, +{"rcircle", 0x24E1}, +{"rcommaaccent", 0x0157}, +{"rdblgrave", 0x0211}, +{"rdotaccent", 0x1E59}, +{"rdotbelow", 0x1E5B}, +{"rdotbelowmacron", 0x1E5D}, +{"referencemark", 0x203B}, +{"reflexsubset", 0x2286}, +{"reflexsuperset", 0x2287}, +{"registered", 0x00AE}, +{"registersans", 0xF8E8}, +{"registerserif", 0xF6DA}, +{"reharabic", 0x0631}, +{"reharmenian", 0x0580}, +{"rehfinalarabic", 0xFEAE}, +{"rehiragana", 0x308C}, +{"rehyehaleflamarabic", 0x0631}, +{"rekatakana", 0x30EC}, +{"rekatakanahalfwidth", 0xFF9A}, +{"resh", 0x05E8}, +{"reshdageshhebrew", 0xFB48}, +{"reshhatafpatah", 0x05E8}, +{"reshhatafpatahhebrew", 0x05E8}, +{"reshhatafsegol", 0x05E8}, +{"reshhatafsegolhebrew", 0x05E8}, +{"reshhebrew", 0x05E8}, +{"reshhiriq", 0x05E8}, +{"reshhiriqhebrew", 0x05E8}, +{"reshholam", 0x05E8}, +{"reshholamhebrew", 0x05E8}, +{"reshpatah", 0x05E8}, +{"reshpatahhebrew", 0x05E8}, +{"reshqamats", 0x05E8}, +{"reshqamatshebrew", 0x05E8}, +{"reshqubuts", 0x05E8}, +{"reshqubutshebrew", 0x05E8}, +{"reshsegol", 0x05E8}, +{"reshsegolhebrew", 0x05E8}, +{"reshsheva", 0x05E8}, +{"reshshevahebrew", 0x05E8}, +{"reshtsere", 0x05E8}, +{"reshtserehebrew", 0x05E8}, +{"reversedtilde", 0x223D}, +{"reviahebrew", 0x0597}, +{"reviamugrashhebrew", 0x0597}, +{"revlogicalnot", 0x2310}, +{"rfishhook", 0x027E}, +{"rfishhookreversed", 0x027F}, +{"rhabengali", 0x09DD}, +{"rhadeva", 0x095D}, +{"rho", 0x03C1}, +{"rhook", 0x027D}, +{"rhookturned", 0x027B}, +{"rhookturnedsuperior", 0x02B5}, +{"rhosymbolgreek", 0x03F1}, +{"rhotichookmod", 0x02DE}, +{"rieulacirclekorean", 0x3271}, +{"rieulaparenkorean", 0x3211}, +{"rieulcirclekorean", 0x3263}, +{"rieulhieuhkorean", 0x3140}, +{"rieulkiyeokkorean", 0x313A}, 
+{"rieulkiyeoksioskorean", 0x3169}, +{"rieulkorean", 0x3139}, +{"rieulmieumkorean", 0x313B}, +{"rieulpansioskorean", 0x316C}, +{"rieulparenkorean", 0x3203}, +{"rieulphieuphkorean", 0x313F}, +{"rieulpieupkorean", 0x313C}, +{"rieulpieupsioskorean", 0x316B}, +{"rieulsioskorean", 0x313D}, +{"rieulthieuthkorean", 0x313E}, +{"rieultikeutkorean", 0x316A}, +{"rieulyeorinhieuhkorean", 0x316D}, +{"rightangle", 0x221F}, +{"righttackbelowcmb", 0x0319}, +{"righttriangle", 0x22BF}, +{"rihiragana", 0x308A}, +{"rikatakana", 0x30EA}, +{"rikatakanahalfwidth", 0xFF98}, +{"ring", 0x02DA}, +{"ringbelowcmb", 0x0325}, +{"ringcmb", 0x030A}, +{"ringhalfleft", 0x02BF}, +{"ringhalfleftarmenian", 0x0559}, +{"ringhalfleftbelowcmb", 0x031C}, +{"ringhalfleftcentered", 0x02D3}, +{"ringhalfright", 0x02BE}, +{"ringhalfrightbelowcmb", 0x0339}, +{"ringhalfrightcentered", 0x02D2}, +{"rinvertedbreve", 0x0213}, +{"rittorusquare", 0x3351}, +{"rlinebelow", 0x1E5F}, +{"rlongleg", 0x027C}, +{"rlonglegturned", 0x027A}, +{"rmonospace", 0xFF52}, +{"rohiragana", 0x308D}, +{"rokatakana", 0x30ED}, +{"rokatakanahalfwidth", 0xFF9B}, +{"roruathai", 0x0E23}, +{"rparen", 0x24AD}, +{"rrabengali", 0x09DC}, +{"rradeva", 0x0931}, +{"rragurmukhi", 0x0A5C}, +{"rreharabic", 0x0691}, +{"rrehfinalarabic", 0xFB8D}, +{"rrvocalicbengali", 0x09E0}, +{"rrvocalicdeva", 0x0960}, +{"rrvocalicgujarati", 0x0AE0}, +{"rrvocalicvowelsignbengali", 0x09C4}, +{"rrvocalicvowelsigndeva", 0x0944}, +{"rrvocalicvowelsigngujarati", 0x0AC4}, +{"rsuperior", 0xF6F1}, +{"rtblock", 0x2590}, +{"rturned", 0x0279}, +{"rturnedsuperior", 0x02B4}, +{"ruhiragana", 0x308B}, +{"rukatakana", 0x30EB}, +{"rukatakanahalfwidth", 0xFF99}, +{"rupeemarkbengali", 0x09F2}, +{"rupeesignbengali", 0x09F3}, +{"rupiah", 0xF6DD}, +{"ruthai", 0x0E24}, +{"rvocalicbengali", 0x098B}, +{"rvocalicdeva", 0x090B}, +{"rvocalicgujarati", 0x0A8B}, +{"rvocalicvowelsignbengali", 0x09C3}, +{"rvocalicvowelsigndeva", 0x0943}, +{"rvocalicvowelsigngujarati", 0x0AC3}, +{"s", 0x0073}, 
+{"sabengali", 0x09B8}, +{"sacute", 0x015B}, +{"sacutedotaccent", 0x1E65}, +{"sadarabic", 0x0635}, +{"sadeva", 0x0938}, +{"sadfinalarabic", 0xFEBA}, +{"sadinitialarabic", 0xFEBB}, +{"sadmedialarabic", 0xFEBC}, +{"sagujarati", 0x0AB8}, +{"sagurmukhi", 0x0A38}, +{"sahiragana", 0x3055}, +{"sakatakana", 0x30B5}, +{"sakatakanahalfwidth", 0xFF7B}, +{"sallallahoualayhewasallamarabic", 0xFDFA}, +{"samekh", 0x05E1}, +{"samekhdagesh", 0xFB41}, +{"samekhdageshhebrew", 0xFB41}, +{"samekhhebrew", 0x05E1}, +{"saraaathai", 0x0E32}, +{"saraaethai", 0x0E41}, +{"saraaimaimalaithai", 0x0E44}, +{"saraaimaimuanthai", 0x0E43}, +{"saraamthai", 0x0E33}, +{"saraathai", 0x0E30}, +{"saraethai", 0x0E40}, +{"saraiileftthai", 0xF886}, +{"saraiithai", 0x0E35}, +{"saraileftthai", 0xF885}, +{"saraithai", 0x0E34}, +{"saraothai", 0x0E42}, +{"saraueeleftthai", 0xF888}, +{"saraueethai", 0x0E37}, +{"saraueleftthai", 0xF887}, +{"sarauethai", 0x0E36}, +{"sarauthai", 0x0E38}, +{"sarauuthai", 0x0E39}, +{"sbopomofo", 0x3119}, +{"scaron", 0x0161}, +{"scarondotaccent", 0x1E67}, +{"scedilla", 0x015F}, +{"schwa", 0x0259}, +{"schwacyrillic", 0x04D9}, +{"schwadieresiscyrillic", 0x04DB}, +{"schwahook", 0x025A}, +{"scircle", 0x24E2}, +{"scircumflex", 0x015D}, +{"scommaaccent", 0x0219}, +{"sdotaccent", 0x1E61}, +{"sdotbelow", 0x1E63}, +{"sdotbelowdotaccent", 0x1E69}, +{"seagullbelowcmb", 0x033C}, +{"second", 0x2033}, +{"secondtonechinese", 0x02CA}, +{"section", 0x00A7}, +{"seenarabic", 0x0633}, +{"seenfinalarabic", 0xFEB2}, +{"seeninitialarabic", 0xFEB3}, +{"seenmedialarabic", 0xFEB4}, +{"segol", 0x05B6}, +{"segol13", 0x05B6}, +{"segol1f", 0x05B6}, +{"segol2c", 0x05B6}, +{"segolhebrew", 0x05B6}, +{"segolnarrowhebrew", 0x05B6}, +{"segolquarterhebrew", 0x05B6}, +{"segoltahebrew", 0x0592}, +{"segolwidehebrew", 0x05B6}, +{"seharmenian", 0x057D}, +{"sehiragana", 0x305B}, +{"sekatakana", 0x30BB}, +{"sekatakanahalfwidth", 0xFF7E}, +{"semicolon", 0x003B}, +{"semicolonarabic", 0x061B}, +{"semicolonmonospace", 0xFF1B}, 
+{"semicolonsmall", 0xFE54}, +{"semivoicedmarkkana", 0x309C}, +{"semivoicedmarkkanahalfwidth", 0xFF9F}, +{"sentisquare", 0x3322}, +{"sentosquare", 0x3323}, +{"seven", 0x0037}, +{"sevenarabic", 0x0667}, +{"sevenbengali", 0x09ED}, +{"sevencircle", 0x2466}, +{"sevencircleinversesansserif", 0x2790}, +{"sevendeva", 0x096D}, +{"seveneighths", 0x215E}, +{"sevengujarati", 0x0AED}, +{"sevengurmukhi", 0x0A6D}, +{"sevenhackarabic", 0x0667}, +{"sevenhangzhou", 0x3027}, +{"sevenideographicparen", 0x3226}, +{"seveninferior", 0x2087}, +{"sevenmonospace", 0xFF17}, +{"sevenoldstyle", 0xF737}, +{"sevenparen", 0x247A}, +{"sevenperiod", 0x248E}, +{"sevenpersian", 0x06F7}, +{"sevenroman", 0x2176}, +{"sevensuperior", 0x2077}, +{"seventeencircle", 0x2470}, +{"seventeenparen", 0x2484}, +{"seventeenperiod", 0x2498}, +{"seventhai", 0x0E57}, +{"sfthyphen", 0x00AD}, +{"shaarmenian", 0x0577}, +{"shabengali", 0x09B6}, +{"shacyrillic", 0x0448}, +{"shaddaarabic", 0x0651}, +{"shaddadammaarabic", 0xFC61}, +{"shaddadammatanarabic", 0xFC5E}, +{"shaddafathaarabic", 0xFC60}, +{"shaddafathatanarabic", 0x0651}, +{"shaddakasraarabic", 0xFC62}, +{"shaddakasratanarabic", 0xFC5F}, +{"shade", 0x2592}, +{"shadedark", 0x2593}, +{"shadelight", 0x2591}, +{"shademedium", 0x2592}, +{"shadeva", 0x0936}, +{"shagujarati", 0x0AB6}, +{"shagurmukhi", 0x0A36}, +{"shalshelethebrew", 0x0593}, +{"shbopomofo", 0x3115}, +{"shchacyrillic", 0x0449}, +{"sheenarabic", 0x0634}, +{"sheenfinalarabic", 0xFEB6}, +{"sheeninitialarabic", 0xFEB7}, +{"sheenmedialarabic", 0xFEB8}, +{"sheicoptic", 0x03E3}, +{"sheqel", 0x20AA}, +{"sheqelhebrew", 0x20AA}, +{"sheva", 0x05B0}, +{"sheva115", 0x05B0}, +{"sheva15", 0x05B0}, +{"sheva22", 0x05B0}, +{"sheva2e", 0x05B0}, +{"shevahebrew", 0x05B0}, +{"shevanarrowhebrew", 0x05B0}, +{"shevaquarterhebrew", 0x05B0}, +{"shevawidehebrew", 0x05B0}, +{"shhacyrillic", 0x04BB}, +{"shimacoptic", 0x03ED}, +{"shin", 0x05E9}, +{"shindagesh", 0xFB49}, +{"shindageshhebrew", 0xFB49}, +{"shindageshshindot", 0xFB2C}, 
+{"shindageshshindothebrew", 0xFB2C}, +{"shindageshsindot", 0xFB2D}, +{"shindageshsindothebrew", 0xFB2D}, +{"shindothebrew", 0x05C1}, +{"shinhebrew", 0x05E9}, +{"shinshindot", 0xFB2A}, +{"shinshindothebrew", 0xFB2A}, +{"shinsindot", 0xFB2B}, +{"shinsindothebrew", 0xFB2B}, +{"shook", 0x0282}, +{"sigma", 0x03C3}, +{"sigma1", 0x03C2}, +{"sigmafinal", 0x03C2}, +{"sigmalunatesymbolgreek", 0x03F2}, +{"sihiragana", 0x3057}, +{"sikatakana", 0x30B7}, +{"sikatakanahalfwidth", 0xFF7C}, +{"siluqhebrew", 0x05BD}, +{"siluqlefthebrew", 0x05BD}, +{"similar", 0x223C}, +{"sindothebrew", 0x05C2}, +{"siosacirclekorean", 0x3274}, +{"siosaparenkorean", 0x3214}, +{"sioscieuckorean", 0x317E}, +{"sioscirclekorean", 0x3266}, +{"sioskiyeokkorean", 0x317A}, +{"sioskorean", 0x3145}, +{"siosnieunkorean", 0x317B}, +{"siosparenkorean", 0x3206}, +{"siospieupkorean", 0x317D}, +{"siostikeutkorean", 0x317C}, +{"six", 0x0036}, +{"sixarabic", 0x0666}, +{"sixbengali", 0x09EC}, +{"sixcircle", 0x2465}, +{"sixcircleinversesansserif", 0x278F}, +{"sixdeva", 0x096C}, +{"sixgujarati", 0x0AEC}, +{"sixgurmukhi", 0x0A6C}, +{"sixhackarabic", 0x0666}, +{"sixhangzhou", 0x3026}, +{"sixideographicparen", 0x3225}, +{"sixinferior", 0x2086}, +{"sixmonospace", 0xFF16}, +{"sixoldstyle", 0xF736}, +{"sixparen", 0x2479}, +{"sixperiod", 0x248D}, +{"sixpersian", 0x06F6}, +{"sixroman", 0x2175}, +{"sixsuperior", 0x2076}, +{"sixteencircle", 0x246F}, +{"sixteencurrencydenominatorbengali", 0x09F9}, +{"sixteenparen", 0x2483}, +{"sixteenperiod", 0x2497}, +{"sixthai", 0x0E56}, +{"slash", 0x002F}, +{"slashmonospace", 0xFF0F}, +{"slong", 0x017F}, +{"slongdotaccent", 0x1E9B}, +{"smileface", 0x263A}, +{"smonospace", 0xFF53}, +{"sofpasuqhebrew", 0x05C3}, +{"softhyphen", 0x00AD}, +{"softsigncyrillic", 0x044C}, +{"sohiragana", 0x305D}, +{"sokatakana", 0x30BD}, +{"sokatakanahalfwidth", 0xFF7F}, +{"soliduslongoverlaycmb", 0x0338}, +{"solidusshortoverlaycmb", 0x0337}, +{"sorusithai", 0x0E29}, +{"sosalathai", 0x0E28}, +{"sosothai", 0x0E0B}, 
+{"sosuathai", 0x0E2A}, +{"space", 0x0020}, +{"spacehackarabic", 0x0020}, +{"spade", 0x2660}, +{"spadesuitblack", 0x2660}, +{"spadesuitwhite", 0x2664}, +{"sparen", 0x24AE}, +{"squarebelowcmb", 0x033B}, +{"squarecc", 0x33C4}, +{"squarecm", 0x339D}, +{"squarediagonalcrosshatchfill", 0x25A9}, +{"squarehorizontalfill", 0x25A4}, +{"squarekg", 0x338F}, +{"squarekm", 0x339E}, +{"squarekmcapital", 0x33CE}, +{"squareln", 0x33D1}, +{"squarelog", 0x33D2}, +{"squaremg", 0x338E}, +{"squaremil", 0x33D5}, +{"squaremm", 0x339C}, +{"squaremsquared", 0x33A1}, +{"squareorthogonalcrosshatchfill", 0x25A6}, +{"squareupperlefttolowerrightfill", 0x25A7}, +{"squareupperrighttolowerleftfill", 0x25A8}, +{"squareverticalfill", 0x25A5}, +{"squarewhitewithsmallblack", 0x25A3}, +{"srsquare", 0x33DB}, +{"ssabengali", 0x09B7}, +{"ssadeva", 0x0937}, +{"ssagujarati", 0x0AB7}, +{"ssangcieuckorean", 0x3149}, +{"ssanghieuhkorean", 0x3185}, +{"ssangieungkorean", 0x3180}, +{"ssangkiyeokkorean", 0x3132}, +{"ssangnieunkorean", 0x3165}, +{"ssangpieupkorean", 0x3143}, +{"ssangsioskorean", 0x3146}, +{"ssangtikeutkorean", 0x3138}, +{"ssuperior", 0xF6F2}, +{"sterling", 0x00A3}, +{"sterlingmonospace", 0xFFE1}, +{"strokelongoverlaycmb", 0x0336}, +{"strokeshortoverlaycmb", 0x0335}, +{"subset", 0x2282}, +{"subsetnotequal", 0x228A}, +{"subsetorequal", 0x2286}, +{"succeeds", 0x227B}, +{"suchthat", 0x220B}, +{"suhiragana", 0x3059}, +{"sukatakana", 0x30B9}, +{"sukatakanahalfwidth", 0xFF7D}, +{"sukunarabic", 0x0652}, +{"summation", 0x2211}, +{"sun", 0x263C}, +{"superset", 0x2283}, +{"supersetnotequal", 0x228B}, +{"supersetorequal", 0x2287}, +{"svsquare", 0x33DC}, +{"syouwaerasquare", 0x337C}, +{"t", 0x0074}, +{"tabengali", 0x09A4}, +{"tackdown", 0x22A4}, +{"tackleft", 0x22A3}, +{"tadeva", 0x0924}, +{"tagujarati", 0x0AA4}, +{"tagurmukhi", 0x0A24}, +{"taharabic", 0x0637}, +{"tahfinalarabic", 0xFEC2}, +{"tahinitialarabic", 0xFEC3}, +{"tahiragana", 0x305F}, +{"tahmedialarabic", 0xFEC4}, +{"taisyouerasquare", 0x337D}, 
+{"takatakana", 0x30BF}, +{"takatakanahalfwidth", 0xFF80}, +{"tatweelarabic", 0x0640}, +{"tau", 0x03C4}, +{"tav", 0x05EA}, +{"tavdages", 0xFB4A}, +{"tavdagesh", 0xFB4A}, +{"tavdageshhebrew", 0xFB4A}, +{"tavhebrew", 0x05EA}, +{"tbar", 0x0167}, +{"tbopomofo", 0x310A}, +{"tcaron", 0x0165}, +{"tccurl", 0x02A8}, +{"tcedilla", 0x0163}, +{"tcheharabic", 0x0686}, +{"tchehfinalarabic", 0xFB7B}, +{"tchehinitialarabic", 0xFB7C}, +{"tchehmedialarabic", 0xFB7D}, +{"tchehmeeminitialarabic", 0xFB7C}, +{"tcircle", 0x24E3}, +{"tcircumflexbelow", 0x1E71}, +{"tcommaaccent", 0x0163}, +{"tdieresis", 0x1E97}, +{"tdotaccent", 0x1E6B}, +{"tdotbelow", 0x1E6D}, +{"tecyrillic", 0x0442}, +{"tedescendercyrillic", 0x04AD}, +{"teharabic", 0x062A}, +{"tehfinalarabic", 0xFE96}, +{"tehhahinitialarabic", 0xFCA2}, +{"tehhahisolatedarabic", 0xFC0C}, +{"tehinitialarabic", 0xFE97}, +{"tehiragana", 0x3066}, +{"tehjeeminitialarabic", 0xFCA1}, +{"tehjeemisolatedarabic", 0xFC0B}, +{"tehmarbutaarabic", 0x0629}, +{"tehmarbutafinalarabic", 0xFE94}, +{"tehmedialarabic", 0xFE98}, +{"tehmeeminitialarabic", 0xFCA4}, +{"tehmeemisolatedarabic", 0xFC0E}, +{"tehnoonfinalarabic", 0xFC73}, +{"tekatakana", 0x30C6}, +{"tekatakanahalfwidth", 0xFF83}, +{"telephone", 0x2121}, +{"telephoneblack", 0x260E}, +{"telishagedolahebrew", 0x05A0}, +{"telishaqetanahebrew", 0x05A9}, +{"tencircle", 0x2469}, +{"tenideographicparen", 0x3229}, +{"tenparen", 0x247D}, +{"tenperiod", 0x2491}, +{"tenroman", 0x2179}, +{"tesh", 0x02A7}, +{"tet", 0x05D8}, +{"tetdagesh", 0xFB38}, +{"tetdageshhebrew", 0xFB38}, +{"tethebrew", 0x05D8}, +{"tetsecyrillic", 0x04B5}, +{"tevirhebrew", 0x059B}, +{"tevirlefthebrew", 0x059B}, +{"thabengali", 0x09A5}, +{"thadeva", 0x0925}, +{"thagujarati", 0x0AA5}, +{"thagurmukhi", 0x0A25}, +{"thalarabic", 0x0630}, +{"thalfinalarabic", 0xFEAC}, +{"thanthakhatlowleftthai", 0xF898}, +{"thanthakhatlowrightthai", 0xF897}, +{"thanthakhatthai", 0x0E4C}, +{"thanthakhatupperleftthai", 0xF896}, +{"theharabic", 0x062B}, 
+{"thehfinalarabic", 0xFE9A}, +{"thehinitialarabic", 0xFE9B}, +{"thehmedialarabic", 0xFE9C}, +{"thereexists", 0x2203}, +{"therefore", 0x2234}, +{"theta", 0x03B8}, +{"theta1", 0x03D1}, +{"thetasymbolgreek", 0x03D1}, +{"thieuthacirclekorean", 0x3279}, +{"thieuthaparenkorean", 0x3219}, +{"thieuthcirclekorean", 0x326B}, +{"thieuthkorean", 0x314C}, +{"thieuthparenkorean", 0x320B}, +{"thirteencircle", 0x246C}, +{"thirteenparen", 0x2480}, +{"thirteenperiod", 0x2494}, +{"thonangmonthothai", 0x0E11}, +{"thook", 0x01AD}, +{"thophuthaothai", 0x0E12}, +{"thorn", 0x00FE}, +{"thothahanthai", 0x0E17}, +{"thothanthai", 0x0E10}, +{"thothongthai", 0x0E18}, +{"thothungthai", 0x0E16}, +{"thousandcyrillic", 0x0482}, +{"thousandsseparatorarabic", 0x066C}, +{"thousandsseparatorpersian", 0x066C}, +{"three", 0x0033}, +{"threearabic", 0x0663}, +{"threebengali", 0x09E9}, +{"threecircle", 0x2462}, +{"threecircleinversesansserif", 0x278C}, +{"threedeva", 0x0969}, +{"threeeighths", 0x215C}, +{"threegujarati", 0x0AE9}, +{"threegurmukhi", 0x0A69}, +{"threehackarabic", 0x0663}, +{"threehangzhou", 0x3023}, +{"threeideographicparen", 0x3222}, +{"threeinferior", 0x2083}, +{"threemonospace", 0xFF13}, +{"threenumeratorbengali", 0x09F6}, +{"threeoldstyle", 0xF733}, +{"threeparen", 0x2476}, +{"threeperiod", 0x248A}, +{"threepersian", 0x06F3}, +{"threequarters", 0x00BE}, +{"threequartersemdash", 0xF6DE}, +{"threeroman", 0x2172}, +{"threesuperior", 0x00B3}, +{"threethai", 0x0E53}, +{"thzsquare", 0x3394}, +{"tihiragana", 0x3061}, +{"tikatakana", 0x30C1}, +{"tikatakanahalfwidth", 0xFF81}, +{"tikeutacirclekorean", 0x3270}, +{"tikeutaparenkorean", 0x3210}, +{"tikeutcirclekorean", 0x3262}, +{"tikeutkorean", 0x3137}, +{"tikeutparenkorean", 0x3202}, +{"tilde", 0x02DC}, +{"tildebelowcmb", 0x0330}, +{"tildecmb", 0x0303}, +{"tildecomb", 0x0303}, +{"tildedoublecmb", 0x0360}, +{"tildeoperator", 0x223C}, +{"tildeoverlaycmb", 0x0334}, +{"tildeverticalcmb", 0x033E}, +{"timescircle", 0x2297}, +{"tipehahebrew", 0x0596}, 
+{"tipehalefthebrew", 0x0596}, +{"tippigurmukhi", 0x0A70}, +{"titlocyrilliccmb", 0x0483}, +{"tiwnarmenian", 0x057F}, +{"tlinebelow", 0x1E6F}, +{"tmonospace", 0xFF54}, +{"toarmenian", 0x0569}, +{"tohiragana", 0x3068}, +{"tokatakana", 0x30C8}, +{"tokatakanahalfwidth", 0xFF84}, +{"tonebarextrahighmod", 0x02E5}, +{"tonebarextralowmod", 0x02E9}, +{"tonebarhighmod", 0x02E6}, +{"tonebarlowmod", 0x02E8}, +{"tonebarmidmod", 0x02E7}, +{"tonefive", 0x01BD}, +{"tonesix", 0x0185}, +{"tonetwo", 0x01A8}, +{"tonos", 0x0384}, +{"tonsquare", 0x3327}, +{"topatakthai", 0x0E0F}, +{"tortoiseshellbracketleft", 0x3014}, +{"tortoiseshellbracketleftsmall", 0xFE5D}, +{"tortoiseshellbracketleftvertical", 0xFE39}, +{"tortoiseshellbracketright", 0x3015}, +{"tortoiseshellbracketrightsmall", 0xFE5E}, +{"tortoiseshellbracketrightvertical", 0xFE3A}, +{"totaothai", 0x0E15}, +{"tpalatalhook", 0x01AB}, +{"tparen", 0x24AF}, +{"trademark", 0x2122}, +{"trademarksans", 0xF8EA}, +{"trademarkserif", 0xF6DB}, +{"tretroflexhook", 0x0288}, +{"triagdn", 0x25BC}, +{"triaglf", 0x25C4}, +{"triagrt", 0x25BA}, +{"triagup", 0x25B2}, +{"ts", 0x02A6}, +{"tsadi", 0x05E6}, +{"tsadidagesh", 0xFB46}, +{"tsadidageshhebrew", 0xFB46}, +{"tsadihebrew", 0x05E6}, +{"tsecyrillic", 0x0446}, +{"tsere", 0x05B5}, +{"tsere12", 0x05B5}, +{"tsere1e", 0x05B5}, +{"tsere2b", 0x05B5}, +{"tserehebrew", 0x05B5}, +{"tserenarrowhebrew", 0x05B5}, +{"tserequarterhebrew", 0x05B5}, +{"tserewidehebrew", 0x05B5}, +{"tshecyrillic", 0x045B}, +{"tsuperior", 0xF6F3}, +{"ttabengali", 0x099F}, +{"ttadeva", 0x091F}, +{"ttagujarati", 0x0A9F}, +{"ttagurmukhi", 0x0A1F}, +{"tteharabic", 0x0679}, +{"ttehfinalarabic", 0xFB67}, +{"ttehinitialarabic", 0xFB68}, +{"ttehmedialarabic", 0xFB69}, +{"tthabengali", 0x09A0}, +{"tthadeva", 0x0920}, +{"tthagujarati", 0x0AA0}, +{"tthagurmukhi", 0x0A20}, +{"tturned", 0x0287}, +{"tuhiragana", 0x3064}, +{"tukatakana", 0x30C4}, +{"tukatakanahalfwidth", 0xFF82}, +{"tusmallhiragana", 0x3063}, +{"tusmallkatakana", 0x30C3}, 
+{"tusmallkatakanahalfwidth", 0xFF6F}, +{"twelvecircle", 0x246B}, +{"twelveparen", 0x247F}, +{"twelveperiod", 0x2493}, +{"twelveroman", 0x217B}, +{"twentycircle", 0x2473}, +{"twentyhangzhou", 0x5344}, +{"twentyparen", 0x2487}, +{"twentyperiod", 0x249B}, +{"two", 0x0032}, +{"twoarabic", 0x0662}, +{"twobengali", 0x09E8}, +{"twocircle", 0x2461}, +{"twocircleinversesansserif", 0x278B}, +{"twodeva", 0x0968}, +{"twodotenleader", 0x2025}, +{"twodotleader", 0x2025}, +{"twodotleadervertical", 0xFE30}, +{"twogujarati", 0x0AE8}, +{"twogurmukhi", 0x0A68}, +{"twohackarabic", 0x0662}, +{"twohangzhou", 0x3022}, +{"twoideographicparen", 0x3221}, +{"twoinferior", 0x2082}, +{"twomonospace", 0xFF12}, +{"twonumeratorbengali", 0x09F5}, +{"twooldstyle", 0xF732}, +{"twoparen", 0x2475}, +{"twoperiod", 0x2489}, +{"twopersian", 0x06F2}, +{"tworoman", 0x2171}, +{"twostroke", 0x01BB}, +{"twosuperior", 0x00B2}, +{"twothai", 0x0E52}, +{"twothirds", 0x2154}, +{"u", 0x0075}, +{"uacute", 0x00FA}, +{"ubar", 0x0289}, +{"ubengali", 0x0989}, +{"ubopomofo", 0x3128}, +{"ubreve", 0x016D}, +{"ucaron", 0x01D4}, +{"ucircle", 0x24E4}, +{"ucircumflex", 0x00FB}, +{"ucircumflexbelow", 0x1E77}, +{"ucyrillic", 0x0443}, +{"udattadeva", 0x0951}, +{"udblacute", 0x0171}, +{"udblgrave", 0x0215}, +{"udeva", 0x0909}, +{"udieresis", 0x00FC}, +{"udieresisacute", 0x01D8}, +{"udieresisbelow", 0x1E73}, +{"udieresiscaron", 0x01DA}, +{"udieresiscyrillic", 0x04F1}, +{"udieresisgrave", 0x01DC}, +{"udieresismacron", 0x01D6}, +{"udotbelow", 0x1EE5}, +{"ugrave", 0x00F9}, +{"ugujarati", 0x0A89}, +{"ugurmukhi", 0x0A09}, +{"uhiragana", 0x3046}, +{"uhookabove", 0x1EE7}, +{"uhorn", 0x01B0}, +{"uhornacute", 0x1EE9}, +{"uhorndotbelow", 0x1EF1}, +{"uhorngrave", 0x1EEB}, +{"uhornhookabove", 0x1EED}, +{"uhorntilde", 0x1EEF}, +{"uhungarumlaut", 0x0171}, +{"uhungarumlautcyrillic", 0x04F3}, +{"uinvertedbreve", 0x0217}, +{"ukatakana", 0x30A6}, +{"ukatakanahalfwidth", 0xFF73}, +{"ukcyrillic", 0x0479}, +{"ukorean", 0x315C}, +{"umacron", 0x016B}, 
+{"umacroncyrillic", 0x04EF}, +{"umacrondieresis", 0x1E7B}, +{"umatragurmukhi", 0x0A41}, +{"umonospace", 0xFF55}, +{"underscore", 0x005F}, +{"underscoredbl", 0x2017}, +{"underscoremonospace", 0xFF3F}, +{"underscorevertical", 0xFE33}, +{"underscorewavy", 0xFE4F}, +{"union", 0x222A}, +{"universal", 0x2200}, +{"uogonek", 0x0173}, +{"uparen", 0x24B0}, +{"upblock", 0x2580}, +{"upperdothebrew", 0x05C4}, +{"upsilon", 0x03C5}, +{"upsilondieresis", 0x03CB}, +{"upsilondieresistonos", 0x03B0}, +{"upsilonlatin", 0x028A}, +{"upsilontonos", 0x03CD}, +{"uptackbelowcmb", 0x031D}, +{"uptackmod", 0x02D4}, +{"uragurmukhi", 0x0A73}, +{"uring", 0x016F}, +{"ushortcyrillic", 0x045E}, +{"usmallhiragana", 0x3045}, +{"usmallkatakana", 0x30A5}, +{"usmallkatakanahalfwidth", 0xFF69}, +{"ustraightcyrillic", 0x04AF}, +{"ustraightstrokecyrillic", 0x04B1}, +{"utilde", 0x0169}, +{"utildeacute", 0x1E79}, +{"utildebelow", 0x1E75}, +{"uubengali", 0x098A}, +{"uudeva", 0x090A}, +{"uugujarati", 0x0A8A}, +{"uugurmukhi", 0x0A0A}, +{"uumatragurmukhi", 0x0A42}, +{"uuvowelsignbengali", 0x09C2}, +{"uuvowelsigndeva", 0x0942}, +{"uuvowelsigngujarati", 0x0AC2}, +{"uvowelsignbengali", 0x09C1}, +{"uvowelsigndeva", 0x0941}, +{"uvowelsigngujarati", 0x0AC1}, +{"v", 0x0076}, +{"vadeva", 0x0935}, +{"vagujarati", 0x0AB5}, +{"vagurmukhi", 0x0A35}, +{"vakatakana", 0x30F7}, +{"vav", 0x05D5}, +{"vavdagesh", 0xFB35}, +{"vavdagesh65", 0xFB35}, +{"vavdageshhebrew", 0xFB35}, +{"vavhebrew", 0x05D5}, +{"vavholam", 0xFB4B}, +{"vavholamhebrew", 0xFB4B}, +{"vavvavhebrew", 0x05F0}, +{"vavyodhebrew", 0x05F1}, +{"vcircle", 0x24E5}, +{"vdotbelow", 0x1E7F}, +{"vecyrillic", 0x0432}, +{"veharabic", 0x06A4}, +{"vehfinalarabic", 0xFB6B}, +{"vehinitialarabic", 0xFB6C}, +{"vehmedialarabic", 0xFB6D}, +{"vekatakana", 0x30F9}, +{"venus", 0x2640}, +{"verticalbar", 0x007C}, +{"verticallineabovecmb", 0x030D}, +{"verticallinebelowcmb", 0x0329}, +{"verticallinelowmod", 0x02CC}, +{"verticallinemod", 0x02C8}, +{"vewarmenian", 0x057E}, +{"vhook", 0x028B}, 
+{"vikatakana", 0x30F8}, +{"viramabengali", 0x09CD}, +{"viramadeva", 0x094D}, +{"viramagujarati", 0x0ACD}, +{"visargabengali", 0x0983}, +{"visargadeva", 0x0903}, +{"visargagujarati", 0x0A83}, +{"vmonospace", 0xFF56}, +{"voarmenian", 0x0578}, +{"voicediterationhiragana", 0x309E}, +{"voicediterationkatakana", 0x30FE}, +{"voicedmarkkana", 0x309B}, +{"voicedmarkkanahalfwidth", 0xFF9E}, +{"vokatakana", 0x30FA}, +{"vparen", 0x24B1}, +{"vtilde", 0x1E7D}, +{"vturned", 0x028C}, +{"vuhiragana", 0x3094}, +{"vukatakana", 0x30F4}, +{"w", 0x0077}, +{"wacute", 0x1E83}, +{"waekorean", 0x3159}, +{"wahiragana", 0x308F}, +{"wakatakana", 0x30EF}, +{"wakatakanahalfwidth", 0xFF9C}, +{"wakorean", 0x3158}, +{"wasmallhiragana", 0x308E}, +{"wasmallkatakana", 0x30EE}, +{"wattosquare", 0x3357}, +{"wavedash", 0x301C}, +{"wavyunderscorevertical", 0xFE34}, +{"wawarabic", 0x0648}, +{"wawfinalarabic", 0xFEEE}, +{"wawhamzaabovearabic", 0x0624}, +{"wawhamzaabovefinalarabic", 0xFE86}, +{"wbsquare", 0x33DD}, +{"wcircle", 0x24E6}, +{"wcircumflex", 0x0175}, +{"wdieresis", 0x1E85}, +{"wdotaccent", 0x1E87}, +{"wdotbelow", 0x1E89}, +{"wehiragana", 0x3091}, +{"weierstrass", 0x2118}, +{"wekatakana", 0x30F1}, +{"wekorean", 0x315E}, +{"weokorean", 0x315D}, +{"wgrave", 0x1E81}, +{"whitebullet", 0x25E6}, +{"whitecircle", 0x25CB}, +{"whitecircleinverse", 0x25D9}, +{"whitecornerbracketleft", 0x300E}, +{"whitecornerbracketleftvertical", 0xFE43}, +{"whitecornerbracketright", 0x300F}, +{"whitecornerbracketrightvertical", 0xFE44}, +{"whitediamond", 0x25C7}, +{"whitediamondcontainingblacksmalldiamond", 0x25C8}, +{"whitedownpointingsmalltriangle", 0x25BF}, +{"whitedownpointingtriangle", 0x25BD}, +{"whiteleftpointingsmalltriangle", 0x25C3}, +{"whiteleftpointingtriangle", 0x25C1}, +{"whitelenticularbracketleft", 0x3016}, +{"whitelenticularbracketright", 0x3017}, +{"whiterightpointingsmalltriangle", 0x25B9}, +{"whiterightpointingtriangle", 0x25B7}, +{"whitesmallsquare", 0x25AB}, +{"whitesmilingface", 0x263A}, 
+{"whitesquare", 0x25A1}, +{"whitestar", 0x2606}, +{"whitetelephone", 0x260F}, +{"whitetortoiseshellbracketleft", 0x3018}, +{"whitetortoiseshellbracketright", 0x3019}, +{"whiteuppointingsmalltriangle", 0x25B5}, +{"whiteuppointingtriangle", 0x25B3}, +{"wihiragana", 0x3090}, +{"wikatakana", 0x30F0}, +{"wikorean", 0x315F}, +{"wmonospace", 0xFF57}, +{"wohiragana", 0x3092}, +{"wokatakana", 0x30F2}, +{"wokatakanahalfwidth", 0xFF66}, +{"won", 0x20A9}, +{"wonmonospace", 0xFFE6}, +{"wowaenthai", 0x0E27}, +{"wparen", 0x24B2}, +{"wring", 0x1E98}, +{"wsuperior", 0x02B7}, +{"wturned", 0x028D}, +{"wynn", 0x01BF}, +{"x", 0x0078}, +{"xabovecmb", 0x033D}, +{"xbopomofo", 0x3112}, +{"xcircle", 0x24E7}, +{"xdieresis", 0x1E8D}, +{"xdotaccent", 0x1E8B}, +{"xeharmenian", 0x056D}, +{"xi", 0x03BE}, +{"xmonospace", 0xFF58}, +{"xparen", 0x24B3}, +{"xsuperior", 0x02E3}, +{"y", 0x0079}, +{"yaadosquare", 0x334E}, +{"yabengali", 0x09AF}, +{"yacute", 0x00FD}, +{"yadeva", 0x092F}, +{"yaekorean", 0x3152}, +{"yagujarati", 0x0AAF}, +{"yagurmukhi", 0x0A2F}, +{"yahiragana", 0x3084}, +{"yakatakana", 0x30E4}, +{"yakatakanahalfwidth", 0xFF94}, +{"yakorean", 0x3151}, +{"yamakkanthai", 0x0E4E}, +{"yasmallhiragana", 0x3083}, +{"yasmallkatakana", 0x30E3}, +{"yasmallkatakanahalfwidth", 0xFF6C}, +{"yatcyrillic", 0x0463}, +{"ycircle", 0x24E8}, +{"ycircumflex", 0x0177}, +{"ydieresis", 0x00FF}, +{"ydotaccent", 0x1E8F}, +{"ydotbelow", 0x1EF5}, +{"yeharabic", 0x064A}, +{"yehbarreearabic", 0x06D2}, +{"yehbarreefinalarabic", 0xFBAF}, +{"yehfinalarabic", 0xFEF2}, +{"yehhamzaabovearabic", 0x0626}, +{"yehhamzaabovefinalarabic", 0xFE8A}, +{"yehhamzaaboveinitialarabic", 0xFE8B}, +{"yehhamzaabovemedialarabic", 0xFE8C}, +{"yehinitialarabic", 0xFEF3}, +{"yehmedialarabic", 0xFEF4}, +{"yehmeeminitialarabic", 0xFCDD}, +{"yehmeemisolatedarabic", 0xFC58}, +{"yehnoonfinalarabic", 0xFC94}, +{"yehthreedotsbelowarabic", 0x06D1}, +{"yekorean", 0x3156}, +{"yen", 0x00A5}, +{"yenmonospace", 0xFFE5}, +{"yeokorean", 0x3155}, 
+{"yeorinhieuhkorean", 0x3186}, +{"yerahbenyomohebrew", 0x05AA}, +{"yerahbenyomolefthebrew", 0x05AA}, +{"yericyrillic", 0x044B}, +{"yerudieresiscyrillic", 0x04F9}, +{"yesieungkorean", 0x3181}, +{"yesieungpansioskorean", 0x3183}, +{"yesieungsioskorean", 0x3182}, +{"yetivhebrew", 0x059A}, +{"ygrave", 0x1EF3}, +{"yhook", 0x01B4}, +{"yhookabove", 0x1EF7}, +{"yiarmenian", 0x0575}, +{"yicyrillic", 0x0457}, +{"yikorean", 0x3162}, +{"yinyang", 0x262F}, +{"yiwnarmenian", 0x0582}, +{"ymonospace", 0xFF59}, +{"yod", 0x05D9}, +{"yoddagesh", 0xFB39}, +{"yoddageshhebrew", 0xFB39}, +{"yodhebrew", 0x05D9}, +{"yodyodhebrew", 0x05F2}, +{"yodyodpatahhebrew", 0xFB1F}, +{"yohiragana", 0x3088}, +{"yoikorean", 0x3189}, +{"yokatakana", 0x30E8}, +{"yokatakanahalfwidth", 0xFF96}, +{"yokorean", 0x315B}, +{"yosmallhiragana", 0x3087}, +{"yosmallkatakana", 0x30E7}, +{"yosmallkatakanahalfwidth", 0xFF6E}, +{"yotgreek", 0x03F3}, +{"yoyaekorean", 0x3188}, +{"yoyakorean", 0x3187}, +{"yoyakthai", 0x0E22}, +{"yoyingthai", 0x0E0D}, +{"yparen", 0x24B4}, +{"ypogegrammeni", 0x037A}, +{"ypogegrammenigreekcmb", 0x0345}, +{"yr", 0x01A6}, +{"yring", 0x1E99}, +{"ysuperior", 0x02B8}, +{"ytilde", 0x1EF9}, +{"yturned", 0x028E}, +{"yuhiragana", 0x3086}, +{"yuikorean", 0x318C}, +{"yukatakana", 0x30E6}, +{"yukatakanahalfwidth", 0xFF95}, +{"yukorean", 0x3160}, +{"yusbigcyrillic", 0x046B}, +{"yusbigiotifiedcyrillic", 0x046D}, +{"yuslittlecyrillic", 0x0467}, +{"yuslittleiotifiedcyrillic", 0x0469}, +{"yusmallhiragana", 0x3085}, +{"yusmallkatakana", 0x30E5}, +{"yusmallkatakanahalfwidth", 0xFF6D}, +{"yuyekorean", 0x318B}, +{"yuyeokorean", 0x318A}, +{"yyabengali", 0x09DF}, +{"yyadeva", 0x095F}, +{"z", 0x007A}, +{"zaarmenian", 0x0566}, +{"zacute", 0x017A}, +{"zadeva", 0x095B}, +{"zagurmukhi", 0x0A5B}, +{"zaharabic", 0x0638}, +{"zahfinalarabic", 0xFEC6}, +{"zahinitialarabic", 0xFEC7}, +{"zahiragana", 0x3056}, +{"zahmedialarabic", 0xFEC8}, +{"zainarabic", 0x0632}, +{"zainfinalarabic", 0xFEB0}, +{"zakatakana", 0x30B6}, 
+{"zaqefgadolhebrew", 0x0595}, +{"zaqefqatanhebrew", 0x0594}, +{"zarqahebrew", 0x0598}, +{"zayin", 0x05D6}, +{"zayindagesh", 0xFB36}, +{"zayindageshhebrew", 0xFB36}, +{"zayinhebrew", 0x05D6}, +{"zbopomofo", 0x3117}, +{"zcaron", 0x017E}, +{"zcircle", 0x24E9}, +{"zcircumflex", 0x1E91}, +{"zcurl", 0x0291}, +{"zdot", 0x017C}, +{"zdotaccent", 0x017C}, +{"zdotbelow", 0x1E93}, +{"zecyrillic", 0x0437}, +{"zedescendercyrillic", 0x0499}, +{"zedieresiscyrillic", 0x04DF}, +{"zehiragana", 0x305C}, +{"zekatakana", 0x30BC}, +{"zero", 0x0030}, +{"zeroarabic", 0x0660}, +{"zerobengali", 0x09E6}, +{"zerodeva", 0x0966}, +{"zerogujarati", 0x0AE6}, +{"zerogurmukhi", 0x0A66}, +{"zerohackarabic", 0x0660}, +{"zeroinferior", 0x2080}, +{"zeromonospace", 0xFF10}, +{"zerooldstyle", 0xF730}, +{"zeropersian", 0x06F0}, +{"zerosuperior", 0x2070}, +{"zerothai", 0x0E50}, +{"zerowidthjoiner", 0xFEFF}, +{"zerowidthnonjoiner", 0x200C}, +{"zerowidthspace", 0x200B}, +{"zeta", 0x03B6}, +{"zhbopomofo", 0x3113}, +{"zhearmenian", 0x056A}, +{"zhebrevecyrillic", 0x04C2}, +{"zhecyrillic", 0x0436}, +{"zhedescendercyrillic", 0x0497}, +{"zhedieresiscyrillic", 0x04DD}, +{"zihiragana", 0x3058}, +{"zikatakana", 0x30B8}, +{"zinorhebrew", 0x05AE}, +{"zlinebelow", 0x1E95}, +{"zmonospace", 0xFF5A}, +{"zohiragana", 0x305E}, +{"zokatakana", 0x30BE}, +{"zparen", 0x24B5}, +{"zretroflexhook", 0x0290}, +{"zstroke", 0x01B6}, +{"zuhiragana", 0x305A}, +{"zukatakana", 0x30BA}, +}; + +static const struct { int ucs; int ofs; } +agldupcodes[] = { +{0x0020, 0}, +{0x007C, 3}, +{0x00A0, 6}, +{0x00AD, 9}, +{0x00AF, 12}, +{0x00B5, 15}, +{0x00B7, 18}, +{0x010A, 21}, +{0x010B, 24}, +{0x0110, 27}, +{0x0111, 30}, +{0x0116, 33}, +{0x0117, 36}, +{0x0120, 39}, +{0x0121, 42}, +{0x0122, 45}, +{0x0123, 48}, +{0x0130, 51}, +{0x0136, 54}, +{0x0137, 57}, +{0x013B, 60}, +{0x013C, 63}, +{0x013F, 66}, +{0x0140, 69}, +{0x0145, 72}, +{0x0146, 75}, +{0x0149, 78}, +{0x0150, 81}, +{0x0151, 84}, +{0x0156, 87}, +{0x0157, 90}, +{0x0162, 93}, +{0x0163, 96}, 
+{0x0170, 99}, +{0x0171, 102}, +{0x017B, 105}, +{0x017C, 108}, +{0x017F, 111}, +{0x01FE, 114}, +{0x01FF, 117}, +{0x02BC, 120}, +{0x02BD, 123}, +{0x02DC, 126}, +{0x0300, 129}, +{0x0301, 132}, +{0x0303, 135}, +{0x0309, 138}, +{0x0323, 141}, +{0x0385, 144}, +{0x03C2, 147}, +{0x03D1, 150}, +{0x03D2, 153}, +{0x03D5, 156}, +{0x03D6, 159}, +{0x0401, 162}, +{0x0402, 165}, +{0x0403, 168}, +{0x0404, 171}, +{0x0405, 174}, +{0x0406, 177}, +{0x0407, 180}, +{0x0408, 183}, +{0x0409, 186}, +{0x040A, 189}, +{0x040B, 192}, +{0x040C, 195}, +{0x040E, 198}, +{0x040F, 201}, +{0x0410, 204}, +{0x0411, 207}, +{0x0412, 210}, +{0x0413, 213}, +{0x0414, 216}, +{0x0415, 219}, +{0x0416, 222}, +{0x0417, 225}, +{0x0418, 228}, +{0x0419, 231}, +{0x041A, 234}, +{0x041B, 237}, +{0x041C, 240}, +{0x041D, 243}, +{0x041E, 246}, +{0x041F, 249}, +{0x0420, 252}, +{0x0421, 255}, +{0x0422, 258}, +{0x0423, 261}, +{0x0424, 264}, +{0x0425, 267}, +{0x0426, 270}, +{0x0427, 273}, +{0x0428, 276}, +{0x0429, 279}, +{0x042A, 282}, +{0x042B, 285}, +{0x042C, 288}, +{0x042D, 291}, +{0x042E, 294}, +{0x042F, 297}, +{0x0430, 300}, +{0x0431, 303}, +{0x0432, 306}, +{0x0433, 309}, +{0x0434, 312}, +{0x0435, 315}, +{0x0436, 318}, +{0x0437, 321}, +{0x0438, 324}, +{0x0439, 327}, +{0x043A, 330}, +{0x043B, 333}, +{0x043C, 336}, +{0x043D, 339}, +{0x043E, 342}, +{0x043F, 345}, +{0x0440, 348}, +{0x0441, 351}, +{0x0442, 354}, +{0x0443, 357}, +{0x0444, 360}, +{0x0445, 363}, +{0x0446, 366}, +{0x0447, 369}, +{0x0448, 372}, +{0x0449, 375}, +{0x044A, 378}, +{0x044B, 381}, +{0x044C, 384}, +{0x044D, 387}, +{0x044E, 390}, +{0x044F, 393}, +{0x0451, 396}, +{0x0452, 399}, +{0x0453, 402}, +{0x0454, 405}, +{0x0455, 408}, +{0x0456, 411}, +{0x0457, 414}, +{0x0458, 417}, +{0x0459, 420}, +{0x045A, 423}, +{0x045B, 426}, +{0x045C, 429}, +{0x045E, 432}, +{0x045F, 435}, +{0x0462, 438}, +{0x0463, 441}, +{0x0472, 444}, +{0x0473, 447}, +{0x0474, 450}, +{0x0475, 453}, +{0x0490, 456}, +{0x0491, 459}, +{0x04D9, 462}, +{0x0591, 465}, +{0x0596, 470}, +{0x0597, 473}, 
+{0x059B, 476}, +{0x05A3, 479}, +{0x05A4, 482}, +{0x05A5, 485}, +{0x05A6, 488}, +{0x05A7, 491}, +{0x05AA, 494}, +{0x05B0, 497}, +{0x05B1, 508}, +{0x05B2, 518}, +{0x05B3, 528}, +{0x05B4, 538}, +{0x05B5, 548}, +{0x05B6, 558}, +{0x05B7, 568}, +{0x05B8, 578}, +{0x05B9, 596}, +{0x05BB, 606}, +{0x05BC, 616}, +{0x05BD, 620}, +{0x05BE, 624}, +{0x05BF, 627}, +{0x05C0, 631}, +{0x05C1, 634}, +{0x05C2, 637}, +{0x05C3, 640}, +{0x05D0, 643}, +{0x05D1, 647}, +{0x05D2, 651}, +{0x05D3, 655}, +{0x05D4, 679}, +{0x05D5, 683}, +{0x05D6, 687}, +{0x05D7, 691}, +{0x05D8, 695}, +{0x05D9, 699}, +{0x05DA, 703}, +{0x05DB, 711}, +{0x05DC, 715}, +{0x05DD, 723}, +{0x05DE, 727}, +{0x05DF, 731}, +{0x05E0, 735}, +{0x05E1, 739}, +{0x05E2, 743}, +{0x05E3, 747}, +{0x05E4, 751}, +{0x05E5, 755}, +{0x05E6, 759}, +{0x05E7, 763}, +{0x05E8, 787}, +{0x05E9, 811}, +{0x05EA, 815}, +{0x05F0, 819}, +{0x05F1, 822}, +{0x05F2, 825}, +{0x060C, 828}, +{0x061B, 831}, +{0x061F, 834}, +{0x0621, 837}, +{0x0622, 848}, +{0x0623, 851}, +{0x0624, 854}, +{0x0625, 857}, +{0x0626, 860}, +{0x0627, 863}, +{0x0628, 866}, +{0x0629, 869}, +{0x062A, 872}, +{0x062B, 875}, +{0x062C, 878}, +{0x062D, 881}, +{0x062E, 884}, +{0x062F, 887}, +{0x0630, 890}, +{0x0631, 893}, +{0x0632, 897}, +{0x0633, 900}, +{0x0634, 903}, +{0x0635, 906}, +{0x0636, 909}, +{0x0637, 912}, +{0x0638, 915}, +{0x0639, 918}, +{0x063A, 921}, +{0x0640, 924}, +{0x0641, 929}, +{0x0642, 932}, +{0x0643, 935}, +{0x0644, 938}, +{0x0645, 941}, +{0x0646, 944}, +{0x0647, 947}, +{0x0648, 950}, +{0x0649, 953}, +{0x064A, 956}, +{0x064B, 959}, +{0x064C, 962}, +{0x064D, 966}, +{0x064E, 969}, +{0x064F, 973}, +{0x0650, 977}, +{0x0651, 980}, +{0x0652, 984}, +{0x0660, 987}, +{0x0661, 991}, +{0x0662, 995}, +{0x0663, 999}, +{0x0664, 1003}, +{0x0665, 1007}, +{0x0666, 1011}, +{0x0667, 1015}, +{0x0668, 1019}, +{0x0669, 1023}, +{0x066A, 1027}, +{0x066B, 1030}, +{0x066C, 1033}, +{0x066D, 1036}, +{0x0679, 1040}, +{0x067E, 1043}, +{0x0686, 1046}, +{0x0688, 1049}, +{0x0691, 1052}, +{0x0698, 1055}, 
+{0x06A4, 1058}, +{0x06AF, 1061}, +{0x06BA, 1064}, +{0x06C1, 1067}, +{0x06D2, 1070}, +{0x200C, 1073}, +{0x2015, 1076}, +{0x2017, 1079}, +{0x201B, 1082}, +{0x2025, 1085}, +{0x20A1, 1088}, +{0x20A4, 1091}, +{0x20AA, 1094}, +{0x20AC, 1099}, +{0x2105, 1102}, +{0x2113, 1105}, +{0x2116, 1108}, +{0x2126, 1111}, +{0x21A8, 1114}, +{0x21D0, 1117}, +{0x21D2, 1120}, +{0x21D4, 1123}, +{0x2200, 1126}, +{0x2203, 1129}, +{0x2206, 1132}, +{0x2207, 1135}, +{0x2209, 1138}, +{0x221F, 1141}, +{0x223C, 1144}, +{0x2245, 1147}, +{0x2282, 1150}, +{0x2283, 1153}, +{0x2286, 1156}, +{0x2287, 1159}, +{0x2295, 1162}, +{0x2297, 1165}, +{0x2310, 1168}, +{0x2320, 1171}, +{0x2321, 1174}, +{0x2591, 1177}, +{0x2592, 1180}, +{0x2593, 1183}, +{0x25A0, 1186}, +{0x25A1, 1189}, +{0x25AA, 1192}, +{0x25AB, 1195}, +{0x25AC, 1198}, +{0x25B2, 1201}, +{0x25BA, 1204}, +{0x25BC, 1207}, +{0x25C4, 1210}, +{0x25CB, 1213}, +{0x25CF, 1216}, +{0x25D8, 1219}, +{0x25D9, 1222}, +{0x25E6, 1225}, +{0x263A, 1228}, +{0x263B, 1231}, +{0x263C, 1234}, +{0x2640, 1237}, +{0x2642, 1240}, +{0x2660, 1243}, +{0x2663, 1246}, +{0x2665, 1249}, +{0x266B, 1252}, +{0xFB1F, 1255}, +{0xFB2A, 1260}, +{0xFB2B, 1264}, +{0xFB2C, 1268}, +{0xFB2D, 1271}, +{0xFB31, 1274}, +{0xFB32, 1277}, +{0xFB33, 1280}, +{0xFB34, 1283}, +{0xFB35, 1286}, +{0xFB36, 1291}, +{0xFB38, 1294}, +{0xFB39, 1297}, +{0xFB3A, 1300}, +{0xFB3B, 1303}, +{0xFB3C, 1306}, +{0xFB3E, 1309}, +{0xFB40, 1312}, +{0xFB41, 1315}, +{0xFB44, 1318}, +{0xFB46, 1321}, +{0xFB47, 1324}, +{0xFB49, 1327}, +{0xFB4A, 1330}, +{0xFB4B, 1334}, +{0xFB7C, 1338}, +{0xFEDF, 1341}, +{0xFEE7, 1345}, +{0xFEEA, 1348}, +{0xFEF3, 1351}, +{0xFEF4, 1354}, +}; + +static char *agldupnames[] = { +"space", "spacehackarabic", 0, +"bar", "verticalbar", 0, +"nbspace", "nonbreakingspace", 0, +"sfthyphen", "softhyphen", 0, +"macron", "overscore", 0, +"mu", "mu1", 0, +"middot", "periodcentered", 0, +"Cdot", "Cdotaccent", 0, +"cdot", "cdotaccent", 0, +"Dcroat", "Dslash", 0, +"dcroat", "dmacron", 0, +"Edot", "Edotaccent", 0, 
+"edot", "edotaccent", 0, +"Gdot", "Gdotaccent", 0, +"gdot", "gdotaccent", 0, +"Gcedilla", "Gcommaaccent", 0, +"gcedilla", "gcommaaccent", 0, +"Idot", "Idotaccent", 0, +"Kcedilla", "Kcommaaccent", 0, +"kcedilla", "kcommaaccent", 0, +"Lcedilla", "Lcommaaccent", 0, +"lcedilla", "lcommaaccent", 0, +"Ldot", "Ldotaccent", 0, +"ldot", "ldotaccent", 0, +"Ncedilla", "Ncommaaccent", 0, +"ncedilla", "ncommaaccent", 0, +"napostrophe", "quoterightn", 0, +"Odblacute", "Ohungarumlaut", 0, +"odblacute", "ohungarumlaut", 0, +"Rcedilla", "Rcommaaccent", 0, +"rcedilla", "rcommaaccent", 0, +"Tcedilla", "Tcommaaccent", 0, +"tcedilla", "tcommaaccent", 0, +"Udblacute", "Uhungarumlaut", 0, +"udblacute", "uhungarumlaut", 0, +"Zdot", "Zdotaccent", 0, +"zdot", "zdotaccent", 0, +"longs", "slong", 0, +"Oslashacute", "Ostrokeacute", 0, +"oslashacute", "ostrokeacute", 0, +"afii57929", "apostrophemod", 0, +"afii64937", "commareversedmod", 0, +"ilde", "tilde", 0, +"gravecmb", "gravecomb", 0, +"acutecmb", "acutecomb", 0, +"tildecmb", "tildecomb", 0, +"hookabovecomb", "hookcmb", 0, +"dotbelowcmb", "dotbelowcomb", 0, +"dialytikatonos", "dieresistonos", 0, +"sigma1", "sigmafinal", 0, +"theta1", "thetasymbolgreek", 0, +"Upsilon1", "Upsilonhooksymbol", 0, +"phi1", "phisymbolgreek", 0, +"omega1", "pisymbolgreek", 0, +"Iocyrillic", "afii10023", 0, +"Djecyrillic", "afii10051", 0, +"Gjecyrillic", "afii10052", 0, +"Ecyrillic", "afii10053", 0, +"Dzecyrillic", "afii10054", 0, +"Icyrillic", "afii10055", 0, +"Yicyrillic", "afii10056", 0, +"Jecyrillic", "afii10057", 0, +"Ljecyrillic", "afii10058", 0, +"Njecyrillic", "afii10059", 0, +"Tshecyrillic", "afii10060", 0, +"Kjecyrillic", "afii10061", 0, +"Ushortcyrillic", "afii10062", 0, +"Dzhecyrillic", "afii10145", 0, +"Acyrillic", "afii10017", 0, +"Becyrillic", "afii10018", 0, +"Vecyrillic", "afii10019", 0, +"Gecyrillic", "afii10020", 0, +"Decyrillic", "afii10021", 0, +"Iecyrillic", "afii10022", 0, +"Zhecyrillic", "afii10024", 0, +"Zecyrillic", "afii10025", 0, 
+"Iicyrillic", "afii10026", 0, +"Iishortcyrillic", "afii10027", 0, +"Kacyrillic", "afii10028", 0, +"Elcyrillic", "afii10029", 0, +"Emcyrillic", "afii10030", 0, +"Encyrillic", "afii10031", 0, +"Ocyrillic", "afii10032", 0, +"Pecyrillic", "afii10033", 0, +"Ercyrillic", "afii10034", 0, +"Escyrillic", "afii10035", 0, +"Tecyrillic", "afii10036", 0, +"Ucyrillic", "afii10037", 0, +"Efcyrillic", "afii10038", 0, +"Khacyrillic", "afii10039", 0, +"Tsecyrillic", "afii10040", 0, +"Checyrillic", "afii10041", 0, +"Shacyrillic", "afii10042", 0, +"Shchacyrillic", "afii10043", 0, +"Hardsigncyrillic", "afii10044", 0, +"Yericyrillic", "afii10045", 0, +"Softsigncyrillic", "afii10046", 0, +"Ereversedcyrillic", "afii10047", 0, +"IUcyrillic", "afii10048", 0, +"IAcyrillic", "afii10049", 0, +"acyrillic", "afii10065", 0, +"afii10066", "becyrillic", 0, +"afii10067", "vecyrillic", 0, +"afii10068", "gecyrillic", 0, +"afii10069", "decyrillic", 0, +"afii10070", "iecyrillic", 0, +"afii10072", "zhecyrillic", 0, +"afii10073", "zecyrillic", 0, +"afii10074", "iicyrillic", 0, +"afii10075", "iishortcyrillic", 0, +"afii10076", "kacyrillic", 0, +"afii10077", "elcyrillic", 0, +"afii10078", "emcyrillic", 0, +"afii10079", "encyrillic", 0, +"afii10080", "ocyrillic", 0, +"afii10081", "pecyrillic", 0, +"afii10082", "ercyrillic", 0, +"afii10083", "escyrillic", 0, +"afii10084", "tecyrillic", 0, +"afii10085", "ucyrillic", 0, +"afii10086", "efcyrillic", 0, +"afii10087", "khacyrillic", 0, +"afii10088", "tsecyrillic", 0, +"afii10089", "checyrillic", 0, +"afii10090", "shacyrillic", 0, +"afii10091", "shchacyrillic", 0, +"afii10092", "hardsigncyrillic", 0, +"afii10093", "yericyrillic", 0, +"afii10094", "softsigncyrillic", 0, +"afii10095", "ereversedcyrillic", 0, +"afii10096", "iucyrillic", 0, +"afii10097", "iacyrillic", 0, +"afii10071", "iocyrillic", 0, +"afii10099", "djecyrillic", 0, +"afii10100", "gjecyrillic", 0, +"afii10101", "ecyrillic", 0, +"afii10102", "dzecyrillic", 0, +"afii10103", "icyrillic", 0, +"afii10104", 
"yicyrillic", 0, +"afii10105", "jecyrillic", 0, +"afii10106", "ljecyrillic", 0, +"afii10107", "njecyrillic", 0, +"afii10108", "tshecyrillic", 0, +"afii10109", "kjecyrillic", 0, +"afii10110", "ushortcyrillic", 0, +"afii10193", "dzhecyrillic", 0, +"Yatcyrillic", "afii10146", 0, +"afii10194", "yatcyrillic", 0, +"Fitacyrillic", "afii10147", 0, +"afii10195", "fitacyrillic", 0, +"Izhitsacyrillic", "afii10148", 0, +"afii10196", "izhitsacyrillic", 0, +"Gheupturncyrillic", "afii10050", 0, +"afii10098", "gheupturncyrillic", 0, +"afii10846", "schwacyrillic", 0, +"etnahtafoukhhebrew", "etnahtafoukhlefthebrew", "etnahtahebrew", "etnahtalefthebrew", 0, +"tipehahebrew", "tipehalefthebrew", 0, +"reviahebrew", "reviamugrashhebrew", 0, +"tevirhebrew", "tevirlefthebrew", 0, +"munahhebrew", "munahlefthebrew", 0, +"mahapakhhebrew", "mahapakhlefthebrew", 0, +"merkhahebrew", "merkhalefthebrew", 0, +"merkhakefulahebrew", "merkhakefulalefthebrew", 0, +"dargahebrew", "dargalefthebrew", 0, +"yerahbenyomohebrew", "yerahbenyomolefthebrew", 0, +"afii57799", "sheva", "sheva115", "sheva15", "sheva22", "sheva2e", "shevahebrew", "shevanarrowhebrew", "shevaquarterhebrew", "shevawidehebrew", 0, +"afii57801", "hatafsegol", "hatafsegol17", "hatafsegol24", "hatafsegol30", "hatafsegolhebrew", "hatafsegolnarrowhebrew", "hatafsegolquarterhebrew", "hatafsegolwidehebrew", 0, +"afii57800", "hatafpatah", "hatafpatah16", "hatafpatah23", "hatafpatah2f", "hatafpatahhebrew", "hatafpatahnarrowhebrew", "hatafpatahquarterhebrew", "hatafpatahwidehebrew", 0, +"afii57802", "hatafqamats", "hatafqamats1b", "hatafqamats28", "hatafqamats34", "hatafqamatshebrew", "hatafqamatsnarrowhebrew", "hatafqamatsquarterhebrew", "hatafqamatswidehebrew", 0, +"afii57793", "hiriq", "hiriq14", "hiriq21", "hiriq2d", "hiriqhebrew", "hiriqnarrowhebrew", "hiriqquarterhebrew", "hiriqwidehebrew", 0, +"afii57794", "tsere", "tsere12", "tsere1e", "tsere2b", "tserehebrew", "tserenarrowhebrew", "tserequarterhebrew", "tserewidehebrew", 0, +"afii57795", 
"segol", "segol13", "segol1f", "segol2c", "segolhebrew", "segolnarrowhebrew", "segolquarterhebrew", "segolwidehebrew", 0, +"afii57798", "patah", "patah11", "patah1d", "patah2a", "patahhebrew", "patahnarrowhebrew", "patahquarterhebrew", "patahwidehebrew", 0, +"afii57797", "qamats", "qamats10", "qamats1a", "qamats1c", "qamats27", "qamats29", "qamats33", "qamatsde", "qamatshebrew", "qamatsnarrowhebrew", "qamatsqatanhebrew", "qamatsqatannarrowhebrew", "qamatsqatanquarterhebrew", "qamatsqatanwidehebrew", "qamatsquarterhebrew", "qamatswidehebrew", 0, +"afii57806", "holam", "holam19", "holam26", "holam32", "holamhebrew", "holamnarrowhebrew", "holamquarterhebrew", "holamwidehebrew", 0, +"afii57796", "qubuts", "qubuts18", "qubuts25", "qubuts31", "qubutshebrew", "qubutsnarrowhebrew", "qubutsquarterhebrew", "qubutswidehebrew", 0, +"afii57807", "dagesh", "dageshhebrew", 0, +"afii57839", "siluqhebrew", "siluqlefthebrew", 0, +"afii57645", "maqafhebrew", 0, +"afii57841", "rafe", "rafehebrew", 0, +"afii57842", "paseqhebrew", 0, +"afii57804", "shindothebrew", 0, +"afii57803", "sindothebrew", 0, +"afii57658", "sofpasuqhebrew", 0, +"afii57664", "alef", "alefhebrew", 0, +"afii57665", "bet", "bethebrew", 0, +"afii57666", "gimel", "gimelhebrew", 0, +"afii57667", "dalet", "dalethatafpatah", "dalethatafpatahhebrew", "dalethatafsegol", "dalethatafsegolhebrew", "dalethebrew", "dalethiriq", "dalethiriqhebrew", "daletholam", "daletholamhebrew", "daletpatah", "daletpatahhebrew", "daletqamats", "daletqamatshebrew", "daletqubuts", "daletqubutshebrew", "daletsegol", "daletsegolhebrew", "daletsheva", "daletshevahebrew", "dalettsere", "dalettserehebrew", 0, +"afii57668", "he", "hehebrew", 0, +"afii57669", "vav", "vavhebrew", 0, +"afii57670", "zayin", "zayinhebrew", 0, +"afii57671", "het", "hethebrew", 0, +"afii57672", "tet", "tethebrew", 0, +"afii57673", "yod", "yodhebrew", 0, +"afii57674", "finalkaf", "finalkafhebrew", "finalkafqamats", "finalkafqamatshebrew", "finalkafsheva", 
"finalkafshevahebrew", 0, +"afii57675", "kaf", "kafhebrew", 0, +"afii57676", "lamed", "lamedhebrew", "lamedholam", "lamedholamdagesh", "lamedholamdageshhebrew", "lamedholamhebrew", 0, +"afii57677", "finalmem", "finalmemhebrew", 0, +"afii57678", "mem", "memhebrew", 0, +"afii57679", "finalnun", "finalnunhebrew", 0, +"afii57680", "nun", "nunhebrew", 0, +"afii57681", "samekh", "samekhhebrew", 0, +"afii57682", "ayin", "ayinhebrew", 0, +"afii57683", "finalpe", "finalpehebrew", 0, +"afii57684", "pe", "pehebrew", 0, +"afii57685", "finaltsadi", "finaltsadihebrew", 0, +"afii57686", "tsadi", "tsadihebrew", 0, +"afii57687", "qof", "qofhatafpatah", "qofhatafpatahhebrew", "qofhatafsegol", "qofhatafsegolhebrew", "qofhebrew", "qofhiriq", "qofhiriqhebrew", "qofholam", "qofholamhebrew", "qofpatah", "qofpatahhebrew", "qofqamats", "qofqamatshebrew", "qofqubuts", "qofqubutshebrew", "qofsegol", "qofsegolhebrew", "qofsheva", "qofshevahebrew", "qoftsere", "qoftserehebrew", 0, +"afii57688", "resh", "reshhatafpatah", "reshhatafpatahhebrew", "reshhatafsegol", "reshhatafsegolhebrew", "reshhebrew", "reshhiriq", "reshhiriqhebrew", "reshholam", "reshholamhebrew", "reshpatah", "reshpatahhebrew", "reshqamats", "reshqamatshebrew", "reshqubuts", "reshqubutshebrew", "reshsegol", "reshsegolhebrew", "reshsheva", "reshshevahebrew", "reshtsere", "reshtserehebrew", 0, +"afii57689", "shin", "shinhebrew", 0, +"afii57690", "tav", "tavhebrew", 0, +"afii57716", "vavvavhebrew", 0, +"afii57717", "vavyodhebrew", 0, +"afii57718", "yodyodhebrew", 0, +"afii57388", "commaarabic", 0, +"afii57403", "semicolonarabic", 0, +"afii57407", "questionarabic", 0, +"afii57409", "hamzaarabic", "hamzadammaarabic", "hamzadammatanarabic", "hamzafathaarabic", "hamzafathatanarabic", "hamzalowarabic", "hamzalowkasraarabic", "hamzalowkasratanarabic", "hamzasukunarabic", 0, +"afii57410", "alefmaddaabovearabic", 0, +"afii57411", "alefhamzaabovearabic", 0, +"afii57412", "wawhamzaabovearabic", 0, +"afii57413", "alefhamzabelowarabic", 0, 
+"afii57414", "yehhamzaabovearabic", 0, +"afii57415", "alefarabic", 0, +"afii57416", "beharabic", 0, +"afii57417", "tehmarbutaarabic", 0, +"afii57418", "teharabic", 0, +"afii57419", "theharabic", 0, +"afii57420", "jeemarabic", 0, +"afii57421", "haharabic", 0, +"afii57422", "khaharabic", 0, +"afii57423", "dalarabic", 0, +"afii57424", "thalarabic", 0, +"afii57425", "reharabic", "rehyehaleflamarabic", 0, +"afii57426", "zainarabic", 0, +"afii57427", "seenarabic", 0, +"afii57428", "sheenarabic", 0, +"afii57429", "sadarabic", 0, +"afii57430", "dadarabic", 0, +"afii57431", "taharabic", 0, +"afii57432", "zaharabic", 0, +"afii57433", "ainarabic", 0, +"afii57434", "ghainarabic", 0, +"afii57440", "kashidaautoarabic", "kashidaautonosidebearingarabic", "tatweelarabic", 0, +"afii57441", "feharabic", 0, +"afii57442", "qafarabic", 0, +"afii57443", "kafarabic", 0, +"afii57444", "lamarabic", 0, +"afii57445", "meemarabic", 0, +"afii57446", "noonarabic", 0, +"afii57470", "heharabic", 0, +"afii57448", "wawarabic", 0, +"afii57449", "alefmaksuraarabic", 0, +"afii57450", "yeharabic", 0, +"afii57451", "fathatanarabic", 0, +"afii57452", "dammatanaltonearabic", "dammatanarabic", 0, +"afii57453", "kasratanarabic", 0, +"afii57454", "fathaarabic", "fathalowarabic", 0, +"afii57455", "dammaarabic", "dammalowarabic", 0, +"afii57456", "kasraarabic", 0, +"afii57457", "shaddaarabic", "shaddafathatanarabic", 0, +"afii57458", "sukunarabic", 0, +"afii57392", "zeroarabic", "zerohackarabic", 0, +"afii57393", "onearabic", "onehackarabic", 0, +"afii57394", "twoarabic", "twohackarabic", 0, +"afii57395", "threearabic", "threehackarabic", 0, +"afii57396", "fourarabic", "fourhackarabic", 0, +"afii57397", "fivearabic", "fivehackarabic", 0, +"afii57398", "sixarabic", "sixhackarabic", 0, +"afii57399", "sevenarabic", "sevenhackarabic", 0, +"afii57400", "eightarabic", "eighthackarabic", 0, +"afii57401", "ninearabic", "ninehackarabic", 0, +"afii57381", "percentarabic", 0, +"decimalseparatorarabic", 
"decimalseparatorpersian", 0, +"thousandsseparatorarabic", "thousandsseparatorpersian", 0, +"afii63167", "asteriskaltonearabic", "asteriskarabic", 0, +"afii57511", "tteharabic", 0, +"afii57506", "peharabic", 0, +"afii57507", "tcheharabic", 0, +"afii57512", "ddalarabic", 0, +"afii57513", "rreharabic", 0, +"afii57508", "jeharabic", 0, +"afii57505", "veharabic", 0, +"afii57509", "gafarabic", 0, +"afii57514", "noonghunnaarabic", 0, +"haaltonearabic", "hehaltonearabic", 0, +"afii57519", "yehbarreearabic", 0, +"afii61664", "zerowidthnonjoiner", 0, +"afii00208", "horizontalbar", 0, +"dbllowline", "underscoredbl", 0, +"quoteleftreversed", "quotereversed", 0, +"twodotenleader", "twodotleader", 0, +"colonmonetary", "colonsign", 0, +"afii08941", "lira", 0, +"afii57636", "newsheqelsign", "sheqel", "sheqelhebrew", 0, +"Euro", "euro", 0, +"afii61248", "careof", 0, +"afii61289", "lsquare", 0, +"afii61352", "numero", 0, +"Ohm", "Omega", 0, +"arrowupdnbse", "arrowupdownbase", 0, +"arrowdblleft", "arrowleftdbl", 0, +"arrowdblright", "dblarrowright", 0, +"arrowdblboth", "dblarrowleft", 0, +"forall", "universal", 0, +"existential", "thereexists", 0, +"Delta", "increment", 0, +"gradient", "nabla", 0, +"notelement", "notelementof", 0, +"orthogonal", "rightangle", 0, +"similar", "tildeoperator", 0, +"approximatelyequal", "congruent", 0, +"propersubset", "subset", 0, +"propersuperset", "superset", 0, +"reflexsubset", "subsetorequal", 0, +"reflexsuperset", "supersetorequal", 0, +"circleplus", "pluscircle", 0, +"circlemultiply", "timescircle", 0, +"logicalnotreversed", "revlogicalnot", 0, +"integraltop", "integraltp", 0, +"integralbottom", "integralbt", 0, +"ltshade", "shadelight", 0, +"shade", "shademedium", 0, +"dkshade", "shadedark", 0, +"blacksquare", "filledbox", 0, +"H22073", "whitesquare", 0, +"H18543", "blacksmallsquare", 0, +"H18551", "whitesmallsquare", 0, +"blackrectangle", "filledrect", 0, +"blackuppointingtriangle", "triagup", 0, +"blackrightpointingpointer", "triagrt", 0, 
+"blackdownpointingtriangle", "triagdn", 0, +"blackleftpointingpointer", "triaglf", 0, +"circle", "whitecircle", 0, +"H18533", "blackcircle", 0, +"bulletinverse", "invbullet", 0, +"invcircle", "whitecircleinverse", 0, +"openbullet", "whitebullet", 0, +"smileface", "whitesmilingface", 0, +"blacksmilingface", "invsmileface", 0, +"compass", "sun", 0, +"female", "venus", 0, +"male", "mars", 0, +"spade", "spadesuitblack", 0, +"club", "clubsuitblack", 0, +"heart", "heartsuitblack", 0, +"eighthnotebeamed", "musicalnotedbl", 0, +"afii57705", "doubleyodpatah", "doubleyodpatahhebrew", "yodyodpatahhebrew", 0, +"afii57694", "shinshindot", "shinshindothebrew", 0, +"afii57695", "shinsindot", "shinsindothebrew", 0, +"shindageshshindot", "shindageshshindothebrew", 0, +"shindageshsindot", "shindageshsindothebrew", 0, +"betdagesh", "betdageshhebrew", 0, +"gimeldagesh", "gimeldageshhebrew", 0, +"daletdagesh", "daletdageshhebrew", 0, +"hedagesh", "hedageshhebrew", 0, +"afii57723", "vavdagesh", "vavdagesh65", "vavdageshhebrew", 0, +"zayindagesh", "zayindageshhebrew", 0, +"tetdagesh", "tetdageshhebrew", 0, +"yoddagesh", "yoddageshhebrew", 0, +"finalkafdagesh", "finalkafdageshhebrew", 0, +"kafdagesh", "kafdageshhebrew", 0, +"lameddagesh", "lameddageshhebrew", 0, +"memdagesh", "memdageshhebrew", 0, +"nundagesh", "nundageshhebrew", 0, +"samekhdagesh", "samekhdageshhebrew", 0, +"pedagesh", "pedageshhebrew", 0, +"tsadidagesh", "tsadidageshhebrew", 0, +"qofdagesh", "qofdageshhebrew", 0, +"shindagesh", "shindageshhebrew", 0, +"tavdages", "tavdagesh", "tavdageshhebrew", 0, +"afii57700", "vavholam", "vavholamhebrew", 0, +"tchehinitialarabic", "tchehmeeminitialarabic", 0, +"laminitialarabic", "lammeemjeeminitialarabic", "lammeemkhahinitialarabic", 0, +"noonhehinitialarabic", "nooninitialarabic", 0, +"hehfinalalttwoarabic", "hehfinalarabic", 0, +"alefmaksurainitialarabic", "yehinitialarabic", 0, +"alefmaksuramedialarabic", "yehmedialarabic", 0, +}; + +#include "fitz.h" +#include "mupdf.h" + +int 
pdf_lookupagl(char *name) +{ + char buf[64]; + char *p; + int l = 0; + int r = nelem(aglcodes) - 1; + + fz_strlcpy(buf, name, sizeof buf); + + /* kill anything after first period and underscore */ + p = strchr(buf, '.'); + if (p) p[0] = 0; + p = strchr(buf, '_'); + if (p) p[0] = 0; + + while (l <= r) + { + int m = (l + r) >> 1; + int c = strcmp(buf, aglcodes[m].name); + if (c < 0) + r = m - 1; + else if (c > 0) + l = m + 1; + else + return aglcodes[m].ucs; + } + + if (strstr(buf, "uni") == buf) + return strtol(buf + 3, nil, 16); + else if (strstr(buf, "u") == buf) + return strtol(buf + 1, nil, 16); + else if (strstr(buf, "a") == buf && strlen(buf) >= 3) + return strtol(buf + 1, nil, 10); + + return 0; +} + +static char *aglnoname[1] = { 0 }; + +char **pdf_lookupaglnames(int ucs) +{ + int l = 0; + int r = nelem(agldupcodes) - 1; + while (l <= r) + { + int m = (l + r) >> 1; + if (ucs < agldupcodes[m].ucs) + r = m - 1; + else if (ucs > agldupcodes[m].ucs) + l = m + 1; + else + return agldupnames + agldupcodes[m].ofs; + } + return aglnoname; +} diff --git a/pdf/pdf_fontenc.c b/pdf/pdf_fontenc.c new file mode 100644 index 00000000..0f72cb6a --- /dev/null +++ b/pdf/pdf_fontenc.c @@ -0,0 +1,372 @@ +#include "fitz.h" +#include "mupdf.h" + +#define _notdef nil + +void pdf_loadencoding(char **estrings, char *encoding) +{ + char **bstrings = nil; + int i; + + if (!strcmp(encoding, "MacRomanEncoding")) + bstrings = (char**) pdf_macroman; + if (!strcmp(encoding, "MacExpertEncoding")) + bstrings = (char**) pdf_macexpert; + if (!strcmp(encoding, "WinAnsiEncoding")) + bstrings = (char**) pdf_winansi; + if (!strcmp(encoding, "StandardEncoding")) + bstrings = (char**) pdf_standard; + + if (bstrings) + for (i = 0; i < 256; i++) + estrings[i] = bstrings[i]; +} + +const unsigned short pdf_docencoding[256] = +{ + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0009, 0x000A, 0x0000, 0x0000, 0x000D, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
0x0000, 0x0000, 0x0000, + 0x02d8, 0x02c7, 0x02c6, 0x02d9, 0x02dd, 0x02db, 0x02da, 0x02dc, + 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, + 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f, + 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, + 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f, + 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, + 0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f, + 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, + 0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f, + 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, + 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f, + 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, + 0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x0000, + 0x2022, 0x2020, 0x2021, 0x2026, 0x2014, 0x2013, 0x0192, 0x2044, + 0x2039, 0x203a, 0x2212, 0x2030, 0x201e, 0x201c, 0x201d, 0x2018, + 0x2019, 0x201a, 0x2122, 0xfb01, 0xfb02, 0x0141, 0x0152, 0x0160, + 0x0178, 0x017d, 0x0131, 0x0142, 0x0153, 0x0161, 0x017e, 0x0000, + 0x20ac, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, + 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x0000, 0x00ae, 0x00af, + 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7, + 0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf, + 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7, + 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, + 0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7, + 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df, + 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, + 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, + 0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7, + 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff +}; + +const char * const pdf_macroman[256] = { _notdef, 
_notdef, + _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, + _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, + _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, + _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, + _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, + "space", "exclam", "quotedbl", "numbersign", "dollar", "percent", + "ampersand", "quotesingle", "parenleft", "parenright", "asterisk", + "plus", "comma", "hyphen", "period", "slash", "zero", "one", "two", + "three", "four", "five", "six", "seven", "eight", "nine", "colon", + "semicolon", "less", "equal", "greater", "question", "at", "A", + "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", + "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", + "bracketleft", "backslash", "bracketright", "asciicircum", "underscore", + "grave", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", + "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", + "y", "z", "braceleft", "bar", "braceright", "asciitilde", _notdef, + "Adieresis", "Aring", "Ccedilla", "Eacute", "Ntilde", "Odieresis", + "Udieresis", "aacute", "agrave", "acircumflex", "adieresis", "atilde", + "aring", "ccedilla", "eacute", "egrave", "ecircumflex", "edieresis", + "iacute", "igrave", "icircumflex", "idieresis", "ntilde", "oacute", + "ograve", "ocircumflex", "odieresis", "otilde", "uacute", "ugrave", + "ucircumflex", "udieresis", "dagger", "degree", "cent", "sterling", + "section", "bullet", "paragraph", "germandbls", "registered", + "copyright", "trademark", "acute", "dieresis", _notdef, "AE", + "Oslash", _notdef, "plusminus", _notdef, _notdef, "yen", "mu", + _notdef, _notdef, _notdef, _notdef, _notdef, "ordfeminine", + "ordmasculine", _notdef, "ae", "oslash", "questiondown", "exclamdown", + "logicalnot", _notdef, "florin", _notdef, _notdef, "guillemotleft", + "guillemotright", "ellipsis", "space", "Agrave", "Atilde", "Otilde", + "OE", "oe", "endash", "emdash", "quotedblleft", "quotedblright", + 
"quoteleft", "quoteright", "divide", _notdef, "ydieresis", + "Ydieresis", "fraction", "currency", "guilsinglleft", "guilsinglright", + "fi", "fl", "daggerdbl", "periodcentered", "quotesinglbase", + "quotedblbase", "perthousand", "Acircumflex", "Ecircumflex", "Aacute", + "Edieresis", "Egrave", "Iacute", "Icircumflex", "Idieresis", "Igrave", + "Oacute", "Ocircumflex", _notdef, "Ograve", "Uacute", "Ucircumflex", + "Ugrave", "dotlessi", "circumflex", "tilde", "macron", "breve", + "dotaccent", "ring", "cedilla", "hungarumlaut", "ogonek", "caron" }; + +const char * const pdf_macexpert[256] = { _notdef, _notdef, + _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, + _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, + _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, + _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, + _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, + "space", "exclamsmall", "Hungarumlautsmall", "centoldstyle", + "dollaroldstyle", "dollarsuperior", "ampersandsmall", "Acutesmall", + "parenleftsuperior", "parenrightsuperior", "twodotenleader", + "onedotenleader", "comma", "hyphen", "period", "fraction", + "zerooldstyle", "oneoldstyle", "twooldstyle", "threeoldstyle", + "fouroldstyle", "fiveoldstyle", "sixoldstyle", "sevenoldstyle", + "eightoldstyle", "nineoldstyle", "colon", "semicolon", _notdef, + "threequartersemdash", _notdef, "questionsmall", _notdef, + _notdef, _notdef, _notdef, "Ethsmall", _notdef, _notdef, + "onequarter", "onehalf", "threequarters", "oneeighth", "threeeighths", + "fiveeighths", "seveneighths", "onethird", "twothirds", _notdef, + _notdef, _notdef, _notdef, _notdef, _notdef, "ff", "fi", + "fl", "ffi", "ffl", "parenleftinferior", _notdef, "parenrightinferior", + "Circumflexsmall", "hypheninferior", "Gravesmall", "Asmall", "Bsmall", + "Csmall", "Dsmall", "Esmall", "Fsmall", "Gsmall", "Hsmall", "Ismall", + "Jsmall", "Ksmall", "Lsmall", "Msmall", "Nsmall", "Osmall", "Psmall", + "Qsmall", "Rsmall", "Ssmall", "Tsmall", 
"Usmall", "Vsmall", "Wsmall", + "Xsmall", "Ysmall", "Zsmall", "colonmonetary", "onefitted", "rupiah", + "Tildesmall", _notdef, _notdef, "asuperior", "centsuperior", + _notdef, _notdef, _notdef, _notdef, "Aacutesmall", + "Agravesmall", "Acircumflexsmall", "Adieresissmall", "Atildesmall", + "Aringsmall", "Ccedillasmall", "Eacutesmall", "Egravesmall", + "Ecircumflexsmall", "Edieresissmall", "Iacutesmall", "Igravesmall", + "Icircumflexsmall", "Idieresissmall", "Ntildesmall", "Oacutesmall", + "Ogravesmall", "Ocircumflexsmall", "Odieresissmall", "Otildesmall", + "Uacutesmall", "Ugravesmall", "Ucircumflexsmall", "Udieresissmall", + _notdef, "eightsuperior", "fourinferior", "threeinferior", + "sixinferior", "eightinferior", "seveninferior", "Scaronsmall", + _notdef, "centinferior", "twoinferior", _notdef, "Dieresissmall", + _notdef, "Caronsmall", "osuperior", "fiveinferior", _notdef, + "commainferior", "periodinferior", "Yacutesmall", _notdef, + "dollarinferior", _notdef, _notdef, "Thornsmall", _notdef, + "nineinferior", "zeroinferior", "Zcaronsmall", "AEsmall", "Oslashsmall", + "questiondownsmall", "oneinferior", "Lslashsmall", _notdef, + _notdef, _notdef, _notdef, _notdef, _notdef, "Cedillasmall", + _notdef, _notdef, _notdef, _notdef, _notdef, "OEsmall", + "figuredash", "hyphensuperior", _notdef, _notdef, _notdef, + _notdef, "exclamdownsmall", _notdef, "Ydieresissmall", _notdef, + "onesuperior", "twosuperior", "threesuperior", "foursuperior", + "fivesuperior", "sixsuperior", "sevensuperior", "ninesuperior", + "zerosuperior", _notdef, "esuperior", "rsuperior", "tsuperior", + _notdef, _notdef, "isuperior", "ssuperior", "dsuperior", + _notdef, _notdef, _notdef, _notdef, _notdef, "lsuperior", + "Ogoneksmall", "Brevesmall", "Macronsmall", "bsuperior", "nsuperior", + "msuperior", "commasuperior", "periodsuperior", "Dotaccentsmall", + "Ringsmall", _notdef, _notdef, _notdef, _notdef }; + +const char * const pdf_winansi[256] = { _notdef, _notdef, _notdef, + _notdef, _notdef, 
_notdef, _notdef, _notdef, _notdef, + _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, + _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, + _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, + _notdef, _notdef, _notdef, _notdef, _notdef, "space", + "exclam", "quotedbl", "numbersign", "dollar", "percent", "ampersand", + "quotesingle", "parenleft", "parenright", "asterisk", "plus", + "comma", "hyphen", "period", "slash", "zero", "one", "two", "three", + "four", "five", "six", "seven", "eight", "nine", "colon", "semicolon", + "less", "equal", "greater", "question", "at", "A", "B", "C", "D", + "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", + "R", "S", "T", "U", "V", "W", "X", "Y", "Z", "bracketleft", + "backslash", "bracketright", "asciicircum", "underscore", "grave", + "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", + "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", + "braceleft", "bar", "braceright", "asciitilde", "bullet", "Euro", + "bullet", "quotesinglbase", "florin", "quotedblbase", "ellipsis", + "dagger", "daggerdbl", "circumflex", "perthousand", "Scaron", + "guilsinglleft", "OE", "bullet", "Zcaron", "bullet", "bullet", + "quoteleft", "quoteright", "quotedblleft", "quotedblright", "bullet", + "endash", "emdash", "tilde", "trademark", "scaron", "guilsinglright", + "oe", "bullet", "zcaron", "Ydieresis", "space", "exclamdown", "cent", + "sterling", "currency", "yen", "brokenbar", "section", "dieresis", + "copyright", "ordfeminine", "guillemotleft", "logicalnot", "hyphen", + "registered", "macron", "degree", "plusminus", "twosuperior", + "threesuperior", "acute", "mu", "paragraph", "periodcentered", + "cedilla", "onesuperior", "ordmasculine", "guillemotright", + "onequarter", "onehalf", "threequarters", "questiondown", "Agrave", + "Aacute", "Acircumflex", "Atilde", "Adieresis", "Aring", "AE", + "Ccedilla", "Egrave", "Eacute", "Ecircumflex", "Edieresis", "Igrave", + "Iacute", "Icircumflex", "Idieresis", 
"Eth", "Ntilde", "Ograve", + "Oacute", "Ocircumflex", "Otilde", "Odieresis", "multiply", "Oslash", + "Ugrave", "Uacute", "Ucircumflex", "Udieresis", "Yacute", "Thorn", + "germandbls", "agrave", "aacute", "acircumflex", "atilde", "adieresis", + "aring", "ae", "ccedilla", "egrave", "eacute", "ecircumflex", + "edieresis", "igrave", "iacute", "icircumflex", "idieresis", "eth", + "ntilde", "ograve", "oacute", "ocircumflex", "otilde", "odieresis", + "divide", "oslash", "ugrave", "uacute", "ucircumflex", "udieresis", + "yacute", "thorn", "ydieresis" }; + +const char * const pdf_standard[256] = { _notdef, _notdef, + _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, + _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, + _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, + _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, + _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, + "space", "exclam", "quotedbl", "numbersign", "dollar", "percent", + "ampersand", "quoteright", "parenleft", "parenright", "asterisk", + "plus", "comma", "hyphen", "period", "slash", "zero", "one", "two", + "three", "four", "five", "six", "seven", "eight", "nine", "colon", + "semicolon", "less", "equal", "greater", "question", "at", "A", + "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", + "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", + "bracketleft", "backslash", "bracketright", "asciicircum", "underscore", + "quoteleft", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", + "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", + "y", "z", "braceleft", "bar", "braceright", "asciitilde", _notdef, + _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, + _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, + _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, + _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, + _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, + _notdef, _notdef, _notdef, "exclamdown", "cent", "sterling", + 
"fraction", "yen", "florin", "section", "currency", "quotesingle", + "quotedblleft", "guillemotleft", "guilsinglleft", "guilsinglright", + "fi", "fl", _notdef, "endash", "dagger", "daggerdbl", "periodcentered", + _notdef, "paragraph", "bullet", "quotesinglbase", "quotedblbase", + "quotedblright", "guillemotright", "ellipsis", "perthousand", + _notdef, "questiondown", _notdef, "grave", "acute", "circumflex", + "tilde", "macron", "breve", "dotaccent", "dieresis", _notdef, + "ring", "cedilla", _notdef, "hungarumlaut", "ogonek", "caron", + "emdash", _notdef, _notdef, _notdef, _notdef, _notdef, + _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, + _notdef, _notdef, _notdef, _notdef, _notdef, "AE", + _notdef, "ordfeminine", _notdef, _notdef, _notdef, _notdef, + "Lslash", "Oslash", "OE", "ordmasculine", _notdef, _notdef, + _notdef, _notdef, _notdef, "ae", _notdef, _notdef, + _notdef, "dotlessi", _notdef, _notdef, "lslash", "oslash", + "oe", "germandbls", _notdef, _notdef, _notdef, _notdef }; + +#if 0 + +const char * const pdf_expert[256] = { _notdef, _notdef, _notdef, + _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, + _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, + _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, + _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, + _notdef, _notdef, _notdef, _notdef, _notdef, "space", + "exclamsmall", "Hungarumlautsmall", _notdef, "dollaroldstyle", + "dollarsuperior", "ampersandsmall", "Acutesmall", "parenleftsuperior", + "parenrightsuperior", "twodotenleader", "onedotenleader", "comma", + "hyphen", "period", "fraction", "zerooldstyle", "oneoldstyle", + "twooldstyle", "threeoldstyle", "fouroldstyle", "fiveoldstyle", + "sixoldstyle", "sevenoldstyle", "eightoldstyle", "nineoldstyle", + "colon", "semicolon", "commasuperior", "threequartersemdash", + "periodsuperior", "questionsmall", _notdef, "asuperior", "bsuperior", + "centsuperior", "dsuperior", "esuperior", _notdef, _notdef, + _notdef, "isuperior", _notdef, 
_notdef, "lsuperior", "msuperior", + "nsuperior", "osuperior", _notdef, _notdef, "rsuperior", + "ssuperior", "tsuperior", _notdef, "ff", "fi", "fl", "ffi", "ffl", + "parenleftinferior", _notdef, "parenrightinferior", "Circumflexsmall", + "hyphensuperior", "Gravesmall", "Asmall", "Bsmall", "Csmall", + "Dsmall", "Esmall", "Fsmall", "Gsmall", "Hsmall", "Ismall", "Jsmall", + "Ksmall", "Lsmall", "Msmall", "Nsmall", "Osmall", "Psmall", "Qsmall", + "Rsmall", "Ssmall", "Tsmall", "Usmall", "Vsmall", "Wsmall", "Xsmall", + "Ysmall", "Zsmall", "colonmonetary", "onefitted", "rupiah", + "Tildesmall", _notdef, _notdef, _notdef, _notdef, _notdef, + _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, + _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, + _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, + _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, + _notdef, _notdef, _notdef, _notdef, _notdef, "exclamdownsmall", + "centoldstyle", "Lslashsmall", _notdef, _notdef, "Scaronsmall", + "Zcaronsmall", "Dieresissmall", "Brevesmall", "Caronsmall", _notdef, + "Dotaccentsmall", _notdef, _notdef, "Macronsmall", _notdef, + _notdef, "figuredash", "hypheninferior", _notdef, _notdef, + "Ogoneksmall", "Ringsmall", "Cedillasmall", _notdef, _notdef, + _notdef, "onequarter", "onehalf", "threequarters", "questiondownsmall", + "oneeighth", "threeeighths", "fiveeighths", "seveneighths", "onethird", + "twothirds", _notdef, _notdef, "zerosuperior", "onesuperior", + "twosuperior", "threesuperior", "foursuperior", "fivesuperior", + "sixsuperior", "sevensuperior", "eightsuperior", "ninesuperior", + "zeroinferior", "oneinferior", "twoinferior", "threeinferior", + "fourinferior", "fiveinferior", "sixinferior", "seveninferior", + "eightinferior", "nineinferior", "centinferior", "dollarinferior", + "periodinferior", "commainferior", "Agravesmall", "Aacutesmall", + "Acircumflexsmall", "Atildesmall", "Adieresissmall", "Aringsmall", + "AEsmall", "Ccedillasmall", "Egravesmall", "Eacutesmall", + 
"Ecircumflexsmall", "Edieresissmall", "Igravesmall", "Iacutesmall", + "Icircumflexsmall", "Idieresissmall", "Ethsmall", "Ntildesmall", + "Ogravesmall", "Oacutesmall", "Ocircumflexsmall", "Otildesmall", + "Odieresissmall", "OEsmall", "Oslashsmall", "Ugravesmall", + "Uacutesmall", "Ucircumflexsmall", "Udieresissmall", "Yacutesmall", + "Thornsmall", "Ydieresissmall" }; + +const char * const pdf_symbol[256] = { _notdef, _notdef, _notdef, + _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, + _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, + _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, + _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, + _notdef, _notdef, _notdef, _notdef, _notdef, "space", + "exclam", "universal", "numbersign", "existential", "percent", + "ampersand", "suchthat", "parenleft", "parenright", "asteriskmath", + "plus", "comma", "minus", "period", "slash", "zero", "one", "two", + "three", "four", "five", "six", "seven", "eight", "nine", "colon", + "semicolon", "less", "equal", "greater", "question", "congruent", + "Alpha", "Beta", "Chi", "Delta", "Epsilon", "Phi", "Gamma", "Eta", + "Iota", "theta1", "Kappa", "Lambda", "Mu", "Nu", "Omicron", "Pi", + "Theta", "Rho", "Sigma", "Tau", "Upsilon", "sigma1", "Omega", "Xi", + "Psi", "Zeta", "bracketleft", "therefore", "bracketright", + "perpendicular", "underscore", "radicalex", "alpha", "beta", "chi", + "delta", "epsilon", "phi", "gamma", "eta", "iota", "phi1", "kappa", + "lambda", "mu", "nu", "omicron", "pi", "theta", "rho", "sigma", + "tau", "upsilon", "omega1", "omega", "xi", "psi", "zeta", "braceleft", + "bar", "braceright", "similar", _notdef, _notdef, _notdef, + _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, + _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, + _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, + _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, + _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, + _notdef, "Upsilon1", "minute", "lessequal", 
"fraction", "infinity", + "florin", "club", "diamond", "heart", "spade", "arrowboth", + "arrowleft", "arrowup", "arrowright", "arrowdown", "degree", + "plusminus", "second", "greaterequal", "multiply", "proportional", + "partialdiff", "bullet", "divide", "notequal", "equivalence", + "approxequal", "ellipsis", "arrowvertex", "arrowhorizex", + "carriagereturn", "aleph", "Ifraktur", "Rfraktur", "weierstrass", + "circlemultiply", "circleplus", "emptyset", "intersection", "union", + "propersuperset", "reflexsuperset", "notsubset", "propersubset", + "reflexsubset", "element", "notelement", "angle", "gradient", + "registerserif", "copyrightserif", "trademarkserif", "product", + "radical", "dotmath", "logicalnot", "logicaland", "logicalor", + "arrowdblboth", "arrowdblleft", "arrowdblup", "arrowdblright", + "arrowdbldown", "lozenge", "angleleft", "registersans", "copyrightsans", + "trademarksans", "summation", "parenlefttp", "parenleftex", + "parenleftbt", "bracketlefttp", "bracketleftex", "bracketleftbt", + "bracelefttp", "braceleftmid", "braceleftbt", "braceex", _notdef, + "angleright", "integral", "integraltp", "integralex", "integralbt", + "parenrighttp", "parenrightex", "parenrightbt", "bracketrighttp", + "bracketrightex", "bracketrightbt", "bracerighttp", "bracerightmid", + "bracerightbt", _notdef }; + +const char * const pdf_zapfdingbats[256] = { _notdef, _notdef, + _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, + _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, + _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, + _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, + _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, + "space", "a1", "a2", "a202", "a3", "a4", "a5", "a119", "a118", + "a117", "a11", "a12", "a13", "a14", "a15", "a16", "a105", "a17", + "a18", "a19", "a20", "a21", "a22", "a23", "a24", "a25", "a26", + "a27", "a28", "a6", "a7", "a8", "a9", "a10", "a29", "a30", "a31", + "a32", "a33", "a34", "a35", "a36", "a37", "a38", "a39", "a40", + 
"a41", "a42", "a43", "a44", "a45", "a46", "a47", "a48", "a49", + "a50", "a51", "a52", "a53", "a54", "a55", "a56", "a57", "a58", + "a59", "a60", "a61", "a62", "a63", "a64", "a65", "a66", "a67", + "a68", "a69", "a70", "a71", "a72", "a73", "a74", "a203", "a75", + "a204", "a76", "a77", "a78", "a79", "a81", "a82", "a83", "a84", + "a97", "a98", "a99", "a100", _notdef, _notdef, _notdef, + _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, + _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, + _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, + _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, + _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, + _notdef, "a101", "a102", "a103", "a104", "a106", "a107", "a108", + "a112", "a111", "a110", "a109", "a120", "a121", "a122", "a123", + "a124", "a125", "a126", "a127", "a128", "a129", "a130", "a131", + "a132", "a133", "a134", "a135", "a136", "a137", "a138", "a139", + "a140", "a141", "a142", "a143", "a144", "a145", "a146", "a147", + "a148", "a149", "a150", "a151", "a152", "a153", "a154", "a155", + "a156", "a157", "a158", "a159", "a160", "a161", "a163", "a164", + "a196", "a165", "a192", "a166", "a167", "a168", "a169", "a170", + "a171", "a172", "a173", "a162", "a174", "a175", "a176", "a177", + "a178", "a179", "a193", "a180", "a199", "a181", "a200", "a182", + _notdef, "a201", "a183", "a184", "a197", "a185", "a194", "a198", + "a186", "a195", "a187", "a188", "a189", "a190", "a191", _notdef }; + +#endif diff --git a/pdf/pdf_fontfile.c b/pdf/pdf_fontfile.c new file mode 100644 index 00000000..c164033e --- /dev/null +++ b/pdf/pdf_fontfile.c @@ -0,0 +1,295 @@ +#include "fitz.h" +#include "mupdf.h" + +extern const unsigned char pdf_font_Dingbats_cff_buf[]; +extern const unsigned int pdf_font_Dingbats_cff_len; +extern const unsigned char pdf_font_NimbusMonL_Bold_cff_buf[]; +extern const unsigned int pdf_font_NimbusMonL_Bold_cff_len; +extern const unsigned char pdf_font_NimbusMonL_BoldObli_cff_buf[]; +extern const unsigned int 
pdf_font_NimbusMonL_BoldObli_cff_len; +extern const unsigned char pdf_font_NimbusMonL_Regu_cff_buf[]; +extern const unsigned int pdf_font_NimbusMonL_Regu_cff_len; +extern const unsigned char pdf_font_NimbusMonL_ReguObli_cff_buf[]; +extern const unsigned int pdf_font_NimbusMonL_ReguObli_cff_len; +extern const unsigned char pdf_font_NimbusRomNo9L_Medi_cff_buf[]; +extern const unsigned int pdf_font_NimbusRomNo9L_Medi_cff_len; +extern const unsigned char pdf_font_NimbusRomNo9L_MediItal_cff_buf[]; +extern const unsigned int pdf_font_NimbusRomNo9L_MediItal_cff_len; +extern const unsigned char pdf_font_NimbusRomNo9L_Regu_cff_buf[]; +extern const unsigned int pdf_font_NimbusRomNo9L_Regu_cff_len; +extern const unsigned char pdf_font_NimbusRomNo9L_ReguItal_cff_buf[]; +extern const unsigned int pdf_font_NimbusRomNo9L_ReguItal_cff_len; +extern const unsigned char pdf_font_NimbusSanL_Bold_cff_buf[]; +extern const unsigned int pdf_font_NimbusSanL_Bold_cff_len; +extern const unsigned char pdf_font_NimbusSanL_BoldItal_cff_buf[]; +extern const unsigned int pdf_font_NimbusSanL_BoldItal_cff_len; +extern const unsigned char pdf_font_NimbusSanL_Regu_cff_buf[]; +extern const unsigned int pdf_font_NimbusSanL_Regu_cff_len; +extern const unsigned char pdf_font_NimbusSanL_ReguItal_cff_buf[]; +extern const unsigned int pdf_font_NimbusSanL_ReguItal_cff_len; +extern const unsigned char pdf_font_StandardSymL_cff_buf[]; +extern const unsigned int pdf_font_StandardSymL_cff_len; +extern const unsigned char pdf_font_URWChanceryL_MediItal_cff_buf[]; +extern const unsigned int pdf_font_URWChanceryL_MediItal_cff_len; + +#ifndef NOCJK +extern const unsigned char pdf_font_DroidSansFallback_ttf_buf[]; +extern const unsigned int pdf_font_DroidSansFallback_ttf_len; +#endif + +enum +{ + FD_FIXED = 1 << 0, + FD_SERIF = 1 << 1, + FD_SYMBOLIC = 1 << 2, + FD_SCRIPT = 1 << 3, + FD_NONSYMBOLIC = 1 << 5, + FD_ITALIC = 1 << 6, + FD_ALLCAP = 1 << 16, + FD_SMALLCAP = 1 << 17, + FD_FORCEBOLD = 1 << 18 +}; + +enum { 
CNS, GB, Japan, Korea }; +enum { MINCHO, GOTHIC }; + +static const struct { + const char *name; + const unsigned char *cff; + const unsigned int *len; +} basefonts[] = { + { "Courier", + pdf_font_NimbusMonL_Regu_cff_buf, + &pdf_font_NimbusMonL_Regu_cff_len }, + { "Courier-Bold", + pdf_font_NimbusMonL_Bold_cff_buf, + &pdf_font_NimbusMonL_Bold_cff_len }, + { "Courier-Oblique", + pdf_font_NimbusMonL_ReguObli_cff_buf, + &pdf_font_NimbusMonL_ReguObli_cff_len }, + { "Courier-BoldOblique", + pdf_font_NimbusMonL_BoldObli_cff_buf, + &pdf_font_NimbusMonL_BoldObli_cff_len }, + { "Helvetica", + pdf_font_NimbusSanL_Regu_cff_buf, + &pdf_font_NimbusSanL_Regu_cff_len }, + { "Helvetica-Bold", + pdf_font_NimbusSanL_Bold_cff_buf, + &pdf_font_NimbusSanL_Bold_cff_len }, + { "Helvetica-Oblique", + pdf_font_NimbusSanL_ReguItal_cff_buf, + &pdf_font_NimbusSanL_ReguItal_cff_len }, + { "Helvetica-BoldOblique", + pdf_font_NimbusSanL_BoldItal_cff_buf, + &pdf_font_NimbusSanL_BoldItal_cff_len }, + { "Times-Roman", + pdf_font_NimbusRomNo9L_Regu_cff_buf, + &pdf_font_NimbusRomNo9L_Regu_cff_len }, + { "Times-Bold", + pdf_font_NimbusRomNo9L_Medi_cff_buf, + &pdf_font_NimbusRomNo9L_Medi_cff_len }, + { "Times-Italic", + pdf_font_NimbusRomNo9L_ReguItal_cff_buf, + &pdf_font_NimbusRomNo9L_ReguItal_cff_len }, + { "Times-BoldItalic", + pdf_font_NimbusRomNo9L_MediItal_cff_buf, + &pdf_font_NimbusRomNo9L_MediItal_cff_len }, + { "Symbol", + pdf_font_StandardSymL_cff_buf, + &pdf_font_StandardSymL_cff_len }, + { "ZapfDingbats", + pdf_font_Dingbats_cff_buf, + &pdf_font_Dingbats_cff_len }, + { "Chancery", + pdf_font_URWChanceryL_MediItal_cff_buf, + &pdf_font_URWChanceryL_MediItal_cff_len }, + { nil, nil, nil } +}; + +fz_error +pdf_loadbuiltinfont(pdf_fontdesc *fontdesc, char *fontname) +{ + fz_error error; + unsigned char *data; + unsigned int len; + int i; + + for (i = 0; basefonts[i].name; i++) + if (!strcmp(fontname, basefonts[i].name)) + goto found; + + return fz_throw("cannot find font: '%s'", fontname); + 
+found: + pdf_logfont("load builtin font %s\n", fontname); + + data = (unsigned char *) basefonts[i].cff; + len = *basefonts[i].len; + + error = fz_newfontfrombuffer(&fontdesc->font, data, len, 0); + if (error) + return fz_rethrow(error, "cannot load freetype font from buffer"); + + fz_strlcpy(fontdesc->font->name, fontname, sizeof fontdesc->font->name); + + if (!strcmp(fontname, "Symbol") || !strcmp(fontname, "ZapfDingbats")) + fontdesc->flags |= FD_SYMBOLIC; + + return fz_okay; +} + +static fz_error +loadsystemcidfont(pdf_fontdesc *fontdesc, int ros, int kind) +{ +#ifndef NOCJK + fz_error error; + /* + We only have one builtin fallback font. + We'd really like to have one for each combination of ROS and Kind. + */ + pdf_logfont("loading builtin CJK font\n"); + error = fz_newfontfrombuffer(&fontdesc->font, + (unsigned char *)pdf_font_DroidSansFallback_ttf_buf, + pdf_font_DroidSansFallback_ttf_len, 0); + if (error) + return fz_rethrow(error, "cannot load builtin CJK font"); + fontdesc->font->ftsubstitute = 1; /* substitute font */ + return fz_okay; +#else + return fz_throw("no builtin CJK font file"); +#endif +} + +fz_error +pdf_loadsystemfont(pdf_fontdesc *fontdesc, char *fontname, char *collection) +{ + fz_error error; + char *name; + + int isbold = 0; + int isitalic = 0; + int isserif = 0; + int isscript = 0; + int isfixed = 0; + + if (strstr(fontname, "Bold")) + isbold = 1; + if (strstr(fontname, "Italic")) + isitalic = 1; + if (strstr(fontname, "Oblique")) + isitalic = 1; + + if (fontdesc->flags & FD_FIXED) + isfixed = 1; + if (fontdesc->flags & FD_SERIF) + isserif = 1; + if (fontdesc->flags & FD_ITALIC) + isitalic = 1; + if (fontdesc->flags & FD_SCRIPT) + isscript = 1; + if (fontdesc->flags & FD_FORCEBOLD) + isbold = 1; + + pdf_logfont("fixed-%d serif-%d italic-%d script-%d bold-%d\n", + isfixed, isserif, isitalic, isscript, isbold); + + if (collection) + { + int kind; + + if (isserif) + kind = MINCHO; + else + kind = GOTHIC; + + if (!strcmp(collection, 
"Adobe-CNS1")) + return loadsystemcidfont(fontdesc, CNS, kind); + else if (!strcmp(collection, "Adobe-GB1")) + return loadsystemcidfont(fontdesc, GB, kind); + else if (!strcmp(collection, "Adobe-Japan1")) + return loadsystemcidfont(fontdesc, Japan, kind); + else if (!strcmp(collection, "Adobe-Japan2")) + return loadsystemcidfont(fontdesc, Japan, kind); + else if (!strcmp(collection, "Adobe-Korea1")) + return loadsystemcidfont(fontdesc, Korea, kind); + + fz_warn("unknown cid collection: %s", collection); + } + + if (isscript) + name = "Chancery"; + + else if (isfixed) + { + if (isitalic) { + if (isbold) name = "Courier-BoldOblique"; + else name = "Courier-Oblique"; + } + else { + if (isbold) name = "Courier-Bold"; + else name = "Courier"; + } + } + + else if (isserif) + { + if (isitalic) { + if (isbold) name = "Times-BoldItalic"; + else name = "Times-Italic"; + } + else { + if (isbold) name = "Times-Bold"; + else name = "Times-Roman"; + } + } + + else + { + if (isitalic) { + if (isbold) name = "Helvetica-BoldOblique"; + else name = "Helvetica-Oblique"; + } + else { + if (isbold) name = "Helvetica-Bold"; + else name = "Helvetica"; + } + } + + error = pdf_loadbuiltinfont(fontdesc, name); + if (error) + return fz_throw("cannot load builtin substitute font: %s", name); + + /* it's a substitute font: override the metrics */ + fontdesc->font->ftsubstitute = 1; + + return fz_okay; +} + +fz_error +pdf_loadembeddedfont(pdf_fontdesc *fontdesc, pdf_xref *xref, fz_obj *stmref) +{ + fz_error error; + fz_buffer *buf; + + pdf_logfont("load embedded font\n"); + + error = pdf_loadstream(&buf, xref, fz_tonum(stmref), fz_togen(stmref)); + if (error) + return fz_rethrow(error, "cannot load font stream (%d %d R)", fz_tonum(stmref), fz_togen(stmref)); + + error = fz_newfontfrombuffer(&fontdesc->font, buf->data, buf->len, 0); + if (error) + { + fz_dropbuffer(buf); + return fz_rethrow(error, "cannot load embedded font (%d %d R)", fz_tonum(stmref), fz_togen(stmref)); + } + + /* save the 
buffer so we can free it later */ + fontdesc->font->ftdata = buf->data; + fontdesc->font->ftsize = buf->len; + fz_free(buf); /* only free the fz_buffer struct, not the contained data */ + + fontdesc->isembedded = 1; + + return fz_okay; +} diff --git a/pdf/pdf_fontmtx.c b/pdf/pdf_fontmtx.c new file mode 100644 index 00000000..e957125a --- /dev/null +++ b/pdf/pdf_fontmtx.c @@ -0,0 +1,138 @@ +#include "fitz.h" +#include "mupdf.h" + +void +pdf_setfontwmode(pdf_fontdesc *font, int wmode) +{ + font->wmode = wmode; +} + +void +pdf_setdefaulthmtx(pdf_fontdesc *font, int w) +{ + font->dhmtx.w = w; +} + +void +pdf_setdefaultvmtx(pdf_fontdesc *font, int y, int w) +{ + font->dvmtx.y = y; + font->dvmtx.w = w; +} + +void +pdf_addhmtx(pdf_fontdesc *font, int lo, int hi, int w) +{ + if (font->nhmtx + 1 >= font->hmtxcap) + { + font->hmtxcap = font->hmtxcap + 16; + font->hmtx = fz_realloc(font->hmtx, font->hmtxcap, sizeof(pdf_hmtx)); + } + + font->hmtx[font->nhmtx].lo = lo; + font->hmtx[font->nhmtx].hi = hi; + font->hmtx[font->nhmtx].w = w; + font->nhmtx++; +} + +void +pdf_addvmtx(pdf_fontdesc *font, int lo, int hi, int x, int y, int w) +{ + if (font->nvmtx + 1 >= font->vmtxcap) + { + font->vmtxcap = font->vmtxcap + 16; + font->vmtx = fz_realloc(font->vmtx, font->vmtxcap, sizeof(pdf_vmtx)); + } + + font->vmtx[font->nvmtx].lo = lo; + font->vmtx[font->nvmtx].hi = hi; + font->vmtx[font->nvmtx].x = x; + font->vmtx[font->nvmtx].y = y; + font->vmtx[font->nvmtx].w = w; + font->nvmtx++; +} + +static int cmph(const void *a0, const void *b0) +{ + pdf_hmtx *a = (pdf_hmtx*)a0; + pdf_hmtx *b = (pdf_hmtx*)b0; + return a->lo - b->lo; +} + +static int cmpv(const void *a0, const void *b0) +{ + pdf_vmtx *a = (pdf_vmtx*)a0; + pdf_vmtx *b = (pdf_vmtx*)b0; + return a->lo - b->lo; +} + +void +pdf_endhmtx(pdf_fontdesc *font) +{ + if (!font->hmtx) + return; + qsort(font->hmtx, font->nhmtx, sizeof(pdf_hmtx), cmph); +} + +void +pdf_endvmtx(pdf_fontdesc *font) +{ + if (!font->vmtx) + return; + 
qsort(font->vmtx, font->nvmtx, sizeof(pdf_vmtx), cmpv); +} + +pdf_hmtx +pdf_gethmtx(pdf_fontdesc *font, int cid) +{ + int l = 0; + int r = font->nhmtx - 1; + int m; + + if (!font->hmtx) + goto notfound; + + while (l <= r) + { + m = (l + r) >> 1; + if (cid < font->hmtx[m].lo) + r = m - 1; + else if (cid > font->hmtx[m].hi) + l = m + 1; + else + return font->hmtx[m]; + } + +notfound: + return font->dhmtx; +} + +pdf_vmtx +pdf_getvmtx(pdf_fontdesc *font, int cid) +{ + pdf_hmtx h; + pdf_vmtx v; + int l = 0; + int r = font->nvmtx - 1; + int m; + + if (!font->vmtx) + goto notfound; + + while (l <= r) + { + m = (l + r) >> 1; + if (cid < font->vmtx[m].lo) + r = m - 1; + else if (cid > font->vmtx[m].hi) + l = m + 1; + else + return font->vmtx[m]; + } + +notfound: + h = pdf_gethmtx(font, cid); + v = font->dvmtx; + v.x = h.w / 2; + return v; +} diff --git a/pdf/pdf_function.c b/pdf/pdf_function.c new file mode 100644 index 00000000..3fdd6ac7 --- /dev/null +++ b/pdf/pdf_function.c @@ -0,0 +1,1727 @@ +#include "fitz.h" +#include "mupdf.h" + +enum +{ + MAXN = FZ_MAXCOLORS, + MAXM = FZ_MAXCOLORS, +}; + +typedef struct psobj_s psobj; + +enum +{ + SAMPLE = 0, + EXPONENTIAL = 2, + STITCHING = 3, + POSTSCRIPT = 4 +}; + +struct pdf_function_s +{ + int refs; + int type; /* 0=sample 2=exponential 3=stitching 4=postscript */ + int m; /* number of input values */ + int n; /* number of output values */ + float domain[MAXM][2]; /* even index : min value, odd index : max value */ + float range[MAXN][2]; /* even index : min value, odd index : max value */ + int hasrange; + + union + { + struct { + unsigned short bps; + int size[MAXM]; + float encode[MAXM][2]; + float decode[MAXN][2]; + float *samples; + } sa; + + struct { + float n; + float c0[MAXN]; + float c1[MAXN]; + } e; + + struct { + int k; + pdf_function **funcs; /* k */ + float *bounds; /* k - 1 */ + float *encode; /* k * 2 */ + } st; + + struct { + psobj *code; + int cap; + } p; + } u; +}; + +#define RADIAN 57.2957795 + +static inline 
float LERP(float x, float xmin, float xmax, float ymin, float ymax) +{ + if (xmin == xmax) + return ymin; + if (ymin == ymax) + return ymin; + return ymin + (x - xmin) * (ymax - ymin) / (xmax - xmin); +} + +/* + * PostScript calculator + */ + +enum { PSBOOL, PSINT, PSREAL, PSOPERATOR, PSBLOCK }; + +enum +{ + PSOABS, PSOADD, PSOAND, PSOATAN, PSOBITSHIFT, PSOCEILING, + PSOCOPY, PSOCOS, PSOCVI, PSOCVR, PSODIV, PSODUP, PSOEQ, + PSOEXCH, PSOEXP, PSOFALSE, PSOFLOOR, PSOGE, PSOGT, PSOIDIV, + PSOINDEX, PSOLE, PSOLN, PSOLOG, PSOLT, PSOMOD, PSOMUL, + PSONE, PSONEG, PSONOT, PSOOR, PSOPOP, PSOROLL, PSOROUND, + PSOSIN, PSOSQRT, PSOSUB, PSOTRUE, PSOTRUNCATE, PSOXOR, + PSOIF, PSOIFELSE, PSORETURN +}; + +static char *psopnames[] = +{ + "abs", "add", "and", "atan", "bitshift", "ceiling", "copy", + "cos", "cvi", "cvr", "div", "dup", "eq", "exch", "exp", + "false", "floor", "ge", "gt", "idiv", "index", "le", "ln", + "log", "lt", "mod", "mul", "ne", "neg", "not", "or", "pop", + "roll", "round", "sin", "sqrt", "sub", "true", "truncate", + "xor", "if", "ifelse", "return" +}; + +struct psobj_s +{ + int type; + union + { + int b; /* boolean (stack only) */ + int i; /* integer (stack and code) */ + float f; /* real (stack and code) */ + int op; /* operator (code only) */ + int block; /* if/ifelse block pointer (code only) */ + } u; +}; + +typedef struct psstack_s psstack; + +struct psstack_s +{ + psobj stack[100]; + int sp; +}; + +void +pdf_debugpsstack(psstack *st) +{ + int i; + + printf("stack: "); + + for (i = 0; i < st->sp; i++) + { + switch (st->stack[i].type) + { + case PSBOOL: + if (st->stack[i].u.b) + printf("true "); + else + printf("false "); + break; + + case PSINT: + printf("%d ", st->stack[i].u.i); + break; + + case PSREAL: + printf("%g ", st->stack[i].u.f); + break; + } + } + printf("\n"); + +} + +static void +psinitstack(psstack *st) +{ + memset(st->stack, 0, sizeof(st->stack)); + st->sp = 0; +} + +static inline int +psoverflow(psstack *st, int n) +{ + return n < 0 || st->sp 
+ n >= nelem(st->stack); +} + +static inline int +psunderflow(psstack *st, int n) +{ + return n < 0 || st->sp - n < 0; +} + +static inline int +psistype(psstack *st, int t) +{ + return !psunderflow(st, 1) && st->stack[st->sp - 1].type == t; +} + +static inline int +psistype2(psstack *st, int t) +{ + return !psunderflow(st, 2) && st->stack[st->sp - 1].type == t && st->stack[st->sp - 2].type == t; +} + +static void +pspushbool(psstack *st, int b) +{ + if (!psoverflow(st, 1)) + { + st->stack[st->sp].type = PSBOOL; + st->stack[st->sp].u.b = b; + st->sp++; + } +} + +static void +pspushint(psstack *st, int n) +{ + if (!psoverflow(st, 1)) + { + st->stack[st->sp].type = PSINT; + st->stack[st->sp].u.i = n; + st->sp++; + } +} + +static void +pspushreal(psstack *st, float n) +{ + if (!psoverflow(st, 1)) + { + st->stack[st->sp].type = PSREAL; + st->stack[st->sp].u.f = n; + st->sp++; + } +} + +static int +pspopbool(psstack *st) +{ + if (!psunderflow(st, 1)) + { + if (psistype(st, PSBOOL)) + return st->stack[--st->sp].u.b; + } + return 0; +} + +static int +pspopint(psstack *st) +{ + if (!psunderflow(st, 1)) + { + if (psistype(st, PSINT)) + return st->stack[--st->sp].u.i; + if (psistype(st, PSREAL)) + return st->stack[--st->sp].u.f; + } + return 0; +} + +static float +pspopreal(psstack *st) +{ + if (!psunderflow(st, 1)) + { + if (psistype(st, PSINT)) + return st->stack[--st->sp].u.i; + if (psistype(st, PSREAL)) + return st->stack[--st->sp].u.f; + } + return 0; +} + +static void +pscopy(psstack *st, int n) +{ + if (!psunderflow(st, n) && !psoverflow(st, n)) + { + memcpy(st->stack + st->sp, st->stack + st->sp - n, n * sizeof(psobj)); + st->sp += n; + } +} + +static void +psroll(psstack *st, int n, int j) +{ + psobj tmp; + int i; + + if (psunderflow(st, n) || j == 0 || n == 0) + return; + + if (j >= 0) + { + j %= n; + } + else + { + j = -j % n; + if (j != 0) + j = n - j; + } + + for (i = 0; i < j; i++) + { + tmp = st->stack[st->sp - 1]; + memmove(st->stack + st->sp - n + 1, 
st->stack + st->sp - n, n * sizeof(psobj)); + st->stack[st->sp - n] = tmp; + } +} + +static void +psindex(psstack *st, int n) +{ + if (!psoverflow(st, 1) && !psunderflow(st, n)) + { + st->stack[st->sp] = st->stack[st->sp - n - 1]; + st->sp++; + } +} + +static void +psrun(psobj *code, psstack *st, int pc) +{ + int i1, i2; + float r1, r2; + int b1, b2; + + while (1) + { + switch (code[pc].type) + { + case PSINT: + pspushint(st, code[pc++].u.i); + break; + + case PSREAL: + pspushreal(st, code[pc++].u.f); + break; + + case PSOPERATOR: + switch (code[pc++].u.op) + { + case PSOABS: + if (psistype(st, PSINT)) + pspushint(st, abs(pspopint(st))); + else + pspushreal(st, fabsf(pspopreal(st))); + break; + + case PSOADD: + if (psistype2(st, PSINT)) { + i2 = pspopint(st); + i1 = pspopint(st); + pspushint(st, i1 + i2); + } + else { + r2 = pspopreal(st); + r1 = pspopreal(st); + pspushreal(st, r1 + r2); + } + break; + + case PSOAND: + if (psistype2(st, PSINT)) { + i2 = pspopint(st); + i1 = pspopint(st); + pspushint(st, i1 & i2); + } + else { + b2 = pspopbool(st); + b1 = pspopbool(st); + pspushbool(st, b1 && b2); + } + break; + + case PSOATAN: + r2 = pspopreal(st); + r1 = pspopreal(st); + r1 = atan2f(r1, r2) * RADIAN; + if (r1 < 0) + r1 += 360; + pspushreal(st, r1); + break; + + case PSOBITSHIFT: + i2 = pspopint(st); + i1 = pspopint(st); + if (i2 > 0) + pspushint(st, i1 << i2); + else if (i2 < 0) + pspushint(st, (int)((unsigned int)i1 >> i2)); + else + pspushint(st, i1); + break; + + case PSOCEILING: + r1 = pspopreal(st); + pspushreal(st, ceilf(r1)); + break; + + case PSOCOPY: + pscopy(st, pspopint(st)); + break; + + case PSOCOS: + r1 = pspopreal(st); + pspushreal(st, cosf(r1/RADIAN)); + break; + + case PSOCVI: + pspushint(st, pspopint(st)); + break; + + case PSOCVR: + pspushreal(st, pspopreal(st)); + break; + + case PSODIV: + r2 = pspopreal(st); + r1 = pspopreal(st); + pspushreal(st, r1 / r2); + break; + + case PSODUP: + pscopy(st, 1); + break; + + case PSOEQ: + if (psistype2(st, 
PSBOOL)) { + b2 = pspopbool(st); + b1 = pspopbool(st); + pspushbool(st, b1 == b2); + } + else if (psistype2(st, PSINT)) { + i2 = pspopint(st); + i1 = pspopint(st); + pspushbool(st, i1 == i2); + } + else { + r2 = pspopreal(st); + r1 = pspopreal(st); + pspushbool(st, r1 == r2); + } + break; + + case PSOEXCH: + psroll(st, 2, 1); + break; + + case PSOEXP: + r2 = pspopreal(st); + r1 = pspopreal(st); + pspushreal(st, powf(r1, r2)); + break; + + case PSOFALSE: + pspushbool(st, 0); + break; + + case PSOFLOOR: + r1 = pspopreal(st); + pspushreal(st, floorf(r1)); + break; + + case PSOGE: + if (psistype2(st, PSINT)) { + i2 = pspopint(st); + i1 = pspopint(st); + pspushbool(st, i1 >= i2); + } + else { + r2 = pspopreal(st); + r1 = pspopreal(st); + pspushbool(st, r1 >= r2); + } + break; + + case PSOGT: + if (psistype2(st, PSINT)) { + i2 = pspopint(st); + i1 = pspopint(st); + pspushbool(st, i1 > i2); + } + else { + r2 = pspopreal(st); + r1 = pspopreal(st); + pspushbool(st, r1 > r2); + } + break; + + case PSOIDIV: + i2 = pspopint(st); + i1 = pspopint(st); + pspushint(st, i1 / i2); + break; + + case PSOINDEX: + psindex(st, pspopint(st)); + break; + + case PSOLE: + if (psistype2(st, PSINT)) { + i2 = pspopint(st); + i1 = pspopint(st); + pspushbool(st, i1 <= i2); + } + else { + r2 = pspopreal(st); + r1 = pspopreal(st); + pspushbool(st, r1 <= r2); + } + break; + + case PSOLN: + r1 = pspopreal(st); + pspushreal(st, logf(r1)); + break; + + case PSOLOG: + r1 = pspopreal(st); + pspushreal(st, log10f(r1)); + break; + + case PSOLT: + if (psistype2(st, PSINT)) { + i2 = pspopint(st); + i1 = pspopint(st); + pspushbool(st, i1 < i2); + } + else { + r2 = pspopreal(st); + r1 = pspopreal(st); + pspushbool(st, r1 < r2); + } + break; + + case PSOMOD: + i2 = pspopint(st); + i1 = pspopint(st); + pspushint(st, i1 % i2); + break; + + case PSOMUL: + if (psistype2(st, PSINT)) { + i2 = pspopint(st); + i1 = pspopint(st); + pspushint(st, i1 * i2); + } + else { + r2 = pspopreal(st); + r1 = pspopreal(st); + 
pspushreal(st, r1 * r2); + } + break; + + case PSONE: + if (psistype2(st, PSBOOL)) { + b2 = pspopbool(st); + b1 = pspopbool(st); + pspushbool(st, b1 != b2); + } + else if (psistype2(st, PSINT)) { + i2 = pspopint(st); + i1 = pspopint(st); + pspushbool(st, i1 != i2); + } + else { + r2 = pspopreal(st); + r1 = pspopreal(st); + pspushbool(st, r1 != r2); + } + break; + + case PSONEG: + if (psistype(st, PSINT)) + pspushint(st, -pspopint(st)); + else + pspushreal(st, -pspopreal(st)); + break; + + case PSONOT: + if (psistype(st, PSBOOL)) + pspushbool(st, !pspopbool(st)); + else + pspushint(st, ~pspopint(st)); + break; + + case PSOOR: + if (psistype2(st, PSBOOL)) { + b2 = pspopbool(st); + b1 = pspopbool(st); + pspushbool(st, b1 || b2); + } + else { + i2 = pspopint(st); + i1 = pspopint(st); + pspushint(st, i1 | i2); + } + break; + + case PSOPOP: + if (!psunderflow(st, 1)) + st->sp--; + break; + + case PSOROLL: + i2 = pspopint(st); + i1 = pspopint(st); + psroll(st, i1, i2); + break; + + case PSOROUND: + if (!psistype(st, PSINT)) { + r1 = pspopreal(st); + pspushreal(st, (r1 >= 0) ? floorf(r1 + 0.5f) : ceilf(r1 - 0.5f)); + } + break; + + case PSOSIN: + r1 = pspopreal(st); + pspushreal(st, sinf(r1/RADIAN)); + break; + + case PSOSQRT: + r1 = pspopreal(st); + pspushreal(st, sqrtf(r1)); + break; + + case PSOSUB: + if (psistype2(st, PSINT)) { + i2 = pspopint(st); + i1 = pspopint(st); + pspushint(st, i1 - i2); + } + else { + r2 = pspopreal(st); + r1 = pspopreal(st); + pspushreal(st, r1 - r2); + } + break; + + case PSOTRUE: + pspushbool(st, 1); + break; + + case PSOTRUNCATE: + if (!psistype(st, PSINT)) { + r1 = pspopreal(st); + pspushreal(st, (r1 >= 0) ? 
floorf(r1) : ceilf(r1)); + } + break; + + case PSOXOR: + if (psistype2(st, PSBOOL)) { + b2 = pspopbool(st); + b1 = pspopbool(st); + pspushbool(st, b1 ^ b2); + } + else { + i2 = pspopint(st); + i1 = pspopint(st); + pspushint(st, i1 ^ i2); + } + break; + + case PSOIF: + b1 = pspopbool(st); + if (b1) + psrun(code, st, code[pc + 1].u.block); + pc = code[pc + 2].u.block; + break; + + case PSOIFELSE: + b1 = pspopbool(st); + if (b1) + psrun(code, st, code[pc + 1].u.block); + else + psrun(code, st, code[pc + 0].u.block); + pc = code[pc + 2].u.block; + break; + + case PSORETURN: + return; + + default: + fz_warn("foreign operator in calculator function"); + return; + } + break; + + default: + fz_warn("foreign object in calculator function"); + return; + } + } +} + +static void +resizecode(pdf_function *func, int newsize) +{ + if (newsize >= func->u.p.cap) + { + func->u.p.cap = func->u.p.cap + 64; + func->u.p.code = fz_realloc(func->u.p.code, func->u.p.cap, sizeof(psobj)); + } +} + +static fz_error +parsecode(pdf_function *func, fz_stream *stream, int *codeptr) +{ + fz_error error; + char buf[64]; + int len; + int tok; + int opptr, elseptr, ifptr; + int a, b, mid, cmp; + + memset(buf, 0, sizeof(buf)); + + while (1) + { + error = pdf_lex(&tok, stream, buf, sizeof buf, &len); + if (error) + return fz_rethrow(error, "calculator function lexical error"); + + switch(tok) + { + case PDF_TEOF: + return fz_throw("truncated calculator function"); + + case PDF_TINT: + resizecode(func, *codeptr); + func->u.p.code[*codeptr].type = PSINT; + func->u.p.code[*codeptr].u.i = atoi(buf); + ++*codeptr; + break; + + case PDF_TREAL: + resizecode(func, *codeptr); + func->u.p.code[*codeptr].type = PSREAL; + func->u.p.code[*codeptr].u.f = atof(buf); + ++*codeptr; + break; + + case PDF_TOBRACE: + opptr = *codeptr; + *codeptr += 4; + + resizecode(func, *codeptr); + + ifptr = *codeptr; + error = parsecode(func, stream, codeptr); + if (error) + return fz_rethrow(error, "error in 'if' branch"); + + error 
= pdf_lex(&tok, stream, buf, sizeof buf, &len); + if (error) + return fz_rethrow(error, "calculator function syntax error"); + + if (tok == PDF_TOBRACE) + { + elseptr = *codeptr; + error = parsecode(func, stream, codeptr); + if (error) + return fz_rethrow(error, "error in 'else' branch"); + + error = pdf_lex(&tok, stream, buf, sizeof buf, &len); + if (error) + return fz_rethrow(error, "calculator function syntax error"); + } + else + { + elseptr = -1; + } + + if (tok == PDF_TKEYWORD) + { + if (!strcmp(buf, "if")) + { + if (elseptr >= 0) + return fz_throw("too many branches for 'if'"); + func->u.p.code[opptr].type = PSOPERATOR; + func->u.p.code[opptr].u.op = PSOIF; + func->u.p.code[opptr+2].type = PSBLOCK; + func->u.p.code[opptr+2].u.block = ifptr; + func->u.p.code[opptr+3].type = PSBLOCK; + func->u.p.code[opptr+3].u.block = *codeptr; + } + else if (!strcmp(buf, "ifelse")) + { + if (elseptr < 0) + return fz_throw("not enough branches for 'ifelse'"); + func->u.p.code[opptr].type = PSOPERATOR; + func->u.p.code[opptr].u.op = PSOIFELSE; + func->u.p.code[opptr+1].type = PSBLOCK; + func->u.p.code[opptr+1].u.block = elseptr; + func->u.p.code[opptr+2].type = PSBLOCK; + func->u.p.code[opptr+2].u.block = ifptr; + func->u.p.code[opptr+3].type = PSBLOCK; + func->u.p.code[opptr+3].u.block = *codeptr; + } + else + { + return fz_throw("unknown keyword in 'if-else' context: '%s'", buf); + } + } + else + { + return fz_throw("missing keyword in 'if-else' context"); + } + break; + + case PDF_TCBRACE: + resizecode(func, *codeptr); + func->u.p.code[*codeptr].type = PSOPERATOR; + func->u.p.code[*codeptr].u.op = PSORETURN; + ++*codeptr; + return fz_okay; + + case PDF_TKEYWORD: + cmp = -1; + a = -1; + b = nelem(psopnames); + while (b - a > 1) + { + mid = (a + b) / 2; + cmp = strcmp(buf, psopnames[mid]); + if (cmp > 0) + a = mid; + else if (cmp < 0) + b = mid; + else + a = b = mid; + } + if (cmp != 0) + return fz_throw("unknown operator: '%s'", buf); + + resizecode(func, *codeptr); + 
func->u.p.code[*codeptr].type = PSOPERATOR; + func->u.p.code[*codeptr].u.op = a; + ++*codeptr; + break; + + default: + return fz_throw("calculator function syntax error"); + } + } +} + +static fz_error +loadpostscriptfunc(pdf_function *func, pdf_xref *xref, fz_obj *dict, int num, int gen) +{ + fz_error error; + fz_stream *stream; + int codeptr; + char buf[64]; + int tok; + int len; + + pdf_logrsrc("load postscript function (%d %d R)\n", num, gen); + + error = pdf_openstream(&stream, xref, num, gen); + if (error) + return fz_rethrow(error, "cannot open calculator function stream"); + + error = pdf_lex(&tok, stream, buf, sizeof buf, &len); + if (error) + { + fz_close(stream); + return fz_rethrow(error, "stream is not a calculator function"); + } + + if (tok != PDF_TOBRACE) + { + fz_close(stream); + return fz_throw("stream is not a calculator function"); + } + + func->u.p.code = nil; + func->u.p.cap = 0; + + codeptr = 0; + error = parsecode(func, stream, &codeptr); + if (error) + { + fz_close(stream); + return fz_rethrow(error, "cannot parse calculator function (%d %d R)", num, gen); + } + + fz_close(stream); + return fz_okay; +} + +static void +evalpostscriptfunc(pdf_function *func, float *in, float *out) +{ + psstack st; + float x; + int i; + + psinitstack(&st); + + for (i = 0; i < func->m; i++) + { + x = CLAMP(in[i], func->domain[i][0], func->domain[i][1]); + pspushreal(&st, x); + } + + psrun(func->u.p.code, &st, 0); + + for (i = func->n - 1; i >= 0; i--) + { + x = pspopreal(&st); + out[i] = CLAMP(x, func->range[i][0], func->range[i][1]); + } +} + +/* + * Sample function + */ + +static fz_error +loadsamplefunc(pdf_function *func, pdf_xref *xref, fz_obj *dict, int num, int gen) +{ + fz_error error; + fz_stream *stream; + fz_obj *obj; + int samplecount; + int bps; + int i; + + pdf_logrsrc("sampled function {\n"); + + func->u.sa.samples = nil; + + obj = fz_dictgets(dict, "Size"); + if (!fz_isarray(obj) || fz_arraylen(obj) != func->m) + return fz_throw("malformed 
/Size"); + for (i = 0; i < func->m; i++) + func->u.sa.size[i] = fz_toint(fz_arrayget(obj, i)); + + obj = fz_dictgets(dict, "BitsPerSample"); + if (!fz_isint(obj)) + return fz_throw("malformed /BitsPerSample"); + func->u.sa.bps = bps = fz_toint(obj); + + pdf_logrsrc("bps %d\n", bps); + + obj = fz_dictgets(dict, "Encode"); + if (fz_isarray(obj)) + { + if (fz_arraylen(obj) != func->m * 2) + return fz_throw("malformed /Encode"); + for (i = 0; i < func->m; i++) + { + func->u.sa.encode[i][0] = fz_toreal(fz_arrayget(obj, i*2+0)); + func->u.sa.encode[i][1] = fz_toreal(fz_arrayget(obj, i*2+1)); + } + } + else + { + for (i = 0; i < func->m; i++) + { + func->u.sa.encode[i][0] = 0; + func->u.sa.encode[i][1] = func->u.sa.size[i] - 1; + } + } + + obj = fz_dictgets(dict, "Decode"); + if (fz_isarray(obj)) + { + if (fz_arraylen(obj) != func->n * 2) + return fz_throw("malformed /Decode"); + for (i = 0; i < func->n; i++) + { + func->u.sa.decode[i][0] = fz_toreal(fz_arrayget(obj, i*2+0)); + func->u.sa.decode[i][1] = fz_toreal(fz_arrayget(obj, i*2+1)); + } + } + else + { + for (i = 0; i < func->n; i++) + { + func->u.sa.decode[i][0] = func->range[i][0]; + func->u.sa.decode[i][1] = func->range[i][1]; + } + } + + for (i = 0, samplecount = func->n; i < func->m; i++) + samplecount *= func->u.sa.size[i]; + + pdf_logrsrc("samplecount %d\n", samplecount); + + func->u.sa.samples = fz_calloc(samplecount, sizeof(float)); + + error = pdf_openstream(&stream, xref, num, gen); + if (error) + return fz_rethrow(error, "cannot open samples stream (%d %d R)", num, gen); + + /* read samples */ + for (i = 0; i < samplecount; i++) + { + unsigned int x; + float s; + + if (fz_iseofbits(stream)) + { + fz_close(stream); + return fz_throw("truncated sample stream"); + } + + switch (bps) + { + case 1: s = fz_readbits(stream, 1); break; + case 2: s = fz_readbits(stream, 2) / 3.0f; break; + case 4: s = fz_readbits(stream, 4) / 15.0f; break; + case 8: s = fz_readbyte(stream) / 255.0f; break; + case 12: s = 
fz_readbits(stream, 12) / 4095.0f; break; + case 16: + x = fz_readbyte(stream) << 8; + x |= fz_readbyte(stream); + s = x / 65535.0f; + break; + case 24: + x = fz_readbyte(stream) << 16; + x |= fz_readbyte(stream) << 8; + x |= fz_readbyte(stream); + s = x / 16777215.0f; + break; + case 32: + x = fz_readbyte(stream) << 24; + x |= fz_readbyte(stream) << 16; + x |= fz_readbyte(stream) << 8; + x |= fz_readbyte(stream); + s = x / 4294967295.0f; + break; + default: + fz_close(stream); + return fz_throw("sample stream bit depth %d unsupported", bps); + } + + func->u.sa.samples[i] = s; + } + + fz_close(stream); + + pdf_logrsrc("}\n"); + + return fz_okay; +} + +static float +interpolatesample(pdf_function *func, int *scale, int *e0, int *e1, float *efrac, int dim, int idx) +{ + float a, b; + int idx0, idx1; + + idx0 = e0[dim] * scale[dim] + idx; + idx1 = e1[dim] * scale[dim] + idx; + + if (dim == 0) + { + a = func->u.sa.samples[idx0]; + b = func->u.sa.samples[idx1]; + } + else + { + a = interpolatesample(func, scale, e0, e1, efrac, dim - 1, idx0); + b = interpolatesample(func, scale, e0, e1, efrac, dim - 1, idx1); + } + + return a + (b - a) * efrac[dim]; +} + +static void +evalsamplefunc(pdf_function *func, float *in, float *out) +{ + int e0[MAXM], e1[MAXM], scale[MAXM]; + float efrac[MAXM]; + float x; + int i; + + /* encode input coordinates */ + for (i = 0; i < func->m; i++) + { + x = CLAMP(in[i], func->domain[i][0], func->domain[i][1]); + x = LERP(x, func->domain[i][0], func->domain[i][1], + func->u.sa.encode[i][0], func->u.sa.encode[i][1]); + x = CLAMP(x, 0, func->u.sa.size[i] - 1); + e0[i] = floorf(x); + e1[i] = ceilf(x); + efrac[i] = x - floorf(x); + } + + scale[0] = func->n; + for (i = 1; i < func->m; i++) + scale[i] = scale[i - 1] * func->u.sa.size[i]; + + for (i = 0; i < func->n; i++) + { + if (func->m == 1) + { + float a = func->u.sa.samples[e0[0] * func->n + i]; + float b = func->u.sa.samples[e1[0] * func->n + i]; + + float ab = a + (b - a) * efrac[0]; + + out[i] 
= LERP(ab, 0, 1, func->u.sa.decode[i][0], func->u.sa.decode[i][1]); + out[i] = CLAMP(out[i], func->range[i][0], func->range[i][1]); + } + + else if (func->m == 2) + { + int s0 = func->n; + int s1 = s0 * func->u.sa.size[0]; + + float a = func->u.sa.samples[e0[0] * s0 + e0[1] * s1 + i]; + float b = func->u.sa.samples[e1[0] * s0 + e0[1] * s1 + i]; + float c = func->u.sa.samples[e0[0] * s0 + e1[1] * s1 + i]; + float d = func->u.sa.samples[e1[0] * s0 + e1[1] * s1 + i]; + + float ab = a + (b - a) * efrac[0]; + float cd = c + (d - c) * efrac[0]; + float abcd = ab + (cd - ab) * efrac[1]; + + out[i] = LERP(abcd, 0, 1, func->u.sa.decode[i][0], func->u.sa.decode[i][1]); + out[i] = CLAMP(out[i], func->range[i][0], func->range[i][1]); + } + + else + { + float x = interpolatesample(func, scale, e0, e1, efrac, func->m - 1, i); + out[i] = LERP(x, 0, 1, func->u.sa.decode[i][0], func->u.sa.decode[i][1]); + out[i] = CLAMP(out[i], func->range[i][0], func->range[i][1]); + } + } +} + +/* + * Exponential function + */ + +static fz_error +loadexponentialfunc(pdf_function *func, fz_obj *dict) +{ + fz_obj *obj; + int i; + + pdf_logrsrc("exponential function {\n"); + + if (func->m != 1) + return fz_throw("/Domain must be one dimension (%d)", func->m); + + obj = fz_dictgets(dict, "N"); + if (!fz_isint(obj) && !fz_isreal(obj)) + return fz_throw("malformed /N"); + func->u.e.n = fz_toreal(obj); + pdf_logrsrc("n %g\n", func->u.e.n); + + obj = fz_dictgets(dict, "C0"); + if (fz_isarray(obj)) + { + func->n = fz_arraylen(obj); + if (func->n >= MAXN) + return fz_throw("exponential function result array out of range"); + for (i = 0; i < func->n; i++) + func->u.e.c0[i] = fz_toreal(fz_arrayget(obj, i)); + pdf_logrsrc("c0 %d\n", func->n); + } + else + { + func->n = 1; + func->u.e.c0[0] = 0; + } + + obj = fz_dictgets(dict, "C1"); + if (fz_isarray(obj)) + { + if (fz_arraylen(obj) != func->n) + return fz_throw("/C1 must match /C0 length"); + for (i = 0; i < func->n; i++) + func->u.e.c1[i] = 
fz_toreal(fz_arrayget(obj, i)); + pdf_logrsrc("c1 %d\n", func->n); + } + else + { + if (func->n != 1) + return fz_throw("/C1 must match /C0 length"); + func->u.e.c1[0] = 1; + } + + pdf_logrsrc("}\n"); + + return fz_okay; +} + +static void +evalexponentialfunc(pdf_function *func, float in, float *out) +{ + float x = in; + float tmp; + int i; + + x = CLAMP(x, func->domain[0][0], func->domain[0][1]); + + /* constraint */ + if ((func->u.e.n != (int)func->u.e.n && x < 0) || (func->u.e.n < 0 && x == 0)) + { + fz_warn("constraint error"); + return; + } + + tmp = powf(x, func->u.e.n); + for (i = 0; i < func->n; i++) + { + out[i] = func->u.e.c0[i] + tmp * (func->u.e.c1[i] - func->u.e.c0[i]); + if (func->hasrange) + out[i] = CLAMP(out[i], func->range[i][0], func->range[i][1]); + } +} + +/* + * Stitching function + */ + +static fz_error +loadstitchingfunc(pdf_function *func, pdf_xref *xref, fz_obj *dict) +{ + pdf_function **funcs; + fz_error error; + fz_obj *obj; + fz_obj *sub; + fz_obj *num; + int k; + int i; + + pdf_logrsrc("stitching {\n"); + + func->u.st.k = 0; + + if (func->m != 1) + return fz_throw("/Domain must be one dimension (%d)", func->m); + + obj = fz_dictgets(dict, "Functions"); + if (!fz_isarray(obj)) + return fz_throw("stitching function has no input functions"); + { + k = fz_arraylen(obj); + + pdf_logrsrc("k %d\n", k); + + func->u.st.funcs = fz_calloc(k, sizeof(pdf_function*)); + func->u.st.bounds = fz_calloc(k - 1, sizeof(float)); + func->u.st.encode = fz_calloc(k * 2, sizeof(float)); + funcs = func->u.st.funcs; + + for (i = 0; i < k; i++) + { + sub = fz_arrayget(obj, i); + error = pdf_loadfunction(&funcs[i], xref, sub); + if (error) + return fz_rethrow(error, "cannot load sub function %d (%d %d R)", i, fz_tonum(sub), fz_togen(sub)); + if (funcs[i]->m != 1 || funcs[i]->n != funcs[0]->n) + return fz_throw("sub function %d /Domain or /Range mismatch", i); + func->u.st.k ++; + } + + if (!func->n) + func->n = funcs[0]->n; + else if (func->n != funcs[0]->n) + 
return fz_throw("sub function /Domain or /Range mismatch"); + } + + obj = fz_dictgets(dict, "Bounds"); + if (!fz_isarray(obj)) + return fz_throw("stitching function has no bounds"); + { + if (!fz_isarray(obj) || fz_arraylen(obj) != k - 1) + return fz_throw("malformed /Bounds (not array or wrong length)"); + + for (i = 0; i < k-1; i++) + { + num = fz_arrayget(obj, i); + if (!fz_isint(num) && !fz_isreal(num)) + return fz_throw("malformed /Bounds (item not real)"); + func->u.st.bounds[i] = fz_toreal(num); + if (i && func->u.st.bounds[i-1] > func->u.st.bounds[i]) + return fz_throw("malformed /Bounds (item not monotonic)"); + } + + if (k != 1 && (func->domain[0][0] > func->u.st.bounds[0] || + func->domain[0][1] < func->u.st.bounds[k-2])) + fz_warn("malformed shading function bounds (domain mismatch), proceeding anyway."); + } + + obj = fz_dictgets(dict, "Encode"); + if (!fz_isarray(obj)) + return fz_throw("stitching function is missing encoding"); + { + if (!fz_isarray(obj) || fz_arraylen(obj) != k * 2) + return fz_throw("malformed /Encode"); + for (i = 0; i < k; i++) + { + func->u.st.encode[i*2+0] = fz_toreal(fz_arrayget(obj, i*2+0)); + func->u.st.encode[i*2+1] = fz_toreal(fz_arrayget(obj, i*2+1)); + } + } + + pdf_logrsrc("}\n"); + + return fz_okay; +} + +static void +evalstitchingfunc(pdf_function *func, float in, float *out) +{ + float low, high; + int k = func->u.st.k; + float *bounds = func->u.st.bounds; + int i; + + in = CLAMP(in, func->domain[0][0], func->domain[0][1]); + + for (i = 0; i < k - 1; i++) + { + if (in < bounds[i]) + break; + } + + if (i == 0 && k == 1) + { + low = func->domain[0][0]; + high = func->domain[0][1]; + } + else if (i == 0) + { + low = func->domain[0][0]; + high = bounds[0]; + } + else if (i == k - 1) + { + low = bounds[k-2]; + high = func->domain[0][1]; + } + else + { + low = bounds[i-1]; + high = bounds[i]; + } + + in = LERP(in, low, high, func->u.st.encode[i*2+0], func->u.st.encode[i*2+1]); + + pdf_evalfunction(func->u.st.funcs[i], &in, 
1, out, func->n); +} + +/* + * Common + */ + +pdf_function * +pdf_keepfunction(pdf_function *func) +{ + func->refs ++; + return func; +} + +void +pdf_dropfunction(pdf_function *func) +{ + int i; + if (--func->refs == 0) + { + switch(func->type) + { + case SAMPLE: + fz_free(func->u.sa.samples); + break; + case EXPONENTIAL: + break; + case STITCHING: + for (i = 0; i < func->u.st.k; i++) + pdf_dropfunction(func->u.st.funcs[i]); + fz_free(func->u.st.funcs); + fz_free(func->u.st.bounds); + fz_free(func->u.st.encode); + break; + case POSTSCRIPT: + fz_free(func->u.p.code); + break; + } + fz_free(func); + } +} + +fz_error +pdf_loadfunction(pdf_function **funcp, pdf_xref *xref, fz_obj *dict) +{ + fz_error error; + pdf_function *func; + fz_obj *obj; + int i; + + if ((*funcp = pdf_finditem(xref->store, pdf_dropfunction, dict))) + { + pdf_keepfunction(*funcp); + return fz_okay; + } + + pdf_logrsrc("load function (%d %d R) {\n", fz_tonum(dict), fz_togen(dict)); + + func = fz_malloc(sizeof(pdf_function)); + memset(func, 0, sizeof(pdf_function)); + func->refs = 1; + + obj = fz_dictgets(dict, "FunctionType"); + func->type = fz_toint(obj); + + pdf_logrsrc("type %d\n", func->type); + + /* required for all */ + obj = fz_dictgets(dict, "Domain"); + func->m = fz_arraylen(obj) / 2; + for (i = 0; i < func->m; i++) + { + func->domain[i][0] = fz_toreal(fz_arrayget(obj, i * 2 + 0)); + func->domain[i][1] = fz_toreal(fz_arrayget(obj, i * 2 + 1)); + } + pdf_logrsrc("domain %d\n", func->m); + + /* required for type0 and type4, optional otherwise */ + obj = fz_dictgets(dict, "Range"); + if (fz_isarray(obj)) + { + func->hasrange = 1; + func->n = fz_arraylen(obj) / 2; + for (i = 0; i < func->n; i++) + { + func->range[i][0] = fz_toreal(fz_arrayget(obj, i * 2 + 0)); + func->range[i][1] = fz_toreal(fz_arrayget(obj, i * 2 + 1)); + } + pdf_logrsrc("range %d\n", func->n); + } + else + { + func->hasrange = 0; + func->n = 0; + } + + if (func->m >= MAXM || func->n >= MAXN) + { + fz_free(func); + return 
fz_throw("assert: /Domain or /Range too big"); + } + + switch(func->type) + { + case SAMPLE: + error = loadsamplefunc(func, xref, dict, fz_tonum(dict), fz_togen(dict)); + if (error) + { + pdf_dropfunction(func); + return fz_rethrow(error, "cannot load sampled function (%d %d R)", fz_tonum(dict), fz_togen(dict)); + } + break; + + case EXPONENTIAL: + error = loadexponentialfunc(func, dict); + if (error) + { + pdf_dropfunction(func); + return fz_rethrow(error, "cannot load exponential function (%d %d R)", fz_tonum(dict), fz_togen(dict)); + } + break; + + case STITCHING: + error = loadstitchingfunc(func, xref, dict); + if (error) + { + pdf_dropfunction(func); + return fz_rethrow(error, "cannot load stitching function (%d %d R)", fz_tonum(dict), fz_togen(dict)); + } + break; + + case POSTSCRIPT: + error = loadpostscriptfunc(func, xref, dict, fz_tonum(dict), fz_togen(dict)); + if (error) + { + pdf_dropfunction(func); + return fz_rethrow(error, "cannot load calculator function (%d %d R)", fz_tonum(dict), fz_togen(dict)); + } + break; + + default: + fz_free(func); + return fz_throw("unknown function type (%d %d R)", fz_tonum(dict), fz_togen(dict)); + } + + pdf_logrsrc("}\n"); + + pdf_storeitem(xref->store, pdf_keepfunction, pdf_dropfunction, dict, func); + + *funcp = func; + return fz_okay; +} + +void +pdf_evalfunction(pdf_function *func, float *in, int inlen, float *out, int outlen) +{ + memset(out, 0, sizeof(float) * outlen); + + if (inlen != func->m) + { + fz_warn("tried to evaluate function with wrong number of inputs"); + return; + } + if (func->n != outlen) + { + fz_warn("tried to evaluate function with wrong number of outputs"); + return; + } + + switch(func->type) + { + case SAMPLE: evalsamplefunc(func, in, out); break; + case EXPONENTIAL: evalexponentialfunc(func, *in, out); break; + case STITCHING: evalstitchingfunc(func, *in, out); break; + case POSTSCRIPT: evalpostscriptfunc(func, in, out); break; + } +} + +/* + * Debugging prints + */ + +static void 
+pdf_debugindent(char *prefix, int level, char *suffix) +{ + int i; + + printf("%s", prefix); + + for (i = 0; i < level; i++) + printf("\t"); + + printf("%s", suffix); +} + +static void +pdf_debugpsfunccode(psobj *funccode, psobj *code, int level) +{ + int eof, wasop; + + pdf_debugindent("", level, "{"); + + /* Print empty blocks as { }, instead of separating braces on different lines. */ + if (code->type == PSOPERATOR && code->u.op == PSORETURN) + { + printf(" } "); + return; + } + + pdf_debugindent("\n", ++level, ""); + + eof = 0; + wasop = 0; + while (!eof) + { + switch (code->type) + { + case PSINT: + if (wasop) + pdf_debugindent("\n", level, ""); + + printf("%d ", code->u.i); + wasop = 0; + code++; + break; + + case PSREAL: + if (wasop) + pdf_debugindent("\n", level, ""); + + printf("%g ", code->u.f); + wasop = 0; + code++; + break; + + case PSOPERATOR: + if (code->u.op == PSORETURN) + { + printf("\n"); + eof = 1; + } + else if (code->u.op == PSOIF) + { + printf("\n"); + pdf_debugpsfunccode(funccode, &funccode[(code + 2)->u.block], level); + + printf("%s", psopnames[code->u.op]); + code = &funccode[(code + 3)->u.block]; + if (code->type != PSOPERATOR || code->u.op != PSORETURN) + pdf_debugindent("\n", level, ""); + + wasop = 0; + } + else if (code->u.op == PSOIFELSE) + { + printf("\n"); + pdf_debugpsfunccode(funccode, &funccode[(code + 2)->u.block], level); + + printf("\n"); + pdf_debugpsfunccode(funccode, &funccode[(code + 1)->u.block], level); + + printf("%s", psopnames[code->u.op]); + code = &funccode[(code + 3)->u.block]; + if (code->type != PSOPERATOR || code->u.op != PSORETURN) + pdf_debugindent("\n", level, ""); + + wasop = 0; + } + else + { + printf("%s ", psopnames[code->u.op]); + code++; + wasop = 1; + } + break; + } + } + + pdf_debugindent("", --level, "} "); +} + +static void +pdf_debugfunctionimp(pdf_function *func, int level) +{ + int i; + + pdf_debugindent("", level, "function {\n"); + + pdf_debugindent("", ++level, ""); + switch (func->type) + 
{ + case SAMPLE: + printf("sampled"); + break; + case EXPONENTIAL: + printf("exponential"); + break; + case STITCHING: + printf("stitching"); + break; + case POSTSCRIPT: + printf("postscript"); + break; + } + + pdf_debugindent("\n", level, ""); + printf("%d input -> %d output\n", func->m, func->n); + + pdf_debugindent("", level, "domain "); + for (i = 0; i < func->m; i++) + printf("%g %g ", func->domain[i][0], func->domain[i][1]); + printf("\n"); + + if (func->hasrange) + { + pdf_debugindent("", level, "range "); + for (i = 0; i < func->n; i++) + printf("%g %g ", func->range[i][0], func->range[i][1]); + printf("\n"); + } + + switch (func->type) + { + case SAMPLE: + pdf_debugindent("", level, ""); + printf("bps: %d\n", func->u.sa.bps); + + pdf_debugindent("", level, ""); + printf("size: [ "); + for (i = 0; i < func->m; i++) + printf("%d ", func->u.sa.size[i]); + printf("]\n"); + + pdf_debugindent("", level, ""); + printf("encode: [ "); + for (i = 0; i < func->m; i++) + printf("%g %g ", func->u.sa.encode[i][0], func->u.sa.encode[i][1]); + printf("]\n"); + + pdf_debugindent("", level, ""); + printf("decode: [ "); + for (i = 0; i < func->m; i++) + printf("%g %g ", func->u.sa.decode[i][0], func->u.sa.decode[i][1]); + printf("]\n"); + break; + + case EXPONENTIAL: + pdf_debugindent("", level, ""); + printf("n: %g\n", func->u.e.n); + + pdf_debugindent("", level, ""); + printf("c0: [ "); + for (i = 0; i < func->n; i++) + printf("%g ", func->u.e.c0[i]); + printf("]\n"); + + pdf_debugindent("", level, ""); + printf("c1: [ "); + for (i = 0; i < func->n; i++) + printf("%g ", func->u.e.c1[i]); + printf("]\n"); + break; + + case STITCHING: + pdf_debugindent("", level, ""); + printf("%d functions\n", func->u.st.k); + + pdf_debugindent("", level, ""); + printf("bounds: [ "); + for (i = 0; i < func->u.st.k - 1; i++) + printf("%g ", func->u.st.bounds[i]); + printf("]\n"); + + pdf_debugindent("", level, ""); + printf("encode: [ "); + for (i = 0; i < func->u.st.k * 2; i++) + printf("%g 
", func->u.st.encode[i]); + printf("]\n"); + + for (i = 0; i < func->u.st.k; i++) + pdf_debugfunctionimp(func->u.st.funcs[i], level); + break; + + case POSTSCRIPT: + pdf_debugpsfunccode(func->u.p.code, func->u.p.code, level); + printf("\n"); + break; + } + + pdf_debugindent("", --level, "}\n"); +} + +void +pdf_debugfunction(pdf_function *func) +{ + pdf_debugfunctionimp(func, 0); +} diff --git a/pdf/pdf_image.c b/pdf/pdf_image.c new file mode 100644 index 00000000..b37c7257 --- /dev/null +++ b/pdf/pdf_image.c @@ -0,0 +1,393 @@ +#include "fitz.h" +#include "mupdf.h" + +/* TODO: store JPEG compressed samples */ +/* TODO: store flate compressed samples */ + +static fz_error pdf_loadjpximage(fz_pixmap **imgp, pdf_xref *xref, fz_obj *dict); + +static void +pdf_maskcolorkey(fz_pixmap *pix, int n, int *colorkey) +{ + unsigned char *p = pix->samples; + int len = pix->w * pix->h; + int k, t; + while (len--) + { + t = 1; + for (k = 0; k < n; k++) + if (p[k] < colorkey[k * 2] || p[k] > colorkey[k * 2 + 1]) + t = 0; + if (t) + for (k = 0; k < pix->n; k++) + p[k] = 0; + p += pix->n; + } +} + +static fz_error +pdf_loadimageimp(fz_pixmap **imgp, pdf_xref *xref, fz_obj *rdb, fz_obj *dict, fz_stream *cstm, int forcemask) +{ + fz_stream *stm; + fz_pixmap *tile; + fz_obj *obj, *res; + fz_error error; + + int w, h, bpc, n; + int imagemask; + int interpolate; + int indexed; + fz_colorspace *colorspace; + fz_pixmap *mask; /* explicit mask/softmask image */ + int usecolorkey; + int colorkey[FZ_MAXCOLORS * 2]; + float decode[FZ_MAXCOLORS * 2]; + + int scale; + int stride; + unsigned char *samples; + int i, len; + + /* special case for JPEG2000 images */ + if (pdf_isjpximage(dict)) + { + tile = nil; + error = pdf_loadjpximage(&tile, xref, dict); + if (error) + return fz_rethrow(error, "cannot load jpx image"); + if (forcemask) + { + if (tile->n != 2) + { + fz_droppixmap(tile); + return fz_throw("softmask must be grayscale"); + } + mask = fz_alphafromgray(tile, 1); + fz_droppixmap(tile); + 
*imgp = mask; + return fz_okay; + } + *imgp = tile; + return fz_okay; + } + + w = fz_toint(fz_dictgetsa(dict, "Width", "W")); + h = fz_toint(fz_dictgetsa(dict, "Height", "H")); + bpc = fz_toint(fz_dictgetsa(dict, "BitsPerComponent", "BPC")); + imagemask = fz_tobool(fz_dictgetsa(dict, "ImageMask", "IM")); + interpolate = fz_tobool(fz_dictgetsa(dict, "Interpolate", "I")); + + indexed = 0; + usecolorkey = 0; + colorspace = nil; + mask = nil; + + if (imagemask) + bpc = 1; + + if (w == 0) + return fz_throw("image width is zero"); + if (h == 0) + return fz_throw("image height is zero"); + if (bpc == 0) + return fz_throw("image depth is zero"); + if (w > (1 << 16)) + return fz_throw("image is too wide"); + if (h > (1 << 16)) + return fz_throw("image is too high"); + + obj = fz_dictgetsa(dict, "ColorSpace", "CS"); + if (obj && !imagemask && !forcemask) + { + /* colorspace resource lookup is only done for inline images */ + if (fz_isname(obj)) + { + res = fz_dictget(fz_dictgets(rdb, "ColorSpace"), obj); + if (res) + obj = res; + } + + error = pdf_loadcolorspace(&colorspace, xref, obj); + if (error) + return fz_rethrow(error, "cannot load image colorspace"); + + if (!strcmp(colorspace->name, "Indexed")) + indexed = 1; + + n = colorspace->n; + } + else + { + n = 1; + } + + obj = fz_dictgetsa(dict, "Decode", "D"); + if (obj) + { + for (i = 0; i < n * 2; i++) + decode[i] = fz_toreal(fz_arrayget(obj, i)); + } + else + { + float maxval = indexed ? (1 << bpc) - 1 : 1; + for (i = 0; i < n * 2; i++) + decode[i] = i & 1 ? 
maxval : 0; + } + + obj = fz_dictgetsa(dict, "SMask", "Mask"); + if (fz_isdict(obj)) + { + /* Not allowed for inline images */ + if (!cstm) + { + error = pdf_loadimageimp(&mask, xref, rdb, obj, nil, 1); + if (error) + { + if (colorspace) + fz_dropcolorspace(colorspace); + return fz_rethrow(error, "cannot load image mask/softmask"); + } + } + } + else if (fz_isarray(obj)) + { + usecolorkey = 1; + for (i = 0; i < n * 2; i++) + colorkey[i] = fz_toint(fz_arrayget(obj, i)); + } + + stride = (w * n * bpc + 7) / 8; + + if (cstm) + { + stm = pdf_openinlinestream(cstm, xref, dict, stride * h); + } + else + { + error = pdf_openstream(&stm, xref, fz_tonum(dict), fz_togen(dict)); + if (error) + { + if (colorspace) + fz_dropcolorspace(colorspace); + if (mask) + fz_droppixmap(mask); + return fz_rethrow(error, "cannot open image data stream (%d 0 R)", fz_tonum(dict)); + } + } + + samples = fz_calloc(h, stride); + + len = fz_read(stm, samples, h * stride); + if (len < 0) + { + fz_close(stm); + if (colorspace) + fz_dropcolorspace(colorspace); + if (mask) + fz_droppixmap(mask); + fz_free(samples); + return fz_rethrow(len, "cannot read image data"); + } + + /* Make sure we read the EOF marker (for inline images only) */ + if (cstm) + { + unsigned char tbuf[512]; + int tlen = fz_read(stm, tbuf, sizeof tbuf); + if (tlen < 0) + fz_catch(tlen, "ignoring error at end of image"); + if (tlen > 0) + fz_warn("ignoring garbage at end of image"); + } + + fz_close(stm); + + /* Pad truncated images */ + if (len < stride * h) + { + fz_warn("padding truncated image (%d 0 R)", fz_tonum(dict)); + memset(samples + len, 0, stride * h - len); + } + + /* Invert 1-bit image masks */ + if (imagemask) + { + /* 0=opaque and 1=transparent so we need to invert */ + unsigned char *p = samples; + len = h * stride; + for (i = 0; i < len; i++) + p[i] = ~p[i]; + } + + pdf_logimage("size %dx%d n=%d bpc=%d imagemask=%d indexed=%d\n", w, h, n, bpc, imagemask, indexed); + + /* Unpack samples into pixmap */ + + tile = 
fz_newpixmap(colorspace, 0, 0, w, h); + + scale = 1; + if (!indexed) + { + switch (bpc) + { + case 1: scale = 255; break; + case 2: scale = 85; break; + case 4: scale = 17; break; + } + } + + fz_unpacktile(tile, samples, n, bpc, stride, scale); + + if (usecolorkey) + pdf_maskcolorkey(tile, n, colorkey); + + if (indexed) + { + fz_pixmap *conv; + + fz_decodeindexedtile(tile, decode, (1 << bpc) - 1); + + conv = pdf_expandindexedpixmap(tile); + fz_droppixmap(tile); + tile = conv; + } + else + { + fz_decodetile(tile, decode); + } + + if (colorspace) + fz_dropcolorspace(colorspace); + + tile->mask = mask; + tile->interpolate = interpolate; + + fz_free(samples); + + *imgp = tile; + return fz_okay; +} + +fz_error +pdf_loadinlineimage(fz_pixmap **pixp, pdf_xref *xref, fz_obj *rdb, fz_obj *dict, fz_stream *file) +{ + fz_error error; + + pdf_logimage("load inline image {\n"); + + error = pdf_loadimageimp(pixp, xref, rdb, dict, file, 0); + if (error) + return fz_rethrow(error, "cannot load inline image"); + + pdf_logimage("}\n"); + + return fz_okay; +} + +int +pdf_isjpximage(fz_obj *dict) +{ + fz_obj *filter; + int i; + + filter = fz_dictgets(dict, "Filter"); + if (!strcmp(fz_toname(filter), "JPXDecode")) + return 1; + for (i = 0; i < fz_arraylen(filter); i++) + if (!strcmp(fz_toname(fz_arrayget(filter, i)), "JPXDecode")) + return 1; + return 0; +} + +static fz_error +pdf_loadjpximage(fz_pixmap **imgp, pdf_xref *xref, fz_obj *dict) +{ + fz_error error; + fz_buffer *buf; + fz_pixmap *img; + fz_obj *obj; + + pdf_logimage("jpeg2000\n"); + + error = pdf_loadstream(&buf, xref, fz_tonum(dict), fz_togen(dict)); + if (error) + return fz_rethrow(error, "cannot load jpx image data"); + + error = fz_loadjpximage(&img, buf->data, buf->len); + if (error) + { + fz_dropbuffer(buf); + return fz_rethrow(error, "cannot load jpx image"); + } + + fz_dropbuffer(buf); + + obj = fz_dictgetsa(dict, "SMask", "Mask"); + if (fz_isdict(obj)) + { + error = pdf_loadimageimp(&img->mask, xref, nil, obj, nil, 
1); + if (error) + { + fz_droppixmap(img); + return fz_rethrow(error, "cannot load image mask/softmask"); + } + } + + obj = fz_dictgets(dict, "ColorSpace"); + if (obj) + { + fz_colorspace *original = img->colorspace; + img->colorspace = nil; + + error = pdf_loadcolorspace(&img->colorspace, xref, obj); + if (error) + { + fz_dropcolorspace(original); + return fz_rethrow(error, "cannot load image colorspace"); + } + + if (original->n != img->colorspace->n) + { + fz_warn("jpeg-2000 colorspace (%s) does not match promised colorspace (%s)", original->name, img->colorspace->name); + fz_dropcolorspace(img->colorspace); + img->colorspace = original; + } + else + fz_dropcolorspace(original); + + if (!strcmp(img->colorspace->name, "Indexed")) + { + fz_pixmap *conv; + conv = pdf_expandindexedpixmap(img); + fz_droppixmap(img); + img = conv; + } + } + + *imgp = img; + return fz_okay; +} + +fz_error +pdf_loadimage(fz_pixmap **pixp, pdf_xref *xref, fz_obj *dict) +{ + fz_error error; + + if ((*pixp = pdf_finditem(xref->store, fz_droppixmap, dict))) + { + fz_keeppixmap(*pixp); + return fz_okay; + } + + pdf_logimage("load image (%d 0 R) {\n", fz_tonum(dict)); + + error = pdf_loadimageimp(pixp, xref, nil, dict, nil, 0); + if (error) + return fz_rethrow(error, "cannot load image (%d 0 R)", fz_tonum(dict)); + + pdf_storeitem(xref->store, fz_keeppixmap, fz_droppixmap, dict, *pixp); + + pdf_logimage("}\n"); + + return fz_okay; +} diff --git a/pdf/pdf_interpret.c b/pdf/pdf_interpret.c new file mode 100644 index 00000000..dbeeb997 --- /dev/null +++ b/pdf/pdf_interpret.c @@ -0,0 +1,1464 @@ +#include "fitz.h" +#include "mupdf.h" + +static pdf_csi * +pdf_newcsi(pdf_xref *xref, fz_device *dev, fz_matrix ctm, char *target) +{ + pdf_csi *csi; + + csi = fz_malloc(sizeof(pdf_csi)); + csi->xref = xref; + csi->dev = dev; + csi->target = target; + + csi->top = 0; + csi->obj = nil; + csi->name[0] = 0; + csi->stringlen = 0; + memset(csi->stack, 0, sizeof csi->stack); + + csi->xbalance = 0; + csi->intext 
= 0; + csi->inarray = 0; + + csi->path = fz_newpath(); + csi->clip = 0; + csi->clipevenodd = 0; + + csi->text = nil; + csi->tlm = fz_identity; + csi->tm = fz_identity; + csi->textmode = 0; + csi->accumulate = 1; + + csi->topctm = ctm; + pdf_initgstate(&csi->gstate[0], ctm); + csi->gtop = 0; + + return csi; +} + +static void +pdf_clearstack(pdf_csi *csi) +{ + int i; + + if (csi->obj) + fz_dropobj(csi->obj); + csi->obj = nil; + + csi->name[0] = 0; + csi->stringlen = 0; + for (i = 0; i < csi->top; i++) + csi->stack[i] = 0; + + csi->top = 0; +} + +pdf_material * +pdf_keepmaterial(pdf_material *mat) +{ + if (mat->colorspace) + fz_keepcolorspace(mat->colorspace); + if (mat->pattern) + pdf_keeppattern(mat->pattern); + if (mat->shade) + fz_keepshade(mat->shade); + return mat; +} + +pdf_material * +pdf_dropmaterial(pdf_material *mat) +{ + if (mat->colorspace) + fz_dropcolorspace(mat->colorspace); + if (mat->pattern) + pdf_droppattern(mat->pattern); + if (mat->shade) + fz_dropshade(mat->shade); + return mat; +} + +void +pdf_gsave(pdf_csi *csi) +{ + pdf_gstate *gs = csi->gstate + csi->gtop; + + if (csi->gtop == nelem(csi->gstate) - 1) + { + fz_warn("gstate overflow in content stream"); + return; + } + + memcpy(&csi->gstate[csi->gtop + 1], &csi->gstate[csi->gtop], sizeof(pdf_gstate)); + + csi->gtop ++; + + pdf_keepmaterial(&gs->stroke); + pdf_keepmaterial(&gs->fill); + if (gs->font) + pdf_keepfont(gs->font); + if (gs->softmask) + pdf_keepxobject(gs->softmask); +} + +void +pdf_grestore(pdf_csi *csi) +{ + pdf_gstate *gs = csi->gstate + csi->gtop; + int clipdepth = gs->clipdepth; + + if (csi->gtop == 0) + { + fz_warn("gstate underflow in content stream"); + return; + } + + pdf_dropmaterial(&gs->stroke); + pdf_dropmaterial(&gs->fill); + if (gs->font) + pdf_dropfont(gs->font); + if (gs->softmask) + pdf_dropxobject(gs->softmask); + + csi->gtop --; + + gs = csi->gstate + csi->gtop; + while (clipdepth > gs->clipdepth) + { + csi->dev->popclip(csi->dev->user); + clipdepth--; + } +} + 
+static void +pdf_freecsi(pdf_csi *csi) +{ + while (csi->gtop) + pdf_grestore(csi); + + pdf_dropmaterial(&csi->gstate[0].fill); + pdf_dropmaterial(&csi->gstate[0].stroke); + if (csi->gstate[0].font) + pdf_dropfont(csi->gstate[0].font); + if (csi->gstate[0].softmask) + pdf_dropxobject(csi->gstate[0].softmask); + + while (csi->gstate[0].clipdepth--) + csi->dev->popclip(csi->dev->user); + + if (csi->path) fz_freepath(csi->path); + if (csi->text) fz_freetext(csi->text); + + pdf_clearstack(csi); + + fz_free(csi); +} + +static int +pdf_ishiddenocg(pdf_csi *csi, fz_obj *xobj) +{ + char target_state[16]; + fz_obj *obj; + + fz_strlcpy(target_state, csi->target, sizeof target_state); + fz_strlcat(target_state, "State", sizeof target_state); + + obj = fz_dictgets(xobj, "OC"); + obj = fz_dictgets(obj, "OCGs"); + if (fz_isarray(obj)) + obj = fz_arrayget(obj, 0); + obj = fz_dictgets(obj, "Usage"); + obj = fz_dictgets(obj, csi->target); + obj = fz_dictgets(obj, target_state); + return !strcmp(fz_toname(obj), "OFF"); +} + +fz_error +pdf_runxobject(pdf_csi *csi, fz_obj *resources, pdf_xobject *xobj, fz_matrix transform) +{ + fz_error error; + pdf_gstate *gstate; + fz_matrix oldtopctm; + int oldtop; + int popmask; + + pdf_gsave(csi); + + gstate = csi->gstate + csi->gtop; + oldtop = csi->gtop; + popmask = 0; + + /* apply xobject's transform matrix */ + transform = fz_concat(transform, xobj->matrix); + gstate->ctm = fz_concat(transform, gstate->ctm); + + /* apply soft mask, create transparency group and reset state */ + if (xobj->transparency) + { + if (gstate->softmask) + { + pdf_xobject *softmask = gstate->softmask; + fz_rect bbox = fz_transformrect(gstate->ctm, xobj->bbox); + + gstate->softmask = nil; + popmask = 1; + + csi->dev->beginmask(csi->dev->user, bbox, gstate->luminosity, + softmask->colorspace, gstate->softmaskbc); + error = pdf_runxobject(csi, resources, softmask, fz_identity); + if (error) + return fz_rethrow(error, "cannot run softmask"); + 
csi->dev->endmask(csi->dev->user); + + pdf_dropxobject(softmask); + } + + csi->dev->begingroup(csi->dev->user, + fz_transformrect(gstate->ctm, xobj->bbox), + xobj->isolated, xobj->knockout, gstate->blendmode, gstate->fill.alpha); + + gstate->blendmode = FZ_BNORMAL; + gstate->stroke.alpha = 1; + gstate->fill.alpha = 1; + } + + /* clip to the bounds */ + + fz_moveto(csi->path, xobj->bbox.x0, xobj->bbox.y0); + fz_lineto(csi->path, xobj->bbox.x1, xobj->bbox.y0); + fz_lineto(csi->path, xobj->bbox.x1, xobj->bbox.y1); + fz_lineto(csi->path, xobj->bbox.x0, xobj->bbox.y1); + fz_closepath(csi->path); + csi->clip = 1; + pdf_showpath(csi, 0, 0, 0, 0); + + /* run contents */ + + oldtopctm = csi->topctm; + csi->topctm = gstate->ctm; + + if (xobj->resources) + resources = xobj->resources; + + error = pdf_runcsibuffer(csi, resources, xobj->contents); + if (error) + return fz_rethrow(error, "cannot interpret XObject stream"); + + csi->topctm = oldtopctm; + + while (oldtop < csi->gtop) + pdf_grestore(csi); + + pdf_grestore(csi); + + /* wrap up transparency stacks */ + + if (xobj->transparency) + { + csi->dev->endgroup(csi->dev->user); + if (popmask) + csi->dev->popclip(csi->dev->user); + } + + return fz_okay; +} + +static fz_error +pdf_runinlineimage(pdf_csi *csi, fz_obj *rdb, fz_stream *file, fz_obj *dict) +{ + fz_error error; + fz_pixmap *img; + int ch; + + error = pdf_loadinlineimage(&img, csi->xref, rdb, dict, file); + if (error) + return fz_rethrow(error, "cannot load inline image"); + + /* find EI */ + ch = fz_readbyte(file); + while (ch != 'E' && ch != EOF) + ch = fz_readbyte(file); + ch = fz_readbyte(file); + if (ch != 'I') + { + fz_droppixmap(img); + return fz_rethrow(error, "syntax error after inline image"); + } + + pdf_showimage(csi, img); + + fz_droppixmap(img); + return fz_okay; +} + +static fz_error +pdf_runextgstate(pdf_csi *csi, fz_obj *rdb, fz_obj *extgstate) +{ + pdf_gstate *gstate = csi->gstate + csi->gtop; + fz_colorspace *colorspace; + int i, k; + + 
pdf_flushtext(csi); + + for (i = 0; i < fz_dictlen(extgstate); i++) + { + fz_obj *key = fz_dictgetkey(extgstate, i); + fz_obj *val = fz_dictgetval(extgstate, i); + char *s = fz_toname(key); + + if (!strcmp(s, "Font")) + { + if (fz_isarray(val) && fz_arraylen(val) == 2) + { + fz_error error; + fz_obj *font = fz_arrayget(val, 0); + + if (gstate->font) + { + pdf_dropfont(gstate->font); + gstate->font = nil; + } + + error = pdf_loadfont(&gstate->font, csi->xref, rdb, font); + if (error) + return fz_rethrow(error, "cannot load font (%d %d R)", fz_tonum(font), fz_togen(font)); + if (!gstate->font) + return fz_throw("cannot find font in store"); + gstate->size = fz_toreal(fz_arrayget(val, 1)); + } + else + return fz_throw("malformed /Font dictionary"); + } + + else if (!strcmp(s, "LW")) + gstate->strokestate.linewidth = fz_toreal(val); + else if (!strcmp(s, "LC")) + gstate->strokestate.linecap = fz_toint(val); + else if (!strcmp(s, "LJ")) + gstate->strokestate.linejoin = fz_toint(val); + else if (!strcmp(s, "ML")) + gstate->strokestate.miterlimit = fz_toreal(val); + + else if (!strcmp(s, "D")) + { + if (fz_isarray(val) && fz_arraylen(val) == 2) + { + fz_obj *dashes = fz_arrayget(val, 0); + gstate->strokestate.dashlen = MAX(fz_arraylen(dashes), 32); + for (k = 0; k < gstate->strokestate.dashlen; k++) + gstate->strokestate.dashlist[k] = fz_toreal(fz_arrayget(dashes, k)); + gstate->strokestate.dashphase = fz_toreal(fz_arrayget(val, 1)); + } + else + return fz_throw("malformed /D"); + } + + else if (!strcmp(s, "CA")) + gstate->stroke.alpha = fz_toreal(val); + + else if (!strcmp(s, "ca")) + gstate->fill.alpha = fz_toreal(val); + + else if (!strcmp(s, "BM")) + { + if (fz_isarray(val)) + val = fz_arrayget(val, 0); + + gstate->blendmode = FZ_BNORMAL; + for (k = 0; fz_blendnames[k]; k++) + if (!strcmp(fz_blendnames[k], fz_toname(val))) + gstate->blendmode = k; + } + + else if (!strcmp(s, "SMask")) + { + if (fz_isdict(val)) + { + fz_error error; + pdf_xobject *xobj; + fz_obj 
*group, *luminosity, *bc; + + if (gstate->softmask) + { + pdf_dropxobject(gstate->softmask); + gstate->softmask = nil; + } + + group = fz_dictgets(val, "G"); + if (!group) + return fz_throw("cannot load softmask xobject (%d %d R)", fz_tonum(val), fz_togen(val)); + error = pdf_loadxobject(&xobj, csi->xref, group); + if (error) + return fz_rethrow(error, "cannot load xobject (%d %d R)", fz_tonum(val), fz_togen(val)); + + colorspace = xobj->colorspace; + if (!colorspace) + colorspace = fz_devicegray; + + gstate->softmaskctm = fz_concat(xobj->matrix, gstate->ctm); + gstate->softmask = xobj; + for (k = 0; k < colorspace->n; k++) + gstate->softmaskbc[k] = 0; + + bc = fz_dictgets(val, "BC"); + if (fz_isarray(bc)) + { + for (k = 0; k < colorspace->n; k++) + gstate->softmaskbc[k] = fz_toreal(fz_arrayget(bc, k)); + } + + luminosity = fz_dictgets(val, "S"); + if (fz_isname(luminosity) && !strcmp(fz_toname(luminosity), "Luminosity")) + gstate->luminosity = 1; + else + gstate->luminosity = 0; + } + else if (fz_isname(val) && !strcmp(fz_toname(val), "None")) + { + if (gstate->softmask) + { + pdf_dropxobject(gstate->softmask); + gstate->softmask = nil; + } + } + } + + else if (!strcmp(s, "TR")) + { + if (fz_isname(val) && strcmp(fz_toname(val), "Identity")) + fz_warn("ignoring transfer function"); + } + } + + return fz_okay; +} + +static void pdf_run_BDC(pdf_csi *csi) +{ +} + +static fz_error pdf_run_BI(pdf_csi *csi, fz_obj *rdb, fz_stream *file) +{ + int ch; + fz_error error; + char *buf = csi->xref->scratch; + int buflen = sizeof(csi->xref->scratch); + fz_obj *obj; + + error = pdf_parsedict(&obj, csi->xref, file, buf, buflen); + if (error) + return fz_rethrow(error, "cannot parse inline image dictionary"); + + /* read whitespace after ID keyword */ + ch = fz_readbyte(file); + if (ch == '\r') + if (fz_peekbyte(file) == '\n') + fz_readbyte(file); + + error = pdf_runinlineimage(csi, rdb, file, obj); + fz_dropobj(obj); + if (error) + return fz_rethrow(error, "cannot parse inline 
image"); + + return fz_okay; +} + +static void pdf_run_B(pdf_csi *csi) +{ + pdf_showpath(csi, 0, 1, 1, 0); +} + +static void pdf_run_BMC(pdf_csi *csi) +{ +} + +static void pdf_run_BT(pdf_csi *csi) +{ + csi->intext = 1; + csi->tm = fz_identity; + csi->tlm = fz_identity; +} + +static void pdf_run_BX(pdf_csi *csi) +{ + csi->xbalance ++; +} + +static void pdf_run_Bstar(pdf_csi *csi) +{ + pdf_showpath(csi, 0, 1, 1, 1); +} + +static fz_error pdf_run_cs_imp(pdf_csi *csi, fz_obj *rdb, int what) +{ + fz_colorspace *colorspace; + fz_obj *obj, *dict; + fz_error error; + + if (!strcmp(csi->name, "Pattern")) + { + pdf_setpattern(csi, what, nil, nil); + } + else + { + if (!strcmp(csi->name, "DeviceGray")) + colorspace = fz_keepcolorspace(fz_devicegray); + else if (!strcmp(csi->name, "DeviceRGB")) + colorspace = fz_keepcolorspace(fz_devicergb); + else if (!strcmp(csi->name, "DeviceCMYK")) + colorspace = fz_keepcolorspace(fz_devicecmyk); + else + { + dict = fz_dictgets(rdb, "ColorSpace"); + if (!dict) + return fz_throw("cannot find ColorSpace dictionary"); + obj = fz_dictgets(dict, csi->name); + if (!obj) + return fz_throw("cannot find colorspace resource '%s'", csi->name); + error = pdf_loadcolorspace(&colorspace, csi->xref, obj); + if (error) + return fz_rethrow(error, "cannot load colorspace (%d 0 R)", fz_tonum(obj)); + } + + pdf_setcolorspace(csi, what, colorspace); + + fz_dropcolorspace(colorspace); + } + return fz_okay; +} + +static void pdf_run_CS(pdf_csi *csi, fz_obj *rdb) +{ + fz_error error; + error = pdf_run_cs_imp(csi, rdb, PDF_MSTROKE); + if (error) + fz_catch(error, "cannot set colorspace"); +} + +static void pdf_run_cs(pdf_csi *csi, fz_obj *rdb) +{ + fz_error error; + error = pdf_run_cs_imp(csi, rdb, PDF_MFILL); + if (error) + fz_catch(error, "cannot set colorspace"); +} + +static void pdf_run_DP(pdf_csi *csi) +{ +} + +static fz_error pdf_run_Do(pdf_csi *csi, fz_obj *rdb) +{ + fz_obj *dict; + fz_obj *obj; + fz_obj *subtype; + fz_error error; + + dict = 
fz_dictgets(rdb, "XObject"); + if (!dict) + return fz_throw("cannot find XObject dictionary when looking for: '%s'", csi->name); + + obj = fz_dictgets(dict, csi->name); + if (!obj) + return fz_throw("cannot find xobject resource: '%s'", csi->name); + + subtype = fz_dictgets(obj, "Subtype"); + if (!fz_isname(subtype)) + return fz_throw("no XObject subtype specified"); + + if (pdf_ishiddenocg(csi, obj)) + return fz_okay; + + if (!strcmp(fz_toname(subtype), "Form") && fz_dictgets(obj, "Subtype2")) + subtype = fz_dictgets(obj, "Subtype2"); + + if (!strcmp(fz_toname(subtype), "Form")) + { + pdf_xobject *xobj; + + error = pdf_loadxobject(&xobj, csi->xref, obj); + if (error) + return fz_rethrow(error, "cannot load xobject (%d %d R)", fz_tonum(obj), fz_togen(obj)); + + /* Inherit parent resources, in case this one was empty XXX check where it's loaded */ + if (!xobj->resources) + xobj->resources = fz_keepobj(rdb); + + error = pdf_runxobject(csi, xobj->resources, xobj, fz_identity); + if (error) + return fz_rethrow(error, "cannot draw xobject (%d %d R)", fz_tonum(obj), fz_togen(obj)); + + pdf_dropxobject(xobj); + } + + else if (!strcmp(fz_toname(subtype), "Image")) + { + if ((csi->dev->hints & FZ_IGNOREIMAGE) == 0) + { + fz_pixmap *img; + error = pdf_loadimage(&img, csi->xref, obj); + if (error) + return fz_rethrow(error, "cannot load image (%d %d R)", fz_tonum(obj), fz_togen(obj)); + pdf_showimage(csi, img); + fz_droppixmap(img); + } + } + + else if (!strcmp(fz_toname(subtype), "PS")) + { + fz_warn("ignoring XObject with subtype PS"); + } + + else + { + return fz_throw("unknown XObject subtype: '%s'", fz_toname(subtype)); + } + + return fz_okay; +} + +static void pdf_run_EMC(pdf_csi *csi) +{ +} + +static void pdf_run_ET(pdf_csi *csi) +{ + pdf_flushtext(csi); + csi->accumulate = 1; + csi->intext = 0; +} + +static void pdf_run_EX(pdf_csi *csi) +{ + csi->xbalance --; +} + +static void pdf_run_F(pdf_csi *csi) +{ + pdf_showpath(csi, 0, 1, 0, 0); +} + +static void 
pdf_run_G(pdf_csi *csi) +{ + pdf_setcolorspace(csi, PDF_MSTROKE, fz_devicegray); + pdf_setcolor(csi, PDF_MSTROKE, csi->stack); +} + +static void pdf_run_J(pdf_csi *csi) +{ + pdf_gstate *gstate = csi->gstate + csi->gtop; + gstate->strokestate.linecap = csi->stack[0]; +} + +static void pdf_run_K(pdf_csi *csi) +{ + pdf_setcolorspace(csi, PDF_MSTROKE, fz_devicecmyk); + pdf_setcolor(csi, PDF_MSTROKE, csi->stack); +} + +static void pdf_run_M(pdf_csi *csi) +{ + pdf_gstate *gstate = csi->gstate + csi->gtop; + gstate->strokestate.miterlimit = csi->stack[0]; +} + +static void pdf_run_MP(pdf_csi *csi) +{ +} + +static void pdf_run_Q(pdf_csi *csi) +{ + pdf_grestore(csi); +} + +static void pdf_run_RG(pdf_csi *csi) +{ + pdf_setcolorspace(csi, PDF_MSTROKE, fz_devicergb); + pdf_setcolor(csi, PDF_MSTROKE, csi->stack); +} + +static void pdf_run_S(pdf_csi *csi) +{ + pdf_showpath(csi, 0, 0, 1, 0); +} + +static fz_error pdf_run_SC_imp(pdf_csi *csi, fz_obj *rdb, int what, pdf_material *mat) +{ + fz_error error; + fz_obj *patterntype; + fz_obj *dict; + fz_obj *obj; + int kind; + + kind = mat->kind; + if (csi->name[0]) + kind = PDF_MPATTERN; + + switch (kind) + { + case PDF_MNONE: + return fz_throw("cannot set color in mask objects"); + + case PDF_MCOLOR: + pdf_setcolor(csi, what, csi->stack); + break; + + case PDF_MPATTERN: + dict = fz_dictgets(rdb, "Pattern"); + if (!dict) + return fz_throw("cannot find Pattern dictionary"); + + obj = fz_dictgets(dict, csi->name); + if (!obj) + return fz_throw("cannot find pattern resource '%s'", csi->name); + + patterntype = fz_dictgets(obj, "PatternType"); + + if (fz_toint(patterntype) == 1) + { + pdf_pattern *pat; + error = pdf_loadpattern(&pat, csi->xref, obj); + if (error) + return fz_rethrow(error, "cannot load pattern (%d 0 R)", fz_tonum(obj)); + pdf_setpattern(csi, what, pat, csi->top > 0 ? 
csi->stack : nil); + pdf_droppattern(pat); + } + else if (fz_toint(patterntype) == 2) + { + fz_shade *shd; + error = pdf_loadshading(&shd, csi->xref, obj); + if (error) + return fz_rethrow(error, "cannot load shading (%d 0 R)", fz_tonum(obj)); + pdf_setshade(csi, what, shd); + fz_dropshade(shd); + } + else + { + return fz_throw("unknown pattern type: %d", fz_toint(patterntype)); + } + break; + + case PDF_MSHADE: + return fz_throw("cannot set color in shade objects"); + } + + return fz_okay; +} + +static void pdf_run_SC(pdf_csi *csi, fz_obj *rdb) +{ + fz_error error; + pdf_gstate *gstate = csi->gstate + csi->gtop; + error = pdf_run_SC_imp(csi, rdb, PDF_MSTROKE, &gstate->stroke); + if (error) + fz_catch(error, "cannot set color and colorspace"); +} + +static void pdf_run_sc(pdf_csi *csi, fz_obj *rdb) +{ + fz_error error; + pdf_gstate *gstate = csi->gstate + csi->gtop; + error = pdf_run_SC_imp(csi, rdb, PDF_MFILL, &gstate->fill); + if (error) + fz_catch(error, "cannot set color and colorspace"); +} + +static void pdf_run_Tc(pdf_csi *csi) +{ + pdf_gstate *gstate = csi->gstate + csi->gtop; + gstate->charspace = csi->stack[0]; +} + +static void pdf_run_Tw(pdf_csi *csi) +{ + pdf_gstate *gstate = csi->gstate + csi->gtop; + gstate->wordspace = csi->stack[0]; +} + +static void pdf_run_Tz(pdf_csi *csi) +{ + pdf_gstate *gstate = csi->gstate + csi->gtop; + float a = csi->stack[0] / 100; + pdf_flushtext(csi); + gstate->scale = a; +} + +static void pdf_run_TL(pdf_csi *csi) +{ + pdf_gstate *gstate = csi->gstate + csi->gtop; + gstate->leading = csi->stack[0]; +} + +static fz_error pdf_run_Tf(pdf_csi *csi, fz_obj *rdb) +{ + pdf_gstate *gstate = csi->gstate + csi->gtop; + fz_error error; + fz_obj *dict; + fz_obj *obj; + + gstate->size = csi->stack[0]; + if (gstate->font) + pdf_dropfont(gstate->font); + gstate->font = nil; + + dict = fz_dictgets(rdb, "Font"); + if (!dict) + return fz_throw("cannot find Font dictionary"); + + obj = fz_dictgets(dict, csi->name); + if (!obj) + return 
fz_throw("cannot find font resource: '%s'", csi->name); + + error = pdf_loadfont(&gstate->font, csi->xref, rdb, obj); + if (error) + return fz_rethrow(error, "cannot load font (%d 0 R)", fz_tonum(obj)); + + return fz_okay; +} + +static void pdf_run_Tr(pdf_csi *csi) +{ + pdf_gstate *gstate = csi->gstate + csi->gtop; + gstate->render = csi->stack[0]; +} + +static void pdf_run_Ts(pdf_csi *csi) +{ + pdf_gstate *gstate = csi->gstate + csi->gtop; + gstate->rise = csi->stack[0]; +} + +static void pdf_run_Td(pdf_csi *csi) +{ + fz_matrix m = fz_translate(csi->stack[0], csi->stack[1]); + csi->tlm = fz_concat(m, csi->tlm); + csi->tm = csi->tlm; +} + +static void pdf_run_TD(pdf_csi *csi) +{ + pdf_gstate *gstate = csi->gstate + csi->gtop; + fz_matrix m; + + gstate->leading = -csi->stack[1]; + m = fz_translate(csi->stack[0], csi->stack[1]); + csi->tlm = fz_concat(m, csi->tlm); + csi->tm = csi->tlm; +} + +static void pdf_run_Tm(pdf_csi *csi) +{ + pdf_flushtext(csi); + csi->tm.a = csi->stack[0]; + csi->tm.b = csi->stack[1]; + csi->tm.c = csi->stack[2]; + csi->tm.d = csi->stack[3]; + csi->tm.e = csi->stack[4]; + csi->tm.f = csi->stack[5]; + csi->tlm = csi->tm; +} + +static void pdf_run_Tstar(pdf_csi *csi) +{ + pdf_gstate *gstate = csi->gstate + csi->gtop; + fz_matrix m = fz_translate(0, -gstate->leading); + csi->tlm = fz_concat(m, csi->tlm); + csi->tm = csi->tlm; +} + +static void pdf_run_Tj(pdf_csi *csi) +{ + if (csi->stringlen) + pdf_showstring(csi, csi->string, csi->stringlen); + else + pdf_showtext(csi, csi->obj); +} + +static void pdf_run_TJ(pdf_csi *csi) +{ + if (csi->stringlen) + pdf_showstring(csi, csi->string, csi->stringlen); + else + pdf_showtext(csi, csi->obj); +} + +static void pdf_run_W(pdf_csi *csi) +{ + csi->clip = 1; + csi->clipevenodd = 0; +} + +static void pdf_run_Wstar(pdf_csi *csi) +{ + csi->clip = 1; + csi->clipevenodd = 1; +} + +static void pdf_run_b(pdf_csi *csi) +{ + pdf_showpath(csi, 1, 1, 1, 0); +} + +static void pdf_run_bstar(pdf_csi *csi) +{ + 
pdf_showpath(csi, 1, 1, 1, 1); +} + +static void pdf_run_c(pdf_csi *csi) +{ + float a, b, c, d, e, f; + a = csi->stack[0]; + b = csi->stack[1]; + c = csi->stack[2]; + d = csi->stack[3]; + e = csi->stack[4]; + f = csi->stack[5]; + fz_curveto(csi->path, a, b, c, d, e, f); +} + +static void pdf_run_cm(pdf_csi *csi) +{ + pdf_gstate *gstate = csi->gstate + csi->gtop; + fz_matrix m; + + m.a = csi->stack[0]; + m.b = csi->stack[1]; + m.c = csi->stack[2]; + m.d = csi->stack[3]; + m.e = csi->stack[4]; + m.f = csi->stack[5]; + + gstate->ctm = fz_concat(m, gstate->ctm); +} + +static void pdf_run_d(pdf_csi *csi) +{ + pdf_gstate *gstate = csi->gstate + csi->gtop; + fz_obj *array; + int i; + + array = csi->obj; + gstate->strokestate.dashlen = MIN(fz_arraylen(array), nelem(gstate->strokestate.dashlist)); + for (i = 0; i < gstate->strokestate.dashlen; i++) + gstate->strokestate.dashlist[i] = fz_toreal(fz_arrayget(array, i)); + gstate->strokestate.dashphase = csi->stack[0]; +} + +static void pdf_run_d0(pdf_csi *csi) +{ +} + +static void pdf_run_d1(pdf_csi *csi) +{ +} + +static void pdf_run_f(pdf_csi *csi) +{ + pdf_showpath(csi, 0, 1, 0, 0); +} + +static void pdf_run_fstar(pdf_csi *csi) +{ + pdf_showpath(csi, 0, 1, 0, 1); +} + +static void pdf_run_g(pdf_csi *csi) +{ + pdf_setcolorspace(csi, PDF_MFILL, fz_devicegray); + pdf_setcolor(csi, PDF_MFILL, csi->stack); +} + +static fz_error pdf_run_gs(pdf_csi *csi, fz_obj *rdb) +{ + fz_error error; + fz_obj *dict; + fz_obj *obj; + + dict = fz_dictgets(rdb, "ExtGState"); + if (!dict) + return fz_throw("cannot find ExtGState dictionary"); + + obj = fz_dictgets(dict, csi->name); + if (!obj) + return fz_throw("cannot find extgstate resource '%s'", csi->name); + + error = pdf_runextgstate(csi, rdb, obj); + if (error) + return fz_rethrow(error, "cannot set ExtGState (%d 0 R)", fz_tonum(obj)); + return fz_okay; +} + +static void pdf_run_h(pdf_csi *csi) +{ + fz_closepath(csi->path); +} + +static void pdf_run_i(pdf_csi *csi) +{ +} + +static void 
pdf_run_j(pdf_csi *csi) +{ + pdf_gstate *gstate = csi->gstate + csi->gtop; + gstate->strokestate.linejoin = csi->stack[0]; +} + +static void pdf_run_k(pdf_csi *csi) +{ + pdf_setcolorspace(csi, PDF_MFILL, fz_devicecmyk); + pdf_setcolor(csi, PDF_MFILL, csi->stack); +} + +static void pdf_run_l(pdf_csi *csi) +{ + float a, b; + a = csi->stack[0]; + b = csi->stack[1]; + fz_lineto(csi->path, a, b); +} + +static void pdf_run_m(pdf_csi *csi) +{ + float a, b; + a = csi->stack[0]; + b = csi->stack[1]; + fz_moveto(csi->path, a, b); +} + +static void pdf_run_n(pdf_csi *csi) +{ + pdf_showpath(csi, 0, 0, 0, csi->clipevenodd); +} + +static void pdf_run_q(pdf_csi *csi) +{ + pdf_gsave(csi); +} + +static void pdf_run_re(pdf_csi *csi) +{ + float x, y, w, h; + + x = csi->stack[0]; + y = csi->stack[1]; + w = csi->stack[2]; + h = csi->stack[3]; + + fz_moveto(csi->path, x, y); + fz_lineto(csi->path, x + w, y); + fz_lineto(csi->path, x + w, y + h); + fz_lineto(csi->path, x, y + h); + fz_closepath(csi->path); +} + +static void pdf_run_rg(pdf_csi *csi) +{ + pdf_setcolorspace(csi, PDF_MFILL, fz_devicergb); + pdf_setcolor(csi, PDF_MFILL, csi->stack); +} + +static void pdf_run_ri(pdf_csi *csi) +{ +} + +static void pdf_run_s(pdf_csi *csi) +{ + pdf_showpath(csi, 1, 0, 1, 0); +} + +static fz_error pdf_run_sh(pdf_csi *csi, fz_obj *rdb) +{ + fz_obj *dict; + fz_obj *obj; + fz_shade *shd; + fz_error error; + + dict = fz_dictgets(rdb, "Shading"); + if (!dict) + return fz_throw("cannot find shading dictionary"); + + obj = fz_dictgets(dict, csi->name); + if (!obj) + return fz_throw("cannot find shading resource: '%s'", csi->name); + + if ((csi->dev->hints & FZ_IGNORESHADE) == 0) + { + error = pdf_loadshading(&shd, csi->xref, obj); + if (error) + return fz_rethrow(error, "cannot load shading (%d %d R)", fz_tonum(obj), fz_togen(obj)); + pdf_showshade(csi, shd); + fz_dropshade(shd); + } + return fz_okay; +} + +static void pdf_run_v(pdf_csi *csi) +{ + float a, b, c, d; + a = csi->stack[0]; + b = 
/*
 * Dispatch one content stream operator.
 *
 * buf holds the NUL-terminated keyword. Its first one to three bytes are
 * packed into an int (first byte in the low 8 bits) and matched against
 * the A/B/C case labels; a keyword of four or more bytes packs to 0 and
 * ends up in the default case.
 *
 * Error policy differs per operator: a failing inline image (BI) aborts
 * with a rethrown error, while failures of Do, Tf, gs and sh are reported
 * through fz_catch and interpretation continues. The CS/cs and SC/sc
 * families handle their own errors inside their handlers.
 *
 * Unknown keywords produce a warning unless csi->xbalance is non-zero,
 * i.e. unless we are inside a BX ... EX section.
 */
static fz_error
pdf_runkeyword(pdf_csi *csi, fz_obj *rdb, fz_stream *file, char *buf)
{
	fz_error error;
	int key;

	/* pack up to three keyword bytes; longer keywords are forced to 0 */
	key = buf[0];
	if (buf[1])
	{
		key |= buf[1] << 8;
		if (buf[2])
		{
			key |= buf[2] << 16;
			if (buf[3])
				key = 0;
		}
	}

	switch (key)
	{
	case A('"'): pdf_run_dquote(csi); break;
	case A('\''): pdf_run_squote(csi); break;
	case A('B'): pdf_run_B(csi); break;
	case B('B','*'): pdf_run_Bstar(csi); break;
	case C('B','D','C'): pdf_run_BDC(csi); break;
	case B('B','I'):
		/* inline image data follows in the stream, so file is needed */
		error = pdf_run_BI(csi, rdb, file);
		if (error)
			return fz_rethrow(error, "cannot draw inline image");
		break;
	case C('B','M','C'): pdf_run_BMC(csi); break;
	case B('B','T'): pdf_run_BT(csi); break;
	case B('B','X'): pdf_run_BX(csi); break;
	case B('C','S'): pdf_run_CS(csi, rdb); break;
	case B('D','P'): pdf_run_DP(csi); break;
	case B('D','o'):
		error = pdf_run_Do(csi, rdb);
		if (error)
			fz_catch(error, "cannot draw xobject/image");
		break;
	case C('E','M','C'): pdf_run_EMC(csi); break;
	case B('E','T'): pdf_run_ET(csi); break;
	case B('E','X'): pdf_run_EX(csi); break;
	case A('F'): pdf_run_F(csi); break;
	case A('G'): pdf_run_G(csi); break;
	case A('J'): pdf_run_J(csi); break;
	case A('K'): pdf_run_K(csi); break;
	case A('M'): pdf_run_M(csi); break;
	case B('M','P'): pdf_run_MP(csi); break;
	case A('Q'): pdf_run_Q(csi); break;
	case B('R','G'): pdf_run_RG(csi); break;
	case A('S'): pdf_run_S(csi); break;
	case B('S','C'): pdf_run_SC(csi, rdb); break;
	/* SCN shares the SC handler */
	case C('S','C','N'): pdf_run_SC(csi, rdb); break;
	case B('T','*'): pdf_run_Tstar(csi); break;
	case B('T','D'): pdf_run_TD(csi); break;
	case B('T','J'): pdf_run_TJ(csi); break;
	case B('T','L'): pdf_run_TL(csi); break;
	case B('T','c'): pdf_run_Tc(csi); break;
	case B('T','d'): pdf_run_Td(csi); break;
	case B('T','f'):
		error = pdf_run_Tf(csi, rdb);
		if (error)
			fz_catch(error, "cannot set font");
		break;
	case B('T','j'): pdf_run_Tj(csi); break;
	case B('T','m'): pdf_run_Tm(csi); break;
	case B('T','r'): pdf_run_Tr(csi); break;
	case B('T','s'): pdf_run_Ts(csi); break;
	case B('T','w'): pdf_run_Tw(csi); break;
	case B('T','z'): pdf_run_Tz(csi); break;
	case A('W'): pdf_run_W(csi); break;
	case B('W','*'): pdf_run_Wstar(csi); break;
	case A('b'): pdf_run_b(csi); break;
	case B('b','*'): pdf_run_bstar(csi); break;
	case A('c'): pdf_run_c(csi); break;
	case B('c','m'): pdf_run_cm(csi); break;
	case B('c','s'): pdf_run_cs(csi, rdb); break;
	case A('d'): pdf_run_d(csi); break;
	case B('d','0'): pdf_run_d0(csi); break;
	case B('d','1'): pdf_run_d1(csi); break;
	case A('f'): pdf_run_f(csi); break;
	case B('f','*'): pdf_run_fstar(csi); break;
	case A('g'): pdf_run_g(csi); break;
	case B('g','s'):
		error = pdf_run_gs(csi, rdb);
		if (error)
			fz_catch(error, "cannot set graphics state");
		break;
	case A('h'): pdf_run_h(csi); break;
	case A('i'): pdf_run_i(csi); break;
	case A('j'): pdf_run_j(csi); break;
	case A('k'): pdf_run_k(csi); break;
	case A('l'): pdf_run_l(csi); break;
	case A('m'): pdf_run_m(csi); break;
	case A('n'): pdf_run_n(csi); break;
	case A('q'): pdf_run_q(csi); break;
	case B('r','e'): pdf_run_re(csi); break;
	case B('r','g'): pdf_run_rg(csi); break;
	case B('r','i'): pdf_run_ri(csi); break;
	case A('s'): pdf_run_s(csi); break;
	case B('s','c'): pdf_run_sc(csi, rdb); break;
	/* scn shares the sc handler */
	case C('s','c','n'): pdf_run_sc(csi, rdb); break;
	case B('s','h'):
		error = pdf_run_sh(csi, rdb);
		if (error)
			fz_catch(error, "cannot draw shading");
		break;
	case A('v'): pdf_run_v(csi); break;
	case A('w'): pdf_run_w(csi); break;
	case A('y'): pdf_run_y(csi); break;
	default:
		/* stay quiet about unknown operators inside BX ... EX */
		if (!csi->xbalance)
			fz_warn("unknown keyword: '%s'", buf);
		break;
	}

	return fz_okay;
}
+ return fz_throw("syntax error in array"); + } + + else switch (tok) + { + case PDF_TENDSTREAM: + case PDF_TEOF: + return fz_okay; + + case PDF_TOARRAY: + if (!csi->intext) + { + error = pdf_parsearray(&csi->obj, csi->xref, file, buf, buflen); + if (error) + return fz_rethrow(error, "cannot parse array"); + } + else + { + csi->inarray = 1; + } + break; + + case PDF_TODICT: + error = pdf_parsedict(&csi->obj, csi->xref, file, buf, buflen); + if (error) + return fz_rethrow(error, "cannot parse dictionary"); + break; + + case PDF_TNAME: + fz_strlcpy(csi->name, buf, sizeof(csi->name)); + break; + + case PDF_TINT: + csi->stack[csi->top] = atoi(buf); + csi->top ++; + break; + + case PDF_TREAL: + csi->stack[csi->top] = atof(buf); + csi->top ++; + break; + + case PDF_TSTRING: + if (len <= sizeof(csi->string)) + { + memcpy(csi->string, buf, len); + csi->stringlen = len; + } + else + { + csi->obj = fz_newstring(buf, len); + } + break; + + case PDF_TKEYWORD: + error = pdf_runkeyword(csi, rdb, file, buf); + if (error) + return fz_rethrow(error, "cannot run keyword"); + pdf_clearstack(csi); + break; + + default: + return fz_throw("syntax error in content stream"); + } + } +} + +fz_error +pdf_runcsibuffer(pdf_csi *csi, fz_obj *rdb, fz_buffer *contents) +{ + fz_stream *file; + fz_error error; + file = fz_openbuffer(contents); + error = pdf_runcsifile(csi, rdb, file, csi->xref->scratch, sizeof csi->xref->scratch); + fz_close(file); + if (error) + return fz_rethrow(error, "cannot parse content stream"); + return fz_okay; +} + +fz_error +pdf_runpagewithtarget(pdf_xref *xref, pdf_page *page, fz_device *dev, fz_matrix ctm, char *target) +{ + pdf_csi *csi; + fz_error error; + pdf_annot *annot; + int flags; + + if (page->transparency) + dev->begingroup(dev->user, + fz_transformrect(ctm, page->mediabox), + 0, 0, FZ_BNORMAL, 1); + + csi = pdf_newcsi(xref, dev, ctm, target); + error = pdf_runcsibuffer(csi, page->resources, page->contents); + pdf_freecsi(csi); + if (error) + return 
fz_rethrow(error, "cannot parse page content stream"); + + for (annot = page->annots; annot; annot = annot->next) + { + flags = fz_toint(fz_dictgets(annot->obj, "F")); + + /* TODO: NoZoom and NoRotate */ + if (flags & (1 << 0)) /* Invisible */ + continue; + if (flags & (1 << 1)) /* Hidden */ + continue; + if (flags & (1 << 5)) /* NoView */ + continue; + + if (pdf_ishiddenocg(csi, annot->obj)) + continue; + + csi = pdf_newcsi(xref, dev, ctm, target); + error = pdf_runxobject(csi, page->resources, annot->ap, annot->matrix); + pdf_freecsi(csi); + if (error) + return fz_rethrow(error, "cannot parse annotation appearance stream"); + } + + if (page->transparency) + dev->endgroup(dev->user); + + return fz_okay; +} + +fz_error +pdf_runpage(pdf_xref *xref, pdf_page *page, fz_device *dev, fz_matrix ctm) +{ + return pdf_runpagewithtarget(xref, page, dev, ctm, "View"); +} + +fz_error +pdf_runglyph(pdf_xref *xref, fz_obj *resources, fz_buffer *contents, fz_device *dev, fz_matrix ctm) +{ + pdf_csi *csi = pdf_newcsi(xref, dev, ctm, "View"); + fz_error error = pdf_runcsibuffer(csi, resources, contents); + pdf_freecsi(csi); + if (error) + return fz_rethrow(error, "cannot parse glyph content stream"); + return fz_okay; +} diff --git a/pdf/pdf_lex.c b/pdf/pdf_lex.c new file mode 100644 index 00000000..6b2f26e4 --- /dev/null +++ b/pdf/pdf_lex.c @@ -0,0 +1,468 @@ +#include "fitz.h" +#include "mupdf.h" + +#define ISNUMBER \ + '+':case'-':case'.':case'0':case'1':case'2':case'3':\ + case'4':case'5':case'6':case'7':case'8':case'9' +#define ISWHITE \ + '\000':case'\011':case'\012':case'\014':case'\015':case'\040' +#define ISHEX \ + '0':case'1':case'2':case'3':case'4':case'5':case'6':\ + case'7':case'8':case'9':case'A':case'B':case'C':\ + case'D':case'E':case'F':case'a':case'b':case'c':\ + case'd':case'e':case'f' +#define ISDELIM \ + '(':case')':case'<':case'>':case'[':case']':case'{':\ + case'}':case'/':case'%' + +#define RANGE_0_9 \ + '0':case'1':case'2':case'3':case'4':case'5':\ + 
/*
 * Convert one hexadecimal digit character to its numeric value (0..15).
 * Both upper and lower case letters are accepted. Any character that is
 * not a hex digit yields 0.
 */
static inline int
fromhex(int ch)
{
	int value = 0;

	if ('0' <= ch && ch <= '9')
		value = ch - '0';
	else if ('a' <= ch && ch <= 'f')
		value = 10 + (ch - 'a');
	else if ('A' <= ch && ch <= 'F')
		value = 10 + (ch - 'A');

	return value;
}
/*
 * Read a run of regular characters from 'f' into 's' and NUL-terminate it.
 * pdf_lex uses this both for names (after the leading '/') and for
 * keywords.
 *
 * f  stream to read from
 * s  output buffer
 * n  size of the output buffer; at most n - 1 bytes are stored before
 *    the terminating NUL
 *
 * Reading stops at a whitespace or delimiter character (which is pushed
 * back onto the stream), at EOF, or when the buffer is full.
 *
 * A '#' introduces a two-digit hexadecimal escape that is decoded into a
 * single byte.
 */
static void
lexname(fz_stream *f, char *s, int n)
{
	while (n > 1)
	{
		int c = fz_readbyte(f);
		switch (c)
		{
		case ISWHITE:
		case ISDELIM:
			/* terminator belongs to the next token */
			fz_unreadbyte(f);
			goto end;
		case EOF:
			goto end;
		case '#':
		{
			int d;
			/* first hex digit: becomes the high nibble */
			c = fz_readbyte(f);
			switch (c)
			{
			case RANGE_0_9:
				d = (c - '0') << 4;
				break;
			case RANGE_a_f:
				d = (c - 'a' + 10) << 4;
				break;
			case RANGE_A_F:
				d = (c - 'A' + 10) << 4;
				break;
			default:
				/* '#' not followed by a hex digit: the '#' is dropped,
				 * the offending byte is pushed back, and the name ends */
				fz_unreadbyte(f);
				/* fallthrough */
			case EOF:
				goto end;
			}
			/* second hex digit: becomes the low nibble */
			c = fz_readbyte(f);
			switch (c)
			{
			case RANGE_0_9:
				c -= '0';
				break;
			case RANGE_a_f:
				c -= 'a' - 10;
				break;
			case RANGE_A_F:
				c -= 'A' - 10;
				break;
			default:
				/* only one hex digit was present: push the byte back */
				fz_unreadbyte(f);
				/* fallthrough */
			case EOF:
				/* store the high nibble alone and end the name */
				*s++ = d;
				n--;
				goto end;
			}
			*s++ = d + c;
			n--;
			break;
		}
		default:
			*s++ = c;
			n--;
			break;
		}
	}
end:
	*s = '\0';
}
*s++ = '\b'; + break; + case 'f': + *s++ = '\f'; + break; + case '(': + *s++ = '('; + break; + case ')': + *s++ = ')'; + break; + case '\\': + *s++ = '\\'; + break; + case RANGE_0_9: + oct = c - '0'; + c = fz_readbyte(f); + if (c >= '0' && c <= '9') + { + oct = oct * 8 + (c - '0'); + c = fz_readbyte(f); + if (c >= '0' && c <= '9') + oct = oct * 8 + (c - '0'); + else if (c != EOF) + fz_unreadbyte(f); + } + else if (c != EOF) + fz_unreadbyte(f); + *s++ = oct; + break; + case '\n': + break; + case '\r': + c = fz_readbyte(f); + if ((c != '\n') && (c != EOF)) + fz_unreadbyte(f); + break; + default: + *s++ = c; + } + break; + default: + *s++ = c; + break; + } + } +end: + return s - buf; +} + +static int +lexhexstring(fz_stream *f, char *buf, int n) +{ + char *s = buf; + char *e = buf + n; + int a = 0, x = 0; + int c; + + while (s < e) + { + c = fz_readbyte(f); + switch (c) + { + case ISWHITE: + break; + case ISHEX: + if (x) + { + *s++ = a * 16 + fromhex(c); + x = !x; + } + else + { + a = fromhex(c); + x = !x; + } + break; + case '>': + default: + goto end; + } + } +end: + return s - buf; +} + +static int +pdf_tokenfromkeyword(char *key) +{ + switch (*key) + { + case 'R': + if (!strcmp(key, "R")) return PDF_TR; + break; + case 't': + if (!strcmp(key, "true")) return PDF_TTRUE; + if (!strcmp(key, "trailer")) return PDF_TTRAILER; + break; + case 'f': + if (!strcmp(key, "false")) return PDF_TFALSE; + break; + case 'n': + if (!strcmp(key, "null")) return PDF_TNULL; + break; + case 'o': + if (!strcmp(key, "obj")) return PDF_TOBJ; + break; + case 'e': + if (!strcmp(key, "endobj")) return PDF_TENDOBJ; + if (!strcmp(key, "endstream")) return PDF_TENDSTREAM; + break; + case 's': + if (!strcmp(key, "stream")) return PDF_TSTREAM; + if (!strcmp(key, "startxref")) return PDF_TSTARTXREF; + break; + case 'x': + if (!strcmp(key, "xref")) return PDF_TXREF; + break; + default: + break; + } + + return PDF_TKEYWORD; +} + +fz_error +pdf_lex(int *tok, fz_stream *f, char *buf, int n, int *sl) 
+{ + while (1) + { + int c = fz_readbyte(f); + switch (c) + { + case EOF: + *tok = PDF_TEOF; + return fz_okay; + case ISWHITE: + lexwhite(f); + break; + case '%': + lexcomment(f); + break; + case '/': + lexname(f, buf, n); + *sl = strlen(buf); + *tok = PDF_TNAME; + return fz_okay; + case '(': + *sl = lexstring(f, buf, n); + *tok = PDF_TSTRING; + return fz_okay; + case ')': + *tok = PDF_TERROR; + goto cleanuperror; + case '<': + c = fz_readbyte(f); + if (c == '<') + { + *tok = PDF_TODICT; + } + else + { + fz_unreadbyte(f); + *sl = lexhexstring(f, buf, n); + *tok = PDF_TSTRING; + } + return fz_okay; + case '>': + c = fz_readbyte(f); + if (c == '>') + { + *tok = PDF_TCDICT; + return fz_okay; + } + *tok = PDF_TERROR; + goto cleanuperror; + case '[': + *tok = PDF_TOARRAY; + return fz_okay; + case ']': + *tok = PDF_TCARRAY; + return fz_okay; + case '{': + *tok = PDF_TOBRACE; + return fz_okay; + case '}': + *tok = PDF_TCBRACE; + return fz_okay; + case ISNUMBER: + fz_unreadbyte(f); + *sl = lexnumber(f, buf, n, tok); + return fz_okay; + default: /* isregular: !isdelim && !iswhite && c != EOF */ + fz_unreadbyte(f); + lexname(f, buf, n); + *sl = strlen(buf); + *tok = pdf_tokenfromkeyword(buf); + return fz_okay; + } + } + +cleanuperror: + *tok = PDF_TERROR; + return fz_throw("lexical error"); +} diff --git a/pdf/pdf_nametree.c b/pdf/pdf_nametree.c new file mode 100644 index 00000000..6b2314cd --- /dev/null +++ b/pdf/pdf_nametree.c @@ -0,0 +1,139 @@ +#include "fitz.h" +#include "mupdf.h" + +static fz_obj * +pdf_lookupnameimp(fz_obj *node, fz_obj *needle) +{ + fz_obj *kids = fz_dictgets(node, "Kids"); + fz_obj *names = fz_dictgets(node, "Names"); + + if (fz_isarray(kids)) + { + int l = 0; + int r = fz_arraylen(kids) - 1; + + while (l <= r) + { + int m = (l + r) >> 1; + fz_obj *kid = fz_arrayget(kids, m); + fz_obj *limits = fz_dictgets(kid, "Limits"); + fz_obj *first = fz_arrayget(limits, 0); + fz_obj *last = fz_arrayget(limits, 1); + + if (fz_objcmp(needle, first) < 0) + r = m - 
1; + else if (fz_objcmp(needle, last) > 0) + l = m + 1; + else + return pdf_lookupnameimp(kid, needle); + } + } + + if (fz_isarray(names)) + { + int l = 0; + int r = (fz_arraylen(names) / 2) - 1; + + while (l <= r) + { + int m = (l + r) >> 1; + int c; + fz_obj *key = fz_arrayget(names, m * 2); + fz_obj *val = fz_arrayget(names, m * 2 + 1); + + c = fz_objcmp(needle, key); + if (c < 0) + r = m - 1; + else if (c > 0) + l = m + 1; + else + return val; + } + } + + return nil; +} + +fz_obj * +pdf_lookupname(pdf_xref *xref, char *which, fz_obj *needle) +{ + fz_obj *root = fz_dictgets(xref->trailer, "Root"); + fz_obj *names = fz_dictgets(root, "Names"); + fz_obj *tree = fz_dictgets(names, which); + return pdf_lookupnameimp(tree, needle); +} + +fz_obj * +pdf_lookupdest(pdf_xref *xref, fz_obj *needle) +{ + fz_obj *root = fz_dictgets(xref->trailer, "Root"); + fz_obj *dests = fz_dictgets(root, "Dests"); + fz_obj *names = fz_dictgets(root, "Names"); + fz_obj *dest = nil; + + /* PDF 1.1 has destinations in a dictionary */ + if (dests) + { + if (fz_isname(needle)) + return fz_dictget(dests, needle); + else + return fz_dictgets(dests, fz_tostrbuf(needle)); + } + + /* PDF 1.2 has destinations in a name tree */ + if (names && !dest) + { + fz_obj *tree = fz_dictgets(names, "Dests"); + return pdf_lookupnameimp(tree, needle); + } + + return nil; +} + +static void +pdf_loadnametreeimp(fz_obj *dict, pdf_xref *xref, fz_obj *node) +{ + fz_obj *kids = fz_dictgets(node, "Kids"); + fz_obj *names = fz_dictgets(node, "Names"); + int i; + + if (kids) + { + for (i = 0; i < fz_arraylen(kids); i++) + pdf_loadnametreeimp(dict, xref, fz_arrayget(kids, i)); + } + + if (names) + { + for (i = 0; i + 1 < fz_arraylen(names); i += 2) + { + fz_obj *key = fz_arrayget(names, i); + fz_obj *val = fz_arrayget(names, i + 1); + if (fz_isstring(key)) + { + key = pdf_toutf8name(key); + fz_dictput(dict, key, val); + fz_dropobj(key); + } + else if (fz_isname(key)) + { + fz_dictput(dict, key, val); + } + } + } +} + 
+fz_obj * +pdf_loadnametree(pdf_xref *xref, char *which) +{ + fz_obj *root = fz_dictgets(xref->trailer, "Root"); + fz_obj *names = fz_dictgets(root, "Names"); + fz_obj *tree = fz_dictgets(names, which); + if (fz_isdict(tree)) + { + fz_obj *dict = fz_newdict(100); + pdf_loadnametreeimp(dict, xref, tree); + return dict; + } + return nil; +} diff --git a/pdf/pdf_outline.c b/pdf/pdf_outline.c new file mode 100644 index 00000000..b211060f --- /dev/null +++ b/pdf/pdf_outline.c @@ -0,0 +1,118 @@ +#include "fitz.h" +#include "mupdf.h" + +static pdf_outline * +pdf_loadoutlineimp(pdf_xref *xref, fz_obj *dict) +{ + pdf_outline *node; + fz_obj *obj; + + if (fz_isnull(dict)) + return nil; + + node = fz_malloc(sizeof(pdf_outline)); + node->title = nil; + node->link = nil; + node->child = nil; + node->next = nil; + node->count = 0; + + pdf_logpage("load outline {\n"); + + obj = fz_dictgets(dict, "Title"); + if (obj) + { + node->title = pdf_toutf8(obj); + pdf_logpage("title %s\n", node->title); + } + + obj = fz_dictgets(dict, "Count"); + if (obj) + { + node->count = fz_toint(obj); + } + + if (fz_dictgets(dict, "Dest") || fz_dictgets(dict, "A")) + { + node->link = pdf_loadlink(xref, dict); + } + + obj = fz_dictgets(dict, "First"); + if (obj) + { + node->child = pdf_loadoutlineimp(xref, obj); + } + + pdf_logpage("}\n"); + + obj = fz_dictgets(dict, "Next"); + if (obj) + { + node->next = pdf_loadoutlineimp(xref, obj); + } + + return node; +} + +pdf_outline * +pdf_loadoutline(pdf_xref *xref) +{ + pdf_outline *node; + fz_obj *root, *obj, *first; + + pdf_logpage("load outlines {\n"); + + node = nil; + + root = fz_dictgets(xref->trailer, "Root"); + obj = fz_dictgets(root, "Outlines"); + if (obj) + { + first = fz_dictgets(obj, "First"); + if (first) + node = pdf_loadoutlineimp(xref, first); + } + + pdf_logpage("}\n"); + + return node; +} + +void +pdf_freeoutline(pdf_outline *outline) +{ + if (outline->child) + pdf_freeoutline(outline->child); + if (outline->next) + 
pdf_freeoutline(outline->next); + if (outline->link) + pdf_freelink(outline->link); + fz_free(outline->title); + fz_free(outline); +} + +void +pdf_debugoutline(pdf_outline *outline, int level) +{ + int i; + while (outline) + { + for (i = 0; i < level; i++) + putchar(' '); + + if (outline->title) + printf("%s ", outline->title); + else + printf("<nil> "); + + if (outline->link) + fz_debugobj(outline->link->dest); + else + printf("<nil>\n"); + + if (outline->child) + pdf_debugoutline(outline->child, level + 2); + + outline = outline->next; + } +} diff --git a/pdf/pdf_page.c b/pdf/pdf_page.c new file mode 100644 index 00000000..869f7a5c --- /dev/null +++ b/pdf/pdf_page.c @@ -0,0 +1,246 @@ +#include "fitz.h" +#include "mupdf.h" + +/* we need to combine all sub-streams into one for the content stream interpreter */ + +static fz_error +pdf_loadpagecontentsarray(fz_buffer **bigbufp, pdf_xref *xref, fz_obj *list) +{ + fz_error error; + fz_buffer *big; + fz_buffer *one; + int i; + + pdf_logpage("multiple content streams: %d\n", fz_arraylen(list)); + + /* TODO: openstream, read, close into big buffer at once */ + + big = fz_newbuffer(32 * 1024); + + for (i = 0; i < fz_arraylen(list); i++) + { + fz_obj *stm = fz_arrayget(list, i); + error = pdf_loadstream(&one, xref, fz_tonum(stm), fz_togen(stm)); + if (error) + { + fz_dropbuffer(big); + return fz_rethrow(error, "cannot load content stream part %d/%d (%d %d R)", i + 1, fz_arraylen(list), fz_tonum(stm), fz_togen(stm)); + } + + if (big->len + one->len + 1 > big->cap) + fz_resizebuffer(big, big->len + one->len + 1); + memcpy(big->data + big->len, one->data, one->len); + big->data[big->len + one->len] = ' '; + big->len += one->len + 1; + + fz_dropbuffer(one); + } + + *bigbufp = big; + return fz_okay; +} + +static fz_error +pdf_loadpagecontents(fz_buffer **bufp, pdf_xref *xref, fz_obj *obj) +{ + fz_error error; + + if (fz_isarray(obj)) + { + error = pdf_loadpagecontentsarray(bufp, xref, obj); + if (error) + return 
fz_rethrow(error, "cannot load content stream array (%d 0 R)", fz_tonum(obj)); + } + else if (pdf_isstream(xref, fz_tonum(obj), fz_togen(obj))) + { + error = pdf_loadstream(bufp, xref, fz_tonum(obj), fz_togen(obj)); + if (error) + return fz_rethrow(error, "cannot load content stream (%d 0 R)", fz_tonum(obj)); + } + else + { + fz_warn("page contents missing, leaving page blank"); + *bufp = fz_newbuffer(0); + } + + return fz_okay; +} + +/* We need to know whether to install a page-level transparency group */ + +static int pdf_resourcesuseblending(fz_obj *rdb); + +static int +pdf_extgstateusesblending(fz_obj *dict) +{ + fz_obj *obj; + + obj = fz_dictgets(dict, "BM"); + if (fz_isname(obj) && strcmp(fz_toname(obj), "Normal")) + return 1; + + return 0; +} + +static int +pdf_patternusesblending(fz_obj *dict) +{ + fz_obj *obj; + + obj = fz_dictgets(dict, "Resources"); + if (fz_isdict(obj) && pdf_resourcesuseblending(obj)) + return 1; + + obj = fz_dictgets(dict, "ExtGState"); + if (fz_isdict(obj) && pdf_extgstateusesblending(obj)) + return 1; + + return 0; +} + +static int +pdf_xobjectusesblending(fz_obj *dict) +{ + fz_obj *obj; + + obj = fz_dictgets(dict, "Resources"); + if (fz_isdict(obj) && pdf_resourcesuseblending(obj)) + return 1; + + return 0; +} + +static int +pdf_resourcesuseblending(fz_obj *rdb) +{ + fz_obj *dict; + fz_obj *tmp; + int i; + + /* stop on cyclic resource dependencies */ + if (fz_dictgets(rdb, ".useBM")) + return fz_tobool(fz_dictgets(rdb, ".useBM")); + + tmp = fz_newbool(0); + fz_dictputs(rdb, ".useBM", tmp); + fz_dropobj(tmp); + + dict = fz_dictgets(rdb, "ExtGState"); + for (i = 0; i < fz_dictlen(dict); i++) + if (pdf_extgstateusesblending(fz_dictgetval(dict, i))) + goto found; + + dict = fz_dictgets(rdb, "Pattern"); + for (i = 0; i < fz_dictlen(dict); i++) + if (pdf_patternusesblending(fz_dictgetval(dict, i))) + goto found; + + dict = fz_dictgets(rdb, "XObject"); + for (i = 0; i < fz_dictlen(dict); i++) + if 
(pdf_xobjectusesblending(fz_dictgetval(dict, i))) + goto found; + + return 0; + +found: + tmp = fz_newbool(1); + fz_dictputs(rdb, ".useBM", tmp); + fz_dropobj(tmp); + return 1; +} + +fz_error +pdf_loadpage(pdf_page **pagep, pdf_xref *xref, fz_obj *dict) +{ + fz_error error; + pdf_page *page; + fz_obj *obj; + fz_bbox bbox; + + pdf_logpage("load page {\n"); + + // TODO: move this to a more appropriate place + /* Ensure that we have a store for resource objects */ + if (!xref->store) + xref->store = pdf_newstore(); + + page = fz_malloc(sizeof(pdf_page)); + page->resources = nil; + page->contents = nil; + page->transparency = 0; + page->links = nil; + page->annots = nil; + + obj = fz_dictgets(dict, "MediaBox"); + bbox = fz_roundrect(pdf_torect(obj)); + if (fz_isemptyrect(pdf_torect(obj))) + { + fz_warn("cannot find page bounds, guessing page bounds."); + bbox.x0 = 0; + bbox.y0 = 0; + bbox.x1 = 612; + bbox.y1 = 792; + } + + obj = fz_dictgets(dict, "CropBox"); + if (fz_isarray(obj)) + { + fz_bbox cropbox = fz_roundrect(pdf_torect(obj)); + bbox = fz_intersectbbox(bbox, cropbox); + } + + page->mediabox.x0 = MIN(bbox.x0, bbox.x1); + page->mediabox.y0 = MIN(bbox.y0, bbox.y1); + page->mediabox.x1 = MAX(bbox.x0, bbox.x1); + page->mediabox.y1 = MAX(bbox.y0, bbox.y1); + + if (page->mediabox.x1 - page->mediabox.x0 < 1 || page->mediabox.y1 - page->mediabox.y0 < 1) + return fz_throw("invalid page size"); + + page->rotate = fz_toint(fz_dictgets(dict, "Rotate")); + + pdf_logpage("bbox [%d %d %d %d]\n", bbox.x0, bbox.y0, bbox.x1, bbox.y1); + pdf_logpage("rotate %d\n", page->rotate); + + obj = fz_dictgets(dict, "Annots"); + if (obj) + { + pdf_loadlinks(&page->links, xref, obj); + pdf_loadannots(&page->annots, xref, obj); + } + + page->resources = fz_dictgets(dict, "Resources"); + if (page->resources) + fz_keepobj(page->resources); + + obj = fz_dictgets(dict, "Contents"); + error = pdf_loadpagecontents(&page->contents, xref, obj); + if (error) + { + pdf_freepage(page); + return 
fz_rethrow(error, "cannot load page contents (%d %d R)", fz_tonum(obj), fz_togen(obj)); + } + + if (page->resources && pdf_resourcesuseblending(page->resources)) + page->transparency = 1; + + pdf_logpage("} %p\n", page); + + *pagep = page; + return fz_okay; +} + +void +pdf_freepage(pdf_page *page) +{ + pdf_logpage("drop page %p\n", page); + if (page->resources) + fz_dropobj(page->resources); + if (page->contents) + fz_dropbuffer(page->contents); + if (page->links) + pdf_freelink(page->links); + if (page->annots) + pdf_freeannot(page->annots); + fz_free(page); +} diff --git a/pdf/pdf_pagetree.c b/pdf/pdf_pagetree.c new file mode 100644 index 00000000..df3c8b8a --- /dev/null +++ b/pdf/pdf_pagetree.c @@ -0,0 +1,141 @@ +#include "fitz.h" +#include "mupdf.h" + +struct info +{ + fz_obj *resources; + fz_obj *mediabox; + fz_obj *cropbox; + fz_obj *rotate; +}; + +int +pdf_getpagecount(pdf_xref *xref) +{ + return xref->pagelen; +} + +fz_obj * +pdf_getpageobject(pdf_xref *xref, int number) +{ + if (number > 0 && number <= xref->pagelen) + return xref->pageobjs[number - 1]; + return nil; +} + +fz_obj * +pdf_getpageref(pdf_xref *xref, int number) +{ + if (number > 0 && number <= xref->pagelen) + return xref->pagerefs[number - 1]; + return nil; +} + +int +pdf_findpageobject(pdf_xref *xref, fz_obj *page) +{ + int num = fz_tonum(page); + int gen = fz_togen(page); + int i; + for (i = 0; i < xref->pagelen; i++) + if (num == fz_tonum(xref->pagerefs[i]) && gen == fz_togen(xref->pagerefs[i])) + return i + 1; + return 0; +} + +static void +pdf_loadpagetreenode(pdf_xref *xref, fz_obj *node, struct info info) +{ + fz_obj *dict, *kids, *count; + fz_obj *obj, *tmp; + int i, n; + + /* prevent infinite recursion */ + if (fz_dictgets(node, ".seen")) + return; + + kids = fz_dictgets(node, "Kids"); + count = fz_dictgets(node, "Count"); + + if (fz_isarray(kids) && fz_isint(count)) + { + obj = fz_dictgets(node, "Resources"); + if (obj) + info.resources = obj; + obj = fz_dictgets(node, "MediaBox"); 
+ if (obj) + info.mediabox = obj; + obj = fz_dictgets(node, "CropBox"); + if (obj) + info.cropbox = obj; + obj = fz_dictgets(node, "Rotate"); + if (obj) + info.rotate = obj; + + tmp = fz_newnull(); + fz_dictputs(node, ".seen", tmp); + fz_dropobj(tmp); + + n = fz_arraylen(kids); + for (i = 0; i < n; i++) + { + obj = fz_arrayget(kids, i); + pdf_loadpagetreenode(xref, obj, info); + } + + fz_dictdels(node, ".seen"); + } + else + { + dict = fz_resolveindirect(node); + + if (info.resources && !fz_dictgets(dict, "Resources")) + fz_dictputs(dict, "Resources", info.resources); + if (info.mediabox && !fz_dictgets(dict, "MediaBox")) + fz_dictputs(dict, "MediaBox", info.mediabox); + if (info.cropbox && !fz_dictgets(dict, "CropBox")) + fz_dictputs(dict, "CropBox", info.cropbox); + if (info.rotate && !fz_dictgets(dict, "Rotate")) + fz_dictputs(dict, "Rotate", info.rotate); + + if (xref->pagelen == xref->pagecap) + { + fz_warn("found more pages than expected"); + xref->pagecap ++; + xref->pagerefs = fz_realloc(xref->pagerefs, xref->pagecap, sizeof(fz_obj*)); + xref->pageobjs = fz_realloc(xref->pageobjs, xref->pagecap, sizeof(fz_obj*)); + } + + xref->pagerefs[xref->pagelen] = fz_keepobj(node); + xref->pageobjs[xref->pagelen] = fz_keepobj(dict); + xref->pagelen ++; + } +} + +fz_error +pdf_loadpagetree(pdf_xref *xref) +{ + struct info info; + fz_obj *catalog = fz_dictgets(xref->trailer, "Root"); + fz_obj *pages = fz_dictgets(catalog, "Pages"); + fz_obj *count = fz_dictgets(pages, "Count"); + + if (!fz_isdict(pages)) + return fz_throw("missing page tree"); + if (!fz_isint(count)) + return fz_throw("missing page count"); + + xref->pagecap = fz_toint(count); + xref->pagelen = 0; + xref->pagerefs = fz_calloc(xref->pagecap, sizeof(fz_obj*)); + xref->pageobjs = fz_calloc(xref->pagecap, sizeof(fz_obj*)); + + info.resources = nil; + info.mediabox = nil; + info.cropbox = nil; + info.rotate = nil; + + pdf_loadpagetreenode(xref, pages, info); + + return fz_okay; +} diff --git a/pdf/pdf_parse.c 
b/pdf/pdf_parse.c new file mode 100644 index 00000000..b32f4d01 --- /dev/null +++ b/pdf/pdf_parse.c @@ -0,0 +1,538 @@ +#include "fitz.h" +#include "mupdf.h" + +fz_rect pdf_torect(fz_obj *array) +{ + fz_rect r; + float a = fz_toreal(fz_arrayget(array, 0)); + float b = fz_toreal(fz_arrayget(array, 1)); + float c = fz_toreal(fz_arrayget(array, 2)); + float d = fz_toreal(fz_arrayget(array, 3)); + r.x0 = MIN(a, c); + r.y0 = MIN(b, d); + r.x1 = MAX(a, c); + r.y1 = MAX(b, d); + return r; +} + +fz_matrix pdf_tomatrix(fz_obj *array) +{ + fz_matrix m; + m.a = fz_toreal(fz_arrayget(array, 0)); + m.b = fz_toreal(fz_arrayget(array, 1)); + m.c = fz_toreal(fz_arrayget(array, 2)); + m.d = fz_toreal(fz_arrayget(array, 3)); + m.e = fz_toreal(fz_arrayget(array, 4)); + m.f = fz_toreal(fz_arrayget(array, 5)); + return m; +} + +char * +pdf_toutf8(fz_obj *src) +{ + unsigned char *srcptr = (unsigned char *) fz_tostrbuf(src); + char *dstptr, *dst; + int srclen = fz_tostrlen(src); + int dstlen = 0; + int ucs; + int i; + + if (srclen > 2 && srcptr[0] == 254 && srcptr[1] == 255) + { + for (i = 2; i < srclen; i += 2) + { + ucs = (srcptr[i] << 8) | srcptr[i+1]; + dstlen += runelen(ucs); + } + + dstptr = dst = fz_malloc(dstlen + 1); + + for (i = 2; i < srclen; i += 2) + { + ucs = (srcptr[i] << 8) | srcptr[i+1]; + dstptr += runetochar(dstptr, &ucs); + } + } + + else + { + for (i = 0; i < srclen; i++) + dstlen += runelen(pdf_docencoding[srcptr[i]]); + + dstptr = dst = fz_malloc(dstlen + 1); + + for (i = 0; i < srclen; i++) + { + ucs = pdf_docencoding[srcptr[i]]; + dstptr += runetochar(dstptr, &ucs); + } + } + + *dstptr = '\0'; + return dst; +} + +unsigned short * +pdf_toucs2(fz_obj *src) +{ + unsigned char *srcptr = (unsigned char *) fz_tostrbuf(src); + unsigned short *dstptr, *dst; + int srclen = fz_tostrlen(src); + int i; + + if (srclen > 2 && srcptr[0] == 254 && srcptr[1] == 255) + { + dstptr = dst = fz_calloc((srclen - 2) / 2 + 1, sizeof(short)); + for (i = 2; i < srclen; i += 2) + *dstptr++ = 
(srcptr[i] << 8) | srcptr[i+1]; + } + + else + { + dstptr = dst = fz_calloc(srclen + 1, sizeof(short)); + for (i = 0; i < srclen; i++) + *dstptr++ = pdf_docencoding[srcptr[i]]; + } + + *dstptr = '\0'; + return dst; +} + +fz_obj * +pdf_toutf8name(fz_obj *src) +{ + char *buf = pdf_toutf8(src); + fz_obj *dst = fz_newname(buf); + fz_free(buf); + return dst; +} + +fz_error +pdf_parsearray(fz_obj **op, pdf_xref *xref, fz_stream *file, char *buf, int cap) +{ + fz_error error = fz_okay; + fz_obj *ary = nil; + fz_obj *obj = nil; + int a = 0, b = 0, n = 0; + int tok; + int len; + + ary = fz_newarray(4); + + while (1) + { + error = pdf_lex(&tok, file, buf, cap, &len); + if (error) + { + fz_dropobj(ary); + return fz_rethrow(error, "cannot parse array"); + } + + if (tok != PDF_TINT && tok != PDF_TR) + { + if (n > 0) + { + obj = fz_newint(a); + fz_arraypush(ary, obj); + fz_dropobj(obj); + } + if (n > 1) + { + obj = fz_newint(b); + fz_arraypush(ary, obj); + fz_dropobj(obj); + } + n = 0; + } + + if (tok == PDF_TINT && n == 2) + { + obj = fz_newint(a); + fz_arraypush(ary, obj); + fz_dropobj(obj); + a = b; + n --; + } + + switch (tok) + { + case PDF_TCARRAY: + *op = ary; + return fz_okay; + + case PDF_TINT: + if (n == 0) + a = atoi(buf); + if (n == 1) + b = atoi(buf); + n ++; + break; + + case PDF_TR: + if (n != 2) + { + fz_dropobj(ary); + return fz_throw("cannot parse indirect reference in array"); + } + obj = fz_newindirect(a, b, xref); + fz_arraypush(ary, obj); + fz_dropobj(obj); + n = 0; + break; + + case PDF_TOARRAY: + error = pdf_parsearray(&obj, xref, file, buf, cap); + if (error) + { + fz_dropobj(ary); + return fz_rethrow(error, "cannot parse array"); + } + fz_arraypush(ary, obj); + fz_dropobj(obj); + break; + + case PDF_TODICT: + error = pdf_parsedict(&obj, xref, file, buf, cap); + if (error) + { + fz_dropobj(ary); + return fz_rethrow(error, "cannot parse array"); + } + fz_arraypush(ary, obj); + fz_dropobj(obj); + break; + + case PDF_TNAME: + obj = fz_newname(buf); + 
fz_arraypush(ary, obj); + fz_dropobj(obj); + break; + case PDF_TREAL: + obj = fz_newreal(atof(buf)); + fz_arraypush(ary, obj); + fz_dropobj(obj); + break; + case PDF_TSTRING: + obj = fz_newstring(buf, len); + fz_arraypush(ary, obj); + fz_dropobj(obj); + break; + case PDF_TTRUE: + obj = fz_newbool(1); + fz_arraypush(ary, obj); + fz_dropobj(obj); + break; + case PDF_TFALSE: + obj = fz_newbool(0); + fz_arraypush(ary, obj); + fz_dropobj(obj); + break; + case PDF_TNULL: + obj = fz_newnull(); + fz_arraypush(ary, obj); + fz_dropobj(obj); + break; + + default: + fz_dropobj(ary); + return fz_throw("cannot parse token in array"); + } + } +} + +fz_error +pdf_parsedict(fz_obj **op, pdf_xref *xref, fz_stream *file, char *buf, int cap) +{ + fz_error error = fz_okay; + fz_obj *dict = nil; + fz_obj *key = nil; + fz_obj *val = nil; + int tok; + int len; + int a, b; + + dict = fz_newdict(8); + + while (1) + { + error = pdf_lex(&tok, file, buf, cap, &len); + if (error) + { + fz_dropobj(dict); + return fz_rethrow(error, "cannot parse dict"); + } + +skip: + if (tok == PDF_TCDICT) + { + *op = dict; + return fz_okay; + } + + /* for BI .. ID .. 
EI in content streams */ + if (tok == PDF_TKEYWORD && !strcmp(buf, "ID")) + { + *op = dict; + return fz_okay; + } + + if (tok != PDF_TNAME) + { + fz_dropobj(dict); + return fz_throw("invalid key in dict"); + } + + key = fz_newname(buf); + + error = pdf_lex(&tok, file, buf, cap, &len); + if (error) + { + fz_dropobj(key); + fz_dropobj(dict); + return fz_rethrow(error, "cannot parse dict"); + } + + switch (tok) + { + case PDF_TOARRAY: + error = pdf_parsearray(&val, xref, file, buf, cap); + if (error) + { + fz_dropobj(key); + fz_dropobj(dict); + return fz_rethrow(error, "cannot parse dict"); + } + break; + + case PDF_TODICT: + error = pdf_parsedict(&val, xref, file, buf, cap); + if (error) + { + fz_dropobj(key); + fz_dropobj(dict); + return fz_rethrow(error, "cannot parse dict"); + } + break; + + case PDF_TNAME: val = fz_newname(buf); break; + case PDF_TREAL: val = fz_newreal(atof(buf)); break; + case PDF_TSTRING: val = fz_newstring(buf, len); break; + case PDF_TTRUE: val = fz_newbool(1); break; + case PDF_TFALSE: val = fz_newbool(0); break; + case PDF_TNULL: val = fz_newnull(); break; + + case PDF_TINT: + /* 64-bit to allow for numbers > INT_MAX and overflow */ + a = (int) strtoll(buf, 0, 10); + error = pdf_lex(&tok, file, buf, cap, &len); + if (error) + { + fz_dropobj(key); + fz_dropobj(dict); + return fz_rethrow(error, "cannot parse dict"); + } + if (tok == PDF_TCDICT || tok == PDF_TNAME || + (tok == PDF_TKEYWORD && !strcmp(buf, "ID"))) + { + val = fz_newint(a); + fz_dictput(dict, key, val); + fz_dropobj(val); + fz_dropobj(key); + goto skip; + } + if (tok == PDF_TINT) + { + b = atoi(buf); + error = pdf_lex(&tok, file, buf, cap, &len); + if (error) + { + fz_dropobj(key); + fz_dropobj(dict); + return fz_rethrow(error, "cannot parse dict"); + } + if (tok == PDF_TR) + { + val = fz_newindirect(a, b, xref); + break; + } + } + fz_dropobj(key); + fz_dropobj(dict); + return fz_throw("invalid indirect reference in dict"); + + default: + fz_dropobj(key); + fz_dropobj(dict); + 
return fz_throw("unknown token in dict"); + } + + fz_dictput(dict, key, val); + fz_dropobj(val); + fz_dropobj(key); + } +} + +fz_error +pdf_parsestmobj(fz_obj **op, pdf_xref *xref, fz_stream *file, char *buf, int cap) +{ + fz_error error; + int tok; + int len; + + error = pdf_lex(&tok, file, buf, cap, &len); + if (error) + return fz_rethrow(error, "cannot parse token in object stream"); + + switch (tok) + { + case PDF_TOARRAY: + error = pdf_parsearray(op, xref, file, buf, cap); + if (error) + return fz_rethrow(error, "cannot parse object stream"); + break; + case PDF_TODICT: + error = pdf_parsedict(op, xref, file, buf, cap); + if (error) + return fz_rethrow(error, "cannot parse object stream"); + break; + case PDF_TNAME: *op = fz_newname(buf); break; + case PDF_TREAL: *op = fz_newreal(atof(buf)); break; + case PDF_TSTRING: *op = fz_newstring(buf, len); break; + case PDF_TTRUE: *op = fz_newbool(1); break; + case PDF_TFALSE: *op = fz_newbool(0); break; + case PDF_TNULL: *op = fz_newnull(); break; + case PDF_TINT: *op = fz_newint(atoi(buf)); break; + default: return fz_throw("unknown token in object stream"); + } + + return fz_okay; +} + +fz_error +pdf_parseindobj(fz_obj **op, pdf_xref *xref, + fz_stream *file, char *buf, int cap, + int *onum, int *ogen, int *ostmofs) +{ + fz_error error = fz_okay; + fz_obj *obj = nil; + int num = 0, gen = 0, stmofs; + int tok; + int len; + int a, b; + + error = pdf_lex(&tok, file, buf, cap, &len); + if (error) + return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen); + if (tok != PDF_TINT) + return fz_throw("expected object number (%d %d R)", num, gen); + num = atoi(buf); + + error = pdf_lex(&tok, file, buf, cap, &len); + if (error) + return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen); + if (tok != PDF_TINT) + return fz_throw("expected generation number (%d %d R)", num, gen); + gen = atoi(buf); + + error = pdf_lex(&tok, file, buf, cap, &len); + if (error) + return fz_rethrow(error, 
"cannot parse indirect object (%d %d R)", num, gen); + if (tok != PDF_TOBJ) + return fz_throw("expected 'obj' keyword (%d %d R)", num, gen); + + error = pdf_lex(&tok, file, buf, cap, &len); + if (error) + return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen); + + switch (tok) + { + case PDF_TOARRAY: + error = pdf_parsearray(&obj, xref, file, buf, cap); + if (error) + return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen); + break; + + case PDF_TODICT: + error = pdf_parsedict(&obj, xref, file, buf, cap); + if (error) + return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen); + break; + + case PDF_TNAME: obj = fz_newname(buf); break; + case PDF_TREAL: obj = fz_newreal(atof(buf)); break; + case PDF_TSTRING: obj = fz_newstring(buf, len); break; + case PDF_TTRUE: obj = fz_newbool(1); break; + case PDF_TFALSE: obj = fz_newbool(0); break; + case PDF_TNULL: obj = fz_newnull(); break; + + case PDF_TINT: + a = atoi(buf); + error = pdf_lex(&tok, file, buf, cap, &len); + if (error) + return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen); + if (tok == PDF_TSTREAM || tok == PDF_TENDOBJ) + { + obj = fz_newint(a); + goto skip; + } + if (tok == PDF_TINT) + { + b = atoi(buf); + error = pdf_lex(&tok, file, buf, cap, &len); + if (error) + return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen); + if (tok == PDF_TR) + { + obj = fz_newindirect(a, b, xref); + break; + } + } + return fz_throw("expected 'R' keyword (%d %d R)", num, gen); + + case PDF_TENDOBJ: + obj = fz_newnull(); + goto skip; + + default: + return fz_throw("syntax error in object (%d %d R)", num, gen); + } + + error = pdf_lex(&tok, file, buf, cap, &len); + if (error) + { + fz_dropobj(obj); + return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen); + } + +skip: + if (tok == PDF_TSTREAM) + { + int c = fz_readbyte(file); + while (c == ' ') + c = fz_readbyte(file); + if (c == '\r') + { + c = 
fz_peekbyte(file); + if (c != '\n') + fz_warn("line feed missing after stream begin marker (%d %d R)", num, gen); + else + fz_readbyte(file); + } + stmofs = fz_tell(file); + } + else if (tok == PDF_TENDOBJ) + { + stmofs = 0; + } + else + { + fz_warn("expected 'endobj' or 'stream' keyword (%d %d R)", num, gen); + stmofs = 0; + } + + if (onum) *onum = num; + if (ogen) *ogen = gen; + if (ostmofs) *ostmofs = stmofs; + *op = obj; + return fz_okay; +} diff --git a/pdf/pdf_pattern.c b/pdf/pdf_pattern.c new file mode 100644 index 00000000..7d3a38d1 --- /dev/null +++ b/pdf/pdf_pattern.c @@ -0,0 +1,89 @@ +#include "fitz.h" +#include "mupdf.h" + +fz_error +pdf_loadpattern(pdf_pattern **patp, pdf_xref *xref, fz_obj *dict) +{ + fz_error error; + pdf_pattern *pat; + fz_obj *obj; + + if ((*patp = pdf_finditem(xref->store, pdf_droppattern, dict))) + { + pdf_keeppattern(*patp); + return fz_okay; + } + + pdf_logrsrc("load pattern (%d %d R) {\n", fz_tonum(dict), fz_togen(dict)); + + pat = fz_malloc(sizeof(pdf_pattern)); + pat->refs = 1; + pat->resources = nil; + pat->contents = nil; + + /* Store pattern now, to avoid possible recursion if objects refer back to this one */ + pdf_storeitem(xref->store, pdf_keeppattern, pdf_droppattern, dict, pat); + + pat->ismask = fz_toint(fz_dictgets(dict, "PaintType")) == 2; + pat->xstep = fz_toreal(fz_dictgets(dict, "XStep")); + pat->ystep = fz_toreal(fz_dictgets(dict, "YStep")); + + pdf_logrsrc("mask %d\n", pat->ismask); + pdf_logrsrc("xstep %g\n", pat->xstep); + pdf_logrsrc("ystep %g\n", pat->ystep); + + obj = fz_dictgets(dict, "BBox"); + pat->bbox = pdf_torect(obj); + + pdf_logrsrc("bbox [%g %g %g %g]\n", + pat->bbox.x0, pat->bbox.y0, + pat->bbox.x1, pat->bbox.y1); + + obj = fz_dictgets(dict, "Matrix"); + if (obj) + pat->matrix = pdf_tomatrix(obj); + else + pat->matrix = fz_identity; + + pdf_logrsrc("matrix [%g %g %g %g %g %g]\n", + pat->matrix.a, pat->matrix.b, + pat->matrix.c, pat->matrix.d, + pat->matrix.e, pat->matrix.f); + + pat->resources 
= fz_dictgets(dict, "Resources"); + if (pat->resources) + fz_keepobj(pat->resources); + + error = pdf_loadstream(&pat->contents, xref, fz_tonum(dict), fz_togen(dict)); + if (error) + { + pdf_removeitem(xref->store, pdf_droppattern, dict); + pdf_droppattern(pat); + return fz_rethrow(error, "cannot load pattern stream (%d %d R)", fz_tonum(dict), fz_togen(dict)); + } + + pdf_logrsrc("}\n"); + + *patp = pat; + return fz_okay; +} + +pdf_pattern * +pdf_keeppattern(pdf_pattern *pat) +{ + pat->refs ++; + return pat; +} + +void +pdf_droppattern(pdf_pattern *pat) +{ + if (pat && --pat->refs == 0) + { + if (pat->resources) + fz_dropobj(pat->resources); + if (pat->contents) + fz_dropbuffer(pat->contents); + fz_free(pat); + } +} diff --git a/pdf/pdf_repair.c b/pdf/pdf_repair.c new file mode 100644 index 00000000..436f7303 --- /dev/null +++ b/pdf/pdf_repair.c @@ -0,0 +1,463 @@ +#include "fitz.h" +#include "mupdf.h" + +/* Scan file for objects and reconstruct xref table */ + +struct entry +{ + int num; + int gen; + int ofs; + int stmofs; + int stmlen; +}; + +static fz_error +fz_repairobj(fz_stream *file, char *buf, int cap, int *stmofsp, int *stmlenp, fz_obj **encrypt, fz_obj **id) +{ + fz_error error; + int tok; + int stmlen; + int len; + int n; + + *stmofsp = 0; + *stmlenp = -1; + + stmlen = 0; + + error = pdf_lex(&tok, file, buf, cap, &len); + if (error) + return fz_rethrow(error, "cannot parse object"); + if (tok == PDF_TODICT) + { + fz_obj *dict, *obj; + + /* Send nil xref so we don't try to resolve references */ + error = pdf_parsedict(&dict, nil, file, buf, cap); + if (error) + return fz_rethrow(error, "cannot parse object"); + + obj = fz_dictgets(dict, "Type"); + if (fz_isname(obj) && !strcmp(fz_toname(obj), "XRef")) + { + obj = fz_dictgets(dict, "Encrypt"); + if (obj) + { + if (*encrypt) + fz_dropobj(*encrypt); + *encrypt = fz_keepobj(obj); + } + + obj = fz_dictgets(dict, "ID"); + if (obj) + { + if (*id) + fz_dropobj(*id); + *id = fz_keepobj(obj); + } + } + + obj = 
fz_dictgets(dict, "Length"); + if (fz_isint(obj)) + stmlen = fz_toint(obj); + + fz_dropobj(dict); + } + + while ( tok != PDF_TSTREAM && + tok != PDF_TENDOBJ && + tok != PDF_TERROR && + tok != PDF_TEOF ) + { + error = pdf_lex(&tok, file, buf, cap, &len); + if (error) + return fz_rethrow(error, "cannot scan for endobj or stream token"); + } + + if (tok == PDF_TSTREAM) + { + int c = fz_readbyte(file); + if (c == '\r') { + c = fz_peekbyte(file); + if (c == '\n') + fz_readbyte(file); + } + + *stmofsp = fz_tell(file); + if (*stmofsp < 0) + return fz_throw("cannot seek in file"); + + if (stmlen > 0) + { + fz_seek(file, *stmofsp + stmlen, 0); + error = pdf_lex(&tok, file, buf, cap, &len); + if (error) + fz_catch(error, "cannot find endstream token, falling back to scanning"); + if (tok == PDF_TENDSTREAM) + goto atobjend; + fz_seek(file, *stmofsp, 0); + } + + n = fz_read(file, (unsigned char *) buf, 9); + if (n < 0) + return fz_rethrow(n, "cannot read from file"); + + while (memcmp(buf, "endstream", 9) != 0) + { + c = fz_readbyte(file); + if (c == EOF) + break; + memmove(buf, buf + 1, 8); + buf[8] = c; + } + + *stmlenp = fz_tell(file) - *stmofsp - 9; + +atobjend: + error = pdf_lex(&tok, file, buf, cap, &len); + if (error) + return fz_rethrow(error, "cannot scan for endobj token"); + if (tok != PDF_TENDOBJ) + fz_warn("object missing 'endobj' token"); + } + + return fz_okay; +} + +static fz_error +pdf_repairobjstm(pdf_xref *xref, int num, int gen) +{ + fz_error error; + fz_obj *obj; + fz_stream *stm; + int tok; + int i, n, count; + char buf[256]; + + error = pdf_loadobject(&obj, xref, num, gen); + if (error) + return fz_rethrow(error, "cannot load object stream object (%d %d R)", num, gen); + + count = fz_toint(fz_dictgets(obj, "N")); + + fz_dropobj(obj); + + error = pdf_openstream(&stm, xref, num, gen); + if (error) + return fz_rethrow(error, "cannot open object stream object (%d %d R)", num, gen); + + for (i = 0; i < count; i++) + { + error = pdf_lex(&tok, stm, buf, sizeof 
buf, &n); + if (error || tok != PDF_TINT) + { + fz_close(stm); + return fz_rethrow(error, "corrupt object stream (%d %d R)", num, gen); + } + + n = atoi(buf); + if (n >= xref->len) + pdf_resizexref(xref, n + 1); + + xref->table[n].ofs = num; + xref->table[n].gen = i; + xref->table[n].stmofs = 0; + xref->table[n].obj = nil; + xref->table[n].type = 'o'; + + error = pdf_lex(&tok, stm, buf, sizeof buf, &n); + if (error || tok != PDF_TINT) + { + fz_close(stm); + return fz_rethrow(error, "corrupt object stream (%d %d R)", num, gen); + } + } + + fz_close(stm); + return fz_okay; +} + +fz_error +pdf_repairxref(pdf_xref *xref, char *buf, int bufsize) +{ + fz_error error; + fz_obj *dict, *obj; + fz_obj *length; + + fz_obj *encrypt = nil; + fz_obj *id = nil; + fz_obj *root = nil; + fz_obj *info = nil; + + struct entry *list = nil; + int listlen; + int listcap; + int maxnum = 0; + + int num = 0; + int gen = 0; + int tmpofs, numofs = 0, genofs = 0; + int stmlen, stmofs = 0; + int tok; + int next; + int i, n; + + pdf_logxref("repairxref %p\n", xref); + + fz_seek(xref->file, 0, 0); + + listlen = 0; + listcap = 1024; + list = fz_calloc(listcap, sizeof(struct entry)); + + /* look for '%PDF' version marker within first kilobyte of file */ + n = fz_read(xref->file, (unsigned char *)buf, MAX(bufsize, 1024)); + if (n < 0) + { + error = fz_rethrow(n, "cannot read from file"); + goto cleanup; + } + + fz_seek(xref->file, 0, 0); + for (i = 0; i < n - 4; i++) + { + if (memcmp(buf + i, "%PDF", 4) == 0) + { + fz_seek(xref->file, i, 0); + break; + } + } + + while (1) + { + tmpofs = fz_tell(xref->file); + if (tmpofs < 0) + { + error = fz_throw("cannot tell in file"); + goto cleanup; + } + + error = pdf_lex(&tok, xref->file, buf, bufsize, &n); + if (error) + { + fz_catch(error, "ignoring the rest of the file"); + break; + } + + if (tok == PDF_TINT) + { + numofs = genofs; + num = gen; + genofs = tmpofs; + gen = atoi(buf); + } + + if (tok == PDF_TOBJ) + { + error = fz_repairobj(xref->file, buf, 
bufsize, &stmofs, &stmlen, &encrypt, &id); + if (error) + { + error = fz_rethrow(error, "cannot parse object (%d %d R)", num, gen); + goto cleanup; + } + + pdf_logxref("found object: (%d %d R)\n", num, gen); + + if (listlen + 1 == listcap) + { + listcap = (listcap * 3) / 2; + list = fz_realloc(list, listcap, sizeof(struct entry)); + } + + list[listlen].num = num; + list[listlen].gen = gen; + list[listlen].ofs = numofs; + list[listlen].stmofs = stmofs; + list[listlen].stmlen = stmlen; + listlen ++; + + if (num > maxnum) + maxnum = num; + } + + /* trailer dictionary */ + if (tok == PDF_TODICT) + { + error = pdf_parsedict(&dict, xref, xref->file, buf, bufsize); + if (error) + { + error = fz_rethrow(error, "cannot parse object"); + goto cleanup; + } + + obj = fz_dictgets(dict, "Encrypt"); + if (obj) + { + if (encrypt) + fz_dropobj(encrypt); + encrypt = fz_keepobj(obj); + } + + obj = fz_dictgets(dict, "ID"); + if (obj) + { + if (id) + fz_dropobj(id); + id = fz_keepobj(obj); + } + + obj = fz_dictgets(dict, "Root"); + if (obj) + { + if (root) + fz_dropobj(root); + root = fz_keepobj(obj); + } + + obj = fz_dictgets(dict, "Info"); + if (obj) + { + if (info) + fz_dropobj(info); + info = fz_keepobj(obj); + } + + fz_dropobj(dict); + } + + if (tok == PDF_TERROR) + fz_readbyte(xref->file); + + if (tok == PDF_TEOF) + break; + } + + /* make xref reasonable */ + + pdf_resizexref(xref, maxnum + 1); + + for (i = 0; i < listlen; i++) + { + xref->table[list[i].num].type = 'n'; + xref->table[list[i].num].ofs = list[i].ofs; + xref->table[list[i].num].gen = list[i].gen; + + xref->table[list[i].num].stmofs = list[i].stmofs; + + /* corrected stream length */ + if (list[i].stmlen >= 0) + { + pdf_logxref("correct stream length %d %d = %d\n", + list[i].num, list[i].gen, list[i].stmlen); + + error = pdf_loadobject(&dict, xref, list[i].num, list[i].gen); + if (error) + { + error = fz_rethrow(error, "cannot load stream object (%d %d R)", list[i].num, list[i].gen); + goto cleanup; + } + + length = 
fz_newint(list[i].stmlen); + fz_dictputs(dict, "Length", length); + fz_dropobj(length); + + fz_dropobj(dict); + } + + } + + xref->table[0].type = 'f'; + xref->table[0].ofs = 0; + xref->table[0].gen = 65535; + xref->table[0].stmofs = 0; + xref->table[0].obj = nil; + + next = 0; + for (i = xref->len - 1; i >= 0; i--) + { + if (xref->table[i].type == 'f') + { + xref->table[i].ofs = next; + if (xref->table[i].gen < 65535) + xref->table[i].gen ++; + next = i; + } + } + + /* create a repaired trailer, Root will be added later */ + + xref->trailer = fz_newdict(5); + + obj = fz_newint(maxnum + 1); + fz_dictputs(xref->trailer, "Size", obj); + fz_dropobj(obj); + + if (root) + { + fz_dictputs(xref->trailer, "Root", root); + fz_dropobj(root); + } + if (info) + { + fz_dictputs(xref->trailer, "Info", info); + fz_dropobj(info); + } + + if (encrypt) + { + if (fz_isindirect(encrypt)) + { + /* create new reference with non-nil xref pointer */ + obj = fz_newindirect(fz_tonum(encrypt), fz_togen(encrypt), xref); + fz_dropobj(encrypt); + encrypt = obj; + } + fz_dictputs(xref->trailer, "Encrypt", encrypt); + fz_dropobj(encrypt); + } + + if (id) + { + if (fz_isindirect(id)) + { + /* create new reference with non-nil xref pointer */ + obj = fz_newindirect(fz_tonum(id), fz_togen(id), xref); + fz_dropobj(id); + id = obj; + } + fz_dictputs(xref->trailer, "ID", id); + fz_dropobj(id); + } + + fz_free(list); + return fz_okay; + +cleanup: + if (encrypt) fz_dropobj(encrypt); + if (id) fz_dropobj(id); + if (root) fz_dropobj(root); + if (info) fz_dropobj(info); + fz_free(list); + return error; /* already rethrown */ +} + +fz_error +pdf_repairobjstms(pdf_xref *xref) +{ + fz_obj *dict; + int i; + + for (i = 0; i < xref->len; i++) + { + if (xref->table[i].stmofs) + { + pdf_loadobject(&dict, xref, i, 0); + if (!strcmp(fz_toname(fz_dictgets(dict, "Type")), "ObjStm")) + pdf_repairobjstm(xref, i, 0); + fz_dropobj(dict); + } + } + + return fz_okay; +} diff --git a/pdf/pdf_shade.c b/pdf/pdf_shade.c new file 
mode 100644 index 00000000..1b301724 --- /dev/null +++ b/pdf/pdf_shade.c @@ -0,0 +1,1172 @@ +#include "fitz.h" +#include "mupdf.h" + +#define HUGENUM 32000 /* how far to extend axial/radial shadings */ +#define FUNSEGS 32 /* size of sampled mesh for function-based shadings */ +#define RADSEGS 32 /* how many segments to generate for radial meshes */ +#define SUBDIV 3 /* how many levels to subdivide patches */ + +struct vertex +{ + float x, y; + float c[FZ_MAXCOLORS]; +}; + +static void +pdf_growmesh(fz_shade *shade, int amount) +{ + if (shade->meshlen + amount < shade->meshcap) + return; + + if (shade->meshcap == 0) + shade->meshcap = 1024; + + while (shade->meshlen + amount > shade->meshcap) + shade->meshcap = (shade->meshcap * 3) / 2; + + shade->mesh = fz_realloc(shade->mesh, shade->meshcap, sizeof(float)); +} + +static void +pdf_addvertex(fz_shade *shade, struct vertex *v) +{ + int ncomp = shade->usefunction ? 1 : shade->colorspace->n; + int i; + pdf_growmesh(shade, 2 + ncomp); + shade->mesh[shade->meshlen++] = v->x; + shade->mesh[shade->meshlen++] = v->y; + for (i = 0; i < ncomp; i++) + shade->mesh[shade->meshlen++] = v->c[i]; +} + +static void +pdf_addtriangle(fz_shade *shade, + struct vertex *v0, + struct vertex *v1, + struct vertex *v2) +{ + pdf_addvertex(shade, v0); + pdf_addvertex(shade, v1); + pdf_addvertex(shade, v2); +} + +static void +pdf_addquad(fz_shade *shade, + struct vertex *v0, + struct vertex *v1, + struct vertex *v2, + struct vertex *v3) +{ + pdf_addtriangle(shade, v0, v1, v3); + pdf_addtriangle(shade, v1, v3, v2); +} + +/* Subdivide and tesselate tensor-patches */ + +typedef struct pdf_tensorpatch_s pdf_tensorpatch; + +struct pdf_tensorpatch_s +{ + fz_point pole[4][4]; + float color[4][FZ_MAXCOLORS]; +}; + +static void +triangulatepatch(pdf_tensorpatch p, fz_shade *shade) +{ + struct vertex v0, v1, v2, v3; + + v0.x = p.pole[0][0].x; + v0.y = p.pole[0][0].y; + memcpy(v0.c, p.color[0], sizeof(v0.c)); + + v1.x = p.pole[0][3].x; + v1.y = 
p.pole[0][3].y; + memcpy(v1.c, p.color[1], sizeof(v1.c)); + + v2.x = p.pole[3][3].x; + v2.y = p.pole[3][3].y; + memcpy(v2.c, p.color[2], sizeof(v2.c)); + + v3.x = p.pole[3][0].x; + v3.y = p.pole[3][0].y; + memcpy(v3.c, p.color[3], sizeof(v3.c)); + + pdf_addquad(shade, &v0, &v1, &v2, &v3); +} + +static inline void +midcolor(float *c, float *c1, float *c2) +{ + int i; + for (i = 0; i < FZ_MAXCOLORS; i++) + c[i] = (c1[i] + c2[i]) * 0.5f; +} + +static inline void +splitcurve(fz_point *pole, fz_point *q0, fz_point *q1, int polestep) +{ + /* + split bezier curve given by control points pole[0]..pole[3] + using de casteljau algo at midpoint and build two new + bezier curves q0[0]..q0[3] and q1[0]..q1[3]. all indices + should be multiplies by polestep == 1 for vertical bezier + curves in patch and == 4 for horizontal bezier curves due + to C's multi-dimensional matrix memory layout. + */ + + float x12 = (pole[1 * polestep].x + pole[2 * polestep].x) * 0.5f; + float y12 = (pole[1 * polestep].y + pole[2 * polestep].y) * 0.5f; + + q0[1 * polestep].x = (pole[0 * polestep].x + pole[1 * polestep].x) * 0.5f; + q0[1 * polestep].y = (pole[0 * polestep].y + pole[1 * polestep].y) * 0.5f; + q1[2 * polestep].x = (pole[2 * polestep].x + pole[3 * polestep].x) * 0.5f; + q1[2 * polestep].y = (pole[2 * polestep].y + pole[3 * polestep].y) * 0.5f; + + q0[2 * polestep].x = (q0[1 * polestep].x + x12) * 0.5f; + q0[2 * polestep].y = (q0[1 * polestep].y + y12) * 0.5f; + q1[1 * polestep].x = (x12 + q1[2 * polestep].x) * 0.5f; + q1[1 * polestep].y = (y12 + q1[2 * polestep].y) * 0.5f; + + q0[3 * polestep].x = (q0[2 * polestep].x + q1[1 * polestep].x) * 0.5f; + q0[3 * polestep].y = (q0[2 * polestep].y + q1[1 * polestep].y) * 0.5f; + q1[0 * polestep].x = (q0[2 * polestep].x + q1[1 * polestep].x) * 0.5f; + q1[0 * polestep].y = (q0[2 * polestep].y + q1[1 * polestep].y) * 0.5f; + + q0[0 * polestep].x = pole[0 * polestep].x; + q0[0 * polestep].y = pole[0 * polestep].y; + q1[3 * polestep].x = pole[3 * 
polestep].x; + q1[3 * polestep].y = pole[3 * polestep].y; +} + +static inline void +splitstripe(pdf_tensorpatch *p, pdf_tensorpatch *s0, pdf_tensorpatch *s1) +{ + /* + split all horizontal bezier curves in patch, + creating two new patches with half the width. + */ + splitcurve(&p->pole[0][0], &s0->pole[0][0], &s1->pole[0][0], 4); + splitcurve(&p->pole[0][1], &s0->pole[0][1], &s1->pole[0][1], 4); + splitcurve(&p->pole[0][2], &s0->pole[0][2], &s1->pole[0][2], 4); + splitcurve(&p->pole[0][3], &s0->pole[0][3], &s1->pole[0][3], 4); + + /* interpolate the colors for the two new patches. */ + memcpy(s0->color[0], p->color[0], sizeof(s0->color[0])); + memcpy(s0->color[1], p->color[1], sizeof(s0->color[1])); + midcolor(s0->color[2], p->color[1], p->color[2]); + midcolor(s0->color[3], p->color[0], p->color[3]); + + memcpy(s1->color[0], s0->color[3], sizeof(s1->color[0])); + memcpy(s1->color[1], s0->color[2], sizeof(s1->color[1])); + memcpy(s1->color[2], p->color[2], sizeof(s1->color[2])); + memcpy(s1->color[3], p->color[3], sizeof(s1->color[3])); +} + +static void +drawstripe(pdf_tensorpatch *p, fz_shade *shade, int depth) +{ + pdf_tensorpatch s0, s1; + + /* split patch into two half-height patches */ + splitstripe(p, &s0, &s1); + + depth--; + if (depth == 0) + { + /* if no more subdividing, draw two new patches... */ + triangulatepatch(s0, shade); + triangulatepatch(s1, shade); + } + else + { + /* ...otherwise, continue subdividing. */ + drawstripe(&s0, shade, depth); + drawstripe(&s1, shade, depth); + } +} + +static inline void +splitpatch(pdf_tensorpatch *p, pdf_tensorpatch *s0, pdf_tensorpatch *s1) +{ + /* + split all vertical bezier curves in patch, + creating two new patches with half the height. 
+ */ + splitcurve(p->pole[0], s0->pole[0], s1->pole[0], 1); + splitcurve(p->pole[1], s0->pole[1], s1->pole[1], 1); + splitcurve(p->pole[2], s0->pole[2], s1->pole[2], 1); + splitcurve(p->pole[3], s0->pole[3], s1->pole[3], 1); + + /* interpolate the colors for the two new patches. */ + memcpy(s0->color[0], p->color[0], sizeof(s0->color[0])); + midcolor(s0->color[1], p->color[0], p->color[1]); + midcolor(s0->color[2], p->color[2], p->color[3]); + memcpy(s0->color[3], p->color[3], sizeof(s0->color[3])); + + memcpy(s1->color[0], s0->color[1], sizeof(s1->color[0])); + memcpy(s1->color[1], p->color[1], sizeof(s1->color[1])); + memcpy(s1->color[2], p->color[2], sizeof(s1->color[2])); + memcpy(s1->color[3], s0->color[2], sizeof(s1->color[3])); +} + +static void +drawpatch(fz_shade *shade, pdf_tensorpatch *p, int depth, int origdepth) +{ + pdf_tensorpatch s0, s1; + + /* split patch into two half-width patches */ + splitpatch(p, &s0, &s1); + + depth--; + if (depth == 0) + { + /* if no more subdividing, draw two new patches... */ + drawstripe(&s0, shade, origdepth); + drawstripe(&s1, shade, origdepth); + } + else + { + /* ...otherwise, continue subdividing. 
*/ + drawpatch(shade, &s0, depth, origdepth); + drawpatch(shade, &s1, depth, origdepth); + } +} + +static inline fz_point +pdf_computetensorinterior( + fz_point a, fz_point b, fz_point c, fz_point d, + fz_point e, fz_point f, fz_point g, fz_point h) +{ + fz_point pt; + + /* see equations at page 330 in pdf 1.7 */ + + pt.x = -4 * a.x; + pt.x += 6 * (b.x + c.x); + pt.x += -2 * (d.x + e.x); + pt.x += 3 * (f.x + g.x); + pt.x += -1 * h.x; + pt.x /= 9; + + pt.y = -4 * a.y; + pt.y += 6 * (b.y + c.y); + pt.y += -2 * (d.y + e.y); + pt.y += 3 * (f.y + g.y); + pt.y += -1 * h.y; + pt.y /= 9; + + return pt; +} + +static inline void +pdf_maketensorpatch(pdf_tensorpatch *p, int type, fz_point *pt) +{ + if (type == 6) + { + /* see control point stream order at page 325 in pdf 1.7 */ + + p->pole[0][0] = pt[0]; + p->pole[0][1] = pt[1]; + p->pole[0][2] = pt[2]; + p->pole[0][3] = pt[3]; + p->pole[1][3] = pt[4]; + p->pole[2][3] = pt[5]; + p->pole[3][3] = pt[6]; + p->pole[3][2] = pt[7]; + p->pole[3][1] = pt[8]; + p->pole[3][0] = pt[9]; + p->pole[2][0] = pt[10]; + p->pole[1][0] = pt[11]; + + /* see equations at page 330 in pdf 1.7 */ + + p->pole[1][1] = pdf_computetensorinterior( + p->pole[0][0], p->pole[0][1], p->pole[1][0], p->pole[0][3], + p->pole[3][0], p->pole[3][1], p->pole[1][3], p->pole[3][3]); + + p->pole[1][2] = pdf_computetensorinterior( + p->pole[0][3], p->pole[0][2], p->pole[1][3], p->pole[0][0], + p->pole[3][3], p->pole[3][2], p->pole[1][0], p->pole[3][0]); + + p->pole[2][1] = pdf_computetensorinterior( + p->pole[3][0], p->pole[3][1], p->pole[2][0], p->pole[3][3], + p->pole[0][0], p->pole[0][1], p->pole[2][3], p->pole[0][3]); + + p->pole[2][2] = pdf_computetensorinterior( + p->pole[3][3], p->pole[3][2], p->pole[2][3], p->pole[3][0], + p->pole[0][3], p->pole[0][2], p->pole[2][0], p->pole[0][0]); + } + else if (type == 7) + { + /* see control point stream order at page 330 in pdf 1.7 */ + + p->pole[0][0] = pt[0]; + p->pole[0][1] = pt[1]; + p->pole[0][2] = pt[2]; + 
p->pole[0][3] = pt[3]; + p->pole[1][3] = pt[4]; + p->pole[2][3] = pt[5]; + p->pole[3][3] = pt[6]; + p->pole[3][2] = pt[7]; + p->pole[3][1] = pt[8]; + p->pole[3][0] = pt[9]; + p->pole[2][0] = pt[10]; + p->pole[1][0] = pt[11]; + p->pole[1][1] = pt[12]; + p->pole[1][2] = pt[13]; + p->pole[2][2] = pt[14]; + p->pole[2][1] = pt[15]; + } +} + +/* Sample various functions into lookup tables */ + +static void +pdf_samplecompositeshadefunction(fz_shade *shade, pdf_function *func, float t0, float t1) +{ + int i; + float t; + + for (i = 0; i < 256; i++) + { + t = t0 + (i / 255.0f) * (t1 - t0); + pdf_evalfunction(func, &t, 1, shade->function[i], shade->colorspace->n); + shade->function[i][shade->colorspace->n] = 1; + } +} + +static void +pdf_samplecomponentshadefunction(fz_shade *shade, int funcs, pdf_function **func, float t0, float t1) +{ + int i, k; + float t; + + for (i = 0; i < 256; i++) + { + t = t0 + (i / 255.0f) * (t1 - t0); + for (k = 0; k < funcs; k++) + pdf_evalfunction(func[k], &t, 1, &shade->function[i][k], 1); + shade->function[i][k] = 1; + } +} + +static void +pdf_sampleshadefunction(fz_shade *shade, int funcs, pdf_function **func, float t0, float t1) +{ + shade->usefunction = 1; + if (funcs == 1) + pdf_samplecompositeshadefunction(shade, func[0], t0, t1); + else + pdf_samplecomponentshadefunction(shade, funcs, func, t0, t1); +} + +/* Type 1-3 -- Function-based, axial and radial shadings */ + +static void +pdf_loadfunctionbasedshading(fz_shade *shade, pdf_xref *xref, fz_obj *dict, pdf_function *func) +{ + fz_obj *obj; + float x0, y0, x1, y1; + fz_matrix matrix; + struct vertex v[4]; + int xx, yy; + float x, y; + float xn, yn; + int i; + + pdf_logshade("load type1 (function-based) shading\n"); + + x0 = y0 = 0; + x1 = y1 = 1; + obj = fz_dictgets(dict, "Domain"); + if (fz_arraylen(obj) == 4) + { + x0 = fz_toreal(fz_arrayget(obj, 0)); + x1 = fz_toreal(fz_arrayget(obj, 1)); + y0 = fz_toreal(fz_arrayget(obj, 2)); + y1 = fz_toreal(fz_arrayget(obj, 3)); + } + + matrix = 
fz_identity; + obj = fz_dictgets(dict, "Matrix"); + if (fz_arraylen(obj) == 6) + matrix = pdf_tomatrix(obj); + + for (yy = 0; yy < FUNSEGS; yy++) + { + y = y0 + (y1 - y0) * yy / FUNSEGS; + yn = y0 + (y1 - y0) * (yy + 1) / FUNSEGS; + + for (xx = 0; xx < FUNSEGS; xx++) + { + x = x0 + (x1 - x0) * xx / FUNSEGS; + xn = x0 + (x1 - x0) * (xx + 1) / FUNSEGS; + + v[0].x = x; v[0].y = y; + v[1].x = xn; v[1].y = y; + v[2].x = xn; v[2].y = yn; + v[3].x = x; v[3].y = yn; + + for (i = 0; i < 4; i++) + { + fz_point pt; + float fv[2]; + + fv[0] = v[i].x; + fv[1] = v[i].y; + pdf_evalfunction(func, fv, 2, v[i].c, shade->colorspace->n); + + pt.x = v[i].x; + pt.y = v[i].y; + pt = fz_transformpoint(matrix, pt); + v[i].x = pt.x; + v[i].y = pt.y; + } + + pdf_addquad(shade, &v[0], &v[1], &v[2], &v[3]); + } + } +} + +static void +pdf_loadaxialshading(fz_shade *shade, pdf_xref *xref, fz_obj *dict, int funcs, pdf_function **func) +{ + fz_obj *obj; + float d0, d1; + int e0, e1; + float x0, y0, x1, y1; + struct vertex p1, p2; + + pdf_logshade("load type2 (axial) shading\n"); + + obj = fz_dictgets(dict, "Coords"); + x0 = fz_toreal(fz_arrayget(obj, 0)); + y0 = fz_toreal(fz_arrayget(obj, 1)); + x1 = fz_toreal(fz_arrayget(obj, 2)); + y1 = fz_toreal(fz_arrayget(obj, 3)); + + d0 = 0; + d1 = 1; + obj = fz_dictgets(dict, "Domain"); + if (fz_arraylen(obj) == 2) + { + d0 = fz_toreal(fz_arrayget(obj, 0)); + d1 = fz_toreal(fz_arrayget(obj, 1)); + } + + e0 = e1 = 0; + obj = fz_dictgets(dict, "Extend"); + if (fz_arraylen(obj) == 2) + { + e0 = fz_tobool(fz_arrayget(obj, 0)); + e1 = fz_tobool(fz_arrayget(obj, 1)); + } + + pdf_sampleshadefunction(shade, funcs, func, d0, d1); + + shade->type = FZ_LINEAR; + + shade->extend[0] = e0; + shade->extend[1] = e1; + + p1.x = x0; + p1.y = y0; + p1.c[0] = 0; + pdf_addvertex(shade, &p1); + + p2.x = x1; + p2.y = y1; + p2.c[0] = 0; + pdf_addvertex(shade, &p2); +} + +static void +pdf_loadradialshading(fz_shade *shade, pdf_xref *xref, fz_obj *dict, int funcs, pdf_function 
**func) +{ + fz_obj *obj; + float d0, d1; + int e0, e1; + float x0, y0, r0, x1, y1, r1; + struct vertex p1, p2; + + pdf_logshade("load type3 (radial) shading\n"); + + obj = fz_dictgets(dict, "Coords"); + x0 = fz_toreal(fz_arrayget(obj, 0)); + y0 = fz_toreal(fz_arrayget(obj, 1)); + r0 = fz_toreal(fz_arrayget(obj, 2)); + x1 = fz_toreal(fz_arrayget(obj, 3)); + y1 = fz_toreal(fz_arrayget(obj, 4)); + r1 = fz_toreal(fz_arrayget(obj, 5)); + + d0 = 0; + d1 = 1; + obj = fz_dictgets(dict, "Domain"); + if (fz_arraylen(obj) == 2) + { + d0 = fz_toreal(fz_arrayget(obj, 0)); + d1 = fz_toreal(fz_arrayget(obj, 1)); + } + + e0 = e1 = 0; + obj = fz_dictgets(dict, "Extend"); + if (fz_arraylen(obj) == 2) + { + e0 = fz_tobool(fz_arrayget(obj, 0)); + e1 = fz_tobool(fz_arrayget(obj, 1)); + } + + pdf_sampleshadefunction(shade, funcs, func, d0, d1); + + shade->type = FZ_RADIAL; + + shade->extend[0] = e0; + shade->extend[1] = e1; + + p1.x = x0; + p1.y = y0; + p1.c[0] = r0; + pdf_addvertex(shade, &p1); + + p2.x = x1; + p2.y = y1; + p2.c[0] = r1; + pdf_addvertex(shade, &p2); +} + +/* Type 4-7 -- Triangle and patch mesh shadings */ + +static inline float +readsample(fz_stream *stream, int bits, float min, float max) +{ + /* we use pow(2,x) because (1<<x) would overflow the math on 32-bit samples */ + float bitscale = 1 / (powf(2, bits) - 1); + return min + fz_readbits(stream, bits) * (max - min) * bitscale; +} + +struct meshparams +{ + int vprow; + int bpflag; + int bpcoord; + int bpcomp; + float x0, x1; + float y0, y1; + float c0[FZ_MAXCOLORS]; + float c1[FZ_MAXCOLORS]; +}; + +static void +pdf_loadmeshparams(pdf_xref *xref, fz_obj *dict, struct meshparams *p) +{ + fz_obj *obj; + int i, n; + + p->x0 = p->y0 = 0; + p->x1 = p->y1 = 1; + for (i = 0; i < FZ_MAXCOLORS; i++) + { + p->c0[i] = 0; + p->c1[i] = 1; + } + + p->vprow = fz_toint(fz_dictgets(dict, "VerticesPerRow")); + p->bpflag = fz_toint(fz_dictgets(dict, "BitsPerFlag")); + p->bpcoord = fz_toint(fz_dictgets(dict, "BitsPerCoordinate")); + 
p->bpcomp = fz_toint(fz_dictgets(dict, "BitsPerComponent")); + + obj = fz_dictgets(dict, "Decode"); + if (fz_arraylen(obj) >= 6) + { + n = (fz_arraylen(obj) - 4) / 2; + p->x0 = fz_toreal(fz_arrayget(obj, 0)); + p->x1 = fz_toreal(fz_arrayget(obj, 1)); + p->y0 = fz_toreal(fz_arrayget(obj, 2)); + p->y1 = fz_toreal(fz_arrayget(obj, 3)); + for (i = 0; i < n; i++) + { + p->c0[i] = fz_toreal(fz_arrayget(obj, 4 + i * 2)); + p->c1[i] = fz_toreal(fz_arrayget(obj, 5 + i * 2)); + } + } + + if (p->vprow < 2) + p->vprow = 2; + + if (p->bpflag != 2 && p->bpflag != 4 && p->bpflag != 8) + p->bpflag = 8; + + if (p->bpcoord != 1 && p->bpcoord != 2 && p->bpcoord != 4 && + p->bpcoord != 8 && p->bpcoord != 12 && p->bpcoord != 16 && + p->bpcoord != 24 && p->bpcoord != 32) + p->bpcoord = 8; + + if (p->bpcomp != 1 && p->bpcomp != 2 && p->bpcomp != 4 && + p->bpcomp != 8 && p->bpcomp != 12 && p->bpcomp != 16) + p->bpcomp = 8; +} + +static void +pdf_loadtype4shade(fz_shade *shade, pdf_xref *xref, fz_obj *dict, + int funcs, pdf_function **func, fz_stream *stream) +{ + struct meshparams p; + struct vertex va, vb, vc, vd; + int ncomp; + int flag; + int i; + + pdf_logshade("load type4 (free-form triangle mesh) shading\n"); + + pdf_loadmeshparams(xref, dict, &p); + + if (funcs > 0) + { + ncomp = 1; + pdf_sampleshadefunction(shade, funcs, func, p.c0[0], p.c1[0]); + } + else + ncomp = shade->colorspace->n; + + while (!fz_iseofbits(stream)) + { + flag = fz_readbits(stream, p.bpflag); + vd.x = readsample(stream, p.bpcoord, p.x0, p.x1); + vd.y = readsample(stream, p.bpcoord, p.y0, p.y1); + for (i = 0; i < ncomp; i++) + vd.c[i] = readsample(stream, p.bpcomp, p.c0[i], p.c1[i]); + + switch (flag) + { + case 0: /* start new triangle */ + va = vd; + + fz_readbits(stream, p.bpflag); + vb.x = readsample(stream, p.bpcoord, p.x0, p.x1); + vb.y = readsample(stream, p.bpcoord, p.y0, p.y1); + for (i = 0; i < ncomp; i++) + vb.c[i] = readsample(stream, p.bpcomp, p.c0[i], p.c1[i]); + + fz_readbits(stream, p.bpflag); 
+ vc.x = readsample(stream, p.bpcoord, p.x0, p.x1); + vc.y = readsample(stream, p.bpcoord, p.y0, p.y1); + for (i = 0; i < ncomp; i++) + vc.c[i] = readsample(stream, p.bpcomp, p.c0[i], p.c1[i]); + + pdf_addtriangle(shade, &va, &vb, &vc); + break; + + case 1: /* Vb, Vc, Vd */ + va = vb; + vb = vc; + vc = vd; + pdf_addtriangle(shade, &va, &vb, &vc); + break; + + case 2: /* Va, Vc, Vd */ + vb = vc; + vc = vd; + pdf_addtriangle(shade, &va, &vb, &vc); + break; + } + } +} + +static void +pdf_loadtype5shade(fz_shade *shade, pdf_xref *xref, fz_obj *dict, + int funcs, pdf_function **func, fz_stream *stream) +{ + struct meshparams p; + struct vertex *buf, *ref; + int first; + int ncomp; + int i, k; + + pdf_logshade("load type5 (lattice-form triangle mesh) shading\n"); + + pdf_loadmeshparams(xref, dict, &p); + + if (funcs > 0) + { + ncomp = 1; + pdf_sampleshadefunction(shade, funcs, func, p.c0[0], p.c1[0]); + } + else + ncomp = shade->colorspace->n; + + ref = fz_calloc(p.vprow, sizeof(struct vertex)); + buf = fz_calloc(p.vprow, sizeof(struct vertex)); + first = 1; + + while (!fz_iseofbits(stream)) + { + for (i = 0; i < p.vprow; i++) + { + buf[i].x = readsample(stream, p.bpcoord, p.x0, p.x1); + buf[i].y = readsample(stream, p.bpcoord, p.y0, p.y1); + for (k = 0; k < ncomp; k++) + buf[i].c[k] = readsample(stream, p.bpcomp, p.c0[k], p.c1[k]); + } + + if (!first) + for (i = 0; i < p.vprow - 1; i++) + pdf_addquad(shade, + &ref[i], &ref[i+1], &buf[i+1], &buf[i]); + + memcpy(ref, buf, p.vprow * sizeof(struct vertex)); + first = 0; + } + + free(ref); + free(buf); +} + +/* Type 6 & 7 -- Patch mesh shadings */ + +static void +pdf_loadtype6shade(fz_shade *shade, pdf_xref *xref, fz_obj *dict, + int funcs, pdf_function **func, fz_stream *stream) +{ + struct meshparams p; + int haspatch, hasprevpatch; + float prevc[4][FZ_MAXCOLORS]; + fz_point prevp[12]; + int ncomp; + int i, k; + + pdf_logshade("load type6 (coons patch mesh) shading\n"); + + pdf_loadmeshparams(xref, dict, &p); + + if (funcs 
> 0) + { + ncomp = 1; + pdf_sampleshadefunction(shade, funcs, func, p.c0[0], p.c1[0]); + } + else + ncomp = shade->colorspace->n; + + hasprevpatch = 0; + + while (!fz_iseofbits(stream)) + { + float c[4][FZ_MAXCOLORS]; + fz_point v[12]; + int startcolor; + int startpt; + int flag; + + flag = fz_readbits(stream, p.bpflag); + + if (flag == 0) + { + startpt = 0; + startcolor = 0; + } + else + { + startpt = 4; + startcolor = 2; + } + + for (i = startpt; i < 12; i++) + { + v[i].x = readsample(stream, p.bpcoord, p.x0, p.x1); + v[i].y = readsample(stream, p.bpcoord, p.y0, p.y1); + } + + for (i = startcolor; i < 4; i++) + { + for (k = 0; k < ncomp; k++) + c[i][k] = readsample(stream, p.bpcomp, p.c0[k], p.c1[k]); + } + + haspatch = 0; + + if (flag == 0) + { + haspatch = 1; + } + else if (flag == 1 && hasprevpatch) + { + v[0] = prevp[3]; + v[1] = prevp[4]; + v[2] = prevp[5]; + v[3] = prevp[6]; + memcpy(c[0], prevc[1], ncomp * sizeof(float)); + memcpy(c[1], prevc[2], ncomp * sizeof(float)); + + haspatch = 1; + } + else if (flag == 2 && hasprevpatch) + { + v[0] = prevp[6]; + v[1] = prevp[7]; + v[2] = prevp[8]; + v[3] = prevp[9]; + memcpy(c[0], prevc[2], ncomp * sizeof(float)); + memcpy(c[1], prevc[3], ncomp * sizeof(float)); + + haspatch = 1; + } + else if (flag == 3 && hasprevpatch) + { + v[0] = prevp[ 9]; + v[1] = prevp[10]; + v[2] = prevp[11]; + v[3] = prevp[ 0]; + memcpy(c[0], prevc[3], ncomp * sizeof(float)); + memcpy(c[1], prevc[0], ncomp * sizeof(float)); + + haspatch = 1; + } + + if (haspatch) + { + pdf_tensorpatch patch; + + pdf_maketensorpatch(&patch, 6, v); + + for (i = 0; i < 4; i++) + memcpy(patch.color[i], c[i], ncomp * sizeof(float)); + + drawpatch(shade, &patch, SUBDIV, SUBDIV); + + for (i = 0; i < 12; i++) + prevp[i] = v[i]; + + for (i = 0; i < 4; i++) + memcpy(prevc[i], c[i], ncomp * sizeof(float)); + + hasprevpatch = 1; + } + } +} + +static void +pdf_loadtype7shade(fz_shade *shade, pdf_xref *xref, fz_obj *dict, + int funcs, pdf_function **func, fz_stream 
*stream) +{ + struct meshparams p; + int haspatch, hasprevpatch; + float prevc[4][FZ_MAXCOLORS]; + fz_point prevp[16]; + int ncomp; + int i, k; + + pdf_logshade("load type7 (tensor-product patch mesh) shading\n"); + + pdf_loadmeshparams(xref, dict, &p); + + if (funcs > 0) + { + ncomp = 1; + pdf_sampleshadefunction(shade, funcs, func, p.c0[0], p.c1[0]); + } + else + ncomp = shade->colorspace->n; + + hasprevpatch = 0; + + while (!fz_iseofbits(stream)) + { + float c[4][FZ_MAXCOLORS]; + fz_point v[16]; + int startcolor; + int startpt; + int flag; + + flag = fz_readbits(stream, p.bpflag); + + if (flag == 0) + { + startpt = 0; + startcolor = 0; + } + else + { + startpt = 4; + startcolor = 2; + } + + for (i = startpt; i < 16; i++) + { + v[i].x = readsample(stream, p.bpcoord, p.x0, p.x1); + v[i].y = readsample(stream, p.bpcoord, p.y0, p.y1); + } + + for (i = startcolor; i < 4; i++) + { + for (k = 0; k < ncomp; k++) + c[i][k] = readsample(stream, p.bpcomp, p.c0[k], p.c1[k]); + } + + haspatch = 0; + + if (flag == 0) + { + haspatch = 1; + } + else if (flag == 1 && hasprevpatch) + { + v[0] = prevp[3]; + v[1] = prevp[4]; + v[2] = prevp[5]; + v[3] = prevp[6]; + memcpy(c[0], prevc[1], ncomp * sizeof(float)); + memcpy(c[1], prevc[2], ncomp * sizeof(float)); + + haspatch = 1; + } + else if (flag == 2 && hasprevpatch) + { + v[0] = prevp[6]; + v[1] = prevp[7]; + v[2] = prevp[8]; + v[3] = prevp[9]; + memcpy(c[0], prevc[2], ncomp * sizeof(float)); + memcpy(c[1], prevc[3], ncomp * sizeof(float)); + + haspatch = 1; + } + else if (flag == 3 && hasprevpatch) + { + v[0] = prevp[ 9]; + v[1] = prevp[10]; + v[2] = prevp[11]; + v[3] = prevp[ 0]; + memcpy(c[0], prevc[3], ncomp * sizeof(float)); + memcpy(c[1], prevc[0], ncomp * sizeof(float)); + + haspatch = 1; + } + + if (haspatch) + { + pdf_tensorpatch patch; + + pdf_maketensorpatch(&patch, 7, v); + + for (i = 0; i < 4; i++) + memcpy(patch.color[i], c[i], ncomp * sizeof(float)); + + drawpatch(shade, &patch, SUBDIV, SUBDIV); + + for (i = 0; i < 
16; i++) + prevp[i] = v[i]; + + for (i = 0; i < 4; i++) + memcpy(prevc[i], c[i], FZ_MAXCOLORS * sizeof(float)); + + hasprevpatch = 1; + } + } +} + +/* Load all of the shading dictionary parameters, then switch on the shading type. */ + +static fz_error +pdf_loadshadingdict(fz_shade **shadep, pdf_xref *xref, fz_obj *dict, fz_matrix transform) +{ + fz_error error; + fz_shade *shade; + pdf_function *func[FZ_MAXCOLORS] = { nil }; + fz_stream *stream = nil; + fz_obj *obj; + int funcs; + int type; + int i; + + pdf_logshade("load shading dict (%d %d R) {\n", fz_tonum(dict), fz_togen(dict)); + + shade = fz_malloc(sizeof(fz_shade)); + shade->refs = 1; + shade->type = FZ_MESH; + shade->usebackground = 0; + shade->usefunction = 0; + shade->matrix = transform; + shade->bbox = fz_infiniterect; + shade->extend[0] = 0; + shade->extend[1] = 0; + + shade->meshlen = 0; + shade->meshcap = 0; + shade->mesh = nil; + + shade->colorspace = nil; + + funcs = 0; + + obj = fz_dictgets(dict, "ShadingType"); + type = fz_toint(obj); + + obj = fz_dictgets(dict, "ColorSpace"); + if (!obj) + { + fz_dropshade(shade); + return fz_throw("shading colorspace is missing"); + } + error = pdf_loadcolorspace(&shade->colorspace, xref, obj); + if (error) + { + fz_dropshade(shade); + return fz_rethrow(error, "cannot load colorspace (%d %d R)", fz_tonum(obj), fz_togen(obj)); + } + pdf_logshade("colorspace %s\n", shade->colorspace->name); + + obj = fz_dictgets(dict, "Background"); + if (obj) + { + pdf_logshade("background\n"); + shade->usebackground = 1; + for (i = 0; i < shade->colorspace->n; i++) + shade->background[i] = fz_toreal(fz_arrayget(obj, i)); + } + + obj = fz_dictgets(dict, "BBox"); + if (fz_isarray(obj)) + { + shade->bbox = pdf_torect(obj); + } + + obj = fz_dictgets(dict, "Function"); + if (fz_isdict(obj)) + { + funcs = 1; + + error = pdf_loadfunction(&func[0], xref, obj); + if (error) + { + error = fz_rethrow(error, "cannot load shading function (%d %d R)", fz_tonum(obj), fz_togen(obj)); + goto 
cleanup; + } + } + else if (fz_isarray(obj)) + { + funcs = fz_arraylen(obj); + if (funcs != 1 && funcs != shade->colorspace->n) + { + error = fz_throw("incorrect number of shading functions"); + goto cleanup; + } + + for (i = 0; i < funcs; i++) + { + error = pdf_loadfunction(&func[i], xref, fz_arrayget(obj, i)); + if (error) + { + error = fz_rethrow(error, "cannot load shading function (%d %d R)", fz_tonum(obj), fz_togen(obj)); + goto cleanup; + } + } + } + + if (type >= 4 && type <= 7) + { + error = pdf_openstream(&stream, xref, fz_tonum(dict), fz_togen(dict)); + if (error) + { + error = fz_rethrow(error, "cannot open shading stream (%d %d R)", fz_tonum(dict), fz_togen(dict)); + goto cleanup; + } + } + + switch (type) + { + case 1: pdf_loadfunctionbasedshading(shade, xref, dict, func[0]); break; + case 2: pdf_loadaxialshading(shade, xref, dict, funcs, func); break; + case 3: pdf_loadradialshading(shade, xref, dict, funcs, func); break; + case 4: pdf_loadtype4shade(shade, xref, dict, funcs, func, stream); break; + case 5: pdf_loadtype5shade(shade, xref, dict, funcs, func, stream); break; + case 6: pdf_loadtype6shade(shade, xref, dict, funcs, func, stream); break; + case 7: pdf_loadtype7shade(shade, xref, dict, funcs, func, stream); break; + default: + error = fz_throw("unknown shading type: %d", type); + goto cleanup; + } + + if (stream) + fz_close(stream); + for (i = 0; i < funcs; i++) + if (func[i]) + pdf_dropfunction(func[i]); + + pdf_logshade("}\n"); + + *shadep = shade; + return fz_okay; + +cleanup: + if (stream) + fz_close(stream); + for (i = 0; i < funcs; i++) + if (func[i]) + pdf_dropfunction(func[i]); + fz_dropshade(shade); + + return fz_rethrow(error, "cannot load shading type %d (%d %d R)", type, fz_tonum(dict), fz_togen(dict)); +} + +fz_error +pdf_loadshading(fz_shade **shadep, pdf_xref *xref, fz_obj *dict) +{ + fz_error error; + fz_matrix mat; + fz_obj *obj; + + if ((*shadep = pdf_finditem(xref->store, fz_dropshade, dict))) + { + fz_keepshade(*shadep); 
+ return fz_okay; + } + + /* Type 2 pattern dictionary */ + if (fz_dictgets(dict, "PatternType")) + { + pdf_logshade("load shading pattern (%d %d R) {\n", fz_tonum(dict), fz_togen(dict)); + + obj = fz_dictgets(dict, "Matrix"); + if (obj) + { + mat = pdf_tomatrix(obj); + pdf_logshade("matrix [%g %g %g %g %g %g]\n", + mat.a, mat.b, mat.c, mat.d, mat.e, mat.f); + } + else + { + mat = fz_identity; + } + + obj = fz_dictgets(dict, "ExtGState"); + if (obj) + { + if (fz_dictgets(obj, "CA") || fz_dictgets(obj, "ca")) + { + fz_warn("shading with alpha not supported"); + } + } + + obj = fz_dictgets(dict, "Shading"); + if (!obj) + return fz_throw("syntaxerror: missing shading dictionary"); + + error = pdf_loadshadingdict(shadep, xref, obj, mat); + if (error) + return fz_rethrow(error, "cannot load shading dictionary (%d %d R)", fz_tonum(obj), fz_togen(obj)); + + pdf_logshade("}\n"); + } + + /* Naked shading dictionary */ + else + { + error = pdf_loadshadingdict(shadep, xref, dict, fz_identity); + if (error) + return fz_rethrow(error, "cannot load shading dictionary (%d %d R)", fz_tonum(dict), fz_togen(dict)); + } + + pdf_storeitem(xref->store, fz_keepshade, fz_dropshade, dict, *shadep); + + return fz_okay; +} diff --git a/pdf/pdf_store.c b/pdf/pdf_store.c new file mode 100644 index 00000000..d6be2088 --- /dev/null +++ b/pdf/pdf_store.c @@ -0,0 +1,224 @@ +#include "fitz.h" +#include "mupdf.h" + +typedef struct pdf_item_s pdf_item; + +struct pdf_item_s +{ + void *dropfunc; + fz_obj *key; + void *val; + int age; + pdf_item *next; +}; + +struct refkey +{ + void *dropfunc; + int num; + int gen; +}; + +struct pdf_store_s +{ + fz_hashtable *hash; /* hash for num/gen keys */ + pdf_item *root; /* linked list for everything else */ +}; + +pdf_store * +pdf_newstore(void) +{ + pdf_store *store; + store = fz_malloc(sizeof(pdf_store)); + store->hash = fz_newhash(4096, sizeof(struct refkey)); + store->root = nil; + return store; +} + +void +pdf_storeitem(pdf_store *store, void *keepfunc, 
void *dropfunc, fz_obj *key, void *val) +{ + pdf_item *item; + + if (!store) + return; + + item = fz_malloc(sizeof(pdf_item)); + item->dropfunc = dropfunc; + item->key = fz_keepobj(key); + item->val = ((void*(*)(void*))keepfunc)(val); + item->age = 0; + item->next = nil; + + if (fz_isindirect(key)) + { + struct refkey refkey; + pdf_logrsrc("store item (%d %d R) ptr=%p\n", fz_tonum(key), fz_togen(key), val); + refkey.dropfunc = dropfunc; + refkey.num = fz_tonum(key); + refkey.gen = fz_togen(key); + fz_hashinsert(store->hash, &refkey, item); + } + else + { + pdf_logrsrc("store item (...) = %p\n", val); + item->next = store->root; + store->root = item; + } +} + +void * +pdf_finditem(pdf_store *store, void *dropfunc, fz_obj *key) +{ + struct refkey refkey; + pdf_item *item; + + if (!store) + return nil; + + if (key == nil) + return nil; + + if (fz_isindirect(key)) + { + refkey.dropfunc = dropfunc; + refkey.num = fz_tonum(key); + refkey.gen = fz_togen(key); + item = fz_hashfind(store->hash, &refkey); + if (item) + { + item->age = 0; + return item->val; + } + } + else + { + for (item = store->root; item; item = item->next) + { + if (item->dropfunc == dropfunc && !fz_objcmp(item->key, key)) + { + item->age = 0; + return item->val; + } + } + } + + return nil; +} + +void +pdf_removeitem(pdf_store *store, void *dropfunc, fz_obj *key) +{ + struct refkey refkey; + pdf_item *item, *prev, *next; + + if (fz_isindirect(key)) + { + refkey.dropfunc = dropfunc; + refkey.num = fz_tonum(key); + refkey.gen = fz_togen(key); + item = fz_hashfind(store->hash, &refkey); + if (item) + { + fz_hashremove(store->hash, &refkey); + ((void(*)(void*))item->dropfunc)(item->val); + fz_dropobj(item->key); + fz_free(item); + } + } + else + { + prev = nil; + for (item = store->root; item; item = next) + { + next = item->next; + if (item->dropfunc == dropfunc && !fz_objcmp(item->key, key)) + { + if (!prev) + store->root = next; + else + prev->next = next; + ((void(*)(void*))item->dropfunc)(item->val); + 
fz_dropobj(item->key); + fz_free(item); + } + else + prev = item; + } + } +} + +void +pdf_agestore(pdf_store *store, int maxage) +{ + struct refkey *refkey; + pdf_item *item, *prev, *next; + int i; + + for (i = 0; i < fz_hashlen(store->hash); i++) + { + refkey = fz_hashgetkey(store->hash, i); + item = fz_hashgetval(store->hash, i); + if (item && ++item->age > maxage) + { + fz_hashremove(store->hash, refkey); + ((void(*)(void*))item->dropfunc)(item->val); + fz_dropobj(item->key); + fz_free(item); + i--; /* items with same hash may move into place */ + } + } + + prev = nil; + for (item = store->root; item; item = next) + { + next = item->next; + if (++item->age > maxage) + { + if (!prev) + store->root = next; + else + prev->next = next; + ((void(*)(void*))item->dropfunc)(item->val); + fz_dropobj(item->key); + fz_free(item); + } + else + prev = item; + } +} + +void +pdf_freestore(pdf_store *store) +{ + pdf_agestore(store, 0); + fz_freehash(store->hash); + fz_free(store); +} + +void +pdf_debugstore(pdf_store *store) +{ + pdf_item *item; + pdf_item *next; + struct refkey *refkey; + int i; + + printf("-- resource store contents --\n"); + + for (i = 0; i < fz_hashlen(store->hash); i++) + { + refkey = fz_hashgetkey(store->hash, i); + item = fz_hashgetval(store->hash, i); + if (item) + printf("store[%d] (%d %d R) = %p\n", i, refkey->num, refkey->gen, item->val); + } + + for (item = store->root; item; item = next) + { + next = item->next; + printf("store[*] "); + fz_debugobj(item->key); + printf(" = %p\n", item->val); + } +} diff --git a/pdf/pdf_stream.c b/pdf/pdf_stream.c new file mode 100644 index 00000000..cae95a89 --- /dev/null +++ b/pdf/pdf_stream.c @@ -0,0 +1,396 @@ +#include "fitz.h" +#include "mupdf.h" + +/* + * Check if an object is a stream or not. 
+ */ +int +pdf_isstream(pdf_xref *xref, int num, int gen) +{ + fz_error error; + + if (num < 0 || num >= xref->len) + return 0; + + error = pdf_cacheobject(xref, num, gen); + if (error) + { + fz_catch(error, "cannot load object, ignoring error"); + return 0; + } + + return xref->table[num].stmofs > 0; +} + +/* + * Scan stream dictionary for an explicit /Crypt filter + */ +static int +pdf_streamhascrypt(fz_obj *stm) +{ + fz_obj *filters; + fz_obj *obj; + int i; + + filters = fz_dictgetsa(stm, "Filter", "F"); + if (filters) + { + if (!strcmp(fz_toname(filters), "Crypt")) + return 1; + if (fz_isarray(filters)) + { + for (i = 0; i < fz_arraylen(filters); i++) + { + obj = fz_arrayget(filters, i); + if (!strcmp(fz_toname(obj), "Crypt")) + return 1; + } + } + } + return 0; +} + +/* + * Create a filter given a name and param dictionary. + */ +static fz_stream * +buildfilter(fz_stream *chain, pdf_xref * xref, fz_obj * f, fz_obj * p, int num, int gen) +{ + fz_error error; + char *s; + + s = fz_toname(f); + + if (!strcmp(s, "ASCIIHexDecode") || !strcmp(s, "AHx")) + return fz_openahxd(chain); + + else if (!strcmp(s, "ASCII85Decode") || !strcmp(s, "A85")) + return fz_opena85d(chain); + + else if (!strcmp(s, "CCITTFaxDecode") || !strcmp(s, "CCF")) + return fz_openfaxd(chain, p); + + else if (!strcmp(s, "DCTDecode") || !strcmp(s, "DCT")) + return fz_opendctd(chain, p); + + else if (!strcmp(s, "RunLengthDecode") || !strcmp(s, "RL")) + return fz_openrld(chain); + + else if (!strcmp(s, "FlateDecode") || !strcmp(s, "Fl")) + { + fz_obj *obj = fz_dictgets(p, "Predictor"); + if (fz_toint(obj) > 1) + return fz_openpredict(fz_openflated(chain), p); + return fz_openflated(chain); + } + + else if (!strcmp(s, "LZWDecode") || !strcmp(s, "LZW")) + { + fz_obj *obj = fz_dictgets(p, "Predictor"); + if (fz_toint(obj) > 1) + return fz_openpredict(fz_openlzwd(chain, p), p); + return fz_openlzwd(chain, p); + } + + else if (!strcmp(s, "JBIG2Decode")) + { + fz_obj *obj = fz_dictgets(p, "JBIG2Globals"); 
+ if (obj) + { + fz_buffer *globals; + error = pdf_loadstream(&globals, xref, fz_tonum(obj), fz_togen(obj)); + if (error) + fz_catch(error, "cannot load jbig2 global segments"); + chain = fz_openjbig2d(chain, globals); + fz_dropbuffer(globals); + return chain; + } + return fz_openjbig2d(chain, nil); + } + + else if (!strcmp(s, "JPXDecode")) + return chain; /* JPX decoding is special cased in the image loading code */ + + else if (!strcmp(s, "Crypt")) + { + pdf_cryptfilter cf; + fz_obj *name; + + if (!xref->crypt) + { + fz_warn("crypt filter in unencrypted document"); + return chain; + } + + name = fz_dictgets(p, "Name"); + if (fz_isname(name) && strcmp(fz_toname(name), "Identity") != 0) + { + fz_obj *obj = fz_dictget(xref->crypt->cf, name); + if (fz_isdict(obj)) + { + error = pdf_parsecryptfilter(&cf, obj, xref->crypt->length); + if (error) + fz_catch(error, "cannot parse crypt filter (%d %d R)", fz_tonum(obj), fz_togen(obj)); + else + return pdf_opencrypt(chain, xref->crypt, &cf, num, gen); + } + } + + return chain; + } + + fz_warn("unknown filter name (%s)", s); + return chain; +} + +/* + * Build a chain of filters given filter names and param dicts. + * If head is given, start filter chain with it. + * Assume ownership of head. + */ +static fz_stream * +buildfilterchain(fz_stream *chain, pdf_xref *xref, fz_obj *fs, fz_obj *ps, int num, int gen) +{ + fz_obj *f; + fz_obj *p; + int i; + + for (i = 0; i < fz_arraylen(fs); i++) + { + f = fz_arrayget(fs, i); + p = fz_arrayget(ps, i); + chain = buildfilter(chain, xref, f, p, num, gen); + } + + return chain; +} + +/* + * Build a filter for reading raw stream data. + * This is a null filter to constrain reading to the + * stream length, followed by a decryption filter. 
+ */ +static fz_stream * +pdf_openrawfilter(fz_stream *chain, pdf_xref *xref, fz_obj *stmobj, int num, int gen) +{ + int hascrypt; + int len; + + /* don't close chain when we close this filter */ + fz_keepstream(chain); + + len = fz_toint(fz_dictgets(stmobj, "Length")); + chain = fz_opennull(chain, len); + + hascrypt = pdf_streamhascrypt(stmobj); + if (xref->crypt && !hascrypt) + chain = pdf_opencrypt(chain, xref->crypt, &xref->crypt->stmf, num, gen); + + return chain; +} + +/* + * Construct a filter to decode a stream, constraining + * to stream length and decrypting. + */ +static fz_stream * +pdf_openfilter(fz_stream *chain, pdf_xref *xref, fz_obj *stmobj, int num, int gen) +{ + fz_obj *filters; + fz_obj *params; + + filters = fz_dictgetsa(stmobj, "Filter", "F"); + params = fz_dictgetsa(stmobj, "DecodeParms", "DP"); + + chain = pdf_openrawfilter(chain, xref, stmobj, num, gen); + + if (fz_isname(filters)) + return buildfilter(chain, xref, filters, params, num, gen); + if (fz_arraylen(filters) > 0) + return buildfilterchain(chain, xref, filters, params, num, gen); + + return chain; +} + +/* + * Construct a filter to decode a stream, without + * constraining to stream length, and without decryption. + */ +fz_stream * +pdf_openinlinestream(fz_stream *chain, pdf_xref *xref, fz_obj *stmobj, int length) +{ + fz_obj *filters; + fz_obj *params; + + filters = fz_dictgetsa(stmobj, "Filter", "F"); + params = fz_dictgetsa(stmobj, "DecodeParms", "DP"); + + /* don't close chain when we close this filter */ + fz_keepstream(chain); + + if (fz_isname(filters)) + return buildfilter(chain, xref, filters, params, 0, 0); + if (fz_arraylen(filters) > 0) + return buildfilterchain(chain, xref, filters, params, 0, 0); + + return fz_opennull(chain, length); +} + +/* + * Open a stream for reading the raw (compressed but decrypted) data. + * Using xref->file while this is open is a bad idea. 
+ */ +fz_error +pdf_openrawstream(fz_stream **stmp, pdf_xref *xref, int num, int gen) +{ + pdf_xrefentry *x; + fz_error error; + + if (num < 0 || num >= xref->len) + return fz_throw("object id out of range (%d %d R)", num, gen); + + x = xref->table + num; + + error = pdf_cacheobject(xref, num, gen); + if (error) + return fz_rethrow(error, "cannot load stream object (%d %d R)", num, gen); + + if (x->stmofs) + { + *stmp = pdf_openrawfilter(xref->file, xref, x->obj, num, gen); + fz_seek(xref->file, x->stmofs, 0); + return fz_okay; + } + + return fz_throw("object is not a stream"); +} + +/* + * Open a stream for reading uncompressed data. + * Put the opened file in xref->stream. + * Using xref->file while a stream is open is a Bad idea. + */ +fz_error +pdf_openstream(fz_stream **stmp, pdf_xref *xref, int num, int gen) +{ + pdf_xrefentry *x; + fz_error error; + + if (num < 0 || num >= xref->len) + return fz_throw("object id out of range (%d %d R)", num, gen); + + x = xref->table + num; + + error = pdf_cacheobject(xref, num, gen); + if (error) + return fz_rethrow(error, "cannot load stream object (%d %d R)", num, gen); + + if (x->stmofs) + { + *stmp = pdf_openfilter(xref->file, xref, x->obj, num, gen); + fz_seek(xref->file, x->stmofs, 0); + return fz_okay; + } + + return fz_throw("object is not a stream"); +} + +fz_error +pdf_openstreamat(fz_stream **stmp, pdf_xref *xref, int num, int gen, fz_obj *dict, int stmofs) +{ + if (stmofs) + { + *stmp = pdf_openfilter(xref->file, xref, dict, num, gen); + fz_seek(xref->file, stmofs, 0); + return fz_okay; + } + return fz_throw("object is not a stream"); +} + +/* + * Load raw (compressed but decrypted) contents of a stream into buf. 
/*
 * Estimate the decoded size of `len` bytes of data passed through one
 * filter. Both the long filter names and the abbreviated names accepted
 * by buildfilter are recognised.
 *
 * len:    encoded length in bytes.
 * filter: filter name; NULL or an unrecognised name leaves len unchanged.
 *
 * Returns the guessed length after decoding.
 */
static int
pdf_guessfilterlength(int len, char *filter)
{
	if (!filter)
		return len;

	if (!strcmp(filter, "ASCIIHexDecode") || !strcmp(filter, "AHx"))
		return len / 2;
	if (!strcmp(filter, "ASCII85Decode") || !strcmp(filter, "A85"))
		return len * 4 / 5;
	if (!strcmp(filter, "FlateDecode") || !strcmp(filter, "Fl"))
		return len * 3;
	if (!strcmp(filter, "RunLengthDecode") || !strcmp(filter, "RL"))
		return len * 3;
	if (!strcmp(filter, "LZWDecode") || !strcmp(filter, "LZW"))
		return len * 2;

	return len;
}
+ */ +fz_error +pdf_loadstream(fz_buffer **bufp, pdf_xref *xref, int num, int gen) +{ + fz_error error; + fz_stream *stm; + fz_obj *dict, *obj; + int i, len; + + error = pdf_openstream(&stm, xref, num, gen); + if (error) + return fz_rethrow(error, "cannot open stream (%d %d R)", num, gen); + + error = pdf_loadobject(&dict, xref, num, gen); + if (error) + return fz_rethrow(error, "cannot load stream dictionary (%d %d R)", num, gen); + + len = fz_toint(fz_dictgets(dict, "Length")); + obj = fz_dictgets(dict, "Filter"); + len = pdf_guessfilterlength(len, fz_toname(obj)); + for (i = 0; i < fz_arraylen(obj); i++) + len = pdf_guessfilterlength(len, fz_toname(fz_arrayget(obj, i))); + + fz_dropobj(dict); + + error = fz_readall(bufp, stm, len); + if (error) + { + fz_close(stm); + return fz_rethrow(error, "cannot read raw stream (%d %d R)", num, gen); + } + + fz_close(stm); + return fz_okay; +} diff --git a/pdf/pdf_type3.c b/pdf/pdf_type3.c new file mode 100644 index 00000000..9cc15596 --- /dev/null +++ b/pdf/pdf_type3.c @@ -0,0 +1,166 @@ +#include "fitz.h" +#include "mupdf.h" + +fz_error +pdf_loadtype3font(pdf_fontdesc **fontdescp, pdf_xref *xref, fz_obj *rdb, fz_obj *dict) +{ + fz_error error; + char buf[256]; + char *estrings[256]; + pdf_fontdesc *fontdesc; + fz_obj *encoding; + fz_obj *widths; + fz_obj *charprocs; + fz_obj *obj; + int first, last; + int i, k, n; + fz_rect bbox; + fz_matrix matrix; + + obj = fz_dictgets(dict, "Name"); + if (fz_isname(obj)) + fz_strlcpy(buf, fz_toname(obj), sizeof buf); + else + sprintf(buf, "Unnamed-T3"); + + fontdesc = pdf_newfontdesc(); + + pdf_logfont("load type3 font (%d %d R) ptr=%p {\n", fz_tonum(dict), fz_togen(dict), fontdesc); + pdf_logfont("name %s\n", buf); + + obj = fz_dictgets(dict, "FontMatrix"); + matrix = pdf_tomatrix(obj); + + pdf_logfont("matrix [%g %g %g %g %g %g]\n", + matrix.a, matrix.b, + matrix.c, matrix.d, + matrix.e, matrix.f); + + obj = fz_dictgets(dict, "FontBBox"); + bbox = pdf_torect(obj); + + pdf_logfont("bbox 
[%g %g %g %g]\n", + bbox.x0, bbox.y0, + bbox.x1, bbox.y1); + + fontdesc->font = fz_newtype3font(buf, matrix); + + fz_setfontbbox(fontdesc->font, bbox.x0, bbox.y0, bbox.x1, bbox.y1); + + /* Encoding */ + + for (i = 0; i < 256; i++) + estrings[i] = nil; + + encoding = fz_dictgets(dict, "Encoding"); + if (!encoding) + { + error = fz_throw("syntaxerror: Type3 font missing Encoding"); + goto cleanup; + } + + if (fz_isname(encoding)) + pdf_loadencoding(estrings, fz_toname(encoding)); + + if (fz_isdict(encoding)) + { + fz_obj *base, *diff, *item; + + base = fz_dictgets(encoding, "BaseEncoding"); + if (fz_isname(base)) + pdf_loadencoding(estrings, fz_toname(base)); + + diff = fz_dictgets(encoding, "Differences"); + if (fz_isarray(diff)) + { + n = fz_arraylen(diff); + k = 0; + for (i = 0; i < n; i++) + { + item = fz_arrayget(diff, i); + if (fz_isint(item)) + k = fz_toint(item); + if (fz_isname(item)) + estrings[k++] = fz_toname(item); + if (k < 0) k = 0; + if (k > 255) k = 255; + } + } + } + + fontdesc->encoding = pdf_newidentitycmap(0, 1); + + error = pdf_loadtounicode(fontdesc, xref, estrings, nil, fz_dictgets(dict, "ToUnicode")); + if (error) + goto cleanup; + + /* Widths */ + + pdf_setdefaulthmtx(fontdesc, 0); + + first = fz_toint(fz_dictgets(dict, "FirstChar")); + last = fz_toint(fz_dictgets(dict, "LastChar")); + + widths = fz_dictgets(dict, "Widths"); + if (!widths) + { + error = fz_throw("syntaxerror: Type3 font missing Widths"); + goto cleanup; + } + + for (i = first; i <= last; i++) + { + float w = fz_toreal(fz_arrayget(widths, i - first)); + w = fontdesc->font->t3matrix.a * w * 1000; + fontdesc->font->t3widths[i] = w * 0.001f; + pdf_addhmtx(fontdesc, i, i, w); + } + + pdf_endhmtx(fontdesc); + + /* Resources -- inherit page resources if the font doesn't have its own */ + + fontdesc->font->t3resources = fz_dictgets(dict, "Resources"); + if (!fontdesc->font->t3resources) + fontdesc->font->t3resources = rdb; + if (fontdesc->font->t3resources) + 
fz_keepobj(fontdesc->font->t3resources); + if (!fontdesc->font->t3resources) + fz_warn("no resource dictionary for type 3 font!"); + + fontdesc->font->t3xref = xref; + fontdesc->font->t3run = pdf_runglyph; + + /* CharProcs */ + + charprocs = fz_dictgets(dict, "CharProcs"); + if (!charprocs) + { + error = fz_throw("syntaxerror: Type3 font missing CharProcs"); + goto cleanup; + } + + for (i = 0; i < 256; i++) + { + if (estrings[i]) + { + obj = fz_dictgets(charprocs, estrings[i]); + if (pdf_isstream(xref, fz_tonum(obj), fz_togen(obj))) + { + error = pdf_loadstream(&fontdesc->font->t3procs[i], xref, fz_tonum(obj), fz_togen(obj)); + if (error) + goto cleanup; + } + } + } + + pdf_logfont("}\n"); + + *fontdescp = fontdesc; + return fz_okay; + +cleanup: + fz_dropfont(fontdesc->font); + fz_free(fontdesc); + return fz_rethrow(error, "cannot load type3 font (%d %d R)", fz_tonum(dict), fz_togen(dict)); +} diff --git a/pdf/pdf_unicode.c b/pdf/pdf_unicode.c new file mode 100644 index 00000000..6c2d6372 --- /dev/null +++ b/pdf/pdf_unicode.c @@ -0,0 +1,92 @@ +#include "fitz.h" +#include "mupdf.h" + +/* Load or synthesize ToUnicode map for fonts */ + +fz_error +pdf_loadtounicode(pdf_fontdesc *font, pdf_xref *xref, + char **strings, char *collection, fz_obj *cmapstm) +{ + fz_error error = fz_okay; + pdf_cmap *cmap; + int cid; + int ucsbuf[8]; + int ucslen; + int i; + + if (pdf_isstream(xref, fz_tonum(cmapstm), fz_togen(cmapstm))) + { + pdf_logfont("tounicode embedded cmap\n"); + + error = pdf_loadembeddedcmap(&cmap, xref, cmapstm); + if (error) + return fz_rethrow(error, "cannot load embedded cmap (%d %d R)", fz_tonum(cmapstm), fz_togen(cmapstm)); + + font->tounicode = pdf_newcmap(); + + for (i = 0; i < (strings ? 
256 : 65536); i++) + { + cid = pdf_lookupcmap(font->encoding, i); + if (cid >= 0) + { + ucslen = pdf_lookupcmapfull(cmap, i, ucsbuf); + if (ucslen == 1) + pdf_maprangetorange(font->tounicode, cid, cid, ucsbuf[0]); + if (ucslen > 1) + pdf_maponetomany(font->tounicode, cid, ucsbuf, ucslen); + } + } + + pdf_sortcmap(font->tounicode); + + pdf_dropcmap(cmap); + } + + else if (collection) + { + pdf_logfont("tounicode cid collection (%s)\n", collection); + + error = fz_okay; + + if (!strcmp(collection, "Adobe-CNS1")) + error = pdf_loadsystemcmap(&font->tounicode, "Adobe-CNS1-UCS2"); + else if (!strcmp(collection, "Adobe-GB1")) + error = pdf_loadsystemcmap(&font->tounicode, "Adobe-GB1-UCS2"); + else if (!strcmp(collection, "Adobe-Japan1")) + error = pdf_loadsystemcmap(&font->tounicode, "Adobe-Japan1-UCS2"); + else if (!strcmp(collection, "Adobe-Japan2")) + error = pdf_loadsystemcmap(&font->tounicode, "Adobe-Japan2-UCS2"); /* where's this? */ + else if (!strcmp(collection, "Adobe-Korea1")) + error = pdf_loadsystemcmap(&font->tounicode, "Adobe-Korea1-UCS2"); + + if (error) + return fz_rethrow(error, "cannot load tounicode system cmap %s-UCS2", collection); + } + + if (strings) + { + pdf_logfont("tounicode strings\n"); + + /* TODO one-to-many mappings */ + + font->ncidtoucs = 256; + font->cidtoucs = fz_calloc(256, sizeof(unsigned short)); + + for (i = 0; i < 256; i++) + { + if (strings[i]) + font->cidtoucs[i] = pdf_lookupagl(strings[i]); + else + font->cidtoucs[i] = '?'; + } + } + + if (!font->tounicode && !font->cidtoucs) + { + pdf_logfont("tounicode could not be loaded\n"); + /* TODO: synthesize a ToUnicode if it's a freetype font with + * cmap and/or post tables or if it has glyph names. 
*/ + } + + return fz_okay; +} diff --git a/pdf/pdf_xobject.c b/pdf/pdf_xobject.c new file mode 100644 index 00000000..44389efc --- /dev/null +++ b/pdf/pdf_xobject.c @@ -0,0 +1,115 @@ +#include "fitz.h" +#include "mupdf.h" + +fz_error +pdf_loadxobject(pdf_xobject **formp, pdf_xref *xref, fz_obj *dict) +{ + fz_error error; + pdf_xobject *form; + fz_obj *obj; + + if ((*formp = pdf_finditem(xref->store, pdf_dropxobject, dict))) + { + pdf_keepxobject(*formp); + return fz_okay; + } + + form = fz_malloc(sizeof(pdf_xobject)); + form->refs = 1; + form->resources = nil; + form->contents = nil; + form->colorspace = nil; + + pdf_logrsrc("load xobject (%d %d R) ptr=%p {\n", fz_tonum(dict), fz_togen(dict), form); + + /* Store item immediately, to avoid possible recursion if objects refer back to this one */ + pdf_storeitem(xref->store, pdf_keepxobject, pdf_dropxobject, dict, form); + + obj = fz_dictgets(dict, "BBox"); + form->bbox = pdf_torect(obj); + + pdf_logrsrc("bbox [%g %g %g %g]\n", + form->bbox.x0, form->bbox.y0, + form->bbox.x1, form->bbox.y1); + + obj = fz_dictgets(dict, "Matrix"); + if (obj) + form->matrix = pdf_tomatrix(obj); + else + form->matrix = fz_identity; + + pdf_logrsrc("matrix [%g %g %g %g %g %g]\n", + form->matrix.a, form->matrix.b, + form->matrix.c, form->matrix.d, + form->matrix.e, form->matrix.f); + + form->isolated = 0; + form->knockout = 0; + form->transparency = 0; + + obj = fz_dictgets(dict, "Group"); + if (obj) + { + fz_obj *attrs = obj; + + form->isolated = fz_tobool(fz_dictgets(attrs, "I")); + form->knockout = fz_tobool(fz_dictgets(attrs, "K")); + + obj = fz_dictgets(attrs, "S"); + if (fz_isname(obj) && !strcmp(fz_toname(obj), "Transparency")) + form->transparency = 1; + + obj = fz_dictgets(attrs, "CS"); + if (obj) + { + error = pdf_loadcolorspace(&form->colorspace, xref, obj); + if (error) + fz_catch(error, "cannot load xobject colorspace"); + pdf_logrsrc("colorspace %s\n", form->colorspace->name); + } + } + + pdf_logrsrc("isolated %d\n", 
form->isolated); + pdf_logrsrc("knockout %d\n", form->knockout); + pdf_logrsrc("transparency %d\n", form->transparency); + + form->resources = fz_dictgets(dict, "Resources"); + if (form->resources) + fz_keepobj(form->resources); + + error = pdf_loadstream(&form->contents, xref, fz_tonum(dict), fz_togen(dict)); + if (error) + { + pdf_removeitem(xref->store, pdf_dropxobject, dict); + pdf_dropxobject(form); + return fz_rethrow(error, "cannot load xobject content stream (%d %d R)", fz_tonum(dict), fz_togen(dict)); + } + + pdf_logrsrc("stream %d bytes\n", form->contents->len); + pdf_logrsrc("}\n"); + + *formp = form; + return fz_okay; +} + +pdf_xobject * +pdf_keepxobject(pdf_xobject *xobj) +{ + xobj->refs ++; + return xobj; +} + +void +pdf_dropxobject(pdf_xobject *xobj) +{ + if (xobj && --xobj->refs == 0) + { + if (xobj->colorspace) + fz_dropcolorspace(xobj->colorspace); + if (xobj->resources) + fz_dropobj(xobj->resources); + if (xobj->contents) + fz_dropbuffer(xobj->contents); + fz_free(xobj); + } +} diff --git a/pdf/pdf_xref.c b/pdf/pdf_xref.c new file mode 100644 index 00000000..2e98c97e --- /dev/null +++ b/pdf/pdf_xref.c @@ -0,0 +1,967 @@ +#include "fitz.h" +#include "mupdf.h" + +static inline int iswhite(int ch) +{ + return + ch == '\000' || ch == '\011' || ch == '\012' || + ch == '\014' || ch == '\015' || ch == '\040'; +} + +/* + * magic version tag and startxref + */ + +static fz_error +pdf_loadversion(pdf_xref *xref) +{ + char buf[20]; + + fz_seek(xref->file, 0, 0); + fz_readline(xref->file, buf, sizeof buf); + if (memcmp(buf, "%PDF-", 5) != 0) + return fz_throw("cannot recognize version marker"); + + xref->version = atof(buf + 5) * 10; + + pdf_logxref("version %d.%d\n", xref->version / 10, xref->version % 10); + + return fz_okay; +} + +static fz_error +pdf_readstartxref(pdf_xref *xref) +{ + unsigned char buf[1024]; + int t, n; + int i; + + fz_seek(xref->file, 0, 2); + + xref->filesize = fz_tell(xref->file); + + t = MAX(0, xref->filesize - (int)sizeof buf); + 
fz_seek(xref->file, t, 0); + + n = fz_read(xref->file, buf, sizeof buf); + if (n < 0) + return fz_rethrow(n, "cannot read from file"); + + for (i = n - 9; i >= 0; i--) + { + if (memcmp(buf + i, "startxref", 9) == 0) + { + i += 9; + while (iswhite(buf[i]) && i < n) + i ++; + xref->startxref = atoi((char*)(buf + i)); + pdf_logxref("startxref %d\n", xref->startxref); + return fz_okay; + } + } + + return fz_throw("cannot find startxref"); +} + +/* + * trailer dictionary + */ + +static fz_error +pdf_readoldtrailer(pdf_xref *xref, char *buf, int cap) +{ + fz_error error; + int len; + char *s; + int n; + int t; + int tok; + int c; + + pdf_logxref("load old xref format trailer\n"); + + fz_readline(xref->file, buf, cap); + if (strncmp(buf, "xref", 4) != 0) + return fz_throw("cannot find xref marker"); + + while (1) + { + c = fz_peekbyte(xref->file); + if (!(c >= '0' && c <= '9')) + break; + + fz_readline(xref->file, buf, cap); + s = buf; + fz_strsep(&s, " "); /* ignore ofs */ + if (!s) + return fz_throw("invalid range marker in xref"); + len = atoi(fz_strsep(&s, " ")); + + /* broken pdfs where the section is not on a separate line */ + if (s && *s != '\0') + fz_seek(xref->file, -(2 + (int)strlen(s)), 1); + + t = fz_tell(xref->file); + if (t < 0) + return fz_throw("cannot tell in file"); + + fz_seek(xref->file, t + 20 * len, 0); + } + + error = pdf_lex(&tok, xref->file, buf, cap, &n); + if (error) + return fz_rethrow(error, "cannot parse trailer"); + if (tok != PDF_TTRAILER) + return fz_throw("expected trailer marker"); + + error = pdf_lex(&tok, xref->file, buf, cap, &n); + if (error) + return fz_rethrow(error, "cannot parse trailer"); + if (tok != PDF_TODICT) + return fz_throw("expected trailer dictionary"); + + error = pdf_parsedict(&xref->trailer, xref, xref->file, buf, cap); + if (error) + return fz_rethrow(error, "cannot parse trailer"); + return fz_okay; +} + +static fz_error +pdf_readnewtrailer(pdf_xref *xref, char *buf, int cap) +{ + fz_error error; + + 
pdf_logxref("load new xref format trailer\n"); + + error = pdf_parseindobj(&xref->trailer, xref, xref->file, buf, cap, nil, nil, nil); + if (error) + return fz_rethrow(error, "cannot parse trailer (compressed)"); + return fz_okay; +} + +static fz_error +pdf_readtrailer(pdf_xref *xref, char *buf, int cap) +{ + fz_error error; + int c; + + fz_seek(xref->file, xref->startxref, 0); + + while (iswhite(fz_peekbyte(xref->file))) + fz_readbyte(xref->file); + + c = fz_peekbyte(xref->file); + if (c == 'x') + { + error = pdf_readoldtrailer(xref, buf, cap); + if (error) + return fz_rethrow(error, "cannot read trailer"); + } + else if (c >= '0' && c <= '9') + { + error = pdf_readnewtrailer(xref, buf, cap); + if (error) + return fz_rethrow(error, "cannot read trailer"); + } + else + { + return fz_throw("cannot recognize xref format: '%c'", c); + } + + return fz_okay; +} + +/* + * xref tables + */ + +void +pdf_resizexref(pdf_xref *xref, int newlen) +{ + int i; + + xref->table = fz_realloc(xref->table, newlen, sizeof(pdf_xrefentry)); + for (i = xref->len; i < newlen; i++) + { + xref->table[i].type = 0; + xref->table[i].ofs = 0; + xref->table[i].gen = 0; + xref->table[i].stmofs = 0; + xref->table[i].obj = nil; + } + xref->len = newlen; +} + +static fz_error +pdf_readoldxref(fz_obj **trailerp, pdf_xref *xref, char *buf, int cap) +{ + fz_error error; + int ofs, len; + char *s; + int n; + int tok; + int i; + int c; + + pdf_logxref("load old xref format\n"); + + fz_readline(xref->file, buf, cap); + if (strncmp(buf, "xref", 4) != 0) + return fz_throw("cannot find xref marker"); + + while (1) + { + c = fz_peekbyte(xref->file); + if (!(c >= '0' && c <= '9')) + break; + + fz_readline(xref->file, buf, cap); + s = buf; + ofs = atoi(fz_strsep(&s, " ")); + len = atoi(fz_strsep(&s, " ")); + + /* broken pdfs where the section is not on a separate line */ + if (s && *s != '\0') + { + fz_warn("broken xref section. 
proceeding anyway."); + fz_seek(xref->file, -(2 + (int)strlen(s)), 1); + } + + /* broken pdfs where size in trailer undershoots entries in xref sections */ + if (ofs + len > xref->len) + { + fz_warn("broken xref section, proceeding anyway."); + pdf_resizexref(xref, ofs + len); + } + + for (i = ofs; i < ofs + len; i++) + { + n = fz_read(xref->file, (unsigned char *) buf, 20); + if (n < 0) + return fz_rethrow(n, "cannot read xref table"); + if (!xref->table[i].type) + { + s = buf; + + /* broken pdfs where line start with white space */ + while (*s != '\0' && iswhite(*s)) + s++; + + xref->table[i].ofs = atoi(s); + xref->table[i].gen = atoi(s + 11); + xref->table[i].type = s[17]; + if (s[17] != 'f' && s[17] != 'n' && s[17] != 'o') + return fz_throw("unexpected xref type: %#x (%d %d R)", s[17], i, xref->table[i].gen); + } + } + } + + error = pdf_lex(&tok, xref->file, buf, cap, &n); + if (error) + return fz_rethrow(error, "cannot parse trailer"); + if (tok != PDF_TTRAILER) + return fz_throw("expected trailer marker"); + + error = pdf_lex(&tok, xref->file, buf, cap, &n); + if (error) + return fz_rethrow(error, "cannot parse trailer"); + if (tok != PDF_TODICT) + return fz_throw("expected trailer dictionary"); + + error = pdf_parsedict(trailerp, xref, xref->file, buf, cap); + if (error) + return fz_rethrow(error, "cannot parse trailer"); + return fz_okay; +} + +static fz_error +pdf_readnewxrefsection(pdf_xref *xref, fz_stream *stm, int i0, int i1, int w0, int w1, int w2) +{ + int i, n; + + if (i0 < 0 || i0 + i1 > xref->len) + return fz_throw("xref stream has too many entries"); + + for (i = i0; i < i0 + i1; i++) + { + int a = 0; + int b = 0; + int c = 0; + + if (fz_iseof(stm)) + return fz_throw("truncated xref stream"); + + for (n = 0; n < w0; n++) + a = (a << 8) + fz_readbyte(stm); + for (n = 0; n < w1; n++) + b = (b << 8) + fz_readbyte(stm); + for (n = 0; n < w2; n++) + c = (c << 8) + fz_readbyte(stm); + + if (!xref->table[i].type) + { + int t = w0 ? 
a : 1; + xref->table[i].type = t == 0 ? 'f' : t == 1 ? 'n' : t == 2 ? 'o' : 0; + xref->table[i].ofs = w1 ? b : 0; + xref->table[i].gen = w2 ? c : 0; + } + } + + return fz_okay; +} + +static fz_error +pdf_readnewxref(fz_obj **trailerp, pdf_xref *xref, char *buf, int cap) +{ + fz_error error; + fz_stream *stm; + fz_obj *trailer; + fz_obj *index; + fz_obj *obj; + int num, gen, stmofs; + int size, w0, w1, w2; + int t; + + pdf_logxref("load new xref format\n"); + + error = pdf_parseindobj(&trailer, xref, xref->file, buf, cap, &num, &gen, &stmofs); + if (error) + return fz_rethrow(error, "cannot parse compressed xref stream object"); + + obj = fz_dictgets(trailer, "Size"); + if (!obj) + { + fz_dropobj(trailer); + return fz_throw("xref stream missing Size entry (%d %d R)", num, gen); + } + size = fz_toint(obj); + + if (size > xref->len) + { + pdf_resizexref(xref, size); + } + + if (num < 0 || num >= xref->len) + { + fz_dropobj(trailer); + return fz_throw("object id (%d %d R) out of range (0..%d)", num, gen, xref->len - 1); + } + + pdf_logxref("\tnum=%d gen=%d size=%d\n", num, gen, size); + + obj = fz_dictgets(trailer, "W"); + if (!obj) { + fz_dropobj(trailer); + return fz_throw("xref stream missing W entry (%d %d R)", num, gen); + } + w0 = fz_toint(fz_arrayget(obj, 0)); + w1 = fz_toint(fz_arrayget(obj, 1)); + w2 = fz_toint(fz_arrayget(obj, 2)); + + index = fz_dictgets(trailer, "Index"); + + error = pdf_openstreamat(&stm, xref, num, gen, trailer, stmofs); + if (error) + { + fz_dropobj(trailer); + return fz_rethrow(error, "cannot open compressed xref stream (%d %d R)", num, gen); + } + + if (!index) + { + error = pdf_readnewxrefsection(xref, stm, 0, size, w0, w1, w2); + if (error) + { + fz_close(stm); + fz_dropobj(trailer); + return fz_rethrow(error, "cannot read xref stream (%d %d R)", num, gen); + } + } + else + { + for (t = 0; t < fz_arraylen(index); t += 2) + { + int i0 = fz_toint(fz_arrayget(index, t + 0)); + int i1 = fz_toint(fz_arrayget(index, t + 1)); + error = 
pdf_readnewxrefsection(xref, stm, i0, i1, w0, w1, w2); + if (error) + { + fz_close(stm); + fz_dropobj(trailer); + return fz_rethrow(error, "cannot read xref stream section (%d %d R)", num, gen); + } + } + } + + fz_close(stm); + + *trailerp = trailer; + + return fz_okay; +} + +static fz_error +pdf_readxref(fz_obj **trailerp, pdf_xref *xref, int ofs, char *buf, int cap) +{ + fz_error error; + int c; + + fz_seek(xref->file, ofs, 0); + + while (iswhite(fz_peekbyte(xref->file))) + fz_readbyte(xref->file); + + c = fz_peekbyte(xref->file); + if (c == 'x') + { + error = pdf_readoldxref(trailerp, xref, buf, cap); + if (error) + return fz_rethrow(error, "cannot read xref (ofs=%d)", ofs); + } + else if (c >= '0' && c <= '9') + { + error = pdf_readnewxref(trailerp, xref, buf, cap); + if (error) + return fz_rethrow(error, "cannot read xref (ofs=%d)", ofs); + } + else + { + return fz_throw("cannot recognize xref format"); + } + + return fz_okay; +} + +static fz_error +pdf_readxrefsections(pdf_xref *xref, int ofs, char *buf, int cap) +{ + fz_error error; + fz_obj *trailer; + fz_obj *prev; + fz_obj *xrefstm; + + error = pdf_readxref(&trailer, xref, ofs, buf, cap); + if (error) + return fz_rethrow(error, "cannot read xref section"); + + /* FIXME: do we overwrite free entries properly? 
*/ + xrefstm = fz_dictgets(trailer, "XRefStm"); + if (xrefstm) + { + pdf_logxref("load xrefstm\n"); + error = pdf_readxrefsections(xref, fz_toint(xrefstm), buf, cap); + if (error) + { + fz_dropobj(trailer); + return fz_rethrow(error, "cannot read /XRefStm xref section"); + } + } + + prev = fz_dictgets(trailer, "Prev"); + if (prev) + { + pdf_logxref("load prev at %#x\n", fz_toint(prev)); + error = pdf_readxrefsections(xref, fz_toint(prev), buf, cap); + if (error) + { + fz_dropobj(trailer); + return fz_rethrow(error, "cannot read /Prev xref section"); + } + } + + fz_dropobj(trailer); + return fz_okay; +} + +/* + * load xref tables from pdf + */ + +static fz_error +pdf_loadxref(pdf_xref *xref, char *buf, int bufsize) +{ + fz_error error; + fz_obj *size; + int i; + + error = pdf_loadversion(xref); + if (error) + return fz_rethrow(error, "cannot read version marker"); + + error = pdf_readstartxref(xref); + if (error) + return fz_rethrow(error, "cannot read startxref"); + + error = pdf_readtrailer(xref, buf, bufsize); + if (error) + return fz_rethrow(error, "cannot read trailer"); + + size = fz_dictgets(xref->trailer, "Size"); + if (!size) + return fz_throw("trailer missing Size entry"); + + pdf_logxref("\tsize %d at %#x\n", fz_toint(size), xref->startxref); + + pdf_resizexref(xref, fz_toint(size)); + + error = pdf_readxrefsections(xref, xref->startxref, buf, bufsize); + if (error) + return fz_rethrow(error, "cannot read xref"); + + /* broken pdfs where first object is not free */ + if (xref->table[0].type != 'f') + return fz_throw("first object in xref is not free"); + + /* broken pdfs where object offsets are out of range */ + for (i = 0; i < xref->len; i++) + if (xref->table[i].type == 'n') + if (xref->table[i].ofs <= 0 || xref->table[i].ofs >= xref->filesize) + return fz_throw("object offset out of range: %d (%d 0 R)", xref->table[i].ofs, i); + + return fz_okay; +} + +/* + * Initialize and load xref tables. + * If password is not null, try to decrypt. 
+ */ + +fz_error +pdf_openxrefwithstream(pdf_xref **xrefp, fz_stream *file, char *password) +{ + pdf_xref *xref; + fz_error error; + fz_obj *encrypt, *id; + fz_obj *dict, *obj; + int i, repaired = 0; + + /* install pdf specific callback */ + fz_resolveindirect = pdf_resolveindirect; + + xref = fz_malloc(sizeof(pdf_xref)); + + memset(xref, 0, sizeof(pdf_xref)); + + pdf_logxref("openxref %p\n", xref); + + xref->file = fz_keepstream(file); + + error = pdf_loadxref(xref, xref->scratch, sizeof xref->scratch); + if (error) + { + fz_catch(error, "trying to repair"); + if (xref->table) + { + fz_free(xref->table); + xref->table = nil; + xref->len = 0; + } + if (xref->trailer) + { + fz_dropobj(xref->trailer); + xref->trailer = nil; + } + error = pdf_repairxref(xref, xref->scratch, sizeof xref->scratch); + if (error) + { + pdf_freexref(xref); + return fz_rethrow(error, "cannot repair document"); + } + repaired = 1; + } + + encrypt = fz_dictgets(xref->trailer, "Encrypt"); + id = fz_dictgets(xref->trailer, "ID"); + if (fz_isdict(encrypt)) + { + error = pdf_newcrypt(&xref->crypt, encrypt, id); + if (error) + { + pdf_freexref(xref); + return fz_rethrow(error, "cannot decrypt document"); + } + } + + if (pdf_needspassword(xref)) + { + /* Only care if we have a password */ + if (password) + { + int okay = pdf_authenticatepassword(xref, password); + if (!okay) + { + pdf_freexref(xref); + return fz_throw("invalid password"); + } + } + } + + if (repaired) + { + int hasroot, hasinfo; + + error = pdf_repairobjstms(xref); + if (error) + { + pdf_freexref(xref); + return fz_rethrow(error, "cannot repair document"); + } + + hasroot = fz_dictgets(xref->trailer, "Root") != nil; + hasinfo = fz_dictgets(xref->trailer, "Info") != nil; + + for (i = 1; i < xref->len; i++) + { + if (xref->table[i].type == 0 || xref->table[i].type == 'f') + continue; + + error = pdf_loadobject(&dict, xref, i, 0); + if (error) + { + fz_catch(error, "ignoring broken object (%d 0 R)", i); + continue; + } + + if 
(!hasroot) + { + obj = fz_dictgets(dict, "Type"); + if (fz_isname(obj) && !strcmp(fz_toname(obj), "Catalog")) + { + pdf_logxref("found catalog: (%d %d R)\n", i, 0); + obj = fz_newindirect(i, 0, xref); + fz_dictputs(xref->trailer, "Root", obj); + fz_dropobj(obj); + } + } + + if (!hasinfo) + { + if (fz_dictgets(dict, "Creator") || fz_dictgets(dict, "Producer")) + { + pdf_logxref("found info: (%d %d R)\n", i, 0); + obj = fz_newindirect(i, 0, xref); + fz_dictputs(xref->trailer, "Info", obj); + fz_dropobj(obj); + } + } + + fz_dropobj(dict); + } + } + + *xrefp = xref; + return fz_okay; +} + +void +pdf_freexref(pdf_xref *xref) +{ + int i; + + pdf_logxref("freexref %p\n", xref); + + if (xref->store) + pdf_freestore(xref->store); + + if (xref->table) + { + for (i = 0; i < xref->len; i++) + { + if (xref->table[i].obj) + { + fz_dropobj(xref->table[i].obj); + xref->table[i].obj = nil; + } + } + fz_free(xref->table); + } + + if (xref->pageobjs) + { + for (i = 0; i < xref->pagelen; i++) + fz_dropobj(xref->pageobjs[i]); + fz_free(xref->pageobjs); + } + + if (xref->pagerefs) + { + for (i = 0; i < xref->pagelen; i++) + fz_dropobj(xref->pagerefs[i]); + fz_free(xref->pagerefs); + } + + if (xref->file) + fz_close(xref->file); + if (xref->trailer) + fz_dropobj(xref->trailer); + if (xref->crypt) + pdf_freecrypt(xref->crypt); + + fz_free(xref); +} + +void +pdf_debugxref(pdf_xref *xref) +{ + int i; + printf("xref\n0 %d\n", xref->len); + for (i = 0; i < xref->len; i++) + { + printf("%05d: %010d %05d %c (refs=%d, stmofs=%d)\n", i, + xref->table[i].ofs, + xref->table[i].gen, + xref->table[i].type ? xref->table[i].type : '-', + xref->table[i].obj ? 
xref->table[i].obj->refs : 0, + xref->table[i].stmofs); + } +} + +/* + * compressed object streams + */ + +static fz_error +pdf_loadobjstm(pdf_xref *xref, int num, int gen, char *buf, int cap) +{ + fz_error error; + fz_stream *stm; + fz_obj *objstm; + int *numbuf; + int *ofsbuf; + + fz_obj *obj; + int first; + int count; + int i, n; + int tok; + + pdf_logxref("loadobjstm (%d %d R)\n", num, gen); + + error = pdf_loadobject(&objstm, xref, num, gen); + if (error) + return fz_rethrow(error, "cannot load object stream object (%d %d R)", num, gen); + + count = fz_toint(fz_dictgets(objstm, "N")); + first = fz_toint(fz_dictgets(objstm, "First")); + + pdf_logxref("\tcount %d\n", count); + + numbuf = fz_calloc(count, sizeof(int)); + ofsbuf = fz_calloc(count, sizeof(int)); + + error = pdf_openstream(&stm, xref, num, gen); + if (error) + { + error = fz_rethrow(error, "cannot open object stream (%d %d R)", num, gen); + goto cleanupbuf; + } + + for (i = 0; i < count; i++) + { + error = pdf_lex(&tok, stm, buf, cap, &n); + if (error || tok != PDF_TINT) + { + error = fz_rethrow(error, "corrupt object stream (%d %d R)", num, gen); + goto cleanupstm; + } + numbuf[i] = atoi(buf); + + error = pdf_lex(&tok, stm, buf, cap, &n); + if (error || tok != PDF_TINT) + { + error = fz_rethrow(error, "corrupt object stream (%d %d R)", num, gen); + goto cleanupstm; + } + ofsbuf[i] = atoi(buf); + } + + fz_seek(stm, first, 0); + + for (i = 0; i < count; i++) + { + fz_seek(stm, first + ofsbuf[i], 0); + + error = pdf_parsestmobj(&obj, xref, stm, buf, cap); + if (error) + { + error = fz_rethrow(error, "cannot parse object %d in stream (%d %d R)", i, num, gen); + goto cleanupstm; + } + + if (numbuf[i] < 1 || numbuf[i] >= xref->len) + { + fz_dropobj(obj); + error = fz_throw("object id (%d 0 R) out of range (0..%d)", numbuf[i], xref->len - 1); + goto cleanupstm; + } + + if (xref->table[numbuf[i]].type == 'o' && xref->table[numbuf[i]].ofs == num) + { + if (xref->table[numbuf[i]].obj) + 
fz_dropobj(xref->table[numbuf[i]].obj); + xref->table[numbuf[i]].obj = obj; + } + else + { + fz_dropobj(obj); + } + } + + fz_close(stm); + fz_free(ofsbuf); + fz_free(numbuf); + fz_dropobj(objstm); + return fz_okay; + +cleanupstm: + fz_close(stm); +cleanupbuf: + fz_free(ofsbuf); + fz_free(numbuf); + fz_dropobj(objstm); + return error; /* already rethrown */ +} + +/* + * object loading + */ + +fz_error +pdf_cacheobject(pdf_xref *xref, int num, int gen) +{ + fz_error error; + pdf_xrefentry *x; + int rnum, rgen; + + if (num < 0 || num >= xref->len) + return fz_throw("object out of range (%d %d R); xref size %d", num, gen, xref->len); + + x = &xref->table[num]; + + if (x->obj) + return fz_okay; + + if (x->type == 'f') + { + x->obj = fz_newnull(); + return fz_okay; + } + else if (x->type == 'n') + { + fz_seek(xref->file, x->ofs, 0); + + error = pdf_parseindobj(&x->obj, xref, xref->file, xref->scratch, sizeof xref->scratch, + &rnum, &rgen, &x->stmofs); + if (error) + return fz_rethrow(error, "cannot parse object (%d %d R)", num, gen); + + if (rnum != num) + return fz_throw("found object (%d %d R) instead of (%d %d R)", rnum, rgen, num, gen); + + if (xref->crypt) + pdf_cryptobj(xref->crypt, x->obj, num, gen); + } + else if (x->type == 'o') + { + if (!x->obj) + { + error = pdf_loadobjstm(xref, x->ofs, 0, xref->scratch, sizeof xref->scratch); + if (error) + return fz_rethrow(error, "cannot load object stream containing object (%d %d R)", num, gen); + if (!x->obj) + return fz_throw("object (%d %d R) was not found in its object stream", num, gen); + } + } + else + { + return fz_throw("assert: corrupt xref struct"); + } + + return fz_okay; +} + +fz_error +pdf_loadobject(fz_obj **objp, pdf_xref *xref, int num, int gen) +{ + fz_error error; + + error = pdf_cacheobject(xref, num, gen); + if (error) + return fz_rethrow(error, "cannot load object (%d %d R) into cache", num, gen); + + assert(xref->table[num].obj); + + *objp = fz_keepobj(xref->table[num].obj); + + return fz_okay; +} + 
+fz_obj * +pdf_resolveindirect(fz_obj *ref) +{ + if (fz_isindirect(ref)) + { + pdf_xref *xref = ref->u.r.xref; + int num = fz_tonum(ref); + int gen = fz_togen(ref); + if (xref) + { + fz_error error = pdf_cacheobject(xref, num, gen); + if (error) + { + fz_catch(error, "cannot load object (%d %d R) into cache", num, gen); + return ref; + } + if (xref->table[num].obj) + return xref->table[num].obj; + } + } + return ref; +} + +/* Replace numbered object -- for use by pdfclean and similar tools */ +void +pdf_updateobject(pdf_xref *xref, int num, int gen, fz_obj *newobj) +{ + pdf_xrefentry *x; + + if (num < 0 || num >= xref->len) + { + fz_warn("object out of range (%d %d R); xref size %d", num, gen, xref->len); + return; + } + + x = &xref->table[num]; + + if (x->obj) + fz_dropobj(x->obj); + + x->obj = fz_keepobj(newobj); + x->type = 'n'; + x->ofs = 0; +} + +/* + * Convenience function to open a file then call pdf_openxrefwithstream. + */ + +fz_error +pdf_openxref(pdf_xref **xrefp, char *filename, char *password) +{ + fz_error error; + pdf_xref *xref; + fz_stream *file; + + file = fz_openfile(filename); + if (!file) + return fz_throw("cannot open file '%s': %s", filename, strerror(errno)); + + error = pdf_openxrefwithstream(&xref, file, password); + if (error) + return fz_rethrow(error, "cannot load document '%s'", filename); + + fz_close(file); + + *xrefp = xref; + return fz_okay; +} |