summaryrefslogtreecommitdiff
path: root/source/fitz/string.c
diff options
context:
space:
mode:
authorTor Andersson <tor.andersson@artifex.com>2013-06-19 15:29:44 +0200
committerTor Andersson <tor.andersson@artifex.com>2013-06-20 16:45:35 +0200
commit0a927854a10e1e6b9770a81e2e1d9f3093631757 (patch)
tree3d65d820d9fdba2d0d394d99c36290c851b78ca0 /source/fitz/string.c
parent1ae8f19179c5f0f8c6352b3c7855465325d5449a (diff)
downloadmupdf-0a927854a10e1e6b9770a81e2e1d9f3093631757.tar.xz
Rearrange source files.
Diffstat (limited to 'source/fitz/string.c')
-rw-r--r--source/fitz/string.c264
1 files changed, 264 insertions, 0 deletions
diff --git a/source/fitz/string.c b/source/fitz/string.c
new file mode 100644
index 00000000..b29cdbdc
--- /dev/null
+++ b/source/fitz/string.c
@@ -0,0 +1,264 @@
+#include "mupdf/fitz.h"
+
+char *
+fz_strsep(char **stringp, const char *delim)
+{
+ char *ret = *stringp;
+ if (!ret) return NULL;
+ if ((*stringp = strpbrk(*stringp, delim)))
+ *((*stringp)++) = '\0';
+ return ret;
+}
+
+int
+fz_strlcpy(char *dst, const char *src, int siz)
+{
+ register char *d = dst;
+ register const char *s = src;
+ register int n = siz;
+
+ /* Copy as many bytes as will fit */
+ if (n != 0 && --n != 0) {
+ do {
+ if ((*d++ = *s++) == 0)
+ break;
+ } while (--n != 0);
+ }
+
+ /* Not enough room in dst, add NUL and traverse rest of src */
+ if (n == 0) {
+ if (siz != 0)
+ *d = '\0'; /* NUL-terminate dst */
+ while (*s++)
+ ;
+ }
+
+ return(s - src - 1); /* count does not include NUL */
+}
+
+int
+fz_strlcat(char *dst, const char *src, int siz)
+{
+ register char *d = dst;
+ register const char *s = src;
+ register int n = siz;
+ int dlen;
+
+ /* Find the end of dst and adjust bytes left but don't go past end */
+ while (*d != '\0' && n-- != 0)
+ d++;
+ dlen = d - dst;
+ n = siz - dlen;
+
+ if (n == 0)
+ return dlen + strlen(s);
+ while (*s != '\0') {
+ if (n != 1) {
+ *d++ = *s;
+ n--;
+ }
+ s++;
+ }
+ *d = '\0';
+
+ return dlen + (s - src); /* count does not include NUL */
+}
+
+enum
+{
+ UTFmax = 4, /* maximum bytes per rune */
+ Runesync = 0x80, /* cannot represent part of a UTF sequence (<) */
+ Runeself = 0x80, /* rune and UTF sequences are the same (<) */
+ Runeerror = 0xFFFD, /* decoding error in UTF */
+ Runemax = 0x10FFFF, /* maximum rune value */
+};
+
+enum
+{
+ Bit1 = 7,
+ Bitx = 6,
+ Bit2 = 5,
+ Bit3 = 4,
+ Bit4 = 3,
+ Bit5 = 2,
+
+ T1 = ((1<<(Bit1+1))-1) ^ 0xFF, /* 0000 0000 */
+ Tx = ((1<<(Bitx+1))-1) ^ 0xFF, /* 1000 0000 */
+ T2 = ((1<<(Bit2+1))-1) ^ 0xFF, /* 1100 0000 */
+ T3 = ((1<<(Bit3+1))-1) ^ 0xFF, /* 1110 0000 */
+ T4 = ((1<<(Bit4+1))-1) ^ 0xFF, /* 1111 0000 */
+ T5 = ((1<<(Bit5+1))-1) ^ 0xFF, /* 1111 1000 */
+
+ Rune1 = (1<<(Bit1+0*Bitx))-1, /* 0000 0000 0111 1111 */
+ Rune2 = (1<<(Bit2+1*Bitx))-1, /* 0000 0111 1111 1111 */
+ Rune3 = (1<<(Bit3+2*Bitx))-1, /* 1111 1111 1111 1111 */
+ Rune4 = (1<<(Bit4+3*Bitx))-1, /* 0001 1111 1111 1111 1111 1111 */
+
+ Maskx = (1<<Bitx)-1, /* 0011 1111 */
+ Testx = Maskx ^ 0xFF, /* 1100 0000 */
+
+ Bad = Runeerror,
+};
+
+int
+fz_chartorune(int *rune, const char *str)
+{
+ int c, c1, c2, c3;
+ long l;
+
+ /*
+ * one character sequence
+ * 00000-0007F => T1
+ */
+ c = *(const unsigned char*)str;
+ if(c < Tx) {
+ *rune = c;
+ return 1;
+ }
+
+ /*
+ * two character sequence
+ * 0080-07FF => T2 Tx
+ */
+ c1 = *(const unsigned char*)(str+1) ^ Tx;
+ if(c1 & Testx)
+ goto bad;
+ if(c < T3) {
+ if(c < T2)
+ goto bad;
+ l = ((c << Bitx) | c1) & Rune2;
+ if(l <= Rune1)
+ goto bad;
+ *rune = l;
+ return 2;
+ }
+
+ /*
+ * three character sequence
+ * 0800-FFFF => T3 Tx Tx
+ */
+ c2 = *(const unsigned char*)(str+2) ^ Tx;
+ if(c2 & Testx)
+ goto bad;
+ if(c < T4) {
+ l = ((((c << Bitx) | c1) << Bitx) | c2) & Rune3;
+ if(l <= Rune2)
+ goto bad;
+ *rune = l;
+ return 3;
+ }
+
+ /*
+ * four character sequence (21-bit value)
+ * 10000-1FFFFF => T4 Tx Tx Tx
+ */
+ c3 = *(const unsigned char*)(str+3) ^ Tx;
+ if (c3 & Testx)
+ goto bad;
+ if (c < T5) {
+ l = ((((((c << Bitx) | c1) << Bitx) | c2) << Bitx) | c3) & Rune4;
+ if (l <= Rune3)
+ goto bad;
+ *rune = l;
+ return 4;
+ }
+ /*
+ * Support for 5-byte or longer UTF-8 would go here, but
+ * since we don't have that, we'll just fall through to bad.
+ */
+
+ /*
+ * bad decoding
+ */
+bad:
+ *rune = Bad;
+ return 1;
+}
+
+int
+fz_runetochar(char *str, int rune)
+{
+ /* Runes are signed, so convert to unsigned for range check. */
+ unsigned long c = (unsigned long)rune;
+
+ /*
+ * one character sequence
+ * 00000-0007F => 00-7F
+ */
+ if(c <= Rune1) {
+ str[0] = c;
+ return 1;
+ }
+
+ /*
+ * two character sequence
+ * 0080-07FF => T2 Tx
+ */
+ if(c <= Rune2) {
+ str[0] = T2 | (c >> 1*Bitx);
+ str[1] = Tx | (c & Maskx);
+ return 2;
+ }
+
+ /*
+ * If the Rune is out of range, convert it to the error rune.
+ * Do this test here because the error rune encodes to three bytes.
+ * Doing it earlier would duplicate work, since an out of range
+ * Rune wouldn't have fit in one or two bytes.
+ */
+ if (c > Runemax)
+ c = Runeerror;
+
+ /*
+ * three character sequence
+ * 0800-FFFF => T3 Tx Tx
+ */
+ if (c <= Rune3) {
+ str[0] = T3 | (c >> 2*Bitx);
+ str[1] = Tx | ((c >> 1*Bitx) & Maskx);
+ str[2] = Tx | (c & Maskx);
+ return 3;
+ }
+
+ /*
+ * four character sequence (21-bit value)
+ * 10000-1FFFFF => T4 Tx Tx Tx
+ */
+ str[0] = T4 | (c >> 3*Bitx);
+ str[1] = Tx | ((c >> 2*Bitx) & Maskx);
+ str[2] = Tx | ((c >> 1*Bitx) & Maskx);
+ str[3] = Tx | (c & Maskx);
+ return 4;
+}
+
+int
+fz_runelen(int c)
+{
+ char str[10];
+ return fz_runetochar(str, c);
+}
+
+float fz_atof(const char *s)
+{
+ double d;
+
+ /* The errno voodoo here checks for us reading numbers that are too
+ * big to fit into a double. The checks for FLT_MAX ensure that we
+ * don't read a number that's OK as a double and then become invalid
+ * as we convert to a float. */
+ errno = 0;
+ d = strtod(s, NULL);
+ if (errno == ERANGE || isnan(d)) {
+ /* Return 1.0, as it's a small known value that won't cause a divide by 0. */
+ return 1.0;
+ }
+ d = fz_clampd(d, -FLT_MAX, FLT_MAX);
+ return (float)d;
+}
+
+int fz_atoi(const char *s)
+{
+ if (s == NULL)
+ return 0;
+ return atoi(s);
+}