From 0cdc74747cd941b5879a4d2de6c7a71a9b99a23b Mon Sep 17 00:00:00 2001 From: Tor Andersson Date: Tue, 6 Sep 2011 12:41:44 +0200 Subject: Support empty and little-endian UTF-16 strings. Also don't read out of bounds if the string is cut short in the middle of the last two-byte character. --- pdf/pdf_parse.c | 34 +++++++++++++++++++++++++++------- 1 file changed, 27 insertions(+), 7 deletions(-) diff --git a/pdf/pdf_parse.c b/pdf/pdf_parse.c index d6497bc4..54ed97c1 100644 --- a/pdf/pdf_parse.c +++ b/pdf/pdf_parse.c @@ -40,11 +40,11 @@ pdf_to_utf8(fz_obj *src) int ucs; int i; - if (srclen > 2 && srcptr[0] == 254 && srcptr[1] == 255) + if (srclen >= 2 && srcptr[0] == 254 && srcptr[1] == 255) { for (i = 2; i < srclen; i += 2) { - ucs = (srcptr[i] << 8) | srcptr[i+1]; + ucs = srcptr[i] << 8 | srcptr[i+1]; dstlen += runelen(ucs); } @@ -52,11 +52,26 @@ pdf_to_utf8(fz_obj *src) for (i = 2; i < srclen; i += 2) { - ucs = (srcptr[i] << 8) | srcptr[i+1]; + ucs = srcptr[i] << 8 | srcptr[i+1]; dstptr += runetochar(dstptr, &ucs); } } + else if (srclen >= 2 && srcptr[0] == 255 && srcptr[1] == 254) + { + for (i = 2; i + 1 < srclen; i += 2) + { + ucs = srcptr[i] | srcptr[i+1] << 8; + dstlen += runelen(ucs); + } + dstptr = dst = fz_malloc(dstlen + 1); + + for (i = 2; i + 1 < srclen; i += 2) + { + ucs = srcptr[i] | srcptr[i+1] << 8; + dstptr += runetochar(dstptr, &ucs); + } + } else { for (i = 0; i < srclen; i++) @@ -84,13 +99,18 @@ pdf_to_ucs2(fz_obj *src) int srclen = fz_to_str_len(src); int i; - if (srclen > 2 && srcptr[0] == 254 && srcptr[1] == 255) + if (srclen >= 2 && srcptr[0] == 254 && srcptr[1] == 255) { dstptr = dst = fz_calloc((srclen - 2) / 2 + 1, sizeof(short)); - for (i = 2; i < srclen; i += 2) - *dstptr++ = (srcptr[i] << 8) | srcptr[i+1]; + for (i = 2; i + 1 < srclen; i += 2) + *dstptr++ = srcptr[i] << 8 | srcptr[i+1]; + } + else if (srclen >= 2 && srcptr[0] == 255 && srcptr[1] == 254) + { + dstptr = dst = fz_calloc((srclen - 2) / 2 + 1, sizeof(short)); + for (i = 2; i + 1 < srclen; i += 2) + *dstptr++ = srcptr[i] | srcptr[i+1] << 8; } - else { dstptr = dst = fz_calloc(srclen + 1, sizeof(short)); -- cgit v1.2.3