summaryrefslogtreecommitdiff
path: root/pdf/pdf_cmap_parse.c
diff options
context:
space:
mode:
authorRobin Watts <robin.watts@artifex.com>2012-02-23 15:07:04 +0000
committerRobin Watts <robin.watts@artifex.com>2012-02-25 08:14:40 -0800
commit9ef2a68e77842456ab30594a9a8d2c0535314715 (patch)
tree630c1b23b64ed15984b6c065635929f08bf613e8 /pdf/pdf_cmap_parse.c
parentd28129c2ff6a78c50877426f90167d63334ab18a (diff)
downloadmupdf-9ef2a68e77842456ab30594a9a8d2c0535314715.tar.xz
Revamp pdf lexing code
A huge amount (20%+ on some files) of our runtime is spent in fz_atof. A survey of results on the net suggests we will get much better speed by writing our own atof. Part of the job of doing this involves parsing the string to identify the component parts of the number - ludicrously, we are already doing this as part of the lexing process, so it would make sense to do the atoi/atof as part of this process. In order to do this, we need somewhere to store the lexed results; rather than add a float * and an int * to every single pdf_lex call, we generalise the calls to pass a pdf_lexbuf * pointer instead of separate buffer/max/string length pointers. This should help us overall.
Diffstat (limited to 'pdf/pdf_cmap_parse.c')
-rw-r--r--pdf/pdf_cmap_parse.c130
1 files changed, 65 insertions, 65 deletions
diff --git a/pdf/pdf_cmap_parse.c b/pdf/pdf_cmap_parse.c
index fb37c4a9..5c21393e 100644
--- a/pdf/pdf_cmap_parse.c
+++ b/pdf/pdf_cmap_parse.c
@@ -49,14 +49,14 @@ pdf_code_from_string(char *buf, int len)
}
static int
-pdf_lex_cmap(fz_stream *file, char *buf, int n, int *sl)
+pdf_lex_cmap(fz_stream *file, pdf_lexbuf *buf)
{
- int tok = pdf_lex(file, buf, n, sl);
+ int tok = pdf_lex(file, buf);
/* RJW: Lost debugging here: "cannot parse cmap token" */
if (tok == PDF_TOK_KEYWORD)
- tok = pdf_cmap_token_from_keyword(buf);
+ tok = pdf_cmap_token_from_keyword(buf->scratch);
return tok;
}
@@ -64,15 +64,15 @@ pdf_lex_cmap(fz_stream *file, char *buf, int n, int *sl)
static void
pdf_parse_cmap_name(fz_context *ctx, pdf_cmap *cmap, fz_stream *file)
{
- char buf[256];
+ pdf_lexbuf buf;
int tok;
- int len;
- tok = pdf_lex_cmap(file, buf, sizeof buf, &len);
+ buf.size = PDF_LEXBUF_SMALL;
+ tok = pdf_lex_cmap(file, &buf);
/* RJW: Lost debugging: "syntaxerror in cmap" */
if (tok == PDF_TOK_NAME)
- fz_strlcpy(cmap->cmap_name, buf, sizeof(cmap->cmap_name));
+ fz_strlcpy(cmap->cmap_name, buf.scratch, sizeof(cmap->cmap_name));
else
fz_warn(ctx, "expected name after CMapName in cmap");
}
@@ -80,15 +80,15 @@ pdf_parse_cmap_name(fz_context *ctx, pdf_cmap *cmap, fz_stream *file)
static void
pdf_parse_wmode(fz_context *ctx, pdf_cmap *cmap, fz_stream *file)
{
- char buf[256];
+ pdf_lexbuf buf;
int tok;
- int len;
- tok = pdf_lex_cmap(file, buf, sizeof buf, &len);
+ buf.size = PDF_LEXBUF_SMALL;
+ tok = pdf_lex_cmap(file, &buf);
/* RJW: Lost debugging: "syntaxerror in cmap" */
if (tok == PDF_TOK_INT)
- pdf_set_wmode(ctx, cmap, atoi(buf));
+ pdf_set_wmode(ctx, cmap, buf.i);
else
fz_warn(ctx, "expected integer after WMode in cmap");
}
@@ -96,14 +96,14 @@ pdf_parse_wmode(fz_context *ctx, pdf_cmap *cmap, fz_stream *file)
static void
pdf_parse_codespace_range(fz_context *ctx, pdf_cmap *cmap, fz_stream *file)
{
- char buf[256];
+ pdf_lexbuf buf;
int tok;
- int len;
int lo, hi;
+ buf.size = PDF_LEXBUF_SMALL;
while (1)
{
- tok = pdf_lex_cmap(file, buf, sizeof buf, &len);
+ tok = pdf_lex_cmap(file, &buf);
/* RJW: Lost debugging: "syntaxerror in cmap" */
if (tok == TOK_END_CODESPACE_RANGE)
@@ -111,13 +111,13 @@ pdf_parse_codespace_range(fz_context *ctx, pdf_cmap *cmap, fz_stream *file)
else if (tok == PDF_TOK_STRING)
{
- lo = pdf_code_from_string(buf, len);
- tok = pdf_lex_cmap(file, buf, sizeof buf, &len);
+ lo = pdf_code_from_string(buf.scratch, buf.len);
+ tok = pdf_lex_cmap(file, &buf);
/* RJW: Lost debugging: "syntaxerror in cmap" */
if (tok == PDF_TOK_STRING)
{
- hi = pdf_code_from_string(buf, len);
- pdf_add_codespace(ctx, cmap, lo, hi, len);
+ hi = pdf_code_from_string(buf.scratch, buf.len);
+ pdf_add_codespace(ctx, cmap, lo, hi, buf.len);
}
else break;
}
@@ -131,14 +131,14 @@ pdf_parse_codespace_range(fz_context *ctx, pdf_cmap *cmap, fz_stream *file)
static void
pdf_parse_cid_range(fz_context *ctx, pdf_cmap *cmap, fz_stream *file)
{
- char buf[256];
+ pdf_lexbuf buf;
int tok;
- int len;
int lo, hi, dst;
+ buf.size = PDF_LEXBUF_SMALL;
while (1)
{
- tok = pdf_lex_cmap(file, buf, sizeof buf, &len);
+ tok = pdf_lex_cmap(file, &buf);
/* RJW: Lost debugging: "syntaxerror in cmap" */
if (tok == TOK_END_CID_RANGE)
@@ -147,21 +147,21 @@ pdf_parse_cid_range(fz_context *ctx, pdf_cmap *cmap, fz_stream *file)
else if (tok != PDF_TOK_STRING)
fz_throw(ctx, "expected string or endcidrange");
- lo = pdf_code_from_string(buf, len);
+ lo = pdf_code_from_string(buf.scratch, buf.len);
- tok = pdf_lex_cmap(file, buf, sizeof buf, &len);
+ tok = pdf_lex_cmap(file, &buf);
/* RJW: Lost debugging: "syntaxerror in cmap" */
if (tok != PDF_TOK_STRING)
fz_throw(ctx, "expected string");
- hi = pdf_code_from_string(buf, len);
+ hi = pdf_code_from_string(buf.scratch, buf.len);
- tok = pdf_lex_cmap(file, buf, sizeof buf, &len);
+ tok = pdf_lex_cmap(file, &buf);
/* RJW: Lost debugging: "syntaxerror in cmap" */
if (tok != PDF_TOK_INT)
fz_throw(ctx, "expected integer");
- dst = atoi(buf);
+ dst = buf.i;
pdf_map_range_to_range(ctx, cmap, lo, hi, dst);
}
@@ -170,14 +170,14 @@ pdf_parse_cid_range(fz_context *ctx, pdf_cmap *cmap, fz_stream *file)
static void
pdf_parse_cid_char(fz_context *ctx, pdf_cmap *cmap, fz_stream *file)
{
- char buf[256];
+ pdf_lexbuf buf;
int tok;
- int len;
int src, dst;
+ buf.size = PDF_LEXBUF_SMALL;
while (1)
{
- tok = pdf_lex_cmap(file, buf, sizeof buf, &len);
+ tok = pdf_lex_cmap(file, &buf);
/* RJW: "syntaxerror in cmap" */
if (tok == TOK_END_CID_CHAR)
@@ -186,15 +186,15 @@ pdf_parse_cid_char(fz_context *ctx, pdf_cmap *cmap, fz_stream *file)
else if (tok != PDF_TOK_STRING)
fz_throw(ctx, "expected string or endcidchar");
- src = pdf_code_from_string(buf, len);
+ src = pdf_code_from_string(buf.scratch, buf.len);
- tok = pdf_lex_cmap(file, buf, sizeof buf, &len);
+ tok = pdf_lex_cmap(file, &buf);
/* RJW: "syntaxerror in cmap" */
if (tok != PDF_TOK_INT)
fz_throw(ctx, "expected integer");
- dst = atoi(buf);
+ dst = buf.i;
pdf_map_range_to_range(ctx, cmap, src, src, dst);
}
@@ -203,15 +203,15 @@ pdf_parse_cid_char(fz_context *ctx, pdf_cmap *cmap, fz_stream *file)
static void
pdf_parse_bf_range_array(fz_context *ctx, pdf_cmap *cmap, fz_stream *file, int lo, int hi)
{
- char buf[256];
+ pdf_lexbuf buf;
int tok;
- int len;
int dst[256];
int i;
+ buf.size = PDF_LEXBUF_SMALL;
while (1)
{
- tok = pdf_lex_cmap(file, buf, sizeof buf, &len);
+ tok = pdf_lex_cmap(file, &buf);
/* RJW: "syntaxerror in cmap" */
if (tok == PDF_TOK_CLOSE_ARRAY)
@@ -221,12 +221,12 @@ pdf_parse_bf_range_array(fz_context *ctx, pdf_cmap *cmap, fz_stream *file, int l
else if (tok != PDF_TOK_STRING)
fz_throw(ctx, "expected string or ]");
- if (len / 2)
+ if (buf.len / 2)
{
- for (i = 0; i < len / 2; i++)
- dst[i] = pdf_code_from_string(buf + i * 2, 2);
+ for (i = 0; i < buf.len / 2; i++)
+ dst[i] = pdf_code_from_string(&buf.scratch[i * 2], 2);
- pdf_map_one_to_many(ctx, cmap, lo, dst, len / 2);
+ pdf_map_one_to_many(ctx, cmap, lo, dst, buf.len / 2);
}
lo ++;
@@ -236,14 +236,14 @@ pdf_parse_bf_range_array(fz_context *ctx, pdf_cmap *cmap, fz_stream *file, int l
static void
pdf_parse_bf_range(fz_context *ctx, pdf_cmap *cmap, fz_stream *file)
{
- char buf[256];
+ pdf_lexbuf buf;
int tok;
- int len;
int lo, hi, dst;
+ buf.size = PDF_LEXBUF_SMALL;
while (1)
{
- tok = pdf_lex_cmap(file, buf, sizeof buf, &len);
+ tok = pdf_lex_cmap(file, &buf);
/* RJW: "syntaxerror in cmap" */
if (tok == TOK_END_BF_RANGE)
@@ -252,23 +252,23 @@ pdf_parse_bf_range(fz_context *ctx, pdf_cmap *cmap, fz_stream *file)
else if (tok != PDF_TOK_STRING)
fz_throw(ctx, "expected string or endbfrange");
- lo = pdf_code_from_string(buf, len);
+ lo = pdf_code_from_string(buf.scratch, buf.len);
- tok = pdf_lex_cmap(file, buf, sizeof buf, &len);
+ tok = pdf_lex_cmap(file, &buf);
/* RJW: "syntaxerror in cmap" */
if (tok != PDF_TOK_STRING)
fz_throw(ctx, "expected string");
- hi = pdf_code_from_string(buf, len);
+ hi = pdf_code_from_string(buf.scratch, buf.len);
- tok = pdf_lex_cmap(file, buf, sizeof buf, &len);
+ tok = pdf_lex_cmap(file, &buf);
/* RJW: "syntaxerror in cmap" */
if (tok == PDF_TOK_STRING)
{
- if (len == 2)
+ if (buf.len == 2)
{
- dst = pdf_code_from_string(buf, len);
+ dst = pdf_code_from_string(buf.scratch, buf.len);
pdf_map_range_to_range(ctx, cmap, lo, hi, dst);
}
else
@@ -276,10 +276,10 @@ pdf_parse_bf_range(fz_context *ctx, pdf_cmap *cmap, fz_stream *file)
int dststr[256];
int i;
- if (len / 2)
+ if (buf.len / 2)
{
- for (i = 0; i < len / 2; i++)
- dststr[i] = pdf_code_from_string(buf + i * 2, 2);
+ for (i = 0; i < buf.len / 2; i++)
+ dststr[i] = pdf_code_from_string(&buf.scratch[i * 2], 2);
while (lo <= hi)
{
@@ -307,16 +307,16 @@ pdf_parse_bf_range(fz_context *ctx, pdf_cmap *cmap, fz_stream *file)
static void
pdf_parse_bf_char(fz_context *ctx, pdf_cmap *cmap, fz_stream *file)
{
- char buf[256];
+ pdf_lexbuf buf;
int tok;
- int len;
int dst[256];
int src;
int i;
+ buf.size = PDF_LEXBUF_SMALL;
while (1)
{
- tok = pdf_lex_cmap(file, buf, sizeof buf, &len);
+ tok = pdf_lex_cmap(file, &buf);
/* RJW: "syntaxerror in cmap" */
if (tok == TOK_END_BF_CHAR)
@@ -325,18 +325,18 @@ pdf_parse_bf_char(fz_context *ctx, pdf_cmap *cmap, fz_stream *file)
else if (tok != PDF_TOK_STRING)
fz_throw(ctx, "expected string or endbfchar");
- src = pdf_code_from_string(buf, len);
+ src = pdf_code_from_string(buf.scratch, buf.len);
- tok = pdf_lex_cmap(file, buf, sizeof buf, &len);
+ tok = pdf_lex_cmap(file, &buf);
/* RJW: "syntaxerror in cmap" */
/* Note: does not handle /dstName */
if (tok != PDF_TOK_STRING)
fz_throw(ctx, "expected string");
- if (len / 2)
+ if (buf.len / 2)
{
- for (i = 0; i < len / 2; i++)
- dst[i] = pdf_code_from_string(buf + i * 2, 2);
+ for (i = 0; i < buf.len / 2; i++)
+ dst[i] = pdf_code_from_string(&buf.scratch[i * 2], 2);
pdf_map_one_to_many(ctx, cmap, src, dst, i);
}
}
@@ -347,11 +347,11 @@ pdf_load_cmap(fz_context *ctx, fz_stream *file)
{
pdf_cmap *cmap;
char key[64];
- char buf[256];
+ pdf_lexbuf buf;
int tok;
- int len;
const char *where;
+ buf.size = PDF_LEXBUF_SMALL;
cmap = pdf_new_cmap(ctx);
strcpy(key, ".notdef");
@@ -363,25 +363,25 @@ pdf_load_cmap(fz_context *ctx, fz_stream *file)
while (1)
{
where = "";
- tok = pdf_lex_cmap(file, buf, sizeof buf, &len);
+ tok = pdf_lex_cmap(file, &buf);
if (tok == PDF_TOK_EOF || tok == TOK_END_CMAP)
break;
else if (tok == PDF_TOK_NAME)
{
- if (!strcmp(buf, "CMapName"))
+ if (!strcmp(buf.scratch, "CMapName"))
{
where = " after CMapName";
pdf_parse_cmap_name(ctx, cmap, file);
}
- else if (!strcmp(buf, "WMode"))
+ else if (!strcmp(buf.scratch, "WMode"))
{
where = " after WMode";
pdf_parse_wmode(ctx, cmap, file);
}
else
- fz_strlcpy(key, buf, sizeof key);
+ fz_strlcpy(key, buf.scratch, sizeof key);
}
else if (tok == TOK_USECMAP)