summary | refs | log | tree | commit | diff
path: root/pdf/mupdf.h
diff options
context:
space:
mode:
author: Robin Watts <robin.watts@artifex.com> 2012-02-23 15:07:04 +0000
committer: Robin Watts <robin.watts@artifex.com> 2012-02-25 08:14:40 -0800
commit: 9ef2a68e77842456ab30594a9a8d2c0535314715 (patch)
tree: 630c1b23b64ed15984b6c065635929f08bf613e8 /pdf/mupdf.h
parent: d28129c2ff6a78c50877426f90167d63334ab18a (diff)
download: mupdf-9ef2a68e77842456ab30594a9a8d2c0535314715.tar.xz
Revamp pdf lexing code
A huge amount (20%+ on some files) of our runtime is spent in fz_atof. A survey of results on the net suggests we will get much better speed by writing our own atof. Part of the job of doing this involves parsing the string to identify the component parts of the number - ludicrously, we are already doing this as part of the lexing process, so it would make sense to do the atoi/atof as part of this process. In order to do this, we need somewhere to store the lexed results; rather than add a float * and an int * to every single pdf_lex call, we generalise the calls to pass a pdf_lexbuf * pointer instead of separate buffer/max/string length pointers. This should help us overall.
Diffstat (limited to 'pdf/mupdf.h')
-rw-r--r-- pdf/mupdf.h 42
1 files changed, 35 insertions, 7 deletions
diff --git a/pdf/mupdf.h b/pdf/mupdf.h
index b233288f..15a96541 100644
--- a/pdf/mupdf.h
+++ b/pdf/mupdf.h
@@ -102,12 +102,40 @@ enum
PDF_NUM_TOKENS
};
-int pdf_lex(fz_stream *f, char *buf, int n, int *len);
+enum
+{
+ PDF_LEXBUF_SMALL = 256,
+ PDF_LEXBUF_LARGE = 65536
+};
+
+
+
+typedef struct pdf_lexbuf_s pdf_lexbuf;
+typedef struct pdf_lexbuf_large_s pdf_lexbuf_large;
+
+struct pdf_lexbuf_s
+{
+ int size;
+ int len;
+ int i;
+ float f;
+ char scratch[PDF_LEXBUF_SMALL];
+};
+
+struct pdf_lexbuf_large_s
+{
+ pdf_lexbuf base;
+ char scratch[PDF_LEXBUF_LARGE - PDF_LEXBUF_SMALL];
+};
+
+
+
+int pdf_lex(fz_stream *f, pdf_lexbuf *lexbuf);
-fz_obj *pdf_parse_array(pdf_document *doc, fz_stream *f, char *buf, int cap);
-fz_obj *pdf_parse_dict(pdf_document *doc, fz_stream *f, char *buf, int cap);
-fz_obj *pdf_parse_stm_obj(pdf_document *doc, fz_stream *f, char *buf, int cap);
-fz_obj *pdf_parse_ind_obj(pdf_document *doc, fz_stream *f, char *buf, int cap, int *num, int *gen, int *stm_ofs);
+fz_obj *pdf_parse_array(pdf_document *doc, fz_stream *f, pdf_lexbuf *buf);
+fz_obj *pdf_parse_dict(pdf_document *doc, fz_stream *f, pdf_lexbuf *buf);
+fz_obj *pdf_parse_stm_obj(pdf_document *doc, fz_stream *f, pdf_lexbuf *buf);
+fz_obj *pdf_parse_ind_obj(pdf_document *doc, fz_stream *f, pdf_lexbuf *buf, int *num, int *gen, int *stm_ofs);
fz_rect pdf_to_rect(fz_context *ctx, fz_obj *array);
fz_matrix pdf_to_matrix(fz_context *ctx, fz_obj *array);
@@ -170,7 +198,7 @@ struct pdf_document_s
fz_obj **page_objs;
fz_obj **page_refs;
- char scratch[65536];
+ pdf_lexbuf_large lexbuf;
};
fz_obj *pdf_resolve_indirect(fz_obj *ref);
@@ -194,7 +222,7 @@ pdf_document *pdf_open_document(fz_context *ctx, const char *filename);
void pdf_close_document(pdf_document *doc);
/* private */
-void pdf_repair_xref(pdf_document *doc, char *buf, int bufsize);
+void pdf_repair_xref(pdf_document *doc, pdf_lexbuf *buf);
void pdf_repair_obj_stms(pdf_document *doc);
void pdf_debug_xref(pdf_document *);
void pdf_resize_xref(pdf_document *doc, int newcap);