From 533684eb51bb72df871a63eb17e589e2ec9bb547 Mon Sep 17 00:00:00 2001 From: Robin Watts Date: Fri, 6 Jul 2018 14:48:08 +0100 Subject: Add debug functionality to show lexed stream contents. If you define DUMP_LEXER_STREAM then the lexer dumps the input that it reads from the stream. --- source/pdf/pdf-lex.c | 60 ++++++++++++++++++++++++++++++++++------------------ 1 file changed, 39 insertions(+), 21 deletions(-) (limited to 'source') diff --git a/source/pdf/pdf-lex.c b/source/pdf/pdf-lex.c index d02789a8..bc7393c7 100644 --- a/source/pdf/pdf-lex.c +++ b/source/pdf/pdf-lex.c @@ -27,6 +27,24 @@ #define RANGE_0_7 \ '0':case'1':case'2':case'3':case'4':case'5':case'6':case'7' +/* #define DUMP_LEXER_STREAM */ +#ifdef DUMP_LEXER_STREAM +static inline int lex_byte(fz_context *ctx, fz_stream *stm) +{ + int c = fz_read_byte(ctx, stm); + + if (c == EOF) + fz_write_printf(ctx, fz_stdout(ctx), "<EOF>"); + else if (c >= 32 && c < 128) + fz_write_printf(ctx, fz_stdout(ctx), "%c", c); + else + fz_write_printf(ctx, fz_stdout(ctx), "<%02x>", c); + return c; +} +#else +#define lex_byte(C,S) fz_read_byte(C,S) +#endif + static inline int iswhite(int ch) { return @@ -56,7 +74,7 @@ lex_white(fz_context *ctx, fz_stream *f) { int c; do { - c = fz_read_byte(ctx, f); + c = lex_byte(ctx, f); } while ((c <= 32) && (iswhite(c))); if (c != EOF) fz_unread_byte(ctx, f); @@ -67,7 +85,7 @@ lex_comment(fz_context *ctx, fz_stream *f) { int c; do { - c = fz_read_byte(ctx, f); + c = lex_byte(ctx, f); } while ((c != '\012') && (c != '\015') && (c != EOF)); } @@ -155,13 +173,13 @@ lex_number(fz_context *ctx, fz_stream *f, pdf_lexbuf *buf, int c) *s++ = c; - c = fz_read_byte(ctx, f); + c = lex_byte(ctx, f); /* skip extra '-' signs at start of number */ if (neg) { while (c == '-') - c = fz_read_byte(ctx, f); + c = lex_byte(ctx, f); } while (s < e) @@ -188,7 +206,7 @@ lex_number(fz_context *ctx, fz_stream *f, pdf_lexbuf *buf, int c) *s++ = c; break; } - c = fz_read_byte(ctx, f); + c = lex_byte(ctx, f); } end: @@
-231,7 +249,7 @@ lex_name(fz_context *ctx, fz_stream *f, pdf_lexbuf *lb) s += pdf_lexbuf_grow(ctx, lb); e = lb->scratch + fz_mini(127, lb->size); } - c = fz_read_byte(ctx, f); + c = lex_byte(ctx, f); switch (c) { case IS_WHITE: @@ -252,13 +270,13 @@ lex_name(fz_context *ctx, fz_stream *f, pdf_lexbuf *lb) case RANGE_0_9: if (i == 1 && c == '0' && hex[0] == 0) goto illegal; - hex[i] = fz_read_byte(ctx, f) - '0'; + hex[i] = lex_byte(ctx, f) - '0'; break; case RANGE_a_f: - hex[i] = fz_read_byte(ctx, f) - 'a' + 10; + hex[i] = lex_byte(ctx, f) - 'a' + 10; break; case RANGE_A_F: - hex[i] = fz_read_byte(ctx, f) - 'A' + 10; + hex[i] = lex_byte(ctx, f) - 'A' + 10; break; default: case EOF: @@ -299,7 +317,7 @@ lex_string(fz_context *ctx, fz_stream *f, pdf_lexbuf *lb) s += pdf_lexbuf_grow(ctx, lb); e = lb->scratch + lb->size; } - c = fz_read_byte(ctx, f); + c = lex_byte(ctx, f); switch (c) { case EOF: @@ -315,7 +333,7 @@ lex_string(fz_context *ctx, fz_stream *f, pdf_lexbuf *lb) *s++ = c; break; case '\\': - c = fz_read_byte(ctx, f); + c = lex_byte(ctx, f); switch (c) { case EOF: @@ -346,11 +364,11 @@ lex_string(fz_context *ctx, fz_stream *f, pdf_lexbuf *lb) break; case RANGE_0_7: oct = c - '0'; - c = fz_read_byte(ctx, f); + c = lex_byte(ctx, f); if (c >= '0' && c <= '7') { oct = oct * 8 + (c - '0'); - c = fz_read_byte(ctx, f); + c = lex_byte(ctx, f); if (c >= '0' && c <= '7') oct = oct * 8 + (c - '0'); else if (c != EOF) @@ -363,7 +381,7 @@ lex_string(fz_context *ctx, fz_stream *f, pdf_lexbuf *lb) case '\n': break; case '\r': - c = fz_read_byte(ctx, f); + c = lex_byte(ctx, f); if ((c != '\n') && (c != EOF)) fz_unread_byte(ctx, f); break; @@ -396,7 +414,7 @@ lex_hex_string(fz_context *ctx, fz_stream *f, pdf_lexbuf *lb) s += pdf_lexbuf_grow(ctx, lb); e = lb->scratch + lb->size; } - c = fz_read_byte(ctx, f); + c = lex_byte(ctx, f); switch (c) { case IS_WHITE: @@ -505,7 +523,7 @@ pdf_lex(fz_context *ctx, fz_stream *f, pdf_lexbuf *buf) { while (1) { - int c = fz_read_byte(ctx, f); 
+ int c = lex_byte(ctx, f); switch (c) { case EOF: @@ -524,14 +542,14 @@ pdf_lex(fz_context *ctx, fz_stream *f, pdf_lexbuf *buf) case ')': return PDF_TOK_ERROR; case '<': - c = fz_read_byte(ctx, f); + c = lex_byte(ctx, f); if (c == '<') return PDF_TOK_OPEN_DICT; if (c != EOF) fz_unread_byte(ctx, f); return lex_hex_string(ctx, f, buf); case '>': - c = fz_read_byte(ctx, f); + c = lex_byte(ctx, f); if (c == '>') return PDF_TOK_CLOSE_DICT; if (c != EOF) @@ -560,7 +578,7 @@ pdf_lex_no_string(fz_context *ctx, fz_stream *f, pdf_lexbuf *buf) { while (1) { - int c = fz_read_byte(ctx, f); + int c = lex_byte(ctx, f); switch (c) { case EOF: @@ -579,14 +597,14 @@ pdf_lex_no_string(fz_context *ctx, fz_stream *f, pdf_lexbuf *buf) case ')': return PDF_TOK_ERROR; /* no strings allowed */ case '<': - c = fz_read_byte(ctx, f); + c = lex_byte(ctx, f); if (c == '<') return PDF_TOK_OPEN_DICT; if (c != EOF) fz_unread_byte(ctx, f); return PDF_TOK_ERROR; /* no strings allowed */ case '>': - c = fz_read_byte(ctx, f); + c = lex_byte(ctx, f); if (c == '>') return PDF_TOK_CLOSE_DICT; if (c != EOF) -- cgit v1.2.3