From 533684eb51bb72df871a63eb17e589e2ec9bb547 Mon Sep 17 00:00:00 2001
From: Robin Watts <robin.watts@artifex.com>
Date: Fri, 6 Jul 2018 14:48:08 +0100
Subject: Add debug functionality to show lexed stream contents.

If you define DUMP_LEXER_STREAM then the lexer dumps the input
that it reads from the stream.
---
 source/pdf/pdf-lex.c | 60 ++++++++++++++++++++++++++++++++++------------------
 1 file changed, 39 insertions(+), 21 deletions(-)

(limited to 'source')
diff --git a/source/pdf/pdf-lex.c b/source/pdf/pdf-lex.c
index d02789a8..bc7393c7 100644
--- a/source/pdf/pdf-lex.c
+++ b/source/pdf/pdf-lex.c
@@ -27,6 +27,24 @@
 #define RANGE_0_7 \
 	'0':case'1':case'2':case'3':case'4':case'5':case'6':case'7'
 
+/* #define DUMP_LEXER_STREAM */ /* Uncomment to make the lexer echo every byte it reads. */
+#ifdef DUMP_LEXER_STREAM
+static inline int lex_byte(fz_context *ctx, fz_stream *stm) /* Debug wrapper: read one byte via fz_read_byte and dump it to fz_stdout. */
+{
+	int c = fz_read_byte(ctx, stm);
+
+	if (c == EOF) /* End of stream is shown as a literal marker. */
+		fz_write_printf(ctx, fz_stdout(ctx), "<EOF>");
+	else if (c >= 32 && c < 128) /* Written with "%c". NOTE(review): 127 (DEL) is in this range too - confirm intended. */
+		fz_write_printf(ctx, fz_stdout(ctx), "%c", c);
+	else /* Any other value goes through the "<%02x>" format. */
+		fz_write_printf(ctx, fz_stdout(ctx), "<%02x>", c);
+	return c; /* Hand the value from fz_read_byte back to the caller unchanged. */
+}
+#else
+#define lex_byte(C,S) fz_read_byte(C,S) /* Dumping disabled: lex_byte is a plain alias for fz_read_byte. */
+#endif
+
 static inline int iswhite(int ch)
 {
 	return
@@ -56,7 +74,7 @@ lex_white(fz_context *ctx, fz_stream *f)
 {
 	int c;
 	do {
-		c = fz_read_byte(ctx, f);
+		c = lex_byte(ctx, f);
 	} while ((c <= 32) && (iswhite(c)));
 	if (c != EOF)
 		fz_unread_byte(ctx, f);
@@ -67,7 +85,7 @@ lex_comment(fz_context *ctx, fz_stream *f)
 {
 	int c;
 	do {
-		c = fz_read_byte(ctx, f);
+		c = lex_byte(ctx, f);
 	} while ((c != '\012') && (c != '\015') && (c != EOF));
 }
 
@@ -155,13 +173,13 @@ lex_number(fz_context *ctx, fz_stream *f, pdf_lexbuf *buf, int c)
 
 	*s++ = c;
 
-	c = fz_read_byte(ctx, f);
+	c = lex_byte(ctx, f);
 
 	/* skip extra '-' signs at start of number */
 	if (neg)
 	{
 		while (c == '-')
-			c = fz_read_byte(ctx, f);
+			c = lex_byte(ctx, f);
 	}
 
 	while (s < e)
@@ -188,7 +206,7 @@ lex_number(fz_context *ctx, fz_stream *f, pdf_lexbuf *buf, int c)
 			*s++ = c;
 			break;
 		}
-		c = fz_read_byte(ctx, f);
+		c = lex_byte(ctx, f);
 	}
 
 end:
@@ -231,7 +249,7 @@ lex_name(fz_context *ctx, fz_stream *f, pdf_lexbuf *lb)
 			s += pdf_lexbuf_grow(ctx, lb);
 			e = lb->scratch + fz_mini(127, lb->size);
 		}
-		c = fz_read_byte(ctx, f);
+		c = lex_byte(ctx, f);
 		switch (c)
 		{
 		case IS_WHITE:
@@ -252,13 +270,13 @@ lex_name(fz_context *ctx, fz_stream *f, pdf_lexbuf *lb)
 				case RANGE_0_9:
 					if (i == 1 && c == '0' && hex[0] == 0)
 						goto illegal;
-					hex[i] = fz_read_byte(ctx, f) - '0';
+					hex[i] = lex_byte(ctx, f) - '0';
 					break;
 				case RANGE_a_f:
-					hex[i] = fz_read_byte(ctx, f) - 'a' + 10;
+					hex[i] = lex_byte(ctx, f) - 'a' + 10;
 					break;
 				case RANGE_A_F:
-					hex[i] = fz_read_byte(ctx, f) - 'A' + 10;
+					hex[i] = lex_byte(ctx, f) - 'A' + 10;
 					break;
 				default:
 				case EOF:
@@ -299,7 +317,7 @@ lex_string(fz_context *ctx, fz_stream *f, pdf_lexbuf *lb)
 			s += pdf_lexbuf_grow(ctx, lb);
 			e = lb->scratch + lb->size;
 		}
-		c = fz_read_byte(ctx, f);
+		c = lex_byte(ctx, f);
 		switch (c)
 		{
 		case EOF:
@@ -315,7 +333,7 @@ lex_string(fz_context *ctx, fz_stream *f, pdf_lexbuf *lb)
 			*s++ = c;
 			break;
 		case '\\':
-			c = fz_read_byte(ctx, f);
+			c = lex_byte(ctx, f);
 			switch (c)
 			{
 			case EOF:
@@ -346,11 +364,11 @@ lex_string(fz_context *ctx, fz_stream *f, pdf_lexbuf *lb)
 				break;
 			case RANGE_0_7:
 				oct = c - '0';
-				c = fz_read_byte(ctx, f);
+				c = lex_byte(ctx, f);
 				if (c >= '0' && c <= '7')
 				{
 					oct = oct * 8 + (c - '0');
-					c = fz_read_byte(ctx, f);
+					c = lex_byte(ctx, f);
 					if (c >= '0' && c <= '7')
 						oct = oct * 8 + (c - '0');
 					else if (c != EOF)
@@ -363,7 +381,7 @@ lex_string(fz_context *ctx, fz_stream *f, pdf_lexbuf *lb)
 			case '\n':
 				break;
 			case '\r':
-				c = fz_read_byte(ctx, f);
+				c = lex_byte(ctx, f);
 				if ((c != '\n') && (c != EOF))
 					fz_unread_byte(ctx, f);
 				break;
@@ -396,7 +414,7 @@ lex_hex_string(fz_context *ctx, fz_stream *f, pdf_lexbuf *lb)
 			s += pdf_lexbuf_grow(ctx, lb);
 			e = lb->scratch + lb->size;
 		}
-		c = fz_read_byte(ctx, f);
+		c = lex_byte(ctx, f);
 		switch (c)
 		{
 		case IS_WHITE:
@@ -505,7 +523,7 @@ pdf_lex(fz_context *ctx, fz_stream *f, pdf_lexbuf *buf)
 {
 	while (1)
 	{
-		int c = fz_read_byte(ctx, f);
+		int c = lex_byte(ctx, f);
 		switch (c)
 		{
 		case EOF:
@@ -524,14 +542,14 @@ pdf_lex(fz_context *ctx, fz_stream *f, pdf_lexbuf *buf)
 		case ')':
 			return PDF_TOK_ERROR;
 		case '<':
-			c = fz_read_byte(ctx, f);
+			c = lex_byte(ctx, f);
 			if (c == '<')
 				return PDF_TOK_OPEN_DICT;
 			if (c != EOF)
 				fz_unread_byte(ctx, f);
 			return lex_hex_string(ctx, f, buf);
 		case '>':
-			c = fz_read_byte(ctx, f);
+			c = lex_byte(ctx, f);
 			if (c == '>')
 				return PDF_TOK_CLOSE_DICT;
 			if (c != EOF)
@@ -560,7 +578,7 @@ pdf_lex_no_string(fz_context *ctx, fz_stream *f, pdf_lexbuf *buf)
 {
 	while (1)
 	{
-		int c = fz_read_byte(ctx, f);
+		int c = lex_byte(ctx, f);
 		switch (c)
 		{
 		case EOF:
@@ -579,14 +597,14 @@ pdf_lex_no_string(fz_context *ctx, fz_stream *f, pdf_lexbuf *buf)
 		case ')':
 			return PDF_TOK_ERROR; /* no strings allowed */
 		case '<':
-			c = fz_read_byte(ctx, f);
+			c = lex_byte(ctx, f);
 			if (c == '<')
 				return PDF_TOK_OPEN_DICT;
 			if (c != EOF)
 				fz_unread_byte(ctx, f);
 			return PDF_TOK_ERROR; /* no strings allowed */
 		case '>':
-			c = fz_read_byte(ctx, f);
+			c = lex_byte(ctx, f);
 			if (c == '>')
 				return PDF_TOK_CLOSE_DICT;
 			if (c != EOF)
-- 
cgit v1.2.3