summaryrefslogtreecommitdiff
path: root/source/fitz/filter-lzw.c
diff options
context:
space:
mode:
Diffstat (limited to 'source/fitz/filter-lzw.c')
-rw-r--r--source/fitz/filter-lzw.c224
1 files changed, 224 insertions, 0 deletions
diff --git a/source/fitz/filter-lzw.c b/source/fitz/filter-lzw.c
new file mode 100644
index 00000000..73909ca1
--- /dev/null
+++ b/source/fitz/filter-lzw.c
@@ -0,0 +1,224 @@
+#include "mupdf/fitz.h"
+
+/* TODO: error checking */
+
+enum
+{
+ MIN_BITS = 9,
+ MAX_BITS = 12,
+ NUM_CODES = (1 << MAX_BITS),
+ LZW_CLEAR = 256,
+ LZW_EOD = 257,
+ LZW_FIRST = 258,
+ MAX_LENGTH = 4097
+};
+
+typedef struct lzw_code_s lzw_code;
+
+struct lzw_code_s
+{
+ int prev; /* prev code (in string) */
+ unsigned short length; /* string len, including this token */
+ unsigned char value; /* data value */
+ unsigned char first_char; /* first token of string */
+};
+
+typedef struct fz_lzwd_s fz_lzwd;
+
+struct fz_lzwd_s
+{
+ fz_stream *chain;
+ int eod;
+
+ int early_change;
+
+ int code_bits; /* num bits/code */
+ int code; /* current code */
+ int old_code; /* previously recognized code */
+ int next_code; /* next free entry */
+
+ lzw_code table[NUM_CODES];
+
+ unsigned char bp[MAX_LENGTH];
+ unsigned char *rp, *wp;
+};
+
+static int
+read_lzwd(fz_stream *stm, unsigned char *buf, int len)
+{
+ fz_lzwd *lzw = stm->state;
+ lzw_code *table = lzw->table;
+ unsigned char *p = buf;
+ unsigned char *ep = buf + len;
+ unsigned char *s;
+ int codelen;
+
+ int code_bits = lzw->code_bits;
+ int code = lzw->code;
+ int old_code = lzw->old_code;
+ int next_code = lzw->next_code;
+
+ while (lzw->rp < lzw->wp && p < ep)
+ *p++ = *lzw->rp++;
+
+ while (p < ep)
+ {
+ if (lzw->eod)
+ return 0;
+
+ code = fz_read_bits(lzw->chain, code_bits);
+
+ if (fz_is_eof_bits(lzw->chain))
+ {
+ lzw->eod = 1;
+ break;
+ }
+
+ if (code == LZW_EOD)
+ {
+ lzw->eod = 1;
+ break;
+ }
+
+ if (next_code >= NUM_CODES && code != LZW_CLEAR)
+ {
+ fz_warn(stm->ctx, "missing clear code in lzw decode");
+ code = LZW_CLEAR;
+ }
+
+ if (code == LZW_CLEAR)
+ {
+ code_bits = MIN_BITS;
+ next_code = LZW_FIRST;
+ old_code = -1;
+ continue;
+ }
+
+ /* if stream starts without a clear code, old_code is undefined... */
+ if (old_code == -1)
+ {
+ old_code = code;
+ }
+ else if (code > next_code || next_code >= NUM_CODES)
+ {
+ fz_warn(stm->ctx, "out of range code encountered in lzw decode");
+ }
+ else
+ {
+ /* add new entry to the code table */
+ table[next_code].prev = old_code;
+ table[next_code].first_char = table[old_code].first_char;
+ table[next_code].length = table[old_code].length + 1;
+ if (code < next_code)
+ table[next_code].value = table[code].first_char;
+ else if (code == next_code)
+ table[next_code].value = table[next_code].first_char;
+ else
+ fz_warn(stm->ctx, "out of range code encountered in lzw decode");
+
+ next_code ++;
+
+ if (next_code > (1 << code_bits) - lzw->early_change - 1)
+ {
+ code_bits ++;
+ if (code_bits > MAX_BITS)
+ code_bits = MAX_BITS;
+ }
+
+ old_code = code;
+ }
+
+ /* code maps to a string, copy to output (in reverse...) */
+ if (code > 255)
+ {
+ codelen = table[code].length;
+ lzw->rp = lzw->bp;
+ lzw->wp = lzw->bp + codelen;
+
+ assert(codelen < MAX_LENGTH);
+
+ s = lzw->wp;
+ do {
+ *(--s) = table[code].value;
+ code = table[code].prev;
+ } while (code >= 0 && s > lzw->bp);
+ }
+
+ /* ... or just a single character */
+ else
+ {
+ lzw->bp[0] = code;
+ lzw->rp = lzw->bp;
+ lzw->wp = lzw->bp + 1;
+ }
+
+ /* copy to output */
+ while (lzw->rp < lzw->wp && p < ep)
+ *p++ = *lzw->rp++;
+ }
+
+ lzw->code_bits = code_bits;
+ lzw->code = code;
+ lzw->old_code = old_code;
+ lzw->next_code = next_code;
+
+ return p - buf;
+}
+
+static void
+close_lzwd(fz_context *ctx, void *state_)
+{
+ fz_lzwd *lzw = (fz_lzwd *)state_;
+ fz_close(lzw->chain);
+ fz_free(ctx, lzw);
+}
+
+/* Default: early_change = 1 */
+fz_stream *
+fz_open_lzwd(fz_stream *chain, int early_change)
+{
+ fz_context *ctx = chain->ctx;
+ fz_lzwd *lzw = NULL;
+ int i;
+
+ fz_var(lzw);
+
+ fz_try(ctx)
+ {
+ lzw = fz_malloc_struct(ctx, fz_lzwd);
+ lzw->chain = chain;
+ lzw->eod = 0;
+ lzw->early_change = early_change;
+
+ for (i = 0; i < 256; i++)
+ {
+ lzw->table[i].value = i;
+ lzw->table[i].first_char = i;
+ lzw->table[i].length = 1;
+ lzw->table[i].prev = -1;
+ }
+
+ for (i = 256; i < NUM_CODES; i++)
+ {
+ lzw->table[i].value = 0;
+ lzw->table[i].first_char = 0;
+ lzw->table[i].length = 0;
+ lzw->table[i].prev = -1;
+ }
+
+ lzw->code_bits = MIN_BITS;
+ lzw->code = -1;
+ lzw->next_code = LZW_FIRST;
+ lzw->old_code = -1;
+ lzw->rp = lzw->bp;
+ lzw->wp = lzw->bp;
+ }
+ fz_catch(ctx)
+ {
+ fz_free(ctx, lzw);
+ fz_close(chain);
+ fz_rethrow(ctx);
+ }
+
+ return fz_new_stream(ctx, lzw, read_lzwd, close_lzwd);
+}