summary | refs | log | tree | commit | diff
path: root/source/fitz/unzip.c
diff options
context:
space:
mode:
author: Sebastian Rasmussen <sebras@gmail.com>, 2016-08-13 17:45:14 +0800
committer: Sebastian Rasmussen <sebras@gmail.com>, 2016-09-08 18:53:00 +0800
commit: 15cab201d3c98dc6580c8cf592d94ab226f96db5 (patch)
tree: 352861e9bf8b5ec8289810501faa0007c19b7f68 /source/fitz/unzip.c
parent: 793ae6a3c2d23743e66039b124f826b232a5a04e (diff)
download: mupdf-15cab201d3c98dc6580c8cf592d94ab226f96db5.tar.xz
Make fz_archive a generic archive type.
Previously it was inherently tied to zip archives and directories. Now these are separated out into distinct subclasses. This prepares for support for further archive formats.
Diffstat (limited to 'source/fitz/unzip.c')
-rw-r--r-- source/fitz/unzip.c 357
1 file changed, 153 insertions, 204 deletions
diff --git a/source/fitz/unzip.c b/source/fitz/unzip.c
index 1d386209..3c5c0da4 100644
--- a/source/fitz/unzip.c
+++ b/source/fitz/unzip.c
@@ -17,70 +17,42 @@
#define ZIP_ENCRYPTED_FLAG 0x1
-struct zip_entry
+typedef struct zip_entry_s zip_entry;
+typedef struct fz_zip_archive_s fz_zip_archive;
+
+struct zip_entry_s
{
char *name;
int offset, csize, usize;
};
-struct fz_archive_s
+struct fz_zip_archive_s
{
- char *directory;
- fz_stream *file;
+ fz_archive super;
+
int count;
- struct zip_entry *table;
+ zip_entry *entries;
};
-static inline int zip_toupper(int c)
-{
- if (c >= 'a' && c <= 'z')
- return c - 'a' + 'A';
- return c;
-}
-
-static int zip_strcasecmp(const char *a, const char *b)
-{
- while (zip_toupper(*a) == zip_toupper(*b))
- {
- if (*a++ == 0)
- return 0;
- b++;
- }
- return zip_toupper(*a) - zip_toupper(*b);
-}
-
-static int case_compare_entries(const void *a_, const void *b_)
-{
- const struct zip_entry *a = a_;
- const struct zip_entry *b = b_;
- return zip_strcasecmp(a->name, b->name);
-}
-
-static struct zip_entry *lookup_zip_entry(fz_context *ctx, fz_archive *zip, const char *name)
+static void drop_zip_archive(fz_context *ctx, fz_archive *arch)
{
- int l = 0;
- int r = zip->count - 1;
- while (l <= r)
- {
- int m = (l + r) >> 1;
- int c = zip_strcasecmp(name, zip->table[m].name);
- if (c < 0)
- r = m - 1;
- else if (c > 0)
- l = m + 1;
- else
- return &zip->table[m];
- }
- return NULL;
+ fz_zip_archive *zip = (fz_zip_archive *) arch;
+ int i;
+ for (i = 0; i < zip->count; ++i)
+ fz_free(ctx, zip->entries[i].name);
+ fz_free(ctx, zip->entries);
}
-static void read_zip_dir_imp(fz_context *ctx, fz_archive *zip, int start_offset)
+static void read_zip_dir_imp(fz_context *ctx, fz_zip_archive *zip, int start_offset)
{
- fz_stream *file = zip->file;
+ fz_stream *file = zip->super.file;
int sig;
- int offset, count;
+ int i, count, offset, csize, usize;
int namesize, metasize, commentsize;
- int i;
+ char *name;
+ size_t n;
+
+ zip->count = 0;
fz_seek(ctx, file, start_offset, 0);
@@ -141,10 +113,6 @@ static void read_zip_dir_imp(fz_context *ctx, fz_archive *zip, int start_offset)
}
}
- zip->count = count;
- zip->table = fz_malloc_array(ctx, count, sizeof *zip->table);
- memset(zip->table, 0, count * sizeof *zip->table);
-
fz_seek(ctx, file, offset, 0);
for (i = 0; i < count; i++)
@@ -160,19 +128,21 @@ static void read_zip_dir_imp(fz_context *ctx, fz_archive *zip, int start_offset)
(void) fz_read_int16_le(ctx, file); /* last mod file time */
(void) fz_read_int16_le(ctx, file); /* last mod file date */
(void) fz_read_int32_le(ctx, file); /* crc-32 */
- zip->table[i].csize = fz_read_int32_le(ctx, file);
- zip->table[i].usize = fz_read_int32_le(ctx, file);
+ csize = fz_read_int32_le(ctx, file);
+ usize = fz_read_int32_le(ctx, file);
namesize = fz_read_int16_le(ctx, file);
metasize = fz_read_int16_le(ctx, file);
commentsize = fz_read_int16_le(ctx, file);
(void) fz_read_int16_le(ctx, file); /* disk number start */
(void) fz_read_int16_le(ctx, file); /* int file atts */
(void) fz_read_int32_le(ctx, file); /* ext file atts */
- zip->table[i].offset = fz_read_int32_le(ctx, file);
+ offset = fz_read_int32_le(ctx, file);
- zip->table[i].name = fz_malloc(ctx, namesize + 1);
- fz_read(ctx, file, (unsigned char*)zip->table[i].name, namesize);
- zip->table[i].name[namesize] = 0;
+ name = fz_malloc(ctx, namesize + 1);
+ n = fz_read(ctx, file, (unsigned char*)name, namesize);
+ if (n < namesize)
+ fz_throw(ctx, FZ_ERROR_GENERIC, "premature end of data in zip entry name");
+ name[namesize] = '\0';
while (metasize > 0)
{
@@ -181,19 +151,19 @@ static void read_zip_dir_imp(fz_context *ctx, fz_archive *zip, int start_offset)
if (type == ZIP64_EXTRA_FIELD_SIG)
{
int sizeleft = size;
- if (zip->table[i].usize == 0xFFFFFFFF && sizeleft >= 8)
+ if (usize == 0xFFFFFFFF && sizeleft >= 8)
{
- zip->table[i].usize = fz_read_int64_le(ctx, file);
+ usize = fz_read_int64_le(ctx, file);
sizeleft -= 8;
}
- if (zip->table[i].csize == 0xFFFFFFFF && sizeleft >= 8)
+ if (csize == 0xFFFFFFFF && sizeleft >= 8)
{
- zip->table[i].csize = fz_read_int64_le(ctx, file);
+ csize = fz_read_int64_le(ctx, file);
sizeleft -= 8;
}
- if (zip->table[i].offset == 0xFFFFFFFF && sizeleft >= 8)
+ if (offset == 0xFFFFFFFF && sizeleft >= 8)
{
- zip->table[i].offset = fz_read_int64_le(ctx, file);
+ offset = fz_read_int64_le(ctx, file);
sizeleft -= 8;
}
fz_seek(ctx, file, sizeleft - size, 1);
@@ -201,18 +171,55 @@ static void read_zip_dir_imp(fz_context *ctx, fz_archive *zip, int start_offset)
fz_seek(ctx, file, size, 1);
metasize -= 4 + size;
}
- if (zip->table[i].usize < 0 || zip->table[i].csize < 0 || zip->table[i].offset < 0)
+ if (usize < 0 || csize < 0 || offset < 0)
fz_throw(ctx, FZ_ERROR_GENERIC, "zip64 files larger than 2 GB are not supported");
fz_seek(ctx, file, commentsize, 1);
+
+ zip->entries = fz_resize_array(ctx, zip->entries, zip->count + 1, sizeof *zip->entries);
+
+ zip->entries[zip->count].name = name;
+ zip->entries[zip->count].offset = offset;
+ zip->entries[zip->count].csize = csize;
+ zip->entries[zip->count].usize = usize;
+
+ zip->count++;
}
+}
+
+static int read_zip_entry_header(fz_context *ctx, fz_zip_archive *zip, zip_entry *ent)
+{
+ fz_stream *file = zip->super.file;
+ int sig, general, method, namelength, extralength;
+
+ fz_seek(ctx, file, ent->offset, 0);
- qsort(zip->table, count, sizeof *zip->table, case_compare_entries);
+ sig = fz_read_int32_le(ctx, file);
+ if (sig != ZIP_LOCAL_FILE_SIG)
+ fz_throw(ctx, FZ_ERROR_GENERIC, "wrong zip local file signature (0x%x)", sig);
+
+ (void) fz_read_int16_le(ctx, file); /* version */
+ general = fz_read_int16_le(ctx, file); /* general */
+ if (general & ZIP_ENCRYPTED_FLAG)
+ fz_throw(ctx, FZ_ERROR_GENERIC, "zip content is encrypted");
+
+ method = fz_read_int16_le(ctx, file);
+ (void) fz_read_int16_le(ctx, file); /* file time */
+ (void) fz_read_int16_le(ctx, file); /* file date */
+ (void) fz_read_int32_le(ctx, file); /* crc-32 */
+ (void) fz_read_int32_le(ctx, file); /* csize */
+ (void) fz_read_int32_le(ctx, file); /* usize */
+ namelength = fz_read_int16_le(ctx, file);
+ extralength = fz_read_int16_le(ctx, file);
+
+ fz_seek(ctx, file, namelength + extralength, 1);
+
+ return method;
}
-static void read_zip_dir(fz_context *ctx, fz_archive *zip)
+static void ensure_zip_entries(fz_context *ctx, fz_zip_archive *zip)
{
- fz_stream *file = zip->file;
+ fz_stream *file = zip->super.file;
unsigned char buf[512];
size_t size, back, maxback;
size_t i, n;
@@ -230,53 +237,38 @@ static void read_zip_dir(fz_context *ctx, fz_archive *zip)
if (n < 4)
break;
for (i = n - 4; i > 0; i--)
- {
if (!memcmp(buf + i, "PK\5\6", 4))
{
read_zip_dir_imp(ctx, zip, (int)(size - back + i));
return;
}
- }
back += sizeof buf - 4;
}
fz_throw(ctx, FZ_ERROR_GENERIC, "cannot find end of central directory");
}
-static int read_zip_entry_header(fz_context *ctx, fz_archive *zip, struct zip_entry *ent)
+static zip_entry *lookup_zip_entry(fz_context *ctx, fz_zip_archive *zip, const char *name)
{
- fz_stream *file = zip->file;
- int sig, general, method, namelength, extralength;
-
- fz_seek(ctx, file, ent->offset, 0);
-
- sig = fz_read_int32_le(ctx, file);
- if (sig != ZIP_LOCAL_FILE_SIG)
- fz_throw(ctx, FZ_ERROR_GENERIC, "wrong zip local file signature (0x%x)", sig);
-
- (void) fz_read_int16_le(ctx, file); /* version */
- general = fz_read_int16_le(ctx, file); /* general */
- if (general & ZIP_ENCRYPTED_FLAG)
- fz_throw(ctx, FZ_ERROR_GENERIC, "zip content is encrypted");
-
- method = fz_read_int16_le(ctx, file);
- (void) fz_read_int16_le(ctx, file); /* file time */
- (void) fz_read_int16_le(ctx, file); /* file date */
- (void) fz_read_int32_le(ctx, file); /* crc-32 */
- (void) fz_read_int32_le(ctx, file); /* csize */
- (void) fz_read_int32_le(ctx, file); /* usize */
- namelength = fz_read_int16_le(ctx, file);
- extralength = fz_read_int16_le(ctx, file);
-
- fz_seek(ctx, file, namelength + extralength, 1);
-
- return method;
+ int i;
+ for (i = 0; i < zip->count; i++)
+ if (!fz_strcasecmp(name, zip->entries[i].name))
+ return &zip->entries[i];
+ return NULL;
}
-static fz_stream *open_zip_entry(fz_context *ctx, fz_archive *zip, struct zip_entry *ent)
+static fz_stream *open_zip_entry(fz_context *ctx, fz_archive *arch, const char *name)
{
- fz_stream *file = zip->file;
- int method = read_zip_entry_header(ctx, zip, ent);
+ fz_zip_archive *zip = (fz_zip_archive *) arch;
+ fz_stream *file = zip->super.file;
+ int method;
+ zip_entry *ent;
+
+ ent = lookup_zip_entry(ctx, zip, name);
+ if (!ent)
+ fz_throw(ctx, FZ_ERROR_GENERIC, "cannot find named zip archive entry");
+
+ method = read_zip_entry_header(ctx, zip, ent);
if (method == 0)
return fz_open_null(ctx, file, ent->usize, fz_tell(ctx, file));
if (method == 8)
@@ -284,29 +276,32 @@ static fz_stream *open_zip_entry(fz_context *ctx, fz_archive *zip, struct zip_en
fz_throw(ctx, FZ_ERROR_GENERIC, "unknown zip method: %d", method);
}
-static fz_buffer *read_zip_entry(fz_context *ctx, fz_archive *zip, struct zip_entry *ent)
+static fz_buffer *read_zip_entry(fz_context *ctx, fz_archive *arch, const char *name)
{
- fz_stream *file = zip->file;
+ fz_zip_archive *zip = (fz_zip_archive *) arch;
+ fz_stream *file = zip->super.file;
fz_buffer *ubuf;
unsigned char *cbuf;
int method;
z_stream z;
int code;
int len;
+ zip_entry *ent;
- method = read_zip_entry_header(ctx, zip, ent);
+ ent = lookup_zip_entry(ctx, zip, name);
+ if (!ent)
+ fz_throw(ctx, FZ_ERROR_GENERIC, "cannot find named zip archive entry");
+ method = read_zip_entry_header(ctx, zip, ent);
ubuf = fz_new_buffer(ctx, ent->usize + 1); /* +1 because many callers will add a terminating zero */
- ubuf->len = ent->usize;
if (method == 0)
{
fz_try(ctx)
{
- len = fz_read(ctx, file, ubuf->data, ubuf->len);
- if (len < ubuf->len)
- fz_warn(ctx, "premature end of data in stored archive entry");
- ubuf->len = len;
+ ubuf->len = fz_read(ctx, file, ubuf->data, ent->usize);
+ if (ubuf->len < ent->usize)
+ fz_warn(ctx, "premature end of data in stored zip archive entry");
}
fz_catch(ctx)
{
@@ -315,8 +310,7 @@ static fz_buffer *read_zip_entry(fz_context *ctx, fz_archive *zip, struct zip_en
}
return ubuf;
}
-
- if (method == 8)
+ else if (method == 8)
{
cbuf = fz_malloc(ctx, ent->csize);
fz_try(ctx)
@@ -349,7 +343,7 @@ static fz_buffer *read_zip_entry(fz_context *ctx, fz_archive *zip, struct zip_en
}
len = ent->usize - z.avail_out;
- if (len < ubuf->len)
+ if (len < ent->usize)
fz_warn(ctx, "premature end of data in compressed archive entry");
ubuf->len = len;
}
@@ -369,133 +363,88 @@ static fz_buffer *read_zip_entry(fz_context *ctx, fz_archive *zip, struct zip_en
fz_throw(ctx, FZ_ERROR_GENERIC, "unknown zip method: %d", method);
}
-int
-fz_has_archive_entry(fz_context *ctx, fz_archive *zip, const char *name)
-{
- if (zip->directory)
- {
- char path[2048];
- fz_strlcpy(path, zip->directory, sizeof path);
- fz_strlcat(path, "/", sizeof path);
- fz_strlcat(path, name, sizeof path);
- return fz_file_exists(ctx, path);
- }
- else
- {
- return lookup_zip_entry(ctx, zip, name) != NULL;
- }
-}
-
-fz_stream *
-fz_open_archive_entry(fz_context *ctx, fz_archive *zip, const char *name)
-{
- if (zip->directory)
- {
- char path[2048];
- fz_strlcpy(path, zip->directory, sizeof path);
- fz_strlcat(path, "/", sizeof path);
- fz_strlcat(path, name, sizeof path);
- return fz_open_file(ctx, path);
- }
- else
- {
- struct zip_entry *ent = lookup_zip_entry(ctx, zip, name);
- if (!ent)
- fz_throw(ctx, FZ_ERROR_GENERIC, "cannot find zip entry: '%s'", name);
- return open_zip_entry(ctx, zip, ent);
- }
-}
-
-fz_buffer *
-fz_read_archive_entry(fz_context *ctx, fz_archive *zip, const char *name)
-{
- if (zip->directory)
- {
- char path[2048];
- fz_strlcpy(path, zip->directory, sizeof path);
- fz_strlcat(path, "/", sizeof path);
- fz_strlcat(path, name, sizeof path);
- return fz_read_file(ctx, path);
- }
- else
- {
- struct zip_entry *ent = lookup_zip_entry(ctx, zip, name);
- if (!ent)
- fz_throw(ctx, FZ_ERROR_GENERIC, "cannot find zip entry: '%s'", name);
- return read_zip_entry(ctx, zip, ent);
- }
-}
-
-int
-fz_count_archive_entries(fz_context *ctx, fz_archive *zip)
+static int has_zip_entry(fz_context *ctx, fz_archive *arch, const char *name)
{
- return zip->count;
+ fz_zip_archive *zip = (fz_zip_archive *) arch;
+ zip_entry *ent = lookup_zip_entry(ctx, zip, name);
+ return ent != NULL;
}
-const char *
-fz_list_archive_entry(fz_context *ctx, fz_archive *zip, int idx)
+static const char *list_zip_entry(fz_context *ctx, fz_archive *arch, int idx)
{
+ fz_zip_archive *zip = (fz_zip_archive *) arch;
if (idx < 0 || idx >= zip->count)
return NULL;
- return zip->table[idx].name;
+ return zip->entries[idx].name;
}
-void
-fz_drop_archive(fz_context *ctx, fz_archive *zip)
+static int count_zip_entries(fz_context *ctx, fz_archive *arch)
{
- int i;
- if (zip)
- {
- fz_free(ctx, zip->directory);
- fz_drop_stream(ctx, zip->file);
- for (i = 0; i < zip->count; ++i)
- fz_free(ctx, zip->table[i].name);
- fz_free(ctx, zip->table);
- fz_free(ctx, zip);
- }
+ fz_zip_archive *zip = (fz_zip_archive *) arch;
+ return zip->count;
}
-fz_archive *
-fz_open_directory(fz_context *ctx, const char *dirname)
+int
+fz_is_zip_archive(fz_context *ctx, fz_stream *file)
{
- fz_archive *zip = fz_malloc_struct(ctx, fz_archive);
- zip->directory = fz_strdup(ctx, dirname);
- return zip;
+ const unsigned char signature[4] = { 'P', 'K', 0x03, 0x04 };
+ unsigned char data[4];
+ size_t n;
+
+ fz_seek(ctx, file, 0, 0);
+ n = fz_read(ctx, file, data, nelem(data));
+ if (n != nelem(signature))
+ return 0;
+ if (memcmp(data, signature, nelem(signature)))
+ return 0;
+
+ return 1;
}
fz_archive *
-fz_open_archive_with_stream(fz_context *ctx, fz_stream *file)
+fz_open_zip_archive_with_stream(fz_context *ctx, fz_stream *file)
{
- fz_archive *zip;
+ fz_zip_archive *zip;
- zip = fz_malloc_struct(ctx, fz_archive);
- zip->file = fz_keep_stream(ctx, file);
- zip->count = 0;
- zip->table = NULL;
+ if (!fz_is_zip_archive(ctx, file))
+ fz_throw(ctx, FZ_ERROR_GENERIC, "cannot recognize zip archive");
+
+ zip = fz_new_archive(ctx, file, fz_zip_archive);
fz_try(ctx)
{
- read_zip_dir(ctx, zip);
+
+ zip->super.format = "zip";
+ zip->super.count_entries = count_zip_entries;
+ zip->super.list_entry = list_zip_entry;
+ zip->super.has_entry = has_zip_entry;
+ zip->super.read_entry = read_zip_entry;
+ zip->super.open_entry = open_zip_entry;
+ zip->super.drop_archive = drop_zip_archive;
+
+ ensure_zip_entries(ctx, zip);
}
fz_catch(ctx)
{
- fz_drop_archive(ctx, zip);
+ fz_drop_archive(ctx, &zip->super);
fz_rethrow(ctx);
}
- return zip;
+ return &zip->super;
}
fz_archive *
-fz_open_archive(fz_context *ctx, const char *filename)
+fz_open_zip_archive(fz_context *ctx, const char *filename)
{
+ fz_archive *zip = NULL;
fz_stream *file;
- fz_archive *zip;
file = fz_open_file(ctx, filename);
+ fz_var(zip);
+
fz_try(ctx)
- zip = fz_open_archive_with_stream(ctx, file);
+ zip = fz_open_zip_archive_with_stream(ctx, file);
fz_always(ctx)
fz_drop_stream(ctx, file);
fz_catch(ctx)