diff options
Diffstat (limited to 'source/tools')
-rw-r--r-- | source/tools/pdfcreate.c | 562 |
1 files changed, 113 insertions, 449 deletions
diff --git a/source/tools/pdfcreate.c b/source/tools/pdfcreate.c index 70d0aa42..bb9e9108 100644 --- a/source/tools/pdfcreate.c +++ b/source/tools/pdfcreate.c @@ -2,491 +2,156 @@ * PDF creation tool: Tool for creating pdf content. * * Simple test bed to work with adding content and creating PDFs - * */ #include "mupdf/pdf.h" -#define MAX_IMAGES 10 -#define MAX_FONTS 10 -#define MAX_REF_NAME 32 - -enum { RES_FONT, RES_XOBJECT }; - static void usage(void) { fprintf(stderr, - "usage: mutool create [-o output.pdf] [fonts][images] contents\n" - "\t-f\tfont label:font file\n" - "\t-i\timage label:image file\n" - "\tcontents file, defines page size, graphics, references fonts and images\n" + "usage: mutool create [-o output.pdf] [-adlsz] page.txt [page2.txt ...]\n" + "\t-o\tname of PDF file to create\n" + "\t-a\tascii hex encode binary streams\n" + "\t-d\tdecompress all streams\n" + "\t-l\tlinearize PDF\n" + "\t-s\tclean content streams\n" + "\t-z\tdeflate uncompressed streams\n" + "\tpage.txt file defines page size, fonts, images and contents\n" ); exit(1); } -/* Simple structures to hold and manage contents */ -typedef struct doc_content_s doc_content; -typedef struct resources_s resources; -typedef struct page_resource_xref_s page_resource_xref; +static fz_context *ctx = NULL; +static pdf_document *doc = NULL; +static pdf_write_options opts = { 0 }; -struct page_resource_xref_s +static void add_font_res(pdf_obj *fonts, char *name, char *path) { - char *res_name; - char res_ref[MAX_REF_NAME]; - pdf_obj *obj; - int type; -}; - -struct resources_s -{ - char *name; - int ref; -}; + unsigned char *data; + unsigned int size; + fz_font *font; + pdf_obj *res; + + data = fz_lookup_base14_font(ctx, path, &size); + if (data) + font = fz_new_font_from_memory(ctx, NULL, data, size, 0, 0); + else + font = fz_new_font_from_file(ctx, NULL, path, 0, 0); + + res = pdf_add_simple_font_res(ctx, doc, font); + pdf_dict_puts(ctx, fonts, name, res); + pdf_drop_obj(ctx, res); + + fz_drop_font(ctx, font); +} -struct doc_content_s +static void add_image_res(pdf_obj *images, char *name, char *path) { - fz_stream *stm; - int num_pages; - fz_point *page_sizes; - fz_off_t *content_offsets; - int *content_lengths; - resources *fonts; - int num_fonts; - resources *images; - int num_images; - page_resource_xref **ref_im_resources; - page_resource_xref **ref_font_resources; - int *num_page_im_res; - int *num_page_font_res; -}; + fz_image *image; + pdf_obj *res; -/* Look for the presence of image or font refererences in the content */ -static void check_for_reference(fz_context *ctx, resources *resource, int num_res, char *buffer, int res_type, doc_content *doc, int page_num) -{ - int i; - char * pch; - int length; - char name[MAX_REF_NAME]; - int res_num; + image = fz_new_image_from_file(ctx, path); - for (i = 0; i < num_res; i++) - { - pch = strchr(resource[i].name, ':'); - if (pch != NULL) - { - length = pch - resource[i].name; - if (length < sizeof(name)) - { - memcpy(name, resource[i].name, length); - name[length] = 0; - pch = strstr(buffer, name); - if (pch != NULL) - { - /* Resource reference is in the content for this document. Mark - * it as such so that we know to add it to the document */ - resource[i].ref = 1; + res = pdf_add_image_res(ctx, doc, image, 0); + pdf_dict_puts(ctx, images, name, res); + pdf_drop_obj(ctx, res); - /* Also note this page has a reference to this name so - * we can add it to the page resource list. Select from - * font or image */ - if (res_type == RES_FONT) - { - res_num = doc->num_page_font_res[page_num]; - doc->ref_font_resources[page_num][res_num].obj = NULL; /* Set later */ - doc->ref_font_resources[page_num][res_num].res_name = resource[i].name; - doc->ref_font_resources[page_num][res_num].type = res_type; - memcpy(doc->ref_font_resources[page_num][res_num].res_ref, name, sizeof(name)); - doc->num_page_font_res[page_num] += 1; - } - else - { - res_num = doc->num_page_im_res[page_num]; - doc->ref_im_resources[page_num][res_num].obj = NULL; /* Set later */ - doc->ref_im_resources[page_num][res_num].res_name = resource[i].name; - doc->ref_im_resources[page_num][res_num].type = res_type; - memcpy(doc->ref_im_resources[page_num][res_num].res_ref, name, sizeof(name)); - doc->num_page_im_res[page_num] += 1; - } - } - } - else - fz_throw(ctx, FZ_ERROR_GENERIC, "Image/Font indirect name too long"); - } - else - fz_throw(ctx, FZ_ERROR_GENERIC, "Internal parsing error"); - } + fz_drop_image(ctx, image); } -static void drop_ref_objs(fz_context *ctx, page_resource_xref *refs, int num_refs) +/* +The input is a raw content stream, with commands embedded in comments: + +%%MediaBox LLX LLY URX URY +%%Rotate Angle +%%Font Name Filename (or base 14 font name) +%%Image Name Filename +*/ +static void create_page(char *input) { - int i; - - for (i = 0; i < num_refs; i++) - pdf_drop_obj(ctx, refs[i].obj); -} + fz_rect mediabox = { 0, 0, 595, 842 }; + int rotate = 0; -static void free_contents(fz_context *ctx, doc_content *content) -{ - int i; + char line[4096]; + char *s, *p; + fz_stream *stm; - fz_free(ctx, content->content_offsets); - fz_free(ctx, content->content_lengths); - fz_free(ctx, content->page_sizes); - for (i = 0; i < content->num_pages; i++) - { - drop_ref_objs(ctx, content->ref_font_resources[i], content->num_page_font_res[i]); - fz_free(ctx, content->ref_font_resources[i]); - } - fz_free(ctx, content->ref_font_resources); - for (i = 0; i < content->num_pages; i++) - { - drop_ref_objs(ctx, content->ref_im_resources[i], content->num_page_im_res[i]); - fz_free(ctx, content->ref_im_resources[i]); - } - fz_free(ctx, content->num_page_im_res); - fz_free(ctx, content->num_page_font_res); - fz_free(ctx, content->ref_im_resources); - content->content_offsets = NULL; - content->page_sizes = NULL; - content->content_lengths = NULL; - fz_drop_stream(ctx, content->stm); -} + fz_buffer *contents; + pdf_obj *resources; + pdf_obj *fonts; + pdf_obj *images; + pdf_page *page; -/* This is a VERY simple format to give us something to play with - * in terms of defining pages and content. Here we parse the contents - * defining our page sizes and the command locations for each page. - * The format is as follows: - * 1) Comment lines are preceded by % - * 2) Number of pages is indicated at the begining with /Pages # - * 3) Each page is indicated by /Page # [X Y] (# is zero based) - * 4) The content is in the form of simple PDF content stream that - * may included various drawing commands and reference the - * image and font resources. - * */ -static void init_parse_contents(fz_context *ctx, char *content_fn, doc_content *content) -{ - fz_stream *stm; - char buf[1024]; - int page_count = 0; - fz_off_t pre_off; - int i; + resources = pdf_new_dict(ctx, doc, 2); + fonts = pdf_new_dict(ctx, doc, 10); + images = pdf_new_dict(ctx, doc, 10); + pdf_dict_put(ctx, resources, PDF_NAME_Font, fonts); + pdf_dict_put(ctx, resources, PDF_NAME_XObject, images); - fz_var(stm); + contents = fz_new_buffer(ctx, 1024); - fz_try(ctx) + stm = fz_open_file(ctx, input); + while (fz_read_line(ctx, stm, line, sizeof line)) { - stm = fz_open_file(ctx, content_fn); - while (1) + if (line[0] == '%' && line[1] == '%') { - pre_off = fz_tell(ctx, stm); - fz_read_line(ctx, stm, buf, sizeof buf); - if (buf[0] == '\0') + p = line; + s = fz_strsep(&p, " "); + if (!strcmp(s, "%%MediaBox")) { - if (content->num_pages > (page_count + 1)) - fz_throw(ctx, FZ_ERROR_GENERIC, "Missing defined pages"); - else - { - content->content_lengths[page_count - 1] = - fz_tell(ctx, stm) - content->content_offsets[page_count - 1]; - } - break; + s = fz_strsep(&p, " "); mediabox.x0 = fz_atoi(s); + s = fz_strsep(&p, " "); mediabox.y0 = fz_atoi(s); + s = fz_strsep(&p, " "); mediabox.x1 = fz_atoi(s); + s = fz_strsep(&p, " "); mediabox.y1 = fz_atoi(s); } - if (buf[0] != '%') + else if (!strcmp(s, "%%Rotate")) { - if (strncmp(buf, "/Pages", strlen("/Pages")) == 0) - { - if ((content->num_pages = atoi(&(buf[strlen("/Pages")]))) <= 0) - fz_throw(ctx, FZ_ERROR_GENERIC, "Page count invalid"); - content->content_offsets = fz_malloc_array(ctx, content->num_pages, sizeof(fz_off_t)); - content->content_lengths = fz_malloc_array(ctx, content->num_pages, sizeof(int)); - content->page_sizes = fz_malloc_array(ctx, content->num_pages, sizeof(fz_point)); - content->num_page_im_res = fz_calloc(ctx, content->num_pages, sizeof(int)); - content->num_page_font_res = fz_calloc(ctx, content->num_pages, sizeof(int)); - content->ref_font_resources = fz_malloc_array(ctx, content->num_pages, sizeof(page_resource_xref*)); - content->ref_im_resources = fz_malloc_array(ctx, content->num_pages, sizeof(page_resource_xref*)); - for (i = 0; i < content->num_pages; i++) - { - content->ref_font_resources[i] = fz_malloc_array(ctx, MAX_FONTS, sizeof(page_resource_xref)); - content->ref_im_resources[i] = fz_malloc_array(ctx, MAX_FONTS, sizeof(page_resource_xref)); - } - } - else if (strncmp(buf, "/Page", strlen("/Page")) == 0) - { - int page_num; - if (page_count > 0) - { - content->content_lengths[page_count - 1] = - (int)(pre_off - content->content_offsets[page_count - 1]); - } - if (sscanf(&(buf[strlen("/Page")]), "%d [%f %f]", &page_num, - &(content->page_sizes[page_count].x), &(content->page_sizes[page_count].y)) != 3) - fz_throw(ctx, FZ_ERROR_GENERIC, "Page size invalid"); - if (page_num < 0 || page_num >= content->num_pages) - fz_throw(ctx, FZ_ERROR_GENERIC, "Page value invalid"); - if (content->page_sizes[page_count].x < 0 || content->page_sizes[page_count].y < 0) - fz_throw(ctx, FZ_ERROR_GENERIC, "Page dimensions invalid"); - content->content_offsets[page_count] = fz_tell(ctx, stm); - page_count++; - } - else - { - check_for_reference(ctx, content->images, content->num_images, buf, RES_XOBJECT, content, page_count - 1); - check_for_reference(ctx, content->fonts, content->num_fonts, buf, RES_FONT, content, page_count - 1); - } + s = fz_strsep(&p, " "); rotate = fz_atoi(s); } - } - } - fz_catch(ctx) - { - free_contents(ctx, content); - fz_drop_stream(ctx, stm); - fz_rethrow(ctx); - } - content->stm = stm; -} - -/* Get the page contents */ -static int get_page_contents(fz_context *ctx, int page_num, doc_content *content, - unsigned char *buffer) -{ - int size = content->content_lengths[page_num]; - - if (buffer == NULL) - return size; - fz_seek(ctx, content->stm, content->content_offsets[page_num], SEEK_SET); - fz_read(ctx, content->stm, buffer, size); - return size; -} - -static void update_res(fz_context *ctx, int num_pages, int *num_res, page_resource_xref **page_resource, - char *res_name, pdf_obj *obj) -{ - int j, i; - - for (j = 0; j < num_pages; j++) - for (i = 0; i < num_res[j]; i++) - if (strcmp(res_name, page_resource[j][i].res_name) == 0) - page_resource[j][i].obj = obj; -} - -static pdf_obj* create_page_res_dict(fz_context *ctx, pdf_document *pdf, - page_resource_xref *ref_res, const char type[], int num_items) -{ - pdf_obj *dict = NULL; - int i; - - if (num_items <= 0) - return NULL; - - fz_var(dict); - - fz_try(ctx) - { - dict = pdf_new_dict(ctx, pdf, num_items); - for (i = 0; i < num_items; i++) - { - char text[32]; - snprintf(text, sizeof(text), "%s/%s", type, ref_res[i].res_ref); - pdf_dict_putp(ctx, dict, text, ref_res[i].obj); - } - } - fz_catch(ctx) - { - pdf_drop_obj(ctx, dict); - fz_rethrow(ctx); - } - return dict; -} - -static int create_pdf(fz_context *ctx, char *output, resources fonts[], int num_fonts, - resources images[], int num_images, char *contents) -{ - fz_rect bounds; - pdf_document *pdf = NULL; - pdf_page *newpage = NULL; - unsigned char *buffer = NULL; - fz_buffer *fz_buf = NULL; - fz_buffer *im_font_buff = NULL; - fz_image *image = NULL; - pdf_obj *font_dict = NULL; - pdf_obj *im_dict = NULL; - pdf_obj *im_res = NULL; - pdf_obj *font_res = NULL; - pdf_write_options opts = { 0 }; - doc_content content = { 0 }; - int k; - int length; - char *pch; - - fz_var(pdf); - fz_var(newpage); - fz_var(buffer); - fz_var(fz_buf); - fz_var(im_font_buff); - fz_var(image); - fz_var(font_dict); - fz_var(im_dict); - fz_var(im_res); - fz_var(font_res); - - fz_try(ctx) - { - pdf = pdf_create_document(ctx); - content.num_fonts = num_fonts; - content.num_images = num_images; - content.fonts = fonts; - content.images = images; - init_parse_contents(ctx, contents, &content); - - /* Add the resources, getting the reference numbers in the process. */ - for (k = 0; k < content.num_images; k++) - { - if (content.images[k].ref) + else if (!strcmp(s, "%%Font")) { - /* Get the fz_image */ - pch = strchr(content.images[k].name, ':'); - if (pch != NULL) - { - im_font_buff = fz_read_file(ctx, &(pch[1])); - image = fz_new_image_from_buffer(ctx, im_font_buff); - fz_drop_buffer(ctx, im_font_buff); - im_font_buff = NULL; - im_res = pdf_add_image_res(ctx, pdf, image, 0); - fz_drop_image(ctx, image); - image = NULL; - - /* Look through our image page resources and update the - * indirect reference number. */ - update_res(ctx, content.num_pages, content.num_page_im_res, - content.ref_im_resources, content.images[k].name, im_res); - } - else - fz_throw(ctx, FZ_ERROR_GENERIC, "Image indirect name too long"); + s = fz_strsep(&p, " "); + add_font_res(fonts, s, p); } - } - for (k = 0; k < content.num_fonts; k++) - { - if (content.fonts[k].ref) + else if (!strcmp(s, "%%Image")) { - pch = strchr(content.fonts[k].name, ':'); - if (pch != NULL) - { - fz_font *font; - im_font_buff = fz_read_file(ctx, &(pch[1])); - font = fz_new_font_from_buffer(ctx, NULL, im_font_buff, 0, 0); - font_res = pdf_add_simple_font_res(ctx, pdf, font); - fz_drop_buffer(ctx, im_font_buff); - fz_drop_font(ctx, font); - im_font_buff = NULL; - - /* Look through our font page resources and update the indirect - * reference number */ - update_res(ctx, content.num_pages, content.num_page_font_res, - content.ref_font_resources, content.fonts[k].name, font_res); - } - else - fz_throw(ctx, FZ_ERROR_GENERIC, "Font indirect name too long"); + s = fz_strsep(&p, " "); + add_image_res(images, s, p); } } - - /* Now the page contents */ - for (k = 0; k < content.num_pages; k++) + else { - bounds.x0 = 0; - bounds.y0 = 0; - bounds.x1 = content.page_sizes[k].x; - bounds.y1 = content.page_sizes[k].y; - - length = get_page_contents(ctx, k, &content, NULL); - if (length > 0) - { - buffer = fz_malloc(ctx, length); - length = get_page_contents(ctx, k, &content, buffer); - fz_buf = fz_new_buffer_from_data(ctx, buffer, length); - buffer = NULL; - newpage = pdf_create_page(ctx, pdf, bounds, 0, fz_buf); - /* Create the dicts for the page resources */ - font_dict = create_page_res_dict(ctx, pdf, content.ref_font_resources[k], - "Font", content.num_page_font_res[k]); - im_dict = create_page_res_dict(ctx, pdf, content.ref_im_resources[k], - "XObject", content.num_page_im_res[k]); - if (im_dict != NULL) - { - pdf_dict_puts(ctx, newpage->me, "Resources", im_dict); - pdf_drop_obj(ctx, im_dict); - } - if (font_dict != NULL) - { - pdf_dict_puts(ctx, newpage->me, "Resources", font_dict); - pdf_drop_obj(ctx, font_dict); - } - fz_drop_buffer(ctx, fz_buf); - fz_buf = NULL; - } - else - { - newpage = pdf_create_page(ctx, pdf, bounds, 0, NULL); - } - pdf_insert_page(ctx, pdf, newpage, INT_MAX); - pdf_drop_page(ctx, newpage); - newpage = NULL; + fz_write_buffer(ctx, contents, line, strlen(line)); + fz_write_buffer_byte(ctx, contents, '\n'); } - pdf_save_document(ctx, pdf, output, &opts); - } - fz_always(ctx) - { - pdf_drop_page(ctx, newpage); - pdf_close_document(ctx, pdf); - free_contents(ctx, &content); - fz_free(ctx, buffer); - fz_drop_buffer(ctx, fz_buf); - fz_drop_buffer(ctx, im_font_buff); - fz_drop_image(ctx, image); } - fz_catch(ctx) - { - fz_rethrow(ctx); - } - return 0; + fz_drop_stream(ctx, stm); + + page = pdf_create_page(ctx, doc, &mediabox, rotate, contents, resources); + pdf_insert_page(ctx, doc, page, INT_MAX); + pdf_drop_page(ctx, page); + + fz_drop_buffer(ctx, contents); + pdf_drop_obj(ctx, resources); + pdf_drop_obj(ctx, fonts); + pdf_drop_obj(ctx, images); } int pdfcreate_main(int argc, char **argv) { - char *outfile = "out.pdf"; - resources fonts[MAX_FONTS]; - resources images[MAX_IMAGES]; - char *contents = ""; - int nfonts = 0; - int nimages = 0; - int c; - int errors = 0; - fz_context *ctx; + char *output = "out.pdf"; + int i, c; - while ((c = fz_getopt(argc, argv, "f:i:o:")) != -1) + while ((c = fz_getopt(argc, argv, "adlszo:")) != -1) { switch (c) { - case 'f': - if (nfonts == MAX_FONTS) - { - fprintf(stderr, "max number of fonts exceeded\n"); - exit(1); - } - fonts[nfonts].name = fz_optarg; - fonts[nfonts].ref = 0; - nfonts++; - break; - case 'i': - if (nimages == MAX_IMAGES) - { - fprintf(stderr, "max number of images exceeded\n"); - exit(1); - } - images[nimages].name = fz_optarg; - images[nimages].ref = 0; - nimages++; - break; - case 'o': outfile = fz_optarg; break; + case 'o': output = fz_optarg; break; + case 'a': opts.do_ascii ++; break; + case 'd': opts.do_expand ^= PDF_EXPAND_ALL; break; + case 'l': opts.do_linear ++; break; + case 's': opts.do_clean ++; break; + case 'z': opts.do_deflate ++; break; default: usage(); break; } } @@ -494,8 +159,6 @@ int pdfcreate_main(int argc, char **argv) if (fz_optind == argc) usage(); - contents = argv[fz_optind++]; - ctx = fz_new_context(NULL, NULL, FZ_STORE_UNLIMITED); if (!ctx) { @@ -503,15 +166,16 @@ int pdfcreate_main(int argc, char **argv) exit(1); } - fz_try(ctx) - { - create_pdf(ctx, outfile, fonts, nfonts, images, nimages, contents); - } - fz_catch(ctx) - { - errors++; - } - fz_drop_context(ctx); + doc = pdf_create_document(ctx); - return errors != 0; + for (i = fz_optind; i < argc; ++i) + create_page(argv[i]); + + pdf_save_document(ctx, doc, output, &opts); + + pdf_close_document(ctx, doc); + + fz_flush_warnings(ctx); + fz_drop_context(ctx); + return 0; } |