diff options
Diffstat (limited to 'source/tools')
-rw-r--r-- | source/tools/mudraw.c | 2 | ||||
-rw-r--r-- | source/tools/mutool.c | 2 | ||||
-rw-r--r-- | source/tools/pdfcreate.c | 516 |
3 files changed, 519 insertions, 1 deletions
diff --git a/source/tools/mudraw.c b/source/tools/mudraw.c index 47aab044..d63f936c 100644 --- a/source/tools/mudraw.c +++ b/source/tools/mudraw.c @@ -387,7 +387,7 @@ static void drawpage(fz_context *ctx, fz_document *doc, int pagenum) tbounds = bounds; fz_transform_rect(&tbounds, &ctm); - newpage = pdf_create_page(ctx, pdfout, bounds, 72, 0); + newpage = pdf_create_page(ctx, pdfout, bounds, NULL, 0); fz_try(ctx) { diff --git a/source/tools/mutool.c b/source/tools/mutool.c index 6b61dab4..c662ffef 100644 --- a/source/tools/mutool.c +++ b/source/tools/mutool.c @@ -15,6 +15,7 @@ int pdfinfo_main(int argc, char *argv[]); int pdfposter_main(int argc, char *argv[]); int pdfshow_main(int argc, char *argv[]); int pdfpages_main(int argc, char *argv[]); +int pdfcreate_main(int argc, char *argv[]); static struct { int (*func)(int argc, char *argv[]); @@ -28,6 +29,7 @@ static struct { { pdfpages_main, "pages", "show information about pdf pages" }, { pdfposter_main, "poster", "split large page into many tiles" }, { pdfshow_main, "show", "show internal pdf objects" }, + { pdfcreate_main, "create", "create pdf document" }, }; static int diff --git a/source/tools/pdfcreate.c b/source/tools/pdfcreate.c new file mode 100644 index 00000000..4e4a622e --- /dev/null +++ b/source/tools/pdfcreate.c @@ -0,0 +1,516 @@ +/* + * PDF creation tool: Tool for creating pdf content. + * + * Simple test bed to work with adding content and creating PDFs + * + */ + +#include "mupdf/pdf.h" + +#define MAX_IMAGES 10 +#define MAX_FONTS 10 +#define MAX_REF_NAME 32 + +enum { RES_FONT, RES_XOBJECT }; + +static void usage(void) +{ + fprintf(stderr, + "usage: mutool create [-o output.pdf] [fonts][images] contents\n" + "\t-f\tfont label:font file\n" + "\t-i\timage label:image file\n" + "\tcontents file, defines page size, graphics, references fonts and images\n" + ); + exit(1); +} + +/* Simple structures to hold and manage contents */ +typedef struct doc_content_s doc_content; +typedef struct resources_s resources; +typedef struct page_resource_xref_s page_resource_xref; + +struct page_resource_xref_s +{ + char *res_name; + char res_ref[MAX_REF_NAME]; + pdf_obj *obj; + int type; +}; + +struct resources_s +{ + char *name; + int ref; +}; + +struct doc_content_s +{ + fz_stream *stm; + int num_pages; + fz_point *page_sizes; + fz_off_t *content_offsets; + int *content_lengths; + resources *fonts; + int num_fonts; + resources *images; + int num_images; + page_resource_xref **ref_im_resources; + page_resource_xref **ref_font_resources; + int *num_page_im_res; + int *num_page_font_res; +}; + +/* Look for the presence of image or font refererences in the content */ +static void check_for_reference(fz_context *ctx, resources *resource, int num_res, char *buffer, int res_type, doc_content *doc, int page_num) +{ + int i; + char * pch; + int length; + char name[MAX_REF_NAME]; + int res_num; + + for (i = 0; i < num_res; i++) + { + pch = strchr(resource[i].name, ':'); + if (pch != NULL) + { + length = pch - resource[i].name; + if (length < sizeof(name)) + { + memcpy(name, resource[i].name, length); + name[length] = 0; + pch = strstr(buffer, name); + if (pch != NULL) + { + /* Resource reference is in the content for this document. Mark + * it as such so that we know to add it to the document */ + resource[i].ref = 1; + + /* Also note this page has a reference to this name so + * we can add it to the page resource list. Select from + * font or image */ + if (res_type == RES_FONT) + { + res_num = doc->num_page_font_res[page_num]; + doc->ref_font_resources[page_num][res_num].obj = NULL; /* Set later */ + doc->ref_font_resources[page_num][res_num].res_name = resource[i].name; + doc->ref_font_resources[page_num][res_num].type = res_type; + memcpy(doc->ref_font_resources[page_num][res_num].res_ref, name, sizeof(name)); + doc->num_page_font_res[page_num] += 1; + } + else + { + res_num = doc->num_page_im_res[page_num]; + doc->ref_im_resources[page_num][res_num].obj = NULL; /* Set later */ + doc->ref_im_resources[page_num][res_num].res_name = resource[i].name; + doc->ref_im_resources[page_num][res_num].type = res_type; + memcpy(doc->ref_im_resources[page_num][res_num].res_ref, name, sizeof(name)); + doc->num_page_im_res[page_num] += 1; + } + } + } + else + fz_throw(ctx, FZ_ERROR_GENERIC, "Image/Font indirect name too long"); + } + else + fz_throw(ctx, FZ_ERROR_GENERIC, "Internal parsing error"); + } +} + +static void drop_ref_objs(fz_context *ctx, page_resource_xref *refs, int num_refs) +{ + int i; + + for (i = 0; i < num_refs; i++) + pdf_drop_obj(ctx, refs[i].obj); +} + +static void free_contents(fz_context *ctx, doc_content *content) +{ + int i; + + fz_free(ctx, content->content_offsets); + fz_free(ctx, content->content_lengths); + fz_free(ctx, content->page_sizes); + for (i = 0; i < content->num_pages; i++) + { + drop_ref_objs(ctx, content->ref_font_resources[i], content->num_page_font_res[i]); + fz_free(ctx, content->ref_font_resources[i]); + } + fz_free(ctx, content->ref_font_resources); + for (i = 0; i < content->num_pages; i++) + { + drop_ref_objs(ctx, content->ref_im_resources[i], content->num_page_im_res[i]); + fz_free(ctx, content->ref_im_resources[i]); + } + fz_free(ctx, content->num_page_im_res); + fz_free(ctx, content->num_page_font_res); + fz_free(ctx, content->ref_im_resources); + content->content_offsets = NULL; + content->page_sizes = NULL; + content->content_lengths = NULL; + fz_drop_stream(ctx, content->stm); +} + +/* This is a VERY simple format to give us something to play with + * in terms of defining pages and content. Here we parse the contents + * defining our page sizes and the command locations for each page. + * The format is as follows: + * 1) Comment lines are preceded by % + * 2) Number of pages is indicated at the begining with /Pages # + * 3) Each page is indicated by /Page # [X Y] (# is zero based) + * 4) The content is in the form of simple PDF content stream that + * may included various drawing commands and reference the + * image and font resources. + * */ +static void init_parse_contents(fz_context *ctx, char *content_fn, doc_content *content) +{ + fz_stream *stm; + char buf[1024]; + int page_count = 0; + fz_off_t pre_off; + int i; + + fz_var(stm); + + fz_try(ctx) + { + stm = fz_open_file(ctx, content_fn); + while (1) + { + pre_off = fz_tell(ctx, stm); + fz_read_line(ctx, stm, buf, sizeof buf); + if (buf[0] == '\0') + { + if (content->num_pages > (page_count + 1)) + fz_throw(ctx, FZ_ERROR_GENERIC, "Missing defined pages"); + else + { + content->content_lengths[page_count - 1] = + fz_tell(ctx, stm) - content->content_offsets[page_count - 1]; + } + break; + } + if (buf[0] != '%') + { + if (strncmp(buf, "/Pages", strlen("/Pages")) == 0) + { + if ((content->num_pages = atoi(&(buf[strlen("/Pages")]))) <= 0) + fz_throw(ctx, FZ_ERROR_GENERIC, "Page count invalid"); + content->content_offsets = fz_malloc_array(ctx, content->num_pages, sizeof(fz_off_t)); + content->content_lengths = fz_malloc_array(ctx, content->num_pages, sizeof(int)); + content->page_sizes = fz_malloc_array(ctx, content->num_pages, sizeof(fz_point)); + content->num_page_im_res = fz_calloc(ctx, content->num_pages, sizeof(int)); + content->num_page_font_res = fz_calloc(ctx, content->num_pages, sizeof(int)); + content->ref_font_resources = fz_malloc_array(ctx, content->num_pages, sizeof(page_resource_xref*)); + content->ref_im_resources = fz_malloc_array(ctx, content->num_pages, sizeof(page_resource_xref*)); + for (i = 0; i < content->num_pages; i++) + { + content->ref_font_resources[i] = fz_malloc_array(ctx, MAX_FONTS, sizeof(page_resource_xref)); + content->ref_im_resources[i] = fz_malloc_array(ctx, MAX_FONTS, sizeof(page_resource_xref)); + } + } + else if (strncmp(buf, "/Page", strlen("/Page")) == 0) + { + int page_num; + if (page_count > 0) + { + content->content_lengths[page_count - 1] = + (int)(pre_off - content->content_offsets[page_count - 1]); + } + if (sscanf(&(buf[strlen("/Page")]), "%d [%f %f]", &page_num, + &(content->page_sizes[page_count].x), &(content->page_sizes[page_count].y)) != 3) + fz_throw(ctx, FZ_ERROR_GENERIC, "Page size invalid"); + if (page_num < 0 || page_num >= content->num_pages) + fz_throw(ctx, FZ_ERROR_GENERIC, "Page value invalid"); + if (content->page_sizes[page_count].x < 0 || content->page_sizes[page_count].y < 0) + fz_throw(ctx, FZ_ERROR_GENERIC, "Page dimensions invalid"); + content->content_offsets[page_count] = fz_tell(ctx, stm); + page_count++; + } + else + { + check_for_reference(ctx, content->images, content->num_images, buf, RES_XOBJECT, content, page_count - 1); + check_for_reference(ctx, content->fonts, content->num_fonts, buf, RES_FONT, content, page_count - 1); + } + } + } + } + fz_catch(ctx) + { + free_contents(ctx, content); + fz_drop_stream(ctx, stm); + fz_rethrow(ctx); + } + content->stm = stm; +} + +/* Get the page contents */ +static int get_page_contents(fz_context *ctx, int page_num, doc_content *content, + unsigned char *buffer) +{ + int size = content->content_lengths[page_num]; + + if (buffer == NULL) + return size; + fz_seek(ctx, content->stm, content->content_offsets[page_num], SEEK_SET); + fz_read(ctx, content->stm, buffer, size); + return size; +} + +static void update_res(fz_context *ctx, int num_pages, int *num_res, page_resource_xref **page_resource, + char *res_name, pdf_obj *obj) +{ + int j, i; + + for (j = 0; j < num_pages; j++) + for (i = 0; i < num_res[j]; i++) + if (strcmp(res_name, page_resource[j][i].res_name) == 0) + page_resource[j][i].obj = obj; +} + +static pdf_obj* create_page_res_dict(fz_context *ctx, pdf_document *pdf, + page_resource_xref *ref_res, const char type[], int num_items) +{ + pdf_obj *dict = NULL; + int i; + + if (num_items <= 0) + return NULL; + + fz_var(dict); + + fz_try(ctx) + { + dict = pdf_new_dict(ctx, pdf, num_items); + for (i = 0; i < num_items; i++) + { + char text[32]; + snprintf(text, sizeof(text), "%s/%s", type, ref_res[i].res_ref); + pdf_dict_putp(ctx, dict, text, ref_res[i].obj); + } + } + fz_catch(ctx) + { + pdf_drop_obj(ctx, dict); + fz_rethrow(ctx); + } + return dict; +} + +static int create_pdf(fz_context *ctx, char *output, resources fonts[], int num_fonts, + resources images[], int num_images, char *contents) +{ + fz_rect bounds; + pdf_document *pdf = NULL; + pdf_page *newpage = NULL; + unsigned char *buffer = NULL; + fz_buffer *fz_buf = NULL; + fz_buffer *im_font_buff = NULL; + fz_image *image = NULL; + pdf_obj *font_dict = NULL; + pdf_obj *im_dict = NULL; + pdf_res *im_res = NULL; + pdf_res *font_res = NULL; + pdf_write_options opts = { 0 }; + doc_content content = { 0 }; + int k; + int length; + char *pch; + + fz_var(pdf); + fz_var(newpage); + fz_var(buffer); + fz_var(fz_buf); + fz_var(im_font_buff); + fz_var(image); + fz_var(font_dict); + fz_var(im_dict); + fz_var(im_res); + fz_var(font_res); + + fz_try(ctx) + { + pdf = pdf_create_document(ctx); + content.num_fonts = num_fonts; + content.num_images = num_images; + content.fonts = fonts; + content.images = images; + init_parse_contents(ctx, contents, &content); + + /* Add the resources, getting the reference numbers in the process. */ + for (k = 0; k < content.num_images; k++) + { + if (content.images[k].ref) + { + /* Get the fz_image */ + pch = strchr(content.images[k].name, ':'); + if (pch != NULL) + { + im_font_buff = fz_read_file(ctx, &(pch[1])); + image = fz_new_image_from_buffer(ctx, im_font_buff); + fz_drop_buffer(ctx, im_font_buff); + im_font_buff = NULL; + im_res = pdf_add_image_res(ctx, pdf, image, 0); + fz_drop_image(ctx, image); + image = NULL; + + /* Look through our image page resources and update the + * indirect reference number. Here we don't use the numbers + * set by the doc resource holder (i.e im_res->num) since we + * are using our own content specified for pdfcreate */ + update_res(ctx, content.num_pages, content.num_page_im_res, + content.ref_im_resources, content.images[k].name, im_res->obj); + } + else + fz_throw(ctx, FZ_ERROR_GENERIC, "Image indirect name too long"); + } + } + for (k = 0; k < content.num_fonts; k++) + { + if (content.fonts[k].ref) + { + pch = strchr(content.fonts[k].name, ':'); + if (pch != NULL) + { + im_font_buff = fz_read_file(ctx, &(pch[1])); + font_res = pdf_add_simple_font_res(ctx, pdf, im_font_buff); + fz_drop_buffer(ctx, im_font_buff); + im_font_buff = NULL; + + /* Look through our font page resources and update the indirect + * reference number */ + update_res(ctx, content.num_pages, content.num_page_font_res, + content.ref_font_resources, content.fonts[k].name, font_res->obj); + } + else + fz_throw(ctx, FZ_ERROR_GENERIC, "Font indirect name too long"); + } + } + + /* Now the page contents */ + for (k = 0; k < content.num_pages; k++) + { + bounds.x0 = 0; + bounds.y0 = 0; + bounds.x1 = content.page_sizes[k].x; + bounds.y1 = content.page_sizes[k].y; + + length = get_page_contents(ctx, k, &content, NULL); + if (length > 0) + { + buffer = fz_malloc(ctx, length); + length = get_page_contents(ctx, k, &content, buffer); + fz_buf = fz_new_buffer_from_data(ctx, buffer, length); + buffer = NULL; + newpage = pdf_create_page(ctx, pdf, bounds, fz_buf, 0); + /* Create the dicts for the page resources */ + font_dict = create_page_res_dict(ctx, pdf, content.ref_font_resources[k], + "Font", content.num_page_font_res[k]); + im_dict = create_page_res_dict(ctx, pdf, content.ref_im_resources[k], + "XObject", content.num_page_im_res[k]); + if (im_dict != NULL) + { + pdf_dict_puts(ctx, newpage->me, "Resources", im_dict); + pdf_drop_obj(ctx, im_dict); + } + if (font_dict != NULL) + { + pdf_dict_puts(ctx, newpage->me, "Resources", font_dict); + pdf_drop_obj(ctx, font_dict); + } + fz_drop_buffer(ctx, fz_buf); + fz_buf = NULL; + } + else + { + newpage = pdf_create_page(ctx, pdf, bounds, NULL, 0); + } + pdf_insert_page(ctx, pdf, newpage, INT_MAX); + pdf_drop_page(ctx, newpage); + newpage = NULL; + } + pdf_save_document(ctx, pdf, output, &opts); + } + fz_always(ctx) + { + pdf_drop_page(ctx, newpage); + pdf_close_document(ctx, pdf); + free_contents(ctx, &content); + fz_free(ctx, buffer); + fz_drop_buffer(ctx, fz_buf); + fz_drop_buffer(ctx, im_font_buff); + fz_drop_image(ctx, image); + } + fz_catch(ctx) + { + fz_rethrow(ctx); + } + return 0; +} + +int pdfcreate_main(int argc, char **argv) +{ + char *outfile = "out.pdf"; + resources fonts[MAX_FONTS]; + resources images[MAX_IMAGES]; + char *contents = ""; + int nfonts = 0; + int nimages = 0; + int c; + int errors = 0; + fz_context *ctx; + + while ((c = fz_getopt(argc, argv, "f:i:o:")) != -1) + { + switch (c) + { + case 'f': + if (nfonts == MAX_FONTS) + { + fprintf(stderr, "max number of fonts exceeded\n"); + exit(1); + } + fonts[nfonts].name = fz_optarg; + fonts[nfonts].ref = 0; + nfonts++; + break; + case 'i': + if (nimages == MAX_IMAGES) + { + fprintf(stderr, "max number of images exceeded\n"); + exit(1); + } + images[nimages].name = fz_optarg; + images[nimages].ref = 0; + nimages++; + break; + case 'o': outfile = fz_optarg; break; + default: usage(); break; + } + } + + if (fz_optind == argc) + usage(); + + contents = argv[fz_optind++]; + + ctx = fz_new_context(NULL, NULL, FZ_STORE_UNLIMITED); + if (!ctx) + { + fprintf(stderr, "cannot initialise context\n"); + exit(1); + } + + fz_try(ctx) + { + create_pdf(ctx, outfile, fonts, nfonts, images, nimages, contents); + } + fz_catch(ctx) + { + errors++; + } + fz_drop_context(ctx); + + return errors != 0; +} |