summaryrefslogtreecommitdiff
path: root/source/tools
diff options
context:
space:
mode:
authorTor Andersson <tor.andersson@artifex.com>2016-02-24 21:11:31 +0100
committerTor Andersson <tor.andersson@artifex.com>2016-02-29 15:56:45 +0100
commit6c2f297bae8547820e33b2e2ce382eacdde228cb (patch)
tree2758fa79394a50979922d6d5dd2a9377aea4952e /source/tools
parentff162fe493a331151edf11e3b2cf4da62f79b811 (diff)
downloadmupdf-6c2f297bae8547820e33b2e2ce382eacdde228cb.tar.xz
New and simplified pdfcreate command line tool.
Simplified to show off how easy the new API can be.
Diffstat (limited to 'source/tools')
-rw-r--r--source/tools/pdfcreate.c562
1 files changed, 113 insertions, 449 deletions
diff --git a/source/tools/pdfcreate.c b/source/tools/pdfcreate.c
index 70d0aa42..bb9e9108 100644
--- a/source/tools/pdfcreate.c
+++ b/source/tools/pdfcreate.c
@@ -2,491 +2,156 @@
* PDF creation tool: Tool for creating pdf content.
*
* Simple test bed to work with adding content and creating PDFs
- *
*/
#include "mupdf/pdf.h"
-#define MAX_IMAGES 10
-#define MAX_FONTS 10
-#define MAX_REF_NAME 32
-
-enum { RES_FONT, RES_XOBJECT };
-
static void usage(void)
{
fprintf(stderr,
- "usage: mutool create [-o output.pdf] [fonts][images] contents\n"
- "\t-f\tfont label:font file\n"
- "\t-i\timage label:image file\n"
- "\tcontents file, defines page size, graphics, references fonts and images\n"
+ "usage: mutool create [-o output.pdf] [-adlsz] page.txt [page2.txt ...]\n"
+ "\t-o\tname of PDF file to create\n"
+ "\t-a\tascii hex encode binary streams\n"
+ "\t-d\tdecompress all streams\n"
+ "\t-l\tlinearize PDF\n"
+ "\t-s\tclean content streams\n"
+ "\t-z\tdeflate uncompressed streams\n"
+ "\tpage.txt file defines page size, fonts, images and contents\n"
);
exit(1);
}
-/* Simple structures to hold and manage contents */
-typedef struct doc_content_s doc_content;
-typedef struct resources_s resources;
-typedef struct page_resource_xref_s page_resource_xref;
+static fz_context *ctx = NULL;
+static pdf_document *doc = NULL;
+static pdf_write_options opts = { 0 };
-struct page_resource_xref_s
+static void add_font_res(pdf_obj *fonts, char *name, char *path)
{
- char *res_name;
- char res_ref[MAX_REF_NAME];
- pdf_obj *obj;
- int type;
-};
-
-struct resources_s
-{
- char *name;
- int ref;
-};
+ unsigned char *data;
+ unsigned int size;
+ fz_font *font;
+ pdf_obj *res;
+
+ data = fz_lookup_base14_font(ctx, path, &size);
+ if (data)
+ font = fz_new_font_from_memory(ctx, NULL, data, size, 0, 0);
+ else
+ font = fz_new_font_from_file(ctx, NULL, path, 0, 0);
+
+ res = pdf_add_simple_font_res(ctx, doc, font);
+ pdf_dict_puts(ctx, fonts, name, res);
+ pdf_drop_obj(ctx, res);
+
+ fz_drop_font(ctx, font);
+}
-struct doc_content_s
+static void add_image_res(pdf_obj *images, char *name, char *path)
{
- fz_stream *stm;
- int num_pages;
- fz_point *page_sizes;
- fz_off_t *content_offsets;
- int *content_lengths;
- resources *fonts;
- int num_fonts;
- resources *images;
- int num_images;
- page_resource_xref **ref_im_resources;
- page_resource_xref **ref_font_resources;
- int *num_page_im_res;
- int *num_page_font_res;
-};
+ fz_image *image;
+ pdf_obj *res;
-/* Look for the presence of image or font refererences in the content */
-static void check_for_reference(fz_context *ctx, resources *resource, int num_res, char *buffer, int res_type, doc_content *doc, int page_num)
-{
- int i;
- char * pch;
- int length;
- char name[MAX_REF_NAME];
- int res_num;
+ image = fz_new_image_from_file(ctx, path);
- for (i = 0; i < num_res; i++)
- {
- pch = strchr(resource[i].name, ':');
- if (pch != NULL)
- {
- length = pch - resource[i].name;
- if (length < sizeof(name))
- {
- memcpy(name, resource[i].name, length);
- name[length] = 0;
- pch = strstr(buffer, name);
- if (pch != NULL)
- {
- /* Resource reference is in the content for this document. Mark
- * it as such so that we know to add it to the document */
- resource[i].ref = 1;
+ res = pdf_add_image_res(ctx, doc, image, 0);
+ pdf_dict_puts(ctx, images, name, res);
+ pdf_drop_obj(ctx, res);
- /* Also note this page has a reference to this name so
- * we can add it to the page resource list. Select from
- * font or image */
- if (res_type == RES_FONT)
- {
- res_num = doc->num_page_font_res[page_num];
- doc->ref_font_resources[page_num][res_num].obj = NULL; /* Set later */
- doc->ref_font_resources[page_num][res_num].res_name = resource[i].name;
- doc->ref_font_resources[page_num][res_num].type = res_type;
- memcpy(doc->ref_font_resources[page_num][res_num].res_ref, name, sizeof(name));
- doc->num_page_font_res[page_num] += 1;
- }
- else
- {
- res_num = doc->num_page_im_res[page_num];
- doc->ref_im_resources[page_num][res_num].obj = NULL; /* Set later */
- doc->ref_im_resources[page_num][res_num].res_name = resource[i].name;
- doc->ref_im_resources[page_num][res_num].type = res_type;
- memcpy(doc->ref_im_resources[page_num][res_num].res_ref, name, sizeof(name));
- doc->num_page_im_res[page_num] += 1;
- }
- }
- }
- else
- fz_throw(ctx, FZ_ERROR_GENERIC, "Image/Font indirect name too long");
- }
- else
- fz_throw(ctx, FZ_ERROR_GENERIC, "Internal parsing error");
- }
+ fz_drop_image(ctx, image);
}
-static void drop_ref_objs(fz_context *ctx, page_resource_xref *refs, int num_refs)
+/*
+The input is a raw content stream, with commands embedded in comments:
+
+%%MediaBox LLX LLY URX URY
+%%Rotate Angle
+%%Font Name Filename (or base 14 font name)
+%%Image Name Filename
+*/
+static void create_page(char *input)
{
- int i;
-
- for (i = 0; i < num_refs; i++)
- pdf_drop_obj(ctx, refs[i].obj);
-}
+ fz_rect mediabox = { 0, 0, 595, 842 };
+ int rotate = 0;
-static void free_contents(fz_context *ctx, doc_content *content)
-{
- int i;
+ char line[4096];
+ char *s, *p;
+ fz_stream *stm;
- fz_free(ctx, content->content_offsets);
- fz_free(ctx, content->content_lengths);
- fz_free(ctx, content->page_sizes);
- for (i = 0; i < content->num_pages; i++)
- {
- drop_ref_objs(ctx, content->ref_font_resources[i], content->num_page_font_res[i]);
- fz_free(ctx, content->ref_font_resources[i]);
- }
- fz_free(ctx, content->ref_font_resources);
- for (i = 0; i < content->num_pages; i++)
- {
- drop_ref_objs(ctx, content->ref_im_resources[i], content->num_page_im_res[i]);
- fz_free(ctx, content->ref_im_resources[i]);
- }
- fz_free(ctx, content->num_page_im_res);
- fz_free(ctx, content->num_page_font_res);
- fz_free(ctx, content->ref_im_resources);
- content->content_offsets = NULL;
- content->page_sizes = NULL;
- content->content_lengths = NULL;
- fz_drop_stream(ctx, content->stm);
-}
+ fz_buffer *contents;
+ pdf_obj *resources;
+ pdf_obj *fonts;
+ pdf_obj *images;
+ pdf_page *page;
-/* This is a VERY simple format to give us something to play with
- * in terms of defining pages and content. Here we parse the contents
- * defining our page sizes and the command locations for each page.
- * The format is as follows:
- * 1) Comment lines are preceded by %
- * 2) Number of pages is indicated at the begining with /Pages #
- * 3) Each page is indicated by /Page # [X Y] (# is zero based)
- * 4) The content is in the form of simple PDF content stream that
- * may included various drawing commands and reference the
- * image and font resources.
- * */
-static void init_parse_contents(fz_context *ctx, char *content_fn, doc_content *content)
-{
- fz_stream *stm;
- char buf[1024];
- int page_count = 0;
- fz_off_t pre_off;
- int i;
+ resources = pdf_new_dict(ctx, doc, 2);
+ fonts = pdf_new_dict(ctx, doc, 10);
+ images = pdf_new_dict(ctx, doc, 10);
+ pdf_dict_put(ctx, resources, PDF_NAME_Font, fonts);
+ pdf_dict_put(ctx, resources, PDF_NAME_XObject, images);
- fz_var(stm);
+ contents = fz_new_buffer(ctx, 1024);
- fz_try(ctx)
+ stm = fz_open_file(ctx, input);
+ while (fz_read_line(ctx, stm, line, sizeof line))
{
- stm = fz_open_file(ctx, content_fn);
- while (1)
+ if (line[0] == '%' && line[1] == '%')
{
- pre_off = fz_tell(ctx, stm);
- fz_read_line(ctx, stm, buf, sizeof buf);
- if (buf[0] == '\0')
+ p = line;
+ s = fz_strsep(&p, " ");
+ if (!strcmp(s, "%%MediaBox"))
{
- if (content->num_pages > (page_count + 1))
- fz_throw(ctx, FZ_ERROR_GENERIC, "Missing defined pages");
- else
- {
- content->content_lengths[page_count - 1] =
- fz_tell(ctx, stm) - content->content_offsets[page_count - 1];
- }
- break;
+ s = fz_strsep(&p, " "); mediabox.x0 = fz_atoi(s);
+ s = fz_strsep(&p, " "); mediabox.y0 = fz_atoi(s);
+ s = fz_strsep(&p, " "); mediabox.x1 = fz_atoi(s);
+ s = fz_strsep(&p, " "); mediabox.y1 = fz_atoi(s);
}
- if (buf[0] != '%')
+ else if (!strcmp(s, "%%Rotate"))
{
- if (strncmp(buf, "/Pages", strlen("/Pages")) == 0)
- {
- if ((content->num_pages = atoi(&(buf[strlen("/Pages")]))) <= 0)
- fz_throw(ctx, FZ_ERROR_GENERIC, "Page count invalid");
- content->content_offsets = fz_malloc_array(ctx, content->num_pages, sizeof(fz_off_t));
- content->content_lengths = fz_malloc_array(ctx, content->num_pages, sizeof(int));
- content->page_sizes = fz_malloc_array(ctx, content->num_pages, sizeof(fz_point));
- content->num_page_im_res = fz_calloc(ctx, content->num_pages, sizeof(int));
- content->num_page_font_res = fz_calloc(ctx, content->num_pages, sizeof(int));
- content->ref_font_resources = fz_malloc_array(ctx, content->num_pages, sizeof(page_resource_xref*));
- content->ref_im_resources = fz_malloc_array(ctx, content->num_pages, sizeof(page_resource_xref*));
- for (i = 0; i < content->num_pages; i++)
- {
- content->ref_font_resources[i] = fz_malloc_array(ctx, MAX_FONTS, sizeof(page_resource_xref));
- content->ref_im_resources[i] = fz_malloc_array(ctx, MAX_FONTS, sizeof(page_resource_xref));
- }
- }
- else if (strncmp(buf, "/Page", strlen("/Page")) == 0)
- {
- int page_num;
- if (page_count > 0)
- {
- content->content_lengths[page_count - 1] =
- (int)(pre_off - content->content_offsets[page_count - 1]);
- }
- if (sscanf(&(buf[strlen("/Page")]), "%d [%f %f]", &page_num,
- &(content->page_sizes[page_count].x), &(content->page_sizes[page_count].y)) != 3)
- fz_throw(ctx, FZ_ERROR_GENERIC, "Page size invalid");
- if (page_num < 0 || page_num >= content->num_pages)
- fz_throw(ctx, FZ_ERROR_GENERIC, "Page value invalid");
- if (content->page_sizes[page_count].x < 0 || content->page_sizes[page_count].y < 0)
- fz_throw(ctx, FZ_ERROR_GENERIC, "Page dimensions invalid");
- content->content_offsets[page_count] = fz_tell(ctx, stm);
- page_count++;
- }
- else
- {
- check_for_reference(ctx, content->images, content->num_images, buf, RES_XOBJECT, content, page_count - 1);
- check_for_reference(ctx, content->fonts, content->num_fonts, buf, RES_FONT, content, page_count - 1);
- }
+ s = fz_strsep(&p, " "); rotate = fz_atoi(s);
}
- }
- }
- fz_catch(ctx)
- {
- free_contents(ctx, content);
- fz_drop_stream(ctx, stm);
- fz_rethrow(ctx);
- }
- content->stm = stm;
-}
-
-/* Get the page contents */
-static int get_page_contents(fz_context *ctx, int page_num, doc_content *content,
- unsigned char *buffer)
-{
- int size = content->content_lengths[page_num];
-
- if (buffer == NULL)
- return size;
- fz_seek(ctx, content->stm, content->content_offsets[page_num], SEEK_SET);
- fz_read(ctx, content->stm, buffer, size);
- return size;
-}
-
-static void update_res(fz_context *ctx, int num_pages, int *num_res, page_resource_xref **page_resource,
- char *res_name, pdf_obj *obj)
-{
- int j, i;
-
- for (j = 0; j < num_pages; j++)
- for (i = 0; i < num_res[j]; i++)
- if (strcmp(res_name, page_resource[j][i].res_name) == 0)
- page_resource[j][i].obj = obj;
-}
-
-static pdf_obj* create_page_res_dict(fz_context *ctx, pdf_document *pdf,
- page_resource_xref *ref_res, const char type[], int num_items)
-{
- pdf_obj *dict = NULL;
- int i;
-
- if (num_items <= 0)
- return NULL;
-
- fz_var(dict);
-
- fz_try(ctx)
- {
- dict = pdf_new_dict(ctx, pdf, num_items);
- for (i = 0; i < num_items; i++)
- {
- char text[32];
- snprintf(text, sizeof(text), "%s/%s", type, ref_res[i].res_ref);
- pdf_dict_putp(ctx, dict, text, ref_res[i].obj);
- }
- }
- fz_catch(ctx)
- {
- pdf_drop_obj(ctx, dict);
- fz_rethrow(ctx);
- }
- return dict;
-}
-
-static int create_pdf(fz_context *ctx, char *output, resources fonts[], int num_fonts,
- resources images[], int num_images, char *contents)
-{
- fz_rect bounds;
- pdf_document *pdf = NULL;
- pdf_page *newpage = NULL;
- unsigned char *buffer = NULL;
- fz_buffer *fz_buf = NULL;
- fz_buffer *im_font_buff = NULL;
- fz_image *image = NULL;
- pdf_obj *font_dict = NULL;
- pdf_obj *im_dict = NULL;
- pdf_obj *im_res = NULL;
- pdf_obj *font_res = NULL;
- pdf_write_options opts = { 0 };
- doc_content content = { 0 };
- int k;
- int length;
- char *pch;
-
- fz_var(pdf);
- fz_var(newpage);
- fz_var(buffer);
- fz_var(fz_buf);
- fz_var(im_font_buff);
- fz_var(image);
- fz_var(font_dict);
- fz_var(im_dict);
- fz_var(im_res);
- fz_var(font_res);
-
- fz_try(ctx)
- {
- pdf = pdf_create_document(ctx);
- content.num_fonts = num_fonts;
- content.num_images = num_images;
- content.fonts = fonts;
- content.images = images;
- init_parse_contents(ctx, contents, &content);
-
- /* Add the resources, getting the reference numbers in the process. */
- for (k = 0; k < content.num_images; k++)
- {
- if (content.images[k].ref)
+ else if (!strcmp(s, "%%Font"))
{
- /* Get the fz_image */
- pch = strchr(content.images[k].name, ':');
- if (pch != NULL)
- {
- im_font_buff = fz_read_file(ctx, &(pch[1]));
- image = fz_new_image_from_buffer(ctx, im_font_buff);
- fz_drop_buffer(ctx, im_font_buff);
- im_font_buff = NULL;
- im_res = pdf_add_image_res(ctx, pdf, image, 0);
- fz_drop_image(ctx, image);
- image = NULL;
-
- /* Look through our image page resources and update the
- * indirect reference number. */
- update_res(ctx, content.num_pages, content.num_page_im_res,
- content.ref_im_resources, content.images[k].name, im_res);
- }
- else
- fz_throw(ctx, FZ_ERROR_GENERIC, "Image indirect name too long");
+ s = fz_strsep(&p, " ");
+ add_font_res(fonts, s, p);
}
- }
- for (k = 0; k < content.num_fonts; k++)
- {
- if (content.fonts[k].ref)
+ else if (!strcmp(s, "%%Image"))
{
- pch = strchr(content.fonts[k].name, ':');
- if (pch != NULL)
- {
- fz_font *font;
- im_font_buff = fz_read_file(ctx, &(pch[1]));
- font = fz_new_font_from_buffer(ctx, NULL, im_font_buff, 0, 0);
- font_res = pdf_add_simple_font_res(ctx, pdf, font);
- fz_drop_buffer(ctx, im_font_buff);
- fz_drop_font(ctx, font);
- im_font_buff = NULL;
-
- /* Look through our font page resources and update the indirect
- * reference number */
- update_res(ctx, content.num_pages, content.num_page_font_res,
- content.ref_font_resources, content.fonts[k].name, font_res);
- }
- else
- fz_throw(ctx, FZ_ERROR_GENERIC, "Font indirect name too long");
+ s = fz_strsep(&p, " ");
+ add_image_res(images, s, p);
}
}
-
- /* Now the page contents */
- for (k = 0; k < content.num_pages; k++)
+ else
{
- bounds.x0 = 0;
- bounds.y0 = 0;
- bounds.x1 = content.page_sizes[k].x;
- bounds.y1 = content.page_sizes[k].y;
-
- length = get_page_contents(ctx, k, &content, NULL);
- if (length > 0)
- {
- buffer = fz_malloc(ctx, length);
- length = get_page_contents(ctx, k, &content, buffer);
- fz_buf = fz_new_buffer_from_data(ctx, buffer, length);
- buffer = NULL;
- newpage = pdf_create_page(ctx, pdf, bounds, 0, fz_buf);
- /* Create the dicts for the page resources */
- font_dict = create_page_res_dict(ctx, pdf, content.ref_font_resources[k],
- "Font", content.num_page_font_res[k]);
- im_dict = create_page_res_dict(ctx, pdf, content.ref_im_resources[k],
- "XObject", content.num_page_im_res[k]);
- if (im_dict != NULL)
- {
- pdf_dict_puts(ctx, newpage->me, "Resources", im_dict);
- pdf_drop_obj(ctx, im_dict);
- }
- if (font_dict != NULL)
- {
- pdf_dict_puts(ctx, newpage->me, "Resources", font_dict);
- pdf_drop_obj(ctx, font_dict);
- }
- fz_drop_buffer(ctx, fz_buf);
- fz_buf = NULL;
- }
- else
- {
- newpage = pdf_create_page(ctx, pdf, bounds, 0, NULL);
- }
- pdf_insert_page(ctx, pdf, newpage, INT_MAX);
- pdf_drop_page(ctx, newpage);
- newpage = NULL;
+ fz_write_buffer(ctx, contents, line, strlen(line));
+ fz_write_buffer_byte(ctx, contents, '\n');
}
- pdf_save_document(ctx, pdf, output, &opts);
- }
- fz_always(ctx)
- {
- pdf_drop_page(ctx, newpage);
- pdf_close_document(ctx, pdf);
- free_contents(ctx, &content);
- fz_free(ctx, buffer);
- fz_drop_buffer(ctx, fz_buf);
- fz_drop_buffer(ctx, im_font_buff);
- fz_drop_image(ctx, image);
}
- fz_catch(ctx)
- {
- fz_rethrow(ctx);
- }
- return 0;
+ fz_drop_stream(ctx, stm);
+
+ page = pdf_create_page(ctx, doc, &mediabox, rotate, contents, resources);
+ pdf_insert_page(ctx, doc, page, INT_MAX);
+ pdf_drop_page(ctx, page);
+
+ fz_drop_buffer(ctx, contents);
+ pdf_drop_obj(ctx, resources);
+ pdf_drop_obj(ctx, fonts);
+ pdf_drop_obj(ctx, images);
}
int pdfcreate_main(int argc, char **argv)
{
- char *outfile = "out.pdf";
- resources fonts[MAX_FONTS];
- resources images[MAX_IMAGES];
- char *contents = "";
- int nfonts = 0;
- int nimages = 0;
- int c;
- int errors = 0;
- fz_context *ctx;
+ char *output = "out.pdf";
+ int i, c;
- while ((c = fz_getopt(argc, argv, "f:i:o:")) != -1)
+ while ((c = fz_getopt(argc, argv, "adlszo:")) != -1)
{
switch (c)
{
- case 'f':
- if (nfonts == MAX_FONTS)
- {
- fprintf(stderr, "max number of fonts exceeded\n");
- exit(1);
- }
- fonts[nfonts].name = fz_optarg;
- fonts[nfonts].ref = 0;
- nfonts++;
- break;
- case 'i':
- if (nimages == MAX_IMAGES)
- {
- fprintf(stderr, "max number of images exceeded\n");
- exit(1);
- }
- images[nimages].name = fz_optarg;
- images[nimages].ref = 0;
- nimages++;
- break;
- case 'o': outfile = fz_optarg; break;
+ case 'o': output = fz_optarg; break;
+ case 'a': opts.do_ascii ++; break;
+ case 'd': opts.do_expand ^= PDF_EXPAND_ALL; break;
+ case 'l': opts.do_linear ++; break;
+ case 's': opts.do_clean ++; break;
+ case 'z': opts.do_deflate ++; break;
default: usage(); break;
}
}
@@ -494,8 +159,6 @@ int pdfcreate_main(int argc, char **argv)
if (fz_optind == argc)
usage();
- contents = argv[fz_optind++];
-
ctx = fz_new_context(NULL, NULL, FZ_STORE_UNLIMITED);
if (!ctx)
{
@@ -503,15 +166,16 @@ int pdfcreate_main(int argc, char **argv)
exit(1);
}
- fz_try(ctx)
- {
- create_pdf(ctx, outfile, fonts, nfonts, images, nimages, contents);
- }
- fz_catch(ctx)
- {
- errors++;
- }
- fz_drop_context(ctx);
+ doc = pdf_create_document(ctx);
- return errors != 0;
+ for (i = fz_optind; i < argc; ++i)
+ create_page(argv[i]);
+
+ pdf_save_document(ctx, doc, output, &opts);
+
+ pdf_close_document(ctx, doc);
+
+ fz_flush_warnings(ctx);
+ fz_drop_context(ctx);
+ return 0;
}