diff options
-rw-r--r-- | Jamfile | 4 | ||||
-rw-r--r-- | apps/samxml.c | 26 | ||||
-rw-r--r-- | include/samus.h | 29 | ||||
-rw-r--r-- | include/samus/xml.h | 22 | ||||
-rw-r--r-- | include/samus/zip.h | 28 | ||||
-rw-r--r-- | samus/sa_xml.c | 318 |
6 files changed, 398 insertions, 29 deletions
@@ -269,8 +269,7 @@ SubDir TOP samus ; Library libsamus : sa_zip.c - # sa_xml.c - + sa_xml.c ; # -------------------------------------------------------------------------- @@ -287,6 +286,7 @@ APPLIST = pdfmerge pdfselect samzip + samxml ; for APP in $(APPLIST) diff --git a/apps/samxml.c b/apps/samxml.c new file mode 100644 index 00000000..ab5a82e1 --- /dev/null +++ b/apps/samxml.c @@ -0,0 +1,26 @@ +#include "fitz.h" +#include "samus.h" + +int main(int argc, char **argv) +{ + fz_error *error; + fz_file *file; + sa_xmlnode *xml; + + error = fz_openfile(&file, argv[1], FZ_READ); + if (error) + fz_abort(error); + + error = sa_parsexml(&xml, file, 0); + if (error) + fz_abort(error); + + fz_closefile(file); + + sa_debugxml(xml, 0); + + sa_dropxml(xml); + + return 0; +} + diff --git a/include/samus.h b/include/samus.h index c90b8cad..215fe573 100644 --- a/include/samus.h +++ b/include/samus.h @@ -7,31 +7,6 @@ #error "fitz.h must be included before samus.h" #endif -/* - * ZIP archive - */ - -typedef struct sa_zip_s sa_zip; -typedef struct sa_zipent_s sa_zipent; - -struct sa_zipent_s -{ - unsigned offset; - unsigned csize; - unsigned usize; - char *name; -}; - -struct sa_zip_s -{ - fz_file *file; - int len; - sa_zipent *table; -}; - -fz_error *sa_openzip(sa_zip **zipp, char *filename); -void sa_debugzip(sa_zip *zip); -void sa_closezip(sa_zip *zip); -fz_error *sa_openzipstream(sa_zip *zip, char *name); -void sa_closezipstream(sa_zip *zip); +#include "samus/zip.h" +#include "samus/xml.h" diff --git a/include/samus/xml.h b/include/samus/xml.h new file mode 100644 index 00000000..04308b66 --- /dev/null +++ b/include/samus/xml.h @@ -0,0 +1,22 @@ +/* + * XML mini-dom based on Inferno's XML parser API. + * This one uses expat and in-memory objects though... :( + */ + +typedef struct sa_xmlnode_s sa_xmlnode; + +fz_error *sa_parsexml(sa_xmlnode **nodep, fz_file *file, int ns); +void sa_debugxml(sa_xmlnode *node, int level); +void sa_dropxml(sa_xmlnode *node); + +sa_xmlnode *sa_xmlup(sa_xmlnode *node); +sa_xmlnode *sa_xmlnext(sa_xmlnode *node); +sa_xmlnode *sa_xmldown(sa_xmlnode *node); + +int sa_isxmltext(sa_xmlnode *node); +int sa_isxmltag(sa_xmlnode *node); + +char *sa_getxmlname(sa_xmlnode *node); +char *sa_getxmlatt(sa_xmlnode *node, char *att); +char *sa_getxmltext(sa_xmlnode *node); + diff --git a/include/samus/zip.h b/include/samus/zip.h new file mode 100644 index 00000000..656fc731 --- /dev/null +++ b/include/samus/zip.h @@ -0,0 +1,28 @@ +/* + * ZIP archive + */ + +typedef struct sa_zip_s sa_zip; +typedef struct sa_zipent_s sa_zipent; + +struct sa_zipent_s +{ + unsigned offset; + unsigned csize; + unsigned usize; + char *name; +}; + +struct sa_zip_s +{ + fz_file *file; + int len; + sa_zipent *table; +}; + +fz_error *sa_openzip(sa_zip **zipp, char *filename); +void sa_debugzip(sa_zip *zip); +void sa_closezip(sa_zip *zip); +fz_error *sa_openzipstream(sa_zip *zip, char *name); +void sa_closezipstream(sa_zip *zip); + diff --git a/samus/sa_xml.c b/samus/sa_xml.c new file mode 100644 index 00000000..a9b88de0 --- /dev/null +++ b/samus/sa_xml.c @@ -0,0 +1,318 @@ +#include "fitz.h" +#include "samus.h" + +#include <expat.h> + +#define XMLBUFLEN 4096 + +struct sa_xmlnode_s +{ + char *name; + char **atts; + sa_xmlnode *up; + sa_xmlnode *down; + sa_xmlnode *next; +}; + +struct sa_xmlparser_s +{ + fz_error *error; + sa_xmlnode *root; + sa_xmlnode *head; +}; + +static void onopentag(void *zp, const char *name, const char **atts) +{ + struct sa_xmlparser_s *sp = zp; + sa_xmlnode *node; + sa_xmlnode *tail; + int namelen; + int attslen; + int textlen; + char *p; + int i; + + if (sp->error) + return; + + /* count size to alloc */ + + namelen = strlen(name) + 1; + attslen = sizeof(char*); + textlen = 0; + for (i = 0; atts[i]; i++) + { + attslen += sizeof(char*); + textlen += strlen(atts[i]) + 1; + } + + node = fz_malloc(sizeof(sa_xmlnode) + attslen + namelen + textlen); + if (!node) + { + sp->error = fz_outofmem; + return; + } + + /* copy strings to new memory */ + + node->atts = (char**) (((char*)node) + sizeof(sa_xmlnode)); + node->name = ((char*)node) + sizeof(sa_xmlnode) + attslen; + p = ((char*)node) + sizeof(sa_xmlnode) + attslen + namelen; + + strcpy(node->name, name); + for (i = 0; atts[i]; i++) + { + node->atts[i] = p; + strcpy(node->atts[i], atts[i]); + p += strlen(p) + 1; + } + + node->atts[i] = 0; + + /* link node into tree */ + + node->up = sp->head; + node->down = nil; + node->next = nil; + + if (!sp->head) + { + sp->root = node; + sp->head = node; + return; + } + + if (!sp->head->down) + { + sp->head->down = node; + sp->head = node; + return; + } + + tail = sp->head->down; + while (tail->next) + tail = tail->next; + tail->next = node; + sp->head = node; +} + +static void onclosetag(void *zp, const char *name) +{ + struct sa_xmlparser_s *sp = zp; + + if (sp->error) + return; + + if (sp->head) + sp->head = sp->head->up; +} + +static inline int isxmlspace(int c) +{ + return c == ' ' || c == '\t' || c == '\r' || c == '\n'; +} + +static void ontext(void *zp, const char *buf, int len) +{ + struct sa_xmlparser_s *sp = zp; + int i; + + if (sp->error) + return; + + for (i = 0; i < len; i++) + { + if (!isxmlspace(buf[i])) + { + char *tmp = fz_malloc(len + 1); + const char *atts[] = {"", tmp, 0}; + if (!tmp) + { + sp->error = fz_outofmem; + return; + } + memcpy(tmp, buf, len); + tmp[len] = 0; + onopentag(zp, "", atts); + onclosetag(zp, ""); + fz_free(tmp); + return; + } + } +} + +fz_error * +sa_parsexml(sa_xmlnode **nodep, fz_file *file, int ns) +{ + fz_error *error = nil; + struct sa_xmlparser_s sp; + XML_Parser xp; + char *buf; + int len; + + sp.error = nil; + sp.root = nil; + sp.head = nil; + + if (ns) + xp = XML_ParserCreateNS(nil, ns); + else + xp = XML_ParserCreate(nil); + if (!xp) + return fz_outofmem; + + XML_SetUserData(xp, &sp); + XML_SetParamEntityParsing(xp, XML_PARAM_ENTITY_PARSING_NEVER); + + XML_SetStartElementHandler(xp, onopentag); + XML_SetEndElementHandler(xp, onclosetag); + XML_SetCharacterDataHandler(xp, ontext); + + while (1) + { + buf = XML_GetBuffer(xp, XMLBUFLEN); + + len = fz_read(file, buf, XMLBUFLEN); + if (len < 0) + { + error = fz_ferror(file); + goto cleanup; + } + + if (!XML_ParseBuffer(xp, len, len == 0)) + { + error = fz_throw("ioerror: xml: %s", + XML_ErrorString(XML_GetErrorCode(xp))); + goto cleanup; + } + + if (sp.error) + { + error = sp.error; + goto cleanup; + } + + if (len == 0) + break; + } + + *nodep = sp.root; + return nil; + +cleanup: + if (sp.root) + sa_dropxml(sp.root); + XML_ParserFree(xp); + return error; +} + +void +sa_dropxml(sa_xmlnode *node) +{ + sa_xmlnode *next; + while (node) + { + next = node->next; + if (node->down) + sa_dropxml(node->down); + fz_free(node); + node = next; + } +} + +static void indent(int n) +{ + while (n--) + printf(" "); +} + +void +sa_debugxml(sa_xmlnode *node, int level) +{ + int i; + + while (node) + { + indent(level); + + if (sa_isxmltext(node)) + printf("%s\n", sa_getxmltext(node)); + else + { + printf("<%s", node->name); + + for (i = 0; node->atts[i]; i += 2) + printf(" %s=\"%s\"", node->atts[i], node->atts[i+1]); + + if (node->down) + { + printf(">\n"); + sa_debugxml(node->down, level + 1); + indent(level); + printf("</%s>\n", node->name); + } + else + printf(" />\n"); + } + + node = node->next; + } +} + +sa_xmlnode * +sa_xmlup(sa_xmlnode *node) +{ + return node->up; +} + +sa_xmlnode * +sa_xmlnext(sa_xmlnode *node) +{ + return node->next; +} + +sa_xmlnode * +sa_xmldown(sa_xmlnode *node) +{ + return node->down; +} + +int +sa_isxmltext(sa_xmlnode *node) +{ + return node->name[0] == 0; +} + +int +sa_isxmltag(sa_xmlnode *node) +{ + return node->name[0] != 0; +} + +char * +sa_getxmlname(sa_xmlnode *node) +{ + if (sa_isxmltag(node)) + return node->name; + return nil; +} + +char * +sa_getxmlatt(sa_xmlnode *node, char *att) +{ + int i; + for (i = 0; node->atts[i]; i += 2) + if (!strcmp(node->atts[i], att)) + return node->atts[i + 1]; + return nil; +} + +char * +sa_getxmltext(sa_xmlnode *node) +{ + if (sa_isxmltext(node)) + return node->atts[1]; + return nil; +} + + |