add a script for easy regression testing (runs pdftool draw -m on a list of PDFs accessible via http)

author: Krzysztof Kowalczyk <kkowalczyk@gmail.com> 2008-03-28 05:29:19 +0100
committer: Krzysztof Kowalczyk <kkowalczyk@gmail.com> 2008-03-28 05:29:19 +0100
commit: bec4b91f24751be3225b57673c1736a353359a6e (patch)
tree: f19733d8a80b1e85f496315babf9ca3032308055 /test/benchpdfs.py
parent: e0aa0b157db017d14be14f51d917c8ee5749dd75 (diff)
download: mupdf-bec4b91f24751be3225b57673c1736a353359a6e.tar.xz
1 files changed, 151 insertions, 0 deletions
diff --git a/test/benchpdfs.py b/test/benchpdfs.py
new file mode 100755
index 00000000..70ee5f94
--- /dev/null
+++ b/test/benchpdfs.py
@@ -0,0 +1,151 @@
+#!/usr/bin/python
+import sys, os, os.path, urllib2, gzip, bz2, traceback
+
+# Written by Krzysztof Kowalczyk (http://blog.kowalczyk.info)
+# This code is in public domain.
+#
+# A regression testing script
+#
+# Given a list of urls to PDF files, it downloads them and runs
+# pdftool draw -m $file-name
+# on each file. This allows catching crashes e.g. on linux:
+# python test/benchpdfs.py | grep Segmentation
+# will produce an output if pdftool crashed on any of the pdfs
+#
+# Regression PDFs can be hosted anywhere. They can be gzipped or bzip2-compressed
+# to save bandwidth (in which case the url must end in .gz or .bz2)
+# 
+# The script doesn't redownload the file if it has been downloaded before.
+#
+# Missing files are ignored
+
+pdfs_to_test = [
+    "http://darcs.kowalczyk.info/testpdfs/293bcd6b00e006d66fdc62ea436508f3ebb30219.pdf.gz"
+]
+
+local_pdfs_dir = os.path.expanduser("~/testpdfs")
+
+def dir_exists(path):
+    if os.path.exists(path):
+        return os.path.isdir(path)
+    return False
+
+def file_exists(path):
+    if os.path.exists(path):
+        return os.path.isfile(path)
+    return False
+
+# Make a directory if it doesn't exist yet.
+def make_dir(path):
+    if not dir_exists(path): os.makedirs(path)
+
+def write_to_file(path, data):
+    fo = open(path, "wb")
+    fo.write(data)
+    fo.close()
+
+# Does HTTP GET or POST (if data != None). Returns body of the response or
+# None if there was an error.
+# If dump_exception is True, the failure and its traceback are printed to stdout.
+def do_http(url, data = None, dump_exception=False):
+    body = None
+    try:
+        req = urllib2.Request(url, data)
+        resp = urllib2.urlopen(req)
+        body = resp.read()
+    except:
+        if dump_exception:
+            print "do_http failed",url
+            print '-'*60
+            traceback.print_exc(file=sys.stdout)
+            print '-'*60
+    return body
+
+# Tries to find root of the repository. Starts at the current directory and goes up
+# until it can't go any further or finds a directory that contains a "mupdf" directory
+def find_repo_root():
+    curdir = os.getcwd()
+    prevdir = None
+    while curdir != prevdir:
+        if dir_exists(os.path.join(curdir, "mupdf")):
+            return curdir
+        prevdir = curdir
+        curdir = os.path.dirname(curdir)
+    return None
+
+def find_pdftool():
+    root = find_repo_root()
+    if root is None:
+        print "Didn't find the root directory"
+        print "Current directory: '%s'" % os.getcwd()
+        sys.exit(1)
+    print root
+    # check build results for Jam and Makefile; the first candidate that exists is returned
+    for f in [os.path.join("obj-rel", "pdftool"), os.path.join("obj-dbg", "pdftool"),
+              os.path.join("build", "release", "pdftool"), os.path.join("build", "debug", "pdftool")]:
+        path = os.path.join(root, f)
+        if file_exists(path):
+            return path
+    print "Didn't find pdftool. Did you build it?"
+    print "Root dir: '%s'" % root
+    sys.exit(1)
+
+def is_gzipped(filename): return filename.endswith(".gz")
+def is_bzip2ed(filename): return filename.endswith(".bz2")
+
+def uncompress_if_needed(filepath):
+    if is_gzipped(filepath):
+        finalpath = filepath[:-len(".gz")]
+        print "Uncompressing '%s' to '%s'" % (filepath, finalpath)
+        fin = gzip.open(filepath, "rb")
+        fout = open(finalpath, "wb")
+        data = fin.read()
+        fout.write(data)
+        fin.close()
+        fout.close()
+        os.remove(filepath)
+    elif is_bzip2ed(filepath):
+        finalpath = filepath[:-len(".bz2")]
+        print "Uncompressing '%s' to '%s'" % (filepath, finalpath)
+        fin = bz2.BZ2File(filepath, "r")
+        fout = open(finalpath, "wb")
+        data = fin.read()
+        fout.write(data)
+        fin.close()
+        fout.close()
+        os.remove(filepath)
+
+def pdfname_from_url(url): return url.split("/")[-1]
+
+def final_pdfname_from_url(url):
+    potentially_compressed = pdfname_from_url(url)
+    for suffix in [".gz", ".bz2"]:
+        if potentially_compressed.endswith(suffix):
+            return potentially_compressed[:-len(suffix)]
+    return potentially_compressed
+
+def main():
+    print "Starting the test"
+    pdftool = find_pdftool() # make sure to abort early if pdftool doesn't exist
+    #print "pdftool: '%s'" % pdftool
+    make_dir(local_pdfs_dir)
+    for pdfurl in pdfs_to_test:
+        pdfname = pdfname_from_url(pdfurl)
+        local_pdf_path = os.path.join(local_pdfs_dir, pdfname)
+        final_pdfname = final_pdfname_from_url(pdfurl)
+        local_final_pdf_path = os.path.join(local_pdfs_dir, final_pdfname)
+        # Download the file if not already downloaded
+        if not os.path.exists(local_final_pdf_path):
+            print "Downloading pdf file '%s' as '%s'" % (pdfurl, local_pdf_path)
+            pdf_file_data = do_http(pdfurl)
+            if None == pdf_file_data:
+                print "Failed to download '%s'" % pdfurl
+                continue # don't stop the test just because of that
+            write_to_file(local_pdf_path, pdf_file_data)
+            uncompress_if_needed(local_pdf_path)
+        cmd = pdftool + " draw -m " + local_final_pdf_path
+        print "Running '%s'" % cmd
+        os.system(cmd)
+
+if __name__ == "__main__":
+    main()
author	Krzysztof Kowalczyk <kkowalczyk@gmail.com>	2008-03-28 05:29:19 +0100
committer	Krzysztof Kowalczyk <kkowalczyk@gmail.com>	2008-03-28 05:29:19 +0100
commit	bec4b91f24751be3225b57673c1736a353359a6e (patch)
tree	f19733d8a80b1e85f496315babf9ca3032308055 /test/benchpdfs.py
parent	e0aa0b157db017d14be14f51d917c8ee5749dd75 (diff)
download	mupdf-bec4b91f24751be3225b57673c1736a353359a6e.tar.xz