diff options
Diffstat (limited to 'util/qdo')
-rwxr-xr-x | util/qdo | 205 |
1 files changed, 205 insertions, 0 deletions
diff --git a/util/qdo b/util/qdo new file mode 100755 index 000000000..ca9ffc416 --- /dev/null +++ b/util/qdo @@ -0,0 +1,205 @@ +#! /usr/bin/env python + +# Copyright (c) 2004-2005 The Regents of The University of Michigan +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import sys +import os +import re +import time +import optparse + +import pexpect + +progname = os.path.basename(sys.argv[0]) + +usage = "%prog [options] command [command arguments]" +optparser = optparse.OptionParser(usage=usage) +optparser.allow_interspersed_args=False +optparser.add_option('-e', dest='stderr_file', + help='command stderr output file') +optparser.add_option('-o', dest='stdout_file', + help='command stdout output file') +optparser.add_option('-l', dest='save_log', action='store_true', + help='save qsub output log file') +optparser.add_option('-q', dest='qsub_timeout', type='int', + help='qsub queue wait timeout', default=30*60) +optparser.add_option('-t', dest='cmd_timeout', type='int', + help='command execution timeout', default=600*60) + +(options, cmd) = optparser.parse_args() + +if cmd == []: + print >>sys.stderr, "%s: missing command" % progname + sys.exit(1) + +cwd = os.getcwd() + +# Deal with systems where /n is a symlink to /.automount +if cwd.startswith('/.automount/'): + cwd = cwd.replace('/.automount/', '/n/', 1) + +if not cwd.startswith('/n/poolfs/'): + print >>sys.stderr, "Error: current directory must be under /n/poolfs." + sys.exit(1) + +# The Shell class wraps pexpect.spawn with some handy functions that +# assume the thing on the other end is a Bourne/bash shell. +class Shell(pexpect.spawn): + # Regexp to match the shell prompt. We change the prompt to + # something fixed and distinctive to make it easier to match + # reliably. + prompt_re = re.compile('qdo\$ ') + + def __init__(self, cmd): + # initialize base pexpect.spawn object + try: + pexpect.spawn.__init__(self, cmd) + except pexpect.ExceptionPexpect, exc: + print "%s:" % progname, exc + sys.exit(1) + # full_output accumulates the full output of the session + self.full_output = "" + self.quick_timeout = 15 + # wait for a prompt, then change it + try: + self.expect('\$ ', options.qsub_timeout) + except pexpect.TIMEOUT: + print >>sys.stderr, "%s: qsub timed out." % progname + self.kill(15) + self.close(wait=True) + sys.exit(1) + self.do_command('PS1="qdo$ "') + + # version of expect that updates full_output too + def expect(self, regexp, timeout = -1): + pexpect.spawn.expect(self, regexp, timeout) + self.full_output += self.before + self.after + + # Just issue a command and wait for the next prompt. + # Returns a string containing the output of the command. + def do_bare_command(self, cmd, timeout = -1): + global full_output + self.sendline(cmd) + # read back the echo of the command + self.readline() + # wait for the next prompt + self.expect(self.prompt_re, timeout) + output = self.before.rstrip() + return output + + # Issue a command, then query its exit status. + # Returns a (string, int) tuple with the command output and the status. + def do_command(self, cmd, timeout = -1): + # do the command itself + output = self.do_bare_command(cmd, timeout) + # collect status + status = int(self.do_bare_command("echo $?", self.quick_timeout)) + return (output, status) + + # Check to see if the given directory exists. + def dir_exists(self, dirname): + (output, status) = shell.do_command('[ -d %s ]' % dirname, + self.quick_timeout) + return status == 0 + + +# Spawn the interactive pool job. + +# Hack to do link on poolfs... disabled for now since +# compiler/linker/library versioning problems between poolfs and +# nodes. May never work since poolfs is x86-64 and nodes are 32-bit. +if False and len(cmd) > 50: + shell_cmd = 'ssh -t poolfs /bin/sh -l' + print "%s: running %s on poolfs" % (progname, cmd[0]) +else: + shell_cmd = 'qsub -I -S /bin/sh' + +shell = Shell(shell_cmd) + +try: + # chdir to cwd + (output, status) = shell.do_command('cd ' + cwd) + + if status != 0: + raise OSError, "Can't chdir to %s" % cwd + + # wacky hack: sometimes scons will create an output directory then + # fork a job to generate files in that directory, and the job will + # get run before the directory creation propagates through NFS. + # This hack looks for a '-o' option indicating an output file and + # waits for the corresponding directory to appear if necessary. + try: + if 'cc' in cmd[0] or 'g++' in cmd[0]: + output_dir = os.path.dirname(cmd[cmd.index('-o')+1]) + elif 'm5' in cmd[0]: + output_dir = cmd[cmd.index('-d')+1] + else: + output_dir = None + except (ValueError, IndexError): + # no big deal if there's no '-o'/'-d' or if it's the final argument + output_dir = None + + if output_dir: + secs_waited = 0 + while not shell.dir_exists(output_dir) and secs_waited < 45: + time.sleep(5) + secs_waited += 5 + if secs_waited > 10: + print "waited", secs_waited, "seconds for", output_dir + + # run command + if options.stdout_file: + cmd += ['>', options.stdout_file] + if options.stderr_file: + cmd += ['2>', options.stderr_file] + try: + (output, status) = shell.do_command(' '.join(cmd), options.cmd_timeout) + except pexpect.TIMEOUT: + print >>sys.stderr, "%s: command timed out after %d seconds." \ + % (progname, options.cmd_timeout) + shell.sendline('~.') # qsub/ssh termination escape sequence + shell.close(wait=True) + status = 3 + if output: + print output + +finally: + # end job + if shell.isalive(): + shell.sendline('exit') + shell.expect('qsub: job .* completed\r\n') + shell.close(wait=True) + + # if there was an error, log the output even if not requested + if status != 0 or options.save_log: + log = file('qdo-log.' + str(os.getpid()), 'w') + log.write(shell.full_output) + log.close() + +del shell + +sys.exit(status) |