#! /usr/bin/env python2

# Copyright (c) 2004-2005, 2007 The Regents of The University of Michigan
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Authors: Steve Reinhardt
#          Ali Saidi

# Important!
# This script expects a simple $ prompt. If you are using a shell other
# than sh (which defaults to this) you'll need to add something like the
# following to your bashrc/bash_profile script:
#if [ "$OAR_USER" = "xxxx" ]; then
#   PS1='$ '
#fi

import sys
import os
import re
import time
import optparse

import pexpect

progname = os.path.basename(sys.argv[0])

usage = "%prog [options] command [command arguments]"
optparser = optparse.OptionParser(usage=usage)
# Stop option parsing at the first positional argument so that options
# belonging to the wrapped command are not consumed by this script.
optparser.disable_interspersed_args()
optparser.add_option('-e', dest='stderr_file',
                     help='command stderr output file')
optparser.add_option('-o', dest='stdout_file',
                     help='command stdout output file')
optparser.add_option('-l', dest='save_log', action='store_true',
                     help='save oarsub output log file')
optparser.add_option('-N', dest='job_name',
                     help='oarsub job name')
optparser.add_option('-q', dest='dest_queue',
                     help='oarsub destination queue')
optparser.add_option('--qwait', dest='oarsub_timeout', type='int',
                     help='oarsub queue wait timeout', default=30*60)
optparser.add_option('-t', dest='cmd_timeout', type='int',
                     help='command execution timeout', default=600*60)

(options, cmd) = optparser.parse_args()

if not cmd:
    sys.stderr.write("%s: missing command\n" % progname)
    sys.exit(1)

# If we want to do this, need to add check here to make sure cmd[0] is
# a valid PBS job name, else oarsub will die on us.
#
#if not options.job_name:
#    options.job_name = cmd[0]

cwd = os.getcwd()

# Deal with systems where /n is a symlink to /.automount
if cwd.startswith('/.automount/'):
    cwd = cwd.replace('/.automount/', '/n/', 1)

if not cwd.startswith('/n/poolfs/'):
    sys.stderr.write("Error: current directory must be under /n/poolfs.\n")
    sys.exit(1)


class Shell(pexpect.spawn):
    """pexpect.spawn wrapper that assumes a Bourne/bash shell on the far end.

    The constructor waits for a plain '$ ' prompt and then switches the
    prompt to the fixed string 'qdo$ ' so later matches are reliable.
    Everything matched through expect() is accumulated in full_output.
    """

    # Regexp to match the shell prompt.  We change the prompt to
    # something fixed and distinctive to make it easier to match
    # reliably.
    prompt_re = re.compile(r'qdo\$ ')

    def __init__(self, cmd):
        """Spawn cmd and wait for its first shell prompt.

        Exits the whole script with status 1 if the process cannot be
        spawned or no prompt shows up within options.oarsub_timeout
        seconds.
        """
        # initialize base pexpect.spawn object
        try:
            pexpect.spawn.__init__(self, cmd)
        except pexpect.ExceptionPexpect as exc:
            print("%s: %s" % (progname, exc))
            sys.exit(1)
        # full_output accumulates the full output of the session
        self.full_output = ""
        # timeout (seconds) for commands that should return immediately
        self.quick_timeout = 15
        # wait for a prompt, then change it
        try:
            self.expect(r'\$ ', options.oarsub_timeout)
        except pexpect.TIMEOUT:
            sys.stderr.write("%s: oarsub timed out.\n" % progname)
            self.kill(9)
            self.safe_close()
            sys.exit(1)
        self.do_command('unset PROMPT_COMMAND; PS1="qdo$ "')

    def expect(self, regexp, timeout = -1):
        """Version of expect that updates full_output too.

        Returns whatever pexpect.spawn.expect returns (the index of the
        matched pattern) so the override stays a drop-in replacement.
        """
        index = pexpect.spawn.expect(self, regexp, timeout)
        self.full_output += self.before + self.after
        return index

    def do_bare_command(self, cmd, timeout = -1):
        """Issue a command and wait for the next prompt.

        Returns a string containing the output of the command with
        trailing whitespace stripped.
        """
        self.sendline(cmd)
        # read back the echo of the command
        self.readline()
        # wait for the next prompt
        self.expect(self.prompt_re, timeout)
        return self.before.rstrip()

    def do_command(self, cmd, timeout = -1):
        """Issue a command, then query its exit status.

        Returns a (string, int) tuple with the command output and the
        status.
        """
        # do the command itself
        output = self.do_bare_command(cmd, timeout)
        # collect status
        status = int(self.do_bare_command("echo $?", self.quick_timeout))
        return (output, status)

    def dir_exists(self, dirname):
        """Check to see if the given directory exists on the far end."""
        # Run the test through this instance rather than through whatever
        # module-level variable happens to hold a Shell.
        (_, status) = self.do_command('[ -d %s ]' % dirname,
                                      self.quick_timeout)
        return status == 0

    def safe_close(self):
        """Give the child up to ~10 seconds to exit by itself, then close.

        Don't actually try to close it.. just wait until it closes by
        itself.  We can't actually kill the pid which is what it's trying
        to do, and if we call wait we could be in an unfortunate situation
        of it printing input right as we call wait, so the input is never
        read and the process never ends.
        """
        count = 0
        while self.isalive() and count < 10:
            time.sleep(1)
            # Without this increment the 10-iteration bound never applies
            # and a child that stays alive hangs the script forever.
            count += 1
        self.close(force=False)


# Spawn the interactive pool job.

# Hack to do link on poolfs... disabled for now since
# compiler/linker/library versioning problems between poolfs and
# nodes.  May never work since poolfs is x86-64 and nodes are 32-bit.
if False and len(cmd) > 50:
    shell_cmd = 'ssh -t poolfs /bin/sh -l'
    print("%s: running %s on poolfs" % (progname, cmd[0]))
else:
    shell_cmd = 'oarsub -I'
    if options.job_name:
        shell_cmd += ' -n "%s"' % options.job_name
    if options.dest_queue:
        shell_cmd += ' -q ' + options.dest_queue
    shell_cmd += ' -d %s' % cwd

shell = Shell(shell_cmd)

# Assume failure until the command reports otherwise, so the cleanup code
# below can always read 'status', even when an exception escapes early.
status = 1

try:
    # chdir to cwd
    (output, status) = shell.do_command('cd ' + cwd)

    if status != 0:
        raise OSError("Can't chdir to %s" % cwd)

    # wacky hack: sometimes scons will create an output directory then
    # fork a job to generate files in that directory, and the job will
    # get run before the directory creation propagates through NFS.
    # This hack looks for a '-o' option indicating an output file and
    # waits for the corresponding directory to appear if necessary.
    try:
        if 'cc' in cmd[0] or 'g++' in cmd[0]:
            output_dir = os.path.dirname(cmd[cmd.index('-o')+1])
        elif 'm5' in cmd[0]:
            output_dir = cmd[cmd.index('-d')+1]
        else:
            output_dir = None
    except (ValueError, IndexError):
        # no big deal if there's no '-o'/'-d' or if it's the final argument
        output_dir = None

    if output_dir:
        secs_waited = 0
        while not shell.dir_exists(output_dir) and secs_waited < 90:
            time.sleep(5)
            secs_waited += 5
        if secs_waited > 30:
            print("waited %d seconds for %s" % (secs_waited, output_dir))

    # run command
    if options.stdout_file:
        cmd += ['>', options.stdout_file]
    if options.stderr_file:
        cmd += ['2>', options.stderr_file]
    try:
        (output, status) = shell.do_command(' '.join(cmd),
                                            options.cmd_timeout)
    except pexpect.TIMEOUT:
        sys.stderr.write("%s: command timed out after %d seconds.\n"
                         % (progname, options.cmd_timeout))
        shell.sendline('~.') # oarsub/ssh termination escape sequence
        shell.safe_close()
        status = 3
    else:
        # Only print when the command actually completed; on a timeout
        # 'output' would still hold the output of the 'cd' above.
        if output:
            print(output)
finally:
    # end job
    if shell.isalive():
        shell.sendline('exit')
        shell.expect('Disconnected from OAR job .*')
        shell.safe_close()

    # if there was an error, log the output even if not requested
    if status != 0 or options.save_log:
        with open('qdo-log.' + str(os.getpid()), 'w') as log:
            log.write(shell.full_output)
del shell

sys.exit(status)