diff options
Diffstat (limited to 'util')
-rwxr-xr-x | util/pbs/job.py | 183 | ||||
-rw-r--r-- | util/pbs/jobfile.py | 83 | ||||
-rwxr-xr-x | util/pbs/pbs.py | 176 | ||||
-rwxr-xr-x | util/pbs/send.py | 190 |
4 files changed, 632 insertions, 0 deletions
diff --git a/util/pbs/job.py b/util/pbs/job.py new file mode 100755 index 000000000..5eed0cd75 --- /dev/null +++ b/util/pbs/job.py @@ -0,0 +1,183 @@ +#!/usr/bin/env python +# Copyright (c) 2005 The Regents of The University of Michigan +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#
# Authors: Nathan Binkert
#          Steve Reinhardt
#          Ali Saidi

import errno
import os, os.path, shutil, signal, socket, sys, time
from os import environ as env
from os.path import join as joinpath, expanduser

class rsync:
    """Builds and runs an rsync command line.

    Attributes (all plain, set them before calling do()):
      sudo     -- prefix the command with 'sudo'
      rsync    -- name/path of the rsync executable
      compress -- pass '-z'
      archive  -- pass '-a'
      delete   -- pass '--delete'
      options  -- extra text passed to rsync as ONE additional argument
    """
    def __init__(self):
        self.sudo = False
        self.rsync = 'rsync'
        self.compress = False
        self.archive = True
        self.delete = False
        self.options = ''

    def do(self, src, dst):
        """Run rsync from src to dst and return os.spawnvp's result."""
        args = []
        if self.sudo:
            args.append('sudo')

        args.append(self.rsync)
        if self.archive:
            args.append('-a')
        if self.compress:
            args.append('-z')
        if self.delete:
            args.append('--delete')
        if self.options:
            args.append(self.options)
        args.extend([src, dst])

        return os.spawnvp(os.P_WAIT, args[0], args)

def cleandir(dir):
    """Remove everything below dir, leaving dir itself in place.

    The tree is walked bottom-up so that directories are already empty
    when they are removed.
    """
    for root, dirs, files in os.walk(dir, False):
        for name in files:
            os.remove(joinpath(root, name))
        for name in dirs:
            path = joinpath(root, name)
            # os.walk reports symlinks to directories in 'dirs' without
            # descending into them; rmdir() fails on a link, so unlink it.
            if os.path.islink(path):
                os.remove(path)
            else:
                os.rmdir(path)

def date():
    """Return the current local time formatted like date(1) output."""
    return time.strftime('%a %b %e %H:%M:%S %Z %Y', time.localtime())

def remfile(file):
    """Delete file if it exists and is a regular file; otherwise do nothing."""
    if os.path.isfile(file):
        os.unlink(file)

def readval(filename):
    """Return the first line of filename with surrounding whitespace removed."""
    f = open(filename, 'r')
    try:
        return f.readline().strip()
    finally:
        f.close()

if __name__ == '__main__':
    # Job layout: <rootdir>/<jobname> receives the output and the marker
    # files, <rootdir>/Base holds the binary and scripts that are run.
    rootdir = env.setdefault('ROOTDIR', os.getcwd())
    jobid = env['PBS_JOBID']
    jobname = env['PBS_JOBNAME']
    jobdir = joinpath(rootdir, jobname)
    basedir = joinpath(rootdir, 'Base')
    user = env['USER']

    env['POOLJOB'] = 'True'
    env['OUTPUT_DIR'] = jobdir
    env['JOBFILE'] = joinpath(basedir, 'test.py')
    env['JOBNAME'] = jobname

    def echofile(filename, string):
        """Write string plus a newline to jobdir/filename; exit on IOError."""
        try:
            f = open(joinpath(jobdir, filename), 'w')
            try:
                f.write('%s\n' % string)
                f.flush()
            finally:
                f.close()
        except IOError as e:
            sys.exit(e)

    if os.path.isdir("/work"):
        workbase = "/work"
    else:
        workbase = "/tmp/"

    workdir = joinpath(workbase, '%s.%s' % (user, jobid))

    os.umask(0o022)

    echofile('.start', date())
    echofile('.jobid', jobid)
    echofile('.host', socket.gethostname())

    if os.path.isdir(workdir):
        cleandir(workdir)
    else:
        os.mkdir(workdir)

    if os.path.isdir('/z/dist'):
        sync = rsync()
        sync.delete = True
        sync.sudo = True
        sync.do('poolfs::dist/m5/', '/z/dist/m5/')

    try:
        os.chdir(workdir)
    except OSError as e:
        sys.exit(e)

    os.symlink(joinpath(jobdir, 'output'), 'status.out')

    args = [ joinpath(basedir, 'm5'), joinpath(basedir, 'run.mpy') ]

    print('starting job... %s' % date())
    print(' '.join(args))
    print('')
    sys.stdout.flush()

    childpid = os.fork()
    if not childpid:
        # Child: redirect stdout/stderr into the job's output file and
        # exec the command.  Whatever goes wrong, the child must never
        # return into the parent's bookkeeping code below.
        try:
            sys.stdin.close()
            fd = os.open(joinpath(jobdir, "output"),
                         os.O_WRONLY | os.O_CREAT | os.O_TRUNC)
            os.dup2(fd, sys.stdout.fileno())
            os.dup2(fd, sys.stderr.fileno())
            os.execvp(args[0], args)
        finally:
            os._exit(1)

    def handler(signum, frame):
        """Forward a signal received by this wrapper to the child."""
        if childpid != 0:
            os.kill(childpid, signum)

    # SIGSTOP (like SIGKILL) cannot be caught; asking for a handler raises,
    # so only catchable signals are forwarded.
    for signum in (signal.SIGHUP, signal.SIGINT, signal.SIGQUIT,
                   signal.SIGTERM, signal.SIGCONT, signal.SIGUSR1,
                   signal.SIGUSR2):
        signal.signal(signum, handler)

    while True:
        try:
            thepid, ec = os.waitpid(childpid, 0)
        except OSError as e:
            # A forwarded signal may interrupt the wait; retry only then.
            if e.errno == errno.EINTR:
                continue
            raise
        break

    if ec:
        print('Exit code  %s' % ec)
        echofile('.failure', date())
    else:
        echofile('.success', date())

    print('\njob complete... %s' % date())
    echofile('.stop', date())

# diff --git a/util/pbs/jobfile.py b/util/pbs/jobfile.py new file mode 100644 index 000000000..570faa61b --- /dev/null +++ b/util/pbs/jobfile.py @@ -0,0 +1,83 @@
# Copyright (c) 2005 The Regents of The University of Michigan
# All rights reserved.
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#
# Authors: Nathan Binkert

import itertools
from os.path import expanduser

def crossproduct(options):
    """Yield every combination that takes one element from each group.

    options is a sequence of sequences.  Combinations are produced as
    fresh lists with the last group varying fastest.  If any group is
    empty nothing is yielded; with no groups at all a single empty list
    is yielded.
    """
    for combination in itertools.product(*options):
        yield list(combination)

class JobFile(object):
    """Job descriptions loaded from a Python job file.

    The job file is executed and must define 'options' and 'environment'.
    Each element of an option group is indexed as [name, item, ...]; the
    items, like the entries of 'environment', are unpacked as (key, value)
    pairs by env() and printinfo().

    Attributes:
      data        -- namespace the job file was executed in
      options     -- data['options']
      environment -- data['environment']
      jobs        -- job names, one per combination of options, built by
                     joining the chosen elements' names with '.'
      jobinfo     -- maps a job name to the list of its option items
    """
    def __init__(self, file):
        self.data = {}
        path = expanduser(file)
        # NOTE: this runs the job file as code; only load trusted files.
        source = open(path)
        try:
            code = compile(source.read() + '\n', path, 'exec')
        finally:
            source.close()
        exec(code, self.data)

        self.options = self.data['options']
        self.environment = self.data['environment']
        self.jobinfo = {}
        self.jobs = []
        for job in crossproduct(self.options):
            jobname = '.'.join([ choice[0] for choice in job ])
            self.jobs.append(jobname)
            self.jobinfo[jobname] = [ item
                                      for choice in job
                                      for item in choice[1:] ]

    def env(self, jobname):
        """Return the settings for jobname as a dict.

        Entries from 'environment' are applied last and therefore
        override job-specific entries with the same key.
        """
        env = {}
        for key,val in self.jobinfo[jobname]:
            env[key] = val

        for key,val in self.environment:
            env[key] = val
        return env

    def printinfo(self, jobname):
        """Print jobname followed by its settings, one per line."""
        print('%s:' % jobname)
        for key,val in self.jobinfo[jobname]:
            print(' %s = %s' % (key, val))

        for key,val in self.environment:
            print(' %s = %s' % (key, val))

# diff --git a/util/pbs/pbs.py b/util/pbs/pbs.py new file mode 100755 index 000000000..ecacbeba2 --- /dev/null +++ b/util/pbs/pbs.py @@ -0,0 +1,176 @@
# Copyright (c) 2005 The Regents of The University of Michigan
# All rights reserved.
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#
# Authors: Nathan Binkert

import os, re, sys
try:
    import popen2
except ImportError:
    # popen2 no longer exists in Python 3; nothing below depends on it.
    popen2 = None

class MyPOpen(object):
    """Run a command in a forked child with redirectable stdin/stdout.

    cmd     -- argv list; cmd[0] is looked up on PATH
    input   -- None (create a pipe, exposed as self.tochild), a filename
               to read from, a file descriptor, or an object with fileno()
    output  -- None (create a pipe, exposed as self.fromchild), a filename
               to write to, a file descriptor, or an object with fileno();
               the child's stderr goes to the same place as its stdout
    bufsize -- buffering argument for the pipe file objects

    self.tochild / self.fromchild are None when the corresponding stream
    is not a pipe.  Descriptors supplied by the caller are left open in
    the parent; only pipes and files opened here are closed.

    Raises AttributeError if input or output has an unsupported type.
    """
    def __init__(self, cmd, input = None, output = None, bufsize = -1):
        self.status = -1
        self.tochild = None
        self.fromchild = None

        child_only = []     # child-side pipe ends: closed in the parent
        parent_only = []    # parent-side pipe ends: closed in the child
        opened = []         # file objects opened here from a filename

        if input is None:
            p2c_read, p2c_write = os.pipe()
            self.tochild = os.fdopen(p2c_write, 'w', bufsize)
            child_only.append(p2c_read)
            parent_only.append(p2c_write)
        elif isinstance(input, str):
            input = open(input, 'r')
            opened.append(input)
            p2c_read = input.fileno()
        elif isinstance(input, int):
            p2c_read = input
        elif hasattr(input, 'fileno'):
            p2c_read = input.fileno()
        else:
            raise AttributeError('unsupported input: %r' % (input,))

        if output is None:
            c2p_read, c2p_write = os.pipe()
            self.fromchild = os.fdopen(c2p_read, 'r', bufsize)
            child_only.append(c2p_write)
            parent_only.append(c2p_read)
        elif isinstance(output, str):
            output = open(output, 'w')
            opened.append(output)
            c2p_write = output.fileno()
        elif isinstance(output, int):
            c2p_write = output
        elif hasattr(output, 'fileno'):
            c2p_write = output.fileno()
        else:
            raise AttributeError('unsupported output: %r' % (output,))

        self.pid = os.fork()
        if self.pid == 0:
            # Child.  Everything is inside try/finally so that a failure
            # can never let a second copy of the caller keep running.
            try:
                # Holding the write end of our own stdin pipe would keep
                # the child from ever seeing end-of-file on it.
                for fd in parent_only:
                    os.close(fd)
                # Use the real descriptors 0/1/2: sys.stdin & co. may
                # have been replaced by objects without a fileno().
                os.dup2(p2c_read, 0)
                os.dup2(c2p_write, 1)
                os.dup2(c2p_write, 2)
                for fd in set([p2c_read, c2p_write]):
                    if fd > 2:
                        os.close(fd)
                os.execvp(cmd[0], cmd)
            finally:
                os._exit(1)

        for fd in child_only:
            os.close(fd)
        for f in opened:
            f.close()

    def poll(self):
        """Return the child's wait status, or -1 if it is still running."""
        if self.status < 0:
            pid, status = os.waitpid(self.pid, os.WNOHANG)
            if pid == self.pid:
                self.status = status
        return self.status

    def wait(self):
        """Block until the child exits and return its wait status."""
        if self.status < 0:
            pid, status = os.waitpid(self.pid, 0)
            if pid == self.pid:
                self.status = status
        return self.status

class qsub:
    """Builds and runs a PBS 'qsub' command line.

    Set the attributes, call build() to assemble self.cmd / self.command,
    then do() to submit.  After do(), self.result holds whatever the
    command printed.
    """
    def __init__(self):
        self.hold = False
        self.join = False
        self.keep_stdout = False
        self.keep_stderr = False
        self.node_type = ''
        self.mail_abort = False
        self.mail_begin = False
        self.mail_end = False
        self.name = ''
        self.stdout = ''
        self.priority = 0
        self.queue = ''
        self.pbshost = ''
        self.qsub = 'qsub'
        self.env = {}

    def build(self, script, args = None):
        """Assemble the qsub argument list for script.

        args is an optional list of extra arguments placed after the
        generated options.  Sets self.cmd (argv without the script),
        self.script and self.command (printable command line).
        """
        self.cmd = [ self.qsub ]

        if self.env:
            pairs = [ '%s=%s' % item for item in self.env.items() ]
            self.cmd.append('-v' + ','.join(pairs))

        if self.hold:
            self.cmd.append('-h')

        if self.stdout:
            self.cmd.append('-olocalhost:' + self.stdout)

        if self.keep_stdout and self.keep_stderr:
            self.cmd.append('-koe')
        elif self.keep_stdout:
            self.cmd.append('-ko')
        elif self.keep_stderr:
            self.cmd.append('-ke')
        else:
            self.cmd.append('-kn')

        if self.join:
            self.cmd.append('-joe')

        if self.node_type:
            self.cmd.append('-lnodes=' + self.node_type)

        flags = ''
        if self.mail_abort:
            flags += 'a'
        if self.mail_begin:
            flags += 'b'
        if self.mail_end:
            flags += 'e'
        if flags:
            # Attached form like every other option here; a space inside
            # one argv element would become part of the option's value.
            self.cmd.append('-m' + flags)

        if self.name:
            self.cmd.append("-N%s" % self.name)

        if self.priority != 0:
            self.cmd.append('-p%s' % self.priority)

        if self.queue:
            self.cmd.append('-q' + self.queue)

        if args:
            self.cmd.extend(args)
        self.script = script
        self.command = ' '.join(self.cmd + [ self.script ])

    def do(self):
        """Submit the job built by build() and return the wait status.

        qsub is first run locally.  If that fails and self.pbshost is
        set, the same command is retried on that host through ssh, with
        the script fed to the remote qsub on standard input.
        """
        pbs = MyPOpen(self.cmd + [ self.script ])
        self.result = pbs.fromchild.read()
        ec = pbs.wait()

        if ec != 0 and self.pbshost:
            cmd = ' '.join(self.cmd + [ '-' ])
            cmd = [ 'ssh', '-x', self.pbshost, cmd ]
            self.command = ' '.join(cmd)
            ssh = MyPOpen(cmd, input = self.script)
            self.result = ssh.fromchild.read()
            ec = ssh.wait()

        return ec

# diff --git a/util/pbs/send.py b/util/pbs/send.py new file mode 100755 index 000000000..4daf15b45 --- /dev/null +++ b/util/pbs/send.py @@ -0,0 +1,190 @@
#!/usr/bin/env python
# Copyright (c) 2005 The Regents of The University of Michigan
# All rights reserved.
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#
# Authors: Ali Saidi
#          Nathan Binkert

import getopt
import os, os.path, re, socket, sys
from os import environ as env, listdir
from os.path import basename, isdir, isfile, islink, join as joinpath
from filecmp import cmp as filecmp
from shutil import copyfile

def nfspath(dir):
    """Rewrite dir so that it starts with '/n/'.

    '/.automount/<rest>' becomes '/n/<rest>'; a path already under '/n/'
    is returned unchanged; any other path is prefixed with
    '/n/<short hostname of this machine>'.
    """
    if dir.startswith('/.automount/'):
        dir = '/n/%s' % dir[12:]
    elif not dir.startswith('/n/'):
        dir = '/n/%s%s' % (socket.gethostname().split('.')[0], dir)
    return dir

progpath = nfspath(sys.path[0])
progname = basename(sys.argv[0])
usage = """\
Usage:
    %(progname)s [-c] [-d dir] [-e] [-f] [-q queue] [-v] <regexp>
    -c           clean directory if job can be run
    -d <dir>     use <dir> as the root directory instead of the
                 current directory
    -e           only echo pbs command info, don't actually send the job
    -f           force the job to run regardless of state
    -q <queue>   submit job to the named queue
    -v           be verbose

    %(progname)s -l [-v] <regexp>
    -l           list job names, don't submit
    -v           be verbose (list job parameters)

    %(progname)s -h
    -h           display this help
""" % locals()

try:
    opts, args = getopt.getopt(sys.argv[1:], '-cd:efhlq:v')
except getopt.GetoptError:
    sys.exit(usage)

clean = False
onlyecho = False
exprs = []
force = False
listonly = False
queue = ''
verbose = False
rootdir = nfspath(os.getcwd())
for opt,arg in opts:
    if opt == '-c':
        clean = True
    if opt == '-d':
        rootdir = arg
    if opt == '-e':
        onlyecho = True
    if opt == '-f':
        force = True
    if opt == '-h':
        print(usage)
        sys.exit(0)
    if opt == '-l':
        listonly = True
    if opt == '-q':
        queue = arg
    if opt == '-v':
        verbose = True

basedir = joinpath(rootdir, 'Base')
linkdir = joinpath(rootdir, 'Link')

# Remaining arguments are regular expressions selecting job names.
for arg in args:
    exprs.append(re.compile(arg))

# Refresh Base/ from Link/: every symlink in Link/ that points at a regular
# file is copied over Base/<same name> when that copy is missing or differs.
if not listonly and not onlyecho and isdir(linkdir):
    if verbose:
        print('Checking for outdated files in Link directory')
    for entry in listdir(linkdir):
        link = joinpath(linkdir, entry)
        if not islink(link) or not isfile(link):
            continue

        base = joinpath(basedir, entry)
        if not isfile(base) or not filecmp(link, base):
            print('Base/%s is different than Link/%s: copying' % (entry, entry))
            copyfile(link, base)

import job, jobfile, pbs

test = jobfile.JobFile(joinpath(basedir, 'test.py'))

# With no expressions every job is selected; otherwise a job is selected
# when any expression matches at the start of its name.
joblist = []
for jobname in test.jobs:
    if not exprs:
        joblist.append(jobname)
        continue

    for expr in exprs:
        if expr.match(jobname):
            joblist.append(jobname)
            break

if listonly:
    if verbose:
        for jobname in joblist:
            test.printinfo(jobname)
    else:
        for jobname in joblist:
            print(jobname)
    sys.exit(0)

if not onlyecho:
    jl = []
    for jobname in joblist:
        jobdir = joinpath(rootdir, jobname)
        # Test the directory that is actually used below: with -d the
        # bare job name would be looked up in the wrong place and an
        # existing job directory would reach os.mkdir().
        if os.path.exists(jobdir):
            if not force:
                # Skip jobs that already succeeded ...
                if os.path.isfile(joinpath(jobdir, '.success')):
                    continue

                # ... and jobs that have started but not yet stopped.
                if os.path.isfile(joinpath(jobdir, '.start')) and \
                       not os.path.isfile(joinpath(jobdir, '.stop')):
                    continue

            # NOTE(review): assumes this check applies whether or not -f
            # was given (i.e. -f alone does not imply -c) -- confirm.
            if not clean:
                sys.exit('job directory not clean!')

            job.cleandir(jobdir)
        else:
            os.mkdir(jobdir)
        jl.append(jobname)
    joblist = jl

for jobname in joblist:
    jobdir = joinpath(rootdir, jobname)

    if not onlyecho and not os.path.isdir(jobdir):
        sys.exit('%s is not a directory. Cannot build job' % jobdir)

    print('Job name: %s' % jobname)
    print('Job directory: %s' % jobdir)

    qsub = pbs.qsub()
    qsub.pbshost = 'simpool.eecs.umich.edu'
    qsub.stdout = joinpath(jobdir, 'jobout')
    qsub.name = jobname
    qsub.join = True
    qsub.node_type = 'FAST'
    qsub.env['ROOTDIR'] = rootdir
    if len(queue):
        qsub.queue = queue
    qsub.build(joinpath(progpath, 'job.py'))

    if verbose:
        print('PBS Command: %s' % qsub.command)

    if not onlyecho:
        ec = qsub.do()
        if ec == 0:
            print('PBS Jobid: %s' % qsub.result)
        else:
            print('PBS Failed')