diff options
Diffstat (limited to 'util/pbs/job.py')
-rwxr-xr-x | util/pbs/job.py | 139 |
1 file changed, 101 insertions, 38 deletions
diff --git a/util/pbs/job.py b/util/pbs/job.py
index f370862de..e2636c111 100755
--- a/util/pbs/job.py
+++ b/util/pbs/job.py
@@ -29,10 +29,21 @@
 # Steve Reinhardt
 # Ali Saidi
 
-import os, os.path, shutil, signal, socket, sys, time
+import os, os.path, shutil, signal, socket, sys
 from os import environ as env
 from os.path import join as joinpath, expanduser
 
+def date():
+    import time
+    return time.strftime('%a %b %e %H:%M:%S %Z %Y', time.localtime())
+
+def cleandir(dir):
+    for root, dirs, files in os.walk(dir, False):
+        for name in files:
+            os.remove(joinpath(root, name))
+        for name in dirs:
+            os.rmdir(joinpath(root, name))
+
 class rsync:
     def __init__(self):
         self.sudo = False
@@ -61,25 +72,76 @@ class rsync:
 
         return os.spawnvp(os.P_WAIT, args[0], args)
 
-def cleandir(dir):
-    for root, dirs, files in os.walk(dir, False):
-        for name in files:
-            os.remove(joinpath(root, name))
-        for name in dirs:
-            os.rmdir(joinpath(root, name))
+class JobDir(object):
+    def __init__(self, dir):
+        self.dir = dir
 
-def date():
-    return time.strftime('%a %b %e %H:%M:%S %Z %Y', time.localtime())
+    def file(self, filename):
+        return joinpath(self.dir, filename)
+
+    def create(self):
+        if os.path.exists(self.dir):
+            if not os.path.isdir(self.dir):
+                sys.exit('%s is not a directory. Cannot build job' % self.dir)
+        else:
+            os.mkdir(self.dir)
+
+    def exists(self):
+        return os.path.isdir(self.dir)
+
+    def clean(self):
+        cleandir(self.dir)
+
+    def hasfile(self, filename):
+        return os.path.isfile(self.file(filename))
+
+    def echofile(self, filename, string):
+        filename = self.file(filename)
+        try:
+            f = file(filename, 'w')
+            print >>f, string
+            f.flush()
+            f.close()
+        except IOError,e:
+            sys.exit(e)
 
-def remfile(file):
-    if os.path.isfile(file):
-        os.unlink(file)
+    def rmfile(self, filename):
+        filename = self.file(filename)
+        if os.path.isfile(filename):
+            os.unlink(filename)
 
-def readval(filename):
-    file = open(filename, 'r')
-    value = file.readline().strip()
-    file.close()
-    return value
+    def readval(self, filename):
+        filename = self.file(filename)
+        f = file(filename, 'r')
+        value = f.readline().strip()
+        f.close()
+        return value
+
+    def setstatus(self, string):
+        filename = self.file('.status')
+        try:
+            f = file(filename, 'a')
+            print >>f, string
+            f.flush()
+            f.close()
+        except IOError,e:
+            sys.exit(e)
+
+    def getstatus(self):
+        filename = self.file('.status')
+        try:
+            f = file(filename, 'r')
+        except IOError, e:
+            return 'none'
+
+        # fast forward to the end
+        for line in f: pass
+
+        # the first word on the last line is the status
+        return line.split(' ')[0]
+
+    def __str__(self):
+        return self.dir
 
 if __name__ == '__main__':
     rootdir = env.setdefault('ROOTDIR', os.getcwd())
@@ -97,29 +159,27 @@ if __name__ == '__main__':
         workbase = "/tmp/"
 
     workdir = joinpath(workbase, '%s.%s' % (env['USER'], pbs_jobid))
-
-    def echofile(filename, string):
-        try:
-            f = file(joinpath(outdir, filename), 'w')
-            print >>f, string
-            f.flush()
-            f.close()
-        except IOError,e:
-            sys.exit(e)
+    host = socket.gethostname()
 
     os.umask(0022)
 
-    echofile('.start', date())
-    echofile('.pbs_jobid', pbs_jobid)
-    echofile('.pbs_jobname', pbs_jobid)
-    echofile('.host', socket.gethostname())
+    jobdir = JobDir(outdir)
+
+    started = date()
+    jobdir.echofile('.running', started)
+    jobdir.rmfile('.queued')
+    jobdir.echofile('.pbs_jobid', pbs_jobid)
+    jobdir.echofile('.pbs_jobname', pbs_jobid)
+    jobdir.echofile('.host', host)
+
+    jobdir.setstatus('running on %s on %s' % (host, started))
 
     if os.path.isdir(workdir):
         cleandir(workdir)
     else:
         os.mkdir(workdir)
 
-    if os.path.isdir('/z/dist'):
+    if False and os.path.isdir('/z/dist'):
         sync = rsync()
         sync.delete = True
         sync.sudo = True
@@ -130,13 +190,13 @@ if __name__ == '__main__':
     except OSError,e:
         sys.exit(e)
 
-    os.symlink(joinpath(outdir, 'output'), 'status.out')
+    os.symlink(jobdir.file('output'), 'status.out')
 
     args = [ joinpath(basedir, 'm5'), joinpath(basedir, 'run.py') ]
     if not len(args):
         sys.exit("no arguments")
 
-    print 'starting job... %s' % date()
+    print 'starting job... %s' % started
     print ' '.join(args)
     print
     sys.stdout.flush()
@@ -145,7 +205,7 @@ if __name__ == '__main__':
     if not childpid:
         # Execute command
         sys.stdin.close()
-        fd = os.open(joinpath(outdir, "output"),
+        fd = os.open(jobdir.file("output"),
                      os.O_WRONLY | os.O_CREAT | os.O_TRUNC)
         os.dup2(fd, sys.stdout.fileno())
         os.dup2(fd, sys.stderr.fileno())
@@ -170,12 +230,15 @@ if __name__ == '__main__':
             thepid,ec = os.waitpid(childpid, 0)
             if ec:
                 print 'Exit code ', ec
-                echofile('.failure', date())
+                status = 'failure'
             else:
-                echofile('.success', date())
+                status = 'success'
             done = 1
         except OSError:
             pass
 
-    print '\njob complete... %s' % date()
-    echofile('.stop', date())
+    complete = date()
+    print '\njob complete... %s' % complete
+    jobdir.echofile('.%s' % status, complete)
+    jobdir.rmfile('.running')
+    jobdir.setstatus('%s on %s' % (status, complete))