summary | refs | log | tree | commit | diff
path: root/util/pbs/job.py
diff options
context:
space:
mode:
Diffstat (limited to 'util/pbs/job.py')
-rwxr-xr-x util/pbs/job.py 139
1 files changed, 101 insertions, 38 deletions
diff --git a/util/pbs/job.py b/util/pbs/job.py
index f370862de..e2636c111 100755
--- a/util/pbs/job.py
+++ b/util/pbs/job.py
@@ -29,10 +29,21 @@
# Steve Reinhardt
# Ali Saidi
-import os, os.path, shutil, signal, socket, sys, time
+import os, os.path, shutil, signal, socket, sys
from os import environ as env
from os.path import join as joinpath, expanduser
+def date():
+ import time
+ return time.strftime('%a %b %e %H:%M:%S %Z %Y', time.localtime())
+
+def cleandir(dir):
+ for root, dirs, files in os.walk(dir, False):
+ for name in files:
+ os.remove(joinpath(root, name))
+ for name in dirs:
+ os.rmdir(joinpath(root, name))
+
class rsync:
def __init__(self):
self.sudo = False
@@ -61,25 +72,76 @@ class rsync:
return os.spawnvp(os.P_WAIT, args[0], args)
-def cleandir(dir):
- for root, dirs, files in os.walk(dir, False):
- for name in files:
- os.remove(joinpath(root, name))
- for name in dirs:
- os.rmdir(joinpath(root, name))
+class JobDir(object):
+ def __init__(self, dir):
+ self.dir = dir
-def date():
- return time.strftime('%a %b %e %H:%M:%S %Z %Y', time.localtime())
+ def file(self, filename):
+ return joinpath(self.dir, filename)
+
+ def create(self):
+ if os.path.exists(self.dir):
+ if not os.path.isdir(self.dir):
+ sys.exit('%s is not a directory. Cannot build job' % self.dir)
+ else:
+ os.mkdir(self.dir)
+
+ def exists(self):
+ return os.path.isdir(self.dir)
+
+ def clean(self):
+ cleandir(self.dir)
+
+ def hasfile(self, filename):
+ return os.path.isfile(self.file(filename))
+
+ def echofile(self, filename, string):
+ filename = self.file(filename)
+ try:
+ f = file(filename, 'w')
+ print >>f, string
+ f.flush()
+ f.close()
+ except IOError,e:
+ sys.exit(e)
-def remfile(file):
- if os.path.isfile(file):
- os.unlink(file)
+ def rmfile(self, filename):
+ filename = self.file(filename)
+ if os.path.isfile(filename):
+ os.unlink(filename)
-def readval(filename):
- file = open(filename, 'r')
- value = file.readline().strip()
- file.close()
- return value
+ def readval(self, filename):
+ filename = self.file(filename)
+ f = file(filename, 'r')
+ value = f.readline().strip()
+ f.close()
+ return value
+
+ def setstatus(self, string):
+ filename = self.file('.status')
+ try:
+ f = file(filename, 'a')
+ print >>f, string
+ f.flush()
+ f.close()
+ except IOError,e:
+ sys.exit(e)
+
+ def getstatus(self):
+ filename = self.file('.status')
+ try:
+ f = file(filename, 'r')
+ except IOError, e:
+ return 'none'
+
+ # fast forward to the end
+ for line in f: pass
+
+ # the first word on the last line is the status
+ return line.split(' ')[0]
+
+ def __str__(self):
+ return self.dir
if __name__ == '__main__':
rootdir = env.setdefault('ROOTDIR', os.getcwd())
@@ -97,29 +159,27 @@ if __name__ == '__main__':
workbase = "/tmp/"
workdir = joinpath(workbase, '%s.%s' % (env['USER'], pbs_jobid))
-
- def echofile(filename, string):
- try:
- f = file(joinpath(outdir, filename), 'w')
- print >>f, string
- f.flush()
- f.close()
- except IOError,e:
- sys.exit(e)
+ host = socket.gethostname()
os.umask(0022)
- echofile('.start', date())
- echofile('.pbs_jobid', pbs_jobid)
- echofile('.pbs_jobname', pbs_jobid)
- echofile('.host', socket.gethostname())
+ jobdir = JobDir(outdir)
+
+ started = date()
+ jobdir.echofile('.running', started)
+ jobdir.rmfile('.queued')
+ jobdir.echofile('.pbs_jobid', pbs_jobid)
+ jobdir.echofile('.pbs_jobname', pbs_jobid)
+ jobdir.echofile('.host', host)
+
+ jobdir.setstatus('running on %s on %s' % (host, started))
if os.path.isdir(workdir):
cleandir(workdir)
else:
os.mkdir(workdir)
- if os.path.isdir('/z/dist'):
+ if False and os.path.isdir('/z/dist'):
sync = rsync()
sync.delete = True
sync.sudo = True
@@ -130,13 +190,13 @@ if __name__ == '__main__':
except OSError,e:
sys.exit(e)
- os.symlink(joinpath(outdir, 'output'), 'status.out')
+ os.symlink(jobdir.file('output'), 'status.out')
args = [ joinpath(basedir, 'm5'), joinpath(basedir, 'run.py') ]
if not len(args):
sys.exit("no arguments")
- print 'starting job... %s' % date()
+ print 'starting job... %s' % started
print ' '.join(args)
print
sys.stdout.flush()
@@ -145,7 +205,7 @@ if __name__ == '__main__':
if not childpid:
# Execute command
sys.stdin.close()
- fd = os.open(joinpath(outdir, "output"),
+ fd = os.open(jobdir.file("output"),
os.O_WRONLY | os.O_CREAT | os.O_TRUNC)
os.dup2(fd, sys.stdout.fileno())
os.dup2(fd, sys.stderr.fileno())
@@ -170,12 +230,15 @@ if __name__ == '__main__':
thepid,ec = os.waitpid(childpid, 0)
if ec:
print 'Exit code ', ec
- echofile('.failure', date())
+ status = 'failure'
else:
- echofile('.success', date())
+ status = 'success'
done = 1
except OSError:
pass
- print '\njob complete... %s' % date()
- echofile('.stop', date())
+ complete = date()
+ print '\njob complete... %s' % complete
+ jobdir.echofile('.%s' % status, complete)
+ jobdir.rmfile('.running')
+ jobdir.setstatus('%s on %s' % (status, complete))