summaryrefslogtreecommitdiff
path: root/util/pbs/send.py
diff options
context:
space:
mode:
author Nathan Binkert <binkertn@umich.edu> 2005-09-17 16:51:26 -0400
committer Nathan Binkert <binkertn@umich.edu> 2005-09-17 16:51:26 -0400
commit 02098f8e7b7f13181b41606d8cc3f0d9bd611e65 (patch)
tree c8111e2f2f89bbc8532be585779fb251dff700f2 /util/pbs/send.py
parent f88d7c7456ed0064316ef46b59ab71a3c25e91f1 (diff)
download gem5-02098f8e7b7f13181b41606d8cc3f0d9bd611e65.tar.xz
Totally re-work the way that jobfiles are done so there is more
information that can be used for other aspects of sending jobs. New graphing output stuff with matplotlib. util/pbs/job.py: Shuffle code around and create the JobDir class, which encapsulates all of the functionality needed for making, organizing, and cleaning a job directory. Better status output. util/pbs/jobfile.py: Major re-working of the jobfile code. A job file now consists of several objects that describe how jobs should be run; it includes information about checkpoints and graphing. util/pbs/send.py: Use the new jobfile code. Deal with the 15-character limit of PBS by truncating the name and using the raj hack. util/stats/db.py: Fix the __str__ function for nodes. Provide __getitem__ for the Database class. util/stats/stats.py: Use the jobfile stuff to figure out what the proper naming and organization of the graphs should be. Move all output code to output.py; get rid of ploticus and use matplotlib. --HG-- rename : util/categories.py => util/stats/categories.py extra : convert_revision : 0d793cbf6ad9492290e8ec875ce001c84095e1f7
Diffstat (limited to 'util/pbs/send.py')
-rwxr-xr-x util/pbs/send.py 164
1 files changed, 100 insertions, 64 deletions
diff --git a/util/pbs/send.py b/util/pbs/send.py
index ecb0be0ec..c66fb1c05 100755
--- a/util/pbs/send.py
+++ b/util/pbs/send.py
@@ -96,7 +96,7 @@ Usage:
try:
import getopt
- opts, args = getopt.getopt(sys.argv[1:], '-cd:efhj:lq:v')
+ opts, args = getopt.getopt(sys.argv[1:], '-CRcd:efhj:lq:v')
except getopt.GetoptError:
sys.exit(usage)
@@ -107,13 +107,18 @@ force = False
listonly = False
queue = ''
verbose = False
-rootdir = nfspath(os.getcwd())
-jfile = 'test.py'
+jfile = 'Base/test.py'
+docpts = False
+doruns = True
+runflag = False
+
for opt,arg in opts:
+ if opt == '-C':
+ docpts = True
+ if opt == '-R':
+ runflag = True
if opt == '-c':
clean = True
- if opt == '-d':
- rootdir = arg
if opt == '-e':
onlyecho = True
if opt == '-f':
@@ -130,95 +135,123 @@ for opt,arg in opts:
if opt == '-v':
verbose = True
-basedir = joinpath(rootdir, 'Base')
-linkdir = joinpath(rootdir, 'Link')
+if docpts:
+ doruns = runflag
for arg in args:
exprs.append(re.compile(arg))
-if not listonly and not onlyecho and isdir(linkdir):
+import jobfile, pbs
+from job import JobDir, date
+
+conf = jobfile.JobFile(jfile)
+
+if not listonly and not onlyecho and isdir(conf.linkdir):
if verbose:
print 'Checking for outdated files in Link directory'
- syncdir(linkdir, basedir)
+ syncdir(conf.linkdir, conf.basedir)
-import job, jobfile, pbs
+jobnames = {}
+joblist = []
-test = jobfile.JobFile(joinpath(basedir, jfile))
+if docpts and doruns:
+ gen = conf.alljobs()
+elif docpts:
+ gen = conf.checkpoints()
+elif doruns:
+ gen = conf.jobs()
-joblist = []
-for jobname in test.jobs:
- if not exprs:
- joblist.append(jobname)
+for job in gen:
+ if job.name in jobnames:
continue
- for expr in exprs:
- if expr.match(jobname):
- joblist.append(jobname)
- break
+ if exprs:
+ for expr in exprs:
+ if expr.match(job.name):
+ joblist.append(job)
+ break
+ else:
+ joblist.append(job)
if listonly:
if verbose:
- for jobname in joblist:
- test.printinfo(jobname)
+ for job in joblist:
+ job.printinfo()
else:
- for jobname in joblist:
- print jobname
+ for job in joblist:
+ print job.name
sys.exit(0)
if not onlyecho:
- jl = []
- for jobname in joblist:
- jobdir = joinpath(rootdir, jobname)
- if os.path.exists(jobname):
+ newlist = []
+ for job in joblist:
+ jobdir = JobDir(joinpath(conf.rootdir, job.name))
+ if jobdir.exists():
if not force:
- if os.path.isfile(joinpath(jobdir, '.success')):
+ status = jobdir.getstatus()
+ if status == 'queued':
+ continue
+
+ if status == 'running':
continue
- if os.path.isfile(joinpath(jobdir, '.start')) and \
- not os.path.isfile(joinpath(jobdir, '.stop')):
+ if status == 'success':
continue
if not clean:
- sys.exit('job directory not clean!')
+ sys.exit('job directory %s not clean!' % jobdir)
- job.cleandir(jobdir)
- else:
- os.mkdir(jobdir)
- jl.append(jobname)
- joblist = jl
-
-def setname(jobid, jobname):
- # since pbs can handle jobnames of 15 characters or less, don't
- # use the raj hack.
- if len(jobname) <= 15:
- return
-
- import socket
- s = socket.socket()
- # Connect to pbs.pool and send the jobid/jobname pair to port
- # 24465 (Raj didn't realize that there are only 64k ports and
- # setup inetd to point to port 90001)
- s.connect(("pbs.pool", 24465))
- s.send("%s %s\n" % (jobid, jobname))
- s.close()
-
-for jobname in joblist:
- jobdir = joinpath(rootdir, jobname)
-
- if not onlyecho and not os.path.isdir(jobdir):
- sys.exit('%s is not a directory. Cannot build job' % jobdir)
-
- print 'Job name: %s' % jobname
+ jobdir.clean()
+ newlist.append(job)
+ joblist = newlist
+
+class NameHack(object):
+ def __init__(self, host='pbs.pool', port=24465):
+ self.host = host
+ self.port = port
+ self.socket = None
+
+ def setname(self, jobid, jobname):
+ try:
+ jobid = int(jobid)
+ except ValueError:
+ jobid = int(jobid.strip().split('.')[0])
+
+ jobname = jobname.strip()
+ # since pbs can handle jobnames of 15 characters or less,
+ # don't use the raj hack.
+ if len(jobname) <= 15:
+ return
+
+ if self.socket is None:
+ import socket
+ self.socket = socket.socket()
+ # Connect to pbs.pool and send the jobid/jobname pair to port
+ # 24465 (Raj didn't realize that there are only 64k ports and
+ # setup inetd to point to port 90001)
+ self.socket.connect((self.host, self.port))
+
+ self.socket.send("%s %s\n" % (jobid, jobname))
+
+namehack = NameHack()
+
+for job in joblist:
+ jobdir = JobDir(joinpath(conf.rootdir, job.name))
+
+ if not onlyecho:
+ jobdir.create()
+
+ print 'Job name: %s' % job.name
print 'Job directory: %s' % jobdir
qsub = pbs.qsub()
qsub.pbshost = 'simpool.eecs.umich.edu'
- qsub.stdout = joinpath(jobdir, 'jobout')
- qsub.name = jobname[:15]
+ qsub.stdout = jobdir.file('jobout')
+ qsub.name = job.name[:15]
qsub.join = True
qsub.node_type = 'FAST'
- qsub.env['ROOTDIR'] = rootdir
- qsub.env['JOBNAME'] = jobname
+ qsub.env['ROOTDIR'] = conf.rootdir
+ qsub.env['JOBNAME'] = job.name
if len(queue):
qsub.queue = queue
qsub.build(joinpath(progpath, 'job.py'))
@@ -231,6 +264,9 @@ for jobname in joblist:
if ec == 0:
jobid = qsub.result
print 'PBS Jobid: %s' % jobid
- setname(jobid, jobname)
+ namehack.setname(jobid, job.name)
+ queued = date()
+ jobdir.echofile('.queued', queued)
+ jobdir.setstatus('queued on %s' % queued)
else:
print 'PBS Failed'