avoid import * by adammoody · Pull Request #399 · LLNL/scr · GitHub
[go: up one dir, main page]
More Web Proxy on the site http://driver.im/
Skip to content

avoid import * #399

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jul 16, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion scripts/pyfe/pyfe/joblauncher/auto.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,14 @@

from pyfe import scr_const

from pyfe.joblauncher import *
from pyfe.joblauncher import (
JobLauncher,
APRUN,
JSRUN,
LRUN,
MPIRUN,
SRUN,
)

class AutoJobLauncher:
def __new__(cls,launcher=None):
Expand Down
8 changes: 7 additions & 1 deletion scripts/pyfe/pyfe/resmgr/auto.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,13 @@

from pyfe import scr_const

from pyfe.resmgr import *
from pyfe.resmgr import (
ResourceManager,
LSF,
PBSALPS,
PMIX,
SLURM,
)

class AutoResourceManager:
def __new__(cls,resmgr=None):
Expand Down
120 changes: 57 additions & 63 deletions scripts/pyfe/pyfe/resmgr/lsf.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

import os, re
from time import time

from pyfe import scr_const
from pyfe.scr_common import runproc, pipeproc
from pyfe.resmgr import nodetests, ResourceManager
Expand All @@ -14,106 +15,99 @@ class LSF(ResourceManager):
def __init__(self):
super(LSF, self).__init__(resmgr='LSF')

# get job id, setting environment flag here
# get LSF jobid
def getjobid(self):
if self.conf['jobid'] is not None:
return self.conf['jobid']
return os.environ.get('LSB_JOBID')

# this doesn't really apply for LSF
def get_jobstep_id(self,user='',pid=-1):
if user=='' or self.conf['jobid'] is None:
return -1
cmd = ['squeue','-h','-s','-u',user,'-j',self.conf['jobid'],'-S','\"-i\"']
output = runproc(argv=cmd,getstdout=True)[0].split('\n')
currjobid=-1
for line in output:
fields = re.split('\s+',line)
jobidparts = fields[0].split('.')
#print "@jobidparts\n";
# the first item is the job step id
# if it is JOBID.0, then it is the allocation ID and we don't want that
# if it's not 0, then assume it's the one we're looking for
if jobidparts[1]!='0':
checkPIDcmd = ['ps','h','-p',str(pid)]
psOutput = runproc(argv=checkPIDcmd,getstdout=True)[0]
if pdOutput is not None:
pdOutput = re.split('\s+',pdOutput.strip())
if pdOutput[0] == str(pid):
currjobid = str(fields[0])
break
return currjobid
return -1

# get node list
def get_job_nodes(self):
val = os.environ.get('LSB_DJOB_HOSTFILE')
if val is not None:
hostfile = os.environ.get('LSB_DJOB_HOSTFILE')
if hostfile is not None:
try:
# make a list from the set -> make a set from the list -> file.readlines().rstrip('\n')
# get a list of lines without newlines and skip the first line
lines = []
with open(val,'r') as hostfile:
# make a list from the set -> make a set from the list -> file.readlines().rstrip('\n')
# get a list of lines without newlines and skip the first line
lines = [line.strip() for line in hostfile.readlines()][1:]
# get a set of unique hostnames, convert list to set and back
if len(lines)==0:
with open(hostfile,'r') as f:
lines = [line.strip() for line in f.readlines()][1:]
if len(lines) == 0:
raise ValueError('Hostfile empty')

# get a set of unique hostnames, convert list to set and back
hostlist = self.compress_hosts(lines)
return hostlist
# failed to read file
except:
# failed to read file
pass
val = os.environ.get('LSB_HOSTS')
if val is not None:
val = val.split(' ')
val = self.compress_hosts(val)
# or, with jobid: squeue -j <jobid> -ho %N
return val

# fall back to try LSB_HOSTS
hosts = os.environ.get('LSB_HOSTS')
if hosts is not None:
hosts = hosts.split(' ')
hosts = hosts[1:]
hosts = self.compress_hosts(hosts)
return hosts

def get_downnodes(self):
val = os.environ.get('LSB_HOSTS')
if val is not None:
# TODO : any way to get list of down nodes in LSF?
pass
# TODO : any way to get list of down nodes in LSF?
return None

def scr_kill_jobstep(self,jobid=-1):
if jobid==-1:
if jobid == -1:
print('You must specify the job step id to kill.')
return 1
return runproc(argv=['bkill','-s','KILL',str(jobid)])[1]
return runproc(argv=['bkill', '-s', 'KILL', str(jobid)])[1]

def get_scr_end_time(self):
if self.conf['jobid'] is None:
return None
curtime = int(time())
bjobs, rc = runproc(argv=['bjobs','-o','time_left'],getstdout=True)
if rc!=0:
return None
# run bjobs to get time remaining in current allocation
bjobs, rc = runproc(argv=['bjobs', '-o', 'time_left'], getstdout=True)
if rc != 0:
return 0

# parse bjobs output
lines = bjobs.split('\n')
for line in lines:
line=line.strip()
if len(line)==0:
line = line.strip()

# skip empty lines
if len(line) == 0:
continue

# the following is printed if there is no limit
# bjobs -o 'time_left'
# TIME_LEFT
# -
# look for the "-", in this case,
# return -1 to indicate there is no limit
if line.startswith('-'):
# the following is printed if there is no limit
# bjobs -o 'time_left'
# TIME_LEFT
# -
# look for the "-", in this case,
# return -1 to indicate there is no limit
# no limit
return -1
pieces = re.split(r'(^\s*)(\d+):(\d+)\s+',line)

# the following is printed if there is a limit
# bjobs -o 'time_left'
# TIME_LEFT
# 0:12 L
# look for a line like "0:12 L",
# avoid matching the "L" since other characters can show up there
if len(pieces)<3:
pieces = re.split(r'(^\s*)(\d+):(\d+)\s+', line)
if len(pieces) < 3:
continue
print(line)

# get current secs since epoch
secs_now = int(time())

# compute seconds left in job
hours = int(pieces[2])
mins = int(pieces[3])
secs = curtime + ((hours * 60) + mins) * 60
mins = int(pieces[3])
secs_remaining = ((hours * 60) + mins) * 60

secs = secs_now + secs_remaining
return secs

# had a problem executing bjobs command
return 0

Expand Down
60 changes: 35 additions & 25 deletions scripts/pyfe/pyfe/resmgr/slurm.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
# SLURM is a subclass of ResourceManager

import os, re
import datetime

from pyfe import scr_const
from pyfe.scr_common import runproc, pipeproc
from pyfe.resmgr import nodetests, ResourceManager
Expand All @@ -15,23 +17,24 @@ class SLURM(ResourceManager):
def __init__(self):
super(SLURM, self).__init__(resmgr='SLURM')

# get job id, setting environment flag here
# get SLURM jobid of current allocation
def getjobid(self):
if self.conf['jobid'] is not None:
return self.conf['jobid']
return os.environ.get('SLURM_JOBID')

def get_jobstep_id(self,user='',pid=-1):
if user=='' or self.conf['jobid'] is None:
# query SLURM for most recent jobstep in current allocation
def get_jobstep_id(self, user='', pid=-1):
jobid = self.getjobid()
if user == '' or jobid is None:
return -1

# get job steps for this user and job, order by decreasing job step
# so first one should be the one we are looking for
# squeue -h -s -u $user -j $jobid -S "-i"
# -h means print no header, so just the data in this order:
# STEPID NAME PARTITION USER TIME NODELIST
cmd = ['squeue','-h','-s','-u',user,'-j',str(self.conf['jobid']),'-S','\"-i\"']
# my $cmd="squeue -h -s -u $user -j $jobid -S \"-i\"";
output = runproc(argv=cmd,getstdout=True)[0]
output = re.search('\d+',output)
cmd = ['squeue', '-h', '-s', '-u', user, '-j', jobid, '-S', '\"-i\"']
output = runproc(argv=cmd, getstdout=True)[0]
output = re.search('\d+', output)
if output is None:
return -1
return output[0]
Expand All @@ -40,14 +43,14 @@ def get_jobstep_id(self,user='',pid=-1):
def get_job_nodes(self):
return os.environ.get('SLURM_NODELIST')

# use sinfo to query SLURM for the list of nodes it thinks to be down
def get_downnodes(self):
val = os.environ.get('SLURM_NODELIST')
if val is not None:
argv = ['sinfo','-ho','%N','-t','down','-n',val]
down, returncode = runproc(argv=argv,getstdout=True)
nodelist = self.get_job_nodes()
if nodelist is not None:
argv = ['sinfo', '-ho', '%N', '-t', 'down', '-n', nodelist]
down, returncode = runproc(argv=argv, getstdout=True)
if returncode == 0:
down = down.strip()
self.conf['down'] = down
return down
return None

Expand All @@ -57,18 +60,25 @@ def scr_kill_jobstep(self,jobid=-1):
return 1
return runproc(argv=['scancel',str(jobid)])[1]

# query SLURM for allocation endtime, expressed as secs since epoch
def get_scr_end_time(self):
if self.conf['jobid'] is None:
return None
argv = []
argv.append(['scontrol','--oneliner','show','job',self.conf['jobid']])
argv.append(['perl','-n','-e','\'m/EndTime=(\\S*)/ and print $1\''])
output = pipeproc(argvs=argv,getstdout=True)[0]
argv = ['date','-d',output.rstrip()]
output = runproc(argv=argv,getstdout=True)[0].strip()
if output.isdigit():
return int(output)
return 0
# get jobid
jobid = self.getjobid()
if jobid is None:
return 0

# ask scontrol for endtime of this job
argv = ['scontrol', '--oneliner', 'show', 'job', jobid]
output = runproc(argv=argv, getstdout=True)[0]
m = re.search('EndTime=(\\S*)', output)
if not m:
return 0

# parse time string like "2021-07-16T14:05:12" into secs since epoch
timestr = m.group(1)
dt = datetime.datetime.strptime(timestr, "%Y-%m-%dT%H:%M:%S")
timestamp = int(dt.strftime("%s"))
return timestamp

# return a hash to define all unavailable (down or excluded) nodes and reason
def list_down_nodes_with_reason(self,nodes=[], scr_env=None, free=False, cntldir_string=None, cachedir_string=None):
Expand Down