The CTC runs parallel jobs by disabling the default LoadLeveler scheduler (SCHEDULER_API=YES) and running an external scheduler. The CTC has developed this scheduler to meet the needs of its users.
The following figures represent sections of the CTC's LoadL_admin file. Note that not all nodes are shown here.
############################################################################# # DEFAULTS FOR MACHINE, CLASS, USER, AND GROUP STANZAS: # Remove initial # (comment), and edit to suit. ############################################################################# default: type = machine central_manager = false # default not central manager schedd_host = false # default not a public scheduler submit_only = false # default not a submit-only machine pvm_root = /usr/local/app/pvm3 # default pvm3 directory rm_host = true # default is parallel SP2 node # speed = 1 # default machine speed # cpu_speed_scale = false # scale cpu limits by speed default: type = class # default class stanza # priority = 0 # default ClassSysprio # max_processors = -1 # default max processors for class (no
default: type = user # default user stanza # priority = 0 # default UserSysprio default_class = DSI # default class default_group = No_Group # default group = No_Group (not # optional) # maxjobs = -1 # default maximum jobs user is allowed # to run simultaneously (no limit) # maxqueued = -1 # default maximum jobs user is allowed # on system queue (no limit). does not # limit jobs submitted. default: type = group # default group stanza # priority = 0 # default GroupSysprio # maxjobs = -1 # default maximum jobs group is allowed # to run simultaneously (no limit) # maxqueued = -1 # default maximum jobs group is allowed # on system queue (no limit). does not # limit jobs submitted. ############################################################################# # MACHINE STANZAS: # These are the machine stanzas; the first machine is defined as # the central manager. mach1:, mach2:, etc. are machine name labels - # revise these placeholder labels with the names of the machines in the # pool, and specify any schedd_host and submit_only keywords and values # (true or false), if required. ############################################################################# # spscheduler is a 43P running EASY-LL and the Central Manager spscheduler.tc.cornell.edu: type = machine central_manager = true rm_host =false # ctc1 and ctc2 are two 43P's running as dedicated SchedDs ctc1.tc.cornell.edu: type = machine schedd_host = true ctc2.tc.cornell.edu: type = machine schedd_host = true # Submit only node for Sweb server arms.tc.cornell.edu: type = machine submit_only = true
# # Nodes of the SP2 # # Rack 1 # # PIOFS name server, HiPPi router, Switch & JMD primary #r01n01.tc.cornell.edu: type = machine # alias = r01n01-css # r01n03 & r01n05 are interactive nodes r01n03.tc.cornell.edu: type = machine alias = r01n03-css submit_only = true r01n05.tc.cornell.edu: type = machine alias = r01n05-css submit_only = true r01n07.tc.cornell.edu: type = machine alias = r01n07-css r01n09.tc.cornell.edu: type = machine alias = r01n09-css r01n11.tc.cornell.edu: type = machine alias = r01n11-css r01n13.tc.cornell.edu: type = machine alias = r01n13-css r01n15.tc.cornell.edu: type = machine alias = r01n15-css # # Rack 2 # # HPSS/PIOFS backup #r02n01.tc.cornell.edu: type = machine # alias = r02n01-css # r02n03, r02n05, r02n07, r02n09 are splong nodes r02n03.tc.cornell.edu: type = machine alias = r02n03-css submit_only = true r02n05.tc.cornell.edu: type = machine alias = r02n05-css submit_only = true r02n07.tc.cornell.edu: type = machine alias = r02n07-css submit_only = true r02n09.tc.cornell.edu: type = machine alias = r02n09-css submit_only = true # VIS node #r02n11.tc.cornell.edu: type = machine # alias = r02n11-css r02n13.tc.cornell.edu: type = machine alias = r02n13-css r02n15.tc.cornell.edu: type = machine alias = r02n15-css
# # Rack 3 # r03n01.tc.cornell.edu: type = machine alias = r03n01-css r03n02.tc.cornell.edu: type = machine alias = r03n02-css r03n03.tc.cornell.edu: type = machine alias = r03n03-css r03n04.tc.cornell.edu: type = machine alias = r03n04-css r03n05.tc.cornell.edu: type = machine alias = r03n05-css r03n06.tc.cornell.edu: type = machine alias = r03n06-css r03n07.tc.cornell.edu: type = machine alias = r03n07-css r03n08.tc.cornell.edu: type = machine alias = r03n08-css r03n09.tc.cornell.edu: type = machine alias = r03n09-css r03n10.tc.cornell.edu: type = machine alias = r03n10-css r03n11.tc.cornell.edu: type = machine alias = r03n11-css r03n12.tc.cornell.edu: type = machine alias = r03n12-css r03n13.tc.cornell.edu: type = machine alias = r03n13-css r03n14.tc.cornell.edu: type = machine alias = r03n14-css r03n15.tc.cornell.edu: type = machine alias = r03n15-css # ATM/FDDI routing node #r03n16.tc.cornell.edu: type = machine # alias = r03n16-css
# # Rack 4 # r04n01.tc.cornell.edu: type = machine alias = r04n01-css r04n02.tc.cornell.edu: type = machine alias = r04n02-css r04n03.tc.cornell.edu: type = machine alias = r04n03-css r04n04.tc.cornell.edu: type = machine alias = r04n04-css r04n05.tc.cornell.edu: type = machine alias = r04n05-css r04n06.tc.cornell.edu: type = machine alias = r04n06-css r04n07.tc.cornell.edu: type = machine alias = r04n07-css r04n08.tc.cornell.edu: type = machine alias = r04n08-css r04n09.tc.cornell.edu: type = machine alias = r04n09-css r04n10.tc.cornell.edu: type = machine alias = r04n10-css r04n11.tc.cornell.edu: type = machine alias = r04n11-css # r04n12 - r04n16 HPSS nodes #r04n12.tc.cornell.edu: type = machine # alias = r04n12-css #r04n13.tc.cornell.edu: type = machine # alias = r04n13-css #r04n14.tc.cornell.edu: type = machine # alias = r04n14-css #r04n15.tc.cornell.edu: type = machine # alias = r04n15-css #r04n16.tc.cornell.edu: type = machine # alias = r04n16-css # ############################################################################# # CLASS STANZAS: (optional) # These are sample class stanzas; small, medium, large, and nqs are sample # labels for job classes - revise these labels and specify attributes # to each class. ############################################################################# DSI: type = class piofs: type = class #############################################################################
The following represents the CTC's LoadL_config file:
# # Machine Description # ARCH = R6000 # # Specify LoadLeveler Administrators here: # LOADL_ADMIN = loadl admin1 admin2 admin3 admin4 # # Default to starting LoadLeveler daemons when requested # START_DAEMONS = TRUE # # Machine authentication # # If TRUE, only connections from machines in the ADMIN_LIST are accepted. # If FALSE, connections from any machine are accepted. Default if not # specified is FALSE. # MACHINE_AUTHENTICATE = FALSE # # Specify which daemons run on each node # SCHEDD_RUNS_HERE = False STARTD_RUNS_HERE = True # # Specify information for backup central manager # # CENTRAL_MANAGER_HEARTBEAT_INTERVAL = 300 # CENTRAL_MANAGER_TIMEOUT = 6
# # Specify pathnames # RELEASEDIR = /usr/lpp/LoadL/nfs LOCAL_CONFIG = $(tilde)/local/configs/LoadL_config.$(host) ADMIN_FILE = $(tilde)/LoadL_admin LOG = /var/loadl/log SPOOL = /var/loadl/spool EXECUTE = /var/loadl/execute HISTORY = $(SPOOL)/history BIN = $(RELEASEDIR)/bin LIB = $(RELEASEDIR)/lib ETC = $(RELEASEDIR)/etc # # Specify port numbers # COLLECTOR_STREAM_PORT = 9612 MASTER_STREAM_PORT = 9616 NEGOTIATOR_STREAM_PORT = 9614 SCHEDD_STREAM_PORT = 9605 STARTD_STREAM_PORT = 9611 COLLECTOR_DGRAM_PORT = 9613 STARTD_DGRAM_PORT = 9615 MASTER_DGRAM_PORT = 9617 SCHEDULER_API = YES SCHEDULER_PORT = 9624 # # Specify accounting controls # ACCT = A_ON ACCT_VALIDATION = $(BIN)/llacctval GLOBAL_HISTORY = $(SPOOL) # # Specify prolog and epilog path names # JOB_PROLOG = $(ETC)/llprolog JOB_EPILOG = $(ETC)/llepilog JOB_USER_PROLOG = $(ETC)/ll_user_prolog JOB_USER_EPILOG = $(ETC)/ll_user_epilog # # # Refresh AFS token program. # AFS_GETNEWTOKEN = $(ETC)/tokenreviveclient
# # Customized mail delivery program. # # MAIL = # # Customized submit (job command file) filter program. # # SUBMIT_FILTER = # # Specify checkpointing intervals # MIN_CKPT_INTERVAL = 900 MAX_CKPT_INTERVAL = 7200 # LoadL_KeyboardD Macros # KBDD = $(BIN)/LoadL_kbdd KBDD_LOG = $(LOG)/KbdLog MAX_KBDD_LOG = 64000 KBDD_DEBUG = # # Specify whether to start the keyboard daemon # X_RUNS_HERE = False # # Specify whether to use X server XGetIdleTime() protocol extension # USE_X_IDLE_EXTENSION = False # # LoadL_StartD Macros # STARTD = $(BIN)/LoadL_startd STARTD_LOG = $(LOG)/StartLog MAX_STARTD_LOG = 5000000 #STARTD_DEBUG = D_STARTD D_FULLDEBUG D_THREAD STARTD_DEBUG = D_FULLDEBUG POLLING_FREQUENCY = 10 POLLS_PER_UPDATE = 24 JOB_LIMIT_POLICY = 240 JOB_ACCT_Q_POLICY = 3600 # # LoadL_SchedD Macros # SCHEDD = $(BIN)/LoadL_schedd SCHEDD_LOG = $(LOG)/SchedLog MAX_SCHEDD_LOG = 5000000 SCHEDD_DEBUG = D_SCHEDD SCHEDD_INTERVAL = 180 CLIENT_TIMEOUT = 300
# # Negotiator Macros # NEGOTIATOR = $(BIN)/LoadL_negotiator NEGOTIATOR_DEBUG = D_FULLDEBUG D_ALWAYS D_NEGOTIATE NEGOTIATOR_LOG = $(LOG)/NegotiatorLog MAX_NEGOTIATOR_LOG = 5000000 NEGOTIATOR_INTERVAL = 60 MACHINE_UPDATE_INTERVAL = 600 NEGOTIATOR_PARALLEL_DEFER = 1800 NEGOTIATOR_PARALLEL_HOLD = 300 NEGOTIATOR_REDRIVE_PENDING = 1800 NEGOTIATOR_RESCAN_QUEUE = 180 NEGOTIATOR_REMOVE_COMPLETED = 0 # # Sets the interval between recalculation of the SYSPRIO values # for all the jobs in the queue # NEGOTIATOR_RECALCULATE_SYSPRIO_INTERVAL = 0 # # Starter Macros # STARTER = $(BIN)/LoadL_starter STARTER_DEBUG = D_FULLDEBUG STARTER_LOG = $(LOG)/StarterLog MAX_STARTER_LOG = 500000 # # LoadL_Master Macros # MASTER = $(BIN)/LoadL_master MASTER_LOG = $(LOG)/MasterLog MASTER_DEBUG = D_FULLDEBUG MAX_MASTER_LOG = 64000 RESTARTS_PER_HOUR = 12 PUBLISH_OBITUARIES = TRUE OBITUARY_LOG_LENGTH = 25 # # Specify whether log files are truncated when opened # TRUNC_MASTER_LOG_ON_OPEN = False TRUNC_STARTD_LOG_ON_OPEN = False TRUNC_SCHEDD_LOG_ON_OPEN = False TRUNC_KBDD_LOG_ON_OPEN = False TRUNC_STARTER_LOG_ON_OPEN = False TRUNC_COLLECTOR_LOG_ON_OPEN = False TRUNC_NEGOTIATOR_LOG_ON_OPEN = False
# NQS Directory # # # For users of NQS resources: # Specify the directory containing qsub, qstat, qdel # # NQS_DIR = /usr/bin # # Specify Custom metric keywords # # CUSTOM_METRIC = # CUSTOM_METRIC_COMMAND = $(ETC)/sw_chip_number # # Machine control expressions and macros # OpSys : $(OPSYS) Arch : $(ARCH) Machine : $(HOST).$(DOMAIN) # # Expressions used to control starting and stopping of foreign jobs # MINUTE = 60 HOUR = (60 * $(MINUTE)) StateTimer = (CurrentTime - EnteredCurrentState) BackgroundLoad = 0.7 HighLoad = 1.5 StartIdleTime = 15 * $(MINUTE) ContinueIdleTime = 5 * $(MINUTE) MaxSuspendTime = 10 * $(MINUTE) MaxVacateTime = 10 * $(MINUTE) KeyboardBusy= KeyboardIdle < $(POLLING_FREQUENCY) CPU_Idle = LoadAvg <= $(BackgroundLoad) CPU_Busy = LoadAvg >= $(HighLoad) # START : $(CPU_Idle) && KeyboardIdle > $(StartIdleTime) # SUSPEND : $(CPU_Busy) || $(KeyboardBusy) # CONTINUE : $(CPU_Idle) && KeyboardIdle > $(ContinueIdleTime) # VACATE : $(StateTimer) > $(MaxSuspendTime) # KILL : $(StateTimer) > $(MaxVacateTime) START : T SUSPEND : F CONTINUE : T VACATE : F KILL : F
# # Expressions used to prioritize job queue # # Values which can be part of the SYSPRIO expression are: # # QDate Job submission time # UserPrio User priority # UserSysprio System priority value based on userid (from the user # list file with default of 0) # ClassSysprio System priority value based on job class (from the class # list file with default of 0) # UserRunningProcs Number of jobs running for the user # GroupRunningProcs Number of jobs running for the group # # The following expression is an example. # #SYSPRIO: (ClassSysprio * 100) + (UserSysprio * 10) + (GroupSysprio * 1)- (QDate ) # # The following (default) expression for SYSPRIO creates a FIFO job queue. # SYSPRIO: (ClassSysprio * 100) - (QDate)
# # Expressions used to prioritize machines # # The following example orders machines by the load average # normalized for machine speed: # #MACHPRIO: 0 - (1000 * (LoadAvg / (Cpus * Speed))) # # The following (default) expression for MACHPRIO orders # machines by load average. # #MACHPRIO: 0 - (LoadAvg) + (MasterMachPriority * 10000) # The following expression for MACHPRIO orders # machines by increasing amount of memory and # decreasing node number. # MACHPRIO: 0 - (100 * Memory) + CustomMetric + (MasterMachPriority * 10000) # # The MAX_JOB_REJECT value determines how many times a job can be # rejected before it is canceled or put on hold. The default value # is -1, which indicates no limit to the number of times a job can be # rejected. # MAX_JOB_REJECT = 0 # # When ACTION_ON_MAX_REJECT is HOLD, jobs will be put on user hold # when the number of rejects reaches the MAX_JOB_REJECT value. When # ACTION_ON_MAX_REJECT is CANCEL, jobs will be canceled when the # number of rejects reaches the MAX_JOB_REJECT value. The default # value is HOLD. # ACTION_ON_MAX_REJECT = CANCEL