This appendix contains sample configuration and administration files. These files, as well as other sample files, are located in the /usr/lpp/LoadL/full/samples directory.
The following is a sample LoadL_admin file.
# *************************************************************************** # LoadL_admin file: Remove comments and edit this file to suit your # installation. This file consists of machine, class, user, group, and # adapter stanzas. Each stanza has defaults, as specified in a "default:" # section. Default stanzas are used to set values for fields which are # not specified. Class, user, group, and adapter stanzas are optional. # Refer to "Using and Administering LoadLeveler" for detailed information # on keywords and their associated values. Also, see LoadL_admin.1 in the # ~loadl/samples directory for sample stanzas. ############################################################################# # DEFAULTS FOR MACHINE, CLASS, USER, AND GROUP STANZAS: # Remove initial # (comment), and edit to suit. # default: type = machine # central_manager = false # default not central manager # schedd_host = false # default not a public scheduler # submit_only = false # default not a submit-only machine # speed = 1 # default machine speed # cpu_speed_scale = false # scale cpu limits by speed default: type = class # default class stanza # priority = 0 # default ClassSysprio # max_processors = -1 # default max processors for class (no limit) default: type = user # default user stanza # priority = 0 # default UserSysprio default_class = No_Class # default class = No_Class (not optional) default_group = No_Group # default group = No_Group (not optional) # maxjobs = -1 # default maximum jobs user is allowed # to run simultaneously (no limit) # maxqueued = -1 # default maximum jobs user is allowed # on system queue (no limit). does not # limit jobs submitted. default: type = group # default group stanza # priority = 0 # default GroupSysprio # maxjobs = -1 # default maximum jobs group is allowed # to run simultaneously (no limit) # maxqueued = -1 # default maximum jobs group is allowed # on system queue (no limit). does not # limit jobs submitted.
############################################################################# # MACHINE STANZAS: # These are the machine stanzas; the first machine is defined as # the central manager. mach1:, mach2:, etc. are machine name labels - # revise these placeholder labels with the names of the machines in the # pool, and specify any schedd_host and submit_only keywords and values # (true or false), if required. ############################################################################# CENTRALMANAGER: type = machine central_manager = true # # mach1: type = machine # schedd_host = true # # mach2: type = machine # schedd_host = true # # mach3: type = machine # schedd_host = true # cpu_speed_scale = true # speed = 10 # # mach4: type = machine # adapter_stanzas = k10n01_en0 k10n01_css # # mach5: type = machine # adapter_stanzas = k10n05_en0 k10n05_css # pool_list = 5 # spacct_excluse_enable = True # machine_mode = batch # # mach6: type = machine # # mach7: type = machine # submit_only = true # # mach8: type = machine # submit_only = true # etc: type = machine # ############################################################################# # ADAPTER STANZAS: (optional) # These are sample adapter stanzas; # revise labels and attributes for the adapters on your machines. ############################################################################# # #k10n01_en0: type = adapter # interface_name = k10n01 # adapter_name = en0 # network_type = ethernet # #k10n01_css: type = adapter # interface_name = k10sn01 # adapter_name = css0 # network_type = css # switch_node_number = 0 # interface_address = 9.114.51.129
# #k10n05_en0: type = adapter # interface_name = k10n05 # adapter_name = en0 # network_type = ethernet # #k10n05_css: type = adapter # interface_name = k10sn05 # adapter_name = css0 # network_type = css # switch_node_number = 4 # interface_address = 9.114.51.133 ############################################################################# # CLASS STANZAS: (optional) These are sample class stanzas; small, medium, # large, verylong, parallel and nqs are sample labels for job classes. # Revise these labels and specify attributes for each class. ############################################################################# #small: type = class # class for small jobs # priority = 100 # ClassSysprio # include_users=<userlist> # only these users can submit # jobs of this class # exclude_users=<userlist> # only these cannot submit # include_groups=<grouplist> # exclude_groups=<grouplist> # admin = <userlist> # nice = <nice value> # cpu_limit = 00:15:00 # 15 minute run time limit # data_limit = <size> # core_limit = <size> # file_limit = <size> # stack_limit = <size> # rss_limit = <size> # #medium: type = class # class for medium jobs # priority = 60 # ClassSysprio # cpu_limit= 02:00:00 # 2 hour run time limit #large: type = class # class for large jobs # priority = 20 # ClassSysprio # cpu_limit = 24:00:00 # 24 hour run time limit # nice = -10 # Set nice value #verylong: type = class # class_comment = "verylong queue" # nice = 19 # priority = 0 # cpu_limit = 3000:00:00 # job_cpu_limit = 3100:00:00 # #parallel: type = class # class_comment = "restricted access" # include_users = tbel ghtc3 hrrcr bjac3 japost roethl # priority = 0 # cpu_limit = unlimited # job_cpu_limit = unlimited # wall_clock_limit = 1:00,00:45 # Needed for Backfill scheduler # total_tasks = 2 # max_nodes = 2
# #nqs: type = class # class for nqs jobs # NQS_class = true # will be routed to NQS # NQS_submit = nqs_pipe_q_name # name of pipe queue # NQS_query = q_name1@host1 q_name2@host2 ... # names of queues ############################################################################# # GROUP STANZAS: (optional) # These are sample group stanzas; group1, group2 are sample labels # for groups - revise these labels and specify attributes for each group. ############################################################################# #group1: type = group # priority = 80 # maxjobs = 40 # maxqueued = 80 # admin = user1 user2 # max_processors = 8 # # total_tasks = 2 # max_nodes = 2 #group2: type = group # priority = 50 # maxjobs = 20 # maxqueued = 40
############################################################################# # USER STANZAS: (optional, default user stanza not optional) # These are sample user stanzas; user1, user2, user3 are sample labels # for users - revise these labels and specify attributes for each user. ############################################################################# # user1: type = user # priority = 80 # default_class = small # default_group = group1 # maxjobs = 20 # maxqueued = 40 # # user2: type = user # priority = 50 # default_class = medium long verylong # default_group = Unix_Group # maxjobs = 10 # maxqueued = 20 # # user3: type = user # priority = 10 # maxjobs = 5 # maxqueued = 10 # total_tasks = 2 # max_nodes = 2 ############################################################################
The following is a sample LoadL_config file.
# # Machine Description # ARCH = R6000 # # Specify LoadLeveler Administrators here: # LOADL_ADMIN = loadl admin1 # # Default to starting LoadLeveler daemons when requested # START_DAEMONS = TRUE # # Machine authentication # # If TRUE, only connections from machines in the ADMIN_LIST are accepted. # If FALSE, connections from any machine are accepted. Default if not # specified is FALSE. # MACHINE_AUTHENTICATE = FALSE # # Specify which daemons run on each node # SCHEDD_RUNS_HERE = True STARTD_RUNS_HERE = True # # Specify information for backup central manager # # CENTRAL_MANAGER_HEARTBEAT_INTERVAL = 300 # CENTRAL_MANAGER_TIMEOUT = 6 # # Specify pathnames # RELEASEDIR = $(tilde) LOCAL_CONFIG = $(tilde)/LoadL_config.local ADMIN_FILE = $(tilde)/LoadL_admin LOG = $(tilde)/log SPOOL = $(tilde)/spool EXECUTE = $(tilde)/execute HISTORY = $(SPOOL)/history BIN = $(RELEASEDIR)/bin LIB = $(RELEASEDIR)/lib # # Specify port numbers # MASTER_STREAM_PORT = 9616 NEGOTIATOR_STREAM_PORT = 9614 SCHEDD_STREAM_PORT = 9605 STARTD_STREAM_PORT = 9611 COLLECTOR_DGRAM_PORT = 9613 STARTD_DGRAM_PORT = 9615 MASTER_DGRAM_PORT = 9617
# # Turn on/off the internal LoadLeveler scheduling algorithm # Default is on # # For Backfill scheduler SCHEDULER_API = NO SCHEDULER_TYPE = BACKFILL # For default scheduler # SCHEDULER_API = NO # SCHEDULER_TYPE = # For external scheduler # SCHEDULER_API = YES # SCHEDULER_TYPE = # # Specify accounting controls # ACCT = A_OFF ACCT_VALIDATION = $(BIN)/llacctval GLOBAL_HISTORY = $(SPOOL) # # Specify prolog and epilog path names # # JOB_PROLOG = # JOB_EPILOG = # JOB_USER_PROLOG = # JOB_USER_EPILOG = # # Refresh AFS token program. # # AFS_GETNEWTOKEN = # # Customized mail delivery program. # # MAIL = # # Customized submit (job command file) filter program. # # SUBMIT_FILTER = # # Specify checkpointing intervals # MIN_CKPT_INTERVAL = 900 MAX_CKPT_INTERVAL = 7200 # LoadL_KeyboardD Macros # KBDD = $(BIN)/LoadL_kbdd KBDD_LOG = $(LOG)/KbdLog MAX_KBDD_LOG = 64000 KBDD_DEBUG =
# # Specify whether to start the keyboard daemon # X_RUNS_HERE = True # # LoadL_Startd Macros # STARTD = $(BIN)/LoadL_startd STARTD_LOG = $(LOG)/StartLog MAX_STARTD_LOG = 64000 STARTD_DEBUG = POLLING_FREQUENCY = 5 POLLS_PER_UPDATE = 24 JOB_LIMIT_POLICY = 120 JOB_ACCT_Q_POLICY = 300 # # # LoadL_Schedd Macros # SCHEDD = $(BIN)/LoadL_schedd SCHEDD_LOG = $(LOG)/SchedLog MAX_SCHEDD_LOG = 64000 SCHEDD_DEBUG = SCHEDD_INTERVAL = 120 # CLIENT_TIMEOUT = 30 # # Negotiator Macros # NEGOTIATOR = $(BIN)/LoadL_negotiator NEGOTIATOR_DEBUG = NEGOTIATOR_LOG = $(LOG)/NegotiatorLog MAX_NEGOTIATOR_LOG = 64000 NEGOTIATOR_INTERVAL = 60 MACHINE_UPDATE_INTERVAL = 300 NEGOTIATOR_PARALLEL_DEFER = 300 NEGOTIATOR_PARALLEL_HOLD = 300 NEGOTIATOR_REDRIVE_PENDING = 90 NEGOTIATOR_RESCAN_QUEUE = 90 NEGOTIATOR_REMOVE_COMPLETED = 0
# # Sets the interval between recalculation of the SYSPRIO values # for all the jobs in the queue # NEGOTIATOR_RECALCULATE_SYSPRIO_INTERVAL = 0 # # Starter Macros # STARTER = $(BIN)/LoadL_starter STARTER_DEBUG = STARTER_LOG = $(LOG)/StarterLog MAX_STARTER_LOG = 64000 # # LoadL_Master Macros # MASTER = $(BIN)/LoadL_master MASTER_LOG = $(LOG)/MasterLog MASTER_DEBUG = MAX_MASTER_LOG = 64000 RESTARTS_PER_HOUR = 12 PUBLISH_OBITUARIES = TRUE OBITUARY_LOG_LENGTH = 25 # # Specify whether log files are truncated when opened # TRUNC_MASTER_LOG_ON_OPEN = False TRUNC_STARTD_LOG_ON_OPEN = False TRUNC_SCHEDD_LOG_ON_OPEN = False TRUNC_KBDD_LOG_ON_OPEN = False TRUNC_STARTER_LOG_ON_OPEN = False TRUNC_NEGOTIATOR_LOG_ON_OPEN = False # # NQS Directory # # For users of NQS resources: # Specify the directory containing qsub, qstat, qdel # # NQS_DIR = /usr/bin # # Specify machine's relative priority to run jobs # # CUSTOM_METRIC = # CUSTOM_METRIC_COMMAND = # # Machine control expressions and macros # OpSys : "$(OPSYS)" Arch : "$(ARCH)" Machine : "$(HOST).$(DOMAIN)"
# # Expressions used to control starting and stopping of foreign jobs # MINUTE = 60 HOUR = (60 * $(MINUTE)) StateTimer = (CurrentTime - EnteredCurrentState) BackgroundLoad = 0.7 HighLoad = 1.5 StartIdleTime = 15 * $(MINUTE) ContinueIdleTime = 5 * $(MINUTE) MaxSuspendTime = 10 * $(MINUTE) MaxVacateTime = 10 * $(MINUTE) # KeyboardBusy = KeyboardIdle < $(POLLING_FREQUENCY) CPU_Idle = LoadAvg <= $(BackgroundLoad) CPU_Busy = LoadAvg >= $(HighLoad) # # Refer to LoadL_config man page for an explanation of these control # expressions # # START : $(CPU_Idle) && KeyboardIdle > $(StartIdleTime) # SUSPEND : $(CPU_Busy) || $(KeyboardBusy) # CONTINUE : $(CPU_Idle) && KeyboardIdle > $(ContinueIdleTime) # VACATE : $(StateTimer) > $(MaxSuspendTime) # KILL : $(StateTimer) > $(MaxVacateTime) # START : T SUSPEND : F CONTINUE : T VACATE : F KILL : F # # Expressions used to prioritize job queue # The following expression is an example. # #SYSPRIO: (ClassSysprio * 100) + (UserSysprio * 10) + (GroupSysprio * 1)- (QDate) # # The following (default) expression for SYSPRIO creates a FIFO job queue. # SYSPRIO: 0 - (QDate) # # Expressions used to prioritize machines # # The following example orders machines by the load average # normalized for machine speed:
# #MACHPRIO: 0 - (1000 * (LoadAvg / (Cpus * Speed))) # # The following (default) expression for MACHPRIO orders # machines by load average. # MACHPRIO: 0 - (LoadAvg) # # The MAX_JOB_REJECT value determines how many times a job can be # rejected before it is canceled or put on hold. The default is -1, # which indicates no limit to the number of times a job can be rejected. # MAX_JOB_REJECT = -1 # # When ACTION_ON_MAX_REJECT is HOLD, jobs will be put on user hold # when the number of rejects reaches the MAX_JOB_REJECT value. When # ACTION_ON_MAX_REJECT is CANCEL, jobs will be canceled when the # number of rejects reaches the MAX_JOB_REJECT value. The default # value is HOLD. # ACTION_ON_MAX_REJECT = HOLD # # To enable LoadLeveler to support DCE security credential passing, # uncomment the following keyword. An installation can provide its # own executables to pass or establish DCE security credentials for # a LoadLeveler job by replacing the executables specified by the # following keyword. # #DCE_AUTHENTICATION_PAIR = $(BIN)/llgetdce, $(BIN)/llsetdce