#!/bin/bash

# This PBS script illustrates how to run the geo_mpi quickstart case using the
# MPI version of the kaiju software.

# The lines starting with "#PBS" are directives for the PBS job control system,
# used on pleiades and derecho.

# On pleiades, this job should run in roughly 75 minutes of wall-clock time.
# On derecho, this job should run in roughly 20 minutes of wall-clock time.

# Example usage:
#   qsub geo_mpi.pbs

# IMPORTANT: You *must* do the following in the code below:
# 1. Uncomment the #PBS lines for your system and model resolution, comment out
#    the #PBS lines specific to other cases. Keep the common #PBS lines.
# 2. (derecho only) Set the #PBS -A directive to your own account.
# 3. Uncomment the module lines for your system, comment out the others.
# 4. Set kaiju_install_dir to your local kaiju installation.
# 5. Set PATH to include the bin subdirectory of the build subdirectory of
#    your local kaiju installation.
# 6. If using the Intel compiler, uncomment the setting for KMP_STACKSIZE and
#    comment out the setting for OMP_STACKSIZE. If not using the Intel
#    compiler, do the opposite: uncomment OMP_STACKSIZE, comment out
#    KMP_STACKSIZE.
# 7. Uncomment the OMP_NUM_THREADS line for your system, comment out the
#    others.
# 8. Uncomment the placer_cmd definition and mpiexec lines for your system,
#    comment out the others.
# 9. (CRITICAL derecho-only hack) Add the following element to the end of the
#    <GAMERA> element in geo_mpi.xml:
#      <coupling blockHalo="T" />
# 10. (CRITICAL derecho-only hack) Add the following element to the end of the
#     <VOLTRON> element in geo_mpi.xml:
#       <coupling doAsyncCoupling="F" />
#     (A sketch of both additions follows this list.)

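# Illustrative sketch (not verbatim from geo_mpi.xml) of where the derecho-only
# additions from items 9 and 10 go. The "..." stands for whatever content each
# element already has; only the <coupling> lines are new:
#
#   <GAMERA>
#     ...
#     <coupling blockHalo="T" />
#   </GAMERA>
#
#   <VOLTRON>
#     ...
#     <coupling doAsyncCoupling="F" />
#   </VOLTRON>
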
#------------------------------------------------------------------------------
# START OF PBS DIRECTIVES
#------------------------------------------------------------------------------

# PBS directives for all systems

# Provide a useful name for this PBS job. This name will appear in the
# "qstat" output for this job.
#PBS -N geo_mpi

# Combine the job script std(o)ut and std(e)rr into stdout. This combined
# output is saved in the file geo_mpi.oJOB_NUM, where JOB_NUM is the PBS job
# number assigned when the job is submitted. Note that the output from the
# kaiju software itself is captured in a separate file (see below).
#PBS -j oe

# Request compute time on the allocated compute node(s). The format is
# hh:mm:ss, where hh is hours, mm is minutes, and ss is seconds. Set this
# value high enough to ensure that your job runs to completion, but low
# enough to stay within the time limits of the queue you are submitting to.
#PBS -l walltime=02:00:00

# Send an email to the user when the job is (a)borted, (b)egins, and/or (e)nds.
# An alternate email address may be specified with the #PBS -M directive.
#PBS -m abe

#-----------------------------------------------------------------------------

# In the lines below, uncomment the set appropriate for your system, and
# comment out the other sets. The default is pleiades.

#-----------------------------------------------------------------------------

# For the pleiades system at NASA Ames HECC:

# Submit the job to the PBS queue called "normal".
#PBS -q normal

# This is the line where you request specific resources from the PBS system.
# Uncomment the line for your model resolution.
# select=2 -> Request 2 compute nodes.
# ncpus=28 -> Each compute node must have at least 28 cores. This requirement
#             implies the use of the 2-socket, 14 cores/socket Broadwell nodes.
# mpiprocs=2 -> Each node will run 2 MPI ranks of the kaiju code.
# ompthreads=14 -> Each MPI rank will run 14 OMP threads.
# model=bro -> Each compute node must contain Broadwell chips. Specifying
#              "model" is a HECC-specific PBS requirement.
# For a "D"-resolution model run:
#PBS -l select=2:ncpus=28:mpiprocs=2:ompthreads=14:model=bro+1:ncpus=28:mpiprocs=1:ompthreads=28:model=bro
# For a "Q"-resolution model run:
##PBS -l select=8:ncpus=28:mpiprocs=2:ompthreads=14:model=bro+9:ncpus=28:mpiprocs=1:ompthreads=28:model=bro
# For an "O"-resolution model run:
##PBS -l select=48:ncpus=28:mpiprocs=2:ompthreads=14:model=bro+9:ncpus=28:mpiprocs=1:ompthreads=28:model=bro

# NOTE: Everything after the "+" is an additional request for resources for
# "helper" applications in the MPI kaiju code.

# IMPORTANT NOTE: In the XML file for your run, the "decomposition" of the
# problem into MPI ranks is specified by the XML elements <iPdir>, <jPdir>, and
# <kPdir>. This breakdown is implemented on the resource request line by using
# select=N/mpiprocs, where N=iPdir*jPdir*kPdir. This tells PBS to give you
# N/mpiprocs nodes, each of which will run mpiprocs MPI ranks. More simply:
# iPdir*jPdir*kPdir = select*mpiprocs.

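# Worked example (illustration only), using the "D"-resolution pleiades request
# above: select=2 and mpiprocs=2 give 2*2 = 4 gamera MPI ranks, so the XML
# decomposition must satisfy iPdir*jPdir*kPdir = 4. One hypothetical choice:
#   <iPdir>2</iPdir>
#   <jPdir>2</jPdir>
#   <kPdir>1</kPdir>
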
#-----------------------------------------------------------------------------

# For the derecho system at UCAR:

##PBS -A UJHB0019
##PBS -q main
##PBS -l select=2:ncpus=128:mpiprocs=2:ompthreads=64+1:ncpus=128:mpiprocs=1:ompthreads=128
# Use 8 ranks/node for gamera on derecho.
# Use 2 ranks for helpers.
# Use 1 separate node for voltron (the additional PBS chunk after the "+").

#------------------------------------------------------------------------------
# END OF PBS DIRECTIVES
#------------------------------------------------------------------------------

echo "Job $PBS_JOBID started at `date` on `hostname`."

# Specify the ID string for the run. This can be set to any desired string.
# PBS_JOBNAME is used here as an example, as it is set by the #PBS -N
# directive near the top of this file.
runid=$PBS_JOBNAME

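# For example (hypothetical alternative, commented out), a fixed string could
# be used instead of the PBS job name. Note that the mpiexec command at the
# bottom of this script reads $runid.xml, so the run ID must match the name of
# your XML input file (geo_mpi.xml for this quickstart case).
# runid=geo_mpi
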
#------------------------------------------------------------------------------
# START OF MODULE DIRECTIVES
#------------------------------------------------------------------------------

# Load the required modules for MPI kaiju.

# NOTE: This set of modules assumes your kaiju installation was built using
# this same list of modules. If you used different modules at build time (for
# example, if you used a GNU compiler), update this list to use the modules
# from your build-time environment.

# Comment out the module lines for all systems except the one you are using.
module purge

# For pleiades:
module load nas
module load pkgsrc/2022Q1-rome    # For git-lfs and cmake
module load comp-intel/2020.4.304 # Latest version
module load mpi-hpe/mpt.2.23
module load hdf5/1.8.18_mpt

# For derecho:
# module load ncarenv/23.06
# module load cmake/3.26.3
# module load craype/2.7.20
# module load intel/2023.0.0
# module load geos/3.9.1          # Must come after intel/2023.0.0
# module load ncarcompilers/1.0.0 # Must come after intel/2023.0.0
# module load cray-mpich/8.1.25
# module load hdf5/1.12.2         # NOTE: Not the MPI version
# module load mkl/2023.0.0

echo "The following modules are loaded:"
module list

#------------------------------------------------------------------------------
# END OF MODULE DIRECTIVES
#------------------------------------------------------------------------------

# Define the kaiju installation location.
# NOTE: You MUST set this variable to the path to your kaiju directory, which
# is the top-level directory created when you cloned the kaiju repository.
kaiju_install_dir=$HOME/kaiju

# This script sets KAIJUHOME and other environment variables.
source $kaiju_install_dir/scripts/setupEnvironment.sh

# Add the kaiju binary directory to the command path.
# NOTE: You *must* add the bin subdirectory of your kaiju build directory to
# PATH. The setting below assumes that the MPI version of kaiju was built in
# the build_mpi subdirectory of the kaiju home directory (which is typically
# the same as kaiju_install_dir).
export PATH=$KAIJUHOME/build_mpi/bin:$PATH

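# Optional sanity check (illustrative; not part of the original workflow):
# uncomment to fail early if the MPI executable is not on the updated PATH.
# command -v voltron_mpi.x > /dev/null || { echo "voltron_mpi.x not found on PATH" >&2; exit 1; }
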
# Set MPI_TYPE_DEPTH to 32.
# If this is not done, gamera_mpi.x will crash with a stack traceback that
# includes an error message like this:
#   ...
#   MPT ERROR: The program attempted to construct a derived datatype with
#   depth 15, but the maximum allowed depth is 14. You can increase...
#   ...
# If you see error messages like this at run time, try increasing the value
# assigned to MPI_TYPE_DEPTH in the line below.
export MPI_TYPE_DEPTH=32

# Set the OMP stack size to prevent a crash.
# Without this setting, the kaiju code may crash with a segmentation fault and
# core dump. The value of "128M" was chosen ~arbitrarily; experimentation may
# allow a smaller value to be used.
# NOTE: OMP_STACKSIZE is the standard OpenMP environment variable for setting
# the OMP stack size. If using the Intel compiler, comment out the
# OMP_STACKSIZE line and uncomment the KMP_STACKSIZE line. KMP_STACKSIZE is the
# Intel-specific environment variable for setting the OMP stack size.
# export OMP_STACKSIZE=128M
export KMP_STACKSIZE=128M

# Override the value of OMP_NUM_THREADS set by PBS from the ompthreads
# directive. This is necessary to ensure proper operation of
# correctOMPenvironment.sh. Uncomment the line for your system, comment out
# the rest.
export OMP_NUM_THREADS=28    # pleiades
# export OMP_NUM_THREADS=128 # derecho

echo "The active environment variables are:"
printenv

# Copy the node list file (needed by correctOMPenvironment.sh).
nodefile=nodefile.$PBS_JOBID
cp $PBS_NODEFILE $nodefile

# Run the model. Direct output from the program is saved in a text file.
exe=voltron_mpi.x
echo "Running $exe on model $runid."

# NOTE: The omplace tool is part of the MPT implementation of MPI. It is used
# to ensure efficient pinning of MPI ranks and OMP threads to appropriate
# sockets and cores. If you omit omplace, your job will still run, but it will
# probably be an order of magnitude slower than it would be with omplace.

# HOWEVER, omplace only works correctly when all compute nodes take the same
# number of MPI ranks and OMP threads. When the rank/thread distribution is not
# even (as in a run with voltron.x), the correctOMPenvironment.sh script
# updates the behavior of omplace to produce the correct distribution of ranks
# and threads.

# On derecho, the MPT implementation of MPI is not used, so the omplace tool is
# not available. The pinCpuCores.sh script performs the same function on
# derecho that correctOMPenvironment.sh and omplace perform on pleiades.

# Uncomment the lines below appropriate to the system you are running on, and
# comment out the others.

# pleiades
placer_cmd="$KAIJUHOME/scripts/preproc/correctOMPenvironment.sh $nodefile omplace"
mpiexec $placer_cmd $exe $runid.xml >& ${exe}.${runid}.out

# derecho
# placer_cmd="$KAIJUHOME/scripts/preproc/pinCpuCores.sh"
# mpiexec $placer_cmd $exe $runid.xml >& ${exe}.${runid}.out

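# Optional (illustrative): while the job is running, progress can be followed
# with "tail -f" on the program output file, e.g.:
#   tail -f voltron_mpi.x.geo_mpi.out
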
echo "Job $PBS_JOBID ended at `date` on `hostname`."