#!/bin/bash
# This PBS script illustrates how to run the geo_mpi quickstart case using the
# MPI version of the kaiju software.
# The lines starting with "#PBS" are directives for the PBS job control system,
# used on pleiades and derecho.
# On pleiades, this job should run in roughly 75 minutes of wall-clock time.
# On derecho, this job should run in roughly 20 minutes of wall-clock time.
# Example usage
# qsub geo_mpi.pbs
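# After submission, the job can be monitored with the standard PBS status
# command, for example:
# qstat -u $USER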
# IMPORTANT: You *must* do the following in the code below:
# 1. Uncomment the #PBS lines for your system and model resolution, comment out
# the #PBS lines specific to other cases. Keep the common #PBS lines.
# 2. (derecho only) Set the #PBS -A directive to your own account.
# 3. Uncomment the modules lines for your system, comment out the others.
# 4. Set kaiju_install_dir to your local kaiju installation.
# 5. Set PATH to include your bin subdirectory of the build subdirectory of
# your local kaiju installation.
# 6. If using the Intel compiler, uncomment the setting for KMP_STACK_SIZE and
# comment out setting OMP_STACK_SIZE. If not using the Intel compiler, do
# the opposite - uncomment OMP_STACK_SIZE, comment out KMP_STACK_SIZE.
# 7. Uncomment the OMP_NUM_THREADS line for your system, comment out the others.
# 8. Uncomment the placer_cmd definition and mpiexec lines for your system,
# comment out the others.
# 9. (CRITICAL derecho-only hack) Add the following element to the end of the
# <GAMERA> element in geo_mpi.xml:
# <coupling blockHalo="T" />
# 10. (CRITICAL derecho-only hack) Add the following element to the end of the
# <VOLTRON> element in geo_mpi.xml:
# <coupling doAsyncCoupling="F" />
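# For illustration, after steps 9 and 10 the relevant elements of geo_mpi.xml
# would end like this (the "..." stands for whatever child elements your file
# already contains):
# <GAMERA>
#   ...
#   <coupling blockHalo="T" />
# </GAMERA>
# <VOLTRON>
#   ...
#   <coupling doAsyncCoupling="F" />
# </VOLTRON>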
#------------------------------------------------------------------------------
# START OF PBS DIRECTIVES
#------------------------------------------------------------------------------
# PBS directives for all systems
# Provide a useful name for this PBS job. This name will appear in the
# "qstat" output for this job.
#PBS -N geo_mpi
# Combine job script std(o)ut and std(e)rr into stdout. This combined output
# will be saved in the file geo_mpi.oJOB_NUM, where geo_mpi is the job name
# set by the #PBS -N directive above and JOB_NUM is the PBS job number
# assigned when the job is submitted. Note that the output from the kaiju
# software itself is captured in a separate file (see below).
#PBS -j oe
# Request compute time on the allocated compute node(s). The format is
# hh:mm:ss, where hh is hours, mm is minutes, and ss is seconds. Set
# this value high enough to ensure that your job runs to completion,
# but low enough to stay within the time limits of the queue you are
# submitting to.
#PBS -l walltime=02:00:00
# Send an email to the user when the job is (a)borted, (b)egins, and/or (e)nds.
# An alternate email address may be specified with the #PBS -M directive.
#PBS -m abe
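# For example, to direct these notifications to a specific address (the
# address below is a placeholder; replace it with your own):
##PBS -M your.name@example.org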
#-----------------------------------------------------------------------------
# In the lines below, uncomment the set appropriate for your system, and
# comment out the other sets. The default is pleiades.
#-----------------------------------------------------------------------------
# For the pleiades system at NASA Ames HECC:
# Submit the job to the PBS queue called "normal".
#PBS -q normal
# This is the line where you request specific resources from the PBS
# system. Uncomment the line for your model resolution.
# select=2 -> Request 2 compute nodes.
# ncpus=28 -> Each compute node must have at least 28 cores. This requirement
# implies the use of the 2-socket, 14 cores/socket Broadwell nodes.
# mpiprocs=2 -> Each node will run 2 MPI ranks of the kaiju code.
# ompthreads=14 -> Each MPI rank will run 14 OMP threads.
# model=bro -> Each compute node must contain Broadwell chips. Specifying
# "model" is a HECC-specific PBS requirement.
# For a "D"-resolution model run:
#PBS -l select=2:ncpus=28:mpiprocs=2:ompthreads=14:model=bro+1:ncpus=28:mpiprocs=1:ompthreads=28:model=bro
# For a "Q"-resolution model run:
##PBS -l select=8:ncpus=28:mpiprocs=2:ompthreads=14:model=bro+9:ncpus=28:mpiprocs=1:ompthreads=28:model=bro
# For a "O"-resolution model run:
##PBS -l select=48:ncpus=28:mpiprocs=2:ompthreads=14:model=bro+9:ncpus=28:mpiprocs=1:ompthreads=28:model=bro
# NOTE: Everything after the "+" is an additional request for resources for
# "helper" applications in the MPI kaiju code.
# IMPORTANT NOTE: In the XML file for your run, the "decomposition" of the
# problem into MPI ranks is specified by the XML elements <iPdir>, <jPdir>, and
# <kPdir>. This breakdown is implemented on the resource request line by using
# select=N/mpiprocs, where N=iPdir*jPdir*kPdir. This tells PBS to give you
# N/mpiprocs nodes, each of which will run mpiprocs MPI ranks. More simply:
# iPdir*jPdir*kPdir = select*mpiprocs.
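# As a worked example for the "D"-resolution request above (select=2,
# mpiprocs=2), the gamera chunk provides 2*2 = 4 MPI ranks, so the XML
# decomposition must satisfy iPdir*jPdir*kPdir = 4 (for instance
# <iPdir>2</iPdir>, <jPdir>2</jPdir>, <kPdir>1</kPdir>; the actual values in
# your geo_mpi.xml may differ).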
#-----------------------------------------------------------------------------
# For the derecho system at UCAR:
##PBS -A UJHB0019
##PBS -q main
##PBS -l select=2:ncpus=128:mpiprocs=2:ompthreads=64+1:ncpus=128:mpiprocs=1:ompthreads=128
# Notes for derecho:
#   - Use 8 ranks/node for gamera.
#   - Use 2 ranks for helpers.
#   - voltron gets 1 separate node (the additional PBS chunk after the "+").
#------------------------------------------------------------------------------
# END OF PBS DIRECTIVES
#------------------------------------------------------------------------------
echo "Job $PBS_JOBID started at `date` on `hostname`."
# Specify the ID string for the run. This can be set to any desired string.
# PBS_JOBNAME is used here as an example, as it is set by the #PBS -N
# directive near the top of this file.
runid=$PBS_JOBNAME
#------------------------------------------------------------------------------
# START OF MODULE DIRECTIVES
#------------------------------------------------------------------------------
# Load the required modules for MPI kaiju.
# NOTE: This set of modules assumes your kaiju installation was built using
# this same list of modules. If you used different modules at build time (for
# example, if you used a GNU compiler), update this list to use the modules
# from your build-time environment.
# Comment out the module lines for all systems except the one you are using.
module purge
# For pleiades:
module load nas
module load pkgsrc/2022Q1-rome # For git-lfs and cmake
module load comp-intel/2020.4.304 # Latest version
module load mpi-hpe/mpt.2.23
module load hdf5/1.8.18_mpt
# For derecho:
# module load ncarenv/23.06
# module load cmake/3.26.3
# module load craype/2.7.20
# module load intel/2023.0.0
# module load geos/3.9.1 # Must come after intel/2023.0.0
# module load ncarcompilers/1.0.0 # Must come after intel/2023.0.0
# module load cray-mpich/8.1.25
# module load hdf5/1.12.2 # NOTE: Not the MPI version
# module load mkl/2023.0.0
echo "The following modules are loaded:"
module list
#------------------------------------------------------------------------------
# END OF MODULE DIRECTIVES
#------------------------------------------------------------------------------
# Define the kaiju installation location.
# NOTE: You MUST set this variable to the path to your kaiju directory, which
# is the top-level directory created when you cloned the kaiju repository.
kaiju_install_dir=$HOME/kaiju
# This script sets KAIJUHOME and other environment variables.
source $kaiju_install_dir/scripts/setupEnvironment.sh
# Add the kaiju binary directory to the command path.
# NOTE: You *must* set this variable to the path to the bin subdirectory of
# your kaiju build directory. The setting below assumes that the MPI version of
# kaiju was built in the build_mpi subdirectory of the kaiju home directory
# (which is typically the same as kaiju_install_dir).
export PATH=$KAIJUHOME/build_mpi/bin:$PATH
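# Optional sanity check (a sketch, not part of the original workflow): confirm
# that setupEnvironment.sh set KAIJUHOME and that the executable used below is
# actually on the PATH before spending queue time on a run that cannot start.
# if [ -z "$KAIJUHOME" ] || ! command -v voltron_mpi.x >/dev/null 2>&1; then
#     echo "ERROR: KAIJUHOME unset or voltron_mpi.x not found on PATH" >&2
#     exit 1
# fi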
# Set the MPI_TYPE_DEPTH to 32.
# If this is not done, gamera_mpi.x will crash with a stack traceback that
# includes an error message like this:
# ...
# MPT ERROR: The program attempted to construct a derived datatype with
# depth 15, but the maximum allowed depth is 14. You can increase...
# ...
# If you see error messages like this at run time, try increasing the value
# assigned to MPI_TYPE_DEPTH in the line below.
export MPI_TYPE_DEPTH=32
# Set the OMP stack size to prevent a crash.
# If this setting is omitted, the kaiju code may crash with a segmentation
# fault and core dump. The value of "128M" was chosen somewhat arbitrarily;
# experimentation may allow a smaller value to be used.
# NOTE: OMP_STACKSIZE is the standard OpenMP environment variable for setting
# the OMP stack size. If using the Intel compiler, comment out the
# OMP_STACKSIZE line and uncomment the KMP_STACKSIZE line. KMP_STACKSIZE is the
# Intel-specific environment variable for setting the OMP stack size.
# export OMP_STACKSIZE=128M
export KMP_STACKSIZE=128M
# Override the value of OMP_NUM_THREADS set by PBS from the ompthreads
# directive. This is necessary to ensure proper operation of
# correctOMPenvironment.sh. Uncomment the line for your system, comment out the
# rest.
export OMP_NUM_THREADS=28 # pleiades
# export OMP_NUM_THREADS=128 # derecho
echo "The active environment variables are:"
printenv
# Copy the node list file (needed by correctOMPenvironment.sh).
nodefile=nodefile.$PBS_JOBID
cp $PBS_NODEFILE $nodefile
# Run the model. Direct output from the program is saved in a text file.
exe=voltron_mpi.x
echo "Running $exe on model $runid."
# NOTE: The omplace tool is part of the MPT implementation of MPI. It is used
# to ensure efficient pinning of MPI ranks and OMP threads to appropriate
# sockets and cores. If you omit omplace, your job will still run, but it will
# probably be an order of magnitude slower than it would be when using omplace.
# HOWEVER, omplace only works correctly when all compute nodes take the same
# number of MPI ranks and OMP threads. When the rank/thread distribution is not
# even (like for a run with voltron.x), the correctOMPenvironment.sh script
# updates the behavior of omplace to produce the correct distribution of ranks
# and threads.
# On derecho, the MPT implementation of MPI is not used, and so the omplace
# tool is not available. The pinCpuCores.sh script performs the same function
# on derecho that correctOMPenvironment.sh and omplace perform on pleiades.
# Uncomment the lines below appropriate to the system you are running on, and
# comment out the others.
# pleiades
placer_cmd="$KAIJUHOME/scripts/preproc/correctOMPenvironment.sh $nodefile omplace"
mpiexec $placer_cmd $exe $runid.xml >& ${exe}.${runid}.out
# derecho
# placer_cmd="$KAIJUHOME/scripts/preproc/pinCpuCores.sh"
# mpiexec $placer_cmd $exe $runid.xml >& ${exe}.${runid}.out
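# Optional (a sketch): capture and report the mpiexec exit status so a failed
# run is easy to spot in the PBS output file.
# status=$?
# echo "$exe exited with status $status"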
echo "Job $PBS_JOBID ended at `date` on `hostname`."