mirror of
https://github.com/JHUAPL/kaiju.git
synced 2026-01-09 15:17:56 -05:00
564 lines
20 KiB
Python
564 lines
20 KiB
Python
#!/usr/bin/env python
|
|
|
|
"""Run MAGE Intel Inspector tests.
|
|
|
|
This script runs a series of tests of the MAGE software using Intel tools.
|
|
|
|
The Intel Inspector memory checks run in about an hour on two derecho nodes.
|
|
|
|
The Intel Inspector thread checks run in about 45 minutes on two derecho nodes.
|
|
|
|
The report script runs in about 2 minutes on one derecho node.
|
|
|
|
Authors
|
|
-------
|
|
Jeff Garretson
|
|
Eric Winter
|
|
|
|
"""
|
|
|
|
|
|
# Import standard modules.
|
|
import datetime
|
|
import os
|
|
import shutil
|
|
import subprocess
|
|
import sys
|
|
|
|
# Import 3rd-party modules.
|
|
from jinja2 import Template
|
|
|
|
# Import project modules.
|
|
import common
|
|
|
|
|
|
# # Program constants
|
|
|
|
# Program description.
|
|
DESCRIPTION = "Script for MAGE checks with Intel Inspector tools"
|
|
|
|
# Home directory of kaiju installation
|
|
KAIJUHOME = os.environ["KAIJUHOME"]
|
|
|
|
# Root of directory tree for this set of tests.
|
|
MAGE_TEST_SET_ROOT = os.environ["MAGE_TEST_SET_ROOT"]
|
|
|
|
# Directory for Intel Inspector checks
|
|
INTEL_CHECKS_DIRECTORY = os.path.join(MAGE_TEST_SET_ROOT, "intelChecks")
|
|
|
|
# Path to directory containing the test scripts
|
|
TEST_SCRIPTS_DIRECTORY = os.path.join(KAIJUHOME, "testingScripts")
|
|
|
|
# Path to directory containing module lists
|
|
MODULE_LIST_DIRECTORY = os.path.join(TEST_SCRIPTS_DIRECTORY,
|
|
"mage_build_test_modules")
|
|
|
|
# Name of file containing names of modules lists to use for Intel checks
|
|
INTEL_CHECKS_LIST_FILE = os.path.join(MODULE_LIST_DIRECTORY,
|
|
"intelChecks.lst")
|
|
|
|
# Path to jinja2 template file for PBS script for the memory tests.
|
|
MEM_CHECK_PBS_TEMPLATE = os.path.join(
|
|
TEST_SCRIPTS_DIRECTORY, "intelCheckSubmitMem-template.pbs"
|
|
)
|
|
|
|
# Path to jinja2 template file for PBS script for the thread tests.
|
|
THREAD_CHECK_PBS_TEMPLATE = os.path.join(
|
|
TEST_SCRIPTS_DIRECTORY, "intelCheckSubmitThread-template.pbs"
|
|
)
|
|
|
|
# Path to jinja2 template file for PBS script for the reporting script.
|
|
REPORT_PBS_TEMPLATE = os.path.join(
|
|
TEST_SCRIPTS_DIRECTORY, "intelCheckSubmitReport-template.pbs"
|
|
)
|
|
|
|
# Prefix for naming Intel Inspector checks directories
|
|
INTEL_CHECKS_DIRECTORY_PREFIX = "intelChecks_"
|
|
|
|
# Name of build subdirectory containing binaries
|
|
BUILD_BIN_DIR = "bin"
|
|
|
|
# Data and configuration files used by the Intel Inspector tests.
|
|
TEST_INPUT_FILES = [
|
|
"tinyCase.xml",
|
|
"memSuppress.sup",
|
|
"threadSuppress.sup",
|
|
]
|
|
|
|
# Name of PBS file for memory checks.
|
|
MEM_CHECK_PBS_FILENAME = "intelCheckSubmitMem.pbs"
|
|
|
|
# Name of PBS file for thread checks.
|
|
THREAD_CHECK_PBS_FILENAME = "intelCheckSubmitThread.pbs"
|
|
|
|
# Name of PBS file for report generation.
|
|
REPORT_PBS_FILENAME = "intelCheckSubmitReport.pbs"
|
|
|
|
# Branch or commit (or tag) used for testing.
|
|
BRANCH_OR_COMMIT = os.environ["BRANCH_OR_COMMIT"]
|
|
|
|
|
|
def intelChecks(args: dict):
|
|
"""Begin main program.
|
|
|
|
This is the main program code.
|
|
|
|
Parameters
|
|
----------
|
|
args : dict
|
|
Dictionary of command-line options.
|
|
|
|
Returns
|
|
-------
|
|
None
|
|
|
|
Raises
|
|
------
|
|
None
|
|
"""
|
|
# Local convenience variables.
|
|
debug = args.get("debug", False)
|
|
be_loud = args.get("loud", False)
|
|
slack_on_fail = args.get("slack_on_fail", False)
|
|
is_test = args.get("test", False)
|
|
verbose = args.get("verbose", False)
|
|
|
|
# ------------------------------------------------------------------------
|
|
|
|
if debug:
|
|
print(f"Starting {sys.argv[0]} at {datetime.datetime.now()}")
|
|
print(f"Current directory is {os.getcwd()}")
|
|
|
|
# ------------------------------------------------------------------------
|
|
|
|
# Make a directory to hold all of the Intel Inspector tests.
|
|
if verbose:
|
|
print(f"Creating {INTEL_CHECKS_DIRECTORY}.")
|
|
os.mkdir(INTEL_CHECKS_DIRECTORY)
|
|
|
|
# ------------------------------------------------------------------------
|
|
|
|
# Make a list of module sets to build with.
|
|
|
|
# Read the list of module sets to use for Intel checks.
|
|
with open(INTEL_CHECKS_LIST_FILE, encoding="utf-8") as f:
|
|
lines = f.readlines()
|
|
module_list_files = [_.rstrip() for _ in lines]
|
|
if debug:
|
|
print(f"module_list_files = {module_list_files}")
|
|
|
|
# ------------------------------------------------------------------------
|
|
|
|
# Read the template for the PBS script used for the memory tests.
|
|
with open(MEM_CHECK_PBS_TEMPLATE, encoding="utf-8") as f:
|
|
template_content = f.read()
|
|
mem_check_pbs_template = Template(template_content)
|
|
if debug:
|
|
print(f"mem_check_pbs_template = {mem_check_pbs_template}")
|
|
|
|
# Read the template for the PBS script used for the thread tests.
|
|
with open(THREAD_CHECK_PBS_TEMPLATE, encoding="utf-8") as f:
|
|
template_content = f.read()
|
|
thread_check_pbs_template = Template(template_content)
|
|
if debug:
|
|
print(f"thread_check_pbs_template = {thread_check_pbs_template}")
|
|
|
|
# Read the template for the PBS script used for the report generation.
|
|
with open(REPORT_PBS_TEMPLATE, encoding="utf-8") as f:
|
|
template_content = f.read()
|
|
report_pbs_template = Template(template_content)
|
|
if debug:
|
|
print(f"report_pbs_template = {report_pbs_template}")
|
|
|
|
# ------------------------------------------------------------------------
|
|
|
|
# Run the Intel Inspector checks with each set of modules.
|
|
|
|
# Create the common make command for all module sets.
|
|
make_cmd = "make gamera_mpi voltron_mpi"
|
|
if debug:
|
|
print(f"make_cmd = {make_cmd}")
|
|
|
|
# Create the list for submit results. Only set to True if all qsub
|
|
# commands for a set are OK.
|
|
submit_ok = [False]*len(module_list_files)
|
|
if debug:
|
|
print(f"submit_ok = {submit_ok}")
|
|
|
|
# Create a list of lists for job IDs. There are 3 job IDs per set - one for
|
|
# memory check, one for thread check, and one for the test report.
|
|
job_ids = [[None, None, None]]*len(module_list_files)
|
|
if debug:
|
|
print(f"job_ids = {job_ids}")
|
|
|
|
# Run Intel checks with each set of modules.
|
|
for (i_module_set, module_list_file) in enumerate(module_list_files):
|
|
if verbose:
|
|
print("Performing Intel Inspector checks with module set "
|
|
f"{module_list_file}.")
|
|
|
|
# Extract the name of the list.
|
|
module_set_name = module_list_file.rstrip(".lst")
|
|
if debug:
|
|
print(f"module_set_name = {module_set_name}.")
|
|
|
|
# Read this module list file, extracting cmake environment and
|
|
# options, if any.
|
|
path = os.path.join(MODULE_LIST_DIRECTORY, module_list_file)
|
|
if debug:
|
|
print(f"path = {path}")
|
|
module_names, cmake_environment, cmake_options = (
|
|
common.read_build_module_list_file(path)
|
|
)
|
|
if debug:
|
|
print(f"module_names = {module_names}")
|
|
print(f"cmake_environment = {cmake_environment}")
|
|
print(f"cmake_options = {cmake_options}")
|
|
|
|
# Add the additional flags needed for Intel Inspector checks.
|
|
cmake_options += " -DDISABLE_DEBUG_BOUNDS_CHECKS=ON"
|
|
cmake_options += " -DCMAKE_BUILD_TYPE=DEBUG"
|
|
if debug:
|
|
print(f"cmake_options = {cmake_options}")
|
|
|
|
# Assemble the commands to load the listed modules.
|
|
module_cmd = (
|
|
f"module --force purge; module load {' '.join(module_names)}"
|
|
)
|
|
if debug:
|
|
print(f"module_cmd = {module_cmd}")
|
|
|
|
# --------------------------------------------------------------------
|
|
|
|
# Prepare the directory and commands for the test.
|
|
|
|
# Make a directory for this test, and go there.
|
|
dir_name = f"{INTEL_CHECKS_DIRECTORY_PREFIX}{module_set_name}"
|
|
build_directory = os.path.join(INTEL_CHECKS_DIRECTORY, dir_name)
|
|
if debug:
|
|
print(f"build_directory = {build_directory}")
|
|
os.mkdir(build_directory)
|
|
os.chdir(build_directory)
|
|
|
|
# Run cmake to build the Makefile.
|
|
if verbose:
|
|
print("Running cmake to create Makefile for module set"
|
|
f" {module_set_name}.")
|
|
cmd = (f"{module_cmd}; {cmake_environment} cmake {cmake_options}"
|
|
f" {KAIJUHOME} >& cmake.out")
|
|
if debug:
|
|
print(f"cmd = {cmd}")
|
|
try:
|
|
# NOTE: stdout and stderr goes cmake.out.
|
|
cproc = subprocess.run(cmd, shell=True, check=True)
|
|
except subprocess.CalledProcessError as e:
|
|
print(f"ERROR: cmake for module set {module_set_name} failed.\n"
|
|
f"e.cmd = {e.cmd}\n"
|
|
f"e.returncode = {e.returncode}\n"
|
|
f"See {os.path.join(build_directory, 'cmake.out')}"
|
|
" for output from cmake.\n"
|
|
"Skipping remaining steps for module set "
|
|
f"{module_set_name}.",
|
|
file=sys.stderr)
|
|
continue
|
|
if debug:
|
|
print(f"cproc = {cproc}")
|
|
|
|
# Run the build.
|
|
if verbose:
|
|
print("Running make to build kaiju for module set"
|
|
f" {module_set_name}.")
|
|
cmd = f"{module_cmd}; {make_cmd} >& make.out"
|
|
if debug:
|
|
print(f"cmd = {cmd}")
|
|
try:
|
|
# NOTE: stdout and stderr go into make.out.
|
|
cproc = subprocess.run(cmd, shell=True, check=True)
|
|
except subprocess.CalledProcessError as e:
|
|
print(f"ERROR: make for module set {module_set_name} failed.\n"
|
|
f"e.cmd = {e.cmd}\n"
|
|
f"e.returncode = {e.returncode}\n"
|
|
f"See {os.path.join(build_directory, 'make.out')}"
|
|
" for output from make.\n"
|
|
"Skipping remaining steps for module set "
|
|
f"{module_set_name}.",
|
|
file=sys.stderr)
|
|
continue
|
|
if debug:
|
|
print(f"cproc = {cproc}")
|
|
|
|
# --------------------------------------------------------------------
|
|
|
|
# Copy or create input files for the tests.
|
|
|
|
# Go to the bin directory for testing.
|
|
os.chdir(BUILD_BIN_DIR)
|
|
|
|
# Copy in the files used by the tests.
|
|
if verbose:
|
|
print("Copying files needed for Intel checks.")
|
|
for filename in TEST_INPUT_FILES:
|
|
from_path = os.path.join(TEST_SCRIPTS_DIRECTORY, filename)
|
|
to_path = os.path.join(".", filename)
|
|
shutil.copyfile(from_path, to_path)
|
|
|
|
# Generate bcwind data file.
|
|
if verbose:
|
|
print("Creating bcwind data file.")
|
|
cmd = "cda2wind -t0 2016-08-09T09:00:00 -t1 2016-08-09T11:00:00"
|
|
if debug:
|
|
print(f"cmd = {cmd}")
|
|
try:
|
|
cproc = subprocess.run(cmd, shell=True, check=True)
|
|
except subprocess.CalledProcessError as e:
|
|
print("ERROR: Unable to create bcwind data file for module set "
|
|
f"{module_set_name}.\n"
|
|
f"e.cmd = {e.cmd}\n"
|
|
f"e.returncode = {e.returncode}\n"
|
|
"See testing log for output from cda2wind.\n"
|
|
"Skipping remaining steps for module set"
|
|
f"{module_set_name}\n")
|
|
continue
|
|
if debug:
|
|
print(f"cproc = {cproc}")
|
|
|
|
# Generate the LFM grid file.
|
|
if verbose:
|
|
print("Creating LFM grid file.")
|
|
cmd = "genLFM -gid D"
|
|
if debug:
|
|
print(f"cmd = {cmd}")
|
|
try:
|
|
cproc = subprocess.run(cmd, shell=True, check=True)
|
|
except subprocess.CalledProcessError as e:
|
|
print("ERROR: Unable to create LFM grid file for module set "
|
|
f"{module_set_name}.\n"
|
|
f"e.cmd = {e.cmd}\n"
|
|
f"e.returncode = {e.returncode}\n"
|
|
"See testing log for output from genLFM.\n"
|
|
"Skipping remaining steps for module set"
|
|
f"{module_set_name}\n")
|
|
continue
|
|
if debug:
|
|
print(f"cproc = {cproc}")
|
|
|
|
# Generate the Raiju configuration file.
|
|
if verbose:
|
|
print("Creating Raiju configuration file.")
|
|
cmd = "genRAIJU"
|
|
if debug:
|
|
print(f"cmd = {cmd}")
|
|
try:
|
|
cproc = subprocess.run(cmd, shell=True, check=True)
|
|
except subprocess.CalledProcessError as e:
|
|
print("ERROR: Unable to create Raiju configuration file"
|
|
f" for module set {module_set_name}.\n"
|
|
f"e.cmd = {e.cmd}\n"
|
|
f"e.returncode = {e.returncode}\n"
|
|
"See testing log for output from genRAIJU.\n"
|
|
"Skipping remaining steps for module set "
|
|
f"{module_set_name}\n")
|
|
continue
|
|
if debug:
|
|
print(f"cproc = {cproc}")
|
|
|
|
# --------------------------------------------------------------------
|
|
|
|
# Create the PBS scripts for the tests.
|
|
|
|
# Assemble common data to fill in the PBS templates.
|
|
pbs_options = {}
|
|
pbs_options["account"] = os.environ["DERECHO_TESTING_ACCOUNT"]
|
|
pbs_options["queue"] = os.environ["DERECHO_TESTING_QUEUE"]
|
|
pbs_options["job_priority"] = os.environ["DERECHO_TESTING_PRIORITY"]
|
|
pbs_options["modules"] = module_names
|
|
pbs_options["kaijuhome"] = KAIJUHOME
|
|
pbs_options["tmpdir"] = os.environ["TMPDIR"]
|
|
pbs_options["slack_bot_token"] = os.environ["SLACK_BOT_TOKEN"]
|
|
pbs_options["mage_test_root"] = os.environ["MAGE_TEST_ROOT"]
|
|
pbs_options["branch_or_commit"] = BRANCH_OR_COMMIT
|
|
pbs_options["mage_test_set_root"] = os.environ["MAGE_TEST_SET_ROOT"]
|
|
pbs_options["conda_environment"] = os.environ["CONDA_ENVIRONMENT"]
|
|
|
|
# Set options specific to the memory check, then render the template.
|
|
pbs_options["job_name"] = "mage_intelCheckSubmitMem"
|
|
pbs_content = mem_check_pbs_template.render(pbs_options)
|
|
with open(MEM_CHECK_PBS_FILENAME, "w", encoding="utf-8") as f:
|
|
f.write(pbs_content)
|
|
|
|
# Set options specific to the thread check, then render the template.
|
|
pbs_options["job_name"] = "mage_intelCheckSubmitThread"
|
|
pbs_content = thread_check_pbs_template.render(pbs_options)
|
|
with open(THREAD_CHECK_PBS_FILENAME, "w", encoding="utf-8") as f:
|
|
f.write(pbs_content)
|
|
|
|
# Set options specific to the report generator, then render the
|
|
# template.
|
|
pbs_options["job_name"] = "mage_intelCheckReportSubmit"
|
|
pbs_options["report_options"] = ""
|
|
if debug:
|
|
pbs_options["report_options"] += " -d"
|
|
if slack_on_fail:
|
|
pbs_options["report_options"] += " -s"
|
|
if is_test:
|
|
pbs_options["report_options"] += " -t"
|
|
if verbose:
|
|
pbs_options["report_options"] += " -v"
|
|
pbs_content = report_pbs_template.render(pbs_options)
|
|
with open(REPORT_PBS_FILENAME, "w", encoding="utf-8") as f:
|
|
f.write(pbs_content)
|
|
|
|
# --------------------------------------------------------------------
|
|
|
|
# Run the PBS scripts for the tests.
|
|
|
|
# Run the memory check job.
|
|
cmd = f"qsub {MEM_CHECK_PBS_FILENAME}"
|
|
if debug:
|
|
print(f"cmd = {cmd}")
|
|
try:
|
|
cproc = subprocess.run(cmd, shell=True, check=True,
|
|
text=True, capture_output=True)
|
|
except subprocess.CalledProcessError as e:
|
|
print("ERROR: qsub failed for memory check.\n"
|
|
f"e.cmd = {e.cmd}\n"
|
|
f"e.returncode = {e.returncode}\n"
|
|
"See test log for output.\n"
|
|
"Skipping remaining steps for module set "
|
|
f"{module_set_name}.",
|
|
file=sys.stderr)
|
|
continue
|
|
job_id = cproc.stdout.split(".")[0]
|
|
if debug:
|
|
print(f"job_id = {job_id}")
|
|
job_ids[i_module_set][0] = job_id
|
|
|
|
# Run the thread check job.
|
|
cmd = f"qsub {THREAD_CHECK_PBS_FILENAME}"
|
|
if debug:
|
|
print(f"cmd = {cmd}")
|
|
try:
|
|
cproc = subprocess.run(cmd, shell=True, check=True,
|
|
text=True, capture_output=True)
|
|
except subprocess.CalledProcessError as e:
|
|
print("ERROR: qsub failed for thread check.\n"
|
|
f"e.cmd = {e.cmd}\n"
|
|
f"e.returncode = {e.returncode}\n"
|
|
"See test log for output.\n"
|
|
"Skipping remaining steps for module set "
|
|
f"{module_set_name}.",
|
|
file=sys.stderr)
|
|
continue
|
|
job_id = cproc.stdout.split(".")[0]
|
|
if debug:
|
|
print(f"job_id = {job_id}")
|
|
job_ids[i_module_set][1] = job_id
|
|
|
|
# Run the report job when the other two jobs are complete.
|
|
cmd = (
|
|
f"qsub -W depend=afterok:{':'.join(job_ids[i_module_set][:-1])} "
|
|
f"{REPORT_PBS_FILENAME}"
|
|
)
|
|
if debug:
|
|
print(f"cmd = {cmd}")
|
|
try:
|
|
cproc = subprocess.run(cmd, shell=True, check=True,
|
|
text=True, capture_output=True)
|
|
except subprocess.CalledProcessError as e:
|
|
print("ERROR: qsub failed for report generation.\n"
|
|
f"e.cmd = {e.cmd}\n"
|
|
f"e.returncode = {e.returncode}\n"
|
|
"See test log for output.\n"
|
|
"Skipping remaining steps for module set "
|
|
f"{module_set_name}.",
|
|
file=sys.stderr)
|
|
continue
|
|
job_id = cproc.stdout.split(".")[0]
|
|
if debug:
|
|
print(f"job_id = {job_id}")
|
|
job_ids[i_module_set][2] = job_id
|
|
|
|
# Record the job IDs for this module set in a file.
|
|
with open("jobs.txt", "w", encoding="utf-8") as f:
|
|
for job_id in job_ids[i_module_set]:
|
|
f.write(f"{job_id}\n")
|
|
|
|
# This module set worked.
|
|
submit_ok[i_module_set] = True
|
|
|
|
# End of loop over module sets.
|
|
|
|
# -----------------------------------------------------------------------
|
|
|
|
# Detail the test results
|
|
test_report_details_string = ""
|
|
test_report_details_string += (
|
|
f"Test results are on `derecho` in `{INTEL_CHECKS_DIRECTORY}`.\n"
|
|
)
|
|
for (i_module_set, module_list_file) in enumerate(module_list_files):
|
|
test_report_details_string = (
|
|
"Submit Intel Inspector tests for module set "
|
|
f"`{module_list_file}`: "
|
|
)
|
|
if submit_ok[i_module_set]:
|
|
test_report_details_string += "*PASSED*"
|
|
else:
|
|
test_report_details_string += "*FAILED*"
|
|
|
|
# Summarize the test results
|
|
test_report_summary_string = (
|
|
f"Intel Inspector test submission for `{BRANCH_OR_COMMIT}`: "
|
|
)
|
|
if "FAILED" in test_report_details_string:
|
|
test_report_summary_string += "*FAILED*"
|
|
else:
|
|
test_report_summary_string += "*PASSED*"
|
|
|
|
# Print the test results summary and details.
|
|
print(test_report_summary_string)
|
|
print(test_report_details_string)
|
|
|
|
# If a test failed, or loud mode is on, post report to Slack.
|
|
if (slack_on_fail and "FAILED" in test_report_summary_string) or be_loud:
|
|
slack_client = common.slack_create_client()
|
|
if debug:
|
|
print(f"slack_client = {slack_client}")
|
|
slack_response_summary = common.slack_send_message(
|
|
slack_client, test_report_summary_string, is_test=is_test
|
|
)
|
|
if debug:
|
|
print(f"slack_response_summary = {slack_response_summary}")
|
|
thread_ts = slack_response_summary["ts"]
|
|
slack_response_summary = common.slack_send_message(
|
|
slack_client, test_report_details_string, thread_ts=thread_ts,
|
|
is_test=is_test
|
|
)
|
|
if debug:
|
|
print(f"slack_response_summary = {slack_response_summary}")
|
|
|
|
# ------------------------------------------------------------------------
|
|
|
|
if debug:
|
|
print(f"Ending {sys.argv[0]} at {datetime.datetime.now()}")
|
|
|
|
|
|
def main():
|
|
"""Driver for command-line version of code."""
|
|
# Set up the command-line parser.
|
|
parser = common.create_command_line_parser(DESCRIPTION)
|
|
|
|
# Parse the command-line arguments.
|
|
args = parser.parse_args()
|
|
if args.debug:
|
|
print(f"args = {args}")
|
|
|
|
# Convert the arguments from Namespace to dict.
|
|
args = vars(args)
|
|
|
|
# Pass the command-line arguments to the main function as a dict.
|
|
intelChecks(args)
|
|
|
|
|
|
if __name__ == "__main__":
|
|
main()
|