# concrete/ci/benchmark_parser.py
"""
benchmark_parser
----------------
Parse benchmark raw results.
"""
import argparse
import pathlib
import json
import sys
# Number of nanoseconds in one hour (3600 s * 1e9 ns/s); used to turn a per-operation timing into
# a number of operations per hour of hardware rental.
ONE_HOUR_IN_NANOSECONDS = 3600E9

# Command-line interface. The parser lives at module level so that it can be inspected or reused
# without running the script.
parser = argparse.ArgumentParser()
# NOTE: adjacent string literals are concatenated verbatim, so every fragment below carries its
# own separating space; without it the rendered help glues words together.
parser.add_argument('results_path',
                    help=('Location of raw benchmark results,'
                          ' could be either a file or a directory.'
                          ' In a case of a directory, this script will attempt to parse all the'
                          ' files containing a .json extension'))
parser.add_argument('output_file', help='File storing parsed results')
parser.add_argument('-d', '--database', dest='database', required=True,
                    help='Name of the database used to store results')
parser.add_argument('-w', '--hardware', dest='hardware', required=True,
                    help='Hardware reference used to perform benchmark')
parser.add_argument('-V', '--project-version', dest='project_version', required=True,
                    help='Commit hash reference')
parser.add_argument('-b', '--branch', dest='branch', required=True,
                    help='Git branch name on which benchmark was performed')
parser.add_argument('--commit-date', dest='commit_date', required=True,
                    help='Timestamp of commit hash used in project_version')
parser.add_argument('--bench-date', dest='bench_date', required=True,
                    help='Timestamp when benchmark was run')
parser.add_argument('--throughput', dest='throughput', action='store_true',
                    help='Compute and append number of operations per millisecond and'
                         ' operations per dollar, only on mean values')
def parse_results(raw_results, compute_throughput=False, hardware_hourly_cost=None):
    """
    Parse raw benchmark results.

    For every entry of the ``"benchmarks"`` list one data point holding its ``"cpu_time"`` is
    produced. If the entry also carries a ``"Throughput"`` value, a second data point suffixed
    with ``_throughput`` is added. When ``compute_throughput`` is set, entries whose name ends
    with ``_mean`` additionally get an ``_ops_per_ms`` point and, if a hardware cost is given,
    an ``_ops_per_dollar`` point.

    :param raw_results: path to file that contains raw results as :class:`pathlib.Path`
    :param compute_throughput: compute number of operations per millisecond and operations per
        dollar on mean values
    :param hardware_hourly_cost: hourly cost of the hardware used in dollar
    :return: :class:`list` of data points
    :raises KeyError: if ``"benchmarks"``, or the ``"name"`` / ``"cpu_time"`` of an entry, is
        missing
    """
    raw_results = json.loads(raw_results.read_text(encoding="utf-8"))
    parsed_results = []
    for res in raw_results["benchmarks"]:
        test_name = res["name"]
        cpu_time = res["cpu_time"]
        parsed_results.append({"value": cpu_time, "test": test_name})

        # The throughput value is optional and is reported as-is. It must not replace the
        # timing: the derived metrics below are defined on the measured time only.
        if "Throughput" in res:
            parsed_results.append({
                "value": res["Throughput"],
                "test": "_".join([test_name, "throughput"])})

        if test_name.endswith("_mean") and compute_throughput:
            parsed_results.append({
                "value": compute_ops_per_millisecond(cpu_time),
                "test": "_".join([test_name, "ops_per_ms"])})
            if hardware_hourly_cost is not None:
                parsed_results.append({
                    "value": compute_ops_per_dollar(cpu_time, hardware_hourly_cost),
                    "test": "_".join([test_name, "ops_per_dollar"])})

    return parsed_results
def recursive_parse(directory, compute_throughput=False, hardware_hourly_cost=None):
    """
    Parse all the benchmark results in a directory. It will attempt to parse all the files having a
    .json extension at the top-level of this directory (sub-directories are not visited).

    Files are processed in sorted path order so that the output is the same from one run to the
    next. A file that cannot be parsed, either because it is not valid JSON or because an expected
    key is missing, is reported on standard output and skipped.

    :param directory: path to directory that contains raw results as :class:`pathlib.Path`
    :param compute_throughput: compute number of operations per millisecond and operations per
        dollar
    :param hardware_hourly_cost: hourly cost of the hardware used in dollar
    :return: :class:`list` of data points
    """
    result_values = []
    for json_file in sorted(directory.glob('*.json')):
        try:
            result_values.extend(
                parse_results(json_file, compute_throughput, hardware_hourly_cost))
        except (KeyError, json.JSONDecodeError) as err:
            # Best effort: one broken file must not discard the results of the others.
            print(f"Failed to parse '{json_file.resolve()}': {repr(err)}")

    return result_values
def dump_results(parsed_results, filename, input_args):
    """
    Write parsed results, together with the run metadata, to a file as one JSON object.

    Missing parent directories of ``filename`` are created first.

    :param parsed_results: :class:`list` of data points
    :param filename: filename for dump file as :class:`pathlib.Path`
    :param input_args: CLI input arguments
    """
    target_dir = filename.parent
    target_dir.mkdir(parents=True, exist_ok=True)

    # Note that the benchmark date is stored under the "insert_date" key.
    payload = dict(
        database=input_args.database,
        hardware=input_args.hardware,
        project_version=input_args.project_version,
        branch=input_args.branch,
        insert_date=input_args.bench_date,
        commit_date=input_args.commit_date,
        points=parsed_results,
    )
    serialized = json.dumps(payload)
    filename.write_text(serialized)
def compute_ops_per_dollar(data_point, product_hourly_cost):
    """
    Return how many operations one dollar of hardware time pays for.

    The result is one hour expressed in nanoseconds divided by the product of the hourly cost
    and the time of a single operation.

    :param data_point: timing value measured during benchmark in nanoseconds
    :param product_hourly_cost: cost in dollar per hour of hardware used
    :return: number of operations per dollar
    """
    cost_weighted_timing = product_hourly_cost * data_point
    return ONE_HOUR_IN_NANOSECONDS / cost_weighted_timing
def compute_ops_per_millisecond(data_point):
    """
    Return how many operations of duration ``data_point`` fit in one millisecond.

    :param data_point: timing value measured during benchmark in nanoseconds
    :return: number of operations per millisecond
    """
    nanoseconds_per_millisecond = 1E6
    return nanoseconds_per_millisecond / data_point
if __name__ == "__main__":
    args = parser.parse_args()

    # The hourly cost is only looked up when throughput figures are requested; otherwise it
    # stays None and no per-dollar data point is produced.
    hardware_cost = None
    if args.throughput:
        print("Throughput computation enabled")
        # The cost table is read relative to the current working directory.
        costs_file = pathlib.Path("ci/ec2_products_cost.json")
        ec2_costs = json.loads(costs_file.read_text(encoding="utf-8"))
        try:
            raw_cost = ec2_costs[args.hardware]
        except KeyError:
            print(f"Cannot find hardware hourly cost for '{args.hardware}'")
            sys.exit(1)
        hardware_cost = abs(raw_cost)
        print(f"Hardware hourly cost: {hardware_cost} $/h")

    results_path = pathlib.Path(args.results_path)
    print("Parsing benchmark results... ")
    # A directory is scanned for .json files, anything else is parsed as a single result file.
    parse = recursive_parse if results_path.is_dir() else parse_results
    results = parse(results_path, args.throughput, hardware_cost)
    print("Parsing results done")

    output_file = pathlib.Path(args.output_file)
    print(f"Dump parsed results into '{output_file.resolve()}' ... ", end="")
    dump_results(results, output_file, args)
    print("Done")