chore(ci): compute operations throughput on benchmarks results

In addition to values currently parsed and sent to database, now
throughput is also computed. This computation is done only on
mean values. New results are:
 - operations per ms
 - operations per dollar spent which depends on the machine used
   to perform the benchmarks
This commit is contained in:
David Testé
2023-02-24 16:13:55 +01:00
committed by Quentin Bourgerie
parent 291019ba0f
commit 8575435b3e
3 changed files with 73 additions and 10 deletions

View File

@@ -121,7 +121,8 @@ jobs:
--project-version ${COMMIT_HASH} \
--branch ${{ github.ref_name }} \
--commit-date ${COMMIT_DATE} \
--bench-date "${{ env.BENCH_DATE }}"
--bench-date "${{ env.BENCH_DATE }}" \
--throughput
- name: Upload parsed results artifact
uses: actions/upload-artifact@v3

View File

@@ -7,7 +7,9 @@ Parse benchmark raw results.
import argparse
import pathlib
import json
import sys
ONE_HOUR_IN_NANOSECONDS = 3600E9
parser = argparse.ArgumentParser()
parser.add_argument('results_path',
@@ -28,36 +30,57 @@ parser.add_argument('--commit-date', dest='commit_date', required=True,
help='Timestamp of commit hash used in project_version')
parser.add_argument('--bench-date', dest='bench_date', required=True,
help='Timestamp when benchmark was run')
parser.add_argument('--throughput', dest='throughput', action='store_true',
help='Compute and append number of operations per millisecond and'
'operations per dollar, only on mean values')
def parse_results(raw_results):
def parse_results(raw_results, compute_throughput=False, hardware_hourly_cost=None):
"""
Parse raw benchmark results.
:param raw_results: path to file that contains raw results as :class:`pathlib.Path`
:param compute_throughput: compute number of operations per millisecond and operations per
dollar on mean values
:param hardware_hourly_cost: hourly cost of the hardware used in dollar
:return: :class:`list` of data points
"""
raw_results = json.loads(raw_results.read_text())
return [
{"value": res["cpu_time"], "test": res["name"]}
for res in raw_results["benchmarks"]
]
parsed_results = []
for res in raw_results["benchmarks"]:
test_name = res["name"]
value = res["cpu_time"]
parsed_results.append({"value": value, "test": test_name})
if test_name.endswith("_mean") and compute_throughput:
parsed_results.append({
"value": compute_ops_per_millisecond(value),
"test": "_".join([test_name, "ops_per_ms"])})
if hardware_hourly_cost is not None:
parsed_results.append({
"value": compute_ops_per_dollar(value, hardware_hourly_cost),
"test": "_".join([test_name, "ops_per_dollar"])})
return parsed_results
def recursive_parse(directory):
def recursive_parse(directory, compute_throughput=False, hardware_hourly_cost=None):
"""
Parse all the benchmark results in a directory. It will attempt to parse all the files having a
.json extension at the top-level of this directory.
:param directory: path to directory that contains raw results as :class:`pathlib.Path`
:param compute_throughput: compute number of operations per millisecond and operations per
dollar
:param hardware_hourly_cost: hourly cost of the hardware used in dollar
:return: :class:`list` of data points
"""
result_values = []
for file in directory.glob('*.json'):
try:
result_values.extend(parse_results(file))
result_values.extend(parse_results(file, compute_throughput, hardware_hourly_cost))
except KeyError as err:
print(f"Failed to parse '{file.resolve()}': {repr(err)}")
@@ -85,15 +108,50 @@ def dump_results(parsed_results, filename, input_args):
filename.write_text(json.dumps(series))
def compute_ops_per_dollar(data_point, product_hourly_cost):
"""
Compute numbers of operations per dollar for a given ``data_point``.
:param data_point: timing value measured during benchmark in nanoseconds
:param product_hourly_cost: cost in dollar per hour of hardware used
:return: number of operations per dollar
"""
return ONE_HOUR_IN_NANOSECONDS / (product_hourly_cost * data_point)
def compute_ops_per_millisecond(data_point):
"""
Compute numbers of operations per millisecond for a given ``data_point``.
:param data_point: timing value measured during benchmark in nanoseconds
:return: number of operations per millisecond
"""
return 1E6 / data_point
if __name__ == "__main__":
args = parser.parse_args()
hardware_cost = None
if args.throughput:
print("Throughput computation enabled")
ec2_costs = json.loads(
pathlib.Path("ci/ec2_products_cost.json").read_text(encoding="utf-8"))
try:
hardware_cost = abs(ec2_costs[args.hardware])
print(f"Hardware hourly cost: {hardware_cost} $/h")
except KeyError:
print(f"Cannot find hardware hourly cost for '{args.hardware}'")
sys.exit(1)
results_path = pathlib.Path(args.results_path)
print("Parsing benchmark results... ")
if results_path.is_dir():
results = recursive_parse(results_path)
results = recursive_parse(results_path, args.throughput, hardware_cost)
else:
results = parse_results(results_path)
results = parse_results(results_path, args.throughput, hardware_cost)
print("Parsing results done")
output_file = pathlib.Path(args.output_file)

View File

@@ -0,0 +1,4 @@
{
"m6i.metal": 7.168,
"c6a.metal": 7.344
}