From 77729514e0eefa61c581b447264613fd95a80d28 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Test=C3=A9?= Date: Tue, 25 Oct 2022 09:35:24 +0200 Subject: [PATCH] chore(ci): parse benchmark results to send to postgres instance Previously, we were sending parsed benchmark results to a Prometheus instance. Due to its time-series nature, Prometheus would downsample database content to avoid having too many data points for a given range of time. While this behavior is good for a continuous stream of data, like monitoring CPU load, it's not suited for benchmarks. Indeed benchmarks are discrete events that would occur once in a while (i.e. once a day). Downsampling would, at some point, simply omit some of the benchmark results. Using a regular SQL database like PostgreSQL solves this issue. --- .github/workflows/benchmark.yml | 25 +++++++------ ci/benchmark_parser.py | 63 +++++++++++++++------------------ 2 files changed, 42 insertions(+), 46 deletions(-) diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index ea6743d0e..485093901 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -79,6 +79,10 @@ jobs: if: ${{ !cancelled() }} needs: start-runner steps: + - name: Get benchmark date + run: | + echo "BENCH_DATE=$(date --iso-8601=seconds)" >> "${GITHUB_ENV}" + # SSH private key is required as some dependencies are from private repos - uses: webfactory/ssh-agent@v0.5.2 with: @@ -87,6 +91,7 @@ jobs: - name: Fetch submodules uses: actions/checkout@v3 with: + fetch-depth: 0 submodules: recursive token: ${{ secrets.GH_TOKEN }} @@ -101,11 +106,6 @@ jobs: toolchain: stable override: true - - name: Concrete-Optimizer - run: | - cd compiler - make concrete-optimizer-lib - - name: Build compiler and end-to-end benchmarks run: | set -e @@ -134,11 +134,15 @@ jobs: - name: Parse results shell: bash run: | - OPTIMIZER_HASH="$(cd compiler/concrete-optimizer; git rev-parse HEAD)" + COMMIT_DATE="$(git --no-pager show -s 
--format=%cd --date=iso8601-strict ${{ github.sha }})" + COMMIT_HASH="$(git describe --tags --dirty)" python3 ./ci/benchmark_parser.py compiler/benchmarks_results.json ${{ env.RESULTS_FILENAME }} \ - --series-name compiler_end_to_end_benchmarks \ - --series-help "Concrete compiler end-to-end benchmarks timings" \ - --series-tags "{\"compiler_hash\": \"${{ github.sha }}\", \"branch\": \"${{ github.ref_name }}\", \"optimizer_hash\": \"${OPTIMIZER_HASH}\", \"hardware\": \"aws ${{ env.EC2_INSTANCE_TYPE }}\"}" + --schema compiler_benchmarks \ + --hardware ${{ env.EC2_INSTANCE_TYPE }} \ + --project-version ${COMMIT_HASH} \ + --branch ${{ github.ref_name }} \ + --commit-date ${COMMIT_DATE} \ + --bench-date "${{ env.BENCH_DATE }}" - name: Upload parsed results artifact uses: actions/upload-artifact@v3 @@ -162,12 +166,11 @@ jobs: curl -v -k \ -H "Content-Type: application/json" \ -H "X-Slab-Repository: ${{ github.repository }}" \ - -H "X-Slab-Command: plot_data" \ + -H "X-Slab-Command: store_data" \ -H "X-Hub-Signature-256: sha256=${SIGNATURE}" \ -d @${{ env.RESULTS_FILENAME }} \ ${{ secrets.SLAB_URL }} - stop-runner: name: Stop EC2 runner needs: diff --git a/ci/benchmark_parser.py b/ci/benchmark_parser.py index 0b3846feb..c1869c1da 100644 --- a/ci/benchmark_parser.py +++ b/ci/benchmark_parser.py @@ -16,15 +16,18 @@ parser.add_argument('results_path', 'In a case of a directory, this script will attempt to parse all the' 'files containing a .json extension')) parser.add_argument('output_file', help='File storing parsed results') -parser.add_argument('-n', '--series-name', dest='series_name', - default="concrete_compiler_benchmark_timing", - help='Name of the data series (as stored in Prometheus)') -parser.add_argument('-e', '--series-help', dest='series_help', - default="Timings of various type of benchmarks in concrete compiler.", - help='Description of the data series (as stored in Prometheus)') -parser.add_argument('-t', '--series-tags', dest='series_tags', - 
type=json.loads, default={}, - help='Tags to apply to all the points in the data series') +parser.add_argument('-s', '--schema', dest='schema', required=True, + help='Name of the database schema used to store results') +parser.add_argument('-w', '--hardware', dest='hardware', required=True, + help='Hardware reference used to perform benchmark') +parser.add_argument('-V', '--project-version', dest='project_version', required=True, + help='Commit hash reference') +parser.add_argument('-b', '--branch', dest='branch', required=True, + help='Git branch name on which benchmark was performed') +parser.add_argument('--commit-date', dest='commit_date', required=True, + help='Timestamp of commit hash used in project_version') +parser.add_argument('--bench-date', dest='bench_date', required=True, + help='Timestamp when benchmark was run') def parse_results(raw_results): @@ -35,20 +38,11 @@ def parse_results(raw_results): :return: :class:`list` of data points """ - result_values = list() raw_results = json.loads(raw_results.read_text()) - for res in raw_results["benchmarks"]: - bench_class, action, option_class, application = res["run_name"].split("/") - - for measurement in ("real_time", "cpu_time"): - tags = {"bench_class": bench_class, - "action": action, - "option_class": option_class, - "application": application, - "measurement": measurement} - result_values.append({"value": res[measurement], "tags": tags}) - - return result_values + return [ + {"value": res["cpu_time"], "test": res["run_name"]} + for res in raw_results["benchmarks"] + ] def recursive_parse(directory): @@ -70,24 +64,24 @@ def recursive_parse(directory): return result_values -def dump_results(parsed_results, filename, series_name, - series_help="", series_tags=None): +def dump_results(parsed_results, filename, input_args): """ Dump parsed results formatted as JSON to file. 
:param parsed_results: :class:`list` of data points :param filename: filename for dump file as :class:`pathlib.Path` - :param series_name: name of the data series as :class:`str` - :param series_help: description of the data series as :class:`str` - :param series_tags: constant tags for the series + :param input_args: CLI input arguments """ filename.parent.mkdir(parents=True, exist_ok=True) - series = [ - {"series_name": series_name, - "series_help": series_help, - "series_tags": series_tags or dict(), - "points": parsed_results}, - ] + series = { + "schema": input_args.schema, + "hardware": input_args.hardware, + "project_version": input_args.project_version, + "branch": input_args.branch, + "insert_date": input_args.bench_date, + "commit_date": input_args.commit_date, + "points": parsed_results, + } filename.write_text(json.dumps(series)) @@ -104,7 +98,6 @@ if __name__ == "__main__": output_file = pathlib.Path(args.output_file) print(f"Dump parsed results into '{output_file.resolve()}' ... ", end="") - dump_results(results, output_file, args.series_name, - series_help=args.series_help, series_tags=args.series_tags) + dump_results(results, output_file, args) print("Done")