Update non-game benchmarks reports to fit new schema (#36)

This commit is contained in:
nharris-lmg
2023-12-26 09:57:55 -08:00
committed by GitHub
17 changed files with 152 additions and 46 deletions

View File

@@ -1,5 +1,5 @@
 friendly_name: "7-Zip"
-executable: "7zip.py"
+executable: "sevenzip.py"
 process_name: "7zr.exe"
 disable_presentmon: true
 output_dir: "run"

View File

@@ -39,36 +39,46 @@ command = command.rstrip()
 t1 = time.time()
 args = ["b"]
 logging.info("Starting 7-Zip benchmark! This may take a minute or so...")
-process = Popen([command, "b"], cwd=os.path.dirname(
-    os.path.realpath(__file__)), stdout=subprocess.PIPE)
-list_of_strings = [x.decode('utf-8').rstrip('\n')
+with Popen([command, "b"], cwd=os.path.dirname(
+        os.path.realpath(__file__)), stdout=subprocess.PIPE) as process:
+    list_of_strings = [x.decode('utf-8').rstrip('\n')
                    for x in iter(process.stdout.readlines())]
-EXIT_CODE = process.wait()
+    EXIT_CODE = process.wait()

-SPEED_PATTERN = '^Avr:\s*([0-9]*)\s.*\|\s*([0-9]*)\s.*$'
-VERSION_PATTERN = '7-Zip \(r\) (.*)\('
+    SPEED_PATTERN = r'^Avr:\s*([0-9]*)\s.*\|\s*([0-9]*)\s.*$'
+    VERSION_PATTERN = r'7-Zip \(r\) (.*)\('

-VERSION = ""
-SPEED_C = ""
-SPEED_D = ""
+    VERSION = ""
+    SPEED_C = ""
+    SPEED_D = ""

-# Strips the newline character
-for line in list_of_strings:
-    if line.isspace():
-        continue
-    logging.info(line.strip())
-    if '7-Zip' in line:
-        VERSION = re.match(VERSION_PATTERN, line).group(1)
-    if 'Avr:' in line:
-        SPEED_C = re.match(SPEED_PATTERN, line).group(1)
-        SPEED_D = re.match(SPEED_PATTERN, line).group(2)
+    # Strips the newline character
+    for line in list_of_strings:
+        if line.isspace():
+            continue
+        logging.info(line.strip())
+        if '7-Zip' in line:
+            VERSION = re.match(VERSION_PATTERN, line).group(1)
+        if 'Avr:' in line:
+            SPEED_C = re.match(SPEED_PATTERN, line).group(1)
+            SPEED_D = re.match(SPEED_PATTERN, line).group(2)

-t2 = time.time()
-logging.info("Benchmark took %s seconds", round((t2 - t1), 3))
-result = {
-    "score": SPEED_C + " Compression (KiB/s) | " + SPEED_D + " Decompression (KiB/s)",
-    "version": VERSION.strip()
-}
+    t2 = time.time()
+    logging.info("Benchmark took %s seconds", round((t2 - t1), 3))
+    result = [
+        {
+            "test": "compression",
+            "score": SPEED_C,
+            "unit": "KiB/s",
+            "version": VERSION.strip()
+        },
+        {
+            "test": "decompression",
+            "score": SPEED_D,
+            "unit": "KiB/s",
+            "version": VERSION.strip()
+        },
+    ]

-with open(os.path.join(log_dir, "report.json"), "w", encoding="utf-8") as file:
-    file.write(json.dumps(result))
+    with open(os.path.join(log_dir, "report.json"), "w", encoding="utf-8") as file:
+        file.write(json.dumps(result))

View File

@@ -4,6 +4,11 @@ All notable changes to this project will be documented in this file.
Changes are grouped by the date they are merged to the main branch of the repository and are ordered from newest to oldest. Dates use the ISO 8601 extended calendar date format, i.e. YYYY-MM-DD.
## 2023-12-26
- Fix Blender Benchmark harness not running on device type gpu when Intel Arc GPU present.
- Update non-game benchmark reports to fit new schema. This is for downstream reporting compatibility.
## 2023-12-19
- Update menu navigation and start/end time marking for Rocket League harness

View File

@@ -12,11 +12,9 @@ The versions of tests that are available here are taken from snapshots of our pr
 <!-- omit in toc -->
 ## Table of Contents
 - [Getting Started](#getting-started)
-  - [Prerequisites](#prerequisites)
-    - [Python 3.10+](#python-310)
-    - [Poetry](#poetry)
-      - [Downloading dependencies](#downloading-dependencies)
 - [A test and its harness](#a-test-and-its-harness)
+  - [Harness Manifest](#harness-manifest)
+  - [JSON Report](#json-report)
 - [Creating a test harness](#creating-a-test-harness)
 - [Tools in the toolbox](#tools-in-the-toolbox)
   - [Keras OCR](#keras-ocr)
@@ -26,8 +24,10 @@ The versions of tests that are available here are taken from snapshots of our pr
## Getting Started
Configuring your system to execute these tests is straightforward; you'll only need Python, Poetry, and git. Note, however, that some of the tests in this repository may require additional services or specific applications to be installed. For instance, if you intend to run the game tests, you will need a valid copy of the respective game title.
<!-- omit in toc -->
### Prerequisites
<!-- omit in toc -->
#### Python 3.10+
Most of the test harnesses are written in Python, which you will need on your system. We use Python 3.11 on our test benches, but the harnesses should work on any version from 3.10 onward.
@@ -35,6 +35,7 @@ Most of the test harnesses are written in Python, which you will need on your sy
##### Installation
We recommend you install Python from the [official downloads page](https://www.python.org/downloads/) and not the Windows Store.
<!-- omit in toc -->
#### Poetry
This project uses [Poetry](https://python-poetry.org/docs/) for dependency management.
@@ -46,6 +47,7 @@ Open a powershell terminal and execute the following command to download and exe
```
After installation you will want to add Poetry to your PATH. On Windows the path to add is `%APPDATA%\Python\Scripts`. Test that Poetry is working by executing `poetry --version`; a version number should be returned, not an error.
<!-- omit in toc -->
##### Downloading dependencies
1. Open a terminal in the root directory.
2. Execute `poetry install`
@@ -96,6 +98,70 @@ The test harness is responsible for:
3. Gathering of assets
4. Cleanup
### Harness Manifest
The manifest.yaml file serves as a configuration file containing metadata about a test, providing the essential information MarkBench needs to execute the test correctly.

Example:
```yaml
friendly_name: "Blender Benchmark"
executable: "blender.py"
process_name: "blender.exe"
disable_presentmon: true
hidden: 0
output_dir: "run"
options:
- name: scene
type: select
values: [all, classroom, junkshop, monster]
- name: version
type: select
values: ["3.6.0", "3.5.0", "3.4.0", "3.3.0"]
- name: device
type: select
values: ["CPU", "GPU"]
```
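For comparison, a minimal manifest for a non-interactive test with no user-selectable options (this is the 7-Zip harness manifest from this repository) needs only a handful of keys:
```yaml
friendly_name: "7-Zip"
executable: "sevenzip.py"
process_name: "7zr.exe"
disable_presentmon: true
output_dir: "run"
```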
<p align="right">(<a href="#readme-top">back to top</a>)</p>
### JSON Report
Every harness may optionally write its results in JSON format to a file called report.json. MarkBench reads the JSON contents and stores them in the database.
<!-- omit in toc -->
#### Non Game Report
Any test that isn't a game *should* include a report for MarkBench to upload to the database. If present, this report requires a value for the following properties:

- **test** string - indicates which sub-test (or subparameters) was run. For example, Blender Benchmark has the tests (scenes) classroom, junkshop, and monster.
- **version** string - the applicable version of the test or program under test. This property is optional if no version is available.
- **score** string - a scalar output from the test. This could be a benchmark score or a duration.
- **unit** string - the unit of measurement of the scalar. For a duration this could be seconds or minutes. If the score is simply a score, then this property can be omitted or left empty.
- **label** string - an optional friendly name for the unit of measurement, for example "fps" vs "Frames Per Second". This property is mainly used to override an axis label on a graph downstream.

The JSON report can be a single object or an array of reports, indicating to MarkBench that there is more than one result to record.
```json
{
    "test": "FireStrike",
    "version": "4.5.1",
    "score": "16099",
    "unit": "",
    "label": "",
    "start_time": 1702514174861,
    "end_time": 1702514209166
}
```
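A report with more than one result is simply an array of such objects. For example, the 7-Zip harness reports compression and decompression as separate entries; a report of that shape (the score and version values below are placeholders) would look like:
```json
[
    {
        "test": "compression",
        "score": "91011",
        "unit": "KiB/s",
        "version": "23.01"
    },
    {
        "test": "decompression",
        "score": "81012",
        "unit": "KiB/s",
        "version": "23.01"
    }
]
```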
<!-- omit in toc -->
#### Game Report
Game tests don't require a report, since the score is the FPS, which is calculated downstream of MarkBench. It is helpful, however, if the report does include things such as resolution, start time, and end time.
```json
{
    "resolution": "1920x1080",
    "start_time": 1702514174861,
    "end_time": 1702514209166
}
```
> Note: the start_time and end_time given in the report.json of a game test are used as the markers for when to start and stop measuring FPS.
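Concretely, those markers are millisecond Unix timestamps that the harness records around the run. Below is a minimal sketch of a game harness writing such a report; the `log_dir` path and the resolution value are illustrative placeholders, not part of any existing harness:
```python
import json
import os
import time

def milliseconds_now() -> int:
    """Current wall-clock time in milliseconds since the epoch."""
    return int(time.time() * 1000)

log_dir = "run"  # illustrative; harnesses write to their own log directory
start_time = milliseconds_now()
# ... launch the game and run the benchmark pass here ...
end_time = milliseconds_now()

report = {
    "resolution": "1920x1080",
    "start_time": start_time,
    "end_time": end_time,
}

# MarkBench reads report.json after the run and stores its contents in the database.
with open(os.path.join(log_dir, "report.json"), "w", encoding="utf-8") as file:
    file.write(json.dumps(report))
```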
<p align="right">(<a href="#readme-top">back to top</a>)</p>
## Creating a test harness
Let's create a harness for the test FurMark.

View File

@@ -52,7 +52,9 @@ try:
    sys.exit(1)

report = {
    "test": "barbershop",
    "score": score,
    "unit": "seconds",
    "version": version,
    "device": args.device,
    "start_time": seconds_to_milliseconds(start_time),

View File

@@ -1,5 +1,6 @@
"""Blender render test script"""
from datetime import datetime
import logging
import os
import re
@@ -36,6 +37,12 @@ def download_barbershop_scene():
file.write(response.content)
logging.info('Barbershop already downloaded')
def time_to_seconds(time_string):
    """convert string to duration in seconds"""
    time_obj = datetime.strptime(time_string, "%H:%M:%S.%f")
    seconds = (time_obj.hour * 3600) + (time_obj.minute * 60) + time_obj.second + (time_obj.microsecond / 1e6)
    return seconds

def run_blender_render(executable_path: str, log_directory: str, device: str) -> str:
    """Execute the blender render of barbershop, returns the duration as string"""
    blend_log = os.path.join(log_directory, "blender.log")
@@ -59,7 +66,7 @@ def run_blender_render(executable_path: str, log_directory: str, device: str) ->
         if match:
             time = match.group(1)
             break
-    return time
+    return time_to_seconds(time)

 def find_blender():

View File

@@ -83,16 +83,20 @@ else:
     logging.info(process.stderr)
     logging.error("Test failed!")
 else:
+    OPTIX = "OPTIX"
+    CUDA = "CUDA"
+    HIP = "HIP"
+    ONE_API = "ONEAPI"
     logging.info(process.stdout)
     logging.info(process.stderr)
-    if "OPTIX" in process.stdout or "OPTIX" in process.stderr:
-        DEVICE_TYPE = "OPTIX" # nvidia
-    if "CUDA" in process.stdout or "CUDA" in process.stderr:
-        DEVICE_TYPE = "CUDA" # older non rtx nvidia
-    elif "HIP" in process.stdout or "HIP" in process.stderr:
-        DEVICE_TYPE = "HIP" # amd
-    elif "oneAPI" in process.stdout or "oneAPI" in process.stderr:
-        DEVICE_TYPE = "oneAPI" # intel
+    if OPTIX in process.stdout or OPTIX in process.stderr:
+        DEVICE_TYPE = OPTIX # nvidia
+    if CUDA in process.stdout or CUDA in process.stderr:
+        DEVICE_TYPE = CUDA # older non rtx nvidia
+    elif HIP in process.stdout or HIP in process.stderr:
+        DEVICE_TYPE = HIP # amd
+    elif ONE_API in process.stdout or ONE_API in process.stderr:
+        DEVICE_TYPE = ONE_API # intel

     arg_string = ["blender", "list"]
     run_array = [ABS_EXECUTABLE_PATH, "benchmark"] + SCENE + \
@@ -109,11 +113,13 @@ json_array = json.loads(process.stdout)
 json_report = []
 for report in json_array:
+    blender_version = report['blender_version']['version']
     scene_report = {
         "timestamp": report['timestamp'],
-        "version": report['blender_version']['version'],
-        "scene": report['scene']['label'],
+        "version": blender_version,
+        "test": f"{report['scene']['label']}",
         "score": round(report['stats']['samples_per_minute'], 2),
+        "unit": "samples per minute",
         "device": report['device_info']['compute_devices'][0]['name']
     }

View File

@@ -90,7 +90,7 @@ try:
logging.info("Benchmark took %.2f seconds", elapsed_test_time)
report = {
"test_arg": test_type,
"test": test_type,
"score": score,
"start_time": seconds_to_milliseconds(start_time),
"end_time": seconds_to_milliseconds(end_time)

View File

@@ -36,7 +36,14 @@
"DMARK",
"dmdef",
"firestrikegraphicsscorep",
"myresults"
"myresults",
"msikombuster",
"msikombustor",
"isfile",
"rstrip",
"keybd",
"vkfurrytorus",
"GPGPU"
],
"ignoreRegExpList": [
"import .*"

View File

Image file changed (165 KiB before and after).

View File

@@ -49,6 +49,8 @@ if EXIT_CODE > 0:
    sys.exit(EXIT_CODE)

report = {
    "test": "nin-theslip.wav",
    "unit": "seconds",
    "score": score,
    "version": "1.4.3"
}

View File

@@ -68,7 +68,7 @@ with open(log_path, encoding="utf-8") as log:
         score = match.group(1)
 report = {
-    "preset": args.preset,
+    "test": args.preset,
     "score": score
 }

View File

@@ -70,6 +70,7 @@ for line in Lines:
report = {
    "score": time,
    "unit": "seconds",
    "test": tuning
}