add system json for mi300x mlperf (#9786)

* add system json for mi300x mlperf

```
python3 -m mlperf_logging.system_desc_checker examples/mlperf/training_submission_v5.0/tinycorp/systems/tinybox_8xMI300X.json training 4.1.0
INFO -   System description checker passed for tinybox 8xMI300X
```

Also removed the ROCm entry from the tinybox_red software stack, since we are not using it.

* update mlperf-logging version (4.1.0-rc3 -> 5.0.0-rc1)
This commit is contained in:
chenyu
2025-04-08 06:36:44 -04:00
committed by GitHub
parent 4a807ee952
commit 8fe83385ec
3 changed files with 41 additions and 3 deletions

View File

@@ -0,0 +1,39 @@
{
"submitter": "tinycorp",
"division": "closed",
"status": "Available on-premise",
"system_name": "tinybox 8xMI300X",
"number_of_nodes": "1",
"host_processors_per_node": "2",
"host_processor_model_name": "AMD EPYC 9354 32-Core Processor",
"host_processor_core_count": "32",
"host_processor_vcpu_count": "64",
"host_processor_frequency": "",
"host_processor_caches": "",
"host_processor_interconnect": "",
"host_memory_capacity": "2304GB",
"host_storage_type": "NVMe SSD",
"host_storage_capacity": "3x 4TB raid array",
"host_networking": "",
"host_networking_topology": "",
"host_memory_configuration": "24x 96GB DDR5",
"accelerators_per_node": "8",
"accelerator_model_name": "AMD Instinct MI300X",
"accelerator_host_interconnect": "PCIe 5.0 x16",
"accelerator_frequency": "",
"accelerator_on-chip_memories": "",
"accelerator_memory_configuration": "HBM3",
"accelerator_memory_capacity": "192GB",
"accelerator_interconnect": "",
"accelerator_interconnect_topology": "",
"cooling": "air",
"hw_notes": "",
"framework": "tinygrad, commit TBD",
"other_software_stack": {
"python": "3.10.16",
"ROCm": "3.0.0+94441cb"
},
"operating_system": "Ubuntu 24.04.1 LTS",
"sw_notes": ""
}

View File

@@ -30,8 +30,7 @@
"hw_notes": "",
"framework": "tinygrad, commit b5546912e24e0a864b35924da4efa5d71cfe368b",
"other_software_stack": {
"python": "3.10.12",
"ROCm": "6.1.3"
"python": "3.10.12"
},
"operating_system": "Ubuntu 22.04.4",
"sw_notes": ""

View File

@@ -44,7 +44,7 @@ setup(name='tinygrad',
"ruff",
"types-tqdm",
],
#'mlperf': ["mlperf-logging @ git+https://github.com/mlperf/logging.git@4.1.0-rc3"],
#'mlperf': ["mlperf-logging @ git+https://github.com/mlperf/logging.git@5.0.0-rc1"],
'testing_minimal': testing_minimal,
'testing_unit': testing_minimal + [
"tqdm",