From 9f5bb03cb4fea394a0d64963c3e3b820a5da5813 Mon Sep 17 00:00:00 2001
From: Hsiao-Wei Wang <hsiaowei.eth@gmail.com>
Date: Thu, 10 Nov 2022 16:33:50 -0500
Subject: [PATCH] Refactor `run_generator`

---
 .../gen_helpers/gen_base/gen_runner.py        | 296 +++++++++++-------
 1 file changed, 175 insertions(+), 121 deletions(-)

diff --git a/tests/core/pyspec/eth2spec/gen_helpers/gen_base/gen_runner.py b/tests/core/pyspec/eth2spec/gen_helpers/gen_base/gen_runner.py
index 0a831a592..5e6ea93d3 100644
--- a/tests/core/pyspec/eth2spec/gen_helpers/gen_base/gen_runner.py
+++ b/tests/core/pyspec/eth2spec/gen_helpers/gen_base/gen_runner.py
@@ -1,4 +1,7 @@
-from eth_utils import encode_hex
+from dataclasses import (
+    dataclass,
+    field,
+)
 import os
 import time
 import shutil
@@ -15,6 +18,8 @@ from ruamel.yaml import (
 from filelock import FileLock
 from snappy import compress
 
+from eth_utils import encode_hex
+
 from eth2spec.test import context
 from eth2spec.test.exceptions import SkippedTest
 
@@ -28,6 +33,14 @@ context.is_pytest = False
 TIME_THRESHOLD_TO_PRINT = 1.0  # seconds
 
 
+@dataclass
+class Diagnostics(object):
+    collected_test_count: int = 0
+    generated_test_count: int = 0
+    skipped_test_count: int = 0
+    test_identifiers: list = field(default_factory=list)
+
+
 def validate_output_dir(path_str):
     path = Path(path_str)
 
@@ -40,6 +53,47 @@ def validate_output_dir(path_str):
     return path
 
 
+def get_test_case_dir(test_case, output_dir):
+    return (
+        Path(output_dir) / Path(test_case.preset_name) / Path(test_case.fork_name)
+        / Path(test_case.runner_name) / Path(test_case.handler_name)
+        / Path(test_case.suite_name) / Path(test_case.case_name)
+    )
+
+
+def get_test_identifier(test_case):
+    return "::".join([
+        test_case.preset_name,
+        test_case.fork_name,
+        test_case.runner_name,
+        test_case.handler_name,
+        test_case.suite_name,
+        test_case.case_name
+    ])
+
+
+def get_incomplete_tag_file(case_dir):
+    return case_dir / "INCOMPLETE"
+
+
+def should_skip_case_dir(case_dir, is_force, diagnostics_obj):
+    is_skip = False
+    incomplete_tag_file = get_incomplete_tag_file(case_dir)
+
+    if case_dir.exists():
+        if not is_force and not incomplete_tag_file.exists():
+            diagnostics_obj.skipped_test_count += 1
+            print(f'Skipping already existing test: {case_dir}')
+            is_skip = True
+        else:
+            print(f'Warning, output directory {case_dir} already exist,'
+                  ' old files will be deleted and it will generate test vector files with the latest version')
+            # Clear the existing case_dir folder
+            shutil.rmtree(case_dir)
+
+    return is_skip, diagnostics_obj
+
+
 def run_generator(generator_name, test_providers: Iterable[TestProvider]):
     """
     Implementation for a general test generator.
@@ -129,10 +183,8 @@ def run_generator(generator_name, test_providers: Iterable[TestProvider]):
         print(f"Filtering test-generator runs to only include presets: {', '.join(presets)}")
 
     collect_only = args.collect_only
-    collected_test_count = 0
-    generated_test_count = 0
-    skipped_test_count = 0
-    test_identifiers = []
+
+    diagnostics_obj = Diagnostics()
 
     provider_start = time.time()
     for tprov in test_providers:
@@ -145,146 +197,114 @@ def run_generator(generator_name, test_providers: Iterable[TestProvider]):
             if len(presets) != 0 and test_case.preset_name not in presets:
                 continue
 
-            case_dir = (
-                Path(output_dir) / Path(test_case.preset_name) / Path(test_case.fork_name)
-                / Path(test_case.runner_name) / Path(test_case.handler_name)
-                / Path(test_case.suite_name) / Path(test_case.case_name)
-            )
-            collected_test_count += 1
+            case_dir = get_test_case_dir(test_case, output_dir)
             print(f"Collected test at: {case_dir}")
+            diagnostics_obj.collected_test_count += 1
 
-            incomplete_tag_file = case_dir / "INCOMPLETE"
+            is_skip, diagnostics_obj = should_skip_case_dir(case_dir, args.force, diagnostics_obj)
+            if is_skip:
+                continue
 
-            if case_dir.exists():
-                if not args.force and not incomplete_tag_file.exists():
-                    skipped_test_count += 1
-                    print(f'Skipping already existing test: {case_dir}')
-                    continue
-                else:
-                    print(f'Warning, output directory {case_dir} already exist,'
-                          f' old files will be deleted and it will generate test vector files with the latest version')
-                    # Clear the existing case_dir folder
-                    shutil.rmtree(case_dir)
-
-            print(f'Generating test: {case_dir}')
-            test_start = time.time()
-
-            written_part = False
-
-            # Add `INCOMPLETE` tag file to indicate that the test generation has not completed.
-            case_dir.mkdir(parents=True, exist_ok=True)
-            with incomplete_tag_file.open("w") as f:
-                f.write("\n")
-
-            try:
-                def output_part(out_kind: str, name: str, fn: Callable[[Path, ], None]):
-                    # make sure the test case directory is created before any test part is written.
-                    case_dir.mkdir(parents=True, exist_ok=True)
-                    try:
-                        fn(case_dir)
-                    except IOError as e:
-                        error_message = (
-                            f'[Error] error when dumping test "{case_dir}", part "{name}", kind "{out_kind}": {e}'
-                        )
-                        # Write to error log file
-                        with log_file.open("a+") as f:
-                            f.write(error_message)
-                            traceback.print_exc(file=f)
-                            f.write('\n')
-
-                        sys.exit(error_message)
-
-                meta = dict()
-
-                try:
-                    for (name, out_kind, data) in test_case.case_fn():
-                        written_part = True
-                        if out_kind == "meta":
-                            meta[name] = data
-                        elif out_kind == "cfg":
-                            output_part(out_kind, name, dump_yaml_fn(data, name, file_mode, cfg_yaml))
-                        elif out_kind == "data":
-                            output_part(out_kind, name, dump_yaml_fn(data, name, file_mode, yaml))
-                        elif out_kind == "ssz":
-                            output_part(out_kind, name, dump_ssz_fn(data, name, file_mode))
-                        else:
-                            assert False  # Unknown kind
-                except SkippedTest as e:
-                    print(e)
-                    skipped_test_count += 1
-                    shutil.rmtree(case_dir)
-                    continue
-
-                # Once all meta data is collected (if any), write it to a meta data file.
-                if len(meta) != 0:
-                    written_part = True
-                    output_part("data", "meta", dump_yaml_fn(meta, "meta", file_mode, yaml))
-
-                if not written_part:
-                    print(f"test case {case_dir} did not produce any test case parts")
-            except Exception as e:
-                error_message = f"[ERROR] failed to generate vector(s) for test {case_dir}: {e}"
-                # Write to error log file
-                with log_file.open("a+") as f:
-                    f.write(error_message)
-                    traceback.print_exc(file=f)
-                    f.write('\n')
-                traceback.print_exc()
-            else:
-                # If no written_part, the only file was incomplete_tag_file. Clear the existing case_dir folder.
-                if not written_part:
-                    shutil.rmtree(case_dir)
-                else:
-                    generated_test_count += 1
-                    test_identifier = "::".join([
-                        test_case.preset_name,
-                        test_case.fork_name,
-                        test_case.runner_name,
-                        test_case.handler_name,
-                        test_case.suite_name,
-                        test_case.case_name
-                    ])
-                    test_identifiers.append(test_identifier)
-                    # Only remove `INCOMPLETE` tag file
-                    os.remove(incomplete_tag_file)
-            test_end = time.time()
-            span = round(test_end - test_start, 2)
-            if span > TIME_THRESHOLD_TO_PRINT:
-                print(f'    - generated in {span} seconds')
+            # generate test vector
+            is_skip, diagnostics_obj = generate_test_vector_and_diagnose(
+                test_case, case_dir, log_file, file_mode,
+                cfg_yaml, yaml, diagnostics_obj,
+            )
+            if is_skip:
+                continue
 
     provider_end = time.time()
     span = round(provider_end - provider_start, 2)
 
     if collect_only:
-        print(f"Collected {collected_test_count} tests in total")
+        print(f"Collected {diagnostics_obj.collected_test_count} tests in total")
     else:
-        summary_message = f"completed generation of {generator_name} with {generated_test_count} tests"
-        summary_message += f" ({skipped_test_count} skipped tests)"
+        summary_message = f"completed generation of {generator_name} with {diagnostics_obj.generated_test_count} tests"
+        summary_message += f" ({diagnostics_obj.skipped_test_count} skipped tests)"
         if span > TIME_THRESHOLD_TO_PRINT:
             summary_message += f" in {span} seconds"
         print(summary_message)
-    diagnostics = {
-        "collected_test_count": collected_test_count,
-        "generated_test_count": generated_test_count,
-        "skipped_test_count": skipped_test_count,
-        "test_identifiers": test_identifiers,
+
+    diagnostics_output = {
+        "collected_test_count": diagnostics_obj.collected_test_count,
+        "generated_test_count": diagnostics_obj.generated_test_count,
+        "skipped_test_count": diagnostics_obj.skipped_test_count,
+        "test_identifiers": diagnostics_obj.test_identifiers,
         "durations": [f"{span} seconds"],
     }
-    diagnostics_path = Path(os.path.join(output_dir, "diagnostics.json"))
-    diagnostics_lock = FileLock(os.path.join(output_dir, "diagnostics.json.lock"))
+    diagnostics_path = Path(os.path.join(output_dir, "diagnostics_obj.json"))
+    diagnostics_lock = FileLock(os.path.join(output_dir, "diagnostics_obj.json.lock"))
     with diagnostics_lock:
         diagnostics_path.touch(exist_ok=True)
         if os.path.getsize(diagnostics_path) == 0:
             with open(diagnostics_path, "w+") as f:
-                json.dump(diagnostics, f)
+                json.dump(diagnostics_output, f)
         else:
             with open(diagnostics_path, "r+") as f:
                 existing_diagnostics = json.load(f)
-                for k, v in diagnostics.items():
+                for k, v in diagnostics_output.items():
                     existing_diagnostics[k] += v
             with open(diagnostics_path, "w+") as f:
                 json.dump(existing_diagnostics, f)
-        print(f"wrote diagnostics to {diagnostics_path}")
+        print(f"wrote diagnostics_obj to {diagnostics_path}")
+
+
+def generate_test_vector_and_diagnose(test_case, case_dir, log_file, file_mode, cfg_yaml, yaml, diagnostics_obj):
+    is_skip = False
+
+    print(f'Generating test: {case_dir}')
+    test_start = time.time()
+
+    written_part = False
+
+    # Add `INCOMPLETE` tag file to indicate that the test generation has not completed.
+    incomplete_tag_file = get_incomplete_tag_file(case_dir)
+    case_dir.mkdir(parents=True, exist_ok=True)
+    with incomplete_tag_file.open("w") as f:
+        f.write("\n")
+
+    try:
+        meta = dict()
+        try:
+            written_part, meta = execute_test(test_case, case_dir, meta, log_file, file_mode, cfg_yaml, yaml)
+        except SkippedTest as e:
+            print(e)
+            diagnostics_obj.skipped_test_count += 1
+            shutil.rmtree(case_dir)
+            is_skip = True
+            return is_skip, diagnostics_obj
+
+        # Once all meta data is collected (if any), write it to a meta data file.
+        if len(meta) != 0:
+            written_part = True
+            output_part(case_dir, log_file, "data", "meta", dump_yaml_fn(meta, "meta", file_mode, yaml))
+
+        if not written_part:
+            print(f"test case {case_dir} did not produce any test case parts")
+    except Exception as e:
+        error_message = f"[ERROR] failed to generate vector(s) for test {case_dir}: {e}"
+        # Write to error log file
+        with log_file.open("a+") as f:
+            f.write(error_message)
+            traceback.print_exc(file=f)
+            f.write('\n')
+        traceback.print_exc()
+    else:
+        # If no written_part, the only file was incomplete_tag_file. Clear the existing case_dir folder.
+        if not written_part:
+            shutil.rmtree(case_dir)
+        else:
+            diagnostics_obj.generated_test_count += 1
+            test_identifier = get_test_identifier(test_case)
+            diagnostics_obj.test_identifiers.append(test_identifier)
+            # Only remove `INCOMPLETE` tag file
+            os.remove(incomplete_tag_file)
+    test_end = time.time()
+    span = round(test_end - test_start, 2)
+    if span > TIME_THRESHOLD_TO_PRINT:
+        print(f'    - generated in {span} seconds')
+
+    return is_skip, diagnostics_obj
 
 
 def dump_yaml_fn(data: Any, name: str, file_mode: str, yaml_encoder: YAML):
@@ -295,6 +315,40 @@ def dump_yaml_fn(data: Any, name: str, file_mode: str, yaml_encoder: YAML):
     return dump
 
 
+def output_part(case_dir, log_file, out_kind: str, name: str, fn: Callable[[Path, ], None]):
+    # make sure the test case directory is created before any test part is written.
+    case_dir.mkdir(parents=True, exist_ok=True)
+    try:
+        fn(case_dir)
+    except IOError as e:
+        error_message = f'[Error] error when dumping test "{case_dir}", part "{name}", kind "{out_kind}": {e}'
+        # Write to error log file
+        with log_file.open("a+") as f:
+            f.write(error_message)
+            traceback.print_exc(file=f)
+            f.write('\n')
+
+        sys.exit(error_message)
+
+
+def execute_test(test_case, case_dir, meta, log_file, file_mode, cfg_yaml, yaml):
+    result = test_case.case_fn()
+    for (name, out_kind, data) in result:
+        written_part = True
+        if out_kind == "meta":
+            meta[name] = data
+        elif out_kind == "cfg":
+            output_part(case_dir, log_file, out_kind, name, dump_yaml_fn(data, name, file_mode, cfg_yaml))
+        elif out_kind == "data":
+            output_part(case_dir, log_file, out_kind, name, dump_yaml_fn(data, name, file_mode, yaml))
+        elif out_kind == "ssz":
+            output_part(case_dir, log_file, out_kind, name, dump_ssz_fn(data, name, file_mode))
+        else:
+            raise ValueError("Unknown out_kind %s" % out_kind)
+
+    return written_part, meta
+
+
 def dump_ssz_fn(data: AnyStr, name: str, file_mode: str):
     def dump(case_path: Path):
         out_path = case_path / Path(name + '.ssz_snappy')