"""Measurement script for the progress tracker""" import argparse import json import os import pathlib import shutil import subprocess import urllib import tqdm def name_to_id(name): """Convert a human readable name to a url friendly id (e.g., `x + y` to `x-plus-y`)""" name = name.replace("-", "minus") name = name.replace(" ** ", "-to-the-power-of-") name = name.replace("+", "plus") name = name.replace("*", "times") name = name.replace("/", "over") name = name.replace("%", "percent") name = name.replace("&", "and") name = name.replace(":", "colon") name = name.replace(" ", "-") name = name.replace("(", "") name = name.replace(")", "") name = name.replace("[", "") name = name.replace("]", "") name = name.replace(",", "") name = name.replace(".", "-") name = name.replace("^", "") return urllib.parse.quote_plus(name.lower()) def register_alert(script, index, line, metrics, alerts): """Parse line, check its correctness, add it to list of alerts if it's valid""" # Extract the alert details alert_line = line.replace("# bench: Alert:", "") # Parse the alert and append it to list of alerts supported_operators = ["==", "!=", "<=", ">=", "<", ">"] for operator in supported_operators: alert_details = alert_line.split(f" {operator} ") # An alert should be of form `{metric} {operator} {constant}` if len(alert_details) == 2: metric_label = alert_details[0].strip() metric_id = name_to_id(metric_label) if metric_id not in metrics: raise SyntaxError( f"An alert is using an undefined metric `{metric_label}` " f"(at line {index + 1} of {script})", ) value_str = alert_details[1].strip() try: value = float(value_str) alerts.append({"metric": metric_id, "comparison": operator, "value": value}) except ValueError as error: raise SyntaxError( f"An alert is not using a constant floating point for comparison " f"(it uses `{value_str}` at line {index + 1} of {script})", ) from error break else: raise SyntaxError( f"An alert is not using any of the supported comparisons " f"{', '.join(supported_operators)} " f"(at line {index + 1} of {script})", ) def identify_metrics_and_alerts(script, lines, metrics, alerts): """Identify the metrics of a script and make sure the annotations are well-formed""" # Create a flag to detect `# Measure: End` without a measurement start in_measurement = False # Create a variable to remember the indentation of the start of the last measurement measurement_indentation = 0 # Create a variable to remember the line number of the start of the last measurement measurement_line = 0 # Identify measurements and store their name and id in `metrics` for index, line in enumerate(lines): # Get the indentation of the line indentation = len(line) - len(line.lstrip()) # Strip the line for easier processing line = line.strip() # Check whether the line is a special line or not if line == "# bench: Measure: End": # Make sure a measurement is active already if not in_measurement: raise SyntaxError( f"Measurements cannot end before they are defined " f"(at line {index + 1} of {script})", ) # Make sure indentation of the current line # matches the indentation of the active measurement line if indentation != measurement_indentation: raise SyntaxError( f"Measurements should finish with the same indentation as they are defined " f"(at lines {measurement_line} and {index + 1} of {script})", ) # Set in_measurement to false as the active measurement has ended in_measurement = False elif line.startswith("# bench: Measure:"): # Make sure a measurement is not active already if in_measurement: raise SyntaxError( f"Nested measurements are not supported " f"(at lines {measurement_line} and {index + 1} of {script})", ) # Extract the measurement details measurement_details = line.replace("# bench: Measure:", "").split("=") # Extract metric name and id metric_label = measurement_details[0].strip() metric_id = name_to_id(metric_label) # Add metric id and metric name to `metrics` metrics[metric_id] = metric_label # Check if the measurement is a timing measurement (does not contain `= expression`) if len(measurement_details) == 1: # We need to see an end in the upcoming lines so update variables accordingly in_measurement = True measurement_line = index + 1 measurement_indentation = indentation elif line.startswith("# bench: Alert:"): register_alert(script, index, line, metrics, alerts) # Make sure there isn't an active measurement that hasn't finished if in_measurement: raise SyntaxError( f"Unfinished measurements are not supported " f"(at line {measurement_line} of {script})", ) def create_modified_script(script, lines, metrics): """Create a modified version of the script which can be used to perform measurements""" with open(f".benchmarks/scripts/{script}", "w", encoding="utf-8") as f: # Import must-have libraries f.write("import json\n") f.write("import time\n") f.write("\n") # Create a measurement dictionary to accumulate values f.write("_measurements_ = {\n") for metric_id in metrics.keys(): f.write(f' "{metric_id}": [],\n') f.write("}\n") # Create a variable to hold the id of the current metric # This is required to determine where to save the measured value current_metric_id = "" # Copy the lines of the original script into the new script for line in lines[1:]: # And modify special lines along the way if line.strip() == "# bench: Measure: End": # Replace `# Measure: End` with # # _end_ = time.time() # _measurements_["id"].append((_end_ - _start_) * 1000) index = line.find("# bench: Measure: End") line = line[:index] f.write(f"{line}_end_ = time.time()\n") value = "(_end_ - _start_) * 1000" line += f'_measurements_["{current_metric_id}"].append({value})\n' elif line.strip().startswith("# bench: Measure:"): # Replace `# Measure: ...` with # # _start_ = time.time() # Replace `# Measure: ... = expression` with # # _measurements_["id"].append(expression) metric_details = line.replace("# bench: Measure:", "").split("=") metric_label = metric_details[0].strip() metric_id = name_to_id(metric_label) index = line.find("# bench: Measure:") line = line[:index] if len(metric_details) == 1: current_metric_id = metric_id line += "_start_ = time.time()\n" else: value = metric_details[1] line += f'_measurements_["{metric_id}"].append({value.strip()})\n' # Write the possibly replaced line back f.write(line) # Dump measurements to a temporary file after the script is executed from start to end f.write("\n") f.write(f'with open(".benchmarks/scripts/{script}.measurements", "w") as f:\n') f.write(" json.dump(_measurements_, f, indent=2)\n") def perform_measurements(path, script, target_id, metrics, samples, result): """Run the modified script multiple times and update the result""" # Create a flag to keep track of the working status working = True print() print(path) print("-" * len(str(path))) # Run the modified script `samples` times and accumulate measurements measurements = {metric_id: [] for metric_id in metrics.keys()} with tqdm.tqdm(total=samples) as pbar: for i in range(samples): # Create the subprocess process = subprocess.run( ["python", f".benchmarks/scripts/{script}"], capture_output=True, check=False, ) # Print sample information pbar.write(f" Sample {i + 1}") pbar.write(f" {'-' * len(f'Sample {i + 1}')}") # If the script raised an exception, discard everything for now if process.returncode != 0: working = False pbar.write(f" Failed (exited with {process.returncode})") pbar.write(f" --------------------{'-' * len(str(process.returncode))}-") stderr = process.stderr.decode("utf-8") for line in stderr.split("\n"): if line.strip() != "": pbar.write(f" {line}") pbar.write("") pbar.update(samples) break # Read the measurements and delete the temporary file with open(f".benchmarks/scripts/{script}.measurements", encoding="utf-8") as f: results = json.load(f) os.unlink(f".benchmarks/scripts/{script}.measurements") # Add the `results` of the current run to `measurements` for metric_id in metrics.keys(): average = sum(results[metric_id]) / len(results[metric_id]) pbar.write(f" {metrics[metric_id]} = {average}") for measurement in results[metric_id]: measurements[metric_id].append(measurement) pbar.write("") pbar.update(1) print() result["targets"][target_id]["working"] = working if working: # Take average of all metrics and store them in `result` result["targets"][target_id]["measurements"].update( {metric_id: sum(metric) / len(metric) for metric_id, metric in measurements.items()} ) # Add metrics of the current script to the result for metric_id, metric_label in metrics.items(): if metric_id not in result["metrics"]: result["metrics"][metric_id] = {"label": metric_label} else: # Delete measurements field of the current target del result["targets"][target_id]["measurements"] def get_scripts_to_benchmark(args): """Get the list of files to benchmark""" base = pathlib.Path(args.base) if args.files_to_benchmark is None: scripts = list(base.glob("*.py")) else: scripts = [pathlib.Path(f) for f in args.files_to_benchmark] if not args.check: print("Will benchmark following files:\n") print(" - " + "\n - ".join(str(s) for s in scripts)) # Clear the previous temporary scripts directory shutil.rmtree(".benchmarks/scripts", ignore_errors=True) # Copy the base directory to the new temporary scripts directory shutil.copytree(base, ".benchmarks/scripts") # Because we copy the entire base directory to the new temporary scripts directory, # the modified scripts will have access to helper modules defined within the base directory # (e.g., we copy `benchmarks/common.py` to `.benchmarks/scripts/common.py` which allows # the modified `.benchmarks/scripts/x_plus_42.py` to access `common` module`) return scripts def main(args): """Measurement script for the progress tracker""" samples = args.samples with open(".benchmarks/machine.json", "r", encoding="utf-8") as f: machine = json.load(f) result = {"machine": machine, "metrics": {}, "targets": {}} scripts = get_scripts_to_benchmark(args) # Process each script under the base directory for path in scripts: # Read the script line by line with open(path, "r", encoding="utf-8") as f: lines = f.readlines() # Find the first non-empty line first_line = "" for line in map(lambda line: line.strip(), lines): if line != "": first_line = line break # Check whether the script is a target or not if first_line.startswith("# bench: Unit Target:"): # Extract target name target_name = first_line.replace("# bench: Unit Target:", "").strip() is_unit = True elif first_line.startswith("# bench: Full Target:"): # Extract target name target_name = first_line.replace("# bench: Full Target:", "").strip() is_unit = False else: if not args.check: print() print(path) print("-" * len(str(path))) with tqdm.tqdm(total=samples) as pbar: pbar.write(" Sample 1") pbar.write(" --------") pbar.write( " Skipped (doesn't have a `# bench: Unit/Full Target:` directive)\n" ) pbar.update(samples) print() continue # Extract target id target_id = name_to_id(target_name) # Check whether the target is already registered if target_id in result["targets"]: raise RuntimeError(f"Target `{target_name}` is already registered") # Create a dictionary to hold `metric_id` to `metric_name` metrics = {} # Create a list to hold alerts in form { "metric": ..., "comparison": ..., "value": ... } alerts = [] # Identify metrics of the current script identify_metrics_and_alerts(path, lines, metrics, alerts) # Extract the script name name = os.path.basename(path) # Create another script to hold the modified version of the current script create_modified_script(name, lines, metrics) # Create an entry in the result for the current target result["targets"][target_id] = { "name": target_name, "measurements": {}, "alerts": alerts, "code": "\n".join(lines), "isUnit": is_unit, } if not args.check: # Perform and save measurements perform_measurements(path, name, target_id, metrics, samples, result) # Dump the latest results to the output file with open(".benchmarks/findings.json", "w", encoding="utf-8") as f: json.dump(result, f, indent=2, ensure_ascii=False) # Delete the modified scripts if the user doesn't care if not args.keep: shutil.rmtree(".benchmarks/scripts", ignore_errors=True) if not args.check: print() if __name__ == "__main__": parser = argparse.ArgumentParser(description="Measurement script for the progress tracker") parser.add_argument("base", type=str, help="directory which contains the benchmarks") parser.add_argument("--check", action="store_true", help="flag to enable just checking mode") parser.add_argument("--samples", type=int, default=30, help="number of samples to take") parser.add_argument("--keep", action="store_true", help="flag to keep measurement scripts") parser.add_argument( "--files_to_benchmark", "-f", nargs="+", type=str, default=None, help="files to benchmark in base directory (with base directory as a prefix)", ) main(parser.parse_args())