test(gossipsub): Performance tests - plot latency history (#1608)

Radosław Kamiński
2025-08-11 16:11:29 +01:00
committed by GitHub
parent 71f04d1bb3
commit abee5326dc
8 changed files with 239 additions and 29 deletions

View File

@@ -15,6 +15,10 @@ runs:
         python -m pip install --upgrade pip
         pip install matplotlib
-    - name: Run plot_metrics.py
+    - name: Plot Docker Stats
       shell: bash
-      run: python performance/scripts/plot_docker_stats.py
+      run: python performance/scripts/plot_docker_stats.py
+    - name: Plot Latency History
+      shell: bash
+      run: python performance/scripts/plot_latency_history.py

View File

@@ -0,0 +1,34 @@
name: Publish Latency History
description: "Publish latency history CSVs in a configurable branch and folder"

runs:
  using: "composite"
  steps:
    - name: Clone the branch
      uses: actions/checkout@v4
      with:
        repository: ${{ github.repository }}
        ref: ${{ env.PUBLISH_BRANCH_NAME }}
        path: ${{ env.CHECKOUT_SUBFOLDER_HISTORY }}
        fetch-depth: 0

    - name: Commit & push latency history CSVs
      shell: bash
      run: |
        cd "$CHECKOUT_SUBFOLDER_HISTORY"
        mkdir -p "$PUBLISH_DIR_LATENCY_HISTORY"
        cp ../$SHARED_VOLUME_PATH/$LATENCY_HISTORY_PREFIX*.csv "$PUBLISH_DIR_LATENCY_HISTORY/"
        git add "$PUBLISH_DIR_LATENCY_HISTORY"

        if git diff-index --quiet HEAD --; then
          echo "No changes to commit"
        else
          git config --global user.email "github-actions[bot]@users.noreply.github.com"
          git config --global user.name "github-actions[bot]"
          git commit -m "Update latency history CSVs"
          git push origin "$PUBLISH_BRANCH_NAME"
        fi

        cd ..
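
For readers unfamiliar with the `git diff-index --quiet HEAD --` guard used above, here is a rough Python sketch of the same copy-and-commit-only-if-changed flow (it assumes the same environment variables; the CI itself runs the bash step above):

```python
import glob
import os
import shutil
import subprocess

# Same inputs the composite action reads from the workflow env.
history_dir = os.environ.get("CHECKOUT_SUBFOLDER_HISTORY", "history")
publish_dir = os.environ.get("PUBLISH_DIR_LATENCY_HISTORY", "latency_history")
shared_volume = os.environ.get("SHARED_VOLUME_PATH", "performance/output")
prefix = os.environ.get("LATENCY_HISTORY_PREFIX", "pr")

# Copy the per-PR latency CSVs into the checked-out history branch.
target = os.path.join(history_dir, publish_dir)
os.makedirs(target, exist_ok=True)
for csv_path in glob.glob(os.path.join(shared_volume, f"{prefix}*.csv")):
    shutil.copy(csv_path, target)

subprocess.run(["git", "add", publish_dir], cwd=history_dir, check=True)
# `git diff-index --quiet HEAD --` exits 0 when the index matches HEAD,
# i.e. nothing new was staged, so the commit and push can be skipped.
nothing_staged = (
    subprocess.run(["git", "diff-index", "--quiet", "HEAD", "--"], cwd=history_dir).returncode == 0
)
print("No changes to commit" if nothing_staged else "CSVs staged for commit")
```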

View File

@@ -24,7 +24,8 @@ runs:
         mkdir -p $PUBLISH_DIR_PLOTS/$BRANCH_NAME
         cp ../$SHARED_VOLUME_PATH/*.png $PUBLISH_DIR_PLOTS/$BRANCH_NAME/
-        git add $PUBLISH_DIR_PLOTS/$BRANCH_NAME
+        cp ../$LATENCY_HISTORY_PATH/*.png $PUBLISH_DIR_PLOTS/
+        git add $PUBLISH_DIR_PLOTS/*
         if git diff-index --quiet HEAD --; then
           echo "No changes to commit"

View File

@@ -13,7 +13,7 @@ concurrency:
   cancel-in-progress: true

 jobs:
-  examples:
+  performance:
     timeout-minutes: 20
     strategy:
       fail-fast: false
@@ -34,6 +34,11 @@ jobs:
       PUBLISH_BRANCH_NAME: "performance_plots"
       CHECKOUT_SUBFOLDER_SUBPLOTS: "subplots"
       PUBLISH_DIR_PLOTS: "plots"
+      CHECKOUT_SUBFOLDER_HISTORY: "history"
+      PUBLISH_DIR_LATENCY_HISTORY: "latency_history"
+      LATENCY_HISTORY_PATH: "history/latency_history"
+      LATENCY_HISTORY_PREFIX: "pr"
+      LATENCY_HISTORY_PLOT_FILENAME: "latency_history_all_scenarios.png"
     name: "Performance"
     runs-on: ubuntu-22.04
@@ -63,6 +68,9 @@ jobs:
       - name: Process latency and docker stats
         uses: ./.github/actions/process_stats
+      - name: Publish history
+        uses: ./.github/actions/publish_history
       - name: Generate plots
         uses: ./.github/actions/generate_plots
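
The new env values wire the latency-history pipeline together across the actions and scripts. A small sketch of the paths they produce (using the workflow values above, which override the scripts' own defaults):

```python
import os

# With the workflow env above, the history branch is checked out into
# history/latency_history, so that is where the plot script reads and writes.
history_path = os.environ.get("LATENCY_HISTORY_PATH", "history/latency_history")
prefix = os.environ.get("LATENCY_HISTORY_PREFIX", "pr")
plot_name = os.environ.get(
    "LATENCY_HISTORY_PLOT_FILENAME", "latency_history_all_scenarios.png"
)

input_glob = os.path.join(history_path, f"{prefix}*_latency.csv")  # per-PR CSVs published earlier
output_png = os.path.join(history_path, plot_name)  # picked up later by generate_plots
print(input_glob)  # history/latency_history/pr*_latency.csv
print(output_png)  # history/latency_history/latency_history_all_scenarios.png
```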

View File

@@ -5,32 +5,83 @@ import strformat
 import strutils
 import tables

-let summaryPath = getEnv("GITHUB_STEP_SUMMARY", "step_summary.md")
-let repo = getEnv("GITHUB_REPOSITORY", "vacp2p/nim-libp2p")
-let branchName = getEnv("BRANCH_NAME", "")
-let plotDir = &"subplots/plots/{branchName}"
+proc getImgUrlBase(repo: string, publishBranchName: string, plotsPath: string): string =
+  &"https://raw.githubusercontent.com/{repo}/refs/heads/{publishBranchName}/{plotsPath}"

 proc extractTestName(base: string): string =
   let parts = base.split("_")
-  return parts[^2]
+  if parts.len >= 2:
+    parts[^2]
+  else:
+    base

-proc makeImgTag(base: string): string =
-  &"<img src=\"https://raw.githubusercontent.com/{repo}/refs/heads/performance_plots/plots/{branchName}/{base}\" width=\"450\" style=\"margin-right:10px;\" />"
+proc makeImgTag(imgUrl: string, width: int): string =
+  &"<img src=\"{imgUrl}\" width=\"{width}\" style=\"margin-right:10px;\" />"

-var grouped: Table[string, seq[string]]
-for path in walkFiles(fmt"{plotDir}/*.png"):
-  let base = path.splitPath.tail
-  let testName = extractTestName(base)
-  let imgTag = makeImgTag(base)
+proc prepareLatencyHistoryImage(
+    imgUrlBase: string, latencyHistoryFilePath: string, width: int = 600
+): string =
+  let latencyImgUrl = &"{imgUrlBase}/{latencyHistoryFilePath}"
+  makeImgTag(latencyImgUrl, width)
-  discard grouped.hasKeyOrPut(testName, @[])
-  grouped[testName].add(imgTag)
+
+proc prepareDockerStatsImages(
+    plotDir: string, imgUrlBase: string, branchName: string, width: int = 450
+): Table[string, seq[string]] =
+  ## Groups docker stats plot images by test name and returns HTML <img> tags.
+  var grouped: Table[string, seq[string]]

-var summary = &"## Performance Plots for {branchName}\n"
-for test in grouped.keys.toSeq().sorted():
-  let imgs = grouped[test]
-  summary &= &"### {test}\n"
-  summary &= imgs.join(" ") & "<br>\n"
+  for path in walkFiles(&"{plotDir}/*.png"):
+    let plotFile = path.splitPath.tail
+    let testName = extractTestName(plotFile)
+    let imgUrl = &"{imgUrlBase}/{branchName}/{plotFile}"
+    let imgTag = makeImgTag(imgUrl, width)
+    discard grouped.hasKeyOrPut(testName, @[])
+    grouped[testName].add(imgTag)

-writeFile(summaryPath, summary)
-echo summary
+  grouped
+
+proc buildSummary(
+    plotDir: string,
+    repo: string,
+    branchName: string,
+    publishBranchName: string,
+    plotsPath: string,
+    latencyHistoryFilePath: string,
+): string =
+  let imgUrlBase = getImgUrlBase(repo, publishBranchName, plotsPath)
+  var buf: seq[string]
+
+  # Latency History section
+  buf.add("## Latency History")
+  buf.add(prepareLatencyHistoryImage(imgUrlBase, latencyHistoryFilePath) & "<br>")
+  buf.add("")
+
+  # Performance Plots section
+  let grouped = prepareDockerStatsImages(plotDir, imgUrlBase, branchName)
+  buf.add(&"## Performance Plots for {branchName}")
+  for test in grouped.keys.toSeq().sorted():
+    let imgs = grouped[test]
+    buf.add(&"### {test}")
+    buf.add(imgs.join(" ") & "<br>")
+
+  buf.join("\n")
+
+proc main() =
+  let summaryPath = getEnv("GITHUB_STEP_SUMMARY", "/tmp/step_summary.md")
+  let repo = getEnv("GITHUB_REPOSITORY", "vacp2p/nim-libp2p")
+  let branchName = getEnv("BRANCH_NAME", "")
+  let publishBranchName = getEnv("PUBLISH_BRANCH_NAME", "performance_plots")
+  let plotsPath = getEnv("PLOTS_PATH", "plots")
+  let latencyHistoryFilePath =
+    getEnv("LATENCY_HISTORY_PLOT_FILENAME", "latency_history_all_scenarios.png")
+  let checkoutSubfolder = getEnv("CHECKOUT_SUBFOLDER", "subplots")
+  let plotDir = &"{checkoutSubfolder}/{plotsPath}/{branchName}"
+
+  let summary = buildSummary(
+    plotDir, repo, branchName, publishBranchName, plotsPath, latencyHistoryFilePath
+  )
+  writeFile(summaryPath, summary)
+  echo summary
+
+main()
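
extractTestName relies on the docker-stats PNG filenames encoding the test name as the second-to-last underscore-separated token. A quick illustration of that convention in Python (the example filenames are hypothetical):

```python
def extract_test_name(base: str) -> str:
    # Same rule as the Nim proc above: second-to-last "_"-separated token,
    # falling back to the whole name when there is no underscore.
    parts = base.split("_")
    return parts[-2] if len(parts) >= 2 else base

# Hypothetical plot filenames, for illustration only:
print(extract_test_name("docker_stats_node1_testGossipSub_cpu.png"))  # -> "testGossipSub"
print(extract_test_name("plain.png"))                                 # -> "plain.png"
```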

View File

@@ -116,11 +116,11 @@ def plot_metrics(data, title, output_path):
 if __name__ == "__main__":
-    docker_stats_dir = os.environ.get("SHARED_VOLUME_PATH", "performance/output")
+    shared_volume_path = os.environ.get("SHARED_VOLUME_PATH", "performance/output")
     docker_stats_prefix = os.environ.get("DOCKER_STATS_PREFIX", "docker_stats_")
-    glob_pattern = os.path.join(docker_stats_dir, f"{docker_stats_prefix}*.csv")
+    glob_pattern = os.path.join(shared_volume_path, f"{docker_stats_prefix}*.csv")
     csv_files = glob.glob(glob_pattern)

     for csv_file in csv_files:
         file_name = os.path.splitext(os.path.basename(csv_file))[0]
         data = parse_csv(csv_file)
-        plot_metrics(data, title=file_name, output_path=os.path.join(docker_stats_dir, f"{file_name}.png"))
+        plot_metrics(data, title=file_name, output_path=os.path.join(shared_volume_path, f"{file_name}.png"))

View File

@@ -0,0 +1,93 @@
import os
import glob
import csv

import matplotlib

matplotlib.use("Agg")
import matplotlib.pyplot as plt


def extract_pr_number(filename):
    """Extract PR number from filename of format pr{number}_anything.csv"""
    fname = os.path.basename(filename)
    parts = fname.split("_", 1)
    pr_str = parts[0][2:]
    if not pr_str.isdigit():
        return None
    return int(pr_str)


def parse_latency_csv(csv_files):
    pr_numbers = []
    scenario_data = {}  # scenario -> {pr_num: {min, avg, max}}

    for csv_file in csv_files:
        pr_num = extract_pr_number(csv_file)
        if pr_num is None:
            continue
        pr_numbers.append(pr_num)

        with open(csv_file, newline="") as f:
            reader = csv.DictReader(f)
            for row in reader:
                scenario = row["Scenario"]
                if scenario not in scenario_data:
                    scenario_data[scenario] = {}
                scenario_data[scenario][pr_num] = {
                    "min": float(row["MinLatencyMs"]),
                    "avg": float(row["AvgLatencyMs"]),
                    "max": float(row["MaxLatencyMs"]),
                }

    pr_numbers = sorted(set(pr_numbers))
    return pr_numbers, scenario_data


def plot_latency_history(pr_numbers, scenario_data, output_path):
    num_scenarios = len(scenario_data)
    fig, axes = plt.subplots(num_scenarios, 1, figsize=(14, 4 * num_scenarios), sharex=True)
    if num_scenarios == 1:
        axes = [axes]

    color_map = plt.colormaps.get_cmap("tab10")

    for i, (scenario, pr_stats) in enumerate(scenario_data.items()):
        ax = axes[i]
        min_vals = [pr_stats.get(pr, {"min": None})["min"] for pr in pr_numbers]
        avg_vals = [pr_stats.get(pr, {"avg": None})["avg"] for pr in pr_numbers]
        max_vals = [pr_stats.get(pr, {"max": None})["max"] for pr in pr_numbers]
        color = color_map(i % color_map.N)

        if any(v is not None for v in avg_vals):
            ax.plot(pr_numbers, avg_vals, marker="o", label="Avg Latency (ms)", color=color)
            ax.fill_between(pr_numbers, min_vals, max_vals, color=color, alpha=0.2, label="Min-Max Latency (ms)")

        for pr, avg, minv, maxv in zip(pr_numbers, avg_vals, min_vals, max_vals):
            if avg is not None:
                ax.scatter(pr, avg, color=color)
                ax.text(pr, avg, f"{avg:.3f}", fontsize=14, ha="center", va="bottom")
            if minv is not None and maxv is not None:
                ax.vlines(pr, minv, maxv, color=color, alpha=0.5)

        ax.set_ylabel("Latency (ms)")
        ax.set_title(f"Scenario: {scenario}")
        ax.legend(loc="upper left", fontsize="small")
        ax.grid(True, linestyle="--", alpha=0.5)

    # Set X axis ticks and labels to show all PR numbers as 'PR <number>'
    axes[-1].set_xlabel("PR Number")
    axes[-1].set_xticks(pr_numbers)
    axes[-1].set_xticklabels([f"PR {pr}" for pr in pr_numbers], rotation=45, ha="right", fontsize=14)

    plt.tight_layout()
    plt.savefig(output_path)
    print(f"Saved combined plot to {output_path}")
    plt.close(fig)


if __name__ == "__main__":
    LATENCY_HISTORY_PATH = os.environ.get("LATENCY_HISTORY_PATH", "performance/output")
    LATENCY_HISTORY_PREFIX = os.environ.get("LATENCY_HISTORY_PREFIX", "pr")
    LATENCY_HISTORY_PLOT_FILENAME = os.environ.get(
        "LATENCY_HISTORY_PLOT_FILENAME", "latency_history_all_scenarios.png"
    )

    glob_pattern = os.path.join(LATENCY_HISTORY_PATH, f"{LATENCY_HISTORY_PREFIX}*_latency.csv")
    csv_files = sorted(glob.glob(glob_pattern))
    pr_numbers, scenario_data = parse_latency_csv(csv_files)

    output_path = os.path.join(LATENCY_HISTORY_PATH, LATENCY_HISTORY_PLOT_FILENAME)
    plot_latency_history(pr_numbers, scenario_data, output_path)
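
A minimal local smoke test for the new script, assuming its functions are importable (e.g. pasted into the same module); the PR numbers and scenario name are made up, and the CSV header matches what getCsvReport writes:

```python
import csv
import os
import tempfile

HEADER = ["Scenario", "Nodes", "TotalSent", "TotalReceived", "MinLatencyMs", "MaxLatencyMs", "AvgLatencyMs"]
SAMPLES = {
    "pr101_latency.csv": [("scenarioA", 10, 1000, 1000, 1.2, 9.8, 4.5)],
    "pr102_latency.csv": [("scenarioA", 10, 1000, 1000, 1.0, 8.9, 4.1)],
}

with tempfile.TemporaryDirectory() as tmp:
    # Write two fake per-PR CSVs in the same shape the pipeline produces.
    for fname, rows in SAMPLES.items():
        with open(os.path.join(tmp, fname), "w", newline="") as f:
            writer = csv.writer(f)
            writer.writerow(HEADER)
            writer.writerows(rows)

    files = sorted(os.path.join(tmp, name) for name in SAMPLES)
    pr_numbers, scenario_data = parse_latency_csv(files)  # functions from the script above
    assert pr_numbers == [101, 102]
    plot_latency_history(pr_numbers, scenario_data, os.path.join(tmp, "history.png"))
```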

View File

@@ -106,12 +106,26 @@ proc getMarkdownReport*(
   let runId = getEnv("GITHUB_RUN_ID", "")
   let summaryUrl = fmt"https://github.com/vacp2p/nim-libp2p/actions/runs/{runId}"
   output.add(
-    fmt"### 📊 View full Container Resources stats in the [Workflow Summary]({summaryUrl})"
+    fmt"### 📊 View Latency History and full Container Resources stats in the [Workflow Summary]({summaryUrl})"
   )

   let markdown = output.join("\n")
   return markdown

+proc getCsvFilename*(outputDir: string): string =
+  let prNum = getEnv("PR_NUMBER", "unknown")
+  result = fmt"{outputDir}/pr{prNum}_latency.csv"
+
+proc getCsvReport*(
+    results: Table[string, Stats], validNodes: Table[string, int]
+): string =
+  var output: seq[string]
+  output.add "Scenario,Nodes,TotalSent,TotalReceived,MinLatencyMs,MaxLatencyMs,AvgLatencyMs"
+  for scenarioName, stats in results.pairs:
+    let nodes = validNodes[scenarioName]
+    output.add fmt"{stats.scenarioName},{nodes},{stats.totalSent},{stats.totalReceived},{stats.latency.minLatencyMs:.3f},{stats.latency.maxLatencyMs:.3f},{stats.latency.avgLatencyMs:.3f}"
+  result = output.join("\n")
+
 proc main() =
   let outputDir = "performance/output"
   let parsedJsons = parseJsonFiles(outputDir)
@@ -119,6 +133,11 @@ proc main() =
   let jsonResults = getJsonResults(parsedJsons)
   let (aggregatedResults, validNodes) = aggregateResults(jsonResults)

+  # Write the per-PR latency CSV consumed by the latency history plot
+  let csvFilename = getCsvFilename(outputDir)
+  let csvContent = getCsvReport(aggregatedResults, validNodes)
+  writeFile(csvFilename, csvContent)
+
   let marker = getEnv("MARKER", "<!-- marker -->")
   let commitSha = getEnv("PR_HEAD_SHA", getEnv("GITHUB_SHA", "unknown"))
   let markdown = getMarkdownReport(aggregatedResults, validNodes, marker, commitSha)
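
The filename produced by getCsvFilename is the contract the rest of the pipeline depends on: publish_history copies `$LATENCY_HISTORY_PREFIX*.csv` and plot_latency_history.py globs `pr*_latency.csv`, so the three patterns must stay aligned. A small sanity sketch (the PR number is only illustrative; in CI it comes from PR_NUMBER):

```python
import fnmatch

pr_number = "1608"                        # illustrative PR number
csv_name = f"pr{pr_number}_latency.csv"   # written by getCsvFilename above
publish_pattern = "pr*.csv"               # copied by the publish_history action
plot_pattern = "pr*_latency.csv"          # globbed by plot_latency_history.py

assert fnmatch.fnmatch(csv_name, publish_pattern)
assert fnmatch.fnmatch(csv_name, plot_pattern)
print(f"{csv_name} is picked up by both steps")
```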