test(gossipsub): Performance tests - more scenarios (#1585)

This commit is contained in:
Radosław Kamiński
2025-08-01 08:33:39 +01:00
committed by GitHub
parent d9aa393761
commit cd5fea53e3
5 changed files with 208 additions and 48 deletions

View File

@@ -17,7 +17,8 @@ WORKDIR /node
COPY --from=build /node/performance/main /node/main
RUN chmod +x main
RUN chmod +x main \
&& apk add --no-cache curl iproute2
VOLUME ["/output"]

View File

@@ -1,4 +1 @@
import chronos
import ./scenarios
waitFor(baseTest())

View File

@@ -21,10 +21,12 @@ for ((i = 0; i < $PEERS; i++)); do
hostname="$hostname_prefix$i"
docker run -d \
--cap-add=NET_ADMIN \
--name "$hostname" \
-e NODE_ID="$i" \
-e HOSTNAME_PREFIX="$hostname_prefix" \
-v "$output_dir:/output" \
-v /var/run/docker.sock:/var/run/docker.sock \
--hostname="$hostname" \
--network="$network" \
test-node > /dev/null

View File

@@ -1,23 +1,36 @@
# Nim-LibP2P
# Copyright (c) 2025 Status Research & Development GmbH
# Licensed under either of
# * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE))
# * MIT license ([LICENSE-MIT](LICENSE-MIT))
# at your option.
# This file may not be copied, modified, or distributed except according to
# those terms.
{.used.}
import metrics
import metrics/chronos_httpserver
import os
import strformat
import strutils
import ../libp2p
import ../libp2p/protocols/ping
import ../tests/helpers
import ./utils
from nativesockets import getHostname
proc baseTest*() {.async.} =
proc baseTest*(scenarioName = "Base test") {.async.} =
# --- Scenario ---
let scenario = scenarioName
const
# --- Scenario ---
scenario = "Base test"
nodeCount = 10
publisherCount = 10
publisherCount = 5
peerLimit = 5
msgCount = 200
msgInterval = 20 # ms
msgSize = 500 # bytes
warmupCount = 20
msgCount = 100
msgInterval = 100 # ms
msgSize = 200 # bytes
warmupCount = 10
# --- Node Setup ---
let
@@ -26,6 +39,9 @@ proc baseTest*() {.async.} =
hostname = getHostname()
rng = libp2p.newRng()
if nodeId == 0:
clearSyncFiles()
let (switch, gossipSub, pingProtocol) = setupNode(nodeId, rng)
gossipSub.setGossipSubParams()
@@ -41,13 +57,15 @@ proc baseTest*() {.async.} =
defer:
await switch.stop()
info "Node started, waiting 5s",
info "Node started, synchronizing",
scenario,
nodeId,
address = switch.peerInfo.addrs,
peerId = switch.peerInfo.peerId,
isPublisher = nodeId <= publisherCount,
hostname = hostname
await sleepAsync(5.seconds)
await syncNodes("started", nodeId, nodeCount)
# --- Peer Discovery & Connection ---
var peersAddresses = resolvePeersAddresses(nodeCount, hostnamePrefix, nodeId)
@@ -55,17 +73,19 @@ proc baseTest*() {.async.} =
await connectPeers(switch, peersAddresses, peerLimit, nodeId)
info "Mesh populated, waiting 5s",
info "Mesh populated, synchronizing",
nodeId, meshSize = gossipSub.mesh.getOrDefault(topic).len
await sleepAsync(5.seconds)
await syncNodes("mesh", nodeId, nodeCount)
# --- Message Publishing ---
let sentMessages = await publishMessagesWithWarmup(
gossipSub, warmupCount, msgCount, msgInterval, msgSize, publisherCount, nodeId
)
info "Waiting 2 seconds for message delivery"
await sleepAsync(2.seconds)
info "Waiting for message delivery, synchronizing"
await syncNodes("published", nodeId, nodeCount)
# --- Performance summary ---
let stats = getStats(scenario, receivedMessages[], sentMessages)
@@ -73,3 +93,95 @@ proc baseTest*() {.async.} =
let outputPath = "/output/" & hostname & ".json"
writeResultsToJson(outputPath, scenario, stats)
await syncNodes("finished", nodeId, nodeCount)
suite "Network Performance Tests":
  ## Each scenario shapes the container's network with `tc` (netem/tbf),
  ## runs the shared `baseTest` under that impairment, then removes the
  ## qdisc so the next scenario starts from a clean interface.
  ## NOTE(review): assumes eth0 is the container's interface and that the
  ## container has NET_ADMIN (granted by the docker run script) — confirm.
  teardown:
    checkTrackers()

  asyncTest "Base Test":
    # Unimpaired network: baseline for comparing the shaped scenarios.
    await baseTest()

  asyncTest "Latency Test":
    # Normally-distributed delay on every outgoing packet.
    const
      latency = 100
      jitter = 20
    discard execShellCommand(
      fmt"{enableTcCommand} netem delay {latency}ms {jitter}ms distribution normal"
    )
    await baseTest(fmt"Latency {latency}ms {jitter}ms")
    discard execShellCommand(disableTcCommand)

  asyncTest "Packet Loss Test":
    # Uniform random packet loss.
    const packetLoss = 5
    discard execShellCommand(fmt"{enableTcCommand} netem loss {packetLoss}%")
    await baseTest(fmt"Packet Loss {packetLoss}%")
    discard execShellCommand(disableTcCommand)

  asyncTest "Low Bandwidth Test":
    # Token-bucket filter caps throughput; `limit` bounds queued bytes.
    const
      rate = "256kbit"
      burst = "8kbit"
      limit = "5000"
    discard
      execShellCommand(fmt"{enableTcCommand} tbf rate {rate} burst {burst} limit {limit}")
    await baseTest(fmt"Low Bandwidth rate {rate} burst {burst} limit {limit}")
    discard execShellCommand(disableTcCommand)

  asyncTest "Packet Reorder Test":
    # A small fixed delay plus correlated reordering of a fraction of packets.
    const
      reorderPercent = 15
      reorderCorr = 40
      delay = 2
    discard execShellCommand(
      fmt"{enableTcCommand} netem delay {delay}ms reorder {reorderPercent}% {reorderCorr}%"
    )
    await baseTest(
      fmt"Packet Reorder {reorderPercent}% {reorderCorr}% with {delay}ms delay"
    )
    discard execShellCommand(disableTcCommand)

  asyncTest "Burst Loss Test":
    # Correlated loss: drops arrive in bursts rather than uniformly.
    const
      lossPercent = 8
      lossCorr = 30
    discard execShellCommand(fmt"{enableTcCommand} netem loss {lossPercent}% {lossCorr}%")
    await baseTest(fmt"Burst Loss {lossPercent}% {lossCorr}%")
    discard execShellCommand(disableTcCommand)

  asyncTest "Duplication Test":
    # A fraction of packets is delivered twice.
    const duplicatePercent = 2
    discard execShellCommand(fmt"{enableTcCommand} netem duplicate {duplicatePercent}%")
    await baseTest(fmt"Duplication {duplicatePercent}%")
    discard execShellCommand(disableTcCommand)

  asyncTest "Corruption Test":
    # Single-bit corruption in a fraction of packets.
    const corruptPercent = 0.5
    discard execShellCommand(fmt"{enableTcCommand} netem corrupt {corruptPercent}%")
    await baseTest(fmt"Corruption {corruptPercent}%")
    discard execShellCommand(disableTcCommand)

  asyncTest "Queue Limit Test":
    # Very small netem queue: excess packets are tail-dropped.
    const queueLimit = 5
    discard execShellCommand(fmt"{enableTcCommand} netem limit {queueLimit}")
    await baseTest(fmt"Queue Limit {queueLimit}")
    discard execShellCommand(disableTcCommand)

  asyncTest "Combined Network Conditions Test":
    # Bandwidth cap (tbf) chained with delay/loss/reorder/duplicate/corrupt
    # (netem) on the same interface; teardown removes the whole chain.
    discard execShellCommand(
      "tc qdisc add dev eth0 root handle 1:0 tbf rate 2mbit burst 32kbit limit 25000"
    )
    discard execShellCommand(
      "tc qdisc add dev eth0 parent 1:1 handle 10: netem delay 100ms 20ms distribution normal loss 5% 20% reorder 10% 30% duplicate 0.5% corrupt 0.05% limit 20"
    )
    await baseTest("Combined Network Conditions")
    discard execShellCommand(disableTcCommand)

View File

@@ -3,6 +3,8 @@ import hashes
import json
import metrics
import metrics/chronos_httpserver
import os
import osproc
import sequtils
import stew/byteutils
import stew/endians2
@@ -13,6 +15,7 @@ import ../libp2p
import ../libp2p/protocols/pubsub/rpc/messages
import ../libp2p/muxers/mplex/lpchannel
import ../libp2p/protocols/ping
import ../tests/helpers
import ./types
const
@@ -98,7 +101,7 @@ proc createMessageHandler*(
let latency = getLatency(sentNs)
receivedMessages[msgId] = latency
info "Message delivered", msgId = msgId, latency = formatLatencyMs(latency), nodeId
debug "Message delivered", msgId = msgId, latency = formatLatencyMs(latency), nodeId
return (messageHandler, receivedMessages)
@@ -125,22 +128,19 @@ proc resolvePeersAddresses*(
proc connectPeers*(
switch: Switch, peersAddresses: seq[MultiAddress], peerLimit: int, nodeId: int
) {.async.} =
var
connected = 0
index = 0
while connected < peerLimit:
while true:
let address = peersAddresses[index]
try:
let peerId =
await switch.connect(address, allowUnknownPeerId = true).wait(5.seconds)
connected.inc()
index.inc()
debug "Connected peer", nodeId, address = address
break
except CatchableError as exc:
warn "Failed to dial, waiting 5s", nodeId, address = address, error = exc.msg
await sleepAsync(5.seconds)
proc connectPeer(address: MultiAddress): Future[bool] {.async.} =
try:
let peerId =
await switch.connect(address, allowUnknownPeerId = true).wait(5.seconds)
debug "Connected peer", nodeId, address, peerId
return true
except CatchableError as exc:
warn "Failed to dial, waiting 1s", nodeId, address = address, error = exc.msg
return false
for index in 0 ..< peerLimit:
checkUntilTimeoutCustom(5.seconds, 500.milliseconds):
await connectPeer(peersAddresses[index])
proc publishMessagesWithWarmup*(
gossipSub: GossipSub,
@@ -151,8 +151,9 @@ proc publishMessagesWithWarmup*(
publisherCount: int,
nodeId: int,
): Future[seq[uint64]] {.async.} =
info "Publishing messages", nodeId
# Warm-up phase
info "Sending warmup messages", nodeId
debug "Sending warmup messages", nodeId
for msg in 0 ..< warmupCount:
await sleepAsync(msgInterval)
discard await gossipSub.publish(topic, warmupData)
@@ -165,7 +166,7 @@ proc publishMessagesWithWarmup*(
let timestamp = Moment.now().epochNanoSeconds()
var data = @(toBytesLE(uint64(timestamp))) & newSeq[byte](msgSize)
info "Sending message", msgId = timestamp, nodeId = nodeId
debug "Sending message", msgId = timestamp, nodeId = nodeId
doAssert((await gossipSub.publish(topic, data)) > 0)
sentMessages.add(uint64(timestamp))
@@ -217,17 +218,64 @@ proc `$`*(stats: Stats): string =
fmt"avg={formatLatencyMs(stats.latency.avgLatencyMs)}"
proc writeResultsToJson*(outputPath: string, scenario: string, stats: Stats) =
let json =
var resultsArr: JsonNode = newJArray()
if fileExists(outputPath):
try:
let existing = parseFile(outputPath)
resultsArr = existing["results"]
except:
discard
let newResult =
%*{
"results": [
{
"scenarioName": scenario,
"totalSent": stats.totalSent,
"totalReceived": stats.totalReceived,
"minLatencyMs": formatLatencyMs(stats.latency.minLatencyMs),
"maxLatencyMs": formatLatencyMs(stats.latency.maxLatencyMs),
"avgLatencyMs": formatLatencyMs(stats.latency.avgLatencyMs),
}
]
"scenarioName": scenario,
"totalSent": stats.totalSent,
"totalReceived": stats.totalReceived,
"minLatencyMs": formatLatencyMs(stats.latency.minLatencyMs),
"maxLatencyMs": formatLatencyMs(stats.latency.maxLatencyMs),
"avgLatencyMs": formatLatencyMs(stats.latency.avgLatencyMs),
}
resultsArr.add(newResult)
let json = %*{"results": resultsArr}
writeFile(outputPath, json.pretty)
const
enableTcCommand* = "tc qdisc add dev eth0 root"
disableTcCommand* = "tc qdisc del dev eth0 root"
proc execShellCommand*(cmd: string): string =
  ## Runs `cmd` via `/bin/sh -c` and returns its combined stdout+stderr,
  ## stripped of surrounding whitespace.
  ## Raises OSError if the shell process cannot be started; the raised
  ## message keeps the failing command and the underlying OS error text
  ## (the original version discarded both, making failures undiagnosable).
  try:
    let output = execProcess(
      "/bin/sh", args = ["-c", cmd], options = {poUsePath, poStdErrToStdOut}
    )
    .strip()
    debug "Shell command executed", cmd, output
    return output
  except OSError as e:
    # Preserve the root cause instead of a generic message.
    raise newException(OSError, "Shell command failed: " & cmd & " (" & e.msg & ")")
const syncDir = "/output/sync"
proc syncNodes*(stage: string, nodeId, nodeCount: int) {.async.} =
  ## File-based barrier across all nodes sharing `syncDir`.
  ## This node drops a marker file `sync_<stage>_<nodeId>`, then waits until
  ## the marker of every node (0 ..< nodeCount) for this stage is present.
  # Initial grace period so peers reach the barrier before we start polling.
  await sleepAsync(2.seconds)
  let marker = syncDir / ("sync_" & stage & "_" & $nodeId)
  writeFile(marker, "ok")
  # Full set of markers this stage requires, one per node id.
  var expected: seq[string] = @[]
  for peer in 0 ..< nodeCount:
    expected.add(syncDir / ("sync_" & stage & "_" & $peer))
  checkUntilTimeoutCustom(5.seconds, 100.milliseconds):
    expected.allIt(fileExists(it))
  # Short settle time so stragglers observe the completed barrier too.
  await sleepAsync(500.milliseconds)
proc clearSyncFiles*() =
  ## Makes sure `syncDir` exists and contains no leftover marker files
  ## from a previous run.
  if dirExists(syncDir):
    # Remove only plain files; leave any sub-directories untouched.
    for entry in walkDir(syncDir):
      if fileExists(entry.path):
        removeFile(entry.path)
  else:
    createDir(syncDir)