- Updated stats URL in `main.py` (ruby -> sapphire)
- Added hack in `data_file_handler.py` to keep the order of files
- Added new time intervals for 3k nodes
- Updated time intervals in `scrape.yaml`
- Fixed num nodes check in `example_log_analysis.py`
- Changed time shift from -2 hrs to -4 hrs
This commit is contained in:
Pearson White
2025-06-25 11:35:16 -04:00
parent 23b0777998
commit 14d4e8f57a
5 changed files with 152 additions and 46 deletions

View File

@@ -27,24 +27,25 @@ if __name__ == "__main__":
data = [
("2025-06-23T18:36:44", "2025-06-23T18:59:09", "local_data/simulations_data/1k_1s_1KB/v0.36.0-rc.0/"),
("2025-06-23T19:01:04", "2025-06-23T20:21:38", "local_data/simulations_data/1k_5s_1KB/v0.36.0-rc.0/"),
("2025-06-23T20:22:57", "2025-06-23T22:32:38", "local_data/simulations_data/1k_10s_1KB/v0.36.0-rc.0/"),
("2025-06-23T22:35:15", "2025-06-23T23:09:16", "local_data/simulations_data/2k_1s_1KB/v0.36.0-rc.0/"),
("2025-06-23T23:08:20", "2025-06-24T00:21:30", "local_data/simulations_data/2k_5s_1KB/v0.36.0-rc.0/"),
("2025-06-24T00:19:33", "2025-06-24T02:29:44", "local_data/simulations_data/2k_10s_1KB/v0.36.0-rc.0/"),
("2025-06-24T03:22:18", "2025-06-24T04:23:57", "local_data/simulations_data/3k_1s_1KB/v0.36.0-rc.0/"),
("2025-06-24T05:33:01", "2025-06-24T06:45:14", "local_data/simulations_data/3k_5s_1KB/v0.36.0-rc.0/"),
("2025-06-24T15:33:51", "2025-06-24T16:14:10", "local_data/simulations_data/3k_10s_1KB/v0.36.0-rc.0/"),
("2025-06-23T18:36:44", "2025-06-23T18:59:09", "local_data/simulations_data/1k_1s_1KB/v0.36.0-rc.0/", 1000),
("2025-06-23T19:01:04", "2025-06-23T20:21:38", "local_data/simulations_data/1k_5s_1KB/v0.36.0-rc.0/", 1000),
("2025-06-23T20:22:57", "2025-06-23T22:32:38", "local_data/simulations_data/1k_10s_1KB/v0.36.0-rc.0/", 1000),
("2025-06-23T22:35:15", "2025-06-23T23:09:16", "local_data/simulations_data/2k_1s_1KB/v0.36.0-rc.0/", 2000),
("2025-06-23T23:08:20", "2025-06-24T00:21:30", "local_data/simulations_data/2k_5s_1KB/v0.36.0-rc.0/", 2000),
("2025-06-24T00:19:33", "2025-06-24T02:29:44", "local_data/simulations_data/2k_10s_1KB/v0.36.0-rc.0/", 2000),
("2025-06-25T09:48:46", "2025-06-25T10:19:22", "local_data/simulations_data/3k_1s_1KB/v0.36.0-rc.0/", 3000),
("2025-06-25T10:21:04", "2025-06-25T11:31:33", "local_data/simulations_data/3k_5s_1KB/v0.36.0-rc.0/", 3000),
("2025-06-25T11:32:09", "2025-06-25T13:35:01", "local_data/simulations_data/3k_10s_1KB/v0.36.0-rc.0/", 3000),
]
for start, end, path in data:
for start, end, path, num_nodes in data:
if not os.path.exists(os.path.join(path, "summary")):
print("data summary DNE. create it.")
stack["start_time"] = start
stack["end_time"] = end
print(f"gen: {start} {end} {path}")
stack["nodes_per_statefulset"] = [num_nodes]
print(f"gen: [{start}, {end}] {path}")
log_analyzer = WakuAnalyzer(dump_analysis_dir=path,
**stack)
log_analyzer.analyze_reliability(n_jobs=6)
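
For reference, a minimal self-contained sketch of the new per-run loop in `example_log_analysis.py`: each entry now carries the expected node count, which is injected into the analyzer config as `nodes_per_statefulset`. `DummyAnalyzer` is a stand-in for `WakuAnalyzer` so the snippet runs on its own; the `stack` keys are the ones visible in the diff, everything else is illustrative.

import os

# Stand-in for WakuAnalyzer so this sketch runs without the project dependencies.
class DummyAnalyzer:
    def __init__(self, dump_analysis_dir, **kwargs):
        self.path = dump_analysis_dir
        self.config = kwargs

    def analyze_reliability(self, n_jobs=1):
        print(f"analyzing {self.path}: "
              f"nodes_per_statefulset={self.config['nodes_per_statefulset']}, n_jobs={n_jobs}")

stack = {}
data = [
    ("2025-06-25T09:48:46", "2025-06-25T10:19:22",
     "local_data/simulations_data/3k_1s_1KB/v0.36.0-rc.0/", 3000),
]
for start, end, path, num_nodes in data:
    if not os.path.exists(os.path.join(path, "summary")):
        print("data summary does not exist")
    stack["start_time"] = start
    stack["end_time"] = end
    stack["nodes_per_statefulset"] = [num_nodes]  # expected node count for this run
    DummyAnalyzer(dump_analysis_dir=path, **stack).analyze_reliability(n_jobs=6)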

View File

@@ -8,65 +8,95 @@ data = {
"1k_1s_1KB": {
"wide": ("2025-06-23 18:36:44", "2025-06-23 18:59:09"),
"narrow": ("2025-06-23 18:45:00", "2025-06-23 18:53:00"),
"nodes": 1000,
},
"1k_5s_1KB": {
"wide": ("2025-06-23 19:01:04", "2025-06-23 20:21:38"),
"narrow": ("2025-06-23 19:26:52", "2025-06-23 20:13:42"),
"nodes": 1000,
},
"1k_10s_1KB": {
"wide": ("2025-06-23 20:22:57", "2025-06-23 22:32:38"),
"narrow": ("2025-06-23 20:42:03", "2025-06-23 22:16:55"),
"nodes": 1000,
},
"2k_1s_1KB": {
"wide": ("2025-06-23 22:35:15", "2025-06-23 23:09:16"),
"narrow": ("2025-06-23 22:52:44", "2025-06-23 23:00:56"),
"nodes": 2000,
},
"2k_5s_1KB": {
"wide": ("2025-06-23 23:08:20", "2025-06-24 00:21:30"),
"narrow": ("2025-06-23 23:24:37", "2025-06-24 00:12:46"),
"nodes": 2000,
},
"2k_10s_1KB": {
"wide": ("2025-06-24 00:19:33", "2025-06-24 02:29:44"),
"narrow": ("2025-06-24 00:37:05", "2025-06-24 02:12:57"),
"nodes": 2000,
},
"3k_1s_1KB": { # questionable.
"wide": ("2025-06-24 03:22:18", "2025-06-24 04:23:57"),
"narrow": ("2025-06-24 03:47:49", "2025-06-24 03:55:46"),
# "3k_1s_1KB": { # questionable.
# "wide": ("2025-06-24 03:22:18", "2025-06-24 04:23:57"),
# "narrow": ("2025-06-24 03:47:49", "2025-06-24 03:55:46"),
# "nodes": 3000,
# },
# "3k_5s_1KB": {
# "wide": ("2025-06-24 05:33:01", "2025-06-24 06:45:14"),
# "narrow": ("2025-06-24 05:48:38", "2025-06-24 06:38:17"),
# "nodes": 3000,
# },
# "3k_10s_1KB": {
# "wide": ("2025-06-24 15:33:51", "2025-06-24 16:14:10"),
# "narrow": ("2025-06-24 15:51:12", "2025-06-24 15:58:26"),
# "nodes": 3000,
# },
# 2025_06_24-25
"3k_1s_1KB": {
"wide": ("2025-06-25 09:48:46", "2025-06-25 10:19:22"),
"narrow": ("2025-06-25 10:03:43", "2025-06-25 10:12:51"),
"nodes": 3000,
},
"3k_5s_1KB": {
"wide": ("2025-06-24 05:33:01", "2025-06-24 06:45:14"),
"narrow": ("2025-06-24 05:48:38", "2025-06-24 06:38:17"),
"wide": ("2025-06-25 10:21:04", "2025-06-25 11:31:33"),
"narrow": ("2025-06-25 10:37:31", "2025-06-25 11:24:13"),
"nodes": 3000,
},
"3k_10s_1KB": {
"wide": ("2025-06-24 15:33:51", "2025-06-24 16:14:10"),
"narrow": ("2025-06-24 15:51:12", "2025-06-24 15:58:26"),
"wide": ("2025-06-25 11:32:09", "2025-06-25 13:35:01"),
"narrow": ("2025-06-25 11:48:15", "2025-06-25 13:25:59"),
"nodes": 3000,
},
}
}
def subtract_hours(time : str, hours=2) -> str:
def subtract_hours(time: str, hours=4) -> str:
dt = datetime.strptime(time, "%Y-%m-%d %H:%M:%S")
new_dt = dt - timedelta(hours=hours)
return new_dt.strftime("%Y-%m-%d %H:%M:%S")
def scrape_yaml_line(version, experiment, time_interval):
base_dump_scrape = "test/nwaku/" # Must match your `scrape.yaml`
base_dump_scrape = "test/nwaku/" # Must match your `scrape.yaml`
start, end = time_interval
start = subtract_hours(start)
end = subtract_hours(end)
# example output: - [ "2025-06-24 05:48:38", "2025-06-24 06:38:17", "3K-5mgs-s-1KB" ]
return f"- [ \"{start}\", \"{end}\", \"{experiment}\" ]"
return f'- [ "{start}", "{end}", "{experiment}" ]'
def log_analysis_line(version, experiment, time_interval):
def log_analysis_line(version, experiment, time_interval, num_nodes):
def transform_date(text):
return re.sub(r'(\d{4}-\d{2}-\d{2}) (\d{2}:\d{2}:\d{2})', r'\1T\2', text)
return re.sub(r"(\d{4}-\d{2}-\d{2}) (\d{2}:\d{2}:\d{2})", r"\1T\2", text)
base_dump_log_analysis = "local_data/simulations_data/"
start_time, end_time = time_interval
start_time = transform_date(start_time)
end_time = transform_date(end_time)
# example output: ("2025-06-23T18:36:44", "2025-06-23T18:59:09", "v0.36.0-rc.0", "1k_1s_1KB"),
# example output: ("2025-06-23T18:36:44", "2025-06-23T18:59:09", "v0.36.0-rc.0", "1k_1s_1KB", 1000),
path = os.path.join(base_dump_log_analysis, experiment, version)
return f"(\"{start_time}\", \"{end_time}\", \"{path}/\"),"
return f'("{start_time}", "{end_time}", "{path}/", {num_nodes}),'
def main():
@@ -74,8 +104,12 @@ def main():
scrape_yaml = []
for version, version_dict in data.items():
for experiment, experiment_dict in version_dict.items():
example_log_analysis.append( log_analysis_line( version, experiment, experiment_dict["wide"] ) )
scrape_yaml.append( scrape_yaml_line( version, experiment, experiment_dict["narrow"]) )
example_log_analysis.append(
log_analysis_line(
version, experiment, experiment_dict["wide"], experiment_dict["nodes"]
)
)
scrape_yaml.append(scrape_yaml_line(version, experiment, experiment_dict["narrow"]))
for line in example_log_analysis:
print(line)
@@ -87,4 +121,4 @@ def main():
if __name__ == "__main__":
main()
main()
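
The widened time shift (hours=2 -> hours=4) lines up with the new scrape.yaml entries: the timestamps in `data` appear to be local time at UTC-4 (the same offset as the commit date), while scrape.yaml apparently wants the window four hours earlier. Assuming that is the intent, the helper can be checked against the 3k_1s_1KB narrow window:

from datetime import datetime, timedelta

def subtract_hours(time: str, hours=4) -> str:
    # Same helper as in the generator script above.
    dt = datetime.strptime(time, "%Y-%m-%d %H:%M:%S")
    return (dt - timedelta(hours=hours)).strftime("%Y-%m-%d %H:%M:%S")

# 3k_1s_1KB narrow start from the data dict, shifted by -4 h,
# matches the "2025-06-25 06:03:43" entry in the new scrape.yaml.
print(subtract_hours("2025-06-25 10:03:43"))  # -> 2025-06-25 06:03:43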

View File

@@ -1,5 +1,9 @@
import logging
logging.basicConfig(level=logging.DEBUG, force=True)
# Python Imports
# Project Imports
import src.logger.logger
from src.metrics.scrapper import Scrapper
@@ -9,10 +13,10 @@ from src.utils import file_utils
def main():
url = "https://metrics.riff.cc/select/0/prometheus/api/v1/"
url = "https://metrics.vaclab.org/select/0/prometheus/api/v1/"
scrape_config = "scrape.yaml"
scrapper = Scrapper("rubi.yaml", url, scrape_config)
scrapper = Scrapper("~/vac/configs/sapphire.yaml", url, scrape_config)
scrapper.query_and_dump_metrics()
config_dict = file_utils.read_yaml_file("scrape.yaml")
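
For orientation, a minimal sketch of the kind of range query the Scrapper presumably issues against the new endpoint. The parameter names follow the standard Prometheus HTTP API (the URL is a Prometheus-compatible VictoriaMetrics path); the query string, window, and step come from the scrape.yaml hunk below, not from Scrapper's internals, and the use of `requests` here is purely illustrative.

import requests

base_url = "https://metrics.vaclab.org/select/0/prometheus/api/v1/"
params = {
    # query from scrape.yaml with $__rate_interval (121s) already substituted
    "query": "rate(libp2p_network_bytes_total{direction='in'}[121s])",
    "start": "2025-06-25T06:03:43Z",  # 3k_1s_1KB narrow window, assumed UTC
    "end": "2025-06-25T06:12:51Z",
    "step": "60s",
}
resp = requests.get(base_url + "query_range", params=params, timeout=30)
resp.raise_for_status()
for series in resp.json()["data"]["result"]:
    print(series["metric"].get("instance"), len(series["values"]), "samples")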

View File

@@ -1,14 +1,16 @@
general_config:
times_names:
- [ "2025-06-23 16:45:00", "2025-06-23 16:53:00", "1k_1s_1KB" ]
- [ "2025-06-23 17:26:52", "2025-06-23 18:13:42", "1k_5s_1KB" ]
- [ "2025-06-23 18:42:03", "2025-06-23 20:16:55", "1k_10s_1KB" ]
- [ "2025-06-23 20:52:44", "2025-06-23 21:00:56", "2k_1s_1KB" ]
- [ "2025-06-23 21:24:37", "2025-06-23 22:12:46", "2k_5s_1KB" ]
- [ "2025-06-23 22:37:05", "2025-06-24 00:12:57", "2k_10s_1KB" ]
- [ "2025-06-24 01:47:49", "2025-06-24 01:55:46", "3k_1s_1KB" ]
- [ "2025-06-24 03:48:38", "2025-06-24 04:38:17", "3k_5s_1KB" ]
- [ "2025-06-24 13:51:12", "2025-06-24 13:58:26", "3k_10s_1KB" ]
- [ "2025-06-23 14:45:00", "2025-06-23 14:53:00", "1k_1s_1KB" ]
- [ "2025-06-23 15:26:52", "2025-06-23 16:13:42", "1k_5s_1KB" ]
- [ "2025-06-23 16:42:03", "2025-06-23 18:16:55", "1k_10s_1KB" ]
- [ "2025-06-23 18:52:44", "2025-06-23 19:00:56", "2k_1s_1KB" ]
- [ "2025-06-23 19:24:37", "2025-06-23 20:12:46", "2k_5s_1KB" ]
- [ "2025-06-23 20:37:05", "2025-06-23 22:12:57", "2k_10s_1KB" ]
- [ "2025-06-25 06:03:43", "2025-06-25 06:12:51", "3k_1s_1KB" ]
- [ "2025-06-25 06:37:31", "2025-06-25 07:24:13", "3k_5s_1KB" ]
- [ "2025-06-25 07:48:15", "2025-06-25 09:25:59", "3k_10s_1KB" ]
scrape_config:
$__rate_interval: "121s"
step: "60s"
@@ -22,14 +24,27 @@ metrics_to_scrape:
# query: "libp2p_open_streams"
# extract_field: "instance-type-dir"
# folder_name: "libp2p-open-streams/"
libp2p_network_in:
query: "rate(libp2p_network_bytes_total{direction='in', namespace='zerotesting'}[$__rate_interval])"
query: "rate(libp2p_network_bytes_total{direction='in'}[$__rate_interval])"
# query: "rate(libp2p_network_bytes_total{direction='in', namespace='zerotesting'}[$__rate_interval])"
extract_field: "instance"
folder_name: "libp2p-in/"
libp2p_network_out:
query: "rate(libp2p_network_bytes_total{direction='out', namespace='zerotesting'}[$__rate_interval])"
extract_field: "instance"
folder_name: "libp2p-out/"
discv5_network_in:
query: "rate(discv5_network_bytes_total{direction='in', namespace='zerotesting'}[$__rate_interval])"
extract_field: "instance"
folder_name: "discv5-in/"
discv5_network_out:
query: "rate(discv5_network_bytes_total{direction='out', namespace='zerotesting'}[$__rate_interval])"
extract_field: "instance"
folder_name: "discv5-out/"
# container_recv_bytes:
# query: "rate(container_network_receive_bytes_total{namespace='zerotesting'}[$__rate_interval])"
# extract_field: "pod-node"
@@ -55,7 +70,7 @@ metrics_to_scrape:
# extract_field: "pod-node"
# folder_name: "nim-gc-memory/"
plotting:
"bandwidth-0-33-3K":
"bandwidth-0-36-3K":
"ignore_columns": ["bootstrap", "midstrap"]
"data_points": 25
"folder":
@@ -63,10 +78,48 @@ plotting:
"data":
- "libp2p-in"
- "libp2p-out"
# "include_files":
# - "3K-1mgs-s-1KB"
# - "3K-1mgs-5s-1KB"
# - "3K-1mgs-10s-1KB"
"include_files":
- 3k_1s_1KB
- 3k_5s_1KB
- 3k_10s_1KB
"xlabel_name": "Simulation"
"ylabel_name": "KBytes/s"
"show_min_max": false
"outliers": true
"scale-x": 1000
"fig_size": [20, 20]
"bandwidth-0-36-2K":
"ignore_columns": ["bootstrap", "midstrap"]
"data_points": 25
"folder":
- "test/nwaku/v0.36.0-rc.0/"
"data":
- "libp2p-in"
- "libp2p-out"
"include_files":
- 2k_1s_1KB
- 2k_5s_1KB
- 2k_10s_1KB
"xlabel_name": "Simulation"
"ylabel_name": "KBytes/s"
"show_min_max": false
"outliers": true
"scale-x": 1000
"fig_size": [20, 20]
"bandwidth-0-36-1K":
"ignore_columns": ["bootstrap", "midstrap"]
"data_points": 25
"folder":
- "test/nwaku/v0.36.0-rc.0/"
"data":
- "libp2p-in"
- "libp2p-out"
"include_files":
- 1k_1s_1KB
- 1k_5s_1KB
- 1k_10s_1KB
"xlabel_name": "Simulation"
"ylabel_name": "KBytes/s"
"show_min_max": false

View File

@@ -12,6 +12,18 @@ from src.data.data_handler import DataHandler
logger = logging.getLogger(__name__)
def sort_by_file_list(data: List[Path], file_list: Optional[List[str]]) -> List[Path]:
if not file_list:
return sorted(data)
order = {name: index for index, name in enumerate(file_list)}
sorted_data = sorted(
data,
key=lambda path: order.get(path.name, len(file_list))
)
return sorted_data
class DataFileHandler(DataHandler):
def __init__(self, ignore_columns: Optional[List] = None, include_files: Optional[List] = None):
@@ -22,8 +34,10 @@ class DataFileHandler(DataHandler):
for folder in folders:
folder_path = Path(folder)
folder_df = pd.DataFrame()
match file_utils.get_files_from_folder_path(folder_path, self._include_files):
match file_utils.get_files_from_folder_path(folder_path):
case Ok(data_files_names):
if self._include_files:
data_files_names = reversed([name for name in self._include_files if name in data_files_names])
folder_df = self._concat_files_as_mean(folder_df, data_files_names, folder_path,
points)
folder_df["class"] = f"{folder_path.parent.name}/{folder_path.name}"