Minor plot improvements (#41)

* Add more parameters to yaml configuration

* Refactor function to also show min and max values in boxplots
This commit is contained in:
Alberto Soutullo
2024-10-02 13:29:06 +02:00
committed by GitHub
parent ae5142f119
commit 6aefa6d91e
4 changed files with 59 additions and 30 deletions

View File

@@ -56,16 +56,20 @@ metrics_to_scrape:
# extract_field: "pod-node"
# folder_name: "nim-gc-memory/"
plotting:
"bandwidth":
"ignore": ["bootstrap", "midstrap"]
"data_points": 15
"bandwidth-0-33-3K":
"ignore_columns": ["bootstrap", "midstrap"]
"data_points": 25
"folder":
- "test/nwaku0.26-f/"
"data":
# - "libp2p-in"
# - "libp2p-out"
- "asd"
- "asd2"
"xlabel_name": "NºNodes-MsgRate"
- "libp2p-in"
- "libp2p-out"
"include_files":
- "3K-1mgs-s-1KB"
- "3K-1mgs-5s-1KB"
- "3K-1mgs-10s-1KB"
"xlabel_name": "Simulation"
"ylabel_name": "KBytes/s"
"show_min_max": false
"scale-x": 1000
"fig_size": [20, 20]

View File

@@ -14,11 +14,15 @@ logger = logging.getLogger(__name__)
class DataFileHandler(DataHandler):
def __init__(self, ignore_columns: Optional[List] = None, include_files: Optional[List] = None):
super().__init__(ignore_columns)
self._include_files = include_files
def concat_dataframes_from_folders_as_mean(self, folders: List, points: int):
for folder in folders:
folder_path = Path(folder)
folder_df = pd.DataFrame()
match file_utils.get_files_from_folder_path(folder_path):
match file_utils.get_files_from_folder_path(folder_path, self._include_files):
case Ok(data_files_names):
folder_df = self._concat_files_as_mean(folder_df, data_files_names, folder_path,
points)

View File

@@ -28,7 +28,7 @@ class Plotter:
def _create_plot(self, plot_name: str, plot_specs: Dict):
fig, axs = plt.subplots(nrows=1, ncols=len(plot_specs['data']), sharey='row',
figsize=(15, 15))
figsize=plot_specs['fig_size'])
subplot_paths_group = self._create_subplot_paths_group(plot_specs)
self._insert_data_in_axs(subplot_paths_group, axs, plot_specs)
@@ -36,7 +36,8 @@ class Plotter:
def _insert_data_in_axs(self, subplot_paths_group: List, axs: np.ndarray, plot_specs: Dict):
for i, subplot_path_group in enumerate(subplot_paths_group):
file_data_handler = DataFileHandler(plot_specs['ignore'])
include_files = plot_specs.get("include_files")
file_data_handler = DataFileHandler(plot_specs['ignore_columns'], include_files)
file_data_handler.concat_dataframes_from_folders_as_mean(subplot_path_group,
plot_specs['data_points'])
subplot_df = file_data_handler.dataframe
@@ -54,7 +55,7 @@ class Plotter:
subplot_title = plot_specs['data'][index]
axs = axs if type(axs) is not np.ndarray else axs[index]
box_plot = sns.boxplot(data=df, x="variable", y="value", hue="class", ax=axs,
showfliers=False)
showfliers=True)
# Apply the custom formatter to the x-axis ticks
formatter = ticker.FuncFormatter(lambda x, pos: '{:.0f}'.format(x / plot_specs['scale-x']))
@@ -66,7 +67,11 @@ class Plotter:
box_plot.xaxis.set_tick_params(rotation=45)
box_plot.legend(loc='upper right', bbox_to_anchor=(1, 1))
self._add_median_labels(box_plot)
self._add_stat_labels(box_plot)
show_min_max = plot_specs.get("show_min_max", False)
if show_min_max:
self._add_stat_labels(box_plot, value_type="min")
self._add_stat_labels(box_plot, value_type="max")
def _create_subplot_paths_group(self, plot_specs: Dict) -> List:
subplot_path = [[f"{folder}{data}" for folder in plot_specs["folder"]] for data in
@@ -74,29 +79,41 @@ class Plotter:
return subplot_path
def _add_median_labels(self, ax: plt.Axes, fmt: str = ".3f") -> None:
# https://stackoverflow.com/a/63295846
"""Add text labels to the median lines of a seaborn boxplot.
def _add_stat_labels(self, ax: plt.Axes, fmt: str = ".3f", value_type: str = "median") -> None:
# Refactored from https://stackoverflow.com/a/63295846
"""
Add text labels to the median, minimum, or maximum lines of a seaborn boxplot.
Args:
ax: plt.Axes, e.g. the return value of sns.boxplot()
fmt: format string for the median value
ax: plt.Axes, e.g., the return value of sns.boxplot()
fmt: Format string for the value (e.g., min/max/median).
value_type: The type of value to label. Can be 'median', 'min', or 'max'.
"""
lines = ax.get_lines()
boxes = [c for c in ax.get_children() if "Patch" in str(c)]
boxes = [c for c in ax.get_children() if "Patch" in str(c)] # Get box patches
start = 4
if not boxes: # seaborn v0.13 => fill=False => no patches => +1 line
if not boxes: # seaborn v0.13 or above (no patches => need to shift index)
boxes = [c for c in ax.get_lines() if len(c.get_xdata()) == 5]
start += 1
lines_per_box = len(lines) // len(boxes)
for median in lines[start::lines_per_box]:
x, y = (data.mean() for data in median.get_data())
if value_type == "median":
line_idx = start
elif value_type == "min":
line_idx = start - 2 # min line comes 2 positions before the median
elif value_type == "max":
line_idx = start - 1 # max line comes 1 position before the median
else:
raise ValueError("Invalid value_type. Must be 'min', 'max', or 'median'.")
for value_line in lines[line_idx::lines_per_box]:
x, y = (data.mean() for data in value_line.get_data())
# choose value depending on horizontal or vertical plot orientation
value = x if len(set(median.get_xdata())) == 1 else y
text = ax.text(x, y, f'{value/1000:{fmt}}', ha='center', va='center',
fontweight='bold', color='white')
# create median-colored border around white text for contrast
value = x if len(set(value_line.get_xdata())) == 1 else y
text = ax.text(x, y, f'{value / 1000:{fmt}}', ha='center', va='center',
fontweight='bold', color='white', size=10)
# create colored border around white text for contrast
text.set_path_effects([
path_effects.Stroke(linewidth=3, foreground=median.get_color()),
path_effects.Stroke(linewidth=3, foreground=value_line.get_color()),
path_effects.Normal(),
])

View File

@@ -3,7 +3,7 @@ import pandas as pd
import yaml
import logging
from pathlib import Path
from typing import List, Dict
from typing import List, Dict, Optional
from result import Result, Err, Ok
from src.utils import path_utils
@@ -22,14 +22,18 @@ def read_yaml_file(file_path: str) -> Dict:
return data
def get_files_from_folder_path(path: Path, extension: str = '*') -> Result[List[str], str]:
def get_files_from_folder_path(path: Path, include_files: Optional[List[str]] = None, extension: str = '*') \
-> Result[List[str], str]:
if not path.exists():
return Err(f"{path} does not exist.")
if not extension.startswith('*'):
extension = '*.' + extension
files = [p.name for p in path.glob(extension) if p.is_file()]
files = [
p.name for p in path.glob(extension)
if p.is_file() and (include_files is None or p.name in include_files)
]
logger.debug(f"Found {len(files)} files in {path}")
logger.debug(f"Files are: {files}")