Files
wakurtosis/run.sh
0xFugue 996b7323f4 dstats/host-proc analysis (#125)
* cli scaffolding for processing metrics

* pandas description: dstats

* added dstats columns

* added procfs columns

* prune procfs/dstats columns

* sanitize, convert dstats

* re-write dstats and size conversion

* faster Human2ByteConveter

* dstats violin plots

* dstats, all plots, cdf

* all plots, pdf

* added prefix option

* option for cdf/pdf plots

* add jordis propagation delay calc - partial

* dstats takes only one arg now

* refactor jordis module

* added file check to dstats

* make analysis classes singleton

* ProcFS read/processing

* refactoring; added Plot baseclass

* added TODO

* Plot, violin_plots_helper()

* added ContainerID

* procfs plots - v0

* add rpc/metrics and update config.json

* added settling time plots

* setting labels

* added InOctets/OutOctets

* procfs plots - v1

* MiBs

* TODO

* change perms for prometheus and grafana configs

* conditional import tomli/tomllib

* BLKR/BLKW in MiB

* grafana dashboard

* run.sh refactoring

* dstats is now a standalone module

* added dstats dir

* bug fix

* fast pid2node_name

* kurtosis version check

* dstats bug fix

* bug fix

* dstats analysis - v2

* container2node_name via kurtosis inspect

* added dstats to cleanup

* dstats - container2name - v3

* dstats - v3

* host-proc - v2

* delete old host-proc launcher

* host-proc - v2: inline cpu stats

* host proc v2 : analysis : cpu stats

* add kinspect to host-proc

* host-proc-helper.sh

* host-proc v4

* plots 4 total and avg metrics

* clusters: kmeans

* per container clusters

* panelling is now class-wide

* per container clusters

* take care of incomplete samples

* extract the maximal, complete sample set

* grouping is now done by Key + removed HWM

* host-proc bug fix

* uniformise run.sh conditionals

* bug fix run.sh

* --full-uuids

* remove kurtosis inspect altogether

* cleanup run.sh

* kurtosis run cleanup

* added container size

* host-proc-helper.py

* run.sh

* incorporated config.json

* run.sh

* analysis.py

* config.json .plotting.host-proc

* conditional histograms - v0

* host-proc-helper.py omit fields

* conditional histograms

* compare plots done

* config.json dstats/host-proc

* config.json dstats/host-proc

* dstats/host-proc plotting run.sh

* run.sh

* cmd_helper

* analysis requirements.txt

* monitoring/host-proc/analysis.py -> analysis-module/src/hproc.py

* run.sh

* make top_plot strict

* remove/modify prints

* cleanup the old dockers

* set default to compare plots

* hproc.py

* src/main.py

* Dockerfile main.py

* plot_compare_panel

* set common --out-prefix

* build.sh

* work around ifindex issues

* run.sh: synchronous sudo and suppressed analysis output

* run.sh

* venv for host-proc

* cleanup container-proc outputs

* add run summary to plot titles

* delete monitoring/host-proc/analysis.py

* remove log.info()

* host-proc: profile only wakunodes with config

* col rename, output plot dir, medians

* CPU % fixed

* export all plots to wakurtosis_log

* setting defaults to config.json

* config.json: num_nodes >= fanout

* run.sh plot dirs

* MemUse to VmPeak/VmSize is now configurable

* install python3-venv

* K -> KiB; i -> fo in summary
2023-06-08 14:11:36 +05:30

227 lines
9.8 KiB
Bash
Executable File

#!/bin/sh
##################### SETUP & CLEANUP
# Parses the command line, reports the effective settings, and cleans up
# whatever a previous run left behind.
#
# Usage: sh ./run.sh <measurement_infra> [enclave_name] [config_file]
#   measurement_infra  cadvisor, dstats, host-proc or container-proc (required)
#   enclave_name       optional, defaults to "wakurtosis"
#   config_file        optional, defaults to "config.json"
if [ "$#" -eq 0 ]; then
  # usage errors are diagnostics: send them to stderr
  echo "Error: Must select the measurement infra: cadvisor, dstats, host-proc, container-proc" >&2
  echo "Usage: sh ./run.sh <measurement_infra> [enclave_name] [config_file]" >&2
  exit 1
fi

# get the args
metrics_infra=${1:-"cadvisor"}
enclave_name=${2:-"wakurtosis"}
wakurtosis_config_file=${3:-"config.json"}
dir=$(pwd)
loglevel="error"

echo "- Metrics Infra:  $metrics_infra"
echo "- Enclave name:  $enclave_name"
echo "- Configuration file:  $wakurtosis_config_file"

# cleanup previous runs
# printf instead of `echo -e`: -e is not portable under /bin/sh, and the
# original "\C" escape was a typo for "\n"
printf '\nCleaning up previous runs\n'
sh ./cleanup.sh "$enclave_name"
printf '\nDone cleaning up previous runs\n'

# make sure the prometheus and grafana configs are readable
chmod a+r \
  monitoring/prometheus.yml \
  monitoring/configuration/config/grafana.ini \
  ./monitoring/configuration/config/provisioning/dashboards/dashboard.yaml
##################### END
##################### GENNET
# Runs the gennet image against the selected config file (./config is mounted
# read-only at /config), then copies the generated network data out of the
# container.
printf '\nRunning network generation\n'
docker run --name cgennet -v "${dir}/config/:/config:ro" gennet \
  --config-file "/config/${wakurtosis_config_file}" --traits-dir /config/traits
err=$?
if [ "$err" -ne 0 ]; then
  echo "Gennet failed with error code $err" >&2
  # propagate gennet's status: a bare `exit` would return the status of the
  # echo above, i.e. report success
  exit "$err"
fi

# copy the network generated TODO: remove this extra copy
docker cp cgennet:/gennet/network_data "${dir}/config/topology_generated"
docker rm cgennet > /dev/null 2>&1
##################### END
# Log file names, the invoking user's numeric uid/gid, and the paths used by
# the monitoring sections of this script.
stats_dir="stats"
# do not create fifo under ./stats, or inside the repo
signal_fifo="/tmp/hostproc-signal.fifo"
kurtosis_inspect="kurtosis_inspect.log"
kurtosis_run="kurtosis_run.log"
grp="$(id -g)"
usr="$(id -u)"
##################### MONITORING MODULE PROLOGUES
# Per-infra preparation that must happen before the simulation is launched.
# container-proc needs no prologue.
if [ "$metrics_infra" = "cadvisor" ]; then # CADVISOR
  # prepare the enclave
  echo "Preparing the enclave..."
  kurtosis --cli-log-level "$loglevel" enclave add --name "${enclave_name}"
  enclave_prefix=$(kurtosis --cli-log-level "$loglevel" enclave inspect --full-uuids "$enclave_name" | grep UUID: | awk '{print $2}')
  echo "Enclave network: $enclave_prefix"

  # get the last IP of the enclave
  subnet="$(docker network inspect "$enclave_prefix" | jq -r '.[].IPAM.Config[0].Subnet')"
  echo "Enclave subnetwork: $subnet"
  last_ip="$(ipcalc "$subnet" | grep HostMax | awk '{print $2}')"
  echo "cAdvisor IP: $last_ip"

  # set up the cadvisor
  docker run \
    --volume=/:/rootfs:ro \
    --volume=/var/run:/var/run:rw \
    --volume=/var/lib/docker/:/var/lib/docker:ro \
    --volume=/dev/disk/:/dev/disk:ro \
    --volume=/sys:/sys:ro \
    --volume=/etc/machine-id:/etc/machine-id:ro \
    --publish=8080:8080 \
    --detach=true \
    --name=cadvisor \
    --privileged \
    --device=/dev/kmsg \
    --network "$enclave_prefix" \
    --ip="$last_ip" \
    gcr.io/cadvisor/cadvisor:v0.47.0
elif [ "$metrics_infra" = "dstats" ]; then # DSTATS
  odir=./monitoring/dstats/$stats_dir
  # -p: do not complain if the directory is still there from an earlier run
  mkdir -p "$odir"
elif [ "$metrics_infra" = "host-proc" ]; then # HOST-PROC
  odir=./monitoring/host-proc/$stats_dir
  rclist=$odir/docker-rc-list.out
  mkdir -p "$odir"
  # reuse a fifo left over from an earlier run instead of failing on it
  [ -p "$signal_fifo" ] || mkfifo "$signal_fifo"
  chmod 0777 "$signal_fifo"

  # get the sudo sorted out in the main thread itself
  echo "host-proc: need sudo rights, please enter suitable credentials at the prompt"
  sudo echo "host-proc: got the credentials, starting the host-proc helper" # dummy sudo cmd
  sudo sh ./monitoring/host-proc/host-proc-helper.sh "$rclist" "$odir" "$usr" "$grp" "$signal_fifo" &
fi
##################### END
##################### KURTOSIS RUN
# Create the new enclave and run the simulation; all kurtosis output is
# captured in $kurtosis_run. Sets START/END1/DIFF1 (setup timing) and wls_cid
# (the WLS container name) for the sections that follow.
jobs=$(jq -r '.kurtosis.jobs' "config/${wakurtosis_config_file}")
printf '\nSetting up the enclave: %s\n' "$enclave_name"

START=$(date +%s)
# Call kurtosis directly rather than eval-ing a command string: the arguments
# reach kurtosis exactly as written, with no second round of shell parsing.
kurtosis --cli-log-level "$loglevel" run --full-uuids --enclave "${enclave_name}" . \
  "{\"wakurtosis_config_file\" : \"config/${wakurtosis_config_file}\"}" \
  --parallelism "${jobs}" > "$kurtosis_run" 2>&1
END1=$(date +%s)
DIFF1=$(( $END1 - $START ))
echo "Enclave $enclave_name is up and running: took $DIFF1 secs to setup"

# keep only the part of the log from the success marker to the end
sed -n '/Starlark code successfully run. No output was returned./,$p' "$kurtosis_run" > "$kurtosis_inspect"

# Extract the WLS service name
wls_service_name=$(grep "\<wls\>" "$kurtosis_inspect" | awk '{print $1}')
if [ -z "$wls_service_name" ]; then
  # Without this guard the greps below would run with an empty pattern and the
  # rest of the script would operate on a bogus container name.
  echo "Error: WLS service not found in $kurtosis_inspect; see $kurtosis_run for the kurtosis output" >&2
  exit 1
fi
printf '\n--> To see simulation logs run: kurtosis service logs %s %s <--\n' "$enclave_name" "$wls_service_name"

# Get the container prefix/suffix for the WLS service
wls_sname=$(grep -- "$wls_service_name" "$kurtosis_inspect" | awk '{print $2}')
wls_suuid=$(grep -- "$wls_service_name" "$kurtosis_inspect" | awk '{print $1}')

# Construct the fully qualified container name that kurtosis has created
wls_cid="$wls_sname--$wls_suuid"
#echo "The WLS_CID = $wls_cid"
##################### END
##################### MONITORING MODULE EPILOGUE: WLS SIGNALLING
# Starts the selected measurement infra now that the enclave is up. The
# dstats, host-proc and container-proc monitors are launched in the background.
case "$metrics_infra" in
  cadvisor)
    echo "cadvisor: signaling WLS"
    docker exec "$wls_cid" touch /wls/start.signal
    ;;
  dstats)
    echo "Starting dstats measurements.."
    # collect container/node mapping via kurtosis
    kinspect=$odir/docker-kinspect.out
    cp "$kurtosis_inspect" "$kinspect"
    sh ./monitoring/dstats/dstats.sh "$wls_cid" "$odir" & # the process subtree takes care of itself
    ;;
  host-proc)
    echo "Starting host-proc measurements.."
    kinspect=$odir/docker-kinspect.out
    cp "$kurtosis_inspect" "$kinspect"
    sh ./monitoring/host-proc/host-proc.sh "$wls_cid" "$odir" "$signal_fifo" &
    ;;
  container-proc)
    echo "Starting monitoring with probes in the containers"
    # Start process level monitoring (in background, will wait to WLS to be created)
    # Mount the config file this run actually uses (not a fixed config.json)
    # at the path used inside the container.
    docker run \
      -v /var/run/docker.sock:/var/run/docker.sock \
      -v "$(pwd)/monitoring/container-proc/:/cproc-mon/" \
      -v "$(pwd)/config/${wakurtosis_config_file}:/cproc-mon/config/config.json" \
      container-proc:latest &
    # remembered so the run can wait for the monitor before copying its output
    monitor_pid=$!
    ;;
esac
##################### END
##################### GRAFANA
# Fetch the Grafana address & port from the saved kurtosis output (6th column
# of the line naming the grafana service) and tell the user where to look.
#grafana_host=$(kurtosis enclave inspect $enclave_name | grep "\<grafana\>" | awk '{print $6}')
grafana_host=$(grep "\<grafana\>" "$kurtosis_inspect" | awk '{print $6}')
# printf instead of `echo -e`, which is not portable under /bin/sh
printf '\n--> Statistics in Grafana server at http://%s/ <--\n' "$grafana_host"
echo "Output of kurtosis run command written in $kurtosis_run"
##################### END
##################### WAIT FOR THE WLS TO FINISH
# Wait for the container to halt; this will block
# (plain echo: no escapes are needed here and `echo -e` is not portable under /bin/sh)
echo "Waiting for simulation to finish ..."
status_code="$(docker container wait "$wls_cid")"
echo "Simulation ended with code $status_code Results in ./${enclave_name}_logs"

# report setup time + simulation time = total
END2=$(date +%s)
DIFF2=$(( $END2 - $END1 ))
echo "Simulation took $DIFF1 + $DIFF2 = $(( $END2 - $START)) secs"
##################### END
##################### GATHER CONFIG, LOGS & METRICS
# Collects everything produced by the run under ./${enclave_name}_logs:
# the kurtosis dump, the kurtosis logs, ./config, the monitoring data,
# the run arguments and the simulation results.

# give time for the messages to settle down before we collect the logs
sleep 60

# dump logs
echo "Dumping Kurtosis logs"
kurtosis enclave dump "${enclave_name}" "${enclave_name}_logs" > /dev/null 2>&1
cp "$kurtosis_run" "$kurtosis_inspect" "${enclave_name}_logs"

# copy metrics data, config, network_data to the logs dir
cp -r ./config "${enclave_name}_logs"

##################### MONITORING MODULE - COPY
if [ "$metrics_infra" = "dstats" ]; then
  # unfortunately there is no way to introduce a race-free finish signalling
  echo "dstats: copying the dstats data"
  cp -r ./monitoring/dstats/stats "${enclave_name}_logs/dstats-data"
elif [ "$metrics_infra" = "host-proc" ]; then
  echo "Copying the host-proc data"
  cp -r ./monitoring/host-proc/stats "${enclave_name}_logs/host-proc-data"
elif [ "$metrics_infra" = "container-proc" ]; then
  # plain echo: `echo -e` is not portable under /bin/sh
  echo "Waiting monitoring to finish ..."
  wait "$monitor_pid"
  echo "Copying the container-proc measurements"
  cp ./monitoring/container-proc/cproc_metrics.json "./${enclave_name}_logs/cproc_metrics.json" > /dev/null 2>&1
  # \rm -r ./monitoring/container-proc/cproc_metrics.json > /dev/null 2>&1
fi

# record the arguments of this run next to its results
{
  echo "- Metrics Infra: $metrics_infra"
  echo "- Enclave name: $enclave_name"
  echo "- Configuration file: $wakurtosis_config_file"
} > "./${enclave_name}_logs/run_args"

# Copy simulation results
docker cp "$wls_cid:/wls/network_topology/network_data.json" "./${enclave_name}_logs"
docker cp "$wls_cid:/wls/messages.json" "./${enclave_name}_logs"
# Run analysis
# Only runs when the selected config file has a (non-null) "plotting" entry.
# The logs directory and config file are derived from the run arguments, so
# the analysis reads the data this run collected even when the enclave name or
# config file are not the defaults.
logs_dir="${enclave_name}_logs"
sim_config="/simulation_data/config/${wakurtosis_config_file}"
if jq -e '.plotting' "./config/${wakurtosis_config_file}" >/dev/null 2>&1; then
  case "$metrics_infra" in
    dstats | host-proc)
      # Both infras share the same flow: the container name, the hproc.py
      # sub-command and the plot file names are all the infra name.
      docker run --name "$metrics_infra" --network "host" \
        -v "$(pwd)/${logs_dir}:/simulation_data/" \
        --add-host=host.docker.internal:host-gateway \
        analysis src/hproc.py "$metrics_infra" /simulation_data/ \
        --config-file "$sim_config" >/dev/null 2>&1
      docker cp "${metrics_infra}:/analysis/plots/" "${logs_dir}/${metrics_infra}-plots"
      # A relative symlink target resolves against the link's own directory,
      # so no cd into the logs dir is needed; a failed cd can no longer leave
      # the script in the wrong directory.
      ln -s "${metrics_infra}-plots/output-${metrics_infra}-compare.pdf" "${logs_dir}/analysis.pdf"
      ;;
    container-proc)
      docker run --network "host" \
        -v "$(pwd)/${logs_dir}:/simulation_data/" \
        --add-host=host.docker.internal:host-gateway \
        analysis src/main.py -i container-proc >/dev/null 2>&1
      ;;
    cadvisor)
      # port = text after ':' in the 6th column of the prometheus service line
      prometheus_port=$(grep "\<prometheus\>" "$kurtosis_inspect" | awk '{print $6}' | awk -F':' '{print $2}')
      docker run --network "host" \
        -v "$(pwd)/${logs_dir}:/simulation_data/" \
        --add-host=host.docker.internal:host-gateway \
        analysis src/main.py -i cadvisor -p "$prometheus_port" >/dev/null 2>&1
      ;;
  esac
fi
echo "Done."
##################### END