Files
research/waku_scalability/cases.py
Farooq 93c710b4f7 waku latency/bandwidth theoretical analysis (#206)
* equations/discussion for latency, bandwidth and duplicates added. some equations corrected!

* some minor adjustments in formulas

* updated theoretical waku latency/bandwidth computations

* Update waku_scalability/waku_scaling.md

typo fix

Co-authored-by: Alberto Soutullo <alberto.soutullo639@gmail.com>

* corrected latency description

---------

Co-authored-by: Alberto Soutullo <alberto.soutullo639@gmail.com>
2025-10-07 15:53:45 +05:00

413 lines
12 KiB
Python

from abc import ABC, abstractmethod
import io
import math
from typing import Any, Callable, List
import numpy as np
from pydantic import BaseModel, Field, PositiveInt
from assumptions import (
a1,
a2,
a3,
a4,
a5,
a6,
a7,
a8,
a9,
a10,
a11,
a12,
a13,
a14,
a15,
a16,
a17,
a21,
a31,
a32,
a33,
a41,
a42,
a34,
a35,
a36,
a37,
a43,
)
from assumptions import (
message_size,
messages_sent_per_hour,
average_node_degree,
d_lazy,
mcache_gossip,
gossip_message_size,
avg_ratio_gossip_replys,
avg_nodes_per_shard,
avg_shards_per_node,
average_delay_per_hop,
ratio_of_big_messages,
idontwant_message_size,
big_message_size,
small_message_size,
idontwant_too_late,
average_peer_bandwidth,
)
from utils import load_color_fmt, magnitude_fmt, get_header, sizeof_fmt
def get_assumptions_str(xs) -> str:
    """Render assumptions as a text block under a fixed heading.

    Args:
        xs: Iterable of assumptions; each item is formatted through an f-string.

    Returns:
        The heading line followed by one newline-terminated line per item.
    """
    heading = "Assumptions/Simplifications:\n"
    body = "".join(f"{item}\n" for item in xs)
    return heading + body
def usage_str(load_users_fn: Callable[..., object], n_users: PositiveInt):
    """Describe the receive bandwidth computed for ``n_users`` users.

    Args:
        load_users_fn: Callable invoked as ``load_users_fn(n_users)``. Its result
            is passed to both ``load_color_fmt`` and ``sizeof_fmt``.
        n_users: Number of users to evaluate the load for.

    Returns:
        The result of ``load_color_fmt`` applied to the load and the message text.
    """
    # Evaluate the load model once and reuse the value for the colouring and
    # for the text, instead of calling the model a second time.
    load = load_users_fn(n_users)
    message = (
        "For "
        + magnitude_fmt(n_users)
        + " users, receiving bandwidth is "
        + sizeof_fmt(load)
        + "/hour"
    )
    return load_color_fmt(load, message)
def get_usages_str(load_users) -> str:
    """Build the bandwidth report for 100, 10 000 and 1 000 000 users.

    Args:
        load_users: Load function forwarded to ``usage_str`` for each user count.

    Returns:
        The three ``usage_str`` results joined by newlines, with no trailing newline.
    """
    user_counts = (10**2, 10**4, 10**6)
    return "\n".join(usage_str(load_users, count) for count in user_counts)
def latency_str(latency_users_fn, n_users, degree):
    """Describe the latency computed for ``n_users`` users.

    Args:
        latency_users_fn: Callable invoked as ``latency_users_fn(n_users, degree)``;
            its result is formatted with ``"%.3f"``.
        n_users: Number of users to evaluate the latency for.
        degree: Node degree forwarded to ``latency_users_fn``.

    Returns:
        The result of ``load_color_fmt`` applied to the latency and the message text.
    """
    # Evaluate the latency model once and reuse the value for the colouring
    # and for the text, instead of calling the model a second time.
    latency = latency_users_fn(n_users, degree)
    message = (
        "For "
        + magnitude_fmt(n_users)
        + " the maximum latency is "
        + ("%.3f" % latency)
        + " s"
    )
    return load_color_fmt(latency, message)
def get_latency_str(latency_users):
    """Build the latency report for 100, 10 000 and 1 000 000 users.

    Each line is produced by ``latency_str`` using the module-level
    ``average_node_degree`` as the degree.

    Args:
        latency_users: Latency function forwarded to ``latency_str``.

    Returns:
        The three lines joined by newlines, followed by a trailing newline.
    """
    user_counts = (10**2, 10**4, 10**6)
    lines = [
        latency_str(latency_users, count, average_node_degree)
        for count in user_counts
    ]
    return "\n".join(lines) + "\n"
def num_edges_dregular(num_nodes, degree):
    """Return the edge count ``num_nodes * degree / 2`` of a d-regular graph.

    Args:
        num_nodes: Number of nodes; a scalar (Python or NumPy) or a NumPy array
            of node counts. Every value must be even.
        degree: Node degree; a scalar or a NumPy array. Every value must be even.

    Returns:
        ``num_nodes * (degree / 2)``: a float for scalar inputs, or an array
        when an array is passed.

    Raises:
        AssertionError: If any node count or any degree is odd.
    """
    # We assume an even d; d-regular graphs where both n and d are odd don't exist.
    # np.mod/np.all treat Python ints, floats, NumPy scalars and arrays uniformly.
    # The previous isinstance(..., int) test tried to iterate over NumPy scalars
    # and floats, and raised TypeError instead of checking evenness.
    assert np.all(
        np.mod(num_nodes, 2) == 0
    ), f"Broken assumption: Expected num_nodes to be even. Instead n = {num_nodes}"
    assert np.all(
        np.mod(degree, 2) == 0
    ), f"Broken assumption: Expected degree should be even. Instead d = {degree}"
    return num_nodes * (degree / 2)
def avg_node_distance_upper_bound(n_users, degree):
    """Return ``log_degree(n_users)``, used as an upper bound on node distance.

    Args:
        n_users: Number of nodes in the network.
        degree: Node degree, used as the logarithm base.

    Returns:
        The logarithm of ``n_users`` to base ``degree`` as a float.
    """
    log_base_degree = math.log(n_users, degree)
    return log_base_degree
# Cases Load Per Node
# -----------------------------------------------------------
class Case(ABC, BaseModel):
    """Abstract base for one analysed scenario.

    Subclasses supply the load formula, a header and the list of assumptions;
    ``description`` assembles them into a printable report.
    """

    label: str = Field(description="String to use as label on plot.")
    legend: str = Field(description="String to use in legend of plot.")

    @abstractmethod
    def load(self, n_users, **kargs) -> object:
        """Return the value computed by this case for ``n_users`` users."""

    @property
    @abstractmethod
    def header(self) -> str:
        """Title passed to ``get_header`` when building the description."""

    @property
    @abstractmethod
    def assumptions(self) -> List[str]:
        """Assumptions listed in the description of this case."""

    @property
    def description(self) -> str:
        """Report text: header, assumptions, usage lines and a separator rule."""
        sections = [
            "",
            get_header(self.header),
            get_assumptions_str(self.assumptions),
            get_usages_str(self.load),
            "",
            "------------------------------------------------------------",
        ]
        return "\n".join(sections)
# Case 1 :: single shard, unique messages, store
class Case1(Case):
    """Load case 1: single shard, unique messages (store load)."""

    label: str = "case 1"
    legend: str = "Case 1. top: 6-regular; store load (also: naive light node)"

    def load(self, n_users, **kwargs):
        """Return ``message_size * messages_sent_per_hour * n_users``."""
        load_per_user = message_size * messages_sent_per_hour
        return load_per_user * n_users

    @property
    def header(self) -> str:
        """Title used in the printed description."""
        return "Load case 1 (store load; corresponds to received load per naive light node)"

    @property
    def assumptions(self) -> List[str]:
        """Assumptions this case relies on."""
        return [a1, a2, a3, a4, a7, a21]
# Case 2 :: single shard, (n*d)/2 messages
class Case2(Case):
    """Load case 2: single shard, one message per edge, i.e. (n*d)/2 messages."""

    label: str = "case 2"
    legend: str = "Case 2. top: 6-regular; receive load per node, send delay to reduce duplicates"

    def load(self, n_users, **kwargs):
        """Return the per-message load scaled by the edge count of the d-regular graph."""
        load_per_message = message_size * messages_sent_per_hour
        edge_count = num_edges_dregular(n_users, average_node_degree)
        return load_per_message * edge_count

    @property
    def header(self) -> str:
        """Title used in the printed description."""
        return "Load case 2 (received load per node)"

    @property
    def assumptions(self):
        """Assumptions this case relies on."""
        return [a1, a2, a3, a4, a5, a7, a31]
# Case 3 :: single shard n*(d-1) messages
class Case3(Case):
    """Load case 3: single shard, n*(d-1) + 1 messages."""

    label: str = "case 3"
    legend: str = "Case 3. top: 6-regular; receive load per node, current operation"

    def load(self, n_users, **kwargs):
        """Return the per-message load scaled by ``n_users * (d - 1) + 1``."""
        load_per_message = message_size * messages_sent_per_hour
        message_count = n_users * (average_node_degree - 1) + 1
        return load_per_message * message_count

    @property
    def header(self) -> str:
        """Title used in the printed description."""
        return "Load case 3 (received load per node)"

    @property
    def assumptions(self):
        """Assumptions this case relies on."""
        return [a1, a2, a3, a4, a6, a7, a31]
# Case 4:single shard n*(d-1) messages, gossip
class Case4(Case):
    """Load case 4: single shard, n*(d-1) + 1 messages plus gossip (IHAVE) load."""

    label: str = "case 4"
    legend: str = "Case 4. top: 6-regular; receive load per node, current operation, incl. gossip"

    def load(self, n_users, **kwargs):
        """Return the relay load (as in case 3) plus the gossip load."""
        # Relayed messages: same count as in case 3.
        received_per_hour = messages_sent_per_hour * (n_users * (average_node_degree - 1) + 1)
        relay_load = message_size * received_per_hour

        # IHAVE announcements.
        # Open question kept from the original author: should this be modelled as
        # batched messages, n * heartbeat_count * (d-1)_batches * batch size?
        ihave_count = messages_sent_per_hour * n_users * d_lazy * mcache_gossip
        ihave_load = ihave_count * gossip_message_size

        # Replies to IHAVE: only the requested full message (``message_size``) is
        # counted here; no IWANT control-message size is added by this expression.
        reply_load = (ihave_count * message_size) * avg_ratio_gossip_replys

        return relay_load + (ihave_load + reply_load)

    @property
    def header(self) -> str:
        """Title used in the printed description."""
        return "Load case 4 (received load per node incl. gossip)"

    @property
    def assumptions(self):
        """Assumptions this case relies on."""
        return [a1, a2, a3, a4, a6, a7, a32, a33]
# Case 5:single shard n*(d-1) messages, IDONTWANT
class Case5(Case):
    """Load case 5: single shard, n*(d-1) messages with IDONTWANT, no IHAVE/IWANT."""

    label: str = "case 5"
    legend: str = (
        "Case 5. top: 6-regular; receive load per node, incl. IDONTWANT without IHAVE/IWANT"
    )

    def load(self, n_users, **kwargs):
        """Return the message load plus the IDONTWANT load, scaled by ``n_users``.

        Messages are split into small and big ones by ``ratio_of_big_messages``.
        For big messages, later copies are weighted by ``idontwant_too_late``.
        """
        # Share of messages relayed from another node rather than originated
        # locally. Derived from "per-node messages" = xd - x, where
        # x = messages_sent_per_hour.
        relayed_share = (average_node_degree - 1) / average_node_degree

        # Share of the copies a node sees that are the first sighting: of the
        # d edges into a node, one delivers the message first and the other
        # d - 1 deliver it afterwards.
        first_seen_share = 1 / average_node_degree

        # ---- per-node quantities ----
        per_node_sends = messages_sent_per_hour * average_node_degree
        small_count = per_node_sends * (1 - ratio_of_big_messages) * relayed_share
        big_count = per_node_sends * ratio_of_big_messages * relayed_share

        big_first = big_count * first_seen_share
        # Copies arriving after the first one (2nd, 3rd, 4th... instance). Not
        # all of them come into existence: weight by `idontwant_too_late`.
        big_late = (big_count * (1 - first_seen_share)) * idontwant_too_late

        # Factor in message sizes.
        small_load = small_message_size * small_count
        big_load = big_message_size * (big_first + big_late)

        # ---- scale by the number of users ----
        idontwant_load = n_users * big_first * idontwant_message_size
        payload_load = n_users * (big_load + small_load)
        return payload_load + idontwant_load

    @property
    def header(self) -> str:
        """Title used in the printed description."""
        return "Load case 5 (received load per node with IDONTWANT messages)"

    @property
    def assumptions(self):
        """Assumptions this case relies on."""
        return [a1, a2, a3, a4, a6, a7, a16, a17, a34, a35, a36, a37]
# sharding case 1: multi shard, n*(d-1) messages, gossip
class ShardingCase1(Case):
    """Sharding case 1: multiple shards, n*(d-1) messages, gossip."""

    label: str = "case 1"
    legend: str = "Sharding case 1. sharding: top: 6-regular; receive load per node, incl gossip"

    def load(self, n_users, **kwargs):
        """Return the case-4 load of one shard times ``avg_shards_per_node``."""
        # The shard population is n_users / 3, capped at avg_nodes_per_shard.
        nodes_in_shard = np.minimum(n_users / 3, avg_nodes_per_shard)
        per_shard_load = Case4().load(nodes_in_shard)
        return avg_shards_per_node * per_shard_load

    @property
    def header(self) -> str:
        """Title used in the printed description."""
        return "load sharding case 1 (received load per node incl. gossip)"

    @property
    def assumptions(self):
        """Assumptions this case relies on."""
        return [a1, a2, a3, a4, a6, a8, a9, a10, a11, a32, a33]
# sharding case 2: multi shard, n*(d-1) messages, gossip, 1:1 chat
class ShardingCase2(Case):
    """Sharding case 2: multiple shards, n*(d-1) messages, gossip, plus a 1:1 chat shard."""

    label: str = "case 2"
    legend: str = (
        "Sharding case 2. sharding: top: 6-regular; receive load per node, incl gossip and 1:1 chat"
    )

    def load(self, n_users, **kwargs):
        """Return the load of the regular shards plus the load of the 1:1 chat shard."""
        gossip_case = Case4()
        # Regular shards: population n_users / 3, capped at avg_nodes_per_shard.
        per_shard_load = gossip_case.load(np.minimum(n_users / 3, avg_nodes_per_shard))
        # 1:1 chat shard: population n_users, capped at avg_nodes_per_shard.
        one_to_one_load = gossip_case.load(np.minimum(n_users, avg_nodes_per_shard))
        return (avg_shards_per_node * per_shard_load) + one_to_one_load

    @property
    def header(self) -> str:
        """Title used in the printed description."""
        return "load sharding case 2 (received load per node incl. gossip and 1:1 chat)"

    @property
    def assumptions(self):
        """Assumptions this case relies on."""
        return [a1, a2, a3, a4, a6, a8, a9, a10, a11, a12, a13, a14, a32, a33]
# sharding case 3: multi shard, naive light node
class ShardingCase3(Case):
    """Sharding case 3: multiple shards, naive light node."""

    label: str = "case 3"
    legend: str = "Sharding case 3. sharding: top: 6-regular; regular load for naive light node"

    def load(self, n_users, **kwargs):
        """Return the case-1 load of one shard times ``avg_shards_per_node``."""
        # The shard population is n_users / 3, capped at avg_nodes_per_shard.
        nodes_in_shard = np.minimum(n_users / 3, avg_nodes_per_shard)
        per_shard_load = Case1().load(nodes_in_shard)
        return avg_shards_per_node * per_shard_load

    @property
    def header(self) -> str:
        """Title used in the printed description."""
        return "load sharding case 3 (received load naive light node.)"

    @property
    def assumptions(self):
        """Assumptions this case relies on."""
        return [a1, a2, a3, a4, a6, a8, a9, a10, a15, a32, a33]
# Cases average latency
# -----------------------------------------------------------
class LatencyCase1(Case):
    """Latency case 1: 6-regular topology without gossip."""

    label: str = "latency case 1"
    legend: str = "Latency case 1. topology: 6-regular graph. No gossip"

    def load(self, n_users, degree):
        """Return the estimated latency: propagation time plus transmission time.

        Args:
            n_users: Number of nodes in the network.
            degree: Node degree used for the longest-path estimate.

        Returns:
            ``hops * average_delay_per_hop + hops * (data_rate / average_peer_bandwidth)``.
        """
        # ceil(log_d(n)) gives a closer approximation of the longest path involved.
        hops = math.ceil(avg_node_distance_upper_bound(n_users, degree))

        hourly_data = n_users * messages_sent_per_hour * message_size
        # On average every peer makes d/2 transmissions per message; the factor 8
        # and the division by 3600 convert the hourly volume to a per-second bit
        # rate (the original comment labels it Mbps — unit depends on
        # `message_size`, TODO confirm).
        # NOTE(review): this uses the module-level `average_node_degree`, not the
        # `degree` argument — confirm that this is intended.
        data_rate = (hourly_data * (average_node_degree / 2) * 8) / 3600

        transmission_time = hops * (data_rate / average_peer_bandwidth)  # sec
        propagation_time = hops * average_delay_per_hop  # sec
        return propagation_time + transmission_time

    @property
    def header(self) -> str:
        """Title used in the printed description."""
        return (
            "Latency case 1 :: Topology: 6-regular graph. No gossip (note: gossip would help here)"
        )

    @property
    def description(self) -> str:
        """Report text: header, assumptions, latency lines and a separator rule."""
        sections = [
            "",
            get_header(self.header),
            get_assumptions_str(self.assumptions),
            get_latency_str(self.load),
            "------------------------------------------------------------",
        ]
        return "\n".join(sections)

    @property
    def assumptions(self):
        """Assumptions this case relies on."""
        return [a3, a41, a42, a43]