Files
lollms_hub/useful_scripts/mbra_simulator.py
Saifeddine ALOUI 2d2bc728c0 feat: enhance system architecture, introduce space_hunter, and update admin interfaces
Summary of Changes:
- [INFRA] Added `space_hunter.py` to the project root for new system utilities.
- [CONFIG] Updated `.gitignore` to include `*.db-wal` files and exclude `docs/article` directory.
- [UI] Refreshed admin templates (`models_manager.html`, `settings.html`) to reflect new architectural capabilities.
- [CORE] Modified `architect_manager.py` to support image generation capabilities (`supports_images`).
- [CORE] Updated `memory_manager.py` to improve context formatting and category handling.
- [API] Implemented new route logic and rate limiting dependencies in `proxy.py` and `admin.py`.
- [MODELS] Updated `models.py` to support new fields and `migrations.py` to handle schema evolution.

Technical Notes:
- The `architect_manager` now manages assets including image nodes.
- Memory formatting has been streamlined to handle multiple titles per category.
- Admin settings and model management pages have been updated to visualize these new capabilities.

BREAKING CHANGE: None.
2026-04-17 19:22:34 +02:00

185 lines
7.6 KiB
Python

import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics.pairwise import cosine_similarity
import time
# --- CONSTANTS & CONFIGURATION ---
EXPERIMENTS = 20  # Number of full simulation sessions
TRIALS = 1000  # Requests per session

# Weights of the individual terms in the SB-MRA routing score.
KNOBS = {
    "W_PRIORITY": 1.0,
    "W_RELIABILITY": 3.0,
    "W_ECOLOGY": 2.0,
    "W_SEMANTIC": 4.0,
}

# Prompt catalogue sampled by the simulation.
#   "text": the prompt itself
#   "c":    complexity value (entries here range from 0.05 to 0.95)
#   "tags": topic labels used to build the prompt's tag vector
PROMPTS = [
    {"text": "Hello!", "c": 0.05, "tags": ["greeting"]},
    {"text": "Debug this Rust memory leak.", "c": 0.9, "tags": ["code", "analysis"]},
    {"text": "Summarize this medical case.", "c": 0.7, "tags": ["medical", "data"]},
    {"text": "Calculate the orbital mechanics of Mars.", "c": 0.95, "tags": ["math", "analysis"]},
    {"text": "What's the weather like?", "c": 0.1, "tags": ["greeting"]},
]

# Alphabetically sorted, de-duplicated tag vocabulary across all prompts.
UNIQUE_TAGS = sorted({tag for prompt in PROMPTS for tag in prompt["tags"]})
# Tag -> position of that tag inside a tag vector.
TAG_MAP = {tag: i for i, tag in enumerate(UNIQUE_TAGS)}
# Dimensionality of every tag vector.
DIM = len(UNIQUE_TAGS)
class ServerNode:
    """Simulated backend node with random failures and a Beta reliability belief.

    Attributes:
        name: Display name of the node.
        priority: Static priority value supplied by the caller.
        base_success_rate: Probability that a request that reaches the
            success draw actually succeeds.
        avg_tps: Throughput value returned to the caller on success.
        sigma: Numeric tier of the node (presumably a model-size class;
            only nodes with ``sigma == 3`` can crash with an OOM).
        profile_vec: ``profile_vec`` wrapped in an outer list and converted
            to a NumPy array, so a flat sequence of length n is stored as a
            ``(1, n)`` array.
        alpha, beta: Parameters of the Beta distribution sampled by
            ``sample_reliability`` (initialised to 10.0 and 1.0).
        active_requests: Number of requests currently in flight.
        rate_limit: Maximum allowed value of ``active_requests``.
        is_oom: True while the node is in an OOM outage.
        oom_cooldown: Remaining calls for which the outage lasts.
    """

    def __init__(self, name, priority, success_rate, tps, sigma, profile_vec, rate_limit=5):
        """Store the static node parameters and reset all mutable state."""
        self.name = name
        self.priority = priority
        self.base_success_rate = success_rate
        self.avg_tps = tps
        self.sigma = sigma
        self.profile_vec = np.array([profile_vec])
        # State Tracking
        self.alpha = 10.0
        self.beta = 1.0
        self.active_requests = 0
        self.rate_limit = rate_limit
        self.is_oom = False
        self.oom_cooldown = 0

    def sample_reliability(self):
        """Draw one sample from Beta(alpha, beta) using the ``random`` module."""
        return random.betavariate(self.alpha, self.beta)

    def process_request(self, complexity):
        """Simulate one request and report its outcome.

        Args:
            complexity: Complexity of the prompt; values above 0.8 can
                trigger an OOM crash on ``sigma == 3`` nodes.

        Returns:
            A ``(success, tps, code)`` tuple. ``tps`` is ``avg_tps`` on
            success and 0 otherwise. ``code`` is one of ``"SUCCESS"``,
            ``"NODE_OOM"``, ``"CRASH_OOM"``, ``"RATE_LIMIT_EXCEEDED"`` or
            ``"NETWORK_TIMEOUT"``.
        """
        # 1. Handle OOM outages. The cooldown only ticks when this node is
        #    actually called, and the call that ends the outage still fails.
        if self.is_oom:
            self.oom_cooldown -= 1
            if self.oom_cooldown <= 0:
                self.is_oom = False
            return False, 0, "NODE_OOM"

        # 2. Simulate a random OOM crash (sigma-3 nodes on complex prompts).
        #    The random draw is only consumed when both other conditions hold.
        if self.sigma == 3 and complexity > 0.8 and random.random() < 0.05:
            self.is_oom = True
            self.oom_cooldown = 20  # Out for the next 20 calls to this node
            return False, 0, "CRASH_OOM"

        # 3. Simulate rate limiting.
        #    NOTE(review): the slot taken below is released before this
        #    method returns, so unless a caller changes active_requests
        #    externally this branch only fires when rate_limit <= 0.
        if self.active_requests >= self.rate_limit:
            return False, 0, "RATE_LIMIT_EXCEEDED"

        # 4. Success logic. The previously computed jittered latency was
        #    never read or returned, so that dead computation (and its draw
        #    from NumPy's global RNG) has been removed.
        self.active_requests += 1
        success = random.random() < self.base_success_rate
        self.active_requests -= 1

        if not success:
            return False, 0, "NETWORK_TIMEOUT"
        return True, self.avg_tps, "SUCCESS"
def _tags_to_vec(active_tags):
    """Return a DIM-length list with 1.0 at TAG_MAP[tag] for each tag, else 0.0."""
    vec = [0.0] * DIM
    for tag in active_tags:
        vec[TAG_MAP[tag]] = 1.0
    return vec


def _build_cluster():
    """Create the fresh list of ServerNode instances used for one experiment."""
    return [
        ServerNode("Giant-GPU", priority=1, success_rate=0.99, tps=80, sigma=3,
                   profile_vec=[0.5] * DIM, rate_limit=10),
        ServerNode("Code-Specialist", priority=5, success_rate=0.95, tps=40, sigma=2,
                   profile_vec=_tags_to_vec(['code', 'analysis'])),
        ServerNode("Math-Specialist", priority=5, success_rate=0.96, tps=35, sigma=2,
                   profile_vec=_tags_to_vec(['math'])),
        ServerNode("Tiny-Llama", priority=10, success_rate=0.98, tps=120, sigma=1,
                   profile_vec=_tags_to_vec(['greeting'])),
        ServerNode("Unstable-Cloud", priority=2, success_rate=0.70, tps=100, sigma=3,
                   profile_vec=[0.5] * DIM, rate_limit=2),
    ]


def _sbmra_score(node, complexity, dist):
    """Return the SB-MRA routing score of ``node`` for one prompt (lower is better)."""
    # Sampled unreliability, scaled to 0..100.
    rel_p = (1.0 - node.sample_reliability()) * 100
    # Squared over-provisioning; zero when the node is not larger than needed.
    needed = complexity * 3
    epp = (node.sigma - needed) ** 2 if node.sigma > needed else 0
    return (
        (KNOBS["W_PRIORITY"] * node.priority)
        + (KNOBS["W_RELIABILITY"] * rel_p)
        + (KNOBS["W_ECOLOGY"] * epp * 10)
        + (KNOBS["W_SEMANTIC"] * dist * 100)
    )


def run_simulation():
    """Run the Monte Carlo comparison of the routing algorithms and print a report.

    For each of EXPERIMENTS sessions a fresh cluster is built and TRIALS
    random prompts are routed once per algorithm. Per-session success rate,
    mean waste (node sigma in excess of ``complexity * 3``) and mean semantic
    distance (1 - cosine similarity between prompt and node profile) are
    collected, and their mean/variance across sessions is printed as a table.

    NOTE(review): within one experiment all algorithms share the same node
    instances, so OOM outages and the alpha/beta updates made while running
    "SB-MRA (Ours)" are visible to the other algorithms as well.

    Returns:
        None. Results are written to stdout.
    """
    algos = ["RoundRobin", "FixedPriority", "ReliabilityOnly", "SB-MRA (Ours)"]
    # Cumulative stats across all experiments
    # {algo: { metric: [value_per_experiment] }}
    stats_accumulator = {a: {"success": [], "waste": [], "dist": []} for a in algos}
    print(f"🚀 Starting Monte Carlo Simulation: {EXPERIMENTS} sessions of {TRIALS} trials...")

    for _ in range(EXPERIMENTS):
        # RESET CLUSTER FOR EACH EXPERIMENT
        nodes = _build_cluster()
        node_ids = range(len(nodes))
        # One row per node, so all prompt-to-node similarities come from one call.
        profiles = np.vstack([n.profile_vec for n in nodes])
        session_metrics = {a: {"success_count": 0, "total_waste": 0, "total_dist": 0} for a in algos}

        for i in range(TRIALS):
            prompt = random.choice(PROMPTS)
            p_complexity = prompt["c"]
            p_vec = np.array([_tags_to_vec(prompt["tags"])])
            # Add Semantic Noise
            p_vec += np.random.normal(0, 0.1, p_vec.shape)
            p_vec = np.clip(p_vec, 0, 1)
            # The prompt is the same for every algorithm in this trial, so the
            # semantic distance to each node is computed once and reused.
            dists = 1.0 - cosine_similarity(p_vec, profiles)[0]

            for algo in algos:
                if algo == "RoundRobin":
                    idx = i % len(nodes)
                elif algo == "FixedPriority":
                    # Blindly pick Giant-GPU
                    idx = 0
                elif algo == "ReliabilityOnly":
                    idx = max(node_ids, key=lambda k: nodes[k].sample_reliability())
                elif algo == "SB-MRA (Ours)":
                    idx = min(node_ids, key=lambda k: _sbmra_score(nodes[k], p_complexity, dists[k]))
                else:
                    raise ValueError(f"Unknown algorithm: {algo}")
                chosen_node = nodes[idx]

                # EXECUTE
                success, tps, _err_code = chosen_node.process_request(p_complexity)

                # UPDATE
                if algo == "SB-MRA (Ours)":
                    # Fractional Bayesian Update
                    if success:
                        reward = min(1.0, tps / 80.0)
                        chosen_node.alpha = (chosen_node.alpha * 0.95) + reward
                        chosen_node.beta = (chosen_node.beta * 0.95) + (1.0 - reward)
                    else:
                        chosen_node.beta += 1.0  # Significant penalty for failure

                # LOGGING SESSION
                # (The former block writing to an undefined `metrics` dict was
                # removed: it raised NameError and duplicated these counters.)
                tally = session_metrics[algo]
                tally["success_count"] += 1 if success else 0
                tally["total_waste"] += max(0, chosen_node.sigma - (p_complexity * 3))
                tally["total_dist"] += dists[idx]

        # Store results of this session
        for a in algos:
            stats_accumulator[a]["success"].append(session_metrics[a]["success_count"] / TRIALS)
            stats_accumulator[a]["waste"].append(session_metrics[a]["total_waste"] / TRIALS)
            stats_accumulator[a]["dist"].append(session_metrics[a]["total_dist"] / TRIALS)

    # FINAL STATISTICAL REPORT
    print("\n--- MONTE CARLO STATISTICAL ANALYSIS ---")
    report_data = []
    for a in algos:
        report_data.append({
            "Algorithm": a,
            "Success (Mean)": f"{np.mean(stats_accumulator[a]['success'])*100:.2f}%",
            "Success (Var)": f"{np.var(stats_accumulator[a]['success']):.6f}",
            "Waste (Mean)": f"{np.mean(stats_accumulator[a]['waste']):.3f}",
            "Waste (Var)": f"{np.var(stats_accumulator[a]['waste']):.6f}",
            "Sem. Dist (Mean)": f"{np.mean(stats_accumulator[a]['dist']):.3f}",
        })
    print(pd.DataFrame(report_data).to_string(index=False))
# Run the simulation only when this file is executed as a script, not on import.
if __name__ == "__main__":
    run_simulation()