Add price tier to LLM model metadata and registry

Introduces a 'priceTier' attribute (1=cheapest, 2=medium, 3=expensive) to LlmModel in the database schema, model metadata, and registry logic. Updates migrations and seed data to support price tier for LLM models, enabling cost-based filtering and selection in the LLM Picker UI.
This commit is contained in:
Bentlybro
2026-01-22 11:52:37 +00:00
parent 8e3aabd558
commit 9496b33a1c
5 changed files with 117 additions and 79 deletions

View File

@@ -1,11 +1,25 @@
"""Type definitions for LLM model metadata."""
from typing import NamedTuple
from typing import Literal, NamedTuple
class ModelMetadata(NamedTuple):
"""Metadata for an LLM model."""
"""Metadata for an LLM model.
Attributes:
provider: The provider identifier (e.g., "openai", "anthropic")
context_window: Maximum context window size in tokens
max_output_tokens: Maximum output tokens (None if unlimited)
display_name: Human-readable name for the model
provider_name: Human-readable provider name (e.g., "OpenAI", "Anthropic")
creator_name: Name of the organization that created the model
price_tier: Relative cost tier (1=cheapest, 2=medium, 3=expensive)
"""
provider: str
context_window: int
max_output_tokens: int | None
display_name: str
provider_name: str
creator_name: str
price_tier: Literal[1, 2, 3]

View File

@@ -136,10 +136,26 @@ async def refresh_llm_registry() -> None:
provider_name = (
record.Provider.name if record.Provider else record.providerId
)
provider_display_name = (
record.Provider.displayName if record.Provider else record.providerId
)
# Creator name: prefer Creator.name, fall back to the provider display name
creator_name = (
record.Creator.name if record.Creator else provider_display_name
)
# Price tier: default to 1 (cheapest) when the attribute is missing, None, or 0
price_tier = getattr(record, "priceTier", 1) or 1
# Clamp to valid range 1-3
price_tier = max(1, min(3, price_tier))
metadata = ModelMetadata(
provider=provider_name,
context_window=record.contextWindow,
max_output_tokens=record.maxOutputTokens,
display_name=record.displayName,
provider_name=provider_display_name,
creator_name=creator_name,
price_tier=price_tier, # type: ignore[arg-type]
)
costs = tuple(
RegistryModelCost(

View File

@@ -16,10 +16,11 @@ VALUES
ON CONFLICT ("name") DO NOTHING;
-- Insert Models (using CTEs to reference provider IDs)
-- priceTier: 1=cheapest, 2=medium, 3=expensive (based on original MODEL_METADATA)
WITH provider_ids AS (
SELECT "id", "name" FROM "LlmProvider"
)
INSERT INTO "LlmModel" ("id", "slug", "displayName", "description", "providerId", "contextWindow", "maxOutputTokens", "isEnabled", "capabilities", "metadata")
INSERT INTO "LlmModel" ("id", "slug", "displayName", "description", "providerId", "contextWindow", "maxOutputTokens", "priceTier", "isEnabled", "capabilities", "metadata")
SELECT
gen_random_uuid(),
model_slug,
@@ -28,93 +29,94 @@ SELECT
p."id",
context_window,
max_output_tokens,
price_tier,
true,
'{}'::jsonb,
'{}'::jsonb
FROM (VALUES
-- OpenAI models
('o3', 'O3', 'openai', 200000, 100000),
('o3-mini', 'O3 Mini', 'openai', 200000, 100000),
('o1', 'O1', 'openai', 200000, 100000),
('o1-mini', 'O1 Mini', 'openai', 128000, 65536),
('gpt-5-2025-08-07', 'GPT 5', 'openai', 400000, 128000),
('gpt-5.1-2025-11-13', 'GPT 5.1', 'openai', 400000, 128000),
('gpt-5-mini-2025-08-07', 'GPT 5 Mini', 'openai', 400000, 128000),
('gpt-5-nano-2025-08-07', 'GPT 5 Nano', 'openai', 400000, 128000),
('gpt-5-chat-latest', 'GPT 5 Chat', 'openai', 400000, 16384),
('gpt-4.1-2025-04-14', 'GPT 4.1', 'openai', 1047576, 32768),
('gpt-4.1-mini-2025-04-14', 'GPT 4.1 Mini', 'openai', 1047576, 32768),
('gpt-4o-mini', 'GPT 4o Mini', 'openai', 128000, 16384),
('gpt-4o', 'GPT 4o', 'openai', 128000, 16384),
('gpt-4-turbo', 'GPT 4 Turbo', 'openai', 128000, 4096),
('gpt-3.5-turbo', 'GPT 3.5 Turbo', 'openai', 16385, 4096),
-- OpenAI models (slug, display_name, provider, context_window, max_output, price_tier)
('o3', 'O3', 'openai', 200000, 100000, 2),
('o3-mini', 'O3 Mini', 'openai', 200000, 100000, 1),
('o1', 'O1', 'openai', 200000, 100000, 3),
('o1-mini', 'O1 Mini', 'openai', 128000, 65536, 2),
('gpt-5-2025-08-07', 'GPT 5', 'openai', 400000, 128000, 1),
('gpt-5.1-2025-11-13', 'GPT 5.1', 'openai', 400000, 128000, 2),
('gpt-5-mini-2025-08-07', 'GPT 5 Mini', 'openai', 400000, 128000, 1),
('gpt-5-nano-2025-08-07', 'GPT 5 Nano', 'openai', 400000, 128000, 1),
('gpt-5-chat-latest', 'GPT 5 Chat', 'openai', 400000, 16384, 2),
('gpt-4.1-2025-04-14', 'GPT 4.1', 'openai', 1047576, 32768, 1),
('gpt-4.1-mini-2025-04-14', 'GPT 4.1 Mini', 'openai', 1047576, 32768, 1),
('gpt-4o-mini', 'GPT 4o Mini', 'openai', 128000, 16384, 1),
('gpt-4o', 'GPT 4o', 'openai', 128000, 16384, 2),
('gpt-4-turbo', 'GPT 4 Turbo', 'openai', 128000, 4096, 3),
('gpt-3.5-turbo', 'GPT 3.5 Turbo', 'openai', 16385, 4096, 1),
-- Anthropic models
('claude-opus-4-1-20250805', 'Claude 4.1 Opus', 'anthropic', 200000, 32000),
('claude-opus-4-20250514', 'Claude 4 Opus', 'anthropic', 200000, 32000),
('claude-sonnet-4-20250514', 'Claude 4 Sonnet', 'anthropic', 200000, 64000),
('claude-opus-4-5-20251101', 'Claude 4.5 Opus', 'anthropic', 200000, 64000),
('claude-sonnet-4-5-20250929', 'Claude 4.5 Sonnet', 'anthropic', 200000, 64000),
('claude-haiku-4-5-20251001', 'Claude 4.5 Haiku', 'anthropic', 200000, 64000),
('claude-3-7-sonnet-20250219', 'Claude 3.7 Sonnet', 'anthropic', 200000, 64000),
('claude-3-haiku-20240307', 'Claude 3 Haiku', 'anthropic', 200000, 4096),
('claude-opus-4-1-20250805', 'Claude 4.1 Opus', 'anthropic', 200000, 32000, 3),
('claude-opus-4-20250514', 'Claude 4 Opus', 'anthropic', 200000, 32000, 3),
('claude-sonnet-4-20250514', 'Claude 4 Sonnet', 'anthropic', 200000, 64000, 2),
('claude-opus-4-5-20251101', 'Claude 4.5 Opus', 'anthropic', 200000, 64000, 3),
('claude-sonnet-4-5-20250929', 'Claude 4.5 Sonnet', 'anthropic', 200000, 64000, 3),
('claude-haiku-4-5-20251001', 'Claude 4.5 Haiku', 'anthropic', 200000, 64000, 2),
('claude-3-7-sonnet-20250219', 'Claude 3.7 Sonnet', 'anthropic', 200000, 64000, 2),
('claude-3-haiku-20240307', 'Claude 3 Haiku', 'anthropic', 200000, 4096, 1),
-- AI/ML API models
('Qwen/Qwen2.5-72B-Instruct-Turbo', 'Qwen 2.5 72B', 'aiml_api', 32000, 8000),
('nvidia/llama-3.1-nemotron-70b-instruct', 'Llama 3.1 Nemotron 70B', 'aiml_api', 128000, 40000),
('meta-llama/Llama-3.3-70B-Instruct-Turbo', 'Llama 3.3 70B', 'aiml_api', 128000, NULL),
('meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo', 'Meta Llama 3.1 70B', 'aiml_api', 131000, 2000),
('meta-llama/Llama-3.2-3B-Instruct-Turbo', 'Llama 3.2 3B', 'aiml_api', 128000, NULL),
('Qwen/Qwen2.5-72B-Instruct-Turbo', 'Qwen 2.5 72B', 'aiml_api', 32000, 8000, 1),
('nvidia/llama-3.1-nemotron-70b-instruct', 'Llama 3.1 Nemotron 70B', 'aiml_api', 128000, 40000, 1),
('meta-llama/Llama-3.3-70B-Instruct-Turbo', 'Llama 3.3 70B', 'aiml_api', 128000, NULL, 1),
('meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo', 'Meta Llama 3.1 70B', 'aiml_api', 131000, 2000, 1),
('meta-llama/Llama-3.2-3B-Instruct-Turbo', 'Llama 3.2 3B', 'aiml_api', 128000, NULL, 1),
-- Groq models
('llama-3.3-70b-versatile', 'Llama 3.3 70B', 'groq', 128000, 32768),
('llama-3.1-8b-instant', 'Llama 3.1 8B', 'groq', 128000, 8192),
('llama-3.3-70b-versatile', 'Llama 3.3 70B', 'groq', 128000, 32768, 1),
('llama-3.1-8b-instant', 'Llama 3.1 8B', 'groq', 128000, 8192, 1),
-- Ollama models
('llama3.3', 'Llama 3.3', 'ollama', 8192, NULL),
('llama3.2', 'Llama 3.2', 'ollama', 8192, NULL),
('llama3', 'Llama 3', 'ollama', 8192, NULL),
('llama3.1:405b', 'Llama 3.1 405B', 'ollama', 8192, NULL),
('dolphin-mistral:latest', 'Dolphin Mistral', 'ollama', 32768, NULL),
('llama3.3', 'Llama 3.3', 'ollama', 8192, NULL, 1),
('llama3.2', 'Llama 3.2', 'ollama', 8192, NULL, 1),
('llama3', 'Llama 3', 'ollama', 8192, NULL, 1),
('llama3.1:405b', 'Llama 3.1 405B', 'ollama', 8192, NULL, 1),
('dolphin-mistral:latest', 'Dolphin Mistral', 'ollama', 32768, NULL, 1),
-- OpenRouter models
('google/gemini-2.5-pro-preview-03-25', 'Gemini 2.5 Pro', 'open_router', 1050000, 8192),
('google/gemini-3-pro-preview', 'Gemini 3 Pro Preview', 'open_router', 1048576, 65535),
('google/gemini-2.5-flash', 'Gemini 2.5 Flash', 'open_router', 1048576, 65535),
('google/gemini-2.0-flash-001', 'Gemini 2.0 Flash', 'open_router', 1048576, 8192),
('google/gemini-2.5-flash-lite-preview-06-17', 'Gemini 2.5 Flash Lite Preview', 'open_router', 1048576, 65535),
('google/gemini-2.0-flash-lite-001', 'Gemini 2.0 Flash Lite', 'open_router', 1048576, 8192),
('mistralai/mistral-nemo', 'Mistral Nemo', 'open_router', 128000, 4096),
('cohere/command-r-08-2024', 'Command R', 'open_router', 128000, 4096),
('cohere/command-r-plus-08-2024', 'Command R Plus', 'open_router', 128000, 4096),
('deepseek/deepseek-chat', 'DeepSeek Chat', 'open_router', 64000, 2048),
('deepseek/deepseek-r1-0528', 'DeepSeek R1', 'open_router', 163840, 163840),
('perplexity/sonar', 'Perplexity Sonar', 'open_router', 127000, 8000),
('perplexity/sonar-pro', 'Perplexity Sonar Pro', 'open_router', 200000, 8000),
('perplexity/sonar-deep-research', 'Perplexity Sonar Deep Research', 'open_router', 128000, 16000),
('nousresearch/hermes-3-llama-3.1-405b', 'Hermes 3 Llama 3.1 405B', 'open_router', 131000, 4096),
('nousresearch/hermes-3-llama-3.1-70b', 'Hermes 3 Llama 3.1 70B', 'open_router', 12288, 12288),
('openai/gpt-oss-120b', 'GPT OSS 120B', 'open_router', 131072, 131072),
('openai/gpt-oss-20b', 'GPT OSS 20B', 'open_router', 131072, 32768),
('amazon/nova-lite-v1', 'Amazon Nova Lite', 'open_router', 300000, 5120),
('amazon/nova-micro-v1', 'Amazon Nova Micro', 'open_router', 128000, 5120),
('amazon/nova-pro-v1', 'Amazon Nova Pro', 'open_router', 300000, 5120),
('microsoft/wizardlm-2-8x22b', 'WizardLM 2 8x22B', 'open_router', 65536, 4096),
('gryphe/mythomax-l2-13b', 'MythoMax L2 13B', 'open_router', 4096, 4096),
('meta-llama/llama-4-scout', 'Llama 4 Scout', 'open_router', 131072, 131072),
('meta-llama/llama-4-maverick', 'Llama 4 Maverick', 'open_router', 1048576, 1000000),
('x-ai/grok-4', 'Grok 4', 'open_router', 256000, 256000),
('x-ai/grok-4-fast', 'Grok 4 Fast', 'open_router', 2000000, 30000),
('x-ai/grok-4.1-fast', 'Grok 4.1 Fast', 'open_router', 2000000, 30000),
('x-ai/grok-code-fast-1', 'Grok Code Fast 1', 'open_router', 256000, 10000),
('moonshotai/kimi-k2', 'Kimi K2', 'open_router', 131000, 131000),
('qwen/qwen3-235b-a22b-thinking-2507', 'Qwen 3 235B Thinking', 'open_router', 262144, 262144),
('qwen/qwen3-coder', 'Qwen 3 Coder', 'open_router', 262144, 262144),
('google/gemini-2.5-pro-preview-03-25', 'Gemini 2.5 Pro', 'open_router', 1050000, 8192, 2),
('google/gemini-3-pro-preview', 'Gemini 3 Pro Preview', 'open_router', 1048576, 65535, 2),
('google/gemini-2.5-flash', 'Gemini 2.5 Flash', 'open_router', 1048576, 65535, 1),
('google/gemini-2.0-flash-001', 'Gemini 2.0 Flash', 'open_router', 1048576, 8192, 1),
('google/gemini-2.5-flash-lite-preview-06-17', 'Gemini 2.5 Flash Lite Preview', 'open_router', 1048576, 65535, 1),
('google/gemini-2.0-flash-lite-001', 'Gemini 2.0 Flash Lite', 'open_router', 1048576, 8192, 1),
('mistralai/mistral-nemo', 'Mistral Nemo', 'open_router', 128000, 4096, 1),
('cohere/command-r-08-2024', 'Command R', 'open_router', 128000, 4096, 1),
('cohere/command-r-plus-08-2024', 'Command R Plus', 'open_router', 128000, 4096, 2),
('deepseek/deepseek-chat', 'DeepSeek Chat', 'open_router', 64000, 2048, 1),
('deepseek/deepseek-r1-0528', 'DeepSeek R1', 'open_router', 163840, 163840, 1),
('perplexity/sonar', 'Perplexity Sonar', 'open_router', 127000, 8000, 1),
('perplexity/sonar-pro', 'Perplexity Sonar Pro', 'open_router', 200000, 8000, 2),
('perplexity/sonar-deep-research', 'Perplexity Sonar Deep Research', 'open_router', 128000, 16000, 3),
('nousresearch/hermes-3-llama-3.1-405b', 'Hermes 3 Llama 3.1 405B', 'open_router', 131000, 4096, 1),
('nousresearch/hermes-3-llama-3.1-70b', 'Hermes 3 Llama 3.1 70B', 'open_router', 12288, 12288, 1),
('openai/gpt-oss-120b', 'GPT OSS 120B', 'open_router', 131072, 131072, 1),
('openai/gpt-oss-20b', 'GPT OSS 20B', 'open_router', 131072, 32768, 1),
('amazon/nova-lite-v1', 'Amazon Nova Lite', 'open_router', 300000, 5120, 1),
('amazon/nova-micro-v1', 'Amazon Nova Micro', 'open_router', 128000, 5120, 1),
('amazon/nova-pro-v1', 'Amazon Nova Pro', 'open_router', 300000, 5120, 1),
('microsoft/wizardlm-2-8x22b', 'WizardLM 2 8x22B', 'open_router', 65536, 4096, 1),
('gryphe/mythomax-l2-13b', 'MythoMax L2 13B', 'open_router', 4096, 4096, 1),
('meta-llama/llama-4-scout', 'Llama 4 Scout', 'open_router', 131072, 131072, 1),
('meta-llama/llama-4-maverick', 'Llama 4 Maverick', 'open_router', 1048576, 1000000, 1),
('x-ai/grok-4', 'Grok 4', 'open_router', 256000, 256000, 3),
('x-ai/grok-4-fast', 'Grok 4 Fast', 'open_router', 2000000, 30000, 1),
('x-ai/grok-4.1-fast', 'Grok 4.1 Fast', 'open_router', 2000000, 30000, 1),
('x-ai/grok-code-fast-1', 'Grok Code Fast 1', 'open_router', 256000, 10000, 1),
('moonshotai/kimi-k2', 'Kimi K2', 'open_router', 131000, 131000, 1),
('qwen/qwen3-235b-a22b-thinking-2507', 'Qwen 3 235B Thinking', 'open_router', 262144, 262144, 1),
('qwen/qwen3-coder', 'Qwen 3 Coder', 'open_router', 262144, 262144, 3),
-- Llama API models
('Llama-4-Scout-17B-16E-Instruct-FP8', 'Llama 4 Scout', 'llama_api', 128000, 4028),
('Llama-4-Maverick-17B-128E-Instruct-FP8', 'Llama 4 Maverick', 'llama_api', 128000, 4028),
('Llama-3.3-8B-Instruct', 'Llama 3.3 8B', 'llama_api', 128000, 4028),
('Llama-3.3-70B-Instruct', 'Llama 3.3 70B', 'llama_api', 128000, 4028),
('Llama-4-Scout-17B-16E-Instruct-FP8', 'Llama 4 Scout', 'llama_api', 128000, 4028, 1),
('Llama-4-Maverick-17B-128E-Instruct-FP8', 'Llama 4 Maverick', 'llama_api', 128000, 4028, 1),
('Llama-3.3-8B-Instruct', 'Llama 3.3 8B', 'llama_api', 128000, 4028, 1),
('Llama-3.3-70B-Instruct', 'Llama 3.3 70B', 'llama_api', 128000, 4028, 1),
-- v0 models
('v0-1.5-md', 'v0 1.5 MD', 'v0', 128000, 64000),
('v0-1.5-lg', 'v0 1.5 LG', 'v0', 512000, 64000),
('v0-1.0-md', 'v0 1.0 MD', 'v0', 128000, 64000)
) AS models(model_slug, model_display_name, provider_name, context_window, max_output_tokens)
('v0-1.5-md', 'v0 1.5 MD', 'v0', 128000, 64000, 1),
('v0-1.5-lg', 'v0 1.5 LG', 'v0', 512000, 64000, 1),
('v0-1.0-md', 'v0 1.0 MD', 'v0', 128000, 64000, 1)
) AS models(model_slug, model_display_name, provider_name, context_window, max_output_tokens, price_tier)
JOIN provider_ids p ON p."name" = models.provider_name
ON CONFLICT ("slug") DO NOTHING;

View File

@@ -0,0 +1,5 @@
-- Add priceTier column to LlmModel table.
-- This extends model metadata for the LLM Picker UI.
-- priceTier: 1=cheapest, 2=medium, 3=expensive
--
-- IF NOT EXISTS makes the statement a no-op when the column is already present.
-- NOT NULL DEFAULT 1 means rows that exist before this migration are filled
-- with tier 1 (cheapest).
-- NOTE(review): the 1-3 range is not enforced here (no CHECK constraint), so
-- any integer can be stored; readers of this column should validate or clamp it.
ALTER TABLE "LlmModel" ADD COLUMN IF NOT EXISTS "priceTier" INTEGER NOT NULL DEFAULT 1;

View File

@@ -1168,6 +1168,7 @@ model LlmModel {
contextWindow Int
maxOutputTokens Int?
priceTier Int @default(1) // 1=cheapest, 2=medium, 3=expensive
isEnabled Boolean @default(true)
isRecommended Boolean @default(false)