Merge branch 'dev' into feat/task-decomposition-copilot

This commit is contained in:
An Vy Le
2026-04-22 07:19:02 +02:00
committed by GitHub
5 changed files with 436 additions and 5 deletions

View File

@@ -13,6 +13,7 @@ from backend.blocks._base import (
BlockSchemaInput,
BlockSchemaOutput,
)
from backend.blocks.llm import extract_openrouter_cost
from backend.data.block import BlockInput
from backend.data.model import (
APIKeyCredentials,
@@ -239,12 +240,24 @@ class PerplexityBlock(Block):
if "message" in choice and "annotations" in choice["message"]:
annotations = choice["message"]["annotations"]
# Update execution stats
# Update execution stats. ``execution_stats`` is instance state,
# so always reset token counters — a response without ``usage``
# must not leak a previous run's tokens into ``PlatformCostLog``.
self.execution_stats.input_token_count = 0
self.execution_stats.output_token_count = 0
if response.usage:
self.execution_stats.input_token_count = response.usage.prompt_tokens
self.execution_stats.output_token_count = (
response.usage.completion_tokens
)
# OpenRouter's ``x-total-cost`` response header carries the real
# per-request USD cost. Piping it into ``provider_cost`` lets the
# direct-run ``PlatformCostLog`` flow
# (``executor.cost_tracking::log_system_credential_cost``) record
# the actual operator-side spend instead of inferring from tokens.
# Always overwrite — ``execution_stats`` is instance state, so a
# response without the header must not reuse a previous run's cost.
self.execution_stats.provider_cost = extract_openrouter_cost(response)
return {"response": response_content, "annotations": annotations or []}

View File

@@ -9,8 +9,32 @@ when Redis is unavailable to avoid blocking users.
Storing microdollars rather than tokens means the counter already reflects
real model pricing (including cache discounts and provider surcharges), so
this module carries no pricing table — the cost comes from OpenRouter's
``usage.cost`` field (baseline) or the Claude Agent SDK's reported total
cost (SDK path).
``usage.cost`` field (baseline), the Claude Agent SDK's reported total
cost (SDK path), web_search tool calls, and the prompt-simulation harness.
Boundary with the credit wallet
===============================
Microdollars (this module) and credits (``backend.data.block_cost_config``)
are intentionally separate budgets:
* **Credits** are the user-facing prepaid wallet. Every block invocation
that has a ``BlockCost`` entry decrements credits — this is what the
user buys, tops up, and sees on the billing page. Marketplace blocks
may also charge credits to block creators. The credit charge is a flat
per-run amount sourced from ``BLOCK_COSTS``. Copilot ``run_block``
calls go through this path too: block execution bills the user's
credit wallet, not this counter.
* **Microdollars** meter AutoGPT's **operator-side infrastructure cost**
for the copilot **LLM turn itself** — the real USD we spend on the
baseline model, Claude Agent SDK runs, the web_search tool, and the
prompt simulator. They gate the chat loop so a single user can't burn
the daily / weekly infra budget driving the chat regardless of their
credit balance. BYOK runs (user supplied their own API key) do **not**
decrement this counter — the user is paying the provider, not us.
A future option is to unify these into one wallet; until then the
boundary above is the contract.
"""
import asyncio

View File

@@ -26,7 +26,10 @@ _USER = "test-user-helpers"
_SESSION = "test-session-helpers"
def _make_block(block_id: str = "block-1", name: str = "TestBlock"):
def _make_block(
block_id: str = "block-1",
name: str = "TestBlock",
):
"""Create a minimal mock block for execute_block()."""
mock = MagicMock()
mock.id = block_id
@@ -205,6 +208,154 @@ class TestExecuteBlockCreditCharging:
assert result.success is True
# ---------------------------------------------------------------------------
# Unregistered block regression: blocks without BLOCK_COSTS entry still run
# ---------------------------------------------------------------------------
@pytest.mark.asyncio(loop_scope="session")
class TestUnregisteredBlockRunsFree:
    """Blocks absent from BLOCK_COSTS must run successfully at zero cost.

    ``block_usage_cost`` returns ``(0, {})`` when a block has no BLOCK_COSTS
    entry; this test pins that contract at the copilot execution boundary so
    a refactor can neither charge for a free block nor crash on the missing
    lookup and silently bill free blocks.
    """

    async def test_unregistered_block_runs_without_charge(self):
        mock_block = _make_block(block_id="unregistered-block", name="UnregisteredBlock")
        credit_ctx, credit_db = _patch_credit_db()

        with _patch_workspace(), credit_ctx:
            response = await execute_block(
                block=mock_block,
                block_id="unregistered-block",
                input_data={},
                user_id=_USER,
                session_id=_SESSION,
                node_exec_id="exec-unreg",
                matched_credentials={},
                dry_run=False,
            )

        assert isinstance(response, BlockOutputResponse)
        assert response.success is True
        # A zero-cost lookup must leave the credit wallet untouched entirely.
        credit_db.get_credits.assert_not_awaited()
        credit_db.spend_credits.assert_not_awaited()
# ---------------------------------------------------------------------------
# BLOCK_COSTS regression: newly-registered paid-API blocks must decrement credits
# ---------------------------------------------------------------------------
class TestNewlyRegisteredBlockCosts:
    """Regression coverage for the cost-tracking leak closure.

    Each block covered below previously lacked a BLOCK_COSTS entry and would
    silently skip ``spend_credits`` when run through copilot ``run_block``.
    Listing a block id here locks its credit charge in place, so a later
    refactor cannot quietly drop or reprice the entry.
    """

    def test_perplexity_block_registered(self):
        from backend.blocks.perplexity import PerplexityBlock, PerplexityModel
        from backend.data.block_cost_config import BLOCK_COSTS

        assert PerplexityBlock in BLOCK_COSTS
        # Pin the full model -> price mapping so swapped prices fail loudly.
        observed = {}
        for entry in BLOCK_COSTS[PerplexityBlock]:
            observed[entry.cost_filter["model"]] = entry.cost_amount
        expected = {
            PerplexityModel.SONAR: 1,
            PerplexityModel.SONAR_PRO: 5,
            PerplexityModel.SONAR_DEEP_RESEARCH: 10,
        }
        assert observed == expected

    def test_fact_checker_block_registered(self):
        from backend.blocks.jina.fact_checker import FactCheckerBlock
        from backend.data.block_cost_config import BLOCK_COSTS

        assert FactCheckerBlock in BLOCK_COSTS
        first_entry = BLOCK_COSTS[FactCheckerBlock][0]
        assert first_entry.cost_amount == 1

    def test_mem0_blocks_registered(self):
        from backend.blocks.mem0 import (
            AddMemoryBlock,
            GetAllMemoriesBlock,
            GetLatestMemoryBlock,
            SearchMemoryBlock,
        )
        from backend.data.block_cost_config import BLOCK_COSTS

        mem0_blocks = [
            AddMemoryBlock,
            SearchMemoryBlock,
            GetAllMemoriesBlock,
            GetLatestMemoryBlock,
        ]
        for block_cls in mem0_blocks:
            assert block_cls in BLOCK_COSTS, f"{block_cls.__name__} missing"
            assert BLOCK_COSTS[block_cls][0].cost_amount == 1

    def test_screenshotone_block_registered(self):
        from backend.blocks.screenshotone import ScreenshotWebPageBlock
        from backend.data.block_cost_config import BLOCK_COSTS

        assert ScreenshotWebPageBlock in BLOCK_COSTS
        assert BLOCK_COSTS[ScreenshotWebPageBlock][0].cost_amount == 2

    def test_nvidia_deepfake_block_registered(self):
        from backend.blocks.nvidia.deepfake import NvidiaDeepfakeDetectBlock
        from backend.data.block_cost_config import BLOCK_COSTS

        assert NvidiaDeepfakeDetectBlock in BLOCK_COSTS
        assert BLOCK_COSTS[NvidiaDeepfakeDetectBlock][0].cost_amount == 2

    def test_smartlead_blocks_registered(self):
        from backend.blocks.smartlead.campaign import (
            AddLeadToCampaignBlock,
            CreateCampaignBlock,
            SaveCampaignSequencesBlock,
        )
        from backend.data.block_cost_config import BLOCK_COSTS

        # Campaign creation is the heavier operation; the rest are flat 1.
        assert BLOCK_COSTS[CreateCampaignBlock][0].cost_amount == 2
        assert BLOCK_COSTS[AddLeadToCampaignBlock][0].cost_amount == 1
        assert BLOCK_COSTS[SaveCampaignSequencesBlock][0].cost_amount == 1

    def test_zerobounce_validate_block_registered(self):
        from backend.blocks.zerobounce.validate_emails import ValidateEmailsBlock
        from backend.data.block_cost_config import BLOCK_COSTS

        assert ValidateEmailsBlock in BLOCK_COSTS
        assert BLOCK_COSTS[ValidateEmailsBlock][0].cost_amount == 2

    def test_claude_code_block_registered(self):
        """ClaudeCodeBlock spawns an E2B sandbox + runs Claude inside it.

        Cost is dominated by the in-sandbox LLM spend ($0.50-$2/run typical),
        not the sandbox compute itself. Flat 100 credits ($1.00) is the
        conservative estimate until we wire the in-sandbox x-total-cost back
        into NodeExecutionStats.provider_cost.
        """
        from backend.blocks.claude_code import ClaudeCodeBlock
        from backend.data.block_cost_config import BLOCK_COSTS

        assert ClaudeCodeBlock in BLOCK_COSTS
        claude_entry = BLOCK_COSTS[ClaudeCodeBlock][0]
        assert claude_entry.cost_amount == 100
        # The gate must key on the block's real input field name,
        # `e2b_credentials`, not the generic `credentials`.
        assert "e2b_credentials" in claude_entry.cost_filter
# ---------------------------------------------------------------------------
# Type coercion tests
# ---------------------------------------------------------------------------

View File

@@ -12,6 +12,7 @@ from backend.blocks.ai_shortform_video_block import (
from backend.blocks.apollo.organization import SearchOrganizationsBlock
from backend.blocks.apollo.people import SearchPeopleBlock
from backend.blocks.apollo.person import GetPersonDetailBlock
from backend.blocks.claude_code import ClaudeCodeBlock
from backend.blocks.codex import CodeGenerationBlock, CodexModel
from backend.blocks.enrichlayer.linkedin import (
GetLinkedinProfileBlock,
@@ -22,6 +23,7 @@ from backend.blocks.enrichlayer.linkedin import (
from backend.blocks.flux_kontext import AIImageEditorBlock, FluxKontextModelName
from backend.blocks.ideogram import IdeogramModelBlock
from backend.blocks.jina.embeddings import JinaEmbeddingBlock
from backend.blocks.jina.fact_checker import FactCheckerBlock
from backend.blocks.jina.search import ExtractWebsiteContentBlock, SearchTheWebBlock
from backend.blocks.llm import (
MODEL_METADATA,
@@ -32,29 +34,50 @@ from backend.blocks.llm import (
AITextSummarizerBlock,
LlmModel,
)
from backend.blocks.mem0 import (
AddMemoryBlock,
GetAllMemoriesBlock,
GetLatestMemoryBlock,
SearchMemoryBlock,
)
from backend.blocks.nvidia.deepfake import NvidiaDeepfakeDetectBlock
from backend.blocks.orchestrator import OrchestratorBlock
from backend.blocks.perplexity import PerplexityBlock, PerplexityModel
from backend.blocks.replicate.flux_advanced import ReplicateFluxAdvancedModelBlock
from backend.blocks.replicate.replicate_block import ReplicateModelBlock
from backend.blocks.screenshotone import ScreenshotWebPageBlock
from backend.blocks.smartlead.campaign import (
AddLeadToCampaignBlock,
CreateCampaignBlock,
SaveCampaignSequencesBlock,
)
from backend.blocks.talking_head import CreateTalkingAvatarVideoBlock
from backend.blocks.text_to_speech_block import UnrealTextToSpeechBlock
from backend.blocks.video.narration import VideoNarrationBlock
from backend.blocks.zerobounce.validate_emails import ValidateEmailsBlock
from backend.integrations.credentials_store import (
aiml_api_credentials,
anthropic_credentials,
apollo_credentials,
did_credentials,
e2b_credentials,
elevenlabs_credentials,
enrichlayer_credentials,
groq_credentials,
ideogram_credentials,
jina_credentials,
llama_api_credentials,
mem0_credentials,
nvidia_credentials,
open_router_credentials,
openai_credentials,
replicate_credentials,
revid_credentials,
screenshotone_credentials,
smartlead_credentials,
unreal_credentials,
v0_credentials,
zerobounce_credentials,
)
# =============== Configure the cost for each LLM Model call =============== #
@@ -292,6 +315,23 @@ LLM_COST = (
)
# =============== This is the exhaustive list of cost for each Block =============== #
#
# BLOCK_COSTS drives the **credit wallet** — the user-facing balance that funds
# block executions regardless of where they run (builder, graph execution,
# copilot ``run_block`` tool). A missing entry here makes the block run for
# free from the wallet's perspective, even when the upstream provider charges
# real USD. See ``backend.executor.utils::block_usage_cost`` for the lookup
# and ``backend.copilot.tools.helpers::execute_block`` for the copilot-side
# charge path.
#
# Credits are **not** the same as copilot microdollar rate-limit counters
# (``backend.copilot.rate_limit``). Microdollars track AutoGPT's infra cost
# (OpenRouter / Anthropic inference spend) and gate the chat loop; credits
# track the user's prepaid balance. A block running inside copilot ``run_block``
# decrements only the credit wallet via this table — microdollars stay scoped
# to copilot LLM turns and are not double-charged from block execution.
# See the module docstring on ``backend.copilot.rate_limit`` for the full
# boundary.
BLOCK_COSTS: dict[Type[Block], list[BlockCost]] = {
AIConversationBlock: LLM_COST,
@@ -714,6 +754,62 @@ BLOCK_COSTS: dict[Type[Block], list[BlockCost]] = {
},
),
],
PerplexityBlock: [
# Sonar Deep Research: up to $5/1K searches + $8/1M reasoning tokens.
# Flat-charge 10 credits mirrors the LLM table's SONAR_DEEP_RESEARCH
# entry. Block execution decrements only the user credit wallet via
# spend_credits(); the microdollar rate-limit counter is not touched
# for run_block invocations. The actual per-run provider spend is
# recorded separately as provider_cost on PlatformCostLog when
# OpenRouter reports usage.
BlockCost(
cost_amount=10,
cost_filter={
"model": PerplexityModel.SONAR_DEEP_RESEARCH,
"credentials": {
"id": open_router_credentials.id,
"provider": open_router_credentials.provider,
"type": open_router_credentials.type,
},
},
),
# Sonar Pro: $1/1M input + $1/1M output + $0.005/search.
BlockCost(
cost_amount=5,
cost_filter={
"model": PerplexityModel.SONAR_PRO,
"credentials": {
"id": open_router_credentials.id,
"provider": open_router_credentials.provider,
"type": open_router_credentials.type,
},
},
),
# Sonar (default): $0.2/1M input + $0.2/1M output + $0.005/search.
BlockCost(
cost_amount=1,
cost_filter={
"model": PerplexityModel.SONAR,
"credentials": {
"id": open_router_credentials.id,
"provider": open_router_credentials.provider,
"type": open_router_credentials.type,
},
},
),
],
FactCheckerBlock: [
BlockCost(
cost_amount=1,
cost_filter={
"credentials": {
"id": jina_credentials.id,
"provider": jina_credentials.provider,
"type": jina_credentials.type,
}
},
)
],
OrchestratorBlock: LLM_COST,
VideoNarrationBlock: [
BlockCost(
@@ -727,4 +823,151 @@ BLOCK_COSTS: dict[Type[Block], list[BlockCost]] = {
},
)
],
# Mem0: Starter $19/mo for 50K adds + 5K retrievals → $0.0004/add,
# $0.004/retrieval. Floor at 1 credit covers raw cost with margin.
AddMemoryBlock: [
BlockCost(
cost_amount=1,
cost_filter={
"credentials": {
"id": mem0_credentials.id,
"provider": mem0_credentials.provider,
"type": mem0_credentials.type,
}
},
)
],
SearchMemoryBlock: [
BlockCost(
cost_amount=1,
cost_filter={
"credentials": {
"id": mem0_credentials.id,
"provider": mem0_credentials.provider,
"type": mem0_credentials.type,
}
},
)
],
GetAllMemoriesBlock: [
BlockCost(
cost_amount=1,
cost_filter={
"credentials": {
"id": mem0_credentials.id,
"provider": mem0_credentials.provider,
"type": mem0_credentials.type,
}
},
)
],
GetLatestMemoryBlock: [
BlockCost(
cost_amount=1,
cost_filter={
"credentials": {
"id": mem0_credentials.id,
"provider": mem0_credentials.provider,
"type": mem0_credentials.type,
}
},
)
],
# ScreenshotOne: $17 / 2K screenshots = $0.0085/call (Basic tier).
ScreenshotWebPageBlock: [
BlockCost(
cost_amount=2,
cost_filter={
"credentials": {
"id": screenshotone_credentials.id,
"provider": screenshotone_credentials.provider,
"type": screenshotone_credentials.type,
}
},
)
],
# NVIDIA NIM hosted endpoints: no public per-call SKU; estimate based on
# peer deepfake APIs (Hive/Sightengine ~$0.005-0.01/call).
NvidiaDeepfakeDetectBlock: [
BlockCost(
cost_amount=2,
cost_filter={
"credentials": {
"id": nvidia_credentials.id,
"provider": nvidia_credentials.provider,
"type": nvidia_credentials.type,
}
},
)
],
# Smartlead: $39/mo Basic = $0.0065 per email-equivalent. Campaign
# creation touches multiple records → 2 credits; per-lead and config
# writes are lighter → 1 credit.
CreateCampaignBlock: [
BlockCost(
cost_amount=2,
cost_filter={
"credentials": {
"id": smartlead_credentials.id,
"provider": smartlead_credentials.provider,
"type": smartlead_credentials.type,
}
},
)
],
AddLeadToCampaignBlock: [
BlockCost(
cost_amount=1,
cost_filter={
"credentials": {
"id": smartlead_credentials.id,
"provider": smartlead_credentials.provider,
"type": smartlead_credentials.type,
}
},
)
],
SaveCampaignSequencesBlock: [
BlockCost(
cost_amount=1,
cost_filter={
"credentials": {
"id": smartlead_credentials.id,
"provider": smartlead_credentials.provider,
"type": smartlead_credentials.type,
}
},
)
],
# ZeroBounce: $16 / 2K validations = $0.008 per email. One email per call.
ValidateEmailsBlock: [
BlockCost(
cost_amount=2,
cost_filter={
"credentials": {
"id": zerobounce_credentials.id,
"provider": zerobounce_credentials.provider,
"type": zerobounce_credentials.type,
}
},
)
],
# ClaudeCodeBlock runs an E2B sandbox (~$0.00003/sec compute) AND
# executes Claude Sonnet inside it. Real session cost is dominated by
# the LLM and varies $0.50–$2 per typical run. Flat 100 credits ($1.00)
# is a conservative-but-fair estimate; revisit once we expose the
# x-total-cost header from the in-sandbox Claude calls back to
# NodeExecutionStats.provider_cost.
ClaudeCodeBlock: [
BlockCost(
cost_amount=100,
cost_filter={
"e2b_credentials": {
"id": e2b_credentials.id,
"provider": e2b_credentials.provider,
"type": e2b_credentials.type,
}
},
)
],
}

View File

@@ -58,7 +58,7 @@ Tool and block identifiers provided in `tools` and `blocks` are validated at run
| system_context | Optional additional context prepended to the prompt. Use this to constrain autopilot behavior, provide domain context, or set output format requirements. | str | No |
| session_id | Session ID to continue an existing autopilot conversation. Leave empty to start a new session. Use the session_id output from a previous run to continue. | str | No |
| max_recursion_depth | Maximum nesting depth when the autopilot calls this block recursively (sub-agent pattern). Prevents infinite loops. | int | No |
| tools | Tool names to filter. Works with tools_exclude to form an allow-list or deny-list. Leave empty to apply no tool filter. | List["add_understanding" \| "ask_question" \| "bash_exec" \| "browser_act" \| "browser_navigate" \| "browser_screenshot" \| "connect_integration" \| "continue_run_block" \| "create_agent" \| "create_feature_request" \| "create_folder" \| "customize_agent" \| "delete_folder" \| "delete_workspace_file" \| "edit_agent" \| "find_agent" \| "find_block" \| "find_library_agent" \| "fix_agent_graph" \| "get_agent_building_guide" \| "get_doc_page" \| "get_mcp_guide" \| "get_sub_session_result" \| "list_folders" \| "list_workspace_files" \| "memory_forget_confirm" \| "memory_forget_search" \| "memory_search" \| "memory_store" \| "move_agents_to_folder" \| "move_folder" \| "read_workspace_file" \| "run_agent" \| "run_block" \| "run_mcp_tool" \| "run_sub_session" \| "search_docs" \| "search_feature_requests" \| "update_folder" \| "validate_agent_graph" \| "view_agent_output" \| "web_fetch" \| "write_workspace_file" \| "Agent" \| "Edit" \| "Glob" \| "Grep" \| "Read" \| "Task" \| "TodoWrite" \| "WebSearch" \| "Write"] | No |
| tools | Tool names to filter. Works with tools_exclude to form an allow-list or deny-list. Leave empty to apply no tool filter. | List["add_understanding" \| "ask_question" \| "bash_exec" \| "browser_act" \| "browser_navigate" \| "browser_screenshot" \| "connect_integration" \| "continue_run_block" \| "create_agent" \| "create_feature_request" \| "create_folder" \| "customize_agent" \| "delete_folder" \| "delete_workspace_file" \| "edit_agent" \| "find_agent" \| "find_block" \| "find_library_agent" \| "fix_agent_graph" \| "get_agent_building_guide" \| "get_doc_page" \| "get_mcp_guide" \| "get_sub_session_result" \| "list_folders" \| "list_workspace_files" \| "memory_forget_confirm" \| "memory_forget_search" \| "memory_search" \| "memory_store" \| "move_agents_to_folder" \| "move_folder" \| "read_workspace_file" \| "run_agent" \| "run_block" \| "run_mcp_tool" \| "run_sub_session" \| "search_docs" \| "search_feature_requests" \| "update_folder" \| "validate_agent_graph" \| "view_agent_output" \| "web_fetch" \| "web_search" \| "write_workspace_file" \| "Agent" \| "Edit" \| "Glob" \| "Grep" \| "Read" \| "Task" \| "TodoWrite" \| "WebSearch" \| "Write"] | No |
| tools_exclude | Controls how the 'tools' list is interpreted. True (default): 'tools' is a deny-list — listed tools are blocked, all others are allowed. An empty 'tools' list means allow everything. False: 'tools' is an allow-list — only listed tools are permitted. | bool | No |
| blocks | Block identifiers to filter when the copilot uses run_block. Each entry can be: a block name (e.g. 'HTTP Request'), a full block UUID, or the first 8 hex characters of the UUID (e.g. 'c069dc6b'). Works with blocks_exclude. Leave empty to apply no block filter. | List[str] | No |
| blocks_exclude | Controls how the 'blocks' list is interpreted. True (default): 'blocks' is a deny-list — listed blocks are blocked, all others are allowed. An empty 'blocks' list means allow everything. False: 'blocks' is an allow-list — only listed blocks are permitted. | bool | No |