Compare commits: dev ... claude-ima
Mirror of https://github.com/Significant-Gravitas/AutoGPT.git
36 commits: 4c212f2b59, ea2910c560, 27d0f03db3, 1cc8981799, cebbdde75e, eddcc97814, c1e8451c85, 643d1a9e3f, a4fc0d6206, 5bb43c31c5, 96ffa64971, d86a41147b, d3425cae46, 7682cbbe6c, 80ee8c61c4, cba05365e9, 5aadbfe98a, 3e0bcbc7e4, b8749f7590, 3aafa53f3b, 20b4a0e37f, 23095f466a, 769c75e6ac, 11ef0486ff, d72c93c037, 841500f378, b052413ab4, d31167958c, a1a52b9569, 50ad4a34dd, 81c403e103, 2bfaf4d80c, 31e49fb55c, da88da9a17, fed426ff77, 33390ff7fe
@@ -4,6 +4,16 @@ from abc import ABC
 from enum import Enum, EnumMeta
 from json import JSONDecodeError
 from types import MappingProxyType
-from typing import Any, Iterable, List, Literal, NamedTuple, Optional
+from typing import TYPE_CHECKING, Any, Iterable, List, Literal, NamedTuple, Optional
 
 from pydantic import BaseModel, SecretStr
 
 from backend.data.model import NodeExecutionStats
 from backend.integrations.providers import ProviderName
+from backend.util.file import MediaFile, store_media_file
 
+if TYPE_CHECKING:
+    from enum import _EnumMemberT
 
 import anthropic
@@ -64,9 +74,43 @@ def AICredentialsField() -> AICredentials:
     )
 
 
+class ModelProvider(str, Enum):
+    OPENAI = "openai"
+    ANTHROPIC = "anthropic"
+    GROQ = "groq"
+    OLLAMA = "ollama"
+    OPEN_ROUTER = "open_router"
+
+
+class ModelCreator(str, Enum):
+    ANTHROPIC = "anthropic"
+    META = "meta"
+    GOOGLE = "google"
+    OPENAI = "openai"
+    MISTRAL = "mistral"
+    COHERE = "cohere"
+    DEEPSEEK = "deepseek"
+    PERPLEXITY = "perplexity"
+    QWEN = "qwen"
+    NOUS = "nous"
+    AMAZON = "amazon"
+    MICROSOFT = "microsoft"
+    GRYPHE = "gryphe"
+    EVA = "eva"
+
+
+class ModelCapabilities(NamedTuple):
+    supports_images: bool = False
+    supports_functions: bool = False
+    supports_vision: bool = False
+    is_local: bool = False
+
+
 class ModelMetadata(NamedTuple):
-    provider: str
+    provider: ModelProvider
+    creator: ModelCreator
     context_window: int
-    max_output_tokens: int | None
+    capabilities: ModelCapabilities = ModelCapabilities()
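The interesting move here is attaching a ModelCapabilities tuple to each ModelMetadata entry, so feature checks become data lookups rather than model-name checks. A hedged, self-contained sketch of that pattern (trimmed enums and a hypothetical registry keyed by plain strings, not the real LlmModel members):

```python
from enum import Enum
from typing import NamedTuple


class ModelProvider(str, Enum):
    OPENAI = "openai"
    OLLAMA = "ollama"


class ModelCreator(str, Enum):
    OPENAI = "openai"
    META = "meta"


class ModelCapabilities(NamedTuple):
    supports_images: bool = False
    supports_functions: bool = False
    supports_vision: bool = False
    is_local: bool = False


class ModelMetadata(NamedTuple):
    provider: ModelProvider
    creator: ModelCreator
    context_window: int
    capabilities: ModelCapabilities = ModelCapabilities()


# Hypothetical registry keyed by model name instead of the LlmModel enum.
REGISTRY = {
    "gpt-4o": ModelMetadata(
        ModelProvider.OPENAI, ModelCreator.OPENAI, 128000,
        ModelCapabilities(supports_images=True),
    ),
    "llama3.2": ModelMetadata(
        ModelProvider.OLLAMA, ModelCreator.META, 8192,
        ModelCapabilities(is_local=True),
    ),
}

# Route an image prompt only to models that declare image support.
image_capable = [m for m, meta in REGISTRY.items() if meta.capabilities.supports_images]
print(image_capable)  # ['gpt-4o']
```

Because capabilities defaults to an empty ModelCapabilities, entries that never mention it stay valid, which is what lets most of the dict below pass only three positional arguments.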
@@ -154,68 +198,114 @@ class LlmModel(str, Enum, metaclass=LlmModelMeta):
 
 
 MODEL_METADATA = {
     # https://platform.openai.com/docs/models
-    LlmModel.O3_MINI: ModelMetadata("openai", 200000, 100000),  # o3-mini-2025-01-31
-    LlmModel.O1: ModelMetadata("openai", 200000, 100000),  # o1-2024-12-17
-    LlmModel.O1_PREVIEW: ModelMetadata("openai", 128000, 32768),  # o1-preview-2024-09-12
-    LlmModel.O1_MINI: ModelMetadata("openai", 128000, 65536),  # o1-mini-2024-09-12
-    LlmModel.GPT4O_MINI: ModelMetadata("openai", 128000, 16384),  # gpt-4o-mini-2024-07-18
-    LlmModel.GPT4O: ModelMetadata("openai", 128000, 16384),  # gpt-4o-2024-08-06
-    LlmModel.GPT4_TURBO: ModelMetadata("openai", 128000, 4096),  # gpt-4-turbo-2024-04-09
-    LlmModel.GPT3_5_TURBO: ModelMetadata("openai", 16385, 4096),  # gpt-3.5-turbo-0125
     # https://docs.anthropic.com/en/docs/about-claude/models
-    LlmModel.CLAUDE_3_5_SONNET: ModelMetadata("anthropic", 200000, 8192),  # claude-3-5-sonnet-20241022
-    LlmModel.CLAUDE_3_5_HAIKU: ModelMetadata("anthropic", 200000, 8192),  # claude-3-5-haiku-20241022
-    LlmModel.CLAUDE_3_HAIKU: ModelMetadata("anthropic", 200000, 4096),  # claude-3-haiku-20240307
     # https://console.groq.com/docs/models
-    LlmModel.GEMMA2_9B: ModelMetadata("groq", 8192, None),
-    LlmModel.LLAMA3_3_70B: ModelMetadata("groq", 128000, 32768),
-    LlmModel.LLAMA3_1_8B: ModelMetadata("groq", 128000, 8192),
-    LlmModel.LLAMA3_70B: ModelMetadata("groq", 8192, None),
-    LlmModel.LLAMA3_8B: ModelMetadata("groq", 8192, None),
-    LlmModel.MIXTRAL_8X7B: ModelMetadata("groq", 32768, None),
-    LlmModel.DEEPSEEK_LLAMA_70B: ModelMetadata("groq", 128000, None),
     # https://ollama.com/library
-    LlmModel.OLLAMA_LLAMA3_3: ModelMetadata("ollama", 8192, None),
-    LlmModel.OLLAMA_LLAMA3_2: ModelMetadata("ollama", 8192, None),
-    LlmModel.OLLAMA_LLAMA3_8B: ModelMetadata("ollama", 8192, None),
-    LlmModel.OLLAMA_LLAMA3_405B: ModelMetadata("ollama", 8192, None),
-    LlmModel.OLLAMA_DOLPHIN: ModelMetadata("ollama", 32768, None),
     # https://openrouter.ai/models
-    LlmModel.GEMINI_FLASH_1_5: ModelMetadata("open_router", 1000000, 8192),
-    LlmModel.GROK_BETA: ModelMetadata("open_router", 131072, 131072),
-    LlmModel.MISTRAL_NEMO: ModelMetadata("open_router", 128000, 4096),
-    LlmModel.COHERE_COMMAND_R_08_2024: ModelMetadata("open_router", 128000, 4096),
-    LlmModel.COHERE_COMMAND_R_PLUS_08_2024: ModelMetadata("open_router", 128000, 4096),
-    LlmModel.EVA_QWEN_2_5_32B: ModelMetadata("open_router", 16384, 4096),
-    LlmModel.DEEPSEEK_CHAT: ModelMetadata("open_router", 64000, 2048),
-    LlmModel.PERPLEXITY_LLAMA_3_1_SONAR_LARGE_128K_ONLINE: ModelMetadata("open_router", 127072, 127072),
-    LlmModel.QWEN_QWQ_32B_PREVIEW: ModelMetadata("open_router", 32768, 32768),
-    LlmModel.NOUSRESEARCH_HERMES_3_LLAMA_3_1_405B: ModelMetadata("open_router", 131000, 4096),
-    LlmModel.NOUSRESEARCH_HERMES_3_LLAMA_3_1_70B: ModelMetadata("open_router", 12288, 12288),
-    LlmModel.AMAZON_NOVA_LITE_V1: ModelMetadata("open_router", 300000, 5120),
-    LlmModel.AMAZON_NOVA_MICRO_V1: ModelMetadata("open_router", 128000, 5120),
-    LlmModel.AMAZON_NOVA_PRO_V1: ModelMetadata("open_router", 300000, 5120),
-    LlmModel.MICROSOFT_WIZARDLM_2_8X22B: ModelMetadata("open_router", 65536, 4096),
-    LlmModel.GRYPHE_MYTHOMAX_L2_13B: ModelMetadata("open_router", 4096, 4096),
+        ModelProvider.OPENAI,
+        ModelCreator.OPENAI,
+        32000,
+        ModelCapabilities(supports_images=True),
+    ),
+    LlmModel.O1_MINI: ModelMetadata(
+        ModelProvider.OPENAI,
+        ModelCreator.OPENAI,
+        62000,
+        ModelCapabilities(supports_images=True),
+    ),
+    LlmModel.GPT4O_MINI: ModelMetadata(
+        ModelProvider.OPENAI,
+        ModelCreator.OPENAI,
+        128000,
+        ModelCapabilities(supports_images=True),
+    ),
+    LlmModel.GPT4O: ModelMetadata(ModelProvider.OPENAI, ModelCreator.OPENAI, 128000),
+    LlmModel.GPT4_TURBO: ModelMetadata(
+        ModelProvider.OPENAI, ModelCreator.OPENAI, 128000
+    ),
+    LlmModel.GPT3_5_TURBO: ModelMetadata(
+        ModelProvider.OPENAI, ModelCreator.OPENAI, 16385
+    ),
+    LlmModel.CLAUDE_3_5_SONNET: ModelMetadata(
+        ModelProvider.ANTHROPIC,
+        ModelCreator.ANTHROPIC,
+        200000,
+        ModelCapabilities(supports_images=True),
+    ),
+    LlmModel.CLAUDE_3_HAIKU: ModelMetadata(
+        ModelProvider.ANTHROPIC,
+        ModelCreator.ANTHROPIC,
+        200000,
+        ModelCapabilities(supports_images=True),
+    ),
+    LlmModel.LLAMA3_8B: ModelMetadata(ModelProvider.GROQ, ModelCreator.META, 8192),
+    LlmModel.LLAMA3_70B: ModelMetadata(ModelProvider.GROQ, ModelCreator.META, 8192),
+    LlmModel.MIXTRAL_8X7B: ModelMetadata(
+        ModelProvider.GROQ, ModelCreator.MISTRAL, 32768
+    ),
+    LlmModel.GEMMA_7B: ModelMetadata(ModelProvider.GROQ, ModelCreator.GOOGLE, 8192),
+    LlmModel.GEMMA2_9B: ModelMetadata(ModelProvider.GROQ, ModelCreator.GOOGLE, 8192),
+    LlmModel.LLAMA3_1_405B: ModelMetadata(ModelProvider.GROQ, ModelCreator.META, 8192),
+    # Limited to 16k during preview
+    LlmModel.LLAMA3_1_70B: ModelMetadata(ModelProvider.GROQ, ModelCreator.META, 131072),
+    LlmModel.LLAMA3_1_8B: ModelMetadata(ModelProvider.GROQ, ModelCreator.META, 131072),
+    LlmModel.OLLAMA_LLAMA3_2: ModelMetadata(
+        ModelProvider.OLLAMA, ModelCreator.META, 8192, ModelCapabilities(is_local=True)
+    ),
+    LlmModel.OLLAMA_LLAMA3_8B: ModelMetadata(
+        ModelProvider.OLLAMA, ModelCreator.META, 8192, ModelCapabilities(is_local=True)
+    ),
+    LlmModel.OLLAMA_LLAMA3_405B: ModelMetadata(
+        ModelProvider.OLLAMA, ModelCreator.META, 8192, ModelCapabilities(is_local=True)
+    ),
+    LlmModel.OLLAMA_DOLPHIN: ModelMetadata(
+        ModelProvider.OLLAMA, ModelCreator.META, 32768, ModelCapabilities(is_local=True)
+    ),
+    LlmModel.GEMINI_FLASH_1_5_8B: ModelMetadata(
+        ModelProvider.OPEN_ROUTER, ModelCreator.GOOGLE, 8192
+    ),
+    LlmModel.GROK_BETA: ModelMetadata(
+        ModelProvider.OPEN_ROUTER, ModelCreator.GOOGLE, 8192
+    ),
+    LlmModel.MISTRAL_NEMO: ModelMetadata(
+        ModelProvider.OPEN_ROUTER, ModelCreator.MISTRAL, 4000
+    ),
+    LlmModel.COHERE_COMMAND_R_08_2024: ModelMetadata(
+        ModelProvider.OPEN_ROUTER, ModelCreator.COHERE, 4000
+    ),
+    LlmModel.COHERE_COMMAND_R_PLUS_08_2024: ModelMetadata(
+        ModelProvider.OPEN_ROUTER, ModelCreator.COHERE, 4000
+    ),
+    LlmModel.EVA_QWEN_2_5_32B: ModelMetadata(
+        ModelProvider.OPEN_ROUTER, ModelCreator.EVA, 4000
+    ),
+    LlmModel.DEEPSEEK_CHAT: ModelMetadata(
+        ModelProvider.OPEN_ROUTER, ModelCreator.DEEPSEEK, 8192
+    ),
+    LlmModel.PERPLEXITY_LLAMA_3_1_SONAR_LARGE_128K_ONLINE: ModelMetadata(
+        ModelProvider.OPEN_ROUTER, ModelCreator.PERPLEXITY, 8192
+    ),
+    LlmModel.QWEN_QWQ_32B_PREVIEW: ModelMetadata(
+        ModelProvider.OPEN_ROUTER, ModelCreator.QWEN, 4000
+    ),
+    LlmModel.NOUSRESEARCH_HERMES_3_LLAMA_3_1_405B: ModelMetadata(
+        ModelProvider.OPEN_ROUTER, ModelCreator.NOUS, 4000
+    ),
+    LlmModel.NOUSRESEARCH_HERMES_3_LLAMA_3_1_70B: ModelMetadata(
+        ModelProvider.OPEN_ROUTER, ModelCreator.NOUS, 4000
+    ),
+    LlmModel.AMAZON_NOVA_LITE_V1: ModelMetadata(
+        ModelProvider.OPEN_ROUTER, ModelCreator.AMAZON, 4000
+    ),
+    LlmModel.AMAZON_NOVA_MICRO_V1: ModelMetadata(
+        ModelProvider.OPEN_ROUTER, ModelCreator.AMAZON, 4000
+    ),
+    LlmModel.AMAZON_NOVA_PRO_V1: ModelMetadata(
+        ModelProvider.OPEN_ROUTER, ModelCreator.AMAZON, 4000
+    ),
+    LlmModel.MICROSOFT_WIZARDLM_2_8X22B: ModelMetadata(
+        ModelProvider.OPEN_ROUTER, ModelCreator.MICROSOFT, 4000
+    ),
+    LlmModel.GRYPHE_MYTHOMAX_L2_13B: ModelMetadata(
+        ModelProvider.OPEN_ROUTER, ModelCreator.GRYPHE, 4000
+    ),
 }
 
 for model in LlmModel:
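Note that the rewrite drops the old max_output_tokens column; context_window stays the one number callers can budget against. A minimal sketch of the consuming side, with a stand-in table and a deliberately crude 4-characters-per-token estimate (both are assumptions for illustration, not backend code):

```python
from typing import NamedTuple


class ModelMetadata(NamedTuple):
    provider: str
    creator: str
    context_window: int


# Stand-in for the MODEL_METADATA dict above, keyed by plain strings.
MODEL_METADATA = {
    "gpt-4o": ModelMetadata("openai", "openai", 128000),
    "mistral-nemo": ModelMetadata("open_router", "mistral", 4000),
}


def fits_in_context(model: str, prompt: str) -> bool:
    """Rough pre-flight check of a prompt against the model's context window."""
    estimated_tokens = len(prompt) // 4  # crude chars-per-token heuristic
    return estimated_tokens <= MODEL_METADATA[model].context_window


print(fits_in_context("mistral-nemo", "x" * 50000))  # False: ~12.5k tokens > 4k window
```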
@@ -518,6 +608,11 @@ def llm_call(
         raise ValueError(f"Unsupported LLM provider: {provider}")
 
 
+class MessageWithMedia(Message):
+    role: MessageRole
+    content: str | MediaFile
+
+
 class AIBlockBase(Block, ABC):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
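MessageWithMedia widens Message.content so one history entry can hold either plain text or a media reference, and downstream code dispatches on the runtime type. A standalone sketch of that dispatch (plain dataclass stand-ins for the backend's pydantic types):

```python
from dataclasses import dataclass


class MediaFile(str):
    """Stand-in for backend.util.file.MediaFile: a str subtype holding a path or data URI."""


@dataclass
class MessageWithMedia:
    role: str  # "user" / "assistant" / "system"
    content: "str | MediaFile"


def render(msg: MessageWithMedia) -> dict:
    """Dispatch on the content type, the way the block's run() does below."""
    if isinstance(msg.content, MediaFile):
        return {"type": "image", "source": str(msg.content)}
    return {"type": "text", "text": msg.content}


print(render(MessageWithMedia("user", "hello")))
print(render(MessageWithMedia("user", MediaFile("data:image/png;base64,iVBOR..."))))
```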
@@ -540,7 +635,7 @@ class AIStructuredResponseGeneratorBlock(AIBlockBase):
         )
         model: LlmModel = SchemaField(
             title="LLM Model",
-            default=LlmModel.GPT4O,
+            default=LlmModel.CLAUDE_3_5_SONNET,
             description="The language model to use for answering the prompt.",
             advanced=False,
         )
@@ -1367,3 +1462,335 @@ class AIListGeneratorBlock(AIBlockBase):
             logger.debug(f"Retry prompt: {prompt}")
 
         logger.debug("AIListGeneratorBlock.run completed")
+
+
+class ClaudeWithImageBlock(Block):
+    """Block for calling Claude API with support for images"""
+
+    class Input(BlockSchema):
+        prompt: str = SchemaField(
+            description="The prompt to send to the language model.",
+            placeholder="Enter your prompt here...",
+        )
+        expected_format: dict[str, str] = SchemaField(
+            description="Expected format of the response. If provided, the response will be validated against this format. "
+            "The keys should be the expected fields in the response, and the values should be the description of the field.",
+        )
+        model: LlmModel = SchemaField(
+            title="LLM Model",
+            default=LlmModel.CLAUDE_3_5_SONNET,
+            description="The language model to use for the conversation.",
+        )
+        credentials: AICredentials = AICredentialsField()
+        sys_prompt: str = SchemaField(
+            title="System Prompt",
+            default="",
+            description="The system prompt to provide additional context to the model.",
+        )
+        conversation_history: list[MessageWithMedia] = SchemaField(
+            default=[],
+            description="The conversation history to provide context for the prompt.",
+        )
+        retry: int = SchemaField(
+            title="Retry Count",
+            default=3,
+            description="Number of times to retry the LLM call if the response does not match the expected format.",
+        )
+        prompt_values: dict[str, str | MediaFile] = SchemaField(
+            advanced=False,
+            default={},
+            description="Values used to fill in the prompt. Images can be provided as base64 encoded data with MIME type.",
+        )
+        max_tokens: int | None = SchemaField(
+            advanced=True,
+            default=None,
+            description="The maximum number of tokens to generate in the chat completion.",
+        )
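Worth spelling out the expected_format contract implied by this schema: keys name the required response fields, values only describe them for the model, and validation later checks key presence alone. A small illustration with invented field names:

```python
import json

# What a graph author might configure on the block:
expected_format = {
    "summary": "one-sentence summary of the image",
    "mood": "overall mood, one word",
}

# A response Claude could return that satisfies it: same keys, any values.
response_text = json.dumps({"summary": "A red square on white.", "mood": "neutral"})

parsed = json.loads(response_text)
missing = set(expected_format) - set(parsed)
print(missing or "ok")  # ok: only key presence is enforced, values are free-form
```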
+    class Output(BlockSchema):
+        response: dict[str, Any] = SchemaField(
+            description="The response object generated by the language model."
+        )
+        error: str = SchemaField(description="Error message if the API call failed.")
+
+    def __init__(self):
+        super().__init__(
+            id="bc043b3e-2926-4ed7-b276-735535d1a945",
+            description="Call Claude with support for images to generate formatted object based on the given prompt.",
+            categories={BlockCategory.AI},
+            input_schema=ClaudeWithImageBlock.Input,
+            output_schema=ClaudeWithImageBlock.Output,
+            test_input={
+                "model": LlmModel.CLAUDE_3_5_SONNET,
+                "credentials": TEST_CREDENTIALS_INPUT,
+                "expected_format": {
+                    "key1": "value1",
+                    "key2": "value2",
+                },
+                "prompt": "Describe this image",
+                "prompt_values": {
+                    "image": {
+                        "data": "data:image/jpeg;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAAAXNSR0IArs4c6QAAAB5JREFUOE9jZPjP8J+BAsA4agDDaBgwjIYBw7AIAwCV5B/xAsMbygAAAABJRU5ErkJggg==",
+                    }
+                },
+            },
+            test_credentials=TEST_CREDENTIALS,
+            test_output=("response", {"key1": "key1Value", "key2": "key2Value"}),
+            test_mock={
+                "llm_call": lambda *args, **kwargs: (
+                    json.dumps(
+                        {
+                            "key1": "key1Value",
+                            "key2": "key2Value",
+                        }
+                    ),
+                    0,
+                    0,
+                )
+            },
+        )
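The test_mock entry swaps llm_call out during the block's self-test, so the test never reaches the Anthropic API; note the lambda returns the same (text, input_tokens, output_tokens) tuple shape as the real method. Outside the harness, the same stubbing is a one-line attribute override, sketched here on the assumption that the block can be instantiated in your environment:

```python
import json


def fake_llm_call(*args, **kwargs):
    # Same shape as the real llm_call: (response_text, input_tokens, output_tokens)
    return json.dumps({"key1": "key1Value", "key2": "key2Value"}), 0, 0


block = ClaudeWithImageBlock()
block.llm_call = fake_llm_call  # shadow the @staticmethod on this instance only
text, tokens_in, tokens_out = block.llm_call()
print(text, tokens_in, tokens_out)
```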
+    @staticmethod
+    def llm_call(
+        credentials: APIKeyCredentials,
+        llm_model: LlmModel,
+        prompt: list[dict],
+        max_tokens: int | None = None,
+    ) -> tuple[str, int, int]:
+        """
+        Call the Claude API with support for images in the messages.
+
+        Args:
+            credentials: API credentials for Claude
+            llm_model: The LLM model to use (must be Claude)
+            prompt: List of message dictionaries that can include image content
+            max_tokens: Maximum tokens to generate
+
+        Returns:
+            tuple containing:
+                - The text response
+                - Number of input tokens used
+                - Number of output tokens used
+        """
+        if llm_model.metadata.provider != "anthropic":
+            raise ValueError("Only Claude models are supported for image processing")
+
+        # Extract system prompt if present
+        system_messages = [p["content"] for p in prompt if p["role"] == "system"]
+        sysprompt = " ".join(system_messages)
+
+        # Build messages array with content that can include images
+        messages = []
+        last_role = None
+
+        for p in prompt:
+            if p["role"] in ["user", "assistant"]:
+                message_content = []
+
+                # Handle text content
+                if isinstance(p["content"], str):
+                    message_content.append({"type": "text", "text": p["content"]})
+                # Handle mixed content array with images
+                elif isinstance(p["content"], list):
+                    message_content.extend(p["content"])
+
+                if p["role"] != last_role:
+                    messages.append({"role": p["role"], "content": message_content})
+                    last_role = p["role"]
+                else:
+                    # Combine with previous message if same role
+                    messages[-1]["content"].extend(message_content)
+
+        client = anthropic.Anthropic(api_key=credentials.api_key.get_secret_value())
+
+        try:
+            resp = client.messages.create(
+                model=llm_model.value,
+                system=sysprompt,
+                messages=messages,
+                max_tokens=max_tokens or 8192,
+            )
+
+            if not resp.content:
+                raise ValueError("No content returned from Anthropic.")
+
+            return (
+                (
+                    resp.content[0].name
+                    if isinstance(resp.content[0], anthropic.types.ToolUseBlock)
+                    else resp.content[0].text
+                ),
+                resp.usage.input_tokens,
+                resp.usage.output_tokens,
+            )
+
+        except anthropic.APIError as e:
+            error_message = f"Anthropic API error: {str(e)}"
+            logger.error(error_message)
+            raise ValueError(error_message)
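For reference, the messages list this method assembles follows Anthropic's content-block format: each message's content is a list of typed blocks, and a base64 image block carries its media type next to the payload. The shape, with a truncated payload:

```python
messages = [
    {
        "role": "user",
        "content": [
            {
                "type": "image",
                "source": {
                    "type": "base64",
                    "media_type": "image/png",
                    "data": "iVBORw0KGgoAAAANSUhEUg...",  # bare base64, no data: prefix
                },
            },
            {"type": "text", "text": "Describe this image"},
        ],
    },
]
```

Anthropic rejects consecutive messages with the same role, which is presumably why llm_call merges same-role content into one message above.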
+    def run(
+        self,
+        input_data: Input,
+        *,
+        graph_exec_id: str,
+        credentials: APIKeyCredentials,
+        **kwargs,
+    ) -> BlockOutput:
+        logger.debug(f"Calling Claude with input data: {input_data}")
+
+        # Start with any existing conversation history
+        prompt = [p.model_dump() for p in input_data.conversation_history]
+
+        def trim_prompt(s: str) -> str:
+            lines = s.strip().split("\n")
+            return "\n".join([line.strip().lstrip("|") for line in lines])
+
+        # Handle prompt values including images
+        content = []
+        values: dict[str, str | MediaFile] = input_data.prompt_values
+
+        # Add any images from prompt_values
+        for key, value in values.items():
+            # This is an image
+            if isinstance(value, MediaFile):
+                # media file is a base64 encoded image
+                # read the media file
+                media_path = store_media_file(
+                    graph_exec_id=graph_exec_id, file=value, return_content=True
+                )
+
+                content.append(
+                    {
+                        "type": "image",
+                        "source": {
+                            "type": "base64",
+                            "media_type": media_path.split(";")[0].split(":")[1],
+                            "data": media_path,
+                        },
+                    }
+                )
+
+        # Add the text prompt
+        if input_data.prompt:
+            content.append(
+                {
+                    "type": "text",
+                    "text": fmt.format_string(
+                        input_data.prompt,
+                        {k: v for k, v in values.items() if isinstance(v, str)},
+                    ),
+                }
+            )
+
+        # Add system prompt if provided
+        if input_data.sys_prompt:
+            prompt.append(
+                {
+                    "role": "system",
+                    "content": fmt.format_string(input_data.sys_prompt, values),
+                }
+            )
+
+        # Add expected format if provided
+        if input_data.expected_format:
+            expected_format = [
+                f'"{k}": "{v}"' for k, v in input_data.expected_format.items()
+            ]
+            format_prompt = ",\n  ".join(expected_format)
+            sys_prompt = trim_prompt(
+                f"""
+                |Reply strictly only in the following JSON format:
+                |{{
+                |  {format_prompt}
+                |}}
+                """
+            )
+            prompt.append({"role": "system", "content": sys_prompt})
+
+        # Add the main prompt with images and text
+        prompt.append({"role": "user", "content": content})
+
+        def parse_response(resp: str) -> tuple[dict[str, Any], str | None]:
+            try:
+                parsed = json.loads(resp)
+                if not isinstance(parsed, dict):
+                    return {}, f"Expected a dictionary, but got {type(parsed)}"
+                if input_data.expected_format:
+                    miss_keys = set(input_data.expected_format.keys()) - set(
+                        parsed.keys()
+                    )
+                    if miss_keys:
+                        return parsed, f"Missing keys: {miss_keys}"
+                return parsed, None
+            except JSONDecodeError as e:
+                return {}, f"JSON decode error: {e}"
+
+        logger.info(f"Claude request: {prompt}")
+        retry_prompt = ""
+        llm_model = input_data.model
+
+        for retry_count in range(input_data.retry):
+            try:
+                response_text, input_token, output_token = self.llm_call(
+                    credentials=credentials,
+                    llm_model=llm_model,
+                    prompt=prompt,
+                    max_tokens=input_data.max_tokens,
+                )
+
+                self.merge_stats(
+                    {
+                        "input_token_count": input_token,
+                        "output_token_count": output_token,
+                    }
+                )
+
+                logger.info(f"Claude attempt-{retry_count} response: {response_text}")
+
+                if input_data.expected_format:
+                    parsed_dict, parsed_error = parse_response(response_text)
+                    if not parsed_error:
+                        yield "response", {
+                            k: (
+                                json.loads(v)
+                                if isinstance(v, str)
+                                and v.startswith("[")
+                                and v.endswith("]")
+                                else (", ".join(v) if isinstance(v, list) else v)
+                            )
+                            for k, v in parsed_dict.items()
+                        }
+                        return
+                else:
+                    yield "response", {"response": response_text}
+                    return
+
+                retry_prompt = trim_prompt(
+                    f"""
+                    |This is your previous error response:
+                    |--
+                    |{response_text}
+                    |--
+                    |
+                    |And this is the error:
+                    |--
+                    |{parsed_error}
+                    |--
+                    """
+                )
+                prompt.append({"role": "user", "content": retry_prompt})
+
+            except Exception as e:
+                logger.exception(f"Error calling Claude: {e}")
+                retry_prompt = f"Error calling Claude: {e}"
+            finally:
+                self.merge_stats(
+                    {
+                        "llm_call_count": retry_count + 1,
+                        "llm_retry_count": retry_count,
+                    }
+                )
+
+        raise RuntimeError(retry_prompt)
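One detail of run() worth unpacking: media_type is recovered by slicing the data URI (media_path.split(";")[0].split(":")[1]), but the full URI, prefix included, is passed as data, while Anthropic's base64 source expects only the payload after the comma. A sketch of the stricter split, assuming media_path is a data-URI-shaped string:

```python
media_path = "data:image/png;base64,iVBORw0KGgoAAAANSUhEUg"

header, payload = media_path.split(",", 1)       # "data:image/png;base64" / bare base64
media_type = header.split(";")[0].split(":")[1]  # "image/png", same slicing as run()

source = {"type": "base64", "media_type": media_type, "data": payload}
print(media_type, payload[:10])  # image/png iVBORw0KGg
```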