Compare commits

...

36 Commits

Author SHA1 Message Date
Nicholas Tindle
4c212f2b59 Merge branch 'dev' into claude-image-blcok 2025-04-04 13:51:42 -05:00
Nicholas Tindle
ea2910c560 Merge branch 'dev' into claude-image-blcok 2025-04-01 16:34:43 -05:00
Nicholas Tindle
27d0f03db3 Merge branch 'dev' into claude-image-blcok 2025-02-03 08:25:24 -06:00
Nicholas Tindle
1cc8981799 Merge branch 'dev' into claude-image-blcok 2025-01-29 05:13:09 -06:00
Nicholas Tindle
cebbdde75e fix: lint 2025-01-26 19:06:52 +00:00
Nicholas Tindle
eddcc97814 feat: more changes i guess 2025-01-26 19:05:22 +00:00
Nicholas Tindle
c1e8451c85 Merge branch 'zamilmajdy/multimediafilesupport' into claude-image-blcok 2025-01-26 15:48:30 +01:00
Zamil Majdy
643d1a9e3f Merge branch 'dev' into zamilmajdy/multimediafilesupport 2025-01-26 15:46:16 +01:00
Zamil Majdy
a4fc0d6206 type alias string into MediaFile 2025-01-26 15:45:32 +01:00
Nicholas Tindle
5bb43c31c5 Merge branch 'zamilmajdy/multimediafilesupport' into claude-image-blcok 2025-01-26 15:05:11 +01:00
Nicholas Tindle
96ffa64971 fix: remove block/unblock as its been removed by twitter 2025-01-26 13:15:26 +00:00
Nicholas Tindle
d86a41147b Delete autogpt_platform/backend/backend/blocks/screenshotone.py 2025-01-26 07:11:50 -06:00
Nicholas Tindle
d3425cae46 Discard changes to autogpt_platform/frontend/src/lib/autogpt-server-api/types.ts 2025-01-26 14:11:28 +01:00
Nicholas Tindle
7682cbbe6c Discard changes to autogpt_platform/frontend/src/components/integrations/credentials-provider.tsx 2025-01-26 14:11:21 +01:00
Nicholas Tindle
80ee8c61c4 Discard changes to autogpt_platform/frontend/src/components/integrations/credentials-input.tsx 2025-01-26 14:11:13 +01:00
Nicholas Tindle
cba05365e9 Discard changes to autogpt_platform/backend/backend/util/settings.py 2025-01-26 14:11:07 +01:00
Nicholas Tindle
5aadbfe98a Discard changes to autogpt_platform/backend/backend/integrations/providers.py 2025-01-26 14:10:59 +01:00
Nicholas Tindle
3e0bcbc7e4 Discard changes to autogpt_platform/backend/backend/integrations/credentials_store.py 2025-01-26 14:10:50 +01:00
Nicholas Tindle
b8749f7590 fix: use pydantic not typed dict 2025-01-26 13:09:10 +00:00
Nicholas Tindle
3aafa53f3b fix: lint 2025-01-26 13:09:10 +00:00
Nicholas Tindle
20b4a0e37f feat: image block for claude 2025-01-26 13:09:09 +00:00
Nicholas Tindle
23095f466a feat: screenshotone 2025-01-26 13:09:09 +00:00
Zamil Majdy
769c75e6ac Merge branch 'dev' into zamilmajdy/multimediafilesupport 2025-01-26 05:49:24 +01:00
Zamil Majdy
11ef0486ff Merge branch 'dev' into zamilmajdy/multimediafilesupport 2025-01-25 14:08:50 +01:00
Zamil Majdy
d72c93c037 Merge branch 'dev' into zamilmajdy/multimediafilesupport 2025-01-25 03:56:12 +07:00
Zamil Majdy
841500f378 Merge branch 'dev' of github.com:Significant-Gravitas/AutoGPT into zamilmajdy/multimediafilesupport 2025-01-24 18:17:05 +01:00
Zamil Majdy
b052413ab4 Merge branch 'dev' into zamilmajdy/multimediafilesupport 2025-01-24 20:47:40 +07:00
Zamil Majdy
d31167958c Address changes 2025-01-24 14:45:26 +01:00
Zamil Majdy
a1a52b9569 Revert 2025-01-23 19:16:46 +01:00
Zamil Majdy
50ad4a34dd fix 2025-01-23 18:58:21 +01:00
Zamil Majdy
81c403e103 Merge remote-tracking branch 'origin/zamilmajdy/multimediafilesupport' into zamilmajdy/multimediafilesupport 2025-01-23 17:50:26 +01:00
Zamil Majdy
2bfaf4d80c Remove test 2025-01-23 17:50:18 +01:00
Zamil Majdy
31e49fb55c Merge branch 'dev' into zamilmajdy/multimediafilesupport 2025-01-23 23:48:57 +07:00
Zamil Majdy
da88da9a17 feat(platform): Add multimedia file support & add basic Video blocks 2025-01-23 17:48:07 +01:00
Zamil Majdy
fed426ff77 feat(platform): Add multimedia file support & add basic Video blocks 2025-01-23 17:46:45 +01:00
Zamil Majdy
33390ff7fe feat(platform): Add multimedia file support & add basic Video blocks 2025-01-23 17:39:19 +01:00

View File

@@ -4,6 +4,16 @@ from abc import ABC
 from enum import Enum, EnumMeta
 from json import JSONDecodeError
 from types import MappingProxyType
+from typing import TYPE_CHECKING, Any, Iterable, List, Literal, NamedTuple, Optional
+
+from pydantic import BaseModel, SecretStr
+
+from backend.data.model import NodeExecutionStats
+from backend.integrations.providers import ProviderName
+from backend.util.file import MediaFile, store_media_file
+
+if TYPE_CHECKING:
+    from enum import _EnumMemberT
 from typing import Any, Iterable, List, Literal, NamedTuple, Optional

 import anthropic
@@ -64,9 +74,43 @@ def AICredentialsField() -> AICredentials:
     )


+class ModelProvider(str, Enum):
+    OPENAI = "openai"
+    ANTHROPIC = "anthropic"
+    GROQ = "groq"
+    OLLAMA = "ollama"
+    OPEN_ROUTER = "open_router"
+
+
+class ModelCreator(str, Enum):
+    ANTHROPIC = "anthropic"
+    META = "meta"
+    GOOGLE = "google"
+    OPENAI = "openai"
+    MISTRAL = "mistral"
+    COHERE = "cohere"
+    DEEPSEEK = "deepseek"
+    PERPLEXITY = "perplexity"
+    QWEN = "qwen"
+    NOUS = "nous"
+    AMAZON = "amazon"
+    MICROSOFT = "microsoft"
+    GRYPHE = "gryphe"
+    EVA = "eva"
+
+
+class ModelCapabilities(NamedTuple):
+    supports_images: bool = False
+    supports_functions: bool = False
+    supports_vision: bool = False
+    is_local: bool = False
+
+
 class ModelMetadata(NamedTuple):
-    provider: str
+    provider: ModelProvider
+    creator: ModelCreator
     context_window: int
+    capabilities: ModelCapabilities = ModelCapabilities()
     max_output_tokens: int | None
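For reference, here is a standalone sketch of how the new metadata tuples from this hunk compose and how a capability check reads. The enums are trimmed to two members each, and max_output_tokens is given a None default so that shorter positional entries construct cleanly; that default is an assumption, since the hunk shows the field without one.

    # Standalone sketch of the metadata shapes introduced above (trimmed enums;
    # max_output_tokens defaulted to None, as noted in the text).
    from enum import Enum
    from typing import NamedTuple


    class ModelProvider(str, Enum):
        ANTHROPIC = "anthropic"
        GROQ = "groq"


    class ModelCreator(str, Enum):
        ANTHROPIC = "anthropic"
        META = "meta"


    class ModelCapabilities(NamedTuple):
        supports_images: bool = False
        supports_functions: bool = False
        supports_vision: bool = False
        is_local: bool = False


    class ModelMetadata(NamedTuple):
        provider: ModelProvider
        creator: ModelCreator
        context_window: int
        capabilities: ModelCapabilities = ModelCapabilities()
        max_output_tokens: int | None = None


    claude_sonnet = ModelMetadata(
        ModelProvider.ANTHROPIC,
        ModelCreator.ANTHROPIC,
        200000,
        ModelCapabilities(supports_images=True),
    )

    # Capability checks read naturally off the named tuples:
    assert claude_sonnet.capabilities.supports_images
    assert not claude_sonnet.capabilities.is_local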
@@ -154,68 +198,114 @@ class LlmModel(str, Enum, metaclass=LlmModelMeta):
 MODEL_METADATA = {
-    # https://platform.openai.com/docs/models
-    LlmModel.O3_MINI: ModelMetadata("openai", 200000, 100000),  # o3-mini-2025-01-31
-    LlmModel.O1: ModelMetadata("openai", 200000, 100000),  # o1-2024-12-17
-    LlmModel.O1_PREVIEW: ModelMetadata(
-        "openai", 128000, 32768
-    ),  # o1-preview-2024-09-12
-    LlmModel.O1_MINI: ModelMetadata("openai", 128000, 65536),  # o1-mini-2024-09-12
-    LlmModel.GPT4O_MINI: ModelMetadata(
-        "openai", 128000, 16384
-    ),  # gpt-4o-mini-2024-07-18
-    LlmModel.GPT4O: ModelMetadata("openai", 128000, 16384),  # gpt-4o-2024-08-06
-    LlmModel.GPT4_TURBO: ModelMetadata(
-        "openai", 128000, 4096
-    ),  # gpt-4-turbo-2024-04-09
-    LlmModel.GPT3_5_TURBO: ModelMetadata("openai", 16385, 4096),  # gpt-3.5-turbo-0125
-    # https://docs.anthropic.com/en/docs/about-claude/models
-    LlmModel.CLAUDE_3_5_SONNET: ModelMetadata(
-        "anthropic", 200000, 8192
-    ),  # claude-3-5-sonnet-20241022
-    LlmModel.CLAUDE_3_5_HAIKU: ModelMetadata(
-        "anthropic", 200000, 8192
-    ),  # claude-3-5-haiku-20241022
-    LlmModel.CLAUDE_3_HAIKU: ModelMetadata(
-        "anthropic", 200000, 4096
-    ),  # claude-3-haiku-20240307
-    # https://console.groq.com/docs/models
-    LlmModel.GEMMA2_9B: ModelMetadata("groq", 8192, None),
-    LlmModel.LLAMA3_3_70B: ModelMetadata("groq", 128000, 32768),
-    LlmModel.LLAMA3_1_8B: ModelMetadata("groq", 128000, 8192),
-    LlmModel.LLAMA3_70B: ModelMetadata("groq", 8192, None),
-    LlmModel.LLAMA3_8B: ModelMetadata("groq", 8192, None),
-    LlmModel.MIXTRAL_8X7B: ModelMetadata("groq", 32768, None),
-    LlmModel.DEEPSEEK_LLAMA_70B: ModelMetadata("groq", 128000, None),
-    # https://ollama.com/library
-    LlmModel.OLLAMA_LLAMA3_3: ModelMetadata("ollama", 8192, None),
-    LlmModel.OLLAMA_LLAMA3_2: ModelMetadata("ollama", 8192, None),
-    LlmModel.OLLAMA_LLAMA3_8B: ModelMetadata("ollama", 8192, None),
-    LlmModel.OLLAMA_LLAMA3_405B: ModelMetadata("ollama", 8192, None),
-    LlmModel.OLLAMA_DOLPHIN: ModelMetadata("ollama", 32768, None),
-    # https://openrouter.ai/models
-    LlmModel.GEMINI_FLASH_1_5: ModelMetadata("open_router", 1000000, 8192),
-    LlmModel.GROK_BETA: ModelMetadata("open_router", 131072, 131072),
-    LlmModel.MISTRAL_NEMO: ModelMetadata("open_router", 128000, 4096),
-    LlmModel.COHERE_COMMAND_R_08_2024: ModelMetadata("open_router", 128000, 4096),
-    LlmModel.COHERE_COMMAND_R_PLUS_08_2024: ModelMetadata("open_router", 128000, 4096),
-    LlmModel.EVA_QWEN_2_5_32B: ModelMetadata("open_router", 16384, 4096),
-    LlmModel.DEEPSEEK_CHAT: ModelMetadata("open_router", 64000, 2048),
-    LlmModel.PERPLEXITY_LLAMA_3_1_SONAR_LARGE_128K_ONLINE: ModelMetadata(
-        "open_router", 127072, 127072
-    ),
-    LlmModel.QWEN_QWQ_32B_PREVIEW: ModelMetadata("open_router", 32768, 32768),
-    LlmModel.NOUSRESEARCH_HERMES_3_LLAMA_3_1_405B: ModelMetadata(
-        "open_router", 131000, 4096
-    ),
-    LlmModel.NOUSRESEARCH_HERMES_3_LLAMA_3_1_70B: ModelMetadata(
-        "open_router", 12288, 12288
-    ),
-    LlmModel.AMAZON_NOVA_LITE_V1: ModelMetadata("open_router", 300000, 5120),
-    LlmModel.AMAZON_NOVA_MICRO_V1: ModelMetadata("open_router", 128000, 5120),
-    LlmModel.AMAZON_NOVA_PRO_V1: ModelMetadata("open_router", 300000, 5120),
-    LlmModel.MICROSOFT_WIZARDLM_2_8X22B: ModelMetadata("open_router", 65536, 4096),
-    LlmModel.GRYPHE_MYTHOMAX_L2_13B: ModelMetadata("open_router", 4096, 4096),
+    LlmModel.O1_PREVIEW: ModelMetadata(
+        ModelProvider.OPENAI,
+        ModelCreator.OPENAI,
+        32000,
+        ModelCapabilities(supports_images=True),
+    ),
+    LlmModel.O1_MINI: ModelMetadata(
+        ModelProvider.OPENAI,
+        ModelCreator.OPENAI,
+        62000,
+        ModelCapabilities(supports_images=True),
+    ),
+    LlmModel.GPT4O_MINI: ModelMetadata(
+        ModelProvider.OPENAI,
+        ModelCreator.OPENAI,
+        128000,
+        ModelCapabilities(supports_images=True),
+    ),
+    LlmModel.GPT4O: ModelMetadata(ModelProvider.OPENAI, ModelCreator.OPENAI, 128000),
+    LlmModel.GPT4_TURBO: ModelMetadata(
+        ModelProvider.OPENAI, ModelCreator.OPENAI, 128000
+    ),
+    LlmModel.GPT3_5_TURBO: ModelMetadata(
+        ModelProvider.OPENAI, ModelCreator.OPENAI, 16385
+    ),
+    LlmModel.CLAUDE_3_5_SONNET: ModelMetadata(
+        ModelProvider.ANTHROPIC,
+        ModelCreator.ANTHROPIC,
+        200000,
+        ModelCapabilities(supports_images=True),
+    ),
+    LlmModel.CLAUDE_3_HAIKU: ModelMetadata(
+        ModelProvider.ANTHROPIC,
+        ModelCreator.ANTHROPIC,
+        200000,
+        ModelCapabilities(supports_images=True),
+    ),
+    LlmModel.LLAMA3_8B: ModelMetadata(ModelProvider.GROQ, ModelCreator.META, 8192),
+    LlmModel.LLAMA3_70B: ModelMetadata(ModelProvider.GROQ, ModelCreator.META, 8192),
+    LlmModel.MIXTRAL_8X7B: ModelMetadata(
+        ModelProvider.GROQ, ModelCreator.MISTRAL, 32768
+    ),
+    LlmModel.GEMMA_7B: ModelMetadata(ModelProvider.GROQ, ModelCreator.GOOGLE, 8192),
+    LlmModel.GEMMA2_9B: ModelMetadata(ModelProvider.GROQ, ModelCreator.GOOGLE, 8192),
+    LlmModel.LLAMA3_1_405B: ModelMetadata(ModelProvider.GROQ, ModelCreator.META, 8192),
+    # Limited to 16k during preview
+    LlmModel.LLAMA3_1_70B: ModelMetadata(ModelProvider.GROQ, ModelCreator.META, 131072),
+    LlmModel.LLAMA3_1_8B: ModelMetadata(ModelProvider.GROQ, ModelCreator.META, 131072),
+    LlmModel.OLLAMA_LLAMA3_2: ModelMetadata(
+        ModelProvider.OLLAMA, ModelCreator.META, 8192, ModelCapabilities(is_local=True)
+    ),
+    LlmModel.OLLAMA_LLAMA3_8B: ModelMetadata(
+        ModelProvider.OLLAMA, ModelCreator.META, 8192, ModelCapabilities(is_local=True)
+    ),
+    LlmModel.OLLAMA_LLAMA3_405B: ModelMetadata(
+        ModelProvider.OLLAMA, ModelCreator.META, 8192, ModelCapabilities(is_local=True)
+    ),
+    LlmModel.OLLAMA_DOLPHIN: ModelMetadata(
+        ModelProvider.OLLAMA, ModelCreator.META, 32768, ModelCapabilities(is_local=True)
+    ),
+    LlmModel.GEMINI_FLASH_1_5_8B: ModelMetadata(
+        ModelProvider.OPEN_ROUTER, ModelCreator.GOOGLE, 8192
+    ),
+    LlmModel.GROK_BETA: ModelMetadata(
+        ModelProvider.OPEN_ROUTER, ModelCreator.GOOGLE, 8192
+    ),
+    LlmModel.MISTRAL_NEMO: ModelMetadata(
+        ModelProvider.OPEN_ROUTER, ModelCreator.MISTRAL, 4000
+    ),
+    LlmModel.COHERE_COMMAND_R_08_2024: ModelMetadata(
+        ModelProvider.OPEN_ROUTER, ModelCreator.COHERE, 4000
+    ),
+    LlmModel.COHERE_COMMAND_R_PLUS_08_2024: ModelMetadata(
+        ModelProvider.OPEN_ROUTER, ModelCreator.COHERE, 4000
+    ),
+    LlmModel.EVA_QWEN_2_5_32B: ModelMetadata(
+        ModelProvider.OPEN_ROUTER, ModelCreator.EVA, 4000
+    ),
+    LlmModel.DEEPSEEK_CHAT: ModelMetadata(
+        ModelProvider.OPEN_ROUTER, ModelCreator.DEEPSEEK, 8192
+    ),
+    LlmModel.PERPLEXITY_LLAMA_3_1_SONAR_LARGE_128K_ONLINE: ModelMetadata(
+        ModelProvider.OPEN_ROUTER, ModelCreator.PERPLEXITY, 8192
+    ),
+    LlmModel.QWEN_QWQ_32B_PREVIEW: ModelMetadata(
+        ModelProvider.OPEN_ROUTER, ModelCreator.QWEN, 4000
+    ),
+    LlmModel.NOUSRESEARCH_HERMES_3_LLAMA_3_1_405B: ModelMetadata(
+        ModelProvider.OPEN_ROUTER, ModelCreator.NOUS, 4000
+    ),
+    LlmModel.NOUSRESEARCH_HERMES_3_LLAMA_3_1_70B: ModelMetadata(
+        ModelProvider.OPEN_ROUTER, ModelCreator.NOUS, 4000
+    ),
+    LlmModel.AMAZON_NOVA_LITE_V1: ModelMetadata(
+        ModelProvider.OPEN_ROUTER, ModelCreator.AMAZON, 4000
+    ),
+    LlmModel.AMAZON_NOVA_MICRO_V1: ModelMetadata(
+        ModelProvider.OPEN_ROUTER, ModelCreator.AMAZON, 4000
+    ),
+    LlmModel.AMAZON_NOVA_PRO_V1: ModelMetadata(
+        ModelProvider.OPEN_ROUTER, ModelCreator.AMAZON, 4000
+    ),
+    LlmModel.MICROSOFT_WIZARDLM_2_8X22B: ModelMetadata(
+        ModelProvider.OPEN_ROUTER, ModelCreator.MICROSOFT, 4000
+    ),
+    LlmModel.GRYPHE_MYTHOMAX_L2_13B: ModelMetadata(
+        ModelProvider.OPEN_ROUTER, ModelCreator.GRYPHE, 4000
+    ),
 }

 for model in LlmModel:
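Reusing the stand-in classes from the sketch after the previous hunk, the new capabilities column makes it straightforward to select only image-capable models, for example to drive the model dropdown of an image block. The two string keys below are illustrative; the real table is keyed by LlmModel members.

    # Illustrative two-entry table keyed by plain strings so the snippet stays
    # short; reuses ModelProvider/ModelCreator/ModelCapabilities/ModelMetadata
    # from the earlier sketch.
    MODEL_METADATA = {
        "claude-3-5-sonnet": ModelMetadata(
            ModelProvider.ANTHROPIC,
            ModelCreator.ANTHROPIC,
            200000,
            ModelCapabilities(supports_images=True),
        ),
        "llama3-8b": ModelMetadata(ModelProvider.GROQ, ModelCreator.META, 8192),
    }

    image_capable = [
        name
        for name, meta in MODEL_METADATA.items()
        if meta.capabilities.supports_images
    ]
    print(image_capable)  # ['claude-3-5-sonnet']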
@@ -518,6 +608,11 @@ def llm_call(
         raise ValueError(f"Unsupported LLM provider: {provider}")


+class MessageWithMedia(Message):
+    role: MessageRole
+    content: str | MediaFile
+
+
 class AIBlockBase(Block, ABC):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
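A small standalone sketch of the MessageWithMedia shape added in this hunk. MediaFile is modelled here as a plain str alias (matching the "type alias string into MediaFile" commit in this compare), and MessageRole/Message are simplified stand-ins for the classes already defined in this module.

    from enum import Enum

    from pydantic import BaseModel

    MediaFile = str  # stand-in: a file path or data URI carried as a string


    class MessageRole(str, Enum):
        USER = "user"
        ASSISTANT = "assistant"
        SYSTEM = "system"


    class Message(BaseModel):
        role: MessageRole
        content: str


    class MessageWithMedia(Message):
        role: MessageRole
        content: str | MediaFile


    # A conversation history entry can now carry plain text or a media reference:
    history = [
        MessageWithMedia(role=MessageRole.USER, content="What is in this picture?"),
        MessageWithMedia(
            role=MessageRole.USER,
            content="data:image/png;base64,AAAA",  # placeholder payload
        ),
    ]
    print([m.model_dump() for m in history])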
@@ -540,7 +635,7 @@ class AIStructuredResponseGeneratorBlock(AIBlockBase):
         )
         model: LlmModel = SchemaField(
             title="LLM Model",
-            default=LlmModel.GPT4O,
+            default=LlmModel.CLAUDE_3_5_SONNET,
             description="The language model to use for answering the prompt.",
             advanced=False,
         )
@@ -1367,3 +1462,335 @@ class AIListGeneratorBlock(AIBlockBase):
logger.debug(f"Retry prompt: {prompt}") logger.debug(f"Retry prompt: {prompt}")
logger.debug("AIListGeneratorBlock.run completed") logger.debug("AIListGeneratorBlock.run completed")
class ClaudeWithImageBlock(Block):
"""Block for calling Claude API with support for images"""
class Input(BlockSchema):
prompt: str = SchemaField(
description="The prompt to send to the language model.",
placeholder="Enter your prompt here...",
)
expected_format: dict[str, str] = SchemaField(
description="Expected format of the response. If provided, the response will be validated against this format. "
"The keys should be the expected fields in the response, and the values should be the description of the field.",
)
model: LlmModel = SchemaField(
title="LLM Model",
default=LlmModel.CLAUDE_3_5_SONNET,
description="The language model to use for the conversation.",
)
credentials: AICredentials = AICredentialsField()
sys_prompt: str = SchemaField(
title="System Prompt",
default="",
description="The system prompt to provide additional context to the model.",
)
conversation_history: list[MessageWithMedia] = SchemaField(
default=[],
description="The conversation history to provide context for the prompt.",
)
retry: int = SchemaField(
title="Retry Count",
default=3,
description="Number of times to retry the LLM call if the response does not match the expected format.",
)
prompt_values: dict[str, str | MediaFile] = SchemaField(
advanced=False,
default={},
description="Values used to fill in the prompt. Images can be provided as base64 encoded data with MIME type.",
)
max_tokens: int | None = SchemaField(
advanced=True,
default=None,
description="The maximum number of tokens to generate in the chat completion.",
)
class Output(BlockSchema):
response: dict[str, Any] = SchemaField(
description="The response object generated by the language model."
)
error: str = SchemaField(description="Error message if the API call failed.")
def __init__(self):
super().__init__(
id="bc043b3e-2926-4ed7-b276-735535d1a945",
description="Call Claude with support for images to generate formatted object based on the given prompt.",
categories={BlockCategory.AI},
input_schema=ClaudeWithImageBlock.Input,
output_schema=ClaudeWithImageBlock.Output,
test_input={
"model": LlmModel.CLAUDE_3_5_SONNET,
"credentials": TEST_CREDENTIALS_INPUT,
"expected_format": {
"key1": "value1",
"key2": "value2",
},
"prompt": "Describe this image",
"prompt_values": {
"image": {
"data": "data:image/jpeg;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAAAXNSR0IArs4c6QAAAB5JREFUOE9jZPjP8J+BAsA4agDDaBgwjIYBw7AIAwCV5B/xAsMbygAAAABJRU5ErkJggg==",
}
},
},
test_credentials=TEST_CREDENTIALS,
test_output=("response", {"key1": "key1Value", "key2": "key2Value"}),
test_mock={
"llm_call": lambda *args, **kwargs: (
json.dumps(
{
"key1": "key1Value",
"key2": "key2Value",
}
),
0,
0,
)
},
)
@staticmethod
def llm_call(
credentials: APIKeyCredentials,
llm_model: LlmModel,
prompt: list[dict],
max_tokens: int | None = None,
) -> tuple[str, int, int]:
"""
Call the Claude API with support for images in the messages.
Args:
credentials: API credentials for Claude
llm_model: The LLM model to use (must be Claude)
prompt: List of message dictionaries that can include image content
max_tokens: Maximum tokens to generate
Returns:
tuple containing:
- The text response
- Number of input tokens used
- Number of output tokens used
"""
if llm_model.metadata.provider != "anthropic":
raise ValueError("Only Claude models are supported for image processing")
# Extract system prompt if present
system_messages = [p["content"] for p in prompt if p["role"] == "system"]
sysprompt = " ".join(system_messages)
# Build messages array with content that can include images
messages = []
last_role = None
for p in prompt:
if p["role"] in ["user", "assistant"]:
message_content = []
# Handle text content
if isinstance(p["content"], str):
message_content.append({"type": "text", "text": p["content"]})
# Handle mixed content array with images
elif isinstance(p["content"], list):
message_content.extend(p["content"])
if p["role"] != last_role:
messages.append({"role": p["role"], "content": message_content})
last_role = p["role"]
else:
# Combine with previous message if same role
messages[-1]["content"].extend(message_content)
client = anthropic.Anthropic(api_key=credentials.api_key.get_secret_value())
try:
resp = client.messages.create(
model=llm_model.value,
system=sysprompt,
messages=messages,
max_tokens=max_tokens or 8192,
)
if not resp.content:
raise ValueError("No content returned from Anthropic.")
return (
(
resp.content[0].name
if isinstance(resp.content[0], anthropic.types.ToolUseBlock)
else resp.content[0].text
),
resp.usage.input_tokens,
resp.usage.output_tokens,
)
except anthropic.APIError as e:
error_message = f"Anthropic API error: {str(e)}"
logger.error(error_message)
raise ValueError(error_message)
def run(
self,
input_data: Input,
*,
graph_exec_id: str,
credentials: APIKeyCredentials,
**kwargs,
) -> BlockOutput:
logger.debug(f"Calling Claude with input data: {input_data}")
# Start with any existing conversation history
prompt = [p.model_dump() for p in input_data.conversation_history]
def trim_prompt(s: str) -> str:
lines = s.strip().split("\n")
return "\n".join([line.strip().lstrip("|") for line in lines])
# Handle prompt values including images
content = []
values: dict[str, str | MediaFile] = input_data.prompt_values
# Add any images from prompt_values
for key, value in values.items():
# This is an image
if isinstance(value, MediaFile):
# media file is a base64 encoded image
# read the media file
media_path = store_media_file(
graph_exec_id=graph_exec_id, file=value, return_content=True
)
content.append(
{
"type": "image",
"source": {
"type": "base64",
"media_type": media_path.split(";")[0].split(":")[1],
"data": media_path,
},
}
)
# Add the text prompt
if input_data.prompt:
content.append(
{
"type": "text",
"text": fmt.format_string(
input_data.prompt,
{k: v for k, v in values.items() if isinstance(v, str)},
),
}
)
# Add system prompt if provided
if input_data.sys_prompt:
prompt.append(
{
"role": "system",
"content": fmt.format_string(input_data.sys_prompt, values),
}
)
# Add expected format if provided
if input_data.expected_format:
expected_format = [
f'"{k}": "{v}"' for k, v in input_data.expected_format.items()
]
format_prompt = ",\n ".join(expected_format)
sys_prompt = trim_prompt(
f"""
|Reply strictly only in the following JSON format:
|{{
| {format_prompt}
|}}
"""
)
prompt.append({"role": "system", "content": sys_prompt})
# Add the main prompt with images and text
prompt.append({"role": "user", "content": content})
def parse_response(resp: str) -> tuple[dict[str, Any], str | None]:
try:
parsed = json.loads(resp)
if not isinstance(parsed, dict):
return {}, f"Expected a dictionary, but got {type(parsed)}"
if input_data.expected_format:
miss_keys = set(input_data.expected_format.keys()) - set(
parsed.keys()
)
if miss_keys:
return parsed, f"Missing keys: {miss_keys}"
return parsed, None
except JSONDecodeError as e:
return {}, f"JSON decode error: {e}"
logger.info(f"Claude request: {prompt}")
retry_prompt = ""
llm_model = input_data.model
for retry_count in range(input_data.retry):
try:
response_text, input_token, output_token = self.llm_call(
credentials=credentials,
llm_model=llm_model,
prompt=prompt,
max_tokens=input_data.max_tokens,
)
self.merge_stats(
{
"input_token_count": input_token,
"output_token_count": output_token,
}
)
logger.info(f"Claude attempt-{retry_count} response: {response_text}")
if input_data.expected_format:
parsed_dict, parsed_error = parse_response(response_text)
if not parsed_error:
yield "response", {
k: (
json.loads(v)
if isinstance(v, str)
and v.startswith("[")
and v.endswith("]")
else (", ".join(v) if isinstance(v, list) else v)
)
for k, v in parsed_dict.items()
}
return
else:
yield "response", {"response": response_text}
return
retry_prompt = trim_prompt(
f"""
|This is your previous error response:
|--
|{response_text}
|--
|
|And this is the error:
|--
|{parsed_error}
|--
"""
)
prompt.append({"role": "user", "content": retry_prompt})
except Exception as e:
logger.exception(f"Error calling Claude: {e}")
retry_prompt = f"Error calling Claude: {e}"
finally:
self.merge_stats(
{
"llm_call_count": retry_count + 1,
"llm_retry_count": retry_count,
}
)
raise RuntimeError(retry_prompt)
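For reference, a standalone sketch (not part of the diff) of the image content block that Anthropic's Messages API expects: media_type carries only the MIME type and data carries only the raw base64 payload, so a data URI has to be split into those two parts before it is placed in source. The helper name and the placeholder payload below are illustrative.

    def data_uri_to_image_block(data_uri: str) -> dict:
        """Split 'data:<mime>;base64,<payload>' into the Anthropic image source shape."""
        header, _, b64_payload = data_uri.partition(",")  # "data:image/png;base64" / "<payload>"
        media_type = header.split(";")[0].split(":")[1]   # -> "image/png"
        return {
            "type": "image",
            "source": {
                "type": "base64",
                "media_type": media_type,
                "data": b64_payload,
            },
        }


    # Example of the message-list shape that ClaudeWithImageBlock.llm_call forwards
    # to client.messages.create: one user turn with an image block and a text block.
    uri = "data:image/png;base64,AAAA"  # placeholder payload
    messages = [
        {
            "role": "user",
            "content": [
                data_uri_to_image_block(uri),
                {"type": "text", "text": "Describe this image"},
            ],
        }
    ]
    print(messages[0]["content"][0]["source"]["media_type"])  # image/png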