mirror of
https://github.com/Significant-Gravitas/AutoGPT.git
synced 2026-04-08 03:00:28 -04:00
feat(blocks): Add pinecone and jina blocks (#8401)
* add pinecone and jina blocks * udpate based on comments * backend updates * frontend updates * type hint * more type hints * another type hint * update run signature * shared jina provider * fix linting * lockfile * remove noqa * remove noqa * remove vector db folder * line * update pincone credentials provider * fix imports * formating * update frontend * Test (#8425) * h * Discard changes to autogpt_platform/backend/poetry.lock * fix: broken dep --------- Co-authored-by: Nicholas Tindle <nicholas.tindle@agpt.co>
This commit is contained in:
39
autogpt_platform/backend/backend/blocks/jina/_auth.py
Normal file
39
autogpt_platform/backend/backend/blocks/jina/_auth.py
Normal file
@@ -0,0 +1,39 @@
|
||||
from typing import Literal
|
||||
|
||||
from autogpt_libs.supabase_integration_credentials_store.types import APIKeyCredentials
|
||||
from pydantic import SecretStr
|
||||
|
||||
from backend.data.model import CredentialsField, CredentialsMetaInput
|
||||
|
||||
# Jina only supports API-key authentication, so reuse the shared
# APIKeyCredentials type rather than defining a provider-specific model.
JinaCredentials = APIKeyCredentials
# Credentials-metadata input constrained to the "jina" provider and the
# "api_key" credential type; used as the annotation for block inputs.
JinaCredentialsInput = CredentialsMetaInput[
    Literal["jina"],
    Literal["api_key"],
]
|
||||
|
||||
|
||||
def JinaCredentialsField() -> JinaCredentialsInput:
    """Create a Jina credentials input on a block.

    Returns a ``CredentialsField`` restricted to API-key credentials
    for the ``jina`` provider.
    """
    field_kwargs = {
        "provider": "jina",
        "supported_credential_types": {"api_key"},
        "description": "The Jina integration can be used with an API Key.",
    }
    return CredentialsField(**field_kwargs)
|
||||
|
||||
|
||||
# Mock credentials used by block unit tests; id and key are placeholders.
TEST_CREDENTIALS = APIKeyCredentials(
    id="01234567-89ab-cdef-0123-456789abcdef",
    provider="jina",
    api_key=SecretStr("mock-jina-api-key"),
    title="Mock Jina API key",
    expires_at=None,
)
# Serialized credentials-metadata dict, in the shape blocks receive as input.
TEST_CREDENTIALS_INPUT = {
    "provider": TEST_CREDENTIALS.provider,
    "id": TEST_CREDENTIALS.id,
    "type": TEST_CREDENTIALS.type,
    # Fix: previously read TEST_CREDENTIALS.type, so the "title" field
    # carried the credential *type* string instead of its display title.
    "title": TEST_CREDENTIALS.title,
}
|
||||
69
autogpt_platform/backend/backend/blocks/jina/chunking.py
Normal file
69
autogpt_platform/backend/backend/blocks/jina/chunking.py
Normal file
@@ -0,0 +1,69 @@
|
||||
import requests
|
||||
|
||||
from backend.blocks.jina._auth import (
|
||||
JinaCredentials,
|
||||
JinaCredentialsField,
|
||||
JinaCredentialsInput,
|
||||
)
|
||||
from backend.data.block import Block, BlockCategory, BlockOutput, BlockSchema
|
||||
from backend.data.model import SchemaField
|
||||
|
||||
|
||||
class JinaChunkingBlock(Block):
    """Splits input texts into chunks via Jina AI's segmentation endpoint."""

    class Input(BlockSchema):
        texts: list = SchemaField(description="List of texts to chunk")

        credentials: JinaCredentialsInput = JinaCredentialsField()
        max_chunk_length: int = SchemaField(
            description="Maximum length of each chunk", default=1000
        )
        return_tokens: bool = SchemaField(
            description="Whether to return token information", default=False
        )

    class Output(BlockSchema):
        chunks: list = SchemaField(description="List of chunked texts")
        tokens: list = SchemaField(
            description="List of token information for each chunk", optional=True
        )

    def __init__(self):
        super().__init__(
            id="806fb15e-830f-4796-8692-557d300ff43c",
            description="Chunks texts using Jina AI's segmentation service",
            categories={BlockCategory.AI, BlockCategory.TEXT},
            input_schema=JinaChunkingBlock.Input,
            output_schema=JinaChunkingBlock.Output,
        )

    def run(
        self, input_data: Input, *, credentials: JinaCredentials, **kwargs
    ) -> BlockOutput:
        """Segment each input text with one API call per text, then yield the
        accumulated chunks (and token info when requested)."""
        endpoint = "https://segment.jina.ai/"
        request_headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {credentials.api_key.get_secret_value()}",
        }

        want_tokens = input_data.return_tokens
        chunks: list = []
        tokens: list = []

        for text in input_data.texts:
            # The segmenter expects stringified values ("true"/"false", numbers
            # as strings) rather than native JSON booleans/ints.
            payload = {
                "content": text,
                "return_tokens": str(want_tokens).lower(),
                "return_chunks": "true",
                "max_chunk_length": str(input_data.max_chunk_length),
            }

            response = requests.post(endpoint, headers=request_headers, json=payload)
            response.raise_for_status()
            body = response.json()

            chunks.extend(body.get("chunks", []))
            if want_tokens:
                tokens.extend(body.get("tokens", []))

        yield "chunks", chunks
        if want_tokens:
            yield "tokens", tokens
|
||||
44
autogpt_platform/backend/backend/blocks/jina/embeddings.py
Normal file
44
autogpt_platform/backend/backend/blocks/jina/embeddings.py
Normal file
@@ -0,0 +1,44 @@
|
||||
import requests
|
||||
|
||||
from backend.blocks.jina._auth import (
|
||||
JinaCredentials,
|
||||
JinaCredentialsField,
|
||||
JinaCredentialsInput,
|
||||
)
|
||||
from backend.data.block import Block, BlockCategory, BlockOutput, BlockSchema
|
||||
from backend.data.model import SchemaField
|
||||
|
||||
|
||||
class JinaEmbeddingBlock(Block):
    """Generates embeddings for a list of texts via Jina AI's embeddings API."""

    class Input(BlockSchema):
        texts: list = SchemaField(description="List of texts to embed")
        credentials: JinaCredentialsInput = JinaCredentialsField()
        model: str = SchemaField(
            description="Jina embedding model to use",
            default="jina-embeddings-v2-base-en",
        )

    class Output(BlockSchema):
        embeddings: list = SchemaField(description="List of embeddings")

    def __init__(self):
        super().__init__(
            id="7c56b3ab-62e7-43a2-a2dc-4ec4245660b6",
            description="Generates embeddings using Jina AI",
            categories={BlockCategory.AI},
            input_schema=JinaEmbeddingBlock.Input,
            output_schema=JinaEmbeddingBlock.Output,
        )

    def run(
        self, input_data: Input, *, credentials: JinaCredentials, **kwargs
    ) -> BlockOutput:
        """POST all texts in a single request and yield one embedding per text."""
        url = "https://api.jina.ai/v1/embeddings"
        headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {credentials.api_key.get_secret_value()}",
        }
        data = {"input": input_data.texts, "model": input_data.model}
        response = requests.post(url, headers=headers, json=data)
        # Fix: fail with a clear HTTPError on non-2xx responses instead of an
        # opaque KeyError when the error body lacks "data". This also matches
        # JinaChunkingBlock, which already checks the response status.
        response.raise_for_status()
        embeddings = [e["embedding"] for e in response.json()["data"]]
        yield "embeddings", embeddings
|
||||
131
autogpt_platform/backend/backend/blocks/pinecone.py
Normal file
131
autogpt_platform/backend/backend/blocks/pinecone.py
Normal file
@@ -0,0 +1,131 @@
|
||||
from typing import Literal
|
||||
|
||||
from autogpt_libs.supabase_integration_credentials_store import APIKeyCredentials
|
||||
from pinecone import Pinecone, ServerlessSpec
|
||||
|
||||
from backend.data.block import Block, BlockCategory, BlockOutput, BlockSchema
|
||||
from backend.data.model import CredentialsField, CredentialsMetaInput, SchemaField
|
||||
|
||||
# Pinecone only supports API-key authentication, so reuse the shared
# APIKeyCredentials type rather than defining a provider-specific model.
PineconeCredentials = APIKeyCredentials
# Credentials-metadata input constrained to the "pinecone" provider and the
# "api_key" credential type; used as the annotation for block inputs.
PineconeCredentialsInput = CredentialsMetaInput[
    Literal["pinecone"],
    Literal["api_key"],
]
|
||||
|
||||
|
||||
def PineconeCredentialsField() -> PineconeCredentialsInput:
    """Create a Pinecone credentials input on a block.

    Returns a ``CredentialsField`` restricted to API-key credentials
    for the ``pinecone`` provider.
    """
    field_kwargs = {
        "provider": "pinecone",
        "supported_credential_types": {"api_key"},
        "description": "The Pinecone integration can be used with an API Key.",
    }
    return CredentialsField(**field_kwargs)
|
||||
|
||||
|
||||
class PineconeInitBlock(Block):
    """Ensures a Pinecone serverless index exists, creating it if missing."""

    class Input(BlockSchema):
        credentials: PineconeCredentialsInput = PineconeCredentialsField()
        index_name: str = SchemaField(description="Name of the Pinecone index")
        dimension: int = SchemaField(
            description="Dimension of the vectors", default=768
        )
        metric: str = SchemaField(
            description="Distance metric for the index", default="cosine"
        )
        cloud: str = SchemaField(
            description="Cloud provider for serverless", default="aws"
        )
        region: str = SchemaField(
            description="Region for serverless", default="us-east-1"
        )

    class Output(BlockSchema):
        index: str = SchemaField(description="Name of the initialized Pinecone index")
        message: str = SchemaField(description="Status message")

    def __init__(self):
        super().__init__(
            id="48d8fdab-8f03-41f3-8407-8107ba11ec9b",
            description="Initializes a Pinecone index",
            categories={BlockCategory.LOGIC},
            input_schema=PineconeInitBlock.Input,
            output_schema=PineconeInitBlock.Output,
        )

    def run(
        self, input_data: Input, *, credentials: APIKeyCredentials, **kwargs
    ) -> BlockOutput:
        """Create the index when absent; on any failure, emit only a status
        message (best-effort — the "index" output is not produced on error)."""
        client = Pinecone(api_key=credentials.api_key.get_secret_value())

        try:
            existing_names = [idx.name for idx in client.list_indexes()]
            if input_data.index_name in existing_names:
                message = f"Using existing index: {input_data.index_name}"
            else:
                client.create_index(
                    name=input_data.index_name,
                    dimension=input_data.dimension,
                    metric=input_data.metric,
                    spec=ServerlessSpec(
                        cloud=input_data.cloud, region=input_data.region
                    ),
                )
                message = f"Created new index: {input_data.index_name}"

            yield "index", input_data.index_name
            yield "message", message
        except Exception as e:
            # Deliberate broad catch: surface the failure as a status message
            # instead of crashing the graph run.
            yield "message", f"Error initializing Pinecone index: {str(e)}"
|
||||
|
||||
|
||||
class PineconeQueryBlock(Block):
    """Runs a vector similarity query against an existing Pinecone index."""

    class Input(BlockSchema):
        credentials: PineconeCredentialsInput = PineconeCredentialsField()
        query_vector: list = SchemaField(description="Query vector")
        namespace: str = SchemaField(
            description="Namespace to query in Pinecone", default=""
        )
        top_k: int = SchemaField(
            description="Number of top results to return", default=3
        )
        include_values: bool = SchemaField(
            description="Whether to include vector values in the response",
            default=False,
        )
        include_metadata: bool = SchemaField(
            description="Whether to include metadata in the response", default=True
        )
        host: str = SchemaField(description="Host for pinecone")

    class Output(BlockSchema):
        results: dict = SchemaField(description="Query results from Pinecone")

    def __init__(self):
        super().__init__(
            id="9ad93d0f-91b4-4c9c-8eb1-82e26b4a01c5",
            description="Queries a Pinecone index",
            categories={BlockCategory.LOGIC},
            input_schema=PineconeQueryBlock.Input,
            output_schema=PineconeQueryBlock.Output,
        )

    def run(
        self,
        input_data: Input,
        *,
        credentials: APIKeyCredentials,
        **kwargs,
    ) -> BlockOutput:
        """Connect to the index at ``input_data.host`` and yield the raw query
        response.

        NOTE(review): the SDK's query return value is yielded as-is; the
        Output schema declares ``dict`` — confirm the object serializes as one.
        """
        client = Pinecone(api_key=credentials.api_key.get_secret_value())
        index = client.Index(host=input_data.host)
        query_response = index.query(
            namespace=input_data.namespace,
            vector=input_data.query_vector,
            top_k=input_data.top_k,
            include_values=input_data.include_values,
            include_metadata=input_data.include_metadata,
        )
        yield "results", query_response
|
||||
61
autogpt_platform/backend/poetry.lock
generated
61
autogpt_platform/backend/poetry.lock
generated
@@ -17,13 +17,13 @@ yarl = "*"
|
||||
|
||||
[[package]]
|
||||
name = "aiohappyeyeballs"
|
||||
version = "2.4.2"
|
||||
version = "2.4.3"
|
||||
description = "Happy Eyeballs for asyncio"
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
files = [
|
||||
{file = "aiohappyeyeballs-2.4.2-py3-none-any.whl", hash = "sha256:8522691d9a154ba1145b157d6d5c15e5c692527ce6a53c5e5f9876977f6dab2f"},
|
||||
{file = "aiohappyeyeballs-2.4.2.tar.gz", hash = "sha256:4ca893e6c5c1f5bf3888b04cb5a3bee24995398efef6e0b9f747b5e89d84fd74"},
|
||||
{file = "aiohappyeyeballs-2.4.3-py3-none-any.whl", hash = "sha256:8a7a83727b2756f394ab2895ea0765a0a8c475e3c71e98d43d76f22b4b435572"},
|
||||
{file = "aiohappyeyeballs-2.4.3.tar.gz", hash = "sha256:75cf88a15106a5002a8eb1dab212525c00d1f4c0fa96e551c9fbe6f09a621586"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -1841,6 +1841,57 @@ files = [
|
||||
{file = "pathspec-0.12.1.tar.gz", hash = "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pinecone"
|
||||
version = "5.3.1"
|
||||
description = "Pinecone client and SDK"
|
||||
optional = false
|
||||
python-versions = "<4.0,>=3.8"
|
||||
files = [
|
||||
{file = "pinecone-5.3.1-py3-none-any.whl", hash = "sha256:dd180963d29cd648f2d58becf18b21f150362aef80446dd3a7ed15cbe85bb4c7"},
|
||||
{file = "pinecone-5.3.1.tar.gz", hash = "sha256:a216630331753958f4ebcdc6e6d473402d17152f2194af3e19b3416c73b0dcc4"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
certifi = ">=2019.11.17"
|
||||
pinecone-plugin-inference = ">=1.1.0,<2.0.0"
|
||||
pinecone-plugin-interface = ">=0.0.7,<0.0.8"
|
||||
python-dateutil = ">=2.5.3"
|
||||
tqdm = ">=4.64.1"
|
||||
typing-extensions = ">=3.7.4"
|
||||
urllib3 = [
|
||||
{version = ">=1.26.0", markers = "python_version >= \"3.8\" and python_version < \"3.12\""},
|
||||
{version = ">=1.26.5", markers = "python_version >= \"3.12\" and python_version < \"4.0\""},
|
||||
]
|
||||
|
||||
[package.extras]
|
||||
grpc = ["googleapis-common-protos (>=1.53.0)", "grpcio (>=1.44.0)", "grpcio (>=1.59.0)", "lz4 (>=3.1.3)", "protobuf (>=4.25,<5.0)", "protoc-gen-openapiv2 (>=0.0.1,<0.0.2)"]
|
||||
|
||||
[[package]]
|
||||
name = "pinecone-plugin-inference"
|
||||
version = "1.1.0"
|
||||
description = "Embeddings plugin for Pinecone SDK"
|
||||
optional = false
|
||||
python-versions = "<4.0,>=3.8"
|
||||
files = [
|
||||
{file = "pinecone_plugin_inference-1.1.0-py3-none-any.whl", hash = "sha256:32c61aba21c9a28fdcd0e782204c1ca641aeb3fd6e42764fbf0de8186eb657ec"},
|
||||
{file = "pinecone_plugin_inference-1.1.0.tar.gz", hash = "sha256:283e5ae4590b901bf2179beb56fc3d1b715e63582f37ec7abb0708cf70912d1f"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
pinecone-plugin-interface = ">=0.0.7,<0.0.8"
|
||||
|
||||
[[package]]
|
||||
name = "pinecone-plugin-interface"
|
||||
version = "0.0.7"
|
||||
description = "Plugin interface for the Pinecone python client"
|
||||
optional = false
|
||||
python-versions = "<4.0,>=3.8"
|
||||
files = [
|
||||
{file = "pinecone_plugin_interface-0.0.7-py3-none-any.whl", hash = "sha256:875857ad9c9fc8bbc074dbe780d187a2afd21f5bfe0f3b08601924a61ef1bba8"},
|
||||
{file = "pinecone_plugin_interface-0.0.7.tar.gz", hash = "sha256:b8e6675e41847333aa13923cc44daa3f85676d7157324682dc1640588a982846"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "platformdirs"
|
||||
version = "4.3.6"
|
||||
@@ -2092,8 +2143,8 @@ files = [
|
||||
annotated-types = ">=0.6.0"
|
||||
pydantic-core = "2.23.4"
|
||||
typing-extensions = [
|
||||
{version = ">=4.12.2", markers = "python_version >= \"3.13\""},
|
||||
{version = ">=4.6.1", markers = "python_version < \"3.13\""},
|
||||
{version = ">=4.12.2", markers = "python_version >= \"3.13\""},
|
||||
]
|
||||
|
||||
[package.extras]
|
||||
@@ -3668,4 +3719,4 @@ type = ["pytest-mypy"]
|
||||
[metadata]
|
||||
lock-version = "2.0"
|
||||
python-versions = "^3.10"
|
||||
content-hash = "0962d61ced1a8154c64c6bbdb3f72aca558831adfbfda68eb66f39b535466f77"
|
||||
content-hash = "f9293b504ef813f98f43a8c3ab1b779ff9d7dc2e3bd9412fccc6da5102915e6b"
|
||||
|
||||
@@ -27,7 +27,7 @@ jsonref = "^1.1.0"
|
||||
jsonschema = "^4.22.0"
|
||||
ollama = "^0.3.0"
|
||||
openai = "^1.35.7"
|
||||
praw = "^7.7.1"
|
||||
praw = "~7.7.1"
|
||||
prisma = "^0.13.1"
|
||||
psutil = "^5.9.8"
|
||||
pydantic = "^2.7.2"
|
||||
@@ -45,7 +45,7 @@ websockets = "^12.0"
|
||||
youtube-transcript-api = "^0.6.2"
|
||||
googlemaps = "^4.10.0"
|
||||
replicate = "^0.34.1"
|
||||
|
||||
pinecone = "^5.3.1"
|
||||
[tool.poetry.group.dev.dependencies]
|
||||
poethepoet = "^0.26.1"
|
||||
httpx = "^0.27.0"
|
||||
@@ -55,6 +55,7 @@ ruff = "^0.5.2"
|
||||
pyright = "^1.1.371"
|
||||
isort = "^5.13.2"
|
||||
black = "^24.4.2"
|
||||
aiohappyeyeballs = "^2.4.3"
|
||||
|
||||
[build-system]
|
||||
requires = ["poetry-core"]
|
||||
|
||||
@@ -7,7 +7,7 @@ import useCredentials from "@/hooks/useCredentials";
|
||||
import { zodResolver } from "@hookform/resolvers/zod";
|
||||
import AutoGPTServerAPI from "@/lib/autogpt-server-api";
|
||||
import { NotionLogoIcon } from "@radix-ui/react-icons";
|
||||
import { FaGithub, FaGoogle } from "react-icons/fa";
|
||||
import { FaGithub, FaGoogle, FaKey } from "react-icons/fa";
|
||||
import { FC, useMemo, useState } from "react";
|
||||
import { CredentialsMetaInput } from "@/lib/autogpt-server-api/types";
|
||||
import { IconKey, IconKeyPlus, IconUserPlus } from "@/components/ui/icons";
|
||||
@@ -41,6 +41,8 @@ export const providerIcons: Record<string, React.FC<{ className?: string }>> = {
|
||||
github: FaGithub,
|
||||
google: FaGoogle,
|
||||
notion: NotionLogoIcon,
|
||||
jina: FaKey,
|
||||
pinecone: FaKey,
|
||||
};
|
||||
// --8<-- [end:ProviderIconsEmbed]
|
||||
|
||||
|
||||
@@ -12,7 +12,13 @@ import {
|
||||
} from "react";
|
||||
|
||||
// --8<-- [start:CredentialsProviderNames]
|
||||
const CREDENTIALS_PROVIDER_NAMES = ["github", "google", "notion"] as const;
|
||||
const CREDENTIALS_PROVIDER_NAMES = [
|
||||
"github",
|
||||
"google",
|
||||
"notion",
|
||||
"jina",
|
||||
"pinecone",
|
||||
] as const;
|
||||
|
||||
export type CredentialsProviderName =
|
||||
(typeof CREDENTIALS_PROVIDER_NAMES)[number];
|
||||
@@ -21,6 +27,8 @@ const providerDisplayNames: Record<CredentialsProviderName, string> = {
|
||||
github: "GitHub",
|
||||
google: "Google",
|
||||
notion: "Notion",
|
||||
jina: "Jina",
|
||||
pinecone: "Pinecone",
|
||||
};
|
||||
// --8<-- [end:CredentialsProviderNames]
|
||||
|
||||
|
||||
@@ -96,7 +96,7 @@ export type CredentialsType = "api_key" | "oauth2";
|
||||
|
||||
// --8<-- [start:BlockIOCredentialsSubSchema]
|
||||
export type BlockIOCredentialsSubSchema = BlockIOSubSchemaMeta & {
|
||||
credentials_provider: "github" | "google" | "notion";
|
||||
credentials_provider: "github" | "google" | "notion" | "jina" | "pinecone";
|
||||
credentials_scopes?: string[];
|
||||
credentials_types: Array<CredentialsType>;
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user