feat(blocks): Add pinecone and jina blocks (#8401)

* add pinecone and jina blocks

* update based on comments

* backend updates

* frontend updates

* type hint

* more type hints

* another type hint

* update run signature

* shared jina provider

* fix linting

* lockfile

* remove noqa

* remove noqa

* remove vector db folder

* line

* update pinecone credentials provider

* fix imports

* formatting

* update frontend

* Test (#8425)

* h

* Discard changes to autogpt_platform/backend/poetry.lock

* fix: broken dep

---------

Co-authored-by: Nicholas Tindle <nicholas.tindle@agpt.co>
This commit is contained in:
Aarushi
2024-10-24 21:01:53 +01:00
committed by GitHub
parent 6d812acb20
commit e2df6019fb
9 changed files with 355 additions and 10 deletions

View File

@@ -0,0 +1,39 @@
from typing import Literal
from autogpt_libs.supabase_integration_credentials_store.types import APIKeyCredentials
from pydantic import SecretStr
from backend.data.model import CredentialsField, CredentialsMetaInput
# The Jina provider uses API-key credentials, so the generic API-key model is
# re-exported under a provider-specific name.
JinaCredentials = APIKeyCredentials

# Credentials reference type for block inputs, parameterized with the
# provider literal "jina" and the credential-type literal "api_key".
JinaCredentialsInput = CredentialsMetaInput[Literal["jina"], Literal["api_key"]]
def JinaCredentialsField() -> JinaCredentialsInput:
    """Build the credentials input field for blocks that talk to Jina.

    Returns:
        The value produced by ``CredentialsField`` for the ``"jina"``
        provider, restricted to ``"api_key"`` credentials.
    """
    accepted_types = {"api_key"}
    help_text = "The Jina integration can be used with an API Key."
    return CredentialsField(
        provider="jina",
        supported_credential_types=accepted_types,
        description=help_text,
    )
# Mock API-key credentials (fixed id, fake key) intended for tests of the
# Jina blocks; no real secret is involved.
TEST_CREDENTIALS = APIKeyCredentials(
    id="01234567-89ab-cdef-0123-456789abcdef",
    provider="jina",
    api_key=SecretStr("mock-jina-api-key"),
    title="Mock Jina API key",
    expires_at=None,
)

# Block-input form of TEST_CREDENTIALS: a reference to the credentials by
# provider/id/type/title, without the secret itself.
TEST_CREDENTIALS_INPUT = {
    "provider": TEST_CREDENTIALS.provider,
    "id": TEST_CREDENTIALS.id,
    "type": TEST_CREDENTIALS.type,
    # Must mirror the credentials' title; this previously repeated `.type`.
    "title": TEST_CREDENTIALS.title,
}

View File

@@ -0,0 +1,69 @@
import requests
from backend.blocks.jina._auth import (
JinaCredentials,
JinaCredentialsField,
JinaCredentialsInput,
)
from backend.data.block import Block, BlockCategory, BlockOutput, BlockSchema
from backend.data.model import SchemaField
# Block that splits each input text into chunks by POSTing it to Jina AI's
# segmentation endpoint, optionally collecting token information as well.
class JinaChunkingBlock(Block):
    class Input(BlockSchema):
        texts: list = SchemaField(description="List of texts to chunk")

        credentials: JinaCredentialsInput = JinaCredentialsField()
        max_chunk_length: int = SchemaField(
            description="Maximum length of each chunk", default=1000
        )
        return_tokens: bool = SchemaField(
            description="Whether to return token information", default=False
        )

    class Output(BlockSchema):
        chunks: list = SchemaField(description="List of chunked texts")
        tokens: list = SchemaField(
            description="List of token information for each chunk", optional=True
        )

    def __init__(self):
        super().__init__(
            id="806fb15e-830f-4796-8692-557d300ff43c",
            description="Chunks texts using Jina AI's segmentation service",
            categories={BlockCategory.AI, BlockCategory.TEXT},
            input_schema=JinaChunkingBlock.Input,
            output_schema=JinaChunkingBlock.Output,
        )

    def run(
        self, input_data: Input, *, credentials: JinaCredentials, **kwargs
    ) -> BlockOutput:
        """Chunk every text in ``input_data.texts`` with one request per text.

        Yields:
            ``("chunks", list)`` with the chunks of all texts concatenated in
            input order, followed by ``("tokens", list)`` only when
            ``input_data.return_tokens`` is true.

        Raises:
            requests.HTTPError: if any request gets an error status
                (via ``raise_for_status``).
        """
        endpoint = "https://segment.jina.ai/"
        request_headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {credentials.api_key.get_secret_value()}",
        }

        want_tokens = input_data.return_tokens
        # Options that are identical for every request; note that the values
        # are sent as strings ("true"/"false", "1000"), not JSON primitives.
        shared_options = {
            "return_tokens": str(want_tokens).lower(),
            "return_chunks": "true",
            "max_chunk_length": str(input_data.max_chunk_length),
        }

        collected_chunks = []
        collected_tokens = []
        for text in input_data.texts:
            payload = {"content": text, **shared_options}
            reply = requests.post(endpoint, headers=request_headers, json=payload)
            reply.raise_for_status()
            body = reply.json()

            collected_chunks += body.get("chunks", [])
            if want_tokens:
                collected_tokens += body.get("tokens", [])

        yield "chunks", collected_chunks
        if want_tokens:
            yield "tokens", collected_tokens

View File

@@ -0,0 +1,44 @@
import requests
from backend.blocks.jina._auth import (
JinaCredentials,
JinaCredentialsField,
JinaCredentialsInput,
)
from backend.data.block import Block, BlockCategory, BlockOutput, BlockSchema
from backend.data.model import SchemaField
# Block that sends a list of texts to Jina AI's embeddings endpoint and
# outputs the embedding returned for each entry.
class JinaEmbeddingBlock(Block):
    class Input(BlockSchema):
        texts: list = SchemaField(description="List of texts to embed")
        credentials: JinaCredentialsInput = JinaCredentialsField()
        model: str = SchemaField(
            description="Jina embedding model to use",
            default="jina-embeddings-v2-base-en",
        )

    class Output(BlockSchema):
        embeddings: list = SchemaField(description="List of embeddings")

    def __init__(self):
        super().__init__(
            id="7c56b3ab-62e7-43a2-a2dc-4ec4245660b6",
            description="Generates embeddings using Jina AI",
            categories={BlockCategory.AI},
            input_schema=JinaEmbeddingBlock.Input,
            output_schema=JinaEmbeddingBlock.Output,
        )

    def run(
        self, input_data: Input, *, credentials: JinaCredentials, **kwargs
    ) -> BlockOutput:
        """Embed ``input_data.texts`` with a single request to Jina.

        Yields:
            ``("embeddings", list)`` holding the ``"embedding"`` value of each
            item in the response's ``"data"`` array.

        Raises:
            requests.HTTPError: if the API answers with an error status.
        """
        url = "https://api.jina.ai/v1/embeddings"
        headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {credentials.api_key.get_secret_value()}",
        }
        data = {"input": input_data.texts, "model": input_data.model}
        response = requests.post(url, headers=headers, json=data)
        # Surface HTTP failures (bad key, unknown model, rate limit) as an
        # HTTPError instead of an opaque KeyError on the missing "data" key,
        # matching what the chunking block does.
        response.raise_for_status()
        embeddings = [e["embedding"] for e in response.json()["data"]]
        yield "embeddings", embeddings

View File

@@ -0,0 +1,131 @@
from typing import Literal
from autogpt_libs.supabase_integration_credentials_store import APIKeyCredentials
from pinecone import Pinecone, ServerlessSpec
from backend.data.block import Block, BlockCategory, BlockOutput, BlockSchema
from backend.data.model import CredentialsField, CredentialsMetaInput, SchemaField
# The Pinecone provider uses API-key credentials, so the generic API-key
# model is re-exported under a provider-specific name.
PineconeCredentials = APIKeyCredentials

# Credentials reference type for block inputs, parameterized with the
# provider literal "pinecone" and the credential-type literal "api_key".
PineconeCredentialsInput = CredentialsMetaInput[
    Literal["pinecone"], Literal["api_key"]
]
def PineconeCredentialsField() -> PineconeCredentialsInput:
    """Build the credentials input field for blocks that talk to Pinecone.

    Returns:
        The value produced by ``CredentialsField`` for the ``"pinecone"``
        provider, restricted to ``"api_key"`` credentials.
    """
    accepted_types = {"api_key"}
    help_text = "The Pinecone integration can be used with an API Key."
    return CredentialsField(
        provider="pinecone",
        supported_credential_types=accepted_types,
        description=help_text,
    )
# Block that makes sure a serverless Pinecone index with the requested name
# exists, creating it when it is not among the account's current indexes.
class PineconeInitBlock(Block):
    class Input(BlockSchema):
        credentials: PineconeCredentialsInput = PineconeCredentialsField()
        index_name: str = SchemaField(description="Name of the Pinecone index")
        dimension: int = SchemaField(
            description="Dimension of the vectors", default=768
        )
        metric: str = SchemaField(
            description="Distance metric for the index", default="cosine"
        )
        cloud: str = SchemaField(
            description="Cloud provider for serverless", default="aws"
        )
        region: str = SchemaField(
            description="Region for serverless", default="us-east-1"
        )

    class Output(BlockSchema):
        index: str = SchemaField(description="Name of the initialized Pinecone index")
        message: str = SchemaField(description="Status message")

    def __init__(self):
        super().__init__(
            id="48d8fdab-8f03-41f3-8407-8107ba11ec9b",
            description="Initializes a Pinecone index",
            categories={BlockCategory.LOGIC},
            input_schema=PineconeInitBlock.Input,
            output_schema=PineconeInitBlock.Output,
        )

    def run(
        self, input_data: Input, *, credentials: APIKeyCredentials, **kwargs
    ) -> BlockOutput:
        """Create the index named ``input_data.index_name`` unless it exists.

        Yields:
            ``("index", name)`` and then ``("message", status)`` on success.
            If anything inside the ``try`` raises, a single
            ``("message", "Error initializing Pinecone index: ...")`` is
            yielded instead of propagating the exception.
        """
        client = Pinecone(api_key=credentials.api_key.get_secret_value())

        try:
            wanted = input_data.index_name
            known_names = [existing.name for existing in client.list_indexes()]

            if wanted in known_names:
                status = f"Using existing index: {wanted}"
            else:
                serverless = ServerlessSpec(
                    cloud=input_data.cloud, region=input_data.region
                )
                client.create_index(
                    name=wanted,
                    dimension=input_data.dimension,
                    metric=input_data.metric,
                    spec=serverless,
                )
                status = f"Created new index: {wanted}"

            yield "index", wanted
            yield "message", status
        except Exception as err:
            # Failures are reported through the "message" output, not raised.
            yield "message", f"Error initializing Pinecone index: {str(err)}"
# Block that runs a vector similarity query against the Pinecone index
# reachable at the given host and outputs the matches.
class PineconeQueryBlock(Block):
    class Input(BlockSchema):
        credentials: PineconeCredentialsInput = PineconeCredentialsField()
        query_vector: list = SchemaField(description="Query vector")
        namespace: str = SchemaField(
            description="Namespace to query in Pinecone", default=""
        )
        top_k: int = SchemaField(
            description="Number of top results to return", default=3
        )
        include_values: bool = SchemaField(
            description="Whether to include vector values in the response",
            default=False,
        )
        include_metadata: bool = SchemaField(
            description="Whether to include metadata in the response", default=True
        )
        host: str = SchemaField(description="Host for pinecone")

    class Output(BlockSchema):
        results: dict = SchemaField(description="Query results from Pinecone")

    def __init__(self):
        super().__init__(
            id="9ad93d0f-91b4-4c9c-8eb1-82e26b4a01c5",
            description="Queries a Pinecone index",
            categories={BlockCategory.LOGIC},
            input_schema=PineconeQueryBlock.Input,
            output_schema=PineconeQueryBlock.Output,
        )

    def run(
        self,
        input_data: Input,
        *,
        credentials: APIKeyCredentials,
        **kwargs,
    ) -> BlockOutput:
        """Query the index at ``input_data.host`` with ``query_vector``.

        Yields:
            ``("results", dict)``: the query response converted to a plain
            dictionary, matching the declared ``results: dict`` output.
        """
        pc = Pinecone(api_key=credentials.api_key.get_secret_value())
        idx = pc.Index(host=input_data.host)
        response = idx.query(
            namespace=input_data.namespace,
            vector=input_data.query_vector,
            top_k=input_data.top_k,
            include_values=input_data.include_values,
            include_metadata=input_data.include_metadata,
        )
        # The SDK returns a QueryResponse model object rather than a dict;
        # convert it so the output honours the `results: dict` schema.
        yield "results", response.to_dict()

View File

@@ -17,13 +17,13 @@ yarl = "*"
[[package]]
name = "aiohappyeyeballs"
version = "2.4.2"
version = "2.4.3"
description = "Happy Eyeballs for asyncio"
optional = false
python-versions = ">=3.8"
files = [
{file = "aiohappyeyeballs-2.4.2-py3-none-any.whl", hash = "sha256:8522691d9a154ba1145b157d6d5c15e5c692527ce6a53c5e5f9876977f6dab2f"},
{file = "aiohappyeyeballs-2.4.2.tar.gz", hash = "sha256:4ca893e6c5c1f5bf3888b04cb5a3bee24995398efef6e0b9f747b5e89d84fd74"},
{file = "aiohappyeyeballs-2.4.3-py3-none-any.whl", hash = "sha256:8a7a83727b2756f394ab2895ea0765a0a8c475e3c71e98d43d76f22b4b435572"},
{file = "aiohappyeyeballs-2.4.3.tar.gz", hash = "sha256:75cf88a15106a5002a8eb1dab212525c00d1f4c0fa96e551c9fbe6f09a621586"},
]
[[package]]
@@ -1841,6 +1841,57 @@ files = [
{file = "pathspec-0.12.1.tar.gz", hash = "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712"},
]
[[package]]
name = "pinecone"
version = "5.3.1"
description = "Pinecone client and SDK"
optional = false
python-versions = "<4.0,>=3.8"
files = [
{file = "pinecone-5.3.1-py3-none-any.whl", hash = "sha256:dd180963d29cd648f2d58becf18b21f150362aef80446dd3a7ed15cbe85bb4c7"},
{file = "pinecone-5.3.1.tar.gz", hash = "sha256:a216630331753958f4ebcdc6e6d473402d17152f2194af3e19b3416c73b0dcc4"},
]
[package.dependencies]
certifi = ">=2019.11.17"
pinecone-plugin-inference = ">=1.1.0,<2.0.0"
pinecone-plugin-interface = ">=0.0.7,<0.0.8"
python-dateutil = ">=2.5.3"
tqdm = ">=4.64.1"
typing-extensions = ">=3.7.4"
urllib3 = [
{version = ">=1.26.0", markers = "python_version >= \"3.8\" and python_version < \"3.12\""},
{version = ">=1.26.5", markers = "python_version >= \"3.12\" and python_version < \"4.0\""},
]
[package.extras]
grpc = ["googleapis-common-protos (>=1.53.0)", "grpcio (>=1.44.0)", "grpcio (>=1.59.0)", "lz4 (>=3.1.3)", "protobuf (>=4.25,<5.0)", "protoc-gen-openapiv2 (>=0.0.1,<0.0.2)"]
[[package]]
name = "pinecone-plugin-inference"
version = "1.1.0"
description = "Embeddings plugin for Pinecone SDK"
optional = false
python-versions = "<4.0,>=3.8"
files = [
{file = "pinecone_plugin_inference-1.1.0-py3-none-any.whl", hash = "sha256:32c61aba21c9a28fdcd0e782204c1ca641aeb3fd6e42764fbf0de8186eb657ec"},
{file = "pinecone_plugin_inference-1.1.0.tar.gz", hash = "sha256:283e5ae4590b901bf2179beb56fc3d1b715e63582f37ec7abb0708cf70912d1f"},
]
[package.dependencies]
pinecone-plugin-interface = ">=0.0.7,<0.0.8"
[[package]]
name = "pinecone-plugin-interface"
version = "0.0.7"
description = "Plugin interface for the Pinecone python client"
optional = false
python-versions = "<4.0,>=3.8"
files = [
{file = "pinecone_plugin_interface-0.0.7-py3-none-any.whl", hash = "sha256:875857ad9c9fc8bbc074dbe780d187a2afd21f5bfe0f3b08601924a61ef1bba8"},
{file = "pinecone_plugin_interface-0.0.7.tar.gz", hash = "sha256:b8e6675e41847333aa13923cc44daa3f85676d7157324682dc1640588a982846"},
]
[[package]]
name = "platformdirs"
version = "4.3.6"
@@ -2092,8 +2143,8 @@ files = [
annotated-types = ">=0.6.0"
pydantic-core = "2.23.4"
typing-extensions = [
{version = ">=4.12.2", markers = "python_version >= \"3.13\""},
{version = ">=4.6.1", markers = "python_version < \"3.13\""},
{version = ">=4.12.2", markers = "python_version >= \"3.13\""},
]
[package.extras]
@@ -3668,4 +3719,4 @@ type = ["pytest-mypy"]
[metadata]
lock-version = "2.0"
python-versions = "^3.10"
content-hash = "0962d61ced1a8154c64c6bbdb3f72aca558831adfbfda68eb66f39b535466f77"
content-hash = "f9293b504ef813f98f43a8c3ab1b779ff9d7dc2e3bd9412fccc6da5102915e6b"

View File

@@ -27,7 +27,7 @@ jsonref = "^1.1.0"
jsonschema = "^4.22.0"
ollama = "^0.3.0"
openai = "^1.35.7"
praw = "^7.7.1"
praw = "~7.7.1"
prisma = "^0.13.1"
psutil = "^5.9.8"
pydantic = "^2.7.2"
@@ -45,7 +45,7 @@ websockets = "^12.0"
youtube-transcript-api = "^0.6.2"
googlemaps = "^4.10.0"
replicate = "^0.34.1"
pinecone = "^5.3.1"
[tool.poetry.group.dev.dependencies]
poethepoet = "^0.26.1"
httpx = "^0.27.0"
@@ -55,6 +55,7 @@ ruff = "^0.5.2"
pyright = "^1.1.371"
isort = "^5.13.2"
black = "^24.4.2"
aiohappyeyeballs = "^2.4.3"
[build-system]
requires = ["poetry-core"]

View File

@@ -7,7 +7,7 @@ import useCredentials from "@/hooks/useCredentials";
import { zodResolver } from "@hookform/resolvers/zod";
import AutoGPTServerAPI from "@/lib/autogpt-server-api";
import { NotionLogoIcon } from "@radix-ui/react-icons";
import { FaGithub, FaGoogle } from "react-icons/fa";
import { FaGithub, FaGoogle, FaKey } from "react-icons/fa";
import { FC, useMemo, useState } from "react";
import { CredentialsMetaInput } from "@/lib/autogpt-server-api/types";
import { IconKey, IconKeyPlus, IconUserPlus } from "@/components/ui/icons";
@@ -41,6 +41,8 @@ export const providerIcons: Record<string, React.FC<{ className?: string }>> = {
github: FaGithub,
google: FaGoogle,
notion: NotionLogoIcon,
jina: FaKey,
pinecone: FaKey,
};
// --8<-- [end:ProviderIconsEmbed]

View File

@@ -12,7 +12,13 @@ import {
} from "react";
// --8<-- [start:CredentialsProviderNames]
const CREDENTIALS_PROVIDER_NAMES = ["github", "google", "notion"] as const;
const CREDENTIALS_PROVIDER_NAMES = [
"github",
"google",
"notion",
"jina",
"pinecone",
] as const;
export type CredentialsProviderName =
(typeof CREDENTIALS_PROVIDER_NAMES)[number];
@@ -21,6 +27,8 @@ const providerDisplayNames: Record<CredentialsProviderName, string> = {
github: "GitHub",
google: "Google",
notion: "Notion",
jina: "Jina",
pinecone: "Pinecone",
};
// --8<-- [end:CredentialsProviderNames]

View File

@@ -96,7 +96,7 @@ export type CredentialsType = "api_key" | "oauth2";
// --8<-- [start:BlockIOCredentialsSubSchema]
export type BlockIOCredentialsSubSchema = BlockIOSubSchemaMeta & {
credentials_provider: "github" | "google" | "notion";
credentials_provider: "github" | "google" | "notion" | "jina" | "pinecone";
credentials_scopes?: string[];
credentials_types: Array<CredentialsType>;
};