feat(blocks): Add pinecone and jina blocks (#8401)

* add pinecone and jina blocks * update based on comments * backend updates * frontend updates * type hint * more type hints * another type hint * update run signature * shared jina provider * fix linting * lockfile * remove noqa * remove noqa * remove vector db folder * line * update pinecone credentials provider * fix imports * formatting * update frontend * Test (#8425) * h * Discard changes to autogpt_platform/backend/poetry.lock * fix: broken dep --------- Co-authored-by: Nicholas Tindle <nicholas.tindle@agpt.co>
2026-04-08 03:00:28 -04:00 · 2024-10-24 21:01:53 +01:00
parent 6d812acb20
commit e2df6019fb
9 changed files with 355 additions and 10 deletions
--- a/autogpt_platform/backend/backend/blocks/jina/_auth.py
+++ b/autogpt_platform/backend/backend/blocks/jina/_auth.py
@@ -0,0 +1,39 @@
+from typing import Literal
+
+from autogpt_libs.supabase_integration_credentials_store.types import APIKeyCredentials
+from pydantic import SecretStr
+
+from backend.data.model import CredentialsField, CredentialsMetaInput
+
+# Jina credentials are plain API-key credentials, so this is only an alias.
+JinaCredentials = APIKeyCredentials
+# Credentials meta input specialised to the "jina" provider and the "api_key"
+# credential type.
+JinaCredentialsInput = CredentialsMetaInput[
+    Literal["jina"],
+    Literal["api_key"],
+]
+
+
+def JinaCredentialsField() -> JinaCredentialsInput:
+    """
+    Build the credentials field a block declares to request Jina credentials.
+
+    The field is bound to the "jina" provider and accepts only the "api_key"
+    credential type.
+    """
+    jina_field = CredentialsField(
+        provider="jina",
+        supported_credential_types={"api_key"},
+        description="The Jina integration can be used with an API Key.",
+    )
+    return jina_field
+
+
+# Mock API-key credentials for exercising the Jina blocks without a real key.
+TEST_CREDENTIALS = APIKeyCredentials(
+    id="01234567-89ab-cdef-0123-456789abcdef",
+    provider="jina",
+    api_key=SecretStr("mock-jina-api-key"),
+    title="Mock Jina API key",
+    expires_at=None,
+)
+# Credentials meta input that refers to TEST_CREDENTIALS; every value is read
+# from the mock above so the two cannot drift apart.
+TEST_CREDENTIALS_INPUT = {
+    "provider": TEST_CREDENTIALS.provider,
+    "id": TEST_CREDENTIALS.id,
+    "type": TEST_CREDENTIALS.type,
+    # The human-readable title, not the credential type.
+    "title": TEST_CREDENTIALS.title,
+}
--- a/autogpt_platform/backend/backend/blocks/jina/chunking.py
+++ b/autogpt_platform/backend/backend/blocks/jina/chunking.py
@@ -0,0 +1,69 @@
+import requests
+
+from backend.blocks.jina._auth import (
+    JinaCredentials,
+    JinaCredentialsField,
+    JinaCredentialsInput,
+)
+from backend.data.block import Block, BlockCategory, BlockOutput, BlockSchema
+from backend.data.model import SchemaField
+
+
+# Block that splits texts into chunks through Jina AI's segmentation endpoint.
+class JinaChunkingBlock(Block):
+    class Input(BlockSchema):
+        # Each text in this list is sent to the service in its own request.
+        texts: list = SchemaField(description="List of texts to chunk")
+
+        credentials: JinaCredentialsInput = JinaCredentialsField()
+        max_chunk_length: int = SchemaField(
+            description="Maximum length of each chunk", default=1000
+        )
+        return_tokens: bool = SchemaField(
+            description="Whether to return token information", default=False
+        )
+
+    class Output(BlockSchema):
+        chunks: list = SchemaField(description="List of chunked texts")
+        # Only yielded by run() when `return_tokens` was set on the input.
+        tokens: list = SchemaField(
+            description="List of token information for each chunk", optional=True
+        )
+
+    def __init__(self):
+        super().__init__(
+            id="806fb15e-830f-4796-8692-557d300ff43c",
+            description="Chunks texts using Jina AI's segmentation service",
+            categories={BlockCategory.AI, BlockCategory.TEXT},
+            input_schema=JinaChunkingBlock.Input,
+            output_schema=JinaChunkingBlock.Output,
+        )
+
+    def run(
+        self, input_data: Input, *, credentials: JinaCredentials, **kwargs
+    ) -> BlockOutput:
+        """
+        Segment every input text and yield the combined results.
+
+        One POST request is made per text, authenticated with the API key as a
+        bearer token; `raise_for_status()` aborts the run on an HTTP error
+        response.
+
+        Yields "chunks" with the chunks of all texts concatenated in input
+        order and, when `return_tokens` is set, "tokens" gathered the same way.
+        """
+        api_key = credentials.api_key.get_secret_value()
+        request_headers = {
+            "Content-Type": "application/json",
+            "Authorization": f"Bearer {api_key}",
+        }
+        want_tokens = input_data.return_tokens
+
+        collected_chunks = []
+        collected_tokens = []
+
+        for text in input_data.texts:
+            # Flags and lengths are sent as strings, e.g. "true" / "1000".
+            payload = {
+                "content": text,
+                "return_tokens": str(want_tokens).lower(),
+                "return_chunks": "true",
+                "max_chunk_length": str(input_data.max_chunk_length),
+            }
+            response = requests.post(
+                "https://segment.jina.ai/", headers=request_headers, json=payload
+            )
+            response.raise_for_status()
+            body = response.json()
+
+            collected_chunks += body.get("chunks", [])
+            if want_tokens:
+                collected_tokens += body.get("tokens", [])
+
+        yield "chunks", collected_chunks
+        if want_tokens:
+            yield "tokens", collected_tokens
--- a/autogpt_platform/backend/backend/blocks/jina/embeddings.py
+++ b/autogpt_platform/backend/backend/blocks/jina/embeddings.py
@@ -0,0 +1,44 @@
+import requests
+
+from backend.blocks.jina._auth import (
+    JinaCredentials,
+    JinaCredentialsField,
+    JinaCredentialsInput,
+)
+from backend.data.block import Block, BlockCategory, BlockOutput, BlockSchema
+from backend.data.model import SchemaField
+
+
+# Block that turns a list of texts into embeddings through Jina AI's
+# embeddings endpoint.
+class JinaEmbeddingBlock(Block):
+    class Input(BlockSchema):
+        # All texts are sent to the service together in a single request.
+        texts: list = SchemaField(description="List of texts to embed")
+        credentials: JinaCredentialsInput = JinaCredentialsField()
+        model: str = SchemaField(
+            description="Jina embedding model to use",
+            default="jina-embeddings-v2-base-en",
+        )
+
+    class Output(BlockSchema):
+        embeddings: list = SchemaField(description="List of embeddings")
+
+    def __init__(self):
+        super().__init__(
+            id="7c56b3ab-62e7-43a2-a2dc-4ec4245660b6",
+            description="Generates embeddings using Jina AI",
+            categories={BlockCategory.AI},
+            input_schema=JinaEmbeddingBlock.Input,
+            output_schema=JinaEmbeddingBlock.Output,
+        )
+
+    def run(
+        self, input_data: Input, *, credentials: JinaCredentials, **kwargs
+    ) -> BlockOutput:
+        """
+        Request embeddings for `input_data.texts` and yield them.
+
+        A single POST carries every text plus the selected model, authenticated
+        with the API key as a bearer token.
+
+        Yields "embeddings": the "embedding" value of each entry in the
+        response's "data" list, in response order.
+
+        Raises requests.HTTPError when the service answers with an HTTP error
+        status.
+        """
+        url = "https://api.jina.ai/v1/embeddings"
+        headers = {
+            "Content-Type": "application/json",
+            "Authorization": f"Bearer {credentials.api_key.get_secret_value()}",
+        }
+        data = {"input": input_data.texts, "model": input_data.model}
+        response = requests.post(url, headers=headers, json=data)
+        # Fail on HTTP errors here, as JinaChunkingBlock does, instead of
+        # hitting an opaque KeyError / JSON error while parsing an error body.
+        response.raise_for_status()
+        embeddings = [e["embedding"] for e in response.json()["data"]]
+        yield "embeddings", embeddings
--- a/autogpt_platform/backend/backend/blocks/pinecone.py
+++ b/autogpt_platform/backend/backend/blocks/pinecone.py
@@ -0,0 +1,131 @@
+from typing import Literal
+
+from autogpt_libs.supabase_integration_credentials_store import APIKeyCredentials
+from pinecone import Pinecone, ServerlessSpec
+
+from backend.data.block import Block, BlockCategory, BlockOutput, BlockSchema
+from backend.data.model import CredentialsField, CredentialsMetaInput, SchemaField
+
+# Pinecone credentials are plain API-key credentials, so this is only an alias.
+PineconeCredentials = APIKeyCredentials
+# Credentials meta input specialised to the "pinecone" provider and the
+# "api_key" credential type.
+PineconeCredentialsInput = CredentialsMetaInput[
+    Literal["pinecone"],
+    Literal["api_key"],
+]
+
+
+def PineconeCredentialsField() -> PineconeCredentialsInput:
+    """
+    Build the credentials field a block declares to request Pinecone
+    credentials.
+
+    The field is bound to the "pinecone" provider and accepts only the
+    "api_key" credential type.
+    """
+    pinecone_field = CredentialsField(
+        provider="pinecone",
+        supported_credential_types={"api_key"},
+        description="The Pinecone integration can be used with an API Key.",
+    )
+    return pinecone_field
+
+
+# Block that makes sure a serverless Pinecone index with the given name
+# exists, creating it when it is not already listed.
+class PineconeInitBlock(Block):
+    class Input(BlockSchema):
+        credentials: PineconeCredentialsInput = PineconeCredentialsField()
+        index_name: str = SchemaField(description="Name of the Pinecone index")
+        # dimension, metric, cloud and region are only used when the index has
+        # to be created.
+        dimension: int = SchemaField(
+            description="Dimension of the vectors", default=768
+        )
+        metric: str = SchemaField(
+            description="Distance metric for the index", default="cosine"
+        )
+        cloud: str = SchemaField(
+            description="Cloud provider for serverless", default="aws"
+        )
+        region: str = SchemaField(
+            description="Region for serverless", default="us-east-1"
+        )
+
+    class Output(BlockSchema):
+        index: str = SchemaField(description="Name of the initialized Pinecone index")
+        message: str = SchemaField(description="Status message")
+
+    def __init__(self):
+        super().__init__(
+            id="48d8fdab-8f03-41f3-8407-8107ba11ec9b",
+            description="Initializes a Pinecone index",
+            categories={BlockCategory.LOGIC},
+            input_schema=PineconeInitBlock.Input,
+            output_schema=PineconeInitBlock.Output,
+        )
+
+    def run(
+        self, input_data: Input, *, credentials: APIKeyCredentials, **kwargs
+    ) -> BlockOutput:
+        """
+        Create the requested index unless one with that name is already listed.
+
+        Yields "index" (the index name) followed by "message" describing
+        whether the index was created or reused. Any exception raised along
+        the way is caught and reported through a single "message" output
+        instead of propagating.
+        """
+        client = Pinecone(api_key=credentials.api_key.get_secret_value())
+        target = input_data.index_name
+
+        try:
+            known_names = [index.name for index in client.list_indexes()]
+            if target in known_names:
+                message = f"Using existing index: {target}"
+            else:
+                client.create_index(
+                    name=target,
+                    dimension=input_data.dimension,
+                    metric=input_data.metric,
+                    spec=ServerlessSpec(
+                        cloud=input_data.cloud, region=input_data.region
+                    ),
+                )
+                message = f"Created new index: {target}"
+
+            yield "index", target
+            yield "message", message
+        except Exception as exc:
+            # Best-effort: report the failure as a status message.
+            yield "message", f"Error initializing Pinecone index: {str(exc)}"
+
+
+# Block that runs a vector query against the Pinecone index served at `host`.
+class PineconeQueryBlock(Block):
+    class Input(BlockSchema):
+        credentials: PineconeCredentialsInput = PineconeCredentialsField()
+        query_vector: list = SchemaField(description="Query vector")
+        namespace: str = SchemaField(
+            description="Namespace to query in Pinecone", default=""
+        )
+        top_k: int = SchemaField(
+            description="Number of top results to return", default=3
+        )
+        include_values: bool = SchemaField(
+            description="Whether to include vector values in the response",
+            default=False,
+        )
+        include_metadata: bool = SchemaField(
+            description="Whether to include metadata in the response", default=True
+        )
+        # Passed to `Pinecone.Index(host=...)` to select the index to query.
+        host: str = SchemaField(description="Host for pinecone")
+
+    class Output(BlockSchema):
+        results: dict = SchemaField(description="Query results from Pinecone")
+
+    def __init__(self):
+        super().__init__(
+            id="9ad93d0f-91b4-4c9c-8eb1-82e26b4a01c5",
+            description="Queries a Pinecone index",
+            categories={BlockCategory.LOGIC},
+            input_schema=PineconeQueryBlock.Input,
+            output_schema=PineconeQueryBlock.Output,
+        )
+
+    def run(
+        self,
+        input_data: Input,
+        *,
+        credentials: APIKeyCredentials,
+        **kwargs,
+    ) -> BlockOutput:
+        """
+        Query the index at `input_data.host` and yield the raw response.
+
+        Yields "results": whatever `Index.query` returns for the given vector,
+        namespace, `top_k` and include flags. Exceptions from the Pinecone
+        client are not caught here.
+        """
+        client = Pinecone(api_key=credentials.api_key.get_secret_value())
+        index = client.Index(host=input_data.host)
+        query_args = {
+            "namespace": input_data.namespace,
+            "vector": input_data.query_vector,
+            "top_k": input_data.top_k,
+            "include_values": input_data.include_values,
+            "include_metadata": input_data.include_metadata,
+        }
+        yield "results", index.query(**query_args)
--- a/autogpt_platform/backend/poetry.lock
+++ b/autogpt_platform/backend/poetry.lock
@@ -17,13 +17,13 @@ yarl = "*"

 [[package]]
 name = "aiohappyeyeballs"
-version = "2.4.2"
+version = "2.4.3"
 description = "Happy Eyeballs for asyncio"
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "aiohappyeyeballs-2.4.2-py3-none-any.whl", hash = "sha256:8522691d9a154ba1145b157d6d5c15e5c692527ce6a53c5e5f9876977f6dab2f"},
-    {file = "aiohappyeyeballs-2.4.2.tar.gz", hash = "sha256:4ca893e6c5c1f5bf3888b04cb5a3bee24995398efef6e0b9f747b5e89d84fd74"},
+    {file = "aiohappyeyeballs-2.4.3-py3-none-any.whl", hash = "sha256:8a7a83727b2756f394ab2895ea0765a0a8c475e3c71e98d43d76f22b4b435572"},
+    {file = "aiohappyeyeballs-2.4.3.tar.gz", hash = "sha256:75cf88a15106a5002a8eb1dab212525c00d1f4c0fa96e551c9fbe6f09a621586"},
 ]

 [[package]]
@@ -1841,6 +1841,57 @@ files = [
    {file = "pathspec-0.12.1.tar.gz", hash = "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712"},
 ]

+[[package]]
+name = "pinecone"
+version = "5.3.1"
+description = "Pinecone client and SDK"
+optional = false
+python-versions = "<4.0,>=3.8"
+files = [
+    {file = "pinecone-5.3.1-py3-none-any.whl", hash = "sha256:dd180963d29cd648f2d58becf18b21f150362aef80446dd3a7ed15cbe85bb4c7"},
+    {file = "pinecone-5.3.1.tar.gz", hash = "sha256:a216630331753958f4ebcdc6e6d473402d17152f2194af3e19b3416c73b0dcc4"},
+]
+
+[package.dependencies]
+certifi = ">=2019.11.17"
+pinecone-plugin-inference = ">=1.1.0,<2.0.0"
+pinecone-plugin-interface = ">=0.0.7,<0.0.8"
+python-dateutil = ">=2.5.3"
+tqdm = ">=4.64.1"
+typing-extensions = ">=3.7.4"
+urllib3 = [
+    {version = ">=1.26.0", markers = "python_version >= \"3.8\" and python_version < \"3.12\""},
+    {version = ">=1.26.5", markers = "python_version >= \"3.12\" and python_version < \"4.0\""},
+]
+
+[package.extras]
+grpc = ["googleapis-common-protos (>=1.53.0)", "grpcio (>=1.44.0)", "grpcio (>=1.59.0)", "lz4 (>=3.1.3)", "protobuf (>=4.25,<5.0)", "protoc-gen-openapiv2 (>=0.0.1,<0.0.2)"]
+
+[[package]]
+name = "pinecone-plugin-inference"
+version = "1.1.0"
+description = "Embeddings plugin for Pinecone SDK"
+optional = false
+python-versions = "<4.0,>=3.8"
+files = [
+    {file = "pinecone_plugin_inference-1.1.0-py3-none-any.whl", hash = "sha256:32c61aba21c9a28fdcd0e782204c1ca641aeb3fd6e42764fbf0de8186eb657ec"},
+    {file = "pinecone_plugin_inference-1.1.0.tar.gz", hash = "sha256:283e5ae4590b901bf2179beb56fc3d1b715e63582f37ec7abb0708cf70912d1f"},
+]
+
+[package.dependencies]
+pinecone-plugin-interface = ">=0.0.7,<0.0.8"
+
+[[package]]
+name = "pinecone-plugin-interface"
+version = "0.0.7"
+description = "Plugin interface for the Pinecone python client"
+optional = false
+python-versions = "<4.0,>=3.8"
+files = [
+    {file = "pinecone_plugin_interface-0.0.7-py3-none-any.whl", hash = "sha256:875857ad9c9fc8bbc074dbe780d187a2afd21f5bfe0f3b08601924a61ef1bba8"},
+    {file = "pinecone_plugin_interface-0.0.7.tar.gz", hash = "sha256:b8e6675e41847333aa13923cc44daa3f85676d7157324682dc1640588a982846"},
+]
+
 [[package]]
 name = "platformdirs"
 version = "4.3.6"
@@ -2092,8 +2143,8 @@ files = [
 annotated-types = ">=0.6.0"
 pydantic-core = "2.23.4"
 typing-extensions = [
-    {version = ">=4.12.2", markers = "python_version >= \"3.13\""},
    {version = ">=4.6.1", markers = "python_version < \"3.13\""},
+    {version = ">=4.12.2", markers = "python_version >= \"3.13\""},
 ]

 [package.extras]
@@ -3668,4 +3719,4 @@ type = ["pytest-mypy"]
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.10"
-content-hash = "0962d61ced1a8154c64c6bbdb3f72aca558831adfbfda68eb66f39b535466f77"
+content-hash = "f9293b504ef813f98f43a8c3ab1b779ff9d7dc2e3bd9412fccc6da5102915e6b"
--- a/autogpt_platform/backend/pyproject.toml
+++ b/autogpt_platform/backend/pyproject.toml
@@ -27,7 +27,7 @@ jsonref = "^1.1.0"
 jsonschema = "^4.22.0"
 ollama = "^0.3.0"
 openai = "^1.35.7"
-praw = "^7.7.1"
+praw = "~7.7.1"
 prisma = "^0.13.1"
 psutil = "^5.9.8"
 pydantic = "^2.7.2"
@@ -45,7 +45,7 @@ websockets = "^12.0"
 youtube-transcript-api = "^0.6.2"
 googlemaps = "^4.10.0"
 replicate = "^0.34.1"
-
+pinecone = "^5.3.1"
 [tool.poetry.group.dev.dependencies]
 poethepoet = "^0.26.1"
 httpx = "^0.27.0"
@@ -55,6 +55,7 @@ ruff = "^0.5.2"
 pyright = "^1.1.371"
 isort = "^5.13.2"
 black = "^24.4.2"
+aiohappyeyeballs = "^2.4.3"

 [build-system]
 requires = ["poetry-core"]
--- a/autogpt_platform/frontend/src/components/integrations/credentials-input.tsx
+++ b/autogpt_platform/frontend/src/components/integrations/credentials-input.tsx
@@ -7,7 +7,7 @@ import useCredentials from "@/hooks/useCredentials";
 import { zodResolver } from "@hookform/resolvers/zod";
 import AutoGPTServerAPI from "@/lib/autogpt-server-api";
 import { NotionLogoIcon } from "@radix-ui/react-icons";
-import { FaGithub, FaGoogle } from "react-icons/fa";
+import { FaGithub, FaGoogle, FaKey } from "react-icons/fa";
 import { FC, useMemo, useState } from "react";
 import { CredentialsMetaInput } from "@/lib/autogpt-server-api/types";
 import { IconKey, IconKeyPlus, IconUserPlus } from "@/components/ui/icons";
@@ -41,6 +41,8 @@ export const providerIcons: Record<string, React.FC<{ className?: string }>> = {
  github: FaGithub,
  google: FaGoogle,
  notion: NotionLogoIcon,
+  jina: FaKey,
+  pinecone: FaKey,
 };
 // --8<-- [end:ProviderIconsEmbed]

--- a/autogpt_platform/frontend/src/components/integrations/credentials-provider.tsx
+++ b/autogpt_platform/frontend/src/components/integrations/credentials-provider.tsx
@@ -12,7 +12,13 @@ import {
 } from "react";

 // --8<-- [start:CredentialsProviderNames]
-const CREDENTIALS_PROVIDER_NAMES = ["github", "google", "notion"] as const;
+const CREDENTIALS_PROVIDER_NAMES = [
+  "github",
+  "google",
+  "notion",
+  "jina",
+  "pinecone",
+] as const;

 export type CredentialsProviderName =
  (typeof CREDENTIALS_PROVIDER_NAMES)[number];
@@ -21,6 +27,8 @@ const providerDisplayNames: Record<CredentialsProviderName, string> = {
  github: "GitHub",
  google: "Google",
  notion: "Notion",
+  jina: "Jina",
+  pinecone: "Pinecone",
 };
 // --8<-- [end:CredentialsProviderNames]

--- a/autogpt_platform/frontend/src/lib/autogpt-server-api/types.ts
+++ b/autogpt_platform/frontend/src/lib/autogpt-server-api/types.ts
@@ -96,7 +96,7 @@ export type CredentialsType = "api_key" | "oauth2";

 // --8<-- [start:BlockIOCredentialsSubSchema]
 export type BlockIOCredentialsSubSchema = BlockIOSubSchemaMeta & {
-  credentials_provider: "github" | "google" | "notion";
+  credentials_provider: "github" | "google" | "notion" | "jina" | "pinecone";
  credentials_scopes?: string[];
  credentials_types: Array<CredentialsType>;
 };