Compare commits

...

3 Commits

Author SHA1 Message Date
Otto
f3df841ea3 fix: correct docstring - pin validation is handled elsewhere 2026-02-14 13:07:52 +00:00
Otto
6cb794cbf8 fix(builder): auto-cleanup invalid/orphan edges during graph operations
Fixes graph desync issues where edge deletions don't persist, causing
stale connections that users cannot remove.

Frontend changes:
- edgeStore: Validate links during addLinks() - skip edges referencing
  non-existent nodes
- edgeStore: Filter invalid edges in getBackendLinks() before save
- useSaveGraph: Sync edge store with authoritative backend state after
  save to prevent desync

Backend changes:
- graph.py: Add prune_invalid_links() method that removes:
  - Links referencing non-existent nodes
  - Links with invalid block IDs
- Called during graph validation to auto-cleanup orphan edges

This ensures:
1. Invalid edges are filtered out when loading a graph
2. Invalid edges are not sent to backend during save
3. Frontend syncs with backend state after save
4. Backend cleans up any orphan edges that slip through

Closes: SECRT-1959
2026-02-14 12:42:23 +00:00
DEEVEN SERU
b8f5c208d0 Handle errors in Jina ExtractWebsiteContentBlock (#12048)
## Summary
- catch Jina reader client/server errors in ExtractWebsiteContentBlock
and surface a clear error output keyed to the user URL
- guard empty responses to return an explicit error instead of yielding
blank content
- add regression tests covering the happy path and HTTP client failures
via a monkeypatched fetch

## Testing
- not run (pytest unavailable in this environment)

---------

Co-authored-by: Nicholas Tindle <nicktindle@outlook.com>
Co-authored-by: Nicholas Tindle <nicholas.tindle@agpt.co>
2026-02-13 19:15:09 +00:00
5 changed files with 190 additions and 3 deletions

View File

@@ -17,6 +17,7 @@ from backend.blocks.jina._auth import (
from backend.blocks.search import GetRequest
from backend.data.model import SchemaField
from backend.util.exceptions import BlockExecutionError
from backend.util.request import HTTPClientError, HTTPServerError, validate_url
class SearchTheWebBlock(Block, GetRequest):
@@ -110,7 +111,12 @@ class ExtractWebsiteContentBlock(Block, GetRequest):
self, input_data: Input, *, credentials: JinaCredentials, **kwargs
) -> BlockOutput:
if input_data.raw_content:
url = input_data.url
try:
parsed_url, _, _ = await validate_url(input_data.url, [])
url = parsed_url.geturl()
except ValueError as e:
yield "error", f"Invalid URL: {e}"
return
headers = {}
else:
url = f"https://r.jina.ai/{input_data.url}"
@@ -119,5 +125,20 @@ class ExtractWebsiteContentBlock(Block, GetRequest):
"Authorization": f"Bearer {credentials.api_key.get_secret_value()}",
}
content = await self.get_request(url, json=False, headers=headers)
try:
content = await self.get_request(url, json=False, headers=headers)
except HTTPClientError as e:
yield "error", f"Client error ({e.status_code}) fetching {input_data.url}: {e}"
return
except HTTPServerError as e:
yield "error", f"Server error ({e.status_code}) fetching {input_data.url}: {e}"
return
except Exception as e:
yield "error", f"Failed to fetch {input_data.url}: {e}"
return
if not content:
yield "error", f"No content returned for {input_data.url}"
return
yield "content", content

View File

@@ -867,9 +867,67 @@ class GraphModel(Graph, GraphMeta):
return node_errors
@staticmethod
def prune_invalid_links(graph: BaseGraph) -> int:
    """
    Drop links that cannot be resolved and report how many were dropped.

    A link is kept only when all of the following hold:
    - both `source_id` and `sink_id` match the id of a node in `graph.nodes`
    - `get_block` returns a truthy value for the source node's `block_id`
    - `get_block` returns a truthy value for the sink node's `block_id`

    `graph.links` is reassigned to the list of surviving links, so the graph
    passed in is modified. Every dropped link is logged at warning level, and
    a summary is logged at info level when at least one link was dropped.

    Note: Pin name validation is handled separately in _validate_graph_structure.

    Returns the number of links pruned.
    """
    nodes_by_id = {node.id: node for node in graph.nodes}

    def rejection_reason(link):
        """Return the warning text for a link that must go, or None to keep it."""
        src = nodes_by_id.get(link.source_id)
        dst = nodes_by_id.get(link.sink_id)

        # Dangling endpoint: at least one side points at a node that is gone.
        if not (src and dst):
            return (
                f"Pruning orphan link: source={link.source_id}, sink={link.sink_id} "
                f"- node(s) not found"
            )

        # Block lookups are checked source first, and the sink lookup is only
        # attempted once the source block has resolved.
        if not get_block(src.block_id):
            return f"Pruning link with invalid source block: {src.block_id}"
        if not get_block(dst.block_id):
            return f"Pruning link with invalid sink block: {dst.block_id}"

        return None

    kept = []
    for candidate in graph.links:
        reason = rejection_reason(candidate)
        if reason is None:
            kept.append(candidate)
        else:
            logger.warning(reason)

    removed = len(graph.links) - len(kept)
    graph.links = kept

    if removed > 0:
        logger.info(f"Pruned {removed} invalid link(s) from graph {graph.id}")
    return removed
@staticmethod
def _validate_graph_structure(graph: BaseGraph):
"""Validate graph structure (links, connections, etc.)"""
# First, prune invalid links to clean up orphan edges
GraphModel.prune_invalid_links(graph)
node_map = {v.id: v for v in graph.nodes}
def is_static_output_block(nid: str) -> bool:

View File

@@ -0,0 +1,66 @@
from typing import cast
import pytest
from backend.blocks.jina._auth import (
TEST_CREDENTIALS,
TEST_CREDENTIALS_INPUT,
JinaCredentialsInput,
)
from backend.blocks.jina.search import ExtractWebsiteContentBlock
from backend.util.request import HTTPClientError
@pytest.mark.asyncio
async def test_extract_website_content_returns_content(monkeypatch):
    """
    Happy path with raw_content=True.

    The stubbed `get_request` asserts it is called with the original URL and
    empty headers, then returns a fixed body. The block is expected to emit
    that body under "content" and to emit no "error" output.
    """
    extractor = ExtractWebsiteContentBlock()
    payload = extractor.Input(
        url="https://example.com",
        credentials=cast(JinaCredentialsInput, TEST_CREDENTIALS_INPUT),
        raw_content=True,
    )

    async def stub_get_request(url, json=False, headers=None):
        # The stub pins both the URL and the headers the block passes in.
        assert url == "https://example.com"
        assert headers == {}
        return "page content"

    monkeypatch.setattr(extractor, "get_request", stub_get_request)

    emitted = []
    async for item in extractor.run(
        input_data=payload, credentials=TEST_CREDENTIALS
    ):
        emitted.append(item)

    assert ("content", "page content") in emitted
    assert not any(name == "error" for name, _ in emitted)
@pytest.mark.asyncio
async def test_extract_website_content_handles_http_error(monkeypatch):
    """
    Failure path with raw_content=False.

    The stubbed `get_request` always raises `HTTPClientError` with status 400.
    The block is expected to emit an "error" output that mentions both the
    status code and the URL supplied by the user, and no "content" output
    carrying the happy-path body.
    """
    extractor = ExtractWebsiteContentBlock()
    payload = extractor.Input(
        url="https://example.com",
        credentials=cast(JinaCredentialsInput, TEST_CREDENTIALS_INPUT),
        raw_content=False,
    )

    async def failing_get_request(url, json=False, headers=None):
        raise HTTPClientError("HTTP 400 Error: Bad Request", 400)

    monkeypatch.setattr(extractor, "get_request", failing_get_request)

    emitted = []
    async for item in extractor.run(
        input_data=payload, credentials=TEST_CREDENTIALS
    ):
        emitted.append(item)

    assert ("content", "page content") not in emitted

    # Collect every error payload; the first one carries the details we check.
    errors = [payload_text for name, payload_text in emitted if name == "error"]
    assert errors
    first_error = errors[0]
    assert "Client error (400)" in first_error
    assert "https://example.com" in first_error

View File

@@ -13,6 +13,7 @@ import { Graph } from "@/app/api/__generated__/models/graph";
import { useNodeStore } from "../stores/nodeStore";
import { useEdgeStore } from "../stores/edgeStore";
import { graphsEquivalent } from "../components/NewControlPanel/NewSaveControl/helpers";
import { linkToCustomEdge } from "../components/helper";
import { useGraphStore } from "../stores/graphStore";
import { useShallow } from "zustand/react/shallow";
import {
@@ -21,6 +22,18 @@ import {
getTempFlowId,
} from "@/services/builder-draft/draft-service";
/**
 * Overwrite the edge store with the links the backend returned.
 *
 * When `links` is undefined nothing happens. Otherwise every link is
 * converted with `linkToCustomEdge` and the resulting array replaces the
 * store's edges via `setEdges`, so the frontend matches what the backend
 * accepted after save.
 */
function syncEdgesWithBackend(links: GraphModel["links"]) {
  // Nothing to sync against if the response carried no links field.
  if (links === undefined) {
    return;
  }

  const backendEdges = links.map(linkToCustomEdge);
  useEdgeStore.getState().setEdges(backendEdges);
}
export type SaveGraphOptions = {
showToast?: boolean;
onSuccess?: (graph: GraphModel) => void;
@@ -64,6 +77,9 @@ export const useSaveGraph = ({
flowVersion: data.version,
});
// Sync edge store with authoritative backend state
syncEdgesWithBackend(data.links);
const tempFlowId = getTempFlowId();
if (tempFlowId) {
await draftService.deleteDraft(tempFlowId);
@@ -101,6 +117,9 @@ export const useSaveGraph = ({
flowVersion: data.version,
});
// Sync edge store with authoritative backend state
syncEdgesWithBackend(data.links);
// Clear the draft for this flow after successful save
if (data.id) {
await draftService.deleteDraft(data.id);

View File

@@ -120,10 +120,33 @@ export const useEdgeStore = create<EdgeStore>((set, get) => ({
isOutputConnected: (nodeId, handle) =>
get().edges.some((e) => e.source === nodeId && e.sourceHandle === handle),
getBackendLinks: () => get().edges.map(customEdgeToLink),
getBackendLinks: () => {
  // Snapshot the ids currently in the node store; an edge is only converted
  // to a backend link when both of its endpoints are in this set.
  const knownNodeIds = new Set(useNodeStore.getState().nodes.map((n) => n.id));

  return get()
    .edges.filter((edge) => {
      if (knownNodeIds.has(edge.source) && knownNodeIds.has(edge.target)) {
        return true;
      }
      // Dangling edge: report it and leave it out of the returned links.
      console.warn(
        `[EdgeStore] Filtering out invalid edge during save: source=${edge.source}, target=${edge.target}`,
      );
      return false;
    })
    .map(customEdgeToLink);
},
addLinks: (links) => {
  // Ids of the nodes present in the node store at the time of the call.
  const knownNodeIds = new Set(useNodeStore.getState().nodes.map((n) => n.id));

  links.forEach((link) => {
    const endpointsExist =
      knownNodeIds.has(link.source_id) && knownNodeIds.has(link.sink_id);

    if (endpointsExist) {
      get().addEdge(linkToCustomEdge(link));
    } else {
      // Orphan link: one or both referenced nodes are missing, so no edge
      // is added for it.
      console.warn(
        `[EdgeStore] Skipping invalid link: source=${link.source_id}, sink=${link.sink_id} - node(s) not found`,
      );
    }
  });
},