improvement(kb): deferred content fetching and metadata-based hashes for connectors (#4044)

* improvement(kb): deferred content fetching and metadata-based hashes for connectors

* fix(kb): remove message count from outlook contentHash to prevent list/get divergence

* fix(kb): increase outlook getDocument message limit from 50 to 250

* fix(kb): skip outlook messages without conversationId to prevent broken stubs

* fix(kb): scope outlook getDocument to same folder as listDocuments to prevent hash divergence

* fix(kb): add missing connector sync cron job to Helm values

The connector sync endpoint existed, but no cron job was configured to trigger it,
so scheduled syncs never fired.
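
For context, the pattern these connector changes converge on: listDocuments returns lightweight stubs whose contentHash is derived from stable metadata (source id plus last-modified timestamp) instead of the content itself, and content is only fetched via getDocument for documents whose hash differs from the stored one. A minimal sketch of that contract, assuming a sync loop roughly like the one these connectors plug into (`syncConnector`, `storedHashes`, and `upsertDocument` are hypothetical; the ExternalDocument fields follow the diffs below):

```typescript
// Sketch only: field names follow the connector diffs below; the sync
// loop itself is an assumption about how deferred stubs get resolved.
interface ExternalDocument {
  externalId: string
  title: string
  content: string // empty when contentDeferred is true
  contentDeferred?: boolean
  mimeType: string
  contentHash: string // metadata-based, e.g. `jira:${id}:${updated}`
  metadata: Record<string, unknown>
}

// Hypothetical persistence helper, not part of this commit.
declare function upsertDocument(doc: ExternalDocument): Promise<void>

async function syncConnector(
  stubs: ExternalDocument[],
  storedHashes: Map<string, string>, // hypothetical store keyed by externalId
  getDocument: (externalId: string) => Promise<ExternalDocument | null>
): Promise<void> {
  for (const stub of stubs) {
    // Unchanged documents are skipped without ever fetching their content.
    // This only works if listDocuments and getDocument compute the hash
    // from the same metadata.
    if (storedHashes.get(stub.externalId) === stub.contentHash) continue
    const doc = stub.contentDeferred ? await getDocument(stub.externalId) : stub
    if (doc) await upsertDocument(doc)
  }
}
```

Because both listDocuments and getDocument derive the hash from the same metadata, the comparison never diverges for an unchanged document; the Outlook fixes above (dropping the volatile message count from the hash, scoping getDocument to the same folder listDocuments used) exist to preserve exactly that invariant.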

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
Authored by Waleed on 2026-04-08 00:59:54 -07:00, committed by GitHub
parent d0d35dd406
commit 3c7bfa797a
17 changed files with 349 additions and 304 deletions

View File

@@ -2,7 +2,7 @@ import { createLogger } from '@sim/logger'
import { AsanaIcon } from '@/components/icons'
import { fetchWithRetry, VALIDATE_RETRY_OPTIONS } from '@/lib/knowledge/documents/utils'
import type { ConnectorConfig, ExternalDocument, ExternalDocumentList } from '@/connectors/types'
import { computeContentHash, joinTagArray, parseTagDate } from '@/connectors/utils'
import { joinTagArray, parseTagDate } from '@/connectors/utils'
const logger = createLogger('AsanaConnector')
@@ -240,7 +240,6 @@ export const asanaConnector: ConnectorConfig = {
for (const task of result.data) {
const content = buildTaskContent(task)
const contentHash = await computeContentHash(content)
const tagNames = task.tags?.map((t) => t.name).filter(Boolean) || []
documents.push({
@@ -249,7 +248,7 @@ export const asanaConnector: ConnectorConfig = {
content,
mimeType: 'text/plain',
sourceUrl: task.permalink_url || undefined,
contentHash,
contentHash: `asana:${task.gid}:${task.modified_at ?? ''}`,
metadata: {
project: currentProjectGid,
assignee: task.assignee?.name,
@@ -315,7 +314,6 @@ export const asanaConnector: ConnectorConfig = {
if (!task) return null
const content = buildTaskContent(task)
const contentHash = await computeContentHash(content)
const tagNames = task.tags?.map((t) => t.name).filter(Boolean) || []
return {
@@ -324,7 +322,7 @@ export const asanaConnector: ConnectorConfig = {
content,
mimeType: 'text/plain',
sourceUrl: task.permalink_url || undefined,
contentHash,
contentHash: `asana:${task.gid}:${task.modified_at ?? ''}`,
metadata: {
assignee: task.assignee?.name,
completed: task.completed,

View File

@@ -2,7 +2,7 @@ import { createLogger } from '@sim/logger'
import { FirefliesIcon } from '@/components/icons'
import { fetchWithRetry, VALIDATE_RETRY_OPTIONS } from '@/lib/knowledge/documents/utils'
import type { ConnectorConfig, ExternalDocument, ExternalDocumentList } from '@/connectors/types'
import { computeContentHash, parseTagDate } from '@/connectors/utils'
import { parseTagDate } from '@/connectors/utils'
const logger = createLogger('FirefliesConnector')
@@ -196,17 +196,6 @@ export const firefliesConnector: ConnectorConfig = {
id
name
}
sentences {
index
speaker_name
text
}
summary {
keywords
action_items
overview
short_summary
}
}
}`,
variables
@@ -214,32 +203,27 @@ export const firefliesConnector: ConnectorConfig = {
const transcripts = (data.transcripts || []) as FirefliesTranscript[]
const documents: ExternalDocument[] = await Promise.all(
transcripts.map(async (transcript) => {
const content = formatTranscriptContent(transcript)
const contentHash = await computeContentHash(content)
const documents: ExternalDocument[] = transcripts.map((transcript) => {
const meetingDate = transcript.date ? new Date(transcript.date).toISOString() : undefined
const speakerNames = transcript.speakers?.map((s) => s.name).filter(Boolean) ?? []
const meetingDate = transcript.date ? new Date(transcript.date).toISOString() : undefined
const speakerNames = transcript.speakers?.map((s) => s.name).filter(Boolean) ?? []
return {
externalId: transcript.id,
title: transcript.title || 'Untitled Meeting',
content,
mimeType: 'text/plain' as const,
sourceUrl: transcript.transcript_url || undefined,
contentHash,
metadata: {
hostEmail: transcript.host_email,
duration: transcript.duration,
meetingDate,
participants: transcript.participants,
speakers: speakerNames,
keywords: transcript.summary?.keywords,
},
}
})
)
return {
externalId: transcript.id,
title: transcript.title || 'Untitled Meeting',
content: '',
contentDeferred: true,
mimeType: 'text/plain' as const,
sourceUrl: transcript.transcript_url || undefined,
contentHash: `fireflies:${transcript.id}:${transcript.date ?? ''}:${transcript.duration ?? ''}`,
metadata: {
hostEmail: transcript.host_email,
duration: transcript.duration,
meetingDate,
participants: transcript.participants,
speakers: speakerNames,
},
}
})
const totalFetched = ((syncContext?.totalDocsFetched as number) ?? 0) + documents.length
if (syncContext) syncContext.totalDocsFetched = totalFetched
@@ -296,7 +280,7 @@ export const firefliesConnector: ConnectorConfig = {
if (!transcript) return null
const content = formatTranscriptContent(transcript)
const contentHash = await computeContentHash(content)
const contentHash = `fireflies:${transcript.id}:${transcript.date ?? ''}:${transcript.duration ?? ''}`
const meetingDate = transcript.date ? new Date(transcript.date).toISOString() : undefined
const speakerNames = transcript.speakers?.map((s) => s.name).filter(Boolean) ?? []
@@ -305,6 +289,7 @@ export const firefliesConnector: ConnectorConfig = {
externalId: transcript.id,
title: transcript.title || 'Untitled Meeting',
content,
contentDeferred: false,
mimeType: 'text/plain',
sourceUrl: transcript.transcript_url || undefined,
contentHash,

View File

@@ -2,7 +2,7 @@ import { createLogger } from '@sim/logger'
import { GoogleCalendarIcon } from '@/components/icons'
import { fetchWithRetry, VALIDATE_RETRY_OPTIONS } from '@/lib/knowledge/documents/utils'
import type { ConnectorConfig, ExternalDocument, ExternalDocumentList } from '@/connectors/types'
import { computeContentHash, parseTagDate } from '@/connectors/utils'
import { parseTagDate } from '@/connectors/utils'
const logger = createLogger('GoogleCalendarConnector')
@@ -195,14 +195,12 @@ function getTimeRange(sourceConfig: Record<string, unknown>): { timeMin: string;
/**
* Converts a CalendarEvent to an ExternalDocument.
*/
async function eventToDocument(event: CalendarEvent): Promise<ExternalDocument | null> {
function eventToDocument(event: CalendarEvent): ExternalDocument | null {
if (event.status === 'cancelled') return null
const content = eventToContent(event)
if (!content.trim()) return null
const contentHash = await computeContentHash(content)
const startTime = event.start?.dateTime || event.start?.date || ''
const attendeeCount = event.attendees?.filter((a) => !a.resource).length || 0
@@ -212,7 +210,7 @@ async function eventToDocument(event: CalendarEvent): Promise<ExternalDocument |
content,
mimeType: 'text/plain',
sourceUrl: event.htmlLink || `https://calendar.google.com/calendar/event?eid=${event.id}`,
contentHash,
contentHash: `gcal:${event.id}:${event.updated ?? ''}`,
metadata: {
startTime,
endTime: event.end?.dateTime || event.end?.date || '',
@@ -348,7 +346,7 @@ export const googleCalendarConnector: ConnectorConfig = {
const documents: ExternalDocument[] = []
for (const event of events) {
const doc = await eventToDocument(event)
const doc = eventToDocument(event)
if (doc) documents.push(doc)
}
@@ -392,7 +390,7 @@ export const googleCalendarConnector: ConnectorConfig = {
if (event.status === 'cancelled') return null
return eventToDocument(event)
return eventToDocument(event) ?? null
},
validateConfig: async (

View File

@@ -2,7 +2,7 @@ import { createLogger } from '@sim/logger'
import { GoogleDocsIcon } from '@/components/icons'
import { fetchWithRetry, VALIDATE_RETRY_OPTIONS } from '@/lib/knowledge/documents/utils'
import type { ConnectorConfig, ExternalDocument, ExternalDocumentList } from '@/connectors/types'
import { computeContentHash, joinTagArray, parseTagDate } from '@/connectors/utils'
import { joinTagArray, parseTagDate } from '@/connectors/utils'
const logger = createLogger('GoogleDocsConnector')
@@ -117,40 +117,23 @@ async function fetchDocContent(accessToken: string, documentId: string): Promise
}
/**
* Converts a Drive file entry into an ExternalDocument by fetching its content
* from the Google Docs API.
* Creates a lightweight stub from a Drive file entry. Content is deferred
* and only fetched via getDocument for new or changed documents.
*/
async function fileToDocument(
accessToken: string,
file: DriveFile
): Promise<ExternalDocument | null> {
try {
const content = await fetchDocContent(accessToken, file.id)
if (!content.trim()) {
logger.info(`Skipping empty document: ${file.name} (${file.id})`)
return null
}
const contentHash = await computeContentHash(content)
return {
externalId: file.id,
title: file.name || 'Untitled',
content,
mimeType: 'text/plain',
sourceUrl: file.webViewLink || `https://docs.google.com/document/d/${file.id}/edit`,
contentHash,
metadata: {
modifiedTime: file.modifiedTime,
createdTime: file.createdTime,
owners: file.owners?.map((o) => o.displayName || o.emailAddress).filter(Boolean),
},
}
} catch (error) {
logger.warn(`Failed to extract content from document: ${file.name} (${file.id})`, {
error: error instanceof Error ? error.message : String(error),
})
return null
function fileToStub(file: DriveFile): ExternalDocument {
return {
externalId: file.id,
title: file.name || 'Untitled',
content: '',
contentDeferred: true,
mimeType: 'text/plain',
sourceUrl: file.webViewLink || `https://docs.google.com/document/d/${file.id}/edit`,
contentHash: `gdocs:${file.id}:${file.modifiedTime ?? ''}`,
metadata: {
modifiedTime: file.modifiedTime,
createdTime: file.createdTime,
owners: file.owners?.map((o) => o.displayName || o.emailAddress).filter(Boolean),
},
}
}
@@ -246,18 +229,11 @@ export const googleDocsConnector: ConnectorConfig = {
const maxDocs = sourceConfig.maxDocs ? Number(sourceConfig.maxDocs) : 0
const previouslyFetched = (syncContext?.totalDocsFetched as number) ?? 0
const CONCURRENCY = 5
const documents: ExternalDocument[] = []
for (let i = 0; i < files.length; i += CONCURRENCY) {
if (maxDocs > 0 && previouslyFetched + documents.length >= maxDocs) break
const batch = files.slice(i, i + CONCURRENCY)
const results = await Promise.all(batch.map((file) => fileToDocument(accessToken, file)))
documents.push(...(results.filter(Boolean) as ExternalDocument[]))
}
let documents = files.map(fileToStub)
if (maxDocs > 0) {
const remaining = maxDocs - previouslyFetched
if (documents.length > remaining) {
documents.splice(remaining)
documents = documents.slice(0, remaining)
}
}
@@ -300,7 +276,17 @@ export const googleDocsConnector: ConnectorConfig = {
if (file.trashed) return null
if (file.mimeType !== 'application/vnd.google-apps.document') return null
return fileToDocument(accessToken, file)
try {
const content = await fetchDocContent(accessToken, file.id)
if (!content.trim()) return null
return { ...fileToStub(file), content, contentDeferred: false }
} catch (error) {
logger.warn(`Failed to extract content from document: ${file.name} (${file.id})`, {
error: error instanceof Error ? error.message : String(error),
})
return null
}
},
validateConfig: async (

View File

@@ -2,7 +2,7 @@ import { createLogger } from '@sim/logger'
import { GoogleSheetsIcon } from '@/components/icons'
import { fetchWithRetry, VALIDATE_RETRY_OPTIONS } from '@/lib/knowledge/documents/utils'
import type { ConnectorConfig, ExternalDocument, ExternalDocumentList } from '@/connectors/types'
import { computeContentHash, parseTagDate } from '@/connectors/utils'
import { parseTagDate } from '@/connectors/utils'
const logger = createLogger('GoogleSheetsConnector')
@@ -168,7 +168,6 @@ async function sheetToDocument(
return null
}
const contentHash = await computeContentHash(content)
const rowCount = dataRows.length
return {
@@ -177,7 +176,7 @@ async function sheetToDocument(
content,
mimeType: 'text/plain',
sourceUrl: `https://docs.google.com/spreadsheets/d/${spreadsheetId}/edit#gid=${sheet.sheetId}`,
contentHash,
contentHash: `gsheets:${spreadsheetId}:${sheet.sheetId}:${modifiedTime ?? ''}`,
metadata: {
spreadsheetId,
spreadsheetTitle,
@@ -259,22 +258,24 @@ export const googleSheetsConnector: ConnectorConfig = {
sheetCount: sheets.length,
})
const documents: ExternalDocument[] = []
for (let i = 0; i < sheets.length; i += CONCURRENCY) {
const batch = sheets.slice(i, i + CONCURRENCY)
const results = await Promise.all(
batch.map((sheet) =>
sheetToDocument(
accessToken,
spreadsheetId,
metadata.properties.title,
sheet,
modifiedTime
)
)
)
documents.push(...(results.filter(Boolean) as ExternalDocument[]))
}
const documents: ExternalDocument[] = sheets.map((sheet) => ({
externalId: `${spreadsheetId}__sheet__${sheet.sheetId}`,
title: `${metadata.properties.title} - ${sheet.title}`,
content: '',
contentDeferred: true,
mimeType: 'text/plain',
sourceUrl: `https://docs.google.com/spreadsheets/d/${spreadsheetId}/edit#gid=${sheet.sheetId}`,
contentHash: `gsheets:${spreadsheetId}:${sheet.sheetId}:${modifiedTime ?? ''}`,
metadata: {
spreadsheetId,
spreadsheetTitle: metadata.properties.title,
sheetTitle: sheet.title,
sheetId: sheet.sheetId,
rowCount: sheet.gridProperties?.rowCount,
columnCount: sheet.gridProperties?.columnCount,
...(modifiedTime ? { modifiedTime } : {}),
},
}))
return {
documents,
@@ -324,13 +325,15 @@ export const googleSheetsConnector: ConnectorConfig = {
return null
}
return sheetToDocument(
const doc = await sheetToDocument(
accessToken,
spreadsheetId,
metadata.properties.title,
sheetEntry.properties,
modifiedTime
)
if (!doc) return null
return { ...doc, contentDeferred: false }
},
validateConfig: async (

View File

@@ -2,7 +2,7 @@ import { createLogger } from '@sim/logger'
import { HubspotIcon } from '@/components/icons'
import { fetchWithRetry, VALIDATE_RETRY_OPTIONS } from '@/lib/knowledge/documents/utils'
import type { ConnectorConfig, ExternalDocument, ExternalDocumentList } from '@/connectors/types'
import { computeContentHash, parseTagDate } from '@/connectors/utils'
import { parseTagDate } from '@/connectors/utils'
const logger = createLogger('HubSpotConnector')
@@ -140,16 +140,15 @@ function buildRecordContent(objectType: string, properties: Record<string, strin
/**
* Converts a HubSpot CRM record to an ExternalDocument.
*/
async function recordToDocument(
function recordToDocument(
record: Record<string, unknown>,
objectType: string,
portalId: string
): Promise<ExternalDocument> {
): ExternalDocument {
const id = record.id as string
const properties = (record.properties || {}) as Record<string, string | null>
const content = buildRecordContent(objectType, properties)
const contentHash = await computeContentHash(content)
const title = buildRecordTitle(objectType, properties)
const lastModified =
@@ -161,7 +160,7 @@ async function recordToDocument(
content,
mimeType: 'text/plain',
sourceUrl: `https://app.hubspot.com/contacts/${portalId}/record/${objectType}/${id}`,
contentHash,
contentHash: `hubspot:${id}:${lastModified ?? ''}`,
metadata: {
objectType,
owner: properties.hubspot_owner_id || undefined,
@@ -260,8 +259,8 @@ export const hubspotConnector: ConnectorConfig = {
const paging = data.paging as { next?: { after?: string } } | undefined
const nextCursor = paging?.next?.after
const documents: ExternalDocument[] = await Promise.all(
results.map((record) => recordToDocument(record, objectType, portalId))
const documents: ExternalDocument[] = results.map((record) =>
recordToDocument(record, objectType, portalId)
)
const previouslyFetched = (syncContext?.totalDocsFetched as number) ?? 0
@@ -322,7 +321,7 @@ export const hubspotConnector: ConnectorConfig = {
}
const record = await response.json()
return recordToDocument(record, objectType, portalId)
return recordToDocument(record as Record<string, unknown>, objectType, portalId)
},
validateConfig: async (

View File

@@ -2,7 +2,7 @@ import { createLogger } from '@sim/logger'
import { IntercomIcon } from '@/components/icons'
import { fetchWithRetry, VALIDATE_RETRY_OPTIONS } from '@/lib/knowledge/documents/utils'
import type { ConnectorConfig, ExternalDocument, ExternalDocumentList } from '@/connectors/types'
import { computeContentHash, htmlToPlainText, parseTagDate } from '@/connectors/utils'
import { htmlToPlainText, parseTagDate } from '@/connectors/utils'
const logger = createLogger('IntercomConnector')
@@ -309,7 +309,6 @@ export const intercomConnector: ConnectorConfig = {
const content = formatArticle(article)
if (!content.trim()) continue
const contentHash = await computeContentHash(content)
const updatedAt = new Date(article.updated_at * 1000).toISOString()
documents.push({
@@ -318,7 +317,7 @@ export const intercomConnector: ConnectorConfig = {
content,
mimeType: 'text/plain',
sourceUrl: `https://app.intercom.com/a/apps/_/articles/articles/${article.id}/show`,
contentHash,
contentHash: `intercom:article-${article.id}:${article.updated_at}`,
metadata: {
type: 'article',
state: article.state,
@@ -337,28 +336,23 @@ export const intercomConnector: ConnectorConfig = {
const conversations = await fetchConversations(accessToken, maxItems, conversationState)
for (const conversation of conversations) {
const detail = await fetchConversationDetail(accessToken, conversation.id)
const content = formatConversation(detail)
if (!content.trim()) continue
const contentHash = await computeContentHash(content)
const updatedAt = new Date(conversation.updated_at * 1000).toISOString()
const tags = conversation.tags?.tags?.map((t) => t.name) || []
documents.push({
externalId: `conversation-${conversation.id}`,
title: conversation.title || `Conversation #${conversation.id}`,
content,
content: '',
contentDeferred: true,
mimeType: 'text/plain',
sourceUrl: `https://app.intercom.com/a/apps/_/inbox/inbox/all/conversations/${conversation.id}`,
contentHash,
contentHash: `intercom:conversation-${conversation.id}:${conversation.updated_at}`,
metadata: {
type: 'conversation',
state: conversation.state,
tags: tags.join(', '),
updatedAt,
createdAt: new Date(conversation.created_at * 1000).toISOString(),
messageCount: (detail.conversation_parts?.total_count ?? 0) + 1,
},
})
}
@@ -383,7 +377,6 @@ export const intercomConnector: ConnectorConfig = {
const content = formatArticle(article)
if (!content.trim()) return null
const contentHash = await computeContentHash(content)
const updatedAt = new Date(article.updated_at * 1000).toISOString()
return {
@@ -392,7 +385,7 @@ export const intercomConnector: ConnectorConfig = {
content,
mimeType: 'text/plain',
sourceUrl: `https://app.intercom.com/a/apps/_/articles/articles/${article.id}/show`,
contentHash,
contentHash: `intercom:article-${article.id}:${article.updated_at}`,
metadata: {
type: 'article',
state: article.state,
@@ -410,7 +403,6 @@ export const intercomConnector: ConnectorConfig = {
const content = formatConversation(detail)
if (!content.trim()) return null
const contentHash = await computeContentHash(content)
const updatedAt = new Date(detail.updated_at * 1000).toISOString()
const tags = detail.tags?.tags?.map((t) => t.name) || []
@@ -418,9 +410,10 @@ export const intercomConnector: ConnectorConfig = {
externalId,
title: detail.title || `Conversation #${detail.id}`,
content,
contentDeferred: false,
mimeType: 'text/plain',
sourceUrl: `https://app.intercom.com/a/apps/_/inbox/inbox/all/conversations/${detail.id}`,
contentHash,
contentHash: `intercom:conversation-${detail.id}:${detail.updated_at}`,
metadata: {
type: 'conversation',
state: detail.state,

View File

@@ -2,7 +2,7 @@ import { createLogger } from '@sim/logger'
import { JiraIcon } from '@/components/icons'
import { fetchWithRetry, VALIDATE_RETRY_OPTIONS } from '@/lib/knowledge/documents/utils'
import type { ConnectorConfig, ExternalDocument, ExternalDocumentList } from '@/connectors/types'
import { computeContentHash, joinTagArray, parseTagDate } from '@/connectors/utils'
import { joinTagArray, parseTagDate } from '@/connectors/utils'
import { extractAdfText, getJiraCloudId } from '@/tools/jira/utils'
const logger = createLogger('JiraConnector')
@@ -33,16 +33,12 @@ function buildIssueContent(fields: Record<string, unknown>): string {
}
/**
* Converts a Jira issue API response to an ExternalDocument.
* Extracts common metadata fields from a Jira issue into an ExternalDocument
* stub with deferred content. The contentHash is metadata-based so it is
* identical whether produced during listing or full fetch.
*/
async function issueToDocument(
issue: Record<string, unknown>,
domain: string
): Promise<ExternalDocument> {
function issueToStub(issue: Record<string, unknown>, domain: string): ExternalDocument {
const fields = (issue.fields || {}) as Record<string, unknown>
const content = buildIssueContent(fields)
const contentHash = await computeContentHash(content)
const key = issue.key as string
const issueType = fields.issuetype as Record<string, unknown> | undefined
const status = fields.status as Record<string, unknown> | undefined
@@ -51,14 +47,16 @@ async function issueToDocument(
const reporter = fields.reporter as Record<string, unknown> | undefined
const project = fields.project as Record<string, unknown> | undefined
const labels = Array.isArray(fields.labels) ? (fields.labels as string[]) : []
const updated = (fields.updated as string) ?? ''
return {
externalId: String(issue.id),
title: `${key}: ${(fields.summary as string) || 'Untitled'}`,
content,
content: '',
contentDeferred: true,
mimeType: 'text/plain',
sourceUrl: `https://${domain}/browse/${key}`,
contentHash,
contentHash: `jira:${issue.id}:${updated}`,
metadata: {
key,
issueType: issueType?.name,
@@ -74,6 +72,22 @@ async function issueToDocument(
}
}
/**
* Converts a fully-fetched Jira issue (with description and comments) into an
* ExternalDocument with resolved content.
*/
function issueToFullDocument(issue: Record<string, unknown>, domain: string): ExternalDocument {
const stub = issueToStub(issue, domain)
const fields = (issue.fields || {}) as Record<string, unknown>
const content = buildIssueContent(fields)
return {
...stub,
content,
contentDeferred: false,
}
}
export const jiraConnector: ConnectorConfig = {
id: 'jira',
name: 'Jira',
@@ -162,7 +176,7 @@ export const jiraConnector: ConnectorConfig = {
params.append('maxResults', String(Math.min(PAGE_SIZE, remaining)))
params.append(
'fields',
'summary,description,comment,issuetype,status,priority,assignee,reporter,project,labels,created,updated'
'summary,issuetype,status,priority,assignee,reporter,project,labels,created,updated'
)
const url = `https://api.atlassian.com/ex/jira/${cloudId}/rest/api/3/search?${params.toString()}`
@@ -190,9 +204,7 @@ export const jiraConnector: ConnectorConfig = {
const issues = (data.issues || []) as Record<string, unknown>[]
const total = (data.total as number) ?? 0
const documents: ExternalDocument[] = await Promise.all(
issues.map((issue) => issueToDocument(issue, domain))
)
const documents: ExternalDocument[] = issues.map((issue) => issueToStub(issue, domain))
const nextStart = startAt + issues.length
const hasMore = nextStart < total && (maxIssues <= 0 || nextStart < maxIssues)
@@ -239,7 +251,7 @@ export const jiraConnector: ConnectorConfig = {
}
const issue = await response.json()
return issueToDocument(issue, domain)
return issueToFullDocument(issue, domain)
},
validateConfig: async (

View File

@@ -3,7 +3,7 @@ import { LinearIcon } from '@/components/icons'
import type { RetryOptions } from '@/lib/knowledge/documents/utils'
import { fetchWithRetry, VALIDATE_RETRY_OPTIONS } from '@/lib/knowledge/documents/utils'
import type { ConnectorConfig, ExternalDocument, ExternalDocumentList } from '@/connectors/types'
import { computeContentHash, joinTagArray, parseTagDate } from '@/connectors/utils'
import { joinTagArray, parseTagDate } from '@/connectors/utils'
const logger = createLogger('LinearConnector')
@@ -278,36 +278,34 @@ export const linearConnector: ConnectorConfig = {
const nodes = (issuesConn.nodes || []) as Record<string, unknown>[]
const pageInfo = issuesConn.pageInfo as Record<string, unknown>
const documents: ExternalDocument[] = await Promise.all(
nodes.map(async (issue) => {
const content = buildIssueContent(issue)
const contentHash = await computeContentHash(content)
const documents: ExternalDocument[] = nodes.map((issue) => {
const content = buildIssueContent(issue)
const contentHash = `linear:${issue.id}:${issue.updatedAt}`
const labelNodes = ((issue.labels as Record<string, unknown>)?.nodes || []) as Record<
string,
unknown
>[]
const labelNodes = ((issue.labels as Record<string, unknown>)?.nodes || []) as Record<
string,
unknown
>[]
return {
externalId: issue.id as string,
title: `${(issue.identifier as string) || ''}: ${(issue.title as string) || 'Untitled'}`,
content,
mimeType: 'text/plain' as const,
sourceUrl: (issue.url as string) || undefined,
contentHash,
metadata: {
identifier: issue.identifier,
state: (issue.state as Record<string, unknown>)?.name,
priority: issue.priorityLabel,
assignee: (issue.assignee as Record<string, unknown>)?.name,
labels: labelNodes.map((l) => l.name as string),
team: (issue.team as Record<string, unknown>)?.name,
project: (issue.project as Record<string, unknown>)?.name,
lastModified: issue.updatedAt,
},
}
})
)
return {
externalId: issue.id as string,
title: `${(issue.identifier as string) || ''}: ${(issue.title as string) || 'Untitled'}`,
content,
mimeType: 'text/plain' as const,
sourceUrl: (issue.url as string) || undefined,
contentHash,
metadata: {
identifier: issue.identifier,
state: (issue.state as Record<string, unknown>)?.name,
priority: issue.priorityLabel,
assignee: (issue.assignee as Record<string, unknown>)?.name,
labels: labelNodes.map((l) => l.name as string),
team: (issue.team as Record<string, unknown>)?.name,
project: (issue.project as Record<string, unknown>)?.name,
lastModified: issue.updatedAt,
},
}
})
const hasNextPage = Boolean(pageInfo.hasNextPage)
const endCursor = (pageInfo.endCursor as string) || undefined
@@ -335,7 +333,7 @@ export const linearConnector: ConnectorConfig = {
if (!issue) return null
const content = buildIssueContent(issue)
const contentHash = await computeContentHash(content)
const contentHash = `linear:${issue.id}:${issue.updatedAt}`
const labelNodes = ((issue.labels as Record<string, unknown>)?.nodes || []) as Record<
string,
@@ -346,7 +344,7 @@ export const linearConnector: ConnectorConfig = {
externalId: issue.id as string,
title: `${(issue.identifier as string) || ''}: ${(issue.title as string) || 'Untitled'}`,
content,
mimeType: 'text/plain',
mimeType: 'text/plain' as const,
sourceUrl: (issue.url as string) || undefined,
contentHash,
metadata: {
@@ -379,7 +377,6 @@ export const linearConnector: ConnectorConfig = {
}
try {
// Verify the token works by fetching teams
const data = await linearGraphQL(accessToken, TEAMS_QUERY, undefined, VALIDATE_RETRY_OPTIONS)
const teamsConn = data.teams as Record<string, unknown>
const teams = (teamsConn.nodes || []) as Record<string, unknown>[]
@@ -391,7 +388,6 @@ export const linearConnector: ConnectorConfig = {
}
}
// If teamId specified, verify it exists
const teamId = sourceConfig.teamId as string | undefined
if (teamId) {
const found = teams.some((t) => t.id === teamId)

View File

@@ -2,14 +2,37 @@ import { createLogger } from '@sim/logger'
import { OutlookIcon } from '@/components/icons'
import { fetchWithRetry, VALIDATE_RETRY_OPTIONS } from '@/lib/knowledge/documents/utils'
import type { ConnectorConfig, ExternalDocument, ExternalDocumentList } from '@/connectors/types'
import { computeContentHash, htmlToPlainText, parseTagDate } from '@/connectors/utils'
import { htmlToPlainText, parseTagDate } from '@/connectors/utils'
const logger = createLogger('OutlookConnector')
const GRAPH_API_BASE = 'https://graph.microsoft.com/v1.0/me'
const DEFAULT_MAX_CONVERSATIONS = 500
const MESSAGES_PER_PAGE = 50
const MESSAGE_FIELDS = [
/**
* Fields requested when listing messages (no body — deferred to getDocument).
*/
const LIST_MESSAGE_FIELDS = [
'id',
'conversationId',
'subject',
'from',
'toRecipients',
'receivedDateTime',
'sentDateTime',
'categories',
'importance',
'inferenceClassification',
'hasAttachments',
'webLink',
'isDraft',
'parentFolderId',
].join(',')
/**
* Fields requested when fetching full message content in getDocument.
*/
const FULL_MESSAGE_FIELDS = [
'id',
'conversationId',
'subject',
@@ -84,7 +107,7 @@ function buildInitialUrl(sourceConfig: Record<string, unknown>): string {
const params = new URLSearchParams({
$top: String(MESSAGES_PER_PAGE),
$select: MESSAGE_FIELDS,
$select: LIST_MESSAGE_FIELDS,
})
// Build $filter clauses
@@ -353,7 +376,6 @@ export const outlookConnector: ConnectorConfig = {
const headers: Record<string, string> = {
Authorization: `Bearer ${accessToken}`,
Accept: 'application/json',
Prefer: 'outlook.body-content-type="text"',
}
const response = await fetchWithRetry(url, { method: 'GET', headers })
@@ -385,7 +407,8 @@ export const outlookConnector: ConnectorConfig = {
continue
}
const convId = msg.conversationId || msg.id
if (!msg.conversationId) continue
const convId = msg.conversationId
if (!conversations[convId]) {
conversations[convId] = []
}
@@ -407,8 +430,8 @@ export const outlookConnector: ConnectorConfig = {
}
}
// Phase 2: Group conversations into documents
logger.info('Grouping Outlook messages into conversations', {
// Phase 2: Build lightweight stubs — content is deferred to getDocument
logger.info('Building Outlook conversation stubs', {
totalMessages: syncContext?._totalMessagesFetched,
totalConversations: Object.keys(conversations).length,
})
@@ -433,23 +456,26 @@ export const outlookConnector: ConnectorConfig = {
const documents: ExternalDocument[] = []
for (const [convId, msgs] of limited) {
const result = formatConversation(convId, msgs)
if (!result) continue
if (msgs.length === 0) continue
const contentHash = await computeContentHash(result.content)
const lastDate = msgs.reduce((max, m) => {
const d = m.receivedDateTime || ''
return d > max ? d : max
}, '')
// Use the first message's webLink as the source URL
const subject = msgs[0].subject || 'No Subject'
const firstWithLink = msgs.find((m) => m.webLink)
const sourceUrl = firstWithLink?.webLink || `https://outlook.office.com/mail/inbox`
const sourceUrl = firstWithLink?.webLink || 'https://outlook.office.com/mail/inbox'
documents.push({
externalId: convId,
title: result.subject,
content: result.content,
title: subject,
content: '',
contentDeferred: true,
mimeType: 'text/plain',
sourceUrl,
contentHash,
metadata: result.metadata,
contentHash: `outlook:${convId}:${lastDate}`,
metadata: {},
})
}
@@ -462,14 +488,25 @@ export const outlookConnector: ConnectorConfig = {
externalId: string
): Promise<ExternalDocument | null> => {
try {
// Fetch messages for this conversation
// Scope to the same folder as listDocuments so contentHash stays consistent
const folder = (sourceConfig.folder as string) || 'inbox'
const basePath =
folder === 'all'
? `${GRAPH_API_BASE}/messages`
: `${GRAPH_API_BASE}/mailFolders/${WELL_KNOWN_FOLDERS[folder] || folder}/messages`
const filterParts = [
`conversationId eq '${externalId.replace(/'/g, "''")}'`,
'isDraft eq false',
]
const params = new URLSearchParams({
$filter: `conversationId eq '${externalId.replace(/'/g, "''")}'`,
$select: MESSAGE_FIELDS,
$top: '50',
$filter: filterParts.join(' and '),
$select: FULL_MESSAGE_FIELDS,
$top: '250',
})
const url = `${GRAPH_API_BASE}/messages?${params.toString()}`
const url = `${basePath}?${params.toString()}`
const response = await fetchWithRetry(url, {
method: 'GET',
@@ -493,16 +530,21 @@ export const outlookConnector: ConnectorConfig = {
const result = formatConversation(externalId, messages)
if (!result) return null
const contentHash = await computeContentHash(result.content)
const lastDate = messages.reduce((max, m) => {
const d = m.receivedDateTime || ''
return d > max ? d : max
}, '')
const firstWithLink = messages.find((m) => m.webLink)
return {
externalId,
title: result.subject,
content: result.content,
contentDeferred: false,
mimeType: 'text/plain',
sourceUrl: firstWithLink?.webLink || 'https://outlook.office.com/mail/inbox',
contentHash,
contentHash: `outlook:${externalId}:${lastDate}`,
metadata: result.metadata,
}
} catch (error) {

View File

@@ -2,7 +2,7 @@ import { createLogger } from '@sim/logger'
import { RedditIcon } from '@/components/icons'
import { fetchWithRetry, VALIDATE_RETRY_OPTIONS } from '@/lib/knowledge/documents/utils'
import type { ConnectorConfig, ExternalDocument, ExternalDocumentList } from '@/connectors/types'
import { computeContentHash, parseTagDate } from '@/connectors/utils'
import { parseTagDate } from '@/connectors/utils'
const logger = createLogger('RedditConnector')
@@ -338,29 +338,23 @@ export const redditConnector: ConnectorConfig = {
afterToken
)
const documents: ExternalDocument[] = []
for (const post of posts) {
const content = await formatPostContent(accessToken, post, COMMENTS_PER_POST)
const contentHash = await computeContentHash(content)
documents.push({
externalId: post.id,
title: post.title,
content,
mimeType: 'text/plain',
sourceUrl: `https://www.reddit.com${post.permalink}`,
contentHash,
metadata: {
author: post.author,
score: post.score,
commentCount: post.num_comments,
flair: post.link_flair_text ?? undefined,
postDate: new Date(post.created_utc * 1000).toISOString(),
subreddit: post.subreddit,
},
})
}
const documents: ExternalDocument[] = posts.map((post) => ({
externalId: post.id,
title: post.title,
content: '',
contentDeferred: true,
mimeType: 'text/plain',
sourceUrl: `https://www.reddit.com${post.permalink}`,
contentHash: `reddit:${post.id}:${post.created_utc}`,
metadata: {
author: post.author,
score: post.score,
commentCount: post.num_comments,
flair: post.link_flair_text ?? undefined,
postDate: new Date(post.created_utc * 1000).toISOString(),
subreddit: post.subreddit,
},
}))
const totalCollected = collectedSoFar + documents.length
const hasMore = after !== null && totalCollected < maxPosts
@@ -397,15 +391,15 @@ export const redditConnector: ConnectorConfig = {
const comments =
data.length >= 2 ? extractComments(data[1] as RedditListing, COMMENTS_PER_POST) : []
const content = await formatPostContent(accessToken, post, COMMENTS_PER_POST, comments)
const contentHash = await computeContentHash(content)
return {
externalId: post.id,
title: post.title,
content,
contentDeferred: false,
mimeType: 'text/plain',
sourceUrl: `https://www.reddit.com${post.permalink}`,
contentHash,
contentHash: `reddit:${post.id}:${post.created_utc}`,
metadata: {
author: post.author,
score: post.score,

View File

@@ -2,7 +2,7 @@ import { createLogger } from '@sim/logger'
import { SalesforceIcon } from '@/components/icons'
import { fetchWithRetry, VALIDATE_RETRY_OPTIONS } from '@/lib/knowledge/documents/utils'
import type { ConnectorConfig, ExternalDocument, ExternalDocumentList } from '@/connectors/types'
import { computeContentHash, htmlToPlainText, parseTagDate } from '@/connectors/utils'
import { htmlToPlainText, parseTagDate } from '@/connectors/utils'
const logger = createLogger('SalesforceConnector')
@@ -12,7 +12,14 @@ const PAGE_SIZE = 200
/** SOQL field lists per object type. */
const OBJECT_FIELDS: Record<string, string[]> = {
KnowledgeArticleVersion: ['Id', 'Title', 'Summary', 'LastModifiedDate', 'ArticleNumber'],
KnowledgeArticleVersion: [
'Id',
'Title',
'Summary',
'LastModifiedDate',
'ArticleNumber',
'PublishStatus',
],
Case: ['Id', 'Subject', 'Description', 'Status', 'LastModifiedDate', 'CaseNumber'],
Account: ['Id', 'Name', 'Description', 'Industry', 'LastModifiedDate'],
Opportunity: [
@@ -146,36 +153,52 @@ function getRecordStatus(objectType: string, record: Record<string, unknown>): s
}
/**
* Converts a Salesforce record to an ExternalDocument.
* Creates a lightweight stub for a Salesforce record with metadata-based hash.
* Content is deferred and fetched later via getDocument only for new/changed docs.
*/
async function recordToDocument(
function recordToStub(
record: Record<string, unknown>,
objectType: string,
instanceUrl: string
): Promise<ExternalDocument> {
): ExternalDocument {
const id = record.Id as string
const content = buildRecordContent(objectType, record)
const contentHash = await computeContentHash(content)
const title = buildRecordTitle(objectType, record)
const lastModified = (record.LastModifiedDate as string) || ''
const baseUrl = instanceUrl.replace(`/services/data/${API_VERSION}/`, '')
return {
externalId: id,
title,
content,
content: '',
contentDeferred: true,
mimeType: 'text/plain',
sourceUrl: `${baseUrl}/${id}`,
contentHash,
contentHash: `salesforce:${id}:${lastModified}`,
metadata: {
objectType,
lastModified: (record.LastModifiedDate as string) || undefined,
lastModified: lastModified || undefined,
recordNumber: getRecordNumber(objectType, record),
status: getRecordStatus(objectType, record),
},
}
}
/**
* Builds a full ExternalDocument with content from a Salesforce record.
*/
function recordToDocument(
record: Record<string, unknown>,
objectType: string,
instanceUrl: string
): ExternalDocument {
const stub = recordToStub(record, objectType, instanceUrl)
return {
...stub,
content: buildRecordContent(objectType, record),
contentDeferred: false,
}
}
export const salesforceConnector: ConnectorConfig = {
id: 'salesforce',
name: 'Salesforce',
@@ -257,8 +280,8 @@ export const salesforceConnector: ConnectorConfig = {
const records = (data.records || []) as Record<string, unknown>[]
const nextRecordsUrl = data.nextRecordsUrl as string | undefined
const documents: ExternalDocument[] = await Promise.all(
records.map((record) => recordToDocument(record, objectType, instanceUrl))
const documents: ExternalDocument[] = records.map((record) =>
recordToStub(record, objectType, instanceUrl)
)
const previouslyFetched = (syncContext?.totalDocsFetched as number) ?? 0

View File

@@ -2,7 +2,7 @@ import { createLogger } from '@sim/logger'
import { ServiceNowIcon } from '@/components/icons'
import { fetchWithRetry, VALIDATE_RETRY_OPTIONS } from '@/lib/knowledge/documents/utils'
import type { ConnectorConfig, ExternalDocument, ExternalDocumentList } from '@/connectors/types'
import { computeContentHash, htmlToPlainText, parseTagDate } from '@/connectors/utils'
import { htmlToPlainText, parseTagDate } from '@/connectors/utils'
const logger = createLogger('ServiceNowConnector')
@@ -184,15 +184,13 @@ function priorityLabel(priority: string | undefined): string {
/**
* Converts a KB article record to an ExternalDocument.
*/
async function kbArticleToDocument(
article: KBArticle,
instanceUrl: string
): Promise<ExternalDocument> {
function kbArticleToDocument(article: KBArticle, instanceUrl: string): ExternalDocument {
const title = rawValue(article.short_description) || rawValue(article.number) || article.sys_id
const articleText = rawValue(article.text) || rawValue(article.wiki) || ''
const content = htmlToPlainText(articleText)
const contentHash = await computeContentHash(content)
const sysId = rawValue(article.sys_id as unknown as string) || article.sys_id
const updatedOn = rawValue(article.sys_updated_on) || ''
const contentHash = `servicenow:${sysId}:${updatedOn}`
const sourceUrl = `${instanceUrl}/kb_view.do?sys_kb_id=${sysId}`
return {
@@ -218,10 +216,7 @@ async function kbArticleToDocument(
/**
* Converts an incident record to an ExternalDocument.
*/
async function incidentToDocument(
incident: Incident,
instanceUrl: string
): Promise<ExternalDocument> {
function incidentToDocument(incident: Incident, instanceUrl: string): ExternalDocument {
const number = rawValue(incident.number)
const shortDesc = rawValue(incident.short_description)
const title = number ? `${number}: ${shortDesc || 'Untitled'}` : shortDesc || incident.sys_id
@@ -258,8 +253,9 @@ async function incidentToDocument(
}
const content = parts.join('\n')
const contentHash = await computeContentHash(content)
const sysId = rawValue(incident.sys_id as unknown as string) || incident.sys_id
const updatedOn = rawValue(incident.sys_updated_on) || ''
const contentHash = `servicenow:${sysId}:${updatedOn}`
const sourceUrl = `${instanceUrl}/incident.do?sys_id=${sysId}`
return {
@@ -478,8 +474,8 @@ export const servicenowConnector: ConnectorConfig = {
const documents: ExternalDocument[] = []
for (const record of result) {
const doc = isKB
? await kbArticleToDocument(record as unknown as KBArticle, instanceUrl)
: await incidentToDocument(record as unknown as Incident, instanceUrl)
? kbArticleToDocument(record as unknown as KBArticle, instanceUrl)
: incidentToDocument(record as unknown as Incident, instanceUrl)
if (doc.content.trim()) {
documents.push(doc)
@@ -532,8 +528,8 @@ export const servicenowConnector: ConnectorConfig = {
const record = result[0]
const doc = isKB
? await kbArticleToDocument(record as unknown as KBArticle, instanceUrl)
: await incidentToDocument(record as unknown as Incident, instanceUrl)
? kbArticleToDocument(record as unknown as KBArticle, instanceUrl)
: incidentToDocument(record as unknown as Incident, instanceUrl)
return doc.content.trim() ? doc : null
} catch (error) {

View File

@@ -2,7 +2,7 @@ import { createLogger } from '@sim/logger'
import { WebflowIcon } from '@/components/icons'
import { fetchWithRetry, VALIDATE_RETRY_OPTIONS } from '@/lib/knowledge/documents/utils'
import type { ConnectorConfig, ExternalDocument, ExternalDocumentList } from '@/connectors/types'
import { computeContentHash, htmlToPlainText, parseTagDate } from '@/connectors/utils'
import { htmlToPlainText, parseTagDate } from '@/connectors/utils'
const logger = createLogger('WebflowConnector')
@@ -194,8 +194,8 @@ export const webflowConnector: ConnectorConfig = {
}
const items = data.items || []
let documents: ExternalDocument[] = await Promise.all(
items.map((item) => itemToDocument(item, currentCollectionId, collectionName))
let documents: ExternalDocument[] = items.map((item) =>
itemToDocument(item, currentCollectionId, collectionName)
)
if (maxItems > 0) {
@@ -373,13 +373,14 @@ export const webflowConnector: ConnectorConfig = {
/**
* Converts a Webflow CMS item to an ExternalDocument.
*/
async function itemToDocument(
function itemToDocument(
item: WebflowItem,
collectionId: string,
collectionName: string
): Promise<ExternalDocument> {
): ExternalDocument {
const plainText = itemToPlainText(item, collectionName)
const contentHash = await computeContentHash(plainText)
const lastModified = item.lastUpdated || item.lastPublished || item.createdOn || ''
const contentHash = `webflow:${item.id}:${lastModified}`
const title = extractItemTitle(item)
const slug = (item.fieldData?.slug as string) || ''

View File

@@ -2,7 +2,7 @@ import { createLogger } from '@sim/logger'
import { WordpressIcon } from '@/components/icons'
import { fetchWithRetry, VALIDATE_RETRY_OPTIONS } from '@/lib/knowledge/documents/utils'
import type { ConnectorConfig, ExternalDocument, ExternalDocumentList } from '@/connectors/types'
import { computeContentHash, htmlToPlainText, joinTagArray, parseTagDate } from '@/connectors/utils'
import { htmlToPlainText, joinTagArray, parseTagDate } from '@/connectors/utils'
const logger = createLogger('WordPressConnector')
@@ -59,10 +59,10 @@ function extractTagNames(tags: Record<string, { name: string }>): string[] {
/**
* Converts a WordPress post to an ExternalDocument.
*/
async function postToDocument(post: WordPressPost): Promise<ExternalDocument> {
function postToDocument(post: WordPressPost): ExternalDocument {
const plainText = htmlToPlainText(post.content)
const fullContent = `# ${post.title}\n\n${plainText}`
const contentHash = await computeContentHash(fullContent)
const contentHash = `wordpress:${post.ID}:${post.modified || ''}`
const categories = extractCategoryNames(post.categories)
const tags = extractTagNames(post.tags)
@@ -182,7 +182,7 @@ export const wordpressConnector: ConnectorConfig = {
const data = (await response.json()) as WordPressPostsResponse
const posts = data.posts || []
const documents = await Promise.all(posts.map(postToDocument))
const documents = posts.map(postToDocument)
const totalFetched = totalDocsFetched + documents.length
if (syncContext) syncContext.totalDocsFetched = totalFetched
@@ -226,7 +226,7 @@ export const wordpressConnector: ConnectorConfig = {
}
const post = (await response.json()) as WordPressPost
return await postToDocument(post)
return postToDocument(post)
} catch (error) {
logger.warn('Failed to get WordPress document', {
externalId,

View File

@@ -2,7 +2,7 @@ import { createLogger } from '@sim/logger'
import { ZendeskIcon } from '@/components/icons'
import { fetchWithRetry, VALIDATE_RETRY_OPTIONS } from '@/lib/knowledge/documents/utils'
import type { ConnectorConfig, ExternalDocument, ExternalDocumentList } from '@/connectors/types'
import { computeContentHash, htmlToPlainText, joinTagArray, parseTagDate } from '@/connectors/utils'
import { htmlToPlainText, joinTagArray, parseTagDate } from '@/connectors/utils'
const logger = createLogger('ZendeskConnector')
@@ -207,14 +207,11 @@ function formatTicketContent(ticket: ZendeskTicket, comments: ZendeskComment[]):
}
/**
* Converts an article to an ExternalDocument.
* Converts an article to an ExternalDocument with inline content.
* Articles return body inline from the list API so no deferral is needed.
*/
async function articleToDocument(
article: ZendeskArticle,
subdomain: string
): Promise<ExternalDocument> {
function articleToDocument(article: ZendeskArticle, subdomain: string): ExternalDocument {
const content = htmlToPlainText(article.body || '')
const contentHash = await computeContentHash(content)
return {
externalId: `article-${article.id}`,
@@ -222,7 +219,7 @@ async function articleToDocument(
content,
mimeType: 'text/plain',
sourceUrl: article.html_url || `https://${subdomain}.zendesk.com/hc/articles/${article.id}`,
contentHash,
contentHash: `zendesk:article:${article.id}:${article.updated_at}`,
metadata: {
type: 'article',
articleId: article.id,
@@ -238,23 +235,50 @@ async function articleToDocument(
}
/**
* Converts a ticket (with comments) to an ExternalDocument.
* Creates a deferred stub for a ticket. Content is not fetched here because
* each ticket requires a separate comments API call. Full content is fetched
* lazily via getDocument only for new/changed documents.
*/
async function ticketToDocument(
function ticketToStub(ticket: ZendeskTicket, subdomain: string): ExternalDocument {
return {
externalId: `ticket-${ticket.id}`,
title: `Ticket #${ticket.id}: ${ticket.subject}`,
content: '',
contentDeferred: true,
mimeType: 'text/plain',
sourceUrl: `https://${subdomain}.zendesk.com/agent/tickets/${ticket.id}`,
contentHash: `zendesk:ticket:${ticket.id}:${ticket.updated_at}`,
metadata: {
type: 'ticket',
ticketId: ticket.id,
status: ticket.status,
priority: ticket.priority,
tags: ticket.tags,
createdAt: ticket.created_at,
updatedAt: ticket.updated_at,
},
}
}
/**
* Converts a ticket (with comments) to a full ExternalDocument.
* Used by getDocument to resolve deferred ticket stubs.
*/
function ticketToDocument(
ticket: ZendeskTicket,
comments: ZendeskComment[],
subdomain: string
): Promise<ExternalDocument> {
): ExternalDocument {
const content = formatTicketContent(ticket, comments)
const contentHash = await computeContentHash(content)
return {
externalId: `ticket-${ticket.id}`,
title: `Ticket #${ticket.id}: ${ticket.subject}`,
content,
contentDeferred: false,
mimeType: 'text/plain',
sourceUrl: `https://${subdomain}.zendesk.com/agent/tickets/${ticket.id}`,
contentHash,
contentHash: `zendesk:ticket:${ticket.id}:${ticket.updated_at}`,
metadata: {
type: 'ticket',
ticketId: ticket.id,
@@ -375,8 +399,7 @@ export const zendeskConnector: ConnectorConfig = {
for (const article of articles) {
if (!article.body?.trim()) continue
const doc = await articleToDocument(article, subdomain)
documents.push(doc)
documents.push(articleToDocument(article, subdomain))
}
}
@@ -391,21 +414,8 @@ export const zendeskConnector: ConnectorConfig = {
)
logger.info(`Fetched ${tickets.length} tickets from Zendesk`)
const BATCH_SIZE = 5
for (let i = 0; i < tickets.length; i += BATCH_SIZE) {
const batch = tickets.slice(i, i + BATCH_SIZE)
const batchResults = await Promise.all(
batch.map(async (ticket) => {
const comments = await fetchTicketComments(
subdomain,
accessToken,
sourceConfig,
ticket.id
)
return ticketToDocument(ticket, comments, subdomain)
})
)
documents.push(...batchResults)
for (const ticket of tickets) {
documents.push(ticketToStub(ticket, subdomain))
}
}

View File

@@ -985,6 +985,15 @@ cronjobs:
successfulJobsHistoryLimit: 3
failedJobsHistoryLimit: 1
connectorSync:
enabled: true
name: connector-sync
schedule: "*/5 * * * *"
path: "/api/knowledge/connectors/sync"
concurrencyPolicy: Forbid
successfulJobsHistoryLimit: 3
failedJobsHistoryLimit: 1
# Global CronJob settings
image:
repository: curlimages/curl
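
For reference, a sketch of the CronJob this block would render, assuming the chart templates `path` into a curl invocation against the app service (the manifest shape, service hostname, and restart policy below are assumptions about the chart, not its actual template):

```yaml
# Illustrative rendering only; the real template lives in the chart.
apiVersion: batch/v1
kind: CronJob
metadata:
  name: connector-sync
spec:
  schedule: "*/5 * * * *"
  concurrencyPolicy: Forbid
  successfulJobsHistoryLimit: 3
  failedJobsHistoryLimit: 1
  jobTemplate:
    spec:
      template:
        spec:
          restartPolicy: OnFailure
          containers:
            - name: connector-sync
              image: curlimages/curl
              # Service hostname is assumed; the chart would substitute its own.
              args: ["-sf", "http://sim-app/api/knowledge/connectors/sync"]
```

The Forbid concurrency policy matters here: a sync run that outlives the five-minute schedule should not be overlapped by the next trigger.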