feat(kb): Adding support for more tags to the KB (#2433)

* creating boolean, number and date tags with different equality matchings

* feat: add UI for tag field types with filter operators

- Update base-tags-modal with field type selector dropdown
- Update document-tags-modal with different input types per fieldType
- Update knowledge-tag-filters with operator dropdown and type-specific inputs
- Update search routes to support all tag slot types
- Update hook to use AllTagSlot type

* feat: add field type support to document-tag-entry component

- Add dropdown with all field types (Text, Number, Date, Boolean)
- Render different value inputs based on field type
- Update slot counting to include all field types (28 total)

* fix: resolve MAX_TAG_SLOTS error and z-index dropdown issue

- Replace MAX_TAG_SLOTS with totalSlots in document-tag-entry
- Add z-index to SelectContent in base-tags-modal for proper layering

* fix: handle non-text columns in getTagUsage query

- Only apply empty string check for text columns (tag1-tag7)
- Numeric/date/boolean columns only check IS NOT NULL
- Cast values to text for consistent output

* refactor: use EMCN components for KB UI

- Replace @/components/ui imports with @/components/emcn
- Use Combobox instead of Select for dropdowns
- Use EMCN Switch, Button, Input, Label components
- Remove unsupported 'size' prop from EMCN Button

* fix: layout for delete button next to date picker

- Change delete button from absolute to inline positioning
- Add proper column width (w-10) for delete button
- Add empty header cell for delete column
- Apply fix to both document-tag-entry and knowledge-tag-filters

* fix: clear value when switching tag field type

- Reset value to empty when changing type (e.g., boolean to text)
- Reset value when tag name changes and type differs
- Prevents 'true'/'false' from sticking in text inputs

* feat: add full support for number/date/boolean tag filtering in KB search

- Copy all tag types (number, date, boolean) from document to embedding records
- Update processDocumentTags to handle all field types with proper type conversion
- Add number/date/boolean columns to document queries in checkDocumentWriteAccess
- Update chunk creation to inherit all tag types from parent document
- Add getSearchResultFields helper for consistent query result selection
- Support structured filters with operators (eq, gt, lt, between, etc.)
- Fix search queries to include all 28 tag fields in results

* fixing tags import issue

* fix rm file

* reduced number to 3 and date to 2

* fixing lint

* fixed the prop size issue

* increased number from 3 to 5 and boolean from 7 to 2

* fixed the number sql stuff

* progress

* fix document tag and kb tag modals

* update datepicker emcn component

* fix ui

* progress on KB block tags UI

* fix issues with date filters

* fix execution parsing of types for KB tags

* remove migration before merge

* regen migrations

* fix tests and types

* address greptile comments

* fix more greptile comments

* fix filtering logic for multiple of same row

* fix tests

---------

Co-authored-by: priyanshu.solanki <priyanshu.solanki@saviynt.com>
Co-authored-by: Vikhyath Mondreti <vikhyath@simstudio.ai>
This commit is contained in:
Priyanshu Solanki
2025-12-19 22:00:35 -07:00
committed by GitHub
parent a1a189f328
commit 4f69b171f2
36 changed files with 11459 additions and 947 deletions

View File

@@ -18,11 +18,56 @@ export const DEFAULT_TEAM_STORAGE_LIMIT_GB = 500
export const DEFAULT_ENTERPRISE_STORAGE_LIMIT_GB = 500
/**
* Tag slots available for knowledge base documents and embeddings
* Text tag slots for knowledge base documents and embeddings
*/
export const TAG_SLOTS = ['tag1', 'tag2', 'tag3', 'tag4', 'tag5', 'tag6', 'tag7'] as const
export const TEXT_TAG_SLOTS = ['tag1', 'tag2', 'tag3', 'tag4', 'tag5', 'tag6', 'tag7'] as const
/**
* Type for tag slot names
* Number tag slots for knowledge base documents and embeddings (5 slots)
*/
export const NUMBER_TAG_SLOTS = ['number1', 'number2', 'number3', 'number4', 'number5'] as const
/**
* Date tag slots for knowledge base documents and embeddings (2 slots)
*/
export const DATE_TAG_SLOTS = ['date1', 'date2'] as const
/**
* Boolean tag slots for knowledge base documents and embeddings (3 slots)
*/
export const BOOLEAN_TAG_SLOTS = ['boolean1', 'boolean2', 'boolean3'] as const
/**
* All tag slots combined (for backwards compatibility)
*
* Concatenates the per-type lists in a fixed order: text, number, date, boolean
* (7 + 5 + 2 + 3 = 17 slot names in total).
*/
export const TAG_SLOTS = [
...TEXT_TAG_SLOTS,
...NUMBER_TAG_SLOTS,
...DATE_TAG_SLOTS,
...BOOLEAN_TAG_SLOTS,
] as const
/**
* Type for all tag slot names
*
* Union of every string literal in TAG_SLOTS, i.e. the text, number, date and
* boolean slot names together.
*/
export type TagSlot = (typeof TAG_SLOTS)[number]
/**
* Type for text tag slot names
*/
export type TextTagSlot = (typeof TEXT_TAG_SLOTS)[number]
/**
* Type for number tag slot names
*/
export type NumberTagSlot = (typeof NUMBER_TAG_SLOTS)[number]
/**
* Type for date tag slot names
*/
export type DateTagSlot = (typeof DATE_TAG_SLOTS)[number]
/**
* Type for boolean tag slot names
*/
export type BooleanTagSlot = (typeof BOOLEAN_TAG_SLOTS)[number]

View File

@@ -0,0 +1,40 @@
-- Adds typed tag columns to "document" and "embedding": five double precision
-- (number1-5), two timestamp (date1-2) and three boolean (boolean1-3) columns
-- per table. Every column is added without NOT NULL or a default, so existing
-- rows hold NULL in the new columns.
-- A single-column btree index is then created for each new column on both tables.
-- NOTE(review): these are plain (non-CONCURRENTLY) index builds, which block
-- writes to the table while each index is built - confirm this is acceptable
-- for the size of "embedding" in production.
-- NOTE(review): "timestamp" is without time zone in PostgreSQL; presumably the
-- application normalizes date tag values before writing - confirm.
ALTER TABLE "document" ADD COLUMN "number1" double precision;--> statement-breakpoint
ALTER TABLE "document" ADD COLUMN "number2" double precision;--> statement-breakpoint
ALTER TABLE "document" ADD COLUMN "number3" double precision;--> statement-breakpoint
ALTER TABLE "document" ADD COLUMN "number4" double precision;--> statement-breakpoint
ALTER TABLE "document" ADD COLUMN "number5" double precision;--> statement-breakpoint
ALTER TABLE "document" ADD COLUMN "date1" timestamp;--> statement-breakpoint
ALTER TABLE "document" ADD COLUMN "date2" timestamp;--> statement-breakpoint
ALTER TABLE "document" ADD COLUMN "boolean1" boolean;--> statement-breakpoint
ALTER TABLE "document" ADD COLUMN "boolean2" boolean;--> statement-breakpoint
ALTER TABLE "document" ADD COLUMN "boolean3" boolean;--> statement-breakpoint
ALTER TABLE "embedding" ADD COLUMN "number1" double precision;--> statement-breakpoint
ALTER TABLE "embedding" ADD COLUMN "number2" double precision;--> statement-breakpoint
ALTER TABLE "embedding" ADD COLUMN "number3" double precision;--> statement-breakpoint
ALTER TABLE "embedding" ADD COLUMN "number4" double precision;--> statement-breakpoint
ALTER TABLE "embedding" ADD COLUMN "number5" double precision;--> statement-breakpoint
ALTER TABLE "embedding" ADD COLUMN "date1" timestamp;--> statement-breakpoint
ALTER TABLE "embedding" ADD COLUMN "date2" timestamp;--> statement-breakpoint
ALTER TABLE "embedding" ADD COLUMN "boolean1" boolean;--> statement-breakpoint
ALTER TABLE "embedding" ADD COLUMN "boolean2" boolean;--> statement-breakpoint
ALTER TABLE "embedding" ADD COLUMN "boolean3" boolean;--> statement-breakpoint
CREATE INDEX "doc_number1_idx" ON "document" USING btree ("number1");--> statement-breakpoint
CREATE INDEX "doc_number2_idx" ON "document" USING btree ("number2");--> statement-breakpoint
CREATE INDEX "doc_number3_idx" ON "document" USING btree ("number3");--> statement-breakpoint
CREATE INDEX "doc_number4_idx" ON "document" USING btree ("number4");--> statement-breakpoint
CREATE INDEX "doc_number5_idx" ON "document" USING btree ("number5");--> statement-breakpoint
CREATE INDEX "doc_date1_idx" ON "document" USING btree ("date1");--> statement-breakpoint
CREATE INDEX "doc_date2_idx" ON "document" USING btree ("date2");--> statement-breakpoint
CREATE INDEX "doc_boolean1_idx" ON "document" USING btree ("boolean1");--> statement-breakpoint
CREATE INDEX "doc_boolean2_idx" ON "document" USING btree ("boolean2");--> statement-breakpoint
CREATE INDEX "doc_boolean3_idx" ON "document" USING btree ("boolean3");--> statement-breakpoint
CREATE INDEX "emb_number1_idx" ON "embedding" USING btree ("number1");--> statement-breakpoint
CREATE INDEX "emb_number2_idx" ON "embedding" USING btree ("number2");--> statement-breakpoint
CREATE INDEX "emb_number3_idx" ON "embedding" USING btree ("number3");--> statement-breakpoint
CREATE INDEX "emb_number4_idx" ON "embedding" USING btree ("number4");--> statement-breakpoint
CREATE INDEX "emb_number5_idx" ON "embedding" USING btree ("number5");--> statement-breakpoint
CREATE INDEX "emb_date1_idx" ON "embedding" USING btree ("date1");--> statement-breakpoint
CREATE INDEX "emb_date2_idx" ON "embedding" USING btree ("date2");--> statement-breakpoint
CREATE INDEX "emb_boolean1_idx" ON "embedding" USING btree ("boolean1");--> statement-breakpoint
CREATE INDEX "emb_boolean2_idx" ON "embedding" USING btree ("boolean2");--> statement-breakpoint
CREATE INDEX "emb_boolean3_idx" ON "embedding" USING btree ("boolean3");

File diff suppressed because it is too large Load Diff

View File

@@ -876,6 +876,13 @@
"when": 1766133598113,
"tag": "0125_eager_lily_hollister",
"breakpoints": true
},
{
"idx": 126,
"version": "7",
"when": 1766203036010,
"tag": "0126_dapper_midnight",
"breakpoints": true
}
]
}

View File

@@ -5,6 +5,7 @@ import {
check,
customType,
decimal,
doublePrecision,
index,
integer,
json,
@@ -1047,6 +1048,7 @@ export const document = pgTable(
deletedAt: timestamp('deleted_at'), // Soft delete
// Document tags for filtering (inherited by all chunks)
// Text tags (7 slots)
tag1: text('tag1'),
tag2: text('tag2'),
tag3: text('tag3'),
@@ -1054,6 +1056,19 @@ export const document = pgTable(
tag5: text('tag5'),
tag6: text('tag6'),
tag7: text('tag7'),
// Number tags (5 slots)
number1: doublePrecision('number1'),
number2: doublePrecision('number2'),
number3: doublePrecision('number3'),
number4: doublePrecision('number4'),
number5: doublePrecision('number5'),
// Date tags (2 slots)
date1: timestamp('date1'),
date2: timestamp('date2'),
// Boolean tags (3 slots)
boolean1: boolean('boolean1'),
boolean2: boolean('boolean2'),
boolean3: boolean('boolean3'),
// Timestamps
uploadedAt: timestamp('uploaded_at').notNull().defaultNow(),
@@ -1068,7 +1083,7 @@ export const document = pgTable(
table.knowledgeBaseId,
table.processingStatus
),
// Tag indexes for filtering
// Text tag indexes
tag1Idx: index('doc_tag1_idx').on(table.tag1),
tag2Idx: index('doc_tag2_idx').on(table.tag2),
tag3Idx: index('doc_tag3_idx').on(table.tag3),
@@ -1076,6 +1091,19 @@ export const document = pgTable(
tag5Idx: index('doc_tag5_idx').on(table.tag5),
tag6Idx: index('doc_tag6_idx').on(table.tag6),
tag7Idx: index('doc_tag7_idx').on(table.tag7),
// Number tag indexes (5 slots)
number1Idx: index('doc_number1_idx').on(table.number1),
number2Idx: index('doc_number2_idx').on(table.number2),
number3Idx: index('doc_number3_idx').on(table.number3),
number4Idx: index('doc_number4_idx').on(table.number4),
number5Idx: index('doc_number5_idx').on(table.number5),
// Date tag indexes (2 slots)
date1Idx: index('doc_date1_idx').on(table.date1),
date2Idx: index('doc_date2_idx').on(table.date2),
// Boolean tag indexes (3 slots)
boolean1Idx: index('doc_boolean1_idx').on(table.boolean1),
boolean2Idx: index('doc_boolean2_idx').on(table.boolean2),
boolean3Idx: index('doc_boolean3_idx').on(table.boolean3),
})
)
@@ -1137,6 +1165,7 @@ export const embedding = pgTable(
endOffset: integer('end_offset').notNull(),
// Tag columns inherited from document for efficient filtering
// Text tags (7 slots)
tag1: text('tag1'),
tag2: text('tag2'),
tag3: text('tag3'),
@@ -1144,6 +1173,19 @@ export const embedding = pgTable(
tag5: text('tag5'),
tag6: text('tag6'),
tag7: text('tag7'),
// Number tags (5 slots)
number1: doublePrecision('number1'),
number2: doublePrecision('number2'),
number3: doublePrecision('number3'),
number4: doublePrecision('number4'),
number5: doublePrecision('number5'),
// Date tags (2 slots)
date1: timestamp('date1'),
date2: timestamp('date2'),
// Boolean tags (3 slots)
boolean1: boolean('boolean1'),
boolean2: boolean('boolean2'),
boolean3: boolean('boolean3'),
// Chunk state - enable/disable from knowledge base
enabled: boolean('enabled').notNull().default(true),
@@ -1182,7 +1224,7 @@ export const embedding = pgTable(
ef_construction: 64,
}),
// Tag indexes for efficient filtering
// Text tag indexes
tag1Idx: index('emb_tag1_idx').on(table.tag1),
tag2Idx: index('emb_tag2_idx').on(table.tag2),
tag3Idx: index('emb_tag3_idx').on(table.tag3),
@@ -1190,6 +1232,19 @@ export const embedding = pgTable(
tag5Idx: index('emb_tag5_idx').on(table.tag5),
tag6Idx: index('emb_tag6_idx').on(table.tag6),
tag7Idx: index('emb_tag7_idx').on(table.tag7),
// Number tag indexes (5 slots)
number1Idx: index('emb_number1_idx').on(table.number1),
number2Idx: index('emb_number2_idx').on(table.number2),
number3Idx: index('emb_number3_idx').on(table.number3),
number4Idx: index('emb_number4_idx').on(table.number4),
number5Idx: index('emb_number5_idx').on(table.number5),
// Date tag indexes (2 slots)
date1Idx: index('emb_date1_idx').on(table.date1),
date2Idx: index('emb_date2_idx').on(table.date2),
// Boolean tag indexes (3 slots)
boolean1Idx: index('emb_boolean1_idx').on(table.boolean1),
boolean2Idx: index('emb_boolean2_idx').on(table.boolean2),
boolean3Idx: index('emb_boolean3_idx').on(table.boolean3),
// Full-text search index
contentFtsIdx: index('emb_content_fts_idx').using('gin', table.contentTsv),