diff --git a/js/app/packages/block-md/component/TaskDuplicateList.tsx b/js/app/packages/block-md/component/TaskDuplicateList.tsx index 6d2379c3d3..1b84d03032 100644 --- a/js/app/packages/block-md/component/TaskDuplicateList.tsx +++ b/js/app/packages/block-md/component/TaskDuplicateList.tsx @@ -1,7 +1,6 @@ import { QUERY_FILTERS_BASE } from '@app/component/next-soup/filters/query-filters'; import { TaskListEntity } from '@app/component/next-soup/soup-view/views/tasks/TaskListEntity'; -import { useFeatureFlag } from '@app/lib/analytics/posthog'; -import { ENABLE_TASK_DUPLICATES_FLAG } from '@core/constant/featureFlags'; +import { useTaskDedupFlag } from '@core/constant/featureFlags'; import { ListLayoutProvider } from '@entity'; import CaretRightIcon from '@phosphor/caret-right.svg'; import CopyIcon from '@phosphor/copy.svg'; @@ -124,7 +123,7 @@ export function SimilarTasksSection(props: { content: Accessor; onOpenTask: (taskId: string) => void; }) { - const flag = useFeatureFlag(ENABLE_TASK_DUPLICATES_FLAG); + const flag = useTaskDedupFlag(); const [debounced, setDebounced] = createSignal({ title: props.title(), diff --git a/js/app/packages/block-md/component/TaskDuplicateMatches.tsx b/js/app/packages/block-md/component/TaskDuplicateMatches.tsx index ebb538f643..6bebab7677 100644 --- a/js/app/packages/block-md/component/TaskDuplicateMatches.tsx +++ b/js/app/packages/block-md/component/TaskDuplicateMatches.tsx @@ -3,10 +3,7 @@ import { useFeatureFlag } from '@app/lib/analytics/posthog'; import { useBlockId } from '@core/block'; import { DocumentMention } from '@core/component/LexicalMarkdown/component/decorator/DocumentMention'; import { toast } from '@core/component/Toast/Toast'; -import { - ENABLE_TASK_DUPLICATES_FLAG, - ENABLE_TASK_DUPLICATES_OVERRIDE, -} from '@core/constant/featureFlags'; +import { useTaskDedupFlag } from '@core/constant/featureFlags'; import CaretDownIcon from '@phosphor/caret-down.svg'; import WarningIcon from '@phosphor/warning.svg'; import { 
@@ -18,9 +15,8 @@ import { Button, cn, Dropdown } from '@ui'; import { createMemo, createSignal, For, Show, Suspense } from 'solid-js'; export function TaskDuplicateMatchPill() { - const flag = useFeatureFlag(ENABLE_TASK_DUPLICATES_FLAG, { - enabledOverride: ENABLE_TASK_DUPLICATES_OVERRIDE, - }); + const flag = useTaskDedupFlag(); + const matches = useTaskDuplicateMatches(); const [open, setOpen] = createSignal(false); @@ -58,9 +54,8 @@ export function TaskDuplicateMatchPill() { } export function TaskDuplicateMatchesSidePanelSection() { - const flag = useFeatureFlag(ENABLE_TASK_DUPLICATES_FLAG, { - enabledOverride: ENABLE_TASK_DUPLICATES_OVERRIDE, - }); + const flag = useTaskDedupFlag(); + const matches = useTaskDuplicateMatches(); return ( diff --git a/js/app/packages/core/constant/featureFlags.ts b/js/app/packages/core/constant/featureFlags.ts index b0e8a04bc7..5989af7115 100644 --- a/js/app/packages/core/constant/featureFlags.ts +++ b/js/app/packages/core/constant/featureFlags.ts @@ -1,4 +1,5 @@ import { analytics } from '@app/lib/analytics'; +import { useFeatureFlag } from '@app/lib/analytics/posthog'; /** * This constant reflects whether the app is running locally with hot reload enabled @@ -362,8 +363,13 @@ export const ENABLE_TEAM_INVITE_TIERS_OVERRIDE = DEV_MODE_ENV export const ENABLE_SOUP_GROUP_BY_OVERRIDE = DEV_MODE_ENV ? true : undefined; -export const ENABLE_TASK_DUPLICATES_FLAG = 'enable-task-duplicates'; -export const ENABLE_TASK_DUPLICATES_OVERRIDE = DEV_MODE_ENV ? true : undefined; +const ENABLE_TASK_DUPLICATES_FLAG = 'enable-task-duplicates'; +const ENABLE_TASK_DUPLICATES_OVERRIDE = DEV_MODE_ENV ? true : undefined; + +export const useTaskDedupFlag = () => + useFeatureFlag(ENABLE_TASK_DUPLICATES_FLAG, { + enabledOverride: ENABLE_TASK_DUPLICATES_OVERRIDE, + }); // Snippets: reusable markdown documents, the `c` launcher entry, and the `;` // insert menu. 
PostHog-gated (currently targeted at the Macro team) with a diff --git a/rust/cloud-storage/.sqlx/query-ae1b88cef7fd6cc6ef1c302a0f3a04d156d39f39abea24050e63caf56267898a.json b/rust/cloud-storage/.sqlx/query-b619adf41fdd3561cfc8eff09b07167968d753bd1fc70b90cdcb4590d72b41d8.json similarity index 54% rename from rust/cloud-storage/.sqlx/query-ae1b88cef7fd6cc6ef1c302a0f3a04d156d39f39abea24050e63caf56267898a.json rename to rust/cloud-storage/.sqlx/query-b619adf41fdd3561cfc8eff09b07167968d753bd1fc70b90cdcb4590d72b41d8.json index e4b3c2a5e5..1c9e9f0367 100644 --- a/rust/cloud-storage/.sqlx/query-ae1b88cef7fd6cc6ef1c302a0f3a04d156d39f39abea24050e63caf56267898a.json +++ b/rust/cloud-storage/.sqlx/query-b619adf41fdd3561cfc8eff09b07167968d753bd1fc70b90cdcb4590d72b41d8.json @@ -1,6 +1,6 @@ { "db_name": "PostgreSQL", - "query": "\n WITH query AS (\n SELECT key, vec::vector AS vec\n FROM unnest($1::text[], $2::text[]) AS t(key, vec)\n ),\n scored AS (\n SELECT\n e.document_id,\n e.search_key,\n e.content,\n e.embedding::text AS embedding_text,\n MAX(1 - (e.embedding <=> q.vec))::real AS score\n FROM task_duplicate_embedding e\n JOIN \"Document\" d ON d.id = e.document_id\n JOIN document_sub_type dst ON dst.document_id = d.id AND dst.sub_type = 'task'\n LEFT JOIN team_task tt ON tt.document_id = d.id\n CROSS JOIN query q\n WHERE d.\"deletedAt\" IS NULL\n AND (\n d.owner = $3\n OR ($4::uuid IS NOT NULL AND tt.team_id = $4)\n )\n AND 
($5::text IS NULL OR e.document_id <> $5)\n AND (\n NOT $6\n OR NOT EXISTS (\n SELECT 1\n FROM task_duplicate_match m\n WHERE m.task_id = LEAST($5, e.document_id)\n AND m.duplicate_task_id = GREATEST($5, e.document_id)\n AND m.status = 'dismissed'\n )\n )\n GROUP BY e.document_id, e.search_key, e.content, e.embedding\n ),\n ranked AS (\n SELECT document_id, MAX(score) AS best\n FROM scored\n GROUP BY document_id\n ORDER BY best DESC\n LIMIT $7\n )\n SELECT\n s.document_id AS \"document_id!\",\n s.search_key AS \"search_key!\",\n s.content AS \"content!\",\n s.embedding_text AS \"embedding_text!\",\n s.score AS \"score!\"\n FROM scored s\n JOIN ranked r ON r.document_id = s.document_id\n ORDER BY r.best DESC, s.document_id, s.score DESC\n ", + "query": "\n WITH query AS (\n SELECT key, vec::vector AS vec\n FROM unnest($1::text[], $2::text[]) AS t(key, vec)\n ),\n scored AS (\n SELECT\n e.document_id,\n e.search_key,\n e.content,\n e.embedding::text AS embedding_text,\n MAX(1 - (e.embedding <=> q.vec))::real AS score\n FROM task_duplicate_embedding e\n JOIN \"Document\" d ON d.id = e.document_id\n JOIN document_sub_type dst ON dst.document_id = d.id AND dst.sub_type = 'task'\n LEFT JOIN team_task tt ON tt.document_id = d.id\n CROSS JOIN query q\n WHERE d.\"deletedAt\" IS NULL\n AND (\n d.owner = $3\n OR ($4::uuid IS NOT NULL AND tt.team_id = $4)\n )\n AND ($5::text IS NULL OR e.document_id <> $5)\n AND (\n NOT $6\n OR NOT EXISTS (\n SELECT 1\n FROM task_duplicate_match m\n WHERE m.task_id = LEAST($5, e.document_id)\n AND m.duplicate_task_id = GREATEST($5, e.document_id)\n AND m.status = 'dismissed'\n )\n )\n -- only_incomplete: drop tasks whose Status system property\n -- (system_properties::SystemPropertyKey::Status) is set to\n -- the Completed or Canceled option. 
Tasks without a status\n -- row are kept.\n AND (\n NOT $8\n OR NOT EXISTS (\n SELECT 1\n FROM entity_properties ep\n WHERE ep.entity_id = e.document_id\n AND ep.entity_type = 'TASK'\n AND ep.property_definition_id = '00000001-0000-0000-0000-000000000002'\n AND ep.values->'value' ?| ARRAY[\n '00000001-0000-0000-0002-000000000004',\n '00000001-0000-0000-0002-000000000005'\n ]\n )\n )\n GROUP BY e.document_id, e.search_key, e.content, e.embedding\n ),\n ranked AS (\n SELECT document_id, MAX(score) AS best\n FROM scored\n GROUP BY document_id\n ORDER BY best DESC\n LIMIT $7\n )\n SELECT\n s.document_id AS \"document_id!\",\n s.search_key AS \"search_key!\",\n s.content AS \"content!\",\n s.embedding_text AS \"embedding_text!\",\n s.score AS \"score!\"\n FROM scored s\n JOIN ranked r ON r.document_id = s.document_id\n ORDER BY r.best DESC, s.document_id, s.score DESC\n ", "describe": { "columns": [ { @@ -37,7 +37,8 @@ "Uuid", "Text", "Bool", - "Int8" + "Int8", + "Bool" ] }, "nullable": [ @@ -48,5 +49,5 @@ null ] }, - "hash": "ae1b88cef7fd6cc6ef1c302a0f3a04d156d39f39abea24050e63caf56267898a" + "hash": "b619adf41fdd3561cfc8eff09b07167968d753bd1fc70b90cdcb4590d72b41d8" } diff --git a/rust/cloud-storage/task_dedup/src/domain/models.rs b/rust/cloud-storage/task_dedup/src/domain/models.rs index b512899bb0..5c74305286 100644 --- a/rust/cloud-storage/task_dedup/src/domain/models.rs +++ b/rust/cloud-storage/task_dedup/src/domain/models.rs @@ -53,6 +53,10 @@ pub struct TaskSearchParameters { /// When true, drop candidates already dismissed against /// [`exclude_document_id`](Self::exclude_document_id). pub exclude_dismissed: bool, + /// When true, only return incomplete tasks: candidates whose Status + /// property is Completed or Canceled are dropped (tasks without a status + /// are kept). + pub only_incomplete: bool, } /// A duplicate task candidate shown on the task surface. 
diff --git a/rust/cloud-storage/task_dedup/src/domain/service.rs b/rust/cloud-storage/task_dedup/src/domain/service.rs index 61931d3ea3..265225adbb 100644 --- a/rust/cloud-storage/task_dedup/src/domain/service.rs +++ b/rust/cloud-storage/task_dedup/src/domain/service.rs @@ -198,6 +198,9 @@ where limit: self.config.vector_candidate_limit, exclude_document_id: None, exclude_dismissed: false, + // The composer shows these as actionable duplicates, so tasks that + // are already completed or canceled are not useful matches. + only_incomplete: true, }; let results = self .vector_db @@ -263,6 +266,9 @@ where limit: self.config.vector_candidate_limit, exclude_document_id: Some(task.document_id.clone()), exclude_dismissed: true, + // A persisted match against a completed task is still informative + // on the task surface (the work may already be done). + only_incomplete: false, }; let results = self .vector_db diff --git a/rust/cloud-storage/task_dedup/src/domain/service/test.rs b/rust/cloud-storage/task_dedup/src/domain/service/test.rs index 04947ab2f4..a79dc28e2a 100644 --- a/rust/cloud-storage/task_dedup/src/domain/service/test.rs +++ b/rust/cloud-storage/task_dedup/src/domain/service/test.rs @@ -509,6 +509,7 @@ async fn detect_creates_match_reranks_and_notifies() { .unwrap(); assert_eq!(params.exclude_document_id.as_deref(), Some("NEW")); assert!(params.exclude_dismissed); + assert!(!params.only_incomplete); // A single ordered match was written, tagged with the judge's model. let upserts = h.matches.upserted_matches.lock().unwrap(); @@ -706,7 +707,8 @@ async fn similarity_search_filters_ranks_and_persists_nothing() { assert_eq!(calls.len(), 1); assert_eq!(calls[0].1, vec!["cx".to_string(), "cz".to_string()]); - // The similarity path does not scope-exclude self or dismissed pairs. + // The similarity path does not scope-exclude self or dismissed pairs, but + // it does restrict candidates to incomplete tasks. 
let params = h .vector_db .inner @@ -717,6 +719,7 @@ async fn similarity_search_filters_ranks_and_persists_nothing() { .unwrap(); assert!(params.exclude_document_id.is_none()); assert!(!params.exclude_dismissed); + assert!(params.only_incomplete); // Nothing was judged or persisted. assert!(h.judge.calls.lock().unwrap().is_empty()); diff --git a/rust/cloud-storage/task_dedup/src/outbound/postgres.rs b/rust/cloud-storage/task_dedup/src/outbound/postgres.rs index 6429d18445..04e8b63907 100644 --- a/rust/cloud-storage/task_dedup/src/outbound/postgres.rs +++ b/rust/cloud-storage/task_dedup/src/outbound/postgres.rs @@ -150,6 +150,24 @@ impl VectorStore for PgTaskVectorDb { AND m.status = 'dismissed' ) ) + -- only_incomplete: drop tasks whose Status system property + -- (system_properties::SystemPropertyKey::Status) is set to + -- the Completed or Canceled option. Tasks without a status + -- row are kept. + AND ( + NOT $8 + OR NOT EXISTS ( + SELECT 1 + FROM entity_properties ep + WHERE ep.entity_id = e.document_id + AND ep.entity_type = 'TASK' + AND ep.property_definition_id = '00000001-0000-0000-0000-000000000002' + AND ep.values->'value' ?| ARRAY[ + '00000001-0000-0000-0002-000000000004', + '00000001-0000-0000-0002-000000000005' + ] + ) + ) GROUP BY e.document_id, e.search_key, e.content, e.embedding ), ranked AS ( @@ -176,6 +194,7 @@ impl VectorStore for PgTaskVectorDb { params.exclude_document_id, params.exclude_dismissed, params.limit, + params.only_incomplete, ) .fetch_all(&mut *tx) .await?; diff --git a/rust/cloud-storage/task_dedup/src/outbound/postgres/test.rs b/rust/cloud-storage/task_dedup/src/outbound/postgres/test.rs index 87e5ff5993..ee3a9f742b 100644 --- a/rust/cloud-storage/task_dedup/src/outbound/postgres/test.rs +++ b/rust/cloud-storage/task_dedup/src/outbound/postgres/test.rs @@ -17,6 +17,12 @@ const TEAM_ID: Uuid = uuid::uuid!("a0000000-0000-0000-0000-000000000001"); const TASK_ONE: &str = "d1000000-0000-0000-0000-000000000001"; const TASK_TWO: &str = 
"d1000000-0000-0000-0000-000000000002"; const TASK_THREE: &str = "d1000000-0000-0000-0000-000000000003"; +const TASK_FOUR: &str = "d1000000-0000-0000-0000-000000000004"; + +// Status system property options (see `system_properties::StatusOption`). +const STATUS_IN_PROGRESS: Uuid = uuid::uuid!("00000001-0000-0000-0002-000000000002"); +const STATUS_COMPLETED: Uuid = uuid::uuid!("00000001-0000-0000-0002-000000000004"); +const STATUS_CANCELED: Uuid = uuid::uuid!("00000001-0000-0000-0002-000000000005"); type TestService = TaskDedupService; @@ -157,6 +163,31 @@ async fn insert_task_embedding(pool: &PgPool, document_id: &str, title: &str, bo } } +/// Sets a task's Status system property to the given option uuid (e.g. +/// `system_properties` Completed `…0002-000000000004`). +async fn set_task_status(pool: &PgPool, document_id: &str, status_option: Uuid) { + sqlx::query!( + r#" + INSERT INTO entity_properties (id, entity_id, entity_type, property_definition_id, values) + VALUES ( + $1, + $2, + 'TASK', + '00000001-0000-0000-0000-000000000002', + jsonb_build_object('type', 'SelectOption', 'value', jsonb_build_array($3::text)) + ) + ON CONFLICT (entity_id, entity_type, property_definition_id) DO UPDATE + SET values = EXCLUDED.values + "#, + Uuid::new_v4(), + document_id, + status_option.to_string(), + ) + .execute(pool) + .await + .unwrap(); +} + async fn insert_match(pool: &PgPool, task_id: &str, duplicate_task_id: &str) -> Uuid { let id = Uuid::new_v4(); let (task_id, duplicate_task_id) = @@ -450,6 +481,64 @@ async fn detection_closes_existing_duplicate_component(pool: PgPool) { assert_eq!(duplicate_ids, vec![TASK_TWO, TASK_THREE]); } +#[sqlx::test( + migrator = "MACRO_DB_MIGRATIONS", + fixtures( + path = "../../../../documents/fixtures", + scripts("documents_test_data") + ) +)] +async fn similarity_search_only_returns_incomplete_tasks(pool: PgPool) { + setup_tasks(&pool).await; + insert_task(&pool, TASK_FOUR, "Detect duplicated tasks", OWNER).await; + for task in 
[TASK_ONE, TASK_TWO, TASK_THREE, TASK_FOUR] { + insert_task_embedding(&pool, task, DETECTION_TITLE, DETECTION_BODY).await; + } + set_task_status(&pool, TASK_ONE, STATUS_COMPLETED).await; + set_task_status(&pool, TASK_TWO, STATUS_CANCELED).await; + set_task_status(&pool, TASK_THREE, STATUS_IN_PROGRESS).await; + // TASK_FOUR has no status row and counts as incomplete. + + let service = service(pool.clone()); + let results = service + .similarity_search(OWNER, Some(TEAM_ID), DETECTION_TITLE, DETECTION_BODY) + .await + .unwrap(); + + let mut ids: Vec<&str> = results.iter().map(|r| r.task_id.as_str()).collect(); + ids.sort(); + assert_eq!( + ids, + vec![TASK_THREE, TASK_FOUR], + "completed and canceled tasks should be dropped; in-progress and status-less kept" + ); +} + +#[sqlx::test( + migrator = "MACRO_DB_MIGRATIONS", + fixtures( + path = "../../../../documents/fixtures", + scripts("documents_test_data") + ) +)] +async fn detection_still_matches_completed_tasks(pool: PgPool) { + setup_tasks(&pool).await; + insert_task_embedding(&pool, TASK_TWO, DETECTION_TITLE, DETECTION_BODY).await; + set_task_status(&pool, TASK_TWO, STATUS_COMPLETED).await; + + let service = service(pool.clone()); + service + .detect_new_task(detection_task(TASK_ONE)) + .await + .unwrap(); + + // The new-task path keeps completed candidates: a match against finished + // work is still worth surfacing on the task page. + let duplicates = service.active_duplicates(TASK_ONE).await.unwrap(); + assert_eq!(duplicates.len(), 1); + assert_eq!(duplicates[0].task_id, TASK_TWO); +} + #[sqlx::test( migrator = "MACRO_DB_MIGRATIONS", fixtures(