diff --git a/docs/examples_notebooks/drift_search.ipynb b/docs/examples_notebooks/drift_search.ipynb index e33e8bc94f..81cd193eec 100644 --- a/docs/examples_notebooks/drift_search.ipynb +++ b/docs/examples_notebooks/drift_search.ipynb @@ -17,7 +17,6 @@ "outputs": [], "source": [ "import os\n", - "from pathlib import Path\n", "\n", "import pandas as pd\n", "from graphrag.config.enums import ModelType\n", @@ -62,15 +61,13 @@ "# load description embeddings to an in-memory lancedb vectorstore\n", "# to connect to a remote db, specify url and port values.\n", "description_embedding_store = LanceDBVectorStore(\n", - " vector_store_schema_config=VectorStoreSchemaConfig(\n", - " index_name=\"default-entity-description\"\n", - " ),\n", + " vector_store_schema_config=VectorStoreSchemaConfig(index_name=\"entity_description\"),\n", ")\n", "description_embedding_store.connect(db_uri=LANCEDB_URI)\n", "\n", "full_content_embedding_store = LanceDBVectorStore(\n", " vector_store_schema_config=VectorStoreSchemaConfig(\n", - " index_name=\"default-community-full_content\"\n", + " index_name=\"community_full_content\"\n", " )\n", ")\n", "full_content_embedding_store.connect(db_uri=LANCEDB_URI)\n", @@ -88,7 +85,11 @@ "text_units = read_indexer_text_units(text_unit_df)\n", "\n", "print(f\"Text unit records: {len(text_unit_df)}\")\n", - "text_unit_df.head()" + "text_unit_df.head()\n", + "\n", + "report_df = pd.read_parquet(f\"{INPUT_DIR}/{COMMUNITY_REPORT_TABLE}.parquet\")\n", + "reports = read_indexer_reports(report_df, community_df, COMMUNITY_LEVEL)\n", + "read_indexer_report_embeddings(reports, full_content_embedding_store)" ] }, { @@ -118,7 +119,7 @@ " api_key=api_key,\n", " type=ModelType.Embedding,\n", " model_provider=\"openai\",\n", - " model=\"text-embedding-3-small\",\n", + " model=\"text-embedding-3-large\",\n", " max_retries=20,\n", ")\n", "\n", @@ -129,31 +130,6 @@ ")" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def read_community_reports(\n", - " input_dir: str,\n", - " community_report_table: str = COMMUNITY_REPORT_TABLE,\n", - "):\n", - " \"\"\"Embeds the full content of the community reports and saves the DataFrame with embeddings to the output path.\"\"\"\n", - " input_path = Path(input_dir) / f\"{community_report_table}.parquet\"\n", - " return pd.read_parquet(input_path)\n", - "\n", - "\n", - "report_df = read_community_reports(INPUT_DIR)\n", - "reports = read_indexer_reports(\n", - " report_df,\n", - " community_df,\n", - " COMMUNITY_LEVEL,\n", - " content_embedding_col=\"full_content_embeddings\",\n", - ")\n", - "read_indexer_report_embeddings(reports, full_content_embedding_store)" - ] - }, { "cell_type": "code", "execution_count": null, @@ -161,12 +137,9 @@ "outputs": [], "source": [ "drift_params = DRIFTSearchConfig(\n", - " temperature=0,\n", - " max_tokens=12_000,\n", " primer_folds=1,\n", " drift_k_followups=3,\n", " n_depth=3,\n", - " n=1,\n", ")\n", "\n", "context_builder = DRIFTSearchContextBuilder(\n", @@ -216,7 +189,7 @@ ], "metadata": { "kernelspec": { - "display_name": "graphrag", + "display_name": "Python 3", "language": "python", "name": "python3" }, diff --git a/docs/examples_notebooks/inputs/operation dulce/communities.parquet b/docs/examples_notebooks/inputs/operation dulce/communities.parquet index 885ced1b15..4f4c1a864f 100644 Binary files a/docs/examples_notebooks/inputs/operation dulce/communities.parquet and b/docs/examples_notebooks/inputs/operation dulce/communities.parquet differ diff --git a/docs/examples_notebooks/inputs/operation dulce/community_reports.parquet b/docs/examples_notebooks/inputs/operation dulce/community_reports.parquet index d633ec0047..e6e45e947b 100644 Binary files a/docs/examples_notebooks/inputs/operation dulce/community_reports.parquet and b/docs/examples_notebooks/inputs/operation dulce/community_reports.parquet differ diff --git a/docs/examples_notebooks/inputs/operation dulce/covariates.parquet b/docs/examples_notebooks/inputs/operation dulce/covariates.parquet index cc7b212fbf..1150cd0b00 100644 Binary files a/docs/examples_notebooks/inputs/operation dulce/covariates.parquet and b/docs/examples_notebooks/inputs/operation dulce/covariates.parquet differ diff --git a/docs/examples_notebooks/inputs/operation dulce/embeddings.community_full_content.parquet b/docs/examples_notebooks/inputs/operation dulce/embeddings.community_full_content.parquet new file mode 100644 index 0000000000..d9dd43ce6f Binary files /dev/null and b/docs/examples_notebooks/inputs/operation dulce/embeddings.community_full_content.parquet differ diff --git a/docs/examples_notebooks/inputs/operation dulce/entities.parquet b/docs/examples_notebooks/inputs/operation dulce/entities.parquet index 4378317c7a..466f5058ae 100644 Binary files a/docs/examples_notebooks/inputs/operation dulce/entities.parquet and b/docs/examples_notebooks/inputs/operation dulce/entities.parquet differ diff --git a/docs/examples_notebooks/inputs/operation dulce/lancedb/community_full_content.lance/_indices/dd5917d9-d48d-4af3-bc2e-43a53b2fdbe6/auxiliary.idx b/docs/examples_notebooks/inputs/operation dulce/lancedb/community_full_content.lance/_indices/dd5917d9-d48d-4af3-bc2e-43a53b2fdbe6/auxiliary.idx new file mode 100644 index 0000000000..57eee5fba5 Binary files /dev/null and b/docs/examples_notebooks/inputs/operation dulce/lancedb/community_full_content.lance/_indices/dd5917d9-d48d-4af3-bc2e-43a53b2fdbe6/auxiliary.idx differ diff --git a/docs/examples_notebooks/inputs/operation dulce/lancedb/community_full_content.lance/_indices/dd5917d9-d48d-4af3-bc2e-43a53b2fdbe6/index.idx b/docs/examples_notebooks/inputs/operation dulce/lancedb/community_full_content.lance/_indices/dd5917d9-d48d-4af3-bc2e-43a53b2fdbe6/index.idx new file mode 100644 index 0000000000..1b8db50ecf Binary files /dev/null and b/docs/examples_notebooks/inputs/operation dulce/lancedb/community_full_content.lance/_indices/dd5917d9-d48d-4af3-bc2e-43a53b2fdbe6/index.idx differ diff --git a/docs/examples_notebooks/inputs/operation dulce/lancedb/community_full_content.lance/_transactions/0-a943ac34-0e87-43c2-80d0-8f83fb80f4f5.txn b/docs/examples_notebooks/inputs/operation dulce/lancedb/community_full_content.lance/_transactions/0-a943ac34-0e87-43c2-80d0-8f83fb80f4f5.txn new file mode 100644 index 0000000000..f987e5fb4a Binary files /dev/null and b/docs/examples_notebooks/inputs/operation dulce/lancedb/community_full_content.lance/_transactions/0-a943ac34-0e87-43c2-80d0-8f83fb80f4f5.txn differ diff --git a/docs/examples_notebooks/inputs/operation dulce/lancedb/community_full_content.lance/_transactions/1-ec798d7b-a8bf-4985-a5d0-784434802168.txn b/docs/examples_notebooks/inputs/operation dulce/lancedb/community_full_content.lance/_transactions/1-ec798d7b-a8bf-4985-a5d0-784434802168.txn new file mode 100644 index 0000000000..f4850f70d7 Binary files /dev/null and b/docs/examples_notebooks/inputs/operation dulce/lancedb/community_full_content.lance/_transactions/1-ec798d7b-a8bf-4985-a5d0-784434802168.txn differ diff --git a/docs/examples_notebooks/inputs/operation dulce/lancedb/community_full_content.lance/_transactions/2-78887911-d792-4dc9-b28d-f2858db1139a.txn b/docs/examples_notebooks/inputs/operation dulce/lancedb/community_full_content.lance/_transactions/2-78887911-d792-4dc9-b28d-f2858db1139a.txn new file mode 100644 index 0000000000..53f6243871 Binary files /dev/null and b/docs/examples_notebooks/inputs/operation dulce/lancedb/community_full_content.lance/_transactions/2-78887911-d792-4dc9-b28d-f2858db1139a.txn differ diff --git a/docs/examples_notebooks/inputs/operation dulce/lancedb/community_full_content.lance/_transactions/3-c16be721-5d7e-46a0-98c2-34d5d9c29383.txn b/docs/examples_notebooks/inputs/operation dulce/lancedb/community_full_content.lance/_transactions/3-c16be721-5d7e-46a0-98c2-34d5d9c29383.txn new file mode 100644 index 0000000000..add345bded Binary files /dev/null and b/docs/examples_notebooks/inputs/operation dulce/lancedb/community_full_content.lance/_transactions/3-c16be721-5d7e-46a0-98c2-34d5d9c29383.txn differ diff --git a/docs/examples_notebooks/inputs/operation dulce/lancedb/community_full_content.lance/_versions/1.manifest b/docs/examples_notebooks/inputs/operation dulce/lancedb/community_full_content.lance/_versions/1.manifest new file mode 100644 index 0000000000..f9e39ec20b Binary files /dev/null and b/docs/examples_notebooks/inputs/operation dulce/lancedb/community_full_content.lance/_versions/1.manifest differ diff --git a/docs/examples_notebooks/inputs/operation dulce/lancedb/community_full_content.lance/_versions/2.manifest b/docs/examples_notebooks/inputs/operation dulce/lancedb/community_full_content.lance/_versions/2.manifest new file mode 100644 index 0000000000..c9ed390a4f Binary files /dev/null and b/docs/examples_notebooks/inputs/operation dulce/lancedb/community_full_content.lance/_versions/2.manifest differ diff --git a/docs/examples_notebooks/inputs/operation dulce/lancedb/community_full_content.lance/_versions/3.manifest b/docs/examples_notebooks/inputs/operation dulce/lancedb/community_full_content.lance/_versions/3.manifest new file mode 100644 index 0000000000..408240bf4d Binary files /dev/null and b/docs/examples_notebooks/inputs/operation dulce/lancedb/community_full_content.lance/_versions/3.manifest differ diff --git a/docs/examples_notebooks/inputs/operation dulce/lancedb/community_full_content.lance/_versions/4.manifest b/docs/examples_notebooks/inputs/operation dulce/lancedb/community_full_content.lance/_versions/4.manifest new file mode 100644 index 0000000000..0769fe9c6f Binary files /dev/null and b/docs/examples_notebooks/inputs/operation dulce/lancedb/community_full_content.lance/_versions/4.manifest differ diff --git a/docs/examples_notebooks/inputs/operation dulce/lancedb/community_full_content.lance/data/667cccff-01b5-4b70-a2a6-8cf4d6ada077.lance b/docs/examples_notebooks/inputs/operation dulce/lancedb/community_full_content.lance/data/667cccff-01b5-4b70-a2a6-8cf4d6ada077.lance new file mode 100644 index 0000000000..04aad5948c Binary files /dev/null and b/docs/examples_notebooks/inputs/operation dulce/lancedb/community_full_content.lance/data/667cccff-01b5-4b70-a2a6-8cf4d6ada077.lance differ diff --git a/docs/examples_notebooks/inputs/operation dulce/lancedb/community_full_content.lance/data/a84d995f-111c-45d1-ba5a-32b3747b8a18.lance b/docs/examples_notebooks/inputs/operation dulce/lancedb/community_full_content.lance/data/a84d995f-111c-45d1-ba5a-32b3747b8a18.lance new file mode 100644 index 0000000000..3b9d4fb218 Binary files /dev/null and b/docs/examples_notebooks/inputs/operation dulce/lancedb/community_full_content.lance/data/a84d995f-111c-45d1-ba5a-32b3747b8a18.lance differ diff --git a/docs/examples_notebooks/inputs/operation dulce/lancedb/default-community-full_content.lance/_transactions/0-2fed1d8b-daac-41b0-a93a-e115cda75be3.txn b/docs/examples_notebooks/inputs/operation dulce/lancedb/default-community-full_content.lance/_transactions/0-2fed1d8b-daac-41b0-a93a-e115cda75be3.txn deleted file mode 100644 index 4ae06f643c..0000000000 --- a/docs/examples_notebooks/inputs/operation dulce/lancedb/default-community-full_content.lance/_transactions/0-2fed1d8b-daac-41b0-a93a-e115cda75be3.txn +++ /dev/null @@ -1,2 +0,0 @@ -$2fed1d8b-daac-41b0-a93a-e115cda75be3²À$id ÿÿÿÿÿÿÿÿÿ*string08Zdefault(text ÿÿÿÿÿÿÿÿÿ*string08Zdefault>vector ÿÿÿÿÿÿÿÿÿ*fixed_size_list:float:153608Zdefault. -attributes ÿÿÿÿÿÿÿÿÿ*string08Zdefault \ No newline at end of file diff --git a/docs/examples_notebooks/inputs/operation dulce/lancedb/default-community-full_content.lance/_transactions/1-61dbb7c2-aec3-4796-b223-941fc7cc93cc.txn b/docs/examples_notebooks/inputs/operation dulce/lancedb/default-community-full_content.lance/_transactions/1-61dbb7c2-aec3-4796-b223-941fc7cc93cc.txn deleted file mode 100644 index 7af4506c1f..0000000000 Binary files a/docs/examples_notebooks/inputs/operation dulce/lancedb/default-community-full_content.lance/_transactions/1-61dbb7c2-aec3-4796-b223-941fc7cc93cc.txn and /dev/null differ diff --git a/docs/examples_notebooks/inputs/operation dulce/lancedb/default-community-full_content.lance/_transactions/2-60012692-a153-48f9-8f4e-c479b44cbf3f.txn b/docs/examples_notebooks/inputs/operation dulce/lancedb/default-community-full_content.lance/_transactions/2-60012692-a153-48f9-8f4e-c479b44cbf3f.txn deleted file mode 100644 index 8989d3ff3e..0000000000 --- a/docs/examples_notebooks/inputs/operation dulce/lancedb/default-community-full_content.lance/_transactions/2-60012692-a153-48f9-8f4e-c479b44cbf3f.txn +++ /dev/null @@ -1,2 +0,0 @@ -$60012692-a153-48f9-8f4e-c479b44cbf3f²À$id ÿÿÿÿÿÿÿÿÿ*string08Zdefault(text ÿÿÿÿÿÿÿÿÿ*string08Zdefault>vector ÿÿÿÿÿÿÿÿÿ*fixed_size_list:float:153608Zdefault. -attributes ÿÿÿÿÿÿÿÿÿ*string08Zdefault \ No newline at end of file diff --git a/docs/examples_notebooks/inputs/operation dulce/lancedb/default-community-full_content.lance/_transactions/3-0d2dc9a1-094f-4220-83c7-6ad6f26fac2b.txn b/docs/examples_notebooks/inputs/operation dulce/lancedb/default-community-full_content.lance/_transactions/3-0d2dc9a1-094f-4220-83c7-6ad6f26fac2b.txn deleted file mode 100644 index 63c1b2f07d..0000000000 Binary files a/docs/examples_notebooks/inputs/operation dulce/lancedb/default-community-full_content.lance/_transactions/3-0d2dc9a1-094f-4220-83c7-6ad6f26fac2b.txn and /dev/null differ diff --git a/docs/examples_notebooks/inputs/operation dulce/lancedb/default-community-full_content.lance/_versions/1.manifest b/docs/examples_notebooks/inputs/operation dulce/lancedb/default-community-full_content.lance/_versions/1.manifest deleted file mode 100644 index 321504ba88..0000000000 Binary files a/docs/examples_notebooks/inputs/operation dulce/lancedb/default-community-full_content.lance/_versions/1.manifest and /dev/null differ diff --git a/docs/examples_notebooks/inputs/operation dulce/lancedb/default-community-full_content.lance/_versions/2.manifest b/docs/examples_notebooks/inputs/operation dulce/lancedb/default-community-full_content.lance/_versions/2.manifest deleted file mode 100644 index 6cf8a61ee3..0000000000 Binary files a/docs/examples_notebooks/inputs/operation dulce/lancedb/default-community-full_content.lance/_versions/2.manifest and /dev/null differ diff --git a/docs/examples_notebooks/inputs/operation dulce/lancedb/default-community-full_content.lance/_versions/3.manifest b/docs/examples_notebooks/inputs/operation dulce/lancedb/default-community-full_content.lance/_versions/3.manifest deleted file mode 100644 index 022bec3439..0000000000 Binary files a/docs/examples_notebooks/inputs/operation dulce/lancedb/default-community-full_content.lance/_versions/3.manifest and /dev/null differ diff --git a/docs/examples_notebooks/inputs/operation dulce/lancedb/default-community-full_content.lance/_versions/4.manifest b/docs/examples_notebooks/inputs/operation dulce/lancedb/default-community-full_content.lance/_versions/4.manifest deleted file mode 100644 index ce22c6edb5..0000000000 Binary files a/docs/examples_notebooks/inputs/operation dulce/lancedb/default-community-full_content.lance/_versions/4.manifest and /dev/null differ diff --git a/docs/examples_notebooks/inputs/operation dulce/lancedb/default-community-full_content.lance/data/1e7b2d94-ed06-4aa0-b22e-86a71d416bc6.lance b/docs/examples_notebooks/inputs/operation dulce/lancedb/default-community-full_content.lance/data/1e7b2d94-ed06-4aa0-b22e-86a71d416bc6.lance deleted file mode 100644 index 92d80e6d52..0000000000 Binary files a/docs/examples_notebooks/inputs/operation dulce/lancedb/default-community-full_content.lance/data/1e7b2d94-ed06-4aa0-b22e-86a71d416bc6.lance and /dev/null differ diff --git a/docs/examples_notebooks/inputs/operation dulce/lancedb/default-community-full_content.lance/data/1ed9f301-ce30-46a8-8c0b-9c2a60a3cf43.lance b/docs/examples_notebooks/inputs/operation dulce/lancedb/default-community-full_content.lance/data/1ed9f301-ce30-46a8-8c0b-9c2a60a3cf43.lance deleted file mode 100644 index afc4df25e9..0000000000 Binary files a/docs/examples_notebooks/inputs/operation dulce/lancedb/default-community-full_content.lance/data/1ed9f301-ce30-46a8-8c0b-9c2a60a3cf43.lance and /dev/null differ diff --git a/docs/examples_notebooks/inputs/operation dulce/lancedb/default-entity-description.lance/_transactions/0-92c031e5-7558-451e-9d0f-f5514db9616d.txn b/docs/examples_notebooks/inputs/operation dulce/lancedb/default-entity-description.lance/_transactions/0-92c031e5-7558-451e-9d0f-f5514db9616d.txn deleted file mode 100644 index 9e3261cb97..0000000000 --- a/docs/examples_notebooks/inputs/operation dulce/lancedb/default-entity-description.lance/_transactions/0-92c031e5-7558-451e-9d0f-f5514db9616d.txn +++ /dev/null @@ -1,2 +0,0 @@ -$92c031e5-7558-451e-9d0f-f5514db9616d²À$id ÿÿÿÿÿÿÿÿÿ*string08Zdefault(text ÿÿÿÿÿÿÿÿÿ*string08Zdefault>vector ÿÿÿÿÿÿÿÿÿ*fixed_size_list:float:153608Zdefault. -attributes ÿÿÿÿÿÿÿÿÿ*string08Zdefault \ No newline at end of file diff --git a/docs/examples_notebooks/inputs/operation dulce/lancedb/default-entity-description.lance/_transactions/1-7b3cb8d8-3512-4584-a003-91838fed8911.txn b/docs/examples_notebooks/inputs/operation dulce/lancedb/default-entity-description.lance/_transactions/1-7b3cb8d8-3512-4584-a003-91838fed8911.txn deleted file mode 100644 index daf150cc07..0000000000 Binary files a/docs/examples_notebooks/inputs/operation dulce/lancedb/default-entity-description.lance/_transactions/1-7b3cb8d8-3512-4584-a003-91838fed8911.txn and /dev/null differ diff --git a/docs/examples_notebooks/inputs/operation dulce/lancedb/default-entity-description.lance/_transactions/2-7de627d2-4c57-49e9-bf73-c17a9582ead4.txn b/docs/examples_notebooks/inputs/operation dulce/lancedb/default-entity-description.lance/_transactions/2-7de627d2-4c57-49e9-bf73-c17a9582ead4.txn deleted file mode 100644 index f4f8f42725..0000000000 --- a/docs/examples_notebooks/inputs/operation dulce/lancedb/default-entity-description.lance/_transactions/2-7de627d2-4c57-49e9-bf73-c17a9582ead4.txn +++ /dev/null @@ -1,2 +0,0 @@ -$7de627d2-4c57-49e9-bf73-c17a9582ead4²À$id ÿÿÿÿÿÿÿÿÿ*string08Zdefault(text ÿÿÿÿÿÿÿÿÿ*string08Zdefault>vector ÿÿÿÿÿÿÿÿÿ*fixed_size_list:float:153608Zdefault. -attributes ÿÿÿÿÿÿÿÿÿ*string08Zdefault \ No newline at end of file diff --git a/docs/examples_notebooks/inputs/operation dulce/lancedb/default-entity-description.lance/_transactions/3-9ad29d69-9a69-43a8-8b26-252ea267958d.txn b/docs/examples_notebooks/inputs/operation dulce/lancedb/default-entity-description.lance/_transactions/3-9ad29d69-9a69-43a8-8b26-252ea267958d.txn deleted file mode 100644 index d3497cbd54..0000000000 Binary files a/docs/examples_notebooks/inputs/operation dulce/lancedb/default-entity-description.lance/_transactions/3-9ad29d69-9a69-43a8-8b26-252ea267958d.txn and /dev/null differ diff --git a/docs/examples_notebooks/inputs/operation dulce/lancedb/default-entity-description.lance/_versions/1.manifest b/docs/examples_notebooks/inputs/operation dulce/lancedb/default-entity-description.lance/_versions/1.manifest deleted file mode 100644 index b35c6002ee..0000000000 Binary files a/docs/examples_notebooks/inputs/operation dulce/lancedb/default-entity-description.lance/_versions/1.manifest and /dev/null differ diff --git a/docs/examples_notebooks/inputs/operation dulce/lancedb/default-entity-description.lance/_versions/2.manifest b/docs/examples_notebooks/inputs/operation dulce/lancedb/default-entity-description.lance/_versions/2.manifest deleted file mode 100644 index 6837b2effb..0000000000 Binary files a/docs/examples_notebooks/inputs/operation dulce/lancedb/default-entity-description.lance/_versions/2.manifest and /dev/null differ diff --git a/docs/examples_notebooks/inputs/operation dulce/lancedb/default-entity-description.lance/_versions/3.manifest b/docs/examples_notebooks/inputs/operation dulce/lancedb/default-entity-description.lance/_versions/3.manifest deleted file mode 100644 index 4cafb59f5b..0000000000 Binary files a/docs/examples_notebooks/inputs/operation dulce/lancedb/default-entity-description.lance/_versions/3.manifest and /dev/null differ diff --git a/docs/examples_notebooks/inputs/operation dulce/lancedb/default-entity-description.lance/_versions/4.manifest b/docs/examples_notebooks/inputs/operation dulce/lancedb/default-entity-description.lance/_versions/4.manifest deleted file mode 100644 index 95defe4551..0000000000 Binary files a/docs/examples_notebooks/inputs/operation dulce/lancedb/default-entity-description.lance/_versions/4.manifest and /dev/null differ diff --git a/docs/examples_notebooks/inputs/operation dulce/lancedb/default-entity-description.lance/data/a34575c4-5260-457f-bebe-3f40bc0e2ee3.lance b/docs/examples_notebooks/inputs/operation dulce/lancedb/default-entity-description.lance/data/a34575c4-5260-457f-bebe-3f40bc0e2ee3.lance deleted file mode 100644 index 6777f9c87a..0000000000 Binary files a/docs/examples_notebooks/inputs/operation dulce/lancedb/default-entity-description.lance/data/a34575c4-5260-457f-bebe-3f40bc0e2ee3.lance and /dev/null differ diff --git a/docs/examples_notebooks/inputs/operation dulce/lancedb/default-entity-description.lance/data/eabd7580-86f5-4022-8aa7-fe0aff816d98.lance b/docs/examples_notebooks/inputs/operation dulce/lancedb/default-entity-description.lance/data/eabd7580-86f5-4022-8aa7-fe0aff816d98.lance deleted file mode 100644 index 789725b40d..0000000000 Binary files a/docs/examples_notebooks/inputs/operation dulce/lancedb/default-entity-description.lance/data/eabd7580-86f5-4022-8aa7-fe0aff816d98.lance and /dev/null differ diff --git a/docs/examples_notebooks/inputs/operation dulce/lancedb/default-text_unit-text.lance/_transactions/0-fd0434ac-e5cd-4ddd-9dd5-e5048d4edb59.txn b/docs/examples_notebooks/inputs/operation dulce/lancedb/default-text_unit-text.lance/_transactions/0-fd0434ac-e5cd-4ddd-9dd5-e5048d4edb59.txn deleted file mode 100644 index 81d66a0b44..0000000000 --- a/docs/examples_notebooks/inputs/operation dulce/lancedb/default-text_unit-text.lance/_transactions/0-fd0434ac-e5cd-4ddd-9dd5-e5048d4edb59.txn +++ /dev/null @@ -1,2 +0,0 @@ -$fd0434ac-e5cd-4ddd-9dd5-e5048d4edb59²À$id ÿÿÿÿÿÿÿÿÿ*string08Zdefault(text ÿÿÿÿÿÿÿÿÿ*string08Zdefault>vector ÿÿÿÿÿÿÿÿÿ*fixed_size_list:float:153608Zdefault. -attributes ÿÿÿÿÿÿÿÿÿ*string08Zdefault \ No newline at end of file diff --git a/docs/examples_notebooks/inputs/operation dulce/lancedb/default-text_unit-text.lance/_transactions/1-14bb4b1d-cc00-420b-9b14-3626f0bd8c0b.txn b/docs/examples_notebooks/inputs/operation dulce/lancedb/default-text_unit-text.lance/_transactions/1-14bb4b1d-cc00-420b-9b14-3626f0bd8c0b.txn deleted file mode 100644 index 86b642720a..0000000000 Binary files a/docs/examples_notebooks/inputs/operation dulce/lancedb/default-text_unit-text.lance/_transactions/1-14bb4b1d-cc00-420b-9b14-3626f0bd8c0b.txn and /dev/null differ diff --git a/docs/examples_notebooks/inputs/operation dulce/lancedb/default-text_unit-text.lance/_transactions/2-8e74264c-f72d-44f5-a6f4-b3b61ae6a43b.txn b/docs/examples_notebooks/inputs/operation dulce/lancedb/default-text_unit-text.lance/_transactions/2-8e74264c-f72d-44f5-a6f4-b3b61ae6a43b.txn deleted file mode 100644 index b26d571dc9..0000000000 --- a/docs/examples_notebooks/inputs/operation dulce/lancedb/default-text_unit-text.lance/_transactions/2-8e74264c-f72d-44f5-a6f4-b3b61ae6a43b.txn +++ /dev/null @@ -1,2 +0,0 @@ -$8e74264c-f72d-44f5-a6f4-b3b61ae6a43b²À$id ÿÿÿÿÿÿÿÿÿ*string08Zdefault(text ÿÿÿÿÿÿÿÿÿ*string08Zdefault>vector ÿÿÿÿÿÿÿÿÿ*fixed_size_list:float:153608Zdefault. -attributes ÿÿÿÿÿÿÿÿÿ*string08Zdefault \ No newline at end of file diff --git a/docs/examples_notebooks/inputs/operation dulce/lancedb/default-text_unit-text.lance/_transactions/3-7516fb71-9db3-4666-bdef-ea04c1eb9697.txn b/docs/examples_notebooks/inputs/operation dulce/lancedb/default-text_unit-text.lance/_transactions/3-7516fb71-9db3-4666-bdef-ea04c1eb9697.txn deleted file mode 100644 index 9b0915ab9b..0000000000 Binary files a/docs/examples_notebooks/inputs/operation dulce/lancedb/default-text_unit-text.lance/_transactions/3-7516fb71-9db3-4666-bdef-ea04c1eb9697.txn and /dev/null differ diff --git a/docs/examples_notebooks/inputs/operation dulce/lancedb/default-text_unit-text.lance/_versions/1.manifest b/docs/examples_notebooks/inputs/operation dulce/lancedb/default-text_unit-text.lance/_versions/1.manifest deleted file mode 100644 index a35b96156d..0000000000 Binary files a/docs/examples_notebooks/inputs/operation dulce/lancedb/default-text_unit-text.lance/_versions/1.manifest and /dev/null differ diff --git a/docs/examples_notebooks/inputs/operation dulce/lancedb/default-text_unit-text.lance/_versions/2.manifest b/docs/examples_notebooks/inputs/operation dulce/lancedb/default-text_unit-text.lance/_versions/2.manifest deleted file mode 100644 index c14970b78c..0000000000 Binary files a/docs/examples_notebooks/inputs/operation dulce/lancedb/default-text_unit-text.lance/_versions/2.manifest and /dev/null differ diff --git a/docs/examples_notebooks/inputs/operation dulce/lancedb/default-text_unit-text.lance/_versions/3.manifest b/docs/examples_notebooks/inputs/operation dulce/lancedb/default-text_unit-text.lance/_versions/3.manifest deleted file mode 100644 index acb1546ca7..0000000000 Binary files a/docs/examples_notebooks/inputs/operation dulce/lancedb/default-text_unit-text.lance/_versions/3.manifest and /dev/null differ diff --git a/docs/examples_notebooks/inputs/operation dulce/lancedb/default-text_unit-text.lance/_versions/4.manifest b/docs/examples_notebooks/inputs/operation dulce/lancedb/default-text_unit-text.lance/_versions/4.manifest deleted file mode 100644 index 61ef1262a3..0000000000 Binary files a/docs/examples_notebooks/inputs/operation dulce/lancedb/default-text_unit-text.lance/_versions/4.manifest and /dev/null differ diff --git a/docs/examples_notebooks/inputs/operation dulce/lancedb/default-text_unit-text.lance/data/2794bf5b-de3d-4202-ab16-e76bc27c8e6a.lance b/docs/examples_notebooks/inputs/operation dulce/lancedb/default-text_unit-text.lance/data/2794bf5b-de3d-4202-ab16-e76bc27c8e6a.lance deleted file mode 100644 index 8758d96515..0000000000 Binary files a/docs/examples_notebooks/inputs/operation dulce/lancedb/default-text_unit-text.lance/data/2794bf5b-de3d-4202-ab16-e76bc27c8e6a.lance and /dev/null differ diff --git a/docs/examples_notebooks/inputs/operation dulce/lancedb/default-text_unit-text.lance/data/2f74c8e8-3f35-4209-889c-a13cf0780eb3.lance b/docs/examples_notebooks/inputs/operation dulce/lancedb/default-text_unit-text.lance/data/2f74c8e8-3f35-4209-889c-a13cf0780eb3.lance deleted file mode 100644 index 8758d96515..0000000000 Binary files a/docs/examples_notebooks/inputs/operation dulce/lancedb/default-text_unit-text.lance/data/2f74c8e8-3f35-4209-889c-a13cf0780eb3.lance and /dev/null differ diff --git a/docs/examples_notebooks/inputs/operation dulce/lancedb/entity_description.lance/_indices/b1bd07f3-ad25-40bc-b91c-14215386e477/auxiliary.idx b/docs/examples_notebooks/inputs/operation dulce/lancedb/entity_description.lance/_indices/b1bd07f3-ad25-40bc-b91c-14215386e477/auxiliary.idx new file mode 100644 index 0000000000..57eee5fba5 Binary files /dev/null and b/docs/examples_notebooks/inputs/operation dulce/lancedb/entity_description.lance/_indices/b1bd07f3-ad25-40bc-b91c-14215386e477/auxiliary.idx differ diff --git a/docs/examples_notebooks/inputs/operation dulce/lancedb/entity_description.lance/_indices/b1bd07f3-ad25-40bc-b91c-14215386e477/index.idx b/docs/examples_notebooks/inputs/operation dulce/lancedb/entity_description.lance/_indices/b1bd07f3-ad25-40bc-b91c-14215386e477/index.idx new file mode 100644 index 0000000000..cfbb01f872 Binary files /dev/null and b/docs/examples_notebooks/inputs/operation dulce/lancedb/entity_description.lance/_indices/b1bd07f3-ad25-40bc-b91c-14215386e477/index.idx differ diff --git a/docs/examples_notebooks/inputs/operation dulce/lancedb/entity_description.lance/_transactions/0-5b75ba0a-bae9-4244-8a6b-31de09f7e03d.txn b/docs/examples_notebooks/inputs/operation dulce/lancedb/entity_description.lance/_transactions/0-5b75ba0a-bae9-4244-8a6b-31de09f7e03d.txn new file mode 100644 index 0000000000..9796ad0ee9 Binary files /dev/null and b/docs/examples_notebooks/inputs/operation dulce/lancedb/entity_description.lance/_transactions/0-5b75ba0a-bae9-4244-8a6b-31de09f7e03d.txn differ diff --git a/docs/examples_notebooks/inputs/operation dulce/lancedb/entity_description.lance/_transactions/1-71f2ac8d-a101-467d-b57d-2dea6d14f7a7.txn b/docs/examples_notebooks/inputs/operation dulce/lancedb/entity_description.lance/_transactions/1-71f2ac8d-a101-467d-b57d-2dea6d14f7a7.txn new file mode 100644 index 0000000000..eba1611930 Binary files /dev/null and b/docs/examples_notebooks/inputs/operation dulce/lancedb/entity_description.lance/_transactions/1-71f2ac8d-a101-467d-b57d-2dea6d14f7a7.txn differ diff --git a/docs/examples_notebooks/inputs/operation dulce/lancedb/entity_description.lance/_transactions/2-d621e621-8e92-419f-99e4-f1c7d163bcc2.txn b/docs/examples_notebooks/inputs/operation dulce/lancedb/entity_description.lance/_transactions/2-d621e621-8e92-419f-99e4-f1c7d163bcc2.txn new file mode 100644 index 0000000000..2b154eac74 Binary files /dev/null and b/docs/examples_notebooks/inputs/operation dulce/lancedb/entity_description.lance/_transactions/2-d621e621-8e92-419f-99e4-f1c7d163bcc2.txn differ diff --git a/docs/examples_notebooks/inputs/operation dulce/lancedb/entity_description.lance/_transactions/3-4ada922f-cf85-44df-bc8b-b132e35009d0.txn b/docs/examples_notebooks/inputs/operation dulce/lancedb/entity_description.lance/_transactions/3-4ada922f-cf85-44df-bc8b-b132e35009d0.txn new file mode 100644 index 0000000000..1ae8892c8c Binary files /dev/null and b/docs/examples_notebooks/inputs/operation dulce/lancedb/entity_description.lance/_transactions/3-4ada922f-cf85-44df-bc8b-b132e35009d0.txn differ diff --git a/docs/examples_notebooks/inputs/operation dulce/lancedb/entity_description.lance/_transactions/4-dcac110d-2a49-4777-a51e-5078fed1b0df.txn b/docs/examples_notebooks/inputs/operation dulce/lancedb/entity_description.lance/_transactions/4-dcac110d-2a49-4777-a51e-5078fed1b0df.txn new file mode 100644 index 0000000000..655b06d660 --- /dev/null +++ b/docs/examples_notebooks/inputs/operation dulce/lancedb/entity_description.lance/_transactions/4-dcac110d-2a49-4777-a51e-5078fed1b0df.txn @@ -0,0 +1 @@ +$dcac110d-2a49-4777-a51e-5078fed1b0dfªid = '__DUMMY__' \ No newline at end of file diff --git a/docs/examples_notebooks/inputs/operation dulce/lancedb/entity_description.lance/_transactions/5-ec86af9f-b799-4457-b07a-24a3459dd952.txn b/docs/examples_notebooks/inputs/operation dulce/lancedb/entity_description.lance/_transactions/5-ec86af9f-b799-4457-b07a-24a3459dd952.txn new file mode 100644 index 0000000000..6512848103 Binary files /dev/null and b/docs/examples_notebooks/inputs/operation dulce/lancedb/entity_description.lance/_transactions/5-ec86af9f-b799-4457-b07a-24a3459dd952.txn differ diff --git a/docs/examples_notebooks/inputs/operation dulce/lancedb/entity_description.lance/_transactions/6-c2afb84c-4b3a-4ccd-8843-0deaa25bd971.txn b/docs/examples_notebooks/inputs/operation dulce/lancedb/entity_description.lance/_transactions/6-c2afb84c-4b3a-4ccd-8843-0deaa25bd971.txn new file mode 100644 index 0000000000..821dd8ca62 --- /dev/null +++ b/docs/examples_notebooks/inputs/operation dulce/lancedb/entity_description.lance/_transactions/6-c2afb84c-4b3a-4ccd-8843-0deaa25bd971.txn @@ -0,0 +1 @@ +$c2afb84c-4b3a-4ccd-8843-0deaa25bd971ªid = '__DUMMY__' \ No newline at end of file diff --git a/docs/examples_notebooks/inputs/operation dulce/lancedb/entity_description.lance/_transactions/7-6798041f-3283-4b54-8313-54f80e00d338.txn b/docs/examples_notebooks/inputs/operation dulce/lancedb/entity_description.lance/_transactions/7-6798041f-3283-4b54-8313-54f80e00d338.txn new file mode 100644 index 0000000000..fef8f6ccbd Binary files /dev/null and b/docs/examples_notebooks/inputs/operation dulce/lancedb/entity_description.lance/_transactions/7-6798041f-3283-4b54-8313-54f80e00d338.txn differ diff --git a/docs/examples_notebooks/inputs/operation dulce/lancedb/entity_description.lance/_versions/1.manifest b/docs/examples_notebooks/inputs/operation dulce/lancedb/entity_description.lance/_versions/1.manifest new file mode 100644 index 0000000000..a3a4bdc459 Binary files /dev/null and b/docs/examples_notebooks/inputs/operation dulce/lancedb/entity_description.lance/_versions/1.manifest differ diff --git a/docs/examples_notebooks/inputs/operation dulce/lancedb/entity_description.lance/_versions/2.manifest b/docs/examples_notebooks/inputs/operation dulce/lancedb/entity_description.lance/_versions/2.manifest new file mode 100644 index 0000000000..6f79fc2d12 Binary files /dev/null and b/docs/examples_notebooks/inputs/operation dulce/lancedb/entity_description.lance/_versions/2.manifest differ diff --git a/docs/examples_notebooks/inputs/operation dulce/lancedb/entity_description.lance/_versions/3.manifest b/docs/examples_notebooks/inputs/operation dulce/lancedb/entity_description.lance/_versions/3.manifest new file mode 100644 index 0000000000..9deea08227 Binary files /dev/null and b/docs/examples_notebooks/inputs/operation dulce/lancedb/entity_description.lance/_versions/3.manifest differ diff --git a/docs/examples_notebooks/inputs/operation dulce/lancedb/entity_description.lance/_versions/4.manifest b/docs/examples_notebooks/inputs/operation dulce/lancedb/entity_description.lance/_versions/4.manifest new file mode 100644 index 0000000000..c7eee87b90 Binary files /dev/null and b/docs/examples_notebooks/inputs/operation dulce/lancedb/entity_description.lance/_versions/4.manifest differ diff --git a/docs/examples_notebooks/inputs/operation dulce/lancedb/entity_description.lance/_versions/5.manifest b/docs/examples_notebooks/inputs/operation dulce/lancedb/entity_description.lance/_versions/5.manifest new file mode 100644 index 0000000000..baf532593d Binary files /dev/null and b/docs/examples_notebooks/inputs/operation dulce/lancedb/entity_description.lance/_versions/5.manifest differ diff --git a/docs/examples_notebooks/inputs/operation dulce/lancedb/entity_description.lance/_versions/6.manifest b/docs/examples_notebooks/inputs/operation dulce/lancedb/entity_description.lance/_versions/6.manifest new file mode 100644 index 0000000000..e8c03b450e Binary files /dev/null and b/docs/examples_notebooks/inputs/operation dulce/lancedb/entity_description.lance/_versions/6.manifest differ diff --git a/docs/examples_notebooks/inputs/operation dulce/lancedb/entity_description.lance/_versions/7.manifest b/docs/examples_notebooks/inputs/operation dulce/lancedb/entity_description.lance/_versions/7.manifest new file mode 100644 index 0000000000..3f8fed8f6b Binary files /dev/null and b/docs/examples_notebooks/inputs/operation dulce/lancedb/entity_description.lance/_versions/7.manifest differ diff --git a/docs/examples_notebooks/inputs/operation dulce/lancedb/entity_description.lance/_versions/8.manifest b/docs/examples_notebooks/inputs/operation dulce/lancedb/entity_description.lance/_versions/8.manifest new file mode 100644 index 0000000000..9c4380be0b Binary files /dev/null and b/docs/examples_notebooks/inputs/operation dulce/lancedb/entity_description.lance/_versions/8.manifest differ diff --git a/docs/examples_notebooks/inputs/operation dulce/lancedb/entity_description.lance/data/2677858d-16a4-4c0c-9515-ed5a9ee32fd7.lance b/docs/examples_notebooks/inputs/operation dulce/lancedb/entity_description.lance/data/2677858d-16a4-4c0c-9515-ed5a9ee32fd7.lance new file mode 100644 index 0000000000..f19cd20010 Binary files /dev/null and b/docs/examples_notebooks/inputs/operation dulce/lancedb/entity_description.lance/data/2677858d-16a4-4c0c-9515-ed5a9ee32fd7.lance differ diff --git a/docs/examples_notebooks/inputs/operation dulce/lancedb/entity_description.lance/data/4ed23c16-d11c-49b5-869d-653cfbd9c271.lance b/docs/examples_notebooks/inputs/operation dulce/lancedb/entity_description.lance/data/4ed23c16-d11c-49b5-869d-653cfbd9c271.lance new file mode 100644 index 0000000000..04aad5948c Binary files /dev/null and b/docs/examples_notebooks/inputs/operation dulce/lancedb/entity_description.lance/data/4ed23c16-d11c-49b5-869d-653cfbd9c271.lance differ diff --git a/docs/examples_notebooks/inputs/operation dulce/lancedb/entity_description.lance/data/5370b2ef-efd3-434e-9745-9d046b53bb4a.lance b/docs/examples_notebooks/inputs/operation dulce/lancedb/entity_description.lance/data/5370b2ef-efd3-434e-9745-9d046b53bb4a.lance new file mode 100644 index 0000000000..45878991ef Binary files /dev/null and b/docs/examples_notebooks/inputs/operation dulce/lancedb/entity_description.lance/data/5370b2ef-efd3-434e-9745-9d046b53bb4a.lance differ diff --git a/docs/examples_notebooks/inputs/operation dulce/lancedb/entity_description.lance/data/ec59d894-a2a6-4a44-8266-d07bbd684c33.lance b/docs/examples_notebooks/inputs/operation dulce/lancedb/entity_description.lance/data/ec59d894-a2a6-4a44-8266-d07bbd684c33.lance new file mode 100644 index 0000000000..44e433bdce Binary files /dev/null and b/docs/examples_notebooks/inputs/operation dulce/lancedb/entity_description.lance/data/ec59d894-a2a6-4a44-8266-d07bbd684c33.lance differ diff --git a/docs/examples_notebooks/inputs/operation dulce/lancedb/text_unit_text.lance/_indices/f5099c4f-df9a-476d-a736-1eac0a498173/auxiliary.idx b/docs/examples_notebooks/inputs/operation dulce/lancedb/text_unit_text.lance/_indices/f5099c4f-df9a-476d-a736-1eac0a498173/auxiliary.idx new file mode 100644 index 0000000000..57eee5fba5 Binary files /dev/null and b/docs/examples_notebooks/inputs/operation dulce/lancedb/text_unit_text.lance/_indices/f5099c4f-df9a-476d-a736-1eac0a498173/auxiliary.idx differ diff --git a/docs/examples_notebooks/inputs/operation dulce/lancedb/text_unit_text.lance/_indices/f5099c4f-df9a-476d-a736-1eac0a498173/index.idx b/docs/examples_notebooks/inputs/operation dulce/lancedb/text_unit_text.lance/_indices/f5099c4f-df9a-476d-a736-1eac0a498173/index.idx new file mode 100644 index 0000000000..cfbb01f872 Binary files /dev/null and b/docs/examples_notebooks/inputs/operation dulce/lancedb/text_unit_text.lance/_indices/f5099c4f-df9a-476d-a736-1eac0a498173/index.idx differ diff --git a/docs/examples_notebooks/inputs/operation dulce/lancedb/text_unit_text.lance/_transactions/0-9abee4db-9914-4c35-b600-d995235f8e27.txn b/docs/examples_notebooks/inputs/operation dulce/lancedb/text_unit_text.lance/_transactions/0-9abee4db-9914-4c35-b600-d995235f8e27.txn new file mode 100644 index 0000000000..d6a0e07b3a Binary files /dev/null and b/docs/examples_notebooks/inputs/operation dulce/lancedb/text_unit_text.lance/_transactions/0-9abee4db-9914-4c35-b600-d995235f8e27.txn differ diff --git a/docs/examples_notebooks/inputs/operation dulce/lancedb/text_unit_text.lance/_transactions/1-302f5740-fbd0-4887-9933-13a2842ec8e4.txn b/docs/examples_notebooks/inputs/operation dulce/lancedb/text_unit_text.lance/_transactions/1-302f5740-fbd0-4887-9933-13a2842ec8e4.txn new file mode 100644 index 0000000000..3ae71df2c4 Binary files /dev/null and b/docs/examples_notebooks/inputs/operation dulce/lancedb/text_unit_text.lance/_transactions/1-302f5740-fbd0-4887-9933-13a2842ec8e4.txn differ diff --git a/docs/examples_notebooks/inputs/operation dulce/lancedb/text_unit_text.lance/_transactions/2-551a917c-5ab9-46a7-9085-fd82aa879717.txn b/docs/examples_notebooks/inputs/operation dulce/lancedb/text_unit_text.lance/_transactions/2-551a917c-5ab9-46a7-9085-fd82aa879717.txn new file mode 100644 index 0000000000..6eadc32172 Binary files /dev/null and b/docs/examples_notebooks/inputs/operation dulce/lancedb/text_unit_text.lance/_transactions/2-551a917c-5ab9-46a7-9085-fd82aa879717.txn differ diff --git a/docs/examples_notebooks/inputs/operation dulce/lancedb/text_unit_text.lance/_transactions/3-b0b6e7bb-a152-4148-be3b-d1ab50215bda.txn b/docs/examples_notebooks/inputs/operation dulce/lancedb/text_unit_text.lance/_transactions/3-b0b6e7bb-a152-4148-be3b-d1ab50215bda.txn new file mode 100644 index 0000000000..869892f078 Binary files /dev/null and b/docs/examples_notebooks/inputs/operation dulce/lancedb/text_unit_text.lance/_transactions/3-b0b6e7bb-a152-4148-be3b-d1ab50215bda.txn differ diff --git a/docs/examples_notebooks/inputs/operation dulce/lancedb/text_unit_text.lance/_versions/1.manifest b/docs/examples_notebooks/inputs/operation dulce/lancedb/text_unit_text.lance/_versions/1.manifest new file mode 100644 index 0000000000..23b69adf1a Binary files /dev/null and b/docs/examples_notebooks/inputs/operation dulce/lancedb/text_unit_text.lance/_versions/1.manifest differ diff --git a/docs/examples_notebooks/inputs/operation dulce/lancedb/text_unit_text.lance/_versions/2.manifest b/docs/examples_notebooks/inputs/operation dulce/lancedb/text_unit_text.lance/_versions/2.manifest new file mode 100644 index 0000000000..bfdf351394 Binary files /dev/null and b/docs/examples_notebooks/inputs/operation dulce/lancedb/text_unit_text.lance/_versions/2.manifest differ diff --git a/docs/examples_notebooks/inputs/operation dulce/lancedb/text_unit_text.lance/_versions/3.manifest b/docs/examples_notebooks/inputs/operation dulce/lancedb/text_unit_text.lance/_versions/3.manifest new file mode 100644 index 0000000000..d1ad0b0a8e Binary files /dev/null and b/docs/examples_notebooks/inputs/operation dulce/lancedb/text_unit_text.lance/_versions/3.manifest differ diff --git a/docs/examples_notebooks/inputs/operation dulce/lancedb/text_unit_text.lance/_versions/4.manifest b/docs/examples_notebooks/inputs/operation dulce/lancedb/text_unit_text.lance/_versions/4.manifest new file mode 100644 index 0000000000..1b1609101e Binary files /dev/null and b/docs/examples_notebooks/inputs/operation dulce/lancedb/text_unit_text.lance/_versions/4.manifest differ diff --git a/docs/examples_notebooks/inputs/operation dulce/lancedb/text_unit_text.lance/data/4105076e-7a55-4549-b86a-51c6bfa68ea5.lance b/docs/examples_notebooks/inputs/operation dulce/lancedb/text_unit_text.lance/data/4105076e-7a55-4549-b86a-51c6bfa68ea5.lance new file mode 100644 index 0000000000..04aad5948c Binary files /dev/null and b/docs/examples_notebooks/inputs/operation dulce/lancedb/text_unit_text.lance/data/4105076e-7a55-4549-b86a-51c6bfa68ea5.lance differ diff --git a/docs/examples_notebooks/inputs/operation dulce/lancedb/text_unit_text.lance/data/a28d4629-848b-4e1d-854d-cad6297a0c1a.lance b/docs/examples_notebooks/inputs/operation dulce/lancedb/text_unit_text.lance/data/a28d4629-848b-4e1d-854d-cad6297a0c1a.lance new file mode 100644 index 0000000000..2f101f12d5 Binary files /dev/null and b/docs/examples_notebooks/inputs/operation dulce/lancedb/text_unit_text.lance/data/a28d4629-848b-4e1d-854d-cad6297a0c1a.lance differ diff --git a/docs/examples_notebooks/inputs/operation dulce/relationships.parquet b/docs/examples_notebooks/inputs/operation dulce/relationships.parquet index 4bdf4c85f2..ee07a21e51 100644 Binary files a/docs/examples_notebooks/inputs/operation dulce/relationships.parquet and b/docs/examples_notebooks/inputs/operation dulce/relationships.parquet differ diff --git a/docs/examples_notebooks/inputs/operation dulce/text_units.parquet b/docs/examples_notebooks/inputs/operation dulce/text_units.parquet index 09349e3f65..c4df9cc5ee 100644 Binary files a/docs/examples_notebooks/inputs/operation dulce/text_units.parquet and b/docs/examples_notebooks/inputs/operation dulce/text_units.parquet differ diff --git a/packages/graphrag/graphrag/index/operations/embed_text/embed_text.py b/packages/graphrag/graphrag/index/operations/embed_text/embed_text.py index 08f941f908..7ca6c80920 100644 --- a/packages/graphrag/graphrag/index/operations/embed_text/embed_text.py +++ b/packages/graphrag/graphrag/index/operations/embed_text/embed_text.py @@ -28,18 +28,11 @@ async def embed_text( num_threads: int, vector_store: BaseVectorStore, id_column: str = "id", - title_column: str | None = None, ): """Embed a piece of text into a vector space. The operation outputs a new column containing a mapping between doc_id and vector.""" if embed_column not in input.columns: msg = f"Column {embed_column} not found in input dataframe with columns {input.columns}" raise ValueError(msg) - title = title_column or embed_column - if title not in input.columns: - msg = ( - f"Column {title} not found in input dataframe with columns {input.columns}" - ) - raise ValueError(msg) if id_column not in input.columns: msg = f"Column {id_column} not found in input dataframe with columns {input.columns}" raise ValueError(msg) diff --git a/packages/graphrag/graphrag/index/operations/finalize_community_reports.py b/packages/graphrag/graphrag/index/operations/finalize_community_reports.py index 124e17e430..4446e3ee8f 100644 --- a/packages/graphrag/graphrag/index/operations/finalize_community_reports.py +++ b/packages/graphrag/graphrag/index/operations/finalize_community_reports.py @@ -3,11 +3,10 @@ """All the steps to transform final entities.""" -from uuid import uuid4 - import pandas as pd from graphrag.data_model.schemas import COMMUNITY_REPORTS_FINAL_COLUMNS +from graphrag.index.utils.hashing import gen_sha512_hash def finalize_community_reports( @@ -25,7 +24,9 @@ def finalize_community_reports( community_reports["community"] = community_reports["community"].astype(int) community_reports["human_readable_id"] = community_reports["community"] - community_reports["id"] = [uuid4().hex for _ in range(len(community_reports))] + community_reports["id"] = community_reports.apply( + lambda row: gen_sha512_hash(row, ["full_content"]), axis=1 + ) return community_reports.loc[ :, diff --git a/packages/graphrag/graphrag/prompts/query/drift_search_system_prompt.py b/packages/graphrag/graphrag/prompts/query/drift_search_system_prompt.py index 3faae89a0e..4cb220f69d 100644 --- a/packages/graphrag/graphrag/prompts/query/drift_search_system_prompt.py +++ b/packages/graphrag/graphrag/prompts/query/drift_search_system_prompt.py @@ -65,7 +65,7 @@ Add sections and commentary to the response as appropriate for the length and format. -Additionally provide a score between 0 and 100 representing how well the response addresses the overall research question: {global_query}. Based on your response, suggest up to five follow-up questions that could be asked to further explore the topic as it relates to the overall research question. Do not include scores or follow up questions in the 'response' field of the JSON, add them to the respective 'score' and 'follow_up_queries' keys of the JSON output. Format your response in JSON with the following keys and values: +Additionally provide a score between 0 and 100 representing how well the response addresses the overall research question: {global_query}. Based on your response, suggest up to {followups} follow-up questions that could be asked to further explore the topic as it relates to the overall research question. Do not include scores or follow up questions in the 'response' field of the JSON, add them to the respective 'score' and 'follow_up_queries' keys of the JSON output. Format your response in JSON with the following keys and values: {{'response': str, Put your answer, formatted in markdown, here. Do not answer the global query in this section. 'score': int, diff --git a/packages/graphrag/graphrag/query/indexer_adapters.py b/packages/graphrag/graphrag/query/indexer_adapters.py index 0c6e54a8af..56064d8c2a 100644 --- a/packages/graphrag/graphrag/query/indexer_adapters.py +++ b/packages/graphrag/graphrag/query/indexer_adapters.py @@ -11,14 +11,12 @@ import pandas as pd -from graphrag.config.models.graph_rag_config import GraphRagConfig from graphrag.data_model.community import Community from graphrag.data_model.community_report import CommunityReport from graphrag.data_model.covariate import Covariate from graphrag.data_model.entity import Entity from graphrag.data_model.relationship import Relationship from graphrag.data_model.text_unit import TextUnit -from graphrag.language_model.manager import ModelManager from graphrag.language_model.protocol.base import EmbeddingModel from graphrag.query.input.loaders.dfs import ( read_communities, @@ -76,8 +74,6 @@ def read_indexer_reports( final_communities: pd.DataFrame, community_level: int | None, dynamic_community_selection: bool = False, - content_embedding_col: str = "full_content_embedding", - config: GraphRagConfig | None = None, ) -> list[CommunityReport]: """Read in the Community Reports from the raw indexing outputs. @@ -102,29 +98,7 @@ def read_indexer_reports( filtered_community_df, on="community", how="inner" ) - if config and ( - content_embedding_col not in reports_df.columns - or reports_df.loc[:, content_embedding_col].isna().any() - ): - # TODO: Find a way to retrieve the right embedding model id. - embedding_model_settings = config.get_language_model_config( - "default_embedding_model" - ) - embedder = ModelManager().get_or_create_embedding_model( - name="default_embedding", - model_type=embedding_model_settings.type, - config=embedding_model_settings, - ) - reports_df = embed_community_reports( - reports_df, embedder, embedding_col=content_embedding_col - ) - - return read_community_reports( - df=reports_df, - id_col="id", - short_id_col="community", - content_embedding_col=content_embedding_col, - ) + return read_community_reports(df=reports_df, id_col="id", short_id_col="community") def read_indexer_report_embeddings( diff --git a/packages/graphrag/graphrag/query/input/loaders/dfs.py b/packages/graphrag/graphrag/query/input/loaders/dfs.py index 17aeb604a2..a2b636f1dd 100644 --- a/packages/graphrag/graphrag/query/input/loaders/dfs.py +++ b/packages/graphrag/graphrag/query/input/loaders/dfs.py @@ -197,7 +197,6 @@ def read_community_reports( summary_col: str = "summary", content_col: str = "full_content", rank_col: str | None = "rank", - content_embedding_col: str | None = "full_content_embedding", attributes_cols: list[str] | None = None, ) -> list[CommunityReport]: """Read community reports from a dataframe using pre-converted records.""" @@ -213,9 +212,6 @@ def read_community_reports( summary=to_str(row, summary_col), full_content=to_str(row, content_col), rank=to_optional_float(row, rank_col), - full_content_embedding=to_optional_list( - row, content_embedding_col, item_type=float - ), attributes=( {col: row.get(col) for col in attributes_cols} if attributes_cols diff --git a/packages/graphrag/graphrag/query/structured_search/drift_search/action.py b/packages/graphrag/graphrag/query/structured_search/drift_search/action.py index 8f1da3d721..23cc811a39 100644 --- a/packages/graphrag/graphrag/query/structured_search/drift_search/action.py +++ b/packages/graphrag/graphrag/query/structured_search/drift_search/action.py @@ -50,7 +50,13 @@ def is_complete(self) -> bool: """Check if the action is complete (i.e., an answer is available).""" return self.answer is not None - async def search(self, search_engine: Any, global_query: str, scorer: Any = None): + async def search( + self, + search_engine: Any, + global_query: str, + k_followups: int, + scorer: Any = None, + ): """ Execute an asynchronous search using the search engine, and update the action with the results. @@ -71,7 +77,9 @@ async def search(self, search_engine: Any, global_query: str, scorer: Any = None return self search_result = await search_engine.search( - drift_query=global_query, query=self.query + query=self.query, + drift_query=global_query, + k_followups=k_followups, ) # Do not launch exception as it will roll up with other steps diff --git a/packages/graphrag/graphrag/query/structured_search/drift_search/drift_context.py b/packages/graphrag/graphrag/query/structured_search/drift_search/drift_context.py index 4b4325ae2e..9e1e9c317a 100644 --- a/packages/graphrag/graphrag/query/structured_search/drift_search/drift_context.py +++ b/packages/graphrag/graphrag/query/structured_search/drift_search/drift_context.py @@ -40,6 +40,7 @@ class DRIFTSearchContextBuilder(DRIFTContextBuilder): def __init__( self, model: ChatModel, + config: DRIFTSearchConfig, text_embedder: EmbeddingModel, entities: list[Entity], entity_text_embeddings: BaseVectorStore, @@ -49,14 +50,13 @@ def __init__( covariates: dict[str, list[Covariate]] | None = None, tokenizer: Tokenizer | None = None, embedding_vectorstore_key: str = EntityVectorStoreKey.ID, - config: DRIFTSearchConfig | None = None, local_system_prompt: str | None = None, local_mixed_context: LocalSearchMixedContext | None = None, reduce_system_prompt: str | None = None, response_type: str | None = None, ): """Initialize the DRIFT search context builder with necessary components.""" - self.config = config or DRIFTSearchConfig() + self.config = config self.model = model self.text_embedder = text_embedder self.tokenizer = tokenizer or get_tokenizer() diff --git a/packages/graphrag/graphrag/query/structured_search/drift_search/search.py b/packages/graphrag/graphrag/query/structured_search/drift_search/search.py index 64a8e52b43..14f208c77a 100644 --- a/packages/graphrag/graphrag/query/structured_search/drift_search/search.py +++ b/packages/graphrag/graphrag/query/structured_search/drift_search/search.py @@ -156,7 +156,11 @@ def _process_primer_results( raise ValueError(error_msg) async def _search_step( - self, global_query: str, search_engine: LocalSearch, actions: list[DriftAction] + self, + global_query: str, + k_followups: int, + search_engine: LocalSearch, + actions: list[DriftAction], ) -> list[DriftAction]: """ Perform an asynchronous search step by executing each DriftAction asynchronously. @@ -171,7 +175,11 @@ async def _search_step( list[DriftAction]: The results from executing the search actions asynchronously. """ tasks = [ - action.search(search_engine=search_engine, global_query=global_query) + action.search( + search_engine=search_engine, + global_query=global_query, + k_followups=k_followups, + ) for action in actions ] return await tqdm_asyncio.gather(*tasks, leave=False) @@ -241,7 +249,10 @@ async def search( ) # Process actions results = await self._search_step( - global_query=query, search_engine=self.local_search, actions=actions + global_query=query, + k_followups=self.context_builder.config.drift_k_followups, + search_engine=self.local_search, + actions=actions, ) # Update query state diff --git a/packages/graphrag/graphrag/query/structured_search/local_search/search.py b/packages/graphrag/graphrag/query/structured_search/local_search/search.py index fdd72949da..64fc884213 100644 --- a/packages/graphrag/graphrag/query/structured_search/local_search/search.py +++ b/packages/graphrag/graphrag/query/structured_search/local_search/search.py @@ -76,6 +76,7 @@ async def search( context_data=context_result.context_chunks, response_type=self.response_type, global_query=drift_query, + followups=kwargs.get("k_followups", 0), ) else: search_prompt = self.system_prompt.format( diff --git a/packages/graphrag/graphrag/vector_stores/cosmosdb.py b/packages/graphrag/graphrag/vector_stores/cosmosdb.py index 23b2c8f821..7ad06950c2 100644 --- a/packages/graphrag/graphrag/vector_stores/cosmosdb.py +++ b/packages/graphrag/graphrag/vector_stores/cosmosdb.py @@ -156,14 +156,10 @@ def load_documents(self, documents: list[VectorStoreDocument]) -> None: # Upload documents to CosmosDB for doc in documents: if doc.vector is not None: - print("Document to store:") # noqa: T201 - print(doc) # noqa: T201 doc_json = { self.id_field: doc.id, self.vector_field: doc.vector, } - print("Storing document in CosmosDB:") # noqa: T201 - print(doc_json) # noqa: T201 self._container_client.upsert_item(doc_json) def similarity_search_by_vector( diff --git a/packages/graphrag/graphrag/vector_stores/lancedb.py b/packages/graphrag/graphrag/vector_stores/lancedb.py index 2b589d5bb0..bddc83cab2 100644 --- a/packages/graphrag/graphrag/vector_stores/lancedb.py +++ b/packages/graphrag/graphrag/vector_stores/lancedb.py @@ -84,12 +84,7 @@ def load_documents(self, documents: list[VectorStoreDocument]) -> None: }) if data: - self.document_collection = self.db_connection.create_table( - self.index_name if self.index_name else "", - data=data, - mode="overwrite", - schema=data.schema, - ) + self.document_collection.add(data) def similarity_search_by_vector( self, query_embedding: list[float] | np.ndarray, k: int = 10 diff --git a/tests/fixtures/min-csv/config.json b/tests/fixtures/min-csv/config.json index 90b7c26061..99291c05cb 100644 --- a/tests/fixtures/min-csv/config.json +++ b/tests/fixtures/min-csv/config.json @@ -98,7 +98,7 @@ }, { "query": "What is the major conflict in this story and who are the protagonist and antagonist?", - "method": "global" + "method": "drift" } ], "slow": false diff --git a/tests/mock_provider.py b/tests/mock_provider.py index 9ffb52fb48..97b7aa9076 100644 --- a/tests/mock_provider.py +++ b/tests/mock_provider.py @@ -100,25 +100,22 @@ def __init__(self, **kwargs: Any): self.config = LanguageModelConfig( type=ModelType.MockEmbedding, model="text-embedding-3-large", api_key="mock" ) + self.vectors = 3072 * [1.0] def embed_batch(self, text_list: list[str], **kwargs: Any) -> list[list[float]]: """Generate an embedding for the input text.""" - if isinstance(text_list, str): - return [[1.0, 1.0, 1.0]] - return [[1.0, 1.0, 1.0] for _ in text_list] + return [self.vectors for _ in text_list] def embed(self, text: str, **kwargs: Any) -> list[float]: """Generate an embedding for the input text.""" - return [1.0, 1.0, 1.0] + return self.vectors async def aembed(self, text: str, **kwargs: Any) -> list[float]: """Generate an embedding for the input text.""" - return [1.0, 1.0, 1.0] + return self.vectors async def aembed_batch( self, text_list: list[str], **kwargs: Any ) -> list[list[float]]: """Generate an embedding for the input text.""" - if isinstance(text_list, str): - return [[1.0, 1.0, 1.0]] - return [[1.0, 1.0, 1.0] for _ in text_list] + return [self.vectors for _ in text_list]