Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .semversioner/next-release/patch-20250422232634719243.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"type": "patch",
"description": "Align embeddings table loading with configured fields."
}
34 changes: 26 additions & 8 deletions graphrag/index/workflows/generate_text_embeddings.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,11 @@
from graphrag.index.operations.embed_text import embed_text
from graphrag.index.typing.context import PipelineRunContext
from graphrag.index.typing.workflow import WorkflowFunctionOutput
from graphrag.utils.storage import load_table_from_storage, write_table_to_storage
from graphrag.utils.storage import (
load_table_from_storage,
storage_has_table,
write_table_to_storage,
)

log = logging.getLogger(__name__)

Expand All @@ -35,13 +39,23 @@ async def run_workflow(
context: PipelineRunContext,
) -> WorkflowFunctionOutput:
"""All the steps to transform community reports."""
documents = await load_table_from_storage("documents", context.storage)
relationships = await load_table_from_storage("relationships", context.storage)
text_units = await load_table_from_storage("text_units", context.storage)
entities = await load_table_from_storage("entities", context.storage)
community_reports = await load_table_from_storage(
"community_reports", context.storage
)
documents = None
relationships = None
text_units = None
entities = None
community_reports = None
if await storage_has_table("documents", context.storage):
documents = await load_table_from_storage("documents", context.storage)
Comment thread
natoverse marked this conversation as resolved.
if await storage_has_table("relationships", context.storage):
relationships = await load_table_from_storage("relationships", context.storage)
if await storage_has_table("text_units", context.storage):
text_units = await load_table_from_storage("text_units", context.storage)
if await storage_has_table("entities", context.storage):
entities = await load_table_from_storage("entities", context.storage)
if await storage_has_table("community_reports", context.storage):
community_reports = await load_table_from_storage(
"community_reports", context.storage
)

embedded_fields = get_embedded_fields(config)
text_embed = get_embedding_settings(config)
Expand Down Expand Up @@ -133,6 +147,10 @@ async def generate_text_embeddings(
log.info("Creating embeddings")
outputs = {}
for field in embedded_fields:
if embedding_param_map[field]["data"] is None:
msg = f"Embedding {field} is specified but data table is not in storage."
raise ValueError(msg)

outputs[field] = await _run_and_snapshot_embeddings(
name=field,
callbacks=callbacks,
Expand Down
Loading