diff --git a/.semversioner/next-release/patch-20250430001238698062.json b/.semversioner/next-release/patch-20250430001238698062.json new file mode 100644 index 0000000000..c0b54982b0 --- /dev/null +++ b/.semversioner/next-release/patch-20250430001238698062.json @@ -0,0 +1,4 @@ +{ + "type": "patch", + "description": "Fix graph creation missing edge weights." +} diff --git a/graphrag/index/operations/finalize_entities.py b/graphrag/index/operations/finalize_entities.py index 45536ab639..4539e53b79 100644 --- a/graphrag/index/operations/finalize_entities.py +++ b/graphrag/index/operations/finalize_entities.py @@ -24,7 +24,7 @@ def finalize_entities( layout_enabled: bool = False, ) -> pd.DataFrame: """All the steps to transform final entities.""" - graph = create_graph(relationships) + graph = create_graph(relationships, edge_attr=["weight"]) graph_embeddings = None if embed_config is not None and embed_config.enabled: graph_embeddings = embed_graph( diff --git a/graphrag/index/operations/finalize_relationships.py b/graphrag/index/operations/finalize_relationships.py index 78d2c14e14..21ba413667 100644 --- a/graphrag/index/operations/finalize_relationships.py +++ b/graphrag/index/operations/finalize_relationships.py @@ -19,7 +19,7 @@ def finalize_relationships( relationships: pd.DataFrame, ) -> pd.DataFrame: """All the steps to transform final relationships.""" - graph = create_graph(relationships) + graph = create_graph(relationships, edge_attr=["weight"]) degrees = compute_degree(graph) final_relationships = relationships.drop_duplicates(subset=["source", "target"]) diff --git a/graphrag/index/workflows/create_communities.py b/graphrag/index/workflows/create_communities.py index 03c7eebfc4..b19eb18113 100644 --- a/graphrag/index/workflows/create_communities.py +++ b/graphrag/index/workflows/create_communities.py @@ -52,7 +52,7 @@ def create_communities( seed: int | None = None, ) -> pd.DataFrame: """All the steps to transform final communities.""" - graph = create_graph(relationships) + graph = create_graph(relationships, edge_attr=["weight"]) clusters = cluster_graph( graph, diff --git a/tests/verbs/data/communities.parquet b/tests/verbs/data/communities.parquet index ea04328c8a..37d8cafb44 100644 Binary files a/tests/verbs/data/communities.parquet and b/tests/verbs/data/communities.parquet differ diff --git a/tests/verbs/data/community_reports.parquet b/tests/verbs/data/community_reports.parquet index d41b98a8dd..2e32151a8d 100644 Binary files a/tests/verbs/data/community_reports.parquet and b/tests/verbs/data/community_reports.parquet differ diff --git a/tests/verbs/data/covariates.parquet b/tests/verbs/data/covariates.parquet index 3906e9bc4d..a9d1439d1a 100644 Binary files a/tests/verbs/data/covariates.parquet and b/tests/verbs/data/covariates.parquet differ diff --git a/tests/verbs/data/documents.parquet b/tests/verbs/data/documents.parquet index b8a40153f6..acf8ca2d85 100644 Binary files a/tests/verbs/data/documents.parquet and b/tests/verbs/data/documents.parquet differ diff --git a/tests/verbs/data/entities.parquet b/tests/verbs/data/entities.parquet index 4df6a73a24..b0b9cfb7fb 100644 Binary files a/tests/verbs/data/entities.parquet and b/tests/verbs/data/entities.parquet differ diff --git a/tests/verbs/data/relationships.parquet b/tests/verbs/data/relationships.parquet index 8c913e5584..75557d32ad 100644 Binary files a/tests/verbs/data/relationships.parquet and b/tests/verbs/data/relationships.parquet differ diff --git a/tests/verbs/data/text_units.parquet b/tests/verbs/data/text_units.parquet index 453dd14adf..0cefa7abe4 100644 Binary files a/tests/verbs/data/text_units.parquet and b/tests/verbs/data/text_units.parquet differ diff --git a/tests/verbs/test_finalize_graph.py b/tests/verbs/test_finalize_graph.py index 40e9269bae..7ff25ca35d 100644 --- a/tests/verbs/test_finalize_graph.py +++ b/tests/verbs/test_finalize_graph.py @@ -28,8 +28,8 @@ async def test_finalize_graph(): nodes_actual = await load_table_from_storage("entities", context.storage) edges_actual = await load_table_from_storage("relationships", context.storage) - assert len(nodes_actual) == 251 - assert len(edges_actual) == 372 + assert len(nodes_actual) == 291 + assert len(edges_actual) == 452 # x and y will be zero with the default configuration, because we do not embed/umap assert nodes_actual["x"].sum() == 0 @@ -54,8 +54,8 @@ async def test_finalize_graph_umap(): nodes_actual = await load_table_from_storage("entities", context.storage) edges_actual = await load_table_from_storage("relationships", context.storage) - assert len(nodes_actual) == 251 - assert len(edges_actual) == 372 + assert len(nodes_actual) == 291 + assert len(edges_actual) == 452 # x and y should have some value other than zero due to umap assert nodes_actual["x"].sum() != 0 diff --git a/tests/verbs/test_prune_graph.py b/tests/verbs/test_prune_graph.py index 3801dd27c0..b30546bc92 100644 --- a/tests/verbs/test_prune_graph.py +++ b/tests/verbs/test_prune_graph.py @@ -28,4 +28,4 @@ async def test_prune_graph(): nodes_actual = await load_table_from_storage("entities", context.storage) - assert len(nodes_actual) == 20 + assert len(nodes_actual) == 21