From 6cb803c878b189e8647b7c10570e9ccf277d3e07 Mon Sep 17 00:00:00 2001 From: majiayu000 <1835304752@qq.com> Date: Sun, 28 Dec 2025 10:38:15 +0800 Subject: [PATCH] Fix DynamicCommunitySelection type mismatch for children IDs Convert children IDs to strings before checking membership in self.reports to handle cases where children list contains integers while reports dict uses string keys. Fixes #2004 Signed-off-by: majiayu000 <1835304752@qq.com> --- .../patch-20251228023802955549.json | 4 + .../dynamic_community_selection.py | 8 +- .../test_dynamic_community_selection.py | 205 ++++++++++++++++++ 3 files changed, 214 insertions(+), 3 deletions(-) create mode 100644 .semversioner/next-release/patch-20251228023802955549.json create mode 100644 tests/unit/query/context_builder/test_dynamic_community_selection.py diff --git a/.semversioner/next-release/patch-20251228023802955549.json b/.semversioner/next-release/patch-20251228023802955549.json new file mode 100644 index 0000000000..d5e9773fc2 --- /dev/null +++ b/.semversioner/next-release/patch-20251228023802955549.json @@ -0,0 +1,4 @@ +{ + "type": "patch", + "description": "Fix DynamicCommunitySelection type mismatch where children IDs as integers were not matching string report keys" +} diff --git a/graphrag/query/context_builder/dynamic_community_selection.py b/graphrag/query/context_builder/dynamic_community_selection.py index 904478f738..506f4a8b57 100644 --- a/graphrag/query/context_builder/dynamic_community_selection.py +++ b/graphrag/query/context_builder/dynamic_community_selection.py @@ -123,12 +123,14 @@ async def select(self, query: str) -> tuple[list[CommunityReport], dict[str, Any # TODO check why some sub_communities are NOT in report_df if community in self.communities: for child in self.communities[community].children: - if child in self.reports: - communities_to_rate.append(child) + # Ensure child is a string to match self.reports keys + child_id = str(child) + if child_id in self.reports: + communities_to_rate.append(child_id) else: logger.debug( "dynamic community selection: cannot find community %s in reports", - child, + child_id, ) # remove parent node if the current node is deemed relevant if not self.keep_parent and community in self.communities: diff --git a/tests/unit/query/context_builder/test_dynamic_community_selection.py b/tests/unit/query/context_builder/test_dynamic_community_selection.py new file mode 100644 index 0000000000..ba63f0c774 --- /dev/null +++ b/tests/unit/query/context_builder/test_dynamic_community_selection.py @@ -0,0 +1,205 @@ +# Copyright (c) 2024 Microsoft Corporation. +# Licensed under the MIT License + +"""Tests for dynamic community selection with type handling.""" + +from unittest.mock import MagicMock + +from graphrag.data_model.community import Community +from graphrag.data_model.community_report import CommunityReport +from graphrag.query.context_builder.dynamic_community_selection import ( + DynamicCommunitySelection, +) + + +def create_mock_tokenizer() -> MagicMock: + """Create a mock tokenizer.""" + tokenizer = MagicMock() + tokenizer.encode.return_value = [1, 2, 3] + return tokenizer + + +def create_mock_model() -> MagicMock: + """Create a mock chat model.""" + return MagicMock() + + +def test_dynamic_community_selection_handles_int_children(): + """Test that DynamicCommunitySelection correctly handles children IDs as integers. + + This tests the fix for issue #2004 where children IDs could be integers + while self.reports keys are strings, causing child communities to be skipped. + """ + # Create communities with integer children (simulating the bug scenario) + # Note: Even though the type annotation says list[str], actual data may have ints + communities = [ + Community( + id="comm-0", + short_id="0", + title="Root Community", + level="0", + parent="", + children=[1, 2], # type: ignore[list-item] # Integer children - testing bug fix + ), + Community( + id="comm-1", + short_id="1", + title="Child Community 1", + level="1", + parent="0", + children=[], + ), + Community( + id="comm-2", + short_id="2", + title="Child Community 2", + level="1", + parent="0", + children=[], + ), + ] + + # Create community reports with string community_id + reports = [ + CommunityReport( + id="report-0", + short_id="0", + title="Report 0", + community_id="0", + summary="Root community summary", + full_content="Root community full content", + rank=1.0, + ), + CommunityReport( + id="report-1", + short_id="1", + title="Report 1", + community_id="1", + summary="Child 1 summary", + full_content="Child 1 full content", + rank=1.0, + ), + CommunityReport( + id="report-2", + short_id="2", + title="Report 2", + community_id="2", + summary="Child 2 summary", + full_content="Child 2 full content", + rank=1.0, + ), + ] + + model = create_mock_model() + tokenizer = create_mock_tokenizer() + + selector = DynamicCommunitySelection( + community_reports=reports, + communities=communities, + model=model, + tokenizer=tokenizer, + threshold=1, + keep_parent=False, + max_level=2, + ) + + # Verify that reports are keyed by string + assert "0" in selector.reports + assert "1" in selector.reports + assert "2" in selector.reports + + # Verify that communities are keyed by string short_id + assert "0" in selector.communities + assert "1" in selector.communities + assert "2" in selector.communities + + # Verify that the children are properly accessible + # Before the fix, int children would fail the `in self.reports` check + root_community = selector.communities["0"] + for child in root_community.children: + child_id = str(child) + # This should now work with the fix + assert child_id in selector.reports, ( + f"Child {child} (as '{child_id}') should be found in reports" + ) + + +def test_dynamic_community_selection_handles_str_children(): + """Test that DynamicCommunitySelection works correctly with string children IDs.""" + communities = [ + Community( + id="comm-0", + short_id="0", + title="Root Community", + level="0", + parent="", + children=["1", "2"], # String children - expected type + ), + Community( + id="comm-1", + short_id="1", + title="Child Community 1", + level="1", + parent="0", + children=[], + ), + Community( + id="comm-2", + short_id="2", + title="Child Community 2", + level="1", + parent="0", + children=[], + ), + ] + + reports = [ + CommunityReport( + id="report-0", + short_id="0", + title="Report 0", + community_id="0", + summary="Root community summary", + full_content="Root community full content", + rank=1.0, + ), + CommunityReport( + id="report-1", + short_id="1", + title="Report 1", + community_id="1", + summary="Child 1 summary", + full_content="Child 1 full content", + rank=1.0, + ), + CommunityReport( + id="report-2", + short_id="2", + title="Report 2", + community_id="2", + summary="Child 2 summary", + full_content="Child 2 full content", + rank=1.0, + ), + ] + + model = create_mock_model() + tokenizer = create_mock_tokenizer() + + selector = DynamicCommunitySelection( + community_reports=reports, + communities=communities, + model=model, + tokenizer=tokenizer, + threshold=1, + keep_parent=False, + max_level=2, + ) + + # Verify that children can be found in reports + root_community = selector.communities["0"] + for child in root_community.children: + child_id = str(child) + assert child_id in selector.reports, ( + f"Child {child} (as '{child_id}') should be found in reports" + )