Patch summary
=============
_filter_under_community_level: keep rows whose `level` is missing (NaN/None)
in addition to rows with `level <= community_level`. A bare `<=` comparison
evaluates to False for missing values, so those rows were filtered out before.

Also adds a unit test that feeds NaN, None, below-, at- and above-threshold
levels and checks that only the above-threshold row is removed.

diff --git a/packages/graphrag/graphrag/query/indexer_adapters.py b/packages/graphrag/graphrag/query/indexer_adapters.py
index 7119ad842c..4d406ccb50 100644
--- a/packages/graphrag/graphrag/query/indexer_adapters.py
+++ b/packages/graphrag/graphrag/query/indexer_adapters.py
@@ -221,5 +221,5 @@ def _filter_under_community_level(
 ) -> pd.DataFrame:
     return cast(
         "pd.DataFrame",
-        df[df.level <= community_level],
+        df[(df.level.isna()) | (df.level <= community_level)],
     )
diff --git a/tests/unit/query/test_indexer_adapters.py b/tests/unit/query/test_indexer_adapters.py
new file mode 100644
index 0000000000..73f2e27dc3
--- /dev/null
+++ b/tests/unit/query/test_indexer_adapters.py
@@ -0,0 +1,27 @@
+# Copyright (c) 2024 Microsoft Corporation.
+# Licensed under the MIT License
+
+import pandas as pd
+from graphrag.query.indexer_adapters import _filter_under_community_level
+
+
+def test_filter_under_community_level_keeps_missing_levels() -> None:
+    df = pd.DataFrame({
+        "id": [
+            "missing_nan",
+            "missing_none",
+            "below_threshold",
+            "at_threshold",
+            "above_threshold",
+        ],
+        "level": [float("nan"), None, 1, 2, 3],
+    })
+
+    result = _filter_under_community_level(df, community_level=2)
+
+    assert result["id"].to_list() == [
+        "missing_nan",
+        "missing_none",
+        "below_threshold",
+        "at_threshold",
+    ]