Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 50 additions & 0 deletions .semversioner/3.0.2.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
{
"changes": [
{
"description": "Add CSVTableProvider",
"type": "patch"
},
{
"description": "Add DataReader class for typed dataframe loading from TableProvider across indexing workflows and query CLI",
"type": "patch"
},
{
"description": "Add TableProvider abstraction for table-based storage operations",
"type": "patch"
},
{
"description": "Add async iterator support to InputReader and use it in load_input_documents and load_update_documents workflows.",
"type": "patch"
},
{
"description": "Add table provider factory.",
"type": "patch"
},
{
"description": "Fix missed py 3.13.",
"type": "patch"
},
{
"description": "Move document ID, human_readable_id, and raw_data initialization from create_final_documents into load_input_documents and load_update_documents.",
"type": "patch"
},
{
"description": "Remove NetworkX dependency from graph utilities; move to DataFrame-based implementations in graphrag.graphs package.",
"type": "patch"
},
{
"description": "Remove unnecessary response format check. Fixes: #2203",
"type": "patch"
},
{
"description": "add profiling to get memory usage",
"type": "patch"
},
{
"description": "update notebooks",
"type": "patch"
}
],
"created_at": "2026-02-13T18:01:44+00:00",
"version": "3.0.2"
}
4 changes: 0 additions & 4 deletions .semversioner/next-release/patch-20260127131016120694.json

This file was deleted.

4 changes: 0 additions & 4 deletions .semversioner/next-release/patch-20260205172351041030.json

This file was deleted.

4 changes: 0 additions & 4 deletions .semversioner/next-release/patch-20260205212125616221.json

This file was deleted.

4 changes: 0 additions & 4 deletions .semversioner/next-release/patch-20260206194808781905.json

This file was deleted.

4 changes: 0 additions & 4 deletions .semversioner/next-release/patch-20260206205026841660.json

This file was deleted.

4 changes: 0 additions & 4 deletions .semversioner/next-release/patch-20260206214141420353.json

This file was deleted.

4 changes: 0 additions & 4 deletions .semversioner/next-release/patch-20260210011450472481.json

This file was deleted.

4 changes: 0 additions & 4 deletions .semversioner/next-release/patch-20260211211707376370.json

This file was deleted.

4 changes: 0 additions & 4 deletions .semversioner/next-release/patch-20260211214912747264.json

This file was deleted.

4 changes: 0 additions & 4 deletions .semversioner/next-release/patch-20260211221626814603.json

This file was deleted.

4 changes: 4 additions & 0 deletions .semversioner/next-release/patch-20260212002508389038.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"type": "patch",
"description": "add streaming to the first two workflows"
}
4 changes: 0 additions & 4 deletions .semversioner/next-release/patch-20260212211908142161.json

This file was deleted.

4 changes: 4 additions & 0 deletions .semversioner/next-release/patch-20260213160631396575.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"type": "patch",
"description": "add csv table smoke tests"
}
14 changes: 14 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,20 @@
# Changelog
Note: version releases in the 0.x.y range may introduce breaking changes.

## 3.0.2

- patch: Add CSVTableProvider
- patch: Add DataReader class for typed dataframe loading from TableProvider across indexing workflows and query CLI
- patch: Add TableProvider abstraction for table-based storage operations
- patch: Add async iterator support to InputReader and use it in load_input_documents and load_update_documents workflows.
- patch: Add table provider factory.
- patch: Fix missed py 3.13.
- patch: Move document ID, human_readable_id, and raw_data initialization from create_final_documents into load_input_documents and load_update_documents.
- patch: Remove NetworkX dependency from graph utilities; move to DataFrame-based implementations in graphrag.graphs package.
- patch: Remove unnecessary response format check. Fixes: #2203
- patch: add profiling to get memory usage
- patch: update notebooks

## 3.0.1

- patch: Fix missing dependency.
Expand Down
6 changes: 3 additions & 3 deletions packages/graphrag-cache/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "graphrag-cache"
version = "3.0.1"
version = "3.0.2"
description = "GraphRAG cache package."
authors = [
{name = "Alonso Guevara Fernández", email = "alonsog@microsoft.com"},
Expand Down Expand Up @@ -31,8 +31,8 @@ classifiers = [
"Programming Language :: Python :: 3.13",
]
dependencies = [
"graphrag-common==3.0.1",
"graphrag-storage==3.0.1",
"graphrag-common==3.0.2",
"graphrag-storage==3.0.2",
]

[project.urls]
Expand Down
4 changes: 2 additions & 2 deletions packages/graphrag-chunking/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "graphrag-chunking"
version = "3.0.1"
version = "3.0.2"
description = "Chunking utilities for GraphRAG"
authors = [
{name = "Alonso Guevara Fernández", email = "alonsog@microsoft.com"},
Expand Down Expand Up @@ -30,7 +30,7 @@ classifiers = [
"Programming Language :: Python :: 3.13",
]
dependencies = [
"graphrag-common==3.0.1",
"graphrag-common==3.0.2",
"pydantic~=2.10",
]

Expand Down
2 changes: 1 addition & 1 deletion packages/graphrag-common/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "graphrag-common"
version = "3.0.1"
version = "3.0.2"
description = "Common utilities and types for GraphRAG"
authors = [
{name = "Alonso Guevara Fernández", email = "alonsog@microsoft.com"},
Expand Down
6 changes: 6 additions & 0 deletions packages/graphrag-input/graphrag_input/csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,18 @@

import csv
import logging
import sys

from graphrag_input.structured_file_reader import StructuredFileReader
from graphrag_input.text_document import TextDocument

logger = logging.getLogger(__name__)

# Raise the csv module's maximum field size at import time. The limit is a
# setting of the csv module itself, so it applies to every csv parser in the
# process, not only to CSVFileReader.
# If sys.maxsize is rejected with OverflowError (presumably on platforms where
# it does not fit the underlying C integer type -- TODO confirm), fall back to
# a fixed limit of 100 MiB (100 * 1024 * 1024 bytes).
try:
csv.field_size_limit(sys.maxsize)
except OverflowError:
csv.field_size_limit(100 * 1024 * 1024)


class CSVFileReader(StructuredFileReader):
"""Reader implementation for csv files."""
Expand Down
6 changes: 3 additions & 3 deletions packages/graphrag-input/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "graphrag-input"
version = "3.0.1"
version = "3.0.2"
description = "Input document loading utilities for GraphRAG"
authors = [
{name = "Alonso Guevara Fernández", email = "alonsog@microsoft.com"},
Expand Down Expand Up @@ -30,8 +30,8 @@ classifiers = [
"Programming Language :: Python :: 3.13",
]
dependencies = [
"graphrag-common==3.0.1",
"graphrag-storage==3.0.1 ",
"graphrag-common==3.0.2",
"graphrag-storage==3.0.2 ",
"pydantic~=2.10",
"markitdown~=0.1.0",
"markitdown[pdf]"
Expand Down
6 changes: 3 additions & 3 deletions packages/graphrag-llm/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "graphrag-llm"
version = "3.0.1"
version = "3.0.2"
description = "GraphRAG LLM package."
authors = [
{name = "Alonso Guevara Fernández", email = "alonsog@microsoft.com"},
Expand Down Expand Up @@ -33,8 +33,8 @@ classifiers = [
]
dependencies = [
"azure-identity~=1.25",
"graphrag-cache==3.0.1",
"graphrag-common==3.0.1",
"graphrag-cache==3.0.2",
"graphrag-common==3.0.2",
"jinja2~=3.1",
"litellm~=1.80",
"nest-asyncio2~=1.7",
Expand Down
4 changes: 4 additions & 0 deletions packages/graphrag-storage/graphrag_storage/file_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,10 @@ async def get_creation_date(self, key: str) -> str:

return get_timestamp_formatted_with_local_tz(creation_time_utc)

def get_path(self, key: str) -> Path:
    """Return the full file path for ``key`` (for streaming access).

    The path is built by joining this storage's base directory with the key.
    """
    base_dir = self._base_dir
    return _join_path(base_dir, key)


def _join_path(file_path: Path, file_name: str) -> Path:
"""Join a path and a file. Independent of the OS."""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

"""Table provider module for GraphRAG storage."""

from .table import Table
from .table_provider import TableProvider

__all__ = ["TableProvider"]
__all__ = ["Table", "TableProvider"]
Loading
Loading