diff --git a/.semversioner/next-release/patch-20250424210340222198.json b/.semversioner/next-release/patch-20250424210340222198.json
new file mode 100644
index 0000000000..2a749219d6
--- /dev/null
+++ b/.semversioner/next-release/patch-20250424210340222198.json
@@ -0,0 +1,4 @@
+{
+    "type": "patch",
+    "description": "add semversioner file"
+}
diff --git a/.vscode/extensions.json b/.vscode/extensions.json
index 2e5e67a214..bd72dc72c1 100644
--- a/.vscode/extensions.json
+++ b/.vscode/extensions.json
@@ -6,7 +6,6 @@
     "ms-python.vscode-pylance",
     "bierner.markdown-mermaid",
     "streetsidesoftware.code-spell-checker",
-    "ronnidc.nunjucks",
     "lucien-martijn.parquet-visualizer",
   ]
 }
diff --git a/graphrag/storage/blob_pipeline_storage.py b/graphrag/storage/blob_pipeline_storage.py
index 04b971ca70..12f00d2a9f 100644
--- a/graphrag/storage/blob_pipeline_storage.py
+++ b/graphrag/storage/blob_pipeline_storage.py
@@ -330,59 +330,6 @@ def create_blob_storage(**kwargs: Any) -> PipelineStorage:
     )
 
 
-def validate_blob_container_name(container_name: str):
-    """
-    Check if the provided blob container name is valid based on Azure rules.
-
-        - A blob container name must be between 3 and 63 characters in length.
-        - Start with a letter or number
-        - All letters used in blob container names must be lowercase.
-        - Contain only letters, numbers, or the hyphen.
-        - Consecutive hyphens are not permitted.
-        - Cannot end with a hyphen.
-
-    Args:
-    -----
-    container_name (str)
-        The blob container name to be validated.
-
-    Returns
-    -------
-        bool: True if valid, False otherwise.
-    """
-    # Check the length of the name
-    if len(container_name) < 3 or len(container_name) > 63:
-        return ValueError(
-            f"Container name must be between 3 and 63 characters in length. Name provided was {len(container_name)} characters long."
-        )
-
-    # Check if the name starts with a letter or number
-    if not container_name[0].isalnum():
-        return ValueError(
-            f"Container name must start with a letter or number. Starting character was {container_name[0]}."
-        )
-
-    # Check for valid characters (letters, numbers, hyphen) and lowercase letters
-    if not re.match(r"^[a-z0-9-]+$", container_name):
-        return ValueError(
-            f"Container name must only contain:\n- lowercase letters\n- numbers\n- or hyphens\nName provided was {container_name}."
-        )
-
-    # Check for consecutive hyphens
-    if "--" in container_name:
-        return ValueError(
-            f"Container name cannot contain consecutive hyphens. Name provided was {container_name}."
-        )
-
-    # Check for hyphens at the end of the name
-    if container_name[-1] == "-":
-        return ValueError(
-            f"Container name cannot end with a hyphen. Name provided was {container_name}."
-        )
-
-    return True
-
-
 def _create_progress_status(
     num_loaded: int, num_filtered: int, num_total: int
 ) -> Progress:
diff --git a/graphrag/storage/factory.py b/graphrag/storage/factory.py
index 8a6e0df4d6..ffdcec4ad3 100644
--- a/graphrag/storage/factory.py
+++ b/graphrag/storage/factory.py
@@ -37,18 +37,24 @@ def register(cls, storage_type: str, storage: type):
     def create_storage(
         cls, storage_type: OutputType | str, kwargs: dict
     ) -> PipelineStorage:
-        """Create or get a storage object from the provided type."""
-        match storage_type:
-            case OutputType.blob:
-                return create_blob_storage(**kwargs)
-            case OutputType.cosmosdb:
-                return create_cosmosdb_storage(**kwargs)
-            case OutputType.file:
-                return create_file_storage(**kwargs)
-            case OutputType.memory:
-                return MemoryPipelineStorage()
-            case _:
-                if storage_type in cls.storage_types:
-                    return cls.storage_types[storage_type](**kwargs)
-                msg = f"Unknown storage type: {storage_type}"
-                raise ValueError(msg)
+        """Get a storage object from the provided type.
+
+        The registered factory for ``storage_type`` is called with ``kwargs``.
+
+        Raises
+        ------
+        ValueError
+            If no implementation is registered for ``storage_type``.
+        """
+        if storage_type not in cls.storage_types:
+            msg = f"Storage implementation '{storage_type}' is not registered."
+            raise ValueError(msg)
+        return cls.storage_types[storage_type](**kwargs)
+
+
+# Built-in implementations; register() takes (storage_type, storage).
+StorageFactory.register(OutputType.blob, create_blob_storage)
+StorageFactory.register(OutputType.cosmosdb, create_cosmosdb_storage)
+StorageFactory.register(OutputType.file, create_file_storage)
+# The memory backend used to be built with no arguments, so kwargs are dropped.
+StorageFactory.register(OutputType.memory, lambda **_: MemoryPipelineStorage())
diff --git a/graphrag/utils/api.py b/graphrag/utils/api.py
index 9b69ef97a9..b43ea9caf9 100644
--- a/graphrag/utils/api.py
+++ b/graphrag/utils/api.py
@@ -241,9 +241,9 @@ def load_search_prompt(root_dir: str, prompt_config: str | None) -> str | None:
 def create_storage_from_config(output: OutputConfig) -> PipelineStorage:
     """Create a storage object from the config."""
     storage_config = output.model_dump()
-    return StorageFactory().create_storage(
-        storage_type=storage_config["type"],
-        kwargs=storage_config,
+    return StorageFactory.create_storage(
+        storage_config["type"],
+        storage_config,
     )
 
 