From 141a9fbdb76d060d624b5dafd118cdbd3cd50af9 Mon Sep 17 00:00:00 2001 From: David Hassell Date: Mon, 19 May 2025 23:19:27 +0100 Subject: [PATCH 01/12] dev --- README.md | 2 +- cf/__init__.py | 2 +- cf/cfimplementation.py | 6 +- cf/data/array/__init__.py | 1 + cf/read_write/read.py | 559 +++++++++++++++---------------------- cf/test/test_read_write.py | 100 ++++--- docs/source/tutorial.rst | 4 +- 7 files changed, 302 insertions(+), 372 deletions(-) diff --git a/README.md b/README.md index bd6656ecf6..9f5c75a8b6 100644 --- a/README.md +++ b/README.md @@ -84,7 +84,7 @@ The `cf` package uses [Dask](https://ncas-cms.github.io/cf-python/performance.html) for all of its array manipulation and can: -* read field constructs from netCDF, CDL, PP and UM datasets with a +* read field constructs from netCDF, CDL, Zarr, PP and UM datasets with a choice of netCDF backends,and in local, http, and s3 locations, * create new field constructs in memory, * write and append field and domain constructs to netCDF datasets on disk, diff --git a/cf/__init__.py b/cf/__init__.py index ae86fe4eff..f6d2073b75 100644 --- a/cf/__init__.py +++ b/cf/__init__.py @@ -11,7 +11,7 @@ The `cf` package uses `dask` for all of its array manipulation and can: -* read field constructs from netCDF, CDL, PP and UM datasets, +* read field constructs from netCDF, CDL, Zarr, PP and UM datasets, * read field constructs and domain constructs from netCDF, CDL, PP and UM datasets with a choice of netCDF backends, diff --git a/cf/cfimplementation.py b/cf/cfimplementation.py index 3de62364df..5ac48ae061 100644 --- a/cf/cfimplementation.py +++ b/cf/cfimplementation.py @@ -38,6 +38,7 @@ RaggedIndexedArray, RaggedIndexedContiguousArray, SubsampledArray, + ZarrArray, ) from .functions import CF @@ -151,6 +152,7 @@ def set_construct(self, parent, construct, axes=None, copy=True, **kwargs): RaggedIndexedContiguousArray=RaggedIndexedContiguousArray, SubsampledArray=SubsampledArray, TiePointIndex=TiePointIndex, + ZarrArray=ZarrArray, ) @@ -205,7 +207,9 @@ def implementation(): 'RaggedIndexedArray': cf.data.array.raggedindexedarray.RaggedIndexedArray, 'RaggedIndexedContiguousArray': cf.data.array.raggedindexedcontiguousarray.RaggedIndexedContiguousArray, 'SubsampledArray': cf.data.array.subsampledarray.SubsampledArray, - 'TiePointIndex': cf.tiepointindex.TiePointIndex} + 'TiePointIndex': cf.tiepointindex.TiePointIndex, + 'ZarrArray': cf.data.array.zarrarray.ZarrArray, + } """ return _implementation.copy() diff --git a/cf/data/array/__init__.py b/cf/data/array/__init__.py index 693fec0fb4..2b3d03c54f 100644 --- a/cf/data/array/__init__.py +++ b/cf/data/array/__init__.py @@ -12,3 +12,4 @@ from .raggedindexedcontiguousarray import RaggedIndexedContiguousArray from .subsampledarray import SubsampledArray from .umarray import UMArray +from .zarrarray import ZarrArray diff --git a/cf/read_write/read.py b/cf/read_write/read.py index f2402c25ad..3daf4ca442 100644 --- a/cf/read_write/read.py +++ b/cf/read_write/read.py @@ -1,20 +1,16 @@ import logging -import os -from glob import glob -from os.path import isdir +from functools import partial from re import Pattern -from urllib.parse import urlparse import cfdm from cfdm.read_write.exceptions import DatasetTypeError -from cfdm.read_write.netcdf import NetCDFRead from ..aggregate import aggregate as cf_aggregate from ..cfimplementation import implementation from ..decorators import _manage_log_level_via_verbosity from ..domainlist import DomainList from ..fieldlist import FieldList -from ..functions import 
_DEPRECATION_ERROR_FUNCTION_KWARGS, flat +from ..functions import _DEPRECATION_ERROR_FUNCTION_KWARGS from ..query import Query from .um import UMRead @@ -24,18 +20,22 @@ class read(cfdm.read): """Read field or domain constructs from files. - The following file formats are supported: netCDF, CFA-netCDF, CDL, - UM fields file, and PP. + The following file formats are supported: netCDF, CDL, Zarr, PP, + and UM fields file. - Input datasets are mapped to constructs in memory which are - returned as elements of a `FieldList` or if the *domain* parameter - is True, a `DomainList`. + NetCDF and Zarr datasets may be on local disk, on an OPeNDAP + server, or in an S3 object store. - NetCDF files may be on disk, on an OPeNDAP server, or in an S3 - object store. + CDL, PP, and UM fields files must be on local disk. Any amount of files of any combination of file types may be read. + Input datasets are mapped to `Field` constructs which are returned + as elements of a `FieldList`, or if the *domain* parameter is + True, `Domain` constructs returned as elements of a + `DomainList`. The returned constructs are sorted by the netCDF + variable names of their corresponding data or domain variables. + **NetCDF unlimited dimensions** Domain axis constructs that correspond to NetCDF unlimited @@ -136,7 +136,7 @@ class read(cfdm.read): However, when two or more field or domain constructs are aggregated to form a single construct then the data arrays of some - metadata constructs (coordinates, cell measures, etc.) must be + metadata constructs (coordinates, cell measures, etc.) must be compared non-lazily to ascertain if aggregation is possible. .. seealso:: `cf.aggregate`, `cf.write`, `cf.Field`, `cf.Domain`, @@ -144,52 +144,9 @@ class read(cfdm.read): :Parameters: - files: (arbitrarily nested sequence of) `str` - A string or arbitrarily nested sequence of strings giving - the file names, directory names, or OPenDAP URLs from - which to read field constructs. Various type of expansion - are applied to the names: - - ==================== ====================================== - Expansion Description - ==================== ====================================== - Tilde An initial component of ``~`` or - ``~user`` is replaced by that *user*'s - home directory. - - Environment variable Substrings of the form ``$name`` or - ``${name}`` are replaced by the value - of environment variable *name*. - - Pathname A string containing UNIX file name - metacharacters as understood by the - Python `glob` module is replaced by - the list of matching file names. This - type of expansion is ignored for - OPenDAP URLs. - ==================== ====================================== - - Where more than one type of expansion is used in the same - string, they are applied in the order given in the above - table. + {{read datasets: (arbitrarily nested sequence of) `str`}} - *Parameter example:* - The file ``file.nc`` in the user's home directory could - be described by any of the following: - ``'$HOME/file.nc'``, ``'${HOME}/file.nc'``, - ``'~/file.nc'``, ``'~/tmp/../file.nc'``. - - When a directory is specified, all files in that directory - are read. Sub-directories are not read unless the - *recursive* parameter is True. If any directories contain - files that are not valid datasets then an exception will - be raised, unless the *ignore_unknown_type* parameter is - True. 
-
-        As a special case, if the `cdl_string` parameter is set to
-        True, the interpretation of `files` changes so that each
-        value is assumed to be a string of CDL input rather
-        than the above.
+        {{read cdl_string: `bool`, optional}}

         {{read external: (sequence of) `str`, optional}}

@@ -199,35 +156,20 @@

         {{read warnings: `bool`, optional}}

-        {{read file_type: (sequence of) `str`, optional}}
+        {{read dataset_type: (sequence of) `str`, optional}}

            Valid file types are:

            ============ ============================================
            file type    Description
            ============ ============================================
-           ``'netCDF'`` Binary netCDF-3 or netCDF-4 files
-           ``'CDL'``    Text CDL representations of netCDF files
-           ``'UM'``     UM fields files or PP files
+           ``'netCDF'`` Binary netCDF-3 or netCDF-4 file
+           ``'CDL'``    Text CDL representation of a netCDF dataset
+           ``'Zarr'``   A Zarr v2 (xarray) or Zarr v3 hierarchy
+           ``'UM'``     UM fields file or PP file
            ============ ============================================

-           .. versionadded:: 3.17.0
-
-       cdl_string: `bool`, optional
-           If True and the format to read is CDL, read a string
-           input, or sequence of string inputs, each being interpreted
-           as a string of CDL rather than names of locations from
-           which field constructs can be read from, as standard.
-
-           By default, each string input or string element in the input
-           sequence is taken to be a file or directory name or an
-           OPenDAP URL from which to read field constructs, rather
-           than a string of CDL input, including when the `fmt`
-           parameter is set as CDL.
-
-           Note that when `cdl_string` is True, the `fmt` parameter is
-           ignored as the format is assumed to be CDL, so in that case
-           it is not necessary to also specify ``fmt='CDL'``.
+           .. versionadded:: NEXTVERSION

        um: `dict`, optional
            For Met Office (UK) PP files and Met Office (UK) fields
@@ -422,10 +364,13 @@ class read(cfdm.read):
            Use the *dask_chunks* parameter instead.

        fmt: deprecated at version 3.17.0
-           Use the *file_type* parameter instead.
+           Use the *dataset_type* parameter instead.

        ignore_read_error: deprecated at version 3.17.0
-           Use the *file_type* parameter instead.
+           Use the *dataset_type* parameter instead.
+
+       file_type: deprecated at version NEXTVERSION
+           Use the *dataset_type* parameter instead.

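For illustration, a minimal sketch of the renamed keyword in use, assuming a local Zarr hierarchy called ``data.zarr`` and a mixed directory ``~/datasets`` (both names illustrative, not part of the patch):

    >>> import cf
    >>> # Name the dataset type explicitly; it would also be detected
    >>> # automatically if 'dataset_type' were omitted
    >>> fields = cf.read("data.zarr", dataset_type="Zarr")
    >>> # Restrict a directory read to netCDF and CDL; datasets of
    >>> # other types are skipped rather than raising an exception
    >>> fields = cf.read("~/datasets", dataset_type=("netCDF", "CDL"))
    >>> # Per the new tests, an unrecognised type name raises ValueError
    >>> cf.read("data.zarr", dataset_type="bad value")
    Traceback (most recent call last):
        ...
    ValueError: ...
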
:Returns: @@ -478,7 +423,7 @@ class read(cfdm.read): @_manage_log_level_via_verbosity def __new__( cls, - files, + datasets, external=None, verbose=None, warnings=False, @@ -486,7 +431,7 @@ def __new__( nfields=None, squeeze=False, unsqueeze=False, - file_type=None, + dataset_type=None, cdl_string=False, select=None, extra=None, @@ -513,8 +458,11 @@ def __new__( chunks="auto", ignore_read_error=False, fmt=None, + file_type=None, ): """Read field or domain constructs from a dataset.""" + kwargs = locals() + if field: _DEPRECATION_ERROR_FUNCTION_KWARGS( "cf.read", @@ -568,7 +516,7 @@ def __new__( _DEPRECATION_ERROR_FUNCTION_KWARGS( "cf.read", {"fmt": fmt}, - "Use keyword 'file_type' instead.", + "Use keyword 'dataset_type' instead.", version="3.17.0", removed_at="5.0.0", ) # pragma: no cover @@ -577,25 +525,123 @@ def __new__( _DEPRECATION_ERROR_FUNCTION_KWARGS( "cf.read", {"ignore_read_error": ignore_read_error}, - "Use keyword 'file_type' instead.", + "Use keyword 'dataset_type' instead.", version="3.17.0", removed_at="5.0.0", ) # pragma: no cover - info = cfdm.is_log_level_info(logger) + if file_type is not None: + _DEPRECATION_ERROR_FUNCTION_KWARGS( + "cf.read", + {"file_type": file_type}, + "Use keyword 'dataset_type' instead.", + version="NEXTVERSION", + removed_at="5.0.0", + ) # pragma: no cover + + return super().__new__(**kwargs) + + def _finalise(self): + """Actions to take after all datasets have been read. + + Called by `__new__`. + + .. versionadded:: NEXTVERSION + + :Returns: + + `None` + + """ + UM = "UM" in self.unique_dataset_categories + + # ---------------------------------------------------------------- + # Select matching fields from non-UM files (before aggregation) + # ---------------------------------------------------------------- + select = self.select + if select and not UM: + self.constructs = self.constructs.select_by_identity(*select) + + # ---------------------------------------------------------------- + # Aggregate the output fields/domains + # ---------------------------------------------------------------- + constructs = self.constructs + if self.aggregate and len(constructs) > 1: + aggregate_options = self.aggregate_options + # Set defaults specific to UM fields + if UM and "strict_units" not in aggregate_options: + aggregate_options["relaxed_units"] = True + + self.constructs = cf_aggregate(constructs, **aggregate_options) + + # ---------------------------------------------------------------- + # Add standard names to UM/PP fields (after aggregation) + # ---------------------------------------------------------------- + if UM: + for f in self.constructs: + standard_name = f._custom.get("standard_name", None) + if standard_name is not None: + f.set_property("standard_name", standard_name, copy=False) + del f._custom["standard_name"] + + # ---------------------------------------------------------------- + # Select matching fields from UM files (post setting of their + # standard names) + # ---------------------------------------------------------------- + if select and UM: + self.constructs = self.constructs.select_by_identity(*select) + + super()._finalise() + + def _initialise(self): + """Actions to take before any datasets have been read. + + Called by `__new__`. + + .. 
versionadded:: NEXTVERSION + + :Returns: + + `None` + + """ + super()._initialise() - cls.netcdf = NetCDFRead(cls.implementation) - cls.um = UMRead(cls.implementation) + # Initialise the list of output constructs + if self.field: + self.constructs = FieldList() + else: + self.constructs = DomainList() + + # Recognised UM dataset formats + self.UM_dataset_types = set(("UM",)) + + # Allowed dataset formats + self.allowed_dataset_types.update(self.UM_dataset_types) + + # ------------------------------------------------------------ + # Parse the 'um' keyword parameter + # ------------------------------------------------------------ + kwargs = self.kwargs + um = kwargs["um"] + if not um: + um = {} + + self.um = um # ------------------------------------------------------------ # Parse the 'select' keyword parameter # ------------------------------------------------------------ + select = kwargs["select"] if isinstance(select, (str, Query, Pattern)): select = (select,) + self.select = select + # ------------------------------------------------------------ # Parse the 'aggregate' keyword parameter # ------------------------------------------------------------ + aggregate = kwargs["aggregate"] if isinstance(aggregate, dict): aggregate_options = aggregate.copy() aggregate = True @@ -604,257 +650,112 @@ def __new__( aggregate_options["copy"] = False - # ------------------------------------------------------------ - # Parse the 'file_type' keyword parameter - # ------------------------------------------------------------ - netCDF_file_types = set(("netCDF", "CDL")) - UM_file_types = set(("UM",)) - if file_type is not None: - if isinstance(file_type, str): - file_type = (file_type,) + self.aggregate = aggregate + self.aggregate_options = aggregate_options - file_type = set(file_type) + def _read(self, dataset): + """Read a given dataset into field or domain constructs. - # ------------------------------------------------------------ - # Parse the 'um' keyword parameter - # ------------------------------------------------------------ - if not um: - um = {} + The constructs are stored in the `dataset_contents` attribute. - # ------------------------------------------------------------ - # Parse the 'cdl_string' keyword parameter - # ------------------------------------------------------------ - if cdl_string and file_type is not None: - raise ValueError("Can't set file_type when cdl_string=True") + Called by `__new__`. 
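At the public interface, the keyword normalisation performed by `_initialise` above means several call forms are accepted; a minimal sketch, with illustrative file names:

    >>> import re
    >>> import cf
    >>> # 'select': a single identity, a compiled pattern, or a
    >>> # cf.Query is wrapped into a tuple
    >>> fl = cf.read("file.nc", select="air_temperature")
    >>> fl = cf.read("file.nc", select=re.compile("^air_"))
    >>> # 'aggregate': a dict both enables aggregation and supplies
    >>> # the options later passed on to cf.aggregate by _finalise
    >>> fl = cf.read("file*.pp", aggregate={"relaxed_units": True})
    >>> # 'um': stored for the PP/UM reader that _read sets up lazily
    >>> fl = cf.read("file1.pp", um={"fmt": "PP", "word_size": 4})
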
- # ------------------------------------------------------------ - # Parse the 'follow_symlinks' and 'recursive' keyword - # parameters - # ------------------------------------------------------------ - if follow_symlinks and not recursive: - raise ValueError( - f"Can't set follow_symlinks={follow_symlinks!r} " - f"when recursive={recursive!r}" - ) - - # Initialise the output list of fields/domains - if domain: - out = DomainList() - else: - out = FieldList() - - # Count the number of fields (in all files) and the number of - # files - field_counter = -1 - file_counter = 0 - - if cdl_string: - if isinstance(files, str): - files = (files,) - - files = [ - NetCDFRead.string_to_cdl(cdl_string) for cdl_string in files - ] - file_type = set(("CDL",)) - - for file_glob in flat(files): - # Expand variables - file_glob = os.path.expanduser(os.path.expandvars(file_glob)) - - scheme = urlparse(file_glob).scheme - if scheme in ("https", "http", "s3"): - # Do not glob a remote URL - files2 = (file_glob,) - else: - # Glob files on disk - files2 = glob(file_glob) - - if not files2: - # Trigger a FileNotFoundError error - open(file_glob) - - files3 = [] - for x in files2: - if isdir(x): - # Walk through directories, possibly recursively - for path, subdirs, filenames in os.walk( - x, followlinks=followlinks - ): - files3.extend( - os.path.join(path, f) for f in filenames - ) - if not recursive: - break - else: - files3.append(x) - - files2 = files3 - - # The types of all of the input files - ftypes = set() - - for filename in files2: - if info: - logger.info(f"File: {filename}") # pragma: no cover - - # ---------------------------------------------------- - # Read the file - # ---------------------------------------------------- - file_contents = [] - - # The type of this file - ftype = None - - # Record file type errors - file_format_errors = [] - - if ftype is None and ( - file_type is None - or file_type.intersection(netCDF_file_types) - ): - # Try to read as netCDF - try: - file_contents = super().__new__( - cls, - filename=filename, - external=external, - extra=extra, - verbose=verbose, - warnings=warnings, - mask=mask, - unpack=unpack, - warn_valid=warn_valid, - domain=domain, - storage_options=storage_options, - netcdf_backend=netcdf_backend, - dask_chunks=dask_chunks, - store_dataset_chunks=store_dataset_chunks, - cache=cache, - cfa=cfa, - cfa_write=cfa_write, - to_memory=to_memory, - squeeze=squeeze, - unsqueeze=unsqueeze, - file_type=file_type, - ) - except DatasetTypeError as error: - if file_type is None: - file_format_errors.append(error) - else: - file_format_errors = [] - ftype = "netCDF" - - if ftype is None and ( - file_type is None or file_type.intersection(UM_file_types) - ): - # Try to read as UM - try: - file_contents = cls.um.read( - filename, - um_version=um.get("version"), - verbose=verbose, - set_standard_name=False, - height_at_top_of_model=height_at_top_of_model, - fmt=um.get("fmt"), - word_size=um.get("word_size"), - endian=um.get("endian"), - select=select, - squeeze=squeeze, - unsqueeze=unsqueeze, - domain=domain, - file_type=file_type, - unpack=unpack, - ) - except DatasetTypeError as error: - if file_type is None: - file_format_errors.append(error) - else: - file_format_errors = [] - ftype = "UM" - - if file_format_errors: - error = "\n".join(map(str, file_format_errors)) - raise DatasetTypeError(f"\n{error}") - - if domain: - file_contents = DomainList(file_contents) - - file_contents = FieldList(file_contents) - - if ftype: - ftypes.add(ftype) - - # Select matching 
fields (only for netCDF files at - # this stage - we'll other it for other file types - # later) - if select and ftype == "netCDF": - file_contents = file_contents.select_by_identity(*select) - - # Add this file's contents to that already read from - # other files - out.extend(file_contents) - - field_counter = len(out) - file_counter += 1 + .. versionadded:: NEXTVERSION - # ---------------------------------------------------------------- - # Aggregate the output fields/domains - # ---------------------------------------------------------------- - if aggregate and len(out) > 1: - org_len = len(out) # pragma: no cover + :Parameters: - if "UM" in ftypes: - # Set defaults specific to UM fields - if "strict_units" not in aggregate_options: - aggregate_options["relaxed_units"] = True + dataset: `str` + The pathname of the dataset to be read. - out = cf_aggregate(out, **aggregate_options) + :Returns: - n = len(out) # pragma: no cover - if info: - logger.info( - f"{org_len} input field{cls._plural(org_len)} " - f"aggregated into {n} field{cls._plural(n)}" - ) # pragma: no cover + `None` - # ---------------------------------------------------------------- - # Sort by netCDF variable name - # ---------------------------------------------------------------- - if len(out) > 1: - out.sort(key=lambda f: f.nc_get_variable("")) + """ + dataset_type = self.dataset_type - # ---------------------------------------------------------------- - # Add standard names to UM/PP fields (post aggregation) - # ---------------------------------------------------------------- - for f in out: - standard_name = f._custom.get("standard_name", None) - if standard_name is not None: - f.set_property("standard_name", standard_name, copy=False) - del f._custom["standard_name"] - - # ---------------------------------------------------------------- - # Select matching fields from UM files (post setting of their - # standard names) - # ---------------------------------------------------------------- - if select and "UM" in ftypes: - out = out.select_by_identity(*select) + # ------------------------------------------------------------ + # Try to read as a netCDF dataset + # ------------------------------------------------------------ + super()._read(dataset) - if info: - logger.info( - f"Read {field_counter} field{cls._plural(field_counter)} " - f"from {file_counter} file{cls._plural(file_counter)}" - ) # pragma: no cover + if self.dataset_contents: + return - if nfields is not None and len(out) != nfields: - raise ValueError( - f"{nfields} field{cls._plural(nfields)} requested but " - f"{len(out)} field/domain constuct{cls._plural(len(out))}" - f" found in file{cls._plural(file_counter)}" - ) + # ------------------------------------------------------------ + # Try to read as a PP/UM dataset + # ------------------------------------------------------------ + if dataset_type is None or dataset_type.intersection( + self.UM_dataset_types + ): + if not hasattr(self, "um_read"): + # Initialise the UM read function + kwargs = self.kwargs + um_kwargs = { + key: kwargs[key] + for key in ( + "verbose", + "height_at_top_of_model", + "squeeze", + "unsqueeze", + "domain", + "file_type", + "unpack", + ) + } + um_kwargs["set_standard_name"] = False + um_kwargs["select"] = self.select + um = self.um + um_kwargs["um_version"] = um.get("version") + um_kwargs["fmt"] = um.get("fmt") + um_kwargs["word_size"] = um.get("word_size") + um_kwargs["endian"] = um.get("endian") + + self.um_read = partial( + UMRead(self.implementation).read, **um_kwargs + ) + + 
try:
+            # Try to read the dataset
+            self.dataset_contents = self.um_read(dataset)
+        except DatasetTypeError as error:
+            if dataset_type is None:
+                self.dataset_format_errors.append(error)
+        else:
+            # Successfully read the dataset
+            self.unique_dataset_categories.add("UM")

-        return out
+        if self.dataset_contents:
+            return

-    @staticmethod
-    def _plural(n):  # pragma: no cover
-        """Return a suffix which reflects a word's plural."""
-        return "s" if n != 1 else ""  # pragma: no cover
+        # ------------------------------------------------------------
+        # Try to read as a GRIB dataset
+        # ------------------------------------------------------------
+        # Not yet available! The framework will be:
+        #
+        # if dataset_type is None or dataset_type.intersection(
+        #     self.GRIB_dataset_types
+        # ):
+        #     if not hasattr(self, "grib_read"):
+        #         # Initialise the GRIB read function
+        #         kwargs = self.kwargs
+        #         grib_kwargs = {
+        #
+        #         }
+        #
+        #         self.grib_read = partial(
+        #             GRIBRead(self.implementation).read, **grib_kwargs
+        #         )
+        #
+        #     try:
+        #         # Try to read the dataset
+        #         self.dataset_contents = self.grib_read(dataset)
+        #     except DatasetTypeError as error:
+        #         if dataset_type is None:
+        #             self.dataset_format_errors.append(error)
+        #     else:
+        #         # Successfully read the dataset
+        #         self.unique_dataset_categories.add("GRIB")
+        #
+        #     if self.dataset_contents:
+        #         return
diff --git a/cf/test/test_read_write.py b/cf/test/test_read_write.py
index d4a0c1726e..e013010fe6 100644
--- a/cf/test/test_read_write.py
+++ b/cf/test/test_read_write.py
@@ -51,6 +51,14 @@ def _remove_tmpfiles():
 class read_writeTest(unittest.TestCase):
     filename = filename

+    zarr2 = os.path.join(
+        os.path.dirname(os.path.abspath(__file__)), "example_field_0.zarr2"
+    )
+
+    zarr3 = os.path.join(
+        os.path.dirname(os.path.abspath(__file__)), "example_field_0.zarr3"
+    )
+
     broken_bounds = os.path.join(
         os.path.dirname(os.path.abspath(__file__)), "broken_bounds.cdl"
     )
@@ -696,9 +704,8 @@ def test_read_CDL(self):
             cf.read("test_read_write.py")

     def test_read_cdl_string(self):
-        """Test the `cdl_string` keyword of the `read` function."""
-        f = self.f0
-        cf.write(f, tmpfile0)
+        """Test the cf.read 'cdl_string' keyword."""
+        f = cf.read("example_field_0.nc")[0]

         # Test CDL in full, header-only and coordinate-only type:
         tempfile_to_option_mapping = {
@@ -709,7 +716,7 @@ def test_read_cdl_string(self):

         for tempf, option in tempfile_to_option_mapping.items():
             # Set up the CDL string to test...
-            command_to_run = ["ncdump", tmpfile0, ">", tempf]
+            command_to_run = ["ncdump", "example_field_0.nc", ">", tempf]
             if option:
                 command_to_run.insert(1, option)

@@ -720,12 +727,15 @@ def test_read_cdl_string(self):
             for cdl_input in (cdl_string_1, (cdl_string_1,)):
                 f_from_str = cf.read(cdl_input, cdl_string=True)
                 self.assertEqual(len(f_from_str), 1)
-                self.assertEqual(f_from_str[0], f)
+                if not option:
+                    self.assertTrue(f_from_str[0].equals(f))

-        # Check compatibility with the 'file_type' kwarg.
-        for file_type in ("netCDF", "CDL", "UM", ()):
-            with self.assertRaises(ValueError):
-                cf.read(cdl_string_1, cdl_string=True, file_type=file_type)
+        # Check compatibility with the 'dataset_type' kwarg.
+        f_from_str = cf.read(cdl_string_1, cdl_string=True, dataset_type="CDL")
+        self.assertEqual(len(f_from_str), 1)
+
+        with self.assertRaises(ValueError):
+            cf.read(cdl_string_1, cdl_string=True, dataset_type="netCDF")

         # If the user forgets the cdl_string=True argument they will
         # accidentally attempt to create a file with a very long name
@@ -855,9 +865,9 @@ def test_write_omit_data(self):
         self.assertFalse(np.ma.count(g.array))
         self.assertTrue(np.ma.count(g.construct("grid_latitude").array))

-    @unittest.skipUnless(
-        True, "URL TEST: UNRELIABLE FLAKEY URL DESTINATION. TODO REPLACE URL"
-    )
+    # @unittest.skipUnless(
+    #     True, "URL TEST: UNRELIABLE FLAKEY URL DESTINATION. TODO REPLACE URL"
+    # )
     def test_read_url(self):
         """Test reading urls."""
         for scheme in ("http", "https"):
@@ -866,21 +876,20 @@ def test_read_url(self):
             f = cf.read(remote)
             self.assertEqual(len(f), 1)

-    def test_read_file_type(self):
-        """Test the cf.read 'file_type' keyword."""
-        # netCDF file
-        for file_type in (
+    def test_read_dataset_type(self):
+        """Test the cf.read 'dataset_type' keyword."""
+        # netCDF dataset
+        for dataset_type in (
             None,
             "netCDF",
             ("netCDF",),
             ("netCDF", "CDL"),
-            ("netCDF", "bad value"),
         ):
-            f = cf.read(self.filename, file_type=file_type)
+            f = cf.read(self.filename, dataset_type=dataset_type)
             self.assertEqual(len(f), 1)

-        for file_type in ("CDL", "bad value", ()):
-            f = cf.read(self.filename, file_type=file_type)
+        for dataset_type in ("CDL", ("CDL", "Zarr"), ()):
+            f = cf.read(self.filename, dataset_type=dataset_type)
             self.assertEqual(len(f), 0)

         # CDL file
@@ -889,36 +898,50 @@ def test_read_file_type(self):
             shell=True,
             check=True,
         )
-        for file_type in (
+        for dataset_type in (
             None,
             "CDL",
-            ("netCDF", "CDL"),
-            ("CDL", "bad value"),
+            ("CDL", "netCDF"),
         ):
-            f = cf.read(tmpfile, file_type=file_type)
-            self.assertEqual(len(f), 1)
-
-        for file_type in ("netCDF", "bad value", ()):
-            f = cf.read(tmpfile, file_type=file_type)
-            self.assertEqual(len(f), 0)
-
-        # UM file
-        for file_type in (None, "UM", ("UM",), ("UM", "bad value")):
-            f = cf.read("file1.pp", file_type=file_type)
+            f = cf.read(tmpfile, dataset_type=dataset_type)
             self.assertEqual(len(f), 1)

-        for file_type in ("netCDF", "bad value", ()):
-            f = cf.read("file1.pp", file_type=file_type)
+        for dataset_type in ("netCDF", ()):
+            f = cf.read(tmpfile, dataset_type=dataset_type)
             self.assertEqual(len(f), 0)

-        # Not a netCDF, CDL, or UM file
+        # Not a netCDF or CDL file
         with self.assertRaises(DatasetTypeError):
             f = cf.read("test_read_write.py")

-        for file_type in ("netCDF", "CDL", "bad value", ()):
-            f = cf.read("test_read_write.py", file_type=file_type)
+        for dataset_type in ("netCDF", ()):
+            f = cf.read("test_read_write.py", dataset_type=dataset_type)
             self.assertEqual(len(f), 0)

+        # Bad values
+        for dataset_type in ("bad value", ("bad value", "netCDF")):
+            with self.assertRaises(ValueError):
+                cf.read(self.filename, dataset_type=dataset_type)
+
+    def test_read_zarr(self):
+        """Test the cf.read of a zarr dataset."""
+        n = cf.read("example_field_0.nc")[0]
+        for zarr_dataset in (self.zarr2, self.zarr3):
+            z = cf.read(zarr_dataset, dask_chunks=3)
+            self.assertEqual(len(z), 1)
+            z = z[0]
+            self.assertTrue(z.equals(n))
+
+            cf.write(z, tmpfile)
+            n2 = cf.read(tmpfile)[0]
+            self.assertTrue(n2.equals(n))
+
+            z = 
cf.read(zarr_dataset, dataset_type="netCDF") + self.assertEqual(len(z), 0) + + z = cf.read(zarr_dataset, dataset_type="Zarr") + self.assertEqual(len(z), 1) + if __name__ == "__main__": print("Run date:", datetime.datetime.now()) diff --git a/docs/source/tutorial.rst b/docs/source/tutorial.rst index 326d93e608..53ba7de55d 100644 --- a/docs/source/tutorial.rst +++ b/docs/source/tutorial.rst @@ -193,7 +193,7 @@ replacing any file name with a directory name. An attempt will be made to read all files in the directory, which will result in an error if any have a non-supported format. Non-supported files may be ignored by being more specific about the file type intended for reading in -using the *file_type* keyword: +using the *dataset_type* keyword: .. code-block:: python :caption: *Read all of the files in the current working directory.* @@ -202,7 +202,7 @@ using the *file_type* keyword: Traceback (most recent call last): ... Exception: Can't determine format of file cf_tutorial_files.zip - >>> y = cf.read('$PWD', file_type='netCDF') + >>> y = cf.read('$PWD', dataset_type='netCDF') >>> len(y) 15 From ab9f1ac251a34b80cb7d7fa4ff6b4cb34d45a159 Mon Sep 17 00:00:00 2001 From: David Hassell Date: Mon, 19 May 2025 23:20:09 +0100 Subject: [PATCH 02/12] dev --- cf/data/array/zarrarray.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 cf/data/array/zarrarray.py diff --git a/cf/data/array/zarrarray.py b/cf/data/array/zarrarray.py new file mode 100644 index 0000000000..acbb878c6b --- /dev/null +++ b/cf/data/array/zarrarray.py @@ -0,0 +1,12 @@ +import cfdm + +from ...mixin_container import Container +#from .mixin import ActiveStorageMixin + + +class ZarrArray( +# ActiveStorageMixin, + Container, + cfdm.ZarrArray, +): + """A Zarr array accessed with `zarr`.""" From 8a8f866a895ba428480e62c774a95ac879adfe27 Mon Sep 17 00:00:00 2001 From: David Hassell Date: Mon, 19 May 2025 23:21:50 +0100 Subject: [PATCH 03/12] dev --- cf/test/example_field_0.nc | Bin 0 -> 23279 bytes cf/test/example_field_0.zarr2/.zattrs | 3 + cf/test/example_field_0.zarr2/.zgroup | 3 + cf/test/example_field_0.zarr2/.zmetadata | 171 ++++++++++ cf/test/example_field_0.zarr2/lat/.zarray | 20 ++ cf/test/example_field_0.zarr2/lat/.zattrs | 8 + cf/test/example_field_0.zarr2/lat/0 | Bin 0 -> 56 bytes .../example_field_0.zarr2/lat_bnds/.zarray | 22 ++ .../example_field_0.zarr2/lat_bnds/.zattrs | 6 + .../example_field_0.zarr2/lat_bnds/.zattrs~ | 6 + cf/test/example_field_0.zarr2/lat_bnds/0.0 | Bin 0 -> 64 bytes cf/test/example_field_0.zarr2/lat_bnds/1.0 | Bin 0 -> 64 bytes cf/test/example_field_0.zarr2/lon/.zarray | 20 ++ cf/test/example_field_0.zarr2/lon/.zattrs | 8 + cf/test/example_field_0.zarr2/lon/0 | Bin 0 -> 80 bytes .../example_field_0.zarr2/lon_bnds/.zarray | 22 ++ .../example_field_0.zarr2/lon_bnds/.zattrs | 6 + .../example_field_0.zarr2/lon_bnds/.zattrs~ | 6 + cf/test/example_field_0.zarr2/lon_bnds/0.0 | Bin 0 -> 80 bytes cf/test/example_field_0.zarr2/lon_bnds/1.0 | Bin 0 -> 80 bytes cf/test/example_field_0.zarr2/q/.zarray | 22 ++ cf/test/example_field_0.zarr2/q/.zattrs | 11 + cf/test/example_field_0.zarr2/q/0.0 | Bin 0 -> 112 bytes cf/test/example_field_0.zarr2/q/0.1 | Bin 0 -> 112 bytes cf/test/example_field_0.zarr2/q/1.0 | Bin 0 -> 112 bytes cf/test/example_field_0.zarr2/q/1.1 | Bin 0 -> 112 bytes cf/test/example_field_0.zarr2/time/.zarray | 10 + cf/test/example_field_0.zarr2/time/.zattrs | 5 + cf/test/example_field_0.zarr2/time/.zattrs~ | 6 + cf/test/example_field_0.zarr2/time/0 | Bin 0 -> 8 bytes 
cf/test/example_field_0.zarr3/lat/c/0 | Bin 0 -> 41 bytes cf/test/example_field_0.zarr3/lat/zarr.json | 47 +++ cf/test/example_field_0.zarr3/lat_bnds/c/0/0 | Bin 0 -> 39 bytes .../example_field_0.zarr3/lat_bnds/zarr.json | 47 +++ cf/test/example_field_0.zarr3/lon/c/0 | Bin 0 -> 50 bytes cf/test/example_field_0.zarr3/lon/zarr.json | 47 +++ cf/test/example_field_0.zarr3/lon_bnds/c/0/0 | Bin 0 -> 50 bytes .../example_field_0.zarr3/lon_bnds/zarr.json | 47 +++ cf/test/example_field_0.zarr3/q/c/0/0 | Bin 0 -> 235 bytes cf/test/example_field_0.zarr3/q/zarr.json | 52 ++++ cf/test/example_field_0.zarr3/time/c | Bin 0 -> 17 bytes cf/test/example_field_0.zarr3/time/zarr.json | 39 +++ cf/test/example_field_0.zarr3/zarr.json | 292 ++++++++++++++++++ 43 files changed, 926 insertions(+) create mode 100644 cf/test/example_field_0.nc create mode 100644 cf/test/example_field_0.zarr2/.zattrs create mode 100644 cf/test/example_field_0.zarr2/.zgroup create mode 100644 cf/test/example_field_0.zarr2/.zmetadata create mode 100644 cf/test/example_field_0.zarr2/lat/.zarray create mode 100644 cf/test/example_field_0.zarr2/lat/.zattrs create mode 100644 cf/test/example_field_0.zarr2/lat/0 create mode 100644 cf/test/example_field_0.zarr2/lat_bnds/.zarray create mode 100644 cf/test/example_field_0.zarr2/lat_bnds/.zattrs create mode 100644 cf/test/example_field_0.zarr2/lat_bnds/.zattrs~ create mode 100644 cf/test/example_field_0.zarr2/lat_bnds/0.0 create mode 100644 cf/test/example_field_0.zarr2/lat_bnds/1.0 create mode 100644 cf/test/example_field_0.zarr2/lon/.zarray create mode 100644 cf/test/example_field_0.zarr2/lon/.zattrs create mode 100644 cf/test/example_field_0.zarr2/lon/0 create mode 100644 cf/test/example_field_0.zarr2/lon_bnds/.zarray create mode 100644 cf/test/example_field_0.zarr2/lon_bnds/.zattrs create mode 100644 cf/test/example_field_0.zarr2/lon_bnds/.zattrs~ create mode 100644 cf/test/example_field_0.zarr2/lon_bnds/0.0 create mode 100644 cf/test/example_field_0.zarr2/lon_bnds/1.0 create mode 100644 cf/test/example_field_0.zarr2/q/.zarray create mode 100644 cf/test/example_field_0.zarr2/q/.zattrs create mode 100644 cf/test/example_field_0.zarr2/q/0.0 create mode 100644 cf/test/example_field_0.zarr2/q/0.1 create mode 100644 cf/test/example_field_0.zarr2/q/1.0 create mode 100644 cf/test/example_field_0.zarr2/q/1.1 create mode 100644 cf/test/example_field_0.zarr2/time/.zarray create mode 100644 cf/test/example_field_0.zarr2/time/.zattrs create mode 100644 cf/test/example_field_0.zarr2/time/.zattrs~ create mode 100644 cf/test/example_field_0.zarr2/time/0 create mode 100644 cf/test/example_field_0.zarr3/lat/c/0 create mode 100644 cf/test/example_field_0.zarr3/lat/zarr.json create mode 100644 cf/test/example_field_0.zarr3/lat_bnds/c/0/0 create mode 100644 cf/test/example_field_0.zarr3/lat_bnds/zarr.json create mode 100644 cf/test/example_field_0.zarr3/lon/c/0 create mode 100644 cf/test/example_field_0.zarr3/lon/zarr.json create mode 100644 cf/test/example_field_0.zarr3/lon_bnds/c/0/0 create mode 100644 cf/test/example_field_0.zarr3/lon_bnds/zarr.json create mode 100644 cf/test/example_field_0.zarr3/q/c/0/0 create mode 100644 cf/test/example_field_0.zarr3/q/zarr.json create mode 100644 cf/test/example_field_0.zarr3/time/c create mode 100644 cf/test/example_field_0.zarr3/time/zarr.json create mode 100644 cf/test/example_field_0.zarr3/zarr.json diff --git a/cf/test/example_field_0.nc b/cf/test/example_field_0.nc new file mode 100644 index 
0000000000000000000000000000000000000000..585bc9bc0cace2823d6b9858704a8a719360b4c7 GIT binary patch literal 23279 zcmeHP3sh9c8J@e#W5ESBicpoBRja1*2;iEkg@9dFH1e>*F{v2VW%ml(z%K4Ch^fyu zwrSFn_GqFB$)OrsW11RD<0BrmiZM|eZ1W%qte91~*AQ9J+4{0pwE$ccK6 zo|ywXcV7Se^UXhV@8190`Q?n9=~0oxBQ-i5!Ei~0u6aBsRTTUod3{>pjGQ?d{Wi^L zmZyynMc5=Tj?@qZstipaAvDrzNMH?2>+gbkTB0X9sA_h*YE_rd>2`U^5LysU|26dA zJUuxrB`w`xpx5LW$fUu-Bf@jDv^>cK!X>n$VS9ID1XDjMUQuEpN24f8C|uBhu{IVo zzQdSB^?;B~OoNE&5|tSm4JF#d9-ta>2gO02MgzlCSPO_1HdwCBdqObkh)6^PM`9ZE zt0l1%3#z8IK=s)j3r%LX+v9M$Y(CXX3{(d`b|V`E)qw?!A)!wsR1qx+6M*JTD=K18 z0(1aL&dhvELD9^@0&9_ZS{~D{`<*2zrW|LL({XthAddyl8+TW73`s8x1;7Y25g3lT z)30Q_j>NB^5<;0hXd^i`_k?k&Yk1_hlF;DT&_Mw<*E}fRQMoXQ3egc@qoHD=DZ~^< zgt+gKGf<;N%U)4gnx?+cJY!baaL80b5d82mca6*8O`j-WX}S1j1Ju*gcH@Rf{W}UL*ap8C|N+oHO0%a8{`gos?CsEuhJo6;E*@HGHCpm5xZukPx`4%kCa(=X zt<3rcWb&v##?UFw03jR3to{XH&PR7x*x65@5)EM^;8O{~wz^82&ss+3?C-)(pqP+N zM~=J>LV|Az`uTiHV~7EkYR`(wn^{y$hEW-?*Z>>A0k#|L5@V(^6w&~5&^rxU56DtM zNaKmDmY$KDVc(>oiAO{~k{f=Y4Z>yuDp4Sy=9&w~Y7xxKF`-qgKUxPJ=mtYgt%4i4 z7zEpdwZt$UN(p(4M}*A9i|ZdhzTkP~TPEB@Qs_U7 zvR3uLvdT8_@W&X-i-SU4#Y?ldXlUZb!TMMySn-xD@& zY2DX@_TiFeg z{-A(-l%o&G$NDf45KunqkN1bb{1{%{UmxX*Z1N~J#0(ir1SA3y0f~S_Kq4R!kO)Ww zBmxoviGW1l1|U#8$6|pO*7z0&i_-Dh(ra};O zbDV{p6JkH{d&&JQ5PV$4IJnDqpq_E=fu1J3hjaKq{Vwqv$^wCBao&C7INtArqI=;f zFLl%(>+xQ0AInQEqEc8KTqq&v;?q($BD%YHm8#>XGke^g!=;y_#zA>}mx9NP)&@FTU@l9biKxgQTC!}lT zy}5unau{IELC{sYU2I9cC`^qH|G;;7aS4ya=d-)U-48;7oB8!+Hh`Cv4MEHqcvlC+ zYyb>CskXL(($H`~#HM~p4{jH6{mjr&zAlw98}v)L7e0oh0;^+=Wx8dKrNC^lGFHbu zR2OiTJ8V9iSEY0bq<8q~Iz$KMbmODJ*eB})ITbjqIA@sFM5M&SP<6P&hGsHp%;*3^ zJzZ;k5(NNR01Jr%s?F=uC-uV<0H--Lq9#<%hyvHJfeUK`D+?#dvuSG@ZU)=q&Ux>Z z_%{OvGN>NhlEsESL6{z`+mMUgl_Ig%VHvLHtI!lvVVr$1G=|b_2E1*F*XVV+?5Z(6 zHEnWoT6%J78cY|MtYD^G$<%;xKx-eRd=ar@(PV#NNX51jY%>sBHL@tQbS9B$5XPuk zN(eG3LZFWwcX82S><*}eAOOmuqI_QRP|I${YYU=6U8#M6aj0qkFx&Fwwb>ta#@CXN zgls~}|E+5e_D+ehuKJMoh}c=N$iJmwhKwZw5&?;TL_i`S5xDITfM<#QO0+4}l%Vt2 zs|QW3UfJBNkhCqKO~D=|fXREc@Il7qrTBaU&sY#J%~SGIA|Mfv2uK7Z0ulj6jPpN7W_A08V!GNbKJ8&@?SO-@Q0H7YrI{0kd4y!z^|nicxe?&8CPrv!V- zq(7GHGfWb%4Y3fPCNai*8A}8t0uljl1%)HuvC(^|_yxZa%)xHEc-buHyVrvyT3A#pmlT z9x7Ql!qsN4+qd(hgByQ8{)y_?r>f5t?o9rA>h3&S{u>>W_AZ(Kwdslf9-IA2>ZbDZ z&!7CyR|#z^4v(ZR+u7sqw%3)ua^~XLVPBqG|LeC7uHAj6v$g4WNn0nnmzR8e>QY8x z%fq#EJ6F{&%hee+@2Q~zt-`>%ZtvR zUK@Ax>D^0yO`R{7>}e`{yJPj) zmHS6V9$!Vs9~;MPp;EW)Xt1|u9$KU59ys%1mM5$8aGc@$EgxE&FYP)IL8b0oZ0_oc zQjI&#d~wS1?)`D6wN2kUc~b3I*79^rSz~*vY0*2+uYS-kbwT{-zuZ11CPyL>kO)Ww zBm%b_0-T(bqfr6*_R^Pj`CQ!uiCyb0tY=o;Bg;aB|H3xD;c zXD>Fs@i|Q3Wz&!4lqVhSnkrE3asODg`z|8^dsMG#^Vlo&CLljug&lPVl?O7t)vDdO z&}p|;)X*c!oW3P``ZF~+VCRbSa)3w&Ez&>@Aj*kjltdlt z=|)o$X{Rh84-C_ugY%xbIhr6D5_?8XkR18QSn@u9V;6FTAv>^9khp{yI3f0cau#KAaAN@}cyD1PC8SH>5cL02`bMZ2$lO literal 0 HcmV?d00001 diff --git a/cf/test/example_field_0.zarr2/lon_bnds/1.0 b/cf/test/example_field_0.zarr2/lon_bnds/1.0 new file mode 100644 index 0000000000000000000000000000000000000000..4461e510e92cd3bb68c020a0b129a8a1fd18cf71 GIT binary patch literal 80 rcmZQ#H0E$%U|;~@03Zf~hBOB-t&oFCKPZ6kVf2P#2p>i_lsNzZV$2HZ literal 0 HcmV?d00001 diff --git a/cf/test/example_field_0.zarr2/q/.zarray b/cf/test/example_field_0.zarr2/q/.zarray new file mode 100644 index 0000000000..e6e2c23d0d --- /dev/null +++ b/cf/test/example_field_0.zarr2/q/.zarray @@ -0,0 +1,22 @@ +{ + "chunks": [ + 3, + 4 + ], + "compressor": { + "blocksize": 0, + "clevel": 5, + "cname": "lz4", + "id": "blosc", + "shuffle": 1 + }, + "dtype": "+iS-3ANs1X$Nop~(PRE3&8|;Oe51d-)y3Ky#+Zub*xLx)q%l6He(qC)8%aeym IyS>dG0J;D#egFUf literal 0 HcmV?d00001 diff --git 
a/cf/test/example_field_0.zarr2/q/1.0 b/cf/test/example_field_0.zarr2/q/1.0 new file mode 100644 index 0000000000000000000000000000000000000000..b71f5dc59971cee259c58961f1f3f0e0ad83f4f1 GIT binary patch literal 112 zcmZQ#H0DTPU|;~@0w9)T3FFh+6m7zv{B~ hluWL$x4!S+J3Via{q3J%0jT^!d!P|O@S`3V4FI+jCtd&m literal 0 HcmV?d00001 diff --git a/cf/test/example_field_0.zarr2/q/1.1 b/cf/test/example_field_0.zarr2/q/1.1 new file mode 100644 index 0000000000000000000000000000000000000000..377eb07cb782bc18c866ac4ad785aaa5ad77e488 GIT binary patch literal 112 zcmZQ#H0DTPU|;~@0w9)T3FFhN0ra>71 DamfvK literal 0 HcmV?d00001 diff --git a/cf/test/example_field_0.zarr3/lon/zarr.json b/cf/test/example_field_0.zarr3/lon/zarr.json new file mode 100644 index 0000000000..42f9657211 --- /dev/null +++ b/cf/test/example_field_0.zarr3/lon/zarr.json @@ -0,0 +1,47 @@ +{ + "shape": [ + 8 + ], + "data_type": "float64", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 8 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0.0, + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "zstd", + "configuration": { + "level": 0, + "checksum": false + } + } + ], + "attributes": { + "units": "degrees_east", + "standard_name": "longitude", + "bounds": "lon_bnds", + }, + "dimension_names": [ + "lon" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] +} diff --git a/cf/test/example_field_0.zarr3/lon_bnds/c/0/0 b/cf/test/example_field_0.zarr3/lon_bnds/c/0/0 new file mode 100644 index 0000000000000000000000000000000000000000..3a05139fabd77d1a74d4826bad22504dba20c39a GIT binary patch literal 50 zcmdPcs{dD^!IzO?0RuyWn*&4GgM@}Og`5Wk8;Tps95@*qjzOWDLZz$jh9e9S0MM2a A%>V!Z literal 0 HcmV?d00001 diff --git a/cf/test/example_field_0.zarr3/lon_bnds/zarr.json b/cf/test/example_field_0.zarr3/lon_bnds/zarr.json new file mode 100644 index 0000000000..ed74f33e81 --- /dev/null +++ b/cf/test/example_field_0.zarr3/lon_bnds/zarr.json @@ -0,0 +1,47 @@ +{ + "shape": [ + 8, + 2 + ], + "data_type": "float64", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 8, + 2 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0.0, + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "zstd", + "configuration": { + "level": 0, + "checksum": false + } + } + ], + "attributes": { + }, + "dimension_names": [ + "lon", + "bounds2" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] +} diff --git a/cf/test/example_field_0.zarr3/q/c/0/0 b/cf/test/example_field_0.zarr3/q/c/0/0 new file mode 100644 index 0000000000000000000000000000000000000000..924dc5043c1f5bd21df06523b786c973e1bb19a3 GIT binary patch literal 235 zcmVFAwm_Hc=Rt_z)oj+P+qCX1Pqi{V6r$1OZw8(b8uctrbrJKYcu(EhROgD)^ zVCAqsDO``j^(eeQ_*1~Y!aov&z{;UTwm*^Wd_Of_x`}NLh3m1cvVxI#+xek6w8(b8pNc;f04qztz|I@O1(FFIn!+H<+~iwj lHZoZpDwk-xSs49O<=ZNmqV^6U%R^;YDs53M9m*{hn`0yfZNLBk literal 0 HcmV?d00001 diff --git a/cf/test/example_field_0.zarr3/q/zarr.json b/cf/test/example_field_0.zarr3/q/zarr.json new file mode 100644 index 0000000000..7b895a0ecd --- /dev/null +++ b/cf/test/example_field_0.zarr3/q/zarr.json @@ -0,0 +1,52 @@ +{ + "shape": [ + 5, + 8 + ], + "data_type": "float64", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 5, + 8 + ] + } + }, + 
"chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0.0, + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "zstd", + "configuration": { + "level": 0, + "checksum": false + } + } + ], + "attributes": { + "project": "research", + "standard_name": "specific_humidity", + "units": "1", + "cell_methods": "area: mean", + "coordinates": "time", + }, + "dimension_names": [ + "lat", + "lon" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] +} diff --git a/cf/test/example_field_0.zarr3/time/c b/cf/test/example_field_0.zarr3/time/c new file mode 100644 index 0000000000000000000000000000000000000000..16e658b14d90c05cf1c523593ca4c6f8efd0b32d GIT binary patch literal 17 ScmdPcs{dDk!;t|B>>U6kDFX5U literal 0 HcmV?d00001 diff --git a/cf/test/example_field_0.zarr3/time/zarr.json b/cf/test/example_field_0.zarr3/time/zarr.json new file mode 100644 index 0000000000..32ebdf512d --- /dev/null +++ b/cf/test/example_field_0.zarr3/time/zarr.json @@ -0,0 +1,39 @@ +{ + "shape": [], + "data_type": "float64", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0.0, + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "zstd", + "configuration": { + "level": 0, + "checksum": false + } + } + ], + "attributes": { + "standard_name": "time", + "units": "days since 2018-12-01" + }, + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] +} diff --git a/cf/test/example_field_0.zarr3/zarr.json b/cf/test/example_field_0.zarr3/zarr.json new file mode 100644 index 0000000000..54ac972c19 --- /dev/null +++ b/cf/test/example_field_0.zarr3/zarr.json @@ -0,0 +1,292 @@ +{ + "attributes": { + "Conventions": "CF-1.12" + }, + "zarr_format": 3, + "consolidated_metadata": { + "kind": "inline", + "must_understand": false, + "metadata": { + "lon": { + "shape": [ + 8 + ], + "data_type": "float64", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 8 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0.0, + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "zstd", + "configuration": { + "level": 0, + "checksum": false + } + } + ], + "attributes": { + "units": "degrees_east", + "standard_name": "longitude", + "bounds": "lon_bnds" + }, + "dimension_names": [ + "lon" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] + }, + "lat_bnds": { + "shape": [ + 5, + 2 + ], + "data_type": "float64", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 5, + 2 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0.0, + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "zstd", + "configuration": { + "level": 0, + "checksum": false + } + } + ], + "attributes": { + }, + "dimension_names": [ + "lat", + "bounds2" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] + }, + "lat": { + "shape": [ + 5 + ], + "data_type": "float64", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 5 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + 
"configuration": { + "separator": "/" + } + }, + "fill_value": 0.0, + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "zstd", + "configuration": { + "level": 0, + "checksum": false + } + } + ], + "attributes": { + "units": "degrees_north", + "standard_name": "latitude", + "bounds": "lat_bnds" + }, + "dimension_names": [ + "lat" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] + }, + "time": { + "shape": [], + "data_type": "float64", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0.0, + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "zstd", + "configuration": { + "level": 0, + "checksum": false + } + } + ], + "attributes": { + "standard_name": "time", + "units": "days since 2018-12-01" + }, + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] + }, + "lon_bnds": { + "shape": [ + 8, + 2 + ], + "data_type": "float64", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 8, + 2 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0.0, + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "zstd", + "configuration": { + "level": 0, + "checksum": false + } + } + ], + "attributes": { + }, + "dimension_names": [ + "lon", + "bounds2" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] + }, + "q": { + "shape": [ + 5, + 8 + ], + "data_type": "float64", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 5, + 8 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0.0, + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "zstd", + "configuration": { + "level": 0, + "checksum": false + } + } + ], + "attributes": { + "project": "research", + "standard_name": "specific_humidity", + "units": "1", + "cell_methods": "area: mean", + "coordinates": "time" + }, + "dimension_names": [ + "lat", + "lon" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] + } + } + }, + "node_type": "group" +} From 488f52eaef7ef6efcae8cc7760325df652c4b27c Mon Sep 17 00:00:00 2001 From: David Hassell Date: Tue, 20 May 2025 11:40:02 +0100 Subject: [PATCH 04/12] dev --- cf/cfimplementation.py | 16 ++++++++-------- cf/data/data.py | 10 +++++----- cf/data/mixin/deprecations.py | 14 ++++++++------ cf/data/utils.py | 4 ++-- cf/field.py | 12 ++++++------ cf/test/example_field_0.zarr2/lat_bnds/.zattrs~ | 6 ------ cf/test/example_field_0.zarr2/lon_bnds/.zattrs~ | 6 ------ cf/test/example_field_0.zarr2/time/.zattrs~ | 6 ------ cf/test/test_Data.py | 12 ++++++------ docs/source/class/cf.AuxiliaryCoordinate.rst | 6 +++--- docs/source/class/cf.Bounds.rst | 6 +++--- docs/source/class/cf.CellConnectivity.rst | 16 ++++++++-------- docs/source/class/cf.CellMeasure.rst | 6 +++--- docs/source/class/cf.Count.rst | 6 +++--- docs/source/class/cf.Data.rst | 6 +++--- docs/source/class/cf.DimensionCoordinate.rst | 6 +++--- docs/source/class/cf.DomainAncillary.rst | 6 +++--- docs/source/class/cf.DomainTopology.rst | 16 ++++++++-------- docs/source/class/cf.Field.rst | 6 +++--- 
docs/source/class/cf.FieldAncillary.rst       |  6 +++---
 docs/source/class/cf.Index.rst                |  6 +++---
 docs/source/class/cf.List.rst                 |  6 +++---
 docs/source/introduction.rst                  |  2 ++
 docs/source/tutorial.rst                      |  4 ++++
 setup.py                                      |  4 +++-
 25 files changed, 93 insertions(+), 101 deletions(-)
 delete mode 100644 cf/test/example_field_0.zarr2/lat_bnds/.zattrs~
 delete mode 100644 cf/test/example_field_0.zarr2/lon_bnds/.zattrs~
 delete mode 100644 cf/test/example_field_0.zarr2/time/.zattrs~

diff --git a/cf/cfimplementation.py b/cf/cfimplementation.py
index 5ac48ae061..ece90bde2c 100644
--- a/cf/cfimplementation.py
+++ b/cf/cfimplementation.py
@@ -50,8 +50,8 @@ class CFImplementation(cfdm.CFDMImplementation):

     """

-    def nc_set_hdf5_chunksizes(self, data, sizes, override=False):
-        """Set the data HDF5 chunksizes.
+    def nc_set_dataset_chunksizes(self, data, sizes, override=False):
+        """Set the data dataset chunksizes.

         .. versionadded:: 3.16.2

@@ -61,21 +61,21 @@
             The data.

         sizes: sequence of `int`
-            The new HDF5 chunk sizes.
+            The new dataset chunk sizes.

         override: `bool`, optional
-            If True then set the HDF5 chunks sizes even if some
+            If True then set the dataset chunk sizes even if some
             have already been specified. If False, the default,
-            then only set the HDF5 chunks sizes if some none have
-            already been specified.
+            then only set the dataset chunk sizes if none have
+            already been specified.

         :Returns:

             `None`

         """
-        if override or not data.nc_hdf5_chunksizes():
-            data.nc_set_hdf5_chunksizes(sizes)
+        if override or not data.nc_dataset_chunksizes():
+            data.nc_set_dataset_chunksizes(sizes)

     def set_construct(self, parent, construct, axes=None, copy=True, **kwargs):
         """Insert a construct into a field or domain.
diff --git a/cf/data/data.py b/cf/data/data.py
index 5f024f4770..f425518415 100644
--- a/cf/data/data.py
+++ b/cf/data/data.py
@@ -5532,7 +5532,7 @@ def outerproduct(self, a, inplace=False, i=False):
         d = _inplace_enabled_define_and_cleanup(self)

         shape = d.shape
-        chunksizes0 = d.nc_hdf5_chunksizes()
+        chunksizes0 = d.nc_dataset_chunksizes()

         # Cast 'a' as a Data object so that it definitely has sensible
         # Units. We don't mind if the units of 'a' are incompatible
@@ -5563,8 +5563,8 @@
         for a_axis in a._cyclic:
             d.cyclic(ndim + a._axes.index(a_axis))

-        # Update the HDF5 chunking strategy
-        chunksizes1 = a.nc_hdf5_chunksizes()
+        # Update the dataset chunking strategy
+        chunksizes1 = a.nc_dataset_chunksizes()
         if chunksizes0 or chunksizes1:
             if isinstance(chunksizes0, tuple):
                 if isinstance(chunksizes1, tuple):
@@ -5572,10 +5572,10 @@
                 else:
                     chunksizes = chunksizes0 + a.shape

-                d.nc_set_hdf5_chunksizes(chunksizes)
+                d.nc_set_dataset_chunksizes(chunksizes)
             elif isinstance(chunksizes1, tuple):
                 chunksizes = shape + chunksizes1
-                d.nc_set_hdf5_chunksizes(chunksizes)
+                d.nc_set_dataset_chunksizes(chunksizes)

         d._update_deterministic(a)

         return d
diff --git a/cf/data/mixin/deprecations.py b/cf/data/mixin/deprecations.py
index aef006622d..c8f2124524 100644
--- a/cf/data/mixin/deprecations.py
+++ b/cf/data/mixin/deprecations.py
@@ -441,11 +441,13 @@ def HDF_chunks(self, *chunks):
         `Data` objects to be written to netCDF files.

         Deprecated at version 3.14.0 and is no longer available. Use
-        the methods `nc_clear_hdf5_chunksizes`, `nc_hdf5_chunksizes`,
-        and `nc_set_hdf5_chunksizes` instead.
+ the methods `nc_clear_dataset_chunksizes`, + `nc_dataset_chunksizes`, and `nc_set_dataset_chunksizes` + instead. - .. seealso:: `nc_clear_hdf5_chunksizes`, `nc_hdf5_chunksizes`, - `nc_set_hdf5_chunksizes` + .. seealso:: `nc_clear_dataset_chunksizes`, + `nc_dataset_chunksizes`, + `nc_set_dataset_chunksizes` :Parameters: @@ -506,8 +508,8 @@ def HDF_chunks(self, *chunks): _DEPRECATION_ERROR_METHOD( self, "HDF_chunks", - message="Use the methods 'nc_clear_hdf5_chunksizes', " - "'nc_hdf5_chunksizes', and 'nc_set_hdf5_chunksizes' " + message="Use the methods 'nc_clear_dataset_chunksizes', " + "'nc_dataset_chunksizes', and 'nc_set_dataset_chunksizes' " "instead.", version="3.14.0", removed_at="5.0.0", diff --git a/cf/data/utils.py b/cf/data/utils.py index 63b2a40e88..bcd8aef289 100644 --- a/cf/data/utils.py +++ b/cf/data/utils.py @@ -435,8 +435,8 @@ def collapse( d._axes = [a for i, a in enumerate(d._axes) if i not in axis] if d.size != original_size: - # Remove the out-dated HDF5 chunking strategy - d.nc_clear_hdf5_chunksizes() + # Remove the out-dated dataset chunking strategy + d.nc_clear_dataset_chunksizes() return d, weights diff --git a/cf/field.py b/cf/field.py index 9c7dcd93e6..55305eb817 100644 --- a/cf/field.py +++ b/cf/field.py @@ -13993,17 +13993,17 @@ def field( def HDF_chunks(self, *chunksizes): """Deprecated at version 3.0.0. - Use methods 'Data.nc_hdf5_chunksizes', - 'Data.nc_set_hdf5_chunksizes', 'Data.nc_clear_hdf5_chunksizes' - instead. + Use methods 'Data.nc_dataset_chunksizes', + 'Data.nc_set_dataset_chunksizes', + 'Data.nc_clear_dataset_chunksizes' instead. """ _DEPRECATION_ERROR_METHOD( self, "HDF_chunks", - "Use methods 'Data.nc_hdf5_chunksizes', " - "'Data.nc_set_hdf5_chunksizes', " - "'Data.nc_clear_hdf5_chunksizes' instead.", + "Use methods 'Data.nc_dataset_chunksizes', " + "'Data.nc_set_dataset_chunksizes', " + "'Data.nc_clear_dataset_chunksizes' instead.", version="3.0.0", removed_at="4.0.0", ) # pragma: no cover diff --git a/cf/test/example_field_0.zarr2/lat_bnds/.zattrs~ b/cf/test/example_field_0.zarr2/lat_bnds/.zattrs~ deleted file mode 100644 index 07dd7427d4..0000000000 --- a/cf/test/example_field_0.zarr2/lat_bnds/.zattrs~ +++ /dev/null @@ -1,6 +0,0 @@ -{ - "_ARRAY_DIMENSIONS": [ - "lat", - "bounds2" - ], -} \ No newline at end of file diff --git a/cf/test/example_field_0.zarr2/lon_bnds/.zattrs~ b/cf/test/example_field_0.zarr2/lon_bnds/.zattrs~ deleted file mode 100644 index 16a2159d16..0000000000 --- a/cf/test/example_field_0.zarr2/lon_bnds/.zattrs~ +++ /dev/null @@ -1,6 +0,0 @@ -{ - "_ARRAY_DIMENSIONS": [ - "lon", - "bounds2" - ], -} \ No newline at end of file diff --git a/cf/test/example_field_0.zarr2/time/.zattrs~ b/cf/test/example_field_0.zarr2/time/.zattrs~ deleted file mode 100644 index 9c43b14245..0000000000 --- a/cf/test/example_field_0.zarr2/time/.zattrs~ +++ /dev/null @@ -1,6 +0,0 @@ -{ - "_ARRAY_DIMENSIONS": [], - "calendar": "proleptic_gregorian", - "standard_name": "time", - "units": "days since 2018-12-01" -} \ No newline at end of file diff --git a/cf/test/test_Data.py b/cf/test/test_Data.py index 6692737993..026ad9e2c8 100644 --- a/cf/test/test_Data.py +++ b/cf/test/test_Data.py @@ -4661,28 +4661,28 @@ def test_Data_collapse_axes_hdf_chunks(self): """Test that _axes and hdf_chunks are updated after a collapse.""" d = cf.Data([[1, 2, 3, 4]]) chunks = d.shape - d.nc_set_hdf5_chunksizes(chunks) + d.nc_set_dataset_chunksizes(chunks) e = d.mean(axes=1) self.assertEqual(d._axes, ("dim0", "dim1")) - self.assertEqual(d.nc_hdf5_chunksizes(), chunks) + 
self.assertEqual(d.nc_dataset_chunksizes(), chunks) e = d.mean(axes=1) self.assertNotEqual(e.size, d.size) self.assertEqual(e._axes, d._axes) - self.assertEqual(e.nc_hdf5_chunksizes(), None) + self.assertEqual(e.nc_dataset_chunksizes(), None) e = d.mean(axes=1, squeeze=True) self.assertEqual(e._axes, d._axes[:1]) - self.assertEqual(e.nc_hdf5_chunksizes(), None) + self.assertEqual(e.nc_dataset_chunksizes(), None) e = d.mean(axes=0) self.assertEqual(e.size, d.size) self.assertEqual(e._axes, d._axes) - self.assertEqual(e.nc_hdf5_chunksizes(), chunks) + self.assertEqual(e.nc_dataset_chunksizes(), chunks) e = d.mean(axes=0, squeeze=True) self.assertEqual(e._axes, d._axes[1:]) - self.assertEqual(e.nc_hdf5_chunksizes(), chunks) + self.assertEqual(e.nc_dataset_chunksizes(), chunks) if __name__ == "__main__": diff --git a/docs/source/class/cf.AuxiliaryCoordinate.rst b/docs/source/class/cf.AuxiliaryCoordinate.rst index 0b2847c661..18ff389890 100644 --- a/docs/source/class/cf.AuxiliaryCoordinate.rst +++ b/docs/source/class/cf.AuxiliaryCoordinate.rst @@ -502,9 +502,9 @@ NetCDF ~cf.AuxiliaryCoordinate.nc_set_node_coordinate_variable ~cf.AuxiliaryCoordinate.nc_set_node_coordinate_variable_groups ~cf.AuxiliaryCoordinate.nc_clear_node_coordinate_variable_groups - ~cf.AuxiliaryCoordinate.nc_clear_hdf5_chunksizes - ~cf.AuxiliaryCoordinate.nc_hdf5_chunksizes - ~cf.AuxiliaryCoordinate.nc_set_hdf5_chunksizes + ~cf.AuxiliaryCoordinate.nc_clear_dataset_chunksizes + ~cf.AuxiliaryCoordinate.nc_dataset_chunksizes + ~cf.AuxiliaryCoordinate.nc_set_dataset_chunksizes Groups ^^^^^^ diff --git a/docs/source/class/cf.Bounds.rst b/docs/source/class/cf.Bounds.rst index ea6558c98d..1b85f7eeb8 100644 --- a/docs/source/class/cf.Bounds.rst +++ b/docs/source/class/cf.Bounds.rst @@ -414,9 +414,9 @@ NetCDF ~cf.Bounds.nc_get_dimension ~cf.Bounds.nc_has_dimension ~cf.Bounds.nc_set_dimension - ~cf.Bounds.nc_clear_hdf5_chunksizes - ~cf.Bounds.nc_hdf5_chunksizes - ~cf.Bounds.nc_set_hdf5_chunksizes + ~cf.Bounds.nc_clear_dataset_chunksizes + ~cf.Bounds.nc_dataset_chunksizes + ~cf.Bounds.nc_set_dataset_chunksizes Aggregation ----------- diff --git a/docs/source/class/cf.CellConnectivity.rst b/docs/source/class/cf.CellConnectivity.rst index 969ebab2bd..f4c7b120e0 100644 --- a/docs/source/class/cf.CellConnectivity.rst +++ b/docs/source/class/cf.CellConnectivity.rst @@ -431,9 +431,9 @@ NetCDF ~cf.CellConnectivity.nc_get_variable ~cf.CellConnectivity.nc_has_variable ~cf.CellConnectivity.nc_set_variable - ~cf.CellConnectivity.nc_clear_hdf5_chunksizes - ~cf.CellConnectivity.nc_hdf5_chunksizes - ~cf.CellConnectivity.nc_set_hdf5_chunksizes + ~cf.CellConnectivity.nc_clear_dataset_chunksizes + ~cf.CellConnectivity.nc_dataset_chunksizes + ~cf.CellConnectivity.nc_set_dataset_chunksizes Aggregation ----------- @@ -477,8 +477,8 @@ Groups ~cf.CellConnectivity.nc_clear_variable_groups ~cf.CellConnectivity.nc_set_variable_groups -HDF5 chunks -^^^^^^^^^^^ +Dataset chunks +^^^^^^^^^^^^^^ .. 
rubric:: Methods @@ -487,9 +487,9 @@ HDF5 chunks :toctree: ../method/ :template: method.rst - ~cf.CellConnectivity.nc_hdf5_chunksizes - ~cf.CellConnectivity.nc_set_hdf5_chunksizes - ~cf.CellConnectivity.nc_clear_hdf5_chunksizes + ~cf.CellConnectivity.nc_dataset_chunksizes + ~cf.CellConnectivity.nc_set_dataset_chunksizes + ~cf.CellConnectivity.nc_clear_dataset_chunksizes Aliases ------- diff --git a/docs/source/class/cf.CellMeasure.rst b/docs/source/class/cf.CellMeasure.rst index 02b4871231..66c1c93e99 100644 --- a/docs/source/class/cf.CellMeasure.rst +++ b/docs/source/class/cf.CellMeasure.rst @@ -434,9 +434,9 @@ NetCDF ~cf.CellMeasure.nc_set_variable ~cf.CellMeasure.nc_get_external ~cf.CellMeasure.nc_set_external - ~cf.CellMeasure.nc_clear_hdf5_chunksizes - ~cf.CellMeasure.nc_hdf5_chunksizes - ~cf.CellMeasure.nc_set_hdf5_chunksizes + ~cf.CellMeasure.nc_clear_dataset_chunksizes + ~cf.CellMeasure.nc_dataset_chunksizes + ~cf.CellMeasure.nc_set_dataset_chunksizes Aggregation ----------- diff --git a/docs/source/class/cf.Count.rst b/docs/source/class/cf.Count.rst index 43017e9d5b..5ab4dbea0b 100644 --- a/docs/source/class/cf.Count.rst +++ b/docs/source/class/cf.Count.rst @@ -411,9 +411,9 @@ NetCDF ~cf.Count.nc_get_sample_dimension ~cf.Count.nc_has_sample_dimension ~cf.Count.nc_set_sample_dimension - ~cf.Count.nc_clear_hdf5_chunksizes - ~cf.Count.nc_hdf5_chunksizes - ~cf.Count.nc_set_hdf5_chunksizes + ~cf.Count.nc_clear_dataset_chunksizes + ~cf.Count.nc_dataset_chunksizes + ~cf.Count.nc_set_dataset_chunksizes Aggregation ----------- diff --git a/docs/source/class/cf.Data.rst b/docs/source/class/cf.Data.rst index df1e94cf98..295bdb833c 100644 --- a/docs/source/class/cf.Data.rst +++ b/docs/source/class/cf.Data.rst @@ -639,9 +639,9 @@ Performance :toctree: ../method/ :template: method.rst - ~cf.Data.nc_clear_hdf5_chunksizes - ~cf.Data.nc_hdf5_chunksizes - ~cf.Data.nc_set_hdf5_chunksizes + ~cf.Data.nc_clear_dataset_chunksizes + ~cf.Data.nc_dataset_chunksizes + ~cf.Data.nc_set_dataset_chunksizes ~cf.Data.rechunk ~cf.Data.close ~cf.Data.chunks diff --git a/docs/source/class/cf.DimensionCoordinate.rst b/docs/source/class/cf.DimensionCoordinate.rst index 7c46395bc0..fafd795e79 100644 --- a/docs/source/class/cf.DimensionCoordinate.rst +++ b/docs/source/class/cf.DimensionCoordinate.rst @@ -508,9 +508,9 @@ NetCDF ~cf.DimensionCoordinate.nc_get_variable ~cf.DimensionCoordinate.nc_has_variable ~cf.DimensionCoordinate.nc_set_variable - ~cf.DimensionCoordinate.nc_clear_hdf5_chunksizes - ~cf.DimensionCoordinate.nc_hdf5_chunksizes - ~cf.DimensionCoordinate.nc_set_hdf5_chunksizes + ~cf.DimensionCoordinate.nc_clear_dataset_chunksizes + ~cf.DimensionCoordinate.nc_dataset_chunksizes + ~cf.DimensionCoordinate.nc_set_dataset_chunksizes Groups ^^^^^^ diff --git a/docs/source/class/cf.DomainAncillary.rst b/docs/source/class/cf.DomainAncillary.rst index 118b2fe4e1..d290694a4a 100644 --- a/docs/source/class/cf.DomainAncillary.rst +++ b/docs/source/class/cf.DomainAncillary.rst @@ -463,9 +463,9 @@ NetCDF ~cf.DomainAncillary.nc_get_variable ~cf.DomainAncillary.nc_has_variable ~cf.DomainAncillary.nc_set_variable - ~cf.DomainAncillary.nc_clear_hdf5_chunksizes - ~cf.DomainAncillary.nc_hdf5_chunksizes - ~cf.DomainAncillary.nc_set_hdf5_chunksizes + ~cf.DomainAncillary.nc_clear_dataset_chunksizes + ~cf.DomainAncillary.nc_dataset_chunksizes + ~cf.DomainAncillary.nc_set_dataset_chunksizes Aggregation ----------- diff --git a/docs/source/class/cf.DomainTopology.rst b/docs/source/class/cf.DomainTopology.rst index 
a9b7d7bd32..7468b7f0df 100644 --- a/docs/source/class/cf.DomainTopology.rst +++ b/docs/source/class/cf.DomainTopology.rst @@ -432,9 +432,9 @@ NetCDF ~cf.DomainTopology.nc_get_variable ~cf.DomainTopology.nc_has_variable ~cf.DomainTopology.nc_set_variable - ~cf.DomainTopology.nc_clear_hdf5_chunksizes - ~cf.DomainTopology.nc_hdf5_chunksizes - ~cf.DomainTopology.nc_set_hdf5_chunksizes + ~cf.DomainTopology.nc_clear_dataset_chunksizes + ~cf.DomainTopology.nc_dataset_chunksizes + ~cf.DomainTopology.nc_set_dataset_chunksizes Aggregation ----------- @@ -478,8 +478,8 @@ Groups ~cf.DomainTopology.nc_clear_variable_groups ~cf.DomainTopology.nc_set_variable_groups -HDF5 chunks -^^^^^^^^^^^ +Dataset chunks +^^^^^^^^^^^^^^ .. rubric:: Methods @@ -488,9 +488,9 @@ HDF5 chunks :toctree: ../method/ :template: method.rst - ~cf.DomainTopology.nc_hdf5_chunksizes - ~cf.DomainTopology.nc_set_hdf5_chunksizes - ~cf.DomainTopology.nc_clear_hdf5_chunksizes + ~cf.DomainTopology.nc_dataset_chunksizes + ~cf.DomainTopology.nc_set_dataset_chunksizes + ~cf.DomainTopology.nc_clear_dataset_chunksizes Aliases ------- diff --git a/docs/source/class/cf.Field.rst b/docs/source/class/cf.Field.rst index ecf849dabf..de91a5c2c3 100644 --- a/docs/source/class/cf.Field.rst +++ b/docs/source/class/cf.Field.rst @@ -417,9 +417,9 @@ NetCDF ~cf.Field.nc_set_global_attribute ~cf.Field.nc_set_global_attributes ~cf.Field.ncdimensions - ~cf.Field.nc_clear_hdf5_chunksizes - ~cf.Field.nc_hdf5_chunksizes - ~cf.Field.nc_set_hdf5_chunksizes + ~cf.Field.nc_clear_dataset_chunksizes + ~cf.Field.nc_dataset_chunksizes + ~cf.Field.nc_set_dataset_chunksizes Groups ^^^^^^ diff --git a/docs/source/class/cf.FieldAncillary.rst b/docs/source/class/cf.FieldAncillary.rst index e83f3c2eae..f1ca9ffd62 100644 --- a/docs/source/class/cf.FieldAncillary.rst +++ b/docs/source/class/cf.FieldAncillary.rst @@ -407,9 +407,9 @@ NetCDF ~cf.FieldAncillary.nc_get_variable ~cf.FieldAncillary.nc_has_variable ~cf.FieldAncillary.nc_set_variable - ~cf.FieldAncillary.nc_clear_hdf5_chunksizes - ~cf.FieldAncillary.nc_hdf5_chunksizes - ~cf.FieldAncillary.nc_set_hdf5_chunksizes + ~cf.FieldAncillary.nc_clear_dataset_chunksizes + ~cf.FieldAncillary.nc_dataset_chunksizes + ~cf.FieldAncillary.nc_set_dataset_chunksizes Aggregation ----------- diff --git a/docs/source/class/cf.Index.rst b/docs/source/class/cf.Index.rst index 339fc61099..b74eef381f 100644 --- a/docs/source/class/cf.Index.rst +++ b/docs/source/class/cf.Index.rst @@ -412,9 +412,9 @@ NetCDF ~cf.Index.nc_get_sample_dimension ~cf.Index.nc_has_sample_dimension ~cf.Index.nc_set_sample_dimension - ~cf.Index.nc_clear_hdf5_chunksizes - ~cf.Index.nc_hdf5_chunksizes - ~cf.Index.nc_set_hdf5_chunksizes + ~cf.Index.nc_clear_dataset_chunksizes + ~cf.Index.nc_dataset_chunksizes + ~cf.Index.nc_set_dataset_chunksizes Aggregation ----------- diff --git a/docs/source/class/cf.List.rst b/docs/source/class/cf.List.rst index 52cd8e4193..744214d0f7 100644 --- a/docs/source/class/cf.List.rst +++ b/docs/source/class/cf.List.rst @@ -404,9 +404,9 @@ NetCDF ~cf.List.nc_get_variable ~cf.List.nc_has_variable ~cf.List.nc_set_variable - ~cf.List.nc_clear_hdf5_chunksizes - ~cf.List.nc_hdf5_chunksizes - ~cf.List.nc_set_hdf5_chunksizes + ~cf.List.nc_clear_dataset_chunksizes + ~cf.List.nc_dataset_chunksizes + ~cf.List.nc_set_dataset_chunksizes Aggregation ----------- diff --git a/docs/source/introduction.rst b/docs/source/introduction.rst index b55d242488..1e9c66ed0d 100644 --- a/docs/source/introduction.rst +++ b/docs/source/introduction.rst @@ -70,6 +70,8 @@ 
manipulation and can: * read files from OPeNDAP servers and S3 object stores, +* be fully flexible with respect to dataset chunking, + * create new field constructs in memory, * write and append field constructs to netCDF datasets on disk, diff --git a/docs/source/tutorial.rst b/docs/source/tutorial.rst index 53ba7de55d..bbe6da510a 100644 --- a/docs/source/tutorial.rst +++ b/docs/source/tutorial.rst @@ -130,6 +130,10 @@ The following file types can be read: with or without the data array values. .. + +* Datasets in `Zarr v2 (xarray) `_ and `Zarr v3 `_ formats. + +.. * `CFA-netCDF `_ diff --git a/setup.py b/setup.py index 498a6f8eef..706d89af63 100755 --- a/setup.py +++ b/setup.py @@ -177,7 +177,9 @@ def compile(): The ``cf`` package can: -* read field constructs from netCDF, CDL, PP and UM datasets, +* read field constructs from netCDF, CDL, Zarr, PP and UM datasets, + +* be fully flexible with respect to dataset storage chunking, * create new field constructs in memory, From 0a4eb7e0357d6bd6d66a57e3135eb2139ac3a0ee Mon Sep 17 00:00:00 2001 From: David Hassell Date: Tue, 20 May 2025 14:04:29 +0100 Subject: [PATCH 05/12] dev --- cf/data/array/zarrarray.py | 5 +++-- cf/read_write/read.py | 25 +++++++++++++------------ cf/read_write/um/umread.py | 14 +++++++------- 3 files changed, 23 insertions(+), 21 deletions(-) diff --git a/cf/data/array/zarrarray.py b/cf/data/array/zarrarray.py index acbb878c6b..07cbb764b3 100644 --- a/cf/data/array/zarrarray.py +++ b/cf/data/array/zarrarray.py @@ -1,11 +1,12 @@ import cfdm from ...mixin_container import Container -#from .mixin import ActiveStorageMixin + +# from .mixin import ActiveStorageMixin class ZarrArray( -# ActiveStorageMixin, + # ActiveStorageMixin, Container, cfdm.ZarrArray, ): diff --git a/cf/read_write/read.py b/cf/read_write/read.py index 3daf4ca442..17f9df70f4 100644 --- a/cf/read_write/read.py +++ b/cf/read_write/read.py @@ -556,7 +556,8 @@ def _finalise(self): UM = "UM" in self.unique_dataset_categories # ---------------------------------------------------------------- - # Select matching fields from non-UM files (before aggregation) + # Select matching constructs from non-UM files (before + # aggregation) # ---------------------------------------------------------------- select = self.select if select and not UM: @@ -585,8 +586,8 @@ def _finalise(self): del f._custom["standard_name"] # ---------------------------------------------------------------- - # Select matching fields from UM files (post setting of their - # standard names) + # Select matching constructs from UM files (after setting + # their standard names) # ---------------------------------------------------------------- if select and UM: self.constructs = self.constructs.select_by_identity(*select) @@ -679,7 +680,7 @@ def _read(self, dataset): # ------------------------------------------------------------ super()._read(dataset) - if self.dataset_contents: + if self.dataset_contents is not None: return # ------------------------------------------------------------ @@ -694,13 +695,13 @@ def _read(self, dataset): um_kwargs = { key: kwargs[key] for key in ( - "verbose", "height_at_top_of_model", "squeeze", "unsqueeze", "domain", - "file_type", + "dataset_type", "unpack", + "verbose", ) } um_kwargs["set_standard_name"] = False @@ -725,14 +726,14 @@ def _read(self, dataset): # Successfully read the dataset self.unique_dataset_categories.add("UM") - if self.dataset_contents: + if self.dataset_contents is not None: return # ------------------------------------------------------------ 
# Try to read as a GRIB dataset - # ------------------------------------------------------------ - # Not yet available! The framework will be: # + # Not yet available! The framework will be: + # ------------------------------------------------------------ # if dataset_type is None or dataset_type.intersection( # self.GRIB_dataset_types # ): @@ -743,13 +744,13 @@ def _read(self, dataset): # # } # - # self.um_read = partial( + # self.grib_read = partial( # GRIBRead(self.implementation).read, **grib_kwargs # ) # # try: # # Try to read the dataset - # self.dataset_contents = self.grid_read(dataset) + # self.dataset_contents = self.grib_read(dataset) # except DatasetTypeError as error: # if dataset_type is None: # self.dataset_format_errors.append(error) @@ -757,5 +758,5 @@ def _read(self, dataset): # # Successfully read the dataset # self.unique_dataset_categories.add("GRIB") # - # if self.dataset_contents: + # if self.dataset_contents is not None: # return diff --git a/cf/read_write/um/umread.py b/cf/read_write/um/umread.py index 52ee560d5d..3289ea88b4 100644 --- a/cf/read_write/um/umread.py +++ b/cf/read_write/um/umread.py @@ -3407,7 +3407,7 @@ def read( squeeze=False, unsqueeze=False, domain=False, - file_type=None, + dataset_type=None, ignore_unknown_type=False, unpack=True, ): @@ -3555,14 +3555,14 @@ def read( byte_ordering = None # ------------------------------------------------------------ - # Parse the 'file_type' keyword parameter + # Parse the 'dataset_type' keyword parameter # ------------------------------------------------------------ - if file_type is not None: - if isinstance(file_type, str): - file_type = (file_type,) + if dataset_type is not None: + if isinstance(dataset_type, str): + dataset_type = (dataset_type,) - file_type = set(file_type) - if not file_type.intersection(("UM",)): + dataset_type = set(dataset_type) + if not dataset_type.intersection(("UM",)): # Return now if there are valid file types return [] From 73fc17dd08937ea5571878d88f498e462b133616 Mon Sep 17 00:00:00 2001 From: David Hassell Date: Wed, 21 May 2025 17:46:24 +0100 Subject: [PATCH 06/12] dev --- cf/data/data.py | 2 +- cf/read_write/read.py | 28 ++++++++++++++++------------ 2 files changed, 17 insertions(+), 13 deletions(-) diff --git a/cf/data/data.py b/cf/data/data.py index f425518415..7fcd08024f 100644 --- a/cf/data/data.py +++ b/cf/data/data.py @@ -6259,7 +6259,7 @@ def reshape(self, *shape, merge_chunks=True, limit=None, inplace=False): # Clear cyclic axes, as we can't help but lose them in this # operation - d._cyclic = _empty_set + del d._cyclic return d diff --git a/cf/read_write/read.py b/cf/read_write/read.py index 17f9df70f4..14c3b3efb3 100644 --- a/cf/read_write/read.py +++ b/cf/read_write/read.py @@ -163,10 +163,10 @@ class read(cfdm.read): ============ ============================================ file type Description ============ ============================================ - ``'netCDF'`` Binary netCDF-3 or netCDF-4 file - ``'CDL'`` Text CDL representations of a netCDF dataset - ``'Zarr'`` A Zarr v2 (xarray) or Zarr v3 hierarchy - ``'UM'`` UM fields file or PP file + ``'netCDF'`` A netCDF-3 or netCDF-4 dataset + ``'CDL'`` A text CDL file of a netCDF dataset + ``'Zarr'`` A Zarr v2 (xarray) or Zarr v3 dataset + ``'UM'`` A UM fields file or PP dataset ============ ============================================ .. 
versionadded:: NEXTVERSION @@ -566,14 +566,15 @@ def _finalise(self): # ---------------------------------------------------------------- # Aggregate the output fields/domains # ---------------------------------------------------------------- - constructs = self.constructs - if self.aggregate and len(constructs) > 1: + if self.aggregate and len(self.constructs) > 1: aggregate_options = self.aggregate_options # Set defaults specific to UM fields if UM and "strict_units" not in aggregate_options: aggregate_options["relaxed_units"] = True - self.constructs = cf_aggregate(constructs, **aggregate_options) + self.constructs = cf_aggregate( + self.constructs, **aggregate_options + ) # ---------------------------------------------------------------- # Add standard names to UM/PP fields (after aggregation) @@ -611,7 +612,7 @@ def _initialise(self): # Initialise the list of output constructs if self.field: self.constructs = FieldList() - else: + elif self.domain: self.constructs = DomainList() # Recognised UM dataset formats @@ -681,6 +682,7 @@ def _read(self, dataset): super()._read(dataset) if self.dataset_contents is not None: + # Successfully read the dataset return # ------------------------------------------------------------ @@ -727,22 +729,23 @@ def _read(self, dataset): self.unique_dataset_categories.add("UM") if self.dataset_contents is not None: + # Successfully read the dataset return # ------------------------------------------------------------ # Try to read as a GRIB dataset # - # Not yet available! The framework will be: + # Not yet available! When (if) the time comes, the framework + # will be: # ------------------------------------------------------------ + # # if dataset_type is None or dataset_type.intersection( # self.GRIB_dataset_types # ): # if not hasattr(self, "grib_read"): # # Initialise the GRIB read function # kwargs = self.kwargs - # grib_kwargs = { - # - # } + # grib_kwargs = ... 
#
         #
         #         self.grib_read = partial(
         #             GRIBRead(self.implementation).read, **grib_kwargs
         #         )
         #
         #         try:
         #             # Try to read the dataset
         #             self.dataset_contents = self.grib_read(dataset)
         #         except DatasetTypeError as error:
         #             if dataset_type is None:
         #                 self.dataset_format_errors.append(error)
         #         else:
         #             # Successfully read the dataset
         #             self.unique_dataset_categories.add("GRIB")
         #
         #         if self.dataset_contents is not None:
+        #             # Successfully read the dataset
         #             return

From 6849ef8874a867d1e53b264ae20f547fba541d19 Mon Sep 17 00:00:00 2001
From: David Hassell
Date: Thu, 22 May 2025 16:03:39 +0100
Subject: [PATCH 07/12] dev

---
 Changelog.rst                 | 11 +++++++++++
 cf/data/array/zarrarray.py    |  4 +++-
 cf/data/mixin/deprecations.py |  6 +++---
 cf/field.py                   |  9 +++++----
 cf/read_write/read.py         | 24 +++++-------------------
 cf/test/test_read_write.py    |  1 -
 docs/source/tutorial.rst      |  6 +++++-
 7 files changed, 32 insertions(+), 29 deletions(-)

diff --git a/Changelog.rst b/Changelog.rst
index 20102713b6..de79ec130b 100644
--- a/Changelog.rst
+++ b/Changelog.rst
@@ -1,3 +1,14 @@
+Version NEXTVERSION
+-------------------
+
+**2025-??-??**
+
+* Read Zarr datasets with `cf.read`
+  (https://github.com/NCAS-CMS/cf-python/issues/863)
+* Changed dependency: ``cfdm>=1.12.2.0, <1.12.3.0``
+
+----
+
 version 3.17.0
 --------------
 
diff --git a/cf/data/array/zarrarray.py b/cf/data/array/zarrarray.py
index 07cbb764b3..2d3d8c784f 100644
--- a/cf/data/array/zarrarray.py
+++ b/cf/data/array/zarrarray.py
@@ -2,11 +2,13 @@
 
 from ...mixin_container import Container
 
+# Uncomment when we can use active storage on Zarr datasets:
 # from .mixin import ActiveStorageMixin
 
 
 class ZarrArray(
-    # ActiveStorageMixin,
+    # Uncomment when we can use active storage on Zarr datasets:
+    # ActiveStorageMixin,
     Container,
     cfdm.ZarrArray,
 ):
diff --git a/cf/data/mixin/deprecations.py b/cf/data/mixin/deprecations.py
index c8f2124524..f729ef2764 100644
--- a/cf/data/mixin/deprecations.py
+++ b/cf/data/mixin/deprecations.py
@@ -437,14 +437,14 @@ def dumps(self):
     def HDF_chunks(self, *chunks):
         """Get or set HDF chunk sizes.
 
-        The HDF chunk sizes may be used by external code that allows
-        `Data` objects to be written to netCDF files.
-
         Deprecated at version 3.14.0 and is no longer available. Use
         the methods `nc_clear_dataset_chunksizes`,
         `nc_dataset_chunksizes`, and `nc_set_dataset_chunksizes`
         instead.
 
+        The HDF chunk sizes may be used by external code that allows
+        `Data` objects to be written to netCDF files.
+
         .. seealso:: `nc_clear_dataset_chunksizes`,
                      `nc_dataset_chunksizes`,
                      `nc_set_dataset_chunksizes`
diff --git a/cf/field.py b/cf/field.py
index 55305eb817..c9eb0c19bd 100644
--- a/cf/field.py
+++ b/cf/field.py
@@ -13991,11 +13991,12 @@ def field(
         )  # pragma: no cover
 
     def HDF_chunks(self, *chunksizes):
-        """Deprecated at version 3.0.0.
+        """Get or set HDF chunk sizes.
 
-        Use methods 'Data.nc_dataset_chunksizes',
-        'Data.nc_set_dataset_chunksizes',
-        'Data.nc_clear_dataset_chunksizes' instead.
+        Deprecated at version 3.0.0 and is no longer available. Use
+        methods `Data.nc_dataset_chunksizes`,
+        `Data.nc_set_dataset_chunksizes`,
+        `Data.nc_clear_dataset_chunksizes` instead.
 
""" _DEPRECATION_ERROR_METHOD( diff --git a/cf/read_write/read.py b/cf/read_write/read.py index 14c3b3efb3..6f7ada1912 100644 --- a/cf/read_write/read.py +++ b/cf/read_write/read.py @@ -146,6 +146,10 @@ class read(cfdm.read): {{read datasets: (arbitrarily nested sequence of) `str`}} + {{read recursive: `bool`, optional}} + + {{read followlinks: `bool`, optional}} + {{read cdl_string: `bool`, optional}} {{read external: (sequence of) `str`, optional}} @@ -165,7 +169,7 @@ class read(cfdm.read): ============ ============================================ ``'netCDF'`` A netCDF-3 or netCDF-4 dataset ``'CDL'`` A text CDL file of a netCDF dataset - ``'Zarr'`` A Zarr v2 (xarray) or Zarr v3 dataset + ``'Zarr'`` A Zarr v2 (xarray-style) or Zarr v3 dataset ``'UM'`` A UM fields file or PP dataset ============ ============================================ @@ -276,24 +280,6 @@ class read(cfdm.read): select='air_temperature')`` is equivalent to ``fl = cf.read(file).select_by_identity('air_temperature')``. - recursive: `bool`, optional - If True then recursively read sub-directories of any - directories specified with the *files* parameter. - - followlinks: `bool`, optional - If True, and *recursive* is True, then also search for - files in sub-directories which resolve to symbolic - links. By default directories which resolve to symbolic - links are ignored. Ignored of *recursive* is False. Files - which are symbolic links are always followed. - - Note that setting ``recursive=True, followlinks=True`` can - lead to infinite recursion if a symbolic link points to a - parent directory of itself. - - This parameter replaces the deprecated *follow_symlinks* - parameter. - {{read warn_valid: `bool`, optional}} .. versionadded:: 3.4.0 diff --git a/cf/test/test_read_write.py b/cf/test/test_read_write.py index e013010fe6..82e3e919e8 100644 --- a/cf/test/test_read_write.py +++ b/cf/test/test_read_write.py @@ -798,7 +798,6 @@ def test_read_write_domain(self): e = cf.read(tmpfile, domain=True, verbose=1) self.assertEqual(len(e), 1) - print(type(e)) self.assertIsInstance(e, cf.DomainList) e = e[0] self.assertIsInstance(e, cf.Domain) diff --git a/docs/source/tutorial.rst b/docs/source/tutorial.rst index bbe6da510a..c741aed4e1 100644 --- a/docs/source/tutorial.rst +++ b/docs/source/tutorial.rst @@ -131,7 +131,11 @@ The following file types can be read: .. -* Datasets in `Zarr v2 (xarray) `_ and `Zarr v3 `_ formats. +* Datasets in `Zarr v2 (xarray-style) + `_ + and `Zarr v3 + `_ + formats. .. 
From 0e06ba090eb9acecef67a62d95d49405742feddd Mon Sep 17 00:00:00 2001 From: David Hassell Date: Thu, 22 May 2025 20:06:14 +0100 Subject: [PATCH 08/12] dev --- cf/__init__.py | 1 + cf/read_write/read.py | 30 +++--- docs/source/class.rst | 1 + docs/source/class/cf.ZarrArray.rst | 150 +++++++++++++++++++++++++++++ docs/source/conf.py | 1 + 5 files changed, 168 insertions(+), 15 deletions(-) create mode 100644 docs/source/class/cf.ZarrArray.rst diff --git a/cf/__init__.py b/cf/__init__.py index f6d2073b75..c36dcc9bc1 100644 --- a/cf/__init__.py +++ b/cf/__init__.py @@ -288,6 +288,7 @@ RaggedIndexedContiguousArray, SubsampledArray, UMArray, + ZarrArray, ) from .aggregate import aggregate, climatology_cells diff --git a/cf/read_write/read.py b/cf/read_write/read.py index 6f7ada1912..73cf5ac187 100644 --- a/cf/read_write/read.py +++ b/cf/read_write/read.py @@ -152,28 +152,28 @@ class read(cfdm.read): {{read cdl_string: `bool`, optional}} - {{read external: (sequence of) `str`, optional}} + {{read dataset_type: `None` or (sequence of) `str`, optional}} - {{read extra: (sequence of) `str`, optional}} + Valid file types are: - {{read verbose: `int` or `str` or `None`, optional}} + ============== ========================================== + *dataset_type* Description + ============== ========================================== + ``'netCDF'`` A netCDF-3 or netCDF-4 dataset + ``'CDL'`` A text CDL file of a netCDF dataset + ``'Zarr'`` A Zarr v2 (xarray) or Zarr v3 dataset + ``'UM'`` A UM fields file or PP dataset + ============== ========================================== - {{read warnings: `bool`, optional}} + .. versionadded:: NEXTVERSION - {{read dataset_type: (sequence of) `str`, optional}} + {{read external: (sequence of) `str`, optional}} - Valid file types are: + {{read extra: (sequence of) `str`, optional}} - ============ ============================================ - file type Description - ============ ============================================ - ``'netCDF'`` A netCDF-3 or netCDF-4 dataset - ``'CDL'`` A text CDL file of a netCDF dataset - ``'Zarr'`` A Zarr v2 (xarray-style) or Zarr v3 dataset - ``'UM'`` A UM fields file or PP dataset - ============ ============================================ + {{read verbose: `int` or `str` or `None`, optional}} - .. versionadded:: NEXTVERSION + {{read warnings: `bool`, optional}} um: `dict`, optional For Met Office (UK) PP files and Met Office (UK) fields diff --git a/docs/source/class.rst b/docs/source/class.rst index 70b9730095..d7941ac067 100644 --- a/docs/source/class.rst +++ b/docs/source/class.rst @@ -83,6 +83,7 @@ Data classes cf.NetCDF4Array cf.FullArray cf.UMArray + cf.ZarrArray Data compression classes ------------------------ diff --git a/docs/source/class/cf.ZarrArray.rst b/docs/source/class/cf.ZarrArray.rst new file mode 100644 index 0000000000..7740d90d7e --- /dev/null +++ b/docs/source/class/cf.ZarrArray.rst @@ -0,0 +1,150 @@ +.. currentmodule:: cf +.. default-role:: obj + +cf.ZarrArray +============ + +---- + +.. autoclass:: cf.ZarrArray + :no-members: + :no-inherited-members: + +Inspection +---------- + +.. rubric:: Methods + +.. autosummary:: + :nosignatures: + :toctree: ../method/ + :template: method.rst + + ~cf.ZarrArray.get_compression_type + ~cf.ZarrArray.get_subspace + ~cf.ZarrArray.get_attributes + ~cf.ZarrArray.index + ~cf.ZarrArray.is_subspace + +.. rubric:: Attributes + +.. 
autosummary:: + :nosignatures: + :toctree: ../attribute/ + :template: attribute.rst + + ~cf.ZarrArray.array + ~cf.ZarrArray.astype + ~cf.ZarrArray.dtype + ~cf.ZarrArray.ndim + ~cf.ZarrArray.shape + ~cf.ZarrArray.size + ~cf.ZarrArray.original_shape + ~cf.ZarrArray.reference_shape + +Units +----- + +.. rubric:: Methods + +.. autosummary:: + :nosignatures: + :toctree: ../method/ + :template: method.rst + + ~cf.ZarrArray.get_calendar + ~cf.ZarrArray.get_units + ~cf.ZarrArray.Units + + + +File +---- + +.. rubric:: Methods + +.. autosummary:: + :nosignatures: + :toctree: ../method/ + :template: method.rst + + ~cf.ZarrArray.get_address + ~cf.ZarrArray.get_addresses + ~cf.ZarrArray.close + ~cf.ZarrArray.open + ~cf.ZarrArray.get_filename + ~cf.ZarrArray.get_filenames + ~cf.ZarrArray.get_format + ~cf.ZarrArray.get_formats + ~cf.ZarrArray.get_groups + ~cf.ZarrArray.get_mask + ~cf.ZarrArray.get_unpack + ~cf.ZarrArray.get_storage_options + ~cf.ZarrArray.add_file_location + ~cf.ZarrArray.del_file_location + ~cf.ZarrArray.file_locations + ~cf.ZarrArray.file_directory + ~cf.ZarrArray.replace_directory + ~cf.ZarrArray.replace_filename + ~cf.ZarrArray._lock + + +Miscellaneous +------------- + +.. autosummary:: + :nosignatures: + :toctree: ../method/ + :template: method.rst + + ~cf.ZarrArray.copy + ~cf.ZarrArray.to_memory + +Active storage +-------------- + +.. autosummary:: + :nosignatures: + :toctree: ../method/ + :template: method.rst + + ~cf.ZarrArray.active_storage + +Special +------- + +.. autosummary:: + :nosignatures: + :toctree: ../method/ + :template: method.rst + + ~cf.ZarrArray.__dask_tokenize__ + ~cf.ZarrArray.__getitem__ + +Docstring substitutions +----------------------- + +.. rubric:: Methods + +.. autosummary:: + :nosignatures: + :toctree: ../method/ + :template: method.rst + + ~cf.ZarrArray._docstring_special_substitutions + ~cf.ZarrArray._docstring_substitutions + ~cf.ZarrArray._docstring_package_depth + ~cf.ZarrArray._docstring_method_exclusions + +Deprecated +---------- + +.. rubric:: Methods + +.. 
autosummary:: + :nosignatures: + :toctree: ../method/ + :template: method.rst + + ~cf.ZarrArray.filename + ~cf.ZarrArray.get_missing_values diff --git a/docs/source/conf.py b/docs/source/conf.py index eaaf3da986..aa32bf1717 100755 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -157,6 +157,7 @@ def _get_date(): "matplotlib": ("https://matplotlib.org/stable/", None), # REVIEW: h5: new intersphinx mapping "h5netcdf": ("https://h5netcdf.org", None), + "zarr": ("https://zarr.readthedocs.io", None), } # This extension is meant to help with the common pattern of having From 370de445672d28fd967b1233b697ace0a2776954 Mon Sep 17 00:00:00 2001 From: David Hassell Date: Fri, 23 May 2025 09:27:17 +0100 Subject: [PATCH 09/12] Deprecated methods --- docs/source/class/cf.AuxiliaryCoordinate.rst | 3 +++ docs/source/class/cf.Bounds.rst | 3 +++ docs/source/class/cf.CellConnectivity.rst | 4 ++++ docs/source/class/cf.CellMeasure.rst | 3 +++ docs/source/class/cf.Count.rst | 3 +++ docs/source/class/cf.Data.rst | 5 ++++- docs/source/class/cf.DimensionCoordinate.rst | 3 +++ docs/source/class/cf.DomainAncillary.rst | 3 +++ docs/source/class/cf.DomainTopology.rst | 3 +++ docs/source/class/cf.Field.rst | 3 +++ docs/source/class/cf.FieldAncillary.rst | 3 +++ docs/source/class/cf.Index.rst | 3 +++ docs/source/class/cf.List.rst | 3 +++ 13 files changed, 41 insertions(+), 1 deletion(-) diff --git a/docs/source/class/cf.AuxiliaryCoordinate.rst b/docs/source/class/cf.AuxiliaryCoordinate.rst index 18ff389890..e5af934219 100644 --- a/docs/source/class/cf.AuxiliaryCoordinate.rst +++ b/docs/source/class/cf.AuxiliaryCoordinate.rst @@ -731,6 +731,9 @@ Deprecated ~cf.AuxiliaryCoordinate.remove_data ~cf.AuxiliaryCoordinate.select ~cf.AuxiliaryCoordinate.setprop + ~cf.AuxiliaryCoordinate.nc_clear_hdf5_chunksizes + ~cf.AuxiliaryCoordinate.nc_hdf5_chunksizes + ~cf.AuxiliaryCoordinate.nc_set_hdf5_chunksizes .. 
rubric:: Attributes diff --git a/docs/source/class/cf.Bounds.rst b/docs/source/class/cf.Bounds.rst index 1b85f7eeb8..98d8caec77 100644 --- a/docs/source/class/cf.Bounds.rst +++ b/docs/source/class/cf.Bounds.rst @@ -651,3 +651,6 @@ Deprecated ~cf.Bounds.select ~cf.Bounds.setprop ~cf.Bounds.unsafe_array + ~cf.Bounds.nc_clear_hdf5_chunksizes + ~cf.Bounds.nc_hdf5_chunksizes + ~cf.Bounds.nc_set_hdf5_chunksizes diff --git a/docs/source/class/cf.CellConnectivity.rst b/docs/source/class/cf.CellConnectivity.rst index f4c7b120e0..0723d8f9af 100644 --- a/docs/source/class/cf.CellConnectivity.rst +++ b/docs/source/class/cf.CellConnectivity.rst @@ -583,3 +583,7 @@ Deprecated ~cf.CellConnectivity.select ~cf.CellConnectivity.setprop ~cf.CellConnectivity.unsafe_array + ~cf.CellConnectivity.nc_clear_hdf5_chunksizes + ~cf.CellConnectivity.nc_hdf5_chunksizes + ~cf.CellConnectivity.nc_set_hdf5_chunksizes + diff --git a/docs/source/class/cf.CellMeasure.rst b/docs/source/class/cf.CellMeasure.rst index 66c1c93e99..bdb3ef502e 100644 --- a/docs/source/class/cf.CellMeasure.rst +++ b/docs/source/class/cf.CellMeasure.rst @@ -676,3 +676,6 @@ Deprecated ~cf.CellMeasure.select ~cf.CellMeasure.setprop ~cf.CellMeasure.unsafe_array + ~cf.CellMeasure.nc_clear_hdf5_chunksizes + ~cf.CellMeasure.nc_hdf5_chunksizes + ~cf.CellMeasure.nc_set_hdf5_chunksizes diff --git a/docs/source/class/cf.Count.rst b/docs/source/class/cf.Count.rst index 5ab4dbea0b..011938c558 100644 --- a/docs/source/class/cf.Count.rst +++ b/docs/source/class/cf.Count.rst @@ -660,3 +660,6 @@ Deprecated ~cf.Count.select ~cf.Count.setprop ~cf.Count.unsafe_array + ~cf.Count.nc_clear_hdf5_chunksizes + ~cf.Count.nc_hdf5_chunksizes + ~cf.Count.nc_set_hdf5_chunksizes diff --git a/docs/source/class/cf.Data.rst b/docs/source/class/cf.Data.rst index 295bdb833c..488ea75df8 100644 --- a/docs/source/class/cf.Data.rst +++ b/docs/source/class/cf.Data.rst @@ -896,7 +896,10 @@ Deprecated ~cf.Data.to_disk ~cf.Data.to_memory ~cf.Data.unsafe_array - + ~cf.Data.nc_clear_hdf5_chunksizes + ~cf.Data.nc_hdf5_chunksizes + ~cf.Data.nc_set_hdf5_chunksizes + .. rubric:: Attributes .. autosummary:: diff --git a/docs/source/class/cf.DimensionCoordinate.rst b/docs/source/class/cf.DimensionCoordinate.rst index fafd795e79..d3e4944747 100644 --- a/docs/source/class/cf.DimensionCoordinate.rst +++ b/docs/source/class/cf.DimensionCoordinate.rst @@ -744,6 +744,9 @@ Deprecated ~cf.DimensionCoordinate.remove_data ~cf.DimensionCoordinate.select ~cf.DimensionCoordinate.setprop + ~cf.DimensionCoordinate.nc_clear_hdf5_chunksizes + ~cf.DimensionCoordinate.nc_hdf5_chunksizes + ~cf.DimensionCoordinate.nc_set_hdf5_chunksizes .. 
rubric:: Attributes diff --git a/docs/source/class/cf.DomainAncillary.rst b/docs/source/class/cf.DomainAncillary.rst index d290694a4a..13f4f78f6f 100644 --- a/docs/source/class/cf.DomainAncillary.rst +++ b/docs/source/class/cf.DomainAncillary.rst @@ -706,3 +706,6 @@ Deprecated ~cf.DomainAncillary.select ~cf.DomainAncillary.setprop ~cf.DomainAncillary.unsafe_array + ~cf.DomainAncillary.nc_clear_hdf5_chunksizes + ~cf.DomainAncillary.nc_hdf5_chunksizes + ~cf.DomainAncillary.nc_set_hdf5_chunksizes diff --git a/docs/source/class/cf.DomainTopology.rst b/docs/source/class/cf.DomainTopology.rst index 7468b7f0df..9e7e84e22b 100644 --- a/docs/source/class/cf.DomainTopology.rst +++ b/docs/source/class/cf.DomainTopology.rst @@ -584,3 +584,6 @@ Deprecated ~cf.DomainTopology.select ~cf.DomainTopology.setprop ~cf.DomainTopology.unsafe_array + ~cf.DomainTopology.nc_clear_hdf5_chunksizes + ~cf.DomainTopology.nc_hdf5_chunksizes + ~cf.DomainTopology.nc_set_hdf5_chunksizes diff --git a/docs/source/class/cf.Field.rst b/docs/source/class/cf.Field.rst index de91a5c2c3..72f9780687 100644 --- a/docs/source/class/cf.Field.rst +++ b/docs/source/class/cf.Field.rst @@ -1036,6 +1036,9 @@ Deprecated ~cf.Field.setprop ~cf.Field.transpose_item ~cf.Field.unlimited + ~cf.Field.nc_clear_hdf5_chunksizes + ~cf.Field.nc_hdf5_chunksizes + ~cf.Field.nc_set_hdf5_chunksizes .. rubric:: Attributes diff --git a/docs/source/class/cf.FieldAncillary.rst b/docs/source/class/cf.FieldAncillary.rst index f1ca9ffd62..679574045a 100644 --- a/docs/source/class/cf.FieldAncillary.rst +++ b/docs/source/class/cf.FieldAncillary.rst @@ -648,3 +648,6 @@ Deprecated ~cf.FieldAncillary.select ~cf.FieldAncillary.setprop ~cf.FieldAncillary.unsafe_array + ~cf.FieldAncillary.nc_clear_hdf5_chunksizes + ~cf.FieldAncillary.nc_hdf5_chunksizes + ~cf.FieldAncillary.nc_set_hdf5_chunksizes diff --git a/docs/source/class/cf.Index.rst b/docs/source/class/cf.Index.rst index b74eef381f..4a9a431127 100644 --- a/docs/source/class/cf.Index.rst +++ b/docs/source/class/cf.Index.rst @@ -661,3 +661,6 @@ Deprecated ~cf.Index.select ~cf.Index.setprop ~cf.Index.unsafe_array + ~cf.Index.nc_clear_hdf5_chunksizes + ~cf.Index.nc_hdf5_chunksizes + ~cf.Index.nc_set_hdf5_chunksizes diff --git a/docs/source/class/cf.List.rst b/docs/source/class/cf.List.rst index 744214d0f7..4f343ecc4c 100644 --- a/docs/source/class/cf.List.rst +++ b/docs/source/class/cf.List.rst @@ -647,3 +647,6 @@ Deprecated ~cf.List.select ~cf.List.setprop ~cf.List.unsafe_array + ~cf.List.nc_clear_hdf5_chunksizes + ~cf.List.nc_hdf5_chunksizes + ~cf.List.nc_set_hdf5_chunksizes From 28d9077493d077186810e2e14871d5383e99fa09 Mon Sep 17 00:00:00 2001 From: David Hassell Date: Fri, 23 May 2025 11:03:43 +0100 Subject: [PATCH 10/12] dev --- cf/read_write/read.py | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/cf/read_write/read.py b/cf/read_write/read.py index 73cf5ac187..835baf1a89 100644 --- a/cf/read_write/read.py +++ b/cf/read_write/read.py @@ -539,23 +539,27 @@ def _finalise(self): `None` """ - UM = "UM" in self.unique_dataset_categories + # Whether or not there were only netCDF datasets + only_netCDF = self.unique_dataset_categories == set(("netCDF",)) + + # Whether or not there were any UM datasets + some_UM = "UM" in self.unique_dataset_categories # ---------------------------------------------------------------- - # Select matching constructs from non-UM files (before + # Select matching constructs from netCDF datasets (before # aggregation) # 
----------------------------------------------------------------
         select = self.select
-        if select and not UM:
+        if select and only_netCDF:
             self.constructs = self.constructs.select_by_identity(*select)
 
         # ----------------------------------------------------------------
         # Aggregate the output fields or domains
         # ----------------------------------------------------------------
         if self.aggregate and len(self.constructs) > 1:
             aggregate_options = self.aggregate_options
 
             # Set defaults specific to UM fields
-            if UM and "strict_units" not in aggregate_options:
+            if some_UM and "strict_units" not in aggregate_options:
                 aggregate_options["relaxed_units"] = True
 
             self.constructs = cf_aggregate(
@@ -563,9 +567,9 @@ def _finalise(self):
             )
 
         # ----------------------------------------------------------------
-        # Add standard names to UM/PP fields (after aggregation)
+        # Add standard names to non-netCDF fields (after aggregation)
         # ----------------------------------------------------------------
-        if UM:
+        if not only_netCDF:
             for f in self.constructs:
                 standard_name = f._custom.get("standard_name", None)
                 if standard_name is not None:
@@ -573,10 +577,10 @@ def _finalise(self):
                     del f._custom["standard_name"]
 
         # ----------------------------------------------------------------
-        # Select matching constructs from UM files (after setting
-        # their standard names)
+        # Select matching constructs from non-netCDF files (after
+        # setting their standard names)
         # ----------------------------------------------------------------
-        if select and UM:
+        if select and not only_netCDF:
             self.constructs = self.constructs.select_by_identity(*select)
 
         super()._finalise()
@@ -721,7 +725,7 @@ def _read(self, dataset):
         # ------------------------------------------------------------
         # Try to read as a GRIB dataset
         #
-        # Not yet available! When (if) the time comes, the framework
+        # Not yet available. When (if!) the time comes, the framework
         # will be:
         # ------------------------------------------------------------

From cc44618b3aea29c975ae8dddb3b2224909821616 Mon Sep 17 00:00:00 2001
From: David Hassell
Date: Tue, 27 May 2025 13:45:26 +0100
Subject: [PATCH 11/12] dev

---
 cf/functions.py           | 138 +++++++++++++-----------------------
 cf/test/test_functions.py |  11 +--
 2 files changed, 53 insertions(+), 96 deletions(-)

diff --git a/cf/functions.py b/cf/functions.py
index 2018a5d9ff..33c400b8c0 100644
--- a/cf/functions.py
+++ b/cf/functions.py
@@ -1,12 +1,9 @@
 import atexit
 import csv
-import ctypes.util
-import importlib
 import logging
 import os
 import platform
 import re
-import sys
 import warnings
 from collections.abc import Iterable
 from itertools import product
@@ -34,7 +31,7 @@
 from .docstring import _docstring_substitution_definitions
 
 
-# Instruction to close /proc/mem at exit.
+# Instruction to close /proc/meminfo at exit.
 def _close_proc_meminfo():
     try:
         _meminfo_file.close()
@@ -3134,34 +3131,6 @@ def _section(x, axes=None, stop=None, chunks=False, min_step=1):
     return out
 
 
-def _get_module_info(module, alternative_name=False, try_except=False):
-    """Helper function for processing modules for cf.environment."""
-    if try_except:
-        module_name = None
-        try:
-            importlib.import_module(module)
-            module_name = module
-        except ImportError:
-            if (
-                alternative_name
-            ):  # where a module has a different (e.g.
old) name - try: - importlib.import_module(alternative_name) - module_name = alternative_name - except ImportError: - pass - - if not module_name: - return ("not available", "") - else: - module_name = module - - return ( - importlib.import_module(module_name).__version__, - importlib.util.find_spec(module_name).origin, - ) - - def environment(display=True, paths=True): """Return the names and versions of the cf package and its dependencies. @@ -3188,97 +3157,84 @@ def environment(display=True, paths=True): >>> cf.environment() Platform: Linux-6.8.0-60-generic-x86_64-with-glibc2.39 + Python: 3.12.8 /home/miniconda3/bin/python + packaging: 24.1 /home/miniconda3/lib/python3.12/site-packages/packaging/__init__.py + numpy: 2.2.6 /home/miniconda3/lib/python3.12/site-packages/numpy/__init__.py + cfdm.core: 1.12.2.0 /home/miniconda3/lib/python3.12/site-packages/cfdm/cfdm/core/__init__.py + packaging: 24.1 /home/miniconda3/lib/python3.12/site-packages/packaging/__init__.py + udunits2 library: libudunits2.so.0 HDF5 library: 1.14.2 netcdf library: 4.9.4-development - udunits2 library: libudunits2.so.0 - esmpy/ESMF: 8.7.0 /home/miniconda/lib/python3.12/site-packages/esmpy/__init__.py - Python: 3.12.8 /home/miniconda/bin/python - dask: 2025.5.1 /home/miniconda/lib/python3.12/site-packages/dask/__init__.py - netCDF4: 1.7.2 /home/miniconda/lib/python3.12/site-packages/netCDF4/__init__.py - h5netcdf: 1.3.0 /home/miniconda/lib/python3.12/site-packages/h5netcdf/__init__.py - h5py: 3.12.1 /home/miniconda/lib/python3.12/site-packages/h5py/__init__.py - s3fs: 2024.12.0 /home/miniconda/lib/python3.12/site-packages/s3fs/__init__.py - psutil: 6.1.1 /home/miniconda/lib/python3.12/site-packages/psutil/__init__.py - packaging: 24.2 /home/miniconda/lib/python3.12/site-packages/packaging/__init__.py - numpy: 2.2.2 /home/miniconda/lib/python3.12/site-packages/numpy/__init__.py - scipy: 1.15.2 /home/miniconda/lib/python3.12/site-packages/scipy/__init__.py - matplotlib: 3.10.0 /home/miniconda/lib/python3.12/site-packages/matplotlib/__init__.py - cftime: 1.6.4.post1 /home/miniconda/lib/python3.12/site-packages/cftime/__init__.py - cfunits: 3.3.7 /home/miniconda/lib/python3.12/site-packages/cfunits/__init__.py - cfplot: 3.3.0 /home/miniconda/lib/python3.12/site-packages/cfplot/__init__.py - cfdm: 1.12.2.0 /home/miniconda/lib/python3.12/site-packages/cfdm/__init__.py - cf: NEXTVERSION /home/miniconda/lib/python3.12/site-packages/cf/__init__.py + netCDF4: 1.7.2 /home/miniconda3/lib/python3.12/site-packages/netCDF4/__init__.py + h5netcdf: 1.3.0 /home/miniconda3/lib/python3.12/site-packages/h5netcdf/__init__.py + h5py: 3.12.1 /home/miniconda3/lib/python3.12/site-packages/h5py/__init__.py + zarr: 3.0.8 /home/miniconda3/lib/python3.12/site-packages/zarr/__init__.py + s3fs: 2024.12.0 /home/miniconda3/lib/python3.12/site-packages/s3fs/__init__.py + scipy: 1.15.1 /home/miniconda3/lib/python3.12/site-packages/scipy/__init__.py + dask: 2025.5.1 /home/miniconda3/lib/python3.12/site-packages/dask/__init__.py + cftime: 1.6.4.post1 /home/miniconda3/lib/python3.12/site-packages/cftime/__init__.py + cfunits: 3.3.7 /home/miniconda3/lib/python3.12/site-packages/cfunits/__init__.py + cfdm: 1.12.2.0 /home/miniconda3/lib/python3.12/site-packages/cfdm/__init__.py + esmpy/ESMF: 8.7.0 /home/miniconda3/lib/python3.12/site-packages/esmpy/__init__.py + psutil: 6.1.1 /home/miniconda3/lib/python3.12/site-packages/psutil/__init__.py + matplotlib: 3.10.0 /home/miniconda3/lib/python3.12/site-packages/matplotlib/__init__.py + cfplot: 3.4.0 
/home/miniconda3/lib/python3.12/site-packages/cfplot/__init__.py + cf: NEXTVERSION /home/miniconda3/lib/python3.12/site-packages/cf/__init__.py >>> cf.environment(paths=False) Platform: Linux-6.8.0-60-generic-x86_64-with-glibc2.39 + Python: 3.12.8 + packaging: 24.1 + numpy: 2.2.6 + cfdm.core: 1.12.2.0 + packaging: 24.1 + udunits2 library: libudunits2.so.0 HDF5 library: 1.14.2 netcdf library: 4.9.4-development - udunits2 library: libudunits2.so.0 - esmpy/ESMF: 8.7.0 - Python: 3.12.8 - dask: 2025.5.1 netCDF4: 1.7.2 h5netcdf: 1.3.0 h5py: 3.12.1 + zarr: 3.0.8 s3fs: 2024.12.0 - psutil: 6.1.1 - packaging: 24.2 - numpy: 2.2.2 - scipy: 1.15.2 - matplotlib: 3.10.0 + scipy: 1.15.1 + dask: 2025.5.1 cftime: 1.6.4.post1 cfunits: 3.3.7 - cfplot: 3.3.0 cfdm: 1.12.2.0 + esmpy/ESMF: 8.7.0 + psutil: 6.1.1 + matplotlib: 3.10.0 + cfplot: 3.4.0 cf: NEXTVERSION """ + # Get cfdm env + out = cfdm.environment(display=False, paths=paths) + + _get_module_info = cfdm.functions._get_module_info dependency_version_paths_mapping = { - # Platform first, then use an ordering to group libraries as follows... - "Platform": (platform.platform(), ""), - # Underlying C and Fortran based libraries first - "HDF5 library": (netCDF4.__hdf5libversion__, ""), - "netcdf library": (netCDF4.__netcdf4libversion__, ""), - "udunits2 library": (ctypes.util.find_library("udunits2"), ""), "esmpy/ESMF": ( _get_module_info("esmpy", alternative_name="ESMF", try_except=True) ), - # Now Python itself - "Python": (platform.python_version(), sys.executable), - # Then Dask (cover first from below as it's important under-the-hood) - "dask": _get_module_info("dask"), - # Then Python libraries not related to CF - "netCDF4": _get_module_info("netCDF4"), - "h5netcdf": _get_module_info("h5netcdf"), - "h5py": _get_module_info("h5py"), - "s3fs": _get_module_info("s3fs"), "psutil": _get_module_info("psutil"), - "packaging": _get_module_info("packaging"), - "numpy": _get_module_info("numpy"), - "scipy": _get_module_info("scipy"), "matplotlib": _get_module_info("matplotlib", try_except=True), - # Finally the CF related Python libraries, with the cf version last - # as it is the most relevant (cfdm penultimate for similar reason) - "cftime": _get_module_info("cftime"), - "cfunits": _get_module_info("cfunits"), "cfplot": _get_module_info("cfplot", try_except=True), - "cfdm": _get_module_info("cfdm"), "cf": (__version__, _os_path_abspath(__file__)), } string = "{0}: {1!s}" if paths: - # Include path information, else exclude, when unpacking tuple + # Include path information, else exclude, when unpacking tuple. 
string += " {2!s}" - out = [ - string.format(dep, *info) - for dep, info in dependency_version_paths_mapping.items() - ] - - out = "\n".join(out) + out.extend( + [ + string.format(dep, *info) + for dep, info in dependency_version_paths_mapping.items() + ] + ) if display: - print(out) # pragma: no cover + print("\n".join(out)) # pragma: no cover else: return out diff --git a/cf/test/test_functions.py b/cf/test/test_functions.py index fa7dc24458..7ddf71d0e0 100644 --- a/cf/test/test_functions.py +++ b/cf/test/test_functions.py @@ -286,13 +286,14 @@ def test_Configuration(self): def test_environment(self): e = cf.environment(display=False) ep = cf.environment(display=False, paths=False) - self.assertIsInstance(e, str) - self.assertIsInstance(ep, str) - components = ["Platform: ", "udunits2 library: ", "numpy: ", "cfdm: "] + self.assertIsInstance(e, list) + self.assertIsInstance(ep, list) + + components = ["Platform: ", "netCDF4: ", "numpy: ", "cftime: "] for component in components: - self.assertIn(component, e) - self.assertIn(component, ep) + self.assertTrue(any(s.startswith(component) for s in e)) + self.assertTrue(any(s.startswith(component) for s in ep)) for component in [ f"cf: {cf.__version__} {os.path.abspath(cf.__file__)}", f"Python: {platform.python_version()} {sys.executable}", From 6ee1a0ae4599afcdb490f2f27527d563909346d3 Mon Sep 17 00:00:00 2001 From: David Hassell Date: Mon, 2 Jun 2025 09:34:58 +0100 Subject: [PATCH 12/12] tidy --- cf/functions.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/cf/functions.py b/cf/functions.py index 33c400b8c0..2f05115048 100644 --- a/cf/functions.py +++ b/cf/functions.py @@ -3158,10 +3158,9 @@ def environment(display=True, paths=True): >>> cf.environment() Platform: Linux-6.8.0-60-generic-x86_64-with-glibc2.39 Python: 3.12.8 /home/miniconda3/bin/python - packaging: 24.1 /home/miniconda3/lib/python3.12/site-packages/packaging/__init__.py + packaging: 24.2 /home/miniconda3/lib/python3.12/site-packages/packaging/__init__.py numpy: 2.2.6 /home/miniconda3/lib/python3.12/site-packages/numpy/__init__.py cfdm.core: 1.12.2.0 /home/miniconda3/lib/python3.12/site-packages/cfdm/cfdm/core/__init__.py - packaging: 24.1 /home/miniconda3/lib/python3.12/site-packages/packaging/__init__.py udunits2 library: libudunits2.so.0 HDF5 library: 1.14.2 netcdf library: 4.9.4-development @@ -3184,10 +3183,9 @@ def environment(display=True, paths=True): >>> cf.environment(paths=False) Platform: Linux-6.8.0-60-generic-x86_64-with-glibc2.39 Python: 3.12.8 - packaging: 24.1 + packaging: 24.2 numpy: 2.2.6 cfdm.core: 1.12.2.0 - packaging: 24.1 udunits2 library: libudunits2.so.0 HDF5 library: 1.14.2 netcdf library: 4.9.4-development
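
A note for callers of `cf.environment`: as the updated tests above assert,
when ``display=False`` the function now returns a `list` of strings, one per
component, built on top of `cfdm.environment`, rather than a single
newline-joined string. A minimal sketch of checking the new return value:

    >>> env = cf.environment(display=False, paths=False)
    >>> isinstance(env, list)
    True
    >>> any(line.startswith("cf: ") for line in env)
    True
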