diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 65a9d0caf..0c255051e 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,6 +1,5 @@ default_language_version: python: python3.10 - repos: - repo: https://github.com/pycqa/isort rev: 7.0.0 @@ -16,4 +15,4 @@ repos: - repo: https://github.com/pycqa/flake8 rev: 7.3.0 # Use the latest version of flake8 hooks: - - id: flake8 \ No newline at end of file + - id: flake8 diff --git a/polytope_feature/datacube/datacube_axis.py b/polytope_feature/datacube/datacube_axis.py index e6a5c4935..49cdb055f 100644 --- a/polytope_feature/datacube/datacube_axis.py +++ b/polytope_feature/datacube/datacube_axis.py @@ -101,39 +101,54 @@ def _remap_val_to_axis_range(self, value): value = transformation._remap_val_to_axis_range(value, self) return value - def find_standard_indices_between(self, indexes, low, up, datacube, method=None): - indexes_between_ranges = [] - - if self.name in datacube.complete_axes and self.name not in datacube.transformed_axes: - # Find the range of indexes between lower and upper - # https://pandas.pydata.org/docs/reference/api/pandas.Index.searchsorted.html - # Assumes the indexes are already sorted (could sort to be sure) and monotonically increasing - if method == "surrounding" or method == "nearest": - start = indexes.searchsorted(low, "left") - end = indexes.searchsorted(up, "right") - start = max(start - 1, 0) - end = min(end + 1, len(indexes)) - indexes_between = indexes[start:end].to_list() - indexes_between_ranges.extend(indexes_between) - else: - start = indexes.searchsorted(low, "left") - end = indexes.searchsorted(up, "right") - indexes_between = indexes[start:end].to_list() - indexes_between_ranges.extend(indexes_between) + def _mixed_key(self, x): + + if isinstance(x, pd.Timedelta): + return (0, x.total_seconds()) + elif isinstance(x, str): + return (1, x) else: - if method == "surrounding" or method == "nearest": - start = bisect.bisect_left(indexes, low) - end = bisect.bisect_right(indexes, up) + return (2, x) + + def _is_mixed(self, indexes): + types = {type(x) for x in indexes} + return len(types) > 1 + + def find_standard_indices_between(self, indexes, low, up, datacube, method=None): + + indexes_list = list(indexes) + + # If homogeneous → use fast path + if not self._is_mixed(indexes_list): + + if method in ("surrounding", "nearest"): + start = bisect.bisect_left(indexes_list, low) + end = bisect.bisect_right(indexes_list, up) start = max(start - 1, 0) - end = min(end + 1, len(indexes)) - indexes_between = indexes[start:end] - indexes_between_ranges.extend(indexes_between) + end = min(end + 1, len(indexes_list)) else: - lower_idx = bisect.bisect_left(indexes, low) - upper_idx = bisect.bisect_right(indexes, up) - indexes_between = indexes[lower_idx:upper_idx] - indexes_between_ranges.extend(indexes_between) - return indexes_between_ranges + start = bisect.bisect_left(indexes_list, low) + end = bisect.bisect_right(indexes_list, up) + + return indexes_list[start:end] + + # Mixed types → fallback (robust) + low_k = self._mixed_key(low) + up_k = self._mixed_key(up) + + filtered = [x for x in indexes_list if low_k <= self._mixed_key(x) <= up_k] + + if method in ("surrounding", "nearest") and filtered: + # add neighbors manually + first_idx = indexes_list.index(filtered[0]) + last_idx = indexes_list.index(filtered[-1]) + + start = max(first_idx - 1, 0) + end = min(last_idx + 2, len(indexes_list)) + + return indexes_list[start:end] + + return filtered def find_indices_between(self, indexes_ranges, low, up, datacube, method=None): indexes_between_ranges = self.find_standard_indices_between(indexes_ranges, low, up, datacube, method) @@ -275,13 +290,20 @@ def __init__(self): def parse(self, value: Any) -> Any: if isinstance(value, np.str_): value = str(value) + if isinstance(value, str) and "-" in value: + return value return pd.Timedelta(value) def to_float(self, value: pd.Timedelta): if isinstance(value, np.timedelta64): return value.astype("timedelta64[s]").astype(int) else: - return float(value.value / 10**9) + if isinstance(value, str): + if "-" in value: + return value + return float(value) + else: + return float(value.value / 10**9) def from_float(self, value): return pd.Timedelta(int(value), unit="s") diff --git a/polytope_feature/datacube/switching_grid_helper.py b/polytope_feature/datacube/switching_grid_helper.py deleted file mode 100644 index 3abc909b1..000000000 --- a/polytope_feature/datacube/switching_grid_helper.py +++ /dev/null @@ -1,210 +0,0 @@ -import json -import math -import os -import tempfile - -import eccodes - -# from polytope_feature.options import MapperConfig - - -def get_first_grib_message(req): - import pyfdb - - fdb = pyfdb.FDB() - - # Make sure that we are accessing a single georef so that the grid is consistent - assert "georef" in req.keys() - - first_field = next(fdb.list(req, keys=True))["keys"] - - field = fdb.retrieve(first_field) - - # Normalize the retrieve() result into a plain `bytes` object - if hasattr(field, "read"): - # file-like object: read the contents - data = field.read() - else: - data = field - - # Convert common buffer types to bytes - if isinstance(data, bytes): - msg_bytes = data - elif isinstance(data, bytearray): - msg_bytes = bytes(data) - elif isinstance(data, memoryview): - msg_bytes = data.tobytes() - else: - # last resort: try to construct bytes (may raise) - try: - msg_bytes = bytes(data) - except Exception as e: - raise TypeError(f"Unsupported GRIB message type: {type(data)!r}") from e - - gid = eccodes.codes_new_from_message(msg_bytes) - return gid - - -def get_gridspec_lamebert_conformal(gid): - # Lambert lam grid - - to_rad = math.pi / 180 - - md5hash = eccodes.codes_get(gid, "md5GridSection") - - earth_round = (eccodes.codes_get(gid, "shapeOfTheEarth") == 0) or (eccodes.codes_get(gid, "shapeOfTheEarth") == 6) - - if earth_round: - if eccodes.codes_get(gid, "shapeOfTheEarth") == 6: - radius = 6371229 - elif eccodes.codes_get(gid, "shapeOfTheEarth") == 0: - radius = 6367470 - else: - # TODO: set the earth major and minor axis accordingly - pass - - nv = eccodes.codes_get(gid, "NV") - nx = eccodes.codes_get(gid, "Nx") - ny = eccodes.codes_get(gid, "Ny") - LoVInDegrees = eccodes.codes_get(gid, "LoV") / 1000000 - Dx = eccodes.codes_get(gid, "Dx") - Dy = eccodes.codes_get(gid, "Dy") - latFirstInRadians = eccodes.codes_get(gid, "latitudeOfFirstGridPoint") / 1000000 * to_rad - lonFirstInRadians = eccodes.codes_get(gid, "longitudeOfFirstGridPoint") / 1000000 * to_rad - LoVInRadians = eccodes.codes_get(gid, "LoV") / 1000000 * to_rad - Latin1InRadians = eccodes.codes_get(gid, "Latin1") / 1000000 * to_rad - Latin2InRadians = eccodes.codes_get(gid, "Latin2") / 1000000 * to_rad - LaDInRadians = eccodes.codes_get(gid, "LaD") / 1000000 * to_rad - - gridspec = { - "type": "lambert_conformal", - "earth_round": earth_round, - "radius": radius, - "nv": nv, - "nx": nx, - "ny": ny, - "LoVInDegrees": LoVInDegrees, - "Dx": Dx, - "Dy": Dy, - "latFirstInRadians": latFirstInRadians, - "lonFirstInRadians": lonFirstInRadians, - "LoVInRadians": LoVInRadians, - "Latin1InRadians": Latin1InRadians, - "Latin2InRadians": Latin2InRadians, - "LaDInRadians": LaDInRadians, - } - return (gridspec, md5hash) - - -def get_gridspec_icon(gid): - # ICON - # TODO: Need the following: - # uuid: Optional[str] = None - md5hash = eccodes.codes_get(gid, "md5GridSection") - gridspec = {} - return (gridspec, md5hash) - - -def get_gridspec_and_hash(gid): - grid_type = eccodes.codes_get(gid, "gridType") - if grid_type == "lambert_lam": - return get_gridspec_lamebert_conformal(gid) - elif grid_type == "icon": - return get_gridspec_icon(gid) - else: - raise ValueError(f"Unsupported grid type: {grid_type}") - - -# TODO: extract the right info and then write it to file, one for the grid hash and one for the actual config - - -def lookup_grid_config(req): - gid = get_first_grib_message(req) - req_georef = req["georef"] - - # Cache file stored alongside this module - GRID_CACHE_FILE = os.path.join(os.path.dirname(__file__), "grid_cache.json") - - def _load_cache(): - try: - with open(GRID_CACHE_FILE, "r", encoding="utf-8") as fh: - return json.load(fh) - except FileNotFoundError: - return {} - except Exception: - return {} - - def _save_cache(cache): - dirpath = os.path.dirname(GRID_CACHE_FILE) - os.makedirs(dirpath, exist_ok=True) - fd, tmp = tempfile.mkstemp(dir=dirpath, prefix=".grid_cache.") - try: - with os.fdopen(fd, "w", encoding="utf-8") as fh: - json.dump(cache, fh, indent=2, sort_keys=True) - os.replace(tmp, GRID_CACHE_FILE) - finally: - if os.path.exists(tmp): - try: - os.remove(tmp) - except Exception: - pass - - # Use a stable serialization of the georef as the cache key - try: - cache_key = json.dumps(req_georef, sort_keys=True, default=str) - except Exception: - cache_key = str(req_georef) - - cache = _load_cache() - - try: - if cache_key in cache: - entry = cache[cache_key] - return (entry.get("gridspec"), entry.get("md5hash")) - - gridspec, md5hash = get_gridspec_and_hash(gid) - cache[cache_key] = {"gridspec": gridspec, "md5hash": md5hash} - try: - _save_cache(cache) - except Exception: - # Swallow cache write errors but continue to return computed value - pass - return (gridspec, md5hash) - finally: - eccodes.codes_release(gid) - - -# def gridspec_to_grid_config(gridspec, md5hash): -# if gridspec.get("type") == "lambert_conformal": -# mc = MapperConfig( -# name="mapper", -# type="lambert_conformal", -# md5_hash=md5hash, -# is_spherical=gridspec.get("earth_round"), -# radius=gridspec.get("radius"), -# nv=gridspec.get("nv"), -# nx=gridspec.get("nx"), -# ny=gridspec.get("ny"), -# LoVInDegrees=gridspec.get("LoVInDegrees"), -# Dx=gridspec.get("Dx"), -# Dy=gridspec.get("Dy"), -# latFirstInRadians=gridspec.get("latFirstInRadians"), -# lonFirstInRadians=gridspec.get("lonFirstInRadians"), -# LoVInRadians=gridspec.get("LoVInRadians"), -# Latin1InRadians=gridspec.get("Latin1InRadians"), -# Latin2InRadians=gridspec.get("Latin2InRadians"), -# LaDInRadians=gridspec.get("LaDInRadians"), -# ) -# return mc -# return None - -# def replace_grid_config_in_options(options, req): -# gridspec, md5hash = lookup_grid_config(req) -# grid_config = gridspec_to_grid_config(gridspec, md5hash) -# if grid_config is not None: -# for axis_conf in options.axis_config: -# for idx, transformation in enumerate(axis_conf.transformations): -# if getattr(transformation, "name", None) == "mapper": -# axis_conf.transformations[idx] = grid_config -# return True -# return False diff --git a/polytope_feature/datacube/transformations/datacube_type_change/datacube_type_change.py b/polytope_feature/datacube/transformations/datacube_type_change/datacube_type_change.py index 186facb3b..c623b13da 100644 --- a/polytope_feature/datacube/transformations/datacube_type_change/datacube_type_change.py +++ b/polytope_feature/datacube/transformations/datacube_type_change/datacube_type_change.py @@ -30,7 +30,7 @@ def change_val_type(self, axis_name, values): return_idx = [self._final_transformation.transform_type(val) for val in values] if None in return_idx: return None - return_idx.sort() + return_idx.sort(key=lambda x: (isinstance(x, str), x)) return return_idx def make_str(self, value): @@ -188,7 +188,11 @@ def transform_type(self, value): if isinstance(value, str) and value.isdigit(): return pd.Timedelta(hours=int(value)) - if isinstance(value, str): + elif isinstance(value, str) and "-" in value: + # Step range is not parsed here + return value + + elif isinstance(value, str): # Extract days, hours, minutes and seconds using regex h_match = re.search(r"(\d+)\s*h", value) m_match = re.search(r"(\d+)\s*m(?:in)?", value) @@ -197,7 +201,6 @@ def transform_type(self, value): hours = int(h_match.group(1)) if h_match else 0 minutes = int(m_match.group(1)) if m_match else 0 seconds = int(s_match.group(1)) if s_match else 0 - return pd.Timedelta(hours=hours, minutes=minutes, seconds=seconds) raise ValueError(f"Unsupported timestep format: {value}") @@ -205,6 +208,9 @@ def transform_type(self, value): def make_str(self, value): return_vals = [] for val in value: + if isinstance(val, str) and "-" in val: + return_vals.append(val) + continue total_seconds = int(val.total_seconds()) hours, rem = divmod(total_seconds, 3600) minutes, seconds = divmod(rem, 60) @@ -236,7 +242,8 @@ def make_str(self, value): return_vals.append(f"{hours}h{minutes}m") else: return_vals.append(f"{hours}h{minutes}m{seconds}s") - return return_vals + unique_list = list(dict.fromkeys(return_vals)) + return unique_list _type_to_datacube_type_change_lookup = { diff --git a/polytope_feature/engine/hullslicer.py b/polytope_feature/engine/hullslicer.py index 63fb7cc7c..f3f8f69bb 100644 --- a/polytope_feature/engine/hullslicer.py +++ b/polytope_feature/engine/hullslicer.py @@ -48,9 +48,13 @@ def _build_unsliceable_child(self, polytope, ax, node, datacube, lowers, next_no raise ValueError(errmsg) def find_values_between(self, polytope, ax, node, datacube, lower, upper): - tol = ax.tol - lower = ax.from_float(lower - tol) - upper = ax.from_float(upper + tol) + if isinstance(lower, str) and isinstance(upper, str): + pass + else: + tol = ax.tol + # print("WHAT IS LOW AND UP HERE ", lower, upper) + lower = ax.from_float(lower - tol) + upper = ax.from_float(upper + tol) flattened = node.flatten() method = polytope.method diff --git a/polytope_feature/options.py b/polytope_feature/options.py index 6117aa318..10791867e 100644 --- a/polytope_feature/options.py +++ b/polytope_feature/options.py @@ -1,5 +1,4 @@ import argparse -import logging from abc import ABC from typing import Dict, List, Literal, Optional, Tuple, Union @@ -85,7 +84,6 @@ class Config(ConfigModel): alternative_axes: Optional[List[GribJumpAxesConfig]] = [] use_catalogue: Optional[bool] = False engine_options: Optional[Dict[str, str]] = {} - dynamic_grid: Optional[bool] = False class PolytopeOptions(ABC): @@ -98,24 +96,10 @@ def get_polytope_options(options): axis_config = config_options.axis_config compressed_axes_config = config_options.compressed_axes_config pre_path = config_options.pre_path - dynamic_grid = config_options.dynamic_grid alternative_axes = config_options.alternative_axes use_catalogue = config_options.use_catalogue engine_options = config_options.engine_options - if dynamic_grid: - # TODO: look at the pre-path and query the eccodes function to get the new grid option - # TODO: then change the grid option inside of the axis_config - try: - replaced = replace_grid_config_in_options(config_options, pre_path) - if replaced: - axis_config = config_options.axis_config - except Exception as e: - logging.warning( - "Dynamic grid replacement failed for georef '%s': %s. Using static grid config.", - pre_path.get("georef", "unknown"), - e, - ) return ( axis_config, compressed_axes_config, @@ -150,17 +134,3 @@ def gridspec_to_grid_config(gridspec, md5hash): ) return mc return None - - -def replace_grid_config_in_options(options, req): - from polytope_feature.datacube.switching_grid_helper import lookup_grid_config - - gridspec, md5hash = lookup_grid_config(req) - grid_config = gridspec_to_grid_config(gridspec, md5hash) - if grid_config is not None: - for axis_conf in options.axis_config: - for idx, transformation in enumerate(axis_conf.transformations): - if getattr(transformation, "name", None) == "mapper": - axis_conf.transformations[idx] = grid_config - return True - return False diff --git a/polytope_feature/polytope.py b/polytope_feature/polytope.py index 7f45a776e..02fbc739c 100644 --- a/polytope_feature/polytope.py +++ b/polytope_feature/polytope.py @@ -174,6 +174,8 @@ def slice(self, datacube, polytopes: List[ConvexPolytope]): return request def find_engine(self, ax): + if ax.name not in self.engine_options: + raise ValueError(f"No engine specified for axis {ax.name}") slicer_type = self.engine_options[ax.name] return self.engines[slicer_type] diff --git a/pyproject.toml b/pyproject.toml index bd21e4156..3f375d353 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -46,7 +46,6 @@ dependencies = [ tests = [ "pytest", "pytest-cov", - "cffi", "eccodes", "h5netcdf", "h5py", @@ -63,11 +62,6 @@ catalogue = [ "qubed" ] -switching_grids = [ - "eccodes", - "pyfdb<=0.1.3" -] - [project.urls] repository = "https://github.com/ecmwf/polytope" documentation = "https://polytope.readthedocs.io/en/latest/"