diff --git a/pyproject.toml b/pyproject.toml index 57b4edb..ca7cc22 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -28,7 +28,7 @@ Tracker = "https://github.com/polusai/microjson/issues" ipykernel = "^6.27.1" [tool.poetry.dependencies] -python = ">=3.9.15,<3.14" +python = ">=3.11,<3.14" pydantic = "^2.3.0" geojson-pydantic = "^1.2.0" geojson2vt = "^1.0.1" diff --git a/src/microjson/automodel.py b/src/microjson/_legacy/automodel.py similarity index 100% rename from src/microjson/automodel.py rename to src/microjson/_legacy/automodel.py diff --git a/src/microjson/roundtrip.py b/src/microjson/_legacy/roundtrip.py similarity index 100% rename from src/microjson/roundtrip.py rename to src/microjson/_legacy/roundtrip.py diff --git a/src/microjson/microjson2vt/convert.py b/src/microjson/microjson2vt/convert.py index ed37038..1e7d281 100644 --- a/src/microjson/microjson2vt/convert.py +++ b/src/microjson/microjson2vt/convert.py @@ -6,7 +6,6 @@ import math from abc import ABC, abstractmethod -from .simplify import simplify from .feature import Slice, create_feature # converts Microjson feature into an intermediate projected JSON vector format @@ -17,12 +16,14 @@ def convert(data, options): """ wrapper around AbstractProjector.convert """ - projector = options.get('projector') - bounds = options.get('bounds') + projector = options.get("projector") + bounds = options.get("bounds") if projector is None: - projector = CartesianProjector( - options.get('bounds')) if bounds is not None else \ - MercatorProjector() + projector = ( + CartesianProjector(options.get("bounds")) + if bounds is not None + else MercatorProjector() + ) return projector.convert(data, options) @@ -33,6 +34,7 @@ class AbstractProjector(ABC): Concrete classes should implement the project_x and project_y methods. """ + def __init__(self, bounds=None): self.bounds = bounds @@ -46,18 +48,15 @@ def project_y(self, y): def convert(self, data, options): features = [] - if data.get('type') == 'FeatureCollection': - for i in range(len(data.get('features'))): - self.convert_feature( - features, - data.get('features')[i], - options, i) + if data.get("type") == "FeatureCollection": + for i in range(len(data.get("features"))): + self.convert_feature(features, data.get("features")[i], options, i) # check that geometry is not empty - if len(features[-1].get('geometry')) == 0: + if len(features[-1].get("geometry")) == 0: # remove feature with index i features.pop() - elif data.get('type') == 'Feature': + elif data.get("type") == "Feature": self.convert_feature(features, data, options) else: # single geometry or a geometry collection @@ -65,69 +64,76 @@ def convert(self, data, options): return features def convert_feature(self, features, geojson, options, index=None): - if geojson.get('geometry', None) is None: + if geojson.get("geometry", None) is None: return - coords = geojson.get('geometry').get('coordinates') + coords = geojson.get("geometry").get("coordinates") if coords is not None and len(coords) == 0: return - type_ = geojson.get('geometry').get('type') - tolerance = math.pow(options.get( - 'tolerance') / ((1 << options.get('maxZoom')) * options.get( - 'extent')), 2) + type_ = geojson.get("geometry").get("type") + tolerance = math.pow( + options.get("tolerance") + / ((1 << options.get("maxZoom")) * options.get("extent")), + 2, + ) geometry = Slice([]) - id_ = geojson.get('id') - if options.get('promoteId', None) is not None and geojson.get( - 'properties', None) is not None and 'promoteId' in geojson.get( - 'properties'): - id_ = geojson['properties'][options.get('promoteId')] - elif options.get('generateId', False): + id_ = geojson.get("id") + if ( + options.get("promoteId", None) is not None + and geojson.get("properties", None) is not None + and "promoteId" in geojson.get("properties") + ): + id_ = geojson["properties"][options.get("promoteId")] + elif options.get("generateId", False): id_ = index if index is not None else 0 - if type_ == 'Point': + if type_ == "Point": self.convert_point(coords, geometry) - elif type_ == 'MultiPoint': + elif type_ == "MultiPoint": for p in coords: self.convert_point(p, geometry) - elif type_ == 'LineString': + elif type_ == "LineString": self.convert_line(coords, geometry, tolerance, False) - elif type_ == 'MultiLineString': - if options.get('lineMetrics'): + elif type_ == "MultiLineString": + if options.get("lineMetrics"): # explode into linestrings to be able to track metrics for line in coords: geometry = Slice([]) self.convert_line(line, geometry, tolerance, False) features.append( create_feature( - id_, - 'LineString', - geometry, - geojson.get('properties'))) + id_, "LineString", geometry, geojson.get("properties") + ) + ) return else: self.convert_lines(coords, geometry, tolerance, False) - elif type_ == 'Polygon': + elif type_ == "Polygon": self.convert_lines(coords, geometry, tolerance, True) - elif type_ == 'MultiPolygon': + elif type_ == "MultiPolygon": for polygon in coords: newPolygon = [] self.convert_lines(polygon, newPolygon, tolerance, True) geometry.append(newPolygon) - elif type_ == 'GeometryCollection': - for singleGeometry in geojson['geometry']['geometries']: - self.convert_feature(features, { - "id": str(id_), - "geometry": singleGeometry, - "properties": geojson.get('properties') - }, options, index) + elif type_ == "GeometryCollection": + for singleGeometry in geojson["geometry"]["geometries"]: + self.convert_feature( + features, + { + "id": str(id_), + "geometry": singleGeometry, + "properties": geojson.get("properties"), + }, + options, + index, + ) return else: - raise Exception('Input data is not a valid GeoJSON object.') + raise Exception("Input data is not a valid GeoJSON object.") - features.append(create_feature( - id_, type_, geometry, geojson.get('properties'))) + features.append(create_feature(id_, type_, geometry, geojson.get("properties"))) def convert_point(self, coords, out): out.append(self.project_x(coords[0])) @@ -151,7 +157,8 @@ def convert_line(self, ring, out, tolerance, isPolygon): size += (x0 * y - x * y0) / 2 # area else: size += math.sqrt( - math.pow(x - x0, 2) + math.pow(y - y0, 2)) # length + math.pow(x - x0, 2) + math.pow(y - y0, 2) + ) # length x0 = x y0 = y @@ -187,13 +194,13 @@ def project_y(self, y): class MercatorProjector(AbstractProjector): def project_x(self, x): - return x / 360. + 0.5 + return x / 360.0 + 0.5 def project_y(self, y): - sin = math.sin(y * math.pi / 180.) - if sin == 1.: - return 0. - if sin == -1.: - return 1. - y2 = 0.5 - 0.25 * math.log((1. + sin) / (1. - sin)) / math.pi - return 0 if y2 < 0. else (1. if y2 > 1. else y2) + sin = math.sin(y * math.pi / 180.0) + if sin == 1.0: + return 0.0 + if sin == -1.0: + return 1.0 + y2 = 0.5 - 0.25 * math.log((1.0 + sin) / (1.0 - sin)) / math.pi + return 0 if y2 < 0.0 else (1.0 if y2 > 1.0 else y2) diff --git a/src/microjson/microjson2vt/microjson2vt.py b/src/microjson/microjson2vt/microjson2vt.py index 0e9f7cd..d06cd8a 100644 --- a/src/microjson/microjson2vt/microjson2vt.py +++ b/src/microjson/microjson2vt/microjson2vt.py @@ -16,71 +16,76 @@ def default_tolerance_func(z, options): """Calculates the default simplification tolerance based on zoom level.""" # Ensure options exist and have defaults if necessary - tolerance_val = options.get('tolerance', 50) # Use default if not present - extent_val = options.get('extent', 4096) # Use default if not present + tolerance_val = options.get("tolerance", 50) # Use default if not present + extent_val = options.get("extent", 4096) # Use default if not present denominator = (1 << z) * extent_val if denominator == 0: - # Avoid division by zero, return a very small tolerance - # Consider if raising an error might be better depending on context - return 1e-12 + # Avoid division by zero, return a very small tolerance + # Consider if raising an error might be better depending on context + return 1e-12 return (tolerance_val / denominator) ** 2 # --- Alternative Tolerance Functions --- + def linear_tolerance_func(z, options): """Linear scaling: tolerance decreases linearly with map scale.""" - tolerance_val = options.get('tolerance', 50) - extent_val = options.get('extent', 4096) + tolerance_val = options.get("tolerance", 50) + extent_val = options.get("extent", 4096) denominator = (1 << z) * extent_val if denominator == 0: - return 1e-12 # Avoid division by zero + return 1e-12 # Avoid division by zero # Note: No square here compared to default return tolerance_val / denominator + def constant_tolerance_func(z, options): """Constant tolerance relative to extent (same simplification regardless of zoom).""" - tolerance_val = options.get('tolerance', 50) - extent_val = options.get('extent', 4096) + tolerance_val = options.get("tolerance", 50) + extent_val = options.get("extent", 4096) if extent_val == 0: - return 1e-12 # Avoid division by zero + return 1e-12 # Avoid division by zero # Apply the base tolerance scaled by extent, squared like the default, but without zoom factor return (tolerance_val / extent_val) ** 2 # Alternative: return a fixed value if extent scaling is not desired e.g. options.get('tolerance', 50) + def slow_exponential_tolerance_func(z, options, exponent=1.5): """Slower exponential decay (exponent < 2). Tune exponent as needed.""" - tolerance_val = options.get('tolerance', 50) - extent_val = options.get('extent', 4096) + tolerance_val = options.get("tolerance", 50) + extent_val = options.get("extent", 4096) denominator = (1 << z) * extent_val if denominator == 0: return 1e-12 return (tolerance_val / denominator) ** exponent + def logarithmic_tolerance_func(z, options): """Logarithmic scaling: tolerance decreases slowly, especially at high zooms.""" - tolerance_val = options.get('tolerance', 50) - extent_val = options.get('extent', 4096) + tolerance_val = options.get("tolerance", 50) + extent_val = options.get("extent", 4096) # Use log(z + 2) to avoid log(0) or log(1) issues at low zooms log_factor = math.log(z + 2) if extent_val == 0 or log_factor == 0: - return 1e-12 # Avoid division by zero + return 1e-12 # Avoid division by zero # Example scaling - adjust as needed return tolerance_val / (log_factor * extent_val) + def step_tolerance_func(z, options): """Step function: different tolerance levels for different zoom ranges.""" - base_tolerance = options.get('tolerance', 50) - extent = options.get('extent', 4096) - index_max_zoom = options.get('indexMaxZoom', 5) - max_zoom = options.get('maxZoom', 8) + base_tolerance = options.get("tolerance", 50) + extent = options.get("extent", 4096) + index_max_zoom = options.get("indexMaxZoom", 5) + max_zoom = options.get("maxZoom", 8) # Define zoom thresholds and corresponding multipliers - if z < index_max_zoom - 1: # Low zooms (e.g., < 4 if indexMaxZoom is 5) + if z < index_max_zoom - 1: # Low zooms (e.g., < 4 if indexMaxZoom is 5) effective_tolerance = base_tolerance * 4 - elif z < max_zoom - 1: # Mid zooms (e.g., 4-6 if maxZoom is 8) + elif z < max_zoom - 1: # Mid zooms (e.g., 4-6 if maxZoom is 8) effective_tolerance = base_tolerance * 1.5 - else: # High zooms (e.g., >= 7 if maxZoom is 8) + else: # High zooms (e.g., >= 7 if maxZoom is 8) effective_tolerance = base_tolerance * 0.5 # Apply scaling based on extent and zoom, similar to default @@ -104,20 +109,21 @@ def step_tolerance_func(z, options): "step": step_tolerance_func, } + def get_default_options(): return { - "maxZoom": 8, # max zoom to preserve detail on - "indexMaxZoom": 5, # max zoom in the tile index + "maxZoom": 8, # max zoom to preserve detail on + "indexMaxZoom": 5, # max zoom in the tile index "indexMaxPoints": 100000, # max number of points per tile in the index - "tolerance": 50, # simplification tolerance (higher - simpler) - "extent": 4096, # tile extent - "buffer": 64, # tile buffer on each side - "lineMetrics": False, # whether to calculate line metrics - "promoteId": None, # name of a feature property to be promoted - "generateId": False, # whether to generate feature ids. - "projector": None, # which projection to use - "bounds": None, # [west, south, east, north] - "tolerance_function": default_tolerance_func # function to calculate tolerance per zoom + "tolerance": 50, # simplification tolerance (higher - simpler) + "extent": 4096, # tile extent + "buffer": 64, # tile buffer on each side + "lineMetrics": False, # whether to calculate line metrics + "promoteId": None, # name of a feature property to be promoted + "generateId": False, # whether to generate feature ids. + "projector": None, # which projection to use + "bounds": None, # [west, south, east, north] + "tolerance_function": default_tolerance_func, # function to calculate tolerance per zoom } @@ -126,6 +132,7 @@ class MicroJsonVt: MicroJsonVt class, which is the main class for generating vector tiles from MicroJSON data """ + def __init__(self, data, options, log_level=logging.INFO): """ Constructor for MicroJsonVt class @@ -136,14 +143,19 @@ def __init__(self, data, options, log_level=logging.INFO): log_level (int): The logging level to be used """ logging.basicConfig( - level=log_level, format='%(asctime)s %(levelname)s %(message)s') - options = self.options = extend(get_default_options(), options) + level=log_level, format="%(asctime)s %(levelname)s %(message)s" + ) + defaults = get_default_options() + defaults.update(options) + options = self.options = defaults # Validate and resolve tolerance_function - tolerance_setting = options.get('tolerance_function') + tolerance_setting = options.get("tolerance_function") if isinstance(tolerance_setting, str): if tolerance_setting in AVAILABLE_TOLERANCE_FUNCTIONS: - options['tolerance_function'] = AVAILABLE_TOLERANCE_FUNCTIONS[tolerance_setting] + options["tolerance_function"] = AVAILABLE_TOLERANCE_FUNCTIONS[ + tolerance_setting + ] else: raise ValueError( f"Invalid tolerance function key: '{tolerance_setting}'. " @@ -155,15 +167,14 @@ def __init__(self, data, options, log_level=logging.INFO): ) # If it's already callable, we use it directly. - logging.debug('preprocess data start') + logging.debug("preprocess data start") - if options.get('maxZoom') < 0 or options.get('maxZoom') > 24: - raise Exception('maxZoom should be in the 0-24 range') - if options.get( - 'promoteId', None) is not None and options.get( - 'generateId', False): - raise Exception( - 'promoteId and generateId cannot be used together.') + if options.get("maxZoom") < 0 or options.get("maxZoom") > 24: + raise Exception("maxZoom should be in the 0-24 range") + if options.get("promoteId", None) is not None and options.get( + "generateId", False + ): + raise Exception("promoteId and generateId cannot be used together.") # projects and adds simplification info # Create a new instance of a CartesianProjector @@ -171,25 +182,26 @@ def __init__(self, data, options, log_level=logging.INFO): features = convert(data, options) # Create a separate geometry for each zoom level - for z in range(options.get('maxZoom') + 1): + for z in range(options.get("maxZoom") + 1): for feature in features: - feature[f'geometry_z{z}'] = feature['geometry'].copy() + feature[f"geometry_z{z}"] = feature["geometry"].copy() - tolerance_func = options['tolerance_function'] # Resolved above + tolerance_func = options["tolerance_function"] # Resolved above # Simplify features for each zoom level - for z in range(options.get('maxZoom') + 1): + for z in range(options.get("maxZoom") + 1): # Calculate tolerance using the provided or default function tolerance = tolerance_func(z, options) for feature in features: - geometry_key = f'geometry_z{z}' + geometry_key = f"geometry_z{z}" # check feature type only simplify Polygon - if feature['type'] == 'Polygon': + if feature["type"] == "Polygon": for iring in range(len(feature[geometry_key])): ring = feature[geometry_key][iring] # Convert geom to list of [x, y] pairs - coords = [[ring[i], ring[i + 1]] for i in range( - 0, len(ring), 3)] + coords = [ + [ring[i], ring[i + 1]] for i in range(0, len(ring), 3) + ] scoords = simplify(coords, tolerance) # Check that it has at least 4 pairs of coordinates if len(scoords) < 4: @@ -254,34 +266,30 @@ def split_tile(self, features, z, x, y, cz=None, cx=None, cy=None): if tile is None: # Use simplified geometries for this zoom level simplified_features = [ - { - **feature, - "geometry": feature[f'geometry_z{z}'] - } + {**feature, "geometry": feature[f"geometry_z{z}"]} for feature in features ] - self.tiles[id_] = create_tile( - simplified_features, z, x, y, options) + self.tiles[id_] = create_tile(simplified_features, z, x, y, options) tile = self.tiles[id_] - self.tile_coords.append({'z': z, 'x': x, 'y': y}) + self.tile_coords.append({"z": z, "x": x, "y": y}) - self.stats[f'z{z}'] = self.stats.get(f'z{z}', 0) + 1 + self.stats[f"z{z}"] = self.stats.get(f"z{z}", 0) + 1 self.total += 1 # save reference to original geometry in tile so that we can drill # down later if we stop now - tile['source'] = features + tile["source"] = features # if it's the first-pass tiling if cz is None: # stop tiling if we reached max zoom, or if the tile is too # simple - if z == options.get( - 'indexMaxZoom') or tile.get( - 'numPoints') <= options.get('indexMaxPoints'): + if z == options.get("indexMaxZoom") or tile.get( + "numPoints" + ) <= options.get("indexMaxPoints"): continue # if a drilldown to a specific tile - elif z == options.get('maxZoom') or z == cz: + elif z == options.get("maxZoom") or z == cz: # stop tiling if we reached base zoom or our target tile zoom continue elif cz is not None: @@ -291,15 +299,15 @@ def split_tile(self, features, z, x, y, cz=None, cx=None, cy=None): continue # if we slice further down, no need to keep source geometry - tile['source'] = None + tile["source"] = None if not features or len(features) == 0: continue - logging.debug('clipping start') + logging.debug("clipping start") # values we'll use for clipping - k1 = 0.5 * options.get('buffer') / options.get('extent') + k1 = 0.5 * options.get("buffer") / options.get("extent") k2 = 0.5 - k1 k3 = 0.5 + k1 k4 = 1 + k1 @@ -309,27 +317,81 @@ def split_tile(self, features, z, x, y, cz=None, cx=None, cy=None): tr = None br = None - left = clip(features, z2, x - k1, x + k3, 0, - tile['minX'], tile['maxX'], options, z+1) - right = clip(features, z2, x + k2, x + k4, 0, - tile['minX'], tile['maxX'], options, z+1) + left = clip( + features, + z2, + x - k1, + x + k3, + 0, + tile["minX"], + tile["maxX"], + options, + z + 1, + ) + right = clip( + features, + z2, + x + k2, + x + k4, + 0, + tile["minX"], + tile["maxX"], + options, + z + 1, + ) features = None if left is not None: - tl = clip(left, z2, y - k1, y + k3, 1, - tile['minY'], tile['maxY'], options, z+1) - bl = clip(left, z2, y + k2, y + k4, 1, - tile['minY'], tile['maxY'], options, z+1) + tl = clip( + left, + z2, + y - k1, + y + k3, + 1, + tile["minY"], + tile["maxY"], + options, + z + 1, + ) + bl = clip( + left, + z2, + y + k2, + y + k4, + 1, + tile["minY"], + tile["maxY"], + options, + z + 1, + ) left = None if right is not None: - tr = clip(right, z2, y - k1, y + k3, 1, - tile['minY'], tile['maxY'], options, z+1) - br = clip(right, z2, y + k2, y + k4, 1, - tile['minY'], tile['maxY'], options, z+1) + tr = clip( + right, + z2, + y - k1, + y + k3, + 1, + tile["minY"], + tile["maxY"], + options, + z + 1, + ) + br = clip( + right, + z2, + y + k2, + y + k4, + 1, + tile["minY"], + tile["maxY"], + options, + z + 1, + ) right = None - logging.debug('clipping ended') + logging.debug("clipping ended") stack.append(tl if tl is not None else []) stack.append(z + 1) @@ -357,7 +419,7 @@ def get_tile(self, z, x, y): y = int(y) options = self.options - extent = options.get('extent') + extent = options.get("extent") if z < 0 or z > 24: return None @@ -370,7 +432,7 @@ def get_tile(self, z, x, y): if current_tile is not None: return transform_tile(self.tiles[id_], extent) - logging.debug(f'drilling down to z{z}-{x}-{y}') + logging.debug(f"drilling down to z{z}-{x}-{y}") z0 = z x0 = x @@ -383,21 +445,23 @@ def get_tile(self, z, x, y): y0 = y0 >> 1 parent = self.tiles.get(to_Id(z0, x0, y0), None) - if parent is None or parent.get('source', None) is None: + if parent is None or parent.get("source", None) is None: return None # if we found a parent tile containing the original geometry, we can # drill down from it - logging.debug(f'found parent tile z{z0}-{x0}-{y0}') - logging.debug('drilling down start') + logging.debug(f"found parent tile z{z0}-{x0}-{y0}") + logging.debug("drilling down start") - self.split_tile(parent.get('source'), z0, x0, y0, z, x, y) + self.split_tile(parent.get("source"), z0, x0, y0, z, x, y) - logging.debug('drilling down end') + logging.debug("drilling down end") - transformed = transform_tile( - self.tiles[id_], extent) if self.tiles.get( - id_, None) is not None else None + transformed = ( + transform_tile(self.tiles[id_], extent) + if self.tiles.get(id_, None) is not None + else None + ) return transformed @@ -414,19 +478,6 @@ def to_Id(z, x, y): return id_ -def extend(dest, src): - """ - Extends the destination dictionary with the source dictionary - - Args: - dest (dict): The destination dictionary - src (dict): The source dictionary - """ - for key, _ in src.items(): - dest[key] = src[key] - return dest - - def microjson2vt(data, options, log_level=logging.INFO): """ Converts MicroJSON data to intermediate vector tiles diff --git a/src/microjson/microjson2vt/simplify.py b/src/microjson/microjson2vt/simplify.py index 974b6b9..21d7908 100644 --- a/src/microjson/microjson2vt/simplify.py +++ b/src/microjson/microjson2vt/simplify.py @@ -4,6 +4,7 @@ # Modifications by PolusAI, 2024 + def simplify(coords, sq_tolerance, min_vertices=3): """Simplifies a list of coordinates using the Ramer-Douglas-Peucker algorithm.""" @@ -29,7 +30,7 @@ def get_sq_seg_dist(px, py, x, y, bx, by): return dx * dx + dy * dy def simplify_recursive(coords, sq_tolerance): - """ Recursive step""" + """Recursive step""" first = 0 last = len(coords) - 1 max_sq_dist = 0 @@ -37,18 +38,20 @@ def simplify_recursive(coords, sq_tolerance): for i in range(1, last): sq_dist = get_sq_seg_dist( - coords[i][0], coords[i][1], - coords[first][0], coords[first][1], - coords[last][0], coords[last][1]) + coords[i][0], + coords[i][1], + coords[first][0], + coords[first][1], + coords[last][0], + coords[last][1], + ) if sq_dist > max_sq_dist: index = i max_sq_dist = sq_dist if max_sq_dist > sq_tolerance and sq_tolerance > 0: - left_simplified = simplify_recursive( - coords[:index+1], sq_tolerance) - right_simplified = simplify_recursive( - coords[index:], sq_tolerance) + left_simplified = simplify_recursive(coords[: index + 1], sq_tolerance) + right_simplified = simplify_recursive(coords[index:], sq_tolerance) new_coords = left_simplified[:-1] + right_simplified @@ -63,13 +66,13 @@ def simplify_recursive(coords, sq_tolerance): if len(coords) <= min_vertices: return coords else: - proceed = True - while proceed: + max_iterations = 50 + for _ in range(max_iterations): simplified = simplify_recursive(coords, sq_tolerance) # check that the simplification has enough vertices - if len(simplified) <= min_vertices: - # try again with a lower tolerance - sq_tolerance /= 2 - else: - proceed = False - return simplified + if len(simplified) > min_vertices: + return simplified + # try again with a lower tolerance + sq_tolerance /= 2 + # exhausted iterations — return original coordinates + return coords diff --git a/src/microjson/microjson2vt/tile.py b/src/microjson/microjson2vt/tile.py index c6c6e91..62f1511 100644 --- a/src/microjson/microjson2vt/tile.py +++ b/src/microjson/microjson2vt/tile.py @@ -7,10 +7,11 @@ def create_tile(features, z, tx, ty, options): features = features if features is not None else [] - tolerance = 0 if z == options.get( - 'maxZoom') else options.get( - 'tolerance') / \ - ((1 << z) * options.get('extent')) + tolerance = ( + 0 + if z == options.get("maxZoom") + else options.get("tolerance") / ((1 << z) * options.get("extent")) + ) tile = { "features": [], "numPoints": 0, @@ -24,7 +25,7 @@ def create_tile(features, z, tx, ty, options): "minX": 2, "minY": 1, "maxX": -1, - "maxY": 0 + "maxY": 0, } for feature in features: add_feature(tile, feature, tolerance, options) @@ -32,63 +33,64 @@ def create_tile(features, z, tx, ty, options): def add_feature(tile, feature, tolerance, options): - geom = feature.get('geometry') - type_ = feature.get('type') + geom = feature.get("geometry") + type_ = feature.get("type") simplified = [] - tile['minX'] = min(tile['minX'], feature['minX']) - tile['minY'] = min(tile['minY'], feature['minY']) - tile['maxX'] = max(tile['maxX'], feature['maxX']) - tile['maxY'] = max(tile['maxY'], feature['maxY']) + tile["minX"] = min(tile["minX"], feature["minX"]) + tile["minY"] = min(tile["minY"], feature["minY"]) + tile["maxX"] = max(tile["maxX"], feature["maxX"]) + tile["maxY"] = max(tile["maxY"], feature["maxY"]) - if type_ == 'Point' or type_ == 'MultiPoint': + if type_ == "Point" or type_ == "MultiPoint": for i in range(0, len(geom), 3): simplified.append(geom[i]) simplified.append(geom[i + 1]) - tile['numPoints'] += 1 - tile['numSimplified'] += 1 + tile["numPoints"] += 1 + tile["numSimplified"] += 1 - elif type_ == 'LineString': + elif type_ == "LineString": add_line(simplified, geom, tile, tolerance, False, False) - elif type_ == 'MultiLineString' or type_ == 'Polygon': + elif type_ == "MultiLineString" or type_ == "Polygon": for i in range(len(geom)): - add_line(simplified, geom[i], tile, - tolerance, type_ == 'Polygon', i == 0) + add_line(simplified, geom[i], tile, tolerance, type_ == "Polygon", i == 0) - elif type_ == 'MultiPolygon': + elif type_ == "MultiPolygon": for k in range(len(geom)): polygon = geom[k] for i in range(len(polygon)): add_line(simplified, polygon[i], tile, tolerance, True, i == 0) if len(simplified) > 0: - tags = feature.get('tags') + tags = feature.get("tags") - if type_ == 'LineString' and options.get('lineMetrics'): + if type_ == "LineString" and options.get("lineMetrics"): tags = {} - for key in feature.get('tags'): - tags[key] = feature['tags'][key] - tags['mapbox_clip_start'] = geom.start / geom.size - tags['mapbox_clip_end'] = geom.end / geom.size + for key in feature.get("tags"): + tags[key] = feature["tags"][key] + tags["mapbox_clip_start"] = geom.start / geom.size + tags["mapbox_clip_end"] = geom.end / geom.size tileFeature = { "geometry": simplified, - "type": 3 if type_ == 'Polygon' or type_ == 'MultiPolygon' else ( - 2 if type_ == 'LineString' or type_ == 'MultiLineString' - else 1), - "tags": tags + "type": ( + 3 + if type_ == "Polygon" or type_ == "MultiPolygon" + else (2 if type_ == "LineString" or type_ == "MultiLineString" else 1) + ), + "tags": tags, } - current_id = feature.get('id', None) + current_id = feature.get("id", None) if current_id is not None: - tileFeature['id'] = current_id - tile['features'].append(tileFeature) + tileFeature["id"] = current_id + tile["features"].append(tileFeature) def add_line(result, geom, tile, tolerance, is_polygon, is_outer): # Convert geom to list of [x, y] pairs - coords = [[geom[i], geom[i+1]] for i in range(0, len(geom), 3)] + coords = [[geom[i], geom[i + 1]] for i in range(0, len(geom), 3)] # No simplification at the single tile simplified_coords = coords diff --git a/src/microjson/microjson2vt/wrap.py b/src/microjson/microjson2vt/wrap.py index 89a1761..31d7ca3 100644 --- a/src/microjson/microjson2vt/wrap.py +++ b/src/microjson/microjson2vt/wrap.py @@ -10,20 +10,19 @@ def wrap(features, options): # wrap not needed for cartesian coordinates - buffer = options.get('buffer') / options.get('extent') + buffer = options.get("buffer") / options.get("extent") merged = features - left = clip(features, 1, -1 - buffer, buffer, - 0, -1, 2, options) # left world copy - right = clip(features, 1, 1 - buffer, 2 + buffer, - 0, -1, 2, options) # right world copy + left = clip(features, 1, -1 - buffer, buffer, 0, -1, 2, options) # left world copy + right = clip( + features, 1, 1 - buffer, 2 + buffer, 0, -1, 2, options + ) # right world copy if left is not None or right is not None: c = clip(features, 1, -buffer, 1 + buffer, 0, -1, 2, options) merged = c if c is not None else [] # :nter world copy if left is not None: - merged = shift_feature_coords( - left, 1) + merged # merge left into center + merged = shift_feature_coords(left, 1) + merged # merge left into center if right is not None: # merge right into center merged = merged + (shift_feature_coords(right, -1)) @@ -36,26 +35,27 @@ def shift_feature_coords(features, offset): for i in range(len(features)): feature = features[i] - type_ = feature.get('type') + type_ = feature.get("type") # new_geometry = None new_geometry = [] - if type_ == 'Point' or type_ == 'MultiPoint' or type_ == 'LineString': - new_geometry = shift_coords(feature.get('geometry'), offset) - elif type_ == 'MultiLineString' or type_ == 'Polygon': + if type_ == "Point" or type_ == "MultiPoint" or type_ == "LineString": + new_geometry = shift_coords(feature.get("geometry"), offset) + elif type_ == "MultiLineString" or type_ == "Polygon": new_geometry = [] - for line in feature.get('geometry'): + for line in feature.get("geometry"): new_geometry.append(shift_coords(line, offset)) - elif type_ == 'MultiPolygon': + elif type_ == "MultiPolygon": new_geometry = [] - for polygon in feature.get('geometry'): + for polygon in feature.get("geometry"): new_polygon = [] for line in polygon: new_polygon.append(shift_coords(line, offset)) new_geometry.append(new_polygon) - new_features.append(create_feature( - feature.get('id'), type_, new_geometry, feature.get('tags'))) + new_features.append( + create_feature(feature.get("id"), type_, new_geometry, feature.get("tags")) + ) return new_features diff --git a/src/microjson/model.py b/src/microjson/model.py index 7e67215..5c22fcd 100644 --- a/src/microjson/model.py +++ b/src/microjson/model.py @@ -1,6 +1,7 @@ """MicroJSON and GeoJSON models, defined manually using pydantic.""" -from typing import Any, Optional, Union, Dict, TypeVar -from pydantic import BaseModel, StrictInt, StrictStr, RootModel + +from typing import Any, Optional, Union, Dict +from pydantic import StrictInt, StrictStr, RootModel from .provenance import Workflow from .provenance import WorkflowCollection from .provenance import Artifact @@ -9,7 +10,6 @@ from geojson_pydantic import Point, MultiPoint, LineString, MultiLineString from geojson_pydantic import Polygon, MultiPolygon - GeometryType = Union[ # type: ignore Point, MultiPoint, @@ -21,8 +21,6 @@ type(None), ] -Props = TypeVar("Props", bound=Union[Dict[str, Any], BaseModel]) - class GeoJSON(RootModel): """The root object of a GeoJSON file""" @@ -64,17 +62,14 @@ class MicroFeatureCollection(FeatureCollection): ArtifactCollection]]): The provenance of the feature collection """ - properties: Optional[Union[Props, None]] = None # type: ignore + properties: Optional[Dict[str, Any]] = None id: Optional[Union[StrictStr, StrictInt]] = None - provenance: Optional[Union[Workflow, - WorkflowCollection, - Artifact, - ArtifactCollection]] = None + provenance: Optional[ + Union[Workflow, WorkflowCollection, Artifact, ArtifactCollection] + ] = None class MicroJSON(RootModel): """The root object of a MicroJSON file""" - root: Union[MicroFeature, # type: ignore - MicroFeatureCollection, - GeometryType] + root: Union[MicroFeature, MicroFeatureCollection, GeometryType] # type: ignore diff --git a/src/microjson/tilemodel.py b/src/microjson/tilemodel.py index e72e846..97db54a 100644 --- a/src/microjson/tilemodel.py +++ b/src/microjson/tilemodel.py @@ -1,12 +1,12 @@ from typing import List, Optional, Union, Dict, Literal -from enum import Enum +from enum import StrEnum from pydantic import BaseModel, AnyUrl, conlist, RootModel from pydantic import StrictStr from pathlib import Path class TileLayer(BaseModel): - """ A vector layer in a TileJSON file. + """A vector layer in a TileJSON file. Args: id (str): The unique identifier for the layer. @@ -21,6 +21,7 @@ class TileLayer(BaseModel): fielddescriptions (Optional[Dict[str, str]]): The descriptions of the fields. """ + id: str fields: Union[None, Dict[str, str]] = None minzoom: Optional[int] = 0 @@ -31,7 +32,7 @@ class TileLayer(BaseModel): fielddescriptions: Optional[Dict[str, str]] = None -class Unit(Enum): +class Unit(StrEnum): """A unit of measurement""" ANGSTROM = "angstrom" @@ -65,7 +66,7 @@ class Unit(Enum): DEGREE = "degree" -class AxisType(Enum): +class AxisType(StrEnum): """The type of an axis""" SPACE = "space" @@ -145,7 +146,7 @@ class Multiscale(BaseModel): class TileModel(BaseModel): - """ A TileJSON object. + """A TileJSON object. Args: tilejson (str): The TileJSON version. @@ -183,14 +184,8 @@ class TileModel(BaseModel): data: Optional[Union[Path, AnyUrl]] = None minzoom: Optional[int] = 0 maxzoom: Optional[int] = 22 - bounds: Optional[conlist( # type: ignore - float, - min_length=4, - max_length=10)] = None - center: Optional[conlist( # type: ignore - float, - min_length=3, - max_length=6)] = None + bounds: Optional[conlist(float, min_length=4, max_length=10)] = None # type: ignore + center: Optional[conlist(float, min_length=3, max_length=6)] = None # type: ignore fillzoom: Optional[int] = None vector_layers: List[TileLayer] multiscale: Optional[Multiscale] = None @@ -198,5 +193,6 @@ class TileModel(BaseModel): class TileJSON(RootModel): - """ The root object of a TileJSON file.""" + """The root object of a TileJSON file.""" + root: TileModel diff --git a/src/microjson/tilereader.py b/src/microjson/tilereader.py index 1c9c190..f042d10 100644 --- a/src/microjson/tilereader.py +++ b/src/microjson/tilereader.py @@ -1,5 +1,5 @@ from typing import Any -from microjson.tilehandler import TileHandler +from .tilehandler import TileHandler import mapbox_vector_tile # type: ignore import json import os @@ -10,8 +10,7 @@ class TileReader(TileHandler): Class to read tiles and generate MicroJSON data """ - def tiles2microjson(self, - zlvl: int = 0) -> dict[str, Any]: + def tiles2microjson(self, zlvl: int = 0) -> dict[str, Any]: """ Generate MicroJSON data from tiles in form of JSON or PBF files. Get the TileJSON configuration and the PBF flag from the class @@ -29,10 +28,12 @@ def tiles2microjson(self, """ # check if zlvl is within the maxzoom and minzoom of the tilejson - if (self.tile_json.minzoom is None or - zlvl < self.tile_json.minzoom or - self.tile_json.maxzoom is None or - zlvl > self.tile_json.maxzoom): + if ( + self.tile_json.minzoom is None + or zlvl < self.tile_json.minzoom + or self.tile_json.maxzoom is None + or zlvl > self.tile_json.maxzoom + ): return {} # get the bounds from the tilejson @@ -44,7 +45,7 @@ def tiles2microjson(self, miny = float(bounds[1]) maxx = float(bounds[2]) maxy = float(bounds[3]) - ntiles = 2 ** zlvl + ntiles = 2**zlvl xstep = (maxx - minx) / ntiles ystep = (maxy - miny) / ntiles xstarts = [minx + x * xstep for x in range(ntiles)] @@ -56,21 +57,17 @@ def tiles2microjson(self, # ystarts = ystarts[::-1] # ystops = ystops[::-1] - def project(coord, xmin, ymin, xmax, ymax, - extent=4096): + def project(coord, xmin, ymin, xmax, ymax, extent=4096): return [ (coord[0] / extent * (xmax - xmin) + xmin), - (coord[1] / extent * (ymax - ymin) + ymin) + (coord[1] / extent * (ymax - ymin) + ymin), ] # get the tilepath from the tilejson tilepath = str(self.tile_json.tiles[0]) # initialize the microjson data - microjson_data = { - "type": "FeatureCollection", - "features": [] - } + microjson_data = {"type": "FeatureCollection", "features": []} # read the tiles and extract the geometries for x in range(ntiles): @@ -86,17 +83,16 @@ def project(coord, xmin, ymin, xmax, ymax, continue with open( - str(tile_file), - 'rb' if str(tile_file).endswith('.pbf') else 'r') as f: + str(tile_file), "rb" if str(tile_file).endswith(".pbf") else "r" + ) as f: tile_data = f.read() # decode the tile data if self.pbf: tile_data = mapbox_vector_tile.decode( tile_data, - default_options={ - "geojson": True, - "y_coord_down": True}) + default_options={"geojson": True, "y_coord_down": True}, + ) else: tile_data = json.loads(tile_data) @@ -106,54 +102,53 @@ def project(coord, xmin, ymin, xmax, ymax, # with open(filename, "w") as f: # json.dump(tile_data, f) - tile_data = tile_data['geojsonLayer'] + tile_data = tile_data["geojsonLayer"] # extract the geometries - if 'features' in tile_data.keys(): - for feature in tile_data['features']: + if "features" in tile_data.keys(): + for feature in tile_data["features"]: # Transform the coordinates to the global coordinate # system please note that the coordinates may be in # up to 5 nested lists transform the coordinates in # place - if 'geometry' in feature: - geom = feature['geometry'] - coord = geom['coordinates'] - if 'type' in geom: - if geom['type'] == 'Point': - geom['coordinates'] = project( - coord, xstart, ystart, xstop, ystop) - elif geom['type'] == 'LineString': - geom['coordinates'] = [ - project(coord, xstart, ystart, xstop, - ystop) - for coord in geom['coordinates'] + if "geometry" in feature: + geom = feature["geometry"] + coord = geom["coordinates"] + if "type" in geom: + if geom["type"] == "Point": + geom["coordinates"] = project( + coord, xstart, ystart, xstop, ystop + ) + elif geom["type"] == "LineString": + geom["coordinates"] = [ + project(coord, xstart, ystart, xstop, ystop) + for coord in geom["coordinates"] ] - elif geom['type'] == 'Polygon': - geom['coordinates'] = [ + elif geom["type"] == "Polygon": + geom["coordinates"] = [ [ - project(coord, xstart, ystart, - xstop, ystop) + project(coord, xstart, ystart, xstop, ystop) for coord in ring ] - for ring in geom['coordinates'] + for ring in geom["coordinates"] ] - elif geom['type'] == 'MultiPolygon': - geom['coordinates'] = [ + elif geom["type"] == "MultiPolygon": + geom["coordinates"] = [ [ [ - project(coord, xstart, ystart, - xstop, ystop) + project( + coord, xstart, ystart, xstop, ystop + ) for coord in ring ] for ring in poly ] - for poly in geom['coordinates'] + for poly in geom["coordinates"] ] else: continue # add the feature to the microjson data - microjson_data['features'].append( # type: ignore - feature) + microjson_data["features"].append(feature) # type: ignore return microjson_data diff --git a/src/microjson/tilewriter.py b/src/microjson/tilewriter.py index 963c3d8..ac6b313 100644 --- a/src/microjson/tilewriter.py +++ b/src/microjson/tilewriter.py @@ -1,7 +1,7 @@ import os from .microjson2vt.microjson2vt import microjson2vt from .tilehandler import TileHandler -from microjson import MicroJSON +from .model import MicroJSON import json from pydantic import ValidationError @@ -14,9 +14,6 @@ logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) -import pandas as pd -import geopandas as gpd - def getbounds(microjson_file: str, square: bool = False) -> List[float]: """ @@ -29,24 +26,24 @@ def getbounds(microjson_file: str, square: bool = False) -> List[float]: Returns: List[float]: List of the bounds [minx, miny, maxx, maxy] """ - with open(microjson_file, 'r') as file: + with open(microjson_file, "r") as file: data = json.load(file) # get the bounds - minx = miny = float('inf') - maxx = maxy = float('-inf') - if 'features' in data: - for feature in data['features']: - if 'geometry' in feature: - if feature['geometry']['type'] == 'Polygon': - for ring in feature['geometry']['coordinates']: + minx = miny = float("inf") + maxx = maxy = float("-inf") + if "features" in data: + for feature in data["features"]: + if "geometry" in feature: + if feature["geometry"]["type"] == "Polygon": + for ring in feature["geometry"]["coordinates"]: for coord in ring: minx = min(minx, coord[0]) miny = min(miny, coord[1]) maxx = max(maxx, coord[0]) maxy = max(maxy, coord[1]) - if feature['geometry']['type'] == 'MultiPolygon': - for polygon in feature['geometry']['coordinates']: + if feature["geometry"]["type"] == "MultiPolygon": + for polygon in feature["geometry"]["coordinates"]: for ring in polygon: for coord in ring: minx = min(minx, coord[0]) @@ -63,12 +60,12 @@ def getbounds(microjson_file: str, square: bool = False) -> List[float]: def geojson2vt_to_shapely(geometry_data): # Extract coordinates and type - geom_type = geometry_data['type'] + geom_type = geometry_data["type"] # Based on the `type` field, determine the geometry shape if geom_type == 3: # 3 usually represents Polygon in such data formats - coordinates = geometry_data['geometry'][0] # Only take outer ring - geometry_data['geometry'] = Polygon(coordinates) + coordinates = geometry_data["geometry"][0] # Only take outer ring + geometry_data["geometry"] = Polygon(coordinates) else: raise ValueError("Unsupported geometry type") @@ -89,26 +86,26 @@ def extract_fields_ranges_enums(microjson_file: str): def get_json_type(value): if value is None: # Set to String if None - return 'String' + return "String" if isinstance(value, bool): - return 'Boolean' + return "Boolean" if isinstance(value, (int, float)): - return 'Number' + return "Number" if isinstance(value, dict): - return 'Object' + return "Object" if isinstance(value, list): - return 'Array' - return 'String' + return "Array" + return "String" - with open(microjson_file, 'r') as file: + with open(microjson_file, "r") as file: data = json.load(file) field_names: dict[str, str] = {} field_ranges = {} field_enums: dict[str, set[str]] = {} - for feature in data.get('features', []): - props = feature.get('properties', {}) + for feature in data.get("features", []): + props = feature.get("properties", {}) for key, val in props.items(): if key not in field_names.keys(): field_names[key] = get_json_type(val) @@ -126,13 +123,14 @@ def get_json_type(value): return field_names, field_ranges, field_enums -class TileWriter (TileHandler): +class TileWriter(TileHandler): - def microjson2tiles(self, - microjson_data_path: Union[str, Path], - validate: bool = False, - tolerance_key: str = "default" - ) -> List[str]: + def microjson2tiles( + self, + microjson_data_path: Union[str, Path], + validate: bool = False, + tolerance_key: str = "default", + ) -> List[str]: """ Generate tiles in form of JSON or PBF files from MicroJSON data. @@ -145,6 +143,7 @@ def microjson2tiles(self, Returns: List[str]: List of paths to the generated tiles """ + def save_tile(tile_data, z, x, y, tiles_path_template): """ Save a single tile to a file based on the template path. @@ -165,13 +164,10 @@ def save_tile(tile_data, z, x, y, tiles_path_template): # Save the tile data (this assumes tile_data is already in the # correct format, e.g., PBF or JSON) - if tile_path.endswith('.parquet'): + if tile_path.endswith(".parquet"): tile_data.to_parquet(tile_path) else: - with open( - tile_path, - 'wb' if tile_path.endswith('.pbf') else 'w' - ) as f: + with open(tile_path, "wb" if tile_path.endswith(".pbf") else "w") as f: f.write(tile_data) # return the path to the saved tile @@ -196,7 +192,7 @@ def convert_id_to_int(data) -> int | dict | list: # check if data is a dict elif isinstance(data, dict): for key, value in data.items(): - if key == 'id': + if key == "id": if value is None: data[key] = self.id_counter self.id_counter += 1 @@ -216,7 +212,7 @@ def convert_id_to_int(data) -> int | dict | list: return int(data) # Load the MicroJSON data - with open(microjson_data_path, 'r') as file: + with open(microjson_data_path, "r") as file: microjson_data = json.load(file) # Validate the MicroJSON data @@ -230,18 +226,20 @@ def convert_id_to_int(data) -> int | dict | list: # TODO currently only supports one tile layer # calculate maxzoom and minzoom from layer and global tilejson - maxzoom = min(self.tile_json.maxzoom, - self.tile_json.vector_layers[0].maxzoom) # type: ignore - minzoom = max(self.tile_json.minzoom, - self.tile_json.vector_layers[0].minzoom) # type: ignore + maxzoom = min( + self.tile_json.maxzoom, self.tile_json.vector_layers[0].maxzoom + ) # type: ignore + minzoom = max( + self.tile_json.minzoom, self.tile_json.vector_layers[0].minzoom + ) # type: ignore # Options for geojson2vt from TileJSON options = { - 'maxZoom': maxzoom, # max zoom in the final tileset - 'indexMaxZoom': self.tile_json.maxzoom, # tile index max zoom - 'indexMaxPoints': 0, # max number of points per tile, 0 if none - 'bounds': self.tile_json.bounds, - 'tolerance_function': tolerance_key # Pass the string key + "maxZoom": maxzoom, # max zoom in the final tileset + "indexMaxZoom": self.tile_json.maxzoom, # tile index max zoom + "indexMaxPoints": 0, # max number of points per tile, 0 if none + "bounds": self.tile_json.bounds, + "tolerance_function": tolerance_key, # Pass the string key } # Convert GeoJSON to intermediate vector tiles @@ -261,53 +259,32 @@ def convert_id_to_int(data) -> int | dict | list: continue tile_data = tile_index.get_tile(z, x, y) - for item in tile_data['features']: - if 'id' in item: - item['id'] = int(item['id']) + for item in tile_data["features"]: + if "id" in item: + item["id"] = int(item["id"]) # add name to the tile_data tile_data["name"] = "tile" - # print('tile_data before encoding') - # print(tile_data.keys()) - # print(json.dumps(tile_data['features'])) - - # convert this dictionary to a geodataframe using gpd.GeoDataFrame.from_features - # tmp = gpd.GeoDataFrame.from_features(tile_data['features']) - # print(tmp) - - # print('-------------------------') - # print('-------------------------') - - # print('testing geopandas!') - # gdf = gpd.GeoDataFrame(tile_data) - # print(gdf) - - # print('self.pbf', self.pbf) - # print('self.parquet', self.parquet) - if self.pbf: # Using vt2pbf to encode tile data to PBF encoded_data = vt2pbf(tile_data) elif self.parquet: - # encoded_data = pd.DataFrame(tile_data) + import geopandas as gpd + encoded_data = gpd.GeoDataFrame(tile_data) # drop metadata columns - encoded_data['new_geometry'] = encoded_data['features'].apply(lambda x: x['geometry']) - # encoded_data['Label'] = encoded_data['features'].apply(lambda x: x['Label']) - encoded_data = encoded_data[['new_geometry']] + encoded_data["new_geometry"] = encoded_data["features"].apply( + lambda x: x["geometry"] + ) + encoded_data = encoded_data[["new_geometry"]] else: encoded_data = json.dumps(tile_data) - # print('self.tile_json.tiles[0]') - # print(self.tile_json.tiles[0]) - - # print('type(encoded_data)') - # print(type(encoded_data)) - - generated_tiles.append(save_tile( - encoded_data, z, x, y, self.tile_json.tiles[0])) + generated_tiles.append( + save_tile(encoded_data, z, x, y, self.tile_json.tiles[0]) + ) return generated_tiles diff --git a/src/microjson/utils.py b/src/microjson/utils.py index 18f4db0..308b8b2 100644 --- a/src/microjson/utils.py +++ b/src/microjson/utils.py @@ -10,10 +10,6 @@ from concurrent.futures import ThreadPoolExecutor from itertools import product -import skimage as sk -from skimage import measure -from skimage import morphology - from sys import platform from pathlib import Path from typing import Any @@ -21,28 +17,10 @@ import microjson.model as mj from multiprocessing import cpu_count -import warnings from microjson import MicroJSON from pydantic import ValidationError from microjson.model import Feature -from typing import Union import pydantic -import matplotlib.pyplot as plt - - -#define conditional imports -try: - from bfio import BioReader - import filepattern as fp - from scipy import ndimage - import vaex -except ImportError as e: - print("""Packages bfio, filepattern, scipy, vaex not installed - please install using pip install microjson[all]""") - raise e - - -warnings.filterwarnings("ignore") logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) @@ -71,6 +49,8 @@ def __init__( ) -> None: """Convert each object polygons (series of points, rectangle) to microjson.""" + from bfio import BioReader + self.out_dir = out_dir self.file_path = file_path self.polygon_type = polygon_type @@ -81,6 +61,8 @@ def __init__( def _tile_read(self) -> None: """Reading of Image in a tile and compute encodings for it.""" + from skimage import morphology + with ThreadPoolExecutor(max_workers=NUM_THREADS) as executor: idx = 0 for i, (z, y, x) in enumerate( @@ -116,15 +98,13 @@ def _tile_read(self) -> None: label, coordinates = future.result() if len(label) and len(coordinates) > 0: - label = [i + idx for i in range( - 1, len(label) + 1)] + label = [i + idx for i in range(1, len(label) + 1)] idx = 0 if len(label) == 1: idx += label[0] else: idx += label[-1] - self.polygons_to_microjson( - i, label, coordinates) + self.polygons_to_microjson(i, label, coordinates) else: future = executor.submit( @@ -135,25 +115,23 @@ def _tile_read(self) -> None: ) label, coordinates = future.result() if len(label) and len(coordinates) > 0: - label = [i + idx for i in range( - 1, len(label) + 1)] + label = [i + idx for i in range(1, len(label) + 1)] idx = 0 if len(label) == 1: idx += label[0] else: idx += label[-1] - self.polygons_to_microjson( - i, label, coordinates) + self.polygons_to_microjson(i, label, coordinates) def get_line_number(self, filename, target_string) -> int: line_number = 0 - with open(filename, 'r') as file: + with open(filename, "r") as file: for line in file: line_number += 1 if target_string in line: return line_number return line_number - + def cleaning_directories(self): out_combined = Path(self.out_dir, "tmp") for file in out_combined.iterdir(): @@ -163,23 +141,28 @@ def cleaning_directories(self): def write_single_json(self) -> None: """Combine microjsons from tiled images into combined json file.""" + import filepattern as fp + self._tile_read() out_combined = Path(self.out_dir, "tmp") - out_file = Path(self.file_path).name.split( - ".")[0] + "_" + str(self.polygon_type) + ".json" + out_file = ( + Path(self.file_path).name.split(".")[0] + + "_" + + str(self.polygon_type) + + ".json" + ) if not out_combined.exists(): out_combined.mkdir(exist_ok=True) - + fname = re.split(r"[\W']+", str(Path(self.file_path).name))[0] files = fp.FilePattern(self.out_dir, f"{fname}.*json") if len(files) > 1: with Path.open(Path(out_combined, out_file), "w") as fw: for i, fl in zip(range(1, len(files) + 1), files()): file = fl[1][0] - line_number = self.get_line_number( - file, - "multiscale") - total_lines = len([line for line in open(file, 'r')]) + line_number = self.get_line_number(file, "multiscale") + with open(file, "r") as fh: + total_lines = sum(1 for _ in fh) index = (total_lines - line_number) + 3 outname = re.split(r"[_\.]+", file.name)[:-2] outname = "_".join(outname) + ".json" # type: ignore @@ -205,9 +188,12 @@ def segmentations_encodings( ) -> tuple[Any, list[list[list[Any]]]]: """Calculate object boundaries as series of vertices/points forming a polygon.""" + from scipy import ndimage + from skimage import measure + label, coordinates = [], [] objects = ndimage.measurements.find_objects(label_image) - for i in range(len(objects)+1): + for i in range(len(objects) + 1): mask = np.zeros((label_image.shape[0], label_image.shape[1])) mask[(label_image == i)] = 1 contour_thresh = 0.8 @@ -235,6 +221,8 @@ def rectangular_polygons( y: int, ) -> tuple[list[int], list[str]]: """Calculate Rectangular polygon for each object.""" + from scipy import ndimage + objects = ndimage.measurements.find_objects(label_image) label, coordinates = [], [] for i, obj in enumerate(objects): @@ -267,6 +255,8 @@ def polygons_to_microjson( # noqa: PLR0915 coordinates: list[Any], ) -> None: # : 183 """Create microjson overlays in JSON Format.""" + import vaex + x_dimension = np.repeat(self.br.X, len(label)) y_dimension = np.repeat(self.br.Y, len(label)) channel = np.repeat(self.br.C, len(label)) @@ -331,8 +321,7 @@ def polygons_to_microjson( # noqa: PLR0915 else: cor_value = cor + [cor[0]] - geometry = GeometryClass(type=row["geometry_type"], - coordinates=[cor_value]) + geometry = GeometryClass(type=row["geometry_type"], coordinates=[cor_value]) # Create a new Feature object feature = mj.MicroFeature( @@ -342,7 +331,6 @@ def polygons_to_microjson( # noqa: PLR0915 ) features.append(feature) - desc_meta = {key: f"{data[key].values[0]}" for key in str_columns} # Create a new FeatureCollection object @@ -350,33 +338,9 @@ def polygons_to_microjson( # noqa: PLR0915 type="FeatureCollection", properties=desc_meta, features=features, - multiscale={ - "axes": [ - { - "name": "x", - "unit": "micrometer", - "type": "space", - "description": "x-axis", - }, - { - "name": "y", - "unit": "micrometer", - "type": "space", - "description": "y-axis", - }, - ], - "origo": "top-left", - }, ) fname = re.split(r"[\W']+", str(Path(self.file_path).name))[0] - outname = ( - str(fname) - + "_" - + str(self.polygon_type) - + "_" - + str(i) - + ".json" - ) + outname = str(fname) + "_" + str(self.polygon_type) + "_" + str(i) + ".json" if len(feature_collection.model_dump_json()) == 0: msg = "JSON file is empty" raise ValueError(msg) @@ -384,14 +348,14 @@ def polygons_to_microjson( # noqa: PLR0915 out_name = Path(self.out_dir, outname) with Path.open(out_name, "w") as f: f.write( - feature_collection.model_dump_json( - indent=2, exclude_unset=True), + feature_collection.model_dump_json(indent=2, exclude_unset=True), ) logger.info(f"Saving overlay json file: {out_name}") class CustomValidation(pydantic.BaseModel): """Properties with validation.""" + out_dir: Union[str, Path] file_path: Union[str, Path] @@ -408,8 +372,7 @@ def validate_out_dir(cls, value): @classmethod def validate_file_path(cls, value): if not Path(value).exists(): - raise FileNotFoundError( - errno.ENOENT, os.strerror(errno.ENOENT), value) + raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), value) if not MicroJSON.model_validate(json.load(Path.open(Path(value)))): raise ValidationError(f"Not a valid MicroJSON {value.name}") if isinstance(value, str): @@ -424,18 +387,21 @@ class MicrojsonBinaryModel(CustomValidation): out_dir: Path to output directory. file_path: Microjson file path """ + out_dir: Union[str, Path] file_path: Union[str, Path] def microjson_to_binary(self) -> None: """Convert polygon coordinates (series of points, rectangle) of all objects to binary mask""" + import skimage as sk + logger.info(f"Converting microjson to binary mask: {self.file_path}") data = json.load(Path.open(Path(self.file_path))) - items = [Feature(**item) for item in data['features']] + items = [Feature(**item) for item in data["features"]] poly = [i.geometry.coordinates for i in items] - meta = data['properties'] + meta = data["properties"] image_name = meta.get("Image") x = int(meta.get("X")) y = int(meta.get("Y")) @@ -444,9 +410,9 @@ def microjson_to_binary(self) -> None: image = fmask.copy() pol = np.array(poly[i][0]) mask = sk.draw.polygon2mask((x, y), pol) - image[mask == False] = 0 - image[mask == True] = 1 - fmask += image + image[~mask] = 0 + image[mask] = 1 + fmask += image fmask = np.rot90(fmask) fmask = np.flipud(fmask) outname = Path(self.out_dir, image_name)