From d6336a8ec380313901bdf4175e969e3ed985016a Mon Sep 17 00:00:00 2001 From: jesspav <202656197+jesspav@users.noreply.github.com> Date: Mon, 8 Dec 2025 16:22:09 -0800 Subject: [PATCH 1/2] add rs_srid and rs_crs functions --- Cargo.lock | 1 + rust/sedona-raster-functions/Cargo.toml | 1 + .../benches/native-raster-functions.rs | 3 + rust/sedona-raster-functions/src/executor.rs | 8 +- rust/sedona-raster-functions/src/lib.rs | 1 + rust/sedona-raster-functions/src/register.rs | 2 + .../sedona-raster-functions/src/rs_example.rs | 2 +- rust/sedona-raster-functions/src/rs_srid.rs | 269 ++++++++++++++++++ rust/sedona-testing/src/rasters.rs | 3 +- 9 files changed, 286 insertions(+), 4 deletions(-) create mode 100644 rust/sedona-raster-functions/src/rs_srid.rs diff --git a/Cargo.lock b/Cargo.lock index ed693605a..791486f89 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5056,6 +5056,7 @@ dependencies = [ "sedona-raster", "sedona-schema", "sedona-testing", + "serde_json", ] [[package]] diff --git a/rust/sedona-raster-functions/Cargo.toml b/rust/sedona-raster-functions/Cargo.toml index 6c681f8a3..1b27e5bab 100644 --- a/rust/sedona-raster-functions/Cargo.toml +++ b/rust/sedona-raster-functions/Cargo.toml @@ -37,6 +37,7 @@ sedona-common = { workspace = true } sedona-expr = { workspace = true } sedona-raster = { workspace = true } sedona-schema = { workspace = true } +serde_json = { workspace = true } [dev-dependencies] criterion = { workspace = true } diff --git a/rust/sedona-raster-functions/benches/native-raster-functions.rs b/rust/sedona-raster-functions/benches/native-raster-functions.rs index 4892ae2c4..ba3fe1c01 100644 --- a/rust/sedona-raster-functions/benches/native-raster-functions.rs +++ b/rust/sedona-raster-functions/benches/native-raster-functions.rs @@ -19,6 +19,8 @@ use sedona_testing::benchmark_util::{benchmark, BenchmarkArgSpec::*, BenchmarkAr fn criterion_benchmark(c: &mut Criterion) { let f = sedona_raster_functions::register::default_function_set(); + + benchmark::scalar(c, &f, "native-raster", "rs_crs", Raster(64, 64)); benchmark::scalar(c, &f, "native-raster", "rs_height", Raster(64, 64)); benchmark::scalar( c, @@ -45,6 +47,7 @@ fn criterion_benchmark(c: &mut Criterion) { benchmark::scalar(c, &f, "native-raster", "rs_scaley", Raster(64, 64)); benchmark::scalar(c, &f, "native-raster", "rs_skewx", Raster(64, 64)); benchmark::scalar(c, &f, "native-raster", "rs_skewy", Raster(64, 64)); + benchmark::scalar(c, &f, "native-raster", "rs_srid", Raster(64, 64)); benchmark::scalar(c, &f, "native-raster", "rs_upperleftx", Raster(64, 64)); benchmark::scalar(c, &f, "native-raster", "rs_upperlefty", Raster(64, 64)); benchmark::scalar(c, &f, "native-raster", "rs_width", Raster(64, 64)); diff --git a/rust/sedona-raster-functions/src/executor.rs b/rust/sedona-raster-functions/src/executor.rs index 4cf0f8016..917a11189 100644 --- a/rust/sedona-raster-functions/src/executor.rs +++ b/rust/sedona-raster-functions/src/executor.rs @@ -95,8 +95,12 @@ impl<'a, 'b> RasterExecutor<'a, 'b> { ColumnarValue::Scalar(scalar_value) => match scalar_value { ScalarValue::Struct(arc_struct) => { let raster_array = RasterStructArray::new(arc_struct.as_ref()); - let raster = raster_array.get(0)?; - func(0, Some(raster)) + if raster_array.is_null(0) { + func(0, None) + } else { + let raster = raster_array.get(0)?; + func(0, Some(raster)) + } } ScalarValue::Null => func(0, None), _ => Err(DataFusionError::Internal( diff --git a/rust/sedona-raster-functions/src/lib.rs b/rust/sedona-raster-functions/src/lib.rs index 34345ceed..1a6cf1ff5 100644 --- a/rust/sedona-raster-functions/src/lib.rs +++ b/rust/sedona-raster-functions/src/lib.rs @@ -21,4 +21,5 @@ pub mod rs_example; pub mod rs_geotransform; pub mod rs_rastercoordinate; pub mod rs_size; +pub mod rs_srid; pub mod rs_worldcoordinate; diff --git a/rust/sedona-raster-functions/src/register.rs b/rust/sedona-raster-functions/src/register.rs index e2a21ba6d..91f14187f 100644 --- a/rust/sedona-raster-functions/src/register.rs +++ b/rust/sedona-raster-functions/src/register.rs @@ -50,6 +50,8 @@ pub fn default_function_set() -> FunctionSet { crate::rs_rastercoordinate::rs_worldtorastercoordy_udf, crate::rs_size::rs_height_udf, crate::rs_size::rs_width_udf, + crate::rs_srid::rs_crs_udf, + crate::rs_srid::rs_srid_udf, crate::rs_worldcoordinate::rs_rastertoworldcoord_udf, crate::rs_worldcoordinate::rs_rastertoworldcoordx_udf, crate::rs_worldcoordinate::rs_rastertoworldcoordy_udf, diff --git a/rust/sedona-raster-functions/src/rs_example.rs b/rust/sedona-raster-functions/src/rs_example.rs index d5bce5c85..4379192ae 100644 --- a/rust/sedona-raster-functions/src/rs_example.rs +++ b/rust/sedona-raster-functions/src/rs_example.rs @@ -83,7 +83,7 @@ impl SedonaScalarKernel for RsExample { skew_x: 1.0, skew_y: 1.0, }; - let crs = lnglat().unwrap().to_json(); + let crs = lnglat().unwrap().to_crs_string(); builder.start_raster(&raster_metadata, Some(&crs))?; let nodata_value = 127u8; for band_id in 1..=3 { diff --git a/rust/sedona-raster-functions/src/rs_srid.rs b/rust/sedona-raster-functions/src/rs_srid.rs new file mode 100644 index 000000000..4bab89f5d --- /dev/null +++ b/rust/sedona-raster-functions/src/rs_srid.rs @@ -0,0 +1,269 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +use std::{sync::Arc, vec}; + +use crate::executor::RasterExecutor; +use arrow_array::builder::StringBuilder; +use arrow_array::builder::UInt32Builder; +use arrow_schema::DataType; +use datafusion_common::error::Result; +use datafusion_common::DataFusionError; +use datafusion_expr::{ + scalar_doc_sections::DOC_SECTION_OTHER, ColumnarValue, Documentation, Volatility, +}; +use sedona_expr::scalar_udf::{SedonaScalarKernel, SedonaScalarUDF}; +use sedona_raster::traits::RasterRef; +use sedona_schema::crs::deserialize_crs; +use sedona_schema::{datatypes::SedonaType, matchers::ArgMatcher}; + +/// RS_SRID() scalar UDF implementation +/// +/// Extract the SRID (Spatial Reference ID) of the raster +pub fn rs_srid_udf() -> SedonaScalarUDF { + SedonaScalarUDF::new( + "rs_srid", + vec![Arc::new(RsSrid {})], + Volatility::Immutable, + Some(rs_srid_doc()), + ) +} + +/// RS_CRS() scalar UDF implementation +/// +/// Extract the CRS (Coordinate Reference System) of the raster +pub fn rs_crs_udf() -> SedonaScalarUDF { + SedonaScalarUDF::new( + "rs_crs", + vec![Arc::new(RsCrs {})], + Volatility::Immutable, + Some(rs_crs_doc()), + ) +} + +fn rs_srid_doc() -> Documentation { + Documentation::builder( + DOC_SECTION_OTHER, + "Return the spatial reference system identifier (SRID) of the raster".to_string(), + "RS_SRID(raster: Raster)".to_string(), + ) + .with_argument("raster", "Raster: Input raster") + .with_sql_example("SELECT RS_SRID(RS_Example())".to_string()) + .build() +} + +fn rs_crs_doc() -> Documentation { + Documentation::builder( + DOC_SECTION_OTHER, + "Return the coordinate reference system (CRS) of the raster".to_string(), + "RS_CRS(raster: Raster)".to_string(), + ) + .with_argument("raster", "Raster: Input raster") + .with_sql_example("SELECT RS_CRS(RS_Example())".to_string()) + .build() +} + +#[derive(Debug)] +struct RsSrid {} + +impl SedonaScalarKernel for RsSrid { + fn return_type(&self, args: &[SedonaType]) -> Result> { + let matcher = ArgMatcher::new( + vec![ArgMatcher::is_raster()], + SedonaType::Arrow(DataType::UInt32), + ); + + matcher.match_args(args) + } + + fn invoke_batch( + &self, + arg_types: &[SedonaType], + args: &[ColumnarValue], + ) -> Result { + let executor = RasterExecutor::new(arg_types, args); + let mut builder = UInt32Builder::with_capacity(executor.num_iterations()); + + executor.execute_raster_void(|_i, raster_opt| { + match raster_opt { + None => builder.append_null(), + Some(raster) => { + match raster.crs() { + None => { + // When no CRS is set, SRID is 0 + builder.append_value(0); + } + Some(crs_str) => { + let crs_value = serde_json::Value::String(crs_str.to_string()); + let crs = deserialize_crs(&crs_value).map_err(|e| { + DataFusionError::Execution(format!( + "Failed to deserialize CRS: {}", + e + )) + })?; + + match crs { + Some(crs_ref) => { + let srid = crs_ref.srid().map_err(|e| { + DataFusionError::Execution(format!( + "Failed to get SRID from CRS: {}", + e + )) + })?; + + match srid { + Some(srid_val) => builder.append_value(srid_val), + None => { + return Err(DataFusionError::Execution( + "CRS has no SRID".to_string(), + )) + } + } + } + None => builder.append_value(0), + } + } + } + } + } + Ok(()) + })?; + + executor.finish(Arc::new(builder.finish())) + } +} + +#[derive(Debug)] +struct RsCrs {} + +impl SedonaScalarKernel for RsCrs { + fn return_type(&self, args: &[SedonaType]) -> Result> { + let matcher = ArgMatcher::new( + vec![ArgMatcher::is_raster()], + SedonaType::Arrow(DataType::Utf8), + ); + + matcher.match_args(args) + } + + fn invoke_batch( + &self, + arg_types: &[SedonaType], + args: &[ColumnarValue], + ) -> Result { + let executor = RasterExecutor::new(arg_types, args); + let preallocate_bytes = "EPSG:4326".len() * executor.num_iterations(); + let mut builder = + StringBuilder::with_capacity(executor.num_iterations(), preallocate_bytes); + + executor.execute_raster_void(|_i, raster_opt| { + match raster_opt { + None => builder.append_null(), + Some(raster) => match raster.crs() { + None => builder.append_null(), + Some(crs_str) => { + let crs_value = serde_json::Value::String(crs_str.to_string()); + let crs = deserialize_crs(&crs_value).map_err(|e| { + DataFusionError::Execution(format!( + "Failed to deserialize CRS: {}", + e + )) + })?; + + let crs_string = crs + .ok_or_else(|| { + DataFusionError::Execution( + "Failed to parse non-null CRS string".to_string(), + ) + })? + .to_crs_string(); + builder.append_value(crs_string); + } + }, + } + Ok(()) + })?; + + executor.finish(Arc::new(builder.finish())) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use arrow_array::{StringArray, UInt32Array}; + use datafusion_common::ScalarValue; + use datafusion_expr::ScalarUDF; + use sedona_schema::datatypes::RASTER; + use sedona_testing::compare::assert_array_equal; + use sedona_testing::rasters::generate_test_rasters; + use sedona_testing::testers::ScalarUdfTester; + + #[test] + fn udf_metadata() { + let udf: ScalarUDF = rs_srid_udf().into(); + assert_eq!(udf.name(), "rs_srid"); + assert!(udf.documentation().is_some()); + + let udf: ScalarUDF = rs_crs_udf().into(); + assert_eq!(udf.name(), "rs_crs"); + assert!(udf.documentation().is_some()); + } + + #[test] + fn udf_srid() { + let udf: ScalarUDF = rs_srid_udf().into(); + let tester = ScalarUdfTester::new(udf.into(), vec![RASTER]); + + tester.assert_return_type(DataType::UInt32); + + // Test with rasters that have CRS set (generate_test_rasters sets OGC:CRS84 which maps to 4326) + let rasters = generate_test_rasters(3, Some(1)).unwrap(); + let expected: Arc = + Arc::new(UInt32Array::from(vec![Some(4326), None, Some(4326)])); + + let result = tester.invoke_array(Arc::new(rasters)).unwrap(); + assert_array_equal(&result, &expected); + + // Test with null scalar + let result = tester.invoke_scalar(ScalarValue::Null).unwrap(); + tester.assert_scalar_result_equals(result, ScalarValue::UInt32(None)); + } + + #[test] + fn udf_crs() { + let udf: ScalarUDF = rs_crs_udf().into(); + let tester = ScalarUdfTester::new(udf.into(), vec![RASTER]); + + tester.assert_return_type(DataType::Utf8); + + // Test with rasters that have CRS set (generate_test_rasters sets OGC:CRS84) + let rasters = generate_test_rasters(3, Some(1)).unwrap(); + let expected_crs = "OGC:CRS84".to_string(); + let expected: Arc = Arc::new(StringArray::from(vec![ + Some(expected_crs.clone()), + None, + Some(expected_crs.clone()), + ])); + + let result = tester.invoke_array(Arc::new(rasters)).unwrap(); + assert_array_equal(&result, &expected); + + // Test with null scalar + let result = tester.invoke_scalar(ScalarValue::Null).unwrap(); + tester.assert_scalar_result_equals(result, ScalarValue::Utf8(None)); + } +} diff --git a/rust/sedona-testing/src/rasters.rs b/rust/sedona-testing/src/rasters.rs index ad95762c8..d8a384429 100644 --- a/rust/sedona-testing/src/rasters.rs +++ b/rust/sedona-testing/src/rasters.rs @@ -91,6 +91,7 @@ pub fn generate_tiled_rasters( let (x_tiles, y_tiles) = number_of_tiles; let mut raster_builder = RasterBuilder::new(x_tiles * y_tiles); let band_count = 3; + let crs = lnglat().unwrap().to_crs_string(); for tile_y in 0..y_tiles { for tile_x in 0..x_tiles { @@ -108,7 +109,7 @@ pub fn generate_tiled_rasters( skew_y: 0.0, }; - raster_builder.start_raster(&raster_metadata, None)?; + raster_builder.start_raster(&raster_metadata, Some(&crs))?; for _ in 0..band_count { // Set a nodata value appropriate for the data type From 28d67ea5fec5f9bb7e2c644cf11f965cb10d902d Mon Sep 17 00:00:00 2001 From: jesspav <202656197+jesspav@users.noreply.github.com> Date: Mon, 8 Dec 2025 16:27:53 -0800 Subject: [PATCH 2/2] clippy --- rust/sedona-raster-functions/src/rs_srid.rs | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/rust/sedona-raster-functions/src/rs_srid.rs b/rust/sedona-raster-functions/src/rs_srid.rs index 4bab89f5d..ec6907ac8 100644 --- a/rust/sedona-raster-functions/src/rs_srid.rs +++ b/rust/sedona-raster-functions/src/rs_srid.rs @@ -177,12 +177,9 @@ impl SedonaScalarKernel for RsCrs { Some(crs_str) => { let crs_value = serde_json::Value::String(crs_str.to_string()); let crs = deserialize_crs(&crs_value).map_err(|e| { - DataFusionError::Execution(format!( - "Failed to deserialize CRS: {}", - e - )) + DataFusionError::Execution(format!("Failed to deserialize CRS: {}", e)) })?; - + let crs_string = crs .ok_or_else(|| { DataFusionError::Execution( @@ -226,7 +223,7 @@ mod tests { #[test] fn udf_srid() { let udf: ScalarUDF = rs_srid_udf().into(); - let tester = ScalarUdfTester::new(udf.into(), vec![RASTER]); + let tester = ScalarUdfTester::new(udf, vec![RASTER]); tester.assert_return_type(DataType::UInt32); @@ -246,7 +243,7 @@ mod tests { #[test] fn udf_crs() { let udf: ScalarUDF = rs_crs_udf().into(); - let tester = ScalarUdfTester::new(udf.into(), vec![RASTER]); + let tester = ScalarUdfTester::new(udf, vec![RASTER]); tester.assert_return_type(DataType::Utf8);