From 3eafa69f396017cebfd71133cd5784d9786c0bdf Mon Sep 17 00:00:00 2001 From: Hiroaki Yutani Date: Mon, 8 Dec 2025 21:54:37 +0900 Subject: [PATCH 1/6] Add a script --- docs/scripts/validate_sql.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 docs/scripts/validate_sql.py diff --git a/docs/scripts/validate_sql.py b/docs/scripts/validate_sql.py new file mode 100644 index 00000000..5bcccb8a --- /dev/null +++ b/docs/scripts/validate_sql.py @@ -0,0 +1,25 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import sedonadb + + +with open('docs/reference/sql.md', 'r'): + lines = f.readlines() + # Headers with `##` are the function names. + sql_functions = [line[2:-1] for line in lines if line.startswith('## ')] + From 8bbc4574583928e14d36c91f42e31df36f978c09 Mon Sep 17 00:00:00 2001 From: Hiroaki Yutani Date: Mon, 8 Dec 2025 22:01:15 +0900 Subject: [PATCH 2/6] Fix a typo --- docs/reference/sql.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/reference/sql.md b/docs/reference/sql.md index 9881c4e4..a8366357 100644 --- a/docs/reference/sql.md +++ b/docs/reference/sql.md @@ -1550,7 +1550,7 @@ Since: v0.2. SELECT ST_Translate(ST_GeomFromText('POINT(-71.01 42.37)'), 1, 2); ``` -## vST_UnaryUnion +## ST_UnaryUnion This variant of ST_Union operates on a single geometry input. The input geometry can be a simple Geometry type, a MultiGeometry, or a GeometryCollection. The function calculates the geometric union across all components and elements within the provided geometry object. From 17c57249807b99dccc5015ac9fe72c63563a6bcd Mon Sep 17 00:00:00 2001 From: Hiroaki Yutani Date: Mon, 8 Dec 2025 22:43:11 +0900 Subject: [PATCH 3/6] Improve --- docs/scripts/validate_sql.py | 44 ++++++++++++++++++++++++++++++++++-- 1 file changed, 42 insertions(+), 2 deletions(-) diff --git a/docs/scripts/validate_sql.py b/docs/scripts/validate_sql.py index 5bcccb8a..4ea40687 100644 --- a/docs/scripts/validate_sql.py +++ b/docs/scripts/validate_sql.py @@ -16,10 +16,50 @@ # under the License. import sedonadb +import difflib -with open('docs/reference/sql.md', 'r'): +with open("docs/reference/sql.md", "r") as f: lines = f.readlines() # Headers with `##` are the function names. - sql_functions = [line[2:-1] for line in lines if line.startswith('## ')] + st_funs_in_doc = [line[3:-1] for line in lines if line.startswith("## ")] + +### Check if all the functions are documented + +sd = sedonadb.connect() +df = sd.sql(r""" +SELECT DISTINCT routine_name +FROM information_schema.routines +WHERE routine_type = 'FUNCTION' AND routine_name LIKE 'st\_%' ESCAPE '\' +ORDER BY routine_name +""").to_pandas() +st_funs_in_impl_set = set(df["routine_name"].tolist()) + +st_funs_in_doc_set = set(f.lower() for f in st_funs_in_doc) + +funs_only_in_impl = sorted(st_funs_in_impl_set - st_funs_in_doc_set) +funs_only_in_doc = sorted(st_funs_in_doc_set - st_funs_in_impl_set) + +if funs_only_in_impl or funs_only_in_doc: + print("\nFunctions only in implementation:\n - ", end="") + print("\n - ".join(funs_only_in_impl)) + print("\nFunctions only in document:\n - ", end="") + print("\n - ".join(funs_only_in_doc)) + print("\n") + + raise RuntimeError( + "There are some mismatch between the SQL reference and the actual implementation!" + ) + + +### Check if the function order is sorted + +if st_funs_in_doc != sorted(st_funs_in_doc): + diff = difflib.unified_diff( + st_funs_in_doc, sorted(st_funs_in_doc), fromfile="current", tofile="sorted" + ) + + print("\n".join(diff)) + + raise RuntimeError("The SQL functions are not sorted in alphabetical order") From 2ba264ebafb37484375c3474e9d47933b33bd447 Mon Sep 17 00:00:00 2001 From: Hiroaki Yutani Date: Tue, 9 Dec 2025 08:53:11 +0900 Subject: [PATCH 4/6] Update docs/scripts/validate_sql.py Co-authored-by: Dewey Dunnington --- docs/scripts/validate_sql.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/scripts/validate_sql.py b/docs/scripts/validate_sql.py index 4ea40687..f823776e 100644 --- a/docs/scripts/validate_sql.py +++ b/docs/scripts/validate_sql.py @@ -17,9 +17,11 @@ import sedonadb import difflib +from pathlib import Path +HERE = Path(__file__).parent -with open("docs/reference/sql.md", "r") as f: +with open(HERE.parent / "reference" / "sql.md", "r") as f: lines = f.readlines() # Headers with `##` are the function names. st_funs_in_doc = [line[3:-1] for line in lines if line.startswith("## ")] From 44dd90d8798986fd483313f7647ff46efa61b53b Mon Sep 17 00:00:00 2001 From: Hiroaki Yutani Date: Sat, 13 Dec 2025 13:50:00 +0900 Subject: [PATCH 5/6] Include `rs_` function --- docs/scripts/validate_sql.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/scripts/validate_sql.py b/docs/scripts/validate_sql.py index 4ea40687..f1d4e194 100644 --- a/docs/scripts/validate_sql.py +++ b/docs/scripts/validate_sql.py @@ -31,7 +31,7 @@ df = sd.sql(r""" SELECT DISTINCT routine_name FROM information_schema.routines -WHERE routine_type = 'FUNCTION' AND routine_name LIKE 'st\_%' ESCAPE '\' +WHERE routine_type = 'FUNCTION' AND regexp_like(routine_name, '^(st_|rs_)') ORDER BY routine_name """).to_pandas() st_funs_in_impl_set = set(df["routine_name"].tolist()) From 765eb3736a464a946d41ccb1bb1d64f45cfc2f66 Mon Sep 17 00:00:00 2001 From: Hiroaki Yutani Date: Sat, 13 Dec 2025 14:25:22 +0900 Subject: [PATCH 6/6] Improve SQL --- docs/scripts/validate_sql.py | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/docs/scripts/validate_sql.py b/docs/scripts/validate_sql.py index f1d4e194..761a3fd2 100644 --- a/docs/scripts/validate_sql.py +++ b/docs/scripts/validate_sql.py @@ -22,24 +22,32 @@ with open("docs/reference/sql.md", "r") as f: lines = f.readlines() # Headers with `##` are the function names. - st_funs_in_doc = [line[3:-1] for line in lines if line.startswith("## ")] + funs_in_doc = [line[3:-1] for line in lines if line.startswith("## ")] ### Check if all the functions are documented sd = sedonadb.connect() df = sd.sql(r""" -SELECT DISTINCT routine_name +SELECT DISTINCT + routine_name, + function_type, + CASE substr(routine_name, 1, 2) + WHEN 'st' THEN 'vector' + WHEN 'rs' THEN 'raster' + ELSE 'unknown' + END AS data_type, + count(*) OVER (PARTITION BY description) > 1 as has_alias FROM information_schema.routines WHERE routine_type = 'FUNCTION' AND regexp_like(routine_name, '^(st_|rs_)') ORDER BY routine_name """).to_pandas() -st_funs_in_impl_set = set(df["routine_name"].tolist()) +funs_in_impl_set = set(df["routine_name"].tolist()) -st_funs_in_doc_set = set(f.lower() for f in st_funs_in_doc) +funs_in_doc_set = set(f.lower() for f in funs_in_doc) -funs_only_in_impl = sorted(st_funs_in_impl_set - st_funs_in_doc_set) -funs_only_in_doc = sorted(st_funs_in_doc_set - st_funs_in_impl_set) +funs_only_in_impl = sorted(funs_in_impl_set - funs_in_doc_set) +funs_only_in_doc = sorted(funs_in_doc_set - funs_in_impl_set) if funs_only_in_impl or funs_only_in_doc: print("\nFunctions only in implementation:\n - ", end="") @@ -55,9 +63,9 @@ ### Check if the function order is sorted -if st_funs_in_doc != sorted(st_funs_in_doc): +if funs_in_doc != sorted(funs_in_doc): diff = difflib.unified_diff( - st_funs_in_doc, sorted(st_funs_in_doc), fromfile="current", tofile="sorted" + funs_in_doc, sorted(funs_in_doc), fromfile="current", tofile="sorted" ) print("\n".join(diff))