From 3eafa69f396017cebfd71133cd5784d9786c0bdf Mon Sep 17 00:00:00 2001
From: Hiroaki Yutani <yutani.ini@gmail.com>
Date: Mon, 8 Dec 2025 21:54:37 +0900
Subject: [PATCH 1/6] Add docs/scripts/validate_sql.py to validate the SQL reference

---
 docs/scripts/validate_sql.py | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)
 create mode 100644 docs/scripts/validate_sql.py

diff --git a/docs/scripts/validate_sql.py b/docs/scripts/validate_sql.py
new file mode 100644
index 00000000..5bcccb8a
--- /dev/null
+++ b/docs/scripts/validate_sql.py
@@ -0,0 +1,25 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import sedonadb
+
+
+with open('docs/reference/sql.md', 'r'):
+    lines = f.readlines()
+    # Headers with `##` are the function names.
+    sql_functions = [line[2:-1] for line in lines if line.startswith('## ')]
+

From 8bbc4574583928e14d36c91f42e31df36f978c09 Mon Sep 17 00:00:00 2001
From: Hiroaki Yutani <yutani@mierune.co.jp>
Date: Mon, 8 Dec 2025 22:01:15 +0900
Subject: [PATCH 2/6] Fix a typo in the ST_UnaryUnion heading

---
 docs/reference/sql.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/reference/sql.md b/docs/reference/sql.md
index 9881c4e4..a8366357 100644
--- a/docs/reference/sql.md
+++ b/docs/reference/sql.md
@@ -1550,7 +1550,7 @@ Since: v0.2.
 SELECT ST_Translate(ST_GeomFromText('POINT(-71.01 42.37)'), 1, 2);
 ```
 
-## vST_UnaryUnion
+## ST_UnaryUnion
 
 This variant of ST_Union operates on a single geometry input. The input geometry can be a simple Geometry type, a MultiGeometry, or a GeometryCollection. The function calculates the geometric union across all components and elements within the provided geometry object.
 

From 17c57249807b99dccc5015ac9fe72c63563a6bcd Mon Sep 17 00:00:00 2001
From: Hiroaki Yutani <yutani@mierune.co.jp>
Date: Mon, 8 Dec 2025 22:43:11 +0900
Subject: [PATCH 3/6] Check documented functions against the implementation and their sort order

---
 docs/scripts/validate_sql.py | 44 ++++++++++++++++++++++++++++++++++--
 1 file changed, 42 insertions(+), 2 deletions(-)

diff --git a/docs/scripts/validate_sql.py b/docs/scripts/validate_sql.py
index 5bcccb8a..4ea40687 100644
--- a/docs/scripts/validate_sql.py
+++ b/docs/scripts/validate_sql.py
@@ -16,10 +16,50 @@
 # under the License.
 
 import sedonadb
+import difflib
 
 
-with open('docs/reference/sql.md', 'r'):
+with open("docs/reference/sql.md", "r") as f:
     lines = f.readlines()
     # Headers with `##` are the function names.
-    sql_functions = [line[2:-1] for line in lines if line.startswith('## ')]
+    st_funs_in_doc = [line[3:-1] for line in lines if line.startswith("## ")]
 
+
+### Check if all the functions are documented
+
+sd = sedonadb.connect()
+df = sd.sql(r"""
+SELECT DISTINCT routine_name
+FROM information_schema.routines
+WHERE routine_type = 'FUNCTION' AND routine_name LIKE 'st\_%' ESCAPE '\'
+ORDER BY routine_name
+""").to_pandas()
+st_funs_in_impl_set = set(df["routine_name"].tolist())
+
+st_funs_in_doc_set = set(f.lower() for f in st_funs_in_doc)
+
+funs_only_in_impl = sorted(st_funs_in_impl_set - st_funs_in_doc_set)
+funs_only_in_doc = sorted(st_funs_in_doc_set - st_funs_in_impl_set)
+
+if funs_only_in_impl or funs_only_in_doc:
+    print("\nFunctions only in implementation:\n  - ", end="")
+    print("\n  - ".join(funs_only_in_impl))
+    print("\nFunctions only in document:\n  - ", end="")
+    print("\n  - ".join(funs_only_in_doc))
+    print("\n")
+
+    raise RuntimeError(
+        "There are some mismatch between the SQL reference and the actual implementation!"
+    )
+
+
+### Check if the function order is sorted
+
+if st_funs_in_doc != sorted(st_funs_in_doc):
+    diff = difflib.unified_diff(
+        st_funs_in_doc, sorted(st_funs_in_doc), fromfile="current", tofile="sorted"
+    )
+
+    print("\n".join(diff))
+
+    raise RuntimeError("The SQL functions are not sorted in alphabetical order")

From 2ba264ebafb37484375c3474e9d47933b33bd447 Mon Sep 17 00:00:00 2001
From: Hiroaki Yutani <yutani.ini@gmail.com>
Date: Tue, 9 Dec 2025 08:53:11 +0900
Subject: [PATCH 4/6] Resolve sql.md relative to the script location in validate_sql.py

Co-authored-by: Dewey Dunnington <dewey@dunnington.ca>
---
 docs/scripts/validate_sql.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/docs/scripts/validate_sql.py b/docs/scripts/validate_sql.py
index 4ea40687..f823776e 100644
--- a/docs/scripts/validate_sql.py
+++ b/docs/scripts/validate_sql.py
@@ -17,9 +17,11 @@
 
 import sedonadb
 import difflib
+from pathlib import Path
 
+HERE = Path(__file__).parent
 
-with open("docs/reference/sql.md", "r") as f:
+with open(HERE.parent / "reference" / "sql.md", "r") as f:
     lines = f.readlines()
     # Headers with `##` are the function names.
     st_funs_in_doc = [line[3:-1] for line in lines if line.startswith("## ")]

From 44dd90d8798986fd483313f7647ff46efa61b53b Mon Sep 17 00:00:00 2001
From: Hiroaki Yutani <yutani@mierune.co.jp>
Date: Sat, 13 Dec 2025 13:50:00 +0900
Subject: [PATCH 5/6] Include `rs_` functions in the implementation query

---
 docs/scripts/validate_sql.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/scripts/validate_sql.py b/docs/scripts/validate_sql.py
index 4ea40687..f1d4e194 100644
--- a/docs/scripts/validate_sql.py
+++ b/docs/scripts/validate_sql.py
@@ -31,7 +31,7 @@
 df = sd.sql(r"""
 SELECT DISTINCT routine_name
 FROM information_schema.routines
-WHERE routine_type = 'FUNCTION' AND routine_name LIKE 'st\_%' ESCAPE '\'
+WHERE routine_type = 'FUNCTION' AND regexp_like(routine_name, '^(st_|rs_)')
 ORDER BY routine_name
 """).to_pandas()
 st_funs_in_impl_set = set(df["routine_name"].tolist())

From 765eb3736a464a946d41ccb1bb1d64f45cfc2f66 Mon Sep 17 00:00:00 2001
From: Hiroaki Yutani <yutani@mierune.co.jp>
Date: Sat, 13 Dec 2025 14:25:22 +0900
Subject: [PATCH 6/6] Select function metadata columns and rename `st_funs_*` variables to `funs_*`

---
 docs/scripts/validate_sql.py | 24 ++++++++++++++++--------
 1 file changed, 16 insertions(+), 8 deletions(-)

diff --git a/docs/scripts/validate_sql.py b/docs/scripts/validate_sql.py
index f1d4e194..761a3fd2 100644
--- a/docs/scripts/validate_sql.py
+++ b/docs/scripts/validate_sql.py
@@ -22,24 +22,32 @@
 with open("docs/reference/sql.md", "r") as f:
     lines = f.readlines()
     # Headers with `##` are the function names.
-    st_funs_in_doc = [line[3:-1] for line in lines if line.startswith("## ")]
+    funs_in_doc = [line[3:-1] for line in lines if line.startswith("## ")]
 
 
 ### Check if all the functions are documented
 
 sd = sedonadb.connect()
 df = sd.sql(r"""
-SELECT DISTINCT routine_name
+SELECT DISTINCT
+  routine_name,
+  function_type,
+  CASE substr(routine_name, 1, 2)
+    WHEN 'st' THEN 'vector'
+    WHEN 'rs' THEN 'raster'
+    ELSE 'unknown'
+  END AS data_type,
+  count(*) OVER (PARTITION BY description) > 1 as has_alias
 FROM information_schema.routines
 WHERE routine_type = 'FUNCTION' AND regexp_like(routine_name, '^(st_|rs_)')
 ORDER BY routine_name
 """).to_pandas()
-st_funs_in_impl_set = set(df["routine_name"].tolist())
+funs_in_impl_set = set(df["routine_name"].tolist())
 
-st_funs_in_doc_set = set(f.lower() for f in st_funs_in_doc)
+funs_in_doc_set = set(f.lower() for f in funs_in_doc)
 
-funs_only_in_impl = sorted(st_funs_in_impl_set - st_funs_in_doc_set)
-funs_only_in_doc = sorted(st_funs_in_doc_set - st_funs_in_impl_set)
+funs_only_in_impl = sorted(funs_in_impl_set - funs_in_doc_set)
+funs_only_in_doc = sorted(funs_in_doc_set - funs_in_impl_set)
 
 if funs_only_in_impl or funs_only_in_doc:
     print("\nFunctions only in implementation:\n  - ", end="")
@@ -55,9 +63,9 @@
 
 ### Check if the function order is sorted
 
-if st_funs_in_doc != sorted(st_funs_in_doc):
+if funs_in_doc != sorted(funs_in_doc):
     diff = difflib.unified_diff(
-        st_funs_in_doc, sorted(st_funs_in_doc), fromfile="current", tofile="sorted"
+        funs_in_doc, sorted(funs_in_doc), fromfile="current", tofile="sorted"
     )
 
     print("\n".join(diff))