Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/reference/sql.md
Original file line number Diff line number Diff line change
Expand Up @@ -1550,7 +1550,7 @@ Since: v0.2.
SELECT ST_Translate(ST_GeomFromText('POINT(-71.01 42.37)'), 1, 2);
```

## vST_UnaryUnion
## ST_UnaryUnion

This variant of ST_Union operates on a single geometry input. The input geometry can be a simple Geometry type, a MultiGeometry, or a GeometryCollection. The function calculates the geometric union across all components and elements within the provided geometry object.

Expand Down
75 changes: 75 additions & 0 deletions docs/scripts/validate_sql.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

import sedonadb
import difflib
from pathlib import Path

HERE = Path(__file__).parent

# Read the SQL reference explicitly as UTF-8 so that the result does not depend
# on the platform's default text encoding.
with open(HERE.parent / "reference" / "sql.md", "r", encoding="utf-8") as f:
    lines = f.readlines()

# Headers with `##` are the function names. Drop the "## " prefix and strip the
# surrounding whitespace rather than blindly cutting the last character, so a
# header on a final line without a trailing newline is not truncated.
funs_in_doc = [line[3:].strip() for line in lines if line.startswith("## ")]


### Check if all the functions are documented

# Connection used to run the `information_schema.routines` query below.
sd = sedonadb.connect()
df = sd.sql(r"""
SELECT DISTINCT
routine_name,
function_type,
CASE substr(routine_name, 1, 2)
WHEN 'st' THEN 'vector'
WHEN 'rs' THEN 'raster'
ELSE 'unknown'
END AS data_type,
count(*) OVER (PARTITION BY description) > 1 as has_alias
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems that alias information cannot be retrieved via SQL. I added a workaround that guesses it from the description, but I'm not sure whether this is reliable enough.

I might rewrite the validation script in Rust, just as DataFusion does to generate its documentation.

https://github.com/apache/datafusion/blob/5a01e68643a198a1aaa7124524d7be5be7df24ec/datafusion/core/src/bin/print_functions_docs.rs#L174-L179

FROM information_schema.routines
WHERE routine_type = 'FUNCTION' AND regexp_like(routine_name, '^(st_|rs_)')
ORDER BY routine_name
""").to_pandas()
# Only the routine names are needed for the comparison with the reference.
funs_in_impl_set = set(df["routine_name"].tolist())

# Documented names are lowercased before being compared with the routine names
# (the query filters on the lowercase `st_` / `rs_` prefixes).
funs_in_doc_set = {name.lower() for name in funs_in_doc}

# Sorted so that the report below is stable between runs.
funs_only_in_impl = sorted(funs_in_impl_set - funs_in_doc_set)
funs_only_in_doc = sorted(funs_in_doc_set - funs_in_impl_set)

if funs_only_in_impl or funs_only_in_doc:
    for title, names in (
        ("Functions only in implementation", funs_only_in_impl),
        ("Functions only in document", funs_only_in_doc),
    ):
        print(f"\n{title}:")
        if names:
            print(" - " + "\n - ".join(names))
        else:
            # Avoid a dangling, empty " - " bullet when this side has no
            # mismatches.
            print(" (none)")
    print("\n")

    raise RuntimeError(
        "There are some mismatch between the SQL reference and the actual implementation!"
    )


### Check if the function order is sorted

# Sort once and reuse the result for both the comparison and the diff.
sorted_funs_in_doc = sorted(funs_in_doc)

if funs_in_doc != sorted_funs_in_doc:
    # The function names carry no trailing newlines, so request newline-free
    # control lines (`---`, `+++`, `@@`) as well. With the default
    # lineterm="\n", joining on "\n" below would insert blank lines after
    # every control line.
    diff = difflib.unified_diff(
        funs_in_doc,
        sorted_funs_in_doc,
        fromfile="current",
        tofile="sorted",
        lineterm="",
    )

    print("\n".join(diff))

    raise RuntimeError("The SQL functions are not sorted in alphabetical order")