# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

"""Validate ``docs/reference/sql.md`` against the SQL functions SedonaDB exposes.

Two checks are performed, in this order:

1. Every ``## <name>`` heading in the reference must correspond (ignoring
   case) to a routine listed in ``information_schema.routines`` whose name
   starts with ``st_`` or ``rs_``, and vice versa.  A heading typo such as
   ``## vST_UnaryUnion`` is therefore reported under "Functions only in
   document".
2. The headings must appear in ascending (case-sensitive) sorted order.

Either failure raises ``RuntimeError`` after printing a human-readable report.
If the first check fails, the second one is not run.
"""

import difflib
from pathlib import Path
from typing import Iterable, List, Sequence, Tuple

HERE = Path(__file__).parent

# The SQL reference lives next to this script's directory: docs/reference/sql.md
SQL_REFERENCE = HERE.parent / "reference" / "sql.md"

# Headers with `##` are the function names.
HEADER_PREFIX = "## "

# Only ``routine_name`` is consumed by this script; the remaining columns are
# kept exactly as in the original query.
ROUTINES_QUERY = r"""
SELECT DISTINCT
    routine_name,
    function_type,
    CASE substr(routine_name, 1, 2)
        WHEN 'st' THEN 'vector'
        WHEN 'rs' THEN 'raster'
        ELSE 'unknown'
    END AS data_type,
    count(*) OVER (PARTITION BY description) > 1 as has_alias
FROM information_schema.routines
WHERE routine_type = 'FUNCTION' AND regexp_like(routine_name, '^(st_|rs_)')
ORDER BY routine_name
"""


def extract_function_names(lines: Iterable[str]) -> List[str]:
    """Return the text of every ``## `` heading in *lines*, in document order.

    Surrounding whitespace is stripped rather than blindly dropping the last
    character, so a heading on a final line without a trailing newline keeps
    its last letter and trailing spaces do not become part of the name.
    """
    return [
        line[len(HEADER_PREFIX):].strip()
        for line in lines
        if line.startswith(HEADER_PREFIX)
    ]


def fetch_implemented_function_names() -> set:
    """Return the set of ``routine_name`` values produced by ``ROUTINES_QUERY``."""
    # Imported lazily so the pure helpers in this module can be imported (and
    # tested) on machines where the sedonadb package is not installed.
    import sedonadb

    sd = sedonadb.connect()
    df = sd.sql(ROUTINES_QUERY).to_pandas()
    return set(df["routine_name"].tolist())


def find_mismatches(
    documented: Iterable[str], implemented: Iterable[str]
) -> Tuple[List[str], List[str]]:
    """Compare documented headings with implemented routine names.

    Documented names are lower-cased before comparison; implemented names are
    used as given.

    Returns:
        ``(only_in_impl, only_in_doc)``, each a sorted list of names.
    """
    documented_lower = {name.lower() for name in documented}
    implemented_set = set(implemented)
    only_in_impl = sorted(implemented_set - documented_lower)
    only_in_doc = sorted(documented_lower - implemented_set)
    return only_in_impl, only_in_doc


def format_mismatch_report(
    only_in_impl: Sequence[str], only_in_doc: Sequence[str]
) -> str:
    """Build the text printed when the reference and implementation disagree.

    An empty side is rendered as `` (none)`` instead of a dangling `` - ``
    bullet.  The result is meant to be passed to a single ``print`` call.
    """

    def section(title: str, names: Sequence[str]) -> str:
        bullets = [f" - {name}" for name in names] or [" (none)"]
        return "\n".join(["", title, *bullets])

    return "\n".join(
        [
            section("Functions only in implementation:", only_in_impl),
            section("Functions only in document:", only_in_doc),
            "",
            "",
        ]
    )


def sort_order_diff(names: Sequence[str]) -> List[str]:
    """Return unified-diff lines from *names* to ``sorted(names)``.

    The list is empty when *names* is already sorted.  ``lineterm=""`` is
    required because the names carry no trailing newlines; with the default
    terminator the control lines would end in ``\\n`` and joining the output
    with newlines would produce blank lines after them.
    """
    names = list(names)
    expected = sorted(names)
    if names == expected:
        return []
    return list(
        difflib.unified_diff(
            names, expected, fromfile="current", tofile="sorted", lineterm=""
        )
    )


def main() -> None:
    """Run both checks; raise ``RuntimeError`` on the first one that fails."""
    # Explicit encoding: do not depend on the platform's locale codec.
    with open(SQL_REFERENCE, "r", encoding="utf-8") as f:
        funs_in_doc = extract_function_names(f)

    ### Check if all the functions are documented

    funs_only_in_impl, funs_only_in_doc = find_mismatches(
        funs_in_doc, fetch_implemented_function_names()
    )

    if funs_only_in_impl or funs_only_in_doc:
        print(format_mismatch_report(funs_only_in_impl, funs_only_in_doc))

        raise RuntimeError(
            "There are some mismatch between the SQL reference and the actual implementation!"
        )

    ### Check if the function order is sorted

    diff = sort_order_diff(funs_in_doc)
    if diff:
        print("\n".join(diff))

        raise RuntimeError("The SQL functions are not sorted in alphabetical order")


if __name__ == "__main__":
    main()