Skip to content

Commit

Permalink
[CALCITE-6156] Add ENDSWITH, STARTSWITH functions (enabled in Postgre…
Browse files Browse the repository at this point in the history
…s, Snowflake libraries)
  • Loading branch information
tanclary committed Dec 21, 2023
1 parent a816267 commit e74ff3e
Show file tree
Hide file tree
Showing 9 changed files with 192 additions and 59 deletions.
6 changes: 6 additions & 0 deletions core/src/main/java/org/apache/calcite/sql/SqlKind.java
Original file line number Diff line number Diff line change
Expand Up @@ -806,6 +806,12 @@ public enum SqlKind {
/** {@code SUBSTR} function (PostgreSQL semantics). */
SUBSTR_POSTGRESQL,

/** {@code ENDS_WITH} function. */
ENDS_WITH,

/** {@code STARTS_WITH} function. */
STARTS_WITH,

/** Call to a function using JDBC function syntax. */
JDBC_FN,

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,11 @@
package org.apache.calcite.sql.dialect;

import org.apache.calcite.avatica.util.Casing;
import org.apache.calcite.sql.SqlCall;
import org.apache.calcite.sql.SqlDialect;
import org.apache.calcite.sql.SqlWriter;
import org.apache.calcite.sql.fun.SqlLibraryOperators;
import org.apache.calcite.sql.parser.SqlParserPos;

/**
* A <code>SqlDialect</code> implementation for the Snowflake database.
Expand All @@ -36,6 +40,24 @@ public SnowflakeSqlDialect(Context context) {
super(context);
}

@Override public void unparseCall(final SqlWriter writer, final SqlCall call, final int leftPrec,
final int rightPrec) {
switch (call.getKind()) {
case ENDS_WITH:
SqlCall endsWithCall = SqlLibraryOperators.ENDSWITH
.createCall(SqlParserPos.ZERO, call.getOperandList());
super.unparseCall(writer, endsWithCall, leftPrec, rightPrec);
break;
case STARTS_WITH:
SqlCall startsWithCall = SqlLibraryOperators.STARTSWITH
.createCall(SqlParserPos.ZERO, call.getOperandList());
super.unparseCall(writer, startsWithCall, leftPrec, rightPrec);
break;
default:
super.unparseCall(writer, call, leftPrec, rightPrec);
}
}

@Override public boolean supportsApproxCountDistinct() {
return true;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,8 @@ public enum SqlLibrary {
/** A collection of operators that are in PostgreSQL but not in standard
* SQL. */
POSTGRESQL("p", "postgresql"),
/** A collection of operators that are in Snowflake but not in standard SQL. */
SNOWFLAKE("f", "snowflake"),
/** A collection of operators that are in Apache Spark but not in standard
* SQL. */
SPARK("s", "spark");
Expand All @@ -97,7 +99,7 @@ public List<SqlLibrary> children() {
switch (this) {
case ALL:
return ImmutableList.of(BIG_QUERY, CALCITE, HIVE, MSSQL, MYSQL, ORACLE,
POSTGRESQL, SPARK);
POSTGRESQL, SNOWFLAKE, SPARK);
default:
return ImmutableList.of();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@
import static org.apache.calcite.sql.fun.SqlLibrary.MYSQL;
import static org.apache.calcite.sql.fun.SqlLibrary.ORACLE;
import static org.apache.calcite.sql.fun.SqlLibrary.POSTGRESQL;
import static org.apache.calcite.sql.fun.SqlLibrary.SNOWFLAKE;
import static org.apache.calcite.sql.fun.SqlLibrary.SPARK;
import static org.apache.calcite.util.Static.RESOURCE;

Expand Down Expand Up @@ -360,17 +361,25 @@ static RelDataType deriveTypeSplit(SqlOperatorBinding operatorBinding,
OperandTypes.STRING_INTEGER_OPTIONAL_INTEGER,
SqlFunctionCategory.STRING);

/** The "ENDS_WITH(value1, value2)" function (BigQuery). */
@LibraryOperator(libraries = {BIG_QUERY})
public static final SqlFunction ENDS_WITH =
SqlBasicFunction.create("ENDS_WITH", ReturnTypes.BOOLEAN_NULLABLE,
OperandTypes.STRING_SAME_SAME, SqlFunctionCategory.STRING);
/** The "ENDS_WITH(value1, value2)" function (BigQuery, PostgreSQL). */
@LibraryOperator(libraries = {BIG_QUERY, POSTGRESQL})
public static final SqlBasicFunction ENDS_WITH =
SqlBasicFunction.create(SqlKind.ENDS_WITH, ReturnTypes.BOOLEAN_NULLABLE,
OperandTypes.STRING_SAME_SAME);

/** The "STARTS_WITH(value1, value2)" function (BigQuery). */
@LibraryOperator(libraries = {BIG_QUERY})
public static final SqlFunction STARTS_WITH =
SqlBasicFunction.create("STARTS_WITH", ReturnTypes.BOOLEAN_NULLABLE,
OperandTypes.STRING_SAME_SAME, SqlFunctionCategory.STRING);
/** The "ENDSWITH(value1, value2)" function (Snowflake). */
@LibraryOperator(libraries = {SNOWFLAKE})
public static final SqlFunction ENDSWITH = ENDS_WITH.withName("ENDSWITH");

/** The "STARTS_WITH(value1, value2)" function (BigQuery, PostgreSQL). */
@LibraryOperator(libraries = {BIG_QUERY, POSTGRESQL})
public static final SqlBasicFunction STARTS_WITH =
SqlBasicFunction.create(SqlKind.STARTS_WITH, ReturnTypes.BOOLEAN_NULLABLE,
OperandTypes.STRING_SAME_SAME);

/** The "STARTSWITH(value1, value2)" function (Snowflake). */
@LibraryOperator(libraries = {SNOWFLAKE})
public static final SqlFunction STARTSWITH = STARTS_WITH.withName("STARTSWITH");

/** BigQuery's "SUBSTR(string, position [, substringLength ])" function. */
@LibraryOperator(libraries = {BIG_QUERY})
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,8 @@ private StandardConvertletTable() {
addAlias(SqlStdOperatorTable.PERCENT_REMAINDER, SqlStdOperatorTable.MOD);
addAlias(SqlLibraryOperators.IFNULL, SqlLibraryOperators.NVL);
addAlias(SqlLibraryOperators.REGEXP_SUBSTR, SqlLibraryOperators.REGEXP_EXTRACT);
addAlias(SqlLibraryOperators.ENDSWITH, SqlLibraryOperators.ENDS_WITH);
addAlias(SqlLibraryOperators.STARTSWITH, SqlLibraryOperators.STARTS_WITH);

// Register convertlets for specific objects.
registerOp(SqlStdOperatorTable.CAST, this::convertCast);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6662,6 +6662,40 @@ private void checkLiteral2(String expression, String expected) {
sql(sql).withMysql().ok(expectedMysql);
}

/** Test case for
* <a href="https://issues.apache.org/jira/browse/CALCITE-6156">[CALCITE-6156]
* Add ENDSWITH, STARTSWITH functions (enabled in Postgres, Snowflake libraries)</a>. */
@Test void testSnowflakeStartsWith() {
final String query = "select startswith(\"brand_name\", 'a')\n"
+ "from \"product\"";
final String expectedBigQuery = "SELECT STARTS_WITH(brand_name, 'a')\n"
+ "FROM foodmart.product";
final String expectedPostgres = "SELECT STARTS_WITH(\"brand_name\", 'a')\n"
+ "FROM \"foodmart\".\"product\"";
final String expectedSnowflake = "SELECT STARTSWITH(\"brand_name\", 'a')\n"
+ "FROM \"foodmart\".\"product\"";
sql(query).withLibrary(SqlLibrary.SNOWFLAKE).withBigQuery().ok(expectedBigQuery);
sql(query).withLibrary(SqlLibrary.SNOWFLAKE).withPostgresql().ok(expectedPostgres);
sql(query).withLibrary(SqlLibrary.SNOWFLAKE).withSnowflake().ok(expectedSnowflake);
}

/** Test case for
* <a href="https://issues.apache.org/jira/browse/CALCITE-6156">[CALCITE-6156]
* Add ENDSWITH, STARTSWITH functions (enabled in Postgres, Snowflake libraries)</a>. */
@Test void testSnowflakeEndsWith() {
final String query = "select endswith(\"brand_name\", 'a')\n"
+ "from \"product\"";
final String expectedBigQuery = "SELECT ENDS_WITH(brand_name, 'a')\n"
+ "FROM foodmart.product";
final String expectedPostgres = "SELECT ENDS_WITH(\"brand_name\", 'a')\n"
+ "FROM \"foodmart\".\"product\"";
final String expectedSnowflake = "SELECT ENDSWITH(\"brand_name\", 'a')\n"
+ "FROM \"foodmart\".\"product\"";
sql(query).withLibrary(SqlLibrary.SNOWFLAKE).withBigQuery().ok(expectedBigQuery);
sql(query).withLibrary(SqlLibrary.SNOWFLAKE).withPostgresql().ok(expectedPostgres);
sql(query).withLibrary(SqlLibrary.SNOWFLAKE).withSnowflake().ok(expectedSnowflake);
}

@Test void testSubstringInSpark() {
final String query = "select substring(\"brand_name\" from 2) "
+ "from \"product\"\n";
Expand Down
8 changes: 4 additions & 4 deletions core/src/test/java/org/apache/calcite/util/UtilTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -951,16 +951,16 @@ private List<Integer> makeConsList(int start, int end) {

assertThat(SqlLibrary.expand(ImmutableList.of(a)),
hasToString("[ALL, BIG_QUERY, CALCITE, HIVE, MSSQL, MYSQL, ORACLE, "
+ "POSTGRESQL, SPARK]"));
+ "POSTGRESQL, SNOWFLAKE, SPARK]"));
assertThat(SqlLibrary.expand(ImmutableList.of(a, c)),
hasToString("[ALL, BIG_QUERY, CALCITE, HIVE, MSSQL, MYSQL, ORACLE, "
+ "POSTGRESQL, SPARK]"));
+ "POSTGRESQL, SNOWFLAKE, SPARK]"));
assertThat(SqlLibrary.expand(ImmutableList.of(c, a)),
hasToString("[CALCITE, ALL, BIG_QUERY, HIVE, MSSQL, MYSQL, ORACLE, "
+ "POSTGRESQL, SPARK]"));
+ "POSTGRESQL, SNOWFLAKE, SPARK]"));
assertThat(SqlLibrary.expand(ImmutableList.of(c, o, a)),
hasToString("[CALCITE, ORACLE, ALL, BIG_QUERY, HIVE, MSSQL, MYSQL, "
+ "POSTGRESQL, SPARK]"));
+ "POSTGRESQL, SNOWFLAKE, SPARK]"));
assertThat(SqlLibrary.expand(ImmutableList.of(o, c, o)),
hasToString("[ORACLE, CALCITE]"));

Expand Down
7 changes: 5 additions & 2 deletions site/_docs/reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -2629,6 +2629,7 @@ The 'C' (compatibility) column contains value:
* '*' for all libraries,
* 'b' for Google BigQuery ('fun=bigquery' in the connect string),
* 'c' for Apache Calcite ('fun=calcite' in the connect string),
* 'f' for Snowflake ('fun=snowflake' in the connect string),
* 'h' for Apache Hive ('fun=hive' in the connect string),
* 'm' for MySQL ('fun=mysql' in the connect string),
* 'q' for Microsoft SQL Server ('fun=mssql' in the connect string),
Expand Down Expand Up @@ -2728,7 +2729,8 @@ BigQuery's type system uses confusingly different names for types and functions:
| b | DATE_TRUNC(date, timeUnit) | Truncates *date* to the granularity of *timeUnit*, rounding to the beginning of the unit
| o | DECODE(value, value1, result1 [, valueN, resultN ]* [, default ]) | Compares *value* to each *valueN* value one by one; if *value* is equal to a *valueN*, returns the corresponding *resultN*, else returns *default*, or NULL if *default* is not specified
| p | DIFFERENCE(string, string) | Returns a measure of the similarity of two strings, namely the number of character positions that their `SOUNDEX` values have in common: 4 if the `SOUNDEX` values are same and 0 if the `SOUNDEX` values are totally different
| b | ENDS_WITH(string1, string2) | Returns whether *string2* is a suffix of *string1*
| f | ENDSWITH(string1, string2) | Returns whether *string2* is a suffix of *string1*
| b p | ENDS_WITH(string1, string2) | Equivalent to `ENDSWITH(string1, string2)`
| o | EXTRACT(xml, xpath, [, namespaces ]) | Returns the XML fragment of the element or elements matched by the XPath expression. The optional namespace value that specifies a default mapping or namespace mapping for prefixes, which is used when evaluating the XPath expression
| o | EXISTSNODE(xml, xpath, [, namespaces ]) | Determines whether traversal of a XML document using a specified xpath results in any nodes. Returns 0 if no nodes remain after applying the XPath traversal on the document fragment of the element or elements matched by the XPath expression. Returns 1 if any nodes remain. The optional namespace value that specifies a default mapping or namespace mapping for prefixes, which is used when evaluating the XPath expression.
| m | EXTRACTVALUE(xml, xpathExpr)) | Returns the text of the first text node which is a child of the element or elements matched by the XPath expression.
Expand Down Expand Up @@ -2825,7 +2827,8 @@ BigQuery's type system uses confusingly different names for types and functions:
| s | SOUNDEX(string) | Returns the phonetic representation of *string*; return original *string* if *string* is encoded with multi-byte encoding such as UTF-8
| m | SPACE(integer) | Returns a string of *integer* spaces; returns an empty string if *integer* is less than 1
| b | SPLIT(string [, delimiter ]) | Returns the string array of *string* split at *delimiter* (if omitted, default is comma). If the *string* is empty it returns an empty array, otherwise, if the *delimiter* is empty, it returns an array containing the original *string*.
| b | STARTS_WITH(string1, string2) | Returns whether *string2* is a prefix of *string1*
| f | STARTSWITH(string1, string2) | Returns whether *string2* is a prefix of *string1*
| b p | STARTS_WITH(string1, string2) | Equivalent to `STARTSWITH(string1, string2)`
| m | STRCMP(string, string) | Returns 0 if both of the strings are same and returns -1 when the first argument is smaller than the second and 1 when the second one is smaller than the first one
| b p | STRPOS(string, substring) | Equivalent to `POSITION(substring IN string)`
| b m o p | SUBSTR(string, position [, substringLength ]) | Returns a portion of *string*, beginning at character *position*, *substringLength* characters long. SUBSTR calculates lengths using characters as defined by the input character set
Expand Down
Loading

0 comments on commit e74ff3e

Please sign in to comment.