Skip to content

Commit

Permalink
[CALCITE-6310] Add REGEXP_REPLACE function (enabled in PostgreSQL lib…
Browse files Browse the repository at this point in the history
…rary)

Reworked the overloading of REGEXP_REPLACE:
* There is a two-argument version that is only for Amazon Redshift; its replacement string is the empty string
* There is a three argument version that is for everything except PostgreSQL
* PostgreSQL has its own 3 and 4 argument versions
* PostgreSQL 3 argument version will only replace first occurrence
* The g flag is now supported for the 4-argument function in PostgreSQL
* BigQuery only supports a 3 argument version REGEXP_REPLACE
* BigQuery uses different syntax to refer to capture groups
* There is a new REGEXP_REPLACE for BigQuery
* No longer need to examine the ConformanceEnum to pick the correct implementation
* MySQL and Amazon Redshift have two signatures with 5 arguments
* Oracle only has one signature with 5 arguments
  • Loading branch information
jduo authored and mihaibudiu committed Jul 30, 2024
1 parent ad66246 commit 0c6a3c1
Show file tree
Hide file tree
Showing 17 changed files with 365 additions and 175 deletions.
15 changes: 15 additions & 0 deletions babel/src/test/resources/sql/postgresql.iq
Original file line number Diff line number Diff line change
Expand Up @@ -1231,4 +1231,19 @@ X
1
!ok

SELECT regexp_replace('abc def GHI', '[a-z]+', 'X') AS x;
X
X def GHI
!ok

SELECT regexp_replace('abc def GHI', '[a-z]+', 'X', 'g') AS x;
X
X X GHI
!ok

SELECT regexp_replace('ABC def GHI', '[a-z]+', 'X', 'i') AS x;
X
X def GHI
!ok

# End postgresql.iq
10 changes: 10 additions & 0 deletions babel/src/test/resources/sql/redshift.iq
Original file line number Diff line number Diff line change
Expand Up @@ -2232,6 +2232,16 @@ USER
sa
!ok

SELECT regexp_replace('abcabc', 'b') AS x;
X
acac
!ok

SELECT regexp_replace('abc def GHI', '[a-z]+', 'X') AS x;
X
X X GHI
!ok

# VERSION
# Returns details about the currently installed release,
# with specific Amazon Redshift version information at the end.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -254,7 +254,15 @@
import static org.apache.calcite.sql.fun.SqlLibraryOperators.REGEXP_EXTRACT_ALL;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.REGEXP_INSTR;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.REGEXP_LIKE;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.REGEXP_REPLACE;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.REGEXP_REPLACE_2;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.REGEXP_REPLACE_3;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.REGEXP_REPLACE_4;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.REGEXP_REPLACE_5;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.REGEXP_REPLACE_5_ORACLE;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.REGEXP_REPLACE_6;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.REGEXP_REPLACE_BIG_QUERY_3;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.REGEXP_REPLACE_PG_3;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.REGEXP_REPLACE_PG_4;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.REPEAT;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.REVERSE;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.REVERSE_SPARK;
Expand Down Expand Up @@ -864,7 +872,16 @@ Builder populate2() {
NotImplementor.of(insensitiveImplementor));
map.put(NEGATED_POSIX_REGEX_CASE_SENSITIVE,
NotImplementor.of(sensitiveImplementor));
map.put(REGEXP_REPLACE, new RegexpReplaceImplementor());
defineReflective(REGEXP_REPLACE_2, BuiltInMethod.REGEXP_REPLACE2.method);
defineReflective(REGEXP_REPLACE_3, BuiltInMethod.REGEXP_REPLACE3.method);
defineReflective(REGEXP_REPLACE_4, BuiltInMethod.REGEXP_REPLACE4.method);
defineReflective(REGEXP_REPLACE_5, BuiltInMethod.REGEXP_REPLACE5_OCCURRENCE.method,
BuiltInMethod.REGEXP_REPLACE5_MATCHTYPE.method);
defineReflective(REGEXP_REPLACE_5_ORACLE, BuiltInMethod.REGEXP_REPLACE5_OCCURRENCE.method);
defineReflective(REGEXP_REPLACE_6, BuiltInMethod.REGEXP_REPLACE6.method);
defineReflective(REGEXP_REPLACE_BIG_QUERY_3, BuiltInMethod.REGEXP_REPLACE_BIG_QUERY_3.method);
defineReflective(REGEXP_REPLACE_PG_3, BuiltInMethod.REGEXP_REPLACE_PG_3.method);
defineReflective(REGEXP_REPLACE_PG_4, BuiltInMethod.REGEXP_REPLACE_PG_4.method);


// Multisets & arrays
Expand Down Expand Up @@ -2512,37 +2529,6 @@ private static class ContainsSubstrImplementor extends AbstractRexCallImplemento
}
}

/** Implementor for the {@code REGEXP_REPLACE} function.
 *
 * <p>Selects the runtime implementation according to how the current
 * conformance refers to capturing groups in the replacement string. */
private static class RegexpReplaceImplementor extends AbstractRexCallImplementor {
  RegexpReplaceImplementor() {
    super("regexp_replace", NullPolicy.STRICT, false);
  }

  @Override Expression implementSafe(final RexToLixTranslator translator,
      final RexCall call, final List<Expression> argValueList) {
    // Dialects whose capturing groups are not $-indexed (they use
    // double-backslash-based indexing) are routed to a dedicated runtime
    // method that preprocesses the double-backslashes into $-based indices.
    if (!translator.conformance.isRegexReplaceCaptureGroupDollarIndexed()) {
      return Expressions.call(
          Expressions.new_(SqlFunctions.RegexFunction.class),
          "regexpReplaceNonDollarIndexed",
          argValueList);
    }

    // Default $-indexed dialects: hand the call to a reflective implementor
    // built over the REGEXP_REPLACE3 .. REGEXP_REPLACE6 built-in methods.
    return new ReflectiveImplementor(
        ImmutableList.of(
            BuiltInMethod.REGEXP_REPLACE3.method,
            BuiltInMethod.REGEXP_REPLACE4.method,
            BuiltInMethod.REGEXP_REPLACE5.method,
            BuiltInMethod.REGEXP_REPLACE6.method))
        .implementSafe(translator, call, argValueList);
  }
}

/** Implementor for the {@code MONTHNAME} and {@code DAYNAME} functions.
* Each takes a {@link java.util.Locale} argument. */
private static class PeriodNameImplementor extends AbstractRexCallImplementor {
Expand Down
29 changes: 28 additions & 1 deletion core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java
Original file line number Diff line number Diff line change
Expand Up @@ -632,6 +632,11 @@ public int regexpInstr(String value, String regex, int position,
return matchIndex;
}

/** SQL {@code REGEXP_REPLACE} function with 2 arguments.
 *
 * <p>Delegates to the 6-argument form with an empty replacement string,
 * position 1, occurrence 0 and a {@code null} match type. */
public String regexpReplace(String s, String regex) {
  final String emptyReplacement = "";
  final int startPosition = 1;
  final int occurrence = 0;
  return regexpReplace(s, regex, emptyReplacement, startPosition, occurrence, null);
}

/** SQL {@code REGEXP_REPLACE} function with 3 arguments. */
public String regexpReplace(String s, String regex,
String replacement) {
Expand All @@ -644,12 +649,18 @@ public String regexpReplace(String s, String regex, String replacement,
return regexpReplace(s, regex, replacement, pos, 0, null);
}

/** SQL {@code REGEXP_REPLACE} function with 5 arguments, the last of which is
 * {@code occurrence}.
 *
 * <p>Delegates to the 6-argument form with a {@code null} match type. */
public String regexpReplace(String s, String regex, String replacement,
    int pos, int occurrence) {
  final String noMatchType = null;
  return regexpReplace(s, regex, replacement, pos, occurrence, noMatchType);
}

/** SQL {@code REGEXP_REPLACE} function with 5 arguments, the last of which is
 * {@code matchType}.
 *
 * <p>Delegates to the 6-argument form with occurrence 0. */
public String regexpReplace(String s, String regex, String replacement,
    int pos, String matchType) {
  final int occurrence = 0;
  return regexpReplace(s, regex, replacement, pos, occurrence, matchType);
}

/** SQL {@code REGEXP_REPLACE} function with 6 arguments. */
public String regexpReplace(String s, String regex, String replacement,
int pos, int occurrence, @Nullable String matchType) {
Expand All @@ -663,6 +674,18 @@ public String regexpReplace(String s, String regex, String replacement,
return Unsafe.regexpReplace(s, pattern, replacement, pos, occurrence);
}

/** SQL {@code REGEXP_REPLACE} function for PostgreSQL with 3 arguments.
 *
 * <p>Delegates to the 6-argument form with position 1, occurrence 1 and a
 * {@code null} match type. */
public String regexpReplacePg(String s, String regex, String replacement) {
  final int startPosition = 1;
  final int occurrence = 1;
  return regexpReplace(s, regex, replacement, startPosition, occurrence, null);
}

/** SQL {@code REGEXP_REPLACE} function for PostgreSQL with 4 arguments.
 *
 * <p>The {@code g} flag in {@code matchType} is translated into the
 * {@code occurrence} argument of the 6-argument form: 0 when the flag is
 * present, 1 otherwise. The flags string is still passed through unchanged. */
public String regexpReplacePg(String s, String regex, String replacement, String matchType) {
  final int occurrence;
  if (matchType.indexOf('g') >= 0) {
    occurrence = 0;
  } else {
    occurrence = 1;
  }
  return regexpReplace(s, regex, replacement, 1, occurrence, matchType);
}

/** SQL {@code REGEXP_REPLACE} function with 3 arguments with
* {@code \\} based indexing for capturing groups. */
public String regexpReplaceNonDollarIndexed(String s, String regex,
Expand Down Expand Up @@ -706,6 +729,10 @@ private static int makeRegexpFlags(String stringFlags) {
// for consistency.
flags &= ~Pattern.DOTALL;
break;
case 'g':
// This flag is in PostgreSQL but doesn't apply to other libraries. Skip here since
// this is actually occurrence.
break;
default:
throw RESOURCE.invalidInputForRegexpReplace(stringFlags).ex();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -565,11 +565,107 @@ static RelDataType deriveTypeSplit(SqlOperatorBinding operatorBinding,
OperandTypes.STRING_STRING_OPTIONAL_INTEGER_OPTIONAL_INTEGER_OPTIONAL_INTEGER,
SqlFunctionCategory.STRING);

/** The "REGEXP_REPLACE(value, regexp, rep [, pos [, occurrence [, matchType]]])"
 * function. Replaces all substrings of value that match regexp with
 * {@code rep} and returns modified value. */
@LibraryOperator(libraries = {BIG_QUERY, MYSQL, ORACLE, REDSHIFT})
public static final SqlFunction REGEXP_REPLACE = new SqlRegexpReplaceFunction();
/** The "REGEXP_REPLACE(value, regexp)" function, enabled only in the
 * Redshift library. Replaces all substrings of value that match regexp with
 * the empty string and returns modified value.
 *
 * <p>Takes two STRING operands and returns a nullable VARCHAR. */
@LibraryOperator(libraries = {REDSHIFT})
public static final SqlFunction REGEXP_REPLACE_2 =
new SqlBasicFunction("REGEXP_REPLACE", SqlKind.OTHER_FUNCTION,
SqlSyntax.FUNCTION, true, ReturnTypes.VARCHAR_NULLABLE, null,
OperandHandlers.DEFAULT, OperandTypes.STRING_STRING, 0,
SqlFunctionCategory.STRING, call -> SqlMonotonicity.NOT_MONOTONIC, false) { };

/** The "REGEXP_REPLACE(value, regexp, rep)" function, enabled in the MySQL,
 * Oracle and Redshift libraries. Replaces all substrings of value that match
 * regexp with {@code rep} and returns modified value.
 *
 * <p>BigQuery and PostgreSQL have their own three-argument operators; see
 * {@link #REGEXP_REPLACE_BIG_QUERY_3} and {@link #REGEXP_REPLACE_PG_3}. */
@LibraryOperator(libraries = {MYSQL, ORACLE, REDSHIFT})
public static final SqlFunction REGEXP_REPLACE_3 =
SqlBasicFunction.create("REGEXP_REPLACE", ReturnTypes.VARCHAR_NULLABLE,
OperandTypes.STRING_STRING_STRING, SqlFunctionCategory.STRING);

/** The "REGEXP_REPLACE(value, regexp, rep, pos)" function, enabled in the
 * MySQL, Oracle and Redshift libraries. Replaces all substrings of value that
 * match regexp with {@code rep} and returns modified value. The search starts
 * at character position {@code pos}.
 *
 * <p>Operands are STRING, STRING, STRING, INTEGER; the result is a nullable
 * VARCHAR. */
@LibraryOperator(libraries = {MYSQL, ORACLE, REDSHIFT})
public static final SqlFunction REGEXP_REPLACE_4 =
new SqlBasicFunction("REGEXP_REPLACE", SqlKind.OTHER_FUNCTION,
SqlSyntax.FUNCTION, true, ReturnTypes.VARCHAR_NULLABLE, null,
OperandHandlers.DEFAULT, OperandTypes.family(SqlTypeFamily.STRING, SqlTypeFamily.STRING,
SqlTypeFamily.STRING, SqlTypeFamily.INTEGER),
0, SqlFunctionCategory.STRING, call -> SqlMonotonicity.NOT_MONOTONIC, false) { };

/** The "REGEXP_REPLACE(value, regexp, rep, pos, [ occurrence | matchType ])"
 * function, enabled in the MySQL and Redshift libraries. Replaces substrings
 * of value that match regexp with {@code rep} and returns modified value. The
 * search starts at character position {@code pos}.
 *
 * <p>The fifth operand is either an INTEGER {@code occurrence} (replace only
 * that match, or all matches if it is 0) or a STRING {@code matchType} (a
 * string of flags to apply to the search). */
@LibraryOperator(libraries = {MYSQL, REDSHIFT})
public static final SqlFunction REGEXP_REPLACE_5 =
new SqlBasicFunction("REGEXP_REPLACE", SqlKind.OTHER_FUNCTION,
SqlSyntax.FUNCTION, true, ReturnTypes.VARCHAR_NULLABLE, null,
OperandHandlers.DEFAULT,
// Two accepted signatures that differ only in the type of the fifth operand:
// INTEGER (occurrence) or STRING (matchType).
OperandTypes.or(
OperandTypes.family(SqlTypeFamily.STRING, SqlTypeFamily.STRING,
SqlTypeFamily.STRING, SqlTypeFamily.INTEGER, SqlTypeFamily.INTEGER),
OperandTypes.family(SqlTypeFamily.STRING, SqlTypeFamily.STRING,
SqlTypeFamily.STRING, SqlTypeFamily.INTEGER, SqlTypeFamily.STRING)),
0, SqlFunctionCategory.STRING, call -> SqlMonotonicity.NOT_MONOTONIC, false) { };

/** The "REGEXP_REPLACE(value, regexp, rep, pos, occurrence)" function,
 * enabled in the Oracle library. Replaces substrings of value that match
 * regexp with {@code rep} and returns modified value. The search starts at
 * character position {@code pos}. Replaces only the {@code occurrence}-th
 * match, or all matches if occurrence is 0.
 *
 * <p>Unlike {@link #REGEXP_REPLACE_5}, the fifth operand must be an INTEGER;
 * a STRING {@code matchType} is not accepted in this position. */
@LibraryOperator(libraries = {ORACLE})
public static final SqlFunction REGEXP_REPLACE_5_ORACLE =
new SqlBasicFunction("REGEXP_REPLACE", SqlKind.OTHER_FUNCTION,
SqlSyntax.FUNCTION, true, ReturnTypes.VARCHAR_NULLABLE, null,
OperandHandlers.DEFAULT, OperandTypes.family(SqlTypeFamily.STRING, SqlTypeFamily.STRING,
SqlTypeFamily.STRING, SqlTypeFamily.INTEGER, SqlTypeFamily.INTEGER),
0, SqlFunctionCategory.STRING, call -> SqlMonotonicity.NOT_MONOTONIC, false) { };

/** The "REGEXP_REPLACE(value, regexp, rep, pos, occurrence, matchType)"
 * function, enabled in the MySQL, Oracle and Redshift libraries. Replaces
 * substrings of value that match regexp with {@code rep} and returns modified
 * value. The search starts at character position {@code pos}. Replaces only
 * the {@code occurrence}-th match, or all matches if occurrence is 0.
 * {@code matchType} is a string of flags to apply to the search.
 *
 * <p>Operands are STRING, STRING, STRING, INTEGER, INTEGER, STRING. */
@LibraryOperator(libraries = {MYSQL, ORACLE, REDSHIFT})
public static final SqlFunction REGEXP_REPLACE_6 =
new SqlBasicFunction("REGEXP_REPLACE", SqlKind.OTHER_FUNCTION,
SqlSyntax.FUNCTION, true, ReturnTypes.VARCHAR_NULLABLE, null,
OperandHandlers.DEFAULT, OperandTypes.family(SqlTypeFamily.STRING, SqlTypeFamily.STRING,
SqlTypeFamily.STRING, SqlTypeFamily.INTEGER, SqlTypeFamily.INTEGER, SqlTypeFamily.STRING),
0, SqlFunctionCategory.STRING, call -> SqlMonotonicity.NOT_MONOTONIC, false) { };

/** The "REGEXP_REPLACE(value, regexp, rep)" function, enabled only in the
 * BigQuery library. Replaces all substrings of value that match regexp with
 * {@code rep} and returns modified value.
 *
 * <p>This is a separate operator from {@link #REGEXP_REPLACE_3} even though
 * the operand types are the same. NOTE(review): presumably this is because
 * BigQuery refers to capturing groups in {@code rep} with a different syntax
 * than {@code $}; confirm against the runtime method this operator is
 * mapped to. */
@LibraryOperator(libraries = {BIG_QUERY})
public static final SqlFunction REGEXP_REPLACE_BIG_QUERY_3 =
new SqlBasicFunction("REGEXP_REPLACE", SqlKind.OTHER_FUNCTION,
SqlSyntax.FUNCTION, true, ReturnTypes.VARCHAR_NULLABLE, null,
OperandHandlers.DEFAULT, OperandTypes.STRING_STRING_STRING, 0,
SqlFunctionCategory.STRING, call -> SqlMonotonicity.NOT_MONOTONIC, false) { };

/** The "REGEXP_REPLACE(value, regexp, rep)" function, enabled in the
 * PostgreSQL library (but not Redshift). Replaces only the first substring of
 * value that matches regexp with {@code rep} and returns modified value.
 *
 * <p>This differs from {@link #REGEXP_REPLACE_3}, which replaces all
 * matches. */
@LibraryOperator(libraries = {POSTGRESQL}, exceptLibraries = REDSHIFT)
public static final SqlFunction REGEXP_REPLACE_PG_3 =
new SqlBasicFunction("REGEXP_REPLACE", SqlKind.OTHER_FUNCTION,
SqlSyntax.FUNCTION, true, ReturnTypes.VARCHAR_NULLABLE, null,
OperandHandlers.DEFAULT, OperandTypes.STRING_STRING_STRING, 0,
SqlFunctionCategory.STRING, call -> SqlMonotonicity.NOT_MONOTONIC, false) { };

/** The "REGEXP_REPLACE(value, regexp, rep, flags)" function, enabled in the
 * PostgreSQL library (but not Redshift). Replaces substrings of value that
 * match regexp with {@code rep} and returns modified value.
 *
 * <p>{@code flags} is a string of flags applied to the search. Only the first
 * match is replaced unless {@code flags} contains {@code g}, in which case
 * all matches are replaced. */
@LibraryOperator(libraries = {POSTGRESQL}, exceptLibraries = REDSHIFT)
public static final SqlFunction REGEXP_REPLACE_PG_4 =
new SqlBasicFunction("REGEXP_REPLACE", SqlKind.OTHER_FUNCTION,
SqlSyntax.FUNCTION, true, ReturnTypes.VARCHAR_NULLABLE, null,
OperandHandlers.DEFAULT, OperandTypes.STRING_STRING_STRING_STRING, 0,
SqlFunctionCategory.STRING, call -> SqlMonotonicity.NOT_MONOTONIC, false) { };

/** The "REGEXP_SUBSTR(value, regexp[, position[, occurrence]])" function.
* Returns the substring in value that matches the regexp. Returns NULL if there is no match. */
Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -465,6 +465,10 @@ public static SqlOperandTypeChecker variadic(
// Second and Third operand both are optional (operand index 0, 1, 2)
number -> number == 1 || number == 2);

/** Operand type-checker for four operands, each of which belongs to the
 * STRING type family. */
public static final FamilyOperandTypeChecker STRING_STRING_STRING_STRING =
family(SqlTypeFamily.STRING, SqlTypeFamily.STRING, SqlTypeFamily.STRING,
SqlTypeFamily.STRING);

public static final FamilyOperandTypeChecker STRING_NUMERIC_OPTIONAL_STRING =
family(
ImmutableList.of(SqlTypeFamily.STRING, SqlTypeFamily.NUMERIC,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -105,10 +105,6 @@ public abstract class SqlAbstractConformance implements SqlConformance {
return SqlConformanceEnum.DEFAULT.isOffsetLimitAllowed();
}

// Base implementation: returns whatever SqlConformanceEnum.DEFAULT reports
// for this setting.
@Override public boolean isRegexReplaceCaptureGroupDollarIndexed() {
return SqlConformanceEnum.DEFAULT.isRegexReplaceCaptureGroupDollarIndexed();
}

// Base implementation: returns whatever SqlConformanceEnum.DEFAULT reports
// for this setting.
@Override public boolean isPercentRemainderAllowed() {
return SqlConformanceEnum.DEFAULT.isPercentRemainderAllowed();
}
Expand Down
Loading

0 comments on commit 0c6a3c1

Please sign in to comment.