Skip to content

Commit e01125d

Browse files
maropu authored and dongjoon-hyun committed
[SPARK-31562][SQL] Update ExpressionDescription for substring, current_date, and current_timestamp
### What changes were proposed in this pull request? This PR intends to add entries for substring, current_date, and current_timestamp in the SQL built-in function documents. Specifically, the entries are as follows; - SELECT current_date; - SELECT current_timestamp; - SELECT substring('abcd' FROM 1); - SELECT substring('abcd' FROM 1 FOR 2); ### Why are the changes needed? To make the SQL (built-in functions) references complete. ### Does this PR introduce any user-facing change? <img width="1040" alt="Screen Shot 2020-04-25 at 16 51 07" src="https://user-images.githubusercontent.com/692303/80274851-6ca5ee00-8718-11ea-9a35-9ae82008cb4b.png"> <img width="974" alt="Screen Shot 2020-04-25 at 17 24 24" src="https://user-images.githubusercontent.com/692303/80275032-a88d8300-8719-11ea-92ec-95b80169ae28.png"> <img width="862" alt="Screen Shot 2020-04-25 at 17 27 48" src="https://user-images.githubusercontent.com/692303/80275114-36696e00-871a-11ea-8e39-02e93eabb92f.png"> ### How was this patch tested? Added test examples. Closes apache#28342 from maropu/SPARK-31562. Authored-by: Takeshi Yamamuro <[email protected]> Signed-off-by: Dongjoon Hyun <[email protected]>
1 parent fe07b21 commit e01125d

File tree

7 files changed

+74
-22
lines changed

7 files changed

+74
-22
lines changed

Diff for: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala

+1-1
Original file line numberDiff line numberDiff line change
@@ -404,7 +404,7 @@ object FunctionRegistry {
404404
expression[Month]("month"),
405405
expression[MonthsBetween]("months_between"),
406406
expression[NextDay]("next_day"),
407-
expression[CurrentTimestamp]("now", true),
407+
expression[Now]("now"),
408408
expression[Quarter]("quarter"),
409409
expression[Second]("second"),
410410
expression[ParseToTimestamp]("to_timestamp"),

Diff for: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationChecker.scala

+2-4
Original file line numberDiff line numberDiff line change
@@ -19,13 +19,11 @@ package org.apache.spark.sql.catalyst.analysis
1919

2020
import org.apache.spark.internal.Logging
2121
import org.apache.spark.sql.AnalysisException
22-
import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, AttributeSet, CurrentDate, CurrentTimestamp, MonotonicallyIncreasingID}
22+
import org.apache.spark.sql.catalyst.expressions.{Attribute, CurrentDate, CurrentTimestamp, MonotonicallyIncreasingID, Now}
2323
import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression
24-
import org.apache.spark.sql.catalyst.planning.ExtractEquiJoinKeys
2524
import org.apache.spark.sql.catalyst.plans._
2625
import org.apache.spark.sql.catalyst.plans.logical._
2726
import org.apache.spark.sql.catalyst.streaming.InternalOutputModes
28-
import org.apache.spark.sql.internal.SQLConf
2927
import org.apache.spark.sql.streaming.OutputMode
3028

3129
/**
@@ -412,7 +410,7 @@ object UnsupportedOperationChecker extends Logging {
412410

413411
subPlan.expressions.foreach { e =>
414412
if (e.collectLeaves().exists {
415-
case (_: CurrentTimestamp | _: CurrentDate) => true
413+
case (_: CurrentTimestamp | _: Now | _: CurrentDate) => true
416414
case _ => false
417415
}) {
418416
throwError(s"Continuous processing does not support current time operations.")

Diff for: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala

+51-12
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,6 @@ import org.apache.commons.text.StringEscapeUtils
2828
import org.apache.spark.SparkUpgradeException
2929
import org.apache.spark.sql.AnalysisException
3030
import org.apache.spark.sql.catalyst.InternalRow
31-
import org.apache.spark.sql.catalyst.analysis.FunctionRegistry
3231
import org.apache.spark.sql.catalyst.expressions.codegen._
3332
import org.apache.spark.sql.catalyst.expressions.codegen.Block._
3433
import org.apache.spark.sql.catalyst.util.{DateTimeUtils, LegacyDateFormats, TimestampFormatter}
@@ -62,7 +61,21 @@ trait TimeZoneAwareExpression extends Expression {
6261
* There is no code generation since this expression should get constant folded by the optimizer.
6362
*/
6463
@ExpressionDescription(
65-
usage = "_FUNC_() - Returns the current date at the start of query evaluation.",
64+
usage = """
65+
_FUNC_() - Returns the current date at the start of query evaluation.
66+
67+
_FUNC_ - Returns the current date at the start of query evaluation.
68+
""",
69+
examples = """
70+
Examples:
71+
> SELECT _FUNC_();
72+
2020-04-25
73+
> SELECT _FUNC_;
74+
2020-04-25
75+
""",
76+
note = """
77+
The syntax without braces has been supported since 2.0.1.
78+
""",
6679
group = "datetime_funcs",
6780
since = "1.5.0")
6881
case class CurrentDate(timeZoneId: Option[String] = None)
@@ -83,26 +96,52 @@ case class CurrentDate(timeZoneId: Option[String] = None)
8396
override def prettyName: String = "current_date"
8497
}
8598

99+
abstract class CurrentTimestampLike() extends LeafExpression with CodegenFallback {
100+
override def foldable: Boolean = true
101+
override def nullable: Boolean = false
102+
override def dataType: DataType = TimestampType
103+
override def eval(input: InternalRow): Any = currentTimestamp()
104+
}
105+
86106
/**
87107
* Returns the current timestamp at the start of query evaluation.
88108
* All calls of current_timestamp within the same query return the same value.
89109
*
90110
* There is no code generation since this expression should get constant folded by the optimizer.
91111
*/
92112
@ExpressionDescription(
93-
usage = "_FUNC_() - Returns the current timestamp at the start of query evaluation.",
113+
usage = """
114+
_FUNC_() - Returns the current timestamp at the start of query evaluation.
115+
116+
_FUNC_ - Returns the current timestamp at the start of query evaluation.
117+
""",
118+
examples = """
119+
Examples:
120+
> SELECT _FUNC_();
121+
2020-04-25 15:49:11.914
122+
> SELECT _FUNC_;
123+
2020-04-25 15:49:11.914
124+
""",
125+
note = """
126+
The syntax without braces has been supported since 2.0.1.
127+
""",
94128
group = "datetime_funcs",
95129
since = "1.5.0")
96-
case class CurrentTimestamp() extends LeafExpression with CodegenFallback {
97-
override def foldable: Boolean = true
98-
override def nullable: Boolean = false
99-
100-
override def dataType: DataType = TimestampType
101-
102-
override def eval(input: InternalRow): Any = currentTimestamp()
130+
case class CurrentTimestamp() extends CurrentTimestampLike {
131+
override def prettyName: String = "current_timestamp"
132+
}
103133

104-
override def prettyName: String =
105-
getTagValue(FunctionRegistry.FUNC_ALIAS).getOrElse("current_timestamp")
134+
@ExpressionDescription(
135+
usage = "_FUNC_() - Returns the current timestamp at the start of query evaluation.",
136+
examples = """
137+
Examples:
138+
> SELECT _FUNC_();
139+
2020-04-25 15:49:11.914
140+
""",
141+
group = "datetime_funcs",
142+
since = "1.6.0")
143+
case class Now() extends CurrentTimestampLike {
144+
override def prettyName: String = "now"
106145
}
107146

108147
/**

Diff for: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala

+11-1
Original file line numberDiff line numberDiff line change
@@ -1619,7 +1619,11 @@ case class StringSpace(child: Expression)
16191619
*/
16201620
// scalastyle:off line.size.limit
16211621
@ExpressionDescription(
1622-
usage = "_FUNC_(str, pos[, len]) - Returns the substring of `str` that starts at `pos` and is of length `len`, or the slice of byte array that starts at `pos` and is of length `len`.",
1622+
usage = """
1623+
_FUNC_(str, pos[, len]) - Returns the substring of `str` that starts at `pos` and is of length `len`, or the slice of byte array that starts at `pos` and is of length `len`.
1624+
1625+
_FUNC_(str FROM pos[ FOR len]) - Returns the substring of `str` that starts at `pos` and is of length `len`, or the slice of byte array that starts at `pos` and is of length `len`.
1626+
""",
16231627
examples = """
16241628
Examples:
16251629
> SELECT _FUNC_('Spark SQL', 5);
@@ -1628,6 +1632,12 @@ case class StringSpace(child: Expression)
16281632
SQL
16291633
> SELECT _FUNC_('Spark SQL', 5, 1);
16301634
k
1635+
> SELECT _FUNC_('Spark SQL' FROM 5);
1636+
k SQL
1637+
> SELECT _FUNC_('Spark SQL' FROM -3);
1638+
SQL
1639+
> SELECT _FUNC_('Spark SQL' FROM 5 FOR 1);
1640+
k
16311641
""",
16321642
since = "1.5.0")
16331643
// scalastyle:on line.size.limit

Diff for: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala

+1-1
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ object ComputeCurrentTime extends Rule[LogicalPlan] {
8585
LocalDate.now(DateTimeUtils.getZoneId(timeZoneId)),
8686
DateType)
8787
})
88-
case CurrentTimestamp() => currentTime
88+
case CurrentTimestamp() | Now() => currentTime
8989
}
9090
}
9191
}

Diff for: sql/core/src/test/scala/org/apache/spark/sql/expressions/ExpressionInfoSuite.scala

+3
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,9 @@ class ExpressionInfoSuite extends SparkFunSuite with SharedSparkSession {
120120
val ignoreSet = Set(
121121
// One of examples shows getting the current timestamp
122122
"org.apache.spark.sql.catalyst.expressions.UnixTimestamp",
123+
"org.apache.spark.sql.catalyst.expressions.CurrentDate",
124+
"org.apache.spark.sql.catalyst.expressions.CurrentTimestamp",
125+
"org.apache.spark.sql.catalyst.expressions.Now",
123126
// Random output without a seed
124127
"org.apache.spark.sql.catalyst.expressions.Rand",
125128
"org.apache.spark.sql.catalyst.expressions.Randn",

Diff for: sql/gen-sql-functions-docs.py

+5-3
Original file line numberDiff line numberDiff line change
@@ -104,9 +104,11 @@ def _make_pretty_usage(infos):
104104
result.append(" <tbody>")
105105

106106
for info in infos:
107-
# Extracts (signature, description) pairs from `info.usage`, e.g.,
108-
# the signature is `func(expr)` and the description is `...` in an usage `func(expr) - ...`.
109-
usages = iter(re.split(r"(%s\(.*\)) - " % info.name, info.usage.strip())[1:])
107+
# Extracts (signature, description) pairs from `info.usage`.
108+
# Expected formats are as follows;
109+
# - `_FUNC_(...) - description`, or
110+
# - `_FUNC_ - description`
111+
usages = iter(re.split(r"(%s.*) - " % info.name, info.usage.strip())[1:])
110112
for (sig, description) in zip(usages, usages):
111113
result.append(" <tr>")
112114
result.append(" <td>%s</td>" % sig)

0 commit comments

Comments (0)