diff --git a/integ-test/src/integration/scala/org/opensearch/flint/spark/FlintSparkSuite.scala b/integ-test/src/integration/scala/org/opensearch/flint/spark/FlintSparkSuite.scala index a9bbac710..3f843dbe4 100644 --- a/integ-test/src/integration/scala/org/opensearch/flint/spark/FlintSparkSuite.scala +++ b/integ-test/src/integration/scala/org/opensearch/flint/spark/FlintSparkSuite.scala @@ -264,6 +264,35 @@ trait FlintSparkSuite extends QueryTest with FlintSuite with OpenSearchSuite wit | ) |""".stripMargin) + // Insert data into the new table + sql(s""" + | INSERT INTO $testTable + | PARTITION (year=2023, month=4) + | VALUES ('Jake', 'Engineer', 'England' , 100000), + | ('Hello', 'Artist', 'USA', 70000), + | ('John', 'Doctor', 'Canada', 120000), + | ('David', 'Doctor', 'USA', 120000), + | ('David', 'Unemployed', 'Canada', 0), + | ('Jane', 'Scientist', 'Canada', 90000) + | """.stripMargin) + } + + protected def createOccupationTopRareTable(testTable: String): Unit = { + sql(s""" + | CREATE TABLE $testTable + | ( + | name STRING, + | occupation STRING, + | country STRING, + | salary INT + | ) + | USING $tableType $tableOptions + | PARTITIONED BY ( + | year INT, + | month INT + | ) + |""".stripMargin) + // Insert data into the new table sql(s""" | INSERT INTO $testTable diff --git a/integ-test/src/integration/scala/org/opensearch/flint/spark/ppl/FlintSparkPPLTopAndRareITSuite.scala b/integ-test/src/integration/scala/org/opensearch/flint/spark/ppl/FlintSparkPPLTopAndRareITSuite.scala index 0e50b9845..f10b6e2f5 100644 --- a/integ-test/src/integration/scala/org/opensearch/flint/spark/ppl/FlintSparkPPLTopAndRareITSuite.scala +++ b/integ-test/src/integration/scala/org/opensearch/flint/spark/ppl/FlintSparkPPLTopAndRareITSuite.scala @@ -27,7 +27,7 @@ class FlintSparkPPLTopAndRareITSuite super.beforeAll() // Create test tables - createOccupationTable(newTestTable) + createOccupationTopRareTable(newTestTable) createPartitionedMultiRowAddressTable(testTable) } diff --git 
a/ppl-spark-integration/README.md b/ppl-spark-integration/README.md index bc8a96c52..972a1bebe 100644 --- a/ppl-spark-integration/README.md +++ b/ppl-spark-integration/README.md @@ -306,6 +306,15 @@ Limitation: Overriding existing field is unsupported, following queries throw ex - `source=accounts | top 1 gender` - `source=accounts | top 1 age by gender` +**Parse** +- `source=accounts | top gender` +- `source=accounts | parse email '.+@(?<host>.+)' | fields email, host ` +- `source=accounts | parse email '.+@(?<host>.+)' | top 1 host ` +- `source=accounts | parse email '.+@(?<host>.+)' | stats count() by host` +- `source=accounts | parse email '.+@(?<host>.+)' | eval eval_result=1 | fields host, eval_result` +- `source=accounts | parse email '.+@(?<host>.+)' | where age > 45 | sort - age | fields age, email, host` +- `source=accounts | parse address '(?<streetNumber>\d+) (?<street>.+)' | where streetNumber > 500 | sort num(streetNumber) | fields streetNumber, street` + > For additional details on PPL commands - view [PPL Commands Docs](https://github.com/opensearch-project/sql/blob/main/docs/user/ppl/index.rst)