From c74e2dda20928a16bbf8406225af40409019da90 Mon Sep 17 00:00:00 2001 From: Chong Gao Date: Mon, 30 Sep 2024 17:14:59 +0800 Subject: [PATCH 1/3] Only test years after 1900 for LEGACY mode (#11545) Signed-off-by: Chong Gao Co-authored-by: Chong Gao --- integration_tests/src/main/python/date_time_test.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/integration_tests/src/main/python/date_time_test.py b/integration_tests/src/main/python/date_time_test.py index 5143c2b0bda..0129e216a23 100644 --- a/integration_tests/src/main/python/date_time_test.py +++ b/integration_tests/src/main/python/date_time_test.py @@ -460,8 +460,9 @@ def test_to_timestamp(parser_policy): { "spark.sql.legacy.timeParserPolicy": parser_policy}) @pytest.mark.skipif(not is_supported_time_zone(), reason="not all time zones are supported now, refer to https://github.com/NVIDIA/spark-rapids/issues/6839, please update after all time zones are supported") +# Test years after 1900, refer to issues: https://github.com/NVIDIA/spark-rapids/issues/11543, https://github.com/NVIDIA/spark-rapids/issues/11539 def test_yyyyMMdd_format_for_legacy_mode(): - gen = StringGen("[0-9]{3}[1-9](0[1-9]|1[0-2])(0[1-9]|[1-2][0-9])") + gen = StringGen('(19[0-9]{2}|[2-9][0-9]{3})([0-9]{4})') assert_gpu_and_cpu_are_equal_sql( lambda spark : unary_op_df(spark, gen), "tab", @@ -470,8 +471,8 @@ def test_yyyyMMdd_format_for_legacy_mode(): date_format(to_timestamp(a, 'yyyyMMdd'), 'yyyyMMdd') from tab ''', - { 'spark.sql.legacy.timeParserPolicy': 'LEGACY', - 'spark.rapids.sql.incompatibleDateFormats.enabled': True}) + {'spark.sql.legacy.timeParserPolicy': 'LEGACY', + 'spark.rapids.sql.incompatibleDateFormats.enabled': True}) @tz_sensitive_test @pytest.mark.skipif(not is_supported_time_zone(), reason="not all time zones are supported now, refer to https://github.com/NVIDIA/spark-rapids/issues/6839, please update after all time zones are supported") From 5fae8839b46e7c7669406cfa4ccbacde6ba9662f Mon Sep 17 00:00:00 2001 From: Alessandro Bellina Date: Tue, 1 Oct 2024 08:32:04 -0500 Subject: [PATCH 2/3] Fix negative rs. shuffle write time (#11548) * Fix negative rs. shuffle write time Signed-off-by: Alessandro Bellina * Stop double counting openTimeNs in shuffleWriteTimeMetric --------- Signed-off-by: Alessandro Bellina --- .../sql/rapids/RapidsShuffleInternalManagerBase.scala | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/RapidsShuffleInternalManagerBase.scala b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/RapidsShuffleInternalManagerBase.scala index 3c3bf8ce3dc..da54735aaf4 100644 --- a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/RapidsShuffleInternalManagerBase.scala +++ b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/RapidsShuffleInternalManagerBase.scala @@ -308,6 +308,8 @@ abstract class RapidsShuffleThreadedWriterBase[K, V]( private def write(records: TimeTrackingIterator): Unit = { withResource(new NvtxRange("ThreadedWriter.write", NvtxColor.RED)) { _ => withResource(new NvtxRange("compute", NvtxColor.GREEN)) { _ => + // Timestamp when the main processing begins + val processingStart: Long = System.nanoTime() val mapOutputWriter = shuffleExecutorComponents.createMapOutputWriter( shuffleId, mapId, @@ -342,8 +344,7 @@ abstract class RapidsShuffleThreadedWriterBase[K, V]( var waitTimeOnLimiterNs: Long = 0L // Time spent computing ColumnarBatch sizes var batchSizeComputeTimeNs: Long = 0L - // Timestamp when the main processing begins - val processingStart: Long = System.nanoTime() + try { while (records.hasNext) { // get the record @@ -447,7 +448,7 @@ abstract class RapidsShuffleThreadedWriterBase[K, V]( serializationTimeMetric.foreach(_ += (serializationRatio * writeTimeNs).toLong) // we add all three here because this metric is meant to show the time // we are blocked on writes - shuffleWriteTimeMetric.foreach(_ += (openTimeNs + writeTimeNs + combineTimeNs)) + shuffleWriteTimeMetric.foreach(_ += (writeTimeNs + combineTimeNs)) shuffleCombineTimeMetric.foreach(_ += combineTimeNs) pl } From 8207f7bd94c118008fa473f5f79251d2ff9db819 Mon Sep 17 00:00:00 2001 From: "Robert (Bobby) Evans" Date: Tue, 1 Oct 2024 10:18:37 -0500 Subject: [PATCH 3/3] Update test now that code is fixed (#11496) Signed-off-by: Robert (Bobby) Evans