Merge branch 'branch-24.10' into fix_merge_conflict

Signed-off-by: Robert (Bobby) Evans <[email protected]>
NVIDIA · Oct 7, 2024 · 1432649 · 1432649
2 parents 2036f16 + 8207f7b
commit 1432649
Show file tree

Hide file tree

Showing 2 changed files with 8 additions and 7 deletions.
diff --git a/integration_tests/src/main/python/date_time_test.py b/integration_tests/src/main/python/date_time_test.py
@@ -462,8 +462,8 @@ def test_to_timestamp(parser_policy):
 # mm: minute; MM: month
 @pytest.mark.skipif(not is_supported_time_zone(), reason="not all time zones are supported now, refer to https://github.com/NVIDIA/spark-rapids/issues/6839, please update after all time zones are supported")
 @pytest.mark.parametrize("format", ['yyyyMMdd', 'yyyymmdd'], ids=idfn)
-# these regexps exclude zero year, python does not like zero year
-@pytest.mark.parametrize("data_gen_regexp", ['([0-9]{3}[1-9])([0-5][0-9])([0-3][0-9])', '([0-9]{3}[1-9])([0-9]{4})'], ids=idfn)
+# Test only years 1900-1999, refer to issues: https://github.com/NVIDIA/spark-rapids/issues/11543, https://github.com/NVIDIA/spark-rapids/issues/11539
+@pytest.mark.parametrize("data_gen_regexp", ['(19[0-9]{2})([0-5][0-9])([0-3][0-9])', '(19[0-9]{2})([0-9]{4})'], ids=idfn)
 def test_formats_for_legacy_mode(format, data_gen_regexp):
     gen = StringGen(data_gen_regexp)
     assert_gpu_and_cpu_are_equal_sql(
@@ -474,8 +474,8 @@ def test_formats_for_legacy_mode(format, data_gen_regexp):
                   date_format(to_timestamp(a, '{}'), '{}')
            from tab
         '''.format(format, format, format, format, format),
-        {  'spark.sql.legacy.timeParserPolicy': 'LEGACY',
-           'spark.rapids.sql.incompatibleDateFormats.enabled': True})
+        {'spark.sql.legacy.timeParserPolicy': 'LEGACY',
+         'spark.rapids.sql.incompatibleDateFormats.enabled': True})
 
 @tz_sensitive_test
 @pytest.mark.skipif(not is_supported_time_zone(), reason="not all time zones are supported now, refer to https://github.com/NVIDIA/spark-rapids/issues/6839, please update after all time zones are supported")

diff --git a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/RapidsShuffleInternalManagerBase.scala b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/RapidsShuffleInternalManagerBase.scala
@@ -308,6 +308,8 @@ abstract class RapidsShuffleThreadedWriterBase[K, V](
   private def write(records: TimeTrackingIterator): Unit = {
     withResource(new NvtxRange("ThreadedWriter.write", NvtxColor.RED)) { _ =>
       withResource(new NvtxRange("compute", NvtxColor.GREEN)) { _ =>
+        // Timestamp when the main processing begins
+        val processingStart: Long = System.nanoTime()
         val mapOutputWriter = shuffleExecutorComponents.createMapOutputWriter(
           shuffleId,
           mapId,
@@ -342,8 +344,7 @@ abstract class RapidsShuffleThreadedWriterBase[K, V](
             var waitTimeOnLimiterNs: Long = 0L
             // Time spent computing ColumnarBatch sizes
             var batchSizeComputeTimeNs: Long = 0L
-            // Timestamp when the main processing begins
-            val processingStart: Long = System.nanoTime()
+
             try {
               while (records.hasNext) {
                 // get the record
@@ -447,7 +448,7 @@ abstract class RapidsShuffleThreadedWriterBase[K, V](
             serializationTimeMetric.foreach(_ += (serializationRatio * writeTimeNs).toLong)
             // we add only the write and combine times here because this metric is
             // meant to show the time we are blocked on writes
-            shuffleWriteTimeMetric.foreach(_ += (openTimeNs + writeTimeNs + combineTimeNs))
+            shuffleWriteTimeMetric.foreach(_ += (writeTimeNs + combineTimeNs))
             shuffleCombineTimeMetric.foreach(_ += combineTimeNs)
             pl
           }