From e3f3f51f686ce372717c42b21c647c9966e2de77 Mon Sep 17 00:00:00 2001 From: Jason Lowe Date: Wed, 16 Oct 2024 16:50:57 -0500 Subject: [PATCH] Ensure repartition overflow test always overflows (#11614) Signed-off-by: Jason Lowe --- integration_tests/src/main/python/repart_test.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/integration_tests/src/main/python/repart_test.py b/integration_tests/src/main/python/repart_test.py index 7f299373ff6..17991bc64de 100644 --- a/integration_tests/src/main/python/repart_test.py +++ b/integration_tests/src/main/python/repart_test.py @@ -313,16 +313,16 @@ def test_hash_repartition_exact_longs_no_overflow(num_parts, is_ansi_mode): @pytest.mark.parametrize('num_parts', [17], ids=idfn) @allow_non_gpu(*non_utc_allow) def test_hash_repartition_long_overflow_ansi_exception(num_parts): - data_gen = [('a', long_gen)] - part_on = [f.col('a') + 15] conf = ansi_enabled_conf def test_function(spark): - return gen_df(spark, data_gen, length=1024) \ - .withColumn('plus15', f.col('a') + 15) \ - .repartition(num_parts, f.col('plus15')) \ + df = gen_df(spark, [('a', long_gen)], length=1024) + maxVal = df.selectExpr("max(a) as m").head()['m'] + overflowVal = (1 << 63) - maxVal + return df.withColumn('plus', f.col('a') + overflowVal) \ + .repartition(num_parts, f.col('plus')) \ .withColumn('id', f.spark_partition_id()) \ - .withColumn('hashed', f.hash(*part_on)) \ + .withColumn('hashed', f.hash(f.col('a') + overflowVal)) \ .selectExpr('*', 'pmod(hashed, {})'.format(num_parts)) assert_gpu_and_cpu_error(