clippy

adriangb · adriangb · commit af6b4117cb3e · 2025-10-22T11:48:33.000-05:00
diff --git a/datafusion/physical-plan/src/repartition/mod.rs b/datafusion/physical-plan/src/repartition/mod.rs
@@ -82,6 +82,14 @@ type MaybeBatch = Option<Result<RepartitionBatch>>;
 type InputPartitionsToCurrentPartitionSender = Vec<DistributionSender<MaybeBatch>>;
 type InputPartitionsToCurrentPartitionReceiver = Vec<DistributionReceiver<MaybeBatch>>;
 
+/// Output channel with its associated memory reservation and spill pool
+#[derive(Clone)]
+struct OutputChannel {
+    sender: DistributionSender<MaybeBatch>,
+    reservation: SharedMemoryReservation,
+    spill_pool: Arc<Mutex<SpillPool>>,
+}
+
 /// Channels and resources for a single output partition
 struct PartitionChannels {
     /// Senders for each input partition to send data to this output partition
@@ -242,12 +250,13 @@ impl RepartitionExecState {
             ));
 
             // Create SpillPool with configured max file size
-            let max_file_size = context.session_config().options().execution.max_spill_file_size_bytes;
-            let spill_pool = SpillPool::new(
-                max_file_size,
-                Arc::clone(&spill_manager),
-                input.schema(),
-            );
+            let max_file_size = context
+                .session_config()
+                .options()
+                .execution
+                .max_spill_file_size_bytes;
+            let spill_pool =
+                SpillPool::new(max_file_size, Arc::clone(&spill_manager), input.schema());
 
             channels.insert(
                 partition,
@@ -270,11 +279,11 @@ impl RepartitionExecState {
                 .map(|(partition, channels)| {
                     (
                         *partition,
-                        (
-                            channels.tx[i].clone(),
-                            Arc::clone(&channels.reservation),
-                            Arc::clone(&channels.spill_pool),
-                        ),
+                        OutputChannel {
+                            sender: channels.tx[i].clone(),
+                            reservation: Arc::clone(&channels.reservation),
+                            spill_pool: Arc::clone(&channels.spill_pool),
+                        },
                     )
                 })
                 .collect();
@@ -291,9 +300,7 @@ impl RepartitionExecState {
             let wait_for_task = SpawnedTask::spawn(RepartitionExec::wait_for_task(
                 input_task,
                 txs.into_iter()
-                    .map(|(partition, (tx, _reservation, _spill_manager))| {
-                        (partition, tx)
-                    })
+                    .map(|(partition, channel)| (partition, channel.sender))
                     .collect(),
             ));
             spawned_tasks.push(wait_for_task);
@@ -758,12 +765,7 @@ impl ExecutionPlan for RepartitionExec {
                     .remove(&partition)
                     .expect("partition not used yet");
 
-                (
-                    rx,
-                    reservation,
-                    spill_pool,
-                    Arc::clone(&state.abort_helper),
-                )
+                (rx, reservation, spill_pool, Arc::clone(&state.abort_helper))
             };
 
             trace!(
@@ -1051,14 +1053,7 @@ impl RepartitionExec {
     /// txs hold the output sending channels for each output partition
     async fn pull_from_input(
         mut stream: SendableRecordBatchStream,
-        mut output_channels: HashMap<
-            usize,
-            (
-                DistributionSender<MaybeBatch>,
-                SharedMemoryReservation,
-                Arc<Mutex<SpillPool>>,
-            ),
-        >,
+        mut output_channels: HashMap<usize, OutputChannel>,
         partitioning: Partitioning,
         metrics: RepartitionMetrics,
     ) -> Result<()> {
@@ -1090,30 +1085,28 @@ impl RepartitionExec {
 
                 let timer = metrics.send_time[partition].timer();
                 // if there is still a receiver, send to it
-                if let Some((tx, reservation, spill_pool)) =
-                    output_channels.get_mut(&partition)
-                {
+                if let Some(channel) = output_channels.get_mut(&partition) {
                     let (batch_to_send, is_memory_batch) =
-                        match reservation.lock().try_grow(size) {
+                        match channel.reservation.lock().try_grow(size) {
                             Ok(_) => {
                                 // Memory available - send in-memory batch
                                 (RepartitionBatch::Memory(batch), true)
                             }
                             Err(_) => {
                                 // We're memory limited - spill to SpillPool
                                 // SpillPool handles file handle reuse and rotation
-                                spill_pool.lock().push_batch(&batch)?;
+                                channel.spill_pool.lock().push_batch(&batch)?;
 
                                 // Send marker indicating batch was spilled
                                 (RepartitionBatch::Spilled, false)
                             }
                         };
 
-                    if tx.send(Some(Ok(batch_to_send))).await.is_err() {
+                    if channel.sender.send(Some(Ok(batch_to_send))).await.is_err() {
                         // If the other end has hung up, it was an early shutdown (e.g. LIMIT)
                         // Only shrink memory if it was a memory batch
                         if is_memory_batch {
-                            reservation.lock().shrink(size);
+                            channel.reservation.lock().shrink(size);
                         }
                         output_channels.remove(&partition);
                     }
diff --git a/datafusion/physical-plan/src/spill/spill_pool.rs b/datafusion/physical-plan/src/spill/spill_pool.rs
@@ -204,12 +204,20 @@ impl SpillPool {
 
     /// Returns the number of files currently in the pool
     pub fn file_count(&self) -> usize {
-        self.files.len() + if self.current_write_file.is_some() { 1 } else { 0 }
+        self.files.len()
+            + if self.current_write_file.is_some() {
+                1
+            } else {
+                0
+            }
     }
 
     /// Returns the total number of unread batches across all files
     pub fn batch_count(&self) -> usize {
-        self.files.iter().map(|f| f.remaining_batches()).sum::<usize>()
+        self.files
+            .iter()
+            .map(|f| f.remaining_batches())
+            .sum::<usize>()
             + self.current_batch_count
     }
 
@@ -385,7 +393,8 @@ mod tests {
         let env = Arc::new(RuntimeEnv::default());
         let metrics = SpillMetrics::new(&ExecutionPlanMetricsSet::new(), 0);
         let schema = create_test_schema();
-        let spill_manager = Arc::new(SpillManager::new(env, metrics, Arc::clone(&schema)));
+        let spill_manager =
+            Arc::new(SpillManager::new(env, metrics, Arc::clone(&schema)));
 
         SpillPool::new(max_file_size, spill_manager, schema)
     }
diff --git a/datafusion/sqllogictest/test_files/information_schema.slt b/datafusion/sqllogictest/test_files/information_schema.slt
@@ -223,6 +223,7 @@ datafusion.execution.keep_partition_by_columns false
 datafusion.execution.listing_table_factory_infer_partitions true
 datafusion.execution.listing_table_ignore_subdirectory true
 datafusion.execution.max_buffered_batches_per_output_file 2
+datafusion.execution.max_spill_file_size_bytes 104857600
 datafusion.execution.meta_fetch_concurrency 32
 datafusion.execution.minimum_parallel_output_files 4
 datafusion.execution.objectstore_writer_buffer_size 10485760
@@ -343,6 +344,7 @@ datafusion.execution.keep_partition_by_columns false Should DataFusion keep the
 datafusion.execution.listing_table_factory_infer_partitions true Should a `ListingTable` created through the `ListingTableFactory` infer table partitions from Hive compliant directories. Defaults to true (partition columns are inferred and will be represented in the table schema).
 datafusion.execution.listing_table_ignore_subdirectory true Should sub directories be ignored when scanning directories for data files. Defaults to true (ignores subdirectories), consistent with Hive. Note that this setting does not affect reading partitioned tables (e.g. `/table/year=2021/month=01/data.parquet`).
 datafusion.execution.max_buffered_batches_per_output_file 2 This is the maximum number of RecordBatches buffered for each output file being worked. Higher values can potentially give faster write performance at the cost of higher peak memory consumption
+datafusion.execution.max_spill_file_size_bytes 104857600 Maximum size in bytes for individual spill files before rotating to a new file. When operators spill data to disk (e.g., RepartitionExec, SortExec), they write multiple batches to the same file until this size limit is reached, then rotate to a new file. This reduces syscall overhead compared to one-file-per-batch while preventing files from growing too large. A larger value reduces file creation overhead but may hold more disk space. A smaller value creates more files but allows finer-grained space reclamation (especially in LIFO mode where files are truncated after reading). Default: 100 MB
 datafusion.execution.meta_fetch_concurrency 32 Number of files to read in parallel when inferring schema and statistics
 datafusion.execution.minimum_parallel_output_files 4 Guarantees a minimum level of output files running in parallel. RecordBatches will be distributed in round robin fashion to each parallel writer. Each writer is closed and a new file opened once soft_max_rows_per_output_file is reached.
 datafusion.execution.objectstore_writer_buffer_size 10485760 Size (bytes) of data buffer DataFusion uses when writing output files. This affects the size of the data chunks that are uploaded to remote object stores (e.g. AWS S3). If very large (>= 100 GiB) output files are being written, it may be necessary to increase this size to avoid errors from the remote end point.