
Commit 37ee992

ZiyaZa authored and cloud-fan committed
[SPARK-53535][SQL] Fix missing structs always being assumed as nulls
### What changes were proposed in this pull request?

Currently, if all fields of a struct mentioned in the read schema are missing in a Parquet file, the reader populates the struct with nulls. This PR modifies the scan behavior so that if the struct exists in the Parquet schema but none of the fields from the read schema are present, we instead pick an arbitrary field from the Parquet file to read and use it to populate NULLs (as well as outer NULLs and array sizes if the struct is nested in another nested type).

This is done by changing the schema requested by the readers: we add an additional field to the requested schema when clipping the Parquet file schema according to the Spark schema. This means the readers actually read and return more data than requested, which can cause problems. It is only a problem for the `VectorizedParquetRecordReader`, since the other read code path via parquet-mr already has an `UnsafeProjection` in `ParquetFileFormat` that outputs only the requested schema fields. To ensure `VectorizedParquetRecordReader` returns only the Spark-requested fields, we create the `ColumnarBatch` with vectors that match the requested schema (we drop the additional fields by recursively matching `sparkSchema` against `sparkRequestedSchema` and ensuring structs have the same length in both). The `ParquetColumnVector`s are then responsible for allocating dummy vectors that hold the extra data temporarily while reading, but these are not exposed to the outside.

The heuristic to pick the arbitrary leaf field is as follows. We try to minimize the number of arrays or maps (repeated fields) on the path to a leaf column, because the more repeated fields there are, the more data we are likely to read. At the same repetition level, we consider the type of each column and pick the cheapest one to read (struct nesting does not affect the decision here). We use the byte size of the column type as the cost (a rough sketch of this heuristic appears after this commit message):

- BOOLEAN: 1 byte
- INT32, FLOAT: 4 bytes
- INT64, DOUBLE: 8 bytes
- INT96: 12 bytes
- BINARY, FIXED_LEN_BYTE_ARRAY, default case for future types: 32 bytes (high cost due to variable/large size)

### Why are the changes needed?

This is a bug fix: depending on the requested fields, we were incorrectly treating non-null struct values as missing from the file and returning nulls for them.

### Does this PR introduce _any_ user-facing change?

Yes. We previously assumed a struct to be null if all the fields we were trying to read from a Parquet file were missing from that file, even if the file contained other fields whose definition levels could have been used. See the example from the Jira ticket below:

```python
df_a = sql('SELECT 1 as id, named_struct("a", 1) AS s')
path = "/tmp/missing_col_test"
df_a.write.format("parquet").save(path)

df_b = sql('SELECT 2 as id, named_struct("b", 3) AS s')
spark.read.format("parquet").schema(df_b.schema).load(path).show()
```

This used to return:

```
+---+----+
| id|   s|
+---+----+
|  1|NULL|
+---+----+
```

It now returns:

```
+---+------+
| id|     s|
+---+------+
|  1|{NULL}|
+---+------+
```

### How was this patch tested?

Added new unit tests; also updated an old test to expect the new behavior.

### Was this patch authored or co-authored using generative AI tooling?

No.

Closes #52557 from ZiyaZa/missing_struct.

Authored-by: Ziya Mukhtarov <[email protected]>
Signed-off-by: Wenchen Fan <[email protected]>
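To make the cost heuristic above concrete, here is a rough, self-contained sketch in Python (illustrative only: the `Leaf` structure, its fields, and the candidate list are hypothetical and not part of the actual Scala/Java implementation in this commit):

```python
from dataclasses import dataclass

# Approximate byte cost per Parquet primitive type, mirroring the table in the
# commit message above.
TYPE_COST = {
    "BOOLEAN": 1,
    "INT32": 4, "FLOAT": 4,
    "INT64": 8, "DOUBLE": 8,
    "INT96": 12,
}
DEFAULT_COST = 32  # BINARY, FIXED_LEN_BYTE_ARRAY, and any future types

@dataclass
class Leaf:
    path: str                # dotted path to the leaf column in the file schema
    repeated_ancestors: int  # number of arrays/maps on the path to this leaf
    primitive_type: str      # Parquet primitive type name

def pick_cheapest_leaf(leaves: list[Leaf]) -> Leaf:
    # Prefer fewer repeated (array/map) ancestors first, then the cheaper primitive type.
    return min(
        leaves,
        key=lambda leaf: (leaf.repeated_ancestors,
                          TYPE_COST.get(leaf.primitive_type, DEFAULT_COST)))

# Example: a flat INT32 leaf beats a BINARY leaf nested inside an array.
candidates = [
    Leaf("s.arr.element.name", repeated_ancestors=1, primitive_type="BINARY"),
    Leaf("s.x", repeated_ancestors=0, primitive_type="INT32"),
]
assert pick_cheapest_leaf(candidates).path == "s.x"
```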
1 parent 1f21a8b commit 37ee992

File tree

15 files changed: +898 −121 lines

docs/sql-migration-guide.md

Lines changed: 4 additions & 0 deletions
```diff
@@ -22,6 +22,10 @@ license: |
 * Table of contents
 {:toc}
 
+## Upgrading from Spark SQL 4.0 to 4.1
+
+- Since Spark 4.1, the Parquet reader no longer assumes all struct values to be null, if all the requested fields are missing in the parquet file. The new default behavior is to read an additional struct field that is present in the file to determine nullness. To restore the previous behavior, set `spark.sql.legacy.parquet.returnNullStructIfAllFieldsMissing` to `true`.
+
 ## Upgrading from Spark SQL 3.5 to 4.0
 
 - Since Spark 4.0, `spark.sql.ansi.enabled` is on by default. To restore the previous behavior, set `spark.sql.ansi.enabled` to `false` or `SPARK_ANSI_SQL_MODE` to `false`.
```
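For reference, a minimal PySpark sketch of how the legacy flag from this migration note could be applied (the session setup and the path are assumed, reusing the hypothetical path from the commit message example):

```python
from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()

# Restore the pre-4.1 behavior: a struct is read as NULL whenever all of its
# requested fields are absent from the Parquet file.
spark.conf.set("spark.sql.legacy.parquet.returnNullStructIfAllFieldsMissing", "true")

# With the flag enabled, the commit-message example would again show NULL for `s`
# instead of {NULL}.
spark.read.format("parquet") \
    .schema("id INT, s STRUCT<b: INT>") \
    .load("/tmp/missing_col_test") \
    .show()
```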

sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala

Lines changed: 12 additions & 0 deletions
```diff
@@ -1534,6 +1534,18 @@ object SQLConf {
     .booleanConf
     .createWithDefault(true)
 
+  val LEGACY_PARQUET_RETURN_NULL_STRUCT_IF_ALL_FIELDS_MISSING =
+    buildConf("spark.sql.legacy.parquet.returnNullStructIfAllFieldsMissing")
+      .internal()
+      .doc("When true, if all requested fields of a struct are missing in a parquet file, assume " +
+        "the struct is always null, even if other fields are present. The default behavior is " +
+        "to fetch and read an arbitrary non-requested field present in the file to determine " +
+        "struct nullness. If enabled, schema pruning may cause non-null structs to be read as " +
+        "null.")
+      .version("4.1.0")
+      .booleanConf
+      .createWithDefault(false)
+
   val PARQUET_RECORD_FILTER_ENABLED = buildConf("spark.sql.parquet.recordLevelFilter.enabled")
     .doc("If true, enables Parquet's native record-level filtering using the pushed down " +
       "filters. " +
```

sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/ParquetColumnVector.java

Lines changed: 18 additions & 30 deletions
```diff
@@ -21,14 +21,10 @@
 import java.util.List;
 import java.util.Set;
 
-import org.apache.spark.memory.MemoryMode;
 import org.apache.spark.network.util.JavaUtils;
-import org.apache.spark.sql.execution.vectorized.OffHeapColumnVector;
-import org.apache.spark.sql.execution.vectorized.OnHeapColumnVector;
 import org.apache.spark.sql.execution.vectorized.WritableColumnVector;
 import org.apache.spark.sql.types.ArrayType;
 import org.apache.spark.sql.types.DataType;
-import org.apache.spark.sql.catalyst.types.DataTypeUtils;
 import org.apache.spark.sql.types.DataTypes;
 import org.apache.spark.sql.types.MapType;
 import org.apache.spark.sql.types.StructType;
@@ -69,16 +65,9 @@ final class ParquetColumnVector {
       ParquetColumn column,
       WritableColumnVector vector,
       int capacity,
-      MemoryMode memoryMode,
       Set<ParquetColumn> missingColumns,
       boolean isTopLevel,
       Object defaultValue) {
-    DataType sparkType = column.sparkType();
-    if (!DataTypeUtils.sameType(sparkType, vector.dataType())) {
-      throw new IllegalArgumentException("Spark type: " + sparkType +
-        " doesn't match the type: " + vector.dataType() + " in column vector");
-    }
-
     this.column = column;
     this.vector = vector;
     this.children = new ArrayList<>();
@@ -111,35 +100,41 @@ final class ParquetColumnVector {
 
     if (column.variantFileType().isDefined()) {
       ParquetColumn fileContentCol = column.variantFileType().get();
-      WritableColumnVector fileContent = memoryMode == MemoryMode.OFF_HEAP
-        ? new OffHeapColumnVector(capacity, fileContentCol.sparkType())
-        : new OnHeapColumnVector(capacity, fileContentCol.sparkType());
-      ParquetColumnVector contentVector = new ParquetColumnVector(fileContentCol,
-        fileContent, capacity, memoryMode, missingColumns, false, null);
+      WritableColumnVector fileContent = vector.reserveNewColumn(
+        capacity, fileContentCol.sparkType());
+      ParquetColumnVector contentVector = new ParquetColumnVector(fileContentCol, fileContent,
+        capacity, missingColumns, /* isTopLevel= */ false, /* defaultValue= */ null);
       children.add(contentVector);
       variantSchema = SparkShreddingUtils.buildVariantSchema(fileContentCol.sparkType());
       fieldsToExtract = SparkShreddingUtils.getFieldsToExtract(column.sparkType(), variantSchema);
       repetitionLevels = contentVector.repetitionLevels;
       definitionLevels = contentVector.definitionLevels;
     } else if (isPrimitive) {
       if (column.repetitionLevel() > 0) {
-        repetitionLevels = allocateLevelsVector(capacity, memoryMode);
+        repetitionLevels = vector.reserveNewColumn(capacity, DataTypes.IntegerType);
       }
       // We don't need to create and store definition levels if the column is top-level.
       if (!isTopLevel) {
-        definitionLevels = allocateLevelsVector(capacity, memoryMode);
+        definitionLevels = vector.reserveNewColumn(capacity, DataTypes.IntegerType);
       }
     } else {
-      JavaUtils.checkArgument(column.children().size() == vector.getNumChildren(),
-        "The number of column children is different from the number of vector children");
+      // If a child is not present in the allocated vectors, it means we don't care about this
+      // child's data, we just want to read its levels to help assemble some parent struct. So we
+      // create a dummy vector below to hold the child's data. There can only be one such child.
+      JavaUtils.checkArgument(column.children().size() == vector.getNumChildren() ||
+          column.children().size() == vector.getNumChildren() + 1,
+        "The number of column children is not equal to the number of vector children or that + 1");
       boolean allChildrenAreMissing = true;
 
       for (int i = 0; i < column.children().size(); i++) {
-        ParquetColumnVector childCv = new ParquetColumnVector(column.children().apply(i),
-          vector.getChild(i), capacity, memoryMode, missingColumns, false, null);
+        ParquetColumn childColumn = column.children().apply(i);
+        WritableColumnVector childVector = i < vector.getNumChildren()
+          ? vector.getChild(i)
+          : vector.reserveNewColumn(capacity, childColumn.sparkType());
+        ParquetColumnVector childCv = new ParquetColumnVector(childColumn, childVector, capacity,
+          missingColumns, /* isTopLevel= */ false, /* defaultValue= */ null);
         children.add(childCv);
 
-
         // Only use levels from non-missing child, this can happen if only some but not all
         // fields of a struct are missing.
         if (!childCv.vector.isAllNull()) {
@@ -375,13 +370,6 @@ private void assembleStruct() {
     vector.addElementsAppended(rowId);
   }
 
-  private static WritableColumnVector allocateLevelsVector(int capacity, MemoryMode memoryMode) {
-    return switch (memoryMode) {
-      case ON_HEAP -> new OnHeapColumnVector(capacity, DataTypes.IntegerType);
-      case OFF_HEAP -> new OffHeapColumnVector(capacity, DataTypes.IntegerType);
-    };
-  }
-
   /**
    * For a collection (i.e., array or map) element at index 'idx', returns the starting index of
    * the next collection after it.
```

sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/SpecificParquetRecordReaderBase.java

Lines changed: 22 additions & 18 deletions
```diff
@@ -87,6 +87,8 @@ public abstract class SpecificParquetRecordReaderBase<T> extends RecordReader<Vo
 
   protected ParquetRowGroupReader reader;
 
+  protected Configuration configuration;
+
   @Override
   public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext)
       throws IOException, InterruptedException {
@@ -99,7 +101,7 @@ public void initialize(
       Option<HadoopInputFile> inputFile,
       Option<SeekableInputStream> inputStream,
       Option<ParquetMetadata> fileFooter) throws IOException, InterruptedException {
-    Configuration configuration = taskAttemptContext.getConfiguration();
+    this.configuration = taskAttemptContext.getConfiguration();
     FileSplit split = (FileSplit) inputSplit;
     this.file = split.getPath();
     ParquetReadOptions options = HadoopReadOptions
@@ -164,22 +166,22 @@ public void initialize(
    * configurations.
    */
   protected void initialize(String path, List<String> columns) throws IOException {
-    Configuration config = new Configuration();
-    config.setBoolean(SQLConf.PARQUET_BINARY_AS_STRING().key() , false);
-    config.setBoolean(SQLConf.PARQUET_INT96_AS_TIMESTAMP().key(), false);
-    config.setBoolean(SQLConf.CASE_SENSITIVE().key(), false);
-    config.setBoolean(SQLConf.PARQUET_INFER_TIMESTAMP_NTZ_ENABLED().key(), false);
-    config.setBoolean(SQLConf.LEGACY_PARQUET_NANOS_AS_LONG().key(), false);
+    this.configuration = new Configuration();
+    this.configuration.setBoolean(SQLConf.PARQUET_BINARY_AS_STRING().key() , false);
+    this.configuration.setBoolean(SQLConf.PARQUET_INT96_AS_TIMESTAMP().key(), false);
+    this.configuration.setBoolean(SQLConf.CASE_SENSITIVE().key(), false);
+    this.configuration.setBoolean(SQLConf.PARQUET_INFER_TIMESTAMP_NTZ_ENABLED().key(), false);
+    this.configuration.setBoolean(SQLConf.LEGACY_PARQUET_NANOS_AS_LONG().key(), false);
 
     this.file = new Path(path);
-    long length = this.file.getFileSystem(config).getFileStatus(this.file).getLen();
+    long length = this.file.getFileSystem(configuration).getFileStatus(this.file).getLen();
 
     ParquetReadOptions options = HadoopReadOptions
-      .builder(config, file)
+      .builder(configuration, file)
       .withRange(0, length)
       .build();
     ParquetFileReader fileReader = ParquetFileReader.open(
-      HadoopInputFile.fromPath(file, config), options);
+      HadoopInputFile.fromPath(file, configuration), options);
     this.reader = new ParquetRowGroupReaderImpl(fileReader);
     this.fileSchema = fileReader.getFooter().getFileMetaData().getSchema();
 
@@ -201,9 +203,10 @@ protected void initialize(String path, List<String> columns) throws IOException
       }
     }
     fileReader.setRequestedSchema(requestedSchema);
-    this.parquetColumn = new ParquetToSparkSchemaConverter(config)
+    this.parquetColumn = new ParquetToSparkSchemaConverter(configuration)
       .convertParquetColumn(requestedSchema, Option.empty());
     this.sparkSchema = (StructType) parquetColumn.sparkType();
+    this.sparkRequestedSchema = this.sparkSchema;
    this.totalRowCount = fileReader.getFilteredRecordCount();
   }
 
@@ -216,15 +219,16 @@ protected void initialize(
     this.reader = rowGroupReader;
     this.fileSchema = fileSchema;
     this.requestedSchema = requestedSchema;
-    Configuration config = new Configuration();
-    config.setBoolean(SQLConf.PARQUET_BINARY_AS_STRING().key() , false);
-    config.setBoolean(SQLConf.PARQUET_INT96_AS_TIMESTAMP().key(), false);
-    config.setBoolean(SQLConf.CASE_SENSITIVE().key(), false);
-    config.setBoolean(SQLConf.PARQUET_INFER_TIMESTAMP_NTZ_ENABLED().key(), false);
-    config.setBoolean(SQLConf.LEGACY_PARQUET_NANOS_AS_LONG().key(), false);
-    this.parquetColumn = new ParquetToSparkSchemaConverter(config)
+    this.configuration = new Configuration();
+    this.configuration.setBoolean(SQLConf.PARQUET_BINARY_AS_STRING().key() , false);
+    this.configuration.setBoolean(SQLConf.PARQUET_INT96_AS_TIMESTAMP().key(), false);
+    this.configuration.setBoolean(SQLConf.CASE_SENSITIVE().key(), false);
+    this.configuration.setBoolean(SQLConf.PARQUET_INFER_TIMESTAMP_NTZ_ENABLED().key(), false);
+    this.configuration.setBoolean(SQLConf.LEGACY_PARQUET_NANOS_AS_LONG().key(), false);
+    this.parquetColumn = new ParquetToSparkSchemaConverter(configuration)
      .convertParquetColumn(requestedSchema, Option.empty());
     this.sparkSchema = (StructType) parquetColumn.sparkType();
+    this.sparkRequestedSchema = this.sparkSchema;
     this.totalRowCount = totalRowCount;
   }
 
```

sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedParquetRecordReader.java

Lines changed: 65 additions & 4 deletions
```diff
@@ -48,10 +48,10 @@
 import org.apache.spark.sql.execution.vectorized.OffHeapColumnVector;
 import org.apache.spark.sql.execution.vectorized.OnHeapColumnVector;
 import org.apache.spark.sql.execution.vectorized.WritableColumnVector;
+import org.apache.spark.sql.internal.SQLConf$;
+import org.apache.spark.sql.types.*;
 import org.apache.spark.sql.vectorized.ColumnVector;
 import org.apache.spark.sql.vectorized.ColumnarBatch;
-import org.apache.spark.sql.types.StructField;
-import org.apache.spark.sql.types.StructType;
 
 /**
  * A specialized RecordReader that reads into InternalRows or ColumnarBatches directly using the
@@ -265,7 +265,15 @@ private void initBatch(
       MemoryMode memMode,
       StructType partitionColumns,
       InternalRow partitionValues) {
-    StructType batchSchema = new StructType(sparkSchema.fields());
+    boolean returnNullStructIfAllFieldsMissing = configuration.getBoolean(
+      SQLConf$.MODULE$.LEGACY_PARQUET_RETURN_NULL_STRUCT_IF_ALL_FIELDS_MISSING().key(),
+      (boolean) SQLConf$.MODULE$.LEGACY_PARQUET_RETURN_NULL_STRUCT_IF_ALL_FIELDS_MISSING()
+        .defaultValue().get());
+    StructType batchSchema = returnNullStructIfAllFieldsMissing
+      ? new StructType(sparkSchema.fields())
+      // Truncate to match requested schema to make sure extra struct field that we read for
+      // nullability is not included in columnarBatch and exposed outside.
+      : (StructType) truncateType(sparkSchema, sparkRequestedSchema);
 
     int constantColumnLength = 0;
     if (partitionColumns != null) {
@@ -287,7 +295,8 @@ private void initBatch(
         defaultValue = ResolveDefaultColumns.existenceDefaultValues(sparkRequestedSchema)[i];
       }
       columnVectors[i] = new ParquetColumnVector(parquetColumn.children().apply(i),
-        (WritableColumnVector) vectors[i], capacity, memMode, missingColumns, true, defaultValue);
+        (WritableColumnVector) vectors[i], capacity, missingColumns, /* isTopLevel= */ true,
+        defaultValue);
     }
 
     if (partitionColumns != null) {
@@ -309,6 +318,58 @@ public void initBatch(StructType partitionColumns, InternalRow partitionValues)
     initBatch(MEMORY_MODE, partitionColumns, partitionValues);
   }
 
+  /**
+   * Keeps the hierarchy and fields of readType, recursively truncating struct fields from the end
+   * of the fields list to match the same number of fields in requestedType. This is used to get rid
+   * of the extra fields that are added to the structs when the fields we wanted to read initially
+   * were missing in the file schema. So this returns a type that we would be reading if everything
+   * was present in the file, matching Spark's expected schema.
+   *
+   * <p> Example: <pre>{@code
+   * readType: array<struct<a:int,b:long,c:int>>
+   * requestedType: array<struct<a:int,b:long>>
+   * returns: array<struct<a:int,b:long>>
+   * }</pre>
+   * We cannot return requestedType here because there might be slight differences, like nullability
+   * of fields or the type precision (smallint/int).
+   */
+  @VisibleForTesting
+  static DataType truncateType(DataType readType, DataType requestedType) {
+    if (requestedType instanceof UserDefinedType<?> requestedUDT) {
+      requestedType = requestedUDT.sqlType();
+    }
+
+    if (readType instanceof StructType readStruct &&
+        requestedType instanceof StructType requestedStruct) {
+      StructType result = new StructType();
+      for (int i = 0; i < requestedStruct.fields().length; i++) {
+        StructField readField = readStruct.fields()[i];
+        StructField requestedField = requestedStruct.fields()[i];
+        DataType truncatedType = truncateType(readField.dataType(), requestedField.dataType());
+        result = result.add(readField.copy(
+          readField.name(), truncatedType, readField.nullable(), readField.metadata()));
+      }
+      return result;
+    }
+
+    if (readType instanceof ArrayType readArray &&
+        requestedType instanceof ArrayType requestedArray) {
+      DataType truncatedElementType = truncateType(
+        readArray.elementType(), requestedArray.elementType());
+      return readArray.copy(truncatedElementType, readArray.containsNull());
+    }
+
+    if (readType instanceof MapType readMap && requestedType instanceof MapType requestedMap) {
+      DataType truncatedKeyType = truncateType(readMap.keyType(), requestedMap.keyType());
+      DataType truncatedValueType = truncateType(readMap.valueType(), requestedMap.valueType());
+      return readMap.copy(truncatedKeyType, truncatedValueType, readMap.valueContainsNull());
+    }
+
+    assert !ParquetSchemaConverter.isComplexType(readType);
+    assert !ParquetSchemaConverter.isComplexType(requestedType);
+    return readType;
+  }
+
   /**
    * Returns the ColumnarBatch object that will be used for all rows returned by this reader.
    * This object is reused. Calling this enables the vectorized reader. This should be called
```
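As a hedged illustration of the idea behind `truncateType` above, here is a rough PySpark-typed sketch (not the Java implementation itself; it skips the UserDefinedType unwrapping and the assertions):

```python
from pyspark.sql.types import ArrayType, DataType, MapType, StructField, StructType

def truncate_type(read_type: DataType, requested_type: DataType) -> DataType:
    # Struct: keep only as many leading fields as the requested struct has, recursing
    # into each kept field; zip() drops the extra trailing read-only fields, mirroring
    # the Java loop over the requested struct's fields. Names, nullability and metadata
    # come from the read type.
    if isinstance(read_type, StructType) and isinstance(requested_type, StructType):
        kept = [
            StructField(
                read_field.name,
                truncate_type(read_field.dataType, req_field.dataType),
                read_field.nullable,
                read_field.metadata)
            for read_field, req_field in zip(read_type.fields, requested_type.fields)
        ]
        return StructType(kept)
    # Array / map: recurse into the element / key and value types.
    if isinstance(read_type, ArrayType) and isinstance(requested_type, ArrayType):
        return ArrayType(
            truncate_type(read_type.elementType, requested_type.elementType),
            read_type.containsNull)
    if isinstance(read_type, MapType) and isinstance(requested_type, MapType):
        return MapType(
            truncate_type(read_type.keyType, requested_type.keyType),
            truncate_type(read_type.valueType, requested_type.valueType),
            read_type.valueContainsNull)
    # Primitive leaf: keep the read type (it may differ slightly from the requested
    # one, e.g. in precision), just like the Java version returns readType.
    return read_type

# Example from the Javadoc above:
#   read:      array<struct<a:int, b:bigint, c:int>>
#   requested: array<struct<a:int, b:bigint>>
#   result:    array<struct<a:int, b:bigint>>
```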

sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/OffHeapColumnVector.java

Lines changed: 1 addition & 1 deletion
```diff
@@ -633,7 +633,7 @@ protected void reserveInternal(int newCapacity) {
   }
 
   @Override
-  protected OffHeapColumnVector reserveNewColumn(int capacity, DataType type) {
+  public OffHeapColumnVector reserveNewColumn(int capacity, DataType type) {
     return new OffHeapColumnVector(capacity, type);
   }
 }
```

sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/OnHeapColumnVector.java

Lines changed: 1 addition & 1 deletion
```diff
@@ -646,7 +646,7 @@ protected void reserveInternal(int newCapacity) {
   }
 
   @Override
-  protected OnHeapColumnVector reserveNewColumn(int capacity, DataType type) {
+  public OnHeapColumnVector reserveNewColumn(int capacity, DataType type) {
     return new OnHeapColumnVector(capacity, type);
   }
 }
```

sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/WritableColumnVector.java

Lines changed: 1 addition & 1 deletion
```diff
@@ -944,7 +944,7 @@ public final boolean isAllNull() {
   /**
    * Reserve a new column.
    */
-  protected abstract WritableColumnVector reserveNewColumn(int capacity, DataType type);
+  public abstract WritableColumnVector reserveNewColumn(int capacity, DataType type);
 
   protected boolean isArray() {
     return type instanceof ArrayType || type instanceof BinaryType || type instanceof StringType ||
```
