[SPARK-53656][SS] Fix ambiguity when both SparkSession and SQLContext are defined as implicit variables

ganeshas-db · ganeshas-db · commit 7a8de696626a · 2025-10-04T14:00:23.000+05:30
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/runtime/memory.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/runtime/memory.scala
@@ -24,7 +24,7 @@ import javax.annotation.concurrent.GuardedBy
 import scala.collection.mutable.ListBuffer
 
 import org.apache.spark.internal.Logging
-import org.apache.spark.sql.{Encoder, SparkSession}
+import org.apache.spark.sql.{Encoder, SparkSession, SQLContext}
 import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.catalyst.encoders.{encoderFor, ExpressionEncoder}
 import org.apache.spark.sql.catalyst.expressions.UnsafeRow
@@ -43,7 +43,7 @@ import org.apache.spark.sql.internal.connector.SimpleTableProvider
 import org.apache.spark.sql.types.StructType
 import org.apache.spark.sql.util.CaseInsensitiveStringMap
 
-object MemoryStream {
+object MemoryStream extends LowPriorityMemoryStreamImplicits {
   protected val currentBlockId = new AtomicInteger(0)
   protected val memoryStreamId = new AtomicInteger(0)
 
@@ -54,6 +54,27 @@ object MemoryStream {
     new MemoryStream[A](memoryStreamId.getAndIncrement(), sparkSession, Some(numPartitions))
 }
 
+/**
+ * Lower-priority `apply` overloads taking an implicit SQLContext, kept in a parent trait of
+ * the MemoryStream companion to prevent ambiguity when both SparkSession and SQLContext are
+ * implicitly in scope. The companion's own overloads, which use SparkSession, take precedence.
+ */
+trait LowPriorityMemoryStreamImplicits {
+  this: MemoryStream.type =>
+
+  // Deprecated: used when an implicit SQLContext is in scope; delegates to its sparkSession
+  @deprecated("Use MemoryStream.apply with an implicit SparkSession instead of SQLContext", "4.1.0")
+  def apply[A: Encoder]()(implicit sqlContext: SQLContext): MemoryStream[A] =
+    new MemoryStream[A](memoryStreamId.getAndIncrement(), sqlContext.sparkSession)
+
+  @deprecated("Use MemoryStream.apply with an implicit SparkSession instead of SQLContext", "4.1.0")
+  def apply[A: Encoder](numPartitions: Int)(implicit sqlContext: SQLContext): MemoryStream[A] =
+    new MemoryStream[A](
+      memoryStreamId.getAndIncrement(),
+      sqlContext.sparkSession,
+      Some(numPartitions))
+}
+
 /**
  * A base class for memory stream implementations. Supports adding data and resetting.
  */
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/MemorySinkSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/MemorySinkSuite.scala
@@ -343,6 +343,28 @@ class MemorySinkSuite extends StreamTest with BeforeAndAfter {
       intsToDF(expected)(schema))
   }
 
+  test("LowPriorityMemoryStreamImplicits works with implicit sqlContext") {
+    // Declare an implicit SQLContext; MemoryStream must be constructible with it in scope.
+    implicit val sqlContext: SQLContext = spark.sqlContext
+
+    // Factory form without arguments: MemoryStream[A]()
+    val intStream = MemoryStream[Int]()
+    assert(intStream != null)
+
+    // Factory form with an explicit partition count: MemoryStream[A](numPartitions)
+    val stringStream = MemoryStream[String](3)
+    assert(stringStream != null)
+
+    // Each stream must accept data and expose a column named "value" in its DataFrame schema.
+    intStream.addData(1, 2, 3)
+    val intColumns = intStream.toDF().schema.fieldNames
+    assert(intColumns.contains("value"))
+
+    // Same check for the String-typed stream.
+    stringStream.addData("a", "b", "c")
+    val stringColumns = stringStream.toDF().schema.fieldNames
+    assert(stringColumns.contains("value"))
+  }
+
   private implicit def intsToDF(seq: Seq[Int])(implicit schema: StructType): DataFrame = {
     require(schema.fields.length === 1)
     sqlContext.createDataset(seq).toDF(schema.fieldNames.head)