Change resultBlockId for RddComputeMonotasks.

This commit changes the resultBlockId used by RddComputeMonotasks from an RDDBlockId to a MonotaskResultBlockId. There's no reason for this result to use an RDDBlockId (it's temporary data, not where the RDD will be stored more permanently), and storing it under an RDDBlockId can sometimes trigger a race condition in BlockManager between when the monotask's result gets cleaned up and when a DiskWriteMonotask writes the result (NetSys/spark-monotasks#26).
kayousterhout · Jul 20, 2015 · d62c5cd · d62c5cd
1 parent 5df3963
commit d62c5cd
Show file tree

Hide file tree

Showing 2 changed files with 5 additions and 4 deletions.
diff --git a/core/src/main/scala/org/apache/spark/monotasks/compute/RddComputeMonotask.scala b/core/src/main/scala/org/apache/spark/monotasks/compute/RddComputeMonotask.scala
@@ -20,7 +20,7 @@ import java.nio.ByteBuffer
 
 import org.apache.spark.{Partition, SparkEnv, TaskContextImpl}
 import org.apache.spark.rdd.RDD
-import org.apache.spark.storage.{RDDBlockId, StorageLevel}
+import org.apache.spark.storage.{MonotaskResultBlockId, StorageLevel}
 
 /**
  * Computes the specified partition of the specified RDD and stores the result in the BlockManager.
@@ -30,12 +30,12 @@ import org.apache.spark.storage.{RDDBlockId, StorageLevel}
 private[spark] class RddComputeMonotask[T](context: TaskContextImpl, rdd: RDD[T], split: Partition)
   extends ComputeMonotask(context) {
 
-  resultBlockId = Some(new RDDBlockId(rdd.id, split.index))
+  resultBlockId = Some(new MonotaskResultBlockId(taskId))
 
   override def execute(): Option[ByteBuffer] = {
     val iterator = rdd.compute(split, context)
     SparkEnv.get.blockManager.cacheIterator(
-      getResultBlockId(), iterator, StorageLevel.MEMORY_ONLY, true)
+      getResultBlockId(), iterator, StorageLevel.MEMORY_ONLY, tellMaster = false)
     None
   }
 }
diff --git a/core/src/main/scala/org/apache/spark/rdd/RDD.scala b/core/src/main/scala/org/apache/spark/rdd/RDD.scala
@@ -398,7 +398,8 @@ abstract class RDD[T: ClassTag](
 
       // Create a SerializationMonotask to serialize the block and a DiskWriteMonotask to write it
       // to disk.
-      val serializationMonotask = new SerializationMonotask(context, blockId)
+      val serializationMonotask =
+        new SerializationMonotask(context, rddComputeMonotask.getResultBlockId())
       serializationMonotask.addDependency(rddComputeMonotask)
       val diskWriteMonotask = new DiskWriteMonotask(
         context, blockId, serializationMonotask.getResultBlockId())