-
Notifications
You must be signed in to change notification settings - Fork 37
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Support resumability of DynamoDB migrations
- Track in savepoint files the scan segments that have been fully migrated
- Generalize the savepoints management to support both CQL and DynamoDB
- Update documentation accordingly

Fixes #165
- Loading branch information
Showing
17 changed files
with
335 additions
and
100 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
116 changes: 116 additions & 0 deletions
116
migrator/src/main/scala/com/scylladb/migrator/SavepointsManager.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,116 @@ | ||
package com.scylladb.migrator | ||
|
||
import com.scylladb.migrator.config.MigratorConfig | ||
import org.apache.log4j.LogManager | ||
import sun.misc.{ Signal, SignalHandler } | ||
|
||
import java.nio.charset.StandardCharsets | ||
import java.nio.file.{ Files, Paths } | ||
import java.util.concurrent.{ ScheduledThreadPoolExecutor, TimeUnit } | ||
|
||
/**
  * A component that manages savepoints. Savepoints provide a way to resume an interrupted migration.
  *
  * This component periodically stores savepoints according to the schedule defined in the configuration.
  * It also stores a savepoint when the process receives a SIGUSR2, SIGTERM, or SIGINT signal.
  *
  * Internally, it works by writing modified copies of the original migration configuration. These copies
  * specify which parts of the source dataset have already been migrated and can safely be skipped when
  * restarting the migration.
  *
  * Make sure to call the method `close` when you don’t need the savepoints manager anymore: it stops the
  * savepoint schedule and re-installs the signal handlers that were in place before this manager was
  * created.
  *
  * This class is abstract. Subclasses are responsible for implementing how to track the migration progress,
  * and for communicating the updated state of the migration via the method `updateConfigWithMigrationState`.
  *
  * @param migratorConfig The original migration configuration. Its `savepoints` section defines where
  *                       savepoints are written (`path`) and how often (`intervalSeconds`).
  */
abstract class SavepointsManager(migratorConfig: MigratorConfig) extends AutoCloseable {

  import scala.util.control.NonFatal

  val log = LogManager.getLogger(this.getClass.getName)
  private val scheduler = new ScheduledThreadPoolExecutor(1)

  createSavepointsDirectory()

  /** The handlers that were replaced by ours, kept so that `close` can restore them. */
  private val previousSignalHandlers: Seq[(Signal, SignalHandler)] = installSignalHandlers()

  startSavepointSchedule()

  /** Create the directory that will contain the savepoint files, if it does not exist yet. */
  private def createSavepointsDirectory(): Unit = {
    val savepointsDirectory = Paths.get(migratorConfig.savepoints.path)
    if (!Files.exists(savepointsDirectory)) {
      log.debug(
        s"Directory ${savepointsDirectory.normalize().toString} does not exist. Creating it...")
      Files.createDirectories(savepointsDirectory)
    }
  }

  /**
    * @return The name of a savepoint file located in the directory `path`. The name contains the
    *         current time in seconds, so two calls within the same second return the same name.
    */
  private def savepointFilename(path: String): String =
    s"${path}/savepoint_${System.currentTimeMillis / 1000}.yaml"

  /**
    * Install a handler that dumps the migration state when receiving USR2, TERM, or INT.
    *
    * @return Every signal we registered on, along with the handler that was installed before ours.
    */
  private def installSignalHandlers(): Seq[(Signal, SignalHandler)] = {
    log.info(
      "Installing SIGINT/TERM/USR2 handler. Send this to dump the current progress to a savepoint.")

    val handler = new SignalHandler {
      override def handle(signal: Signal): Unit =
        // Log failures the same way the scheduled savepoints do, instead of letting them
        // escape from the signal-handling thread.
        try dumpMigrationState(signal.toString)
        catch {
          case NonFatal(e) =>
            log.error(s"Could not create the savepoint requested by signal ${signal}.", e)
        }
    }

    Seq("USR2", "TERM", "INT").map { signalName =>
      val signal = new Signal(signalName)
      // `Signal.handle` returns the handler that was previously registered for the signal.
      signal -> Signal.handle(signal, handler)
    }
  }

  /** Periodically dump the migration state, every `savepoints.intervalSeconds` seconds. */
  private def startSavepointSchedule(): Unit = {
    val runnable = new Runnable {
      override def run(): Unit =
        // An exception escaping from `run` would cancel all the subsequent executions,
        // so non-fatal errors are logged and the next execution acts as a retry.
        try dumpMigrationState("schedule")
        catch {
          case NonFatal(e) =>
            log.error("Could not create the savepoint. This will be retried.", e)
        }
    }

    log.info(
      s"Starting savepoint schedule; will write a savepoint every ${migratorConfig.savepoints.intervalSeconds} seconds")

    scheduler.scheduleAtFixedRate(
      runnable,
      migratorConfig.savepoints.intervalSeconds,
      migratorConfig.savepoints.intervalSeconds,
      TimeUnit.SECONDS)
  }

  /**
    * Dump the current state of the migration into a configuration file that can be
    * used to resume the migration.
    *
    * This method can be called from the scheduler thread and from a signal-handling thread.
    * Calls are serialized because savepoints created within the same second share the same
    * file name, and concurrent writes to that file could corrupt it.
    *
    * @param reason Human-readable, informal, event that caused the dump.
    */
  final def dumpMigrationState(reason: String): Unit = synchronized {
    val filename =
      Paths.get(savepointFilename(migratorConfig.savepoints.path)).normalize

    val modifiedConfig = updateConfigWithMigrationState()

    Files.write(filename, modifiedConfig.render.getBytes(StandardCharsets.UTF_8))

    log.info(
      s"Created a savepoint config at ${filename} due to ${reason}. ${describeMigrationState()}")
  }

  /**
    * Stop the periodic creation of savepoints and release the associated resources.
    * The signal handlers that were replaced when this manager was created are re-installed, so
    * that USR2, TERM, and INT no longer trigger a savepoint after the manager is closed.
    */
  final def close(): Unit = {
    scheduler.shutdown()
    previousSignalHandlers.foreach {
      case (signal, previousHandler) => Signal.handle(signal, previousHandler)
    }
  }

  /**
    * Provide readable logs by describing which parts of the migration have been completed already.
    */
  def describeMigrationState(): String

  /**
    * A copy of the original migration configuration, updated to describe which parts of the migration
    * have been completed already.
    */
  def updateConfigWithMigrationState(): MigratorConfig

}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
97 changes: 97 additions & 0 deletions
97
migrator/src/main/scala/com/scylladb/migrator/alternator/DynamoDbSavepointsManager.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,97 @@ | ||
package com.scylladb.migrator.alternator | ||
|
||
import com.scylladb.migrator.SavepointsManager | ||
import com.scylladb.migrator.config.MigratorConfig | ||
import org.apache.hadoop.dynamodb.DynamoDBItemWritable | ||
import org.apache.hadoop.dynamodb.split.DynamoDBSplit | ||
import org.apache.hadoop.io.Text | ||
import org.apache.hadoop.mapred.InputSplit | ||
import org.apache.log4j.LogManager | ||
import org.apache.spark.rdd.RDD | ||
import org.apache.spark.scheduler.{ SparkListener, SparkListenerTaskEnd } | ||
import org.apache.spark.{ Partition, SerializableWritable, SparkContext, Success => TaskEndSuccess } | ||
|
||
import scala.util.{ Failure, Success, Try } | ||
|
||
/**
  * Savepoints manager for DynamoDB-based migrations. The progress of the migration is
  * described by the set of scan segments accumulated in `segmentsAccumulator`.
  *
  * @param migratorConfig      The original migration configuration.
  * @param segmentsAccumulator Accumulator whose value is reported as the segments to skip.
  */
class DynamoDbSavepointsManager(migratorConfig: MigratorConfig,
                                segmentsAccumulator: IntSetAccumulator)
    extends SavepointsManager(migratorConfig) {

  /** Describe the progress by listing the segments currently held by the accumulator. */
  override def describeMigrationState(): String = {
    val segmentsToSkip = segmentsAccumulator.value
    s"Segments to skip: ${segmentsToSkip}"
  }

  /** A copy of the original configuration whose `skipSegments` is the current accumulator value. */
  override def updateConfigWithMigrationState(): MigratorConfig =
    migratorConfig.copy(
      skipSegments = Some(segmentsAccumulator.value)
    )

}
|
||
object DynamoDbSavepointsManager {

  private val log = LogManager.getLogger(classOf[DynamoDbSavepointsManager])

  /** Create a savepoints manager that reports the content of `segmentsAccumulator`. */
  def apply(migratorConfig: MigratorConfig,
            segmentsAccumulator: IntSetAccumulator): DynamoDbSavepointsManager =
    new DynamoDbSavepointsManager(migratorConfig, segmentsAccumulator)

  /**
    * Create a savepoints manager backed by a fresh accumulator, and register on `spark` a
    * listener that adds to this accumulator the scan segments of the partition of `sourceRDD`
    * whose id is reported by each successfully completed task.
    */
  def setup(migratorConfig: MigratorConfig,
            sourceRDD: RDD[(Text, DynamoDBItemWritable)],
            spark: SparkContext): DynamoDbSavepointsManager = {
    val migratedSegments = IntSetAccumulator.empty

    // Look up the segments scanned by the partition and remember them as migrated.
    def recordMigratedSegments(partitionId: Int): Unit =
      scanSegments(sourceRDD, partitionId) match {
        case Failure(error) =>
          log.error(
            s"Unable to collect the segments scanned in partition ${partitionId}. The next savepoint will not include them.",
            error)
        case Success(segments) =>
          segments.forEach(segment => migratedSegments.add(segment))
          log.info(s"Marked segments ${segments} as migrated.")
      }

    val taskEndListener = new SparkListener {
      override def onTaskEnd(taskEnd: SparkListenerTaskEnd): Unit = {
        val partitionId = taskEnd.taskInfo.partitionId
        log.debug(s"Migration of partition ${partitionId} ended: ${taskEnd.reason}.")
        // Only the tasks that ended successfully count as migrated.
        if (taskEnd.reason == TaskEndSuccess) {
          recordMigratedSegments(partitionId)
        }
      }
    }
    spark.addSparkListener(taskEndListener)

    DynamoDbSavepointsManager(migratorConfig, migratedSegments)
  }

  /**
    * @return The scan segments of the partition number `partitionId` of `rdd`, or a failure
    *         if the RDD has no such partition or if its input split could not be extracted.
    */
  private def scanSegments(rdd: RDD[(Text, DynamoDBItemWritable)],
                           partitionId: Int): Try[java.util.List[Integer]] = {
    val partitionExists = partitionId >= 0 && partitionId < rdd.getNumPartitions
    if (!partitionExists) {
      Failure(new Exception(s"Partition ${partitionId} not found in the RDD."))
    } else {
      inputSplit(rdd.partitions(partitionId)).map(split => split.getSegments)
    }
  }

  /**
    * @return The `DynamoDBSplit` held by the `partition`, or a failure if the `partition` is
    *         not a `HadoopPartition` or if it holds another type of `InputSplit`.
    */
  private def inputSplit(partition: Partition): Try[DynamoDBSplit] = Try {
    // The class `HadoopPartition` is private, which rules out pattern matching on it.
    // Instead, we check its name and read its `inputSplit` member via reflection.
    val partitionClass = partition.getClass
    val partitionClassName = partitionClass.getName
    if (partitionClassName != "org.apache.spark.rdd.HadoopPartition") {
      throw new Exception(s"Unexpected partition type: ${partitionClassName}.")
    }
    val wrappedSplit = partitionClass
      .getMethod("inputSplit")
      .invoke(partition)
      .asInstanceOf[SerializableWritable[InputSplit]]
    wrappedSplit.value match {
      case split: DynamoDBSplit => split
      case unexpected =>
        throw new Exception(s"Unexpected InputSplit type: ${unexpected.getClass.getName}.")
    }
  }

}
Oops, something went wrong.