Merge pull request #190 from julienrf/dynamodb-resumability
Support resumability of DynamoDB migrations
Showing 20 changed files with 492 additions and 99 deletions.
New documentation file (13 additions, 0 deletions):
=================================================
Resume an Interrupted Migration Where it Left Off
=================================================

.. note:: This feature is currently supported only when migrating from Apache Cassandra or DynamoDB.

If the migration is interrupted for any reason (e.g., because of a networking issue, or because you had to stop it manually), the migrator is able to resume it from a “savepoint”.

Savepoints are configuration files that record which items have already been migrated, so that those items can be skipped when the migration is resumed. Savepoint files are generated automatically during the migration. To use a savepoint, start a new migration with it as the configuration file.

You can control the location of the savepoints and the interval at which they are generated in the configuration file, under the top-level property ``savepoints``. See `the corresponding section of the configuration reference </configuration#savepoints>`_.

During the migration, savepoints are written to files named ``savepoint_xxx.yaml``, where ``xxx`` is a Unix timestamp such as ``1234567890``. To resume a migration, start a new migration with the latest savepoint as the configuration file.
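
As a minimal sketch, the ``savepoints`` section of a configuration file may look like the following. The property names ``path`` and ``intervalSeconds`` are the ones read by the ``SavepointsManager`` introduced in this change; the values shown are placeholders, and the configuration reference linked above is the authoritative source for the exact layout:

.. code-block:: yaml

   savepoints:
     # Directory where the savepoint files are written
     path: /var/lib/scylla-migrator/savepoints
     # Interval, in seconds, between two automatic savepoints
     intervalSeconds: 300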
migrator/src/main/scala/com/scylladb/migrator/SavepointsManager.scala (new file, 116 additions, 0 deletions):
package com.scylladb.migrator

import com.scylladb.migrator.config.MigratorConfig
import org.apache.log4j.LogManager
import sun.misc.{ Signal, SignalHandler }

import java.nio.charset.StandardCharsets
import java.nio.file.{ Files, Paths }
import java.util.concurrent.{ ScheduledThreadPoolExecutor, TimeUnit }

/**
  * A component that manages savepoints. Savepoints provide a way to resume an interrupted migration.
  *
  * This component periodically stores savepoints according to the schedule defined in the configuration.
  * It also automatically stores a savepoint in case of early termination (e.g. due to a SIGTERM signal).
  *
  * Internally, it works by writing modified copies of the original migration configuration. These copies
  * specify which parts of the source dataset have already been migrated and can safely be skipped when
  * restarting the migration.
  *
  * Make sure to call the method `close` when you don’t need the savepoints manager anymore so that it
  * releases the resources it was using.
  *
  * This class is abstract. Subclasses are responsible for implementing how to track the migration progress,
  * and for communicating the updated state of the migration via the method `updateConfigWithMigrationState`.
  */
abstract class SavepointsManager(migratorConfig: MigratorConfig) extends AutoCloseable {

  val log = LogManager.getLogger(this.getClass.getName)
  private val scheduler = new ScheduledThreadPoolExecutor(1)

  createSavepointsDirectory()
  addUSR2Handler()
  startSavepointSchedule()

  private def createSavepointsDirectory(): Unit = {
    val savepointsDirectory = Paths.get(migratorConfig.savepoints.path)
    if (!Files.exists(savepointsDirectory)) {
      log.debug(
        s"Directory ${savepointsDirectory.normalize().toString} does not exist. Creating it...")
      Files.createDirectories(savepointsDirectory)
    }
  }

  private def savepointFilename(path: String): String =
    s"${path}/savepoint_${System.currentTimeMillis / 1000}.yaml"

  private def addUSR2Handler(): Unit = {
    log.info(
      "Installing SIGINT/TERM/USR2 handler. Send this to dump the current progress to a savepoint.")

    val handler = new SignalHandler {
      override def handle(signal: Signal): Unit =
        dumpMigrationState(signal.toString)
    }

    Signal.handle(new Signal("USR2"), handler)
    Signal.handle(new Signal("TERM"), handler)
    Signal.handle(new Signal("INT"), handler)
  }

  private def startSavepointSchedule(): Unit = {
    val runnable = new Runnable {
      override def run(): Unit =
        try dumpMigrationState("schedule")
        catch {
          case e: Throwable =>
            log.error("Could not create the savepoint. This will be retried.", e)
        }
    }

    log.info(
      s"Starting savepoint schedule; will write a savepoint every ${migratorConfig.savepoints.intervalSeconds} seconds")

    scheduler.scheduleAtFixedRate(
      runnable,
      migratorConfig.savepoints.intervalSeconds,
      migratorConfig.savepoints.intervalSeconds,
      TimeUnit.SECONDS)
  }

  /**
    * Dump the current state of the migration into a configuration file that can be
    * used to resume the migration.
    * @param reason Human-readable, informal, event that caused the dump.
    */
  final def dumpMigrationState(reason: String): Unit = {
    val filename =
      Paths.get(savepointFilename(migratorConfig.savepoints.path)).normalize

    val modifiedConfig = updateConfigWithMigrationState()

    Files.write(filename, modifiedConfig.render.getBytes(StandardCharsets.UTF_8))

    log.info(
      s"Created a savepoint config at ${filename} due to ${reason}. ${describeMigrationState()}")
  }

  /**
    * Stop the periodic creation of savepoints and release the associated resources.
    */
  final def close(): Unit =
    scheduler.shutdown()

  /**
    * Provide readable logs by describing which parts of the migration have been completed already.
    */
  def describeMigrationState(): String

  /**
    * A copy of the original migration configuration, updated to describe which parts of the migration
    * have been completed already.
    */
  def updateConfigWithMigrationState(): MigratorConfig

}
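For illustration, a hypothetical minimal subclass (not part of this PR) could satisfy the contract above by tracking migrated scan segments in a plain in-memory set instead of a Spark accumulator. It assumes `MigratorConfig` is a case class exposing the `skipSegments: Option[Set[Int]]` field used elsewhere in this change:

package com.scylladb.migrator

import com.scylladb.migrator.config.MigratorConfig

// Hypothetical example, for illustration only: records migrated segments in a
// local mutable set rather than a Spark accumulator.
class InMemorySavepointsManager(config: MigratorConfig) extends SavepointsManager(config) {

  private val migratedSegments = scala.collection.mutable.Set.empty[Int]

  // Called by the migration job whenever a segment has been fully copied.
  def markMigrated(segment: Int): Unit =
    migratedSegments += segment

  def describeMigrationState(): String =
    s"Segments to skip: ${migratedSegments.toSet}"

  def updateConfigWithMigrationState(): MigratorConfig =
    config.copy(skipSegments = Some(migratedSegments.toSet))
}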
migrator/src/main/scala/com/scylladb/migrator/alternator/DynamoDbSavepointsManager.scala (new file, 98 additions, 0 deletions):
package com.scylladb.migrator.alternator

import com.scylladb.migrator.SavepointsManager
import com.scylladb.migrator.config.MigratorConfig
import org.apache.hadoop.dynamodb.DynamoDBItemWritable
import org.apache.hadoop.dynamodb.split.DynamoDBSplit
import org.apache.hadoop.io.Text
import org.apache.hadoop.mapred.InputSplit
import org.apache.log4j.LogManager
import org.apache.spark.rdd.RDD
import org.apache.spark.scheduler.{ SparkListener, SparkListenerTaskEnd }
import org.apache.spark.{ Partition, SerializableWritable, SparkContext, Success => TaskEndSuccess }

import scala.util.{ Failure, Success, Try }

/**
  * Manage DynamoDB-based migrations by tracking the migrated scan segments.
  */
class DynamoDbSavepointsManager(migratorConfig: MigratorConfig,
                                segmentsAccumulator: IntSetAccumulator)
    extends SavepointsManager(migratorConfig) {

  def describeMigrationState(): String =
    s"Segments to skip: ${segmentsAccumulator.value}"

  def updateConfigWithMigrationState(): MigratorConfig =
    migratorConfig.copy(skipSegments = Some(segmentsAccumulator.value))

}

object DynamoDbSavepointsManager {

  private val log = LogManager.getLogger(classOf[DynamoDbSavepointsManager])

  def apply(migratorConfig: MigratorConfig,
            segmentsAccumulator: IntSetAccumulator): DynamoDbSavepointsManager =
    new DynamoDbSavepointsManager(migratorConfig, segmentsAccumulator)

  /**
    * Set up a savepoints manager that tracks the scan segments migrated from the source RDD.
    */
  def setup(migratorConfig: MigratorConfig,
            sourceRDD: RDD[(Text, DynamoDBItemWritable)],
            spark: SparkContext): DynamoDbSavepointsManager = {
    val segmentsAccumulator =
      IntSetAccumulator(migratorConfig.skipSegments.getOrElse(Set.empty))
    spark.addSparkListener(new SparkListener {
      override def onTaskEnd(taskEnd: SparkListenerTaskEnd): Unit = {
        val partitionId = taskEnd.taskInfo.partitionId
        log.debug(s"Migration of partition ${partitionId} ended: ${taskEnd.reason}.")
        if (taskEnd.reason == TaskEndSuccess) {
          scanSegments(sourceRDD, partitionId) match {
            case Success(segments) =>
              segments.forEach(segment => segmentsAccumulator.add(segment))
              log.info(s"Marked segments ${segments} as migrated.")
            case Failure(error) =>
              log.error(
                s"Unable to collect the segments scanned in partition ${partitionId}. The next savepoint will not include them.",
                error)
          }
        }
      }
    })
    DynamoDbSavepointsManager(migratorConfig, segmentsAccumulator)
  }

  /**
    * @return The scan segments processed in partition `partitionId` of `rdd`.
    */
  private def scanSegments(rdd: RDD[(Text, DynamoDBItemWritable)],
                           partitionId: Int): Try[java.util.List[Integer]] =
    if (partitionId >= 0 && partitionId < rdd.getNumPartitions) {
      val partition = rdd.partitions(partitionId)
      inputSplit(partition).map(_.getSegments)
    } else {
      Failure(new Exception(s"Partition ${partitionId} not found in the RDD."))
    }

  /**
    * @return The `DynamoDBSplit` wrapped by the `partition`.
    *         Fails if the `partition` is not a `HadoopPartition` containing a `DynamoDBSplit`.
    */
  private def inputSplit(partition: Partition): Try[DynamoDBSplit] = Try {
    // Unfortunately, class `HadoopPartition` is private, so we can’t simply
    // pattern match on it. We use reflection to access its `inputSplit` member.
    if (partition.getClass.getName != "org.apache.spark.rdd.HadoopPartition") {
      throw new Exception(s"Unexpected partition type: ${partition.getClass.getName}.")
    }
    val inputSplitMember = partition.getClass.getMethod("inputSplit")
    val inputSplitResult =
      inputSplitMember.invoke(partition).asInstanceOf[SerializableWritable[InputSplit]]
    inputSplitResult.value match {
      case dynamoDbSplit: DynamoDBSplit => dynamoDbSplit
      case other => throw new Exception(s"Unexpected InputSplit type: ${other.getClass.getName}.")
    }
  }

}
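A rough sketch of how a migration job might wire this in. The surrounding migration logic and the names `migratorConfig`, `sourceRDD`, and `sparkContext` are assumptions, not taken from this excerpt; only `setup`, `dumpMigrationState`, and `close` come from the code above:

// Hypothetical wiring, for illustration only.
val savepointsManager =
  DynamoDbSavepointsManager.setup(migratorConfig, sourceRDD, sparkContext)
try {
  // ... copy sourceRDD to the target Alternator/DynamoDB table (not shown) ...
  // Write a final savepoint once the copy has completed.
  savepointsManager.dumpMigrationState("migration finished")
} finally {
  // Stop the periodic savepoint schedule and release its thread pool.
  savepointsManager.close()
}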