diff --git a/docs/source/conf.py b/docs/source/conf.py
index 74df9ee3..915c5379 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -109,7 +109,7 @@
"hide_feedback_buttons": "false",
"github_issues_repository": "scylladb/scylla-migrator",
"github_repository": "scylladb/scylla-migrator",
- "site_description": "Migrate data using Spark from Cassandra or DynamoDB to Scylla.",
+ "site_description": "Migrate data using Spark from Apache Cassandra or DynamoDB to ScyllaDB.",
"hide_version_dropdown": [],
"zendesk_tag": "",
"versions_unstable": UNSTABLE_VERSIONS,
diff --git a/docs/source/configuration.rst b/docs/source/configuration.rst
index 75c411fd..51daeb14 100644
--- a/docs/source/configuration.rst
+++ b/docs/source/configuration.rst
@@ -38,16 +38,16 @@ The ``source`` property describes the type of data to read from. It must be an o
Valid values for the source ``type`` are:
-- ``cassandra`` for a CQL-compatible source (Cassandra or ScyllaDB).
+- ``cassandra`` for a CQL-compatible source (Apache Cassandra or ScyllaDB).
- ``parquet`` for a dataset stored using the Parquet format.
- ``dynamodb`` for a DynamoDB-compatible source (AWS DynamoDB or ScyllaDB Alternator).
- ``dynamodb-s3-export`` for a DynamoDB table exported to S3.
The following subsections detail the schema of each source type.
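+
+For instance, every ``source`` object starts with the ``type`` discriminator. Here is a minimal sketch; the remaining fields depend on the chosen type and are detailed in the following subsections:
+
+.. code-block:: yaml
+
+   source:
+     # One of: cassandra, parquet, dynamodb, dynamodb-s3-export
+     type: cassandra
+     # ... type-specific fields
+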
-^^^^^^^^^^^^^^^^
-Cassandra Source
-^^^^^^^^^^^^^^^^
+^^^^^^^^^^^^^^^^^^^^^^^
+Apache Cassandra Source
+^^^^^^^^^^^^^^^^^^^^^^^
A source of type ``cassandra`` can be used together with a target of type ``cassandra`` only.
@@ -80,7 +80,7 @@ A source of type ``cassandra`` can be used together with a target of type ``cass
# available in the Spark cluster, and optimally more; higher splits will lead
# to more fine-grained resumes. Aim for 8 * (Spark cores).
splitCount: 256
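+# (For instance, a hypothetical cluster of 4 workers with 8 cores each
+# exposes 32 Spark cores, and 8 * 32 = 256 splits.)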
- # Number of connections to use to Cassandra when copying
+ # Number of connections to use to Apache Cassandra when copying
connections: 8
# Number of rows to fetch in each read
fetchSize: 1000
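+# Note: a larger fetchSize generally means fewer round trips per split,
+# at the cost of more rows buffered in memory for each read.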
@@ -132,9 +132,9 @@ A source of type ``dynamodb`` can be used together with a target of type ``dynam
type: dynamodb
# Name of the table to write. If it does not exist, it will be created on the fly.
table:
- # Connect to a custom endpoint. Mandatory if writing to Scylla Alternator.
+ # Connect to a custom endpoint. Mandatory if writing to ScyllaDB Alternator.
endpoint:
- # If writing to Scylla Alternator, prefix the hostname with 'http://'.
+ # If writing to ScyllaDB Alternator, prefix the hostname with 'http://'.
host:
port:
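+# For example, a hypothetical ScyllaDB Alternator endpoint:
+#   host: http://scylla.example.com
+#   port: 8000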
# Optional - AWS availability region.
@@ -208,14 +208,14 @@ The ``target`` property describes the type of data to write. It must be an objec
Valid values for the target ``type`` are:
-- ``cassandra`` for a CQL-compatible target (Cassandra or ScyllaDB).
+- ``cassandra`` for a CQL-compatible target (Apache Cassandra or ScyllaDB).
- ``dynamodb`` for a DynamoDB-compatible target (DynamoDB or ScyllaDB Alternator).
The following subsections detail the schema of each target type.
-^^^^^^^^^^^^^^^^
-Cassandra Target
-^^^^^^^^^^^^^^^^
+^^^^^^^^^^^^^^^^^^^^^^^
+Apache Cassandra Target
+^^^^^^^^^^^^^^^^^^^^^^^
.. code-block:: yaml
@@ -240,7 +240,7 @@ Cassandra Target
# Consistency Level for the target connection
# Options are: LOCAL_ONE, ONE, LOCAL_QUORUM, QUORUM.
consistencyLevel: LOCAL_QUORUM
- # Number of connections to use to Scylla/Cassandra when copying
+ # Number of connections to use to ScyllaDB / Apache Cassandra when copying
connections: 16
# Spark pads decimals with zeros appropriate to their scale. This causes values
# like '3.5' to be copied as '3.5000000000...' to the target. There's no good way
diff --git a/docs/source/getting-started/ansible.rst b/docs/source/getting-started/ansible.rst
index bbad9cb5..14e26dfa 100644
--- a/docs/source/getting-started/ansible.rst
+++ b/docs/source/getting-started/ansible.rst
@@ -26,10 +26,10 @@ An `Ansible `_ playbook is provided in the `ansible fo
- Ensure networking is configured to allow you access spark master node via TCP ports 8080 and 4040
- visit ``http://:8080``
-8. `Review and modify config.yaml `_ based whether you're performing a migration to CQL or Alternator
+8. `Review and modify config.yaml <../#configure-the-migration>`_ based on whether you're performing a migration to CQL or Alternator
- - If you're migrating to Scylla CQL interface (from Cassandra, Scylla, or other CQL source), make a copy review the comments in ``config.yaml.example``, and edit as directed.
- - If you're migrating to Alternator (from DynamoDB or other Scylla Alternator), make a copy, review the comments in ``config.dynamodb.yml``, and edit as directed.
+ - If you're migrating to the ScyllaDB CQL interface (from Apache Cassandra, ScyllaDB, or another CQL source), make a copy, review the comments in ``config.yaml.example``, and edit as directed.
+ - If you're migrating to Alternator (from DynamoDB or another ScyllaDB Alternator source), make a copy, review the comments in ``config.dynamodb.yml``, and edit as directed.
9. As part of ansible deployment, sample submit jobs were created. You may edit and use the submit jobs.
diff --git a/docs/source/getting-started/aws-emr.rst b/docs/source/getting-started/aws-emr.rst
index 52e354dd..5e6e13e7 100644
--- a/docs/source/getting-started/aws-emr.rst
+++ b/docs/source/getting-started/aws-emr.rst
@@ -12,7 +12,7 @@ This page describes how to use the Migrator in `Amazon EMR `_
-2. `Configure the migration `_ according to your needs.
+2. `Configure the migration <../#configure-the-migration>`_ according to your needs.
3. Download the latest release of the Migrator.
@@ -67,7 +67,7 @@ This page describes how to use the Migrator in `Amazon EMR `_
- See also our `general recommendations to tune the Spark job `_.
+ See also our `general recommendations to tune the Spark job <../#run-the-migration>`_.
- Add a Bootstrap action to download the Migrator and the migration configuration:
diff --git a/docs/source/getting-started/docker.rst b/docs/source/getting-started/docker.rst
index bfaf6606..6d4653cd 100644
--- a/docs/source/getting-started/docker.rst
+++ b/docs/source/getting-started/docker.rst
@@ -38,7 +38,7 @@ This page describes how to set up a Spark cluster locally on your machine by usi
127.0.0.1 spark-master
127.0.0.1 spark-worker
-5. Rename the file ``config.yaml.example`` to ``config.yaml``, and `configure `_ it according to your needs.
+5. Rename the file ``config.yaml.example`` to ``config.yaml``, and `configure <../#configure-the-migration>`_ it according to your needs.
6. Finally, run the migration.
@@ -52,7 +52,7 @@ This page describes how to set up a Spark cluster locally on your machine by usi
The ``spark-master`` container mounts the ``./migrator/target/scala-2.13`` dir on ``/jars`` and the repository root on ``/app``.
- See also our `general recommendations to tune the Spark job `_.
+ See also our `general recommendations to tune the Spark job <../#run-the-migration>`_.
7. You can monitor progress by observing the Spark web console you opened in step 4. Additionally, after the job has started, you can track progress via ``http://localhost:4040``.
diff --git a/docs/source/getting-started/index.rst b/docs/source/getting-started/index.rst
index 380325fe..875d8209 100644
--- a/docs/source/getting-started/index.rst
+++ b/docs/source/getting-started/index.rst
@@ -25,8 +25,8 @@ Configure the Migration
Once you have a Spark cluster ready to run the ``scylla-migrator-assembly.jar``, download the file `config.yaml.example `_ and rename it to ``config.yaml``. This file contains properties such as ``source`` or ``target`` defining how to connect to the source database and to the target database, as well as other settings to perform the migration. Adapt it to your case according to the following guides:
-- :doc:`migrate from Cassandra or Parquet files to ScyllaDB `,
-- or, :doc:`migrate from DynamoDB to ScyllaDB’s Alternator `.
+- :doc:`Migrate from Apache Cassandra or Parquet files to ScyllaDB `.
+- :doc:`Migrate from DynamoDB to ScyllaDB’s Alternator `.
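+
+Whichever guide applies, ``config.yaml`` keeps the same overall shape. Here is a minimal sketch with placeholder values; each ``type`` determines the remaining fields, as covered in the guides above:
+
+.. code-block:: yaml
+
+   source:
+     type: cassandra   # or: parquet, dynamodb, dynamodb-s3-export
+     # ... connection details for the source database
+   target:
+     type: cassandra   # or: dynamodb
+     # ... connection details for the target database
+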
-----------------
Run the Migration
diff --git a/docs/source/getting-started/spark-standalone.rst b/docs/source/getting-started/spark-standalone.rst
index a2972360..9378e3ca 100644
--- a/docs/source/getting-started/spark-standalone.rst
+++ b/docs/source/getting-started/spark-standalone.rst
@@ -21,7 +21,7 @@ This page describes how to set up a Spark cluster on your infrastructure and to
wget https://github.com/scylladb/scylla-migrator/raw/master/config.yaml.example \
--output-document=config.yaml
-4. `Configure the migration `_ according to your needs.
+4. `Configure the migration <../#configure-the-migration>`_ according to your needs.
5. Finally, run the migration as follows from the Spark master node.
@@ -32,6 +32,6 @@ This page describes how to set up a Spark cluster on your infrastructure and to
--conf spark.scylla.config= \
- See also our `general recommendations to tune the Spark job `_.
+ See also our `general recommendations to tune the Spark job <../#run-the-migration>`_.
6. You can monitor progress from the `Spark web UI `_.
diff --git a/docs/source/index.rst b/docs/source/index.rst
index e7f1c371..4e26d901 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -4,10 +4,10 @@ ScyllaDB Migrator Documentation
The ScyllaDB Migrator is a Spark application that migrates data to ScyllaDB. Its main features are the following:
-* it can read from Cassandra, Parquet, DynamoDB, or a DynamoDB S3 export,
-* it can be distributed over multiple nodes of a Spark cluster to scale with your database cluster,
-* it can rename columns along the way,
-* when migrating from DynamoDB it can transfer a snapshot of the source data, or continuously migrate new data as they come.
+* It can read from Apache Cassandra, Parquet, DynamoDB, or a DynamoDB S3 export.
+* It can be distributed over multiple nodes of a Spark cluster to scale with your database cluster.
+* It can rename columns along the way.
+* When migrating from DynamoDB, it can transfer a snapshot of the source data or continuously migrate new data as it arrives.
Read over the :doc:`Getting Started ` page to set up a Spark cluster for a migration.
diff --git a/docs/source/migrate-from-cassandra-or-parquet.rst b/docs/source/migrate-from-cassandra-or-parquet.rst
index dc4b6a4c..a31226b2 100644
--- a/docs/source/migrate-from-cassandra-or-parquet.rst
+++ b/docs/source/migrate-from-cassandra-or-parquet.rst
@@ -1,11 +1,11 @@
-=============================================
-Migrate from Cassandra or from a Parquet File
-=============================================
+====================================================
+Migrate from Apache Cassandra or from a Parquet File
+====================================================
-This page explains how to fill the ``source`` and ``target`` properties of the `configuration file `_ to migrate data:
+This page explains how to fill the ``source`` and ``target`` properties of the `configuration file <../configuration>`_ to migrate data:
-- from Cassandra, ScyllaDB, or from a `Parquet `_ file,
-- to Cassandra or ScyllaDB.
+- from Apache Cassandra, ScyllaDB, or a `Parquet `_ file,
+- to Apache Cassandra or ScyllaDB.
In file ``config.yaml``, make sure to keep only one ``source`` property and one ``target`` property, and configure them as explained in the following subsections according to your case.
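+
+For example, a migration from Apache Cassandra to ScyllaDB uses a single ``source`` and a single ``target``, both of type ``cassandra``. Here is a minimal sketch; the connection details are covered in the subsections below:
+
+.. code-block:: yaml
+
+   source:
+     type: cassandra
+     # ... see "Configuring the Source"
+   target:
+     type: cassandra
+     # ... see "Configuring the Destination"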
@@ -13,13 +13,13 @@ In file ``config.yaml``, make sure to keep only one ``source`` property and one
Configuring the Source
----------------------
-The data ``source`` can be a Cassandra or ScyllaDB table, or a Parquet file.
+The data ``source`` can be an Apache Cassandra or ScyllaDB table, or a Parquet file.
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-Reading from Cassandra or ScyllaDB
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+Reading from Apache Cassandra or ScyllaDB
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-In both cases, when reading from Cassandra or ScyllaDB, the type of source should be ``cassandra`` in the configuration file. Here is a minimal ``source`` configuration:
+In both cases, the type of the source should be ``cassandra`` in the configuration file. Here is a minimal ``source`` configuration:
.. code-block:: yaml
@@ -44,7 +44,7 @@ In both cases, when reading from Cassandra or ScyllaDB, the type of source shoul
# available in the Spark cluster, and optimally more; higher splits will lead
# to more fine-grained resumes. Aim for 8 * (Spark cores).
splitCount: 256
- # Number of connections to use to Cassandra when copying
+ # Number of connections to use to Apache Cassandra when copying
connections: 8
# Number of rows to fetch in each read
fetchSize: 1000
@@ -124,13 +124,13 @@ In case the object is not public in the S3 bucket, you can provide the AWS crede
Where ```` and ```` should be replaced with your actual AWS access key and secret key.
-The Migrator also supports advanced AWS authentication options such as using `AssumeRole `_. Please read the `configuration reference `__ for more details.
+The Migrator also supports advanced AWS authentication options such as using `AssumeRole `_. Please read the `configuration reference <../configuration#aws-authentication>`__ for more details.
---------------------------
Configuring the Destination
---------------------------
-The migration ``target`` can be Cassandra or Scylla. In both cases, we use the type ``cassandra`` in the configuration. Here is a minimal ``target`` configuration to write to Cassandra or ScyllaDB:
+The migration ``target`` can be Apache Cassandra or ScyllaDB. In both cases, we use the type ``cassandra`` in the configuration. Here is a minimal ``target`` configuration to write to Apache Cassandra or ScyllaDB:
.. code-block:: yaml
@@ -150,7 +150,7 @@ The migration ``target`` can be Cassandra or Scylla. In both cases, we use the t
# Consistency Level for the target connection
# Options are: LOCAL_ONE, ONE, LOCAL_QUORUM, QUORUM.
consistencyLevel: LOCAL_QUORUM
- # Number of connections to use to Scylla/Cassandra when copying
+ # Number of connections to use to ScyllaDB / Apache Cassandra when copying
connections: 16
# Spark pads decimals with zeros appropriate to their scale. This causes values
# like '3.5' to be copied as '3.5000000000...' to the target. There's no good way
diff --git a/docs/source/migrate-from-dynamodb.rst b/docs/source/migrate-from-dynamodb.rst
index c822dccb..01883419 100644
--- a/docs/source/migrate-from-dynamodb.rst
+++ b/docs/source/migrate-from-dynamodb.rst
@@ -2,7 +2,7 @@
Migrate from DynamoDB
=====================
-This page explains how to fill the ``source`` and ``target`` properties of the `configuration file `_ to migrate data:
+This page explains how to fill the ``source`` and ``target`` properties of the `configuration file <../configuration>`_ to migrate data:
- from a DynamoDB table, a ScyllaDB Alternator table, or a `DynamoDB S3 export `_,
- to a DynamoDB table or a ScyllaDB Alternator table.
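+
+For example, here is a minimal sketch of such a migration in ``config.yaml``; the table names are hypothetical placeholders, and each part is detailed in the following subsections:
+
+.. code-block:: yaml
+
+   source:
+     type: dynamodb
+     table: my-source-table
+   target:
+     type: dynamodb
+     table: my-target-table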
@@ -57,7 +57,7 @@ In practice, your source database (DynamoDB or Alternator) may require authentic
Where ```` and ```` should be replaced with your actual AWS access key and secret key.
-The Migrator also supports advanced AWS authentication options such as using `AssumeRole `_. Please read the `configuration reference `_ for more details.
+The Migrator also supports advanced AWS authentication options such as using `AssumeRole `_. Please read the `configuration reference <../configuration#aws-authentication>`_ for more details.
Last, you can provide the following optional properties:
@@ -150,7 +150,7 @@ Additionally, you can provide the following optional properties:
Where ````, ````, ````, ````, and ```` should be replaced with your specific values.
-The Migrator also supports advanced AWS authentication options such as using `AssumeRole `_. Please read the `configuration reference `_ for more details.
+The Migrator also supports advanced AWS authentication options such as using `AssumeRole `_. Please read the `configuration reference <../configuration#aws-authentication>`_ for more details.
---------------------------
Configuring the Destination
@@ -179,9 +179,9 @@ Additionally, you can also set the following optional properties:
target:
# ... same as above
- # Connect to a custom endpoint. Mandatory if writing to Scylla Alternator.
+ # Connect to a custom endpoint. Mandatory if writing to ScyllaDB Alternator.
endpoint:
- # If writing to Scylla Alternator, prefix the hostname with 'http://'.
+ # If writing to ScyllaDB Alternator, prefix the hostname with 'http://'.
host:
port:
@@ -214,4 +214,4 @@ Additionally, you can also set the following optional properties:
Where ````, ````, ````, ````, and ```` are replaced with your specific values.
-The Migrator also supports advanced AWS authentication options such as using `AssumeRole `_. Please read the `configuration reference `_ for more details.
+The Migrator also supports advanced AWS authentication options such as using `AssumeRole `_. Please read the `configuration reference <../configuration#aws-authentication>`_ for more details.