fix(bucket-locations): add templating to backup_bucket_location
Since we now run backup/restore in the test region and
`backup_bucket_region` was removed, this configuration needs
to be templated, as was already done for the restore nemesis.

Otherwise the backup task fails like the following:
```
23:22:43  Command: 'sudo sctool backup -c a05d9ea2-312c-4c8d-99b0-9f9b57e8cbde
          --keyspace scylla_bench,keyspace1  --location s3:manager-backup-tests-us-east-1 '
23:22:43  Exit code: 1
23:22:43  Stdout:
23:22:43  Stderr:
23:22:43  Error: create backup target: location is not accessible
23:22:43  10.4.2.70: agent [HTTP 400] operation put: s3 upload: 301 Moved Permanently:
          The bucket you are attempting to access must be addressed using the specified endpoint.
          Please send all future requests to this endpoint. (code:PermanentRedirect) - make sure the location
          is correct and credentials are set, to debug SSH to 10.4.2.70 and run
          "scylla-manager-agent check-location -L s3:manager-backup-tests-us-east-1 --debug"
23:22:43  Trace ID: DXvyKGPjQMKUCkJpyxlKCQ (grep in scylla-manager logs)
```
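The `301 Moved Permanently` (`PermanentRedirect`) above is S3 rejecting a request sent to an endpoint in a different region than the bucket's own: runs outside us-east-1 still pointed at the hard-coded `manager-backup-tests-us-east-1` bucket. Not part of this commit, but a quick diagnostic sketch for checking where a bucket actually lives:

```
import boto3

def bucket_region(bucket_name: str) -> str:
    """Diagnostic sketch (not from the diff): report the bucket's real region."""
    constraint = boto3.client('s3').get_bucket_location(Bucket=bucket_name)['LocationConstraint']
    # S3 returns None for us-east-1 for historical reasons.
    return constraint or 'us-east-1'
```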

(cherry picked from commit fe55645)
fruch authored and actions-user committed Jan 13, 2025
1 parent 77d1f4d commit 19b2fab
Showing 4 changed files with 300 additions and 4 deletions.
2 changes: 1 addition & 1 deletion defaults/aws_config.yaml
@@ -26,7 +26,7 @@ ami_id_db_oracle: ''
use_preinstalled_scylla: true

backup_bucket_backend: 's3'
-backup_bucket_location: 'manager-backup-tests-us-east-1'
+backup_bucket_location: 'manager-backup-tests-{region}'

data_volume_disk_num: 0
data_volume_disk_type: 'gp2'
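With the bucket name templated in the config above, resolving it takes one `str.format` call against the test's first region. A minimal sketch of the pattern the diffs below repeat (the `params` object and its `region_names` attribute mirror SCT's; the helper itself is hypothetical):

```
def resolve_backup_location(params) -> str:
    # First configured region, or '' when none is set.
    region = next(iter(params.region_names), '')
    # 'manager-backup-tests-{region}' -> e.g. 'manager-backup-tests-us-east-1'
    return params.get('backup_bucket_location').format(region=region)
```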
3 changes: 2 additions & 1 deletion functional_tests/scylla_operator/test_functional.py
@@ -432,7 +432,8 @@ def test_mgmt_backup(db_cluster, manager_version):

     # Run manager backup operation
     mgr_cluster = db_cluster.get_cluster_manager()
-    backup_bucket_location = db_cluster.params.get('backup_bucket_location')
+    region = next(iter(db_cluster.params.region_names), '')
+    backup_bucket_location = db_cluster.params.get('backup_bucket_location').format(region=region)
     bucket_name = f"s3:{backup_bucket_location.split()[0]}"
     mgr_task = mgr_cluster.create_backup_task(location_list=[bucket_name, ])
     assert mgr_task, "Failed to create backup task"
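Note the `split()[0]` kept from the original code: `backup_bucket_location` may hold several space-separated buckets, and only the first is handed to the manager. A small illustration with hypothetical values:

```
backup_bucket_location = 'manager-backup-tests-eu-west-1 spare-bucket-eu-west-1'
bucket_name = f"s3:{backup_bucket_location.split()[0]}"
assert bucket_name == 's3:manager-backup-tests-eu-west-1'
```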
296 changes: 295 additions & 1 deletion mgmt_cli_test.py
@@ -163,6 +163,7 @@ def download_from_azure(self, node, source, destination):
def get_table_id(self, node, table_name, keyspace_name=None, remove_hyphen=True):
"""
<<<<<<< HEAD
:param keyspace_name: not mandatory. Should be used when there's more than one table with the same
name in different keyspaces
:param remove_hyphen: In the table's directory, scylla removes the hyphens from the id. Setting
@@ -177,6 +178,298 @@ def get_table_id(self, node, table_name, keyspace_name=None, remove_hyphen=True)
if remove_hyphen:
return base_id.replace('-', '')
return base_id
||||||| parent of fe5564501 (fix(bucket-locations): add templating to `backup_bucket_location`)
class SnapshotOperations(ClusterTester):

@staticmethod
def get_snapshot_data(snapshot_name: str) -> SnapshotData:
snapshots_config = "defaults/manager_restore_benchmark_snapshots.yaml"
with open(snapshots_config, encoding="utf-8") as snapshots_yaml:
all_snapshots_dict = yaml.safe_load(snapshots_yaml)

try:
snapshot_dict = all_snapshots_dict["sizes"][snapshot_name]
except KeyError:
raise ValueError(f"Snapshot data for size '{snapshot_name}'GB was not found in the {snapshots_config} file")

ks_tables_map = {}
for ks, ts in snapshot_dict["schema"].items():
t_names = [list(t.keys())[0] for t in ts]
ks_tables_map[ks] = t_names

snapshot_data = SnapshotData(
bucket=all_snapshots_dict["bucket"],
tag=snapshot_dict["tag"],
exp_timeout=snapshot_dict["exp_timeout"],
keyspaces=list(snapshot_dict["schema"].keys()),
ks_tables_map=ks_tables_map,
cs_read_cmd_template=all_snapshots_dict["cs_read_cmd_template"],
prohibit_verification_read=snapshot_dict["prohibit_verification_read"],
number_of_rows=snapshot_dict["number_of_rows"],
node_ids=snapshot_dict.get("node_ids"),
)
return snapshot_data

@staticmethod
def _get_all_snapshot_files_s3(cluster_id, bucket_name, region_name):
file_set = set()
s3_client = boto3.client('s3', region_name=region_name)
paginator = s3_client.get_paginator('list_objects')
pages = paginator.paginate(Bucket=bucket_name, Prefix=f'backup/sst/cluster/{cluster_id}')
for page in pages:
# No "Contents" key means no snapshot files exist for this cluster,
# probably because no backup ran before this function
if "Contents" in page:
content_list = page["Contents"]
file_set.update([item["Key"] for item in content_list])
return file_set

@staticmethod
def _get_all_snapshot_files_gce(cluster_id, bucket_name):
file_set = set()
storage_client, _ = get_gce_storage_client()
blobs = storage_client.list_blobs(bucket_or_name=bucket_name, prefix=f'backup/sst/cluster/{cluster_id}')
for listing_object in blobs:
file_set.add(listing_object.name)
# Unlike S3, if no files match the prefix, no error will occur
return file_set

@staticmethod
def _get_all_snapshot_files_azure(cluster_id, bucket_name):
file_set = set()
azure_service = AzureService()
container_client = azure_service.blob.get_container_client(container=bucket_name)
dir_listing = container_client.list_blobs(name_starts_with=f'backup/sst/cluster/{cluster_id}')
for listing_object in dir_listing:
file_set.add(listing_object.name)
return file_set

def get_all_snapshot_files(self, cluster_id):
bucket_name = self.params.get('backup_bucket_location').split()[0]
if self.params.get('backup_bucket_backend') == 's3':
region_name = next(iter(self.params.region_names), '')
return self._get_all_snapshot_files_s3(cluster_id=cluster_id, bucket_name=bucket_name,
region_name=region_name)
elif self.params.get('backup_bucket_backend') == 'gcs':
return self._get_all_snapshot_files_gce(cluster_id=cluster_id, bucket_name=bucket_name)
elif self.params.get('backup_bucket_backend') == 'azure':
return self._get_all_snapshot_files_azure(cluster_id=cluster_id, bucket_name=bucket_name)
else:
raise ValueError(f'"{self.params.get("backup_bucket_backend")}" not supported')


class ManagerTestFunctionsMixIn(
DatabaseOperations,
StressLoadOperations,
ClusterOperations,
BucketOperations,
SnapshotOperations,
):
test_config = TestConfig()
manager_test_metrics = ManagerTestMetrics()

def get_email_data(self):
self.log.info("Prepare data for email")

email_data = self._get_common_email_data()

restore_parameters = self.params.get("mgmt_restore_extra_params")

agent_backup_config = self.params.get("mgmt_agent_backup_config")
if agent_backup_config:
agent_backup_config = agent_backup_config.dict()

email_data.update(
{
"manager_server_repo": self.params.get("scylla_mgmt_address"),
"manager_agent_repo": (self.params.get("scylla_mgmt_agent_address") or
self.params.get("scylla_mgmt_address")),
"backup_time": str(self.manager_test_metrics.backup_time),
"restore_time": str(self.manager_test_metrics.restore_time),
"restore_parameters": restore_parameters,
"agent_backup_config": agent_backup_config,
}
)
return email_data

@cached_property
def locations(self) -> list[str]:
backend = self.params.get("backup_bucket_backend")

buckets = self.params.get("backup_bucket_location")
if not isinstance(buckets, list):
buckets = buckets.split()

# FIXME: Make it work with multiple locations or file a bug for scylla-manager.
return [f"{backend}:{location}" for location in buckets[:1]]

# pylint: disable=too-many-arguments
def verify_backup_success(self, mgr_cluster, backup_task, ks_names: list = None, tables_names: list = None,
truncate=True, restore_data_with_task=False, timeout=None):
if ks_names is None:
ks_names = ['keyspace1']
if tables_names is None:
tables_names = ['standard1']
ks_tables_map = {keyspace: tables_names for keyspace in ks_names}
if truncate:
for ks, tables in ks_tables_map.items():
for table_name in tables:
self.log.info(f'running truncate on {ks}.{table_name}')
self.db_cluster.nodes[0].run_cqlsh(f'TRUNCATE {ks}.{table_name}')
if restore_data_with_task:
self.restore_backup_with_task(mgr_cluster=mgr_cluster, snapshot_tag=backup_task.get_snapshot_tag(),
timeout=timeout, restore_data=True)
else:
snapshot_tag = backup_task.get_snapshot_tag()
self.restore_backup_without_manager(mgr_cluster=mgr_cluster, snapshot_tag=snapshot_tag,
ks_tables_list=ks_tables_map)
=======
class SnapshotOperations(ClusterTester):

@staticmethod
def get_snapshot_data(snapshot_name: str) -> SnapshotData:
snapshots_config = "defaults/manager_restore_benchmark_snapshots.yaml"
with open(snapshots_config, encoding="utf-8") as snapshots_yaml:
all_snapshots_dict = yaml.safe_load(snapshots_yaml)

try:
snapshot_dict = all_snapshots_dict["sizes"][snapshot_name]
except KeyError:
raise ValueError(f"Snapshot data for size '{snapshot_name}'GB was not found in the {snapshots_config} file")

ks_tables_map = {}
for ks, ts in snapshot_dict["schema"].items():
t_names = [list(t.keys())[0] for t in ts]
ks_tables_map[ks] = t_names

snapshot_data = SnapshotData(
bucket=all_snapshots_dict["bucket"],
tag=snapshot_dict["tag"],
exp_timeout=snapshot_dict["exp_timeout"],
keyspaces=list(snapshot_dict["schema"].keys()),
ks_tables_map=ks_tables_map,
cs_read_cmd_template=all_snapshots_dict["cs_read_cmd_template"],
prohibit_verification_read=snapshot_dict["prohibit_verification_read"],
number_of_rows=snapshot_dict["number_of_rows"],
node_ids=snapshot_dict.get("node_ids"),
)
return snapshot_data

@staticmethod
def _get_all_snapshot_files_s3(cluster_id, bucket_name, region_name):
file_set = set()
s3_client = boto3.client('s3', region_name=region_name)
paginator = s3_client.get_paginator('list_objects')
pages = paginator.paginate(Bucket=bucket_name, Prefix=f'backup/sst/cluster/{cluster_id}')
for page in pages:
# No "Contents" key means no snapshot files exist for this cluster,
# probably because no backup ran before this function
if "Contents" in page:
content_list = page["Contents"]
file_set.update([item["Key"] for item in content_list])
return file_set

@staticmethod
def _get_all_snapshot_files_gce(cluster_id, bucket_name):
file_set = set()
storage_client, _ = get_gce_storage_client()
blobs = storage_client.list_blobs(bucket_or_name=bucket_name, prefix=f'backup/sst/cluster/{cluster_id}')
for listing_object in blobs:
file_set.add(listing_object.name)
# Unlike S3, if no files match the prefix, no error will occur
return file_set

@staticmethod
def _get_all_snapshot_files_azure(cluster_id, bucket_name):
file_set = set()
azure_service = AzureService()
container_client = azure_service.blob.get_container_client(container=bucket_name)
dir_listing = container_client.list_blobs(name_starts_with=f'backup/sst/cluster/{cluster_id}')
for listing_object in dir_listing:
file_set.add(listing_object.name)
return file_set

def get_all_snapshot_files(self, cluster_id):
region_name = next(iter(self.params.region_names), '')
bucket_name = self.params.get('backup_bucket_location').split()[0].format(region=region_name)
if self.params.get('backup_bucket_backend') == 's3':
return self._get_all_snapshot_files_s3(cluster_id=cluster_id, bucket_name=bucket_name,
region_name=region_name)
elif self.params.get('backup_bucket_backend') == 'gcs':
return self._get_all_snapshot_files_gce(cluster_id=cluster_id, bucket_name=bucket_name)
elif self.params.get('backup_bucket_backend') == 'azure':
return self._get_all_snapshot_files_azure(cluster_id=cluster_id, bucket_name=bucket_name)
else:
raise ValueError(f'"{self.params.get("backup_bucket_backend")}" not supported')


class ManagerTestFunctionsMixIn(
DatabaseOperations,
StressLoadOperations,
ClusterOperations,
BucketOperations,
SnapshotOperations,
):
test_config = TestConfig()
manager_test_metrics = ManagerTestMetrics()

def get_email_data(self):
self.log.info("Prepare data for email")

email_data = self._get_common_email_data()

restore_parameters = self.params.get("mgmt_restore_extra_params")

agent_backup_config = self.params.get("mgmt_agent_backup_config")
if agent_backup_config:
agent_backup_config = agent_backup_config.dict()

email_data.update(
{
"manager_server_repo": self.params.get("scylla_mgmt_address"),
"manager_agent_repo": (self.params.get("scylla_mgmt_agent_address") or
self.params.get("scylla_mgmt_address")),
"backup_time": str(self.manager_test_metrics.backup_time),
"restore_time": str(self.manager_test_metrics.restore_time),
"restore_parameters": restore_parameters,
"agent_backup_config": agent_backup_config,
}
)
return email_data

@cached_property
def locations(self) -> list[str]:
backend = self.params.get("backup_bucket_backend")

region = next(iter(self.params.region_names), '')
buckets = self.params.get("backup_bucket_location").format(region=region)
if not isinstance(buckets, list):
buckets = buckets.split()

# FIXME: Make it work with multiple locations or file a bug for scylla-manager.
return [f"{backend}:{location}" for location in buckets[:1]]

# pylint: disable=too-many-arguments
def verify_backup_success(self, mgr_cluster, backup_task, ks_names: list = None, tables_names: list = None,
truncate=True, restore_data_with_task=False, timeout=None):
if ks_names is None:
ks_names = ['keyspace1']
if tables_names is None:
tables_names = ['standard1']
ks_tables_map = {keyspace: tables_names for keyspace in ks_names}
if truncate:
for ks, tables in ks_tables_map.items():
for table_name in tables:
self.log.info(f'running truncate on {ks}.{table_name}')
self.db_cluster.nodes[0].run_cqlsh(f'TRUNCATE {ks}.{table_name}')
if restore_data_with_task:
self.restore_backup_with_task(mgr_cluster=mgr_cluster, snapshot_tag=backup_task.get_snapshot_tag(),
timeout=timeout, restore_data=True)
else:
snapshot_tag = backup_task.get_snapshot_tag()
self.restore_backup_without_manager(mgr_cluster=mgr_cluster, snapshot_tag=snapshot_tag,
ks_tables_list=ks_tables_map)
>>>>>>> fe5564501 (fix(bucket-locations): add templating to `backup_bucket_location`)

def restore_backup_without_manager(self, mgr_cluster, snapshot_tag, ks_tables_list, location=None,
precreated_backup=False):
@@ -559,7 +852,8 @@ def test_restore_multiple_backup_snapshots(self):  # pylint: disable=too-many-locals
             self.log.error("Test supports only AWS ATM")
             return
         persistent_manager_snapshots_dict = get_persistent_snapshots()
-        target_bucket = persistent_manager_snapshots_dict[cluster_backend]["bucket"]
+        region = next(iter(self.params.region_names), '')
+        target_bucket = persistent_manager_snapshots_dict[cluster_backend]["bucket"].format(region=region)
         backup_bucket_backend = self.params.get("backup_bucket_backend")
         location_list = [f"{backup_bucket_backend}:{target_bucket}"]
         confirmation_stress_template = persistent_manager_snapshots_dict[cluster_backend]["confirmation_stress_template"]
3 changes: 2 additions & 1 deletion sdcm/nemesis.py
@@ -3127,7 +3127,8 @@ def _mgmt_backup(self, backup_specific_tables):
             raise UnsupportedNemesis('backup bucket location configuration is not defined!')

         backup_bucket_backend = self.cluster.params.get("backup_bucket_backend")
-        backup_bucket_location = self.cluster.params.get("backup_bucket_location")
+        region = next(iter(self.cluster.params.region_names), '')
+        backup_bucket_location = self.cluster.params.get("backup_bucket_location").format(region=region)
         location = f"{backup_bucket_backend}:{backup_bucket_location.split()[0]}"
         self._delete_existing_backups(mgr_cluster)
         if backup_specific_tables:
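Putting the pieces together, a hedged end-to-end check of the location string the nemesis now hands to sctool; `FakeParams` is a hypothetical stand-in for SCT's params object:

```
class FakeParams:
    """Hypothetical stand-in for SCT's params object."""
    region_names = ['eu-west-1']
    _values = {
        'backup_bucket_backend': 's3',
        'backup_bucket_location': 'manager-backup-tests-{region}',
    }

    def get(self, key):
        return self._values[key]

params = FakeParams()
region = next(iter(params.region_names), '')
bucket = params.get('backup_bucket_location').format(region=region)
location = f"{params.get('backup_bucket_backend')}:{bucket.split()[0]}"
assert location == 's3:manager-backup-tests-eu-west-1'  # value passed via --location
```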
