diff --git a/.changes/next-release/enhancement-s3-67553.json b/.changes/next-release/enhancement-s3-67553.json new file mode 100644 index 000000000000..9d81f9c38255 --- /dev/null +++ b/.changes/next-release/enhancement-s3-67553.json @@ -0,0 +1,5 @@ +{ + "type": "enhancement", + "category": "``s3``", + "description": "Add parameter to validate source and destination S3 URIs to the ``mv`` command." +} diff --git a/awscli/customizations/s3/subcommands.py b/awscli/customizations/s3/subcommands.py index 25f62ca54a01..996d103309a0 100644 --- a/awscli/customizations/s3/subcommands.py +++ b/awscli/customizations/s3/subcommands.py @@ -15,7 +15,7 @@ import sys from botocore.client import Config -from botocore.utils import is_s3express_bucket +from botocore.utils import is_s3express_bucket, ensure_boolean from dateutil.parser import parse from dateutil.tz import tzlocal @@ -31,7 +31,8 @@ from awscli.customizations.s3.s3handler import S3TransferHandlerFactory from awscli.customizations.s3.utils import find_bucket_key, AppendFilter, \ find_dest_path_comp_key, human_readable_size, \ - RequestParamsMapper, split_s3_bucket_key, block_unsupported_resources + RequestParamsMapper, split_s3_bucket_key, block_unsupported_resources, \ + S3PathResolver from awscli.customizations.utils import uni_print from awscli.customizations.s3.syncstrategy.base import MissingFileSync, \ SizeAndLastModifiedSync, NeverSync @@ -430,6 +431,27 @@ ) } +VALIDATE_SAME_S3_PATHS = { + 'name': 'validate-same-s3-paths', 'action': 'store_true', + 'help_text': ( + 'Resolves the source and destination S3 URIs to their ' + 'underlying buckets and verifies that the file or object ' + 'is not being moved onto itself. If you are using any type ' + 'of access point ARNs or access point aliases in your S3 URIs, ' + 'we strongly recommended using this parameter to help prevent ' + 'accidental deletions of the source file or object. This ' + 'parameter resolves the underlying buckets of S3 access point ' + 'ARNs and aliases, S3 on Outposts access point ARNs, and ' + 'Multi-Region Access Point ARNs. S3 on Outposts access point ' + 'aliases are not supported. Instead of using this parameter, ' + 'you can set the environment variable ' + '``AWS_CLI_S3_MV_VALIDATE_SAME_S3_PATHS`` to ``true``. ' + 'NOTE: Path validation requires making additional API calls. ' + 'Future updates to this path-validation mechanism might change ' + 'which API calls are made.' + ) +} + TRANSFER_ARGS = [DRYRUN, QUIET, INCLUDE, EXCLUDE, ACL, FOLLOW_SYMLINKS, NO_FOLLOW_SYMLINKS, NO_GUESS_MIME_TYPE, SSE, SSE_C, SSE_C_KEY, SSE_KMS_KEY_ID, SSE_C_COPY_SOURCE, @@ -685,7 +707,9 @@ def _run_main(self, parsed_args, parsed_globals): self._convert_path_args(parsed_args) params = self._build_call_parameters(parsed_args, {}) cmd_params = CommandParameters(self.NAME, params, - self.USAGE) + self.USAGE, + self._session, + parsed_globals) cmd_params.add_region(parsed_globals) cmd_params.add_endpoint_url(parsed_globals) cmd_params.add_verify_ssl(parsed_globals) @@ -735,13 +759,13 @@ class CpCommand(S3TransferCommand): class MvCommand(S3TransferCommand): NAME = 'mv' - DESCRIPTION = "Moves a local file or S3 object to " \ - "another location locally or in S3." + DESCRIPTION = BasicCommand.FROM_FILE('s3', 'mv', '_description.rst') USAGE = " or " \ "or " ARG_TABLE = [{'name': 'paths', 'nargs': 2, 'positional_arg': True, 'synopsis': USAGE}] + TRANSFER_ARGS +\ - [METADATA, METADATA_DIRECTIVE, RECURSIVE] + [METADATA, METADATA_DIRECTIVE, RECURSIVE, VALIDATE_SAME_S3_PATHS] + class RmCommand(S3TransferCommand): NAME = 'rm' @@ -1123,7 +1147,8 @@ class CommandParameters(object): This class is used to do some initial error based on the parameters and arguments passed to the command line. """ - def __init__(self, cmd, parameters, usage): + def __init__(self, cmd, parameters, usage, + session=None, parsed_globals=None): """ Stores command name and parameters. Ensures that the ``dir_op`` flag is true if a certain command is being used. @@ -1136,6 +1161,8 @@ def __init__(self, cmd, parameters, usage): self.cmd = cmd self.parameters = parameters self.usage = usage + self._session = session + self._parsed_globals = parsed_globals if 'dir_op' not in parameters: self.parameters['dir_op'] = False if 'follow_symlinks' not in parameters: @@ -1198,9 +1225,12 @@ def _validate_streaming_paths(self): def _validate_path_args(self): # If we're using a mv command, you can't copy the object onto itself. params = self.parameters - if self.cmd == 'mv' and self._same_path(params['src'], params['dest']): - raise ValueError("Cannot mv a file onto itself: '%s' - '%s'" % ( - params['src'], params['dest'])) + if self.cmd == 'mv' and params['paths_type']=='s3s3': + self._raise_if_mv_same_paths(params['src'], params['dest']) + if self._should_validate_same_underlying_s3_paths(): + self._validate_same_underlying_s3_paths() + if self._should_emit_validate_s3_paths_warning(): + self._emit_validate_s3_paths_warning() # If the user provided local path does not exist, hard fail because # we know that we will not be able to upload the file. @@ -1225,6 +1255,66 @@ def _same_path(self, src, dest): src_base = os.path.basename(src) return src == os.path.join(dest, src_base) + def _same_key(self, src, dest): + _, src_key = split_s3_bucket_key(src) + _, dest_key = split_s3_bucket_key(dest) + return self._same_path(f'/{src_key}', f'/{dest_key}') + + def _validate_same_s3_paths_enabled(self): + validate_env_var = ensure_boolean( + os.environ.get('AWS_CLI_S3_MV_VALIDATE_SAME_S3_PATHS')) + return (self.parameters.get('validate_same_s3_paths') or + validate_env_var) + + def _should_emit_validate_s3_paths_warning(self): + is_same_key = self._same_key( + self.parameters['src'], self.parameters['dest']) + src_has_underlying_path = S3PathResolver.has_underlying_s3_path( + self.parameters['src']) + dest_has_underlying_path = S3PathResolver.has_underlying_s3_path( + self.parameters['dest']) + return (is_same_key and not self._validate_same_s3_paths_enabled() and + (src_has_underlying_path or dest_has_underlying_path)) + + def _emit_validate_s3_paths_warning(self): + msg = ( + "warning: Provided s3 paths may resolve to same underlying " + "s3 object(s) and result in deletion instead of being moved. " + "To resolve and validate underlying s3 paths are not the same, " + "specify the --validate-same-s3-paths flag or set the " + "AWS_CLI_S3_MV_VALIDATE_SAME_S3_PATHS environment variable to true. " + "To resolve s3 outposts access point path, the arn must be " + "used instead of the alias.\n" + ) + uni_print(msg, sys.stderr) + + def _should_validate_same_underlying_s3_paths(self): + is_same_key = self._same_key( + self.parameters['src'], self.parameters['dest']) + return is_same_key and self._validate_same_s3_paths_enabled() + + def _validate_same_underlying_s3_paths(self): + src_paths = S3PathResolver.from_session( + self._session, + self.parameters.get('source_region', self._parsed_globals.region), + self._parsed_globals.verify_ssl + ).resolve_underlying_s3_paths(self.parameters['src']) + dest_paths = S3PathResolver.from_session( + self._session, + self._parsed_globals.region, + self._parsed_globals.verify_ssl + ).resolve_underlying_s3_paths(self.parameters['dest']) + for src_path in src_paths: + for dest_path in dest_paths: + self._raise_if_mv_same_paths(src_path, dest_path) + + def _raise_if_mv_same_paths(self, src, dest): + if self._same_path(src, dest): + raise ValueError( + "Cannot mv a file onto itself: " + f"{self.parameters['src']} - {self.parameters['dest']}" + ) + def _normalize_s3_trailing_slash(self, paths): for i, path in enumerate(paths): if path.startswith('s3://'): diff --git a/awscli/customizations/s3/utils.py b/awscli/customizations/s3/utils.py index 95a1d75119d9..8dda7331c4c0 100644 --- a/awscli/customizations/s3/utils.py +++ b/awscli/customizations/s3/utils.py @@ -796,3 +796,114 @@ def read(self, amt=None): return self._fileobj.read() else: return self._fileobj.read(amt) + + +class S3PathResolver: + _S3_ACCESSPOINT_ARN_TO_ACCOUNT_NAME_REGEX = re.compile( + r'^arn:aws.*:s3:[a-z0-9\-]+:(?P[0-9]{12}):accesspoint[:/]' + r'(?P[a-z0-9\-]{3,50})$' + ) + _S3_OUTPOST_ACCESSPOINT_ARN_TO_ACCOUNT_REGEX = re.compile( + r'^arn:aws.*:s3-outposts:[a-z0-9\-]+:(?P[0-9]{12}):outpost/' + r'op-[a-zA-Z0-9]+/accesspoint[:/][a-z0-9\-]{3,50}$' + ) + _S3_MRAP_ARN_TO_ACCOUNT_ALIAS_REGEX = re.compile( + r'^arn:aws:s3::(?P[0-9]{12}):accesspoint[:/]' + r'(?P[a-zA-Z0-9]+\.mrap)$' + ) + + def __init__(self, s3control_client, sts_client): + self._s3control_client = s3control_client + self._sts_client = sts_client + + @classmethod + def has_underlying_s3_path(self, path): + bucket, _ = split_s3_bucket_key(path) + return bool( + self._S3_ACCESSPOINT_ARN_TO_ACCOUNT_NAME_REGEX.match(bucket) or + self._S3_OUTPOST_ACCESSPOINT_ARN_TO_ACCOUNT_REGEX.match(bucket) or + self._S3_MRAP_ARN_TO_ACCOUNT_ALIAS_REGEX.match(bucket) or + bucket.endswith('-s3alias') or bucket.endswith('--op-s3')) + + @classmethod + def from_session(cls, session, region, verify_ssl): + s3control_client = session.create_client( + 's3control', + region_name=region, + verify=verify_ssl, + ) + sts_client = session.create_client( + 'sts', + verify=verify_ssl, + ) + return cls(s3control_client, sts_client) + + def resolve_underlying_s3_paths(self, path): + bucket, key = split_s3_bucket_key(path) + match = self._S3_ACCESSPOINT_ARN_TO_ACCOUNT_NAME_REGEX.match(bucket) + if match: + return self._resolve_accesspoint_arn( + match.group('account'), match.group('name'), key + ) + match = self._S3_OUTPOST_ACCESSPOINT_ARN_TO_ACCOUNT_REGEX.match(bucket) + if match: + return self._resolve_accesspoint_arn( + match.group('account'), bucket, key + ) + match = self._S3_MRAP_ARN_TO_ACCOUNT_ALIAS_REGEX.match(bucket) + if match: + return self._resolve_mrap_alias( + match.group('account'), match.group('alias'), key + ) + if bucket.endswith('-s3alias'): + return self._resolve_accesspoint_alias(bucket, key) + if bucket.endswith('--op-s3'): + raise ValueError( + "Can't resolve underlying bucket name of s3 outposts " + "access point alias. Use arn instead to resolve the " + "bucket name and validate the mv command." + ) + return [path] + + def _resolve_accesspoint_arn(self, account, name, key): + bucket = self._get_access_point_bucket(account, name) + return [f"s3://{bucket}/{key}"] + + def _resolve_accesspoint_alias(self, alias, key): + account = self._get_account_id() + bucket = self._get_access_point_bucket(account, alias) + return [f"s3://{bucket}/{key}"] + + def _resolve_mrap_alias(self, account, alias, key): + buckets = self._get_mrap_buckets(account, alias) + return [f"s3://{bucket}/{key}" for bucket in buckets] + + def _get_access_point_bucket(self, account, name): + return self._s3control_client.get_access_point( + AccountId=account, + Name=name + )['Bucket'] + + def _get_account_id(self): + return self._sts_client.get_caller_identity()['Account'] + + def _get_mrap_buckets(self, account, alias): + next_token = None + while True: + args = {"AccountId": account} + if next_token: + args['NextToken'] = next_token + response = self._s3control_client.list_multi_region_access_points( + **args + ) + for access_point in response['AccessPoints']: + if access_point['Alias'] == alias: + return [ + region["Bucket"] for region in access_point["Regions"] + ] + next_token = response.get('NextToken') + if not next_token: + raise ValueError( + "Couldn't find multi-region access point " + f"with alias {alias} in account {account}" + ) diff --git a/awscli/examples/s3/mv/_description.rst b/awscli/examples/s3/mv/_description.rst new file mode 100644 index 000000000000..8c6d2552606e --- /dev/null +++ b/awscli/examples/s3/mv/_description.rst @@ -0,0 +1,13 @@ +Moves a local file or S3 object to another location locally or in S3. + +.. WARNING:: + If you are using any type of access point ARNs or access point aliases + in your S3 URIs, you must take extra care to make sure that your source + and destination S3 URIs resolve to different underlying buckets. If the + source and destination buckets are the same, the source file or object + can be moved onto itself, which can result in accidental deletion of + your source file or object. + + To verify that the source and destination buckets are not the same, + use the ``--validate-same-s3-paths`` parameter, or set the environment + variable ``AWS_CLI_S3_MV_VALIDATE_SAME_S3_PATHS`` to ``true``. \ No newline at end of file diff --git a/tests/functional/s3/test_mv_command.py b/tests/functional/s3/test_mv_command.py index 78d7cec1272e..49af07994395 100644 --- a/tests/functional/s3/test_mv_command.py +++ b/tests/functional/s3/test_mv_command.py @@ -12,7 +12,9 @@ # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. from awscli.compat import six +from awscli.customizations.s3.utils import S3PathResolver from tests.functional.s3 import BaseS3TransferCommandTest +from tests import requires_crt class TestMvCommand(BaseS3TransferCommandTest): @@ -129,3 +131,287 @@ def test_copy_move_with_request_payer(self): 'sourcebucket', 'sourcekey', RequestPayer='requester') ] ) + + +class TestMvCommandWithValidateSameS3Paths(BaseS3TransferCommandTest): + + prefix = 's3 mv ' + + def assert_validates_cannot_mv_onto_itself(self, cmd): + stderr = self.run_cmd(cmd, expected_rc=255)[1] + self.assertIn('Cannot mv a file onto itself', stderr) + + def assert_runs_mv_without_validation(self, cmd): + self.parsed_responses = [ + self.head_object_response(), + self.copy_object_response(), + self.delete_object_response(), + ] + self.run_cmd(cmd, expected_rc=0) + self.assertEqual(len(self.operations_called), 3, + self.operations_called) + self.assertEqual(self.operations_called[0][0].name, 'HeadObject') + self.assertEqual(self.operations_called[1][0].name, 'CopyObject') + self.assertEqual(self.operations_called[2][0].name, 'DeleteObject') + + def assert_raises_warning(self, cmd): + self.parsed_responses = [ + self.head_object_response(), + self.copy_object_response(), + self.delete_object_response(), + ] + stderr = self.run_cmd(cmd, expected_rc=0)[1] + self.assertIn('warning: Provided s3 paths may resolve', stderr) + + def test_cant_mv_object_onto_itself_access_point_arn(self): + cmdline = (f"{self.prefix}s3://bucket/key " + "s3://arn:aws:s3:us-west-2:123456789012:accesspoint/" + "myaccesspoint/key " + "--validate-same-s3-paths") + self.parsed_responses = [ + {"Bucket": "bucket"} + ] + self.assert_validates_cannot_mv_onto_itself(cmdline) + + def test_cant_mv_object_onto_itself_access_point_arn_as_source(self): + cmdline = (f"{self.prefix}s3://arn:aws:s3:us-west-2:123456789012:" + "accesspoint/myaccesspoint/key " + "s3://bucket/key " + "--validate-same-s3-paths") + self.parsed_responses = [ + {"Bucket": "bucket"} + ] + self.assert_validates_cannot_mv_onto_itself(cmdline) + + def test_cant_mv_object_onto_itself_access_point_arn_with_env_var(self): + self.environ['AWS_CLI_S3_MV_VALIDATE_SAME_S3_PATHS'] = 'true' + cmdline = (f"{self.prefix}s3://bucket/key " + "s3://arn:aws:s3:us-west-2:123456789012:accesspoint/" + "myaccesspoint/key") + self.parsed_responses = [ + {"Bucket": "bucket"} + ] + self.assert_validates_cannot_mv_onto_itself(cmdline) + + def test_cant_mv_object_onto_itself_access_point_arn_base_key(self): + cmdline = (f"{self.prefix}s3://bucket/key " + "s3://arn:aws:s3:us-west-2:123456789012:accesspoint/" + "myaccesspoint/ " + "--validate-same-s3-paths") + self.parsed_responses = [ + {"Bucket": "bucket"} + ] + self.assert_validates_cannot_mv_onto_itself(cmdline) + + def test_cant_mv_object_onto_itself_access_point_arn_base_prefix(self): + cmdline = (f"{self.prefix}s3://bucket/prefix/key " + "s3://arn:aws:s3:us-west-2:123456789012:accesspoint/" + "myaccesspoint/prefix/ " + "--validate-same-s3-paths") + self.parsed_responses = [ + {"Bucket": "bucket"} + ] + self.assert_validates_cannot_mv_onto_itself(cmdline) + + def test_cant_mv_object_onto_itself_access_point_alias(self): + cmdline = (f"{self.prefix} s3://bucket/key " + "s3://myaccesspoint-foobar-s3alias/key " + "--validate-same-s3-paths") + self.parsed_responses = [ + {"Account": "123456789012"}, + {"Bucket": "bucket"} + ] + self.assert_validates_cannot_mv_onto_itself(cmdline) + + def test_cant_mv_object_onto_itself_outpost_access_point_arn(self): + cmdline = (f"{self.prefix}s3://bucket/key " + "s3://arn:aws:s3-outposts:us-east-1:123456789012:outpost/" + "op-foobar/accesspoint/myaccesspoint/key " + "--validate-same-s3-paths") + self.parsed_responses = [ + {"Bucket": "bucket"} + ] + self.assert_validates_cannot_mv_onto_itself(cmdline) + + def test_outpost_access_point_alias_raises_error(self): + cmdline = (f"{self.prefix} s3://bucket/key " + "s3://myaccesspoint-foobar--op-s3/key " + "--validate-same-s3-paths") + stderr = self.run_cmd(cmdline, expected_rc=255)[1] + self.assertIn("Can't resolve underlying bucket name", stderr) + + def test_cant_mv_object_onto_itself_mrap_arn(self): + cmdline = (f"{self.prefix} s3://bucket/key " + "s3://arn:aws:s3::123456789012:accesspoint/foobar.mrap/key " + "--validate-same-s3-paths") + self.parsed_responses = [ + { + "AccessPoints": [{ + "Alias": "foobar.mrap", + "Regions": [ + {"Bucket": "differentbucket"}, + {"Bucket": "bucket"} + ] + }] + } + ] + self.assert_validates_cannot_mv_onto_itself(cmdline) + + def test_get_mrap_buckets_raises_if_alias_not_found(self): + cmdline = (f"{self.prefix} s3://bucket/key " + "s3://arn:aws:s3::123456789012:accesspoint/foobar.mrap/key " + "--validate-same-s3-paths") + self.parsed_responses = [ + { + "AccessPoints": [{ + "Alias": "baz.mrap", + "Regions": [ + {"Bucket": "differentbucket"}, + {"Bucket": "bucket"} + ] + }] + } + ] + stderr = self.run_cmd(cmdline, expected_rc=255)[1] + self.assertEqual( + "\nCouldn't find multi-region access point with alias foobar.mrap " + "in account 123456789012\n", + stderr + ) + + def test_mv_works_if_access_point_arn_resolves_to_different_bucket(self): + cmdline = (f"{self.prefix}s3://bucket/key " + "s3://arn:aws:s3:us-west-2:123456789012:accesspoint/" + "myaccesspoint/key " + "--validate-same-s3-paths") + self.parsed_responses = [ + {"Bucket": "differentbucket"}, + self.head_object_response(), + self.copy_object_response(), + self.delete_object_response(), + ] + self.run_cmd(cmdline, expected_rc=0) + self.assertEqual(len(self.operations_called), 4, + self.operations_called) + self.assertEqual(self.operations_called[0][0].name, 'GetAccessPoint') + self.assertEqual(self.operations_called[1][0].name, 'HeadObject') + self.assertEqual(self.operations_called[2][0].name, 'CopyObject') + self.assertEqual(self.operations_called[3][0].name, 'DeleteObject') + + def test_mv_works_if_access_point_alias_resolves_to_different_bucket(self): + cmdline = (f"{self.prefix} s3://bucket/key " + "s3://myaccesspoint-foobar-s3alias/key " + "--validate-same-s3-paths") + self.parsed_responses = [ + {"Account": "123456789012"}, + {"Bucket": "differentbucket"}, + self.head_object_response(), + self.copy_object_response(), + self.delete_object_response(), + ] + self.run_cmd(cmdline, expected_rc=0) + self.assertEqual(len(self.operations_called), 5, + self.operations_called) + self.assertEqual(self.operations_called[0][0].name, 'GetCallerIdentity') + self.assertEqual(self.operations_called[1][0].name, 'GetAccessPoint') + self.assertEqual(self.operations_called[2][0].name, 'HeadObject') + self.assertEqual(self.operations_called[3][0].name, 'CopyObject') + self.assertEqual(self.operations_called[4][0].name, 'DeleteObject') + + def test_mv_works_if_outpost_access_point_arn_resolves_to_different_bucket(self): + cmdline = (f"{self.prefix}s3://bucket/key " + "s3://arn:aws:s3-outposts:us-east-1:123456789012:outpost/" + "op-foobar/accesspoint/myaccesspoint/key " + "--validate-same-s3-paths") + self.parsed_responses = [ + {"Bucket": "differentbucket"}, + self.head_object_response(), + self.copy_object_response(), + self.delete_object_response(), + ] + self.run_cmd(cmdline, expected_rc=0) + self.assertEqual(len(self.operations_called), 4, + self.operations_called) + self.assertEqual(self.operations_called[0][0].name, 'GetAccessPoint') + self.assertEqual(self.operations_called[1][0].name, 'HeadObject') + self.assertEqual(self.operations_called[2][0].name, 'CopyObject') + self.assertEqual(self.operations_called[3][0].name, 'DeleteObject') + + @requires_crt + def test_mv_works_if_mrap_arn_resolves_to_different_bucket(self): + cmdline = (f"{self.prefix} s3://bucket/key " + "s3://arn:aws:s3::123456789012:accesspoint/foobar.mrap/key " + "--validate-same-s3-paths") + self.parsed_responses = [ + { + "AccessPoints": [{ + "Alias": "foobar.mrap", + "Regions": [ + {"Bucket": "differentbucket"}, + ] + }] + }, + self.head_object_response(), + self.copy_object_response(), + self.delete_object_response(), + ] + self.run_cmd(cmdline, expected_rc=0) + self.assertEqual(len(self.operations_called), 4, + self.operations_called) + self.assertEqual(self.operations_called[0][0].name, 'ListMultiRegionAccessPoints') + self.assertEqual(self.operations_called[1][0].name, 'HeadObject') + self.assertEqual(self.operations_called[2][0].name, 'CopyObject') + self.assertEqual(self.operations_called[3][0].name, 'DeleteObject') + + def test_skips_validation_if_keys_are_different_accesspoint_arn(self): + cmdline = (f"{self.prefix}s3://bucket/key " + "s3://arn:aws:s3:us-west-2:123456789012:accesspoint/" + "myaccesspoint/key2 " + "--validate-same-s3-paths") + self.assert_runs_mv_without_validation(cmdline) + + def test_skips_validation_if_prefixes_are_different_accesspoint_arn(self): + cmdline = (f"{self.prefix}s3://bucket/key " + "s3://arn:aws:s3:us-west-2:123456789012:accesspoint/" + "myaccesspoint/prefix/ " + "--validate-same-s3-paths") + self.assert_runs_mv_without_validation(cmdline) + + def test_skips_validation_if_keys_are_different_accesspoint_alias(self): + cmdline = (f"{self.prefix} s3://bucket/key " + "s3://myaccesspoint-foobar-s3alias/key2 " + "--validate-same-s3-paths") + self.assert_runs_mv_without_validation(cmdline) + + def test_skips_validation_if_keys_are_different_outpost_arn(self): + cmdline = (f"{self.prefix}s3://bucket/key " + "s3://arn:aws:s3-outposts:us-east-1:123456789012:outpost/" + "op-foobar/accesspoint/myaccesspoint/key2 " + "--validate-same-s3-paths") + self.assert_runs_mv_without_validation(cmdline) + + def test_skips_validation_if_keys_are_different_outpost_alias(self): + cmdline = (f"{self.prefix} s3://bucket/key " + "s3://myaccesspoint-foobar--op-s3/key2 " + "--validate-same-s3-paths") + self.assert_runs_mv_without_validation(cmdline) + + @requires_crt + def test_skips_validation_if_keys_are_different_mrap_arn(self): + cmdline = (f"{self.prefix} s3://bucket/key " + "s3://arn:aws:s3::123456789012:accesspoint/foobar.mrap/key2 " + "--validate-same-s3-paths") + self.assert_runs_mv_without_validation(cmdline) + + def test_raises_warning_if_validation_not_set(self): + cmdline = (f"{self.prefix}s3://bucket/key " + "s3://arn:aws:s3:us-west-2:123456789012:accesspoint/" + "myaccesspoint/key") + self.assert_raises_warning(cmdline) + + def test_raises_warning_if_validation_not_set_source(self): + cmdline = (f"{self.prefix}" + "s3://arn:aws:s3:us-west-2:123456789012:accesspoint/" + "myaccesspoint/key " + "s3://bucket/key") + self.assert_raises_warning(cmdline) diff --git a/tests/unit/customizations/s3/test_subcommands.py b/tests/unit/customizations/s3/test_subcommands.py index 1179a4a939b6..0af7a595643a 100644 --- a/tests/unit/customizations/s3/test_subcommands.py +++ b/tests/unit/customizations/s3/test_subcommands.py @@ -553,6 +553,11 @@ def setUp(self): self.file_creator = FileCreator() self.loc_files = make_loc_files(self.file_creator) self.bucket = 's3testbucket' + self.session = mock.Mock() + self.parsed_global = FakeArgs( + region='us-west-2', + endpoint_url=None, + verify_ssl=False) def tearDown(self): self.environ_patch.stop() @@ -577,7 +582,8 @@ def test_check_path_type_pass(self): 'locallocal': [local_file, local_file]} for cmd in cmds.keys(): - cmd_param = CommandParameters(cmd, {}, '') + cmd_param = CommandParameters(cmd, {}, '', + self.session, self.parsed_global) cmd_param.add_region(mock.Mock()) correct_paths = cmds[cmd] for path_args in correct_paths: @@ -605,7 +611,8 @@ def test_check_path_type_fail(self): 'locallocal': [local_file, local_file]} for cmd in cmds.keys(): - cmd_param = CommandParameters(cmd, {}, '') + cmd_param = CommandParameters(cmd, {}, '', + self.session, self.parsed_global) cmd_param.add_region(mock.Mock()) wrong_paths = cmds[cmd] for path_args in wrong_paths: @@ -696,7 +703,9 @@ def test_validate_sse_c_args_wrong_path_type(self): def test_adds_is_move(self): params = {} - CommandParameters('mv', params, '') + CommandParameters('mv', params, '', + session=self.session, + parsed_globals=self.parsed_global) self.assertTrue(params.get('is_move')) # is_move should only be true for mv diff --git a/tests/unit/customizations/s3/test_utils.py b/tests/unit/customizations/s3/test_utils.py index 3190028c9521..0256677f08b2 100644 --- a/tests/unit/customizations/s3/test_utils.py +++ b/tests/unit/customizations/s3/test_utils.py @@ -39,13 +39,43 @@ ProvideUploadContentTypeSubscriber, ProvideCopyContentTypeSubscriber, ProvideLastModifiedTimeSubscriber, DirectoryCreatorSubscriber, DeleteSourceObjectSubscriber, DeleteSourceFileSubscriber, - DeleteCopySourceObjectSubscriber, NonSeekableStream, CreateDirectoryError) + DeleteCopySourceObjectSubscriber, NonSeekableStream, CreateDirectoryError, + S3PathResolver) from awscli.customizations.s3.results import WarningResult from tests.unit.customizations.s3 import FakeTransferFuture from tests.unit.customizations.s3 import FakeTransferFutureMeta from tests.unit.customizations.s3 import FakeTransferFutureCallArgs +@pytest.fixture +def s3control_client(): + client = mock.MagicMock() + client.get_access_point.return_value = { + "Bucket": "mybucket" + } + client.list_multi_region_access_points.return_value = { + "AccessPoints": [{ + "Alias": "myalias.mrap", + "Regions": [{"Bucket": "mybucket"}] + }] + } + return client + + +@pytest.fixture +def sts_client(): + client = mock.MagicMock() + client.get_caller_identity.return_value = { + "Account": "123456789012" + } + return client + + +@pytest.fixture +def s3_path_resolver(s3control_client, sts_client): + return S3PathResolver(s3control_client, sts_client) + + @pytest.mark.parametrize( 'bytes_int, expected', ( @@ -976,3 +1006,141 @@ def test_can_make_stream_unseekable(self): def test_can_specify_amount_for_nonseekable_stream(self): nonseekable_fileobj = NonSeekableStream(StringIO('foobar')) self.assertEqual(nonseekable_fileobj.read(3), 'foo') + + +class TestS3PathResolver: + _BASE_ACCESSPOINT_ARN = ( + "s3://arn:aws:s3:us-west-2:123456789012:accesspoint/myaccesspoint") + _BASE_OUTPOST_ACCESSPOINT_ARN = ( + "s3://arn:aws:s3-outposts:us-east-1:123456789012:outpost" + "/op-foo/accesspoint/myaccesspoint") + _BASE_ACCESSPOINT_ALIAS = "s3://myaccesspoint-foobar12345-s3alias" + _BASE_OUTPOST_ACCESSPOINT_ALIAS = "s3://myaccesspoint-foobar12345--op-s3" + _BASE_MRAP_ARN = "s3://arn:aws:s3::123456789012:accesspoint/myalias.mrap" + + @pytest.mark.parametrize( + "path,resolved", + [(_BASE_ACCESSPOINT_ARN,"s3://mybucket/"), + (f"{_BASE_ACCESSPOINT_ARN}/","s3://mybucket/"), + (f"{_BASE_ACCESSPOINT_ARN}/mykey","s3://mybucket/mykey"), + (f"{_BASE_ACCESSPOINT_ARN}/myprefix/","s3://mybucket/myprefix/"), + (f"{_BASE_ACCESSPOINT_ARN}/myprefix/mykey", + "s3://mybucket/myprefix/mykey")] + ) + def test_resolves_accesspoint_arn( + self, path, resolved, s3_path_resolver, s3control_client + ): + resolved_paths = s3_path_resolver.resolve_underlying_s3_paths(path) + assert resolved_paths == [resolved] + s3control_client.get_access_point.assert_called_with( + AccountId="123456789012", + Name="myaccesspoint" + ) + + @pytest.mark.parametrize( + "path,resolved", + [(_BASE_OUTPOST_ACCESSPOINT_ARN,"s3://mybucket/"), + (f"{_BASE_OUTPOST_ACCESSPOINT_ARN}/","s3://mybucket/"), + (f"{_BASE_OUTPOST_ACCESSPOINT_ARN}/mykey","s3://mybucket/mykey"), + (f"{_BASE_OUTPOST_ACCESSPOINT_ARN}/myprefix/", + "s3://mybucket/myprefix/"), + (f"{_BASE_OUTPOST_ACCESSPOINT_ARN}/myprefix/mykey", + "s3://mybucket/myprefix/mykey")] + ) + def test_resolves_outpost_accesspoint_arn( + self, path, resolved, s3_path_resolver, s3control_client + ): + resolved_paths = s3_path_resolver.resolve_underlying_s3_paths(path) + assert resolved_paths == [resolved] + s3control_client.get_access_point.assert_called_with( + AccountId="123456789012", + Name=("arn:aws:s3-outposts:us-east-1:123456789012:outpost" + "/op-foo/accesspoint/myaccesspoint") + ) + + @pytest.mark.parametrize( + "path,resolved", + [(_BASE_ACCESSPOINT_ALIAS,"s3://mybucket/"), + (f"{_BASE_ACCESSPOINT_ALIAS}/","s3://mybucket/"), + (f"{_BASE_ACCESSPOINT_ALIAS}/mykey","s3://mybucket/mykey"), + (f"{_BASE_ACCESSPOINT_ALIAS}/myprefix/","s3://mybucket/myprefix/"), + (f"{_BASE_ACCESSPOINT_ALIAS}/myprefix/mykey", + "s3://mybucket/myprefix/mykey")] + ) + def test_resolves_accesspoint_alias( + self, path, resolved, s3_path_resolver, s3control_client, sts_client + ): + resolved_paths = s3_path_resolver.resolve_underlying_s3_paths(path) + assert resolved_paths == [resolved] + sts_client.get_caller_identity.assert_called_once() + s3control_client.get_access_point.assert_called_with( + AccountId="123456789012", + Name="myaccesspoint-foobar12345-s3alias" + ) + + @pytest.mark.parametrize( + "path", + [(_BASE_OUTPOST_ACCESSPOINT_ALIAS), + (f"{_BASE_OUTPOST_ACCESSPOINT_ALIAS}/"), + (f"{_BASE_OUTPOST_ACCESSPOINT_ALIAS}/mykey"), + (f"{_BASE_OUTPOST_ACCESSPOINT_ALIAS}/myprefix/"), + (f"{_BASE_OUTPOST_ACCESSPOINT_ALIAS}/myprefix/mykey")] + ) + def test_outpost_accesspoint_alias_raises_exception( + self, path, s3_path_resolver + ): + with pytest.raises(ValueError) as e: + s3_path_resolver.resolve_underlying_s3_paths(path) + assert "Can't resolve underlying bucket name" in str(e.value) + + @pytest.mark.parametrize( + "path,resolved", + [(_BASE_MRAP_ARN,"s3://mybucket/"), + (f"{_BASE_MRAP_ARN}/","s3://mybucket/"), + (f"{_BASE_MRAP_ARN}/mykey","s3://mybucket/mykey"), + (f"{_BASE_MRAP_ARN}/myprefix/","s3://mybucket/myprefix/"), + (f"{_BASE_MRAP_ARN}/myprefix/mykey","s3://mybucket/myprefix/mykey")] + ) + def test_resolves_mrap_arn( + self, path, resolved, s3_path_resolver, s3control_client + ): + resolved_paths = s3_path_resolver.resolve_underlying_s3_paths(path) + assert resolved_paths == [resolved] + s3control_client.list_multi_region_access_points.assert_called_with( + AccountId="123456789012" + ) + + @pytest.mark.parametrize( + "path,resolved,name", + [(f"{_BASE_ACCESSPOINT_ARN}-s3alias/mykey","s3://mybucket/mykey", + "myaccesspoint-s3alias"), + (f"{_BASE_OUTPOST_ACCESSPOINT_ARN}--op-s3/mykey", + "s3://mybucket/mykey", + f"{_BASE_OUTPOST_ACCESSPOINT_ARN[5:]}--op-s3")] + ) + def test_alias_suffixes_dont_match_accesspoint_arns( + self, path, resolved, name, s3_path_resolver, s3control_client + ): + resolved_paths = s3_path_resolver.resolve_underlying_s3_paths(path) + assert resolved_paths == [resolved] + s3control_client.get_access_point.assert_called_with( + AccountId="123456789012", + Name=name + ) + + @pytest.mark.parametrize( + "path,expected_has_underlying_s3_path", + [(_BASE_ACCESSPOINT_ARN,True), + (f"{_BASE_ACCESSPOINT_ARN}/mykey",True), + (f"{_BASE_ACCESSPOINT_ARN}/myprefix/mykey",True), + (_BASE_ACCESSPOINT_ALIAS,True), + (_BASE_OUTPOST_ACCESSPOINT_ARN,True), + (_BASE_OUTPOST_ACCESSPOINT_ALIAS,True), + (_BASE_MRAP_ARN,True), + ("s3://mybucket/",False), + ("s3://mybucket/mykey",False), + ("s3://mybucket/myprefix/mykey",False)] + ) + def test_has_underlying_s3_path(self, path, expected_has_underlying_s3_path): + has_underlying_s3_path = S3PathResolver.has_underlying_s3_path(path) + assert has_underlying_s3_path == expected_has_underlying_s3_path