From 0ed43fb76cf41a75434b94945bc42d4a4f2ffcc4 Mon Sep 17 00:00:00 2001
From: Steve Yoo <106777148+hssyoo@users.noreply.github.com>
Date: Fri, 26 Jul 2024 12:26:09 -0400
Subject: [PATCH] Add s3 integration tests using directory buckets (#8795)

---
 awscli/testutils.py                          |  54 ++++++-
 .../customizations/s3/test_plugin.py         | 138 +++++++++++++-----
 2 files changed, 151 insertions(+), 41 deletions(-)

diff --git a/awscli/testutils.py b/awscli/testutils.py
index 950290a0567d..ae36528aa426 100644
--- a/awscli/testutils.py
+++ b/awscli/testutils.py
@@ -168,6 +168,45 @@ def create_bucket(session, name=None, region=None):
     return bucket_name
 
 
+def create_dir_bucket(session, name=None, location=None):
+    """
+    Creates an S3 directory bucket
+    :returns: the name of the bucket created
+    """
+    if not location:
+        location = ('us-west-2', 'usw2-az1')
+    region, az = location
+    client = session.create_client('s3', region_name=region)
+    if name:
+        bucket_name = name
+    else:
+        bucket_name = f"{random_bucket_name()}--{az}--x-s3"
+    params = {
+        'Bucket': bucket_name,
+        'CreateBucketConfiguration': {
+            'Location': {
+                'Type': 'AvailabilityZone',
+                'Name': az
+            },
+            'Bucket': {
+                'Type': 'Directory',
+                'DataRedundancy': 'SingleAvailabilityZone'
+            }
+        }
+    }
+    try:
+        client.create_bucket(**params)
+    except ClientError as e:
+        if e.response['Error'].get('Code') == 'BucketAlreadyOwnedByYou':
+            # This can happen in the retried request, when the first one
+            # succeeded on S3 but the response never made it back.
+            # We still have a bucket ready for the test anyway.
+            pass
+        else:
+            raise
+    return bucket_name
+
+
 def random_chars(num_chars):
     """Returns random hex characters.
 
@@ -757,6 +796,19 @@ def create_bucket(self, name=None, region=None):
             self.delete_public_access_block(bucket_name)
         return bucket_name
 
+    def create_dir_bucket(self, name=None, location=None):
+        if location:
+            region, _ = location
+        else:
+            region = self.region
+        bucket_name = create_dir_bucket(self.session, name, location)
+        self.regions[bucket_name] = region
+        self.addCleanup(self.delete_bucket, bucket_name)
+
+        # Wait for the bucket to exist before letting it be used.
+        self.wait_bucket_exists(bucket_name)
+        return bucket_name
+
     def put_object(self, bucket_name, key_name, contents='', extra_args=None):
         client = self.create_client_for_bucket(bucket_name)
         call_args = {
@@ -805,7 +857,7 @@ def delete_bucket(self, bucket_name, attempts=5, delay=5):
 
     def remove_all_objects(self, bucket_name):
         client = self.create_client_for_bucket(bucket_name)
-        paginator = client.get_paginator('list_objects')
+        paginator = client.get_paginator('list_objects_v2')
         pages = paginator.paginate(Bucket=bucket_name)
         key_names = []
         for page in pages:
diff --git a/tests/integration/customizations/s3/test_plugin.py b/tests/integration/customizations/s3/test_plugin.py
index 6968b1702600..e943eaee46d0 100644
--- a/tests/integration/customizations/s3/test_plugin.py
+++ b/tests/integration/customizations/s3/test_plugin.py
@@ -46,6 +46,8 @@
 LOG = logging.getLogger('awscli.tests.integration')
 _SHARED_BUCKET = random_bucket_name()
 _DEFAULT_REGION = 'us-west-2'
+_DEFAULT_AZ = 'usw2-az1'
+_SHARED_DIR_BUCKET = f'{random_bucket_name()}--{_DEFAULT_AZ}--x-s3'
 
 
 def setup_module():
@@ -58,14 +60,29 @@ def setup_module():
         },
         'ObjectOwnership': 'ObjectWriter'
     }
+    dir_bucket_params = {
+        'Bucket': _SHARED_DIR_BUCKET,
+        'CreateBucketConfiguration': {
+            'Location': {
+                'Type': 'AvailabilityZone',
+                'Name': _DEFAULT_AZ
+            },
+            'Bucket': {
+                'Type': 'Directory',
+                'DataRedundancy': 'SingleAvailabilityZone'
+            }
+        }
+    }
     try:
         s3.create_bucket(**params)
+        s3.create_bucket(**dir_bucket_params)
     except Exception as e:
         # A create_bucket can fail for a number of reasons.
         # We're going to defer to the waiter below to make the
         # final call as to whether or not the bucket exists.
         LOG.debug("create_bucket() raised an exception: %s", e, exc_info=True)
     waiter.wait(Bucket=_SHARED_BUCKET)
+    waiter.wait(Bucket=_SHARED_DIR_BUCKET)
     s3.delete_public_access_block(
         Bucket=_SHARED_BUCKET
     )
@@ -74,7 +91,7 @@ def setup_module():
 def clear_out_bucket(bucket, delete_bucket=False):
     s3 = botocore.session.get_session().create_client(
         's3', region_name=_DEFAULT_REGION)
-    page = s3.get_paginator('list_objects')
+    page = s3.get_paginator('list_objects_v2')
     # Use pages paired with batch delete_objects().
     for page in page.paginate(Bucket=bucket):
         keys = [{'Key': obj['Key']} for obj in page.get('Contents', [])]
@@ -96,6 +113,7 @@ def clear_out_bucket(bucket, delete_bucket=False):
 
 def teardown_module():
     clear_out_bucket(_SHARED_BUCKET, delete_bucket=True)
+    clear_out_bucket(_SHARED_DIR_BUCKET, delete_bucket=True)
 
 
 @contextlib.contextmanager
@@ -141,25 +159,23 @@ class BaseS3IntegrationTest(BaseS3CLICommand):
 
     def setUp(self):
         clear_out_bucket(_SHARED_BUCKET)
+        clear_out_bucket(_SHARED_DIR_BUCKET)
         super(BaseS3IntegrationTest, self).setUp()
 
 
 class TestMoveCommand(BaseS3IntegrationTest):
-
-    def test_mv_local_to_s3(self):
-        bucket_name = _SHARED_BUCKET
+    def assert_mv_local_to_s3(self, bucket_name):
         full_path = self.files.create_file('foo.txt', 'this is foo.txt')
         p = aws('s3 mv %s s3://%s/foo.txt' % (full_path,
-                                            bucket_name))
+                                              bucket_name))
         self.assert_no_errors(p)
         # When we move an object, the local file is gone:
         self.assertTrue(not os.path.exists(full_path))
         # And now resides in s3.
         self.assert_key_contents_equal(bucket_name, 'foo.txt',
-                                     'this is foo.txt')
+                                       'this is foo.txt')
 
-    def test_mv_s3_to_local(self):
-        bucket_name = _SHARED_BUCKET
+    def assert_mv_s3_to_local(self, bucket_name):
         self.put_object(bucket_name, 'foo.txt', 'this is foo.txt')
         full_path = self.files.full_path('foo.txt')
         self.assertTrue(self.key_exists(bucket_name, key_name='foo.txt'))
@@ -171,9 +187,8 @@ def test_mv_s3_to_local(self):
         # The s3 file should not be there anymore.
         self.assertTrue(self.key_not_exists(bucket_name, key_name='foo.txt'))
 
-    def test_mv_s3_to_s3(self):
-        from_bucket = _SHARED_BUCKET
-        to_bucket = self.create_bucket()
+    def assert_mv_s3_to_s3(self, from_bucket, create_bucket_call):
+        to_bucket = create_bucket_call()
         self.put_object(from_bucket, 'foo.txt', 'this is foo.txt')
         p = aws('s3 mv s3://%s/foo.txt s3://%s/foo.txt' % (from_bucket,
                                                            to_bucket))
@@ -184,6 +199,30 @@ def test_mv_s3_to_s3(self):
         # And verify that the object no longer exists in the from_bucket.
         self.assertTrue(self.key_not_exists(from_bucket, key_name='foo.txt'))
 
+    def test_mv_local_to_s3(self):
+        self.assert_mv_local_to_s3(_SHARED_BUCKET)
+
+    def test_mv_local_to_s3_express(self):
+        self.assert_mv_local_to_s3(_SHARED_DIR_BUCKET)
+
+    def test_mv_s3_to_local(self):
+        self.assert_mv_s3_to_local(_SHARED_BUCKET)
+
+    def test_mv_s3_express_to_local(self):
+        self.assert_mv_s3_to_local(_SHARED_DIR_BUCKET)
+
+    def test_mv_s3_to_s3(self):
+        self.assert_mv_s3_to_s3(_SHARED_BUCKET, self.create_bucket)
+
+    def test_mv_s3_to_s3_express(self):
+        self.assert_mv_s3_to_s3(_SHARED_BUCKET, self.create_dir_bucket)
+
+    def test_mv_s3_express_to_s3_express(self):
+        self.assert_mv_s3_to_s3(_SHARED_DIR_BUCKET, self.create_dir_bucket)
+
+    def test_mv_s3_express_to_s3(self):
+        self.assert_mv_s3_to_s3(_SHARED_DIR_BUCKET, self.create_bucket)
+
     @pytest.mark.slow
     def test_mv_s3_to_s3_multipart(self):
         from_bucket = _SHARED_BUCKET
@@ -298,6 +337,14 @@ def test_cant_move_large_file_onto_itself(self):
 
 
 class TestRm(BaseS3IntegrationTest):
+    def assert_rm_with_page_size(self, bucket_name):
+        self.put_object(bucket_name, 'foo.txt', contents='hello world')
+        self.put_object(bucket_name, 'bar.txt', contents='hello world2')
+        p = aws('s3 rm s3://%s/ --recursive --page-size 1' % bucket_name)
+        self.assert_no_errors(p)
+
+        self.assertTrue(self.key_not_exists(bucket_name, key_name='foo.txt'))
+        self.assertTrue(self.key_not_exists(bucket_name, key_name='bar.txt'))
     @skip_if_windows('Newline in filename test not valid on windows.')
     # Windows won't let you do this. You'll get:
     # [Errno 22] invalid mode ('w') or filename:
@@ -320,23 +367,18 @@ def test_rm_with_newlines(self):
         self.assertTrue(self.key_not_exists(bucket_name, key_name='foo\r.txt'))
 
     def test_rm_with_page_size(self):
-        bucket_name = _SHARED_BUCKET
-        self.put_object(bucket_name, 'foo.txt', contents='hello world')
-        self.put_object(bucket_name, 'bar.txt', contents='hello world2')
-        p = aws('s3 rm s3://%s/ --recursive --page-size 1' % bucket_name)
-        self.assert_no_errors(p)
+        self.assert_rm_with_page_size(_SHARED_BUCKET)
 
-        self.assertTrue(self.key_not_exists(bucket_name, key_name='foo.txt'))
-        self.assertTrue(self.key_not_exists(bucket_name, key_name='bar.txt'))
+    def test_s3_express_rm_with_page_size(self):
+        self.assert_rm_with_page_size(_SHARED_DIR_BUCKET)
 
 
 class TestCp(BaseS3IntegrationTest):
 
-    def test_cp_to_and_from_s3(self):
+    def assert_cp_to_and_from_s3(self, bucket_name):
         # This tests the ability to put a single file in s3
         # move it to a different bucket.
         # and download the file locally
-        bucket_name = _SHARED_BUCKET
 
         # copy file into bucket.
         foo_txt = self.files.create_file('foo.txt', 'this is foo.txt')
@@ -361,6 +403,12 @@ def test_cp_to_and_from_s3(self):
         with open(full_path, 'r') as f:
             self.assertEqual(f.read(), 'this is foo.txt')
 
+    def test_cp_to_and_from_s3(self):
+        self.assert_cp_to_and_from_s3(_SHARED_BUCKET)
+
+    def test_cp_to_and_from_s3_express(self):
+        self.assert_cp_to_and_from_s3(_SHARED_DIR_BUCKET)
+
     def test_cp_without_trailing_slash(self):
         # There's a unit test for this, but we still want to verify this
         # with an integration test.
@@ -1133,6 +1181,28 @@ class TestLs(BaseS3IntegrationTest):
     """
     This tests using the ``ls`` command.
     """
+    def assert_ls_with_prefix(self, bucket_name):
+        self.put_object(bucket_name, 'foo.txt', 'contents')
+        self.put_object(bucket_name, 'foo', 'contents')
+        self.put_object(bucket_name, 'bar.txt', 'contents')
+        self.put_object(bucket_name, 'subdir/foo.txt', 'contents')
+        p = aws('s3 ls s3://%s' % bucket_name)
+        self.assertIn('PRE subdir/', p.stdout)
+        self.assertIn('8 foo.txt', p.stdout)
+        self.assertIn('8 foo', p.stdout)
+        self.assertIn('8 bar.txt', p.stdout)
+
+    def assert_ls_recursive(self, bucket_name):
+        self.put_object(bucket_name, 'foo.txt', 'contents')
+        self.put_object(bucket_name, 'foo', 'contents')
+        self.put_object(bucket_name, 'bar.txt', 'contents')
+        self.put_object(bucket_name, 'subdir/foo.txt', 'contents')
+        p = aws('s3 ls s3://%s --recursive' % bucket_name)
+        self.assertIn('8 foo.txt', p.stdout)
+        self.assertIn('8 foo', p.stdout)
+        self.assertIn('8 bar.txt', p.stdout)
+        self.assertIn('8 subdir/foo.txt', p.stdout)
+
     def test_ls_bucket(self):
         p = aws('s3 ls')
         self.assert_no_errors(p)
@@ -1163,28 +1233,16 @@ def test_ls_non_existent_bucket(self):
         self.assertEqual(p.stdout, '')
 
     def test_ls_with_prefix(self):
-        bucket_name = _SHARED_BUCKET
-        self.put_object(bucket_name, 'foo.txt', 'contents')
-        self.put_object(bucket_name, 'foo', 'contents')
-        self.put_object(bucket_name, 'bar.txt', 'contents')
-        self.put_object(bucket_name, 'subdir/foo.txt', 'contents')
-        p = aws('s3 ls s3://%s' % bucket_name)
-        self.assertIn('PRE subdir/', p.stdout)
-        self.assertIn('8 foo.txt', p.stdout)
-        self.assertIn('8 foo', p.stdout)
-        self.assertIn('8 bar.txt', p.stdout)
+        self.assert_ls_with_prefix(_SHARED_BUCKET)
+
+    def test_s3_express_ls_with_prefix(self):
+        self.assert_ls_with_prefix(_SHARED_DIR_BUCKET)
 
     def test_ls_recursive(self):
-        bucket_name = _SHARED_BUCKET
-        self.put_object(bucket_name, 'foo.txt', 'contents')
-        self.put_object(bucket_name, 'foo', 'contents')
-        self.put_object(bucket_name, 'bar.txt', 'contents')
-        self.put_object(bucket_name, 'subdir/foo.txt', 'contents')
-        p = aws('s3 ls s3://%s --recursive' % bucket_name)
-        self.assertIn('8 foo.txt', p.stdout)
-        self.assertIn('8 foo', p.stdout)
-        self.assertIn('8 bar.txt', p.stdout)
-        self.assertIn('8 subdir/foo.txt', p.stdout)
+        self.assert_ls_recursive(_SHARED_BUCKET)
+
+    def test_s3_express_ls_recursive(self):
+        self.assert_ls_recursive(_SHARED_DIR_BUCKET)
 
     def test_ls_without_prefix(self):
         # The ls command does not require an s3:// prefix,
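
A quick, hypothetical illustration of the new helpers (not part of the patch): a test that needs its own directory bucket, rather than the shared _SHARED_DIR_BUCKET, can call the create_dir_bucket() method added above. The sketch assumes it lives in tests/integration/customizations/s3/test_plugin.py, where aws(), BaseS3IntegrationTest, and the file-creator fixture are already in scope; the class and test names are invented for the example.

    class TestCpWithOwnDirBucket(BaseS3IntegrationTest):
        def test_cp_round_trip_with_fresh_s3_express_bucket(self):
            # create_dir_bucket() defaults to ('us-west-2', 'usw2-az1'), names the
            # bucket <random>--usw2-az1--x-s3, and registers delete_bucket() as cleanup.
            bucket_name = self.create_dir_bucket()
            foo_txt = self.files.create_file('foo.txt', 'this is foo.txt')

            # Upload, then download to a fresh local path and compare contents.
            p = aws('s3 cp %s s3://%s/foo.txt' % (foo_txt, bucket_name))
            self.assert_no_errors(p)
            self.assertTrue(self.key_exists(bucket_name, key_name='foo.txt'))

            full_path = self.files.full_path('foo-download.txt')
            p = aws('s3 cp s3://%s/foo.txt %s' % (bucket_name, full_path))
            self.assert_no_errors(p)
            with open(full_path, 'r') as f:
                self.assertEqual(f.read(), 'this is foo.txt')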
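
The module-level create_dir_bucket() in awscli/testutils.py can also be exercised on its own, for example to check the directory-bucket naming rule. A minimal sketch, assuming awscli.testutils is importable and that the account has S3 Express One Zone available in the helper's default location ('us-west-2', 'usw2-az1'):

    import botocore.session

    from awscli.testutils import create_dir_bucket

    session = botocore.session.get_session()
    # location is a (region, availability-zone-id) pair; passing the defaults
    # explicitly here just to show the expected shape of the argument.
    bucket_name = create_dir_bucket(session, location=('us-west-2', 'usw2-az1'))
    print(bucket_name)  # <random>--usw2-az1--x-s3

    # The freshly created bucket is empty, so it can be deleted right away.
    s3 = session.create_client('s3', region_name='us-west-2')
    s3.delete_bucket(Bucket=bucket_name)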