From 3f0165fe17a5f57b3c5dd8f08a707c243a06886c Mon Sep 17 00:00:00 2001 From: wingkwong Date: Sun, 9 Feb 2020 17:23:48 +0800 Subject: [PATCH 1/5] #4832 Add --include --exclude to aws s3 ls --- awscli/customizations/s3/subcommands.py | 58 +++++++++++++++++++++---- 1 file changed, 50 insertions(+), 8 deletions(-) diff --git a/awscli/customizations/s3/subcommands.py b/awscli/customizations/s3/subcommands.py index 0ce3378d0446..1da2fb0b822b 100644 --- a/awscli/customizations/s3/subcommands.py +++ b/awscli/customizations/s3/subcommands.py @@ -35,6 +35,7 @@ from awscli.customizations.s3.syncstrategy.base import MissingFileSync, \ SizeAndLastModifiedSync, NeverSync from awscli.customizations.s3 import transferconfig +import fnmatch LOGGER = logging.getLogger(__name__) @@ -461,7 +462,7 @@ class ListCommand(S3Command): USAGE = " or NONE" ARG_TABLE = [{'name': 'paths', 'nargs': '?', 'default': 's3://', 'positional_arg': True, 'synopsis': USAGE}, RECURSIVE, - PAGE_SIZE, HUMAN_READABLE, SUMMARIZE, REQUEST_PAYER] + PAGE_SIZE, HUMAN_READABLE, SUMMARIZE, REQUEST_PAYER, INCLUDE, EXCLUDE] def _run_main(self, parsed_args, parsed_globals): super(ListCommand, self)._run_main(parsed_args, parsed_globals) @@ -470,6 +471,8 @@ def _run_main(self, parsed_args, parsed_globals): self._size_accumulator = 0 self._total_objects = 0 self._human_readable = parsed_args.human_readable + self.filters = parsed_args.filters + path = parsed_args.paths if path.startswith('s3://'): path = path[5:] @@ -512,6 +515,27 @@ def _list_all_objects(self, bucket, key, page_size=None, for response_data in iterator: self._display_page(response_data) + def _is_match_pattern(self, pattern_type, path_pattern, file_path): + file_status = None + + is_match = fnmatch.fnmatch(file_path, path_pattern) + if is_match and pattern_type == 'include': + file_status = True + LOGGER.debug("%s matched include filter: %s", + file_path, path_pattern) + elif is_match and pattern_type == 'exclude': + file_status = False + LOGGER.debug("%s matched exclude filter: %s", + file_path, path_pattern) + else: + LOGGER.debug("%s did not match %s filter: %s", + file_path, pattern_type, path_pattern) + if pattern_type == 'include': + file_status = False + else: + file_status = True + return file_status + def _display_page(self, response_data, use_basename=True): common_prefixes = response_data.get('CommonPrefixes', []) contents = response_data.get('Contents', []) @@ -523,20 +547,38 @@ def _display_page(self, response_data, use_basename=True): prefix = prefix_components[-2] pre_string = "PRE".rjust(30, " ") print_str = pre_string + ' ' + prefix + '/\n' - uni_print(print_str) + for content in contents: last_mod_str = self._make_last_mod_str(content['LastModified']) - self._size_accumulator += int(content['Size']) - self._total_objects += 1 - size_str = self._make_size_str(content['Size']) + if use_basename: filename_components = content['Key'].split('/') filename = filename_components[-1] else: filename = content['Key'] - print_str = last_mod_str + ' ' + size_str + ' ' + \ - filename + '\n' - uni_print(print_str) + + file_status = None + + if self.filters is not None: + for filter in self.filters: + pattern_type = filter[0].lstrip('-') + path_pattern = filter[1] + file_status = self._is_match_pattern(pattern_type, path_pattern, filename) + + if file_status is True: + self._size_accumulator += int(content['Size']) + self._total_objects += 1 + size_str = self._make_size_str(content['Size']) + print_str = last_mod_str + ' ' + size_str + ' ' + \ + filename + '\n' + uni_print(print_str) + else: + self._size_accumulator += int(content['Size']) + self._total_objects += 1 + size_str = self._make_size_str(content['Size']) + print_str = last_mod_str + ' ' + size_str + ' ' + \ + filename + '\n' + uni_print(print_str) self._at_first_page = False def _list_all_buckets(self): From 7c92aed40ca782cbd2edbfb6ce773650de8a73af Mon Sep 17 00:00:00 2001 From: wingkwong Date: Tue, 11 Feb 2020 17:46:50 +0800 Subject: [PATCH 2/5] #4832 Add --include --exclude to aws s3 ls test case --- .../customizations/s3/test_subcommands.py | 21 +++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/tests/unit/customizations/s3/test_subcommands.py b/tests/unit/customizations/s3/test_subcommands.py index a4418a79dae4..12e42c31c91c 100644 --- a/tests/unit/customizations/s3/test_subcommands.py +++ b/tests/unit/customizations/s3/test_subcommands.py @@ -95,7 +95,7 @@ def test_ls_command_for_bucket(self): ls_command = ListCommand(self.session) parsed_args = FakeArgs(paths='s3://mybucket/', dir_op=False, page_size='5', human_readable=False, - summarize=False, request_payer=None) + summarize=False, request_payer=None, filters=[['--include', '*']]) parsed_globals = mock.Mock() ls_command._run_main(parsed_args, parsed_globals) call = self.session.create_client.return_value.list_objects_v2 @@ -118,7 +118,7 @@ def test_ls_command_with_no_args(self): verify_ssl=None) parsed_args = FakeArgs(dir_op=False, paths='s3://', human_readable=False, summarize=False, - request_payer=None) + request_payer=None, filters=[['--include', '*']]) ls_command._run_main(parsed_args, parsed_global) # We should only be a single call. call = self.session.create_client.return_value.list_buckets @@ -139,7 +139,7 @@ def test_ls_with_verify_argument(self): verify_ssl=False) parsed_args = FakeArgs(paths='s3://', dir_op=False, human_readable=False, summarize=False, - request_payer=None) + request_payer=None, filters=[['--include', '*']]) ls_command._run_main(parsed_args, parsed_global) # Verify get_client get_client = self.session.create_client @@ -152,7 +152,7 @@ def test_ls_with_requester_pays(self): ls_command = ListCommand(self.session) parsed_args = FakeArgs(paths='s3://mybucket/', dir_op=False, human_readable=False, summarize=False, - request_payer='requester', page_size='5') + request_payer='requester', page_size='5', filters=[['--include', '*']]) parsed_globals = mock.Mock() ls_command._run_main(parsed_args, parsed_globals) call = self.session.create_client.return_value.list_objects @@ -171,6 +171,19 @@ def test_ls_with_requester_pays(self): paginate.assert_called_with(**ref_call_args) + def test_ls_with_filters(self): + ls_command = ListCommand(self.session) + parsed_global = FakeArgs(region='us-west-2', endpoint_url=None, + verify_ssl=False) + parsed_args = FakeArgs(paths='s3://', dir_op=False, + human_readable=False, summarize=False, + request_payer=None, filters=[['--include', '*']]) + ls_command._run_main(parsed_args, parsed_global) + get_client = self.session.create_client + args = get_client.call_args + self.assertEqual(args, mock.call( + 's3', region_name='us-west-2', endpoint_url=None, verify=False, + config=None)) class CommandArchitectureTest(BaseAWSCommandParamsTest): def setUp(self): From 9b34d9d0c8d3ba513cd3388cfdd389eb7854799e Mon Sep 17 00:00:00 2001 From: wingkwong Date: Tue, 11 Feb 2020 19:29:39 +0800 Subject: [PATCH 3/5] #4832 Add --include --exclude to aws s3 ls example --- awscli/examples/s3/ls.rst | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/awscli/examples/s3/ls.rst b/awscli/examples/s3/ls.rst index 43c50e03ac91..4b812c2b772a 100644 --- a/awscli/examples/s3/ls.rst +++ b/awscli/examples/s3/ls.rst @@ -71,3 +71,15 @@ Output:: Total Objects: 10 Total Size: 2.9 MiB + +You can combine ``--exclude`` and ``--include`` options to list only objects that match a pattern, excluding all others:: + + aws s3 ls s3://mybucket --recursive --human-readable --summarize --exclude "*" --include "*.txt" + +Output:: + + 2013-09-02 21:37:53 10 Bytes a.txt + 2013-09-02 21:32:57 398 Bytes z.txt + + Total Objects: 2 + Total Size: 408 Bytes \ No newline at end of file From 0abf55965fe5b0ea23849126f06b9b0ff4f163cb Mon Sep 17 00:00:00 2001 From: wingkwong Date: Tue, 11 Feb 2020 22:19:13 +0800 Subject: [PATCH 4/5] Revise test cases for s3 ls --- .../customizations/s3/test_subcommands.py | 59 ++++++++++++++----- 1 file changed, 44 insertions(+), 15 deletions(-) diff --git a/tests/unit/customizations/s3/test_subcommands.py b/tests/unit/customizations/s3/test_subcommands.py index 12e42c31c91c..9fdf33a7d7a3 100644 --- a/tests/unit/customizations/s3/test_subcommands.py +++ b/tests/unit/customizations/s3/test_subcommands.py @@ -95,7 +95,7 @@ def test_ls_command_for_bucket(self): ls_command = ListCommand(self.session) parsed_args = FakeArgs(paths='s3://mybucket/', dir_op=False, page_size='5', human_readable=False, - summarize=False, request_payer=None, filters=[['--include', '*']]) + summarize=False, request_payer=None, filters=None) parsed_globals = mock.Mock() ls_command._run_main(parsed_args, parsed_globals) call = self.session.create_client.return_value.list_objects_v2 @@ -118,7 +118,7 @@ def test_ls_command_with_no_args(self): verify_ssl=None) parsed_args = FakeArgs(dir_op=False, paths='s3://', human_readable=False, summarize=False, - request_payer=None, filters=[['--include', '*']]) + request_payer=None, filters=None) ls_command._run_main(parsed_args, parsed_global) # We should only be a single call. call = self.session.create_client.return_value.list_buckets @@ -139,7 +139,7 @@ def test_ls_with_verify_argument(self): verify_ssl=False) parsed_args = FakeArgs(paths='s3://', dir_op=False, human_readable=False, summarize=False, - request_payer=None, filters=[['--include', '*']]) + request_payer=None, filters=None) ls_command._run_main(parsed_args, parsed_global) # Verify get_client get_client = self.session.create_client @@ -152,7 +152,7 @@ def test_ls_with_requester_pays(self): ls_command = ListCommand(self.session) parsed_args = FakeArgs(paths='s3://mybucket/', dir_op=False, human_readable=False, summarize=False, - request_payer='requester', page_size='5', filters=[['--include', '*']]) + request_payer='requester', page_size='5', filters=None) parsed_globals = mock.Mock() ls_command._run_main(parsed_args, parsed_globals) call = self.session.create_client.return_value.list_objects @@ -173,17 +173,46 @@ def test_ls_with_requester_pays(self): def test_ls_with_filters(self): ls_command = ListCommand(self.session) - parsed_global = FakeArgs(region='us-west-2', endpoint_url=None, - verify_ssl=False) - parsed_args = FakeArgs(paths='s3://', dir_op=False, - human_readable=False, summarize=False, - request_payer=None, filters=[['--include', '*']]) - ls_command._run_main(parsed_args, parsed_global) - get_client = self.session.create_client - args = get_client.call_args - self.assertEqual(args, mock.call( - 's3', region_name='us-west-2', endpoint_url=None, verify=False, - config=None)) + parsed_args = FakeArgs(paths='s3://mybucket/', dir_op=False, + page_size='5', human_readable=False, + summarize=False, request_payer=None, filters=[['--include', '*', '--exclude', '*']]) + parsed_globals = mock.Mock() + ls_command._run_main(parsed_args, parsed_globals) + call = self.session.create_client.return_value.list_objects_v2 + paginate = self.session.create_client.return_value.get_paginator\ + .return_value.paginate + # We should make no operation calls. + self.assertEqual(call.call_count, 0) + # And only a single pagination call to ListObjectsV2. + self.session.create_client.return_value.get_paginator.\ + assert_called_with('list_objects_v2') + ref_call_args = {'Bucket': u'mybucket', 'Delimiter': '/', + 'Prefix': u'', + 'PaginationConfig': {'PageSize': u'5'}} + + paginate.assert_called_with(**ref_call_args) + + def test_ls_is_match_pattern(self): + ls_command = ListCommand(self.session) + # Match with Include + ls_command._is_match_pattern = MagicMock(return_value=True) + file_status = ls_command._is_match_pattern('include', '*.txt', '/foo/bar.txt', is_match=True) + self.assertEqual(file_status, True) + + # Match with Exclude + ls_command._is_match_pattern = MagicMock(return_value=False) + file_status = ls_command._is_match_pattern('exclude', '*.txt', '/foo/bar.txt', is_match=True) + self.assertEqual(file_status, False) + + # Not match with Include + ls_command._is_match_pattern = MagicMock(return_value=False) + file_status = ls_command._is_match_pattern('include', '*.zip', '/foo/bar.txt', is_match=False) + self.assertEqual(file_status, False) + + # Not match with Exclude + ls_command._is_match_pattern = MagicMock(return_value=True) + file_status = ls_command._is_match_pattern('exclude', '*.zip', '/foo/bar.txt', is_match=False) + self.assertEqual(file_status, True) class CommandArchitectureTest(BaseAWSCommandParamsTest): def setUp(self): From 71f5624e1e7e87f5c177c78192508b001bb5acb1 Mon Sep 17 00:00:00 2001 From: Wing-Kam Wong Date: Sun, 17 Jul 2022 12:06:12 +0800 Subject: [PATCH 5/5] Remove duplicate title --- awscli/examples/s3/ls.rst | 2 -- 1 file changed, 2 deletions(-) diff --git a/awscli/examples/s3/ls.rst b/awscli/examples/s3/ls.rst index 1410d54b27ad..8861f070e3bc 100644 --- a/awscli/examples/s3/ls.rst +++ b/awscli/examples/s3/ls.rst @@ -83,8 +83,6 @@ Output:: Total Objects: 2 Total Size: 408 Bytes -**Listing from an S3 access point** - **Example 6: Listing from an S3 access point** The following ``ls`` command list objects from access point (``myaccesspoint``)::