diff --git a/awscli/customizations/s3/subcommands.py b/awscli/customizations/s3/subcommands.py
index b045b2380ccc..e6a3201971c2 100644
--- a/awscli/customizations/s3/subcommands.py
+++ b/awscli/customizations/s3/subcommands.py
@@ -36,6 +36,7 @@
 from awscli.customizations.s3.syncstrategy.base import MissingFileSync, \
     SizeAndLastModifiedSync, NeverSync
 from awscli.customizations.s3 import transferconfig
+import fnmatch
 
 
 LOGGER = logging.getLogger(__name__)
@@ -483,7 +484,8 @@ class ListCommand(S3Command):
     USAGE = "<S3Uri> or NONE"
     ARG_TABLE = [{'name': 'paths', 'nargs': '?', 'default': 's3://',
                   'positional_arg': True, 'synopsis': USAGE}, RECURSIVE,
-                 PAGE_SIZE, HUMAN_READABLE, SUMMARIZE, REQUEST_PAYER]
+                 PAGE_SIZE, HUMAN_READABLE, SUMMARIZE, REQUEST_PAYER,
+                 INCLUDE, EXCLUDE]
 
     def _run_main(self, parsed_args, parsed_globals):
         super(ListCommand, self)._run_main(parsed_args, parsed_globals)
@@ -492,6 +494,8 @@ def _run_main(self, parsed_args, parsed_globals):
         self._size_accumulator = 0
         self._total_objects = 0
         self._human_readable = parsed_args.human_readable
+        self.filters = parsed_args.filters
+
         path = parsed_args.paths
         if path.startswith('s3://'):
             path = path[5:]
@@ -534,6 +538,30 @@ def _list_all_objects(self, bucket, key, page_size=None,
         for response_data in iterator:
             self._display_page(response_data)
 
+    def _is_match_pattern(self, pattern_type, path_pattern, file_path):
+        """Return whether ``file_path`` should be displayed.
+
+        ``pattern_type`` is either ``'include'`` or ``'exclude'`` and
+        ``path_pattern`` is a shell-style wildcard pattern.
+        """
+        is_match = fnmatch.fnmatch(file_path, path_pattern)
+        if is_match and pattern_type == 'include':
+            file_status = True
+            LOGGER.debug("%s matched include filter: %s",
+                         file_path, path_pattern)
+        elif is_match and pattern_type == 'exclude':
+            file_status = False
+            LOGGER.debug("%s matched exclude filter: %s",
+                         file_path, path_pattern)
+        else:
+            LOGGER.debug("%s did not match %s filter: %s",
+                         file_path, pattern_type, path_pattern)
+            if pattern_type == 'include':
+                file_status = False
+            else:
+                file_status = True
+        return file_status
+
     def _display_page(self, response_data, use_basename=True):
         common_prefixes = response_data.get('CommonPrefixes', [])
         contents = response_data.get('Contents', [])
@@ -545,20 +573,33 @@
             prefix = prefix_components[-2]
             pre_string = "PRE".rjust(30, " ")
             print_str = pre_string + ' ' + prefix + '/\n'
             uni_print(print_str)
         for content in contents:
             last_mod_str = self._make_last_mod_str(content['LastModified'])
-            self._size_accumulator += int(content['Size'])
-            self._total_objects += 1
-            size_str = self._make_size_str(content['Size'])
             if use_basename:
                 filename_components = content['Key'].split('/')
                 filename = filename_components[-1]
             else:
                 filename = content['Key']
+            # Apply any --include/--exclude filters to the displayed name;
+            # the verdict of the last filter evaluated wins.
+            file_status = None
+            if self.filters is not None:
+                for path_filter in self.filters:
+                    pattern_type = path_filter[0].lstrip('-')
+                    path_pattern = path_filter[1]
+                    file_status = self._is_match_pattern(
+                        pattern_type, path_pattern, filename)
+            # Skip keys excluded by the filters; everything else is
+            # counted and printed as before.
+            if file_status is False:
+                continue
+            self._size_accumulator += int(content['Size'])
+            self._total_objects += 1
+            size_str = self._make_size_str(content['Size'])
             print_str = last_mod_str + ' ' + size_str + ' ' + \
                 filename + '\n'
             uni_print(print_str)
         self._at_first_page = False
 
     def _list_all_buckets(self, page_size=None):
diff --git a/awscli/examples/s3/ls.rst b/awscli/examples/s3/ls.rst
index 3754f80d0165..d69fd31a6cfc 100644
--- a/awscli/examples/s3/ls.rst
+++ b/awscli/examples/s3/ls.rst
@@ -8,7 +8,7 @@ Output::
 
    2013-07-11 17:08:50 mybucket
    2013-07-24 14:55:44 mybucket2
- 
+
 **Example 2: Listing all prefixes and objects in a bucket**
 
 The following ``ls`` command lists objects and common prefixes under a specified bucket and prefix. In this example, the user owns the bucket ``mybucket`` with the objects ``test.txt`` and ``somePrefix/test.txt``. The ``LastWriteTime`` and ``Length`` are arbitrary. Note that since the ``ls`` command has no interaction with the local filesystem, the ``s3://`` URI scheme is not required to resolve ambiguity and may be omitted. ::
@@ -29,7 +29,7 @@ The following ``ls`` command lists objects and common prefixes under a specified
 Output::
 
    None
- 
+
 **Example 4: Recursively listing all prefixes and objects in a bucket**
 
 The following ``ls`` command will recursively list objects in a bucket. Rather than showing ``PRE dirname/`` in the output, all the content in a bucket will be listed in order. ::
@@ -73,7 +73,19 @@ Output::
    2013-09-02 21:32:57  398 Bytes z.txt
 
    Total Objects: 10
-   Total Size: 2.9 MiB 
+   Total Size: 2.9 MiB
+
+You can combine the ``--exclude`` and ``--include`` options to list only the objects that match a pattern. The following command first excludes every object and then re-includes only the objects that end in ``.txt``. ::
+
+   aws s3 ls s3://mybucket --recursive --human-readable --summarize --exclude "*" --include "*.txt"
+
+Output::
+
+   2013-09-02 21:37:53   10 Bytes a.txt
+   2013-09-02 21:32:57  398 Bytes z.txt
+
+   Total Objects: 2
+   Total Size: 408 Bytes
 
 **Example 6: Listing from an S3 access point**
 
diff --git a/tests/unit/customizations/s3/test_subcommands.py b/tests/unit/customizations/s3/test_subcommands.py
index f68f3cc642bd..0aa943bda73d 100644
--- a/tests/unit/customizations/s3/test_subcommands.py
+++ b/tests/unit/customizations/s3/test_subcommands.py
@@ -92,7 +92,8 @@ def test_ls_command_for_bucket(self):
         ls_command = ListCommand(self.session)
         parsed_args = FakeArgs(paths='s3://mybucket/', dir_op=False,
                                page_size='5', human_readable=False,
-                               summarize=False, request_payer=None)
+                               summarize=False, request_payer=None,
+                               filters=None)
         parsed_globals = mock.Mock()
         ls_command._run_main(parsed_args, parsed_globals)
         call = self.session.create_client.return_value.list_objects_v2
@@ -115,7 +116,8 @@ def test_ls_command_with_no_args(self):
             verify_ssl=None)
         parsed_args = FakeArgs(dir_op=False, paths='s3://',
                                human_readable=False, summarize=False,
-                               request_payer=None, page_size=None)
+                               request_payer=None, page_size=None,
+                               filters=None)
         ls_command._run_main(parsed_args, parsed_global)
         call = self.session.create_client.return_value.list_buckets
         paginate = self.session.create_client.return_value.get_paginator\
@@ -144,7 +146,8 @@ def test_ls_with_verify_argument(self):
             verify_ssl=False)
         parsed_args = FakeArgs(paths='s3://', dir_op=False,
                                human_readable=False, summarize=False,
-                               request_payer=None, page_size=None)
+                               request_payer=None, page_size=None,
+                               filters=None)
         ls_command._run_main(parsed_args, parsed_global)
         # Verify get_client
         get_client = self.session.create_client
@@ -157,7 +160,8 @@ def test_ls_with_requester_pays(self):
         ls_command = ListCommand(self.session)
         parsed_args = FakeArgs(paths='s3://mybucket/', dir_op=False,
                                human_readable=False, summarize=False,
-                               request_payer='requester', page_size='5')
+                               request_payer='requester', page_size='5',
+                               filters=None)
         parsed_globals = mock.Mock()
         ls_command._run_main(parsed_args, parsed_globals)
         call = self.session.create_client.return_value.list_objects
@@ -176,6 +180,43 @@
 
         paginate.assert_called_with(**ref_call_args)
 
+    def test_ls_with_filters(self):
+        ls_command = ListCommand(self.session)
+        parsed_args = FakeArgs(paths='s3://mybucket/', dir_op=False,
+                               page_size='5', human_readable=False,
+                               summarize=False, request_payer=None,
+                               filters=[['--exclude', '*'],
+                                        ['--include', '*.txt']])
+        parsed_globals = mock.Mock()
+        ls_command._run_main(parsed_args, parsed_globals)
+        call = self.session.create_client.return_value.list_objects_v2
+        paginate = self.session.create_client.return_value.get_paginator\
+            .return_value.paginate
+        # We should make no operation calls.
+        self.assertEqual(call.call_count, 0)
+        # And only a single pagination call to ListObjectsV2.
+        self.session.create_client.return_value.get_paginator.\
+            assert_called_with('list_objects_v2')
+        ref_call_args = {'Bucket': u'mybucket', 'Delimiter': '/',
+                         'Prefix': u'',
+                         'PaginationConfig': {'PageSize': u'5'}}
+        paginate.assert_called_with(**ref_call_args)
+
+    def test_ls_is_match_pattern(self):
+        ls_command = ListCommand(self.session)
+        # A key that matches an include pattern is displayed.
+        self.assertTrue(ls_command._is_match_pattern(
+            'include', '*.txt', '/foo/bar.txt'))
+        # A key that matches an exclude pattern is hidden.
+        self.assertFalse(ls_command._is_match_pattern(
+            'exclude', '*.txt', '/foo/bar.txt'))
+        # A key that misses an include pattern is hidden.
+        self.assertFalse(ls_command._is_match_pattern(
+            'include', '*.zip', '/foo/bar.txt'))
+        # A key that misses an exclude pattern is displayed.
+        self.assertTrue(ls_command._is_match_pattern(
+            'exclude', '*.zip', '/foo/bar.txt'))
+
 
 class CommandArchitectureTest(BaseAWSCommandParamsTest):
     def setUp(self):
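For reference, the filtering behaviour the patch implements can be summarised outside the CLI: every --include/--exclude pattern is evaluated against the displayed key with fnmatch, the verdict of the last filter evaluated decides whether the key is shown, and keys are always shown when no filter is supplied. The sketch below is illustrative only and is not part of the patch; should_display and the sample keys are hypothetical.

    import fnmatch


    def should_display(key, filters=None):
        # Mirror the patch: evaluate every (type, pattern) filter in order;
        # the last verdict wins, and a key is shown when no filters are given.
        status = True
        if filters:
            for pattern_type, pattern in filters:
                matched = fnmatch.fnmatch(key, pattern)
                if pattern_type.lstrip('-') == 'include':
                    status = matched
                else:  # exclude
                    status = not matched
        return status


    # Exclude everything, then re-include only .txt keys, as in the ls.rst example.
    filters = [('--exclude', '*'), ('--include', '*.txt')]
    for key in ('a.txt', 'z.txt', 'photos/cat.jpg'):
        print(key, should_display(key, filters))
    # a.txt True / z.txt True / photos/cat.jpg False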