Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add --include --exclude to s3 ls #4941

Draft
wants to merge 13 commits into
base: develop
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 50 additions & 8 deletions awscli/customizations/s3/subcommands.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
from awscli.customizations.s3.syncstrategy.base import MissingFileSync, \
SizeAndLastModifiedSync, NeverSync
from awscli.customizations.s3 import transferconfig
import fnmatch


LOGGER = logging.getLogger(__name__)
Expand Down Expand Up @@ -461,7 +462,7 @@ class ListCommand(S3Command):
USAGE = "<S3Uri> or NONE"
ARG_TABLE = [{'name': 'paths', 'nargs': '?', 'default': 's3://',
'positional_arg': True, 'synopsis': USAGE}, RECURSIVE,
PAGE_SIZE, HUMAN_READABLE, SUMMARIZE, REQUEST_PAYER]
PAGE_SIZE, HUMAN_READABLE, SUMMARIZE, REQUEST_PAYER, INCLUDE, EXCLUDE]

def _run_main(self, parsed_args, parsed_globals):
super(ListCommand, self)._run_main(parsed_args, parsed_globals)
Expand All @@ -470,6 +471,8 @@ def _run_main(self, parsed_args, parsed_globals):
self._size_accumulator = 0
self._total_objects = 0
self._human_readable = parsed_args.human_readable
self.filters = parsed_args.filters

path = parsed_args.paths
if path.startswith('s3://'):
path = path[5:]
Expand Down Expand Up @@ -512,6 +515,27 @@ def _list_all_objects(self, bucket, key, page_size=None,
for response_data in iterator:
self._display_page(response_data)

def _is_match_pattern(self, pattern_type, path_pattern, file_path):
    """Decide whether one filter lets ``file_path`` through.

    :param pattern_type: Filter kind with the leading dashes removed,
        i.e. ``'include'`` or ``'exclude'``.
    :param path_pattern: Glob pattern handed to ``fnmatch.fnmatch``.
    :param file_path: Name that is tested against the pattern.
    :returns: ``True`` when the name matches an include pattern or fails
        to match an exclude pattern; ``False`` when it matches an
        exclude pattern or fails to match an include pattern.

    The verdict covers this single filter only; combining the verdicts
    of several filters is left to the caller.
    """
    wants_include = pattern_type == 'include'
    matched = fnmatch.fnmatch(file_path, path_pattern)

    # A miss (or a filter kind that is neither include nor exclude) is
    # reported as "did not match"; only an include filter rejects then.
    if not matched or pattern_type not in ('include', 'exclude'):
        LOGGER.debug("%s did not match %s filter: %s",
                     file_path, pattern_type, path_pattern)
        return not wants_include

    if wants_include:
        LOGGER.debug("%s matched include filter: %s",
                     file_path, path_pattern)
        return True

    LOGGER.debug("%s matched exclude filter: %s",
                 file_path, path_pattern)
    return False

def _display_page(self, response_data, use_basename=True):
common_prefixes = response_data.get('CommonPrefixes', [])
contents = response_data.get('Contents', [])
Expand All @@ -523,20 +547,38 @@ def _display_page(self, response_data, use_basename=True):
prefix = prefix_components[-2]
pre_string = "PRE".rjust(30, " ")
print_str = pre_string + ' ' + prefix + '/\n'
uni_print(print_str)

for content in contents:
last_mod_str = self._make_last_mod_str(content['LastModified'])
self._size_accumulator += int(content['Size'])
self._total_objects += 1
size_str = self._make_size_str(content['Size'])

if use_basename:
filename_components = content['Key'].split('/')
filename = filename_components[-1]
else:
filename = content['Key']
print_str = last_mod_str + ' ' + size_str + ' ' + \
filename + '\n'
uni_print(print_str)

file_status = None

if self.filters is not None:
for filter in self.filters:
pattern_type = filter[0].lstrip('-')
path_pattern = filter[1]
file_status = self._is_match_pattern(pattern_type, path_pattern, filename)

if file_status is True:
self._size_accumulator += int(content['Size'])
self._total_objects += 1
size_str = self._make_size_str(content['Size'])
print_str = last_mod_str + ' ' + size_str + ' ' + \
filename + '\n'
uni_print(print_str)
else:
self._size_accumulator += int(content['Size'])
self._total_objects += 1
size_str = self._make_size_str(content['Size'])
print_str = last_mod_str + ' ' + size_str + ' ' + \
filename + '\n'
uni_print(print_str)
self._at_first_page = False

def _list_all_buckets(self):
Expand Down
20 changes: 17 additions & 3 deletions awscli/examples/s3/ls.rst
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ Output::

2013-07-11 17:08:50 mybucket
2013-07-24 14:55:44 mybucket2

**Example 2: Listing all prefixes and objects in a bucket**

The following ``ls`` command lists objects and common prefixes under a specified bucket and prefix. In this example, the user owns the bucket ``mybucket`` with the objects ``test.txt`` and ``somePrefix/test.txt``. The ``LastWriteTime`` and ``Length`` are arbitrary. Note that since the ``ls`` command has no interaction with the local filesystem, the ``s3://`` URI scheme is not required to resolve ambiguity and may be omitted::
Expand All @@ -29,7 +29,7 @@ The following ``ls`` command lists objects and common prefixes under a specified
Output::

None

**Example 4: Recursively listing all prefixes and objects in a bucket**

The following ``ls`` command will recursively list objects in a bucket. Rather than showing ``PRE dirname/`` in the output, all the content in a bucket will be listed in order::
Expand Down Expand Up @@ -69,7 +69,21 @@ Output::
2013-09-02 21:32:57 398 Bytes z.txt

Total Objects: 10
Total Size: 2.9 MiB
Total Size: 2.9 MiB

You can combine ``--exclude`` and ``--include`` options to list only objects that match a pattern, excluding all others::

aws s3 ls s3://mybucket --recursive --human-readable --summarize --exclude "*" --include "*.txt"

Output::

2013-09-02 21:37:53 10 Bytes a.txt
2013-09-02 21:32:57 398 Bytes z.txt

Total Objects: 2
Total Size: 408 Bytes

**Listing from an S3 access point**

**Example 6: Listing from an S3 access point**
wingkwong marked this conversation as resolved.
Show resolved Hide resolved

Expand Down
50 changes: 46 additions & 4 deletions tests/unit/customizations/s3/test_subcommands.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ def test_ls_command_for_bucket(self):
ls_command = ListCommand(self.session)
parsed_args = FakeArgs(paths='s3://mybucket/', dir_op=False,
page_size='5', human_readable=False,
summarize=False, request_payer=None)
summarize=False, request_payer=None, filters=None)
parsed_globals = mock.Mock()
ls_command._run_main(parsed_args, parsed_globals)
call = self.session.create_client.return_value.list_objects_v2
Expand All @@ -118,7 +118,7 @@ def test_ls_command_with_no_args(self):
verify_ssl=None)
parsed_args = FakeArgs(dir_op=False, paths='s3://',
human_readable=False, summarize=False,
request_payer=None)
request_payer=None, filters=None)
ls_command._run_main(parsed_args, parsed_global)
# We should only be a single call.
call = self.session.create_client.return_value.list_buckets
Expand All @@ -139,7 +139,7 @@ def test_ls_with_verify_argument(self):
verify_ssl=False)
parsed_args = FakeArgs(paths='s3://', dir_op=False,
human_readable=False, summarize=False,
request_payer=None)
request_payer=None, filters=None)
ls_command._run_main(parsed_args, parsed_global)
# Verify get_client
get_client = self.session.create_client
Expand All @@ -152,7 +152,7 @@ def test_ls_with_requester_pays(self):
ls_command = ListCommand(self.session)
parsed_args = FakeArgs(paths='s3://mybucket/', dir_op=False,
human_readable=False, summarize=False,
request_payer='requester', page_size='5')
request_payer='requester', page_size='5', filters=None)
parsed_globals = mock.Mock()
ls_command._run_main(parsed_args, parsed_globals)
call = self.session.create_client.return_value.list_objects
Expand All @@ -171,6 +171,48 @@ def test_ls_with_requester_pays(self):

paginate.assert_called_with(**ref_call_args)

def test_ls_with_filters(self):
    """``ls`` with include/exclude filters still issues one paginated call.

    Filtering happens on the client side while pages are displayed, so
    the ListObjectsV2 request arguments must be the same as for an
    unfiltered listing.
    """
    ls_command = ListCommand(self.session)
    # Each filter is its own [flag, pattern] pair: ListCommand reads
    # filter[0] as the flag and filter[1] as the pattern.
    parsed_args = FakeArgs(
        paths='s3://mybucket/', dir_op=False, page_size='5',
        human_readable=False, summarize=False, request_payer=None,
        filters=[['--include', '*'], ['--exclude', '*']])
    parsed_globals = mock.Mock()
    ls_command._run_main(parsed_args, parsed_globals)
    call = self.session.create_client.return_value.list_objects_v2
    paginate = self.session.create_client.return_value.get_paginator\
        .return_value.paginate
    # We should make no direct operation calls.
    self.assertEqual(call.call_count, 0)
    # And only a single pagination call to ListObjectsV2.
    self.session.create_client.return_value.get_paginator.\
        assert_called_with('list_objects_v2')
    ref_call_args = {'Bucket': u'mybucket', 'Delimiter': '/',
                     'Prefix': u'',
                     'PaginationConfig': {'PageSize': u'5'}}

    paginate.assert_called_with(**ref_call_args)

def test_ls_is_match_pattern(self):
    """Check the include/exclude truth table of ``_is_match_pattern``.

    The real method is called (it is not replaced by a mock), so a
    regression in the matching logic makes this test fail.
    """
    ls_command = ListCommand(self.session)
    file_path = '/foo/bar.txt'
    # (pattern_type, path_pattern, expected file_status)
    cases = [
        # Match with include
        ('include', '*.txt', True),
        # Match with exclude
        ('exclude', '*.txt', False),
        # No match with include
        ('include', '*.zip', False),
        # No match with exclude
        ('exclude', '*.zip', True),
    ]
    for pattern_type, path_pattern, expected in cases:
        file_status = ls_command._is_match_pattern(
            pattern_type, path_pattern, file_path)
        self.assertEqual(
            file_status, expected,
            '%s %r against %r' % (pattern_type, path_pattern, file_path))

class CommandArchitectureTest(BaseAWSCommandParamsTest):
def setUp(self):
Expand Down