Skip to content

Commit aae9530

Browse files
committed
Accommodating pipes in programs.py
Adding in docker pipes unit test. Added docs to docker_call and fixed docker_call unit test. Changed data size for docker_call pipes unit test to 1GB. Polished docker_call functionality. Added stderr-to-file-handle support in docker_call.
1 parent 007088e commit aae9530

File tree

2 files changed

+118
-17
lines changed

2 files changed

+118
-17
lines changed

src/toil_scripts/lib/programs.py

Lines changed: 97 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import subprocess
33
import logging
44
from bd2k.util.exceptions import panic
5+
from toil_scripts.lib import require
56

67
_log = logging.getLogger(__name__)
78

@@ -15,34 +16,66 @@ def mock_mode():
1516
return True if int(os.environ.get('TOIL_SCRIPTS_MOCK_MODE', '0')) else False
1617

1718

18-
def docker_call(tool,
19+
def docker_call(tool=None,
20+
tools=None,
1921
parameters=None,
2022
work_dir='.',
2123
rm=True,
2224
env=None,
2325
outfile=None,
26+
errfile=None,
2427
inputs=None,
2528
outputs=None,
2629
docker_parameters=None,
2730
check_output=False,
31+
return_stderr=False,
2832
mock=None):
2933
"""
3034
Calls Docker, passing along parameters and tool.
3135
32-
:param str tool: Name of the Docker image to be used (e.g. quay.io/ucsc_cgl/samtools)
36+
:param (str tool | list[str] tools): Name of the Docker image to be used (e.g. quay.io/ucsc_cgl/samtools)
37+
OR a list of Docker image names, in the order they are to be used when piping commands to
38+
Docker (e.g. ['quay.io/ucsc_cgl/samtools', 'ubuntu']). The tool and tools parameters are
39+
mutually exclusive; exactly one of them must be given to docker_call.
3340
:param list[str] parameters: Command line arguments to be passed to the tool
3441
:param str work_dir: Directory to mount into the container via `-v`. Destination convention is /data
3542
:param bool rm: Set to True to pass `--rm` flag.
3643
:param dict[str,str] env: Environment variables to be added (e.g. dict(JAVA_OPTS='-Xmx15G'))
3744
:param bool sudo: If True, prepends `sudo` to the docker call
38-
:param file outfile: Pipe output of Docker call to file handle
45+
:param file outfile: Pipe stdout of Docker call to file handle
46+
:param file errfile: Pipe stderr of Docker call to file handle
3947
:param list[str] inputs: A list of the input files.
4048
:param dict[str,str] outputs: A dictionary containing the outputs files as keys with either None
4149
or a url. The value is only used if mock=True
4250
:param dict[str,str] docker_parameters: Parameters to pass to docker
4351
:param bool check_output: When True, this function returns docker's output
52+
:param bool return_stderr: When True, this function includes stderr in docker's output
4453
:param bool mock: Whether to run in mock mode. If this variable is unset, its value will be determined by
4554
the environment variable.
55+
56+
Piping docker commands can be done in one of two ways depending on use case:
57+
Running a pipe in docker in 'pipe-in-single-container' mode produces command structure
58+
docker '... | ... | ...' where each '...' command corresponds to each element in the 'parameters'
59+
argument that uses a docker container. This is the most efficient method if you want to run a pipe of
60+
commands where each command uses the same docker container.
61+
62+
Running a pipe in docker in 'pipe-of-containers' mode produces command structure
63+
docker '...' | docker '...' | docker '...' where each '...' command corresponds to each element in
64+
the 'parameters' argument that uses a docker container and each 'docker' tool in the pipe
65+
corresponds to each element in the 'tools' argument.
66+
67+
Examples for running command 'head -c 1M /dev/urandom | gzip | gunzip | md5sum 1>&2':
68+
Running 'pipe-in-single-container' mode:
69+
command= ['head -c 1M /dev/urandom', 'gzip', 'gunzip', 'md5sum 1>&2']
70+
docker_work_dir=curr_work_dir
71+
docker_tools=['ubuntu']
72+
stdout = docker_call(work_dir=docker_work_dir, parameters=command, tools=docker_tools, check_output=True)
73+
74+
Running 'pipe-of-containers' mode:
75+
command= ['head -c 1M /dev/urandom', 'gzip', 'gunzip', 'md5sum 1>&2']
76+
docker_work_dir=curr_work_dir
77+
docker_tools=['ubuntu', 'ubuntu', 'ubuntu', 'ubuntu']
78+
stdout = docker_call(work_dir=docker_work_dir, parameters=command, tools=docker_tools, check_output=True)
4679
"""
4780
from toil_scripts.lib.urls import download_url
4881

@@ -83,37 +116,72 @@ def docker_call(tool,
83116
if env:
84117
for e, v in env.iteritems():
85118
base_docker_call.extend(['-e', '{}={}'.format(e, v)])
119+
86120
if docker_parameters:
87121
base_docker_call += docker_parameters
122+
123+
docker_call = []
124+
125+
require(bool(tools) != bool(tool), 'Either "tool" or "tools" must contain a value, but not both')
126+
127+
# Pipe functionality
128+
# each element in the parameters list must represent a sub-pipe command
129+
if bool(tools):
130+
if len(tools) > 1:
131+
require(len(tools) == len(parameters), "Both 'tools'({}) and 'parameters'({}) arguments must\
132+
contain the same number of elements".format(len(tools), len(parameters)))
133+
# If tools is a list containing multiple docker container name strings
134+
# then format the docker call in the 'pipe-of-containers' mode
135+
docker_call.extend(base_docker_call + ['--entrypoint /bin/bash', tools[0], '-c \'{}\''.format(parameters[0])])
136+
for i in xrange(1, len(tools)):
137+
docker_call.extend(['|'] + base_docker_call + ['-i --entrypoint /bin/bash', tools[i], '-c \'{}\''.format(parameters[i])])
138+
docker_call = " ".join(docker_call)
139+
_log.debug("Calling docker with %s." % docker_call)
140+
141+
elif len(tools) == 1:
142+
# If tools is a list containing a single docker container name string
143+
# then format the docker call in the 'pipe-in-single-container' mode
144+
docker_call.extend(base_docker_call + ['--entrypoint /bin/bash', tools[0], '-c \'{}\''.format(" | ".join(parameters))])
145+
docker_call = " ".join(docker_call)
146+
_log.debug("Calling docker with %s." % docker_call)
147+
148+
else:
149+
assert False
150+
else:
151+
docker_call = " ".join(base_docker_call + [tool] + parameters)
152+
_log.debug("Calling docker with %s." % docker_call)
88153

89-
_log.debug("Calling docker with %s." % " ".join(base_docker_call + [tool] + parameters))
90-
91-
docker_call = base_docker_call + [tool] + parameters
92-
154+
93155
try:
94156
if outfile:
95-
subprocess.check_call(docker_call, stdout=outfile)
157+
if errfile:
158+
subprocess.check_call(docker_call, stdout=outfile, stderr=errfile, shell=True)
159+
else:
160+
subprocess.check_call(docker_call, stdout=outfile, shell=True)
96161
else:
97162
if check_output:
98-
return subprocess.check_output(docker_call)
163+
if return_stderr:
164+
return subprocess.check_output(docker_call, shell=True, stderr=subprocess.STDOUT)
165+
else:
166+
return subprocess.check_output(docker_call, shell=True)
99167
else:
100-
subprocess.check_call(docker_call)
168+
subprocess.check_call(docker_call, shell=True)
101169
# Fix root ownership of output files
102170
except:
103171
# Panic avoids hiding the exception raised in the try block
104172
with panic():
105-
_fix_permissions(base_docker_call, tool, work_dir)
173+
_fix_permissions(base_docker_call, tool, tools, work_dir)
106174
else:
107-
_fix_permissions(base_docker_call, tool, work_dir)
175+
_fix_permissions(base_docker_call, tool, tools, work_dir)
108176

109177
for filename in outputs.keys():
110178
if not os.path.isabs(filename):
111179
filename = os.path.join(work_dir, filename)
112180
assert(os.path.isfile(filename))
113181

114182

115-
def _fix_permissions(base_docker_call, tool, work_dir):
116-
"""
183+
def _fix_permissions(base_docker_call, tool, tools, work_dir):
184+
"""
117185
Fix permission of a mounted Docker directory by reusing the tool
118186
119187
:param list base_docker_call: Docker run parameters
@@ -122,5 +190,18 @@ def _fix_permissions(base_docker_call, tool, work_dir):
122190
"""
123191
base_docker_call.append('--entrypoint=chown')
124192
stat = os.stat(work_dir)
125-
command = base_docker_call + [tool] + ['-R', '{}:{}'.format(stat.st_uid, stat.st_gid), '/data']
126-
subprocess.check_call(command)
193+
if tools:
194+
command_list = []
195+
for tool in tools:
196+
command = base_docker_call + [tool] + ['-R', '{}:{}'.format(stat.st_uid, stat.st_gid), '/data']
197+
command_list.append(command)
198+
199+
for command in command_list:
200+
subprocess.check_call(command)
201+
else:
202+
command = base_docker_call + [tool] + ['-R', '{}:{}'.format(stat.st_uid, stat.st_gid), '/data']
203+
subprocess.check_call(command)
204+
205+
206+
207+

src/toil_scripts/lib/test/test_programs.py

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import os
2-
2+
import re
33

44
def test_docker_call(tmpdir):
55
from toil_scripts.lib.programs import docker_call
@@ -12,3 +12,23 @@ def test_docker_call(tmpdir):
1212
with open(fpath, 'w') as f:
1313
docker_call(tool='ubuntu', env=dict(foo='bar'), parameters=['printenv', 'foo'], outfile=f)
1414
assert open(fpath).read() == 'bar\n'
15+
16+
# Test pipe functionality
17+
# download ubuntu docker image
18+
docker_call(work_dir=work_dir, tool="ubuntu")
19+
command1 = ['head -c 1G /dev/urandom | tee /data/first', 'gzip', 'gunzip', 'md5sum 1>&2']
20+
command2 = ['md5sum /data/first 1>&2']
21+
# Test 'pipe-in-single-container' mode
22+
docker_tools1=['ubuntu']
23+
stdout1 = docker_call(work_dir=work_dir, parameters=command1, tools=docker_tools1, check_output=True, return_stderr=True)
24+
stdout2 = docker_call(work_dir=work_dir, parameters=command2, tool='ubuntu', check_output=True, return_stderr=True)
25+
test1 = re.findall(r"([a-fA-F\d]{32})", stdout1)
26+
test2 = re.findall(r"([a-fA-F\d]{32})", stdout2)
27+
assert test1[0] == test2[0]
28+
# Test 'pipe-of-containers' mode
29+
docker_tools2=['ubuntu', 'ubuntu', 'ubuntu', 'ubuntu']
30+
stdout1 = docker_call(work_dir=work_dir, parameters=command1, tools=docker_tools2, check_output=True, return_stderr=True)
31+
stdout2 = docker_call(work_dir=work_dir, parameters=command2, tool='ubuntu', check_output=True, return_stderr=True)
32+
test1 = re.findall(r"([a-fA-F\d]{32})", stdout1)
33+
test2 = re.findall(r"([a-fA-F\d]{32})", stdout2)
34+
assert test1[0] == test2[0]

0 commit comments

Comments
 (0)