|
19 | 19 |
|
20 | 20 | @click.command(context_settings=dict(ignore_unknown_options=True))
|
21 | 21 | @click.argument("scheduler", type=str, required=False)
|
22 |
| -@click.option( |
23 |
| - "--tls-ca-file", |
24 |
| - type=pem_file_option_type, |
25 |
| - default=None, |
26 |
| - help="CA cert(s) file for TLS (in PEM format)", |
27 |
| -) |
28 |
| -@click.option( |
29 |
| - "--tls-cert", |
30 |
| - type=pem_file_option_type, |
31 |
| - default=None, |
32 |
| - help="certificate file for TLS (in PEM format)", |
33 |
| -) |
34 |
| -@click.option( |
35 |
| - "--tls-key", |
36 |
| - type=pem_file_option_type, |
37 |
| - default=None, |
38 |
| - help="private key file for TLS (in PEM format)", |
39 |
| -) |
40 |
| -@click.option("--dashboard-address", type=str, default=":0", help="dashboard address") |
41 |
| -@click.option( |
42 |
| - "--dashboard/--no-dashboard", |
43 |
| - "dashboard", |
44 |
| - default=True, |
45 |
| - show_default=True, |
46 |
| - required=False, |
47 |
| - help="Launch dashboard", |
| 22 | +@click.argument( |
| 23 | + "preload_argv", nargs=-1, type=click.UNPROCESSED, callback=validate_preload_argv |
48 | 24 | )
|
49 | 25 | @click.option(
|
50 | 26 | "--host",
|
51 | 27 | type=str,
|
52 | 28 | default=None,
|
53 |
| - help="Serving host. Should be an ip address that is" |
54 |
| - " visible to the scheduler and other workers. " |
55 |
| - "See --listen-address and --contact-address if you " |
56 |
| - "need different listen and contact addresses. " |
57 |
| - "See --interface.", |
| 29 | + help="""IP address of serving host; should be visible to the scheduler and other |
| 30 | + workers. Can be a string (like ``"127.0.0.1"``) or ``None`` to fall back on the |
| 31 | + address of the interface specified by ``--interface`` or the default interface.""", |
58 | 32 | )
|
59 | 33 | @click.option(
|
60 |
| - "--interface", |
61 |
| - type=str, |
62 |
| - default=None, |
63 |
| - help="The external interface used to connect to the scheduler, usually " |
64 |
| - "an ethernet interface is used for connection, and not an InfiniBand " |
65 |
| - "interface (if one is available).", |
| 34 | + "--nthreads", |
| 35 | + type=int, |
| 36 | + default=1, |
| 37 | + show_default=True, |
| 38 | + help="Number of threads to be used for each Dask worker process.", |
66 | 39 | )
|
67 |
| -@click.option("--nthreads", type=int, default=1, help="Number of threads per process.") |
68 | 40 | @click.option(
|
69 | 41 | "--name",
|
70 | 42 | type=str,
|
71 | 43 | default=None,
|
72 |
| - help="A unique name for this worker like 'worker-1'. " |
73 |
| - "If used with --nprocs then the process number " |
74 |
| - "will be appended like name-0, name-1, name-2, ...", |
| 44 | + help="""A unique name for the worker. Can be a string (like ``"worker-1"``) or |
| 45 | + ``None`` for a nameless worker. If used with ``--nprocs``, then the process number |
| 46 | + will be appended to the worker name, e.g. ``"worker-1-0"``, ``"worker-1-1"``, |
| 47 | + ``"worker-1-2"``.""", |
75 | 48 | )
|
76 | 49 | @click.option(
|
77 | 50 | "--memory-limit",
|
78 | 51 | default="auto",
|
79 |
| - help="Bytes of memory per process that the worker can use. " |
80 |
| - "This can be an integer (bytes), " |
81 |
| - "float (fraction of total system memory), " |
82 |
| - "string (like 5GB or 5000M), " |
83 |
| - "'auto', or zero for no memory management", |
| 52 | + show_default=True, |
| 53 | + help="""Bytes of memory per process that the worker can use. Can be an integer |
| 54 | + (bytes), float (fraction of total system memory), string (like ``"5GB"`` or |
| 55 | + ``"5000M"``), ``"auto"``, or 0 for no memory management.""", |
84 | 56 | )
|
85 | 57 | @click.option(
|
86 | 58 | "--device-memory-limit",
|
87 | 59 | default="0.8",
|
88 |
| - help="Specifies the size of the CUDA device LRU cache, which " |
89 |
| - "is used to determine when the worker starts spilling to host " |
90 |
| - "memory. This can be a float (fraction of total device " |
91 |
| - "memory), an integer (bytes), a string (like 5GB or 5000M), " |
92 |
| - "and 'auto' or 0 to disable spilling to host (i.e., allow " |
93 |
| - "full device memory usage). Default is 0.8, 80% of the " |
94 |
| - "worker's total device memory.", |
| 60 | + show_default=True, |
| 61 | + help="""Size of the CUDA device LRU cache, which is used to determine when the |
| 62 | + worker starts spilling to host memory. Can be an integer (bytes), float (fraction of |
| 63 | + total device memory), string (like ``"5GB"`` or ``"5000M"``), or ``"auto"`` or 0 to |
| 64 | + disable spilling to host (i.e. allow full device memory usage).""", |
95 | 65 | )
|
96 | 66 | @click.option(
|
97 | 67 | "--rmm-pool-size",
|
98 | 68 | default=None,
|
99 |
| - help="If specified, initialize each worker with an RMM pool of " |
100 |
| - "the given size, otherwise no RMM pool is created. This can be " |
101 |
| - "an integer (bytes) or string (like 5GB or 5000M)." |
102 |
| - "NOTE: This size is a per worker (i.e., per GPU) configuration, " |
103 |
| - "and not cluster-wide!", |
| 69 | + help="""RMM pool size to initialize each worker with. Can be an integer (bytes), |
| 70 | + string (like ``"5GB"`` or ``"5000M"``), or ``None`` to disable RMM pools. |
| 71 | +
|
| 72 | + .. note:: |
| 73 | + This size is a per-worker configuration, and not cluster-wide.""", |
104 | 74 | )
|
105 | 75 | @click.option(
|
106 | 76 | "--rmm-managed-memory/--no-rmm-managed-memory",
|
107 | 77 | default=False,
|
108 |
| - help="If enabled, initialize each worker with RMM and set it to " |
109 |
| - "use managed memory. If disabled, RMM may still be used if " |
110 |
| - "--rmm-pool-size is specified, but in that case with default " |
111 |
| - "(non-managed) memory type." |
112 |
| - "WARNING: managed memory is currently incompatible with NVLink, " |
113 |
| - "trying to enable both will result in an exception.", |
| 78 | + show_default=True, |
| 79 | + help="""Initialize each worker with RMM and set it to use managed memory. If |
| 80 | + disabled, RMM may still be used by specifying ``--rmm-pool-size``. |
| 81 | +
|
| 82 | + .. warning:: |
| 83 | + Managed memory is currently incompatible with NVLink. Trying to enable both will |
| 84 | + result in failure.""", |
114 | 85 | )
|
115 | 86 | @click.option(
|
116 | 87 | "--rmm-async/--no-rmm-async",
|
|
119 | 90 | help="""Initialize each worker with RMM and set it to use RMM's asynchronous
|
120 | 91 | allocator. See ``rmm.mr.CudaAsyncMemoryResource`` for more info.
|
121 | 92 |
|
122 |
| - .. note:: |
| 93 | + .. warning:: |
123 | 94 | The asynchronous allocator requires CUDA Toolkit 11.2 or newer. It is also
|
124 | 95 | incompatible with RMM pools and managed memory, trying to enable both will
|
125 | 96 | result in failure.""",
|
126 | 97 | )
|
127 | 98 | @click.option(
|
128 | 99 | "--rmm-log-directory",
|
129 | 100 | default=None,
|
130 |
| - help="Directory to write per-worker RMM log files to; the client " |
131 |
| - "and scheduler are not logged here." |
132 |
| - "NOTE: Logging will only be enabled if --rmm-pool-size, " |
133 |
| - "--rmm-managed-memory, or --rmm-async are specified.", |
| 101 | + help="""Directory to write per-worker RMM log files to. The client and scheduler are |
| 102 | + not logged here. Can be a string (like ``"/path/to/logs/"``) or ``None`` to disable |
| 103 | + logging. |
| 104 | +
|
| 105 | + .. note:: |
| 106 | + Logging will only be enabled if ``--rmm-pool-size``, ``--rmm-managed-memory``, |
| 107 | + or ``--rmm-async`` are specified.""", |
| 108 | +) |
| 109 | +@click.option( |
| 110 | + "--pid-file", type=str, default="", help="File to write the process PID.", |
| 111 | +) |
| 112 | +@click.option( |
| 113 | + "--resources", |
| 114 | + type=str, |
| 115 | + default="", |
| 116 | + help="""Resources for task constraints like ``"GPU=2 MEM=10e9"``. Resources are |
| 117 | + applied separately to each worker process (only relevant when starting multiple |
| 118 | + worker processes with ``--nprocs``).""", |
134 | 119 | )
|
135 | 120 | @click.option(
|
136 |
| - "--reconnect/--no-reconnect", |
| 121 | + "--dashboard/--no-dashboard", |
| 122 | + "dashboard", |
137 | 123 | default=True,
|
138 |
| - help="Reconnect to scheduler if disconnected", |
| 124 | + show_default=True, |
| 125 | + required=False, |
| 126 | + help="Launch the dashboard.", |
139 | 127 | )
|
140 |
| -@click.option("--pid-file", type=str, default="", help="File to write the process PID") |
141 | 128 | @click.option(
|
142 |
| - "--local-directory", default=None, type=str, help="Directory to place worker files" |
| 129 | + "--dashboard-address", |
| 130 | + type=str, |
| 131 | + default=":0", |
| 132 | + show_default=True, |
| 133 | + help="Relative address to serve the dashboard (if enabled).", |
143 | 134 | )
|
144 | 135 | @click.option(
|
145 |
| - "--resources", |
| 136 | + "--local-directory", |
| 137 | + default=None, |
146 | 138 | type=str,
|
147 |
| - default="", |
148 |
| - help='Resources for task constraints like "GPU=2 MEM=10e9". ' |
149 |
| - "Resources are applied separately to each worker process " |
150 |
| - "(only relevant when starting multiple worker processes with '--nprocs').", |
| 139 | + help="""Path on local machine to store temporary files. Can be a string (like |
| 140 | + ``"path/to/files"``) or ``None`` to fall back on the value of |
| 141 | + ``dask.temporary-directory`` in the local Dask configuration, using the current |
| 142 | + working directory if this is not set.""", |
151 | 143 | )
|
152 | 144 | @click.option(
|
153 | 145 | "--scheduler-file",
|
154 | 146 | type=str,
|
155 | 147 | default="",
|
156 |
| - help="Filename to JSON encoded scheduler information. " |
157 |
| - "Use with dask-scheduler --scheduler-file", |
| 148 | + help="""Filename to JSON encoded scheduler information. To be used in conjunction |
| 149 | + with the equivalent ``dask-scheduler`` option.""", |
158 | 150 | )
|
159 | 151 | @click.option(
|
160 |
| - "--dashboard-prefix", type=str, default=None, help="Prefix for the Dashboard" |
| 152 | + "--interface", |
| 153 | + type=str, |
| 154 | + default=None, |
| 155 | + help="""External interface used to connect to the scheduler. Usually an ethernet |
| 156 | + interface is used for connection, and not an InfiniBand interface (if one is |
| 157 | + available). Can be a string (like ``"eth0"`` for NVLink or ``"ib0"`` for |
| 158 | + InfiniBand) or ``None`` to fall back on the default interface.""", |
161 | 159 | )
|
162 | 160 | @click.option(
|
163 | 161 | "--preload",
|
164 | 162 | type=str,
|
165 | 163 | multiple=True,
|
166 | 164 | is_eager=True,
|
167 |
| - help="Module that should be loaded by each worker process " |
168 |
| - 'like "foo.bar" or "/path/to/foo.py"', |
| 165 | + help="""Module that should be loaded by each worker process like ``"foo.bar"`` or |
| 166 | + ``"/path/to/foo.py"``.""", |
169 | 167 | )
|
170 |
| -@click.argument( |
171 |
| - "preload_argv", nargs=-1, type=click.UNPROCESSED, callback=validate_preload_argv |
| 168 | +@click.option( |
| 169 | + "--dashboard-prefix", |
| 170 | + type=str, |
| 171 | + default=None, |
| 172 | + help="""Prefix for the dashboard. Can be a string or ``None`` for no |
| 173 | + prefix.""", |
| 174 | +) |
| 175 | +@click.option( |
| 176 | + "--tls-ca-file", |
| 177 | + type=pem_file_option_type, |
| 178 | + default=None, |
| 179 | + help="""CA certificate(s) file for TLS (in PEM format). Can be a string (like |
| 180 | + ``"path/to/certs"``), or ``None`` for no certificate(s).""", |
| 181 | +) |
| 182 | +@click.option( |
| 183 | + "--tls-cert", |
| 184 | + type=pem_file_option_type, |
| 185 | + default=None, |
| 186 | + help="""Certificate file for TLS (in PEM format). Can be a string (like |
| 187 | + ``"path/to/certs"``), or ``None`` for no certificate(s).""", |
| 188 | +) |
| 189 | +@click.option( |
| 190 | + "--tls-key", |
| 191 | + type=pem_file_option_type, |
| 192 | + default=None, |
| 193 | + help="""Private key file for TLS (in PEM format). Can be a string (like |
| 194 | + ``"path/to/certs"``), or ``None`` for no private key.""", |
172 | 195 | )
|
173 | 196 | @click.option(
|
174 | 197 | "--enable-tcp-over-ucx/--disable-tcp-over-ucx",
|
175 | 198 | default=False,
|
176 |
| - help="Enable TCP communication over UCX", |
| 199 | + show_default=True, |
| 200 | + help="""Set environment variables to enable TCP over UCX, even if InfiniBand and |
| 201 | + NVLink are not supported or disabled.""", |
177 | 202 | )
|
178 | 203 | @click.option(
|
179 | 204 | "--enable-infiniband/--disable-infiniband",
|
180 | 205 | default=False,
|
181 |
| - help="Enable InfiniBand communication", |
| 206 | + show_default=True, |
| 207 | + help="""Set environment variables to enable UCX over InfiniBand, implies |
| 208 | + ``--enable-tcp-over-ucx``.""", |
182 | 209 | )
|
183 | 210 | @click.option(
|
184 |
| - "--enable-rdmacm/--disable-rdmacm", |
| 211 | + "--enable-nvlink/--disable-nvlink", |
185 | 212 | default=False,
|
186 |
| - help="Enable RDMA connection manager, currently requires InfiniBand enabled.", |
| 213 | + show_default=True, |
| 214 | + help="""Set environment variables to enable UCX over NVLink, implies |
| 215 | + ``--enable-tcp-over-ucx``.""", |
187 | 216 | )
|
188 | 217 | @click.option(
|
189 |
| - "--enable-nvlink/--disable-nvlink", |
| 218 | + "--enable-rdmacm/--disable-rdmacm", |
190 | 219 | default=False,
|
191 |
| - help="Enable NVLink communication", |
| 220 | + show_default=True, |
| 221 | + help="""Set environment variables to enable UCX RDMA connection manager support, |
| 222 | + requires ``--enable-infiniband``.""", |
192 | 223 | )
|
193 | 224 | @click.option(
|
194 | 225 | "--net-devices",
|
195 | 226 | type=str,
|
196 | 227 | default=None,
|
197 |
| - help="When None (default), 'UCX_NET_DEVICES' will be left to its default. " |
198 |
| - "Otherwise, it must be a non-empty string with the interface name, such as " |
199 |
| - "such as 'eth0' or 'auto' to allow for automatically choosing the closest " |
200 |
| - "interface based on the system's topology. Normally used only with " |
201 |
| - "--enable-infiniband to specify the interface to be used by the worker, " |
202 |
| - "such as 'mlx5_0:1' or 'ib0'. " |
203 |
| - "WARNING: 'auto' requires UCX-Py to be installed and compiled with hwloc " |
204 |
| - "support. Additionally that will always use the closest interface, and " |
205 |
| - "that may cause unexpected errors if that interface is not properly " |
206 |
| - "configured or is disconnected, for that reason it's limited to " |
207 |
| - "InfiniBand only and will still cause unpredictable errors if not _ALL_ " |
208 |
| - "interfaces are connected and properly configured.", |
| 228 | + help="""Interface(s) used by workers for UCX communication. Can be a string (like |
| 229 | + ``"eth0"`` for NVLink or ``"mlx5_0:1"``/``"ib0"`` for InfiniBand), ``"auto"`` |
| 230 | + (requires ``--enable-infiniband``) to pick the optimal interface per-worker based on |
| 231 | + the system's topology, or ``None`` to stay with the default value of ``"all"`` (use |
| 232 | + all available interfaces). |
| 233 | +
|
| 234 | + .. warning:: |
| 235 | + ``"auto"`` requires UCX-Py to be installed and compiled with hwloc support. |
| 236 | + Unexpected errors can occur when using ``"auto"`` if any interfaces are |
| 237 | + disconnected or improperly configured.""", |
209 | 238 | )
|
210 | 239 | @click.option(
|
211 | 240 | "--enable-jit-unspill/--disable-jit-unspill",
|
212 | 241 | default=None,
|
213 |
| - help="Enable just-in-time unspilling. This is experimental and doesn't " |
214 |
| - "support memory spilling to disk Please see proxy_object.ProxyObject " |
215 |
| - "and proxify_host_file.ProxifyHostFile.", |
| 242 | + help="""Enable just-in-time unspilling. Can be a boolean or ``None`` to fall back on |
| 243 | + the value of ``dask.jit-unspill`` in the local Dask configuration, disabling |
| 244 | + unspilling if this is not set. |
| 245 | +
|
| 246 | + .. note:: |
| 247 | + This is experimental and doesn't support memory spilling to disk. See |
| 248 | + ``proxy_object.ProxyObject`` and ``proxify_host_file.ProxifyHostFile`` for more |
| 249 | + info.""", |
216 | 250 | )
|
217 | 251 | def main(
|
218 | 252 | scheduler,
|
|
0 commit comments