Skip to content

Commit ef328d2

Browse files
flaviuvadan and sambhav authored
Add check that auto-genned params do not have already set artifact names (#544)
Noticed that adding a param to a script and then setting an artifact as the value of that script input can result in both an artifact and a parameter being added to the template. # Reproducible example (main) ```python from hera.workflows import DAG, Artifact, Workflow, script @script(outputs=Artifact(name="result", path="/tmp/result")) def produce(): import pickle result = "foo testing" with open("/tmp/result", "wb") as f: pickle.dump(result, f) @script(inputs=Artifact(name="i", path="/tmp/i")) def consume(i): import pickle with open("/tmp/i", "rb") as f: i = pickle.load(f) print(i) with Workflow(generate_name="fv-test-", entrypoint="d") as w: with DAG(name="d"): p = produce() c = consume(arguments=Artifact(name="i", from_="{{tasks.produce.outputs.artifacts.result}}")) p >> c print(w.to_yaml()) ``` Above results in: ```yaml ... image: python:3.7 source: "import os\nimport sys\nsys.path.append(os.getcwd())\nimport pickle\n\ \nresult = \"foo testing\"\nwith open(\"/tmp/result\", \"wb\") as f:\n \ \ pickle.dump(result, f)\n" - inputs: artifacts: - name: i path: /tmp/i parameters: - name: i name: consume script: command: ... ``` # Post-fix (this branch, isolated to problem section) ```yaml ... \nresult = \"foo testing\"\nwith open(\"/tmp/result\", \"wb\") as f:\n \ \ pickle.dump(result, f)\n" - inputs: artifacts: - name: i path: /tmp/i name: consume script: command: ... ``` --------- Signed-off-by: Flaviu Vadan <[email protected]> Signed-off-by: Sambhav Kothari <[email protected]> Co-authored-by: Sambhav Kothari <[email protected]>
1 parent fe6ef10 commit ef328d2

File tree

5 files changed

+161
-2
lines changed

5 files changed

+161
-2
lines changed
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
# Script Auto Infer
2+
3+
4+
5+
6+
7+
8+
=== "Hera"
9+
10+
```python linenums="1"
11+
from hera.workflows import DAG, Artifact, Workflow, script
12+
13+
14+
@script(outputs=Artifact(name="result", path="/tmp/result"))
15+
def produce():
16+
import pickle
17+
18+
result = "foo testing"
19+
with open("/tmp/result", "wb") as f:
20+
pickle.dump(result, f)
21+
22+
23+
@script(inputs=Artifact(name="i", path="/tmp/i"))
24+
def consume(i):
25+
import pickle
26+
27+
with open("/tmp/i", "rb") as f:
28+
i = pickle.load(f)
29+
print(i)
30+
31+
32+
with Workflow(generate_name="fv-test-", entrypoint="d") as w:
33+
with DAG(name="d"):
34+
p = produce()
35+
c = consume(arguments=Artifact(name="i", from_="{{tasks.produce.outputs.artifacts.result}}"))
36+
p >> c
37+
```
38+
39+
=== "YAML"
40+
41+
```yaml linenums="1"
42+
apiVersion: argoproj.io/v1alpha1
43+
kind: Workflow
44+
metadata:
45+
generateName: fv-test-
46+
spec:
47+
entrypoint: d
48+
templates:
49+
- dag:
50+
tasks:
51+
- name: produce
52+
template: produce
53+
- arguments:
54+
artifacts:
55+
- from: '{{tasks.produce.outputs.artifacts.result}}'
56+
name: i
57+
depends: produce
58+
name: consume
59+
template: consume
60+
name: d
61+
- name: produce
62+
outputs:
63+
artifacts:
64+
- name: result
65+
path: /tmp/result
66+
script:
67+
command:
68+
- python
69+
image: python:3.7
70+
source: "import os\nimport sys\nsys.path.append(os.getcwd())\nimport pickle\n\
71+
\nresult = \"foo testing\"\nwith open(\"/tmp/result\", \"wb\") as f:\n \
72+
\ pickle.dump(result, f)\n"
73+
- inputs:
74+
artifacts:
75+
- name: i
76+
path: /tmp/i
77+
name: consume
78+
script:
79+
command:
80+
- python
81+
image: python:3.7
82+
source: "import os\nimport sys\nsys.path.append(os.getcwd())\nimport json\n\n\
83+
import pickle\n\nwith open(\"/tmp/i\", \"rb\") as f:\n i = pickle.load(f)\n\
84+
print(i)\n"
85+
```
86+
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
apiVersion: argoproj.io/v1alpha1
2+
kind: Workflow
3+
metadata:
4+
generateName: fv-test-
5+
spec:
6+
entrypoint: d
7+
templates:
8+
- dag:
9+
tasks:
10+
- name: produce
11+
template: produce
12+
- arguments:
13+
artifacts:
14+
- from: '{{tasks.produce.outputs.artifacts.result}}'
15+
name: i
16+
depends: produce
17+
name: consume
18+
template: consume
19+
name: d
20+
- name: produce
21+
outputs:
22+
artifacts:
23+
- name: result
24+
path: /tmp/result
25+
script:
26+
command:
27+
- python
28+
image: python:3.7
29+
source: "import os\nimport sys\nsys.path.append(os.getcwd())\nimport pickle\n\
30+
\nresult = \"foo testing\"\nwith open(\"/tmp/result\", \"wb\") as f:\n \
31+
\ pickle.dump(result, f)\n"
32+
- inputs:
33+
artifacts:
34+
- name: i
35+
path: /tmp/i
36+
name: consume
37+
script:
38+
command:
39+
- python
40+
image: python:3.7
41+
source: "import os\nimport sys\nsys.path.append(os.getcwd())\nimport json\n\n\
42+
import pickle\n\nwith open(\"/tmp/i\", \"rb\") as f:\n i = pickle.load(f)\n\
43+
print(i)\n"
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
from hera.workflows import DAG, Artifact, Workflow, script
2+
3+
4+
@script(outputs=Artifact(name="result", path="/tmp/result"))
5+
def produce():
6+
import pickle
7+
8+
result = "foo testing"
9+
with open("/tmp/result", "wb") as f:
10+
pickle.dump(result, f)
11+
12+
13+
@script(inputs=Artifact(name="i", path="/tmp/i"))
14+
def consume(i):
15+
import pickle
16+
17+
with open("/tmp/i", "rb") as f:
18+
i = pickle.load(f)
19+
print(i)
20+
21+
22+
with Workflow(generate_name="fv-test-", entrypoint="d") as w:
23+
with DAG(name="d"):
24+
p = produce()
25+
c = consume(arguments=Artifact(name="i", from_="{{tasks.produce.outputs.artifacts.result}}"))
26+
p >> c

src/hera/workflows/_mixins.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -436,8 +436,11 @@ def __call__(self, *args, **kwargs) -> Optional[SubNodeMixin]:
436436
# these are the already set parameters. If a user has already set a parameter argument, then Hera
437437
# uses the user-provided value rather than the inferred value
438438
arguments = self.arguments if isinstance(self.arguments, list) else [self.arguments] # type: ignore
439+
arguments = list(filter(lambda x: x is not None, arguments))
439440
parameters = [arg for arg in arguments if isinstance(arg, ModelParameter) or isinstance(arg, Parameter)]
440441
parameter_names = {p.name for p in parameters}
442+
artifacts = [arg for arg in arguments if isinstance(arg, ModelArtifact) or isinstance(arg, Artifact)]
443+
artifact_names = {a.name for a in artifacts}
441444
if "source" in kwargs and "with_param" in kwargs:
442445
# Argo uses the `inputs` field to indicate the expected parameters of a specific template whereas the
443446
# `arguments` are used to indicate exactly what _values_ are assigned to the set inputs. Here,
@@ -447,7 +450,7 @@ def __call__(self, *args, **kwargs) -> Optional[SubNodeMixin]:
447450
new_parameters = _get_params_from_source(kwargs["source"])
448451
if new_parameters is not None:
449452
for p in new_parameters:
450-
if p.name not in parameter_names:
453+
if p.name not in parameter_names and p.name not in artifact_names:
451454
arguments.append(p)
452455
elif "source" in kwargs and "with_items" in kwargs:
453456
# similarly to the above, we can infer the arguments to create based on the content of `with_items`.

src/hera/workflows/script.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -150,8 +150,9 @@ def _build_inputs(self) -> Optional[ModelInputs]:
150150
inputs = ModelInputs(parameters=func_parameters)
151151

152152
already_set_params = {p.name for p in inputs.parameters or []}
153+
already_set_artifacts = {p.name for p in inputs.artifacts or []}
153154
for param in func_parameters:
154-
if param.name not in already_set_params:
155+
if param.name not in already_set_params and param.name not in already_set_artifacts:
155156
inputs.parameters = [param] if inputs.parameters is None else inputs.parameters + [param]
156157
return inputs
157158

0 commit comments

Comments
 (0)