6
6
import subprocess
7
7
import tempfile
8
8
import time
9
- from typing import List , Optional , Union
9
+ from typing import Callable , List , Optional , Union
10
10
import attr
11
11
from collections import OrderedDict
12
12
13
13
import hail as hl
14
14
15
- from data_pipeline .config import config
15
+ from data_pipeline .config import PipelineConfig
16
16
17
17
logger = logging .getLogger ("gnomad_data_pipeline" )
18
18
logger .setLevel (logging .INFO )
@@ -57,23 +57,24 @@ def modified_time(path):
57
57
return file_system .modified_time (check_path )
58
58
59
59
60
- _pipeline_config = {}
60
+ # _pipeline_config = {}
61
61
62
- _pipeline_config ["output_root" ] = config .data_paths .root
62
+ # _pipeline_config["output_root"] = config.output_paths .root
63
63
64
64
65
65
@attr .define
66
66
class DownloadTask :
67
+ _config : PipelineConfig
67
68
_name : str
68
69
_url : str
69
70
_output_path : str
70
71
71
72
@classmethod
72
- def create (cls , name , url , output_path ):
73
- return cls (name , url , output_path )
73
+ def create (cls , config : PipelineConfig , name : str , url : str , output_path : str ):
74
+ return cls (config , name , url , output_path )
74
75
75
76
def get_output_path (self ):
76
- return _pipeline_config [ "output_root" ] + self ._output_path
77
+ return self . _config . output_paths . root + self ._output_path
77
78
78
79
def should_run (self ):
79
80
output_path = self .get_output_path ()
@@ -82,6 +83,9 @@ def should_run(self):
82
83
83
84
return (False , None )
84
85
86
+ def get_inputs (self ):
87
+ raise NotImplementedError ("Method not valid for DownloadTask" )
88
+
85
89
def run (self , force = False ):
86
90
output_path = self .get_output_path ()
87
91
should_run , reason = (True , "Forced" ) if force else self .should_run ()
@@ -106,17 +110,19 @@ def run(self, force=False):
106
110
107
111
@attr .define
108
112
class Task :
113
+ _config : PipelineConfig
109
114
_name : str
110
- _task_function : str
115
+ _task_function : Callable
111
116
_output_path : str
112
117
_inputs : dict
113
118
_params : dict
114
119
115
120
@classmethod
116
121
def create (
117
122
cls ,
123
+ config : PipelineConfig ,
118
124
name : str ,
119
- task_function : str ,
125
+ task_function : Callable ,
120
126
output_path : str ,
121
127
inputs : Optional [dict ] = None ,
122
128
params : Optional [dict ] = None ,
@@ -125,10 +131,10 @@ def create(
125
131
inputs = {}
126
132
if params is None :
127
133
params = {}
128
- return cls (name , task_function , output_path , inputs , params )
134
+ return cls (config , name , task_function , output_path , inputs , params )
129
135
130
136
def get_output_path (self ):
131
- return _pipeline_config [ "output_root" ] + self ._output_path
137
+ return self . _config . output_paths . root + self ._output_path
132
138
133
139
def get_inputs (self ):
134
140
paths = {}
@@ -138,7 +144,7 @@ def get_inputs(self):
138
144
paths .update ({k : v .get_output_path ()})
139
145
else :
140
146
logger .info (v )
141
- paths .update ({k : os .path .join (config . data_paths .root , v )})
147
+ paths .update ({k : os .path .join (self . _config . output_paths .root , v )})
142
148
143
149
return paths
144
150
@@ -173,14 +179,14 @@ def run(self, force=False):
173
179
174
180
@attr .define
175
181
class Pipeline :
176
- name : str
182
+ config : PipelineConfig
177
183
_tasks : OrderedDict = OrderedDict ()
178
184
_outputs : dict = {}
179
185
180
186
def add_task (
181
187
self ,
182
188
name : str ,
183
- task_function : str ,
189
+ task_function : Callable ,
184
190
output_path : str ,
185
191
inputs : Optional [dict ] = None ,
186
192
params : Optional [dict ] = None ,
@@ -189,12 +195,12 @@ def add_task(
189
195
inputs = {}
190
196
if params is None :
191
197
params = {}
192
- task = Task .create (name , task_function , output_path , inputs , params )
198
+ task = Task .create (self . config , name , task_function , output_path , inputs , params )
193
199
self ._tasks [name ] = task
194
200
return task
195
201
196
202
def add_download_task (self , name , * args , ** kwargs ) -> DownloadTask :
197
- task = DownloadTask .create (name , * args , ** kwargs )
203
+ task = DownloadTask .create (self . config , name , * args , ** kwargs )
198
204
self ._tasks [name ] = task
199
205
return task
200
206
@@ -232,8 +238,8 @@ def run_pipeline(pipeline):
232
238
group .add_argument ("--force-all" , action = "store_true" )
233
239
args = parser .parse_args ()
234
240
235
- if args .output_root :
236
- _pipeline_config ["output_root" ] = args .output_root .rstrip ("/" )
241
+ # if args.output_root:
242
+ # _pipeline_config["output_root"] = args.output_root.rstrip("/")
237
243
238
244
pipeline_args = {}
239
245
if args .force_all :
0 commit comments