forked from jigglepuff/StlOpenDataEtl
-
Notifications
You must be signed in to change notification settings - Fork 6
/
app.py
57 lines (48 loc) · 1.88 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
'''
StlOpenDataEtl

Command-line entry point: runs the fetch, parse, extract, transform and load
stages in sequence.
'''
import os
import sys
import logging.config
from etl.constants import *
from etl import command_line_args, extractor, fetcher, fetcher_local, loader, \
parser, transformer, utils
# Absolute path of the directory that contains this script.
ROOT_DIR = os.path.dirname(os.path.abspath(__file__))


if __name__ == '__main__':
    # NOTE: every 'data/...' path below is a relative path, so it is resolved
    # against the current working directory, not against ROOT_DIR.

    # Parse command line arguments
    cli_args = command_line_args.getCommandLineArgs()

    # Setup logging
    logging.config.fileConfig('data/logger/config.ini')
    logger = logging.getLogger(__name__)

    # Notify the user whether the app will be using the prod or the dev db,
    # and read the matching database configuration.
    if cli_args.db == 'prod':
        logger.info('Using production database...')
        db_yaml = utils.get_yaml('data/database/config_prod.yml')
    else:
        logger.info('Using development database...')
        db_yaml = utils.get_yaml('data/database/config_dev.yml')

    # Delete local db from previous run
    utils.silentremove(db_yaml['database_credentials']['db_name'])

    # Fetcher: use the files named on the command line when given, otherwise
    # the sources listed in sources.yml.
    # The stage objects get their own names (data_fetcher, response_parser,
    # ...) so that the imported modules are not rebound to instances.
    if cli_args.local_sources:
        filenames = cli_args.local_sources
        # Lazy %-style arguments: the message is only built if it is emitted.
        logger.info('Using local data files: %s',
                    ' '.join(map(str, filenames)))
        data_fetcher = fetcher_local.FetcherLocal()
        responses = data_fetcher.fetch_all(filenames)
    else:
        data_fetcher = fetcher.Fetcher()
        src_yaml = utils.get_yaml('data/sources/sources.yml')
        responses = data_fetcher.fetch_all(src_yaml)

    # Parser
    response_parser = parser.Parser()
    responses = response_parser.parse_all(responses)

    # Extractor
    entity_extractor = extractor.Extractor()
    entity_dict = entity_extractor.extract_all(responses)

    # Transformer
    transform_tasks = utils.get_yaml(
        'data/transform_tasks/transform_tasks.yml')
    data_transformer = transformer.Transformer()
    transformed_dict = data_transformer.transform_all(
        entity_dict, transform_tasks)

    # Loader
    db_loader = loader.Loader(db_yaml)
    db_loader.load_all(transformed_dict)