1
+ import os
1
2
import sys
2
3
from typing import Any , Optional
3
4
4
5
import click
6
+ import pandas as pd
5
7
6
8
from gcf_data_mapper .parsers .collection import collection
7
9
from gcf_data_mapper .parsers .document import document
8
10
from gcf_data_mapper .parsers .family import family
11
+ from gcf_data_mapper .read import read
9
12
10
13
11
14
@click .command ()
12
- @click .option ("--debug/--no-debug" , default = False )
15
+ @click .option (
16
+ "--gcf_projects_file" ,
17
+ default = os .path .join (os .getcwd (), "data" , "gcf-projects.json" ),
18
+ type = click .Path (exists = True ),
19
+ )
20
+ @click .option (
21
+ "--mcf_projects_file" ,
22
+ # trunk-ignore(cspell/error)
23
+ default = os .path .join (os .getcwd (), "data" , "MCFprojects.csv" ),
24
+ type = click .Path (exists = True ),
25
+ )
26
+ @click .option (
27
+ "--mcf_docs_file" ,
28
+ # trunk-ignore(cspell/error)
29
+ default = os .path .join (os .getcwd (), "data" , "MCFdocuments-v2.csv" ),
30
+ type = click .Path (exists = True ),
31
+ )
32
+ @click .option (
33
+ "--output_file" ,
34
+ default = os .path .join (os .getcwd (), "output.json" ),
35
+ type = click .Path (exists = False ),
36
+ )
37
+ @click .option ("--debug/--no-debug" , default = True )
13
38
@click .version_option ("0.1.0" , "--version" , "-v" , help = "Show the version and exit." )
14
- def entrypoint (debug : bool ):
39
+ def entrypoint (
40
+ gcf_projects_file , mcf_projects_file , mcf_docs_file , output_file , debug : bool
41
+ ):
15
42
"""Simple program that wrangles GCF data into bulk import format.
16
43
44
+ :param str gcf_projects_file: The GCF projects filename.
45
+ :param str mcf_projects_file: The MCF projects filename.
46
+ :param str mcf_docs_file: The MCF documents filename.
47
+ :param str output_file: The output filename.
17
48
:param bool debug: Whether debug mode is on.
18
49
"""
19
50
click .echo ("🚀 Starting the GCF data mapping process." )
51
+ if debug :
52
+ click .echo ("📝 Input files:" )
53
+ click .echo (f"- { click .format_filename (gcf_projects_file )} " )
54
+ click .echo (f"- { click .format_filename (mcf_projects_file )} " )
55
+ click .echo (f"- { click .format_filename (mcf_docs_file )} " )
20
56
21
57
try :
22
- wrangle_to_json (debug )
58
+ project_info , doc_info = read (
59
+ gcf_projects_file , mcf_projects_file , mcf_docs_file , debug
60
+ )
61
+ mapped_data = wrangle_to_json (project_info , doc_info , debug )
23
62
except Exception as e :
24
63
click .echo (f"❌ Failed to map GCF data to expected JSON. Error: { e } ." )
25
64
sys .exit (1 )
@@ -28,31 +67,48 @@ def entrypoint(debug: bool):
28
67
29
68
click .echo ()
30
69
click .echo ("🚀 Dumping GCF data to output file" )
31
- dump_output ()
70
+ dump_output (mapped_data , output_file , debug )
32
71
click .echo ("✅ Finished dumping mapped GCF data." )
33
72
34
73
35
def wrangle_to_json(
    project_info: pd.DataFrame, doc_info: pd.DataFrame, debug: bool
) -> dict[str, list[Optional[dict[str, Any]]]]:
    """Assemble the mapped GCF entities into one dictionary.

    The returned dictionary is what later gets dumped as JSON to the
    output file.

    :param pd.DataFrame project_info: The GCF and MCF joined project
        info, passed to the family parser.
    :param pd.DataFrame doc_info: The MCF docs info, passed to the
        document parser.
    :param bool debug: Whether debug mode is on; forwarded to every
        parser.
    :return dict[str, list[Optional[dict[str, Any]]]]: The mapped data
        keyed by entity type ("collections", "families", "documents",
        "events").
    """
    # Parsers run in the same order as the keys appear in the output.
    mapped_collections = collection(debug)
    mapped_families = family(project_info, debug)
    mapped_documents = document(doc_info, debug)

    mapped: dict[str, list[Optional[dict[str, Any]]]] = {}
    mapped["collections"] = mapped_collections
    mapped["families"] = mapped_families
    mapped["documents"] = mapped_documents
    # No event parser is called here; events are always an empty list.
    mapped["events"] = []
    return mapped
52
96
53
97
54
def dump_output(
    mapped_data: dict[str, list[Optional[dict[str, Any]]]],
    output_file: str,
    debug: bool,
):
    """Dump the wrangled JSON to the output file.

    The file is created if it does not exist and overwritten if it
    does.

    :param dict[str, list[Optional[dict[str, Any]]]] mapped_data: The
        mapped GCF data.
    :param str output_file: The output filename.
    :param bool debug: Whether debug mode is on.
    :raises OSError: If the output file cannot be opened for writing.
    :raises TypeError: If the mapped data contains values that the
        json module cannot serialise.
    """
    # Imported locally so the function does not rely on a module-level
    # import that may not be present in the file.
    import json

    if debug:
        click.echo(f"📝 Output file {click.format_filename(output_file)}")

    # Explicit UTF-8 plus ensure_ascii=False keeps non-ASCII text
    # readable in the output rather than \u-escaped.
    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(mapped_data, f, ensure_ascii=False, indent=2)
56
112
57
113
58
114
if __name__ == "__main__" :
0 commit comments