-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathrun_batch_wrapper.py
91 lines (64 loc) · 3.19 KB
/
run_batch_wrapper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
'''
Things to address
filename conventions for mp3
'''
import argparse,re,pandas as pd,os,csv,sys,glob
from num2words import num2words
from langdetect import detect
def dir_path(path):
    """Validate *path* as a readable directory (argparse ``type=`` callback).

    Returns *path* unchanged when it is an existing directory the current
    process can read; otherwise raises ``argparse.ArgumentTypeError``.
    """
    is_readable_dir = os.path.isdir(path) and os.access(path, os.R_OK)
    if not is_readable_dir:
        raise argparse.ArgumentTypeError(f"readable_dir:{path} is not a valid directory path")
    return path
def out_dir_path(path):
    """Validate *path* as a writable output directory (argparse ``type=`` callback).

    Args:
        path: Directory path supplied on the command line.

    Returns:
        *path* unchanged when it is an existing directory the current
        process can write to.

    Raises:
        argparse.ArgumentTypeError: If *path* is not a directory or is not
            writable.
    """
    if os.path.isdir(path) and os.access(path, os.W_OK):
        return path
    # The check is for writability (os.W_OK), so the message says so rather
    # than reusing the "readable_dir" wording of the input-directory check.
    raise argparse.ArgumentTypeError(
        f"writable_dir:{path} is not a valid writable directory path"
    )
def file_path(path):
    """Validate *path* as a readable regular file (argparse ``type=`` callback).

    Returns *path* unchanged when it is an existing file the current process
    can read; otherwise raises ``argparse.ArgumentTypeError``.
    """
    is_readable_file = os.path.isfile(path) and os.access(path, os.R_OK)
    if not is_readable_file:
        raise argparse.ArgumentTypeError(f"readable_dir:{path} is not a valid file path")
    return path
# Command-line interface: every option is required and takes exactly one path.
parser = argparse.ArgumentParser(
    description='This batch wrapper creates the arguments to call aeneas scripts. It expects input dirs to contain data in filesetid named dirs ')
required_args = parser.add_argument_group('required arguments')

# (flag, validator, help text), listed in the order they are registered.
_REQUIRED_PATH_ARGUMENTS = (
    ('-batch_file', file_path,
     'Full path to xlsx/csv core scripts for batch processing'),
    ('-batch_files_dir', dir_path,
     'Full path to dir for writing batch tmp output files'),
    ('-stock_to_fileset_mapping_file', file_path,
     'Full path to file for matching stock# to filesetid'),
    ('-input_audio_dir', dir_path,
     'Full path to dir for input'),
    ('-input_core_scripts_dir', dir_path,
     'Full path to dir having core scripts'),
    ('-input_qinfo_dir', dir_path,
     'Full path to dir having qinfo files'),
    ('-output_timing_dir', out_dir_path,
     'Full path to output dir for writing timing files in SAB format for upload'),
)
for _flag, _validator, _help_text in _REQUIRED_PATH_ARGUMENTS:
    required_args.add_argument(
        _flag, required=True, nargs=1, type=_validator, help=_help_text)

args = parser.parse_args()
print(args)

# nargs=1 stores each value as a one-element list, hence the [0].
batch_corescripts_file = args.batch_file[0]
stock_to_fileset_mapping_file = args.stock_to_fileset_mapping_file[0]
def load_excel_file(file):
    """Load an Excel (.xlsx) or CSV file into a DataFrame of strings.

    The loader is selected from the file extension, compared
    case-insensitively so that ``.xlsx``/``.XLSX`` and ``.csv``/``.CSV`` are
    all accepted.

    Args:
        file: Path to the ``.xlsx`` or ``.csv`` file to read.

    Returns:
        A pandas DataFrame with every column cast to ``str``.

    Raises:
        ValueError: If the extension is neither ``.xlsx`` nor ``.csv``.
    """
    _, extension = os.path.splitext(file)
    extension = extension.lower()

    if extension == '.xlsx':
        # read_excel is called without ``encoding``: current pandas rejects
        # that keyword for Excel files.
        input_df = pd.read_excel(file).astype(str)
        # Debug output; only emitted when the column is actually present so
        # that other sheets (without verse content) can be loaded too.
        if 'verse_content1' in input_df.columns:
            print(input_df['verse_content1'])
    elif extension == '.csv':
        input_df = pd.read_csv(file, encoding='utf-8').astype(str)
    else:
        # Previously fell through to an UnboundLocalError at the return.
        raise ValueError(
            f"Unsupported file extension {extension!r} for {file}; expected .xlsx or .csv"
        )
    return input_df
# Load the batch sheet and the stock#-to-fileset mapping, then inner-join
# them on the shared 'stocknumber' column.
batch_df = load_excel_file(batch_corescripts_file)
filesetid_df = load_excel_file(stock_to_fileset_mapping_file)
merged_df = batch_df.merge(filesetid_df, how="inner", on='stocknumber')

for _row_index, row in batch_df.iterrows():
    stock_number = row['stocknumber']
    # A '2' anywhere in the stock number selects the 'audio_drama' type.
    audio_type = 'audio_drama' if '2' in str(stock_number) else 'audio'
    corescript_file = row['corescript_file']

    # Pick the mapping rows for this stock number and audio type whose
    # fileset id (from the mapping file, hence the '_y' suffix) has no '16'.
    type_matches = merged_df['type'] == audio_type
    without_16 = merged_df["filesetid_y"].str.contains('16').eq(False)
    same_stock = merged_df["stocknumber"] == stock_number
    candidate_ids = merged_df[type_matches & without_16 & same_stock]['filesetid_y']
    filesetid = candidate_ids.drop_duplicates().to_string(index=False).strip()

    print(corescript_file)