Skip to content

Commit

Permalink
Fix: test_labels and train_labels reshape added
Browse files (browse the repository at this point in the history)
  • Loading branch information
dadit97 committed Sep 11, 2022
1 parent 467e465 commit 604bb16
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 1 deletion.
2 changes: 1 addition & 1 deletion src/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ def make_roc(labels, results, name):
# If true, the balancing will be done early, resulting in a great performance gain
earlyBalance = True
problem_to_solve = 'CANCELLED' # The alternative is 'DIVERTED'
usePyspark = False # If true, uses PySpark, otherwise Pandas
usePyspark = True # If true, uses PySpark, otherwise Pandas
# If false, only `records_per_file` records will be sampled from the most recent year's CSV
sample_from_all_files = True
records_per_file = 500000
Expand Down
3 changes: 3 additions & 0 deletions src/preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,9 @@ def preprocess(index: str, useAllFrames: bool, size: int, balance_size: int, use
numpy.array(test_data.collect()),
numpy.array(test_labels.collect()))

result[1].shape = [result[1].shape[0]]
result[3].shape = [result[3].shape[0]]

finish_time = tm.time() - start_time
print_and_save_time("Dataset splitting concluded: " +
str(finish_time) + " seconds")
Expand Down

0 comments on commit 604bb16

Please sign in to comment.