From a545c8b1c61e1a4d402baf120de248a9d432a190 Mon Sep 17 00:00:00 2001 From: Simeon Wong Date: Sun, 4 Feb 2024 22:30:34 -0500 Subject: [PATCH 1/6] bugfix: only compute mean of Min Delay, cannot mean non numeric values --- Analyze.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Analyze.py b/Analyze.py index 3f07dbf..a4ce6d2 100644 --- a/Analyze.py +++ b/Analyze.py @@ -14,7 +14,7 @@ def compute_avg_delay(data): """ Compute the average delay for each route """ - return data.groupby('Route').mean().reset_index() + return data.groupby('Route')['Min Delay'].mean().reset_index() def main(): From bc0e903fbf6c31faa0ee677246fe38879768a0c3 Mon Sep 17 00:00:00 2001 From: Simeon Wong Date: Sun, 4 Feb 2024 22:54:46 -0500 Subject: [PATCH 2/6] feature: group by arbitrary columns --- Analyze.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Analyze.py b/Analyze.py index 3f07dbf..048bcae 100644 --- a/Analyze.py +++ b/Analyze.py @@ -10,11 +10,11 @@ def import_data(): return pd.read_csv('ttc-bus-delay-data-2023.csv') -def compute_avg_delay(data): +def compute_avg_delay(data, group_by: str = 'Route'): """ - Compute the average delay for each route + Compute the average delay aggregated by specified column """ - return data.groupby('Route').mean().reset_index() + return data.groupby(group_by).mean().reset_index() def main(): From b079b371df0e610015b25b17d821715511ccb1ae Mon Sep 17 00:00:00 2001 From: Simeon Wong Date: Mon, 5 Feb 2024 10:51:33 -0500 Subject: [PATCH 3/6] specify grouping directly in compute_avg_delay function call --- Analyze.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Analyze.py b/Analyze.py index 048bcae..1512d02 100644 --- a/Analyze.py +++ b/Analyze.py @@ -22,7 +22,7 @@ def main(): Main function """ data = import_data() - avg_delay = compute_avg_delay(data) + avg_delay = compute_avg_delay(data, group_by='Route') plt.hist(avg_delay['Min Delay'], bins=30) plt.title('Average Delay Distribution') From 8ed093787a8e3099337019d75bbe040d8849a1cb Mon Sep 17 00:00:00 2001 From: Calen Blackwell <156455477+Cnblackwell@users.noreply.github.com> Date: Wed, 7 Feb 2024 19:09:52 -0500 Subject: [PATCH 4/6] Fixed merge conflict --- Analyze.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Analyze.py b/Analyze.py index a0b5ad0..bb0bdb6 100644 --- a/Analyze.py +++ b/Analyze.py @@ -1,4 +1,4 @@ -#RESOLVED FIXED +# FIXED AND TESTED PYTHON CODE import pandas as pd import matplotlib.pyplot as plt import numpy as np @@ -15,11 +15,11 @@ def compute_avg_delay(data, group_by: str = 'Route'): """ Compute the average delay aggregated by specified column """ -<<<<<<< HEAD + return data.groupby('Route')['Min Delay'].mean().reset_index() -======= + return data.groupby(group_by).mean().reset_index() ->>>>>>> origin/feature1 + def main(): From a6681a1b793c1c61a69a2a3b7e023737f4ad82b9 Mon Sep 17 00:00:00 2001 From: Calen Blackwell <156455477+Cnblackwell@users.noreply.github.com> Date: Sun, 11 Feb 2024 19:52:10 -0500 Subject: [PATCH 5/6] Fixed Python code --- .history/Analyze_20240211193044.py | 33 +++++++++++++++++++++++++++++ .history/Analyze_20240211195121.py | 34 ++++++++++++++++++++++++++++++ .history/Analyze_20240211195122.py | 34 ++++++++++++++++++++++++++++++ .history/Analyze_20240211195123.py | 34 ++++++++++++++++++++++++++++++ .history/Analyze_20240211195125.py | 34 ++++++++++++++++++++++++++++++ Analyze.py | 3 ++- 6 files changed, 171 insertions(+), 1 deletion(-) create mode 100644 .history/Analyze_20240211193044.py create mode 100644 .history/Analyze_20240211195121.py create mode 100644 .history/Analyze_20240211195122.py create mode 100644 .history/Analyze_20240211195123.py create mode 100644 .history/Analyze_20240211195125.py diff --git a/.history/Analyze_20240211193044.py b/.history/Analyze_20240211193044.py new file mode 100644 index 0000000..3f07dbf --- /dev/null +++ b/.history/Analyze_20240211193044.py @@ -0,0 +1,33 @@ +import pandas as pd +import matplotlib.pyplot as plt +import numpy as np + + +def import_data(): + """ + Import data from the TTC Bus Delay dataset + """ + return pd.read_csv('ttc-bus-delay-data-2023.csv') + + +def compute_avg_delay(data): + """ + Compute the average delay for each route + """ + return data.groupby('Route').mean().reset_index() + + +def main(): + """ + Main function + """ + data = import_data() + avg_delay = compute_avg_delay(data) + + plt.hist(avg_delay['Min Delay'], bins=30) + plt.title('Average Delay Distribution') + plt.xlabel('Average Delay (minutes)') + + +if __name__ == '__main__': + main() diff --git a/.history/Analyze_20240211195121.py b/.history/Analyze_20240211195121.py new file mode 100644 index 0000000..81e17fb --- /dev/null +++ b/.history/Analyze_20240211195121.py @@ -0,0 +1,34 @@ +import pandas as pd +import matplotlib.pyplot as plt +import numpy as np + + +def import_data(): + """ + Import data from the TTC Bus Delay dataset + """ + return pd.read_csv('ttc-bus-delay-data-2023.csv') + + +def compute_avg_delay(data): + """ + Compute the average delay for each route + """ + return data.groupby('Route')['Min Delay'].mean().reset_index() + + +def main(): + """ + Main function + """ + data = import_data() + avg_delay = compute_avg_delay(data) + + plt.hist(avg_delay['Min Delay'], bins=30) + plt.title('Average Delay Distribution') + plt.xlabel('Average Delay (minutes)') + plt.show() + + +if __name__ == '__main__': + main() diff --git a/.history/Analyze_20240211195122.py b/.history/Analyze_20240211195122.py new file mode 100644 index 0000000..81e17fb --- /dev/null +++ b/.history/Analyze_20240211195122.py @@ -0,0 +1,34 @@ +import pandas as pd +import matplotlib.pyplot as plt +import numpy as np + + +def import_data(): + """ + Import data from the TTC Bus Delay dataset + """ + return pd.read_csv('ttc-bus-delay-data-2023.csv') + + +def compute_avg_delay(data): + """ + Compute the average delay for each route + """ + return data.groupby('Route')['Min Delay'].mean().reset_index() + + +def main(): + """ + Main function + """ + data = import_data() + avg_delay = compute_avg_delay(data) + + plt.hist(avg_delay['Min Delay'], bins=30) + plt.title('Average Delay Distribution') + plt.xlabel('Average Delay (minutes)') + plt.show() + + +if __name__ == '__main__': + main() diff --git a/.history/Analyze_20240211195123.py b/.history/Analyze_20240211195123.py new file mode 100644 index 0000000..81e17fb --- /dev/null +++ b/.history/Analyze_20240211195123.py @@ -0,0 +1,34 @@ +import pandas as pd +import matplotlib.pyplot as plt +import numpy as np + + +def import_data(): + """ + Import data from the TTC Bus Delay dataset + """ + return pd.read_csv('ttc-bus-delay-data-2023.csv') + + +def compute_avg_delay(data): + """ + Compute the average delay for each route + """ + return data.groupby('Route')['Min Delay'].mean().reset_index() + + +def main(): + """ + Main function + """ + data = import_data() + avg_delay = compute_avg_delay(data) + + plt.hist(avg_delay['Min Delay'], bins=30) + plt.title('Average Delay Distribution') + plt.xlabel('Average Delay (minutes)') + plt.show() + + +if __name__ == '__main__': + main() diff --git a/.history/Analyze_20240211195125.py b/.history/Analyze_20240211195125.py new file mode 100644 index 0000000..81e17fb --- /dev/null +++ b/.history/Analyze_20240211195125.py @@ -0,0 +1,34 @@ +import pandas as pd +import matplotlib.pyplot as plt +import numpy as np + + +def import_data(): + """ + Import data from the TTC Bus Delay dataset + """ + return pd.read_csv('ttc-bus-delay-data-2023.csv') + + +def compute_avg_delay(data): + """ + Compute the average delay for each route + """ + return data.groupby('Route')['Min Delay'].mean().reset_index() + + +def main(): + """ + Main function + """ + data = import_data() + avg_delay = compute_avg_delay(data) + + plt.hist(avg_delay['Min Delay'], bins=30) + plt.title('Average Delay Distribution') + plt.xlabel('Average Delay (minutes)') + plt.show() + + +if __name__ == '__main__': + main() diff --git a/Analyze.py b/Analyze.py index 3f07dbf..81e17fb 100644 --- a/Analyze.py +++ b/Analyze.py @@ -14,7 +14,7 @@ def compute_avg_delay(data): """ Compute the average delay for each route """ - return data.groupby('Route').mean().reset_index() + return data.groupby('Route')['Min Delay'].mean().reset_index() def main(): @@ -27,6 +27,7 @@ def main(): plt.hist(avg_delay['Min Delay'], bins=30) plt.title('Average Delay Distribution') plt.xlabel('Average Delay (minutes)') + plt.show() if __name__ == '__main__': From 14a55fbe5d01f2669b57698d432f089772526df8 Mon Sep 17 00:00:00 2001 From: Calen Blackwell <156455477+Cnblackwell@users.noreply.github.com> Date: Sun, 11 Feb 2024 20:41:26 -0500 Subject: [PATCH 6/6] Updated README.md with instructions --- DSI_git_assignment | 1 + README.md | 1 + 2 files changed, 2 insertions(+) create mode 160000 DSI_git_assignment diff --git a/DSI_git_assignment b/DSI_git_assignment new file mode 160000 index 0000000..ce31e28 --- /dev/null +++ b/DSI_git_assignment @@ -0,0 +1 @@ +Subproject commit ce31e28393811bbc669a05dba9fd34ad8949a5a6 diff --git a/README.md b/README.md index c9d8518..0a1dcf1 100644 --- a/README.md +++ b/README.md @@ -1 +1,2 @@ # Load, analyze, and visualize TTC bus delay data +# Use bright colours when visualizing the data