diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000..5008ddf Binary files /dev/null and b/.DS_Store differ diff --git a/.history/Analyze_20240207183142.py b/.history/Analyze_20240207183142.py new file mode 100644 index 0000000..aa9943c --- /dev/null +++ b/.history/Analyze_20240207183142.py @@ -0,0 +1,37 @@ +import pandas as pd +import matplotlib.pyplot as plt +import numpy as np + + +def import_data(): + """ + Import data from the TTC Bus Delay dataset + """ + return pd.read_csv('ttc-bus-delay-data-2023.csv') + + +def compute_avg_delay(data, group_by: str = 'Route'): + """ + Compute the average delay aggregated by specified column + """ +<<<<<<< HEAD + return data.groupby('Route')['Min Delay'].mean().reset_index() +======= + return data.groupby(group_by).mean().reset_index() +>>>>>>> origin/feature1 + + +def main(): + """ + Main function + """ + data = import_data() + avg_delay = compute_avg_delay(data, group_by='Route') + + plt.hist(avg_delay['Min Delay'], bins=30) + plt.title('Average Delay Distribution') + plt.xlabel('Average Delay (minutes)') + + +if __name__ == '__main__': + main() diff --git a/.history/Analyze_20240207183321.py b/.history/Analyze_20240207183321.py new file mode 100644 index 0000000..5c23c2a --- /dev/null +++ b/.history/Analyze_20240207183321.py @@ -0,0 +1,38 @@ + +import pandas as pd +import matplotlib.pyplot as plt +import numpy as np + + +def import_data(): + """ + Import data from the TTC Bus Delay dataset + """ + return pd.read_csv('ttc-bus-delay-data-2023.csv') + + +def compute_avg_delay(data, group_by: str = 'Route'): + """ + Compute the average delay aggregated by specified column + """ +<<<<<<< HEAD + return data.groupby('Route')['Min Delay'].mean().reset_index() +======= + return data.groupby(group_by).mean().reset_index() +>>>>>>> origin/feature1 + + +def main(): + """ + Main function + """ + data = import_data() + avg_delay = compute_avg_delay(data, group_by='Route') + + plt.hist(avg_delay['Min Delay'], bins=30) + plt.title('Average Delay Distribution') + plt.xlabel('Average Delay (minutes)') + + +if __name__ == '__main__': + main() diff --git a/.history/Analyze_20240207183332.py b/.history/Analyze_20240207183332.py new file mode 100644 index 0000000..ff6fe76 --- /dev/null +++ b/.history/Analyze_20240207183332.py @@ -0,0 +1,38 @@ +#RESOLVED +import pandas as pd +import matplotlib.pyplot as plt +import numpy as np + + +def import_data(): + """ + Import data from the TTC Bus Delay dataset + """ + return pd.read_csv('ttc-bus-delay-data-2023.csv') + + +def compute_avg_delay(data, group_by: str = 'Route'): + """ + Compute the average delay aggregated by specified column + """ +<<<<<<< HEAD + return data.groupby('Route')['Min Delay'].mean().reset_index() +======= + return data.groupby(group_by).mean().reset_index() +>>>>>>> origin/feature1 + + +def main(): + """ + Main function + """ + data = import_data() + avg_delay = compute_avg_delay(data, group_by='Route') + + plt.hist(avg_delay['Min Delay'], bins=30) + plt.title('Average Delay Distribution') + plt.xlabel('Average Delay (minutes)') + + +if __name__ == '__main__': + main() diff --git a/.history/Analyze_20240207183335.py b/.history/Analyze_20240207183335.py new file mode 100644 index 0000000..a0b5ad0 --- /dev/null +++ b/.history/Analyze_20240207183335.py @@ -0,0 +1,38 @@ +#RESOLVED FIXED +import pandas as pd +import matplotlib.pyplot as plt +import numpy as np + + +def import_data(): + """ + Import data from the TTC Bus Delay dataset + """ + return pd.read_csv('ttc-bus-delay-data-2023.csv') + + +def compute_avg_delay(data, group_by: str = 'Route'): + """ + Compute the average delay aggregated by specified column + """ +<<<<<<< HEAD + return data.groupby('Route')['Min Delay'].mean().reset_index() +======= + return data.groupby(group_by).mean().reset_index() +>>>>>>> origin/feature1 + + +def main(): + """ + Main function + """ + data = import_data() + avg_delay = compute_avg_delay(data, group_by='Route') + + plt.hist(avg_delay['Min Delay'], bins=30) + plt.title('Average Delay Distribution') + plt.xlabel('Average Delay (minutes)') + + +if __name__ == '__main__': + main() diff --git a/.history/Analyze_20240211193044.py b/.history/Analyze_20240211193044.py new file mode 100644 index 0000000..3f07dbf --- /dev/null +++ b/.history/Analyze_20240211193044.py @@ -0,0 +1,33 @@ +import pandas as pd +import matplotlib.pyplot as plt +import numpy as np + + +def import_data(): + """ + Import data from the TTC Bus Delay dataset + """ + return pd.read_csv('ttc-bus-delay-data-2023.csv') + + +def compute_avg_delay(data): + """ + Compute the average delay for each route + """ + return data.groupby('Route').mean().reset_index() + + +def main(): + """ + Main function + """ + data = import_data() + avg_delay = compute_avg_delay(data) + + plt.hist(avg_delay['Min Delay'], bins=30) + plt.title('Average Delay Distribution') + plt.xlabel('Average Delay (minutes)') + + +if __name__ == '__main__': + main() diff --git a/.history/Analyze_20240211195121.py b/.history/Analyze_20240211195121.py new file mode 100644 index 0000000..81e17fb --- /dev/null +++ b/.history/Analyze_20240211195121.py @@ -0,0 +1,34 @@ +import pandas as pd +import matplotlib.pyplot as plt +import numpy as np + + +def import_data(): + """ + Import data from the TTC Bus Delay dataset + """ + return pd.read_csv('ttc-bus-delay-data-2023.csv') + + +def compute_avg_delay(data): + """ + Compute the average delay for each route + """ + return data.groupby('Route')['Min Delay'].mean().reset_index() + + +def main(): + """ + Main function + """ + data = import_data() + avg_delay = compute_avg_delay(data) + + plt.hist(avg_delay['Min Delay'], bins=30) + plt.title('Average Delay Distribution') + plt.xlabel('Average Delay (minutes)') + plt.show() + + +if __name__ == '__main__': + main() diff --git a/.history/Analyze_20240211195122.py b/.history/Analyze_20240211195122.py new file mode 100644 index 0000000..81e17fb --- /dev/null +++ b/.history/Analyze_20240211195122.py @@ -0,0 +1,34 @@ +import pandas as pd +import matplotlib.pyplot as plt +import numpy as np + + +def import_data(): + """ + Import data from the TTC Bus Delay dataset + """ + return pd.read_csv('ttc-bus-delay-data-2023.csv') + + +def compute_avg_delay(data): + """ + Compute the average delay for each route + """ + return data.groupby('Route')['Min Delay'].mean().reset_index() + + +def main(): + """ + Main function + """ + data = import_data() + avg_delay = compute_avg_delay(data) + + plt.hist(avg_delay['Min Delay'], bins=30) + plt.title('Average Delay Distribution') + plt.xlabel('Average Delay (minutes)') + plt.show() + + +if __name__ == '__main__': + main() diff --git a/.history/Analyze_20240211195123.py b/.history/Analyze_20240211195123.py new file mode 100644 index 0000000..81e17fb --- /dev/null +++ b/.history/Analyze_20240211195123.py @@ -0,0 +1,34 @@ +import pandas as pd +import matplotlib.pyplot as plt +import numpy as np + + +def import_data(): + """ + Import data from the TTC Bus Delay dataset + """ + return pd.read_csv('ttc-bus-delay-data-2023.csv') + + +def compute_avg_delay(data): + """ + Compute the average delay for each route + """ + return data.groupby('Route')['Min Delay'].mean().reset_index() + + +def main(): + """ + Main function + """ + data = import_data() + avg_delay = compute_avg_delay(data) + + plt.hist(avg_delay['Min Delay'], bins=30) + plt.title('Average Delay Distribution') + plt.xlabel('Average Delay (minutes)') + plt.show() + + +if __name__ == '__main__': + main() diff --git a/.history/Analyze_20240211195125.py b/.history/Analyze_20240211195125.py new file mode 100644 index 0000000..81e17fb --- /dev/null +++ b/.history/Analyze_20240211195125.py @@ -0,0 +1,34 @@ +import pandas as pd +import matplotlib.pyplot as plt +import numpy as np + + +def import_data(): + """ + Import data from the TTC Bus Delay dataset + """ + return pd.read_csv('ttc-bus-delay-data-2023.csv') + + +def compute_avg_delay(data): + """ + Compute the average delay for each route + """ + return data.groupby('Route')['Min Delay'].mean().reset_index() + + +def main(): + """ + Main function + """ + data = import_data() + avg_delay = compute_avg_delay(data) + + plt.hist(avg_delay['Min Delay'], bins=30) + plt.title('Average Delay Distribution') + plt.xlabel('Average Delay (minutes)') + plt.show() + + +if __name__ == '__main__': + main() diff --git a/.history/Analyze_20240211225832.py b/.history/Analyze_20240211225832.py new file mode 100644 index 0000000..ba0805a --- /dev/null +++ b/.history/Analyze_20240211225832.py @@ -0,0 +1,43 @@ +# FIXED AND TESTED PYTHON CODE +import pandas as pd +import matplotlib.pyplot as plt +import numpy as np + + +def import_data(): + """ + Import data from the TTC Bus Delay dataset + """ + return pd.read_csv('ttc-bus-delay-data-2023.csv') + + +def compute_avg_delay(data, group_by: str = 'Route'): + """ + Compute the average delay aggregated by specified column + """ + + + return data.groupby('Route')['Min Delay'].mean().reset_index() + + return data.groupby(group_by).mean().reset_index() + + + return data.groupby('Route')['Min Delay'].mean().reset_index() + + + +def main(): + """ + Main function + """ + data = import_data() + avg_delay = compute_avg_delay(data, group_by='Route') + + plt.hist(avg_delay['Min Delay'], bins=30) + plt.title('Average Delay Distribution') + plt.xlabel('Average Delay (minutes)') + plt.show() + + +if __name__ == '__main__': + main() diff --git a/Analyze.py b/Analyze.py index 3f07dbf..ba0805a 100644 --- a/Analyze.py +++ b/Analyze.py @@ -1,3 +1,4 @@ +# FIXED AND TESTED PYTHON CODE import pandas as pd import matplotlib.pyplot as plt import numpy as np @@ -10,11 +11,19 @@ def import_data(): return pd.read_csv('ttc-bus-delay-data-2023.csv') -def compute_avg_delay(data): +def compute_avg_delay(data, group_by: str = 'Route'): """ - Compute the average delay for each route + Compute the average delay aggregated by specified column """ - return data.groupby('Route').mean().reset_index() + + + return data.groupby('Route')['Min Delay'].mean().reset_index() + + return data.groupby(group_by).mean().reset_index() + + + return data.groupby('Route')['Min Delay'].mean().reset_index() + def main(): @@ -22,11 +31,12 @@ def main(): Main function """ data = import_data() - avg_delay = compute_avg_delay(data) + avg_delay = compute_avg_delay(data, group_by='Route') plt.hist(avg_delay['Min Delay'], bins=30) plt.title('Average Delay Distribution') plt.xlabel('Average Delay (minutes)') + plt.show() if __name__ == '__main__': diff --git a/DSI_git_assignment b/DSI_git_assignment new file mode 160000 index 0000000..ce31e28 --- /dev/null +++ b/DSI_git_assignment @@ -0,0 +1 @@ +Subproject commit ce31e28393811bbc669a05dba9fd34ad8949a5a6 diff --git a/README.md b/README.md index c9d8518..0a1dcf1 100644 --- a/README.md +++ b/README.md @@ -1 +1,2 @@ # Load, analyze, and visualize TTC bus delay data +# Use bright colours when visualizing the data