1
import os

import pandas as pd
import plotnine
from plotnine import *
3
+
4
def climbers_above_replacement_level(df, color_fill, filename, replacement_levels=None):
    """Plot how many climbers exceed each replacement level and save it as PNG.

    A climber's attempt count is the number of rows in ``df`` that carry their
    ``Name``. For each replacement level ``N`` the bar height is the number of
    climbers whose attempt count is strictly greater than ``N``.

    Args:
        df: DataFrame with a ``Name`` column (presumably one row per attempt;
            confirm against the data source).
        color_fill: Fill colour passed to ``geom_bar``.
        filename: Output file stem; the plot is written to
            ``results/eda/<filename>.png``.
        replacement_levels: Iterable of thresholds to evaluate. When omitted,
            the module-level ``REPLACEMENT_LEVELS`` is used, which keeps
            existing three-argument callers working.

    Returns:
        The plotnine ``ggplot`` object that was saved.
    """
    if replacement_levels is None:
        # Legacy behaviour: the thresholds come from a module-level global.
        replacement_levels = REPLACEMENT_LEVELS
    levels = list(replacement_levels)

    attempt_counts = df['Name'].value_counts()
    # Vectorised count of climbers strictly above each threshold.
    climbers_above = [int((attempt_counts > level).sum()) for level in levels]

    plot_df = pd.DataFrame({
        'Replacement Level': levels,
        'Climbers Above': climbers_above,
    })
    # Categorical x so the bars are evenly spaced instead of placed on a
    # numeric axis.
    plot_df['Replacement Level'] = plot_df['Replacement Level'].astype('category')

    # NOTE: this assigns a plotnine-wide option, not a per-plot setting.
    plotnine.options.figure_size = (14, 6)

    p = (
        ggplot(plot_df, aes(x='Replacement Level', y='Climbers Above'))
        + geom_bar(stat='identity', fill=color_fill, alpha=0.7, width=0.9)
        + geom_text(aes(label='Climbers Above'), va='bottom', size=10)
        + labs(x='Replacement Level $N$ (# of Problems Attempted)',
               y='# Climbers Above Replacement Level')
        + theme_bw()
        + theme(axis_title=element_text(size=16),
                axis_text=element_text(size=12))
        + scale_x_discrete()
    )

    # Make sure the target directory exists so the save cannot fail on a
    # fresh checkout.
    os.makedirs('results/eda', exist_ok=True)
    p.save(f'results/eda/{filename}.png', dpi=1000)
    print(f"Saved file {filename}.png")

    return p
28
+
29
+
30
def height_histogram(df, color_fill, filename):
    """Plot a histogram of the ``Height`` column and save it as PNG.

    Args:
        df: DataFrame with a ``Height`` column (the axis label states
            centimetres; the data itself is not checked).
        color_fill: Fill colour passed to ``geom_histogram``.
        filename: Output file stem; the plot is written to
            ``results/eda/<filename>.png``.

    Returns:
        The plotnine ``ggplot`` object that was saved.
    """
    # NOTE: this assigns a plotnine-wide option, not a per-plot setting.
    plotnine.options.figure_size = (12, 6)

    p = (
        ggplot(df, aes(x='Height'))
        + geom_histogram(binwidth=2, fill=color_fill, alpha=0.7)
        + labs(x='Height (cm)', y='# of Climbers')
        + theme_bw()
        # Fixed tick positions: 160..190 step 2 on x, 0..12 step 3 on y.
        + scale_x_continuous(breaks=range(160, 191, 2))
        + scale_y_continuous(breaks=range(0, 15, 3))
        + theme(axis_title=element_text(size=24),
                axis_text=element_text(size=18))
    )

    # Make sure the target directory exists so the save cannot fail on a
    # fresh checkout.
    os.makedirs('results/eda', exist_ok=True)
    p.save(f'results/eda/{filename}.png', dpi=1000)
    print(f"Saved file {filename}.png")

    return p
46
+
47
def problem_attempts(df, color_fill, filename):
    """Plot the distribution of per-problem row counts and save it as PNG.

    Each distinct ``Problem_ID`` is counted by how many rows of ``df`` carry
    it; the histogram shows how many problems fall into each count bin.

    Args:
        df: DataFrame with a ``Problem_ID`` column (presumably one row per
            climber/problem attempt; confirm against the data source).
        color_fill: Fill colour passed to ``geom_histogram``.
        filename: Output file stem; the plot is written to
            ``results/eda/<filename>.png``.

    Returns:
        The plotnine ``ggplot`` object that was saved.
    """
    problem_id_counts = df['Problem_ID'].value_counts().reset_index()
    # Name both columns explicitly rather than relying on whatever defaults
    # reset_index produces.
    problem_id_counts.columns = ['Problem_ID', 'Frequency']

    # NOTE: this assigns a plotnine-wide option, not a per-plot setting.
    plotnine.options.figure_size = (16, 6)

    p = (
        ggplot(problem_id_counts, aes(x='Frequency'))
        + geom_histogram(binwidth=5, fill=color_fill, alpha=0.8)
        + labs(x='# of Climbers Attempted',
               y='# of Problems')
        + theme_bw()
        + theme(axis_title=element_text(size=24),
                axis_text=element_text(size=18))
        # Fixed tick positions: 0..160 step 10 on x, 0..840 step 60 on y.
        + scale_x_continuous(breaks=range(0, 167, 10))
        + scale_y_continuous(breaks=range(0, 900, 60))
    )

    # Make sure the target directory exists so the save cannot fail on a
    # fresh checkout.
    os.makedirs('results/eda', exist_ok=True)
    p.save(f'results/eda/{filename}.png', dpi=1000)
    print(f"Saved file {filename}.png")

    return p
68
+
69
if __name__ == "__main__":
    # Script entry point: load both CSV inputs and render the three EDA plots.
    import pandas as pd

    # Kept as a module-level global because
    # climbers_above_replacement_level looks it up by name.
    REPLACEMENT_LEVELS = [25, 50, 100, 250, 500, 1000]
    FILL_COLOR = '#00abff'

    df = pd.read_csv('data/men_data.csv')
    heights = pd.read_csv('data/climbers_heights.csv', index_col=0)

    # (plot function, input frame, output file stem), rendered in this order.
    plot_jobs = (
        (climbers_above_replacement_level, df, 'climbers_above_replacement_level'),
        (height_histogram, heights, 'height_histogram'),
        (problem_attempts, df, 'problem_id_frequency_histogram'),
    )
    for make_plot, frame, stem in plot_jobs:
        make_plot(frame, FILL_COLOR, stem)
0 commit comments