-
Notifications
You must be signed in to change notification settings - Fork 20
/
crimebythenumbers.toc
194 lines (194 loc) · 16.1 KB
/
crimebythenumbers.toc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
\contentsline {fm}{Preface}{i}{chapter*.1}%
\contentsline {section}{How to contribute to this book}{v}{section*.11}%
\contentsline {section}{Where to find data included in this book}{vi}{section*.12}%
\contentsline {section}{Where to find code included in this book}{vii}{section*.13}%
\contentsline {fm}{About the author}{ix}{chapter*.14}%
\contentsline {part}{I\hspace {1em}Introduction}{1}{part.1}%
\contentsline {chapter}{\numberline {1}A soup to nuts project example}{3}{chapter.1}%
\contentsline {section}{\numberline {1.1}Big picture data example}{3}{section.1.1}%
\contentsline {section}{\numberline {1.2}Little picture data example}{7}{section.1.2}%
\contentsline {subsection}{\numberline {1.2.1}Loading packages}{7}{subsection.1.2.1}%
\contentsline {subsection}{\numberline {1.2.2}Loading data}{8}{subsection.1.2.2}%
\contentsline {subsection}{\numberline {1.2.3}Cleaning}{8}{subsection.1.2.3}%
\contentsline {subsection}{\numberline {1.2.4}Aggregating}{9}{subsection.1.2.4}%
\contentsline {subsection}{\numberline {1.2.5}Graphing}{10}{subsection.1.2.5}%
\contentsline {section}{\numberline {1.3}Reusing and modifying code}{11}{section.1.3}%
\contentsline {chapter}{\numberline {2}Introduction to R and RStudio}{15}{chapter.2}%
\contentsline {section}{\numberline {2.1}Using RStudio}{15}{section.2.1}%
\contentsline {subsection}{\numberline {2.1.1}Opening an R Script}{17}{subsection.2.1.1}%
\contentsline {subsection}{\numberline {2.1.2}Setting the working directory}{18}{subsection.2.1.2}%
\contentsline {subsection}{\numberline {2.1.3}Changing RStudio}{19}{subsection.2.1.3}%
\contentsline {subsubsection}{\numberline {2.1.3.1}General}{19}{subsubsection.2.1.3.1}%
\contentsline {subsubsection}{\numberline {2.1.3.2}Code}{20}{subsubsection.2.1.3.2}%
\contentsline {paragraph}{\numberline {2.1.3.2.1}Saving}{21}{paragraph.2.1.3.2.1}%
\contentsline {subsubsection}{\numberline {2.1.3.3}Appearance}{22}{subsubsection.2.1.3.3}%
\contentsline {subsubsection}{\numberline {2.1.3.4}Pane Layout}{23}{subsubsection.2.1.3.4}%
\contentsline {subsection}{\numberline {2.1.4}Helpful cheat sheets}{24}{subsection.2.1.4}%
\contentsline {section}{\numberline {2.2}Assigning variables}{25}{section.2.2}%
\contentsline {section}{\numberline {2.3}What are functions (and packages)?}{27}{section.2.3}%
\contentsline {section}{\numberline {2.4}Reading data into R}{32}{section.2.4}%
\contentsline {subsection}{\numberline {2.4.1}Loading data}{32}{subsection.2.4.1}%
\contentsline {section}{\numberline {2.5}First steps to exploring data}{32}{section.2.5}%
\contentsline {chapter}{\numberline {3}Data types and structures}{37}{chapter.3}%
\contentsline {section}{\numberline {3.1}Data types}{37}{section.3.1}%
\contentsline {section}{\numberline {3.2}Numeric, character, and logical (boolean)}{38}{section.3.2}%
\contentsline {section}{\numberline {3.3}Data structures}{42}{section.3.3}%
\contentsline {subsection}{\numberline {3.3.1}Vectors (collections of ``things'')}{42}{subsection.3.3.1}%
\contentsline {subsection}{\numberline {3.3.2}Data.frames}{43}{subsection.3.3.2}%
\contentsline {subsection}{\numberline {3.3.3}Other data structures}{46}{subsection.3.3.3}%
\contentsline {chapter}{\numberline {4}Reading and writing data}{49}{chapter.4}%
\contentsline {section}{\numberline {4.1}Reading data into R}{49}{section.4.1}%
\contentsline {subsection}{\numberline {4.1.1}R}{49}{subsection.4.1.1}%
\contentsline {subsubsection}{\numberline {4.1.1.1}.rda and .rdata files}{49}{subsubsection.4.1.1.1}%
\contentsline {subsubsection}{\numberline {4.1.1.2}.rds files}{50}{subsubsection.4.1.1.2}%
\contentsline {subsection}{\numberline {4.1.2}Excel}{50}{subsection.4.1.2}%
\contentsline {subsection}{\numberline {4.1.3}Stata}{52}{subsection.4.1.3}%
\contentsline {subsection}{\numberline {4.1.4}SAS}{53}{subsection.4.1.4}%
\contentsline {subsection}{\numberline {4.1.5}SPSS}{53}{subsection.4.1.5}%
\contentsline {subsection}{\numberline {4.1.6}Fixed-width ASCII}{53}{subsection.4.1.6}%
\contentsline {section}{\numberline {4.2}Writing data}{54}{section.4.2}%
\contentsline {subsection}{\numberline {4.2.1}R}{55}{subsection.4.2.1}%
\contentsline {subsubsection}{\numberline {4.2.1.1}.rda and .rdata}{55}{subsubsection.4.2.1.1}%
\contentsline {subsubsection}{\numberline {4.2.1.2}.rds}{55}{subsubsection.4.2.1.2}%
\contentsline {subsection}{\numberline {4.2.2}Excel}{55}{subsection.4.2.2}%
\contentsline {subsection}{\numberline {4.2.3}Stata}{55}{subsection.4.2.3}%
\contentsline {subsection}{\numberline {4.2.4}SAS}{56}{subsection.4.2.4}%
\contentsline {subsection}{\numberline {4.2.5}SPSS}{56}{subsection.4.2.5}%
\contentsline {part}{II\hspace {1em}Project Management}{57}{part.2}%
\contentsline {chapter}{\numberline {5}\emph {Mise en place}}{59}{chapter.5}%
\contentsline {section}{\numberline {5.1}Starting with a pencil and paper}{59}{section.5.1}%
\contentsline {subsection}{\numberline {5.1.1}Tables and graphs}{60}{subsection.5.1.1}%
\contentsline {section}{\numberline {5.2}R Projects}{62}{section.5.2}%
\contentsline {subsection}{\numberline {5.2.1}Folders}{66}{subsection.5.2.1}%
\contentsline {section}{\numberline {5.3}Modular R scripts}{68}{section.5.3}%
\contentsline {section}{\numberline {5.4}Modular code}{70}{section.5.4}%
\contentsline {subsection}{\numberline {5.4.1}Section labels}{70}{subsection.5.4.1}%
\contentsline {subsection}{\numberline {5.4.2}Helper R scripts}{72}{subsection.5.4.2}%
\contentsline {chapter}{\numberline {6}Collaboration}{73}{chapter.6}%
\contentsline {section}{\numberline {6.1}Code review}{73}{section.6.1}%
\contentsline {subsection}{\numberline {6.1.1}Style guidelines}{74}{subsection.6.1.1}%
\contentsline {section}{\numberline {6.2}Documentation}{75}{section.6.2}%
\contentsline {subsection}{\numberline {6.2.1}Comments}{75}{subsection.6.2.1}%
\contentsline {subsection}{\numberline {6.2.2}Vignettes}{76}{subsection.6.2.2}%
\contentsline {chapter}{\numberline {7}R Markdown}{79}{chapter.7}%
\contentsline {section}{\numberline {7.1}Code}{83}{section.7.1}%
\contentsline {subsection}{\numberline {7.1.1}Hiding code in the output}{85}{subsection.7.1.1}%
\contentsline {section}{\numberline {7.2}Inline Code}{85}{section.7.2}%
\contentsline {section}{\numberline {7.3}Tables}{86}{section.7.3}%
\contentsline {section}{\numberline {7.4}Footnotes}{86}{section.7.4}%
\contentsline {section}{\numberline {7.5}Citation}{87}{section.7.5}%
\contentsline {section}{\numberline {7.6}Spell check}{92}{section.7.6}%
\contentsline {section}{\numberline {7.7}Making the output file}{92}{section.7.7}%
\contentsline {chapter}{\numberline {8}Testing your code}{95}{chapter.8}%
\contentsline {section}{\numberline {8.1}Why test your code?}{95}{section.8.1}%
\contentsline {section}{\numberline {8.2}Unit tests}{98}{section.8.2}%
\contentsline {subsection}{\numberline {8.2.1}Modular test scripts}{100}{subsection.8.2.1}%
\contentsline {subsection}{\numberline {8.2.2}How to write unit tests}{103}{subsection.8.2.2}%
\contentsline {subsection}{\numberline {8.2.3}What to test}{105}{subsection.8.2.3}%
\contentsline {subsubsection}{\numberline {8.2.3.1}Tests for research projects}{106}{subsubsection.8.2.3.1}%
\contentsline {subsubsection}{\numberline {8.2.3.2}Tests for data collection}{106}{subsubsection.8.2.3.2}%
\contentsline {section}{\numberline {8.3}Test-driven development (TDD)}{107}{section.8.3}%
\contentsline {chapter}{\numberline {9}Git}{109}{chapter.9}%
\contentsline {section}{\numberline {9.1}What is Git, and why do I need it?}{109}{section.9.1}%
\contentsline {section}{\numberline {9.2}Git basics}{111}{section.9.2}%
\contentsline {section}{\numberline {9.3}Using Git}{112}{section.9.3}%
\contentsline {subsection}{\numberline {9.3.1}Setting up Git}{113}{subsection.9.3.1}%
\contentsline {subsection}{\numberline {9.3.2}Setting up GitHub}{114}{subsection.9.3.2}%
\contentsline {section}{\numberline {9.4}Setting up Git on an already-made R Project}{119}{section.9.4}%
\contentsline {section}{\numberline {9.5}Using Git through RStudio}{122}{section.9.5}%
\contentsline {section}{\numberline {9.6}When to commit}{128}{section.9.6}%
\contentsline {section}{\numberline {9.7}Other resources}{128}{section.9.7}%
\contentsline {part}{III\hspace {1em}Clean}{129}{part.3}%
\contentsline {chapter}{\numberline {10}Subsetting: Making big things small}{131}{chapter.10}%
\contentsline {section}{\numberline {10.1}Select specific values}{131}{section.10.1}%
\contentsline {section}{\numberline {10.2}Logical values and operations}{134}{section.10.2}%
\contentsline {subsection}{\numberline {10.2.1}Matching a single value}{135}{subsection.10.2.1}%
\contentsline {subsection}{\numberline {10.2.2}Matching multiple values}{136}{subsection.10.2.2}%
\contentsline {subsection}{\numberline {10.2.3}Does not match}{137}{subsection.10.2.3}%
\contentsline {subsection}{\numberline {10.2.4}Greater than or less than}{138}{subsection.10.2.4}%
\contentsline {subsection}{\numberline {10.2.5}Combining conditional statements - or, and}{138}{subsection.10.2.5}%
\contentsline {section}{\numberline {10.3}Subsetting a data.frame}{141}{section.10.3}%
\contentsline {subsection}{\numberline {10.3.1}Select specific columns}{148}{subsection.10.3.1}%
\contentsline {subsection}{\numberline {10.3.2}Select specific rows}{149}{subsection.10.3.2}%
\contentsline {subsection}{\numberline {10.3.3}Subset Colorado data}{156}{subsection.10.3.3}%
\contentsline {subsubsection}{\numberline {10.3.3.1}Subsetting using \texttt {dplyr}}{158}{subsubsection.10.3.3.1}%
\contentsline {chapter}{\numberline {11}Exploratory data analysis}{163}{chapter.11}%
\contentsline {section}{\numberline {11.1}Summary and Table}{165}{section.11.1}%
\contentsline {section}{\numberline {11.2}Graphing}{170}{section.11.2}%
\contentsline {section}{\numberline {11.3}Aggregating (summaries of groups)}{173}{section.11.3}%
\contentsline {section}{\numberline {11.4}Pipes in \texttt {dplyr}}{180}{section.11.4}%
\contentsline {chapter}{\numberline {12}Regular Expressions}{185}{chapter.12}%
\contentsline {section}{\numberline {12.1}Finding patterns in text with \texttt {grep()}}{187}{section.12.1}%
\contentsline {section}{\numberline {12.2}Finding and replacing patterns in text with \texttt {gsub()}}{191}{section.12.2}%
\contentsline {section}{\numberline {12.3}Useful special characters}{194}{section.12.3}%
\contentsline {subsection}{\numberline {12.3.1}Multiple characters \texttt {{[}{]}}}{195}{subsection.12.3.1}%
\contentsline {subsection}{\numberline {12.3.2}n-many of previous character \texttt {\{n\}}}{197}{subsection.12.3.2}%
\contentsline {subsection}{\numberline {12.3.3}n-many to m-many of previous character \texttt {\{n,m\}}}{197}{subsection.12.3.3}%
\contentsline {subsection}{\numberline {12.3.4}Start of string}{200}{subsection.12.3.4}%
\contentsline {subsection}{\numberline {12.3.5}End of string \texttt {\$}}{200}{subsection.12.3.5}%
\contentsline {subsection}{\numberline {12.3.6}Anything \texttt {.}}{200}{subsection.12.3.6}%
\contentsline {subsection}{\numberline {12.3.7}One or more of previous \texttt {+}}{200}{subsection.12.3.7}%
\contentsline {subsection}{\numberline {12.3.8}Zero or more of previous \texttt {*}}{201}{subsection.12.3.8}%
\contentsline {subsection}{\numberline {12.3.9}Multiple patterns \texttt {\textbar {}}}{203}{subsection.12.3.9}%
\contentsline {subsection}{\numberline {12.3.10}Parentheses \texttt {()}}{203}{subsection.12.3.10}%
\contentsline {subsection}{\numberline {12.3.11}Optional text \texttt {?}}{204}{subsection.12.3.11}%
\contentsline {section}{\numberline {12.4}Changing capitalization}{204}{section.12.4}%
\contentsline {chapter}{\numberline {13}Reshaping data}{209}{chapter.13}%
\contentsline {section}{\numberline {13.1}Reshaping a single column}{213}{section.13.1}%
\contentsline {section}{\numberline {13.2}Reshaping multiple columns}{218}{section.13.2}%
\contentsline {part}{IV\hspace {1em}Visualize}{221}{part.4}%
\contentsline {chapter}{\numberline {14}Graphing with \texttt {ggplot2}}{223}{chapter.14}%
\contentsline {section}{\numberline {14.1}What does the data look like?}{224}{section.14.1}%
\contentsline {section}{\numberline {14.2}Graphing data}{225}{section.14.2}%
\contentsline {section}{\numberline {14.3}Time-series plots}{226}{section.14.3}%
\contentsline {section}{\numberline {14.4}Scatter plots}{238}{section.14.4}%
\contentsline {section}{\numberline {14.5}Color blindness}{239}{section.14.5}%
\contentsline {chapter}{\numberline {15}More graphing with \texttt {ggplot2}}{243}{chapter.15}%
\contentsline {section}{\numberline {15.1}Exploring data}{244}{section.15.1}%
\contentsline {section}{\numberline {15.2}Graphing a single numeric variable}{255}{section.15.2}%
\contentsline {subsection}{\numberline {15.2.1}Histogram}{256}{subsection.15.2.1}%
\contentsline {subsection}{\numberline {15.2.2}Density plot}{260}{subsection.15.2.2}%
\contentsline {subsection}{\numberline {15.2.3}Count graph}{261}{subsection.15.2.3}%
\contentsline {section}{\numberline {15.3}Graphing a categorical variable}{262}{section.15.3}%
\contentsline {subsection}{\numberline {15.3.1}Bar graph}{262}{subsection.15.3.1}%
\contentsline {section}{\numberline {15.4}Graphing data over time}{269}{section.15.4}%
\contentsline {section}{\numberline {15.5}Pretty graphs}{273}{section.15.5}%
\contentsline {subsection}{\numberline {15.5.1}Themes}{274}{subsection.15.5.1}%
\contentsline {chapter}{\numberline {16}Hotspot maps}{279}{chapter.16}%
\contentsline {section}{\numberline {16.1}A simple map}{281}{section.16.1}%
\contentsline {section}{\numberline {16.2}What really are maps?}{288}{section.16.2}%
\contentsline {section}{\numberline {16.3}Making a hotspot map}{289}{section.16.3}%
\contentsline {subsection}{\numberline {16.3.1}Colors}{292}{subsection.16.3.1}%
\contentsline {chapter}{\numberline {17}Choropleth maps}{295}{chapter.17}%
\contentsline {section}{\numberline {17.1}Spatial joins}{301}{section.17.1}%
\contentsline {section}{\numberline {17.2}Making choropleth maps}{308}{section.17.2}%
\contentsline {chapter}{\numberline {18}Interactive maps}{313}{chapter.18}%
\contentsline {section}{\numberline {18.1}Why do interactive graphs matter?}{314}{section.18.1}%
\contentsline {subsection}{\numberline {18.1.1}Understanding your data}{314}{subsection.18.1.1}%
\contentsline {subsection}{\numberline {18.1.2}Police departments use them}{314}{subsection.18.1.2}%
\contentsline {section}{\numberline {18.2}Making the interactive map}{314}{section.18.2}%
\contentsline {section}{\numberline {18.3}Adding popup information}{319}{section.18.3}%
\contentsline {section}{\numberline {18.4}Dealing with too many markers}{323}{section.18.4}%
\contentsline {section}{\numberline {18.5}Interactive choropleth maps}{325}{section.18.5}%
\contentsline {part}{V\hspace {1em}Collect}{335}{part.5}%
\contentsline {chapter}{\numberline {19}Webscraping with \texttt {rvest}}{337}{chapter.19}%
\contentsline {section}{\numberline {19.1}Scraping one page}{339}{section.19.1}%
\contentsline {section}{\numberline {19.2}Cleaning the webscraped data}{343}{section.19.2}%
\contentsline {chapter}{\numberline {20}Functions}{345}{chapter.20}%
\contentsline {section}{\numberline {20.1}A simple function}{345}{section.20.1}%
\contentsline {section}{\numberline {20.2}Adding parameters}{347}{section.20.2}%
\contentsline {section}{\numberline {20.3}Making a function to scrape recipes}{348}{section.20.3}%
\contentsline {chapter}{\numberline {21}For loops}{351}{chapter.21}%
\contentsline {section}{\numberline {21.1}Basic for loops}{351}{section.21.1}%
\contentsline {section}{\numberline {21.2}Scraping multiple recipes}{354}{section.21.2}%
\contentsline {chapter}{\numberline {22}Scraping tables from PDFs}{357}{chapter.22}%
\contentsline {section}{\numberline {22.1}Scraping the first table}{360}{section.22.1}%
\contentsline {section}{\numberline {22.2}Making a function}{368}{section.22.2}%
\contentsline {chapter}{\numberline {23}More scraping tables from PDFs}{375}{chapter.23}%
\contentsline {section}{\numberline {23.1}Texas jail data}{375}{section.23.1}%
\contentsline {section}{\numberline {23.2}Pregnant women incarcerated}{389}{section.23.2}%
\contentsline {section}{\numberline {23.3}Making PDF-scraped data available to others}{395}{section.23.3}%
\contentsline {chapter}{\numberline {24}Geocoding}{399}{chapter.24}%
\contentsline {section}{\numberline {24.1}Geocoding a single address}{399}{section.24.1}%
\contentsline {section}{\numberline {24.2}Geocoding San Francisco marijuana dispensary locations}{405}{section.24.2}%
\contentsline {fm}{Bibliography}{411}{chapter*.16}%