This is the analysis behind the TrendCT.org story: Where are the most vehicles towed in Hartford?
Looking at 1.5 years of towing data compiled by Matt Zagaja from Hartford’s Open Data portal.
For other analyses, check out our Trend CT data projects repo.
library(dplyr)
library(lubridate)
library(leaflet)
library(ggmap)
library(knitr)
library(stringr)
library(geosphere)
library(ggplot2)
require(ggmap)
require(scales)
library(sp)
require(rgdal)
require(maptools)
#install.packages("devtools")
#devtools::install_github("hrecht/censusapi")
library("censusapi")
# Bring in the data
tows <- read.csv("data/tows.csv", stringsAsFactors=F)
Cleaning and prepping the data
## There are many blank fields in the Tow Firm and Address column
## There are no blank fields in the phone number column
## So let's figure out tow firm name and address based on phone number
### Subset dataframe of complete data
tows_sub <- subset(tows, Tow_Firm!="")
tows_sub <- subset(tows_sub, !duplicated(Tow_Firm))
# Getting rid of one of the tow firm names who used a phone number but two different names
tows_sub <- subset(tows_sub, Tow_Firm!="CROSS COUNTRY AUTO")
#### Geolocate tow firms
geo <- geocode(location = tows_sub$Tow_Firm_Address, output="latlon", source="google")
tows_sub <- cbind(tows_sub, geo)
#### Delete Tow_Firm and Tow_Firm_Address columns in original dataframe
tows <- tows[,-3]
tows <- tows[,-3]
#### Prep dataframe for joining
tows_sub <- tows_sub[c("Tow_Firm", "Tow_Firm_Address", "Tow_Firm_Phone", "lon", "lat")]
#### Join the dataframes
tows <- left_join(tows, tows_sub)
#### Clean up time and dates
tows$Date <- ymd(tows$Date)
tows$Time <- hms(tows$Time)
tows$created_at <- ymd_hms(tows$created_at)
tows$created_at <- ymd_hms(tows$updated_at)
tows$removed_at <- ymd_hms(tows$removed_at)
#### Prepping Lat/Lon
tows$tow_lon <- gsub(",.*", "", tows$geom)
tows$tow_lat <- gsub(".*,", "", tows$geom)
Exploring the data
About 20682 vehicles have been towed in Hartford since 2015.
There are 21 towing companies in the city.
3455 vehicles had no license plate listed.
Where are the vehicles towed from?
# Creates a circle leaflet, the radius corresponding with the number of tows at that location
address2 <- tows %>%
group_by(Tow_From_Address, tow_lat, tow_lon) %>%
summarise(count=n()) %>%
arrange(-count)
# Creating a column plugging in histograms of tows by hours by address (in the hours folder)
# The histograms were created with the histograms.R script and uploaded to our server
address2$png <- gsub(" ", "", address2$Tow_From_Address)
address2$png <- paste0("<img src='http://projects.ctmirror.org/content/trend/2016/08/towed/hours/", address2$png, ".png' width='250px'></img>")
address2$pop <- ifelse(address2$count < 5, paste0(address2$Tow_From_Address, "<br /><strong>Tows: </strong>", address2$count), paste0("<strong>Tows: </strong>", address2$count, "<br />", address2$png))
leaflet(address2) %>% addTiles('http://a.tiles.mapbox.com/v3/borzechowski.gcj2gonc/{z}/{x}/{y}.png', attribution='<a href="http://www.openstreetmap.org/copyright">OpenStreetMap</a>') %>%
setView(-72.690940, 41.751426, zoom = 13) %>%
addCircles(~tow_lon, ~tow_lat, popup=address2$pop, weight = 3, radius=address2$count*1.5,
color="#ffa500", stroke = TRUE, fillOpacity = 0.2) %>%
addLegend("bottomright", colors= "#ffa500", labels="Towed'", title="In Hartford")
Where are vehicles towed to?
#hartbox <- make_bbox(lon = tows$tow_lon, lat =tows$tow_lat, f = .2)
hart_map <- get_map(location = "Hartford", zoom=12, maptype = "roadmap", source = "google")
townborders <- readOGR(dsn="maps", layer="ctgeo")
## OGR data source with driver: ESRI Shapefile
## Source: "maps", layer: "ctgeo"
## with 169 features
## It has 6 fields
townborders_only <- townborders
townborders<- fortify(townborders, region="NAME10")
# Subset the town borders to just Hamden since that's the department we're looking at
town_borders <- subset(townborders, id=="Hartford")
tm_ct <- ggmap(hart_map) +
geom_point(data=tows, aes(x=lon, y=lat), color="red", size=5, alpha=0.5) +
#geom_polygon(data = total_map, aes(x=long, y=lat, group=group, fill=total), color = "black", size=0.2) +
geom_polygon(data = town_borders, aes(x=long, y=lat, group=group, fill=total), color = "black", fill=NA, size=0.5) +
coord_map() +
theme_nothing(legend=TRUE) +
labs(title="Where vehicles are towed to", fill="")
print(tm_ct)
Most common year?
years <- tows %>%
group_by(Vehicle_Year) %>%
summarise(count=n()) %>%
arrange(-count)
kable(head(years,10))
2001 |
1506 |
2003 |
1444 |
2000 |
1418 |
2002 |
1402 |
2004 |
1206 |
2005 |
1151 |
1999 |
1129 |
2006 |
1048 |
2007 |
923 |
1998 |
914 |
Most common vehicle?
make <- tows %>%
group_by(Make) %>%
summarise(count=n()) %>%
arrange(-count)
kable(head(make, 10))
HONDA |
3353 |
NISSAN |
2590 |
TOYOTA |
1869 |
FORD |
1495 |
ACURA |
976 |
DODGE |
786 |
CHEVY |
772 |
HYUNDAI |
595 |
MAZDA |
426 |
JEEP |
423 |
Most common model?
model <- tows %>%
group_by(Model) %>%
summarise(count=n()) %>%
arrange(-count)
kable(head(model,10))
ACCORD |
1826 |
MAXIMA |
997 |
ALTIMA |
848 |
CIVIC |
805 |
CAMRY |
729 |
COROLLA |
505 |
TL |
281 |
SENTRA |
278 |
ELANTRA |
239 |
SONATA |
232 |
Year and make?
make_year <- tows %>%
group_by(Vehicle_Year, Make) %>%
summarise(count=n()) %>%
arrange(-count)
kable(head(make_year,10))
2000 |
NISSAN |
250 |
2001 |
HONDA |
246 |
2003 |
HONDA |
238 |
2001 |
NISSAN |
222 |
2000 |
HONDA |
221 |
1998 |
HONDA |
211 |
1999 |
HONDA |
199 |
2002 |
HONDA |
186 |
2002 |
NISSAN |
171 |
1997 |
HONDA |
170 |
Year and model?
model_year <- tows %>%
group_by(Vehicle_Year, Model) %>%
summarise(count=n()) %>%
arrange(-count)
kable(head(model_year,20))
2000 |
MAXIMA |
194 |
2001 |
ACCORD |
136 |
2001 |
MAXIMA |
130 |
2000 |
ACCORD |
124 |
2002 |
ACCORD |
119 |
2003 |
ACCORD |
119 |
1998 |
ACCORD |
118 |
1996 |
ACCORD |
110 |
1999 |
ACCORD |
110 |
1997 |
ACCORD |
98 |
1998 |
MAXIMA |
85 |
2002 |
MAXIMA |
84 |
2004 |
ACCORD |
82 |
2005 |
ACCORD |
78 |
2007 |
ACCORD |
77 |
1999 |
MAXIMA |
75 |
2005 |
ALTIMA |
75 |
1994 |
ACCORD |
72 |
2002 |
ALTIMA |
68 |
2009 |
ACCORD |
67 |
## Any particul |
ar color? |
|
color <- tows %>%
group_by(Color) %>%
summarise(count=n()) %>%
arrange(-count)
kable(head(color,10))
GRAY |
2614 |
BLACK |
2208 |
RED |
2011 |
WHITE |
1931 |
BLUE |
1863 |
GRY |
1779 |
TAN |
1585 |
GREEN |
1196 |
BLK |
1186 |
SILVER |
905 |
Most common time of day towed?
tows$hour <- hour(tows$Time)
hour <- tows %>%
group_by(hour) %>%
summarise(count=n()) %>%
arrange(-count)
kable(head(hour,10))
13 |
1444 |
12 |
1428 |
14 |
1256 |
15 |
1085 |
16 |
1032 |
17 |
1028 |
6 |
863 |
5 |
854 |
1 |
843 |
11 |
835 |
ggplot(tows, aes(x=hour)) + geom_histogram(binwidth=1)
Bad-luck drivers– who’s towed more often?
plates <- tows %>%
group_by(Vehicle_Year, Model, Vehicle_Plate) %>%
summarise(count=n()) %>%
arrange(-count) %>%
filter(Vehicle_Plate!="") %>%
filter(!is.na(Vehicle_Plate)) %>%
filter(Vehicle_Plate!="NONE")
kable(head(plates,10))
2002 |
LANCER |
6AFSU6 |
11 |
2000 |
MAXIMA |
7AFST5 |
9 |
2004 |
G35 |
4ARHP7 |
9 |
2010 |
CIVIC |
8APVS9 |
9 |
1993 |
ACCORD |
AB67814 |
7 |
2003 |
CIVIC |
AA23938 |
7 |
2014 |
CHARGER |
6AMAA1 |
7 |
1995 |
CAMRY |
2ADTU4 |
6 |
2000 |
PASSAT |
339KYX |
6 |
2001 |
E320 |
9AMEJ2 |
6 |
Which tow yards are most prolific?
firms <- tows %>%
group_by(Tow_Firm) %>%
summarise(count=n()) %>%
arrange(-count)
kable(head(firms,10))
CROSS COUNTRY TOWING |
3135 |
WHITEY’S FRAME SHOP |
1765 |
CORONA’S AUTO PARTS,INC. |
1673 |
FRIENDLY AUTO BODY & TOWING |
1659 |
MODERN GARAGE |
1512 |
A & N AUTO |
1482 |
METRO AUTOBODY & TOWING INC |
1450 |
BENTON AUTO BODY |
1438 |
RENO’S AUTO BODY |
1437 |
CAPITOL TOWING |
1246 |
Most common address?
address <- tows %>%
group_by(Tow_From_Address) %>%
summarise(count=n()) %>%
arrange(-count)
kable(head(address, 10))
1000 MAIN ST |
180 |
200 BLOOMFIELD AV |
157 |
100 WESTON ST |
144 |
967 ASYLUM AV |
140 |
105 SHERBROOKE AV |
116 |
265 WASHINGTON ST |
99 |
200 PEARL ST |
97 |
253 HIGH ST |
93 |
2995 MAIN ST |
91 |
1429 PARK ST |
80 |
Most common address targeted by tow companies?
tow_address <- tows %>%
group_by(Tow_From_Address, Tow_Firm) %>%
summarise(count=n()) %>%
arrange(-count)
kable(head(tow_address, 20))
200 BLOOMFIELD AV |
CROSS COUNTRY TOWING |
78 |
105 SHERBROOKE AV |
CROSS COUNTRY TOWING |
36 |
1000 MAIN ST |
CROSS COUNTRY TOWING |
34 |
100 WESTON ST |
CORONA’S AUTO PARTS,INC. |
30 |
57 SUMNER ST |
CROSS COUNTRY TOWING |
29 |
100 WESTON ST |
INTERNATIONAL TOWING COMPANY |
27 |
100 WESTON ST |
METRO AUTOBODY & TOWING INC |
26 |
198 SIGOURNEY ST |
CROSS COUNTRY TOWING |
22 |
887 ASYLUM AV |
CROSS COUNTRY TOWING |
22 |
1000 MAIN ST |
CORONA’S AUTO PARTS,INC. |
21 |
967 ASYLUM AV |
CORONA’S AUTO PARTS,INC. |
21 |
28 TOWNLEY ST |
CROSS COUNTRY TOWING |
20 |
1 WESTON PARK RD |
FRIENDLY AUTO BODY & TOWING |
18 |
265 WASHINGTON ST |
CROSS COUNTRY TOWING |
18 |
265 WASHINGTON ST |
MODERN GARAGE |
18 |
200 PEARL ST |
CROSS COUNTRY TOWING |
17 |
967 ASYLUM AV |
CAPITOL TOWING |
17 |
100 WESTON ST |
WHITEY’S FRAME SHOP |
16 |
1450 MAIN ST |
CROSS COUNTRY TOWING |
16 |
31 GILLETT ST |
CROSS COUNTRY TOWING |
16 |
Where do these tow yards target?
tows$tow_lon <- as.numeric(tows$tow_lon)
tows$tow_lat <- as.numeric(tows$tow_lat)
tm_ct <- ggmap(hart_map) +
geom_point(data=tows, aes(x=tow_lon, y=tow_lat, color="tomato"), size=1, alpha=0.5) +
geom_polygon(data = town_borders, aes(x=long, y=lat, group=group, fill=total), color = "black", fill=NA, size=0.5) +
facet_wrap(~Tow_Firm, ncol=3) +
coord_map() +
theme_nothing(legend=T)
print(tm_ct)