Marijuana citations in Connecticut analysis

This is the exploratory visualization behind the Trend CT story "Who in CT is being cited for marijuana possession and by whom?"

Visit the repo for the data used in this analysis. (Also, check out the reproducible scripts and data behind many of our other stories in our central data stories repo.)

The data used in this analysis (marijuana citations between 2011 and 2014) is from the Connecticut State Police via a FOIA request made by Evan Anderson via Muckrock.com.

What’s in this walkthrough

Exploratory analysis and visualizations of marijuana citations.

library(dplyr)
library(lubridate)
library(ggplot2)
library(tidyr)
library(stringr)
library(extrafont)
library(ggalt)
library(scales)
library(gridExtra)
library(grid)
library(knitr)
#install.packages("devtools")
#devtools::install_github("trendct/ctnamecleaner")
library(ctnamecleaner)
#devtools::install_github("hrecht/censusapi")
library("censusapi")
source("keys.R")

Loading and prepping the data

# Read the raw export. Strings are kept as character (not factors) because the
# first column is pattern-matched and later parsed as a date.
mj <- read.csv(
  "data/15-218_Marijuana_Arrests_by_Agency_2011-2013_NIBRS.csv",
  stringsAsFactors = FALSE
)

# The file's own header is replaced with short, positional column names.
colnames(mj) <- c("date", "description", "gender", "race", "ethnicity", "age")

# Drop rows whose first column is blank or contains one of the report
# boilerplate markers; a single alternation covers all five markers.
mj2 <- filter(
  mj,
  date != "",
  !grepl("Unit of|NSS |Report |Arrests |IncidentDate", date)
)

# Tag every row with the department it belongs to.
# A row whose first column contains no "/" is treated as a department-name
# row; that name applies to itself and to every following row until the next
# department-name row. Rows that precede the first department-name row get
# the placeholder "temp".
is_department_row <- !grepl("/", mj2$date)
department_names <- c("temp", mj2$date[is_department_row])

# cumsum() gives, for each row, how many department-name rows have been seen
# so far; adding 1 skips past the leading "temp" placeholder. This replaces a
# row-by-row loop and also works when mj2 has zero rows.
mj2$department <- department_names[cumsum(is_department_row) + 1]

# Keep only rows whose gender is recorded as "M" or "F", then parse the
# month/day/year strings into dates and derive calendar year and month.
mj2 <- mj2 %>%
  filter(gender %in% c("M", "F")) %>%
  mutate(
    date = mdy(date),
    year = lubridate::year(date),
    month = lubridate::month(date)
  )

Men and women

# Number of citations for each recorded gender, printed as a table.
mj_sex <- summarise(group_by(mj2, gender), citations = n())

kable(mj_sex)

gender	citations
F	1674
M	10061

Age distribution

# Coerce age to numeric so it can be binned; values that cannot be parsed
# become NA (as.numeric() warns when that happens).
mj2$age <- as.numeric(mj2$age)

# One bar per year of age, shaded by the bar's count. The column is referenced
# by its bare name inside aes(): writing mj2$age there bypasses ggplot2's data
# lookup and labels the axis "mj2$age".
ggplot(mj2, aes(x = age)) +
  geom_histogram(binwidth = 1, aes(fill = ..count..)) +
  ggtitle("Marijuana citations by age in Connecticut")

Departments that arrested the most

# Citation count per department, largest first; show the top ten.
mj_most <- arrange(
  summarise(group_by(mj2, department), arrests = n()),
  desc(arrests)
)

kable(head(mj_most, n = 10))

department	arrests
Connecticut State Police	1480
New Haven Police Department	1451
Stamford Police Department	770
Norwalk Police Department	761
West Hartford Police Department	401
New London Police Department	384
Middletown Police Department	356
East Hartford Police Department	328
Southington Police Department	289
Glastonbury Police Department	282

After adjusting for population

# Derive a town name from each department name by stripping the
# " Police Department" / " Police Dept." suffix. fixed = TRUE makes the match
# literal, so the trailing "." is a real period rather than a regex wildcard.
mj_most$town <- gsub(" Police Department", "", mj_most$department, fixed = TRUE)
mj_most$town <- gsub(" Police Dept.", "", mj_most$town, fixed = TRUE)

# NOTE(review): ctpopulator() comes from the ctnamecleaner package; it
# presumably matches on `town` and appends population columns (pop2013 is
# used further down) -- confirm against the package documentation.
mj_most2 <- ctpopulator(town, mj_most)

## [1] "Checking to see if names match..."

# Departments with no 2013 population figure (not used again in this section).
non_mj_most <- mj_most2[is.na(mj_most2$pop2013), ]

# Departments that do have a population figure: citations per 10,000 residents.
mj_most2_map <- mj_most2[!is.na(mj_most2$pop2013), ]
mj_most2_map$per_capita <- with(mj_most2_map, (arrests / pop2013) * 10000)

# Keep the three display columns and give them presentation-ready headers.
mj_most2_map <- setNames(
  mj_most2_map[c("town", "per_capita", "arrests")],
  c("Town", "Per capita arrests", "Total arrests")
)

mj_most2_map$Town <- str_to_title(mj_most2_map$Town)

# Highest per-capita rate first; show the top ten.
mj_most2_map <- arrange(mj_most2_map, desc(`Per capita arrests`))
kable(head(mj_most2_map, n = 10))

Town	Per capita arrests	Total arrests
Clinton	179.01654	237
New London	139.19095	384
Derby	132.38844	170
New Haven	111.32594	1451
Granby	104.57285	118
Norwalk	87.97790	761
Windsor	84.87973	247
Glastonbury	81.61848	282
Farmington	81.00987	206
Old Saybrook	79.93761	82

Arrests over time by department (total)

# Citation count for every department/year combination.
mj_arrests_years <- mj2 %>%
  group_by(department, year) %>%
  summarise(arrests = n())

# Derive a town name by stripping the department suffix. fixed = TRUE makes
# the match literal, so the "." in "Dept." is not treated as a regex wildcard.
mj_arrests_years$town <- gsub(
  " Police Department", "", mj_arrests_years$department,
  fixed = TRUE
)
mj_arrests_years$town <- gsub(
  " Police Dept.", "", mj_arrests_years$town,
  fixed = TRUE
)

# NOTE(review): ctpopulator() comes from the ctnamecleaner package; it
# presumably appends population columns keyed on `town` (pop2013 is used
# afterwards) -- confirm against the package documentation.
mj_arrests_years <- ctpopulator(town, mj_arrests_years)

## [1] "Checking to see if names match..."

# Rows that have a 2013 population figure, with citations per 10,000 residents.
mj_arrests_years2 <- mj_arrests_years[!is.na(mj_arrests_years$pop2013), ]
mj_arrests_years2$per_capita <- with(
  mj_arrests_years2,
  arrests / pop2013 * 10000
)


# Small-multiple bar charts: total citations per year, one panel per
# department, three panels per row.
gg <- ggplot(mj_arrests_years, aes(x = year, y = arrests)) +
  geom_bar(stat = "identity") +
  facet_wrap(~department, ncol = 3) +
  labs(
    x = NULL,
    y = NULL,
    title = "Total marijuana citations",
    subtitle = "Between 2011 and 2014.",
    caption = "SOURCE: National Incident-Based Reporting System, U.S. Census \nAndrew Ba Tran/TrendCT.org"
  ) +
  theme_bw(base_family = "Lato Regular") +
  # House style: no y ticks, no panel border, no legend.
  theme(
    axis.ticks.y = element_blank(),
    panel.border = element_blank(),
    legend.key = element_blank(),
    plot.title = element_text(face = "bold", family = "Lato Regular", size = 22),
    plot.caption = element_text(
      face = "bold", family = "Lato Regular", size = 9, color = "gray",
      margin = margin(t = 10, r = 80)
    ),
    legend.position = "none"
  )
gg

Arrests over time by department (per capita)

# Small-multiple bar charts: citations per 10,000 residents per year, one
# panel per department, three panels per row.
gg <- ggplot(mj_arrests_years2, aes(x = year, y = per_capita)) +
  geom_bar(stat = "identity") +
  facet_wrap(~department, ncol = 3) +
  labs(
    x = NULL,
    y = NULL,
    title = "Per capita marijuana citations",
    subtitle = "Per 10,000 residents. Between 2011 and 2014.",
    caption = "SOURCE: National Incident-Based Reporting System, U.S. Census \nAndrew Ba Tran/TrendCT.org"
  ) +
  theme_bw(base_family = "Lato Regular") +
  # House style: no y ticks, no panel border, no legend.
  theme(
    axis.ticks.y = element_blank(),
    panel.border = element_blank(),
    legend.key = element_blank(),
    plot.title = element_text(face = "bold", family = "Lato Regular", size = 22),
    plot.caption = element_text(
      face = "bold", family = "Lato Regular", size = 9, color = "gray",
      margin = margin(t = 10, r = 80)
    ),
    legend.position = "none"
  )
gg

# One row per department with a column of citation counts for each year.
# Note: this reuses (overwrites) the name mj_arrests_years.
mj_arrests_years <- mj2 %>%
  group_by(department, year) %>%
  summarise(arrests = n()) %>%
  spread(key = year, value = arrests)

# Percent change from 2011 to 2013, largest increase first. Departments
# missing either year get NA.
mj_arrests_years <- mj_arrests_years %>%
  mutate(per_change = round((`2013` - `2011`) / `2011` * 100, digits = 2)) %>%
  arrange(desc(per_change))

kable(head(mj_arrests_years, n = 10))

department	2011	2012	2013	per_change
Thomaston Police Department	2	2	7	250.00
C.C.S.U. Police Department	2	3	5	150.00
Orange Police Department	16	33	26	62.50
Ansonia Police Department	25	28	39	56.00
Plainfield Police Department	11	NA	16	45.45
Norwalk Police Department	221	225	315	42.53
Granby Police Department	29	48	41	41.38
Clinton Police Department	61	93	83	36.07
East Hartford Police Department	117	61	150	28.21
New Milford Police Department	22	21	28	27.27

Arrests by race by department

# Combine race and ethnicity into a single label. Rows with ethnicity "H" are
# labelled "Hispanic" regardless of race; otherwise the one-letter race code
# is expanded to its name. Codes not listed here are left as they are.
mj2$race_ethnicity <- local({
  race_codes <- c("A", "B", "W", "I", "U")
  race_names <- c("Asian", "Black", "White", "Indian", "Unknown")

  combined <- ifelse(mj2$ethnicity == "H", "Hispanic", mj2$race)

  # Position of each value in race_codes (NA when it is not a known code).
  code_pos <- match(combined, race_codes)
  is_known <- !is.na(code_pos)
  combined[is_known] <- race_names[code_pos[is_known]]

  combined
})


# Citation count for every department / race-ethnicity combination.
mj_arrests_race <- summarise(
  group_by(mj2, department, race_ethnicity),
  arrests = n()
)

## chart
# Horizontal bars per race/ethnicity, one panel per department; each panel
# gets its own scale via scales = "free_x".
ggplot(mj_arrests_race, aes(x = race_ethnicity, y = arrests)) +
  geom_bar(stat = "identity") +
  coord_flip() +
  facet_wrap(~department, ncol = 4, scales = "free_x")

Bringing in census population data

# Census variables requested below:
#   B02001_001E - Total
#   B02001_002E - White
#   B02001_003E - Black
#   B02001_004E - Indian
#   B02001_005E - Asian
#   B03001_003E - Hispanic
# NOTE(review): the B02001 race counts presumably include Hispanic residents
# of each race, while the citation data labels ethnicity "H" as Hispanic
# regardless of race -- confirm the two sets of categories are comparable.

# Pull the figures for every county subdivision in state 09.
race_towns <- getCensus(
  name = "acs5",
  vintage = 2014,
  key = census_key,
  vars = c(
    "NAME", "B02001_001E", "B02001_002E", "B02001_003E",
    "B02001_004E", "B02001_005E", "B03001_003E"
  ),
  region = "county subdivision:*",
  regionin = "state:09"
)

# Columns are renamed by position.
# NOTE(review): this assumes getCensus() returns the columns in exactly this
# order -- confirm for the installed censusapi version.
names(race_towns) <- c(
  "town", "state", "county", "countysub", "total_pop",
  "White", "Black", "Indian", "Asian", "Hispanic"
)
race_towns <- race_towns[
  c("town", "total_pop", "White", "Black", "Indian", "Asian", "Hispanic")
]

# Drop the "County subdivisions ..." rows and cut each name at " town".
race_towns <- race_towns[!grepl("County subdivisions", race_towns$town), ]
race_towns$town <- gsub(" town.*", "", race_towns$town)

# Long format: one row per town and group, with that group's share of the
# town's total population.
race_towns_long <- race_towns %>%
  gather(key = "race_ethnicity", value = "population", White:Hispanic) %>%
  mutate(percent_population = round(population / total_pop * 100, 2))

Percent of tickets by race compared to percent of population by race

# Share of each department's citations that went to each race/ethnicity group.
mj_arrests_race_spread <- mj_arrests_race %>%
  group_by(department) %>%
  mutate(
    total = sum(arrests, na.rm = TRUE),
    percent = round(arrests / total * 100, 2)
  )

# Derive a town name by stripping the department suffix. fixed = TRUE makes
# the match literal, so the "." in "Dept." is not treated as a regex wildcard.
mj_arrests_race_spread$town <- gsub(
  " Police Department", "", mj_arrests_race_spread$department,
  fixed = TRUE
)
mj_arrests_race_spread$town <- gsub(
  " Police Dept.", "", mj_arrests_race_spread$town,
  fixed = TRUE
)

# Attach each town's population share for the same group. The join keys are
# spelled out so the match does not depend on whichever column names the two
# tables happen to share.
mj_arrests_race_spread <- left_join(
  mj_arrests_race_spread,
  race_towns_long,
  by = c("town", "race_ethnicity")
)

# Keep rows that found a census match, then drop the "Indian" group.
mj_arrests_filtered <- subset(mj_arrests_race_spread, !is.na(total_pop))
mj_arrests_filtered <- filter(mj_arrests_filtered, race_ethnicity != "Indian")

# The same points without the race_ethnicity column, for use as a
# group-independent background layer in the plot.
mj_filtered <- mj_arrests_filtered[c("percent", "percent_population")]

# Scatter of percent cited (x) against percent of town population (y), one
# panel per race/ethnicity group.
gg <- ggplot(mj_arrests_filtered, aes(percent, percent_population)) +
  # Parity line y = x: points on it are cited in proportion to population.
  # Slope and intercept are both given explicitly; supplying only
  # intercept = 1 would draw y = x + 1 instead.
  geom_abline(intercept = 0, slope = 1, color = "grey65") +
  # All points in light grey as a backdrop (mj_filtered has no
  # race_ethnicity column, so this layer is repeated in every panel).
  geom_point(data = mj_filtered, color = "grey85") +
  # The panel's own group, in colour, on top.
  geom_point(aes(color = race_ethnicity)) +
  facet_wrap(~race_ethnicity, ncol = 4) +
  labs(
    y = "Percent population",
    x = "Percent cited",
    title = "Marijuana citations by race compared to town's population",
    subtitle = "Minorities tend to be cited disproportionately to the proportion of the population of the towns they live in.\nBased on citations between 2011 and 2014.",
    caption = "SOURCE: National Incident-Based Reporting System, U.S. Census \nAndrew Ba Tran/TrendCT.org"
  ) +
  theme_bw(base_family = "Lato Regular") +
  # House style: no y ticks, no panel border, no legend.
  theme(
    axis.ticks.y = element_blank(),
    panel.border = element_blank(),
    legend.key = element_blank(),
    plot.title = element_text(face = "bold", family = "Lato Regular", size = 22),
    plot.caption = element_text(
      face = "bold", family = "Lato Regular", size = 9, color = "gray",
      margin = margin(t = 10, r = 80)
    ),
    legend.position = "none"
  )
gg