This is the exploratory visualization behind the Trend CT story: Who in CT is being cited for marijuana possession and by whom?

Visit the repo for the data used in this analysis. (Also, check out the reproducible scripts and data behind many of our other stories in our central data stories repo.)

The data used in this analysis (marijuana citations between 2011 and 2014) is from the Connecticut State Police via a FOIA request made by Evan Anderson via Muckrock.com.

What’s in this walkthrough

Exploratory analysis and visualizations of marijuana citations.

library(dplyr)
library(lubridate)
library(ggplot2)
library(tidyr)
library(stringr)
library(extrafont)
library(ggalt)
library(scales)
library(gridExtra)
library(grid)
library(knitr)
#install.packages("devtools")
#devtools::install_github("trendct/ctnamecleaner")
library(ctnamecleaner)
#devtools::install_github("hrecht/censusapi")
library("censusapi")
source("keys.R")

Loading and prepping the data

# Read the raw export, keeping text columns as character vectors.
# FALSE is spelled out because the shorthand F is an ordinary, reassignable
# variable.
mj <- read.csv(
  "data/15-218_Marijuana_Arrests_by_Agency_2011-2013_NIBRS.csv",
  stringsAsFactors = FALSE
)

# Name the six columns positionally.
colnames(mj) <- c("date", "description", "gender", "race", "ethnicity", "age")

# Drop rows whose first column is empty or contains one of these markers.
# NOTE(review): these look like report header/footer lines mixed into the
# data -- confirm against the raw file.
boilerplate_pattern <- "Unit of|NSS |Report |Arrests |IncidentDate"

mj2 <- filter(mj, date != "" & !grepl(boilerplate_pattern, date))

# Attach a department to every row by carrying the most recent "heading" row
# forward. A row counts as a heading when its date column contains no "/";
# its own text becomes the department for itself and for every following row
# until the next heading. Rows that come before any heading keep the
# placeholder "temp".
is_heading <- !grepl("/", mj2$date, fixed = TRUE)

# For each row, the index of the latest heading at or above it (0 = none yet).
last_heading <- cummax(seq_along(is_heading) * is_heading)

department <- rep("temp", nrow(mj2))
has_heading <- last_heading > 0L
department[has_heading] <- mj2$date[last_heading[has_heading]]

# Single column assignment instead of one data-frame write per row; this also
# behaves correctly when mj2 has zero rows, where 1:nrow(mj2) would have
# iterated over c(1, 0).
mj2$department <- department

# Keep the tracking variable's final value available, as the original loop did.
department_name <- if (nrow(mj2) > 0) department[nrow(mj2)] else "temp"

# Keep only rows whose gender is recorded as "M" or "F".
mj2 <- filter(mj2, gender %in% c("M", "F"))

# Parse the month/day/year text into Date values, then derive the calendar
# year and month as separate columns.
mj2 <- mj2 %>%
  mutate(
    date = mdy(date),
    year = year(date),
    month = month(date)
  )

Men and women

# Count citations for each recorded gender and print the result as a table.
mj_sex <- summarise(group_by(mj2, gender), citations = n())

kable(mj_sex)
gender citations
F 1674
M 10061

Age distribution

# Coerce age to numeric; as.numeric() turns any entry it cannot parse into NA
# (with a warning).
mj2$age <- as.numeric(mj2$age)

# One-year-wide bins, shaded by the number of citations in each bin.
# The column is referenced by bare name inside aes() so it is looked up in the
# plot's data (and the axis is labelled "age"), rather than pulling a detached
# vector out of the global mj2 with `$`.
ggplot(mj2, aes(x = age)) +
  geom_histogram(binwidth = 1, aes(fill = ..count..)) +
  ggtitle("Marijuana citations by age in Connecticut")

Departments that arrested the most

# Tally rows per department and sort from most to fewest.
mj_most <- arrange(
  summarise(group_by(mj2, department), arrests = n()),
  desc(arrests)
)

# Show the ten departments with the highest totals.
kable(head(mj_most, n = 10))
department arrests
Connecticut State Police 1480
New Haven Police Department 1451
Stamford Police Department 770
Norwalk Police Department 761
West Hartford Police Department 401
New London Police Department 384
Middletown Police Department 356
East Hartford Police Department 328
Southington Police Department 289
Glastonbury Police Department 282

After adjusting for population

# Strip the agency suffix from department names to get a bare town name.
# fixed = TRUE treats the pattern as literal text, so the "." in "Dept." only
# matches a real period instead of acting as a regex wildcard.
mj_most$town <- gsub(" Police Department", "", mj_most$department, fixed = TRUE)
mj_most$town <- gsub(" Police Dept.", "", mj_most$town, fixed = TRUE)

# NOTE(review): ctpopulator() comes from the ctnamecleaner package; the code
# below relies on it adding a pop2013 column -- confirm against the package.
mj_most2 <- ctpopulator(town, mj_most)
## [1] "Checking to see if names match..."
# Rows that received no pop2013 value are set aside here.
non_mj_most <- subset(mj_most2, is.na(pop2013))

# Only rows with a population figure can be expressed as a rate.
mj_most2_map <- subset(mj_most2, !is.na(pop2013))
# Arrests per 10,000 of pop2013.
mj_most2_map$per_capita <-
  (mj_most2_map$arrests / mj_most2_map$pop2013) * 10000

# Keep the three display columns and give them reader-friendly headings.
mj_most2_map <- mj_most2_map[c("town", "per_capita", "arrests")]
colnames(mj_most2_map) <- c("Town", "Per capita arrests", "Total arrests")

mj_most2_map$Town <- str_to_title(mj_most2_map$Town)
# Highest rate first.
mj_most2_map <- arrange(mj_most2_map, desc(`Per capita arrests`))
kable(head(mj_most2_map, 10))
Town Per capita arrests Total arrests
Clinton 179.01654 237
New London 139.19095 384
Derby 132.38844 170
New Haven 111.32594 1451
Granby 104.57285 118
Norwalk 87.97790 761
Windsor 84.87973 247
Glastonbury 81.61848 282
Farmington 81.00987 206
Old Saybrook 79.93761 82

Arrests over time by department (total)

# Count arrests for every department/year pair. ungroup() drops the grouping
# that summarise() would otherwise leave on `department`, so later steps work
# on a plain table.
mj_arrests_years <- mj2 %>%
  group_by(department, year) %>%
  summarise(arrests = n()) %>%
  ungroup()

# Strip the agency suffix from department names to get a bare town name.
# fixed = TRUE treats the pattern as literal text, so the "." in "Dept." only
# matches a real period instead of acting as a regex wildcard.
mj_arrests_years$town <- gsub(
  " Police Department", "", mj_arrests_years$department,
  fixed = TRUE
)
mj_arrests_years$town <- gsub(
  " Police Dept.", "", mj_arrests_years$town,
  fixed = TRUE
)

# NOTE(review): ctpopulator() comes from the ctnamecleaner package; the code
# below relies on it adding a pop2013 column -- confirm against the package.
mj_arrests_years <- ctpopulator(town, mj_arrests_years)
## [1] "Checking to see if names match..."
# Keep only rows with a population figure, then compute arrests per 10,000
# of pop2013.
mj_arrests_years2 <- subset(mj_arrests_years, !is.na(pop2013))
mj_arrests_years2$per_capita <-
  mj_arrests_years2$arrests / mj_arrests_years2$pop2013 * 10000


# Small-multiples bar chart: one panel per department, one bar per year,
# bar height = number of arrests. Built as a single expression.
gg <- ggplot(mj_arrests_years, aes(x = year, y = arrests)) +
  geom_bar(stat = "identity") +
  facet_wrap(~department, ncol = 3) +
  labs(
    x = NULL,
    y = NULL,
    title = "Total marijuana citations",
    subtitle = "Between 2011 and 2014.",
    caption = "SOURCE: National Incident-Based Reporting System, U.S. Census \nAndrew Ba Tran/TrendCT.org"
  ) +
  theme_bw(base_family = "Lato Regular") +
  # All theme overrides gathered into one call; none of them overlap.
  theme(
    axis.ticks.y = element_blank(),
    panel.border = element_blank(),
    legend.key = element_blank(),
    plot.title = element_text(
      face = "bold", family = "Lato Regular", size = 22
    ),
    plot.caption = element_text(
      face = "bold", family = "Lato Regular", size = 9, color = "gray",
      margin = margin(t = 10, r = 80)
    ),
    legend.position = "none"
  )

# Render the plot.
gg