Talk:List of U.S. states and territories by intentional homicide rate/Table creation code

From WikiProjectMed
Jump to navigation Jump to search

For those interested here are the instructions for downloading the data and the code for producing the rate table (and a table for totals). The language used is the R (programming language).

Table for homicide by type

This code uses data from the CDC.

R code for US homicide by type
# Attempt to set working directory
# setwd(getSrcDirectory()[1]) # if running entire file
setwd(dirname(rstudioapi::getActiveDocumentContext()$path)) # if running section
options(scipen=999) # don't use scientific notation
library(dplyr)

# https://wonder.cdc.gov/ucd-icd10-expanded.html
# Section 1 will vary depending on the data needed. Leave sections 2, 3, 4, 5 as default. In section 6, above the large box, see the five dots and select "Injury Intent and Mechanism"; select either "All Causes" or "Firearm". In section 7 check all boxes except "Show totals".

CDC_import = function(file_name) {
  df = read.table(file_name, header=T, sep="\t", fill=T) %>%
    filter(Notes == "") %>% select(-Notes,-Crude.Rate) %>%
    mutate(Deaths = as.numeric(ifelse(Deaths == "Suppressed", NA, Deaths))) %>%
    mutate(Rate = Deaths/Population*10^5)
  return(df)
}

# set flag links
fix_links = function(df) {
  df = df %>%
  mutate(State = paste0("{{flagg|uspeft|pref=Crime in|",State,"}}")) %>%
  mutate(State = ifelse(stringr::str_detect(State,"United States"),
                        "{{noflag|'''United States'''}}", State)) %>%
  mutate(State = ifelse(stringr::str_detect(State,"Georgia"),
                        "{{flagg|uspeft|pref=Crime in|Georgia (U.S. state)|name=Georgia}}", State)) %>%
  mutate(State = ifelse(stringr::str_detect(State,"District"),
                        "{{flagg|uspeft|pref=Crime in|District of Columbia|name=District of Columbia}}", State))
  return(df)
}

# homicide/suicide totals
# In section 1, group by State, Injury Intent. Type in a title "CDC - State Intent External".
state.homicide = CDC_import("intentional/CDC - State Intent External.txt") %>%
  select(State,Injury.Intent,Deaths,Population) %>%
  filter(Injury.Intent %in% c("Homicide")) %>%
  select(State,Population,Deaths) %>%
  bind_rows(setNames(data.frame("United States", sum(.$Population), sum(.$Deaths)), names(.))) %>%
  mutate(Total = Deaths/Population*10^5)

# mechanism, all US
# In section 1, group by Mechanism. Type in a title "CDC - Homicide Mechanism.txt". In section 6, view Intent and Mechanism, select Homicide.
homicide.mechanism = CDC_import("intentional/CDC - Homicide Mechanism.txt") %>%
  setNames(nm = c("Mechanism","Code","Deaths","Population","Rate")) %>%
  select(Mechanism,Rate) %>%
  filter(Rate > 0) %>%
  mutate(Mechanism = c("Stab","Drown.Other","Fall.Other","Fire","Hot.Other","Gun",
                       "Transport.Other","Poison","Struck","Choke")) %>%
  filter(!stringr::str_detect(Mechanism, "Other"))

# mechanism by state
# In section 1, group by State, Mechanism. Type in a title "CDC - State Homicide Mechanism.txt". In section 6, view Intent and Mechanism, select Homicide.
state.homicide.mechanism = CDC_import("intentional/CDC - State Homicide Mechanism.txt") %>%
  select(State,3,Rate) %>%
  tidyr::pivot_wider(names_from = 2, values_from = Rate, values_fill=0) %>%
  select(State,2,5,7,16,17,18) %>%
  setNames(nm = c("State","Stab","Fire","Gun","Poison","Struck","Choke")) %>%
  
  bind_rows(setNames(data.frame("United States", t(homicide.mechanism$Rate)), names(.))) %>%
  inner_join(state.homicide) %>%
  select(State,Total,Gun,Stab,Choke,Struck,Poison,Fire) %>%
  arrange(desc(Total)) %>%
  arrange(-stringr::str_detect(State, "United States")) %>%
  mutate(across(where(is.numeric), round, 1)) %>%
  fix_links()
write.csv(state.homicide.mechanism,"intentional/state_homicide_mechanism.csv",row.names=F)

Tables for homicide by year / decade

This code uses data from the FBI.

R code for US state homicide data by year/decade
# Attempt to set working directory
# setwd(getSrcDirectory()[1]) # if running entire file
setwd(dirname(rstudioapi::getActiveDocumentContext()$path)) # if running section
options(scipen=999) # don't use scientific notation
library(dplyr)

# FBI data
# https://cde.ucr.cjis.gov/LATEST/webapp/#/pages/downloads
# Additional Datasets > Summary Reporting System (SRS) > Download

# all crimes, states, years
crime_yearly = read.csv("estimated_crimes_1979_2022.csv") %>%
  mutate(across(where(is.numeric), ~ifelse(is.na(.), 0, .))) %>%
  filter(state_abbr != "") %>%
  mutate(rape_legacy = rape_legacy + rape_revised) %>%
  rename(State = state_name) %>%
  select(-c(state_abbr,rape_revised))

# US overall
us_yearly = crime_yearly %>%
  group_by(year) %>%
  summarise(across(where(is.numeric), sum)) %>%
  ungroup %>%
  mutate(State = "United States")

# append US totals
crime_yearly = crime_yearly %>%
  bind_rows(us_yearly)

# homicide by location and year
homicide_yearly = crime_yearly %>%
  select(year,State,population,homicide) %>%
  mutate(rate = homicide/population*10^5)

# function to set flag links
fix_links = function(df) {
  df = df %>%
  mutate(State = paste0("{{flagg|uspeft|pref=Crime in|",State,"}}")) %>%
  mutate(State = ifelse(stringr::str_detect(State,"United States"), 
                        "{{noflag|'''United States'''}}", State)) %>%
  mutate(State = ifelse(stringr::str_detect(State,"Georgia"), 
                        "{{flagg|uspeft|pref=Crime in|Georgia (U.S. state)|name=Georgia}}", State)) %>%
  mutate(State = ifelse(stringr::str_detect(State,"District"), 
                        "{{flagg|uspeft|pref=Crime in|District of Columbia|name=District of Columbia}}", State))
  return(df)
}

# most recent five years ##
homicide_five_years = homicide_yearly %>%
  filter(year > max(year)-5) %>%
  select(-population) %>%
  tidyr::pivot_wider(names_from = year, values_from = c(homicide,rate)) %>%
  fix_links()

homicide_five_years_total = homicide_five_years %>%
  select(State,homicide_2018:homicide_2022) %>%
  setNames(nm = c("Location",2018:2022))
write.csv(homicide_five_years_total,"homicide_five_years_total.csv",row.names=F)

homicide_five_years_rate = homicide_five_years %>%
  select(State,rate_2018:rate_2022) %>%
  setNames(nm = c("Location",2018:2022))
write.csv(homicide_five_years_rate,"homicide_five_years_rate.csv",row.names=F)

# average of each decade ##
homicide_decade = homicide_rate_yearly %>%
  filter(year > 1979) %>%
  mutate(decade = paste0(floor(year/10)*10,"s")) %>%
  select(decade,State,homicide,rate) %>%
  
  group_by(decade,State) %>%
  summarise(across(where(is.numeric),mean)) %>%
  ungroup %>%
  
  tidyr::pivot_wider(names_from = decade, values_from = c(homicide,rate)) %>%
  fix_links()

total_table = homicide_decade %>%
  select(State,homicide_1980s:homicide_2020s) %>%
  arrange(desc(homicide_2020s)) %>%
  arrange(-stringr::str_detect(State, "United States"))
write.csv(total_table,"state_homicide_total_decade.csv",row.names=F)

rate_table = homicide_decade %>%
  select(State,rate_1980s:rate_2020s) %>%
  arrange(desc(rate_2020s)) %>%
  arrange(-stringr::str_detect(State, "United States"))
write.csv(rate_table,"state_homicide_rate_decade.csv",row.names=F)