Crimes Against Women

India

Author

Arun Koundinya Parasa

Published

July 22, 2024

I have not written any inferences in this blog. I will be sharing my opinions along with my inferences in my personal blog. Stay Tuned!!!

0. Loading Libraries

This visualization is done in R, using shapefiles to draw the data on a map of India.

Loading Libraries
library(tidyverse)
library(jpeg)
library(grid)
library(sf)
library(rvest)
library(patchwork)

1. Data Loading

Loading Data
## District-wise crime counts, read from the local CSV file
data <- readr::read_csv("districtwise-crime-against-women.csv")

## State boundaries shapefile
# Source: https://projects.datameet.org/maps/#state-boundaries
INDIA_MAP <- st_read("States/Admin2.shp")

## Indian census population by state / union territory, scraped from Wikipedia.
## The second <table> element on the page is taken as the population table.
## The intermediate page / node objects live only inside local(), so nothing
## but `population_table` is left in the workspace.
population_table <- local({
  wiki_page <- read_html(
    "https://en.wikipedia.org/wiki/List_of_states_and_union_territories_of_India_by_population"
  )
  wiki_tables <- html_nodes(wiki_page, "table")
  html_table(wiki_tables[[2]], fill = TRUE)
})

2. Data Manipulation

Data Manipulation
## Reshape the crime columns to long format and bucket every raw crime
## column into a broader category that is easier for the reader to consume.
## Columns 8:29 are selected by position and become Category / Count pairs;
## any column not listed below falls into "Others".
Crime_Data <- data |>
  mutate(year_yr = lubridate::year(year)) |>
  pivot_longer(cols = 8:29, names_to = "Category", values_to = "Count") |>
  mutate(
    Category_Update = case_when(
      Category %in% c(
        "dowry_deaths",
        "abetment_to_suicide_of_women",
        "cruelty_by_husband_or_his_relatives",
        "dowry_prohibition",
        "protection_of_women_from_domestic_violence_act"
      ) ~ "Domestic Violence",
      Category %in% c(
        "miscarriage",
        "acid_attack",
        "attempt_to_acid_attack",
        "kidnapping_and_abduction_of_women_total",
        "insult_to_the_modesty_of_women"
      ) ~ "Harassment",
      Category %in% c(
        "murder_with_rape_or_gang_rape",
        "assault_on_women_with_intent_to_outrage_her_modesty",
        "indecent_representation_of_women_prohibition"
      ) ~ "Sexual Harassment",
      Category %in% c(
        "selling_of_minor_girls",
        "buying_of_minor_girls",
        "prot_of_children_frm_sexual_viol_girl_child_victims"
      ) ~ "POCSO",
      Category %in% c(
        "human_trafficking",
        "rape",
        "attempt_to_commit_rape",
        "immoral_traffic_prevention_act_total"
      ) ~ "Sexual Violence",
      Category %in% c("cyber_crimes_or_infor_tech_women_centric_crimes") ~ "Cyber Crime",
      Category %in% c("total_crime_against_women") ~ "Total",
      TRUE ~ "Others"
    )
  )

## Restrict the long-format data to the years 2018 through 2021
Crime_Data_2018_21 <- Crime_Data |>
  filter(year_yr %in% 2018:2021)

## Overall yearly summary: only the "Total" category, summed per year.
## CrimesPerDay divides by 365; CrimesPerDayPerState divides the rounded
## daily figure by 36.
Crime_Data_2018_21_Overall_Summary <-
  Crime_Data_2018_21 |>
  filter(Category_Update == "Total") |>
  group_by(year_yr, Category_Update) |>
  summarise(TotalCrimes = sum(Count), .groups = "drop") |>
  mutate(
    CrimesPerDay = round(TotalCrimes / 365),
    CrimesPerDayPerState = round(CrimesPerDay / 36)
  )

## Yearly summary per category: everything except the "Total" row,
## summed per year and category, with the same per-day figures as above.
Crime_Data_2018_21_Overall <-
  Crime_Data_2018_21 |>
  filter(Category_Update != "Total") |>
  group_by(year_yr, Category_Update) |>
  summarise(TotalCrimes = sum(Count), .groups = "drop") |>
  mutate(
    CrimesPerDay = round(TotalCrimes / 365),
    CrimesPerDayPerState = round(CrimesPerDay / 36)
  )
  
## Summary per year, state and category (the "Total" category is excluded).
## NAME_1 holds the state name respelled to match the map's state names for
## the three states whose spelling differs; it is the join key used later.
Crime_Data_2018_21_State <-
  Crime_Data_2018_21 |>
  filter(Category_Update != "Total") |>
  group_by(year_yr, state_name, Category_Update) |>
  summarise(TotalCrimes = sum(Count), .groups = "drop") |>
  mutate(
    CrimesPerDay = round(TotalCrimes / 365),
    NAME_1 = case_when(
      state_name == "Andaman And Nicobar Islands" ~
        "Andaman & Nicobar",
      state_name == "The Dadra And Nagar Haveli And Daman And Diu" ~
        "Dadra and Nagar Haveli and Daman and Diu",
      state_name == "Jammu And Kashmir" ~
        "Jammu & Kashmir",
      TRUE ~ state_name
    )
  )

## Flatten the loaded state-boundary shapefile into a plain data frame that
## keeps only the state name (renamed to NAME_1, the join key) and geometry.
## NOTE(review): the original comment here cited
## https://gadm.org/download_country.html#google_vignette as the shapefile
## source, while the loading step cites datameet.org - confirm which applies.
india_map <- data.frame(INDIA_MAP) |>
  select(NAME_1 = ST_NM, geometry)

## Creation of the population lookup from the loaded Wikipedia table.
## Rows 2:37 and columns 2:3 are kept; the code below treats them as the
## state-name and population columns (presumably the 36 states / UTs -
## verify if the page layout changes).
population_table <- population_table[2:37, c(2, 3)]

Population <- population_table |>
  # Pick the two columns by header prefix instead of the full header text:
  # the scraped header carries footnote markers (e.g. "Population[18][19]")
  # whose numbers change whenever the article's references are edited.
  select(
    State = starts_with("State"),
    Population = starts_with("Population")
  ) |>
  mutate(
    # Strip bracketed footnote markers such as "[d]" so that state names
    # match regardless of which footnote letter the page currently uses.
    State = trimws(gsub("\\[[^]]*\\]", "", State)),
    # Remove footnotes first, then every non-digit (thousands separators),
    # so footnote numbers can never leak into the parsed value.
    Population = as.numeric(
      gsub("[^0-9]", "", gsub("\\[[^]]*\\]", "", Population))
    ),
    PopulationPer1Lakh = round(Population / 100000, 0),
    # Andhra Pradesh is pinned to a fixed value, overriding the scraped one.
    PopulationPer1Lakh = case_when(
      State == "Andhra Pradesh" ~ 496,
      TRUE ~ PopulationPer1Lakh
    ),
    # Respell the names that differ from the map's state names (join key).
    NAME_1 = case_when(
      State == "Andaman and Nicobar Islands" ~ "Andaman & Nicobar",
      State == "Jammu and Kashmir" ~ "Jammu & Kashmir",
      TRUE ~ State
    )
  ) |>
  # The output column keeps its existing spelling ("PopuplationPerLakh")
  # because the per-capita calculations later in the script refer to it.
  select(NAME_1, PopuplationPerLakh = PopulationPer1Lakh)

3. Plots

3.1. Do Crimes Against Women Start at Home?

Plot1 Code
# Plot1 - Bar Chart
# Horizontal, dodged bars: one group per crime category, one bar per year,
# each bar labelled with its total.
category_order <- c(
  "Cyber Crime", "Sexual Violence", "POCSO",
  "Harassment", "Sexual Harassment", "Domestic Violence"
)

# NOTE(review): categories that are not listed in category_order (for
# example the "Others" fallback bucket) become NA on the category axis -
# confirm whether that is intended.
Crime_Data_2018_21_Overall |>
  mutate(
    category_fct = factor(Category_Update, levels = category_order),
    year_label = as.character(year_yr)
  ) |>
  ggplot(aes(x = category_fct, y = TotalCrimes, fill = year_label)) +
  geom_col(position = position_dodge(width = 0.8)) +
  geom_text(
    aes(label = TotalCrimes),
    position = position_dodge(width = 0.8),
    hjust = -0.1,
    color = "black"
  ) +
  coord_flip() +
  scale_fill_manual(values = c("#581845", "#11BBFF", "#FFC300", "#C70039")) +
  theme_minimal() +
  labs(
    x = "Category of Violence",
    y = "#Crimes ",
    title = "YonY Crimes Against Women",
    subtitle = "Period: 2018 to 2021",
    fill = "Year:"
  ) +
  theme(
    legend.position = c(0.7, 0.3),
    panel.grid.minor.y = element_blank(),
    panel.grid.minor.x = element_blank(),
    panel.grid.major.y = element_blank(),
    plot.title = element_text(hjust = 0.5, size = 18, face = "bold"),
    plot.subtitle = element_text(hjust = 0.5, size = 14)
  )

3.2. Geographic Distribution of Crimes Across Years

3.2.1 Trend of Absolute Violences

Plot2A Code
# Plot 2A data preparation: total crimes per state, one column per year
Crime_Data_2018_21_State_Summary <-
  Crime_Data_2018_21_State |>
  group_by(year_yr, NAME_1) |>
  summarise(Total_Crimes = sum(TotalCrimes), .groups = "drop") |>
  pivot_wider(names_from = year_yr, values_from = Total_Crimes)

# Plot 2B data preparation: the same totals divided by population (in
# lakhs), rounded to one decimal, one column per year
Crime_Data_2018_21_State_Summary2 <-
  Crime_Data_2018_21_State |>
  group_by(year_yr, NAME_1) |>
  summarise(Total_Crimes = sum(TotalCrimes), .groups = "drop") |>
  merge(Population, by = "NAME_1", all.x = TRUE) |>
  mutate(CrimesPer1LakhPop = round(Total_Crimes / PopuplationPerLakh, 1)) |>
  select(NAME_1, year_yr, CrimesPer1LakhPop) |>
  pivot_wider(names_from = year_yr, values_from = CrimesPer1LakhPop)

# Plot 2A map data: attach the totals to every state geometry
# (all.x = TRUE keeps states that have no crime rows)
india_map_state <- india_map |>
  merge(Crime_Data_2018_21_State_Summary, by = "NAME_1", all.x = TRUE) |>
  st_as_sf()

# Plot 2B map data: attach the per-lakh figures to every state geometry
india_map_state_pc <- india_map |>
  merge(Crime_Data_2018_21_State_Summary2, by = "NAME_1", all.x = TRUE) |>
  st_as_sf()

# Plot 2A - state-wise map of total crimes, one panel per year.
# The four panels differ only in the year column they colour by, so they
# are built by one helper instead of four copies of the same code.

# Helper: draw the total-crimes map for a single year.
#   map_data - sf object with one column per year
#   year_col - name of the year column to colour by, e.g. "2018"
# Returns a ggplot object.
plot_total_crimes_map <- function(map_data, year_col) {
  map_data |>
    ggplot() +
    geom_sf(aes(fill = .data[[year_col]]), color = "black") +
    scale_fill_viridis_c(option = "plasma", na.value = "grey50") +
    theme_minimal() +
    labs(
      title = paste("Heatmap of Total Crimes in", year_col),
      subtitle = "State-wise distribution",
      # Set explicitly so the legend title stays the bare year.
      fill = year_col
    ) +
    theme(
      legend.position = c(0.8, 0.2),
      legend.key.size = unit(0.5, "cm"),
      legend.text = element_text(size = 7)
    )
}

plot_2018 <- plot_total_crimes_map(india_map_state, "2018")
plot_2019 <- plot_total_crimes_map(india_map_state, "2019")
plot_2020 <- plot_total_crimes_map(india_map_state, "2020")
plot_2021 <- plot_total_crimes_map(india_map_state, "2021")

# Combine the plots into a 2x2 grid
combined_plot <- (plot_2018 | plot_2019) / (plot_2020 | plot_2021)

# Display the combined plot
combined_plot

3.2.2 Trend of Violences Per 1 Lakh of Population

Plot2B Code
# Plot 2B - state-wise map of crimes per 1 lakh of population, one panel
# per year. The four panels differ only in the year column they colour by,
# so they are built by one helper instead of four copies of the same code.

# Helper: draw the per-lakh-population map for a single year.
#   map_data - sf object with one column per year
#   year_col - name of the year column to colour by, e.g. "2018"
# Returns a ggplot object.
plot_per_lakh_crimes_map <- function(map_data, year_col) {
  map_data |>
    ggplot() +
    geom_sf(aes(fill = .data[[year_col]]), color = "black") +
    scale_fill_viridis_c(option = "plasma", na.value = "grey50") +
    theme_minimal() +
    labs(
      title = paste("Heatmap of Total Crimes in", year_col),
      subtitle = "Crimes Per 1 Lac of Population",
      # Set explicitly so the legend title stays the bare year.
      fill = year_col
    ) +
    theme(
      legend.position = c(0.8, 0.2),
      legend.key.size = unit(0.5, "cm"),
      legend.text = element_text(size = 7)
    )
}

plot_2018_pc <- plot_per_lakh_crimes_map(india_map_state_pc, "2018")
plot_2019_pc <- plot_per_lakh_crimes_map(india_map_state_pc, "2019")
plot_2020_pc <- plot_per_lakh_crimes_map(india_map_state_pc, "2020")
plot_2021_pc <- plot_per_lakh_crimes_map(india_map_state_pc, "2021")

# Combine the plots into a 2x2 grid
combined_plot_pc <- (plot_2018_pc | plot_2019_pc) / (plot_2020_pc | plot_2021_pc)

# Display the combined plot
combined_plot_pc

3.3. Geographic Distribution of Violences by Category

3.3.1 Trend of Absolute Violences

Plot3A Code
# Plot 3A data preparation: 2021 totals per state, one column per category
Crime_Data_21_State_Summary <-
  Crime_Data_2018_21_State |>
  filter(year_yr == 2021) |>
  select(NAME_1, TotalCrimes, Category_Update) |>
  pivot_wider(names_from = Category_Update, values_from = TotalCrimes)

# Plot 3B data preparation: the same 2021 totals divided by population (in
# lakhs), rounded to one decimal, one column per category
Crime_Data_21_State_Summary_PC <-
  Crime_Data_2018_21_State |>
  filter(year_yr == 2021) |>
  select(NAME_1, TotalCrimes, Category_Update) |>
  merge(Population, by = "NAME_1", all.x = TRUE) |>
  mutate(CrimesPer1LakhPop = round(TotalCrimes / PopuplationPerLakh, 1)) |>
  select(NAME_1, Category_Update, CrimesPer1LakhPop) |>
  pivot_wider(names_from = Category_Update, values_from = CrimesPer1LakhPop)

# Plot 3A map data: attach the category totals to every state geometry
# (all.x = TRUE keeps states that have no crime rows)
india_map_state_new <- india_map |>
  merge(Crime_Data_21_State_Summary, by = "NAME_1", all.x = TRUE) |>
  st_as_sf()

# Plot 3B map data: attach the per-lakh figures to every state geometry
india_map_state_new_pc <- india_map |>
  merge(Crime_Data_21_State_Summary_PC, by = "NAME_1", all.x = TRUE) |>
  st_as_sf()


# Plot 3A - state-wise maps of 2021 crimes, one panel per category.
# The six panels differ only in the category column and the title, so they
# are built by one helper instead of six copies of the same code.

# Helper: draw the 2021 map for a single crime category.
#   map_data     - sf object with one column per category
#   category_col - name of the category column to colour by
#   plot_title   - title shown above the panel
# Returns a ggplot object.
plot_category_map <- function(map_data, category_col, plot_title) {
  map_data |>
    ggplot() +
    geom_sf(aes(fill = .data[[category_col]]), color = "black") +
    scale_fill_viridis_c(option = "plasma", na.value = "grey50") +
    theme_void() +
    labs(
      title = plot_title,
      subtitle = "State-wise distribution",
      # Set explicitly so the legend title stays the category name.
      fill = category_col
    ) +
    theme(
      legend.position = c(0.6, 0.2),
      legend.key.size = unit(0.4, "cm"),
      legend.text = element_text(size = 7)
    )
}

plot_dv <- plot_category_map(
  india_map_state_new, "Domestic Violence", "Domestic Violence Crimes in 2021"
)
plot_hrsmt <- plot_category_map(
  india_map_state_new, "Harassment", "Harassment Crimes in 2021"
)
plot_sexhrsmt <- plot_category_map(
  india_map_state_new, "Sexual Harassment", "Sexual Harassment Crimes in 2021"
)
plot_chld <- plot_category_map(
  india_map_state_new, "POCSO", "Crimes on Children in 2021"
)
plot_rape <- plot_category_map(
  india_map_state_new, "Sexual Violence", "Rape Crimes in 2021"
)
plot_cc <- plot_category_map(
  india_map_state_new, "Cyber Crime", "Cyber Crimes in 2021"
)

# Combine the plots into a 2x3 grid and display it
combined_plot2 <- (plot_dv | plot_hrsmt | plot_sexhrsmt) / (plot_chld | plot_rape | plot_cc)

combined_plot2

3.3.2 Trend of Violences Per 1 Lakh of Population

Plot3B Code
# Plot 3B - state-wise maps of 2021 crimes per 1 lakh of population, one
# panel per category. The six panels differ only in the category column and
# the title, so they are built by one helper instead of six copies.

# Helper: draw the 2021 per-lakh-population map for a single category.
#   map_data     - sf object with one column per category
#   category_col - name of the category column to colour by
#   plot_title   - title shown above the panel
# Returns a ggplot object.
plot_category_per_lakh_map <- function(map_data, category_col, plot_title) {
  map_data |>
    ggplot() +
    geom_sf(aes(fill = .data[[category_col]]), color = "black") +
    scale_fill_viridis_c(option = "plasma", na.value = "grey50") +
    theme_void() +
    labs(
      title = plot_title,
      subtitle = "Crimes Per 1 Lac of Population",
      # Set explicitly so the legend title stays the category name.
      fill = category_col
    ) +
    theme(
      legend.position = c(0.6, 0.2),
      legend.key.size = unit(0.4, "cm"),
      legend.text = element_text(size = 7)
    )
}

plot_dv_pc <- plot_category_per_lakh_map(
  india_map_state_new_pc, "Domestic Violence", "Domestic Violence Crimes in 2021"
)
plot_hrsmt_pc <- plot_category_per_lakh_map(
  india_map_state_new_pc, "Harassment", "Harassment Crimes in 2021"
)
plot_sexhrsmt_pc <- plot_category_per_lakh_map(
  india_map_state_new_pc, "Sexual Harassment", "Sexual Harassment Crimes in 2021"
)
plot_chld_pc <- plot_category_per_lakh_map(
  india_map_state_new_pc, "POCSO", "Crimes on Children in 2021"
)
plot_rape_pc <- plot_category_per_lakh_map(
  india_map_state_new_pc, "Sexual Violence", "Rape Crimes in 2021"
)
plot_cc_pc <- plot_category_per_lakh_map(
  india_map_state_new_pc, "Cyber Crime", "Cyber Crimes in 2021"
)

# Combine the plots into a 2x3 grid and display it
combined_plot2_pc <- (plot_dv_pc | plot_hrsmt_pc | plot_sexhrsmt_pc) / (plot_chld_pc | plot_rape_pc | plot_cc_pc)

combined_plot2_pc

3.4. Any Correlation across Violence Categories ?

Plot4 Code
# Plot4 - Correlation Chart
# Pairwise correlation between the six crime categories, computed across
# states on the 2021 totals and drawn as a labelled tile grid.

# Select the category columns by name rather than by position: a positional
# range would silently pick the wrong columns if an extra category column
# (such as the "Others" fallback bucket) is present in the summary.
correlation_categories <- c(
  "Cyber Crime", "Domestic Violence", "Harassment",
  "POCSO", "Sexual Harassment", "Sexual Violence"
)

# as.table() + as.data.frame() turn the correlation matrix into long form
# with columns Var1, Var2 (category pair) and Freq (the correlation).
cor_crime_data <- as.data.frame(
  as.table(cor(Crime_Data_21_State_Summary[, correlation_categories]))
)

cor_crime_data |>
  ggplot(aes(x = Var1, y = Var2, fill = Freq)) +
  geom_tile() +
  scale_fill_viridis_c(option = "plasma", na.value = "grey50") +
  theme_minimal() +
  labs(
    title = "Correlation Plot across Crimes categories",
    subtitle = "For Crimes Against Women for Year 2021",
    x = "",
    y = "",
    fill = ""
  ) +
  geom_text(aes(label = round(Freq, 2)), color = "white") +
  theme(
    legend.position = "bottom",
    plot.title = element_text(hjust = 0.5, size = 18, face = "bold"),
    plot.subtitle = element_text(hjust = 0.5, size = 14)
  )

4. References

  • ChatGPT for shapemap errors