-
Notifications
You must be signed in to change notification settings - Fork 0
/
Graphs.R
194 lines (157 loc) · 8.83 KB
/
Graphs.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
# Install and load necessary packages
if (!require("rnaturalearth")) install.packages("rnaturalearth")
if (!require("rnaturalearthdata")) install.packages("rnaturalearthdata")
if (!require("sf")) install.packages("sf")
if (!require("ggplot2")) install.packages("ggplot2")
if (!require("dplyr")) install.packages("dplyr")
# Load necessary libraries
library(rnaturalearth)
library(rnaturalearthdata)
library(sf)
library(ggplot2)
library(dplyr)
# Set the directory where your datasets are located (this is mine)
setwd("C:/Users/sofia/Desktop/Mujeres")
# Load the datasets
femicide_rate <- read.csv("Tasa_bruta_homicidio.csv", encoding = "latin1")
income_gap_men_women <- read.csv("tdr05_brecha_hom_muj.csv", encoding = "latin1")
violence_aggressor <- read.csv("Violencia_cualquier_agresor_largo_de_su_vida.csv", encoding = "latin1")
women_in_executive_positions <- read.csv("MujeresEnPuestosDirectivos.csv", encoding = "latin1")
# Basic cleaning for each dataset
datasets <- list(perception_of_insecurity, femicide_rate, income_gap_men_women, violence_help,
violence_aggressor, physical_sexual_violence, lifetime_violence, public_sector_executives,
women_in_executive_positions, women_without_income)
# Perform basic cleaning (remove empty rows and columns)
datasets_cleaned <- lapply(datasets, function(df) {
df <- df[complete.cases(df), ] # Remove rows with NA values
df <- df[, colSums(is.na(df)) < nrow(df)] # Remove completely empty columns
return(df)
})
# Assign the cleaned datasets back to the original variables
perception_of_insecurity <- datasets_cleaned[[1]]
femicide_rate <- datasets_cleaned[[2]]
income_gap_men_women <- datasets_cleaned[[3]]
violence_help <- datasets_cleaned[[4]]
violence_aggressor <- datasets_cleaned[[5]]
physical_sexual_violence <- datasets_cleaned[[6]]
lifetime_violence <- datasets_cleaned[[7]]
public_sector_executives <- datasets_cleaned[[8]]
women_in_executive_positions <- datasets_cleaned[[9]]
women_without_income <- datasets_cleaned[[10]]
###########################################
# Check column names
names(income_gap_men_women)
# Ensure correct columns for men's and women's income
income_gap_men_women$Income_Women <- as.numeric(income_gap_men_women$Ingreso.promedio.por.hora.trabajada.de.la.población.ocupada.de.mujeres)
income_gap_men_women$Income_Men <- as.numeric(income_gap_men_women$Ingreso.promedio.por.hora.trabajada.de.la.población.ocupada.de.hombres)
# Define the regions and assign each state to a region
regions <- list(
North = c("Baja California", "Sonora", "Chihuahua", "Coahuila de Zaragoza", "Nuevo León", "Tamaulipas"),
Center = c("Aguascalientes", "San Luis Potosí", "Guanajuato", "Querétaro", "Hidalgo", "Estado de México", "Ciudad de México", "Morelos"),
South = c("Michoacán", "Guerrero", "Oaxaca", "Puebla", "Tlaxcala"),
Southeast = c("Veracruz", "Tabasco", "Campeche", "Yucatán", "Quintana Roo", "Chiapas")
)
# Create a new column in the dataset to assign each state to a region
income_gap_men_women$Region <- sapply(income_gap_men_women$Entidad.federativa, function(state) {
if (state %in% regions$North) return("North")
if (state %in% regions$Center) return("Center")
if (state %in% regions$South) return("South")
if (state %in% regions$Southeast) return("Southeast")
return(NA)
})
# Filter data to remove any state without a region assignment
income_gap_men_women <- income_gap_men_women[!is.na(income_gap_men_women$Region), ]
ggplot(income_gap_men_women, aes(x = Periodo)) +
geom_line(aes(y = Income_Women, color = Region, linetype = "Women"), linewidth = 1.2) +
geom_line(aes(y = Income_Men, color = Region, linetype = "Men"), linewidth = 1.2) +
labs(title = "Evolution of Average Income of Men and Women by Region",
x = "Year",
y = "Average Income",
color = "Region",
linetype = "Gender") +
theme_minimal() +
theme(legend.position = "bottom")
###########################################
names(women_in_executive_positions)
# Rename the column X._women for clarity
colnames(women_in_executive_positions)[colnames(women_in_executive_positions) == "X._mujeres"] <- "Leadership_Percentage"
# Convert the percentage of women in leadership positions column to numeric
women_in_executive_positions$Leadership_Percentage <- as.numeric(women_in_executive_positions$Leadership_Percentage)
# Assign each state to a region using the previously defined list of regions
women_in_executive_positions$Region <- sapply(women_in_executive_positions$Entidad.federativa, function(state) {
if (state %in% regions$North) return("North")
if (state %in% regions$Center) return("Center")
if (state %in% regions$South) return("South")
if (state %in% regions$Southeast) return("Southeast")
return(NA)
})
# Filter data to remove any state without a region assignment
women_in_executive_positions <- women_in_executive_positions[!is.na(women_in_executive_positions$Region), ]
# Calculate the average percentage of women in leadership by region and year
regional_women_in_executive_positions <- aggregate(Leadership_Percentage ~ Año + Region, data = women_in_executive_positions, FUN = mean)
ggplot(regional_women_in_executive_positions, aes(x = Año, y = Leadership_Percentage, color = Region)) +
geom_line(size = 1.2) +
labs(title = "Evolution of the Percentage of Women in Leadership Positions by Region",
x = "Year",
y = "Percentage of Women in Leadership (%)",
color = "Region") +
theme_minimal() +
theme(legend.position = "bottom")
###########################################
names(violence_aggressor)
# Obtain Mexico's map at the state level
mexico_map <- ne_states(country = "Mexico", returnclass = "sf")
# Convert state names to uppercase to ensure consistency
mexico_map$name <- toupper(mexico_map$name)
unique(mexico_map$name)
# Convert to uppercase in `violence_aggressor`
violence_aggressor$Entidad.federativa <- toupper(violence_aggressor$Entidad.federativa)
# Join the violence index with Mexico's map
map_data <- mexico_map %>%
left_join(violence_aggressor, by = c("name" = "Entidad.federativa"))
# Change specific names to match the map
violence_aggressor$Entidad.federativa <- gsub("MICHOACÁN DE OCAMPO", "MICHOACÁN", violence_aggressor$Entidad.federativa)
violence_aggressor$Entidad.federativa <- gsub("VERACRUZ DE IGNACIO DE LA LLAVE", "VERACRUZ", violence_aggressor$Entidad.federativa)
violence_aggressor$Entidad.federativa <- gsub("COAHUILA DE ZARAGOZA", "COAHUILA", violence_aggressor$Entidad.federativa)
violence_aggressor$Entidad.federativa <- gsub("CIUDAD DE MÉXICO", "DISTRITO FEDERAL", violence_aggressor$Entidad.federativa)
violence_aggressor$Entidad.federativa <- gsub("SAN LUIS POTOSÍ", "SAN LUIS POTOSÍ", violence_aggressor$Entidad.federativa)
# Create a heatmap using the `X2016` column
ggplot(data = map_data) +
geom_sf(aes(fill = X2016), color = "black", lwd = 0.2) +
labs(title = "Violence Heat Map by State in Mexico",
subtitle = "Average Violence Index by State in 2016",
fill = "Violence Index") +
scale_fill_gradient(low = "yellow", high = "red", na.value = "grey50") + # Optional: change na.value if you want a different color for NA values
theme_minimal() +
theme(panel.background = element_rect(fill = "white"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
###########################################
names(femicide_rate)
# Filter data for the most recent year available
femicide_rate <- femicide_rate %>%
filter(Año == max(Año))
# Calculate the national average homicide rate
national_average <- mean(femicide_rate$Tasa)
# Create a column to highlight states above the average
femicide_rate <- femicide_rate %>%
mutate(Above_Average = ifelse(Tasa > national_average, "Above Average", "Below Average"))
# Replace "Veracruz de Ignacio de la Llave" with "Veracruz" in the dataframe
femicide_rate <- femicide_rate %>%
mutate(Entidad = ifelse(Entidad == "Veracruz de Ignacio de la Llave", "Veracruz", Entidad))
# Bar chart
ggplot(femicide_rate, aes(x = reorder(Entidad, Tasa), y = Tasa, fill = Above_Average)) +
geom_bar(stat = "identity") +
geom_hline(yintercept = national_average, linetype = "dashed", color = "red") +
labs(
title = "Comparison of Femicide Rate by State Against National Average",
subtitle = "Highlighting states with femicide rates above the 2019 average",
x = "Entity",
y = "Homicide Rate",
fill = "Comparison"
) +
scale_fill_manual(values = c("Above Average" = "red", "Below Average" = "lightblue")) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
annotate("text", x = 1, y = national_average, label = paste("National Average:", round(national_average, 2)),
vjust = -1, color = "red", size = 3)