Recently Published
Weekly Lab Homework Assignment: Measurement Errors in Psychological Research
Week 2: Homework, Intro To Stats
Plot
plot 2
Confirmation of SHR 2024 National "Risk of Social Isolation" Rankings (table)
library(easypackages)
libraries(c("readxl", "gtsummary", "ggmap", "ggiraph", "ggforce", "ggcorrplot", "ggthemes", "ggsignif", "ggsflabel", "ggrepel", "ggpubr", "ggsci", "glue", "gt", "janitor", "maptools", "mapview", "magrittr", "plyr", "prettyunits", "progress", "progressr", "psych", "rgeos", "rio", "rms", "Hmisc", "robustbase", "rspat", "s2", "sfheaders", "sfweight", "snakecase", "smoothr", "sp", "spatial", "spatialEco", "spatstat", "spatstat.linnet", "spatstat.model", "rpart", "spatstat.explore", "nlme", "spatstat.random", "spatstat.geom", "spatstat.data", "spdep", "sf", "spData", "abind", "summarytools", "terra", "tidycensus", "tidylog", "tidyselect", "lubridate", "forcats", "stringr", "dplyr", "purrr", "readr", "tidyr", "tibble", "ggplot2", "tidyverse", "tigris", "tmap", "vctrs", "viridis", "viridisLite", "vroom", "waldo", "wk", "stats", "graphics", "grDevices", "utils", "datasets", "methods", "base", "haven", "foreign", "survey", "srvyr", "sitrep", "questionr", "srvyr", "stringr", "readxl", "gtsummary"))
acs_2022_vars <- load_variables(2022, "acs5", cache = TRUE)
score_test <- read_excel("2024_socialisolation_vals_maybe_seniorhealthrankigs_mapranks.xlsx")
shr_2024 <- read_excel("2024_socialisolation_vals_maybe_seniorhealthrankigs.xlsx")
states_w_abbr <- read_excel("states_w_abbr.xlsx",col_names = FALSE)
##do USA variables first
#############NOTE: this code is meant to confirm the findings of the 2024 Senior Report from the American Health Rankings
score_test <- read_excel("2024_socialisolation_vals_maybe_seniorhealthrankigs_mapranks.xlsx")
shr_2024 <- read_excel("2024_socialisolation_vals_maybe_seniorhealthrankigs.xlsx")
states_w_abbr <- read_excel("states_w_abbr.xlsx",col_names = FALSE)
get_acs("state", table = "B18101", year= 2022, survey = "acs5", geometry = TRUE) %>% left_join(., acs_2022_vars, by = c("variable"="name")) %>% .[,-8] %>% separate_wider_delim(cols="label", delim = "!!", names_sep = "", too_few = "align_start") -> disability_USAt
#Poverty
get_acs("state", table = "B17001", year= 2022, survey = "acs5", geometry = TRUE) %>% left_join(., acs_2022_vars, by = c("variable"="name")) %>% separate_wider_delim(cols="label", delim = "!!", names_sep = "", too_few = "align_start") -> poverty_USAt
#Marital Status (i.e. Never Married, Separated, Widowed, Divorced)
get_acs("state", table = "B12002", year= 2022, survey = "acs5", geometry = TRUE) %>% left_join(., acs_2022_vars, by = c("variable"="name")) %>% separate_wider_delim(cols="label", delim = "!!", names_sep = "", too_few = "align_start") -> maritalstatus_USAt
#Living Alone 65+ (why they didn't check for multicollinearity between this and marital status or whatever I don't know but w/e)
get_acs("state", table = "B11007", year= 2022, survey = "acs5", summary_var = "B11007_002", geometry = TRUE) %>% left_join(., acs_2022_vars, by = c("variable"="name")) %>% separate_wider_delim(cols="label", delim = "!!", names_sep = "", too_few = "align_start") -> living_alone_65_USAt
#Disability
get_acs("state", table = "B18101", year= 2022, survey = "acs5", geometry = TRUE) %>% left_join(., acs_2022_vars, by = c("variable"="name")) %>% .[,-8] %>% separate_wider_delim(cols="label", delim = "!!", names_sep = "", too_few = "align_start") -> disability_USAt
#Independent Living
get_acs("state", table = "B18107", year= 2022, survey = "acs5", geometry = TRUE) %>% left_join(., acs_2022_vars, by = c("variable"="name")) %>% separate_wider_delim(cols="label", delim = "!!", names_sep = "", too_few = "align_start") -> USA_independentlivingt
#...
#USA Disability scores (1)
#confirmed
#standard output
disability_USAt %>%
#single out only the elderly (65+)
filter(label4 == "65 to 74 years:" | label4 == "75 years and over:") %>%
#remove all the "grand total" rows
filter(!is.na(label5)) %>%
#calculate the "grand total" that we need to use by summing the disabled + not disabled across men and women
transform(total_elders = ave(.$estimate, .$NAME, FUN=sum)) %>% mutate(age = "65+") %>%
#calculate the percentage of those that are disable and sum them up, and then divide by total elders to get the total proportion of disabled
filter(label5 == "With a disability") %>% mutate(prop = 100 * (estimate/total_elders)) %>%
##this feels REALLY stupid, but it's what they did, I swear to God
transform(total_disabled = ave(.$prop, .$NAME, FUN=sum)) %>%
#calculate the z_score for the total proportion of elders disabled by state as compared to national average
mutate(z_score = (total_disabled - mean(total_disabled)/sd(total_disabled))) %>% dplyr::select(NAME, total_disabled, z_score, geometry) %>%left_join(., states_w_abbr %>% dplyr::rename(., state=1, abbr=2), by=c("NAME" = "state")) %>% unique(.) %>% left_join(., shr_2024 %>% filter(Measure == "Disabled"), by = c("abbr" = "State")) %>% filter(abbr != "DC" & abbr != "PR") %>% unique(.) %>% arrange(desc(-total_disabled)) %>% mutate(total_disabled = round(total_disabled, 1), rank = rank(total_disabled, ties.method = "min"), total_prop = total_disabled) %>% dplyr::select(NAME, abbr, Measure, total_prop, rank, z_score, Value, Rank, geometry) %>% st_as_sf(.) -> disability_USA_Zt
#USA Poverty scores (2)
#confirmed
poverty_USAt %>% filter(label5 == "65 to 74 years" | label5 == "75 years and over") %>% transform(total_elders = ave(.$estimate, .$NAME, FUN=sum)) %>% mutate(age = "65+") %>% filter(grepl("below", label3)) %>% mutate(prop = 100 * (estimate/total_elders)) %>% transform(total_poverty = ave(.$prop, .$NAME, FUN=sum)) %>% mutate(z_score = (total_poverty - mean(total_poverty)/sd(total_poverty))) %>% left_join(., states_w_abbr %>% dplyr::rename(., state=1, abbr=2), by=c("NAME" = "state")) %>% unique(.) %>% left_join(., shr_2024 %>% filter(Measure == "Living in Poverty"), by = c("abbr" = "State")) %>% filter(abbr != "DC" & abbr != "PR") %>% arrange(desc(-total_poverty)) %>% mutate(total_poverty = round(total_poverty, 1), rank = rank(total_poverty, ties.method = "min"), total_prop = total_poverty) %>% dplyr::select(NAME, abbr, Measure, total_prop, rank, z_score, Value, Rank, geometry) %>% st_as_sf(.) ->poverty_USA_Zt
#USA Living Alone scores (3)
#confirmed!!
living_alone_65_USAt %>% filter(variable == "B11007_003") %>% mutate(prop = 100 * (estimate/summary_est)) %>% mutate(age = "65+") %>% mutate(z_score = (prop - mean(prop))/sd(prop)) %>% left_join(., states_w_abbr %>% dplyr::rename(., state=1, abbr=2), by=c("NAME" = "state")) %>% unique(.) %>% arrange(desc(-prop)) %>% filter(abbr != "DC" & abbr != "PR") %>% mutate(total_prop = round(prop, 1), rank = rank(prop, ties.method = "min")) %>% filter(abbr != "DC" & abbr != "PR") %>% left_join(., shr_2024 %>% filter(Measure == "Living Alone"), by = c("abbr" = "State")) %>% dplyr::select(NAME, abbr, Measure, total_prop, rank, z_score, Value, Rank, geometry) %>% st_as_sf(.) -> living_alone_65_USA_Zt
#USA Not Married scores (4)
#confirmed!
maritalstatus_USAt %>% filter(label5 == "65 to 74 years" | label5 == "75 to 84 years" | label5 == "85 years and over" | label6 == "65 to 74 years" | label6 == "75 to 84 years" | label6 == "85 years and over" | label7 == "65 to 74 years" | label7 == "75 to 84 years" | label7 == "85 years and over") %>% transform(total_elders = ave(.$estimate, .$NAME, FUN=sum)) %>% mutate(age = "65+") %>% filter(label4 == "Never married:") %>% transform(not_married_sum = ave(.$estimate, .$NAME, FUN=sum)) %>% mutate(prop = 100 * (not_married_sum/total_elders)) %>% mutate(z_score = (prop - mean(prop))/sd(prop)) %>% dplyr::select(NAME, not_married_sum, prop, z_score, geometry) %>% unique(.) %>% transform(total_prop = ave(.$prop, .$NAME, FUN=sum)) %>% left_join(., states_w_abbr %>% dplyr::rename(., state=1, abbr=2), by=c("NAME" = "state")) %>% unique(.) %>% filter(abbr != "DC" & abbr != "PR") %>% arrange(desc(-not_married_sum)) %>% mutate(total_prop = round(total_prop, 1), rank = rank(total_prop, ties.method = "min")) %>% left_join(., shr_2024 %>% filter(Measure == "Never Married"), by = c("abbr" = "State")) %>% dplyr::select(NAME, abbr, Measure, total_prop, rank, z_score, Value, Rank, geometry) %>% st_as_sf(.) -> USA_not_married_Zt
#USA Widowed, Separated, or Divorced scores (5)
#confirmed!
##minor note: this is technically an inaccuracy; it should be including "B12002_155" through "B12002_157" because "Other" for that category is functionally equivalent to "Separated" for our purposes -- their being apart owing to marital discord or not wasn't the point, 'not being together physically' was, but also who cares; source: https://www2.census.gov/programs-surveys/acs/tech_docs/subject_definitions/2023_ACSSubjectDefinitions.pdf
maritalstatus_USAt %>% filter(label5 == "65 to 74 years" | label5 == "75 to 84 years" | label5 == "85 years and over" | label6 == "65 to 74 years" | label6 == "75 to 84 years" | label6 == "85 years and over" | label7 == "65 to 74 years" | label7 == "75 to 84 years" | label7 == "85 years and over") %>% transform(total_elders = ave(.$estimate, .$NAME, FUN=sum)) %>% mutate(age = "65+") %>% filter(label4 == "Widowed:" | label4 == "Divorced:" | label6 == "Separated:") %>% transform(widow_separate_divorced_sum = ave(.$estimate, .$NAME, FUN=sum)) %>% transform(div_sep_wid_sum = ave(.$estimate, .$NAME, FUN=sum)) %>% mutate(prop = 100 * (div_sep_wid_sum/total_elders)) %>% mutate(z_score = (prop - mean(prop))/sd(prop)) %>% dplyr::select(NAME, div_sep_wid_sum, prop, z_score, geometry) %>% unique(.) %>% transform(total_prop = ave(.$prop, .$NAME, FUN=sum)) %>% left_join(., states_w_abbr %>% dplyr::rename(., state=1, abbr=2), by=c("NAME" = "state")) %>% unique(.) %>% filter(abbr != "DC" & abbr != "PR") %>% arrange(desc(-div_sep_wid_sum)) %>% mutate(total_prop = round(total_prop, 1), rank = rank(total_prop, ties.method = "min")) %>% left_join(., shr_2024 %>% filter(Measure == "Divorced, Separated or Widowed"), by = c("abbr" = "State")) %>% dplyr::select(NAME, abbr, Measure, total_prop, rank, z_score, Value, Rank, geometry) %>% st_as_sf(.) -> USA_widow_separate_divorced_Zt
#USA Independent Living scores (6)
#confirmed!
USA_independentlivingt %>% filter(label4 == "65 to 74 years:" | label4 == "75 years and over:") %>% filter(!is.na(label5)) %>% transform(total_elders = ave(.$estimate, .$NAME, FUN=sum)) %>% mutate(age = "65+") %>% filter(label5 == "With an independent living difficulty") %>% transform(diff_indep_sum = ave(.$estimate, .$NAME, FUN=sum)) %>% transform(diff_indep_sum = ave(.$estimate, .$NAME, FUN=sum)) %>% mutate(prop = 100 * (diff_indep_sum/total_elders)) %>% mutate(z_score = (prop - mean(prop))/sd(prop)) %>% dplyr::select(NAME, diff_indep_sum, prop, z_score, geometry) %>% unique(.) %>% transform(total_prop = ave(.$prop, .$NAME, FUN=sum)) %>% left_join(., states_w_abbr %>% dplyr::rename(., state=1, abbr=2), by=c("NAME" = "state")) %>% unique(.) %>% filter(abbr != "DC" & abbr != "PR") %>% arrange(desc(-diff_indep_sum)) %>% mutate(total_prop = round(total_prop, 1), rank = rank(total_prop, ties.method = "min")) %>% left_join(., shr_2024 %>% filter(Measure == "Independent Living Difficulty"), by = c("abbr" = "State")) %>% dplyr::select(NAME, abbr, Measure, total_prop, rank, z_score, Value, Rank, geometry) %>% st_as_sf(.) -> USA_independentliving_Zt
#rbind(poverty_USA_Z, living_alone_65_USA_Z, USA_widow_separate_divorced_Z, USA_not_married_Z, USA_widow_separate_divorced_Z, disability_USA_Z, USA_independentliving_Z) %>% mutate(z_score = replace(z_score, z_score >2, 2)) %>% transform(composite_score = ave(.$z_score, .$NAME, FUN=sum)) %>% unique(.) %>% mutate(z_score_percentile = percent_rank(z_score) * 100) %>% mutate(FLAG=all.equal(as.numeric(total_prop), as.numeric(Value))) -> equality_check
##old one, don't use
rbind(poverty_USA_Zt, living_alone_65_USA_Zt, USA_widow_separate_divorced_Zt, USA_not_married_Zt, USA_widow_separate_divorced_Zt, disability_USA_Zt, USA_independentliving_Zt) %>% unique(.) %>% mutate(FLAG=all.equal(as.numeric(total_prop), as.numeric(Value))) %>% group_by(Measure) %>% mutate(my_z_score = (total_prop - mean(total_prop))/sd(total_prop), their_z_score = (Value - mean(Value))/sd(Value), z_FLAG=all.equal(as.numeric(my_z_score), as.numeric(their_z_score))) %>% ungroup(.) %>% transform(my_state_mean_z = ave(.$my_z_score, .$abbr, FUN=mean), their_state_mean_Z = ave(.$their_z_score, .$abbr, FUN=mean)) %>% mutate(my_possible_composite = my_state_mean_z %>% scales::rescale(., to=c(1,100)) %>% round(.), my_possible_rank = dense_rank(my_possible_composite)) %>% unique(.) %>% dplyr::select(abbr, my_possible_composite, my_possible_rank, my_state_mean_z) %>% unique(.) %>% left_join(., score_test, by=c("abbr"="State")) %>% arrange(., -desc(my_possible_composite)) %>% mutate(my_possible_rank_two = 1:50) -> composite_score_equality_check
composite_score_equality_check %>% filter(abbr != "HI" & abbr != "AK") %>% st_bbox(.) ->mainland_bbox
tmap_mode("view")
composite_score_equality_check %>% tm_shape(., bbox = mainland_bbox) + tm_polygons(col = "my_possible_composite", palette = "blues", breaks = c(0,37, 48, 57, 70, 100), popup.vars = c("abbr", "my_possible_composite", "Value", "Rank", "Measure")) -> test_map_w_bounds
#compare with https://www.americashealthrankings.org/explore/measures/isolationrisk_sr_b
Ingénierie des données
Dans ce notebook, je partage quelques travaux personnels (ou en groupes) liés à l'Ingénierie des données.
*
R Lab 2
Validity and Reliability
Analisis Multivariat Assigment 1
Anggota :
1. Raden Roro Azzahra Tzitziliani Foulin (23031554003)
2. Fardaniyah Hazhiratul Dzauq (23031554045)
3. Novanna Zahrah Zahrani (23031554141)