Recently Published

Confirmation of SHR 2024 National "Risk of Social Isolation" Rankings (table)
library(easypackages) libraries(c("readxl", "gtsummary", "ggmap", "ggiraph", "ggforce", "ggcorrplot", "ggthemes", "ggsignif", "ggsflabel", "ggrepel", "ggpubr", "ggsci", "glue", "gt", "janitor", "maptools", "mapview", "magrittr", "plyr", "prettyunits", "progress", "progressr", "psych", "rgeos", "rio", "rms", "Hmisc", "robustbase", "rspat", "s2", "sfheaders", "sfweight", "snakecase", "smoothr", "sp", "spatial", "spatialEco", "spatstat", "spatstat.linnet", "spatstat.model", "rpart", "spatstat.explore", "nlme", "spatstat.random", "spatstat.geom", "spatstat.data", "spdep", "sf", "spData", "abind", "summarytools", "terra", "tidycensus", "tidylog", "tidyselect", "lubridate", "forcats", "stringr", "dplyr", "purrr", "readr", "tidyr", "tibble", "ggplot2", "tidyverse", "tigris", "tmap", "vctrs", "viridis", "viridisLite", "vroom", "waldo", "wk", "stats", "graphics", "grDevices", "utils", "datasets", "methods", "base", "haven", "foreign", "survey", "srvyr", "sitrep", "questionr", "srvyr", "stringr", "readxl", "gtsummary")) acs_2022_vars <- load_variables(2022, "acs5", cache = TRUE) score_test <- read_excel("2024_socialisolation_vals_maybe_seniorhealthrankigs_mapranks.xlsx") shr_2024 <- read_excel("2024_socialisolation_vals_maybe_seniorhealthrankigs.xlsx") states_w_abbr <- read_excel("states_w_abbr.xlsx",col_names = FALSE) ##do USA variables first #############NOTE: this code is meant to confirm the findings of the 2024 Senior Report from the American Health Rankings score_test <- read_excel("2024_socialisolation_vals_maybe_seniorhealthrankigs_mapranks.xlsx") shr_2024 <- read_excel("2024_socialisolation_vals_maybe_seniorhealthrankigs.xlsx") states_w_abbr <- read_excel("states_w_abbr.xlsx",col_names = FALSE) get_acs("state", table = "B18101", year= 2022, survey = "acs5", geometry = TRUE) %>% left_join(., acs_2022_vars, by = c("variable"="name")) %>% .[,-8] %>% separate_wider_delim(cols="label", delim = "!!", names_sep = "", too_few = "align_start") -> disability_USAt #Poverty get_acs("state", table = "B17001", year= 2022, survey = "acs5", geometry = TRUE) %>% left_join(., acs_2022_vars, by = c("variable"="name")) %>% separate_wider_delim(cols="label", delim = "!!", names_sep = "", too_few = "align_start") -> poverty_USAt #Marital Status (i.e. Never Married, Separated, Widowed, Divorced) get_acs("state", table = "B12002", year= 2022, survey = "acs5", geometry = TRUE) %>% left_join(., acs_2022_vars, by = c("variable"="name")) %>% separate_wider_delim(cols="label", delim = "!!", names_sep = "", too_few = "align_start") -> maritalstatus_USAt #Living Alone 65+ (why they didn't check for multicollinearity between this and marital status or whatever I don't know but w/e) get_acs("state", table = "B11007", year= 2022, survey = "acs5", summary_var = "B11007_002", geometry = TRUE) %>% left_join(., acs_2022_vars, by = c("variable"="name")) %>% separate_wider_delim(cols="label", delim = "!!", names_sep = "", too_few = "align_start") -> living_alone_65_USAt #Disability get_acs("state", table = "B18101", year= 2022, survey = "acs5", geometry = TRUE) %>% left_join(., acs_2022_vars, by = c("variable"="name")) %>% .[,-8] %>% separate_wider_delim(cols="label", delim = "!!", names_sep = "", too_few = "align_start") -> disability_USAt #Independent Living get_acs("state", table = "B18107", year= 2022, survey = "acs5", geometry = TRUE) %>% left_join(., acs_2022_vars, by = c("variable"="name")) %>% separate_wider_delim(cols="label", delim = "!!", names_sep = "", too_few = "align_start") -> USA_independentlivingt #... #USA Disability scores (1) #confirmed #standard output disability_USAt %>% #single out only the elderly (65+) filter(label4 == "65 to 74 years:" | label4 == "75 years and over:") %>% #remove all the "grand total" rows filter(!is.na(label5)) %>% #calculate the "grand total" that we need to use by summing the disabled + not disabled across men and women transform(total_elders = ave(.$estimate, .$NAME, FUN=sum)) %>% mutate(age = "65+") %>% #calculate the percentage of those that are disable and sum them up, and then divide by total elders to get the total proportion of disabled filter(label5 == "With a disability") %>% mutate(prop = 100 * (estimate/total_elders)) %>% ##this feels REALLY stupid, but it's what they did, I swear to God transform(total_disabled = ave(.$prop, .$NAME, FUN=sum)) %>% #calculate the z_score for the total proportion of elders disabled by state as compared to national average mutate(z_score = (total_disabled - mean(total_disabled)/sd(total_disabled))) %>% dplyr::select(NAME, total_disabled, z_score, geometry) %>%left_join(., states_w_abbr %>% dplyr::rename(., state=1, abbr=2), by=c("NAME" = "state")) %>% unique(.) %>% left_join(., shr_2024 %>% filter(Measure == "Disabled"), by = c("abbr" = "State")) %>% filter(abbr != "DC" & abbr != "PR") %>% unique(.) %>% arrange(desc(-total_disabled)) %>% mutate(total_disabled = round(total_disabled, 1), rank = rank(total_disabled, ties.method = "min"), total_prop = total_disabled) %>% dplyr::select(NAME, abbr, Measure, total_prop, rank, z_score, Value, Rank, geometry) %>% st_as_sf(.) -> disability_USA_Zt #USA Poverty scores (2) #confirmed poverty_USAt %>% filter(label5 == "65 to 74 years" | label5 == "75 years and over") %>% transform(total_elders = ave(.$estimate, .$NAME, FUN=sum)) %>% mutate(age = "65+") %>% filter(grepl("below", label3)) %>% mutate(prop = 100 * (estimate/total_elders)) %>% transform(total_poverty = ave(.$prop, .$NAME, FUN=sum)) %>% mutate(z_score = (total_poverty - mean(total_poverty)/sd(total_poverty))) %>% left_join(., states_w_abbr %>% dplyr::rename(., state=1, abbr=2), by=c("NAME" = "state")) %>% unique(.) %>% left_join(., shr_2024 %>% filter(Measure == "Living in Poverty"), by = c("abbr" = "State")) %>% filter(abbr != "DC" & abbr != "PR") %>% arrange(desc(-total_poverty)) %>% mutate(total_poverty = round(total_poverty, 1), rank = rank(total_poverty, ties.method = "min"), total_prop = total_poverty) %>% dplyr::select(NAME, abbr, Measure, total_prop, rank, z_score, Value, Rank, geometry) %>% st_as_sf(.) ->poverty_USA_Zt #USA Living Alone scores (3) #confirmed!! living_alone_65_USAt %>% filter(variable == "B11007_003") %>% mutate(prop = 100 * (estimate/summary_est)) %>% mutate(age = "65+") %>% mutate(z_score = (prop - mean(prop))/sd(prop)) %>% left_join(., states_w_abbr %>% dplyr::rename(., state=1, abbr=2), by=c("NAME" = "state")) %>% unique(.) %>% arrange(desc(-prop)) %>% filter(abbr != "DC" & abbr != "PR") %>% mutate(total_prop = round(prop, 1), rank = rank(prop, ties.method = "min")) %>% filter(abbr != "DC" & abbr != "PR") %>% left_join(., shr_2024 %>% filter(Measure == "Living Alone"), by = c("abbr" = "State")) %>% dplyr::select(NAME, abbr, Measure, total_prop, rank, z_score, Value, Rank, geometry) %>% st_as_sf(.) -> living_alone_65_USA_Zt #USA Not Married scores (4) #confirmed! maritalstatus_USAt %>% filter(label5 == "65 to 74 years" | label5 == "75 to 84 years" | label5 == "85 years and over" | label6 == "65 to 74 years" | label6 == "75 to 84 years" | label6 == "85 years and over" | label7 == "65 to 74 years" | label7 == "75 to 84 years" | label7 == "85 years and over") %>% transform(total_elders = ave(.$estimate, .$NAME, FUN=sum)) %>% mutate(age = "65+") %>% filter(label4 == "Never married:") %>% transform(not_married_sum = ave(.$estimate, .$NAME, FUN=sum)) %>% mutate(prop = 100 * (not_married_sum/total_elders)) %>% mutate(z_score = (prop - mean(prop))/sd(prop)) %>% dplyr::select(NAME, not_married_sum, prop, z_score, geometry) %>% unique(.) %>% transform(total_prop = ave(.$prop, .$NAME, FUN=sum)) %>% left_join(., states_w_abbr %>% dplyr::rename(., state=1, abbr=2), by=c("NAME" = "state")) %>% unique(.) %>% filter(abbr != "DC" & abbr != "PR") %>% arrange(desc(-not_married_sum)) %>% mutate(total_prop = round(total_prop, 1), rank = rank(total_prop, ties.method = "min")) %>% left_join(., shr_2024 %>% filter(Measure == "Never Married"), by = c("abbr" = "State")) %>% dplyr::select(NAME, abbr, Measure, total_prop, rank, z_score, Value, Rank, geometry) %>% st_as_sf(.) -> USA_not_married_Zt #USA Widowed, Separated, or Divorced scores (5) #confirmed! ##minor note: this is technically an inaccuracy; it should be including "B12002_155" through "B12002_157" because "Other" for that category is functionally equivalent to "Separated" for our purposes -- their being apart owing to marital discord or not wasn't the point, 'not being together physically' was, but also who cares; source: https://www2.census.gov/programs-surveys/acs/tech_docs/subject_definitions/2023_ACSSubjectDefinitions.pdf maritalstatus_USAt %>% filter(label5 == "65 to 74 years" | label5 == "75 to 84 years" | label5 == "85 years and over" | label6 == "65 to 74 years" | label6 == "75 to 84 years" | label6 == "85 years and over" | label7 == "65 to 74 years" | label7 == "75 to 84 years" | label7 == "85 years and over") %>% transform(total_elders = ave(.$estimate, .$NAME, FUN=sum)) %>% mutate(age = "65+") %>% filter(label4 == "Widowed:" | label4 == "Divorced:" | label6 == "Separated:") %>% transform(widow_separate_divorced_sum = ave(.$estimate, .$NAME, FUN=sum)) %>% transform(div_sep_wid_sum = ave(.$estimate, .$NAME, FUN=sum)) %>% mutate(prop = 100 * (div_sep_wid_sum/total_elders)) %>% mutate(z_score = (prop - mean(prop))/sd(prop)) %>% dplyr::select(NAME, div_sep_wid_sum, prop, z_score, geometry) %>% unique(.) %>% transform(total_prop = ave(.$prop, .$NAME, FUN=sum)) %>% left_join(., states_w_abbr %>% dplyr::rename(., state=1, abbr=2), by=c("NAME" = "state")) %>% unique(.) %>% filter(abbr != "DC" & abbr != "PR") %>% arrange(desc(-div_sep_wid_sum)) %>% mutate(total_prop = round(total_prop, 1), rank = rank(total_prop, ties.method = "min")) %>% left_join(., shr_2024 %>% filter(Measure == "Divorced, Separated or Widowed"), by = c("abbr" = "State")) %>% dplyr::select(NAME, abbr, Measure, total_prop, rank, z_score, Value, Rank, geometry) %>% st_as_sf(.) -> USA_widow_separate_divorced_Zt #USA Independent Living scores (6) #confirmed! USA_independentlivingt %>% filter(label4 == "65 to 74 years:" | label4 == "75 years and over:") %>% filter(!is.na(label5)) %>% transform(total_elders = ave(.$estimate, .$NAME, FUN=sum)) %>% mutate(age = "65+") %>% filter(label5 == "With an independent living difficulty") %>% transform(diff_indep_sum = ave(.$estimate, .$NAME, FUN=sum)) %>% transform(diff_indep_sum = ave(.$estimate, .$NAME, FUN=sum)) %>% mutate(prop = 100 * (diff_indep_sum/total_elders)) %>% mutate(z_score = (prop - mean(prop))/sd(prop)) %>% dplyr::select(NAME, diff_indep_sum, prop, z_score, geometry) %>% unique(.) %>% transform(total_prop = ave(.$prop, .$NAME, FUN=sum)) %>% left_join(., states_w_abbr %>% dplyr::rename(., state=1, abbr=2), by=c("NAME" = "state")) %>% unique(.) %>% filter(abbr != "DC" & abbr != "PR") %>% arrange(desc(-diff_indep_sum)) %>% mutate(total_prop = round(total_prop, 1), rank = rank(total_prop, ties.method = "min")) %>% left_join(., shr_2024 %>% filter(Measure == "Independent Living Difficulty"), by = c("abbr" = "State")) %>% dplyr::select(NAME, abbr, Measure, total_prop, rank, z_score, Value, Rank, geometry) %>% st_as_sf(.) -> USA_independentliving_Zt #rbind(poverty_USA_Z, living_alone_65_USA_Z, USA_widow_separate_divorced_Z, USA_not_married_Z, USA_widow_separate_divorced_Z, disability_USA_Z, USA_independentliving_Z) %>% mutate(z_score = replace(z_score, z_score >2, 2)) %>% transform(composite_score = ave(.$z_score, .$NAME, FUN=sum)) %>% unique(.) %>% mutate(z_score_percentile = percent_rank(z_score) * 100) %>% mutate(FLAG=all.equal(as.numeric(total_prop), as.numeric(Value))) -> equality_check ##old one, don't use rbind(poverty_USA_Zt, living_alone_65_USA_Zt, USA_widow_separate_divorced_Zt, USA_not_married_Zt, USA_widow_separate_divorced_Zt, disability_USA_Zt, USA_independentliving_Zt) %>% unique(.) %>% mutate(FLAG=all.equal(as.numeric(total_prop), as.numeric(Value))) %>% group_by(Measure) %>% mutate(my_z_score = (total_prop - mean(total_prop))/sd(total_prop), their_z_score = (Value - mean(Value))/sd(Value), z_FLAG=all.equal(as.numeric(my_z_score), as.numeric(their_z_score))) %>% ungroup(.) %>% transform(my_state_mean_z = ave(.$my_z_score, .$abbr, FUN=mean), their_state_mean_Z = ave(.$their_z_score, .$abbr, FUN=mean)) %>% mutate(my_possible_composite = my_state_mean_z %>% scales::rescale(., to=c(1,100)) %>% round(.), my_possible_rank = dense_rank(my_possible_composite)) %>% unique(.) %>% dplyr::select(abbr, my_possible_composite, my_possible_rank, my_state_mean_z) %>% unique(.) %>% left_join(., score_test, by=c("abbr"="State")) %>% arrange(., -desc(my_possible_composite)) %>% mutate(my_possible_rank_two = 1:50) -> composite_score_equality_check composite_score_equality_check %>% filter(abbr != "HI" & abbr != "AK") %>% st_bbox(.) ->mainland_bbox tmap_mode("view") composite_score_equality_check %>% tm_shape(., bbox = mainland_bbox) + tm_polygons(col = "my_possible_composite", palette = "blues", breaks = c(0,37, 48, 57, 70, 100), popup.vars = c("abbr", "my_possible_composite", "Value", "Rank", "Measure")) -> test_map_w_bounds #compare with https://www.americashealthrankings.org/explore/measures/isolationrisk_sr_b
Ingénierie des données
Dans ce notebook, je partage quelques travaux personnels (ou en groupes) liés à l'Ingénierie des données. *
R Lab 2
Validity and Reliability
Analisis Multivariat Assigment 1
Anggota : 1. Raden Roro Azzahra Tzitziliani Foulin (23031554003) 2. Fardaniyah Hazhiratul Dzauq (23031554045) 3. Novanna Zahrah Zahrani (23031554141)
Risk of Social Isolation by County (Texas) (5-Year Composite Score) (2023-2019)
############ This map is a "modern" iteration of the Senior Health Report from "America's Health Rankings" by the United Health Foundation using 2023-2018 U.S Census data to build its values. As of 02/17/2025, the Senior Health Report using this data has not been released. Some methodology data can be found here: https://www.americashealthrankings.org/about/methodology/rankings The maps themselves can be found here: https://www.americashealthrankings.org/explore/measures/isolationrisk_sr_b/CA#measure-trend-summary #################################### original code below: library(easypackages) libraries(c("readxl", "gtsummary", "ggmap", "ggiraph", "ggforce", "ggcorrplot", "ggthemes", "ggsignif", "ggsflabel", "ggrepel", "ggpubr", "ggsci", "glue", "gt", "janitor", "maptools", "mapview", "magrittr", "plyr", "prettyunits", "progress", "progressr", "psych", "rgeos", "rio", "rms", "Hmisc", "robustbase", "rspat", "s2", "sfheaders", "sfweight", "snakecase", "smoothr", "sp", "spatial", "spatialEco", "spatstat", "spatstat.linnet", "spatstat.model", "rpart", "spatstat.explore", "nlme", "spatstat.random", "spatstat.geom", "spatstat.data", "spdep", "sf", "spData", "abind", "summarytools", "terra", "tidycensus", "tidylog", "tidyselect", "lubridate", "forcats", "stringr", "dplyr", "purrr", "readr", "tidyr", "tibble", "ggplot2", "tidyverse", "tigris", "tmap", "vctrs", "viridis", "viridisLite", "vroom", "waldo", "wk", "stats", "graphics", "grDevices", "utils", "datasets", "methods", "base", "haven", "foreign", "survey", "srvyr", "sitrep", "questionr", "srvyr", "stringr", "readxl", "gtsummary")) ##note: switch to 2022 if you want to replicate the SHR map here: https://assets.americashealthrankings.org/app/uploads/rosi2024_all.pdf # acs_2022_vars <- load_variables(2022, "acs5", cache = TRUE) # ##Poverty # get_acs("county", table = "B17001", year= 2022, state="TX", survey = "acs5", geometry = TRUE) %>% left_join(., acs_2022_vars, by = c("variable"="name")) %>% separate_wider_delim(cols="label", delim = "!!", names_sep = "", too_few = "align_start") -> poverty_USA # ##Marital Status (i.e. Never Married, Separated, Widowed, Divorced) # get_acs("county", table = "B12002", state = "TX", year= 2022, survey = "acs5", geometry = TRUE) %>% left_join(., acs_2022_vars, by = c("variable"="name")) %>% separate_wider_delim(cols="label", delim = "!!", names_sep = "", too_few = "align_start") -> maritalstatus_USA # ##Living Alone 65+ (why they didn't check for multicollinearity between this and marital status or whatever I don't know but w/e) # get_acs("county", table = "B11007", state="TX", year= 2022, survey = "acs5", summary_var = "B11007_002", geometry = TRUE) %>% left_join(., acs_2022_vars, by = c("variable"="name")) %>% separate_wider_delim(cols="label", delim = "!!", names_sep = "", too_few = "align_start") -> living_alone_65_USA # ##Disability # get_acs("county", table = "B18101", state="TX", year= 2022, survey = "acs5", geometry = TRUE) %>% left_join(., acs_2022_vars, by = c("variable"="name")) %>% .[,-8] %>% separate_wider_delim(cols="label", delim = "!!", names_sep = "", too_few = "align_start") -> disability_USA # ##Independent Living # get_acs("county", table = "B18107", state="TX", year= 2022, survey = "acs5", geometry = TRUE) %>% left_join(., acs_2022_vars, by = c("variable"="name")) %>% separate_wider_delim(cols="label", delim = "!!", names_sep = "", too_few = "align_start") -> USA_independentliving #########uncomment the singles above to replicate acs_2023_vars <- load_variables(2023, "acs5", cache = TRUE) #Poverty get_acs("county", table = "B17001", year= 2023, state="TX", survey = "acs5", geometry = TRUE) %>% left_join(., acs_2023_vars, by = c("variable"="name")) %>% separate_wider_delim(cols="label", delim = "!!", names_sep = "", too_few = "align_start") -> poverty_USA #Marital Status (i.e. Never Married, Separated, Widowed, Divorced) get_acs("county", table = "B12002", state = "TX", year= 2023, survey = "acs5", geometry = TRUE) %>% left_join(., acs_2023_vars, by = c("variable"="name")) %>% separate_wider_delim(cols="label", delim = "!!", names_sep = "", too_few = "align_start") -> maritalstatus_USA #Living Alone 65+ (why they didn't check for multicollinearity between this and marital status or whatever I don't know but w/e) get_acs("county", table = "B11007", state="TX", year= 2023, survey = "acs5", summary_var = "B11007_002", geometry = TRUE) %>% left_join(., acs_2023_vars, by = c("variable"="name")) %>% separate_wider_delim(cols="label", delim = "!!", names_sep = "", too_few = "align_start") -> living_alone_65_USA #Disability get_acs("county", table = "B18101", state="TX", year= 2023, survey = "acs5", geometry = TRUE) %>% left_join(., acs_2023_vars, by = c("variable"="name")) %>% .[,-8] %>% separate_wider_delim(cols="label", delim = "!!", names_sep = "", too_few = "align_start") -> disability_USA #Independent Living get_acs("county", table = "B18107", state="TX", year= 2023, survey = "acs5", geometry = TRUE) %>% left_join(., acs_2023_vars, by = c("variable"="name")) %>% separate_wider_delim(cols="label", delim = "!!", names_sep = "", too_few = "align_start") -> USA_independentliving ## current state z-composite draft (2/15/24, 1:22pm) #USA Disability scores (1) disability_USA %>% #single out only the elderly (65+) filter(label4 == "65 to 74 years:" | label4 == "75 years and over:") %>% #remove all the "grand total" rows filter(!is.na(label5)) %>% #calculate the "grand total" that we need to use by summing the disabled + not disabled across men and women transform(total_elders = ave(.$estimate, .$NAME, FUN=sum)) %>% mutate(age = "65+") %>% #calculate the percentage of those that are disable and sum them up, and then divide by total elders to get the total proportion of disabled filter(label5 == "With a disability") %>% mutate(prop = 100 * (estimate/total_elders)) %>% ##this feels REALLY [adjective], but it's what they did, I swear to God transform(total_prop = ave(.$prop, .$NAME, FUN=sum)) %>% mutate(rank = rank(total_prop, ties.method = "min"), measure = "Disabled") %>% dplyr::select(NAME, measure, total_prop, rank, geometry) %>% unique(.) %>% arrange(desc(-total_prop)) %>% st_as_sf(.) ->disability_USA_Z #USA Poverty scores (2) poverty_USA %>% filter(label5 == "65 to 74 years" | label5 == "75 years and over") %>% transform(total_elders = ave(.$estimate, .$NAME, FUN=sum)) %>% mutate(age = "65+") %>% filter(grepl("below", label3)) %>% mutate(prop = 100 * (estimate/total_elders)) %>% transform(total_prop = ave(.$prop, .$NAME, FUN=sum)) %>% arrange(desc(-total_prop)) %>% mutate(rank = rank(total_prop, ties.method = "min"), measure = "Poverty") %>% dplyr::select(NAME, measure, total_prop, rank, geometry) %>% st_as_sf(.) ->poverty_USA_Z #USA Living Alone scores (3) living_alone_65_USA %>% filter(variable == "B11007_003") %>% mutate(prop = 100 * (estimate/summary_est)) %>% mutate(age = "65+") %>% unique(.) %>% arrange(desc(-prop)) %>% mutate(total_prop = prop, rank = rank(prop, ties.method = "min"), measure = "Living Alone") %>% dplyr::select(NAME, measure, total_prop, rank, geometry) %>% st_as_sf(.) -> living_alone_65_USA_Z #USA Not Married scores (4) maritalstatus_USA %>% filter(label5 == "65 to 74 years" | label5 == "75 to 84 years" | label5 == "85 years and over" | label6 == "65 to 74 years" | label6 == "75 to 84 years" | label6 == "85 years and over" | label7 == "65 to 74 years" | label7 == "75 to 84 years" | label7 == "85 years and over") %>% transform(total_elders = ave(.$estimate, .$NAME, FUN=sum)) %>% mutate(age = "65+") %>% filter(label4 == "Never married:") %>% transform(not_married_sum = ave(.$estimate, .$NAME, FUN=sum)) %>% mutate(prop = 100 * (not_married_sum/total_elders)) %>% dplyr::select(NAME, not_married_sum, prop, geometry) %>% unique(.) %>% transform(total_prop = ave(.$prop, .$NAME, FUN=sum)) %>% unique(.) %>% mutate(rank = rank(total_prop, ties.method = "min"), measure = "Not Married") %>% arrange(desc(-total_prop)) %>% dplyr::select(NAME, measure, total_prop, rank, geometry) %>% st_as_sf(.) -> USA_not_married_Z #USA Widowed, Separated, or Divorced scores (5) maritalstatus_USA %>% filter(label5 == "65 to 74 years" | label5 == "75 to 84 years" | label5 == "85 years and over" | label6 == "65 to 74 years" | label6 == "75 to 84 years" | label6 == "85 years and over" | label7 == "65 to 74 years" | label7 == "75 to 84 years" | label7 == "85 years and over") %>% transform(total_elders = ave(.$estimate, .$NAME, FUN=sum)) %>% mutate(age = "65+") %>% filter(label4 == "Widowed:" | label4 == "Divorced:" | label6 == "Separated:") %>% transform(widow_separate_divorced_sum = ave(.$estimate, .$NAME, FUN=sum)) %>% transform(div_sep_wid_sum = ave(.$estimate, .$NAME, FUN=sum)) %>% mutate(prop = 100 * (div_sep_wid_sum/total_elders)) %>% dplyr::select(NAME, div_sep_wid_sum, prop, geometry) %>% unique(.) %>% transform(total_prop = ave(.$prop, .$NAME, FUN=sum)) %>% unique(.) %>% arrange(desc(-div_sep_wid_sum)) %>% mutate(rank = rank(total_prop, ties.method = "min"), measure = "Widowed, Divorced, or Separated") %>% dplyr::select(NAME, measure, total_prop, rank, geometry) %>% st_as_sf(.) -> USA_widow_separate_divorced_Z #USA Independent Living scores (6) USA_independentliving %>% filter(label4 == "65 to 74 years:" | label4 == "75 years and over:") %>% filter(!is.na(label5)) %>% transform(total_elders = ave(.$estimate, .$NAME, FUN=sum)) %>% mutate(age = "65+") %>% filter(label5 == "With an independent living difficulty") %>% transform(diff_indep_sum = ave(.$estimate, .$NAME, FUN=sum)) %>% transform(diff_indep_sum = ave(.$estimate, .$NAME, FUN=sum)) %>% mutate(prop = 100 * (diff_indep_sum/total_elders)) %>% dplyr::select(NAME, diff_indep_sum, prop, geometry) %>% unique(.) %>% transform(total_prop = ave(.$prop, .$NAME, FUN=sum)) %>% unique(.) %>% arrange(desc(-diff_indep_sum)) %>% mutate(rank = rank(total_prop, ties.method = "min"), measure = "Difficulty Living Independently") %>% dplyr::select(NAME, measure, total_prop, rank, geometry) %>% st_as_sf(.) -> USA_independentliving_Z rbind(poverty_USA_Z, living_alone_65_USA_Z, USA_widow_separate_divorced_Z, USA_not_married_Z, USA_widow_separate_divorced_Z, disability_USA_Z, USA_independentliving_Z) %>% unique(.) %>% group_by(measure) %>% mutate(my_z_score = (total_prop - mean(total_prop))/sd(total_prop)) %>% ungroup(.) %>% transform(my_state_mean_z = ave(.$my_z_score, .$NAME, FUN=mean)) %>% mutate(my_possible_composite = my_state_mean_z %>% scales::rescale(., to=c(1,100)) %>% round(.), my_possible_rank = dense_rank(my_possible_composite)) %>% unique(.) %>% dplyr::select(NAME, my_possible_composite, my_possible_rank, my_state_mean_z) %>% unique(.) -> z_composites_county z_composites_county %>% dplyr::rename(., County = 1, `Composite Score` = 2, `County Rank` = 3 ) %>% tm_shape(.) + tm_polygons(col = "Composite Score", palette = blues9, breaks = c(1,34, 39, 45, 51, 100), popup.vars = c("County", "Composite Score", "County Rank"), as.count=TRUE) + tm_layout(title = "Risk of Social Isolation by County (5-Year Composite Score) (2023-2019)") + tm_credits("Aggregate Index using U.S. Census American Community Survey Values. Thanks to United Heath Foundation for original concept.")
Risk of Social Isolation by County (Texas) (5-Year Composite Score) (2022-2018)
library(easypackages) libraries(c("readxl", "gtsummary", "ggmap", "ggiraph", "ggforce", "ggcorrplot", "ggthemes", "ggsignif", "ggsflabel", "ggrepel", "ggpubr", "ggsci", "glue", "gt", "janitor", "maptools", "mapview", "magrittr", "plyr", "prettyunits", "progress", "progressr", "psych", "rgeos", "rio", "rms", "Hmisc", "robustbase", "rspat", "s2", "sfheaders", "sfweight", "snakecase", "smoothr", "sp", "spatial", "spatialEco", "spatstat", "spatstat.linnet", "spatstat.model", "rpart", "spatstat.explore", "nlme", "spatstat.random", "spatstat.geom", "spatstat.data", "spdep", "sf", "spData", "abind", "summarytools", "terra", "tidycensus", "tidylog", "tidyselect", "lubridate", "forcats", "stringr", "dplyr", "purrr", "readr", "tidyr", "tibble", "ggplot2", "tidyverse", "tigris", "tmap", "vctrs", "viridis", "viridisLite", "vroom", "waldo", "wk", "stats", "graphics", "grDevices", "utils", "datasets", "methods", "base", "haven", "foreign", "survey", "srvyr", "sitrep", "questionr", "srvyr", "stringr", "readxl", "gtsummary")) ##note: switch to 2022 if you want to replicate the SHR map here: https://assets.americashealthrankings.org/app/uploads/rosi2024_all.pdf acs_2022_vars <- load_variables(2022, "acs5", cache = TRUE) ##Poverty get_acs("county", table = "B17001", year= 2022, state="TX", survey = "acs5", geometry = TRUE) %>% left_join(., acs_2022_vars, by = c("variable"="name")) %>% separate_wider_delim(cols="label", delim = "!!", names_sep = "", too_few = "align_start") -> poverty_USA ##Marital Status (i.e. Never Married, Separated, Widowed, Divorced) get_acs("county", table = "B12002", state = "TX", year= 2022, survey = "acs5", geometry = TRUE) %>% left_join(., acs_2022_vars, by = c("variable"="name")) %>% separate_wider_delim(cols="label", delim = "!!", names_sep = "", too_few = "align_start") -> maritalstatus_USA ##Living Alone 65+ (why they didn't check for multicollinearity between this and marital status or whatever I don't know but w/e) get_acs("county", table = "B11007", state="TX", year= 2022, survey = "acs5", summary_var = "B11007_002", geometry = TRUE) %>% left_join(., acs_2022_vars, by = c("variable"="name")) %>% separate_wider_delim(cols="label", delim = "!!", names_sep = "", too_few = "align_start") -> living_alone_65_USA ##Disability get_acs("county", table = "B18101", state="TX", year= 2022, survey = "acs5", geometry = TRUE) %>% left_join(., acs_2022_vars, by = c("variable"="name")) %>% .[,-8] %>% separate_wider_delim(cols="label", delim = "!!", names_sep = "", too_few = "align_start") -> disability_USA ##Independent Living get_acs("county", table = "B18107", state="TX", year= 2022, survey = "acs5", geometry = TRUE) %>% left_join(., acs_2022_vars, by = c("variable"="name")) %>% separate_wider_delim(cols="label", delim = "!!", names_sep = "", too_few = "align_start") -> USA_independentliving ##########uncomment the singles above to replicate # # acs_2023_vars <- load_variables(2023, "acs5", cache = TRUE) # #Poverty # get_acs("county", table = "B17001", year= 2023, state="TX", survey = "acs5", geometry = TRUE) %>% left_join(., acs_2023_vars, by = c("variable"="name")) %>% separate_wider_delim(cols="label", delim = "!!", names_sep = "", too_few = "align_start") -> poverty_USA # #Marital Status (i.e. Never Married, Separated, Widowed, Divorced) # get_acs("county", table = "B12002", state = "TX", year= 2023, survey = "acs5", geometry = TRUE) %>% left_join(., acs_2023_vars, by = c("variable"="name")) %>% separate_wider_delim(cols="label", delim = "!!", names_sep = "", too_few = "align_start") -> maritalstatus_USA # #Living Alone 65+ (why they didn't check for multicollinearity between this and marital status or whatever I don't know but w/e) # get_acs("county", table = "B11007", state="TX", year= 2023, survey = "acs5", summary_var = "B11007_002", geometry = TRUE) %>% left_join(., acs_2023_vars, by = c("variable"="name")) %>% separate_wider_delim(cols="label", delim = "!!", names_sep = "", too_few = "align_start") -> living_alone_65_USA # #Disability # get_acs("county", table = "B18101", state="TX", year= 2023, survey = "acs5", geometry = TRUE) %>% left_join(., acs_2023_vars, by = c("variable"="name")) %>% .[,-8] %>% separate_wider_delim(cols="label", delim = "!!", names_sep = "", too_few = "align_start") -> disability_USA # #Independent Living # get_acs("county", table = "B18107", state="TX", year= 2023, survey = "acs5", geometry = TRUE) %>% left_join(., acs_2023_vars, by = c("variable"="name")) %>% separate_wider_delim(cols="label", delim = "!!", names_sep = "", too_few = "align_start") -> USA_independentliving ## current state z-composite draft (2/15/24, 1:22pm) #USA Disability scores (1) disability_USA %>% #single out only the elderly (65+) filter(label4 == "65 to 74 years:" | label4 == "75 years and over:") %>% #remove all the "grand total" rows filter(!is.na(label5)) %>% #calculate the "grand total" that we need to use by summing the disabled + not disabled across men and women transform(total_elders = ave(.$estimate, .$NAME, FUN=sum)) %>% mutate(age = "65+") %>% #calculate the percentage of those that are disable and sum them up, and then divide by total elders to get the total proportion of disabled filter(label5 == "With a disability") %>% mutate(prop = 100 * (estimate/total_elders)) %>% ##this feels REALLY [adjective], but it's what they did, I swear to God transform(total_prop = ave(.$prop, .$NAME, FUN=sum)) %>% mutate(rank = rank(total_prop, ties.method = "min"), measure = "Disabled") %>% dplyr::select(NAME, measure, total_prop, rank, geometry) %>% unique(.) %>% arrange(desc(-total_prop)) %>% st_as_sf(.) ->disability_USA_Z #USA Poverty scores (2) poverty_USA %>% filter(label5 == "65 to 74 years" | label5 == "75 years and over") %>% transform(total_elders = ave(.$estimate, .$NAME, FUN=sum)) %>% mutate(age = "65+") %>% filter(grepl("below", label3)) %>% mutate(prop = 100 * (estimate/total_elders)) %>% transform(total_prop = ave(.$prop, .$NAME, FUN=sum)) %>% arrange(desc(-total_prop)) %>% mutate(rank = rank(total_prop, ties.method = "min"), measure = "Poverty") %>% dplyr::select(NAME, measure, total_prop, rank, geometry) %>% st_as_sf(.) ->poverty_USA_Z #USA Living Alone scores (3) living_alone_65_USA %>% filter(variable == "B11007_003") %>% mutate(prop = 100 * (estimate/summary_est)) %>% mutate(age = "65+") %>% unique(.) %>% arrange(desc(-prop)) %>% mutate(total_prop = prop, rank = rank(prop, ties.method = "min"), measure = "Living Alone") %>% dplyr::select(NAME, measure, total_prop, rank, geometry) %>% st_as_sf(.) -> living_alone_65_USA_Z #USA Not Married scores (4) maritalstatus_USA %>% filter(label5 == "65 to 74 years" | label5 == "75 to 84 years" | label5 == "85 years and over" | label6 == "65 to 74 years" | label6 == "75 to 84 years" | label6 == "85 years and over" | label7 == "65 to 74 years" | label7 == "75 to 84 years" | label7 == "85 years and over") %>% transform(total_elders = ave(.$estimate, .$NAME, FUN=sum)) %>% mutate(age = "65+") %>% filter(label4 == "Never married:") %>% transform(not_married_sum = ave(.$estimate, .$NAME, FUN=sum)) %>% mutate(prop = 100 * (not_married_sum/total_elders)) %>% dplyr::select(NAME, not_married_sum, prop, geometry) %>% unique(.) %>% transform(total_prop = ave(.$prop, .$NAME, FUN=sum)) %>% unique(.) %>% mutate(rank = rank(total_prop, ties.method = "min"), measure = "Not Married") %>% arrange(desc(-total_prop)) %>% dplyr::select(NAME, measure, total_prop, rank, geometry) %>% st_as_sf(.) -> USA_not_married_Z #USA Widowed, Separated, or Divorced scores (5) maritalstatus_USA %>% filter(label5 == "65 to 74 years" | label5 == "75 to 84 years" | label5 == "85 years and over" | label6 == "65 to 74 years" | label6 == "75 to 84 years" | label6 == "85 years and over" | label7 == "65 to 74 years" | label7 == "75 to 84 years" | label7 == "85 years and over") %>% transform(total_elders = ave(.$estimate, .$NAME, FUN=sum)) %>% mutate(age = "65+") %>% filter(label4 == "Widowed:" | label4 == "Divorced:" | label6 == "Separated:") %>% transform(widow_separate_divorced_sum = ave(.$estimate, .$NAME, FUN=sum)) %>% transform(div_sep_wid_sum = ave(.$estimate, .$NAME, FUN=sum)) %>% mutate(prop = 100 * (div_sep_wid_sum/total_elders)) %>% dplyr::select(NAME, div_sep_wid_sum, prop, geometry) %>% unique(.) %>% transform(total_prop = ave(.$prop, .$NAME, FUN=sum)) %>% unique(.) %>% arrange(desc(-div_sep_wid_sum)) %>% mutate(rank = rank(total_prop, ties.method = "min"), measure = "Widowed, Divorced, or Separated") %>% dplyr::select(NAME, measure, total_prop, rank, geometry) %>% st_as_sf(.) -> USA_widow_separate_divorced_Z #USA Independent Living scores (6) USA_independentliving %>% filter(label4 == "65 to 74 years:" | label4 == "75 years and over:") %>% filter(!is.na(label5)) %>% transform(total_elders = ave(.$estimate, .$NAME, FUN=sum)) %>% mutate(age = "65+") %>% filter(label5 == "With an independent living difficulty") %>% transform(diff_indep_sum = ave(.$estimate, .$NAME, FUN=sum)) %>% transform(diff_indep_sum = ave(.$estimate, .$NAME, FUN=sum)) %>% mutate(prop = 100 * (diff_indep_sum/total_elders)) %>% dplyr::select(NAME, diff_indep_sum, prop, geometry) %>% unique(.) %>% transform(total_prop = ave(.$prop, .$NAME, FUN=sum)) %>% unique(.) %>% arrange(desc(-diff_indep_sum)) %>% mutate(rank = rank(total_prop, ties.method = "min"), measure = "Difficulty Living Independently") %>% dplyr::select(NAME, measure, total_prop, rank, geometry) %>% st_as_sf(.) -> USA_independentliving_Z rbind(poverty_USA_Z, living_alone_65_USA_Z, USA_widow_separate_divorced_Z, USA_not_married_Z, USA_widow_separate_divorced_Z, disability_USA_Z, USA_independentliving_Z) %>% unique(.) %>% group_by(measure) %>% mutate(my_z_score = (total_prop - mean(total_prop))/sd(total_prop)) %>% ungroup(.) %>% transform(my_state_mean_z = ave(.$my_z_score, .$NAME, FUN=mean)) %>% mutate(my_possible_composite = my_state_mean_z %>% scales::rescale(., to=c(1,100)) %>% round(.), my_possible_rank = dense_rank(my_possible_composite)) %>% unique(.) %>% dplyr::select(NAME, my_possible_composite, my_possible_rank, my_state_mean_z) %>% unique(.) -> z_composites_county ########## note: use this code to build the map for States instead of counties ## ########## when computing for counties, remove the filter for Puerto Rico and DC # rbind(poverty_USA_Z, living_alone_65_USA_Z, USA_widow_separate_divorced_Z, USA_not_married_Z, USA_widow_separate_divorced_Z, disability_USA_Z, USA_independentliving_Z) %>% filter(NAME != "Puerto Rico" & NAME != "District of Columbia") %>% unique(.) %>% group_by(measure) %>% mutate(my_z_score = (total_prop - mean(total_prop))/sd(total_prop)) %>% ungroup(.) %>% dplyr::select(NAME, my_z_score, geometry) %>% unique(.) %>% transform(my_county_mean_z = ave(.$my_z_score, .$NAME, FUN=mean)) %>% dplyr::select(NAME, my_county_mean_z, geometry) %>% unique(.) %>% mutate(my_possible_composite = my_county_mean_z %>% scales::rescale(., to=c(1,100)) %>% round(.), my_possible_rank = dense_rank(my_possible_composite)) %>% unique(.) -> z_composites_state ## z_composites_state %>% dplyr::rename(., State = 1, `Composite Score` = 4, `State Rank` = 5) %>% tm_shape(.) + tm_polygons(col = "Composite Score", palette = blues9, breaks = c(1, 37, 48, 57, 70, 100), popup.vars = c("State", "Composite Score", "State Rank")) + tm_layout(title = "Risk of Social Isolation by State (5-Year Composite Score) (2023-2019)") ################ #z_composites_county %>% dplyr::rename(., County = 1, `Composite Score` = 2, `County Rank` = 3 ) %>% tm_shape(.) + tm_polygons(col = "Composite Score", palette = blues9, breaks = c(1,35, 40, 46, 52, 100), popup.vars = c("County", "Composite Score", "County Rank")) + tm_layout(title = "Risk of Social Isolation by County (5-Year Composite Score) (2023-2019)") + tm_credits("Aggregate Index using U.S. Census American Community Survey Values. Thanks to United Heath Foundation for original concept.") z_composites_county %>% dplyr::rename(., County = 1, `Composite Score` = 2, `County Rank` = 3 ) %>% tm_shape(.) + tm_polygons(col = "Composite Score", palette = blues9, breaks = c(1,34, 39, 45, 51, 100), popup.vars = c("County", "Composite Score", "County Rank"), as.count=TRUE) + tm_layout(title = "Risk of Social Isolation by County (5-Year Composite Score) (2022-2018)") + tm_credits("Aggregate Index using U.S. Census American Community Survey Values. Thanks to United Heath Foundation for original concept.")
Assignment 4