Recently Published

SPSS - Types of Measurement
Review Matrix Algebra dan Tutorial Mengunggah ke RPubs melalui RStudio
Tugas Analisis Multivariat S1 Sains Data 2023 Nama Anggota : 1. Destia Rika (23031554052) 2. Nadira Zahra Ramadhani (23031554099) 3. Tarisa Dwita Abadi (23031554134)
Coba
percobaan
week3_hw
Plot
plot 2
Document
Confirmation of SHR 2024 National "Risk of Social Isolation" Rankings (table)
library(easypackages) libraries(c("readxl", "gtsummary", "ggmap", "ggiraph", "ggforce", "ggcorrplot", "ggthemes", "ggsignif", "ggsflabel", "ggrepel", "ggpubr", "ggsci", "glue", "gt", "janitor", "maptools", "mapview", "magrittr", "plyr", "prettyunits", "progress", "progressr", "psych", "rgeos", "rio", "rms", "Hmisc", "robustbase", "rspat", "s2", "sfheaders", "sfweight", "snakecase", "smoothr", "sp", "spatial", "spatialEco", "spatstat", "spatstat.linnet", "spatstat.model", "rpart", "spatstat.explore", "nlme", "spatstat.random", "spatstat.geom", "spatstat.data", "spdep", "sf", "spData", "abind", "summarytools", "terra", "tidycensus", "tidylog", "tidyselect", "lubridate", "forcats", "stringr", "dplyr", "purrr", "readr", "tidyr", "tibble", "ggplot2", "tidyverse", "tigris", "tmap", "vctrs", "viridis", "viridisLite", "vroom", "waldo", "wk", "stats", "graphics", "grDevices", "utils", "datasets", "methods", "base", "haven", "foreign", "survey", "srvyr", "sitrep", "questionr", "srvyr", "stringr", "readxl", "gtsummary")) acs_2022_vars <- load_variables(2022, "acs5", cache = TRUE) score_test <- read_excel("2024_socialisolation_vals_maybe_seniorhealthrankigs_mapranks.xlsx") shr_2024 <- read_excel("2024_socialisolation_vals_maybe_seniorhealthrankigs.xlsx") states_w_abbr <- read_excel("states_w_abbr.xlsx",col_names = FALSE) ##do USA variables first #############NOTE: this code is meant to confirm the findings of the 2024 Senior Report from the American Health Rankings score_test <- read_excel("2024_socialisolation_vals_maybe_seniorhealthrankigs_mapranks.xlsx") shr_2024 <- read_excel("2024_socialisolation_vals_maybe_seniorhealthrankigs.xlsx") states_w_abbr <- read_excel("states_w_abbr.xlsx",col_names = FALSE) get_acs("state", table = "B18101", year= 2022, survey = "acs5", geometry = TRUE) %>% left_join(., acs_2022_vars, by = c("variable"="name")) %>% .[,-8] %>% separate_wider_delim(cols="label", delim = "!!", names_sep = "", too_few = "align_start") -> disability_USAt #Poverty get_acs("state", table = "B17001", year= 2022, survey = "acs5", geometry = TRUE) %>% left_join(., acs_2022_vars, by = c("variable"="name")) %>% separate_wider_delim(cols="label", delim = "!!", names_sep = "", too_few = "align_start") -> poverty_USAt #Marital Status (i.e. Never Married, Separated, Widowed, Divorced) get_acs("state", table = "B12002", year= 2022, survey = "acs5", geometry = TRUE) %>% left_join(., acs_2022_vars, by = c("variable"="name")) %>% separate_wider_delim(cols="label", delim = "!!", names_sep = "", too_few = "align_start") -> maritalstatus_USAt #Living Alone 65+ (why they didn't check for multicollinearity between this and marital status or whatever I don't know but w/e) get_acs("state", table = "B11007", year= 2022, survey = "acs5", summary_var = "B11007_002", geometry = TRUE) %>% left_join(., acs_2022_vars, by = c("variable"="name")) %>% separate_wider_delim(cols="label", delim = "!!", names_sep = "", too_few = "align_start") -> living_alone_65_USAt #Disability get_acs("state", table = "B18101", year= 2022, survey = "acs5", geometry = TRUE) %>% left_join(., acs_2022_vars, by = c("variable"="name")) %>% .[,-8] %>% separate_wider_delim(cols="label", delim = "!!", names_sep = "", too_few = "align_start") -> disability_USAt #Independent Living get_acs("state", table = "B18107", year= 2022, survey = "acs5", geometry = TRUE) %>% left_join(., acs_2022_vars, by = c("variable"="name")) %>% separate_wider_delim(cols="label", delim = "!!", names_sep = "", too_few = "align_start") -> USA_independentlivingt #... #USA Disability scores (1) #confirmed #standard output disability_USAt %>% #single out only the elderly (65+) filter(label4 == "65 to 74 years:" | label4 == "75 years and over:") %>% #remove all the "grand total" rows filter(!is.na(label5)) %>% #calculate the "grand total" that we need to use by summing the disabled + not disabled across men and women transform(total_elders = ave(.$estimate, .$NAME, FUN=sum)) %>% mutate(age = "65+") %>% #calculate the percentage of those that are disable and sum them up, and then divide by total elders to get the total proportion of disabled filter(label5 == "With a disability") %>% mutate(prop = 100 * (estimate/total_elders)) %>% ##this feels REALLY stupid, but it's what they did, I swear to God transform(total_disabled = ave(.$prop, .$NAME, FUN=sum)) %>% #calculate the z_score for the total proportion of elders disabled by state as compared to national average mutate(z_score = (total_disabled - mean(total_disabled)/sd(total_disabled))) %>% dplyr::select(NAME, total_disabled, z_score, geometry) %>%left_join(., states_w_abbr %>% dplyr::rename(., state=1, abbr=2), by=c("NAME" = "state")) %>% unique(.) %>% left_join(., shr_2024 %>% filter(Measure == "Disabled"), by = c("abbr" = "State")) %>% filter(abbr != "DC" & abbr != "PR") %>% unique(.) %>% arrange(desc(-total_disabled)) %>% mutate(total_disabled = round(total_disabled, 1), rank = rank(total_disabled, ties.method = "min"), total_prop = total_disabled) %>% dplyr::select(NAME, abbr, Measure, total_prop, rank, z_score, Value, Rank, geometry) %>% st_as_sf(.) -> disability_USA_Zt #USA Poverty scores (2) #confirmed poverty_USAt %>% filter(label5 == "65 to 74 years" | label5 == "75 years and over") %>% transform(total_elders = ave(.$estimate, .$NAME, FUN=sum)) %>% mutate(age = "65+") %>% filter(grepl("below", label3)) %>% mutate(prop = 100 * (estimate/total_elders)) %>% transform(total_poverty = ave(.$prop, .$NAME, FUN=sum)) %>% mutate(z_score = (total_poverty - mean(total_poverty)/sd(total_poverty))) %>% left_join(., states_w_abbr %>% dplyr::rename(., state=1, abbr=2), by=c("NAME" = "state")) %>% unique(.) %>% left_join(., shr_2024 %>% filter(Measure == "Living in Poverty"), by = c("abbr" = "State")) %>% filter(abbr != "DC" & abbr != "PR") %>% arrange(desc(-total_poverty)) %>% mutate(total_poverty = round(total_poverty, 1), rank = rank(total_poverty, ties.method = "min"), total_prop = total_poverty) %>% dplyr::select(NAME, abbr, Measure, total_prop, rank, z_score, Value, Rank, geometry) %>% st_as_sf(.) ->poverty_USA_Zt #USA Living Alone scores (3) #confirmed!! living_alone_65_USAt %>% filter(variable == "B11007_003") %>% mutate(prop = 100 * (estimate/summary_est)) %>% mutate(age = "65+") %>% mutate(z_score = (prop - mean(prop))/sd(prop)) %>% left_join(., states_w_abbr %>% dplyr::rename(., state=1, abbr=2), by=c("NAME" = "state")) %>% unique(.) %>% arrange(desc(-prop)) %>% filter(abbr != "DC" & abbr != "PR") %>% mutate(total_prop = round(prop, 1), rank = rank(prop, ties.method = "min")) %>% filter(abbr != "DC" & abbr != "PR") %>% left_join(., shr_2024 %>% filter(Measure == "Living Alone"), by = c("abbr" = "State")) %>% dplyr::select(NAME, abbr, Measure, total_prop, rank, z_score, Value, Rank, geometry) %>% st_as_sf(.) -> living_alone_65_USA_Zt #USA Not Married scores (4) #confirmed! maritalstatus_USAt %>% filter(label5 == "65 to 74 years" | label5 == "75 to 84 years" | label5 == "85 years and over" | label6 == "65 to 74 years" | label6 == "75 to 84 years" | label6 == "85 years and over" | label7 == "65 to 74 years" | label7 == "75 to 84 years" | label7 == "85 years and over") %>% transform(total_elders = ave(.$estimate, .$NAME, FUN=sum)) %>% mutate(age = "65+") %>% filter(label4 == "Never married:") %>% transform(not_married_sum = ave(.$estimate, .$NAME, FUN=sum)) %>% mutate(prop = 100 * (not_married_sum/total_elders)) %>% mutate(z_score = (prop - mean(prop))/sd(prop)) %>% dplyr::select(NAME, not_married_sum, prop, z_score, geometry) %>% unique(.) %>% transform(total_prop = ave(.$prop, .$NAME, FUN=sum)) %>% left_join(., states_w_abbr %>% dplyr::rename(., state=1, abbr=2), by=c("NAME" = "state")) %>% unique(.) %>% filter(abbr != "DC" & abbr != "PR") %>% arrange(desc(-not_married_sum)) %>% mutate(total_prop = round(total_prop, 1), rank = rank(total_prop, ties.method = "min")) %>% left_join(., shr_2024 %>% filter(Measure == "Never Married"), by = c("abbr" = "State")) %>% dplyr::select(NAME, abbr, Measure, total_prop, rank, z_score, Value, Rank, geometry) %>% st_as_sf(.) -> USA_not_married_Zt #USA Widowed, Separated, or Divorced scores (5) #confirmed! ##minor note: this is technically an inaccuracy; it should be including "B12002_155" through "B12002_157" because "Other" for that category is functionally equivalent to "Separated" for our purposes -- their being apart owing to marital discord or not wasn't the point, 'not being together physically' was, but also who cares; source: https://www2.census.gov/programs-surveys/acs/tech_docs/subject_definitions/2023_ACSSubjectDefinitions.pdf maritalstatus_USAt %>% filter(label5 == "65 to 74 years" | label5 == "75 to 84 years" | label5 == "85 years and over" | label6 == "65 to 74 years" | label6 == "75 to 84 years" | label6 == "85 years and over" | label7 == "65 to 74 years" | label7 == "75 to 84 years" | label7 == "85 years and over") %>% transform(total_elders = ave(.$estimate, .$NAME, FUN=sum)) %>% mutate(age = "65+") %>% filter(label4 == "Widowed:" | label4 == "Divorced:" | label6 == "Separated:") %>% transform(widow_separate_divorced_sum = ave(.$estimate, .$NAME, FUN=sum)) %>% transform(div_sep_wid_sum = ave(.$estimate, .$NAME, FUN=sum)) %>% mutate(prop = 100 * (div_sep_wid_sum/total_elders)) %>% mutate(z_score = (prop - mean(prop))/sd(prop)) %>% dplyr::select(NAME, div_sep_wid_sum, prop, z_score, geometry) %>% unique(.) %>% transform(total_prop = ave(.$prop, .$NAME, FUN=sum)) %>% left_join(., states_w_abbr %>% dplyr::rename(., state=1, abbr=2), by=c("NAME" = "state")) %>% unique(.) %>% filter(abbr != "DC" & abbr != "PR") %>% arrange(desc(-div_sep_wid_sum)) %>% mutate(total_prop = round(total_prop, 1), rank = rank(total_prop, ties.method = "min")) %>% left_join(., shr_2024 %>% filter(Measure == "Divorced, Separated or Widowed"), by = c("abbr" = "State")) %>% dplyr::select(NAME, abbr, Measure, total_prop, rank, z_score, Value, Rank, geometry) %>% st_as_sf(.) -> USA_widow_separate_divorced_Zt #USA Independent Living scores (6) #confirmed! USA_independentlivingt %>% filter(label4 == "65 to 74 years:" | label4 == "75 years and over:") %>% filter(!is.na(label5)) %>% transform(total_elders = ave(.$estimate, .$NAME, FUN=sum)) %>% mutate(age = "65+") %>% filter(label5 == "With an independent living difficulty") %>% transform(diff_indep_sum = ave(.$estimate, .$NAME, FUN=sum)) %>% transform(diff_indep_sum = ave(.$estimate, .$NAME, FUN=sum)) %>% mutate(prop = 100 * (diff_indep_sum/total_elders)) %>% mutate(z_score = (prop - mean(prop))/sd(prop)) %>% dplyr::select(NAME, diff_indep_sum, prop, z_score, geometry) %>% unique(.) %>% transform(total_prop = ave(.$prop, .$NAME, FUN=sum)) %>% left_join(., states_w_abbr %>% dplyr::rename(., state=1, abbr=2), by=c("NAME" = "state")) %>% unique(.) %>% filter(abbr != "DC" & abbr != "PR") %>% arrange(desc(-diff_indep_sum)) %>% mutate(total_prop = round(total_prop, 1), rank = rank(total_prop, ties.method = "min")) %>% left_join(., shr_2024 %>% filter(Measure == "Independent Living Difficulty"), by = c("abbr" = "State")) %>% dplyr::select(NAME, abbr, Measure, total_prop, rank, z_score, Value, Rank, geometry) %>% st_as_sf(.) -> USA_independentliving_Zt #rbind(poverty_USA_Z, living_alone_65_USA_Z, USA_widow_separate_divorced_Z, USA_not_married_Z, USA_widow_separate_divorced_Z, disability_USA_Z, USA_independentliving_Z) %>% mutate(z_score = replace(z_score, z_score >2, 2)) %>% transform(composite_score = ave(.$z_score, .$NAME, FUN=sum)) %>% unique(.) %>% mutate(z_score_percentile = percent_rank(z_score) * 100) %>% mutate(FLAG=all.equal(as.numeric(total_prop), as.numeric(Value))) -> equality_check ##old one, don't use rbind(poverty_USA_Zt, living_alone_65_USA_Zt, USA_widow_separate_divorced_Zt, USA_not_married_Zt, USA_widow_separate_divorced_Zt, disability_USA_Zt, USA_independentliving_Zt) %>% unique(.) %>% mutate(FLAG=all.equal(as.numeric(total_prop), as.numeric(Value))) %>% group_by(Measure) %>% mutate(my_z_score = (total_prop - mean(total_prop))/sd(total_prop), their_z_score = (Value - mean(Value))/sd(Value), z_FLAG=all.equal(as.numeric(my_z_score), as.numeric(their_z_score))) %>% ungroup(.) %>% transform(my_state_mean_z = ave(.$my_z_score, .$abbr, FUN=mean), their_state_mean_Z = ave(.$their_z_score, .$abbr, FUN=mean)) %>% mutate(my_possible_composite = my_state_mean_z %>% scales::rescale(., to=c(1,100)) %>% round(.), my_possible_rank = dense_rank(my_possible_composite)) %>% unique(.) %>% dplyr::select(abbr, my_possible_composite, my_possible_rank, my_state_mean_z) %>% unique(.) %>% left_join(., score_test, by=c("abbr"="State")) %>% arrange(., -desc(my_possible_composite)) %>% mutate(my_possible_rank_two = 1:50) -> composite_score_equality_check composite_score_equality_check %>% filter(abbr != "HI" & abbr != "AK") %>% st_bbox(.) ->mainland_bbox tmap_mode("view") composite_score_equality_check %>% tm_shape(., bbox = mainland_bbox) + tm_polygons(col = "my_possible_composite", palette = "blues", breaks = c(0,37, 48, 57, 70, 100), popup.vars = c("abbr", "my_possible_composite", "Value", "Rank", "Measure")) -> test_map_w_bounds #compare with https://www.americashealthrankings.org/explore/measures/isolationrisk_sr_b
Ingénierie des données
Dans ce notebook, je partage quelques travaux personnels (ou en groupes) liés à l'Ingénierie des données. *
R Lab 2
Validity and Reliability