Recently Published
Final Project
Comparison of Red Maple and Sugar Maple to investigate if the Red Maple has muted dynamics.
FELIPE
BASE DE DATOS Y DESCRIPTIVAS
Midterm Covid Sentiment
---
title: "U.S. COVID-19 Sentiment Timeline"
output: html_document
---
```{r setup, include=FALSE}
# Chunk defaults for the whole document: hide the code, messages, and
# warnings so the knitted HTML shows only rendered output.
knitr::opts_chunk$set(echo = FALSE, message = FALSE, warning = FALSE)
# Data wrangling (dplyr), static charts (ggplot2), date parts (lubridate).
library(dplyr)
library(ggplot2)
library(lubridate)
# Linked interactive output: shared filters (crosstalk), charts (plotly),
# and tables (DT).
library(crosstalk)
library(plotly)
library(DT)
# Fix the RNG seed so the sample() call inside make_scramble() picks the
# same words on every knit.
set.seed(123)
```
```{r load_and_prepare}
# Load data -------------------------------------------------------------
# Look for the CSV on the Desktop first (the original location) and then in
# the project folder, so the document knits on either setup without editing
# the path by hand. Stop with a clear message when no copy is found.
csv_candidates <- c(
  "~/Desktop/usa_covid_2020_2022.csv",
  "usa_covid_2020_2022.csv"
)
csv_found <- csv_candidates[file.exists(csv_candidates)]
if (length(csv_found) == 0) {
  stop(
    "Could not find usa_covid_2020_2022.csv in any of: ",
    paste(csv_candidates, collapse = ", "),
    call. = FALSE
  )
}
sent <- read.csv(csv_found[1], stringsAsFactors = FALSE)

# The later chunks read these three columns; fail here rather than deep
# inside a plot call if the file does not have them.
required_cols <- c("Date", "SentimentScore", "Summary")
missing_cols <- setdiff(required_cols, names(sent))
if (length(missing_cols) > 0) {
  stop(
    "Input CSV is missing required column(s): ",
    paste(missing_cols, collapse = ", "),
    call. = FALSE
  )
}

# Parse dates and put the rows in chronological order.
sent$Date <- as.Date(sent$Date)
sent <- sent[order(sent$Date), ]

# Calendar helpers; YearMonth ("YYYY-MM") is the monthly grouping key used
# by the aggregates, the month filter, and the table.
sent <- sent %>%
  mutate(
    Year = year(Date),
    Month = month(Date),
    YearMonth = format(Date, "%Y-%m")
  )
# Theme tagging ---------------------------------------------------------
# Tag text with a single theme based on keyword matching.
#
# Args:
#   text: character vector (any length, including 0) of free text. Matching
#         is case-insensitive; NA elements match no keyword.
#
# Returns:
#   An unnamed character vector the same length as `text`. Each element is
#   the first theme (in the priority order listed below) whose keyword
#   pattern occurs in the text, or "General Pandemic Response" when none do.
assign_theme <- function(text) {
  txt <- tolower(text)

  # Ordered from highest to lowest priority: the first matching theme wins.
  theme_patterns <- c(
    "Vaccines & Boosters" =
      "vaccine|vaccination|shot|booster|dose|warp speed",
    "Restrictions & Reopening" =
      "lockdown|stay-at-home|shutdown|restrictions|social distancing|face covering|mask",
    "Variants" =
      "delta|omicron|variant|ba\\.5|ba5",
    "Economy & Relief" =
      "economy|economic|jobs|unemployment|market|business|stimulus|checks|rescue plan|relief",
    "Schools & Education" =
      "school|schools|students|classroom|kids",
    "Misinformation & Communication" =
      "misinformation|disinformation|false information|myths|rumors"
  )

  themes <- rep("General Pandemic Response", length(txt))
  unassigned <- rep(TRUE, length(txt))
  for (i in seq_along(theme_patterns)) {
    # Only elements not yet claimed by a higher-priority theme may be tagged.
    hit <- unassigned & grepl(theme_patterns[[i]], txt)
    themes[hit] <- names(theme_patterns)[i]
    unassigned <- unassigned & !hit
  }
  themes
}
# Tag every row with a theme. USE.NAMES = FALSE matters: by default vapply()
# over a character vector names each result after its input string, which
# would attach the full Summary text as names to the Theme column.
sent$Theme <- vapply(
  sent$Summary,
  assign_theme,
  character(1),
  USE.NAMES = FALSE
)
# Stopword lists for word scrambles -------------------------------------
# Common English function words that carry no topical meaning.
stopwords_simple <- c(
  "the", "and", "to", "of", "in", "a", "for", "on", "as", "is",
  "are", "this", "that", "with", "be", "from", "at", "it", "by", "an",
  "into", "about", "has", "have", "will", "was", "were", "but", "more", "many",
  "most", "less", "very", "their", "his", "her", "its", "our", "us", "we",
  "they", "over", "again", "now", "than", "still", "even"
)

# Words that appear in nearly every pandemic summary and so would dominate
# any scramble without telling months apart.
stopwords_domain <- c(
  "covid", "coronavirus", "pandemic", "virus", "viruses",  # the disease
  "case", "cases", "death", "deaths",                      # counts
  "vaccine", "vaccines", "shot", "shots",                  # vaccination
  "lockdown", "lockdowns", "restrictions", "guidelines",   # policy
  "americans", "american", "people", "country",            # population
  "united", "states", "state", "u", "s", "us", "u.s",      # place names
  "economy", "economic", "hospital", "hospitals"           # institutions
)
# Build a short "word scramble": a random handful of distinctive words
# drawn from a piece of text.
#
# Args:
#   text:      character vector of text; all elements are pooled together.
#   n_words:   maximum number of words to return (default 5).
#   min_chars: minimum word length to keep (default 5).
#   stopwords: words to exclude; defaults to the combined simple and domain
#              stopword lists defined in this document.
#
# Returns:
#   A single string of up to `n_words` distinct words separated by spaces,
#   in random order (uses sample(), so it depends on the RNG seed). Returns
#   "" when no word survives filtering, including for NA or empty input.
make_scramble <- function(text,
                          n_words = 5,
                          min_chars = 5,
                          stopwords = c(stopwords_simple, stopwords_domain)) {
  # Lower-case and replace everything that is not a letter or space, so
  # punctuation and digits act as word separators.
  clean <- gsub("[^A-Za-z ]", " ", tolower(text))
  words <- unlist(strsplit(clean, "\\s+"))

  # The !is.na() guard keeps an NA input from slipping through the filter
  # and coming back as the literal string "NA".
  keep <- !is.na(words) &
    words != "" &
    !(words %in% stopwords) &
    nchar(words) >= min_chars
  words <- words[keep]

  if (length(words) == 0) return("")

  unique_words <- unique(words)
  paste(
    sample(unique_words, min(n_words, length(unique_words))),
    collapse = " "
  )
}
# Monthly aggregates -----------------------------------------------------
# One row per YearMonth:
#   MonthStart    - earliest Date observed in that month
#   AvgSentiment  - mean SentimentScore
#   HoverText     - all summaries joined with HTML bullet separators
#   MonthScramble - word scramble built from the month's pooled summaries
monthly_info <- summarize(
  group_by(sent, YearMonth),
  MonthStart = min(Date),
  AvgSentiment = mean(SentimentScore),
  HoverText = paste(Summary, collapse = "<br>• "),
  MonthScramble = make_scramble(paste(Summary, collapse = " ")),
  .groups = "drop"
)

# Label each month by the sign of its average; zero counts as
# "Negative/Neutral".
monthly_info$SentSign <- ifelse(
  monthly_info$AvgSentiment > 0,
  "Positive",
  "Negative/Neutral"
)
# Join month scramble back to row level ---------------------------------
# Every row receives the scramble of the month it belongs to.
sent <- left_join(
  sent,
  monthly_info[c("YearMonth", "MonthScramble")],
  by = "YearMonth"
)
# Theme-level summary ---------------------------------------------------
# One row per Theme with its mean sentiment score and number of entries.
theme_summary <- summarize(
  group_by(sent, Theme),
  AvgSentiment = mean(SentimentScore),
  Count = n(),
  .groups = "drop"
)

# Same sign label as the monthly table; zero counts as "Negative/Neutral".
theme_summary$SentSign <- ifelse(
  theme_summary$AvgSentiment > 0,
  "Positive",
  "Negative/Neutral"
)
```
```{r monthly_interactive}
# Monthly average sentiment: one continuous line through every month, with
# markers colored by the sign of the monthly average.
#
# The line and the markers are separate traces on purpose. Mapping `color`
# to SentSign on a single "lines+markers" trace splits the data into one
# trace per category, so each line would connect only months of the same
# sign and jump straight over the months in between.
plot_ly(
  monthly_info,
  x = ~MonthStart,
  y = ~AvgSentiment,
  colors = c("Negative/Neutral" = "#d95f02", "Positive" = "#1b9e77")
) %>%
  # Neutral trend line across all months; hover is left to the markers.
  add_lines(
    line = list(color = "#7f7f7f"),
    hoverinfo = "skip",
    showlegend = FALSE
  ) %>%
  # Sign-colored markers carrying the month's details in the hover box.
  add_markers(
    color = ~SentSign,
    text = ~paste0(
      "<b>", YearMonth, "</b><br>",
      "Average Sentiment: ", round(AvgSentiment, 3), "<br>",
      "Category: ", SentSign, "<br><br>",
      "<b>Summaries:</b><br>• ", HoverText
    ),
    hoverinfo = "text"
  ) %>%
  layout(
    title = "Monthly Average Sentiment (Color-Coded by Positive vs Negative)",
    xaxis = list(title = "Month"),
    yaxis = list(title = "Average Sentiment Score (-1 to +1)")
  )
```
```{r theme_chart}
# Horizontal bar chart of mean sentiment per theme, with bars ordered by
# their average and filled by sign; the dashed line marks zero.
theme_summary %>%
  ggplot(
    aes(
      x = reorder(Theme, AvgSentiment),
      y = AvgSentiment,
      fill = SentSign
    )
  ) +
  geom_col(width = 0.6) +
  geom_hline(yintercept = 0, linetype = "dashed") +
  scale_fill_manual(
    name = "Average Sentiment",
    values = c("Negative/Neutral" = "#d95f02", "Positive" = "#1b9e77")
  ) +
  labs(
    title = "Average Sentiment by Theme",
    x = "Theme",
    y = "Average Sentiment Score (-1 to +1)"
  ) +
  # Flip so the theme labels read horizontally.
  coord_flip() +
  theme_minimal(base_size = 13)
```
```{r interactive_timeline}
# Row-level data wrapped for crosstalk. Widgets built on the same group
# ("covid") and key (Date) respond to the same filters.
shared_sent <- SharedData$new(sent, key = ~Date, group = "covid")

# Per-entry sentiment over time; the hover box shows date, score, theme,
# and the full summary.
timeline_plot <- shared_sent %>%
  plot_ly(
    x = ~Date,
    y = ~SentimentScore,
    type = "scatter",
    mode = "lines+markers",
    text = ~paste0(
      "Date: ", Date, "<br>",
      "Sentiment: ", SentimentScore, "<br>",
      "Theme: ", Theme, "<br><br>",
      Summary
    ),
    hoverinfo = "text"
  ) %>%
  layout(
    title = "Daily Sentiment Timeline",
    xaxis = list(title = "Date"),
    yaxis = list(title = "Sentiment Score (-1 to +1)")
  )

# Layout: a narrow column of filter controls (date range, score range,
# month picker) beside the wide timeline.
bscols(
  widths = c(3, 9),
  list(
    filter_slider(
      id = "date_filter",
      label = "Date Range",
      sharedData = shared_sent,
      column = ~Date
    ),
    filter_slider(
      id = "score_filter",
      label = "Sentiment Score",
      sharedData = shared_sent,
      column = ~SentimentScore,
      step = 0.05
    ),
    filter_select(
      id = "month_filter",
      label = "Month",
      sharedData = shared_sent,
      group = ~YearMonth
    )
  ),
  timeline_plot
)
```
```{r word_scramble_table}
# Columns shown in the table, in display order.
table_data <- sent[
  c("Date", "SentimentScore", "YearMonth", "MonthScramble", "Theme", "Summary")
]

# Same crosstalk group and key as the timeline data, so the table is wired
# to the same filter controls.
shared_table <- SharedData$new(table_data, key = ~Date, group = "covid")

# Paged table (5 rows per page, fixed page length) with reader-friendly
# column headers and no row-name column.
datatable(
  shared_table,
  rownames = FALSE,
  colnames = c(
    "Date", "Sentiment", "Month", "Word Scramble", "Theme", "Summary"
  ),
  options = list(pageLength = 5, lengthChange = FALSE)
)
```
Statistical Modeling I Portfolio
Auto dataset
TheSportPsychShow
A quick guide for finding similar episodes of interest.
broadband_housing_project
Broadband penetration map by county and year.
Prova Final
Prova Final - Disciplina Visualização de Dados
Document
Tarea 1