Skip to content

Commit 4536ef4

Browse files
committed
add sentiment analysis scripts
1 parent 8168bc1 commit 4536ef4

File tree

1 file changed

+151
-0
lines changed

1 file changed

+151
-0
lines changed
Lines changed: 151 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,151 @@
1+
####################################################################
2+
# Sentiment Analysis
3+
4+
# Install all required packages
5+
# Install only the packages that are not yet available. Calling
# install.packages() unconditionally downloads and reinstalls every package
# each time the script is run.
required_packages <- c(
  "sentimentr", "syuzhet", "dplyr", "tidyr",
  "readr", "ggplot2", "RColorBrewer", "stringr"
)
is_installed <- vapply(
  required_packages, requireNamespace, logical(1),
  quietly = TRUE
)
missing_packages <- required_packages[!is_installed]
if (length(missing_packages) > 0) {
  install.packages(missing_packages)
}
13+
14+
# Load all packages
15+
library(sentimentr)
16+
library(syuzhet)
17+
library(dplyr)
18+
library(tidyr)
19+
library(readr)
20+
library(ggplot2)
21+
library(RColorBrewer)
22+
library(stringr)
23+
24+
# Polarity Analysis
25+
26+
# Load Data
27+
# Read the preprocessed comments from disk
comments <- readr::read_csv("./data/comments_preprocessed.csv")

# Score the text column; `ave_sentiment` is attached row by row below
sentiment_scores <- sentiment_by(comments$comments)

# Attach the score to every row and bucket it into a label:
# below -0.1 is "negative", above 0.1 is "positive", anything else "neutral"
polarity <- mutate(
  comments,
  score = sentiment_scores$ave_sentiment,
  sentiment_label = case_when(
    score < -0.1 ~ "negative",
    score > 0.1 ~ "positive",
    TRUE ~ "neutral"
  )
)
40+
41+
# Check first rows with results
42+
# Preview the first rows of the scored data
head(polarity)

# Number of rows carrying each sentiment label
table(polarity$sentiment_label)

# Histogram of the scores in bins of width 0.1
polarity %>%
  ggplot(aes(x = score)) +
  geom_histogram(fill = "skyblue", color = "white", binwidth = 0.1) +
  labs(
    x = "Average Sentiment",
    y = "Count",
    title = "Sentiment Score Distribution"
  ) +
  theme_minimal()
52+
53+
# Extract season info (s1, s2) into a new column
54+
# Derive `season` from the first "s<digits>" token found in each id
polarity_seasons <- polarity %>%
  mutate(season = str_extract(id, "s\\d+"))

# Side-by-side histograms per season, drawn as density instead of raw counts
polarity_seasons %>%
  ggplot(aes(x = score, fill = season)) +
  geom_histogram(
    aes(y = after_stat(density)),
    color = "white",
    position = "dodge",
    binwidth = 0.1
  ) +
  scale_fill_brewer(palette = "Set1") +
  labs(
    title = "Sentiment Score Distribution by Season (Normalized)",
    x = "Average Sentiment Score (Polarity)",
    y = "Density (Proportion of Comments)"
  ) +
  theme_minimal()
68+
69+
# Save results
70+
# Create the target directory first: write_csv() does not create missing
# directories, so the write would otherwise fail on a fresh checkout.
dir.create("output", showWarnings = FALSE, recursive = TRUE)
write_csv(polarity, "output/polarity_results.csv")
71+
72+
####################################################################
73+
# Emotion Detection with Syuzhet's NRC Lexicon
74+
75+
# Detecting Emotions per Comment/Sentence
76+
# Split each comment into sentences while keeping every sentence attached to
# the row (and therefore the id) it came from. Flattening all sentences into
# one vector and assigning it back to `comments$comments` only works when
# every comment is exactly one sentence; otherwise the lengths differ and the
# text no longer lines up with the ids.
# The syuzhet namespace is spelled out because sentimentr, which is attached
# as well, provides a function with the same name.
comment_sentences <- comments %>%
  mutate(comments = lapply(.data$comments, syuzhet::get_sentences)) %>%
  unnest(cols = comments)
sentences <- comment_sentences$comments

# Compute Emotion Scores per Sentence
# Assign NRC emotion scores (anger, joy, etc.) + positive/negative
emotion_score <- get_nrc_sentiment(sentences)

# Review Summary of Emotion Scores
summary(emotion_score)

# Regroup with Original Comments/IDs
# `comment_sentences` holds one row per sentence in the same order as
# `emotion_score`, so the two can be bound column-wise.
emotion_data <- bind_cols(comment_sentences, emotion_score)
88+
89+
# Summarize Overall Emotion Counts
90+
# Total every emotion column (anger through trust) and reshape the single
# result row into one (emotion, count) row per emotion, largest count first
emotion_summary <- emotion_data %>%
  summarise(across(anger:trust, sum)) %>%
  pivot_longer(everything(), names_to = "emotion", values_to = "count") %>%
  arrange(desc(count))
95+
96+
# Plot Overall Emotion Distribution
97+
# Horizontal bar chart of the totals, each bar labelled with its count
emotion_summary %>%
  ggplot(aes(x = emotion, y = count, fill = emotion)) +
  geom_col(show.legend = FALSE) +
  geom_text(aes(label = count), size = 2, hjust = -0.2) +
  coord_flip() +
  scale_fill_manual(values = brewer.pal(10, "Paired")) +
  labs(
    title = "Overall Emotion Distribution",
    x = "Emotion",
    y = "Total Count"
  ) +
  theme_minimal(base_size = 12)
104+
105+
# Add "Season" Variable and Summarize by Season
106+
# Create season variable based on ID pattern
107+
# Take the season from the "s<number>_" prefix of each id. A pattern is used
# instead of a hard-coded s1/s2 lookup so that further seasons are labelled
# too rather than silently becoming NA; ids without such a prefix still
# yield NA.
emotion_seasons <- emotion_data %>%
  mutate(season = str_extract(id, "^s\\d+(?=_)"))
110+
111+
# Aggregate emotion counts per season
112+
# One row per season holding the NA-tolerant total of every score column
# from anger through positive
emotion_by_season <- emotion_seasons %>%
  group_by(season) %>%
  summarise(
    across(anger:positive, function(x) sum(x, na.rm = TRUE)),
    .groups = "drop"
  )
117+
118+
# Plotting the Data
119+
# Compare Emotions by Season
120+
# Reshape to one (season, emotion, count) row per combination for plotting
emotion_long <- emotion_by_season %>%
  pivot_longer(cols = anger:positive, names_to = "emotion", values_to = "count")

# Grouped horizontal bars, one bar per season within each emotion.
# The labels get the same dodge as the bars (0.9 is the default bar width);
# without it every label is drawn at the centre of its emotion and the
# seasons' labels overlap instead of sitting on their own bars.
ggplot(emotion_long, aes(x = reorder(emotion, -count), y = count, fill = season)) +
  geom_col(position = "dodge") +
  geom_text(
    aes(label = count),
    position = position_dodge(width = 0.9),
    hjust = -0.2,
    size = 2
  ) +
  scale_fill_brewer(palette = "Set2") +
  theme_minimal(base_size = 12) +
  labs(title = "Emotion Distribution by Season", x = "Emotion", y = "Total Count", fill = "Season") +
  coord_flip()
130+
131+
# Emotion Co-occurrence Heatmap
132+
# Compute correlations between emotions
133+
# Pearson correlations between the emotion columns (anger through trust)
emotion_matrix <- select(emotion_data, anger:trust)
co_occurrence <- cor(emotion_matrix, method = "pearson")

# Blank the diagonal so self-correlations are not plotted
diag(co_occurrence) <- NA

# One row per emotion pair, ready for plotting
co_occurrence_long <- setNames(
  as.data.frame(as.table(co_occurrence)),
  c("emotion1", "emotion2", "correlation")
)
141+
# Plot heatmap
142+
# Heatmap of pairwise emotion correlations.
# The scale spans the full correlation range [-1, 1]. With limits of c(0, 1)
# every negative correlation falls outside the scale, becomes NA and is drawn
# in the same grey as the blanked diagonal. Negative values are shown in blue,
# positive in red, with white at zero.
ggplot(co_occurrence_long, aes(x = emotion1, y = emotion2, fill = correlation)) +
  geom_tile(color = "white") +
  scale_fill_gradient2(
    low = "blue", mid = "white", high = "red", midpoint = 0,
    limits = c(-1, 1), na.value = "grey95", name = "Correlation"
  ) +
  theme_minimal(base_size = 12) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(title = "Emotion Co-occurrence Heatmap", x = "Emotion", y = "Emotion")
149+
150+
# Save Results
151+
# Create the target directory first: write_csv() does not create missing
# directories, so the write would otherwise fail on a fresh checkout.
dir.create("output", showWarnings = FALSE, recursive = TRUE)
write_csv(emotion_data, "output/sentiment_emotion_results.csv")

0 commit comments

Comments
 (0)