---
title: "rstatix"
author: "Andrew H. Kim"
date: "8/03/2021"
output: html_document
editor_options: 
  chunk_output_type: console
---

```{r load packages, include=FALSE}

library("tidyverse"); packageVersion("tidyverse")
library("ggplot2"); packageVersion("ggplot2")
library("rstatix"); packageVersion("rstatix")
library("dplyr"); packageVersion("dplyr")
library("ggpubr"); packageVersion("ggpubr")

knitr::opts_chunk$set(echo = TRUE)

```


```{r read rstatix.df.txt}
rstatix.df <- read.delim(file.choose())
rstatix.df
```

```{r Fisher's exact test}

# count how many samples have 0 ASV_abundance in each group
rstatix.df %>% group_by(Groups) %>% count(ASV_abundance == 0)

# generate matrix based on above counting
xtab <- as.table(rbind(c(10,7), c(0,3)))
dimnames(xtab) <- list(samples = c("present", "absent"), ASV = c("Group2", "Group3"))
xtab

# run Fisher's exact test
fisher_test(xtab, detailed = TRUE)

```

```{r Shapiro normality test}
# null hypothesis = normally distributed
# reject null if p < 0.05

# run normality test
rstatix.df %>% 
group_by(Groups) %>% 
shapiro_test(ASV_abundance)

# plot ASV abundance in each sample
rstatix.df.distribution = ggplot(rstatix.df, aes(x = SampleID, y = ASV_abundance)) +
  geom_bar(stat = "identity") +
  ggtitle("abundance distribution")+
  facet_wrap("Groups")+
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
rstatix.df.distribution

# seems like ASV 1 is not normally distributed while Group2 and Group3 are normally distributed
```

```{r t-test (two, parametric, Group2 vs Group3)}
# run t_test for normally distributed group comparison

# keep only normally distributed groups in the dataframe
t.test <- filter(rstatix.df, Groups == "Group2" | Groups == "Group3") %>%
droplevels()

# run t-test
# note: p-value correction is not feasible here because there are only 2 groups and you need more than 2 groups for multiple comparison correction.
t.test.stat <- as.tibble(t.test) %>% 
t_test(ASV_abundance ~ Groups, p.adjust.method= "holm")
t.test.stat

# generate box plot 
t.test.box <- ggplot(t.test,aes(x = Groups, y = ASV_abundance, colour = Groups)) +
  geom_boxplot()+
  geom_point()+
  theme(panel.grid = element_blank())
t.test.box

# generate box plot with p-values on the plot
t.test.stat <- t.test.stat %>% add_xy_position (x = "Groups")
t.test.box + stat_pvalue_manual(t.test.stat, label = "p", tip.length=0.02)

# multiple comparisons correction using holm

t.test.stat <- as.tibble(rstatix.df) %>% 
t_test(ASV_abundance ~ Groups, p.adjust.method= "holm")
t.test.stat

# generate box plot 
t.test.box <- ggplot(rstatix.df,aes(x = Groups, y = ASV_abundance, colour = Groups)) +
  geom_boxplot()+
  geom_point()+
  theme(panel.grid = element_blank())
t.test.box

# generate box plot with p-values on the plot
t.test.stat <- t.test.stat %>% add_xy_position (x = "Groups")
t.test.box + stat_pvalue_manual(t.test.stat, label = "{p.adj} {p.adj.signif}", tip.length=0.02)

```

```{r Wilcoxon Mann Whitney test (two, nonparametric, Group1 vs Group2)}
# run wilcox_test for not normally distributed group comparison

# remove normally distributed group from the dataframe
w.test <- filter(rstatix.df, Groups == "Group1" | Groups == "Group2") %>%
droplevels()

# run wilcox_test
# note: p-value correction is not feasible here because there are only 2 groups and you need more than 2 groups for multiple comparison correction.
w.test.stat <- w.test %>% 
wilcox_test(ASV_abundance~ASV, p.adjust.method= "holm")
w.test.stat

# generate box plot 
w.test.box <- ggplot(w.test,aes(x = Groups, y = ASV_abundance, colour = Groups)) +
  geom_boxplot()+
  geom_point()+
  theme(panel.grid = element_blank())
w.test.box

# generate box plot with p-values on the plot
w.test.stat <- w.test.stat %>% add_xy_position (x = "Groups")
w.test.box + stat_pvalue_manual(w.test.stat, label = "p", tip.length=0.02)

# multiple comparisons correction using holm
w.test.stat <- rstatix.df %>% 
wilcox_test(ASV_abundance~Groups, p.adjust.method= "holm")
w.test.stat

# generate box plot 
w.test.box <- ggplot(rstatix.df,aes(x = Groups, y = ASV_abundance, colour = Groups)) +
  geom_boxplot()+
  geom_point()+
  theme(panel.grid = element_blank())
w.test.box

# generate box plot with p-values on the plot
w.test.stat <- w.test.stat %>% add_xy_position (x = "Groups")
w.test.box + stat_pvalue_manual(w.test.stat, label = "{p.adj} {p.adj.signif}", tip.length=0.02)


```

```{r anova test (multiple, parametric, Group1 vs Group2 vs Group3)}
# run anova_test for all groups comparison
A.test.stat <- rstatix.df %>% 
anova_test(formula= ASV_abundance~Groups)

# run dunn_test for all groups comparison
A.test.stat.adj <- dunn_test(data=rstatix.df, formula=ASV_abundance~Groups, p.adjust.method = "BH")
A.test.stat.adj

# generate box plot 
A.test.box <- ggplot(rstatix.df, aes(x = Groups, y = ASV_abundance, colour = Groups)) +
  geom_boxplot()+
  geom_point()+
  theme(panel.grid = element_blank())
A.test.box

# generate box plot with p-values on the plot
A.test.stat.adj <-A.test.stat.adj %>% add_xy_position (x = "Groups") %>% p_round(digits = 3)
A.test.box + stat_pvalue_manual(A.test.stat.adj, label = "{p.adj} {p.adj.signif}", tip.length=0.02)
```

```{r Kruskal-Wallis test (multiple, nonparametric, Group1 vs Group2 vs Group3)}
# run kruskal_test for all groups comparison
k.test.stat <- rstatix.df %>% 
kruskal_test(formula= ASV_abundance~Groups)
k.test.stat

# run dunn_test for all groups comparison
k.test.stat.adj <- dunn_test(data=rstatix.df, formula=ASV_abundance~Groups, p.adjust.method = "BH")
k.test.stat.adj

# generate box plot 
k.test.box <- ggplot(rstatix.df, aes(x = Groups, y = ASV_abundance, colour = Groups)) +
  geom_boxplot()+
  geom_point()+
  theme(panel.grid = element_blank())
k.test.box

# generate box plot with p-values on the plot
k.test.stat.adj <- k.test.stat.adj %>% add_xy_position (x = "Groups") %>% p_round(digits = 3)
k.test.box + stat_pvalue_manual(k.test.stat.adj, label = "{p.adj} {p.adj.signif}", tip.length=0.02)
```