--- title: "rstatix" author: "Andrew H. Kim" date: "8/03/2021" output: html_document editor_options: chunk_output_type: console --- ```{r load packages, include=FALSE} library("tidyverse"); packageVersion("tidyverse") library("ggplot2"); packageVersion("ggplot2") library("rstatix"); packageVersion("rstatix") library("dplyr"); packageVersion("dplyr") library("ggpubr"); packageVersion("ggpubr") knitr::opts_chunk$set(echo = TRUE) ``` ```{r read rstatix.df.txt} rstatix.df <- read.delim(file.choose()) rstatix.df ``` ```{r Fisher's exact test} # count how many samples have 0 ASV_abundance in each group rstatix.df %>% group_by(Groups) %>% count(ASV_abundance == 0) # generate matrix based on above counting xtab <- as.table(rbind(c(10,7), c(0,3))) dimnames(xtab) <- list(samples = c("present", "absent"), ASV = c("Group2", "Group3")) xtab # run Fisher's exact test fisher_test(xtab, detailed = TRUE) ``` ```{r Shapiro normality test} # null hypothesis = normally distributed # reject null if p < 0.05 # run normality test rstatix.df %>% group_by(Groups) %>% shapiro_test(ASV_abundance) # plot ASV abundance in each sample rstatix.df.distribution = ggplot(rstatix.df, aes(x = SampleID, y = ASV_abundance)) + geom_bar(stat = "identity") + ggtitle("abundance distribution")+ facet_wrap("Groups")+ theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) rstatix.df.distribution # seems like ASV 1 is not normally distributed while Group2 and Group3 are normally distributed ``` ```{r t-test (two, parametric, Group2 vs Group3)} # run t_test for normally distributed group comparison # keep only normally distributed groups in the dataframe t.test <- filter(rstatix.df, Groups == "Group2" | Groups == "Group3") %>% droplevels() # run t-test # note: p-value correction is not feasible here because there are only 2 groups and you need more than 2 groups for multiple comparison correction. t.test.stat <- as.tibble(t.test) %>% t_test(ASV_abundance ~ Groups, p.adjust.method= "holm") t.test.stat # generate box plot t.test.box <- ggplot(t.test,aes(x = Groups, y = ASV_abundance, colour = Groups)) + geom_boxplot()+ geom_point()+ theme(panel.grid = element_blank()) t.test.box # generate box plot with p-values on the plot t.test.stat <- t.test.stat %>% add_xy_position (x = "Groups") t.test.box + stat_pvalue_manual(t.test.stat, label = "p", tip.length=0.02) # multiple comparisons correction using holm t.test.stat <- as.tibble(rstatix.df) %>% t_test(ASV_abundance ~ Groups, p.adjust.method= "holm") t.test.stat # generate box plot t.test.box <- ggplot(rstatix.df,aes(x = Groups, y = ASV_abundance, colour = Groups)) + geom_boxplot()+ geom_point()+ theme(panel.grid = element_blank()) t.test.box # generate box plot with p-values on the plot t.test.stat <- t.test.stat %>% add_xy_position (x = "Groups") t.test.box + stat_pvalue_manual(t.test.stat, label = "{p.adj} {p.adj.signif}", tip.length=0.02) ``` ```{r Wilcoxon Mann Whitney test (two, nonparametric, Group1 vs Group2)} # run wilcox_test for not normally distributed group comparison # remove normally distributed group from the dataframe w.test <- filter(rstatix.df, Groups == "Group1" | Groups == "Group2") %>% droplevels() # run wilcox_test # note: p-value correction is not feasible here because there are only 2 groups and you need more than 2 groups for multiple comparison correction. w.test.stat <- w.test %>% wilcox_test(ASV_abundance~ASV, p.adjust.method= "holm") w.test.stat # generate box plot w.test.box <- ggplot(w.test,aes(x = Groups, y = ASV_abundance, colour = Groups)) + geom_boxplot()+ geom_point()+ theme(panel.grid = element_blank()) w.test.box # generate box plot with p-values on the plot w.test.stat <- w.test.stat %>% add_xy_position (x = "Groups") w.test.box + stat_pvalue_manual(w.test.stat, label = "p", tip.length=0.02) # multiple comparisons correction using holm w.test.stat <- rstatix.df %>% wilcox_test(ASV_abundance~Groups, p.adjust.method= "holm") w.test.stat # generate box plot w.test.box <- ggplot(rstatix.df,aes(x = Groups, y = ASV_abundance, colour = Groups)) + geom_boxplot()+ geom_point()+ theme(panel.grid = element_blank()) w.test.box # generate box plot with p-values on the plot w.test.stat <- w.test.stat %>% add_xy_position (x = "Groups") w.test.box + stat_pvalue_manual(w.test.stat, label = "{p.adj} {p.adj.signif}", tip.length=0.02) ``` ```{r anova test (multiple, parametric, Group1 vs Group2 vs Group3)} # run anova_test for all groups comparison A.test.stat <- rstatix.df %>% anova_test(formula= ASV_abundance~Groups) # run dunn_test for all groups comparison A.test.stat.adj <- dunn_test(data=rstatix.df, formula=ASV_abundance~Groups, p.adjust.method = "BH") A.test.stat.adj # generate box plot A.test.box <- ggplot(rstatix.df, aes(x = Groups, y = ASV_abundance, colour = Groups)) + geom_boxplot()+ geom_point()+ theme(panel.grid = element_blank()) A.test.box # generate box plot with p-values on the plot A.test.stat.adj <-A.test.stat.adj %>% add_xy_position (x = "Groups") %>% p_round(digits = 3) A.test.box + stat_pvalue_manual(A.test.stat.adj, label = "{p.adj} {p.adj.signif}", tip.length=0.02) ``` ```{r Kruskal-Wallis test (multiple, nonparametric, Group1 vs Group2 vs Group3)} # run kruskal_test for all groups comparison k.test.stat <- rstatix.df %>% kruskal_test(formula= ASV_abundance~Groups) k.test.stat # run dunn_test for all groups comparison k.test.stat.adj <- dunn_test(data=rstatix.df, formula=ASV_abundance~Groups, p.adjust.method = "BH") k.test.stat.adj # generate box plot k.test.box <- ggplot(rstatix.df, aes(x = Groups, y = ASV_abundance, colour = Groups)) + geom_boxplot()+ geom_point()+ theme(panel.grid = element_blank()) k.test.box # generate box plot with p-values on the plot k.test.stat.adj <- k.test.stat.adj %>% add_xy_position (x = "Groups") %>% p_round(digits = 3) k.test.box + stat_pvalue_manual(k.test.stat.adj, label = "{p.adj} {p.adj.signif}", tip.length=0.02) ```