# Folder pathsinput_path <-"data/input_data/CFUs/"metadata_path <-"data/metadata/CFUs"# Create subfolders for output filesdataframes_folder <-"data/dataframes"if (!file.exists("data/dataframes")) {dir.create("data/dataframes", recursive =TRUE)}# Load data and metadatainput_data <-read_csv(file.path(input_path, "HNOBac_CFUs_0130_2024.csv")) %>%mutate_if(is.character, factor)input_data$Time <-as.factor(input_data$Time)Bac_order <-read_csv(file.path(metadata_path, "Order_Bacteria_CFUs.csv"))
Data clean-up
# Setting zero values to the limit of detectionCFU_data <- input_data %>%mutate(LOD = CFUs ==0,NewCFU =ifelse(LOD, 3.75, CFUs) )# Factor Ordering and StylingCFU_data <- CFU_data %>%mutate(Combined =interaction(bacteria, Time)) %>%mutate(Temp_label =factor(Temp, labels =c("<b><br><span style='color:#5b5b5b;'>34 °C</span></b>","<b><br><span style='color:red3;'>37 °C</span></b>"))) CFU_data <-merge(CFU_data, Bac_order, by ="Combined") CFU_data$bacteria_label <-factor(CFU_data$bacteria_label, levels = Bac_order$bacteria_label)CFU_data$Line <-fct_recode(CFU_data$Line, "HNO918"="A", "HNO204"="B", "HNO919"="C") # Calculate log2 fold change between final time points vs. time 0CFU_FC <- CFU_data %>%mutate(Collection_Label =ifelse(Time ==0, "initial", "final")) %>%group_by(Date, Line, bacteria) %>%reframe(Combined = Combined[Collection_Label =="final"],Date = Date[Collection_Label =="final"],Line = Line[Collection_Label =="final"],Temp = Temp[Collection_Label =="final"],Time = Time[Collection_Label =="final"],bacteria = bacteria[Collection_Label =="final"],bacteria_label = bacteria_label[Collection_Label =="final"],Temp_label = Temp_label[Collection_Label =="final"],log2FC =log2(NewCFU[Collection_Label =="final"]/NewCFU[Collection_Label =="initial"])) %>%droplevels() %>%mutate(Temp =factor(Temp))
Saving files
# Save data frame as CSV files in the output folderwrite_csv(CFU_data, file.path(dataframes_folder, "CFU_values.csv"))write_csv(CFU_FC, file.path(dataframes_folder, "CFU_FC.csv"))# Save data frame as R objects in the output foldersaveRDS(CFU_data, file.path(dataframes_folder, "CFU_values.rds"))saveRDS(CFU_FC, file.path(dataframes_folder, "CFU_FC.rds"))# Cleaning-up all objects from the environmentrm(list =ls())# Use this to read the final objectsCFU_data <-readRDS("data/dataframes/CFU_values.rds")CFU_FC <-readRDS("data/dataframes/CFU_FC.rds")
Stats and Plots
File Paths
# Folder pathsdataframes_path <-"data/dataframes"metadata_path <-"data/metadata/CFUs"# Create subfolders for output filesfigures_folder <-"data/outputs/CFUs/figures"if (!file.exists("data/outputs/CFUs/figures")) {dir.create("data/outputs/CFUs/figures", recursive =TRUE)}stats_folder <-"data/outputs/CFUs/stats"if (!file.exists("data/outputs/CFUs/stats")) {dir.create("data/outputs/CFUs/stats", recursive =TRUE)}# Load data and metadataCFU_data <-readRDS("data/dataframes/CFU_values.rds")CFU_FC <-readRDS("data/dataframes/CFU_FC.rds")Bac_order <-read_csv(file.path(metadata_path, "Order_Bacteria_CFUs.csv"))
Analysis for each Temp
# Function to analyze each temp conditionanalysis_function <-function(data, each_temp, cutoff_pvalue, cutoff_FC, color_error) {# Subset the data to the selected temp data_subset <- data %>%filter(Temp == each_temp) data_stats <- data_subset # Mixed-effects model with random effects model <-lmer(log(NewCFU) ~ bacteria * Time + (1|Line) + (1|Line:Date), data = data_stats)#Anova anova <-anova(model) anova_df <-as.data.frame(anova) %>%mutate(sign =case_when(`Pr(>F)`< cutoff_pvalue ~"*",TRUE~"")) %>%mutate_if(is.numeric, ~format(., digits =2, scientific =TRUE))# Calculate Individual contrasts emmeans_model <-emmeans(model, ~ bacteria * Time) emmeans_time <-pairs(emmeans_model, simple ="Time", adjust ="none") # Extract random effects and convert to dataframe (if not singular) random_effects_df <-as.data.frame(VarCorr(model)) %>%mutate(proportion =round(100* (vcov /sum(vcov)), 2))# Adds predictions based on fixed effects, averaged over random effects. It gives a population estimate data_stats <-cbind(data_stats, predval =predict(model,re.form =NA, se.fit =TRUE)) data_summary_df <- data_stats %>%group_by(Combined, bacteria_label) %>%summarize(mean.real =mean(NewCFU),mean.predval =mean(predval.fit), mean.predval.se =mean(predval.se.fit)) %>%mutate(exp.mean.predval =exp(mean.predval),max =exp(mean.predval +2*mean.predval.se),min =exp(mean.predval -2*mean.predval.se))# Extract time contrasts from emmeans_model, convert to dataframe and adjust pvalues. Remove the 0-48h contrasts contrasts_time_df <-as.data.frame(summary(emmeans_time)) %>%mutate(Temp = each_temp) %>%filter(contrast !="Time0 - Time48") %>%mutate(p.adj.holm =p.adjust(p.value, method ="holm")) %>%mutate(sign =case_when( p.adj.holm < cutoff_pvalue ~"*",TRUE~""))# Edits to the contrast dataframe to include pvalue brackets in plot contrasts_time_df <- contrasts_time_df %>%separate(contrast, into =c("Time1", "Time2"), sep =" - ") %>%mutate(Time1 =sub("Time", "", Time1),Time2 =sub("Time", "", Time2)) %>%mutate(condition1 =paste(bacteria, Time1, sep ="."),condition2 =paste(bacteria, Time2, sep =".")) contrasts_time_df$group1 <- Bac_order$bacteria_label[match(contrasts_time_df$condition1, Bac_order$Combined)] contrasts_time_df$group2 <- Bac_order$bacteria_label[match(contrasts_time_df$condition2, Bac_order$Combined)]# Calculate fold-change values for each contrast contrasts_time_df <- contrasts_time_df %>%ungroup() %>%left_join(select(data_summary_df, Combined, exp.mean.predval), by =join_by(condition1 == Combined)) %>%left_join(select(data_summary_df, Combined, exp.mean.predval), by =join_by(condition2 == Combined), suffix =c(".1", ".2")) %>%mutate(FC = exp.mean.predval.1/ exp.mean.predval.2,FC =if_else(FC <1, -1/ FC, FC),highlighted =case_when( FC <=-cutoff_FC ~"+", FC >= cutoff_FC ~"-",TRUE~"")) # Select p values to plot and define their location contrast_sign <- contrasts_time_df %>%filter(sign !=""& highlighted !="") %>%mutate(p.adj.holm =format(p.adj.holm, digits =2, scientific =TRUE)) location <-log10(max(data_subset$NewCFU, na.rm =TRUE)) *1.1# Standard Boxplot plot_1 <-ggplot() +geom_boxplot(data = data_subset, aes(x = bacteria_label, y = NewCFU, fill = bacteria_label)) +geom_jitter(data = data_subset, aes(x = bacteria_label, y = NewCFU, shape = Line), fill ="grey50", color ="grey30", size =2, width =0.05, stroke =0.75) +scale_fill_manual(values =c("#800080","#800080","#800080","#1E90FF","#1E90FF","#1E90FF", "#927ed1","#927ed1","#927ed1")) +scale_shape_manual(values =c(21, 22, 24)) +scale_y_log10(breaks =trans_breaks("log10", function(x) 10^x),labels =trans_format("log10", math_format(10^.x))) +labs(x ="Hours post-inoculation",y =paste0("CFUs/HNO at ", each_temp, " °C")) +theme_bw() +theme(panel.grid =element_blank(),legend.position ="none",text =element_text(size =20), axis.text.x =element_markdown(), axis.text.y =element_text(color ="black"))# Plot with predicted means and standard errors of the estimates plot_2 <-ggplot() +geom_point(data = data_subset, aes(x = bacteria_label, y = NewCFU, fill = bacteria_label, color = bacteria_label, shape = Line, group = Line), position =position_jitterdodge(dodge.width =0.7, jitter.width =0.2),size =3, alpha =0.75, stroke =0.75) +geom_point(data = data_summary_df, aes(x = bacteria_label, y = exp.mean.predval), shape =3, size =3, color = color_error) +geom_errorbar(data = data_summary_df, aes(x = bacteria_label,y = exp.mean.predval,ymax = max,ymin = min),width =0.5, color = color_error) +scale_fill_manual(values =c("#800080","#800080","#800080","#1E90FF","#1E90FF","#1E90FF","#927ed1","#927ed1","#927ed1")) +scale_color_manual(values =c("#800080","#800080","#800080","#1E90FF","#1E90FF","#1E90FF","#927ed1","#927ed1","#927ed1")) +scale_shape_manual(values =c(21, 22, 24)) +scale_y_log10(breaks =trans_breaks("log10", function(x) 10^x),labels =trans_format("log10", math_format(10^.x)),expand =c(0.1,0)) +labs(x ="Hours post-inoculation",y =paste0("CFUs/HNO at ", each_temp, " °C")) +theme_bw() +theme(panel.grid =element_blank(), legend.position ="none",text =element_text(size =20), axis.text.x =element_markdown(), axis.text.y =element_text(color ="black"))# Conditionally add p-value annotations layerif (nrow(contrast_sign) >0) { plot_2 <- plot_2 +stat_pvalue_manual(contrast_sign, label ="p.adj.holm", y.position = location,tip.length =0.02, bracket.shorten =0.2, vjust =-0.2, bracket.size =0.3, size =2.5) } else { plot_2 <- plot_2 }# Arrange plot and tables for summary pdf table <- contrasts_time_df %>%select(condition1, condition2, p.adj.holm, sign, exp.mean.predval.1, exp.mean.predval.2, FC, highlighted) %>%mutate(p.adj.holm =format(p.adj.holm, digits =2, scientific =TRUE)) Tmin <-ttheme_minimal() panel <-ggarrange(plot_1 +theme(plot.margin =unit(c(0.25,0.25,0.25,0.25), "in")), plot_2 +theme(plot.margin =unit(c(0.25,0.25,0.25,0.25), "in")),tableGrob(anova_df, theme = Tmin), tableGrob(random_effects_df, theme = Tmin, rows =NULL), tableGrob(table, theme = Tmin, rows =NULL), ncol =1, heights =c(0.7, 0.7, 0.2, 0.2, 0.2),labels =c(" Standard Boxplot ", "Predicted Mean ± 2*SE", " Anova ", "Random Effects", " Contrasts ")) panel <-annotate_figure(panel, top =text_grob(paste0("Analysis for ", each_temp, "C. P-value: ", cutoff_pvalue, " and FC: ", cutoff_FC),face ="bold", size =14, color ="red"))# Save filesggsave(panel, filename =paste0(figures_folder, "/summaryCFU_", each_temp, ".pdf"), width =10, height =15)ggsave(plot_2, filename =paste0(figures_folder, "/plotCFU_", each_temp, ".png"), width =7, height =6)saveRDS(plot_2, file.path(figures_folder, paste0("plotCFU_", each_temp, ".rds")))write_csv(anova_df, file.path(stats_folder, paste0("anova_", each_temp, ".csv")))write_csv(random_effects_df, file.path(stats_folder, paste0("stats_random_effects_", each_temp, ".csv")))write_csv(contrasts_time_df, file.path(stats_folder, paste0("stats_contrasts_", each_temp, ".csv")))write_csv(data_summary_df, file.path(stats_folder, paste0("stats_summary_", each_temp, ".csv")))return(list(anova = anova_df,random_effects = random_effects_df,contrasts_time = contrasts_time_df,#data_summary = data_summary_df,#data_stats = data_stats,plot_1 = plot_1,plot_2 = plot_2 ))}
pdf_output <-file.path(figures_folder, "HNOBac_SummaryCFUs.pdf")# Check if the file exists, and delete if it doesif (file.exists(pdf_output)) {file.remove(pdf_output)}
[1] TRUE
# Now combine the PDFs into a new filepdf_files <-list.files(figures_folder, pattern ="\\.pdf$", full.names =TRUE)qpdf::pdf_combine(input = pdf_files, output = pdf_output)