All data inputs and outputs in this repo are relative to FOLDER_PATH. Please, save the provided data frames in RDS format in the dataframes folder.
# Define directory paths based on R.environFOLDER_PATH <-Sys.getenv("BOX_PATH_RSVBacPublication") dataframes_folder <-file.path(FOLDER_PATH, "dataframes")# Create subfolder for output filesoutputs_folder <-file.path(FOLDER_PATH, "outputs/CFUs")if (!file.exists(outputs_folder)) {dir.create(outputs_folder, recursive =TRUE)}# ColorsBac.colors <-c("#a89de0", "#f54590", "#2e67f2")
# Read the provided RDS filedataframeID <-"RSVbac"CFU_data <-readRDS(file.path(dataframes_folder, paste0(dataframeID, "_CFU_values.rds")))
Statistics and Plots
Analysis with a mixed-effects model with random effects for line and date and the interaction of fixed effects for species and the Time.Virus combined condition. If the model is singular, it is rerun with only date as random effect. The contrasts are calculated using emmeans with Holm adjustment for the virus comparisons for each bacteria. Predicted values are backtransformed to CFU scale for plotting. Highlighted contrasts are those with a cutoff p-value of 0.05.
# Function to analyze the data and plot statsanalysis_function <-function(data, conditions_label_col, combined_col, cutoff_pvalue, cutoff_FC) {# Mixed-effects model with random effects model <-lmer(log(NewCFU) ~ species * Time.Virus + (1|line) + (1|line:date), data = data)# Extract random effects data and convert to dataframe singular =isSingular(model) random_effects_df <-as.data.frame(VarCorr(model)) %>%mutate(proportion =round(100* (vcov /sum(vcov)), 2)) %>%mutate(singular = singular) # If model is singular, rerun with only date as random effectif (singular) { model <-lmer(log(NewCFU) ~ species * Time.Virus + (1|date), data = data) }# Run ANOVA and extract pvalues anova <-anova(model) anova_df <-as.data.frame(anova) # Extract fixed effects coefficients and convert to dataframe fixed_effects_df <-as.data.frame(summary(model)$coefficients) %>%rownames_to_column(var ="term")# Generate contrasts for Time.Virus comparisons within each bacteria using emmeans with Holm adjustment emmeans_model <-emmeans(model, ~ species * Time.Virus) emmeans_Time.Virus <-pairs(emmeans_model, simple ="Time.Virus", adjust ="holm")# Extract virus contrasts from emmeans_model and format the dataframe contrasts_df <-as.data.frame(summary(emmeans_Time.Virus)) %>%mutate(contrast =str_replace_all(contrast, "\\s+", "")) %>%separate(contrast, into =c("Time.Virus1", "Time.Virus2"), sep ="-") %>%mutate(condition1 =paste(species, Time.Virus1, sep ="."),condition2 =paste(species, Time.Virus2, sep ="."),contrast =paste(condition1, condition2, sep =" - ")) %>%select(-species, Time.Virus1, Time.Virus2) %>%relocate(contrast)# Use the specified columns for group labels and matching contrasts_df$group1 <- data[[conditions_label_col]][match(contrasts_df$condition1, data[[combined_col]])] contrasts_df$group2 <- data[[conditions_label_col]][match(contrasts_df$condition2, data[[combined_col]])]# Adds predictions based on fixed effects, averaged over random effects. It gives a population estimate data <-cbind(data, predval =predict(model,re.form =NA, se.fit =TRUE))# Backtransform the predicted values and CIs transf_preds_df <- data %>%group_by(!!sym(combined_col), Time.Virus, !!sym(conditions_label_col), species, virus) %>%summarise(mean.predval.fit =mean(predval.fit),mean.predval.se.fit =mean(predval.se.fit),mean.CFU =mean(NewCFU),.groups ='drop') %>%mutate(pCFUs =exp(mean.predval.fit),pCFUs_min =exp(mean.predval.fit -2*mean.predval.se.fit),pCFUs_max =exp(mean.predval.fit +2*mean.predval.se.fit)) %>%mutate(condition =interaction(species, Time.Virus, sep ="."))# Calculate fold-change values for each contrast and highlight the most relevant is pvalue < cutoff and FC > cutoff contrasts_df <- contrasts_df %>%ungroup() %>%left_join(select(transf_preds_df, condition, pCFUs), by =join_by(condition1 == condition)) %>%left_join(select(transf_preds_df, condition, pCFUs), by =join_by(condition2 == condition), suffix =c(".1", ".2")) %>%mutate(FC = pCFUs.1/ pCFUs.2,log10FC =log10(FC),FC =if_else(FC <1, 1/ FC, -FC),highlighted =case_when( p.value < cutoff_pvalue &abs(log10FC) >= cutoff_FC ~"*",TRUE~"")) %>%select(contrast, condition1, condition2, p.value, pCFUs.1, pCFUs.2, FC, log10FC, highlighted, group1, group2)return(list(anova = anova,random_effects = random_effects_df,fixed_effects = fixed_effects_df, contrasts = contrasts_df,transf_preds = transf_preds_df))}
CFU_data_RSV <- CFU_data %>%filter(timecol !=0) %>%droplevels()# Create a combined condition variable for Time and Virus with timeinfCFU_data_RSV <- CFU_data_RSV %>%mutate(Time.Virus =factor(paste(timeinf, virus, sep =".")))
CFU_data_nonRSV <- CFU_data %>%filter(virus =="F") %>%droplevels()# Create a combined condition variable for Time and Virus with timecolCFU_data_nonRSV <- CFU_data_nonRSV %>%mutate(Time.Virus =factor(paste(timecol, virus, sep =".")))