RSVBac
  1. Bacterial CFUs
  • Overview
  • Bacterial CFUs
  • RSV PFUs
  • RSV RT-qPCR
  • Caspase
  • RNASeq
  • Cytokines
  • R Session Info
  • References

Table of contents

  • Data Input
  • Statistics and Plots
    • RSV Samples without Inoculum
    • Non-RSV Samples with Inoculum

Bacterial CFUs

# loading packages
library(tidyverse)
library(skimr)
library(ggtext)
library(scales)
library(lme4)
library(lmerTest) 
library(emmeans)
library(knitr)
library(ggpubr)

Data Input

All data inputs and outputs in this repo are relative to FOLDER_PATH. Please, save the provided data frames in RDS format in the dataframes folder.

# Define directory paths based on R.environ
FOLDER_PATH <- Sys.getenv("BOX_PATH_RSVBacPublication") 
dataframes_folder <- file.path(FOLDER_PATH, "dataframes")

# Create subfolder for output files
outputs_folder <- file.path(FOLDER_PATH, "outputs/CFUs")
if (!file.exists(outputs_folder)) {
  dir.create(outputs_folder, recursive = TRUE)
}

# Colors
Bac.colors <- c("#a89de0", "#f54590", "#2e67f2")
# Read the provided RDS file
dataframeID <- "RSVbac"
CFU_data <- readRDS(file.path(dataframes_folder, paste0(dataframeID, "_CFU_values.rds")))

Statistics and Plots

Analysis with a mixed-effects model with random effects for line and date and the interaction of fixed effects for species and the Time.Virus combined condition. If the model is singular, it is rerun with only date as random effect. The contrasts are calculated using emmeans with Holm adjustment for the virus comparisons for each bacteria. Predicted values are backtransformed to CFU scale for plotting. Highlighted contrasts are those with a cutoff p-value of 0.05.

# Function to analyze the data and plot stats
analysis_function <- function(data, conditions_label_col, combined_col, cutoff_pvalue, cutoff_FC) {
  
  # Mixed-effects model with random effects
  model <- lmer(log(NewCFU) ~ species * Time.Virus 
                + (1|line) + (1|line:date), 
                data = data)
  
  # Extract random effects data and convert to dataframe
  singular = isSingular(model)
  random_effects_df <- as.data.frame(VarCorr(model)) %>%
    mutate(proportion = round(100 * (vcov / sum(vcov)), 2)) %>%
    mutate(singular = singular) 
  
  # If model is singular, rerun with only date as random effect
  if (singular) {
    model <- lmer(log(NewCFU) ~ species * Time.Virus 
                  + (1|date), 
                  data = data)
  }
  
  # Run ANOVA and extract pvalues
  anova <- anova(model)
  anova_df <- as.data.frame(anova) 
  
  # Extract fixed effects coefficients and convert to dataframe
  fixed_effects_df <- as.data.frame(summary(model)$coefficients) %>%
    rownames_to_column(var = "term")
  
  # Generate contrasts for Time.Virus comparisons within each bacteria using emmeans with Holm adjustment
  emmeans_model <- emmeans(model, ~ species * Time.Virus)
  emmeans_Time.Virus <- pairs(emmeans_model, simple = "Time.Virus", adjust = "holm")
  
  # Extract virus contrasts from emmeans_model and format the dataframe
  contrasts_df <- as.data.frame(summary(emmeans_Time.Virus)) %>%
    mutate(contrast = str_replace_all(contrast, "\\s+", "")) %>%
    separate(contrast, into = c("Time.Virus1", "Time.Virus2"), sep = "-") %>%
    mutate(condition1 = paste(species, Time.Virus1, sep = "."),
           condition2 = paste(species, Time.Virus2, sep = "."),
           contrast = paste(condition1, condition2, sep = " - ")) %>%
    select(-species, Time.Virus1, Time.Virus2) %>%
    relocate(contrast)
  
  # Use the specified columns for group labels and matching
  contrasts_df$group1 <- data[[conditions_label_col]][match(contrasts_df$condition1, data[[combined_col]])]
  contrasts_df$group2 <- data[[conditions_label_col]][match(contrasts_df$condition2, data[[combined_col]])]
  
  # Adds predictions based on fixed effects, averaged over random effects. It gives a population estimate
  data <- cbind(data, predval = predict(model,re.form = NA, se.fit = TRUE))
  
  # Backtransform the predicted values and CIs
  transf_preds_df <- data %>% 
    group_by(!!sym(combined_col), Time.Virus, !!sym(conditions_label_col), species, virus) %>%
    summarise(mean.predval.fit = mean(predval.fit),
              mean.predval.se.fit = mean(predval.se.fit),
              mean.CFU = mean(NewCFU),
              .groups = 'drop') %>%
    mutate(pCFUs = exp(mean.predval.fit),
           pCFUs_min = exp(mean.predval.fit - 2*mean.predval.se.fit),
           pCFUs_max = exp(mean.predval.fit + 2*mean.predval.se.fit)) %>%
    mutate(condition = interaction(species, Time.Virus, sep = "."))
  
  # Calculate fold-change values for each contrast and highlight the most relevant is pvalue < cutoff and FC > cutoff
  contrasts_df <- contrasts_df %>%
    ungroup() %>%
    left_join(select(transf_preds_df, condition, pCFUs), by = join_by(condition1 == condition)) %>%
    left_join(select(transf_preds_df, condition, pCFUs), by = join_by(condition2 == condition), suffix = c(".1", ".2")) %>%
    mutate(FC = pCFUs.1 / pCFUs.2,
           log10FC = log10(FC),
           FC = if_else(FC < 1, 1 / FC, -FC),
           highlighted = case_when(
             p.value < cutoff_pvalue & abs(log10FC) >= cutoff_FC ~ "*",
             TRUE ~ "")) %>%
    select(contrast, condition1, condition2, p.value, pCFUs.1, pCFUs.2, FC, log10FC, highlighted, group1, group2)
  
  return(list(anova = anova,
              random_effects = random_effects_df,
              fixed_effects = fixed_effects_df, 
              contrasts = contrasts_df,
              transf_preds = transf_preds_df))
}
# Function to plot the CFUs using the statistical model
plot_function <- function(true_data, stats_data, contrasts, conditions_label_col, 
                          ymax_exp, ypval, caption_text) {
  
  # Plot
  plot <- ggplot() +
    
    geom_jitter(data = true_data, 
                aes(x = .data[[conditions_label_col]], y = NewCFU, color = species,
                    shape = line),
                width = 0.2, size = 3, alpha = 0.85, stroke = 0.75, show.legend = TRUE) +   
    
    scale_color_manual(values = Bac.colors) +
    scale_shape_manual(values = c(19,17)) +
    
    scale_y_log10(limits = c(NA, 10^ymax_exp),
                  breaks = 10^(1:ymax_exp), 
                  labels = trans_format("log10", math_format(10^.x))) +
    
    geom_point(data = stats_data, aes(x = .data[[conditions_label_col]], y = pCFUs), shape = 3, size = 3) +
    geom_errorbar(data = stats_data, width = 0.5,
                  aes(x = .data[[conditions_label_col]], y = pCFUs, ymax = pCFUs_max, ymin = pCFUs_min)) +
    geom_hline(yintercept = 7.5, linetype = "dashed", color = "black", linewidth = 0.8) +
    
    # General style
    scale_x_discrete(expand = c(0,0.5)) +
    labs(x = "",
         y = "Bacterial CFUs / organoid",
         caption = caption_text,
         color = "Bacteria", shape = "HNO Line") +  
    theme_bw() +
    theme(panel.grid = element_blank(), 
          legend.text = element_markdown(),
          text = element_text(size = 18, color = "black"),
          plot.caption = element_text(size = 13, face = "bold", hjust = -0.13, vjust = 24),
          axis.text.x = element_markdown(),
          axis.text.y = element_text(color = "black"),
          plot.margin = unit(c(0.1,0.1,-0.8,0.1), "in"))
  
  # Add significance asterisks for highlighted contrasts
  contrast_sign <- contrasts %>%
    filter(highlighted != "") %>%
    mutate(plot = "*")
  
  if (nrow(contrast_sign) > 0) {
    plot <- plot +
      stat_pvalue_manual(contrast_sign, label = "plot", y.position = ypval, 
                         step.increase = 0.04, tip.length = 0.01, bracket.shorten = 0.2, vjust = 0.6, bracket.size = 0.5)
  }
  
  return(plot)
} 

RSV Samples without Inoculum

CFU_data_RSV <- CFU_data %>%
  filter(timecol != 0) %>%
  droplevels()

# Create a combined condition variable for Time and Virus with timeinf
CFU_data_RSV <- CFU_data_RSV %>%
  mutate(Time.Virus = factor(paste(timeinf, virus, sep = "."))) 
all_stats_RSV <- analysis_function(data = CFU_data_RSV, cutoff_pvalue = 0.05, cutoff_FC = 0,
                                   conditions_label_col = "label_dpi_conditions", combined_col = "Combined_dpi")
kable(all_stats_RSV$anova)
Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
species 351.9545 175.97726 2 75 18.987097 0.0000002
Time.Virus 770.6863 385.34317 2 75 41.576669 0.0000000
species:Time.Virus 163.1083 40.77707 4 75 4.399649 0.0029996
kable(all_stats_RSV$random_effects)
grp var1 var2 vcov sdcor proportion singular
line:date (Intercept) NA 0.0000000 0.0000000 0.00 TRUE
line (Intercept) NA 0.6263916 0.7914491 6.56 TRUE
Residual NA NA 8.9264855 2.9877225 93.44 TRUE
kable(all_stats_RSV$fixed_effects)
term Estimate Std. Error df t value Pr(>|t|)
(Intercept) 16.9971239 1.014794 75 16.7493404 0.0000000
speciesHin -0.0264388 1.435135 75 -0.0184225 0.9853507
speciesDpi -2.4769512 1.435135 75 -1.7259361 0.0884767
Time.Virus4.F -1.9086463 1.368348 75 -1.3948550 0.1671770
Time.Virus4.T -9.8855892 1.435135 75 -6.8882648 0.0000000
speciesHin:Time.Virus4.F -0.6030711 1.982924 75 -0.3041322 0.7618694
speciesDpi:Time.Virus4.F -2.8007385 1.982924 75 -1.4124287 0.1619599
speciesHin:Time.Virus4.T 6.4019826 2.029587 75 3.1543274 0.0023148
speciesDpi:Time.Virus4.T 0.9290200 2.004057 75 0.4635697 0.6442986
kable(all_stats_RSV$contrasts %>% select(-group1, -group2, -condition1, -condition2))
contrast p.value pCFUs.1 pCFUs.2 FC log10FC highlighted
Spn.0.F - Spn.4.F 0.1685166 24085581.06 3571433.4166 -6.743953 0.8289146
Spn.0.F - Spn.4.T 0.0000000 24085581.06 1226.0277 -19645.218360 4.2932569 *
Spn.4.F - Spn.4.T 0.0000004 3571433.42 1226.0277 -2913.012112 3.4643423 *
Hin.0.F - Hin.4.F 0.1694047 23457130.59 1903048.5793 -12.326081 1.0908250
Hin.0.F - Hin.4.T 0.0537703 23457130.59 720051.8234 -32.577003 1.5129111
Hin.4.F - Hin.4.T 0.5006209 1903048.58 720051.8234 -2.642933 0.4220861
Dpi.0.F - Dpi.4.F 0.0033576 2023163.16 18229.3441 -110.983870 2.0452599 *
Dpi.0.F - Dpi.4.T 0.0000001 2023163.16 260.7608 -7758.693279 3.8897886 *
Dpi.4.F - Dpi.4.T 0.0034525 18229.34 260.7608 -69.908297 1.8445287 *
saveRDS(all_stats_RSV, file = file.path(outputs_folder,  paste0(dataframeID, "_CFU_stats.rds")))
CFU_plot <- plot_function(true_data = CFU_data_RSV, stats_data = all_stats_RSV$transf_preds, 
                          contrasts = all_stats_RSV$contrasts, ymax_exp = 9, ypval = 7.75, 
                          conditions_label_col = "label_dpi_conditions", 
                          caption_text = paste("RSV", "Bact", "DPVI", sep = "\n"))
CFU_plot

ggsave(CFU_plot, filename = file.path(outputs_folder,  paste0(dataframeID, "_CFU.png")), width = 7, height = 6)
saveRDS(CFU_plot, file = file.path(outputs_folder,  paste0(dataframeID, "_CFU.rds")))

Non-RSV Samples with Inoculum

Inoculum CFU values at time 0h for each species and line:

CFU_data %>%
  filter(timecol == 0) %>%
  group_by(species, line) %>%
  skim(NewCFU) %>%
  yank("numeric")

Variable type: numeric

skim_variable species line n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
NewCFU Spn HNO9007 0 1 20906250 11218184 13875000 14156250 16125000 22875000 37500000 ▇▁▁▁▂
NewCFU Spn HNO9009 0 1 38100000 43938736 13500000 17250000 17250000 26250000 116250000 ▇▁▁▁▂
NewCFU Hin HNO9007 0 1 61500000 19731795 44625000 51937500 55687500 65250000 90000000 ▃▇▁▁▃
NewCFU Hin HNO9009 0 1 53475000 13721106 33750000 46125000 56250000 63750000 67500000 ▃▃▁▃▇
NewCFU Dpi HNO9007 0 1 35343750 6717805 25875000 33187500 37125000 39281250 41250000 ▃▁▁▃▇
NewCFU Dpi HNO9009 0 1 36937500 8374347 27375000 30000000 37312500 42093750 48375000 ▇▁▇▃▃
CFU_data_nonRSV <- CFU_data %>%
  filter(virus == "F") %>%
  droplevels()

# Create a combined condition variable for Time and Virus with timecol
CFU_data_nonRSV <- CFU_data_nonRSV %>%
  mutate(Time.Virus = factor(paste(timecol, virus, sep = ".")))
all_stats_nonRSV <- analysis_function(CFU_data_nonRSV, cutoff_pvalue = 0.05, cutoff_FC = 0,
                                      conditions_label_col = "label_dpc_conditions", combined_col = "Combined_dpc")
kable(all_stats_nonRSV$anova)
Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
species 113.89477 56.94738 2 68.61274 20.887315 1.00e-07
Time.Virus 273.47445 136.73722 2 67.05532 50.152850 0.00e+00
species:Time.Virus 84.42156 21.10539 4 67.00856 7.741092 3.44e-05
kable(all_stats_nonRSV$random_effects)
grp var1 var2 vcov sdcor proportion singular
line:date (Intercept) NA 0.0000000 0.000000 0.00 TRUE
line (Intercept) NA 0.1096743 0.331171 3.92 TRUE
Residual NA NA 2.6899929 1.640120 96.08 TRUE
kable(all_stats_nonRSV$fixed_effects)
term Estimate Std. Error df t value Pr(>|t|)
(Intercept) 16.9431044 0.5527593 74.95795 30.6518686 0.0000000
speciesHin 0.8845574 0.7783758 66.49719 1.1364144 0.2598621
speciesDpi 0.4446977 0.7588349 67.29962 0.5860269 0.5598177
Time.Virus1.F 0.0568043 0.7783758 66.49719 0.0729780 0.9420427
Time.Virus5.F -1.8482291 0.7423405 67.36686 -2.4897324 0.0152581
speciesHin:Time.Virus1.F -0.9109963 1.1007896 66.49719 -0.8275844 0.4108661
speciesDpi:Time.Virus1.F -2.9262625 1.0870552 66.86065 -2.6919171 0.0089681
speciesHin:Time.Virus5.F -1.5176803 1.0756106 66.91516 -1.4109941 0.1628788
speciesDpi:Time.Virus5.F -5.7270263 1.0615042 67.15222 -5.3951987 0.0000010
kable(all_stats_nonRSV$contrasts %>% select(-group1, -group2, -condition1, -condition2))
contrast p.value pCFUs.1 pCFUs.2 FC log10FC highlighted
Spn.0.F - Spn.1.F 0.9420428 22819008 24152748.95 1.058449 -0.0246698
Spn.0.F - Spn.5.F 0.0380182 22819008 3594355.92 -6.348567 0.8026757 *
Spn.1.F - Spn.5.F 0.0380182 24152749 3594355.92 -6.719632 0.8273455 *
Hin.0.F - Hin.1.F 0.2764258 55265635 23522545.92 -2.349475 0.3709708
Hin.0.F - Hin.5.F 0.0001575 55265635 1908355.65 -28.959819 1.4617958 *
Hin.1.F - Hin.5.F 0.0038881 23522546 1908355.65 -12.326081 1.0908250 *
Dpi.0.F - Dpi.1.F 0.0003397 35598074 2019466.52 -17.627464 1.2461898 *
Dpi.0.F - Dpi.5.F 0.0000000 35598074 18261.43 -1949.357987 3.2898916 *
Dpi.1.F - Dpi.5.F 0.0000002 2019467 18261.43 -110.586410 2.0437018 *
saveRDS(all_stats_nonRSV, file = file.path(outputs_folder,  paste0(dataframeID, "_CFU_nonRSV_stats.rds")))
CFU_plot_nonRSV <- plot_function(true_data = CFU_data_nonRSV, stats_data = all_stats_nonRSV$transf_preds, 
                                 contrasts = all_stats_nonRSV$contrasts, ymax_exp = 10, ypval = 8,
                                 conditions_label_col = "label_dpc_conditions",
                                 caption_text = paste("RSV", "Bact", "DPC", sep = "\n"))
CFU_plot_nonRSV

ggsave(CFU_plot_nonRSV, filename = file.path(outputs_folder,  paste0(dataframeID, "_CFU_nonRSV.png")), width = 7, height = 6)
saveRDS(CFU_plot_nonRSV, file = file.path(outputs_folder,  paste0(dataframeID, "_CFU_nonRSV.rds")))
Overview
RSV PFUs
Source Code
---
format:
  html:
    suppress-bibliography: true
---

# Bacterial CFUs {.unnumbered}

```{r}
# loading packages
library(tidyverse)
library(skimr)
library(ggtext)
library(scales)
library(lme4)
library(lmerTest) 
library(emmeans)
library(knitr)
library(ggpubr)
```

## Data Input 

All data inputs and outputs in this repo are relative to **FOLDER_PATH**. Please, save the provided data frames in RDS format in the `dataframes` folder. 
```{r}
# Define directory paths based on R.environ
FOLDER_PATH <- Sys.getenv("BOX_PATH_RSVBacPublication") 
dataframes_folder <- file.path(FOLDER_PATH, "dataframes")

# Create subfolder for output files
outputs_folder <- file.path(FOLDER_PATH, "outputs/CFUs")
if (!file.exists(outputs_folder)) {
  dir.create(outputs_folder, recursive = TRUE)
}

# Colors
Bac.colors <- c("#a89de0", "#f54590", "#2e67f2")
```

```{r}
# Read the provided RDS file
dataframeID <- "RSVbac"
CFU_data <- readRDS(file.path(dataframes_folder, paste0(dataframeID, "_CFU_values.rds")))
```

## Statistics and Plots

Analysis with a mixed-effects model with random effects for `line` and `date` and the interaction of fixed effects for `species` and the `Time.Virus` combined condition. If the model is singular, it is rerun with only `date` as random effect. The contrasts are calculated using emmeans with Holm adjustment for the virus comparisons for each bacteria. Predicted values are backtransformed to CFU scale for plotting. Highlighted contrasts are those with a cutoff p-value of 0.05.

```{r}
# Function to analyze the data and plot stats
analysis_function <- function(data, conditions_label_col, combined_col, cutoff_pvalue, cutoff_FC) {
  
  # Mixed-effects model with random effects
  model <- lmer(log(NewCFU) ~ species * Time.Virus 
                + (1|line) + (1|line:date), 
                data = data)
  
  # Extract random effects data and convert to dataframe
  singular = isSingular(model)
  random_effects_df <- as.data.frame(VarCorr(model)) %>%
    mutate(proportion = round(100 * (vcov / sum(vcov)), 2)) %>%
    mutate(singular = singular) 
  
  # If model is singular, rerun with only date as random effect
  if (singular) {
    model <- lmer(log(NewCFU) ~ species * Time.Virus 
                  + (1|date), 
                  data = data)
  }
  
  # Run ANOVA and extract pvalues
  anova <- anova(model)
  anova_df <- as.data.frame(anova) 
  
  # Extract fixed effects coefficients and convert to dataframe
  fixed_effects_df <- as.data.frame(summary(model)$coefficients) %>%
    rownames_to_column(var = "term")
  
  # Generate contrasts for Time.Virus comparisons within each bacteria using emmeans with Holm adjustment
  emmeans_model <- emmeans(model, ~ species * Time.Virus)
  emmeans_Time.Virus <- pairs(emmeans_model, simple = "Time.Virus", adjust = "holm")
  
  # Extract virus contrasts from emmeans_model and format the dataframe
  contrasts_df <- as.data.frame(summary(emmeans_Time.Virus)) %>%
    mutate(contrast = str_replace_all(contrast, "\\s+", "")) %>%
    separate(contrast, into = c("Time.Virus1", "Time.Virus2"), sep = "-") %>%
    mutate(condition1 = paste(species, Time.Virus1, sep = "."),
           condition2 = paste(species, Time.Virus2, sep = "."),
           contrast = paste(condition1, condition2, sep = " - ")) %>%
    select(-species, Time.Virus1, Time.Virus2) %>%
    relocate(contrast)
  
  # Use the specified columns for group labels and matching
  contrasts_df$group1 <- data[[conditions_label_col]][match(contrasts_df$condition1, data[[combined_col]])]
  contrasts_df$group2 <- data[[conditions_label_col]][match(contrasts_df$condition2, data[[combined_col]])]
  
  # Adds predictions based on fixed effects, averaged over random effects. It gives a population estimate
  data <- cbind(data, predval = predict(model,re.form = NA, se.fit = TRUE))
  
  # Backtransform the predicted values and CIs
  transf_preds_df <- data %>% 
    group_by(!!sym(combined_col), Time.Virus, !!sym(conditions_label_col), species, virus) %>%
    summarise(mean.predval.fit = mean(predval.fit),
              mean.predval.se.fit = mean(predval.se.fit),
              mean.CFU = mean(NewCFU),
              .groups = 'drop') %>%
    mutate(pCFUs = exp(mean.predval.fit),
           pCFUs_min = exp(mean.predval.fit - 2*mean.predval.se.fit),
           pCFUs_max = exp(mean.predval.fit + 2*mean.predval.se.fit)) %>%
    mutate(condition = interaction(species, Time.Virus, sep = "."))
  
  # Calculate fold-change values for each contrast and highlight the most relevant is pvalue < cutoff and FC > cutoff
  contrasts_df <- contrasts_df %>%
    ungroup() %>%
    left_join(select(transf_preds_df, condition, pCFUs), by = join_by(condition1 == condition)) %>%
    left_join(select(transf_preds_df, condition, pCFUs), by = join_by(condition2 == condition), suffix = c(".1", ".2")) %>%
    mutate(FC = pCFUs.1 / pCFUs.2,
           log10FC = log10(FC),
           FC = if_else(FC < 1, 1 / FC, -FC),
           highlighted = case_when(
             p.value < cutoff_pvalue & abs(log10FC) >= cutoff_FC ~ "*",
             TRUE ~ "")) %>%
    select(contrast, condition1, condition2, p.value, pCFUs.1, pCFUs.2, FC, log10FC, highlighted, group1, group2)
  
  return(list(anova = anova,
              random_effects = random_effects_df,
              fixed_effects = fixed_effects_df, 
              contrasts = contrasts_df,
              transf_preds = transf_preds_df))
}
```

```{r}
# Function to plot the CFUs using the statistical model
plot_function <- function(true_data, stats_data, contrasts, conditions_label_col, 
                          ymax_exp, ypval, caption_text) {
  
  # Plot
  plot <- ggplot() +
    
    geom_jitter(data = true_data, 
                aes(x = .data[[conditions_label_col]], y = NewCFU, color = species,
                    shape = line),
                width = 0.2, size = 3, alpha = 0.85, stroke = 0.75, show.legend = TRUE) +   
    
    scale_color_manual(values = Bac.colors) +
    scale_shape_manual(values = c(19,17)) +
    
    scale_y_log10(limits = c(NA, 10^ymax_exp),
                  breaks = 10^(1:ymax_exp), 
                  labels = trans_format("log10", math_format(10^.x))) +
    
    geom_point(data = stats_data, aes(x = .data[[conditions_label_col]], y = pCFUs), shape = 3, size = 3) +
    geom_errorbar(data = stats_data, width = 0.5,
                  aes(x = .data[[conditions_label_col]], y = pCFUs, ymax = pCFUs_max, ymin = pCFUs_min)) +
    geom_hline(yintercept = 7.5, linetype = "dashed", color = "black", linewidth = 0.8) +
    
    # General style
    scale_x_discrete(expand = c(0,0.5)) +
    labs(x = "",
         y = "Bacterial CFUs / organoid",
         caption = caption_text,
         color = "Bacteria", shape = "HNO Line") +  
    theme_bw() +
    theme(panel.grid = element_blank(), 
          legend.text = element_markdown(),
          text = element_text(size = 18, color = "black"),
          plot.caption = element_text(size = 13, face = "bold", hjust = -0.13, vjust = 24),
          axis.text.x = element_markdown(),
          axis.text.y = element_text(color = "black"),
          plot.margin = unit(c(0.1,0.1,-0.8,0.1), "in"))
  
  # Add significance asterisks for highlighted contrasts
  contrast_sign <- contrasts %>%
    filter(highlighted != "") %>%
    mutate(plot = "*")
  
  if (nrow(contrast_sign) > 0) {
    plot <- plot +
      stat_pvalue_manual(contrast_sign, label = "plot", y.position = ypval, 
                         step.increase = 0.04, tip.length = 0.01, bracket.shorten = 0.2, vjust = 0.6, bracket.size = 0.5)
  }
  
  return(plot)
} 
```

### RSV Samples without Inoculum

```{r}
CFU_data_RSV <- CFU_data %>%
  filter(timecol != 0) %>%
  droplevels()

# Create a combined condition variable for Time and Virus with timeinf
CFU_data_RSV <- CFU_data_RSV %>%
  mutate(Time.Virus = factor(paste(timeinf, virus, sep = "."))) 
```

```{r}
all_stats_RSV <- analysis_function(data = CFU_data_RSV, cutoff_pvalue = 0.05, cutoff_FC = 0,
                                   conditions_label_col = "label_dpi_conditions", combined_col = "Combined_dpi")
kable(all_stats_RSV$anova)
kable(all_stats_RSV$random_effects)
kable(all_stats_RSV$fixed_effects)
kable(all_stats_RSV$contrasts %>% select(-group1, -group2, -condition1, -condition2))
saveRDS(all_stats_RSV, file = file.path(outputs_folder,  paste0(dataframeID, "_CFU_stats.rds")))
```

```{r}
#| fig-height: 6
#| fig-width: 7
CFU_plot <- plot_function(true_data = CFU_data_RSV, stats_data = all_stats_RSV$transf_preds, 
                          contrasts = all_stats_RSV$contrasts, ymax_exp = 9, ypval = 7.75, 
                          conditions_label_col = "label_dpi_conditions", 
                          caption_text = paste("RSV", "Bact", "DPVI", sep = "\n"))
CFU_plot

ggsave(CFU_plot, filename = file.path(outputs_folder,  paste0(dataframeID, "_CFU.png")), width = 7, height = 6)
saveRDS(CFU_plot, file = file.path(outputs_folder,  paste0(dataframeID, "_CFU.rds")))
```

### Non-RSV Samples with Inoculum

Inoculum CFU values at time 0h for each species and line:

```{r}
CFU_data %>%
  filter(timecol == 0) %>%
  group_by(species, line) %>%
  skim(NewCFU) %>%
  yank("numeric")
```

```{r}
CFU_data_nonRSV <- CFU_data %>%
  filter(virus == "F") %>%
  droplevels()

# Create a combined condition variable for Time and Virus with timecol
CFU_data_nonRSV <- CFU_data_nonRSV %>%
  mutate(Time.Virus = factor(paste(timecol, virus, sep = ".")))
```

```{r}
all_stats_nonRSV <- analysis_function(CFU_data_nonRSV, cutoff_pvalue = 0.05, cutoff_FC = 0,
                                      conditions_label_col = "label_dpc_conditions", combined_col = "Combined_dpc")
kable(all_stats_nonRSV$anova)
kable(all_stats_nonRSV$random_effects)
kable(all_stats_nonRSV$fixed_effects)
kable(all_stats_nonRSV$contrasts %>% select(-group1, -group2, -condition1, -condition2))
saveRDS(all_stats_nonRSV, file = file.path(outputs_folder,  paste0(dataframeID, "_CFU_nonRSV_stats.rds")))
```

```{r}
#| fig-height: 6
#| fig-width: 7

CFU_plot_nonRSV <- plot_function(true_data = CFU_data_nonRSV, stats_data = all_stats_nonRSV$transf_preds, 
                                 contrasts = all_stats_nonRSV$contrasts, ymax_exp = 10, ypval = 8,
                                 conditions_label_col = "label_dpc_conditions",
                                 caption_text = paste("RSV", "Bact", "DPC", sep = "\n"))
CFU_plot_nonRSV
ggsave(CFU_plot_nonRSV, filename = file.path(outputs_folder,  paste0(dataframeID, "_CFU_nonRSV.png")), width = 7, height = 6)
saveRDS(CFU_plot_nonRSV, file = file.path(outputs_folder,  paste0(dataframeID, "_CFU_nonRSV.rds")))
```