RSVBac
  1. RSV PFUs
  • Overview
  • Bacterial CFUs
  • RSV PFUs
  • RSV RT-qPCR
  • Caspase
  • RNASeq
  • Cytokines
  • R Session Info
  • References

Table of contents

  • Data Input
  • Time Course
    • Inoculum Summary
    • Plot
  • Virus + Bacteria at 4DPVI
    • Inoculum Summary
    • Stats and Plots
  • Spn Kinetics
    • Stats and Plots

RSV PFUs

# loading packages
library(tidyverse)
library(skimr)
library(ggtext)
library(scales)
library(lme4)
library(lmerTest)  
library(emmeans)
library(knitr)
library(ggpubr)

Data Input

All data inputs and outputs in this repo are relative to FOLDER_PATH. Please, save the provided data frames in RDS format in the dataframes folder.

# Define directory paths based on R.environ
FOLDER_PATH <- Sys.getenv("BOX_PATH_RSVBacPublication") 
dataframes_folder <- file.path(FOLDER_PATH, "dataframes")

# Create subfolder for output files
outputs_folder <- file.path(FOLDER_PATH, "outputs/PFUs")
if (!file.exists(outputs_folder)) {
  dir.create(outputs_folder, recursive = TRUE)
}

# Colors
Bac.colors <- c("#CC9564", "#a89de0", "#f54590", "#2e67f2")
Bac.colors.K <- c("#CC9564", "#a89de0")
Spn.colors.K <- c("#a89de0")
NB.colors.K <- c("#CC9564")
# Read the provided RDS files
dataframeID <- "RSVbac"
TimePFU_data <- readRDS(file.path(dataframes_folder, paste0(dataframeID, "_PFU_TimeCourse_values.rds")))
PFU_data <- readRDS(file.path(dataframes_folder, paste0(dataframeID, "_PFU_values.rds")))
PFU_SpnKinetics <- readRDS(file.path(dataframes_folder, paste0(dataframeID, "_PFU_SpnKinetics_values.rds")))

Time Course

Inoculum Summary

Summary stats for the inoculum samples (species = “In”).

TimePFU_data %>%
  filter(species == "In") %>%
  skim(NewPFU) %>%
  yank("numeric")

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
NewPFU 0 1 6655.4 1829.44 4639.22 5530.43 6557.87 7682.84 8866.67 ▇▇▁▇▇

The inoculum samples, as well as the samples without virus (virus = “F”), were removed from the plot

TimePFU_data_plots <- TimePFU_data %>%
  filter(species != "In") %>%
  filter(virus != "F")
# Summarize data to plot median lines
summary_data <- TimePFU_data_plots %>%
  group_by(timeinf, location) %>%
  summarise(medianPFU = median(NewPFU, na.rm = TRUE), .groups = "drop")
kable(summary_data)
timeinf location medianPFU
2 Ap 163500
2 Baso 15
4 Ap 461850
4 Baso 15
6 Ap 247500
6 Baso 15
8 Ap 76500
8 Baso 15
10 Ap 51000
10 Baso 15

Plot

plot <- ggplot() +
  geom_jitter(data = TimePFU_data_plots, 
              aes(x = timeinf, y = NewPFU, color = location, shape = line), 
              width = 0.15, height = 0, size = 3, alpha = 0.85) +
  geom_line(data = summary_data, 
            aes(x = timeinf, y = medianPFU, color = location, group = location), 
            linewidth = 1) +
  scale_shape_manual(values = c(19, 17)) +
  scale_color_manual(values = c("#1D793E", "#FF8200")) +
  scale_x_continuous(breaks = seq(2, 10, by = 2)) +
  scale_y_log10(breaks = 10^(1:6), 
                labels = trans_format("log10", math_format(10^.x))) +
  geom_hline(yintercept = 15, linetype = "dashed", color = "black", linewidth = 0.8) +
  labs(x = "Days post viral infection (DPVI)",
       y = "PFUs / organoid",
       color = "Location", shape = "HNO Line") +
  theme_bw() +
  theme(panel.grid = element_blank(), 
        text = element_text(size = 18, color = "black"),
        axis.text.y = element_text(color = "black"),
        axis.text.x = element_text(color = "black"),
        plot.margin = unit(c(0.1,0.1,0.1,0.1), "in"))

ggsave(plot, filename = file.path(outputs_folder,  paste0(dataframeID, "_PFU_TimeCourse.png")), width = 7, height = 5)
saveRDS(plot, file = file.path(outputs_folder,  paste0(dataframeID, "_PFU_TimeCourse.rds")))
plot

Virus + Bacteria at 4DPVI

Inoculum Summary

PFU_data %>%
  filter(species == "In") %>%
  group_by(line) %>%
  skim(NewPFU) %>%
  yank("numeric")

Variable type: numeric

skim_variable line n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
NewPFU HNO9007 0 1 5294.14 2762.59 1578.43 4532.84 5672.57 6433.88 8253.00 ▇▁▇▇▇
NewPFU HNO9009 0 1 7189.48 1106.31 6037.50 6352.50 7288.24 7402.50 8866.67 ▇▁▇▁▃

Inoculum (species = “In”) and no-bacteria/no-virus controls (Combined = “NB.F”) samples were removed from downstream plots and stats.

PFU_data_plots <- PFU_data %>%
  filter(species != "In") %>%
  filter(Combined != "NB.F") %>%
  droplevels()

Stats and Plots

Analysis with mixed-effects models, using line and date as random effects and the Combined condition (i.e., bacteria and RSV presence) as fixed effect. NB.F condition was removed from the stats. If the model is singular, it is rerun with only date as random effect. Contrasts are calculated using emmeans with Holm adjustment for multiple comparisons across all conditions. Predicted values and confidence intervals are backtransformed to PFU scale for plotting.

Highlighted contrasts are those with a cutoff p-value of 0.05.

analysis_function <- function(raw_data, loc, cutoff_pvalue, cutoff_FC) {
  
  # Filter data by location
  data_stats <- raw_data %>%
    filter(location == loc) %>%
    droplevels()
  
  # Mixed-effects model with random effects
  model <- lmer(log(NewPFU) ~ Combined
                + (1|line) + (1|line:date), 
                data = data_stats)
  
  # Extract random effects data and convert to dataframe
  singular <- isSingular(model)
  random_effects_df <- as.data.frame(VarCorr(model)) %>%
    mutate(proportion = round(100 * (vcov / sum(vcov)), 2)) %>%
    mutate(singular = singular) 
  
  # If model is singular, rerun with only date as random effect
  if (singular) {
    model <- lmer(log(NewPFU) ~ Combined + (1|date), data = data_stats)
  }
  
  # Run ANOVA and extract pvalues
  anova <- anova(model)
  anova_pvalue <- anova$`Pr(>F)`[1]
  
  # If ANOVA is not significant, stop here. Otherwise continue with post-hoc analysis
  if (is.na(anova_pvalue) || anova_pvalue >= cutoff_pvalue) {
    return(list(
      anova = anova,
      random_effects = random_effects_df,
      note = "ANOVA not significant; post-hoc tests skipped"
    ))
  }
  
  # Extract fixed effects coefficients and convert to dataframe
  fixed_effects_df <- as.data.frame(summary(model)$coefficients) %>%
    rownames_to_column(var = "term")
  
  # Generate contrasts for Combined condition using emmeans with Holm adjustment
  emmeans_model <- emmeans(model, ~ Combined)
  emmeans_contrasts <- pairs(emmeans_model, adjust = "holm")
  
  contrasts_df <- as.data.frame(summary(emmeans_contrasts)) %>%
    separate(contrast, into = c("condition1", "condition2"), sep = " - ", remove = FALSE)
  
  contrasts_df$group1 <- data_stats$bacteria_label[match(contrasts_df$condition1, data_stats$Combined)]
  contrasts_df$group2 <- data_stats$bacteria_label[match(contrasts_df$condition2, data_stats$Combined)]
  
  # Adds predictions based on fixed effects, averaged over random effects. It gives a population estimate
  data_stats <- cbind(data_stats, predval = predict(model, re.form = NA, se.fit = TRUE))
  
  # Backtransform the predicted values and CIs
  transf_preds_df <- data_stats %>%
    group_by(Combined, bacteria_label) %>%
    summarise(mean.predval.fit = mean(predval.fit),
              mean.predval.se.fit = mean(predval.se.fit),
              mean.PFU = mean(NewPFU),
              .groups = 'drop') %>%
    mutate(pPFUs = exp(mean.predval.fit),
           pPFUs_min = exp(mean.predval.fit - 2*mean.predval.se.fit),
           pPFUs_max = exp(mean.predval.fit + 2*mean.predval.se.fit)) 
  
  # Calculate fold-change values for each contrast and highlight the most relevant
  contrasts_df <- contrasts_df %>%
    ungroup() %>%
    left_join(select(transf_preds_df, Combined, pPFUs), by = join_by(condition1 == Combined)) %>%
    left_join(select(transf_preds_df, Combined, pPFUs), by = join_by(condition2 == Combined), suffix = c(".1", ".2")) %>%
    mutate(FC = pPFUs.1 / pPFUs.2,
           log10FC = log10(FC),
           FC = if_else(FC < 1, 1 / FC, -FC),
           highlighted = case_when(
             p.value < cutoff_pvalue & abs(log10FC) >= cutoff_FC ~ "*",
             TRUE ~ "")) %>%
    select(contrast, condition1, condition2, p.value, pPFUs.1, pPFUs.2, FC, log10FC, highlighted, group1, group2)
  
  return(list(
    anova = anova,
    random_effects = random_effects_df,
    fixed_effects = fixed_effects_df,
    contrasts = contrasts_df,
    transf_preds = transf_preds_df
  ))
}
plot_function <- function(raw_data, loc, stats_data, contrasts, ypval) {
  
  # Filter data by location
  raw_data <- raw_data %>%
    filter(location == loc) %>%
    droplevels()
  
  # Base plot
  plot <- ggplot() +
    geom_jitter(data = raw_data, 
                aes(x = bacteria_label, y = NewPFU, color = species, shape = line),
                width = 0.15, height = 0, size = 3, alpha = 0.85, stroke = 0.75, show.legend = TRUE) +   
    
    scale_color_manual(values = Bac.colors) +
    scale_shape_manual(values = c(19, 17)) +
    
    scale_x_discrete(expand = c(0, 0.5)) +
    
    scale_y_log10(limits = c(NA, 10^6.5),
                  breaks = 10^(1:7), 
                  labels = trans_format("log10", math_format(10^.x))) +
    
    geom_hline(yintercept = 15, linetype = "dashed", color = "black", linewidth = 0.8) +
    
    labs(x = "",
         y = "RSV PFUs / organoid",
         caption = paste("RSV", "Bact", "DPVI", sep = "\n"),
         color = "Bacteria", shape = "HNO Line") +  
    theme_bw() +
    theme(panel.grid = element_blank(), 
          text = element_text(size = 18, color = "black"),
          plot.caption = element_text(size = 13, face = "bold", hjust = -0.13, vjust = 24),
          axis.text.x = element_markdown(),
          axis.text.y = element_text(color = "black"),
          plot.margin = unit(c(0.1,0.1,-0.8,0.1), "in"))
  
  # Add stats layers only if stats_data is not NULL and has rows
  if (!is.null(stats_data) && nrow(stats_data) > 0) {
    plot <- plot +
      geom_point(data = stats_data, aes(x = bacteria_label, y = pPFUs), shape = 3, size = 3) +
      geom_errorbar(data = stats_data, width = 0.5,
                    aes(x = bacteria_label, y = pPFUs, ymax = pPFUs_max, ymin = pPFUs_min))
    
    # Add significance asterisks for highlighted contrasts
    contrast_sign <- contrasts %>%
      filter(highlighted != "") %>%
      mutate(plot = "*")
    
    if (nrow(contrast_sign) > 0) {
      plot <- plot +
        stat_pvalue_manual(contrast_sign, label = "plot", y.position = ypval, 
                           step.increase = 0.03, tip.length = 0.01, bracket.shorten = 0.2, vjust = 0.6, bracket.size = 0.5)
    }
  }
  
  return(plot)
}

Apical

stats_Ap <- analysis_function(PFU_data_plots, loc = "Ap", cutoff_pvalue = 0.05, cutoff_FC = 0)
kable(stats_Ap$anova)
Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
Combined 116.2505 38.75017 3 24 19.93204 1.1e-06
kable(stats_Ap$random_effects)
grp var1 var2 vcov sdcor proportion singular
line:date (Intercept) NA 0.6609486 0.8129875 25.37 TRUE
line (Intercept) NA 0.0000000 0.0000000 0.00 TRUE
Residual NA NA 1.9441111 1.3943139 74.63 TRUE
kable(stats_Ap$fixed_effects)
term Estimate Std. Error df t value Pr(>|t|)
(Intercept) 12.2340757 0.5380062 26.82059 22.7396566 0.0000000
CombinedSpn.T -4.3467812 0.6572865 24.00000 -6.6132214 0.0000008
CombinedHin.T -0.3156792 0.6572865 24.00000 -0.4802763 0.6353796
CombinedDpi.T -0.3058250 0.6572865 24.00000 -0.4652842 0.6459223
kable(stats_Ap$contrasts %>% select(-group1, -group2, -condition1, -condition2))
contrast p.value pPFUs.1 pPFUs.2 FC log10FC highlighted
NB.T - Spn.T 4.60e-06 205679.771 2663.229 -77.229475 1.8877831 *
NB.T - Hin.T 1.00e+00 205679.771 150000.899 -1.371190 0.1370977
NB.T - Dpi.T 1.00e+00 205679.771 151486.339 -1.357745 0.1328181
Spn.T - Hin.T 1.19e-05 2663.229 150000.899 56.322947 -1.7506854 *
Spn.T - Dpi.T 1.19e-05 2663.229 151486.339 56.880705 -1.7549650 *
Hin.T - Dpi.T 1.00e+00 150000.899 151486.339 1.009903 -0.0042796
saveRDS(stats_Ap, file = file.path(outputs_folder, paste0(dataframeID, "_PFU_stats_A.rds")))
plot_Ap <- plot_function(PFU_data_plots, loc = "Ap", stats_data = stats_Ap$transf_preds, contrasts = stats_Ap$contrasts, ypval = 6.1)
plot_Ap

ggsave(plot_Ap, filename = file.path(outputs_folder, paste0(dataframeID, "_PFU_A.png")), width = 7, height = 5)
saveRDS(plot_Ap, file = file.path(outputs_folder, paste0(dataframeID, "_PFU_A.rds")))

Basal

stats_Bas <- analysis_function(PFU_data_plots, loc = "Baso", cutoff_pvalue = 0.05, cutoff_FC = 0)
kable(stats_Bas$anova)
Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
Combined 0 0 3 0 0.0690104 1
saveRDS(stats_Bas, file = file.path(outputs_folder, paste0(dataframeID, "_PFU_stats_B.rds")))
plot_Baso <- plot_function(PFU_data_plots, loc = "Baso", stats_data = stats_Bas$transf_preds, contrasts = stats_Bas$contrasts, ypval = 6.1)
plot_Baso

ggsave(plot_Baso, filename = file.path(outputs_folder, paste0(dataframeID, "_PFU_B.png")), width = 7, height = 5)
saveRDS(plot_Baso, file = file.path(outputs_folder, paste0(dataframeID, "_PFU_B.rds")))

Spn Kinetics

# Filter only virus T
PFU_SpnKinetics <- PFU_SpnKinetics %>%
  filter(virus == "T") 

Stats and Plots

Analysis with mixed-effects models, using line and date as random effects and the timeinf condition as fixed effect. Ratios are calculated relative to the corresponding NB value for each line, date, location, and timeinf. Contrasts are calculated using emmeans with Holm adjustment for multiple comparisons against zero (this is the log2 ratio to NB).

# Calculate ratio relative to NB 
log2FC_PFU_SpnKinetics <- PFU_SpnKinetics %>%
  group_by(date, line, location, timeinf) %>%
  reframe(date = date,
          location = location,
          line = line,
          timeinf = timeinf,
          species = species,
          FC_PFU = NewPFU / NewPFU[species == "NB"],
          log2FC_PFU = log2(FC_PFU)) %>%
  filter(species != "NB")

# Average log2FC values for technical replicates
log2FC_PFU_SpnKinetics <- log2FC_PFU_SpnKinetics %>%
  group_by(date, line, location, timeinf, species) %>%
  summarise(FC_PFU = mean(FC_PFU, na.rm = TRUE),
            log2FC_PFU = mean(log2FC_PFU, na.rm = TRUE),
            .groups = "drop")
# Mixed-effects model analysis function for kinetics data
analysis_function_kinetics <- function(raw_data, loc, cutoff_pvalue) {
  
  # Filter data by location and convert timeinf to factor
  data_stats <- raw_data %>%
    filter(location == loc) %>%
    mutate(timeinf = as.factor(timeinf)) 
  
  summary <- data_stats %>%
    group_by(timeinf) %>%
    summarise(
      mean_FC = mean(FC_PFU, na.rm = TRUE),
      mean_log2FC = mean(log2FC_PFU, na.rm = TRUE)
    )
  
  # Mixed-effects model with random effects
  model <- lmer(log2FC_PFU ~ timeinf  
                + (1|line) + (1|line:date), 
                data = data_stats)
  
  # Extract random effects data and convert to dataframe
  singular <- isSingular(model)
  random_effects_df <- as.data.frame(VarCorr(model)) %>%
    mutate(proportion = round(100 * (vcov / sum(vcov)), 2)) %>%
    mutate(singular = singular) 
  
  # If model is singular, rerun without random effects
  if (singular) {
    model <- lm(log2FC_PFU ~ timeinf, data = data_stats)
  }
  
  # Run ANOVA 
  anova_result <- anova(model)
  anova_pvalue <- anova_result$`Pr(>F)`[1]
  
  # If ANOVA is not significant, stop here. Otherwise continue with post-hoc analysis
  if (is.na(anova_pvalue) || anova_pvalue >= cutoff_pvalue) {
    return(list(
      anova = anova_result,
      random_effects = random_effects_df,
      note = "ANOVA not significant; post-hoc tests skipped"
    ))
  }
  
  # Extract fixed effects coefficients and convert to dataframe
  fixed_effects_df <- as.data.frame(summary(model)$coefficients) %>%
    rownames_to_column(var = "term")
  
  # Generate contrasts for timeinf condition using emmeans with Holm adjustment vs zero
  emmeans_model <- emmeans(model, ~ timeinf)
  emmeans_test_vs_zero <- test(emmeans_model, null = 0, adjust = "holm")
  
  emmeans_test_vs_zero_df <- as.data.frame(summary(emmeans_test_vs_zero)) 
  emmeans_test_vs_zero_df <- merge(emmeans_test_vs_zero_df, summary)
  
  return(list(
    anova = anova_result,
    random_effects = random_effects_df,
    fixed_effects = fixed_effects_df,
    emmeans_test_vs_zero = emmeans_test_vs_zero_df
  ))
}
plotK_function_log2FC <- function(raw_data, stats_results = NULL, loc) {
  
  # Filter raw data
  plot_data <- raw_data %>%
    filter(location == loc) %>%
    mutate(timeinf = as.factor(timeinf)) 
  
  # Create base plot
  plot <- ggplot() +
    # Reference line at 0
    geom_hline(yintercept = 0, linetype = "dashed", 
               color = NB.colors.K, linewidth = 0.8) +
    
    # Add raw data points
    geom_jitter(data = plot_data, 
                aes(x = timeinf, y = log2FC_PFU, 
                    color = species, shape = line), 
                width = 0.15, height = 0, size = 3, 
                alpha = 0.85, stroke = 0.75, show.legend = TRUE) +
    
    # Add boxplot
    geom_boxplot(data = plot_data, 
                 aes(x = timeinf, y = log2FC_PFU, color = species), 
                 alpha = 0.3, outlier.shape = NA, show.legend = FALSE) +
    
    # Scales and colors
    scale_color_manual(values = Spn.colors.K) +
    scale_shape_manual(values = c(19, 17)) +
    scale_y_continuous(limits = c(-8, 1)) +
    
    # Labels
    labs(x = "DPVI",
         y = "log2(Spn/Unc) RSV PFU",
         color = "Bacteria", shape = "HNO Line") +
    
    # Theme
    theme_bw() +
    theme(
      panel.grid = element_blank(), 
      text = element_text(size = 18, color = "black"),
      axis.text.x = element_text(color = "black"),
      axis.text.y = element_text(color = "black")
    )
  
  # Conditionally add significance layers if stats_results are provided and valid
  add_sig <- !is.null(stats_results) && "emmeans_test_vs_zero" %in% names(stats_results)
  
  if (add_sig) {
    sig_df <- stats_results$emmeans_test_vs_zero
    
    # Align factor levels to plot_data$timeinf to ensure correct x positions
    sig_df <- sig_df %>%
      mutate(
        timeinf = factor(timeinf, levels = levels(plot_data$timeinf)),
        signif = case_when(p.value < 0.05 ~ "*", TRUE ~ ""),
        yend   = case_when(p.value < 0.05 ~ emmean, TRUE ~ 0),
        y_position = -1,  
        x_numeric  = as.numeric(timeinf),
        x_position = x_numeric + 0.4  # offset to avoid overlapping with box/points
      )
    
    plot <- plot +
      # Star placed just to the right of the segment
      geom_text(data = sig_df,
                aes(x = x_position + 0.08, y = y_position, label = signif),
                size = 5, color = "black", fontface = "bold", vjust = 0.5) +
      # Segment from 0 to emmean (or 0 if non-significant)
      geom_segment(data = sig_df,
                   aes(x = x_position, xend = x_position, y = 0, yend = yend),
                   color = "black", linewidth = 0.8)
  }
  
  return(plot)
}

Apical

stats_SpnKinetics_A <- analysis_function_kinetics(log2FC_PFU_SpnKinetics, loc = "Ap", cutoff_pvalue = 0.05)
stats_SpnKinetics_A$anova
Analysis of Variance Table

Response: log2FC_PFU
          Df Sum Sq Mean Sq F value  Pr(>F)  
timeinf    3 23.195  7.7315  4.8292 0.01641 *
Residuals 14 22.414  1.6010                  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
stats_SpnKinetics_A$random_effects
        grp        var1 var2     vcov    sdcor proportion singular
1 line:date (Intercept) <NA> 0.000000 0.000000          0     TRUE
2      line (Intercept) <NA> 0.000000 0.000000          0     TRUE
3  Residual        <NA> <NA> 1.601014 1.265312        100     TRUE
stats_SpnKinetics_A$fixed_effects
         term   Estimate Std. Error    t value    Pr(>|t|)
1 (Intercept) -2.0577863  0.5658646 -3.6365349 0.002695870
2    timeinf2 -0.7033791  0.8487969 -0.8286778 0.421190502
3    timeinf3 -1.5767036  0.8487969 -1.8575747 0.084387263
4    timeinf4 -2.9136395  0.8002534 -3.6408963 0.002672659
stats_SpnKinetics_A$emmeans_test_vs_zero
  timeinf    emmean        SE df   t.ratio      p.value   mean_FC mean_log2FC
1       1 -2.057786 0.5658646 14 -3.636535 2.695870e-03 0.2506541   -2.057786
2       2 -2.761165 0.6326558 14 -4.364404 1.295486e-03 0.1826651   -2.761165
3       3 -3.634490 0.6326558 14 -5.744814 1.521063e-04 0.1079730   -3.634490
4       4 -4.971426 0.5658646 14 -8.785540 1.811272e-06 0.0824651   -4.971426
saveRDS(stats_SpnKinetics_A, file = file.path(outputs_folder, paste0(dataframeID, "_PFU_SpnKinetics_stats_A.rds")))
plot_SpnKinetics_A <- plotK_function_log2FC(log2FC_PFU_SpnKinetics, stats_SpnKinetics_A, loc = "Ap")
plot_SpnKinetics_A

ggsave(plot_SpnKinetics_A, filename = file.path(outputs_folder, paste0(dataframeID, "_PFU_SpnKinetics_A.png")), width = 7, height = 5)
saveRDS(plot_SpnKinetics_A, file = file.path(outputs_folder, paste0(dataframeID, "_PFU_SpnKinetics_A.rds")))

Basal

stats_SpnKinetics_B <- analysis_function_kinetics(log2FC_PFU_SpnKinetics, loc = "Baso", cutoff_pvalue = 0.05)
stats_SpnKinetics_B$anova
Analysis of Variance Table

Response: log2FC_PFU
          Df  Sum Sq  Mean Sq F value Pr(>F)
timeinf    3 0.14737 0.049123  0.9211 0.4545
Residuals 15 0.80000 0.053333               
stats_SpnKinetics_B$random_effects
        grp        var1 var2         vcov        sdcor proportion singular
1 line:date (Intercept) <NA> 5.711833e-08 0.0002389944          0     TRUE
2      line (Intercept) <NA> 0.000000e+00 0.0000000000          0     TRUE
3  Residual        <NA> <NA> 5.333328e-02 0.2309399840        100     TRUE
saveRDS(stats_SpnKinetics_B, file = file.path(outputs_folder, paste0(dataframeID, "_PFU_SpnKinetics_stats_B.rds")))
plot_SpnKinetics_B <- plotK_function_log2FC(log2FC_PFU_SpnKinetics, stats_SpnKinetics_B, loc = "Baso")
plot_SpnKinetics_B

ggsave(plot_SpnKinetics_B, filename = file.path(outputs_folder, paste0(dataframeID, "_PFU_SpnKinetics_B.png")), width = 7, height = 5)
saveRDS(plot_SpnKinetics_B, file = file.path(outputs_folder, paste0(dataframeID, "_PFU_SpnKinetics_B.rds")))
Bacterial CFUs
RSV RT-qPCR
Source Code
---
format:
  html:
    suppress-bibliography: true
---

# RSV PFUs {.unnumbered}

```{r}
# loading packages
library(tidyverse)
library(skimr)
library(ggtext)
library(scales)
library(lme4)
library(lmerTest)  
library(emmeans)
library(knitr)
library(ggpubr)
```

## Data Input 

All data inputs and outputs in this repo are relative to **FOLDER_PATH**. Please, save the provided data frames in RDS format in the `dataframes` folder.
```{r}
# Define directory paths based on R.environ
FOLDER_PATH <- Sys.getenv("BOX_PATH_RSVBacPublication") 
dataframes_folder <- file.path(FOLDER_PATH, "dataframes")

# Create subfolder for output files
outputs_folder <- file.path(FOLDER_PATH, "outputs/PFUs")
if (!file.exists(outputs_folder)) {
  dir.create(outputs_folder, recursive = TRUE)
}

# Colors
Bac.colors <- c("#CC9564", "#a89de0", "#f54590", "#2e67f2")
Bac.colors.K <- c("#CC9564", "#a89de0")
Spn.colors.K <- c("#a89de0")
NB.colors.K <- c("#CC9564")
```

```{r}
# Read the provided RDS files
dataframeID <- "RSVbac"
TimePFU_data <- readRDS(file.path(dataframes_folder, paste0(dataframeID, "_PFU_TimeCourse_values.rds")))
PFU_data <- readRDS(file.path(dataframes_folder, paste0(dataframeID, "_PFU_values.rds")))
PFU_SpnKinetics <- readRDS(file.path(dataframes_folder, paste0(dataframeID, "_PFU_SpnKinetics_values.rds")))
```

## Time Course

### Inoculum Summary

Summary stats for the inoculum samples (species = "In"). 

```{r}
TimePFU_data %>%
  filter(species == "In") %>%
  skim(NewPFU) %>%
  yank("numeric")
```

The inoculum samples, as well as the samples without virus (virus = "F"), were removed from the plot

```{r}
TimePFU_data_plots <- TimePFU_data %>%
  filter(species != "In") %>%
  filter(virus != "F")
```

```{r}
# Summarize data to plot median lines
summary_data <- TimePFU_data_plots %>%
  group_by(timeinf, location) %>%
  summarise(medianPFU = median(NewPFU, na.rm = TRUE), .groups = "drop")
kable(summary_data)
```

### Plot

```{r}
plot <- ggplot() +
  geom_jitter(data = TimePFU_data_plots, 
              aes(x = timeinf, y = NewPFU, color = location, shape = line), 
              width = 0.15, height = 0, size = 3, alpha = 0.85) +
  geom_line(data = summary_data, 
            aes(x = timeinf, y = medianPFU, color = location, group = location), 
            linewidth = 1) +
  scale_shape_manual(values = c(19, 17)) +
  scale_color_manual(values = c("#1D793E", "#FF8200")) +
  scale_x_continuous(breaks = seq(2, 10, by = 2)) +
  scale_y_log10(breaks = 10^(1:6), 
                labels = trans_format("log10", math_format(10^.x))) +
  geom_hline(yintercept = 15, linetype = "dashed", color = "black", linewidth = 0.8) +
  labs(x = "Days post viral infection (DPVI)",
       y = "PFUs / organoid",
       color = "Location", shape = "HNO Line") +
  theme_bw() +
  theme(panel.grid = element_blank(), 
        text = element_text(size = 18, color = "black"),
        axis.text.y = element_text(color = "black"),
        axis.text.x = element_text(color = "black"),
        plot.margin = unit(c(0.1,0.1,0.1,0.1), "in"))

ggsave(plot, filename = file.path(outputs_folder,  paste0(dataframeID, "_PFU_TimeCourse.png")), width = 7, height = 5)
saveRDS(plot, file = file.path(outputs_folder,  paste0(dataframeID, "_PFU_TimeCourse.rds")))
```

```{r}
#| fig-height: 5
#| fig-width: 7
plot
```

## Virus + Bacteria at 4DPVI

### Inoculum Summary

```{r}
PFU_data %>%
  filter(species == "In") %>%
  group_by(line) %>%
  skim(NewPFU) %>%
  yank("numeric")
```

Inoculum (species = "In") and no-bacteria/no-virus controls (Combined = "NB.F") samples were removed from downstream plots and stats.

```{r}
PFU_data_plots <- PFU_data %>%
  filter(species != "In") %>%
  filter(Combined != "NB.F") %>%
  droplevels()
```

### Stats and Plots

Analysis with mixed-effects models, using `line` and `date` as random effects and the `Combined` condition (i.e., bacteria and RSV presence) as fixed effect. `NB.F` condition was removed from the stats. If the model is singular, it is rerun with only `date` as random effect. Contrasts are calculated using emmeans with Holm adjustment for multiple comparisons across all conditions. Predicted values and confidence intervals are backtransformed to PFU scale for plotting.

Highlighted contrasts are those with a cutoff p-value of 0.05.

```{r}
analysis_function <- function(raw_data, loc, cutoff_pvalue, cutoff_FC) {
  
  # Filter data by location
  data_stats <- raw_data %>%
    filter(location == loc) %>%
    droplevels()
  
  # Mixed-effects model with random effects
  model <- lmer(log(NewPFU) ~ Combined
                + (1|line) + (1|line:date), 
                data = data_stats)
  
  # Extract random effects data and convert to dataframe
  singular <- isSingular(model)
  random_effects_df <- as.data.frame(VarCorr(model)) %>%
    mutate(proportion = round(100 * (vcov / sum(vcov)), 2)) %>%
    mutate(singular = singular) 
  
  # If model is singular, rerun with only date as random effect
  if (singular) {
    model <- lmer(log(NewPFU) ~ Combined + (1|date), data = data_stats)
  }
  
  # Run ANOVA and extract pvalues
  anova <- anova(model)
  anova_pvalue <- anova$`Pr(>F)`[1]
  
  # If ANOVA is not significant, stop here. Otherwise continue with post-hoc analysis
  if (is.na(anova_pvalue) || anova_pvalue >= cutoff_pvalue) {
    return(list(
      anova = anova,
      random_effects = random_effects_df,
      note = "ANOVA not significant; post-hoc tests skipped"
    ))
  }
  
  # Extract fixed effects coefficients and convert to dataframe
  fixed_effects_df <- as.data.frame(summary(model)$coefficients) %>%
    rownames_to_column(var = "term")
  
  # Generate contrasts for Combined condition using emmeans with Holm adjustment
  emmeans_model <- emmeans(model, ~ Combined)
  emmeans_contrasts <- pairs(emmeans_model, adjust = "holm")
  
  contrasts_df <- as.data.frame(summary(emmeans_contrasts)) %>%
    separate(contrast, into = c("condition1", "condition2"), sep = " - ", remove = FALSE)
  
  contrasts_df$group1 <- data_stats$bacteria_label[match(contrasts_df$condition1, data_stats$Combined)]
  contrasts_df$group2 <- data_stats$bacteria_label[match(contrasts_df$condition2, data_stats$Combined)]
  
  # Adds predictions based on fixed effects, averaged over random effects. It gives a population estimate
  data_stats <- cbind(data_stats, predval = predict(model, re.form = NA, se.fit = TRUE))
  
  # Backtransform the predicted values and CIs
  transf_preds_df <- data_stats %>%
    group_by(Combined, bacteria_label) %>%
    summarise(mean.predval.fit = mean(predval.fit),
              mean.predval.se.fit = mean(predval.se.fit),
              mean.PFU = mean(NewPFU),
              .groups = 'drop') %>%
    mutate(pPFUs = exp(mean.predval.fit),
           pPFUs_min = exp(mean.predval.fit - 2*mean.predval.se.fit),
           pPFUs_max = exp(mean.predval.fit + 2*mean.predval.se.fit)) 
  
  # Calculate fold-change values for each contrast and highlight the most relevant
  contrasts_df <- contrasts_df %>%
    ungroup() %>%
    left_join(select(transf_preds_df, Combined, pPFUs), by = join_by(condition1 == Combined)) %>%
    left_join(select(transf_preds_df, Combined, pPFUs), by = join_by(condition2 == Combined), suffix = c(".1", ".2")) %>%
    mutate(FC = pPFUs.1 / pPFUs.2,
           log10FC = log10(FC),
           FC = if_else(FC < 1, 1 / FC, -FC),
           highlighted = case_when(
             p.value < cutoff_pvalue & abs(log10FC) >= cutoff_FC ~ "*",
             TRUE ~ "")) %>%
    select(contrast, condition1, condition2, p.value, pPFUs.1, pPFUs.2, FC, log10FC, highlighted, group1, group2)
  
  return(list(
    anova = anova,
    random_effects = random_effects_df,
    fixed_effects = fixed_effects_df,
    contrasts = contrasts_df,
    transf_preds = transf_preds_df
  ))
}
```

```{r}
plot_function <- function(raw_data, loc, stats_data, contrasts, ypval) {
  
  # Filter data by location
  raw_data <- raw_data %>%
    filter(location == loc) %>%
    droplevels()
  
  # Base plot
  plot <- ggplot() +
    geom_jitter(data = raw_data, 
                aes(x = bacteria_label, y = NewPFU, color = species, shape = line),
                width = 0.15, height = 0, size = 3, alpha = 0.85, stroke = 0.75, show.legend = TRUE) +   
    
    scale_color_manual(values = Bac.colors) +
    scale_shape_manual(values = c(19, 17)) +
    
    scale_x_discrete(expand = c(0, 0.5)) +
    
    scale_y_log10(limits = c(NA, 10^6.5),
                  breaks = 10^(1:7), 
                  labels = trans_format("log10", math_format(10^.x))) +
    
    geom_hline(yintercept = 15, linetype = "dashed", color = "black", linewidth = 0.8) +
    
    labs(x = "",
         y = "RSV PFUs / organoid",
         caption = paste("RSV", "Bact", "DPVI", sep = "\n"),
         color = "Bacteria", shape = "HNO Line") +  
    theme_bw() +
    theme(panel.grid = element_blank(), 
          text = element_text(size = 18, color = "black"),
          plot.caption = element_text(size = 13, face = "bold", hjust = -0.13, vjust = 24),
          axis.text.x = element_markdown(),
          axis.text.y = element_text(color = "black"),
          plot.margin = unit(c(0.1,0.1,-0.8,0.1), "in"))
  
  # Add stats layers only if stats_data is not NULL and has rows
  if (!is.null(stats_data) && nrow(stats_data) > 0) {
    plot <- plot +
      geom_point(data = stats_data, aes(x = bacteria_label, y = pPFUs), shape = 3, size = 3) +
      geom_errorbar(data = stats_data, width = 0.5,
                    aes(x = bacteria_label, y = pPFUs, ymax = pPFUs_max, ymin = pPFUs_min))
    
    # Add significance asterisks for highlighted contrasts
    contrast_sign <- contrasts %>%
      filter(highlighted != "") %>%
      mutate(plot = "*")
    
    if (nrow(contrast_sign) > 0) {
      plot <- plot +
        stat_pvalue_manual(contrast_sign, label = "plot", y.position = ypval, 
                           step.increase = 0.03, tip.length = 0.01, bracket.shorten = 0.2, vjust = 0.6, bracket.size = 0.5)
    }
  }
  
  return(plot)
}
```

#### Apical

```{r}
stats_Ap <- analysis_function(PFU_data_plots, loc = "Ap", cutoff_pvalue = 0.05, cutoff_FC = 0)
kable(stats_Ap$anova)
kable(stats_Ap$random_effects)
kable(stats_Ap$fixed_effects)
kable(stats_Ap$contrasts %>% select(-group1, -group2, -condition1, -condition2))
saveRDS(stats_Ap, file = file.path(outputs_folder, paste0(dataframeID, "_PFU_stats_A.rds")))
```

```{r}
#| fig-height: 5
#| fig-width: 7
plot_Ap <- plot_function(PFU_data_plots, loc = "Ap", stats_data = stats_Ap$transf_preds, contrasts = stats_Ap$contrasts, ypval = 6.1)
plot_Ap
ggsave(plot_Ap, filename = file.path(outputs_folder, paste0(dataframeID, "_PFU_A.png")), width = 7, height = 5)
saveRDS(plot_Ap, file = file.path(outputs_folder, paste0(dataframeID, "_PFU_A.rds")))
```

#### Basal

```{r}
stats_Bas <- analysis_function(PFU_data_plots, loc = "Baso", cutoff_pvalue = 0.05, cutoff_FC = 0)
kable(stats_Bas$anova)
saveRDS(stats_Bas, file = file.path(outputs_folder, paste0(dataframeID, "_PFU_stats_B.rds")))
```

```{r}
#| fig-height: 5
#| fig-width: 7
plot_Baso <- plot_function(PFU_data_plots, loc = "Baso", stats_data = stats_Bas$transf_preds, contrasts = stats_Bas$contrasts, ypval = 6.1)
plot_Baso
ggsave(plot_Baso, filename = file.path(outputs_folder, paste0(dataframeID, "_PFU_B.png")), width = 7, height = 5)
saveRDS(plot_Baso, file = file.path(outputs_folder, paste0(dataframeID, "_PFU_B.rds")))
```

## Spn Kinetics

```{r}
# Filter only virus T
PFU_SpnKinetics <- PFU_SpnKinetics %>%
  filter(virus == "T") 
```

### Stats and Plots

Analysis with mixed-effects models, using `line` and `date` as random effects and the `timeinf` condition as fixed effect. Ratios are calculated relative to the corresponding NB value for each line, date, location, and timeinf. Contrasts are calculated using emmeans with Holm adjustment for multiple comparisons against zero (this is the log2 ratio to NB).

```{r}
# Calculate ratio relative to NB 
log2FC_PFU_SpnKinetics <- PFU_SpnKinetics %>%
  group_by(date, line, location, timeinf) %>%
  reframe(date = date,
          location = location,
          line = line,
          timeinf = timeinf,
          species = species,
          FC_PFU = NewPFU / NewPFU[species == "NB"],
          log2FC_PFU = log2(FC_PFU)) %>%
  filter(species != "NB")

# Average log2FC values for technical replicates
log2FC_PFU_SpnKinetics <- log2FC_PFU_SpnKinetics %>%
  group_by(date, line, location, timeinf, species) %>%
  summarise(FC_PFU = mean(FC_PFU, na.rm = TRUE),
            log2FC_PFU = mean(log2FC_PFU, na.rm = TRUE),
            .groups = "drop")
```

```{r}
# Mixed-effects model analysis function for kinetics data
analysis_function_kinetics <- function(raw_data, loc, cutoff_pvalue) {
  
  # Filter data by location and convert timeinf to factor
  data_stats <- raw_data %>%
    filter(location == loc) %>%
    mutate(timeinf = as.factor(timeinf)) 
  
  summary <- data_stats %>%
    group_by(timeinf) %>%
    summarise(
      mean_FC = mean(FC_PFU, na.rm = TRUE),
      mean_log2FC = mean(log2FC_PFU, na.rm = TRUE)
    )
  
  # Mixed-effects model with random effects
  model <- lmer(log2FC_PFU ~ timeinf  
                + (1|line) + (1|line:date), 
                data = data_stats)
  
  # Extract random effects data and convert to dataframe
  singular <- isSingular(model)
  random_effects_df <- as.data.frame(VarCorr(model)) %>%
    mutate(proportion = round(100 * (vcov / sum(vcov)), 2)) %>%
    mutate(singular = singular) 
  
  # If model is singular, rerun without random effects
  if (singular) {
    model <- lm(log2FC_PFU ~ timeinf, data = data_stats)
  }
  
  # Run ANOVA 
  anova_result <- anova(model)
  anova_pvalue <- anova_result$`Pr(>F)`[1]
  
  # If ANOVA is not significant, stop here. Otherwise continue with post-hoc analysis
  if (is.na(anova_pvalue) || anova_pvalue >= cutoff_pvalue) {
    return(list(
      anova = anova_result,
      random_effects = random_effects_df,
      note = "ANOVA not significant; post-hoc tests skipped"
    ))
  }
  
  # Extract fixed effects coefficients and convert to dataframe
  fixed_effects_df <- as.data.frame(summary(model)$coefficients) %>%
    rownames_to_column(var = "term")
  
  # Generate contrasts for timeinf condition using emmeans with Holm adjustment vs zero
  emmeans_model <- emmeans(model, ~ timeinf)
  emmeans_test_vs_zero <- test(emmeans_model, null = 0, adjust = "holm")
  
  emmeans_test_vs_zero_df <- as.data.frame(summary(emmeans_test_vs_zero)) 
  emmeans_test_vs_zero_df <- merge(emmeans_test_vs_zero_df, summary)
  
  return(list(
    anova = anova_result,
    random_effects = random_effects_df,
    fixed_effects = fixed_effects_df,
    emmeans_test_vs_zero = emmeans_test_vs_zero_df
  ))
}
```

```{r}
plotK_function_log2FC <- function(raw_data, stats_results = NULL, loc) {
  
  # Filter raw data
  plot_data <- raw_data %>%
    filter(location == loc) %>%
    mutate(timeinf = as.factor(timeinf)) 
  
  # Create base plot
  plot <- ggplot() +
    # Reference line at 0
    geom_hline(yintercept = 0, linetype = "dashed", 
               color = NB.colors.K, linewidth = 0.8) +
    
    # Add raw data points
    geom_jitter(data = plot_data, 
                aes(x = timeinf, y = log2FC_PFU, 
                    color = species, shape = line), 
                width = 0.15, height = 0, size = 3, 
                alpha = 0.85, stroke = 0.75, show.legend = TRUE) +
    
    # Add boxplot
    geom_boxplot(data = plot_data, 
                 aes(x = timeinf, y = log2FC_PFU, color = species), 
                 alpha = 0.3, outlier.shape = NA, show.legend = FALSE) +
    
    # Scales and colors
    scale_color_manual(values = Spn.colors.K) +
    scale_shape_manual(values = c(19, 17)) +
    scale_y_continuous(limits = c(-8, 1)) +
    
    # Labels
    labs(x = "DPVI",
         y = "log2(Spn/Unc) RSV PFU",
         color = "Bacteria", shape = "HNO Line") +
    
    # Theme
    theme_bw() +
    theme(
      panel.grid = element_blank(), 
      text = element_text(size = 18, color = "black"),
      axis.text.x = element_text(color = "black"),
      axis.text.y = element_text(color = "black")
    )
  
  # Conditionally add significance layers if stats_results are provided and valid
  add_sig <- !is.null(stats_results) && "emmeans_test_vs_zero" %in% names(stats_results)
  
  if (add_sig) {
    sig_df <- stats_results$emmeans_test_vs_zero
    
    # Align factor levels to plot_data$timeinf to ensure correct x positions
    sig_df <- sig_df %>%
      mutate(
        timeinf = factor(timeinf, levels = levels(plot_data$timeinf)),
        signif = case_when(p.value < 0.05 ~ "*", TRUE ~ ""),
        yend   = case_when(p.value < 0.05 ~ emmean, TRUE ~ 0),
        y_position = -1,  
        x_numeric  = as.numeric(timeinf),
        x_position = x_numeric + 0.4  # offset to avoid overlapping with box/points
      )
    
    plot <- plot +
      # Star placed just to the right of the segment
      geom_text(data = sig_df,
                aes(x = x_position + 0.08, y = y_position, label = signif),
                size = 5, color = "black", fontface = "bold", vjust = 0.5) +
      # Segment from 0 to emmean (or 0 if non-significant)
      geom_segment(data = sig_df,
                   aes(x = x_position, xend = x_position, y = 0, yend = yend),
                   color = "black", linewidth = 0.8)
  }
  
  return(plot)
}
```

#### Apical

```{r}
stats_SpnKinetics_A <- analysis_function_kinetics(log2FC_PFU_SpnKinetics, loc = "Ap", cutoff_pvalue = 0.05)
stats_SpnKinetics_A$anova
stats_SpnKinetics_A$random_effects
stats_SpnKinetics_A$fixed_effects
stats_SpnKinetics_A$emmeans_test_vs_zero
saveRDS(stats_SpnKinetics_A, file = file.path(outputs_folder, paste0(dataframeID, "_PFU_SpnKinetics_stats_A.rds")))
```

```{r}
plot_SpnKinetics_A <- plotK_function_log2FC(log2FC_PFU_SpnKinetics, stats_SpnKinetics_A, loc = "Ap")
plot_SpnKinetics_A
ggsave(plot_SpnKinetics_A, filename = file.path(outputs_folder, paste0(dataframeID, "_PFU_SpnKinetics_A.png")), width = 7, height = 5)
saveRDS(plot_SpnKinetics_A, file = file.path(outputs_folder, paste0(dataframeID, "_PFU_SpnKinetics_A.rds")))
```

#### Basal

```{r}
stats_SpnKinetics_B <- analysis_function_kinetics(log2FC_PFU_SpnKinetics, loc = "Baso", cutoff_pvalue = 0.05)
stats_SpnKinetics_B$anova
stats_SpnKinetics_B$random_effects
saveRDS(stats_SpnKinetics_B, file = file.path(outputs_folder, paste0(dataframeID, "_PFU_SpnKinetics_stats_B.rds")))
```

```{r}
plot_SpnKinetics_B <- plotK_function_log2FC(log2FC_PFU_SpnKinetics, stats_SpnKinetics_B, loc = "Baso")
plot_SpnKinetics_B
ggsave(plot_SpnKinetics_B, filename = file.path(outputs_folder, paste0(dataframeID, "_PFU_SpnKinetics_B.png")), width = 7, height = 5)
saveRDS(plot_SpnKinetics_B, file = file.path(outputs_folder, paste0(dataframeID, "_PFU_SpnKinetics_B.rds")))
```