Caspase

# loading packages
library(tidyverse)
library(skimr)
library(ggtext)
library(scales)
library(lme4)
library(lmerTest) 
library(emmeans)
library(knitr)
library(ggpubr)

Data Input

All data inputs and outputs in this repo are relative to FOLDER_PATH. Please, save the provided data frames in RDS format in the dataframes folder.

# Define the directory paths based on R.environ
FOLDER_PATH <- Sys.getenv("BOX_PATH_RSVBacPublication") 
dataframes_folder <- file.path(FOLDER_PATH, "dataframes")

# Create subfolder for output files
outputs_folder <- file.path(FOLDER_PATH, "outputs/Caspase")
if (!file.exists(outputs_folder)) {
  dir.create(outputs_folder, recursive = TRUE)
}

# Colors
Bac.colors <- c("#CC9564", "#a89de0", "#f54590", "#2e67f2")
Bac.colors.ext <- c("#CC9564", "#a89de0", "#f54590", "#2e67f2", "grey", "grey")

# Define batch IDs
batchIDs <- c("250114", "251221")

# Read all RDS files into a named list
data_list <- setNames(
  lapply(batchIDs, function(id) {
    readRDS(file.path(dataframes_folder, paste0("RSVbac_Caspase_", id, ".rds")))
  }),
  batchIDs
)

Concentrations from Luminescence values

For each batch, we fit a linear model to the standard curve data and use the model coefficients to calculate the concentration of each sample based on its luminescence value. We also generate plots for the standard curves and individual samples and controls. The final concentration per organoid is calculated considering that 50 uL out of 200 uL were used in the assay.

analyze_lum_data <- function(data, batchID) {
  
  # Split sample and standards dataframes
  data_standards <- data %>%
    filter(sampletype == "standard") %>%
    filter(luminescence != 0) %>%
    select(NewCaspase,luminescence)
  
  data_samples <- data %>%
    filter(sampletype == "sample")
  
  data_samples$species <- factor(data_samples$species, levels = c("NB", "Spn", "Hin", "Dpi", "AODM", "EBSS"))
  
  # Fit a linear model 
  lmStandard <- lm(luminescence ~ log(NewCaspase), data_standards) 
  
  # Extract coefficients and R-squared value from model
  coefficients <- lmStandard$coefficients
  lm_eq <- paste("y =", coef(lmStandard)[2], "x +", coef(lmStandard)[1])
  r_squared <- round(summary(lmStandard)$r.squared, 3)
  
  # Create a data frame for the fitted line within the range of standard concentrations
  line_data <- data.frame(
    log_concentration = seq(min(log(data_standards$NewCaspase)),
                            max(log(data_standards$NewCaspase)),
                            length.out = 100)
  ) %>%
    mutate(Cq = coefficients[1] + coefficients[2] * log_concentration)
  
  # Standard curve plot
  standards_plot <- ggplot(data_standards, aes(x = log(NewCaspase), y = luminescence)) +
    geom_point(size = 3, alpha = 0.5) +
    geom_line(data = line_data, aes(x = log_concentration, y = Cq), 
              color = "black", linetype = "dotted", linewidth = 0.8) +
    labs(title = "Standard Curve Formula",
         x = "ln(caspase)", y = "luminescence") +
    geom_text(x = min(log(data_standards$NewCaspase)), y = max(data_standards$luminescence), 
              label = paste("Equation: ", lm_eq, "\n               R-squared: ", r_squared), 
              hjust = -0.3, vjust = 1) +
    theme_bw() +
    theme(panel.grid = element_blank(), 
          text = element_text(size = 14, color = "black"))
  
  # Apply linear model coefficients to calculate concentration in samples.
  data_samples <- data_samples %>%
    mutate(NewCaspase = exp((luminescence - coefficients[1]) / coefficients[2]))
  
  # Samples plot
  samples_plot <- ggplot(data_samples, aes(x = log(NewCaspase), y = luminescence)) +
    geom_point(aes(x = log(NewCaspase), y = luminescence, color = species), size = 3, alpha = 0.5) +
    geom_line(data = line_data, aes(x = log_concentration, y = Cq), 
              color = "black", linetype = "dotted", linewidth = 0.8) +
    scale_color_manual(values = Bac.colors.ext) +
    labs(title = "Individual Caspase Values",
         x = "ln(caspase)", y = "luminescence") +
    theme_bw() +
    theme(panel.grid = element_blank(), 
          text = element_text(size = 14, color = "black"))
  
  # Final concentration in caspase units / organoid
  data_samples <- data_samples %>%
    mutate(NewCaspase = NewCaspase*8) # 50 out of 200 uL used in the assay
  
  # Plot both plots side by side
  print(combined_plot <- ggarrange(standards_plot, samples_plot, ncol = 2, nrow = 1, common.legend = F, legend = "bottom"))
  
  # Return results
  return(list(
    samples = data_samples,
    standards = data_standards
  ))
}

results_list <- mapply(analyze_lum_data, data_list, names(data_list), SIMPLIFY = F)

# Combine all samples into a single dataframe
all_samples <- bind_rows(lapply(results_list, `[[`, "samples"), .id = "batchID")

# Save data frame as CSV and R object in the dataframes folder
dataframeID <- "RSVbac"
write_csv(all_samples, file.path(dataframes_folder, paste0(dataframeID, "_Caspase_values.csv")))
saveRDS(all_samples, file.path(dataframes_folder,  paste0(dataframeID, "_Caspase_values.rds")))

# Use this to read the final objects
all_samples <- readRDS(file.path(dataframes_folder, paste0(dataframeID, "_Caspase_values.rds")))

# Average technical replicates
avg_data <- all_samples %>%
  group_by(date, line, timeinf, species, virus, Combined, bacteria_label) %>%
  summarise(conc = mean(NewCaspase)) %>%
  ungroup()

Stats and Plots for samples at 4 dpvi

# Filter out the media control values
avg_data_4 <- avg_data %>%
  filter(timeinf == "4") %>%
  droplevels()

Statistics

Analysis with a mixed-effects model with random effects for line and date and the interaction of fixed effects for species and the virus combined condition. If the model is singular, it is rerun with only date as random effect. The contrasts are calculated using emmeans with Holm adjustment for the virus comparisons for each bacteria. Predicted values are backtransformed to CFU scale for plotting.

Highlighted contrasts are those with a cutoff p-value of 0.05.

# Function to analyze the data and plot stats
analysis_function <- function(data, cutoff_pvalue, cutoff_FC) {
  
  # Mixed-effects model with random effects
  model <- lmer(log(conc) ~ species * virus 
                + (1|line) + (1|line:date), 
                data = data)
  
  # Extract random effects data and convert to dataframe
  singular = isSingular(model)
  random_effects_df <- as.data.frame(VarCorr(model)) %>%
    mutate(proportion = round(100 * (vcov / sum(vcov)), 2)) %>%
    mutate(singular = singular) 
  
  # If model is singular, rerun with only date as random effect
  if (singular) {
    model <- lmer(log(conc) ~ species * virus 
                  + (1|date), 
                  data = data)
  }
  
  # Run ANOVA and extract pvalues
  anova <- anova(model)
  anova_df <- as.data.frame(anova) 
  
  # Extract fixed effects coefficients and convert to dataframe
  fixed_effects_df <- as.data.frame(summary(model)$coefficients) %>%
    rownames_to_column(var = "term")
  
  # Generate contrasts for virus comparisons within each bacteria using emmeans with Holm adjustment
  emmeans_model <- emmeans(model, ~ species * virus)
  emmeans_virus <- pairs(emmeans_model, simple = "virus", adjust = "holm")
  
  # Extract virus contrasts from emmeans_model and format the dataframe
  contrasts_df <- as.data.frame(summary(emmeans_virus)) %>%
    mutate(contrast = str_replace_all(contrast, "\\s+", "")) %>%
    separate(contrast, into = c("virus1", "virus2"), sep = "-") %>%
    mutate(condition1 = paste(species, virus1, sep = "."),
           condition2 = paste(species, virus2, sep = "."),
           contrast = paste(condition1, condition2, sep = " - ")) %>%
    select(-c(species, virus1, virus2)) %>%
    relocate(contrast)
  
  contrasts_df$group1 <- data$bacteria_label[match(contrasts_df$condition1, data$Combined)]
  contrasts_df$group2 <- data$bacteria_label[match(contrasts_df$condition2, data$Combined)]

  # Adds predictions based on fixed effects, averaged over random effects. It gives a population estimate
  data <- cbind(data, predval = predict(model,re.form = NA, se.fit = TRUE))
  
  # Backtransform the predicted values and CIs
  transf_preds_df <- data %>% 
    group_by(Combined, bacteria_label, species, virus) %>%
    summarise(mean.predval.fit = mean(predval.fit),
              mean.predval.se.fit = mean(predval.se.fit),
              mean.conc = mean(conc),
              .groups = 'drop') %>%
    mutate(pconc = exp(mean.predval.fit),
           pconc_min = exp(mean.predval.fit - 2*mean.predval.se.fit),
           pconc_max = exp(mean.predval.fit + 2*mean.predval.se.fit)) %>%
    mutate(condition = interaction(species, virus, sep = "."))
  
  # Calculate fold-change values for each contrast and highlight the most relevant is pvalue < cutoff and FC > cutoff
  contrasts_df <- contrasts_df %>%
    ungroup() %>%
    left_join(select(transf_preds_df, condition, pconc), by = join_by(condition1 == condition)) %>%
    left_join(select(transf_preds_df, condition, pconc), by = join_by(condition2 == condition), suffix = c(".1", ".2")) %>%
    mutate(FC = pconc.1 / pconc.2,
           log2FC = log2(FC),
           FC = if_else(FC < 1, 1 / FC, -FC),
           highlighted = case_when(
             p.value < cutoff_pvalue & abs(log2FC) >= cutoff_FC ~ "*",
             TRUE ~ "")) %>%
    select(contrast, condition1, condition2, p.value, pconc.1, pconc.2, FC, log2FC, highlighted, group1, group2)
  
  return(list(anova = anova,
              random_effects = random_effects_df,
              fixed_effects = fixed_effects_df,
              contrasts = contrasts_df,
              transf_preds = transf_preds_df))
}

all_stats <- analysis_function(avg_data_4, cutoff_pvalue = 0.05, cutoff_FC = 0)
kable(all_stats$anova)

	Sum Sq	Mean Sq	NumDF	DenDF	F value	Pr(>F)
species	0.4021894	0.1340631	3	27.99994	0.3535979	0.7868694
virus	26.9524908	26.9524908	1	27.99994	71.0884763	0.0000000
species:virus	0.2501575	0.0833858	3	27.99994	0.2199341	0.8817301

kable(all_stats$random_effects)

grp	var1	var2	vcov	sdcor	proportion	singular
line:date	(Intercept)	NA	0.0031961	0.0565344	0.68	FALSE
line	(Intercept)	NA	0.0911494	0.3019095	19.25	FALSE
Residual	NA	NA	0.3791401	0.6157435	80.07	FALSE

kable(all_stats$fixed_effects)

term	Estimate	Std. Error	df	t value	Pr(>\|t\|)
(Intercept)	1.2933904	0.3498347	4.518362	3.6971474	0.0168281
speciesSpn	0.1477809	0.3894304	27.999937	0.3794795	0.7071947
speciesHin	-0.0502572	0.3894304	27.999937	-0.1290531	0.8982382
speciesDpi	-0.0848277	0.3894304	27.999937	-0.2178251	0.8291450
virusT	1.3871569	0.3894304	27.999937	3.5620150	0.0013412
speciesSpn:virusT	0.2673291	0.5507377	27.999937	0.4854017	0.6311680
speciesHin:virusT	0.3213191	0.5507377	27.999937	0.5834339	0.5642717
speciesDpi:virusT	0.4296097	0.5507377	27.999937	0.7800622	0.4418989

kable(all_stats$contrasts %>% select(-group1, -group2, -condition1, -condition2))

contrast	p.value	pconc.1	pconc.2	FC	log2FC	highlighted
NB.F - NB.T	0.0013412	3.645124	14.59308	4.003452	-2.001244	*
Spn.F - Spn.T	0.0002151	4.225643	22.10176	5.230391	-2.386919	*
Hin.F - Hin.T	0.0001477	3.466458	19.13672	5.520542	-2.464810	*
Dpi.F - Dpi.T	0.0000693	3.348668	20.60079	6.151935	-2.621040	*

saveRDS(all_stats, file.path(outputs_folder,  paste0(dataframeID, "_Caspase_stats.rds")))

Plots

# Function to plot the CFUs using the statistical model
plot_function <- function(true_data, stats_data, contrasts, ymax) {
  
  # Plot
  plot <- ggplot() +
    
    geom_jitter(data = true_data, 
                aes(x = bacteria_label, y = conc, color = species,
                    shape = line),
                width = 0.2, size = 3, alpha = 0.85, stroke = 0.75, show.legend = TRUE) +   

    scale_color_manual(values = Bac.colors) +
    scale_shape_manual(values = c(19,17)) +
    
    scale_y_log10(breaks = trans_breaks("log10", function(x) 10^x),
                  labels = trans_format("log10", math_format(10^.x))) +
    
    geom_point(data = stats_data, aes(x = bacteria_label, y = pconc), shape = 3, size = 3) +
    geom_errorbar(data = stats_data, width = 0.5,
                  aes(x = bacteria_label, y = pconc, ymax = pconc_max, ymin = pconc_min)) +
    
    # General style
    scale_x_discrete(expand = c(0,0.5)) +
    labs(x = "",
         y = "Caspase (total units)",
         caption = paste("RSV", "Bact", "DPVI", sep = "\n"),
         color = "Bacteria", color = "Bacteria", shape = "HNO Line") +  
    theme_bw() +
    theme(panel.grid = element_blank(), 
          legend.text = element_markdown(),
          text = element_text(size = 18, color = "black"),
          plot.caption = element_text(size = 13, face = "bold", hjust = -0.13, vjust = 24),
          axis.text.x = element_markdown(),
          axis.text.y = element_text(color = "black"),
          plot.margin = unit(c(0.1,0.1,-0.8,0.1), "in"))
  
  # Add significance asterisks for highlighted contrasts
  contrast_sign <- contrasts %>%
    filter(highlighted != "") %>%
    mutate(plot = "*")
  
  if (nrow(contrast_sign) > 0) {
    plot <- plot +
      stat_pvalue_manual(contrast_sign, label = "plot", y.position = ymax, 
                         step.increase = 0.03, tip.length = 0.01, bracket.shorten = 0.2, vjust = 0.6, bracket.size = 0.5)
  }
  
  return(plot)
}

# This plot was not used in the manuscript
Caspase_plot <- plot_function(true_data = avg_data_4, stats_data = all_stats$transf_preds, contrasts = all_stats$contrasts, ymax = 1.6)

boxplot <- ggplot(avg_data_4, aes(x = bacteria_label, y = conc)) +
  geom_boxplot(outliers = F ) +
  geom_jitter(aes(x = bacteria_label, y = conc, color = species, shape = line),
              width = 0.2, size = 3, alpha = 0.85, stroke = 0.75, show.legend = TRUE) +   
  geom_hline(yintercept = 0.16*8, linetype = "dashed", color = "black", linewidth = 0.8) + 
  scale_color_manual(values = Bac.colors) +
  scale_y_log10(limits = c(NA, 10^2),
                breaks = 10^(0:2), 
                labels = trans_format("log10", math_format(10^.x))) +
  labs(x = "",
       y = "Caspase units / organoid",
       caption = paste("RSV", "Bact", "DPVI", sep = "\n"),
       color = "Bacteria", shape = "HNO Line") +  
  theme_bw() +
  theme(panel.grid = element_blank(), 
        text = element_text(size = 18, color = "black"),
        plot.caption = element_text(size = 13, face = "bold", hjust = -0.13, vjust = 24),
        axis.text.x = element_markdown(),
        axis.text.y = element_text(color = "black"),
        plot.margin = unit(c(0.1,0.1,-0.8,0.1), "in"))

boxplot

ggsave(boxplot, filename = file.path(outputs_folder,  paste0(dataframeID, "_BoxplotCaspase.png")), width = 7, height = 5)
saveRDS(boxplot, file = file.path(outputs_folder,  paste0(dataframeID, "_BoxplotCaspase.rds")))