RSV RT-qPCR

# loading packages
library(tidyverse)
library(skimr)
library(ggtext)
library(scales)
library(lme4)
library(lmerTest) 
library(emmeans)
library(knitr)
library(ggpubr)

Data Input

All data inputs and outputs in this repo are relative to FOLDER_PATH. Please, save the provided data frames in RDS format in the dataframes folder.

# Define directory paths based on R.environ
FOLDER_PATH <- Sys.getenv("BOX_PATH_RSVBacPublication") 
dataframes_folder <- file.path(FOLDER_PATH, "dataframes")

# Create subfolder for output files
outputs_folder <- file.path(FOLDER_PATH, "outputs/qRT-PCR")
if (!file.exists(outputs_folder)) {
  dir.create(outputs_folder, recursive = TRUE)
}

# Colors
Bac.colors <- c("#CC9564", "#a89de0", "#f54590", "#2e67f2")
Bac.colors.ext <- c("#CC9564", "#a89de0", "#f54590", "#2e67f2", "grey", "grey")
Spn.colors.K <- c("#a89de0")
NB.colors.K <- c("#CC9564")

# Define batch IDs
batchIDs <- c("250114", "250510", "250522", "251206", "251221")

# Read all RDS files into a named list
data_list <- setNames(
  lapply(batchIDs, function(id) {
    readRDS(file.path(dataframes_folder, paste0("RSVbac_RSVqPCR_", id, ".rds")))
  }),
  batchIDs
)

Concentrations from Cq values

For each batch, we fit a linear model to the standard curve data and use the model coefficients to calculate the concentration of each sample based on its Cq value. We also generate plots for the standard curves and individual samples and controls. Standard curve was created with serial dilutions of the RSV ultramer starting with a 4.294 x 10^12 copies/µl stock. The final concentration per organoid is calculated considering that 2µl of RNA extract and standard were used in each qPCR reaction and that RNA samples were extracted in a final volume of 100µl.

analyze_qPCR_data <- function(data, batchID) {
  
  # Split sample and standards dataframes
  data_standards <- data %>%
    filter(sampletype == "standard") %>%
    mutate(label = case_when(
      enzyme == "T" ~ "Standard",
      enzyme == "F" ~ "Control No RT"))
  
  data_samples <- data %>%
    filter(sampletype == "sample") %>%
    select(-concentration) %>%
    mutate(label = case_when(
      enzyme == "T" & virus == "T" ~ "virus T",
      enzyme == "T" & virus == "F" ~ "virus F",
      enzyme == "T" & assay == "stock" ~ "virus stock",
      enzyme == "F" ~ "Control No RT")) %>%
    mutate(label = factor(label, levels = c("Control No RT", "virus F", "virus T", "virus stock")))
  
  # Filter out rows to use for linear model
  data_standards_model <- data_standards %>%
    filter(enzyme == "T") %>%
    filter(!is.na(Cq)) 
  
  # Replace NA Cq values and concentrations for standards
  data_standards <- data_standards %>%
    mutate(
      Cq = coalesce(ifelse(enzyme == "F", 40, Cq), 40),
      concentration = ifelse(enzyme == "F", 1, concentration)
    )
  
  # Fit a linear model
  lmStandard <- lm(Cq ~ log(concentration), data_standards_model)
  
  # Extract coefficients and R-squared value from model
  coefficients <- lmStandard$coefficients
  lm_eq <- paste("y =", round(coef(lmStandard)[2], 3), "x +", round(coef(lmStandard)[1], 3))
  r_squared <- round(summary(lmStandard)$r.squared, 3)
  
  # Create a data frame for the fitted line within the range of standard concentrations
  line_data <- data.frame(
    log_concentration = seq(min(log(data_standards_model$concentration)),
                            max(log(data_standards_model$concentration)),
                            length.out = 100)
  ) %>%
    mutate(Cq = coefficients[1] + coefficients[2] * log_concentration)
  
  # Standard curve plot
  standards_plot <- ggplot(data_standards, aes(x = log(concentration), y = Cq)) +
    geom_point(aes(color = label), size = 3, alpha = 0.5) +
    geom_line(data = line_data, aes(x = log_concentration, y = Cq), 
              color = "black", linetype = "dotted", linewidth = 0.8) +
    geom_text(x = max(log(data_standards$concentration)) - 5, y = max(data_standards$Cq) - 5, 
              label = paste("Equation: ", lm_eq, "\nR-squared: ", r_squared)) +
    labs(title = paste("Batch", batchID, " Standard Curve Formula"), x = "ln(Concentration)", y = "Cq", color = "") +
    scale_color_manual(values = c("grey30", "red")) +
    scale_y_continuous(limits = c(0, 45), breaks = seq(0, 45, by = 5)) +
    theme_bw() +
    theme(panel.grid = element_blank(), 
          text = element_text(size = 14, color = "black"))
  
  # Replace NA Cq values with 45
  data_samples$Cq[is.na(data_samples$Cq)] <- 40
  
  # Get the max Cq value from standards used in the model
  detection_limit_Cq <- max(data_standards_model$Cq, na.rm = TRUE)
  
  # Apply it in the concentration calculation
  data_samples <- data_samples %>%
    mutate(concentration = exp((Cq - coefficients[1]) / coefficients[2]),
           concentration = ifelse(Cq > detection_limit_Cq, 429.4, concentration))
  
  # Samples plot
  samples_plot <- ggplot(data_samples, aes(x = log(concentration), y = Cq)) +
    geom_point(aes(color = label), size = 3, alpha = 0.5) +
    geom_line(data = line_data, aes(x = log_concentration, y = Cq), 
              color = "black", linetype = "dotted", linewidth = 0.8) +
    labs(title = "Individual Sample Values", x = "ln(Concentration)", y = "Cq", color = "") +
    scale_color_manual(values = c("grey30", "orange", "blue", "lightgreen")) +
    scale_y_continuous(limits = c(0, 45), breaks = seq(0, 45, by = 5)) +
    theme_bw() +
    theme(panel.grid = element_blank(), 
          text = element_text(size = 14, color = "black"))
  
  # Final concentration calculation
  data_samples <- data_samples %>%
    mutate(
      concentration = case_when(
        assay == "stock" ~ concentration * 40, # Final concentration for stock samples
        TRUE             ~ concentration * 80, # Final concentration per organoid 
      )
    )
  
  # Plot both plots side by side
  print(combined_plot <- ggarrange(standards_plot, samples_plot, ncol = 2, nrow = 1, common.legend = F, legend = "bottom"))
  
  # Return results
  return(list(
    samples = data_samples,
    standards = data_standards
  ))
}

results_list <- mapply(analyze_qPCR_data, data_list, names(data_list), SIMPLIFY = F)

# Combine all samples into a single dataframe
all_samples <- bind_rows(lapply(results_list, `[[`, "samples"), .id = "batchID")

# Save data frame as CSV and R object in the dataframes folder
dataframeID <- "RSVbac"
write_csv(all_samples, file.path(dataframes_folder, paste0(dataframeID, "_RSVqPCR_values.csv")))
saveRDS(all_samples, file.path(dataframes_folder,  paste0(dataframeID, "_RSVqPCR_values.rds")))

# Use this to read the final objects
all_samples <- readRDS(file.path(dataframes_folder, paste0(dataframeID, "_RSVqPCR_values.rds")))

# Average technical replicates
avg_data <- all_samples %>%
  filter(enzyme == "T") %>%
  group_by(date, line, species, virus, location, time_inf, stock_name, assay, Combined, bacteria_label) %>%
  summarise(avg_conc = mean(concentration)) %>%
  ungroup()

Stats and Plots

Time Course

Samples without virus (virus = “F”), were removed from the plot

TimeCourse <- avg_data %>%
  filter(assay == "TimeCourse") %>%
  filter(virus == "T")

# Summarize data to plot median lines
summary_TimeCourse <- TimeCourse %>%
  group_by(time_inf, location) %>%
  summarise(median = median(avg_conc, na.rm = TRUE), .groups = "drop") 
kable(summary_TimeCourse)

time_inf	location	median
2	Ap	222886963
2	Bas	34352
4	Ap	707709888
4	Bas	34352
6	Ap	1184834458
6	Bas	34352
8	Ap	1166484038
8	Bas	34352
10	Ap	649749972
10	Bas	34352

plot <- ggplot() +
  geom_jitter(data = TimeCourse, 
              aes(x = time_inf, y = avg_conc, color = location, shape = line), 
              width = 0.15, height = 0, size = 3, alpha = 0.85) +
  geom_line(data = summary_TimeCourse, 
            aes(x = time_inf, y = median, color = location, group = location), 
            linewidth = 1) +
  geom_hline(yintercept = 429.4*80, linetype = "dashed", color = "black", linewidth = 0.8) + 
  scale_shape_manual(values = c(19, 17)) +
  scale_color_manual(values = c("#1D793E", "#FF8200")) +
  scale_x_continuous(breaks = seq(2, 10, by = 2)) +
  scale_y_log10(breaks = 10^(2:9), 
                labels = trans_format("log10", math_format(10^.x))) +
  labs(x = "Days post viral infection (DPVI)",
       y = "RSV copy # / organoid",
       color = "Location", shape = "HNO Line") +
  theme_bw() +
  theme(panel.grid = element_blank(), 
        text = element_text(size = 18, color = "black"),
        axis.text.y = element_text(color = "black"),
        axis.text.x = element_text(color = "black"),
        plot.margin = unit(c(0.1,0.1,0.1,0.1), "in"))

ggsave(plot, filename = file.path(outputs_folder, paste0(dataframeID, "_RSVqPCR_TimeCourse.png")), width = 7, height = 5)
saveRDS(plot, file = file.path(outputs_folder, paste0(dataframeID, "_RSVqPCR_TimeCourse.rds")))

plot

Virus + Bacteria at 4DPVI

RSVbac <- avg_data %>%
  filter(assay == "RSVbac") 

RSVbac$species <- factor(RSVbac$species, levels = c("NB", "Spn", "Hin", "Dpi"))

No-bacteria/no-virus controls (Combined = “NB.F”) samples were removed from downstream plots and stats.

RSVbac <- RSVbac %>%
  filter(Combined != "NB.F") %>%
  droplevels()

Analysis with mixed-effects models, using line and date as random effects and the Combined condition (i.e., bacteria and RSV presence) as fixed effect. NB.F condition was removed from the stats. If the model is singular, it is rerun with only date as random effect. Contrasts are calculated using emmeans with Holm adjustment for multiple comparisons across all conditions. Predicted values and confidence intervals are backtransformed to PFU scale for plotting.

Highlighted contrasts are those with a cutoff p-value of 0.05.

analysis_function <- function(raw_data, loc, cutoff_pvalue, cutoff_FC) {
  
  # Filter data by location
  data_stats <- raw_data %>%
    filter(location == loc) %>%
    droplevels()
  
  # Mixed-effects model with random effects
  model <- lmer(log(avg_conc) ~ Combined
                + (1|line) + (1|line:date), 
                data = data_stats)
  
  # Extract random effects data and convert to dataframe
  singular <- isSingular(model)
  random_effects_df <- as.data.frame(VarCorr(model)) %>%
    mutate(proportion = round(100 * (vcov / sum(vcov)), 2)) %>%
    mutate(singular = singular) 
  
  # If model is singular, rerun with only date as random effect
  if (singular) {
    model <- lmer(log(avg_conc) ~ Combined + (1|date), data = data_stats)
  }
  
  # Run ANOVA and extract pvalues
  anova <- anova(model)
  anova_pvalue <- anova$`Pr(>F)`[1]
  
  # If ANOVA is not significant, stop here. Otherwise continue with post-hoc analysis
  if (is.na(anova_pvalue) || anova_pvalue >= cutoff_pvalue) {
    return(list(
      anova = anova,
      random_effects = random_effects_df,
      note = "ANOVA not significant; post-hoc tests skipped"
    ))
  }
  
  # Extract fixed effects coefficients and convert to dataframe
  fixed_effects_df <- as.data.frame(summary(model)$coefficients) %>%
    rownames_to_column(var = "term")
  
  # Generate contrasts for Combined condition using emmeans with Holm adjustment
  emmeans_model <- emmeans(model, ~ Combined)
  emmeans_contrasts <- pairs(emmeans_model, adjust = "holm")
  
  contrasts_df <- as.data.frame(summary(emmeans_contrasts)) %>%
    separate(contrast, into = c("condition1", "condition2"), sep = " - ", remove = FALSE)
  
  contrasts_df$group1 <- data_stats$bacteria_label[match(contrasts_df$condition1, data_stats$Combined)]
  contrasts_df$group2 <- data_stats$bacteria_label[match(contrasts_df$condition2, data_stats$Combined)]
  
  # Adds predictions based on fixed effects, averaged over random effects. It gives a population estimate
  data_stats <- cbind(data_stats, predval = predict(model, re.form = NA, se.fit = TRUE))
  
  # Backtransform the predicted values and CIs
  transf_preds_df <- data_stats %>%
    group_by(Combined, bacteria_label) %>%
    summarise(mean.predval.fit = mean(predval.fit),
              mean.predval.se.fit = mean(predval.se.fit),
              mean.PFU = mean(avg_conc),
              .groups = 'drop') %>%
    mutate(pconc = exp(mean.predval.fit),
           pconc_min = exp(mean.predval.fit - 2*mean.predval.se.fit),
           pconc_max = exp(mean.predval.fit + 2*mean.predval.se.fit)) 
  
  # Calculate fold-change values for each contrast and highlight the most relevant
  contrasts_df <- contrasts_df %>%
    ungroup() %>%
    left_join(select(transf_preds_df, Combined, pconc), by = join_by(condition1 == Combined)) %>%
    left_join(select(transf_preds_df, Combined, pconc), by = join_by(condition2 == Combined), suffix = c(".1", ".2")) %>%
    mutate(FC = pconc.1 / pconc.2,
           log10FC = log10(FC),
           FC = if_else(FC < 1, 1 / FC, -FC),
           highlighted = case_when(
             p.value < cutoff_pvalue & abs(log10FC) >= cutoff_FC ~ "*",
             TRUE ~ "")) %>%
    select(contrast, condition1, condition2, p.value, pconc.1, pconc.2, FC, log10FC, highlighted, group1, group2)

    return(list(
    anova = anova,
    random_effects = random_effects_df,
    fixed_effects = fixed_effects_df,
    contrasts = contrasts_df,
    transf_preds = transf_preds_df
  ))
}

plot_function <- function(raw_data, loc, stats_data, contrasts, ypval) {
  
  # Filter data by location
  raw_data <- raw_data %>%
    filter(location == loc) %>%
    droplevels()
  
  # Base plot
  plot <- ggplot() +
    geom_jitter(data = raw_data, 
                aes(x = bacteria_label, y = avg_conc, color = species, shape = line),
                width = 0.15, height = 0, size = 3, alpha = 0.85, stroke = 0.75, show.legend = TRUE) +   
    
    scale_color_manual(values = Bac.colors) +
    scale_shape_manual(values = c(19, 17)) +
    
    scale_x_discrete(expand = c(0, 0.5)) +
    
    scale_y_log10(limits = c(NA, 10^10.5),
                  breaks = 10^(1:10), 
                  labels = trans_format("log10", math_format(10^.x))) +
    
    geom_hline(yintercept = 429.4*80, linetype = "dashed", color = "black", linewidth = 0.8) +
    
    labs(x = "",
         y = "RSV copy # / organoid",
         caption = paste("RSV", "Bact", "DPVI", sep = "\n"),
         color = "Bacteria", shape = "HNO Line") +  
    theme_bw() +
    theme(panel.grid = element_blank(), 
          text = element_text(size = 18, color = "black"),
          plot.caption = element_text(size = 13, face = "bold", hjust = -0.13, vjust = 24),
          axis.text.x = element_markdown(),
          axis.text.y = element_text(color = "black"),
          plot.margin = unit(c(0.1,0.1,-0.8,0.1), "in"))
  
  # Add stats layers only if stats_data is not NULL and has rows
  if (!is.null(stats_data) && nrow(stats_data) > 0) {
    plot <- plot +
      geom_point(data = stats_data, aes(x = bacteria_label, y = pconc), shape = 3, size = 3) +
      geom_errorbar(data = stats_data, width = 0.5,
                    aes(x = bacteria_label, y = pconc, ymax = pconc_max, ymin = pconc_min))
    
    # Add significance asterisks for highlighted contrasts
    contrast_sign <- contrasts %>%
      filter(highlighted != "") %>%
      mutate(plot = "*")
    
    if (nrow(contrast_sign) > 0) {
      plot <- plot +
        stat_pvalue_manual(contrast_sign, label = "plot", y.position = ypval, 
                           step.increase = 0.03, tip.length = 0.01, bracket.shorten = 0.2, vjust = 0.6, bracket.size = 0.5)
    }
  }
  
  return(plot)
}

Apical

stats_Ap <- analysis_function(RSVbac, loc = "Ap", cutoff_pvalue = 0.05, cutoff_FC = 0)
kable(stats_Ap$anova)

	Sum Sq	Mean Sq	NumDF	DenDF	F value	Pr(>F)
Combined	3.671692	1.223897	3	25.10559	15.71766	5.8e-06

kable(stats_Ap$random_effects)

grp	var1	var2	vcov	sdcor	proportion	singular
line:date	(Intercept)	NA	0.5534351	0.7439322	87.67	TRUE
line	(Intercept)	NA	0.0000000	0.0000000	0.00	TRUE
Residual	NA	NA	0.0778677	0.2790478	12.33	TRUE

kable(stats_Ap$fixed_effects)

term	Estimate	Std. Error	df	t value	Pr(>\|t\|)
(Intercept)	20.2159518	0.2512574	10.87021	80.4591338	0.0000000
CombinedSpn.T	-0.8018105	0.1297802	25.13421	-6.1782196	0.0000018
CombinedHin.T	-0.0904365	0.1297802	25.13421	-0.6968439	0.4922958
CombinedDpi.T	-0.1307738	0.1247940	25.05257	-1.0479174	0.3046818

kable(stats_Ap$contrasts %>% select(-group1, -group2, -condition1, -condition2))

contrast	p.value	pconc.1	pconc.2	FC	log10FC	highlighted
NB.T - Spn.T	0.0000110	602110660	270056391	-2.229574	0.3482219	*
NB.T - Hin.T	0.9850294	602110660	550047545	-1.094652	0.0392761
NB.T - Dpi.T	0.9141067	602110660	528301679	-1.139710	0.0567943
Spn.T - Hin.T	0.0000651	270056391	550047545	2.036788	-0.3089458	*
Spn.T - Dpi.T	0.0000956	270056391	528301679	1.956264	-0.2914275	*
Hin.T - Dpi.T	0.9850294	550047545	528301679	-1.041162	0.0175182

saveRDS(stats_Ap, file = file.path(outputs_folder,  paste0(dataframeID, "_RSVqPCR_stats_A.rds")))

plot_Ap <- plot_function(RSVbac, loc = "Ap", stats_data = stats_Ap$transf_preds, contrasts = stats_Ap$contrasts, ypval = 10)
plot_Ap

ggsave(plot_Ap, filename = file.path(outputs_folder,  paste0(dataframeID, "_RSVqPCR_A.png")), width = 7, height = 5)
saveRDS(plot_Ap, file = file.path(outputs_folder,  paste0(dataframeID, "_RSVqPCR_A.rds")))

Basal

stats_Bas <- analysis_function(RSVbac, loc = "Bas", cutoff_pvalue = 0.05, cutoff_FC = 0)
kable(stats_Bas$anova)

	Sum Sq	Mean Sq	NumDF	DenDF	F value	Pr(>F)
Combined	0	0	3	0	3.765826	1

saveRDS(stats_Bas, file = file.path(outputs_folder,  paste0(dataframeID, "_RSVqPCR_stats_B.rds")))

plot_Baso <- plot_function(RSVbac, loc = "Bas", stats_data = stats_Bas$transf_preds, contrasts = stats_Bas$contrasts, ypval = 8)
plot_Baso

ggsave(plot_Baso, filename = file.path(outputs_folder,  paste0(dataframeID, "_RSVqPCR_B.png")), width = 7, height = 5)
saveRDS(plot_Baso, file = file.path(outputs_folder,  paste0(dataframeID, "_RSVqPCR_B.rds")))

Spn Kinetics

SpnKinetics <- avg_data %>%
  filter(assay == "SpnKinetics") %>%
  filter(virus == "T")

Stats and Plots

Analysis with mixed-effects models, using line and date as random effects and the time_inf condition as fixed effect. Ratios are calculated relative to the corresponding NB value for each line, date, location, and time_inf Contrasts are calculated using emmeans with Holm adjustment for multiple comparisons against zero (this is the log2 ratio to NB).

# Calculate ratio to the corresponding species = NB
log2FC_conc_SpnKinetics <- SpnKinetics %>%
  group_by(date, line, location, time_inf) %>%
  reframe(date = date,
          location = location,
          line = line,
          time_inf = time_inf,
          species = species,
          FC_conc = avg_conc / avg_conc[species == "NB"],
          log2FC_conc = log2(FC_conc)) %>%
  filter(species != "NB")

# Mixed-effects model analysis function for kinetics data
analysis_function_kinetics <- function(raw_data, loc, cutoff_pvalue) {
  
  # Filter data by location and convert time_inf to factor
  data_stats <- raw_data %>%
    filter(location == loc) %>%
    mutate(time_inf = as.factor(time_inf)) 
  
  summary <- data_stats %>%
    group_by(time_inf) %>%
    summarise(
      mean_FC = mean(FC_conc, na.rm = TRUE),
      mean_log2FC = mean(log2FC_conc, na.rm = TRUE)
    )
  
  # Mixed-effects model with random effects
  model <- lmer(log2FC_conc ~ time_inf  
                + (1|line) + (1|line:date), 
                data = data_stats)
  
  # Extract random effects data and convert to dataframe
  singular <- isSingular(model)
  random_effects_df <- as.data.frame(VarCorr(model)) %>%
    mutate(proportion = round(100 * (vcov / sum(vcov)), 2)) %>%
    mutate(singular = singular) 
  
  # If model is singular, rerun without random effects
  if (singular) {
    model <- lm(log2FC_conc ~ time_inf, data = data_stats)
  }
  
  # Run ANOVA 
  anova_result <- anova(model)
  anova_pvalue <- anova_result$`Pr(>F)`[1]
  
  # If ANOVA is not significant, stop here. Otherwise continue with post-hoc analysis
  if (is.na(anova_pvalue) || anova_pvalue >= cutoff_pvalue) {
    return(list(
      anova = anova_result,
      random_effects = random_effects_df,
      note = "ANOVA not significant; post-hoc tests skipped"
    ))
  }
  
  # Extract fixed effects coefficients and convert to dataframe
  fixed_effects_df <- as.data.frame(summary(model)$coefficients) %>%
    rownames_to_column(var = "term")
  
  # Generate contrasts for time_inf condition using emmeans with Holm adjustment vs zero
  emmeans_model <- emmeans(model, ~ time_inf)
  emmeans_test_vs_zero <- test(emmeans_model, null = 0, adjust = "holm")
  
  emmeans_test_vs_zero_df <- as.data.frame(summary(emmeans_test_vs_zero)) 
  emmeans_test_vs_zero_df <- merge(emmeans_test_vs_zero_df, summary)
  
  return(list(
    anova = anova_result,
    random_effects = random_effects_df,
    fixed_effects = fixed_effects_df,
    emmeans_test_vs_zero = emmeans_test_vs_zero_df
  ))
}

plotK_function_log2FC <- function(raw_data, stats_results = NULL, loc) {
  
  # Filter raw data
  plot_data <- raw_data %>%
    filter(location == loc) %>%
    mutate(time_inf = as.factor(time_inf)) 
  
  # Create base plot
  plot <- ggplot() +
    # Reference line at 0
    geom_hline(yintercept = 0, linetype = "dashed", 
               color = NB.colors.K, linewidth = 0.8) +
    
    # Add raw data points
    geom_jitter(data = plot_data, 
                aes(x = time_inf, y = log2FC_conc, 
                    color = species, shape = line), 
                width = 0.15, height = 0, size = 3, 
                alpha = 0.85, stroke = 0.75, show.legend = TRUE) +
    
    # Add boxplot
    geom_boxplot(data = plot_data, 
                 aes(x = time_inf, y = log2FC_conc, color = species), 
                 alpha = 0.3, outlier.shape = NA, show.legend = FALSE) +
    
    # Scales and colors
    scale_color_manual(values = Spn.colors.K) +
    scale_shape_manual(values = c(19, 17)) +
    #scale_y_continuous(limits = c(-15, 1)) +
    
    # Labels
    labs(x = "DPVI",
         y = "log2(Spn/Unc) RSV copy #",
         color = "Bacteria", shape = "HNO Line") +
    
    # Theme
    theme_bw() +
    theme(
      panel.grid = element_blank(), 
      text = element_text(size = 18, color = "black"),
      axis.text.x = element_text(color = "black"),
      axis.text.y = element_text(color = "black")
    )
  
  # Conditionally add significance layers if stats_results are provided and valid
  add_sig <- !is.null(stats_results) && "emmeans_test_vs_zero" %in% names(stats_results)
  
  if (add_sig) {
    sig_df <- stats_results$emmeans_test_vs_zero
    
    # Align factor levels to plot_data$time_inf to ensure correct x positions
    sig_df <- sig_df %>%
      mutate(
        time_inf = factor(time_inf, levels = levels(plot_data$time_inf)),
        signif = case_when(p.value < 0.05 ~ "*", TRUE ~ ""),
        yend   = case_when(p.value < 0.05 ~ emmean, TRUE ~ 0),
        y_position = -1,  
        x_numeric  = as.numeric(time_inf),
        x_position = x_numeric + 0.4  # offset to avoid overlapping with box/points
      )
    
    plot <- plot +
      # Star placed just to the right of the segment
      geom_text(data = sig_df,
                aes(x = x_position + 0.08, y = y_position, label = signif),
                size = 5, color = "black", fontface = "bold", vjust = 0.5) +
      # Segment from 0 to emmean (or 0 if non-significant)
      geom_segment(data = sig_df,
                   aes(x = x_position, xend = x_position, y = 0, yend = yend),
                   color = "black", linewidth = 0.8)
  }
  
  return(plot)
}

Apical

stats_SpnKinetics_A <- analysis_function_kinetics(log2FC_conc_SpnKinetics, loc = "Ap", cutoff_pvalue = 0.05)
stats_SpnKinetics_A$anova

Type III Analysis of Variance Table with Satterthwaite's method
         Sum Sq Mean Sq NumDF  DenDF F value Pr(>F)
time_inf 1.9943 0.66478     3 10.144  2.2558 0.1435

stats_SpnKinetics_A$random_effects

        grp        var1 var2       vcov     sdcor proportion singular
1 line:date (Intercept) <NA> 0.05598443 0.2366103       8.89    FALSE
2      line (Intercept) <NA> 0.27941344 0.5285957      44.34    FALSE
3  Residual        <NA> <NA> 0.29469406 0.5428573      46.77    FALSE

stats_SpnKinetics_A$fixed_effects

NULL

stats_SpnKinetics_A$emmeans_test_vs_zero

NULL

saveRDS(stats_SpnKinetics_A, file = file.path(outputs_folder, paste0(dataframeID, "_RSVqPCR_SpnKinetics_stats_A.rds")))

plot_SpnKinetics_A <- plotK_function_log2FC(log2FC_conc_SpnKinetics, stats_SpnKinetics_A, loc = "Ap")
plot_SpnKinetics_A

ggsave(plot_SpnKinetics_A, filename = file.path(outputs_folder, paste0(dataframeID, "_RSVqPCR_SpnKinetics_A.png")), width = 7, height = 5)
saveRDS(plot_SpnKinetics_A, file = file.path(outputs_folder, paste0(dataframeID, "_RSVqPCR_SpnKinetics_A.rds")))

Basal

# Not possible to run stats, as all data are zeros

plot_SpnKinetics_B <- plotK_function_log2FC(log2FC_conc_SpnKinetics, loc = "Bas")
plot_SpnKinetics_B

ggsave(plot_SpnKinetics_B, filename = file.path(outputs_folder, paste0(dataframeID, "_RSVqPCR_SpnKinetics_B.png")), width = 7, height = 5)
saveRDS(plot_SpnKinetics_B, file = file.path(outputs_folder, paste0(dataframeID, "_RSVqPCR_SpnKinetics_B.rds")))

Viral Stocks

VirusStocks <- avg_data %>%
  filter(assay == "stock") %>%
  select(stock_name, avg_conc)
kable(VirusStocks)

stock_name	avg_conc
P4_mine	122758856
P5.1	209899579
P5.2	226394064
Piedra	84778764