HNOBac Manuscript
  1. Methods
  2. LDH 48h
  • Introduction
  • Methods
    • RNASeq
    • Cell Counts & TEER
    • CFUs 48h
    • CFUs Epithelial Lines
    • LDH 48h
    • LDH Epithelial Lines
    • Cytokines
    • HEK-Blue
  • R Session Info

Table of contents

  • Data Input and Selection
    • File Paths
    • Data clean-up
    • Saving files
  • Stats and Plots
    • File Paths
    • Analysis for each Temp
    • Analysis 34 vs. 37
    • Merged Summary Files

LDH 48h

library(tidyverse)
library(readxl)
library(ggpubr)
library(ggtext)
library(ggnewscale)
library(scales)
library(lme4)
library(afex)
library(emmeans)
library(gridExtra)

Data Input and Selection

File Paths

# Folder paths
input_path <- "data/input_data/LDH/"
metadata_path <- "data/metadata/LDH"

# Create subfolders for output files
dataframes_folder <- "data/dataframes"
if (!file.exists("data/dataframes")) {
  dir.create("data/dataframes", recursive = TRUE)
}

# Load data and metadata
input_data <- read_excel(file.path(input_path, "LAK24_0118_LDHData.xlsx")) %>% mutate_if(is.character, factor)
Bac_order <- read_csv(file.path(metadata_path, "Order_Bacteria_LDH.csv"))

Data clean-up

# Filter only samples matched to CFU wells
LDH_data <- input_data %>% 
  filter(CFUmatched == "T")

# Factor Ordering and Styling
LDH_data <- merge(LDH_data, Bac_order, by = "Species") 
LDH_data$bacteria_label <- factor(LDH_data$bacteria_label, levels = Bac_order$bacteria_label)
LDH_data$Line <- fct_recode(LDH_data$Line, "HNO918" = "A", "HNO204" = "B", "HNO919" = "C") 

# Average technical replicates for each individual experiment
LDH_avg <- LDH_data %>%
  group_by(Date, Line, Species, bacteria_label, Temp, Well_Endpoint, Collection_Time) %>% 
  summarise(avg_Value = mean(Value)) %>%
  # Add labels to distinguish final collection times from time 0
  mutate(Collection_Label = if_else(Collection_Time == 24 | Collection_Time == 48, "final", "initial"))

# Calculate fold change between final time point vs. time 0
LDH_FC <- LDH_avg %>%
  group_by(Date, Line, Species, bacteria_label, Temp, Well_Endpoint) %>%
  reframe(FC = avg_Value[Collection_Label == "final"]/avg_Value[Collection_Label == "initial"]) 

Saving files

# Save data frames as CSV files in the output folder
write_csv(LDH_data, file.path(dataframes_folder, "LDH_values.csv"))
write_csv(LDH_FC, file.path(dataframes_folder, "LDH_FC.csv"))

# Save data frames as R objects in the output folder
saveRDS(LDH_data, file.path(dataframes_folder, "LDH_values.rds"))
saveRDS(LDH_FC, file.path(dataframes_folder, "LDH_FC.rds"))

# Cleaning-up all objects from the environment
rm(list = ls())

# Use this to read the final objects
LDH_data <- readRDS("data/dataframes/LDH_values.rds")
LDH_FC <- readRDS("data/dataframes/LDH_FC.rds")

Stats and Plots

File Paths

# Folder paths
dataframes_path <- "data/dataframes"
metadata_path <- "data/metadata/LDH"

# Create subfolders for output files
figures_folder <- "data/outputs/LDH/figures"
if (!file.exists("data/outputs/LDH/figures")) {
  dir.create("data/outputs/LDH/figures", recursive = TRUE)
}
stats_folder <- "data/outputs/LDH/stats"
if (!file.exists("data/outputs/LDH/stats")) {
  dir.create("data/outputs/LDH/stats", recursive = TRUE)
}

# Load data and metadata
LDH_FC <- readRDS("data/dataframes/LDH_FC.rds")
Bac_order <- read_csv(file.path(metadata_path, "Order_Bacteria_LDH.csv"))

Analysis for each Temp

# Function to analyze each temp condition
analysis_function <- function(data, each_temp, each_endpoint, cutoff_pvalue, cutoff_FC, color_error) {
  
  # Subset the data to the selected temp
  data_subset <- data %>%
    filter(Temp == each_temp) %>%
    filter(Well_Endpoint == each_endpoint)
  
  # Mixed-effects model with random effects
  model <- lmer(FC ~ Species 
                + (1|Line) + (1|Line:Date), 
                data = data_subset)
  #Anova
  anova <- anova(model)
  anova_df <- as.data.frame(anova) %>%
    mutate(sign = case_when(
      `Pr(>F)` < cutoff_pvalue ~ "*",
      TRUE ~ "")) %>%
    mutate_if(is.numeric, ~ format(., digits = 2, scientific = TRUE))
  
  # Calculate Individual contrasts
  emmeans_model <- emmeans(model, ~ Species)
  emmeans_contrasts <- pairs(emmeans_model, adjust = "none")    
  
  # Extract random effects and convert to dataframe (if not singular)
  random_effects_df <- as.data.frame(VarCorr(model)) %>%
    mutate(proportion = round(100 * (vcov / sum(vcov)), 2)) 
  
  # Adds predictions based on fixed effects, averaged over random effects. It gives a population estimate
  data_subset <- cbind(data_subset, predval = predict(model,re.form = NA, se.fit = TRUE))
  data_summary_df <- data_subset %>%
    group_by(Species, bacteria_label) %>%
    summarize(mean.real = mean(FC),
              mean.predval = mean(predval.fit), 
              mean.predval.se = mean(predval.se.fit)) %>%
    mutate(max = mean.predval + 2*mean.predval.se,
           min = mean.predval - 2*mean.predval.se)
  
  # Convert contrasts to dataframe and adjust pvalues. Filter contrast to Uncolonized only
  contrasts_df <- as.data.frame(summary(emmeans_contrasts)) %>%
    filter(str_detect(contrast, "Uncolonized")) %>%
    mutate(p.adj.holm = p.adjust(p.value, method = "holm")) %>%
    mutate(sign = case_when(
      p.adj.holm < 0.05 ~ "*",
      TRUE ~ ""))
  
  # Edits to the contrast dataframe to include pvalue brackets in plot
  contrasts_df <- contrasts_df %>%
    separate(contrast, into = c("condition1", "condition2"), sep = " - ", remove = F) 
  
  contrasts_df$group1 <- Bac_order$bacteria_label[match(contrasts_df$condition1, Bac_order$Species)]
  contrasts_df$group2 <- Bac_order$bacteria_label[match(contrasts_df$condition2, Bac_order$Species)]
  
  # Calculate fold-change values for each contrast
  contrasts_df <- contrasts_df %>%
    ungroup() %>%
    left_join(select(data_summary_df, Species, mean.predval), by = join_by(condition1 == Species)) %>%
    left_join(select(data_summary_df, Species, mean.predval), by = join_by(condition2 == Species), suffix = c(".1", ".2")) %>%
    mutate(FC = mean.predval.1 / mean.predval.2,
           FC = if_else(FC < 1, -1 / FC, FC),
           highlighted = case_when(
             FC <= -cutoff_FC ~ "-",
             FC >= cutoff_FC ~ "+",
             TRUE ~ "")) 
  
  # Select p values to plot and define their location
  contrast_sign <- contrasts_df %>%
    filter(sign != "" & highlighted != "") %>%
    mutate(p.adj.holm = format(p.adj.holm, digits = 2, scientific = TRUE))
  
  location <- max(data_subset$FC, na.rm = TRUE) * 1.1
  
  # Standard Boxplot
  plot_1 <- ggplot() +
    geom_boxplot(data = data_subset, aes(x = bacteria_label, y = FC, fill = bacteria_label)) + 
    
    geom_jitter(data = data_subset, aes(x = bacteria_label, y = FC, shape = Line), 
                fill = "grey50", color = "grey30", size = 2, width = 0.05, stroke = 0.75) +
    
    scale_fill_manual(values = c("grey80","#800080","#1E90FF","#927ed1")) +
    scale_shape_manual(values = c(21, 22, 24)) +
    
    labs(x = "",
         y = "Fold change in LDH from -1 hour") +
    
    theme_bw() +
    theme(panel.grid = element_blank(),
          legend.position = "none",
          text = element_text(size = 20), 
          axis.text.x = element_markdown(), 
          axis.text.y = element_text(color = "black"))
  
  # Plot with predicted means and standard errors of the estimates
  plot_2 <- ggplot() +
    geom_point(data = data_subset, 
                aes(x = bacteria_label, y = FC, fill = bacteria_label, color = bacteria_label, shape = Line), 
                position = position_jitterdodge(dodge.width = 0.7, jitter.width = 0.2),
                size = 3, alpha = 0.75, stroke = 0.75) +
    
    geom_point(data = data_summary_df, aes(x = bacteria_label, y = mean.predval), shape = 3, size = 3, color = color_error) +
    geom_errorbar(data = data_summary_df, aes(x = bacteria_label,
                                              y = mean.predval,
                                              ymax = max,
                                              ymin = min),
                  width = 0.4, color = color_error) +
    
    scale_fill_manual(values = c("#5b5b5b","#800080","#1E90FF","#927ed1")) +
    scale_color_manual(values = c("#5b5b5b","#800080","#1E90FF","#927ed1")) +
    scale_shape_manual(values = c(21, 22, 24)) +
    
    scale_y_continuous(expand = c(0.1,0)) +
    
    labs(x = "",
         y = "Fold change in LDH from -1 hour",
         fill = "Bacteria", color = "Bacteria", shape = "HNO Line") +
    
    theme_bw() +
    theme(panel.grid = element_blank(), 
          legend.text = element_markdown(),
          text = element_text(size = 20), 
          axis.text.x = element_markdown(), 
          axis.text.y = element_text(color = "black"))
  
  # Conditionally add p-value annotations layer
  if (nrow(contrast_sign) > 0) {
    plot_2 <- plot_2 +
      stat_pvalue_manual(contrast_sign, label = "p.adj.holm", y.position = location,
                         tip.length = 0.02, bracket.shorten = 0.2, vjust = -0.2, bracket.size = 0.3, size = 2.5)
  } else {
    plot_2 <- plot_2
  }
  
  # Arrange plot and tables for summary pdf
  table <- contrasts_df %>%
    select(condition1, condition2, p.adj.holm, sign, mean.predval.1, mean.predval.2, FC, highlighted) %>%
    mutate(p.adj.holm = format(p.adj.holm, digits = 2, scientific = TRUE))
  
  Tmin <- ttheme_minimal()
  panel <- ggarrange(plot_1 + theme(plot.margin = unit(c(0.25,0.25,0.25,0.25), "in")), 
                     plot_2 + theme(plot.margin = unit(c(0.25,0.25,0.25,0.25), "in")),
                     tableGrob(anova_df, theme = Tmin), 
                     tableGrob(random_effects_df, theme = Tmin, rows = NULL), 
                     tableGrob(table, theme = Tmin, rows = NULL), 
                     ncol = 1, heights = c(0.6, 0.6, 0.1, 0.2, 0.2),
                     labels = c("  Standard Boxplot ", "Predicted Mean ± 2*SE", "    Anova    ", "Random Effects", "   Contrasts  "))
  panel <- annotate_figure(panel, top = text_grob(
    paste0("Analysis for ", each_temp, "C timepoint ", each_endpoint, " h. P-value: ", cutoff_pvalue, " and FC: ", cutoff_FC),
                                                         face = "bold", size = 14, color = "red"))
  
  # Save files
  ggsave(panel, filename = paste0(figures_folder, "/summaryLDH_", each_temp, "C_", each_endpoint, "h.pdf"), width = 9, height = 14)
  ggsave(plot_2, filename = paste0(figures_folder, "/plotLDH_", each_temp, "C_", each_endpoint, "h.png"), width = 7, height = 6)
  saveRDS(plot_2, file.path(figures_folder, paste0("plotLDH_", each_temp, "C_", each_endpoint, "h.rds")))
  write_csv(anova_df, file.path(stats_folder, paste0("anova_", each_temp, "C_", each_endpoint, "h.csv")))
  write_csv(random_effects_df, file.path(stats_folder, paste0("stats_random_effects_", each_temp, "C_", each_endpoint, "h.csv")))
  write_csv(contrasts_df, file.path(stats_folder, paste0("stats_contrasts_", each_temp, "C_", each_endpoint, "h.csv")))
  write_csv(data_summary_df, file.path(stats_folder, paste0("stats_summary_", each_temp, "C_", each_endpoint, "h.csv")))
  
  return(list(
    anova = anova_df,
    random_effects = random_effects_df,
    contrasts = contrasts_df,
    data_summary = data_summary_df,
    plot_1 = plot_1,
    plot_2 = plot_2
  ))
}

34C (Main Data)

analysis_function(LDH_FC, each_temp = "34", each_endpoint = "48", cutoff_pvalue = 0.05, cutoff_FC = 1, color_error = "black")
$anova
         Sum Sq Mean Sq NumDF   DenDF F value  Pr(>F) sign
Species 1.9e+02 6.3e+01     3 6.6e+01   4e+00 1.1e-02    *

$random_effects
        grp        var1 var2       vcov     sdcor proportion
1 Line:Date (Intercept) <NA>  0.0000000 0.0000000       0.00
2      Line (Intercept) <NA>  0.3787548 0.6154306       2.36
3  Residual        <NA> <NA> 15.6531696 3.9564087      97.64

$contrasts
           contrast condition1  condition2   estimate       SE       df
1 Dpi - Uncolonized        Dpi Uncolonized  0.5376209 1.261251 51.62153
2 Sau - Uncolonized        Sau Uncolonized  4.0243686 1.312822 53.38139
3 Spn - Uncolonized        Spn Uncolonized -0.2551308 1.289533 52.44458
     t.ratio     p.value p.adj.holm sign
1  0.4262599 0.671690735 1.00000000     
2  3.0654325 0.003406184 0.01021855    *
3 -0.1978474 0.843929327 1.00000000     
                                                 group1
1 <i><b><span style='color:#1E90FF;'>Dpi</span></i></b>
2 <i><b><span style='color:#800080;'>Sau</span></i></b>
3 <i><b><span style='color:#927ED1;'>Spn</span></i></b>
                                            group2 mean.predval.1
1 <b><span style='color:#5b5b5b;'>Uncol</span></b>       2.147787
2 <b><span style='color:#5b5b5b;'>Uncol</span></b>       5.634535
3 <b><span style='color:#5b5b5b;'>Uncol</span></b>       1.355036
  mean.predval.2        FC highlighted
1       1.610167  1.333891           +
2       1.610167  3.499349           +
3       1.610167 -1.188283           -

$data_summary
# A tibble: 4 × 7
# Groups:   Species [4]
  Species    bacteria_label mean.real mean.predval mean.predval.se   max     min
  <fct>      <fct>              <dbl>        <dbl>           <dbl> <dbl>   <dbl>
1 Dpi        <i><b><span s…      2.11         2.15           1.03   4.20  0.0973
2 Sau        <i><b><span s…      5.62         5.63           1.08   7.80  3.47  
3 Spn        <i><b><span s…      1.33         1.36           1.06   3.47 -0.758 
4 Uncoloniz… <b><span styl…      1.59         1.61           0.884  3.38 -0.158 

$plot_1


$plot_2

37C (Supplemental Data)

analysis_function(LDH_FC, each_temp = "37", each_endpoint = "48", cutoff_pvalue = 0.05, cutoff_FC = 1, color_error = "red3")
$anova
         Sum Sq Mean Sq NumDF   DenDF F value  Pr(>F) sign
Species 3.4e+01 1.1e+01     3 3.2e+01 4.2e+00 1.3e-02    *

$random_effects
        grp        var1 var2      vcov     sdcor proportion
1 Line:Date (Intercept) <NA> 0.1592123 0.3990142       5.63
2      Line (Intercept) <NA> 0.0000000 0.0000000       0.00
3  Residual        <NA> <NA> 2.6703268 1.6341135      94.37

$contrasts
           contrast condition1  condition2   estimate        SE       df
1 Dpi - Uncolonized        Dpi Uncolonized  0.6737338 0.6154067 36.63814
2 Sau - Uncolonized        Sau Uncolonized  1.6196034 0.6319761 37.41009
3 Spn - Uncolonized        Spn Uncolonized -0.7311755 0.6330511 37.41164
    t.ratio    p.value p.adj.holm sign
1  1.094778 0.28075888 0.51082708     
2  2.562761 0.01453752 0.04361255    *
3 -1.155002 0.25541354 0.51082708     
                                                 group1
1 <i><b><span style='color:#1E90FF;'>Dpi</span></i></b>
2 <i><b><span style='color:#800080;'>Sau</span></i></b>
3 <i><b><span style='color:#927ED1;'>Spn</span></i></b>
                                            group2 mean.predval.1
1 <b><span style='color:#5b5b5b;'>Uncol</span></b>       2.681271
2 <b><span style='color:#5b5b5b;'>Uncol</span></b>       3.627141
3 <b><span style='color:#5b5b5b;'>Uncol</span></b>       1.276362
  mean.predval.2        FC highlighted
1       2.007537  1.335602           +
2       2.007537  1.806761           +
3       2.007537 -1.572859           -

$data_summary
# A tibble: 4 × 7
# Groups:   Species [4]
  Species     bacteria_label  mean.real mean.predval mean.predval.se   max   min
  <fct>       <fct>               <dbl>        <dbl>           <dbl> <dbl> <dbl>
1 Dpi         <i><b><span st…      2.68         2.68           0.485  3.65 1.71 
2 Sau         <i><b><span st…      3.60         3.63           0.507  4.64 2.61 
3 Spn         <i><b><span st…      1.28         1.28           0.506  2.29 0.263
4 Uncolonized <b><span style…      2.01         2.01           0.386  2.78 1.24 

$plot_1


$plot_2

Analysis 34 vs. 37

# Function to compare both temps 
analysis_temps_function <- function(data, each_endpoint, cutoff_pvalue, cutoff_FC) {
  
  # Subset the data to the selected temp
  data_subset <- data %>%
    filter(Well_Endpoint == each_endpoint) %>%
    mutate(Species.Temp = interaction(Species, Temp))
  
  # Mixed-effects model with random effects
  model <- lmer(FC ~ Species * Temp
                + (1|Line) + (1|Line:Date), 
                data = data_subset)
  #Anova
  anova <- anova(model)
  anova_df <- as.data.frame(anova) %>%
    mutate(sign = case_when(
      `Pr(>F)` < cutoff_pvalue ~ "*",
      TRUE ~ "")) %>%
    mutate_if(is.numeric, ~ format(., digits = 2, scientific = TRUE))
  
  # Calculate Individual contrasts
  emmeans_model <- emmeans(model, ~ Species * Temp)
  emmeans_Temp <- pairs(emmeans_model, simple = "Temp", adjust = "none")   
  
  # Extract random effects and convert to dataframe (if not singular)
  random_effects_df <- as.data.frame(VarCorr(model)) %>%
    mutate(proportion = round(100 * (vcov / sum(vcov)), 2)) 
  
  # Adds predictions based on fixed effects, averaged over random effects. It gives a population estimate
  data_subset <- cbind(data_subset, predval = predict(model,re.form = NA, se.fit = TRUE))
  data_summary_df <- data_subset %>%
    group_by(Species.Temp, Temp, bacteria_label) %>%
    summarize(mean.real = mean(FC),
              mean.predval = mean(predval.fit), 
              mean.predval.se = mean(predval.se.fit)) %>%
    mutate(max = mean.predval + 2*mean.predval.se,
           min = mean.predval - 2*mean.predval.se)
  
  # Convert contrasts to dataframe and adjust pvalues.
  contrasts_df <- as.data.frame(summary(emmeans_Temp)) %>%
    mutate(p.adj.holm = p.adjust(p.value, method = "holm")) %>%
    mutate(sign = case_when(
      p.adj.holm < 0.05 ~ "*",
      TRUE ~ ""))
  
  # Edits to the contrast dataframe to include pvalue brackets in plot
  contrasts_df <- contrasts_df %>%
    separate(contrast, into = c("Temp1", "Temp2"), sep = " - ") %>%
    mutate(Temp1 = sub("Temp", "", Temp1),
           Temp2 = sub("Temp", "", Temp2)) %>%
    mutate(condition1 = paste(Species, Temp1, sep = "."),
           condition2 = paste(Species, Temp2, sep = "."))
  
  # Calculate fold-change values for each contrast
  contrasts_df <- contrasts_df %>%
    ungroup() %>%
    left_join(select(data_summary_df, Species.Temp, mean.predval), by = join_by(condition1 == Species.Temp)) %>%
    left_join(select(data_summary_df, Species.Temp, mean.predval), by = join_by(condition2 == Species.Temp), suffix = c(".1", ".2")) %>%
    mutate(FC = mean.predval.1 / mean.predval.2,
           FC = if_else(FC < 1, -1 / FC, FC),
           highlighted = case_when(
             FC <= -cutoff_FC ~ "+",
             FC >= cutoff_FC ~ "-",
             TRUE ~ "")) 
  
  # Plot with predicted means and standard errors of the estimates
  plot_2 <- ggplot() +
    geom_point(data = data_subset, 
               aes(x = interaction(bacteria_label, Temp, lex.order = T, sep = ""), y = FC, fill = bacteria_label, color = bacteria_label, shape = Line), 
               position = position_jitterdodge(dodge.width = 0.7, jitter.width = 0.2),
               size = 3, alpha = 0.75, stroke = 0.75) +
    
    scale_fill_manual(values = c("#5b5b5b","#800080","#1E90FF","#927ed1")) +
    scale_color_manual(values = c("#5b5b5b","#800080","#1E90FF","#927ed1")) +
    scale_shape_manual(values = c(21, 22, 24)) +
    
    new_scale_color() +
    
    geom_point(data = data_summary_df, aes(x = interaction(bacteria_label, Temp, lex.order = T, sep = ""), 
                                           color = Temp, y = mean.predval), shape = 3, size = 3) +
    geom_errorbar(data = data_summary_df, aes(x = interaction(bacteria_label, Temp, lex.order = T, sep = ""), color = Temp,
                                              y = mean.predval,
                                              ymax = max,
                                              ymin = min),
                  width = 0.4) +
    
    scale_color_manual(values = c("#5b5b5b", "red3")) +

    scale_y_continuous(expand = c(0.1,0)) +
    
    labs(x = "",
         y = "Fold change in LDH from -1 hour",
         fill = "Bacteria", color = "Temp", shape = "HNO Line") +
    
    theme_bw() +
    theme(panel.grid = element_blank(), 
          legend.text = element_markdown(),
          text = element_text(size = 20), 
          axis.text.x = element_markdown(), 
          axis.text.y = element_text(color = "black"))
  
  # Save files
  ggsave(plot_2, filename = paste0(figures_folder, "/plotLDH_", "34vs37_", each_endpoint, "h.png"), width = 7, height = 6)
  saveRDS(plot_2, file.path(figures_folder, paste0("plotLDH_", "34vs37_", each_endpoint, "h.rds")))
  write_csv(anova_df, file.path(stats_folder, paste0("anova_", "34vs37_", each_endpoint, "h.csv")))
  write_csv(random_effects_df, file.path(stats_folder, paste0("stats_random_effects_", "34vs37_", each_endpoint, "h.csv")))
  write_csv(contrasts_df, file.path(stats_folder, paste0("stats_contrasts_", "34vs37_", each_endpoint, "h.csv")))
  write_csv(data_summary_df, file.path(stats_folder, paste0("stats_summary_", "34vs37_", each_endpoint, "h.csv")))
  
  return(list(
    anova = anova_df,
    random_effects = random_effects_df,
    contrasts = contrasts_df,
    data_summary = data_summary_df,
    plot_2 = plot_2
  ))
}
analysis_temps_function(LDH_FC, each_endpoint = "48", cutoff_pvalue = 0.05, cutoff_FC = 1)
$anova
              Sum Sq Mean Sq NumDF   DenDF F value  Pr(>F) sign
Species      1.7e+02 5.7e+01     3 1.2e+02 5.6e+00 1.3e-03    *
Temp         3.1e+00 3.1e+00     1 1.2e+02 3.0e-01 5.8e-01     
Species:Temp 2.9e+01 9.8e+00     3 1.1e+02 9.6e-01 4.1e-01     

$random_effects
        grp        var1 var2         vcov        sdcor proportion
1 Line:Date (Intercept) <NA> 5.750247e-16 2.397967e-08       0.00
2      Line (Intercept) <NA> 2.940019e-01 5.422194e-01       2.79
3  Residual        <NA> <NA> 1.023014e+01 3.198458e+00      97.21

$contrasts
  Temp1 Temp2     Species   estimate        SE        df    t.ratio   p.value
1    34    37         Dpi -0.5451124 1.2125461 103.51969 -0.4495602 0.6539675
2    34    37         Sau  2.0672339 1.2748790 100.83523  1.6215138 0.1080304
3    34    37         Spn  0.1546089 1.2661465 103.56402  0.1221098 0.9030487
4    34    37 Uncolonized -0.3671386 0.9856734  99.62957 -0.3724749 0.7103305
  p.adj.holm sign     condition1     condition2 Temp.1 mean.predval.1 Temp.2
1  1.0000000              Dpi.34         Dpi.37     34       2.139584     37
2  0.4321215              Sau.34         Sau.37     34       5.627158     37
3  1.0000000              Spn.34         Spn.37     34       1.336087     37
4  1.0000000      Uncolonized.34 Uncolonized.37     34       1.600945     37
  mean.predval.2        FC highlighted
1       2.684696 -1.254775           +
2       3.559924  1.580696           -
3       1.181478  1.130861           -
4       1.968083 -1.229326           +

$data_summary
# A tibble: 8 × 8
# Groups:   Species.Temp, Temp [8]
  Species.Temp Temp  bacteria_label mean.real mean.predval mean.predval.se   max
  <fct>        <fct> <fct>              <dbl>        <dbl>           <dbl> <dbl>
1 Dpi.34       34    <i><b><span s…      2.11         2.14           0.838  3.82
2 Sau.34       34    <i><b><span s…      5.62         5.63           0.884  7.39
3 Spn.34       34    <i><b><span s…      1.33         1.34           0.863  3.06
4 Uncolonized… 34    <b><span styl…      1.59         1.60           0.725  3.05
5 Dpi.37       37    <i><b><span s…      2.68         2.68           0.975  4.63
6 Sau.37       37    <i><b><span s…      3.60         3.56           1.01   5.59
7 Spn.37       37    <i><b><span s…      1.28         1.18           1.02   3.21
8 Uncolonized… 37    <b><span styl…      2.01         1.97           0.799  3.57
# ℹ 1 more variable: min <dbl>

$plot_2

Merged Summary Files

pdf_output <- file.path(figures_folder, "HNOBac_SummaryLDH.pdf")

# Check if the file exists, and delete if it does
if (file.exists(pdf_output)) {
  file.remove(pdf_output)
}
[1] TRUE
# Now combine the PDFs into a new file
pdf_files <- list.files(figures_folder, pattern = "\\.pdf$", full.names = TRUE)
qpdf::pdf_combine(input = pdf_files, output = pdf_output)
[1] "data/outputs/LDH/figures/HNOBac_SummaryLDH.pdf"
CFUs Epithelial Lines
LDH Epithelial Lines
Source Code
---
execute:
  message: FALSE
  warning: FALSE
---

# LDH 48h {.unnumbered}

```{r}
library(tidyverse)
library(readxl)
library(ggpubr)
library(ggtext)
library(ggnewscale)
library(scales)
library(lme4)
library(afex)
library(emmeans)
library(gridExtra)
```

## Data Input and Selection

### File Paths

```{r}
# Folder paths
input_path <- "data/input_data/LDH/"
metadata_path <- "data/metadata/LDH"

# Create subfolders for output files
dataframes_folder <- "data/dataframes"
if (!file.exists("data/dataframes")) {
  dir.create("data/dataframes", recursive = TRUE)
}

# Load data and metadata
input_data <- read_excel(file.path(input_path, "LAK24_0118_LDHData.xlsx")) %>% mutate_if(is.character, factor)
Bac_order <- read_csv(file.path(metadata_path, "Order_Bacteria_LDH.csv"))
```

### Data clean-up

```{r}
# Filter only samples matched to CFU wells
LDH_data <- input_data %>% 
  filter(CFUmatched == "T")

# Factor Ordering and Styling
LDH_data <- merge(LDH_data, Bac_order, by = "Species") 
LDH_data$bacteria_label <- factor(LDH_data$bacteria_label, levels = Bac_order$bacteria_label)
LDH_data$Line <- fct_recode(LDH_data$Line, "HNO918" = "A", "HNO204" = "B", "HNO919" = "C") 

# Average technical replicates for each individual experiment
LDH_avg <- LDH_data %>%
  group_by(Date, Line, Species, bacteria_label, Temp, Well_Endpoint, Collection_Time) %>% 
  summarise(avg_Value = mean(Value)) %>%
  # Add labels to distinguish final collection times from time 0
  mutate(Collection_Label = if_else(Collection_Time == 24 | Collection_Time == 48, "final", "initial"))

# Calculate fold change between final time point vs. time 0
LDH_FC <- LDH_avg %>%
  group_by(Date, Line, Species, bacteria_label, Temp, Well_Endpoint) %>%
  reframe(FC = avg_Value[Collection_Label == "final"]/avg_Value[Collection_Label == "initial"]) 
```

### Saving files

```{r}
# Save data frames as CSV files in the output folder
write_csv(LDH_data, file.path(dataframes_folder, "LDH_values.csv"))
write_csv(LDH_FC, file.path(dataframes_folder, "LDH_FC.csv"))

# Save data frames as R objects in the output folder
saveRDS(LDH_data, file.path(dataframes_folder, "LDH_values.rds"))
saveRDS(LDH_FC, file.path(dataframes_folder, "LDH_FC.rds"))

# Cleaning-up all objects from the environment
rm(list = ls())

# Use this to read the final objects
LDH_data <- readRDS("data/dataframes/LDH_values.rds")
LDH_FC <- readRDS("data/dataframes/LDH_FC.rds")
```

## Stats and Plots

### File Paths

```{r}
# Folder paths
dataframes_path <- "data/dataframes"
metadata_path <- "data/metadata/LDH"

# Create subfolders for output files
figures_folder <- "data/outputs/LDH/figures"
if (!file.exists("data/outputs/LDH/figures")) {
  dir.create("data/outputs/LDH/figures", recursive = TRUE)
}
stats_folder <- "data/outputs/LDH/stats"
if (!file.exists("data/outputs/LDH/stats")) {
  dir.create("data/outputs/LDH/stats", recursive = TRUE)
}

# Load data and metadata
LDH_FC <- readRDS("data/dataframes/LDH_FC.rds")
Bac_order <- read_csv(file.path(metadata_path, "Order_Bacteria_LDH.csv"))
```

### Analysis for each Temp

```{r}
# Function to analyze each temp condition
analysis_function <- function(data, each_temp, each_endpoint, cutoff_pvalue, cutoff_FC, color_error) {
  
  # Subset the data to the selected temp
  data_subset <- data %>%
    filter(Temp == each_temp) %>%
    filter(Well_Endpoint == each_endpoint)
  
  # Mixed-effects model with random effects
  model <- lmer(FC ~ Species 
                + (1|Line) + (1|Line:Date), 
                data = data_subset)
  #Anova
  anova <- anova(model)
  anova_df <- as.data.frame(anova) %>%
    mutate(sign = case_when(
      `Pr(>F)` < cutoff_pvalue ~ "*",
      TRUE ~ "")) %>%
    mutate_if(is.numeric, ~ format(., digits = 2, scientific = TRUE))
  
  # Calculate Individual contrasts
  emmeans_model <- emmeans(model, ~ Species)
  emmeans_contrasts <- pairs(emmeans_model, adjust = "none")    
  
  # Extract random effects and convert to dataframe (if not singular)
  random_effects_df <- as.data.frame(VarCorr(model)) %>%
    mutate(proportion = round(100 * (vcov / sum(vcov)), 2)) 
  
  # Adds predictions based on fixed effects, averaged over random effects. It gives a population estimate
  data_subset <- cbind(data_subset, predval = predict(model,re.form = NA, se.fit = TRUE))
  data_summary_df <- data_subset %>%
    group_by(Species, bacteria_label) %>%
    summarize(mean.real = mean(FC),
              mean.predval = mean(predval.fit), 
              mean.predval.se = mean(predval.se.fit)) %>%
    mutate(max = mean.predval + 2*mean.predval.se,
           min = mean.predval - 2*mean.predval.se)
  
  # Convert contrasts to dataframe and adjust pvalues. Filter contrast to Uncolonized only
  contrasts_df <- as.data.frame(summary(emmeans_contrasts)) %>%
    filter(str_detect(contrast, "Uncolonized")) %>%
    mutate(p.adj.holm = p.adjust(p.value, method = "holm")) %>%
    mutate(sign = case_when(
      p.adj.holm < 0.05 ~ "*",
      TRUE ~ ""))
  
  # Edits to the contrast dataframe to include pvalue brackets in plot
  contrasts_df <- contrasts_df %>%
    separate(contrast, into = c("condition1", "condition2"), sep = " - ", remove = F) 
  
  contrasts_df$group1 <- Bac_order$bacteria_label[match(contrasts_df$condition1, Bac_order$Species)]
  contrasts_df$group2 <- Bac_order$bacteria_label[match(contrasts_df$condition2, Bac_order$Species)]
  
  # Calculate fold-change values for each contrast
  contrasts_df <- contrasts_df %>%
    ungroup() %>%
    left_join(select(data_summary_df, Species, mean.predval), by = join_by(condition1 == Species)) %>%
    left_join(select(data_summary_df, Species, mean.predval), by = join_by(condition2 == Species), suffix = c(".1", ".2")) %>%
    mutate(FC = mean.predval.1 / mean.predval.2,
           FC = if_else(FC < 1, -1 / FC, FC),
           highlighted = case_when(
             FC <= -cutoff_FC ~ "-",
             FC >= cutoff_FC ~ "+",
             TRUE ~ "")) 
  
  # Select p values to plot and define their location
  contrast_sign <- contrasts_df %>%
    filter(sign != "" & highlighted != "") %>%
    mutate(p.adj.holm = format(p.adj.holm, digits = 2, scientific = TRUE))
  
  location <- max(data_subset$FC, na.rm = TRUE) * 1.1
  
  # Standard Boxplot
  plot_1 <- ggplot() +
    geom_boxplot(data = data_subset, aes(x = bacteria_label, y = FC, fill = bacteria_label)) + 
    
    geom_jitter(data = data_subset, aes(x = bacteria_label, y = FC, shape = Line), 
                fill = "grey50", color = "grey30", size = 2, width = 0.05, stroke = 0.75) +
    
    scale_fill_manual(values = c("grey80","#800080","#1E90FF","#927ed1")) +
    scale_shape_manual(values = c(21, 22, 24)) +
    
    labs(x = "",
         y = "Fold change in LDH from -1 hour") +
    
    theme_bw() +
    theme(panel.grid = element_blank(),
          legend.position = "none",
          text = element_text(size = 20), 
          axis.text.x = element_markdown(), 
          axis.text.y = element_text(color = "black"))
  
  # Plot with predicted means and standard errors of the estimates
  plot_2 <- ggplot() +
    geom_point(data = data_subset, 
                aes(x = bacteria_label, y = FC, fill = bacteria_label, color = bacteria_label, shape = Line), 
                position = position_jitterdodge(dodge.width = 0.7, jitter.width = 0.2),
                size = 3, alpha = 0.75, stroke = 0.75) +
    
    geom_point(data = data_summary_df, aes(x = bacteria_label, y = mean.predval), shape = 3, size = 3, color = color_error) +
    geom_errorbar(data = data_summary_df, aes(x = bacteria_label,
                                              y = mean.predval,
                                              ymax = max,
                                              ymin = min),
                  width = 0.4, color = color_error) +
    
    scale_fill_manual(values = c("#5b5b5b","#800080","#1E90FF","#927ed1")) +
    scale_color_manual(values = c("#5b5b5b","#800080","#1E90FF","#927ed1")) +
    scale_shape_manual(values = c(21, 22, 24)) +
    
    scale_y_continuous(expand = c(0.1,0)) +
    
    labs(x = "",
         y = "Fold change in LDH from -1 hour",
         fill = "Bacteria", color = "Bacteria", shape = "HNO Line") +
    
    theme_bw() +
    theme(panel.grid = element_blank(), 
          legend.text = element_markdown(),
          text = element_text(size = 20), 
          axis.text.x = element_markdown(), 
          axis.text.y = element_text(color = "black"))
  
  # Conditionally add p-value annotations layer
  if (nrow(contrast_sign) > 0) {
    plot_2 <- plot_2 +
      stat_pvalue_manual(contrast_sign, label = "p.adj.holm", y.position = location,
                         tip.length = 0.02, bracket.shorten = 0.2, vjust = -0.2, bracket.size = 0.3, size = 2.5)
  } else {
    plot_2 <- plot_2
  }
  
  # Arrange plot and tables for summary pdf
  table <- contrasts_df %>%
    select(condition1, condition2, p.adj.holm, sign, mean.predval.1, mean.predval.2, FC, highlighted) %>%
    mutate(p.adj.holm = format(p.adj.holm, digits = 2, scientific = TRUE))
  
  Tmin <- ttheme_minimal()
  panel <- ggarrange(plot_1 + theme(plot.margin = unit(c(0.25,0.25,0.25,0.25), "in")), 
                     plot_2 + theme(plot.margin = unit(c(0.25,0.25,0.25,0.25), "in")),
                     tableGrob(anova_df, theme = Tmin), 
                     tableGrob(random_effects_df, theme = Tmin, rows = NULL), 
                     tableGrob(table, theme = Tmin, rows = NULL), 
                     ncol = 1, heights = c(0.6, 0.6, 0.1, 0.2, 0.2),
                     labels = c("  Standard Boxplot ", "Predicted Mean ± 2*SE", "    Anova    ", "Random Effects", "   Contrasts  "))
  panel <- annotate_figure(panel, top = text_grob(
    paste0("Analysis for ", each_temp, "C timepoint ", each_endpoint, " h. P-value: ", cutoff_pvalue, " and FC: ", cutoff_FC),
                                                         face = "bold", size = 14, color = "red"))
  
  # Save files
  ggsave(panel, filename = paste0(figures_folder, "/summaryLDH_", each_temp, "C_", each_endpoint, "h.pdf"), width = 9, height = 14)
  ggsave(plot_2, filename = paste0(figures_folder, "/plotLDH_", each_temp, "C_", each_endpoint, "h.png"), width = 7, height = 6)
  saveRDS(plot_2, file.path(figures_folder, paste0("plotLDH_", each_temp, "C_", each_endpoint, "h.rds")))
  write_csv(anova_df, file.path(stats_folder, paste0("anova_", each_temp, "C_", each_endpoint, "h.csv")))
  write_csv(random_effects_df, file.path(stats_folder, paste0("stats_random_effects_", each_temp, "C_", each_endpoint, "h.csv")))
  write_csv(contrasts_df, file.path(stats_folder, paste0("stats_contrasts_", each_temp, "C_", each_endpoint, "h.csv")))
  write_csv(data_summary_df, file.path(stats_folder, paste0("stats_summary_", each_temp, "C_", each_endpoint, "h.csv")))
  
  return(list(
    anova = anova_df,
    random_effects = random_effects_df,
    contrasts = contrasts_df,
    data_summary = data_summary_df,
    plot_1 = plot_1,
    plot_2 = plot_2
  ))
}
```

#### 34C (Main Data)

```{r}
#| warning: FALSE
analysis_function(LDH_FC, each_temp = "34", each_endpoint = "48", cutoff_pvalue = 0.05, cutoff_FC = 1, color_error = "black")
```

#### 37C (Supplemental Data)

```{r}
#| warning: FALSE
analysis_function(LDH_FC, each_temp = "37", each_endpoint = "48", cutoff_pvalue = 0.05, cutoff_FC = 1, color_error = "red3")
```

### Analysis 34 vs. 37

```{r}
# Function to compare both temps 
analysis_temps_function <- function(data, each_endpoint, cutoff_pvalue, cutoff_FC) {
  
  # Subset the data to the selected temp
  data_subset <- data %>%
    filter(Well_Endpoint == each_endpoint) %>%
    mutate(Species.Temp = interaction(Species, Temp))
  
  # Mixed-effects model with random effects
  model <- lmer(FC ~ Species * Temp
                + (1|Line) + (1|Line:Date), 
                data = data_subset)
  #Anova
  anova <- anova(model)
  anova_df <- as.data.frame(anova) %>%
    mutate(sign = case_when(
      `Pr(>F)` < cutoff_pvalue ~ "*",
      TRUE ~ "")) %>%
    mutate_if(is.numeric, ~ format(., digits = 2, scientific = TRUE))
  
  # Calculate Individual contrasts
  emmeans_model <- emmeans(model, ~ Species * Temp)
  emmeans_Temp <- pairs(emmeans_model, simple = "Temp", adjust = "none")   
  
  # Extract random effects and convert to dataframe (if not singular)
  random_effects_df <- as.data.frame(VarCorr(model)) %>%
    mutate(proportion = round(100 * (vcov / sum(vcov)), 2)) 
  
  # Adds predictions based on fixed effects, averaged over random effects. It gives a population estimate
  data_subset <- cbind(data_subset, predval = predict(model,re.form = NA, se.fit = TRUE))
  data_summary_df <- data_subset %>%
    group_by(Species.Temp, Temp, bacteria_label) %>%
    summarize(mean.real = mean(FC),
              mean.predval = mean(predval.fit), 
              mean.predval.se = mean(predval.se.fit)) %>%
    mutate(max = mean.predval + 2*mean.predval.se,
           min = mean.predval - 2*mean.predval.se)
  
  # Convert contrasts to dataframe and adjust pvalues.
  contrasts_df <- as.data.frame(summary(emmeans_Temp)) %>%
    mutate(p.adj.holm = p.adjust(p.value, method = "holm")) %>%
    mutate(sign = case_when(
      p.adj.holm < 0.05 ~ "*",
      TRUE ~ ""))
  
  # Edits to the contrast dataframe to include pvalue brackets in plot
  contrasts_df <- contrasts_df %>%
    separate(contrast, into = c("Temp1", "Temp2"), sep = " - ") %>%
    mutate(Temp1 = sub("Temp", "", Temp1),
           Temp2 = sub("Temp", "", Temp2)) %>%
    mutate(condition1 = paste(Species, Temp1, sep = "."),
           condition2 = paste(Species, Temp2, sep = "."))
  
  # Calculate fold-change values for each contrast
  contrasts_df <- contrasts_df %>%
    ungroup() %>%
    left_join(select(data_summary_df, Species.Temp, mean.predval), by = join_by(condition1 == Species.Temp)) %>%
    left_join(select(data_summary_df, Species.Temp, mean.predval), by = join_by(condition2 == Species.Temp), suffix = c(".1", ".2")) %>%
    mutate(FC = mean.predval.1 / mean.predval.2,
           FC = if_else(FC < 1, -1 / FC, FC),
           highlighted = case_when(
             FC <= -cutoff_FC ~ "+",
             FC >= cutoff_FC ~ "-",
             TRUE ~ "")) 
  
  # Plot with predicted means and standard errors of the estimates
  plot_2 <- ggplot() +
    geom_point(data = data_subset, 
               aes(x = interaction(bacteria_label, Temp, lex.order = T, sep = ""), y = FC, fill = bacteria_label, color = bacteria_label, shape = Line), 
               position = position_jitterdodge(dodge.width = 0.7, jitter.width = 0.2),
               size = 3, alpha = 0.75, stroke = 0.75) +
    
    scale_fill_manual(values = c("#5b5b5b","#800080","#1E90FF","#927ed1")) +
    scale_color_manual(values = c("#5b5b5b","#800080","#1E90FF","#927ed1")) +
    scale_shape_manual(values = c(21, 22, 24)) +
    
    new_scale_color() +
    
    geom_point(data = data_summary_df, aes(x = interaction(bacteria_label, Temp, lex.order = T, sep = ""), 
                                           color = Temp, y = mean.predval), shape = 3, size = 3) +
    geom_errorbar(data = data_summary_df, aes(x = interaction(bacteria_label, Temp, lex.order = T, sep = ""), color = Temp,
                                              y = mean.predval,
                                              ymax = max,
                                              ymin = min),
                  width = 0.4) +
    
    scale_color_manual(values = c("#5b5b5b", "red3")) +

    scale_y_continuous(expand = c(0.1,0)) +
    
    labs(x = "",
         y = "Fold change in LDH from -1 hour",
         fill = "Bacteria", color = "Temp", shape = "HNO Line") +
    
    theme_bw() +
    theme(panel.grid = element_blank(), 
          legend.text = element_markdown(),
          text = element_text(size = 20), 
          axis.text.x = element_markdown(), 
          axis.text.y = element_text(color = "black"))
  
  # Save files
  ggsave(plot_2, filename = paste0(figures_folder, "/plotLDH_", "34vs37_", each_endpoint, "h.png"), width = 7, height = 6)
  saveRDS(plot_2, file.path(figures_folder, paste0("plotLDH_", "34vs37_", each_endpoint, "h.rds")))
  write_csv(anova_df, file.path(stats_folder, paste0("anova_", "34vs37_", each_endpoint, "h.csv")))
  write_csv(random_effects_df, file.path(stats_folder, paste0("stats_random_effects_", "34vs37_", each_endpoint, "h.csv")))
  write_csv(contrasts_df, file.path(stats_folder, paste0("stats_contrasts_", "34vs37_", each_endpoint, "h.csv")))
  write_csv(data_summary_df, file.path(stats_folder, paste0("stats_summary_", "34vs37_", each_endpoint, "h.csv")))
  
  return(list(
    anova = anova_df,
    random_effects = random_effects_df,
    contrasts = contrasts_df,
    data_summary = data_summary_df,
    plot_2 = plot_2
  ))
}
```

```{r}
#| warning: FALSE
analysis_temps_function(LDH_FC, each_endpoint = "48", cutoff_pvalue = 0.05, cutoff_FC = 1)
```

### Merged Summary Files

```{r}
pdf_output <- file.path(figures_folder, "HNOBac_SummaryLDH.pdf")

# Check if the file exists, and delete if it does
if (file.exists(pdf_output)) {
  file.remove(pdf_output)
}

# Now combine the PDFs into a new file
pdf_files <- list.files(figures_folder, pattern = "\\.pdf$", full.names = TRUE)
qpdf::pdf_combine(input = pdf_files, output = pdf_output)
```