HNOBac Manuscript
  1. Methods
  2. Cytokines
  • Introduction
  • Methods
    • RNASeq
    • Cell Counts & TEER
    • CFUs 48h
    • CFUs Epithelial Lines
    • LDH 48h
    • LDH Epithelial Lines
    • Cytokines
    • HEK-Blue
  • R Session Info

Table of contents

  • Data Input and Selection
    • File Paths
    • Data input and clean-up
    • Selection of Cyto-location groups
    • Saving files
  • Heatmap
    • log2FC analysis
    • pg/mL analysis
  • Stats and Individual Plots
    • log2FC analysis
    • pg/mL analysis

Cytokines

library(tidyverse)
library(readxl)
library(writexl)
library(ggpubr)
library(ggtext)
library(scales)
library(lme4)
library(afex)
library(emmeans)
library(ggnewscale)
library(gridExtra)
library(plotly)
library(htmlwidgets)

Data Input and Selection

File Paths

# Folder paths
input_path <- "data/input_data/Cytokines/"
metadata_path <- "data/metadata/Cytokines"

# Create subfolders for output files
analysis_folder <- "data/outputs/Cytokines/analysis_log2FC"
if (!file.exists("data/outputs/Cytokines/analysis_log2FC")) {
  dir.create("data/outputs/Cytokines/analysis_log2FC", recursive = TRUE)
}
dataframes_folder <- "data/dataframes"
if (!file.exists("data/dataframes")) {
  dir.create("data/dataframes", recursive = TRUE)
}

# Load metadata
input_data <- read_excel(file.path(metadata_path, "Sample_List.xlsx"))
order_Cyto <- read_csv(file.path(metadata_path, "Order_Cytokines.csv")) %>% mutate_if(is.character, factor)

Data input and clean-up

# List all the input files, loop through each file and create a merged dataframe 
file_list <- list.files(input_path, pattern = "Detail\\.xls$", full.names = TRUE)

for (file_path in file_list) {
  # Read the Excel file and skip the first 3 rows
  file_data <- read_excel(file_path, sheet = "Summary", skip = 3)
  
  # Left join with the existing sample_list
  input_data <- left_join(input_data, file_data, by = "Analyte  Sample")
}

# Rename "Analyte  Sample" to avoid issues with spaces in variable names
input_data <- input_data %>% 
  rename("SampleID" = "Analyte  Sample")

# Pivot the data from wide to long format crating the Cytokine variable
input_data_long <- pivot_longer(input_data, 
                         cols = (which(names(input_data) == "SampleID") + 1):ncol(input_data),
                         names_to = "Cytokine", values_to = "value")

# Identify samples below and above the limit of detection and assign them a concentration value
input_data_long <- input_data_long %>%
  mutate(concentration = if_else(value == "N/A", NA, as.numeric(gsub("<|↓|>|↑", "", value))),
         below_LD = if_else(value == "N/A" | grepl("<|↓", value), TRUE, FALSE),
         above_LD = if_else(grepl(">|↑", value), TRUE, FALSE))

# Add columns with a simpler Cytokine name (Cyto) and Category names
input_data_long <- left_join(input_data_long, order_Cyto, by = "Cytokine")

# Add column with sample type information (NB control vs. bacterial sample)
input_data_long <- input_data_long %>% 
  mutate(sample_type = if_else(viability == "control", "control", "bacteria"))

# Get average values for conditions that have technical replicas (control or NB samples)
input_data_long <- input_data_long %>% 
  group_by(date, Cyto, Category, location, line, time, bacteria, viability) %>% 
  mutate(concentration_NA = ifelse(below_LD | above_LD, NA, concentration)) %>% 
  mutate(concentration_avg = mean(concentration_NA, na.rm = TRUE)) %>% 
  mutate(concentration_final = ifelse(is.na(concentration_avg), concentration, concentration_avg)) %>% 
  select(-concentration, concentration_NA, concentration_avg) %>%
  arrange(below_LD, above_LD) %>%
  distinct(date, Cyto, Category, location, line, time, bacteria, viability, .keep_all = TRUE) %>% 
  ungroup()

# Cleaning up extra columns
input_data_long <- input_data_long %>%
  select(date, Cyto, Category, location, line, time, sample_type, bacteria, viability, concentration_final, below_LD, above_LD) 

# Factoring variables with the right levels
input_data_long <- input_data_long %>% mutate_if(is.character, factor)

input_data_long$bacteria <- factor(input_data_long$bacteria, levels = c("NB", "Dpi", "Sau", "Spn"))
input_data_long$viability <- factor(input_data_long$viability, levels = c("control", "live", "IRR"))
input_data_long$line <- fct_recode(input_data_long$line, "HNO204" = "B", "HNO919" = "C") 
input_data_long$location <- fct_recode(input_data_long$location, "Apical" = "Ap", "Basal" = "Baso")
input_data_long$Cyto <- factor(input_data_long$Cyto, levels = rev(order_Cyto$Cyto))
input_data_long$Category <- factor(input_data_long$Category, levels = unique(order_Cyto$Category))

# Making new data frame with the 48h time point only
input_data_long_t48 <- input_data_long %>% 
  filter(time != 0)

# Calculating log2FC relative to the non-bacteria controls
input_data_log2FC <- input_data_long_t48 %>%
  group_by(date, Cyto, Category, location, line, time) %>%
  reframe(
    bacteria = bacteria[sample_type == "bacteria"],
    viability = viability[sample_type == "bacteria"],
    log2FC = log2(concentration_final[sample_type == "bacteria"] / concentration_final[sample_type == "control"]),
    log2ctl = log2(concentration_final[sample_type == "control"]),
    below_LD = below_LD[sample_type == "bacteria"],
    above_LD = above_LD[sample_type == "bacteria"]
  )

# Factoring variables with the right levels
input_data_log2FC$bacteria <- factor(input_data_log2FC$bacteria, levels = c("Dpi", "Sau", "Spn"))

# Cleaning-up objects from the environment
rm(input_data, input_path, metadata_path, file_list, file_path, file_data, order_Cyto)

Selection of Cyto-location groups

Proportion of samples above the detection limit

# Calculate for each Cytokine and Location the proportion of samples above limit of detection
LD_all <- input_data_long %>%
  group_by(Cyto, location) %>%
  summarise(detected_all = 1 - mean(below_LD), .groups = 'drop') 

# Same, but without including the time 0h
LD_48h <- input_data_long %>%
  filter(time != 0) %>%
  group_by(Cyto, location) %>%
  summarise(detected_48 = 1 - mean(below_LD), .groups = 'drop') 

# Combine the individual data frames and add label column with combined Cyto and location
LD <- left_join(LD_all, LD_48h, by = join_by(Cyto, location)) %>%
  mutate(group_label = paste(Cyto, location, sep = "_")) %>%
  relocate(group_label, .after = location)

# Plot function
plot_select_function <- function(data, x_var, title) {
  ggplot(data = data, aes(x = reorder(group_label, {{x_var}}), y = {{x_var}}, fill = location)) +
    geom_bar(stat = "identity", position = position_dodge()) + 
    scale_y_continuous(expand = c(0, 0)) +
    scale_fill_manual(values = c("coral", "steelblue1")) +
    coord_flip() +
    labs(title = title, x = "", y = "") +
    theme_bw() +
    theme(panel.grid = element_blank(),
          text = element_text(size = 10))

}

# Create and arrange plots
p_all <- plot_select_function(LD, detected_all, "Proportion of samples above detection limit (all pg/mL values)")
p_48h <- plot_select_function(LD, detected_48, "Proportion of samples above detection limit (pg/mL values at 48h)")

LD_Plots <- ggarrange(p_all, p_48h, ncol = 1, common.legend = T)
ggexport(LD_Plots, filename = file.path(analysis_folder, "LD_Plots.pdf"), height = 30, width = 10)
LD_Plots

log2FC range values

# Calculate the range for each log2FC
range_log2FC <- input_data_log2FC %>%
  group_by(Cyto, location) %>%
  mutate(min = ifelse(is.na(log2FC), 0, min(log2FC, na.rm = TRUE))) %>%
  mutate(max = max(log2FC, na.rm = TRUE)) %>%
  mutate(range = max - min) %>%
  summarise(range = mean(range, na.rm = TRUE), .groups = 'drop')

# Combine the range_log2FC and LD data frames into a summary file used for analysis selection
select_summary <- left_join(LD, range_log2FC, by = join_by(Cyto, location)) %>%
   mutate(group_label = paste(Cyto, location, sep = "_"))

# Plot the range in a similar way to the limit of detection
p_range <- plot_select_function(select_summary, range, "log2FC range")
ggsave(p_range, filename = file.path(analysis_folder, "Range_Plot.png"), height = 8, width = 10)
p_range

Filtering/selection thresholds

# Define cut-off values for group rejection and statistical analysis 
select_summary <- select_summary %>%
  mutate(group_rejected = as.factor(ifelse(range < 2.5 & detected_48 < 0.25, TRUE, FALSE))) %>%
  mutate(group_stats = as.factor(ifelse(range > 3, TRUE, FALSE)))

# Plot correlation between range and proportion of detected samples
p_correlation_FC <- ggplot(select_summary, aes(x = range, y = detected_48, color = Cyto, shape = location)) +
  geom_point(aes(size = group_rejected)) +
  scale_size_manual(values = c(4, 2)) +
  geom_point(data = subset(select_summary, group_stats == TRUE), size = 2, color = "black") +
  theme_bw() +
  theme(panel.grid = element_blank(),
        text = element_text(size = 10)) +
  labs(x = "log2FC range",
       y = "Proportion of samples above detection limit (pg/mL values at 48h)")

ggsave(p_correlation_FC, filename = file.path(analysis_folder, "LDvsRange_Plot.png"), height = 8, width = 10)
ggplotly(p_correlation_FC)
saveWidget(ggplotly(p_correlation_FC), file.path(analysis_folder, "LDvsRange_Interactive.html"), selfcontained = TRUE)
# Apply the grouping variables to the raw and log2FC data frames
input_data_long_t48 <- left_join(input_data_long_t48, select_summary, by = join_by(Cyto, location))
input_data_log2FC <- left_join(input_data_log2FC, select_summary, by = join_by(Cyto, location)) 

Saving files

# Save data frames as CSV files in the dataframes folder
write_csv(input_data_long_t48, file.path(dataframes_folder, "Cyto_values_48h.csv"))
write_csv(input_data_log2FC, file.path(dataframes_folder, "Cyto_log2FC_48h.csv"))

# Save data frames as R objects in the dataframes folder
saveRDS(input_data_long_t48, file.path(dataframes_folder, "Cyto_values_48h.rds"))
saveRDS(input_data_log2FC, file.path(dataframes_folder, "Cyto_log2FC_48h.rds"))

# Cleaning-up all objects from the environment
rm(list = ls())

# Use this to read the final objects
Cyto_values_48h <- readRDS("data/dataframes/Cyto_values_48h.rds")
Cyto_log2FC_48h <- readRDS("data/dataframes/Cyto_log2FC_48h.rds")

Heatmap

log2FC analysis

File Paths

# Folder paths
dataframes_path <- "data/dataframes"
metadata_path <- "data/metadata/Cytokines"

# Load data and metadata
Cyto_log2FC_48h <- readRDS("data/dataframes/Cyto_log2FC_48h.rds")
BacVia_order <- read_csv(file.path(metadata_path, "Order_BacteriaViability_FC.csv"))
SubsetFig2 <- read_csv(file.path(metadata_path, "Order_CytokinesFig2.csv"))

# Create subfolders for output files
figures_folder <- "data/outputs/Cytokines/figures_log2FC"
if (!file.exists("data/outputs/Cytokines/figures_log2FC")) {
  dir.create("data/outputs/Cytokines/figures_log2FC", recursive = TRUE)
}

Heatmap plot

# Averaging replicate log2FCs
Avg_log2FC_Cytos <- Cyto_log2FC_48h %>%
  group_by(Cyto, Category, location, viability, bacteria, group_rejected) %>%
  summarize(avg_log2FC = mean(log2FC, na.rm = TRUE), .groups = 'drop') %>%
  mutate(Bac.Via = paste(bacteria, viability, sep = "."))

# Factor Ordering
Avg_log2FC_Cytos <- merge(Avg_log2FC_Cytos, BacVia_order, by = "Bac.Via")
Avg_log2FC_Cytos$Bac.Via_label <- factor(Avg_log2FC_Cytos$Bac.Via_label, levels = BacVia_order$Bac.Via_label)

# Make new variable with a version of avg_log2FC with NAs if group_rejected is TRUE
Avg_log2FC_Cytos <- Avg_log2FC_Cytos %>%
  mutate(value = ifelse(group_rejected == TRUE, NA, avg_log2FC))
# Subset data for min-heatmap in Figure 2
Avg_log2FC_Cytos_Fig2 <- inner_join(select(Avg_log2FC_Cytos, -Category), SubsetFig2, by = join_by(Cyto))

# Factor Ordering
Avg_log2FC_Cytos_Fig2$Cyto <- factor(Avg_log2FC_Cytos_Fig2$Cyto, levels = rev(SubsetFig2$Cyto))
# Code for heatmap
plot_heatmap <- function(data) {
  ggplot(data, aes(x = Cyto, y = Bac.Via_label, fill = value)) +
    geom_tile(color = "black") + 
    facet_grid(location ~ Category , space = "free", scales = "free") +
    scale_fill_gradient2(low = "#009ad1", high = "#AD1457",
                         mid = "#fefbea", na.value = "grey",
                         name = expression("Log" [2]*"FC")) +
    labs(x = "", y = "") +
    scale_y_discrete(expand = c(0,0), limits = rev) +
    scale_x_discrete(expand = c(0,0), limits = rev) +
    theme_bw() +
    theme(panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
          strip.background = element_rect(colour = "black", fill = NA),
          strip.text.y = element_text(angle = -90, hjust = 0.5, size = 14, face = "bold"),
          strip.text.x = element_text(size = 12),
          axis.text.y = element_markdown(size = 14),
          axis.text.x = element_text(size = 12, angle = 67, hjust = 1, colour = "black"),
          legend.title = element_text(size = 14),
          legend.text = element_text(size = 14),
          panel.spacing = unit(4, 'pt')
    )
}
heatmap <- plot_heatmap(Avg_log2FC_Cytos)
heatmap

heatmap_Fig2 <- plot_heatmap(Avg_log2FC_Cytos_Fig2)
heatmap_Fig2

Saving files

# Save figures as png
ggsave(plot = heatmap, filename = file.path(figures_folder, "heatmap.png"), width = unit(9, "in"), height = unit(3.5, "in"))
ggsave(plot = heatmap_Fig2, filename = file.path(figures_folder, "heatmap_Fig2.png"), width = unit(5, "in"), height = unit(3.5, "in"))

# Save figurse as an R objects
saveRDS(heatmap, file.path(figures_folder, "heatmap.rds"))
saveRDS(heatmap_Fig2, file.path(figures_folder, "heatmap_Fig2.rds"))

# Cleaning-up all objects from the environment
rm(list = ls())

pg/mL analysis

File Paths

# Folder paths
dataframes_path <- "data/dataframes"
metadata_path <- "data/metadata/Cytokines"

# Load data and metadata
Cyto_values_48h <- readRDS(file.path(dataframes_path, "Cyto_values_48h.rds"))
BacVia_order <- read_csv(file.path(metadata_path, "Order_BacteriaViability.csv"))

# Create subfolders for output files
figures_folder <- "data/outputs/Cytokines/figures_pgml"
if (!file.exists("data/outputs/Cytokines/figures_pgml")) {
  dir.create("data/outputs/Cytokines/figures_pgml", recursive = TRUE)
}

Heatmap plot

# Averaging replicate log2FCs
Avg_values_Cytos <- Cyto_values_48h %>%
  group_by(Cyto, Category, location, viability, bacteria, group_rejected) %>%
  summarize(avg_value = mean(concentration_final, na.rm = TRUE), .groups = 'drop') %>%
  mutate(log_value = log10(avg_value)) %>% 
  mutate(Bac.Via = paste(bacteria, viability, sep = "."))

# Factor Ordering
Avg_values_Cytos <- merge(Avg_values_Cytos, BacVia_order, by = "Bac.Via")
Avg_values_Cytos$Bac.Via_label <- factor(Avg_values_Cytos$Bac.Via_label, levels = BacVia_order$Bac.Via_label)

# Make new variable with a version of avg_log2FC with NAs if group_rejected is TRUE
Avg_values_Cytos <- Avg_values_Cytos %>%
  mutate(value = ifelse(group_rejected == TRUE, NA, log_value))
# Code for heatmap
plot_heatmap <- ggplot(Avg_values_Cytos, aes(x = Cyto, y = Bac.Via_label, fill = value)) +
  geom_tile(color = "black") + 
  facet_grid(location ~ Category , space = "free", scales = "free") +
  scale_fill_gradient2(low = "#009ad1", high = "#AD1457",
                       mid = "#fefbea", na.value = "grey",
                       name = "Log10 pg/mL") +
  labs(x = "", y = "") +
  scale_y_discrete(expand = c(0,0), limits = rev) +
  scale_x_discrete(expand = c(0,0), limits = rev) +
  theme_bw() +
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        strip.background = element_rect(colour = "black", fill = NA),
        strip.text.y = element_text(angle = -90, hjust = 0.5, size = 11),
        axis.text.y = element_markdown(size = 11),
        axis.text.x = element_text(size = 11, angle = 67,  hjust = 1),
        panel.spacing = unit(4, 'pt')
  )
plot_heatmap 

Saving files

# Save figure as png
ggsave(plot = plot_heatmap, filename = file.path(figures_folder, "heatmap.png"), width = unit(9, "in"), height = unit(3.5, "in"))

# Save figure as an R object
saveRDS(plot_heatmap, file.path(figures_folder, "heatmap.rds"))

# Cleaning-up all objects from the environment
rm(list = ls())

Stats and Individual Plots

log2FC analysis

File Paths

# Folder paths
dataframes_path <- "data/dataframes"
metadata_path <- "data/metadata/Cytokines"
analysis_folder <- "data/outputs/Cytokines/analysis_log2FC"
figures_folder <- "data/outputs/Cytokines/figures_log2FC"

# Load data and metadata
Cyto_log2FC_48h <- readRDS(file.path(dataframes_path, "Cyto_log2FC_48h.rds"))
Cyto_values_48h <- readRDS(file.path(dataframes_path, "Cyto_values_48h.rds"))
Bac_order <- read.csv(file.path(metadata_path, "Order_Bacteria.csv"))
Via_order <- read.csv(file.path(metadata_path, "Order_Viability.csv"))

# Create subfolders for output files
stats_folder <- "data/outputs/Cytokines/stats_log2FC"
if (!file.exists("data/outputs/Cytokines/stats_log2FC")) {
  dir.create("data/outputs/Cytokines/stats_log2FC", recursive = TRUE)
}
# Subset the data for the selected analysis groups
Stats_log2FC <- Cyto_log2FC_48h %>%
    filter(group_stats == TRUE) %>%
    arrange(desc(range))

Stats_values <- Cyto_values_48h %>%
    filter(group_stats == TRUE) %>%
    arrange(desc(range))

Stats function

# Function to analyze each individual Cyto-location group using log2FC data
stats_function <- function(log2FC_data, each_group) {
  
  # Subset the data
  log2FCdata_subset <- log2FC_data %>%
    filter(group_label == each_group) 
  
  # First run linear mixed-effects model with random effects
  model <- lmer(log2FC ~ bacteria * viability 
                + (1|line) + (1|line:date), 
                data = log2FCdata_subset)
  
  # If model is singular, rerun linear model without random effects
  singular = isSingular(model)
  if (singular) {
    model <- lm(log2FC ~ bacteria * viability, 
                  data = log2FCdata_subset)
  }
  
  # Individual contrasts
  emmeans_model <- emmeans(model, ~ bacteria * viability)
  emmeans_bacteria <- pairs(emmeans_model, simple = "bacteria", adjust = "none")   
  emmeans_viability <- pairs(emmeans_model, simple = "viability", adjust = "none")    
  
  # Extract fixed effects coefficients and convert to dataframe
  fixed_effects_df <- as.data.frame(summary(model)$coefficients) %>%
    mutate(singular = singular) %>%
    mutate(group_label = each_group) 
  
  # Extract random effects data and convert to dataframe (if not singular)
  if (singular) {
    random_effects_df <- data.frame()
  } else {
    random_effects_df <- as.data.frame(VarCorr(model)) %>%
      mutate(proportion = round(100 * (vcov / sum(vcov)), 2)) %>%
      mutate(singular = singular) %>%
      mutate(group_label = each_group) 
  }
  
  # Extract bacteria contrasts from emmeans_model and convert to dataframe. Filter live only
  contrasts_bacteria_df <- as.data.frame(summary(emmeans_bacteria)) %>%
    filter(viability == "live") %>%
    mutate(contrast_label = paste(contrast, viability, sep = " -- ")) %>%
    select(-contrast, -viability) %>%
    mutate(singular = singular) %>%
    mutate(group_label = each_group)
  
  # Extract viability contrasts from emmeans_model and convert to dataframe
  contrasts_viability_df <- as.data.frame(summary(emmeans_viability)) %>%
    mutate(contrast_label = paste(bacteria, contrast, sep = " -- ")) %>%
    select(-contrast, -bacteria) %>%
    mutate(singular = singular) %>%
    mutate(group_label = each_group)
  
  # Combine contrasts dataframes
  contrasts_df <- rbind(contrasts_bacteria_df,contrasts_viability_df) 
  
  # Adds predictions based on fixed effects, averaged over random effects. It gives a population estimate
  log2FCdata_subset <- cbind(log2FCdata_subset, predval = predict(model,re.form = NA, se.fit = TRUE))
  
  # Backtransform the predicted values and CIs
  transf_preds_df <- log2FCdata_subset %>% group_by(bacteria, viability, group_label) %>%
    summarise(ml2ctl = mean(log2ctl), ml2FC = mean(log2FC), 
              ml2predval.fit = mean(predval.fit),
              ml2predval.se.fit = mean(predval.se.fit),
              .groups = 'drop') %>%
    mutate(pconcentration = 2^(ml2predval.fit + ml2ctl),
           pconcentration_min = 2^(ml2predval.fit - 2*ml2predval.se.fit + ml2ctl),
           pconcentration_max = 2^(ml2predval.fit + 2*ml2predval.se.fit + ml2ctl),
           singular = singular)
  
  return(list(fixed_effects = fixed_effects_df, 
              random_effects = random_effects_df, 
              contrasts_bacteria = contrasts_bacteria_df,
              contrasts_viability = contrasts_viability_df,
              contrasts = contrasts_df,
              transf_preds = transf_preds_df))
}
#Example use for a specific Cytokine-location group
#each_group = "IL-18_Apical" 
#stats_function(Stats_log2FC, each_group)
# Apply the function to each unique group label and combine results into a list
stats_list <- map(unique(Stats_log2FC$group_label), function(group) {
  cat("Processing group:", group, "\n")  # Add this line to print the group
  stats_function(Stats_log2FC, group)
})
Processing group: IL-18_Apical 
Processing group: IL-1RN_Basal 
Processing group: IL-18_Basal 
Processing group: G-CSF_Basal 
Processing group: CXCL-10_Apical 
Processing group: IL-6_Apical 
Processing group: IL-1α_Basal 
Processing group: IL-1α_Apical 
Processing group: G-CSF_Apical 
Processing group: IL-6_Basal 
Processing group: GM-CSF_Apical 
Processing group: IL-36β_Basal 
Processing group: IL-1RN_Apical 
Processing group: GM-CSF_Basal 
Processing group: L-VEGF_Apical 
Processing group: IL-1β_Basal 
Processing group: CCL-2_Apical 
Processing group: CCL-4_Basal 
Processing group: CCL-20_Apical 
Processing group: IL-36β_Apical 
Processing group: CXCL-11_Apical 
Processing group: CXCL-10_Basal 
Processing group: CCL-5_Basal 
Processing group: CCL-20_Basal 
Processing group: TNF-α_Apical 
Processing group: CCL-5_Apical 
Processing group: IL-1β_Apical 
Processing group: CCL-4_Apical 
Processing group: CCL-2_Basal 
Processing group: CXCL-9_Apical 
Processing group: TNF-α_Basal 
Processing group: MMP-9_Apical 
# Combine all fixed effects dataframes in the list into a single dataframe
stats_fixed_effects <- bind_rows(map(stats_list, pluck, "fixed_effects"))

# Combine all random effects dataframes in the list into a single dataframe
stats_random_effects <- bind_rows(map(stats_list, pluck, "random_effects"))

# Combine all contrasts dataframes in the list into a single dataframe
stats_contrasts <- bind_rows(map(stats_list, pluck, "contrasts"))

# Combine all backtransformed predicted values dataframes in the list into a single dataframe
stats_transf_preds <- bind_rows(map(stats_list, pluck, "transf_preds"))
P.value Correction
# Adjust across all Cyto-Location groups using FDR
stats_contrasts <- stats_contrasts %>%
  mutate(adj.p.FDR = p.adjust(p.value, method = "fdr")) %>%
  mutate(sign.FDR = ifelse(adj.p.FDR < 0.05, T, F)) 
Contrasts table formatting
# Separate contrast names into 2 groups matching the x axis labels for the individual plots
stats_contrasts <- stats_contrasts %>%
  separate(contrast_label, into = c("bacteria", "viability"), sep = "--") %>%
  mutate(bacteria = str_trim(bacteria),  # Remove leading/trailing spaces
         viability = str_trim(viability)) %>%
  separate(bacteria, into = c("bact1", "bact2"), sep = "-") %>%
  mutate(bact2 = str_trim(if_else(!is.na(bact2), bact2, bact1)),
         bact1 = str_trim(bact1)) %>%
  separate(viability, into = c("via1", "via2"), sep = "-") %>%
  mutate(via2 = str_trim(if_else(!is.na(via2), via2, via1)),
         via1 = str_trim(via1))

# Substitute bacteria and viability names with the label versions
stats_contrasts$bact1_label <- Bac_order$bacteria_label[match(stats_contrasts$bact1, Bac_order$bacteria)]
stats_contrasts$bact2_label <- Bac_order$bacteria_label[match(stats_contrasts$bact2, Bac_order$bacteria)]
stats_contrasts$via1_label <- Via_order$viability_label[match(stats_contrasts$via1, Via_order$viability)]
stats_contrasts$via2_label <- Via_order$viability_label[match(stats_contrasts$via2, Via_order$viability)]

# Formatting of the p.value labels with option for * and **
stats_contrasts <- stats_contrasts %>%
  mutate(sign = case_when(
    adj.p.FDR < 0.05 ~ "*",
    TRUE ~ ""  
  ))
stats_contrasts$adj.p.FDR = format(stats_contrasts$adj.p.FDR, digits = 2, scientific = TRUE) 

# Generate the final group labels
stats_contrasts <- stats_contrasts %>%
  mutate(group1 = paste(bact1_label, via1_label, sep = " "),
         group2 = paste(bact2_label, via2_label, sep = " "),
         contrast = paste(bact1, via1, "vs.", bact2, via2, sep = " "))

Plot function

# Function to plot each individual Cyto-location group using pg/mL data
plot_function <- function(true_data, stats_data, contrasts, each_group) {
  
  # Subset data
  true_data <- true_data %>%
    filter(group_label == each_group) 
  stats_data <- stats_data %>%
    filter(group_label == each_group) 
  contrasts <- contrasts %>%
    filter(group_label == each_group) 
  
  # Factor Ordering
  stats_data <- merge(stats_data, Bac_order, by = "bacteria") 
  stats_data <- merge(stats_data, Via_order, by = "viability") 
  stats_data$bacteria_label <- factor(stats_data$bacteria_label, levels = Bac_order$bacteria_label)
  stats_data$viability_label <- factor(stats_data$viability_label, levels = Via_order$viability_label)

  true_data <- merge(true_data, Bac_order, by = "bacteria") 
  true_data <- merge(true_data, Via_order, by = "viability") 
  true_data$bacteria_label <- factor(true_data$bacteria_label, levels = Bac_order$bacteria_label)
  true_data$viability_label <- factor(true_data$viability_label, levels = Via_order$viability_label)
  
  # Add columns for coloring the background by viability
  true_data <- true_data %>%
    mutate(bar_pos = ifelse(viability == "IRR", Inf, NA)) %>%
    mutate(bar_neg = ifelse(viability == "IRR", 0, -Inf)) 
  
  # Get location for p value labels
  location <- log10(max(stats_data$pconcentration_max, na.rm = TRUE)) * 1.3
  
  # Plot
  plot <- ggplot() +
    # Background bars used to color by viability
    geom_col(data = true_data, aes(x = interaction(bacteria_label, viability_label, lex.order = TRUE, sep = " "), y = bar_pos, fill = "grey95"),
             position = "identity", show.legend = FALSE) +
    geom_col(data = true_data, aes(x = interaction(bacteria_label, viability_label, lex.order = TRUE, sep = " "), y = bar_neg, fill = "grey95"),
             position = "identity", show.legend = FALSE) +
    
    scale_fill_manual(values = c("grey95")) +
    new_scale_fill() +
    
    # Predictions from the model transformed into pg/mL
    geom_point(data = stats_data, aes(x = interaction(bacteria_label, viability_label, lex.order = TRUE, sep = " "), 
                                    y = pconcentration), shape = 3, size = 3) +
    geom_errorbar(data = stats_data, aes(x = interaction(bacteria_label, viability_label, lex.order = TRUE, sep = " "),
                                       y = pconcentration,
                                       ymin = pconcentration_min,
                                       ymax = pconcentration_max),
                  width = .2) +
    
    # True values in pg/mL
    geom_jitter(data = true_data, aes(x = interaction(bacteria_label, viability_label, lex.order = TRUE, sep = " "), 
                                      y = concentration_final, color = bacteria_label, shape = line), width = 0.3, size = 3, show.legend = FALSE) + 
    
    scale_color_manual(values = c("#5b5b5b", "#1E90FF", "#800080", "#927ed1")) +

    # General style
    scale_y_log10() +
    scale_x_discrete(expand = c(0,0)) +
    labs(title = each_group,
         x = "",
         y = "pg/mL",) +  
    theme_bw() +
    theme(panel.grid = element_blank(), 
          text = element_text(size = 12),
          axis.text.x = element_markdown()) 
  
  # Arrange plot and pvalues table
  contrasts <- contrasts %>%
    ungroup() %>%
    select(contrast, singular, estimate, SE, df, t.ratio, p.value, adj.p.FDR)

  Tmin <- ttheme_minimal()
  panel <- ggarrange(plot + theme(plot.margin = unit(c(0.25,0.25,0.25,0.25), "in")), 
                     tableGrob(contrasts, theme = Tmin, rows = NULL), 
                     ncol = 1, heights = c(0.7, 0.3))
  
  return(list(each_group,
              plot, 
              panel))
} 
#Example use for a specific Cytokine-location group
#each_group = "IL-18_Apical"
#plot_function(Stats_values, stats_transf_preds, stats_contrasts, each_group)
plots_list <- map(unique(Stats_values$group_label), ~plot_function(true_data = Stats_values, stats_data = stats_transf_preds, contrasts = stats_contrasts, .x))

Saving files

## Save outputs
write_csv(stats_fixed_effects, file.path(stats_folder, "stats_fixed_effects.csv"))
write_csv(stats_random_effects, file.path(stats_folder, "stats_random_effects.csv"))
write_csv(stats_contrasts, file.path(stats_folder, "stats_contrasts.csv"))
write_csv(stats_transf_preds, file.path(stats_folder, "stats_transf_preds.csv"))

# Save all multiple plots/tables on individual pages of a single pdf
pdf(paste0(figures_folder, "/Cyto_Boxplots.pdf"), width = 11, height = 8.5)
for (i in seq_along(plots_list)) {
  print(plots_list[[i]][3])
}
[[1]]

[[1]]

[[1]]

[[1]]

[[1]]

[[1]]

[[1]]

[[1]]

[[1]]

[[1]]

[[1]]

[[1]]

[[1]]

[[1]]

[[1]]

[[1]]

[[1]]

[[1]]

[[1]]

[[1]]

[[1]]

[[1]]

[[1]]

[[1]]

[[1]]

[[1]]

[[1]]

[[1]]

[[1]]

[[1]]

[[1]]

[[1]]
dev.off()
quartz_off_screen 
                2 
# Save all plot figures as an R object
saveRDS(plots_list, file.path(figures_folder, "plots_list.rds"))

# Cleaning-up all objects from the environment
rm(list = ls())

pg/mL analysis

File Paths

# Folder paths
dataframes_path <- "data/dataframes"
metadata_path <- "data/metadata/Cytokines"

# Load data and metadata
Cyto_values_48h <- readRDS(file.path(dataframes_path, "Cyto_values_48h.rds"))
BacVia_order <- read_csv(file.path(metadata_path, "Order_BacteriaViability.csv"))

# Create subfolders for output files
analysis_folder <- "data/outputs/Cytokines/analysis_pgml"
if (!file.exists("data/outputs/Cytokines/analysis_pgml")) {
  dir.create("data/outputs/Cytokines/analysis_pgml", recursive = TRUE)
}
figures_folder <- "data/outputs/Cytokines/figures_pgml"
if (!file.exists("data/outputs/Cytokines/figures_pgml")) {
  dir.create("data/outputs/Cytokines/figures_pgml", recursive = TRUE)
}
stats_folder <- "data/outputs/Cytokines/stats_pgml"
if (!file.exists("data/outputs/Cytokines/stats_pgml")) {
  dir.create("data/outputs/Cytokines/stats_pgml", recursive = TRUE)
}
# Creating combined variable for bacteria and viability including the NB control. Eliminate rejected grouping.
Stats_values <- Cyto_values_48h %>%
  mutate(Bac.Via = paste(bacteria, viability, sep = ".")) %>%
  filter(group_rejected == FALSE) %>%
  arrange(desc(range))

# Factor Ordering
Stats_values <- merge(Stats_values, BacVia_order, by = "Bac.Via", all.x = TRUE)
Stats_values$Bac.Via <- factor(Stats_values$Bac.Via, levels = BacVia_order$Bac.Via)
Stats_values$Bac.Via_label <- factor(Stats_values$Bac.Via_label, levels = BacVia_order$Bac.Via_label)

Stats function

# Function to analyze each individual Cyto-location group using pg/mL data
stats_function <- function(values_data, each_group) {
  
  # Subset the data
  data_subset <- values_data %>%
    filter(group_label == each_group) 
  
  # First run linear mixed-effects model with random effects
  model <- lmer(log(concentration_final) ~ Bac.Via 
                + (1|line) + (1|line:date), 
                data = data_subset)
  
  # Extract random effects data and convert to dataframe
  singular = isSingular(model)
  random_effects_df <- as.data.frame(VarCorr(model)) %>%
    mutate(proportion = round(100 * (vcov / sum(vcov)), 2)) %>%
    mutate(singular = singular) %>%
    mutate(group_label = each_group) 
  
  # If model is singular, rerun linear mixed-effects model with only date as random effect
  if (singular) {
    model <- lmer(log(concentration_final) ~ Bac.Via 
                   + (1|date), 
                  data = data_subset)
  }
  
  # Run ANOVA and extract pvalue
  anova <- anova(model)
  Bac.Via_pvalue <- anova$`Pr(>F)`[1]
  
  # Generate all individual contrasts without pvalue adjustment
  emmeans_model <- emmeans(model, ~ Bac.Via)
  emmeans_contrasts <- pairs(emmeans_model, adjust = "none")   
  
  # Extract fixed effects coefficients and convert to dataframe
  fixed_effects_df <- as.data.frame(summary(model)$coefficients) %>%
    mutate(singular = singular) %>%
    mutate(group_label = each_group) 
  
  # Extract individual contrasts from emmeans_model and convert to dataframe. 
  contrasts_df <- as.data.frame(summary(emmeans_contrasts)) %>%
    mutate(anova_pvalue = Bac.Via_pvalue) %>%
    mutate(singular = singular) %>%
    mutate(group_label = each_group)
  
  # Adds predictions based on fixed effects, averaged over random effects. It gives a population estimate
  data_subset <- cbind(data_subset, predval = predict(model,re.form = NA, se.fit = TRUE))
  
  # Backtransform the predicted values and CIs
  transf_preds_df <- data_subset %>% group_by(Bac.Via, group_label) %>%
    summarise(mean.predval.fit = mean(predval.fit),
              mean.predval.se.fit = mean(predval.se.fit),
              mean.concentration = mean(concentration_final),
              .groups = 'drop') %>%
    mutate(pconcentration = exp(mean.predval.fit),
           pconcentration_min = exp(mean.predval.fit - 2*mean.predval.se.fit),
           pconcentration_max = exp(mean.predval.fit + 2*mean.predval.se.fit),
           viability = sub(".*\\.", "", Bac.Via))
  
  return(list(fixed_effects = fixed_effects_df, 
              random_effects = random_effects_df, 
              contrasts = contrasts_df,
              transf_preds = transf_preds_df))
}
#Example use for a specific Cytokine-location group
#each_group = "IL-1α_Apical"  
#stats_function(Stats_values, each_group)
# Apply the function to each unique group label and combine results into a list
stats_list <- map(unique(Stats_values$group_label), function(group) {
  cat("Processing group:", group, "\n")  # Add this line to print the group name
  stats_function(Stats_values, group)
})
Processing group: IL-13_Basal 
Processing group: IL-1RN_Basal 
Processing group: CCL-2_Basal 
Processing group: CXCL-9_Apical 
Processing group: IL-6_Basal 
Processing group: CCL-4_Basal 
Processing group: CXCL-11_Apical 
Processing group: IL-18_Basal 
Processing group: IL-1RN_Apical 
Processing group: G-CSF_Basal 
Processing group: IL-1β_Apical 
Processing group: IL-1β_Basal 
Processing group: G-CSF_Apical 
Processing group: IL-18_Apical 
Processing group: CCL-2_Apical 
Processing group: IFN-γ_Apical 
Processing group: IL-36β_Apical 
Processing group: L-VEGF_Apical 
Processing group: CXCL-10_Apical 
Processing group: GM-CSF_Apical 
Processing group: CXCL-10_Basal 
Processing group: IL-10_Apical 
Processing group: TNF-α_Basal 
Processing group: IL-6_Apical 
Processing group: MMP-7_Apical 
Processing group: TNF-α_Apical 
Processing group: GM-CSF_Basal 
Processing group: CCL-5_Basal 
Processing group: CCL-20_Apical 
Processing group: IL-1α_Basal 
Processing group: IL-36β_Basal 
Processing group: MMP-1_Apical 
Processing group: CCL-20_Basal 
Processing group: CCL-7_Apical 
Processing group: MMP-1_Basal 
Processing group: MMP-2_Apical 
Processing group: IL-8_Basal 
Processing group: MMP-9_Apical 
Processing group: IL-5_Apical 
Processing group: IL-8_Apical 
Processing group: CCL-7_Basal 
Processing group: IL-1α_Apical 
Processing group: MMP-10_Basal 
Processing group: CCL-4_Apical 
Processing group: CCL-5_Apical 
Processing group: MMP-9_Basal 
Processing group: Eotaxin_Apical 
Processing group: MMP-7_Basal 
Processing group: L-VEGF_Basal 
Processing group: CCL-19_Basal 
Processing group: MMP-10_Apical 
Processing group: IFN-γ_Basal 
Processing group: MMP-2_Basal 
Processing group: Eotaxin_Basal 
Processing group: IL-10_Basal 
Processing group: IL-5_Basal 
# Combine all fixed effects dataframes in the list into a single dataframe
stats_fixed_effects <- bind_rows(map(stats_list, pluck, "fixed_effects"))

# Combine all random effects dataframes in the list into a single dataframe
stats_random_effects <- bind_rows(map(stats_list, pluck, "random_effects"))

# Combine all contrasts dataframes in the list into a single dataframe
stats_contrasts <- bind_rows(map(stats_list, pluck, "contrasts"))

# Combine all backtransformed predicted values dataframes in the list into a single dataframe
stats_transf_preds <- bind_rows(map(stats_list, pluck, "transf_preds"))
P.value Correction
# Filter rows with NB (controls)
contrast_NB <- stats_contrasts %>%
  filter(str_count(contrast, "NB.control") == 1)

# Filter rows with two live bacteria (live vs live)
contrast_live_vs_live <- stats_contrasts %>%
  filter(str_count(contrast, "live") == 2)

# Filter rows with one live and one IRR (same bacteria)
contrast_same_bacteria <- stats_contrasts %>%
  filter(str_extract(contrast, "[A-Za-z]+(?=.live)") == str_extract(contrast, "[A-Za-z]+(?=.IRR)"))

# Combine the filtered dataframes
stats_contrasts_selection <- bind_rows(contrast_NB, contrast_live_vs_live, contrast_same_bacteria) %>%
  arrange(group_label)

# Selection of Cyto-Location groups to perform contrast analysis based on anova
stats_contrasts_selection <- stats_contrasts_selection %>%
  filter(anova_pvalue < 0.05)
# Adjust across all Cyto-Location groups using FDR
stats_contrasts_selection <- stats_contrasts_selection %>%
  mutate(adj.p.FDR = p.adjust(p.value, method = "fdr")) %>%
  mutate(sign.FDR = ifelse(adj.p.FDR < 0.05, T, F)) 
P-value and Fold-Change significance added to Contrasts table
# Separate contrast names into 2 groups matching the x axis labels for the individual plots
stats_contrasts_selection <- stats_contrasts_selection %>%
  separate(contrast, into = c("condition1", "condition2"), sep = " - ", remove = F) 

# Substitute bacteria and viability names with the label versions
stats_contrasts_selection$group1 <- BacVia_order$Bac.Via_label[match(stats_contrasts_selection$condition1, BacVia_order$Bac.Via)]
stats_contrasts_selection$group2 <- BacVia_order$Bac.Via_label[match(stats_contrasts_selection$condition2, BacVia_order$Bac.Via)]

# Formatting of the p.value labels
stats_contrasts_selection <- stats_contrasts_selection %>%
  mutate(sign = case_when(
    adj.p.FDR < 0.05 ~ "*",
    TRUE ~ ""  
  ))
stats_contrasts_selection$adj.p.FDR = format(stats_contrasts_selection$adj.p.FDR, digits = 2, scientific = TRUE) 

# Calculate fold-change values for each contrast
stats_contrasts_selection <- stats_contrasts_selection %>%
  ungroup() %>%
  left_join(select(stats_transf_preds, Bac.Via, group_label, pconcentration), by = join_by(group_label == group_label, condition1 == Bac.Via)) %>%
  left_join(select(stats_transf_preds, Bac.Via, group_label, pconcentration), by = join_by(group_label == group_label, condition2 == Bac.Via), suffix = c(".1", ".2")) %>%
  mutate(FC = pconcentration.1 / pconcentration.2,
         FC = if_else(FC < 1, -1 / FC, FC),
         highlighted = case_when(
           FC <= -4 ~ "+",
           FC >= 4 ~ "-",
           TRUE ~ "")) 

Plot function

# Function to plot each individual Cyto-location group using pg/mL data
plot_function <- function(true_data, stats_data, contrasts, random, each_group) {
  
  # Subset data
  true_data <- true_data %>%
    filter(group_label == each_group) 
  stats_data <- stats_data %>%
    filter(group_label == each_group) 
  contrasts <- contrasts %>%
    filter(group_label == each_group) 
  random <- random %>%
    filter(group_label == each_group) 
  
  # Factor Ordering
  stats_data <- merge(stats_data, BacVia_order, by = "Bac.Via") 
  stats_data$Bac.Via_label <- factor(stats_data$Bac.Via_label, levels = BacVia_order$Bac.Via_label)

  # Plot
  plot <- ggplot() +
    
    # True values in pg/mL
    geom_jitter(data = true_data, 
                aes(x = Bac.Via_label, y = concentration_final, fill = Bac.Via_label, color = Bac.Via_label, shape = line),
                width = 0.15, size = 3, alpha = 0.75, stroke = 0.75, show.legend = TRUE) +   
    
    scale_fill_manual(values = c("#5b5b5b","#800080","#1E90FF","#927ed1","#800080","#1E90FF","#927ed1")) +
    scale_color_manual(values = c("#5b5b5b","#800080","#1E90FF","#927ed1","#800080","#1E90FF","#927ed1")) +
    scale_shape_manual(values = c(22,24)) +
    
    # Predictions from the model transformed into pg/mL
    geom_point(data = stats_data, aes(x = Bac.Via_label, y = pconcentration), shape = 3, size = 1) +
    geom_errorbar(data = stats_data, aes(x = Bac.Via_label,
                                         y = pconcentration,
                                         ymin = pconcentration_min,
                                         ymax = pconcentration_max),
                  width = .15) +
    
    # Add sections for live vs. IRR
    geom_vline(xintercept = 1.45, colour = "grey70", linetype = 'dotted') +
    geom_vline(xintercept = 4.5, colour = "grey70", linetype = 'dotted') +
    
    # General style
    scale_y_log10() +
    scale_x_discrete(expand = c(0,0.5)) +
    labs(title = each_group,
         x = "",
         y = "pg/mL",
         fill = "Bacteria", color = "Bacteria", shape = "HNO Line") +  
    theme_bw() +
    theme(panel.grid = element_blank(), 
          legend.text = element_markdown(),
          text = element_text(size = 12),
          axis.text.y = element_text(color = "black"),
          axis.text.x = element_markdown()) 
  
  # Conditionally arrange plot and p-values table if contrasts is not NULL
  Tmin <- ttheme_minimal()
  if (!is.null(contrasts) && nrow(contrasts) > 0) {
    contrasts <- contrasts %>%
      ungroup() %>%
      select(contrast, p.value, adj.p.FDR, sign, pconcentration.1, pconcentration.2, FC, highlighted)
    
    panel <- ggarrange(plot + theme(plot.margin = unit(c(0.25, 0.25, 0.25, 0.25), "in")), 
                       tableGrob(random, theme = Tmin, rows = NULL),
                       tableGrob(contrasts, theme = Tmin, rows = NULL), 
                       ncol = 1, heights = c(0.5, 0.2, 0.4))
  } else {
    panel <- ggarrange(plot + theme(plot.margin = unit(c(0.25, 0.25, 0.25, 0.25), "in")), 
                       tableGrob(random, theme = Tmin, rows = NULL),
                       NULL, 
                       ncol = 1, heights = c(0.5, 0.2, 0.4))
  }
  return(list(each_group, plot, panel))
} 
#Example use for a specific Cytokine-location group
#each_group = "IL-1α_Apical" 
#plot_function(Stats_values, stats_transf_preds, stats_contrasts_selection, stats_random_effects, each_group)
plots_list <- map(unique(Stats_values$group_label), ~plot_function(true_data = Stats_values, stats_data = stats_transf_preds, contrasts = stats_contrasts_selection, random = stats_random_effects, .x))

Saving files

## Save outputs
write_xlsx(stats_fixed_effects, file.path(stats_folder, "stats_fixed_effects.xlsx"))
write_xlsx(stats_random_effects, file.path(stats_folder, "stats_random_effects.xlsx"))
write_xlsx(stats_contrasts_selection, file.path(stats_folder, "stats_contrasts.xlsx"))
write_xlsx(stats_transf_preds, file.path(stats_folder, "stats_transf_preds.xlsx"))
saveRDS(stats_contrasts_selection, file.path(stats_folder, "stats_contrasts.rds"))

# Save all multiple plots/tables on individual pages of a single pdf
pdf(paste0(figures_folder, "/Cyto_Boxplots.pdf"), width = 10, height = 11)
for (i in seq_along(plots_list)) {
  print(plots_list[[i]][3])
}
[[1]]

[[1]]

[[1]]

[[1]]

[[1]]

[[1]]

[[1]]

[[1]]

[[1]]

[[1]]

[[1]]

[[1]]

[[1]]

[[1]]

[[1]]

[[1]]

[[1]]

[[1]]

[[1]]

[[1]]

[[1]]

[[1]]

[[1]]

[[1]]

[[1]]

[[1]]

[[1]]

[[1]]

[[1]]

[[1]]

[[1]]

[[1]]

[[1]]

[[1]]

[[1]]

[[1]]

[[1]]

[[1]]

[[1]]

[[1]]

[[1]]

[[1]]

[[1]]

[[1]]

[[1]]

[[1]]

[[1]]

[[1]]

[[1]]

[[1]]

[[1]]

[[1]]

[[1]]

[[1]]

[[1]]

[[1]]
dev.off()
quartz_off_screen 
                2 
# Save all plot figures as an R object
saveRDS(plots_list, file.path(figures_folder, "plots_list.rds"))

# Cleaning-up all objects from the environment
#rm(list = ls())
LDH Epithelial Lines
HEK-Blue
Source Code
---
execute:
  message: FALSE
  warning: FALSE
---

# Cytokines {.unnumbered}

```{r}
library(tidyverse)
library(readxl)
library(writexl)
library(ggpubr)
library(ggtext)
library(scales)
library(lme4)
library(afex)
library(emmeans)
library(ggnewscale)
library(gridExtra)
library(plotly)
library(htmlwidgets)
```

## Data Input and Selection

### File Paths

```{r}
# Folder paths
input_path <- "data/input_data/Cytokines/"
metadata_path <- "data/metadata/Cytokines"

# Create subfolders for output files
analysis_folder <- "data/outputs/Cytokines/analysis_log2FC"
if (!file.exists("data/outputs/Cytokines/analysis_log2FC")) {
  dir.create("data/outputs/Cytokines/analysis_log2FC", recursive = TRUE)
}
dataframes_folder <- "data/dataframes"
if (!file.exists("data/dataframes")) {
  dir.create("data/dataframes", recursive = TRUE)
}

# Load metadata
input_data <- read_excel(file.path(metadata_path, "Sample_List.xlsx"))
order_Cyto <- read_csv(file.path(metadata_path, "Order_Cytokines.csv")) %>% mutate_if(is.character, factor)
```

### Data input and clean-up

```{r}
# List all the input files, loop through each file and create a merged dataframe 
file_list <- list.files(input_path, pattern = "Detail\\.xls$", full.names = TRUE)

for (file_path in file_list) {
  # Read the Excel file and skip the first 3 rows
  file_data <- read_excel(file_path, sheet = "Summary", skip = 3)
  
  # Left join with the existing sample_list
  input_data <- left_join(input_data, file_data, by = "Analyte  Sample")
}

# Rename "Analyte  Sample" to avoid issues with spaces in variable names
input_data <- input_data %>% 
  rename("SampleID" = "Analyte  Sample")

# Pivot the data from wide to long format crating the Cytokine variable
input_data_long <- pivot_longer(input_data, 
                         cols = (which(names(input_data) == "SampleID") + 1):ncol(input_data),
                         names_to = "Cytokine", values_to = "value")

# Identify samples below and above the limit of detection and assign them a concentration value
input_data_long <- input_data_long %>%
  mutate(concentration = if_else(value == "N/A", NA, as.numeric(gsub("<|↓|>|↑", "", value))),
         below_LD = if_else(value == "N/A" | grepl("<|↓", value), TRUE, FALSE),
         above_LD = if_else(grepl(">|↑", value), TRUE, FALSE))

# Add columns with a simpler Cytokine name (Cyto) and Category names
input_data_long <- left_join(input_data_long, order_Cyto, by = "Cytokine")

# Add column with sample type information (NB control vs. bacterial sample)
input_data_long <- input_data_long %>% 
  mutate(sample_type = if_else(viability == "control", "control", "bacteria"))

# Get average values for conditions that have technical replicas (control or NB samples)
input_data_long <- input_data_long %>% 
  group_by(date, Cyto, Category, location, line, time, bacteria, viability) %>% 
  mutate(concentration_NA = ifelse(below_LD | above_LD, NA, concentration)) %>% 
  mutate(concentration_avg = mean(concentration_NA, na.rm = TRUE)) %>% 
  mutate(concentration_final = ifelse(is.na(concentration_avg), concentration, concentration_avg)) %>% 
  select(-concentration, concentration_NA, concentration_avg) %>%
  arrange(below_LD, above_LD) %>%
  distinct(date, Cyto, Category, location, line, time, bacteria, viability, .keep_all = TRUE) %>% 
  ungroup()

# Cleaning up extra columns
input_data_long <- input_data_long %>%
  select(date, Cyto, Category, location, line, time, sample_type, bacteria, viability, concentration_final, below_LD, above_LD) 

# Factoring variables with the right levels
input_data_long <- input_data_long %>% mutate_if(is.character, factor)

input_data_long$bacteria <- factor(input_data_long$bacteria, levels = c("NB", "Dpi", "Sau", "Spn"))
input_data_long$viability <- factor(input_data_long$viability, levels = c("control", "live", "IRR"))
input_data_long$line <- fct_recode(input_data_long$line, "HNO204" = "B", "HNO919" = "C") 
input_data_long$location <- fct_recode(input_data_long$location, "Apical" = "Ap", "Basal" = "Baso")
input_data_long$Cyto <- factor(input_data_long$Cyto, levels = rev(order_Cyto$Cyto))
input_data_long$Category <- factor(input_data_long$Category, levels = unique(order_Cyto$Category))

# Making new data frame with the 48h time point only
input_data_long_t48 <- input_data_long %>% 
  filter(time != 0)

# Calculating log2FC relative to the non-bacteria controls
input_data_log2FC <- input_data_long_t48 %>%
  group_by(date, Cyto, Category, location, line, time) %>%
  reframe(
    bacteria = bacteria[sample_type == "bacteria"],
    viability = viability[sample_type == "bacteria"],
    log2FC = log2(concentration_final[sample_type == "bacteria"] / concentration_final[sample_type == "control"]),
    log2ctl = log2(concentration_final[sample_type == "control"]),
    below_LD = below_LD[sample_type == "bacteria"],
    above_LD = above_LD[sample_type == "bacteria"]
  )

# Factoring variables with the right levels
input_data_log2FC$bacteria <- factor(input_data_log2FC$bacteria, levels = c("Dpi", "Sau", "Spn"))

# Cleaning-up objects from the environment
rm(input_data, input_path, metadata_path, file_list, file_path, file_data, order_Cyto)
```

### Selection of Cyto-location groups

#### Proportion of samples above the detection limit

```{r}
# Calculate for each Cytokine and Location the proportion of samples above limit of detection
LD_all <- input_data_long %>%
  group_by(Cyto, location) %>%
  summarise(detected_all = 1 - mean(below_LD), .groups = 'drop') 

# Same, but without including the time 0h
LD_48h <- input_data_long %>%
  filter(time != 0) %>%
  group_by(Cyto, location) %>%
  summarise(detected_48 = 1 - mean(below_LD), .groups = 'drop') 

# Combine the individual data frames and add label column with combined Cyto and location
LD <- left_join(LD_all, LD_48h, by = join_by(Cyto, location)) %>%
  mutate(group_label = paste(Cyto, location, sep = "_")) %>%
  relocate(group_label, .after = location)

# Plot function
plot_select_function <- function(data, x_var, title) {
  ggplot(data = data, aes(x = reorder(group_label, {{x_var}}), y = {{x_var}}, fill = location)) +
    geom_bar(stat = "identity", position = position_dodge()) + 
    scale_y_continuous(expand = c(0, 0)) +
    scale_fill_manual(values = c("coral", "steelblue1")) +
    coord_flip() +
    labs(title = title, x = "", y = "") +
    theme_bw() +
    theme(panel.grid = element_blank(),
          text = element_text(size = 10))

}

# Create and arrange plots
p_all <- plot_select_function(LD, detected_all, "Proportion of samples above detection limit (all pg/mL values)")
p_48h <- plot_select_function(LD, detected_48, "Proportion of samples above detection limit (pg/mL values at 48h)")

LD_Plots <- ggarrange(p_all, p_48h, ncol = 1, common.legend = T)
ggexport(LD_Plots, filename = file.path(analysis_folder, "LD_Plots.pdf"), height = 30, width = 10)
```

```{r}
#| fig-height: 16
#| fig-width: 8
LD_Plots
```

#### log2FC range values

```{r}
# Calculate the range for each log2FC
range_log2FC <- input_data_log2FC %>%
  group_by(Cyto, location) %>%
  mutate(min = ifelse(is.na(log2FC), 0, min(log2FC, na.rm = TRUE))) %>%
  mutate(max = max(log2FC, na.rm = TRUE)) %>%
  mutate(range = max - min) %>%
  summarise(range = mean(range, na.rm = TRUE), .groups = 'drop')

# Combine the range_log2FC and LD data frames into a summary file used for analysis selection
select_summary <- left_join(LD, range_log2FC, by = join_by(Cyto, location)) %>%
   mutate(group_label = paste(Cyto, location, sep = "_"))

# Plot the range in a similar way to the limit of detection
p_range <- plot_select_function(select_summary, range, "log2FC range")
ggsave(p_range, filename = file.path(analysis_folder, "Range_Plot.png"), height = 8, width = 10)
```

```{r}
#| fig-height: 8
#| fig-width: 8
p_range
```

#### Filtering/selection thresholds

```{r}
# Define cut-off values for group rejection and statistical analysis 
select_summary <- select_summary %>%
  mutate(group_rejected = as.factor(ifelse(range < 2.5 & detected_48 < 0.25, TRUE, FALSE))) %>%
  mutate(group_stats = as.factor(ifelse(range > 3, TRUE, FALSE)))

# Plot correlation between range and proportion of detected samples
p_correlation_FC <- ggplot(select_summary, aes(x = range, y = detected_48, color = Cyto, shape = location)) +
  geom_point(aes(size = group_rejected)) +
  scale_size_manual(values = c(4, 2)) +
  geom_point(data = subset(select_summary, group_stats == TRUE), size = 2, color = "black") +
  theme_bw() +
  theme(panel.grid = element_blank(),
        text = element_text(size = 10)) +
  labs(x = "log2FC range",
       y = "Proportion of samples above detection limit (pg/mL values at 48h)")

ggsave(p_correlation_FC, filename = file.path(analysis_folder, "LDvsRange_Plot.png"), height = 8, width = 10)
```

```{r}
#| fig-height: 10
#| fig-width: 10
ggplotly(p_correlation_FC)
saveWidget(ggplotly(p_correlation_FC), file.path(analysis_folder, "LDvsRange_Interactive.html"), selfcontained = TRUE)
```

```{r}
# Apply the grouping variables to the raw and log2FC data frames
input_data_long_t48 <- left_join(input_data_long_t48, select_summary, by = join_by(Cyto, location))
input_data_log2FC <- left_join(input_data_log2FC, select_summary, by = join_by(Cyto, location)) 
```

### Saving files

```{r}
# Save data frames as CSV files in the dataframes folder
write_csv(input_data_long_t48, file.path(dataframes_folder, "Cyto_values_48h.csv"))
write_csv(input_data_log2FC, file.path(dataframes_folder, "Cyto_log2FC_48h.csv"))

# Save data frames as R objects in the dataframes folder
saveRDS(input_data_long_t48, file.path(dataframes_folder, "Cyto_values_48h.rds"))
saveRDS(input_data_log2FC, file.path(dataframes_folder, "Cyto_log2FC_48h.rds"))

# Cleaning-up all objects from the environment
rm(list = ls())

# Use this to read the final objects
Cyto_values_48h <- readRDS("data/dataframes/Cyto_values_48h.rds")
Cyto_log2FC_48h <- readRDS("data/dataframes/Cyto_log2FC_48h.rds")
```

## Heatmap

### log2FC analysis

#### File Paths

```{r}
# Folder paths
dataframes_path <- "data/dataframes"
metadata_path <- "data/metadata/Cytokines"

# Load data and metadata
Cyto_log2FC_48h <- readRDS("data/dataframes/Cyto_log2FC_48h.rds")
BacVia_order <- read_csv(file.path(metadata_path, "Order_BacteriaViability_FC.csv"))
SubsetFig2 <- read_csv(file.path(metadata_path, "Order_CytokinesFig2.csv"))

# Create subfolders for output files
figures_folder <- "data/outputs/Cytokines/figures_log2FC"
if (!file.exists("data/outputs/Cytokines/figures_log2FC")) {
  dir.create("data/outputs/Cytokines/figures_log2FC", recursive = TRUE)
}
```

#### Heatmap plot

```{r}
# Averaging replicate log2FCs
Avg_log2FC_Cytos <- Cyto_log2FC_48h %>%
  group_by(Cyto, Category, location, viability, bacteria, group_rejected) %>%
  summarize(avg_log2FC = mean(log2FC, na.rm = TRUE), .groups = 'drop') %>%
  mutate(Bac.Via = paste(bacteria, viability, sep = "."))

# Factor Ordering
Avg_log2FC_Cytos <- merge(Avg_log2FC_Cytos, BacVia_order, by = "Bac.Via")
Avg_log2FC_Cytos$Bac.Via_label <- factor(Avg_log2FC_Cytos$Bac.Via_label, levels = BacVia_order$Bac.Via_label)

# Make new variable with a version of avg_log2FC with NAs if group_rejected is TRUE
Avg_log2FC_Cytos <- Avg_log2FC_Cytos %>%
  mutate(value = ifelse(group_rejected == TRUE, NA, avg_log2FC))
```

```{r}
# Subset data for min-heatmap in Figure 2
Avg_log2FC_Cytos_Fig2 <- inner_join(select(Avg_log2FC_Cytos, -Category), SubsetFig2, by = join_by(Cyto))

# Factor Ordering
Avg_log2FC_Cytos_Fig2$Cyto <- factor(Avg_log2FC_Cytos_Fig2$Cyto, levels = rev(SubsetFig2$Cyto))
```

::: {.content-hidden when-profile="manuscript"}
This code was used to subset the data to only Dpi and Sau for a version of the heatmap for the CETR grant.
```{r}
#| eval: FALSE
Avg_log2FC_Cytos <- Avg_log2FC_Cytos %>%
  filter(Bac.Via %in% c("Dpi.live","Dpi.IRR","Sau.live","Sau.IRR"))
```
:::

```{r}
# Code for heatmap
plot_heatmap <- function(data) {
  ggplot(data, aes(x = Cyto, y = Bac.Via_label, fill = value)) +
    geom_tile(color = "black") + 
    facet_grid(location ~ Category , space = "free", scales = "free") +
    scale_fill_gradient2(low = "#009ad1", high = "#AD1457",
                         mid = "#fefbea", na.value = "grey",
                         name = expression("Log" [2]*"FC")) +
    labs(x = "", y = "") +
    scale_y_discrete(expand = c(0,0), limits = rev) +
    scale_x_discrete(expand = c(0,0), limits = rev) +
    theme_bw() +
    theme(panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
          strip.background = element_rect(colour = "black", fill = NA),
          strip.text.y = element_text(angle = -90, hjust = 0.5, size = 14, face = "bold"),
          strip.text.x = element_text(size = 12),
          axis.text.y = element_markdown(size = 14),
          axis.text.x = element_text(size = 12, angle = 67, hjust = 1, colour = "black"),
          legend.title = element_text(size = 14),
          legend.text = element_text(size = 14),
          panel.spacing = unit(4, 'pt')
    )
}
```

```{r}
#| fig-height: 4
#| fig-width: 10
heatmap <- plot_heatmap(Avg_log2FC_Cytos)
heatmap
```

```{r}
#| fig-height: 4
#| fig-width: 6
heatmap_Fig2 <- plot_heatmap(Avg_log2FC_Cytos_Fig2)
heatmap_Fig2
```

#### Saving files

```{r}
# Save figures as png
ggsave(plot = heatmap, filename = file.path(figures_folder, "heatmap.png"), width = unit(9, "in"), height = unit(3.5, "in"))
ggsave(plot = heatmap_Fig2, filename = file.path(figures_folder, "heatmap_Fig2.png"), width = unit(5, "in"), height = unit(3.5, "in"))

# Save figurse as an R objects
saveRDS(heatmap, file.path(figures_folder, "heatmap.rds"))
saveRDS(heatmap_Fig2, file.path(figures_folder, "heatmap_Fig2.rds"))

# Cleaning-up all objects from the environment
rm(list = ls())
```

### pg/mL analysis

#### File Paths

```{r}
# Folder paths
dataframes_path <- "data/dataframes"
metadata_path <- "data/metadata/Cytokines"

# Load data and metadata
Cyto_values_48h <- readRDS(file.path(dataframes_path, "Cyto_values_48h.rds"))
BacVia_order <- read_csv(file.path(metadata_path, "Order_BacteriaViability.csv"))

# Create subfolders for output files
figures_folder <- "data/outputs/Cytokines/figures_pgml"
if (!file.exists("data/outputs/Cytokines/figures_pgml")) {
  dir.create("data/outputs/Cytokines/figures_pgml", recursive = TRUE)
}
```

#### Heatmap plot

```{r}
# Averaging replicate log2FCs
Avg_values_Cytos <- Cyto_values_48h %>%
  group_by(Cyto, Category, location, viability, bacteria, group_rejected) %>%
  summarize(avg_value = mean(concentration_final, na.rm = TRUE), .groups = 'drop') %>%
  mutate(log_value = log10(avg_value)) %>% 
  mutate(Bac.Via = paste(bacteria, viability, sep = "."))

# Factor Ordering
Avg_values_Cytos <- merge(Avg_values_Cytos, BacVia_order, by = "Bac.Via")
Avg_values_Cytos$Bac.Via_label <- factor(Avg_values_Cytos$Bac.Via_label, levels = BacVia_order$Bac.Via_label)

# Make new variable with a version of avg_log2FC with NAs if group_rejected is TRUE
Avg_values_Cytos <- Avg_values_Cytos %>%
  mutate(value = ifelse(group_rejected == TRUE, NA, log_value))
```

```{r}
# Code for heatmap
plot_heatmap <- ggplot(Avg_values_Cytos, aes(x = Cyto, y = Bac.Via_label, fill = value)) +
  geom_tile(color = "black") + 
  facet_grid(location ~ Category , space = "free", scales = "free") +
  scale_fill_gradient2(low = "#009ad1", high = "#AD1457",
                       mid = "#fefbea", na.value = "grey",
                       name = "Log10 pg/mL") +
  labs(x = "", y = "") +
  scale_y_discrete(expand = c(0,0), limits = rev) +
  scale_x_discrete(expand = c(0,0), limits = rev) +
  theme_bw() +
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        strip.background = element_rect(colour = "black", fill = NA),
        strip.text.y = element_text(angle = -90, hjust = 0.5, size = 11),
        axis.text.y = element_markdown(size = 11),
        axis.text.x = element_text(size = 11, angle = 67,  hjust = 1),
        panel.spacing = unit(4, 'pt')
  )
```

```{r}
#| fig-height: 4
#| fig-width: 10
plot_heatmap 
```

#### Saving files

```{r}
# Save figure as png
ggsave(plot = plot_heatmap, filename = file.path(figures_folder, "heatmap.png"), width = unit(9, "in"), height = unit(3.5, "in"))

# Save figure as an R object
saveRDS(plot_heatmap, file.path(figures_folder, "heatmap.rds"))

# Cleaning-up all objects from the environment
rm(list = ls())
```

## Stats and Individual Plots

### log2FC analysis

#### File Paths

```{r}
# Folder paths
dataframes_path <- "data/dataframes"
metadata_path <- "data/metadata/Cytokines"
analysis_folder <- "data/outputs/Cytokines/analysis_log2FC"
figures_folder <- "data/outputs/Cytokines/figures_log2FC"

# Load data and metadata
Cyto_log2FC_48h <- readRDS(file.path(dataframes_path, "Cyto_log2FC_48h.rds"))
Cyto_values_48h <- readRDS(file.path(dataframes_path, "Cyto_values_48h.rds"))
Bac_order <- read.csv(file.path(metadata_path, "Order_Bacteria.csv"))
Via_order <- read.csv(file.path(metadata_path, "Order_Viability.csv"))

# Create subfolders for output files
stats_folder <- "data/outputs/Cytokines/stats_log2FC"
if (!file.exists("data/outputs/Cytokines/stats_log2FC")) {
  dir.create("data/outputs/Cytokines/stats_log2FC", recursive = TRUE)
}
```

```{r}
# Subset the data for the selected analysis groups
Stats_log2FC <- Cyto_log2FC_48h %>%
    filter(group_stats == TRUE) %>%
    arrange(desc(range))

Stats_values <- Cyto_values_48h %>%
    filter(group_stats == TRUE) %>%
    arrange(desc(range))
```

#### Stats function

```{r}
# Function to analyze each individual Cyto-location group using log2FC data
stats_function <- function(log2FC_data, each_group) {
  
  # Subset the data
  log2FCdata_subset <- log2FC_data %>%
    filter(group_label == each_group) 
  
  # First run linear mixed-effects model with random effects
  model <- lmer(log2FC ~ bacteria * viability 
                + (1|line) + (1|line:date), 
                data = log2FCdata_subset)
  
  # If model is singular, rerun linear model without random effects
  singular = isSingular(model)
  if (singular) {
    model <- lm(log2FC ~ bacteria * viability, 
                  data = log2FCdata_subset)
  }
  
  # Individual contrasts
  emmeans_model <- emmeans(model, ~ bacteria * viability)
  emmeans_bacteria <- pairs(emmeans_model, simple = "bacteria", adjust = "none")   
  emmeans_viability <- pairs(emmeans_model, simple = "viability", adjust = "none")    
  
  # Extract fixed effects coefficients and convert to dataframe
  fixed_effects_df <- as.data.frame(summary(model)$coefficients) %>%
    mutate(singular = singular) %>%
    mutate(group_label = each_group) 
  
  # Extract random effects data and convert to dataframe (if not singular)
  if (singular) {
    random_effects_df <- data.frame()
  } else {
    random_effects_df <- as.data.frame(VarCorr(model)) %>%
      mutate(proportion = round(100 * (vcov / sum(vcov)), 2)) %>%
      mutate(singular = singular) %>%
      mutate(group_label = each_group) 
  }
  
  # Extract bacteria contrasts from emmeans_model and convert to dataframe. Filter live only
  contrasts_bacteria_df <- as.data.frame(summary(emmeans_bacteria)) %>%
    filter(viability == "live") %>%
    mutate(contrast_label = paste(contrast, viability, sep = " -- ")) %>%
    select(-contrast, -viability) %>%
    mutate(singular = singular) %>%
    mutate(group_label = each_group)
  
  # Extract viability contrasts from emmeans_model and convert to dataframe
  contrasts_viability_df <- as.data.frame(summary(emmeans_viability)) %>%
    mutate(contrast_label = paste(bacteria, contrast, sep = " -- ")) %>%
    select(-contrast, -bacteria) %>%
    mutate(singular = singular) %>%
    mutate(group_label = each_group)
  
  # Combine contrasts dataframes
  contrasts_df <- rbind(contrasts_bacteria_df,contrasts_viability_df) 
  
  # Adds predictions based on fixed effects, averaged over random effects. It gives a population estimate
  log2FCdata_subset <- cbind(log2FCdata_subset, predval = predict(model,re.form = NA, se.fit = TRUE))
  
  # Backtransform the predicted values and CIs
  transf_preds_df <- log2FCdata_subset %>% group_by(bacteria, viability, group_label) %>%
    summarise(ml2ctl = mean(log2ctl), ml2FC = mean(log2FC), 
              ml2predval.fit = mean(predval.fit),
              ml2predval.se.fit = mean(predval.se.fit),
              .groups = 'drop') %>%
    mutate(pconcentration = 2^(ml2predval.fit + ml2ctl),
           pconcentration_min = 2^(ml2predval.fit - 2*ml2predval.se.fit + ml2ctl),
           pconcentration_max = 2^(ml2predval.fit + 2*ml2predval.se.fit + ml2ctl),
           singular = singular)
  
  return(list(fixed_effects = fixed_effects_df, 
              random_effects = random_effects_df, 
              contrasts_bacteria = contrasts_bacteria_df,
              contrasts_viability = contrasts_viability_df,
              contrasts = contrasts_df,
              transf_preds = transf_preds_df))
}
```

```{r}
#Example use for a specific Cytokine-location group
#each_group = "IL-18_Apical" 
#stats_function(Stats_log2FC, each_group)
```

```{r}
# Apply the function to each unique group label and combine results into a list
stats_list <- map(unique(Stats_log2FC$group_label), function(group) {
  cat("Processing group:", group, "\n")  # Add this line to print the group
  stats_function(Stats_log2FC, group)
})

# Combine all fixed effects dataframes in the list into a single dataframe
stats_fixed_effects <- bind_rows(map(stats_list, pluck, "fixed_effects"))

# Combine all random effects dataframes in the list into a single dataframe
stats_random_effects <- bind_rows(map(stats_list, pluck, "random_effects"))

# Combine all contrasts dataframes in the list into a single dataframe
stats_contrasts <- bind_rows(map(stats_list, pluck, "contrasts"))

# Combine all backtransformed predicted values dataframes in the list into a single dataframe
stats_transf_preds <- bind_rows(map(stats_list, pluck, "transf_preds"))
```

##### P.value Correction

```{r}
# Adjust across all Cyto-Location groups using FDR
stats_contrasts <- stats_contrasts %>%
  mutate(adj.p.FDR = p.adjust(p.value, method = "fdr")) %>%
  mutate(sign.FDR = ifelse(adj.p.FDR < 0.05, T, F)) 
```

::: {.content-hidden when-profile="manuscript"}
```{r}
# Adjust in each Cyto-Location group using Holmes
stats_contrasts <- stats_contrasts %>%
  group_by(group_label) %>%
  mutate(adj.p.value.holm = p.adjust(p.value, method = "holm")) %>% 
  mutate(sign.holm  = ifelse(adj.p.value.holm < 0.05, T, F)) 
```

Compare both adjustment methods
```{r}
# Compare both adjustment methods
stats_contrasts <- stats_contrasts %>%
  mutate(HOLM.FDR = paste(sign.holm, sign.FDR, sep = ".")) %>%
  mutate(HOLM.FDR = as.factor(HOLM.FDR))

# plot Holm vs. FDR Adjustment methods
plot_pvalues <- ggplot(data = stats_contrasts, 
                       aes(x = adj.p.value.holm,  y = adj.p.FDR, color = HOLM.FDR,
                           text = paste("Contrast Label: ", contrast_label, "<br>", "Group Label: ", group_label))) +
  geom_vline(xintercept = 0.05, linetype = "dashed", color = "grey40") +
  geom_hline(yintercept = 0.05, linetype = "dashed", color = "grey40") +
  geom_point(aes(), size = 2) +
  scale_color_manual(values = c("black", "blue", "magenta", "seagreen")) +
  scale_x_continuous(breaks = c(0.05, 0.25, 0.5, 0.75, 1.0), expand = c(0, 0)) +
  scale_y_continuous(breaks = c(0.05, 0.25, 0.5, 0.75, 1.0), expand = c(0, 0)) +
  labs(title = "", x = "Holmes adjusted in each Cyto-Location group", y = "FDR across all Cyto-Location groups") +
  theme_bw() +
  theme(panel.grid = element_blank(), text = element_text(size = 10))

ggsave(plot_pvalues, filename = file.path(analysis_folder, "pvaluesFDRvsHolm.png"), height = 8, width = 10)
```

```{r}
#| fig-height: 10
#| fig-width: 10
ggplotly(plot_pvalues)
saveWidget(ggplotly(plot_pvalues), file.path(analysis_folder, "pvaluesFDRvsHolm_Interactive.html"), selfcontained = TRUE)
```
:::

##### Contrasts table formatting

```{r}
# Separate contrast names into 2 groups matching the x axis labels for the individual plots
stats_contrasts <- stats_contrasts %>%
  separate(contrast_label, into = c("bacteria", "viability"), sep = "--") %>%
  mutate(bacteria = str_trim(bacteria),  # Remove leading/trailing spaces
         viability = str_trim(viability)) %>%
  separate(bacteria, into = c("bact1", "bact2"), sep = "-") %>%
  mutate(bact2 = str_trim(if_else(!is.na(bact2), bact2, bact1)),
         bact1 = str_trim(bact1)) %>%
  separate(viability, into = c("via1", "via2"), sep = "-") %>%
  mutate(via2 = str_trim(if_else(!is.na(via2), via2, via1)),
         via1 = str_trim(via1))

# Substitute bacteria and viability names with the label versions
stats_contrasts$bact1_label <- Bac_order$bacteria_label[match(stats_contrasts$bact1, Bac_order$bacteria)]
stats_contrasts$bact2_label <- Bac_order$bacteria_label[match(stats_contrasts$bact2, Bac_order$bacteria)]
stats_contrasts$via1_label <- Via_order$viability_label[match(stats_contrasts$via1, Via_order$viability)]
stats_contrasts$via2_label <- Via_order$viability_label[match(stats_contrasts$via2, Via_order$viability)]

# Formatting of the p.value labels with option for * and **
stats_contrasts <- stats_contrasts %>%
  mutate(sign = case_when(
    adj.p.FDR < 0.05 ~ "*",
    TRUE ~ ""  
  ))
stats_contrasts$adj.p.FDR = format(stats_contrasts$adj.p.FDR, digits = 2, scientific = TRUE) 

# Generate the final group labels
stats_contrasts <- stats_contrasts %>%
  mutate(group1 = paste(bact1_label, via1_label, sep = " "),
         group2 = paste(bact2_label, via2_label, sep = " "),
         contrast = paste(bact1, via1, "vs.", bact2, via2, sep = " "))
```

#### Plot function

```{r}
# Function to plot each individual Cyto-location group using pg/mL data
plot_function <- function(true_data, stats_data, contrasts, each_group) {
  
  # Subset data
  true_data <- true_data %>%
    filter(group_label == each_group) 
  stats_data <- stats_data %>%
    filter(group_label == each_group) 
  contrasts <- contrasts %>%
    filter(group_label == each_group) 
  
  # Factor Ordering
  stats_data <- merge(stats_data, Bac_order, by = "bacteria") 
  stats_data <- merge(stats_data, Via_order, by = "viability") 
  stats_data$bacteria_label <- factor(stats_data$bacteria_label, levels = Bac_order$bacteria_label)
  stats_data$viability_label <- factor(stats_data$viability_label, levels = Via_order$viability_label)

  true_data <- merge(true_data, Bac_order, by = "bacteria") 
  true_data <- merge(true_data, Via_order, by = "viability") 
  true_data$bacteria_label <- factor(true_data$bacteria_label, levels = Bac_order$bacteria_label)
  true_data$viability_label <- factor(true_data$viability_label, levels = Via_order$viability_label)
  
  # Add columns for coloring the background by viability
  true_data <- true_data %>%
    mutate(bar_pos = ifelse(viability == "IRR", Inf, NA)) %>%
    mutate(bar_neg = ifelse(viability == "IRR", 0, -Inf)) 
  
  # Get location for p value labels
  location <- log10(max(stats_data$pconcentration_max, na.rm = TRUE)) * 1.3
  
  # Plot
  plot <- ggplot() +
    # Background bars used to color by viability
    geom_col(data = true_data, aes(x = interaction(bacteria_label, viability_label, lex.order = TRUE, sep = " "), y = bar_pos, fill = "grey95"),
             position = "identity", show.legend = FALSE) +
    geom_col(data = true_data, aes(x = interaction(bacteria_label, viability_label, lex.order = TRUE, sep = " "), y = bar_neg, fill = "grey95"),
             position = "identity", show.legend = FALSE) +
    
    scale_fill_manual(values = c("grey95")) +
    new_scale_fill() +
    
    # Predictions from the model transformed into pg/mL
    geom_point(data = stats_data, aes(x = interaction(bacteria_label, viability_label, lex.order = TRUE, sep = " "), 
                                    y = pconcentration), shape = 3, size = 3) +
    geom_errorbar(data = stats_data, aes(x = interaction(bacteria_label, viability_label, lex.order = TRUE, sep = " "),
                                       y = pconcentration,
                                       ymin = pconcentration_min,
                                       ymax = pconcentration_max),
                  width = .2) +
    
    # True values in pg/mL
    geom_jitter(data = true_data, aes(x = interaction(bacteria_label, viability_label, lex.order = TRUE, sep = " "), 
                                      y = concentration_final, color = bacteria_label, shape = line), width = 0.3, size = 3, show.legend = FALSE) + 
    
    scale_color_manual(values = c("#5b5b5b", "#1E90FF", "#800080", "#927ed1")) +

    # General style
    scale_y_log10() +
    scale_x_discrete(expand = c(0,0)) +
    labs(title = each_group,
         x = "",
         y = "pg/mL",) +  
    theme_bw() +
    theme(panel.grid = element_blank(), 
          text = element_text(size = 12),
          axis.text.x = element_markdown()) 
  
  # Arrange plot and pvalues table
  contrasts <- contrasts %>%
    ungroup() %>%
    select(contrast, singular, estimate, SE, df, t.ratio, p.value, adj.p.FDR)

  Tmin <- ttheme_minimal()
  panel <- ggarrange(plot + theme(plot.margin = unit(c(0.25,0.25,0.25,0.25), "in")), 
                     tableGrob(contrasts, theme = Tmin, rows = NULL), 
                     ncol = 1, heights = c(0.7, 0.3))
  
  return(list(each_group,
              plot, 
              panel))
} 
```

```{r}
#Example use for a specific Cytokine-location group
#each_group = "IL-18_Apical"
#plot_function(Stats_values, stats_transf_preds, stats_contrasts, each_group)
```

```{r}
plots_list <- map(unique(Stats_values$group_label), ~plot_function(true_data = Stats_values, stats_data = stats_transf_preds, contrasts = stats_contrasts, .x))
```

#### Saving files

```{r}
## Save outputs
write_csv(stats_fixed_effects, file.path(stats_folder, "stats_fixed_effects.csv"))
write_csv(stats_random_effects, file.path(stats_folder, "stats_random_effects.csv"))
write_csv(stats_contrasts, file.path(stats_folder, "stats_contrasts.csv"))
write_csv(stats_transf_preds, file.path(stats_folder, "stats_transf_preds.csv"))

# Save all multiple plots/tables on individual pages of a single pdf
pdf(paste0(figures_folder, "/Cyto_Boxplots.pdf"), width = 11, height = 8.5)
for (i in seq_along(plots_list)) {
  print(plots_list[[i]][3])
}
dev.off()

# Save all plot figures as an R object
saveRDS(plots_list, file.path(figures_folder, "plots_list.rds"))

# Cleaning-up all objects from the environment
rm(list = ls())
```

### pg/mL analysis

#### File Paths

```{r}
# Folder paths
dataframes_path <- "data/dataframes"
metadata_path <- "data/metadata/Cytokines"

# Load data and metadata
Cyto_values_48h <- readRDS(file.path(dataframes_path, "Cyto_values_48h.rds"))
BacVia_order <- read_csv(file.path(metadata_path, "Order_BacteriaViability.csv"))

# Create subfolders for output files
analysis_folder <- "data/outputs/Cytokines/analysis_pgml"
if (!file.exists("data/outputs/Cytokines/analysis_pgml")) {
  dir.create("data/outputs/Cytokines/analysis_pgml", recursive = TRUE)
}
figures_folder <- "data/outputs/Cytokines/figures_pgml"
if (!file.exists("data/outputs/Cytokines/figures_pgml")) {
  dir.create("data/outputs/Cytokines/figures_pgml", recursive = TRUE)
}
stats_folder <- "data/outputs/Cytokines/stats_pgml"
if (!file.exists("data/outputs/Cytokines/stats_pgml")) {
  dir.create("data/outputs/Cytokines/stats_pgml", recursive = TRUE)
}
```

```{r}
# Creating combined variable for bacteria and viability including the NB control. Eliminate rejected grouping.
Stats_values <- Cyto_values_48h %>%
  mutate(Bac.Via = paste(bacteria, viability, sep = ".")) %>%
  filter(group_rejected == FALSE) %>%
  arrange(desc(range))

# Factor Ordering
Stats_values <- merge(Stats_values, BacVia_order, by = "Bac.Via", all.x = TRUE)
Stats_values$Bac.Via <- factor(Stats_values$Bac.Via, levels = BacVia_order$Bac.Via)
Stats_values$Bac.Via_label <- factor(Stats_values$Bac.Via_label, levels = BacVia_order$Bac.Via_label)
```

#### Stats function

```{r}
# Function to analyze each individual Cyto-location group using pg/mL data
stats_function <- function(values_data, each_group) {
  
  # Subset the data
  data_subset <- values_data %>%
    filter(group_label == each_group) 
  
  # First run linear mixed-effects model with random effects
  model <- lmer(log(concentration_final) ~ Bac.Via 
                + (1|line) + (1|line:date), 
                data = data_subset)
  
  # Extract random effects data and convert to dataframe
  singular = isSingular(model)
  random_effects_df <- as.data.frame(VarCorr(model)) %>%
    mutate(proportion = round(100 * (vcov / sum(vcov)), 2)) %>%
    mutate(singular = singular) %>%
    mutate(group_label = each_group) 
  
  # If model is singular, rerun linear mixed-effects model with only date as random effect
  if (singular) {
    model <- lmer(log(concentration_final) ~ Bac.Via 
                   + (1|date), 
                  data = data_subset)
  }
  
  # Run ANOVA and extract pvalue
  anova <- anova(model)
  Bac.Via_pvalue <- anova$`Pr(>F)`[1]
  
  # Generate all individual contrasts without pvalue adjustment
  emmeans_model <- emmeans(model, ~ Bac.Via)
  emmeans_contrasts <- pairs(emmeans_model, adjust = "none")   
  
  # Extract fixed effects coefficients and convert to dataframe
  fixed_effects_df <- as.data.frame(summary(model)$coefficients) %>%
    mutate(singular = singular) %>%
    mutate(group_label = each_group) 
  
  # Extract individual contrasts from emmeans_model and convert to dataframe. 
  contrasts_df <- as.data.frame(summary(emmeans_contrasts)) %>%
    mutate(anova_pvalue = Bac.Via_pvalue) %>%
    mutate(singular = singular) %>%
    mutate(group_label = each_group)
  
  # Adds predictions based on fixed effects, averaged over random effects. It gives a population estimate
  data_subset <- cbind(data_subset, predval = predict(model,re.form = NA, se.fit = TRUE))
  
  # Backtransform the predicted values and CIs
  transf_preds_df <- data_subset %>% group_by(Bac.Via, group_label) %>%
    summarise(mean.predval.fit = mean(predval.fit),
              mean.predval.se.fit = mean(predval.se.fit),
              mean.concentration = mean(concentration_final),
              .groups = 'drop') %>%
    mutate(pconcentration = exp(mean.predval.fit),
           pconcentration_min = exp(mean.predval.fit - 2*mean.predval.se.fit),
           pconcentration_max = exp(mean.predval.fit + 2*mean.predval.se.fit),
           viability = sub(".*\\.", "", Bac.Via))
  
  return(list(fixed_effects = fixed_effects_df, 
              random_effects = random_effects_df, 
              contrasts = contrasts_df,
              transf_preds = transf_preds_df))
}
```

```{r}
#Example use for a specific Cytokine-location group
#each_group = "IL-1α_Apical"  
#stats_function(Stats_values, each_group)
```

```{r}
# Apply the function to each unique group label and combine results into a list
stats_list <- map(unique(Stats_values$group_label), function(group) {
  cat("Processing group:", group, "\n")  # Add this line to print the group name
  stats_function(Stats_values, group)
})

# Combine all fixed effects dataframes in the list into a single dataframe
stats_fixed_effects <- bind_rows(map(stats_list, pluck, "fixed_effects"))

# Combine all random effects dataframes in the list into a single dataframe
stats_random_effects <- bind_rows(map(stats_list, pluck, "random_effects"))

# Combine all contrasts dataframes in the list into a single dataframe
stats_contrasts <- bind_rows(map(stats_list, pluck, "contrasts"))

# Combine all backtransformed predicted values dataframes in the list into a single dataframe
stats_transf_preds <- bind_rows(map(stats_list, pluck, "transf_preds"))
```

##### P.value Correction

```{r}
# Filter rows with NB (controls)
contrast_NB <- stats_contrasts %>%
  filter(str_count(contrast, "NB.control") == 1)

# Filter rows with two live bacteria (live vs live)
contrast_live_vs_live <- stats_contrasts %>%
  filter(str_count(contrast, "live") == 2)

# Filter rows with one live and one IRR (same bacteria)
contrast_same_bacteria <- stats_contrasts %>%
  filter(str_extract(contrast, "[A-Za-z]+(?=.live)") == str_extract(contrast, "[A-Za-z]+(?=.IRR)"))

# Combine the filtered dataframes
stats_contrasts_selection <- bind_rows(contrast_NB, contrast_live_vs_live, contrast_same_bacteria) %>%
  arrange(group_label)

# Selection of Cyto-Location groups to perform contrast analysis based on anova
stats_contrasts_selection <- stats_contrasts_selection %>%
  filter(anova_pvalue < 0.05)
```

```{r}
# Adjust across all Cyto-Location groups using FDR
stats_contrasts_selection <- stats_contrasts_selection %>%
  mutate(adj.p.FDR = p.adjust(p.value, method = "fdr")) %>%
  mutate(sign.FDR = ifelse(adj.p.FDR < 0.05, T, F)) 
```

::: {.content-hidden when-profile="manuscript"}
```{r}
# Adjust in each Cyto-Location group using Holmes
stats_contrasts_selection <- stats_contrasts_selection %>%
  group_by(group_label) %>%
  mutate(adj.p.value.holm = p.adjust(p.value, method = "holm")) %>% 
  mutate(sign.holm  = ifelse(adj.p.value.holm < 0.05, T, F)) 
```

Compare both adjustment methods
```{r}
# Compare both adjustment methods
stats_contrasts_selection <- stats_contrasts_selection %>%
  mutate(HOLM.FDR = paste(sign.holm, sign.FDR, sep = ".")) %>%
  mutate(HOLM.FDR = as.factor(HOLM.FDR))

# plot Holm vs. FDR Adjustment methods
plot_pvalues <- ggplot(data = stats_contrasts_selection, 
                       aes(x = adj.p.value.holm,  y = adj.p.FDR, color = HOLM.FDR,
                           text = paste("Contrast Label: ", contrast, "<br>", "Group Label: ", group_label))) +
  geom_vline(xintercept = 0.05, linetype = "dashed", color = "grey40") +
  geom_hline(yintercept = 0.05, linetype = "dashed", color = "grey40") +
  geom_point(aes(), size = 2) +
  scale_color_manual(values = c("black", "blue", "magenta", "seagreen")) +
  scale_x_continuous(breaks = c(0.05, 0.25, 0.5, 0.75, 1.0), expand = c(0, 0)) +
  scale_y_continuous(breaks = c(0.05, 0.25, 0.5, 0.75, 1.0), expand = c(0, 0)) +
  labs(title = "", x = "Holmes adjusted in each Cyto-Location group", y = "FDR across all Cyto-Location groups") +
  theme_bw() +
  theme(panel.grid = element_blank(), text = element_text(size = 10))

ggsave(plot_pvalues, filename = file.path(analysis_folder, "pvaluesFDRvsHolm.png"), height = 8, width = 10)
```

```{r}
#| fig-height: 10
#| fig-width: 10
ggplotly(plot_pvalues)
saveWidget(ggplotly(plot_pvalues), file.path(analysis_folder, "pvaluesFDRvsHolm_Interactive.html"), selfcontained = TRUE)
```
:::

##### P-value and Fold-Change significance added to Contrasts table

```{r}
# Separate contrast names into 2 groups matching the x axis labels for the individual plots
stats_contrasts_selection <- stats_contrasts_selection %>%
  separate(contrast, into = c("condition1", "condition2"), sep = " - ", remove = F) 

# Substitute bacteria and viability names with the label versions
stats_contrasts_selection$group1 <- BacVia_order$Bac.Via_label[match(stats_contrasts_selection$condition1, BacVia_order$Bac.Via)]
stats_contrasts_selection$group2 <- BacVia_order$Bac.Via_label[match(stats_contrasts_selection$condition2, BacVia_order$Bac.Via)]

# Formatting of the p.value labels
stats_contrasts_selection <- stats_contrasts_selection %>%
  mutate(sign = case_when(
    adj.p.FDR < 0.05 ~ "*",
    TRUE ~ ""  
  ))
stats_contrasts_selection$adj.p.FDR = format(stats_contrasts_selection$adj.p.FDR, digits = 2, scientific = TRUE) 

# Calculate fold-change values for each contrast
stats_contrasts_selection <- stats_contrasts_selection %>%
  ungroup() %>%
  left_join(select(stats_transf_preds, Bac.Via, group_label, pconcentration), by = join_by(group_label == group_label, condition1 == Bac.Via)) %>%
  left_join(select(stats_transf_preds, Bac.Via, group_label, pconcentration), by = join_by(group_label == group_label, condition2 == Bac.Via), suffix = c(".1", ".2")) %>%
  mutate(FC = pconcentration.1 / pconcentration.2,
         FC = if_else(FC < 1, -1 / FC, FC),
         highlighted = case_when(
           FC <= -4 ~ "+",
           FC >= 4 ~ "-",
           TRUE ~ "")) 
```

#### Plot function

```{r}
# Function to plot each individual Cyto-location group using pg/mL data
plot_function <- function(true_data, stats_data, contrasts, random, each_group) {
  
  # Subset data
  true_data <- true_data %>%
    filter(group_label == each_group) 
  stats_data <- stats_data %>%
    filter(group_label == each_group) 
  contrasts <- contrasts %>%
    filter(group_label == each_group) 
  random <- random %>%
    filter(group_label == each_group) 
  
  # Factor Ordering
  stats_data <- merge(stats_data, BacVia_order, by = "Bac.Via") 
  stats_data$Bac.Via_label <- factor(stats_data$Bac.Via_label, levels = BacVia_order$Bac.Via_label)

  # Plot
  plot <- ggplot() +
    
    # True values in pg/mL
    geom_jitter(data = true_data, 
                aes(x = Bac.Via_label, y = concentration_final, fill = Bac.Via_label, color = Bac.Via_label, shape = line),
                width = 0.15, size = 3, alpha = 0.75, stroke = 0.75, show.legend = TRUE) +   
    
    scale_fill_manual(values = c("#5b5b5b","#800080","#1E90FF","#927ed1","#800080","#1E90FF","#927ed1")) +
    scale_color_manual(values = c("#5b5b5b","#800080","#1E90FF","#927ed1","#800080","#1E90FF","#927ed1")) +
    scale_shape_manual(values = c(22,24)) +
    
    # Predictions from the model transformed into pg/mL
    geom_point(data = stats_data, aes(x = Bac.Via_label, y = pconcentration), shape = 3, size = 1) +
    geom_errorbar(data = stats_data, aes(x = Bac.Via_label,
                                         y = pconcentration,
                                         ymin = pconcentration_min,
                                         ymax = pconcentration_max),
                  width = .15) +
    
    # Add sections for live vs. IRR
    geom_vline(xintercept = 1.45, colour = "grey70", linetype = 'dotted') +
    geom_vline(xintercept = 4.5, colour = "grey70", linetype = 'dotted') +
    
    # General style
    scale_y_log10() +
    scale_x_discrete(expand = c(0,0.5)) +
    labs(title = each_group,
         x = "",
         y = "pg/mL",
         fill = "Bacteria", color = "Bacteria", shape = "HNO Line") +  
    theme_bw() +
    theme(panel.grid = element_blank(), 
          legend.text = element_markdown(),
          text = element_text(size = 12),
          axis.text.y = element_text(color = "black"),
          axis.text.x = element_markdown()) 
  
  # Conditionally arrange plot and p-values table if contrasts is not NULL
  Tmin <- ttheme_minimal()
  if (!is.null(contrasts) && nrow(contrasts) > 0) {
    contrasts <- contrasts %>%
      ungroup() %>%
      select(contrast, p.value, adj.p.FDR, sign, pconcentration.1, pconcentration.2, FC, highlighted)
    
    panel <- ggarrange(plot + theme(plot.margin = unit(c(0.25, 0.25, 0.25, 0.25), "in")), 
                       tableGrob(random, theme = Tmin, rows = NULL),
                       tableGrob(contrasts, theme = Tmin, rows = NULL), 
                       ncol = 1, heights = c(0.5, 0.2, 0.4))
  } else {
    panel <- ggarrange(plot + theme(plot.margin = unit(c(0.25, 0.25, 0.25, 0.25), "in")), 
                       tableGrob(random, theme = Tmin, rows = NULL),
                       NULL, 
                       ncol = 1, heights = c(0.5, 0.2, 0.4))
  }
  return(list(each_group, plot, panel))
} 
```

```{r}
#Example use for a specific Cytokine-location group
#each_group = "IL-1α_Apical" 
#plot_function(Stats_values, stats_transf_preds, stats_contrasts_selection, stats_random_effects, each_group)
```

```{r}
plots_list <- map(unique(Stats_values$group_label), ~plot_function(true_data = Stats_values, stats_data = stats_transf_preds, contrasts = stats_contrasts_selection, random = stats_random_effects, .x))
```

#### Saving files

```{r}
## Save outputs
write_xlsx(stats_fixed_effects, file.path(stats_folder, "stats_fixed_effects.xlsx"))
write_xlsx(stats_random_effects, file.path(stats_folder, "stats_random_effects.xlsx"))
write_xlsx(stats_contrasts_selection, file.path(stats_folder, "stats_contrasts.xlsx"))
write_xlsx(stats_transf_preds, file.path(stats_folder, "stats_transf_preds.xlsx"))
saveRDS(stats_contrasts_selection, file.path(stats_folder, "stats_contrasts.rds"))

# Save all multiple plots/tables on individual pages of a single pdf
pdf(paste0(figures_folder, "/Cyto_Boxplots.pdf"), width = 10, height = 11)
for (i in seq_along(plots_list)) {
  print(plots_list[[i]][3])
}
dev.off()

# Save all plot figures as an R object
saveRDS(plots_list, file.path(figures_folder, "plots_list.rds"))

# Cleaning-up all objects from the environment
#rm(list = ls())
```