Supplemental Methods: Glycogen

# Load libraries
library(readxl)
library(grid)
library(lme4)
library(afex)
library(tidyverse)
library(ggpubr)

1 Individual Experiment Analysis (N=1)

The analysis was ran three different times based on three independent experiments on different dates. This data can be found in data/glycogen as .xlsx files with the following 3 tabs:

  • csvOD: contains the columns Date, Species, Strain, TimePoint, Media, Reject and GrowthOD.
  • csvNorm: contains the columns Species, TimePoint, Date, and ODNorm.
  • csvLum: contains the columns Date, TimePoint, Well_ID, Type, Amy, Species, Strain, Reject, Glycogen, and Lum.
# Defining the experiment variable (Comment out one experiment at a time)
#experiment <- "240322_Gly"
#experiment <- "240325_Gly"
experiment <- "240331_Gly"

# Read data from an Excel file into two separate data frames: data_lum from the "csvLum" sheet and data_OD from the "csvOD" sheet
data_lum <- read_excel(file.path(paste0("data/glycogen/", experiment, ".xlsx")), sheet = "csvLum")
data_OD <- read_excel(file.path(paste0("data/glycogen/", experiment, ".xlsx")), sheet = "csvOD")
data_norm <- read_excel(file.path(paste0("data/glycogen/", experiment, ".xlsx")), sheet = "csvNorm")

# Filter out rows where "Reject" column is "F" and where "Type" column is not "Empty" in the data_lum data frame, and remove the "Reject" column for both dataframes. Remove species "Cna", as this strain was not included on the manuscript.
data_lum <- data_lum %>% 
  filter(Reject == "F") %>%
  filter(Type != "Empty") %>%
  filter(Species != "Cna") %>%
  select(-Reject)

data_OD <- data_OD %>% 
  filter(Reject == "F") %>%
  filter(Species != "Cna") %>%
  select(-Reject)

data_norm <- data_norm %>% 
  filter(Species != "Cna")

# Split the data_lum data frame into two separate data frames: data_lum_samples containing rows where "Type" is "Sample" and data_lum_standards containing rows where "Type" is "Standard", and remove unnecessary columns
data_lum_samples <- data_lum %>% 
  filter(Type == "Sample")

data_lum_standards <- data_lum %>% 
  filter(Type == "Standard") %>%
  select(-Species, -Strain)

# Convert the "Amy" column in data_lum_standards to a factor and "Glycogen" column to numeric
data_lum_standards$Amy <- as.factor(data_lum_standards$Amy)
data_lum_standards$Glycogen <- as.numeric(data_lum_standards$Glycogen)
# Function to analyze a single timepoint
analyze_glycogen <- function(time, data_lum_standards, data_lum_samples) {
  # Filter the data_lum_standards data frame for rows where "TimePoint" is equal to the specified time
  time_standard <- data_lum_standards %>% 
    filter(TimePoint == time) %>%
    group_by(Glycogen) %>%
    mutate(Difference = ifelse(Amy == "T", Lum - lag(Lum), NA)) %>%
    filter(!is.na(Difference))

  # Fit a linear model (lm) for the standards with "Difference" as the dependent variable and "Glycogen" as the independent variable
  lmStandard <- lm(Difference ~ Glycogen, time_standard) 

  # Extract coefficients from the linear model
  coefficients <- lmStandard$coefficients

  # Filter the data_lum_samples data frame for rows where "TimePoint" is equal to the specified time
  time_samples <- data_lum_samples %>%
    filter(TimePoint == time) %>%
    mutate(Glycogen = (Lum - coefficients[1]) / coefficients[2])

  # Group the data by Strain and Amy, calculate the mean Lum value within each group
  time_samples_diff <- time_samples %>% 
    group_by(Species, Amy) %>%
    summarise(LumAvg = mean(Lum)) %>%
    mutate(Difference = ifelse(Amy == "T", LumAvg - lag(LumAvg), NA)) %>%
    filter(!is.na(Difference)) %>%
    mutate(Glycogen = (Difference - coefficients[1]) / coefficients[2]) %>%
    select(-Amy, -LumAvg) %>%
    mutate(TimePoint = time)

  # Adds individual IDs to the replicas, with and without Amy.
  time_samples <- time_samples %>%
    arrange(Species, Amy)
  time_samples$Replica <- as.factor(rep(1:6, length.out = nrow(time_samples)))

  return(list(time_samples_diff = time_samples_diff))
}
# Loop through different values of time
all_results <- list()

for (time_point in unique(data_lum_standards$TimePoint)) {
  result <- analyze_glycogen(time_point, data_lum_standards, data_lum_samples)
  all_results[[paste0("Time_", time_point)]] <- result

# Combine all time_samples_diff dataframes into a single dataframe
full_Lum_df <- do.call(rbind, lapply(all_results, function(result) result$time_samples_diff))

full_Lum_df <- left_join(full_Lum_df, data_norm) %>%
  mutate(GlycogenNorm = Glycogen/ODNorm)
}
# Convert the 'Species' column to a factor with specified levels
full_Lum_df$Species <- factor(full_Lum_df$Species, levels = c("Cpr", "Cps", "Cac", "Ctu", "Cgl"))

# Convert the 'TimePoint' column to a factor with default level ordering
full_Lum_df$TimePoint <- factor(full_Lum_df$TimePoint)

# Assign the first 'Date' value from 'data_OD' to all rows in 'full_Lum_df'
full_Lum_df$Date <- data_OD$Date[1]

# Write the 'full_Lum_df' dataframe to a CSV file, excluding row names
#write.csv(full_Lum_df, paste0("data/glycogen/Lum/", experiment,"_Lum.csv"), row.names = F)

# Write the 'data_OD' dataframe to a CSV file, excluding row names
#write.csv(data_OD, paste0("data/glycogen/OD/", experiment,"_OD.csv"), row.names = F)

1.1 Saving files

# Create subfolders for output files
Lum_folder <- "data/glycogen/Lum"
if (!file.exists("data/glycogen/Lum")) {
  dir.create("data/glycogen/Lum", recursive = TRUE)
}

OD_folder <- "data/glycogen/OD"
if (!file.exists("data/glycogen/OD")) {
  dir.create("data/glycogen/OD", recursive = TRUE)
}

# Save data frames as CSV files in their output folders
write.csv(full_Lum_df, file.path(Lum_folder, paste0(experiment, "_Lum.csv")), row.names = FALSE)
write.csv(data_OD, file.path(OD_folder, paste0(experiment, "_OD.csv")), row.names = FALSE)

# Cleaning-up all objects from the environment
rm(list = ls())

2 Combined Analysis (N=3)

# Define the directory path for OD data
folder_OD <- "data/glycogen/OD"

# Create a list of full file names for all '_OD.csv' files in the directory
file_list_OD <- list.files(folder_OD, pattern = "_OD.csv", full.names = TRUE)

# Read each file into a list of dataframes
df_list_OD <- lapply(file_list_OD, read.csv)

# Combine all dataframes in the list into a single dataframe
df_OD <- bind_rows(df_list_OD)

# Define the directory path for Lum data
folder_lum <- "data/glycogen/Lum"

# Create a list of full file names for all '_Lum.csv' files in the directory
file_list_lum <- list.files(folder_lum, pattern = "_Lum.csv", full.names = TRUE)

# Read each file into a list of dataframes
df_list_lum <- lapply(file_list_lum, read.csv)

# Combine all dataframes in the list into a single dataframe
df_lum <- bind_rows(df_list_lum)

# Convert the 'Species' column in all average data frames to a factor with specified levels
df_OD$Species <- factor(df_OD$Species, levels = c("Cpr", "Cps", "Cac", "Ctu", "Cgl"))
df_lum$Species <- factor(df_lum$Species, levels = c("Cpr", "Cps", "Cac", "Ctu", "Cgl"))

# Convert the 'TimePoint' column in Lum data frame to a factor with default level ordering
df_lum$TimePoint <- factor(df_lum$TimePoint)

# Define colors
species_colors <- c("#FF8C00", "#FF0000", "#6A329F", "#3B54A4", "grey50")

2.1 Growth (OD600)

# Group the OD data by Species and TimePoint, then calculate the average OD and standard error
avg_data_OD <- df_OD %>%
  group_by(Species, TimePoint) %>%
  summarise(
    AvgOD = mean(GrowthOD, na.rm = TRUE), # Calculate the average OD, removing NA values
    SE = sd(GrowthOD, na.rm = TRUE) / sqrt(sum(!is.na(GrowthOD))) # Calculate standard error
  )
# Create a line plot of average OD600 over time for each species
plot_avg_OD600 <- ggplot(avg_data_OD, aes(x = TimePoint, y = AvgOD, color = Species)) +
  geom_line() + # Add lines to connect points
  geom_point(size = 2) + # Add points to represent data
  geom_errorbar(aes(ymin = AvgOD - SE, ymax = AvgOD + SE), width = 0.2) + # Add error bars
  labs(
    y = expression("Turbidity (OD"[600]*")"),
    x = "Time (hours)"
  ) +
  scale_color_manual(values = species_colors) +
  scale_y_continuous(expand = c(0,0)) +
  scale_x_continuous(expand = c(0,2)) +
  theme_classic() + 
  theme(
    axis.title = element_text(size = 7),
    legend.position = "none", # Remove the legend
    plot.margin = margin(0.3, 0.2, 0.2, 0.5, "cm") # Adjust plot margins
  )
plot_avg_OD600

2.2 Luminesce

# Mixed linear model with GlycogenNorm as fixed effect and TimePoint as a random effect. Using "Ctu" as reference (first factor level)
df_lum$Species <- factor(df_lum$Species, levels = c("Ctu", "Cgl", "Cpr", "Cps", "Cac")) 
model <- lmer(GlycogenNorm ~ Species 
              + (1|TimePoint),
              data = df_lum)

anova(model)
## Type III Analysis of Variance Table with Satterthwaite's method
##         Sum Sq Mean Sq NumDF DenDF F value    Pr(>F)    
## Species 378.71  94.677     4    52  37.688 8.772e-15 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(model)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: GlycogenNorm ~ Species + (1 | TimePoint)
##    Data: df_lum
## 
## REML criterion at convergence: 223
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.6145 -0.4641  0.0295  0.4310  3.5387 
## 
## Random effects:
##  Groups    Name        Variance Std.Dev.
##  TimePoint (Intercept) 0.427    0.6535  
##  Residual              2.512    1.5850  
## Number of obs: 60, groups:  TimePoint, 4
## 
## Fixed effects:
##             Estimate Std. Error      df t value Pr(>|t|)    
## (Intercept)   2.1545     0.5622 12.6438   3.832  0.00218 ** 
## SpeciesCgl    4.8838     0.6471 52.0000   7.548 6.64e-10 ***
## SpeciesCpr   -1.4157     0.6471 52.0000  -2.188  0.03319 *  
## SpeciesCps   -1.7575     0.6471 52.0000  -2.716  0.00895 ** 
## SpeciesCac   -1.6175     0.6471 52.0000  -2.500  0.01562 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##            (Intr) SpcsCg SpcsCpr SpcsCps
## SpeciesCgl -0.575                       
## SpeciesCpr -0.575  0.500                
## SpeciesCps -0.575  0.500  0.500         
## SpeciesCac -0.575  0.500  0.500   0.500
# Group Lum data by Species and TimePoint, then calculate average values
avg_data_Lum <- df_lum %>%
group_by(Species, TimePoint) %>%
  summarise(
    AvgLum = mean(GlycogenNorm, na.rm = TRUE), # Calculate the average normalized Glycogen, removing NA values
    SE = sd(GlycogenNorm, na.rm = TRUE) / sqrt(sum(!is.na(GlycogenNorm))), # Calculate standard error
    .groups = 'drop')
df_lum$Species <- factor(df_lum$Species, levels = c("Cpr", "Cps", "Cac", "Ctu", "Cgl"))

# Create a dot boxplot of normalized glycogen concentration over time for each species
plot_Lum <- ggplot(df_lum, aes(x = TimePoint, y = GlycogenNorm, fill = Species)) +
  geom_boxplot(linewidth = 0.3) + 
  labs(
    y = "Glycogen (ug/ml) per normalized culture density ",
    x = "Time (hours)"
  ) +
  scale_fill_manual(values = species_colors) +
  scale_y_continuous(expand = c(0,0)) +
  theme_classic() + 
  theme(
    axis.title = element_text(size = 7),
    legend.position = "none", # Remove the legend
    plot.margin = margin(0.3, 0.2, 0.2, 0.5, "cm") # Adjust plot margins
  )
plot_Lum

avg_data_Lum$Species <- factor(avg_data_Lum$Species, levels = c("Cpr", "Cps", "Cac", "Ctu", "Cgl"))

# Create a bar plot of average normalized glycogen concentration over time for each species (used for legend)
plot_avg_Lum <- ggplot(avg_data_Lum, aes(x = TimePoint, y = AvgLum, fill = Species)) +
  geom_bar(position = position_dodge(), stat = "identity", color = "black") + # Add bars with dodge position
  labs(
    y = "Glycogen (ug/ml) per normalized culture density ",
    x = "Time (hours)"
  ) +
  scale_fill_manual(values = species_colors) +
  scale_y_continuous(expand = c(0,0), limits = c(0, 13)) +
  theme_classic() + 
  theme(
    axis.title = element_text(size = 7),
    legend.title = element_text(size = 8),
    legend.text = element_text(size = 7, face = "italic"),
    legend.key.size = unit(0.7,"line"),
    plot.margin = margin(0.3, 0.2, 0.2, 0.5, "cm") # Adjust plot margins
  )
plot_avg_Lum

2.3 Saving files

# Combine the plots using ggarrange
plot_legend <- as_ggplot(get_legend(plot_avg_Lum))

plot_empty <-  ggplot() +
  theme_void() +
  theme(plot.background = element_rect(fill = "white", color = "white"))

combined_plot <- ggarrange(plot_empty,
                           ggarrange(plot_empty, plot_legend, plot_empty, ncol = 1, heights = c(0.5, 0.5, 0.5)),
                           plot_avg_OD600, 
                           plot_Lum, 
                           ncol = 4, widths = c(0.02, 0.08, 0.5, 0.5), labels = c("","","A","B"), hjust = -1.5, vjust = 1.5)
combined_plot

# Create subfolders for output files
outputs_folder <- "data/glycogen/Outputs"
if (!file.exists("data/glycogen/Outputs")) {
  dir.create("data/glycogen/Outputs", recursive = TRUE)
}

# Save data frames as CSV files in their output folders
write.csv(df_lum, file.path(outputs_folder,"CorPGA_Lum.csv"), row.names = FALSE)
write.csv(df_OD, file.path(outputs_folder, "CorPGA_OD.csv"), row.names = FALSE)


# Save the combined plot as a PNG file
ggsave(file.path(outputs_folder, "CorPGA_GlycogenPlots.png"), combined_plot, width = 6, height = 3, dpi = 600)

# Save the combined plot as a SVG file
ggsave(file.path(outputs_folder, "CorPGA_GlycogenPlots.svg"), combined_plot, width = 6, height = 3, dpi = 600)

References