Supplemental Methods: Glycogen
# Load libraries
library(readxl)
library(grid)
library(lme4)
library(afex)
library(tidyverse)
library(ggpubr)
1 Individual Experiment Analysis (N=1)
The analysis was ran three different times based on three independent experiments on different dates. This data can be found in data/glycogen
as .xlsx
files with the following 3 tabs:
- csvOD: contains the columns Date, Species, Strain, TimePoint, Media, Reject and GrowthOD.
- csvNorm: contains the columns Species, TimePoint, Date, and ODNorm.
- csvLum: contains the columns Date, TimePoint, Well_ID, Type, Amy, Species, Strain, Reject, Glycogen, and Lum.
# Defining the experiment variable (Comment out one experiment at a time)
#experiment <- "240322_Gly"
#experiment <- "240325_Gly"
experiment <- "240331_Gly"
# Read data from an Excel file into two separate data frames: data_lum from the "csvLum" sheet and data_OD from the "csvOD" sheet
data_lum <- read_excel(file.path(paste0("data/glycogen/", experiment, ".xlsx")), sheet = "csvLum")
data_OD <- read_excel(file.path(paste0("data/glycogen/", experiment, ".xlsx")), sheet = "csvOD")
data_norm <- read_excel(file.path(paste0("data/glycogen/", experiment, ".xlsx")), sheet = "csvNorm")
# Filter out rows where "Reject" column is "F" and where "Type" column is not "Empty" in the data_lum data frame, and remove the "Reject" column for both dataframes. Remove species "Cna", as this strain was not included on the manuscript.
data_lum <- data_lum %>%
filter(Reject == "F") %>%
filter(Type != "Empty") %>%
filter(Species != "Cna") %>%
select(-Reject)
data_OD <- data_OD %>%
filter(Reject == "F") %>%
filter(Species != "Cna") %>%
select(-Reject)
data_norm <- data_norm %>%
filter(Species != "Cna")
# Split the data_lum data frame into two separate data frames: data_lum_samples containing rows where "Type" is "Sample" and data_lum_standards containing rows where "Type" is "Standard", and remove unnecessary columns
data_lum_samples <- data_lum %>%
filter(Type == "Sample")
data_lum_standards <- data_lum %>%
filter(Type == "Standard") %>%
select(-Species, -Strain)
# Convert the "Amy" column in data_lum_standards to a factor and "Glycogen" column to numeric
data_lum_standards$Amy <- as.factor(data_lum_standards$Amy)
data_lum_standards$Glycogen <- as.numeric(data_lum_standards$Glycogen)
# Function to analyze a single timepoint
analyze_glycogen <- function(time, data_lum_standards, data_lum_samples) {
# Filter the data_lum_standards data frame for rows where "TimePoint" is equal to the specified time
time_standard <- data_lum_standards %>%
filter(TimePoint == time) %>%
group_by(Glycogen) %>%
mutate(Difference = ifelse(Amy == "T", Lum - lag(Lum), NA)) %>%
filter(!is.na(Difference))
# Fit a linear model (lm) for the standards with "Difference" as the dependent variable and "Glycogen" as the independent variable
lmStandard <- lm(Difference ~ Glycogen, time_standard)
# Extract coefficients from the linear model
coefficients <- lmStandard$coefficients
# Filter the data_lum_samples data frame for rows where "TimePoint" is equal to the specified time
time_samples <- data_lum_samples %>%
filter(TimePoint == time) %>%
mutate(Glycogen = (Lum - coefficients[1]) / coefficients[2])
# Group the data by Strain and Amy, calculate the mean Lum value within each group
time_samples_diff <- time_samples %>%
group_by(Species, Amy) %>%
summarise(LumAvg = mean(Lum)) %>%
mutate(Difference = ifelse(Amy == "T", LumAvg - lag(LumAvg), NA)) %>%
filter(!is.na(Difference)) %>%
mutate(Glycogen = (Difference - coefficients[1]) / coefficients[2]) %>%
select(-Amy, -LumAvg) %>%
mutate(TimePoint = time)
# Adds individual IDs to the replicas, with and without Amy.
time_samples <- time_samples %>%
arrange(Species, Amy)
time_samples$Replica <- as.factor(rep(1:6, length.out = nrow(time_samples)))
return(list(time_samples_diff = time_samples_diff))
}
# Loop through different values of time
all_results <- list()
for (time_point in unique(data_lum_standards$TimePoint)) {
result <- analyze_glycogen(time_point, data_lum_standards, data_lum_samples)
all_results[[paste0("Time_", time_point)]] <- result
# Combine all time_samples_diff dataframes into a single dataframe
full_Lum_df <- do.call(rbind, lapply(all_results, function(result) result$time_samples_diff))
full_Lum_df <- left_join(full_Lum_df, data_norm) %>%
mutate(GlycogenNorm = Glycogen/ODNorm)
}
# Convert the 'Species' column to a factor with specified levels
full_Lum_df$Species <- factor(full_Lum_df$Species, levels = c("Cpr", "Cps", "Cac", "Ctu", "Cgl"))
# Convert the 'TimePoint' column to a factor with default level ordering
full_Lum_df$TimePoint <- factor(full_Lum_df$TimePoint)
# Assign the first 'Date' value from 'data_OD' to all rows in 'full_Lum_df'
full_Lum_df$Date <- data_OD$Date[1]
# Write the 'full_Lum_df' dataframe to a CSV file, excluding row names
#write.csv(full_Lum_df, paste0("data/glycogen/Lum/", experiment,"_Lum.csv"), row.names = F)
# Write the 'data_OD' dataframe to a CSV file, excluding row names
#write.csv(data_OD, paste0("data/glycogen/OD/", experiment,"_OD.csv"), row.names = F)
1.1 Saving files
# Create subfolders for output files
Lum_folder <- "data/glycogen/Lum"
if (!file.exists("data/glycogen/Lum")) {
dir.create("data/glycogen/Lum", recursive = TRUE)
}
OD_folder <- "data/glycogen/OD"
if (!file.exists("data/glycogen/OD")) {
dir.create("data/glycogen/OD", recursive = TRUE)
}
# Save data frames as CSV files in their output folders
write.csv(full_Lum_df, file.path(Lum_folder, paste0(experiment, "_Lum.csv")), row.names = FALSE)
write.csv(data_OD, file.path(OD_folder, paste0(experiment, "_OD.csv")), row.names = FALSE)
# Cleaning-up all objects from the environment
rm(list = ls())
2 Combined Analysis (N=3)
# Define the directory path for OD data
folder_OD <- "data/glycogen/OD"
# Create a list of full file names for all '_OD.csv' files in the directory
file_list_OD <- list.files(folder_OD, pattern = "_OD.csv", full.names = TRUE)
# Read each file into a list of dataframes
df_list_OD <- lapply(file_list_OD, read.csv)
# Combine all dataframes in the list into a single dataframe
df_OD <- bind_rows(df_list_OD)
# Define the directory path for Lum data
folder_lum <- "data/glycogen/Lum"
# Create a list of full file names for all '_Lum.csv' files in the directory
file_list_lum <- list.files(folder_lum, pattern = "_Lum.csv", full.names = TRUE)
# Read each file into a list of dataframes
df_list_lum <- lapply(file_list_lum, read.csv)
# Combine all dataframes in the list into a single dataframe
df_lum <- bind_rows(df_list_lum)
# Convert the 'Species' column in all average data frames to a factor with specified levels
df_OD$Species <- factor(df_OD$Species, levels = c("Cpr", "Cps", "Cac", "Ctu", "Cgl"))
df_lum$Species <- factor(df_lum$Species, levels = c("Cpr", "Cps", "Cac", "Ctu", "Cgl"))
# Convert the 'TimePoint' column in Lum data frame to a factor with default level ordering
df_lum$TimePoint <- factor(df_lum$TimePoint)
# Define colors
species_colors <- c("#FF8C00", "#FF0000", "#6A329F", "#3B54A4", "grey50")
2.1 Growth (OD600)
# Group the OD data by Species and TimePoint, then calculate the average OD and standard error
avg_data_OD <- df_OD %>%
group_by(Species, TimePoint) %>%
summarise(
AvgOD = mean(GrowthOD, na.rm = TRUE), # Calculate the average OD, removing NA values
SE = sd(GrowthOD, na.rm = TRUE) / sqrt(sum(!is.na(GrowthOD))) # Calculate standard error
)
# Create a line plot of average OD600 over time for each species
plot_avg_OD600 <- ggplot(avg_data_OD, aes(x = TimePoint, y = AvgOD, color = Species)) +
geom_line() + # Add lines to connect points
geom_point(size = 2) + # Add points to represent data
geom_errorbar(aes(ymin = AvgOD - SE, ymax = AvgOD + SE), width = 0.2) + # Add error bars
labs(
y = expression("Turbidity (OD"[600]*")"),
x = "Time (hours)"
) +
scale_color_manual(values = species_colors) +
scale_y_continuous(expand = c(0,0)) +
scale_x_continuous(expand = c(0,2)) +
theme_classic() +
theme(
axis.title = element_text(size = 7),
legend.position = "none", # Remove the legend
plot.margin = margin(0.3, 0.2, 0.2, 0.5, "cm") # Adjust plot margins
)
plot_avg_OD600
2.2 Luminesce
# Mixed linear model with GlycogenNorm as fixed effect and TimePoint as a random effect. Using "Ctu" as reference (first factor level)
df_lum$Species <- factor(df_lum$Species, levels = c("Ctu", "Cgl", "Cpr", "Cps", "Cac"))
model <- lmer(GlycogenNorm ~ Species
+ (1|TimePoint),
data = df_lum)
anova(model)
## Type III Analysis of Variance Table with Satterthwaite's method
## Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
## Species 378.71 94.677 4 52 37.688 8.772e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: GlycogenNorm ~ Species + (1 | TimePoint)
## Data: df_lum
##
## REML criterion at convergence: 223
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.6145 -0.4641 0.0295 0.4310 3.5387
##
## Random effects:
## Groups Name Variance Std.Dev.
## TimePoint (Intercept) 0.427 0.6535
## Residual 2.512 1.5850
## Number of obs: 60, groups: TimePoint, 4
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 2.1545 0.5622 12.6438 3.832 0.00218 **
## SpeciesCgl 4.8838 0.6471 52.0000 7.548 6.64e-10 ***
## SpeciesCpr -1.4157 0.6471 52.0000 -2.188 0.03319 *
## SpeciesCps -1.7575 0.6471 52.0000 -2.716 0.00895 **
## SpeciesCac -1.6175 0.6471 52.0000 -2.500 0.01562 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) SpcsCg SpcsCpr SpcsCps
## SpeciesCgl -0.575
## SpeciesCpr -0.575 0.500
## SpeciesCps -0.575 0.500 0.500
## SpeciesCac -0.575 0.500 0.500 0.500
# Group Lum data by Species and TimePoint, then calculate average values
avg_data_Lum <- df_lum %>%
group_by(Species, TimePoint) %>%
summarise(
AvgLum = mean(GlycogenNorm, na.rm = TRUE), # Calculate the average normalized Glycogen, removing NA values
SE = sd(GlycogenNorm, na.rm = TRUE) / sqrt(sum(!is.na(GlycogenNorm))), # Calculate standard error
.groups = 'drop')
df_lum$Species <- factor(df_lum$Species, levels = c("Cpr", "Cps", "Cac", "Ctu", "Cgl"))
# Create a dot boxplot of normalized glycogen concentration over time for each species
plot_Lum <- ggplot(df_lum, aes(x = TimePoint, y = GlycogenNorm, fill = Species)) +
geom_boxplot(linewidth = 0.3) +
labs(
y = "Glycogen (ug/ml) per normalized culture density ",
x = "Time (hours)"
) +
scale_fill_manual(values = species_colors) +
scale_y_continuous(expand = c(0,0)) +
theme_classic() +
theme(
axis.title = element_text(size = 7),
legend.position = "none", # Remove the legend
plot.margin = margin(0.3, 0.2, 0.2, 0.5, "cm") # Adjust plot margins
)
plot_Lum
avg_data_Lum$Species <- factor(avg_data_Lum$Species, levels = c("Cpr", "Cps", "Cac", "Ctu", "Cgl"))
# Create a bar plot of average normalized glycogen concentration over time for each species (used for legend)
plot_avg_Lum <- ggplot(avg_data_Lum, aes(x = TimePoint, y = AvgLum, fill = Species)) +
geom_bar(position = position_dodge(), stat = "identity", color = "black") + # Add bars with dodge position
labs(
y = "Glycogen (ug/ml) per normalized culture density ",
x = "Time (hours)"
) +
scale_fill_manual(values = species_colors) +
scale_y_continuous(expand = c(0,0), limits = c(0, 13)) +
theme_classic() +
theme(
axis.title = element_text(size = 7),
legend.title = element_text(size = 8),
legend.text = element_text(size = 7, face = "italic"),
legend.key.size = unit(0.7,"line"),
plot.margin = margin(0.3, 0.2, 0.2, 0.5, "cm") # Adjust plot margins
)
plot_avg_Lum
2.3 Saving files
# Combine the plots using ggarrange
plot_legend <- as_ggplot(get_legend(plot_avg_Lum))
plot_empty <- ggplot() +
theme_void() +
theme(plot.background = element_rect(fill = "white", color = "white"))
combined_plot <- ggarrange(plot_empty,
ggarrange(plot_empty, plot_legend, plot_empty, ncol = 1, heights = c(0.5, 0.5, 0.5)),
plot_avg_OD600,
plot_Lum,
ncol = 4, widths = c(0.02, 0.08, 0.5, 0.5), labels = c("","","A","B"), hjust = -1.5, vjust = 1.5)
combined_plot
# Create subfolders for output files
outputs_folder <- "data/glycogen/Outputs"
if (!file.exists("data/glycogen/Outputs")) {
dir.create("data/glycogen/Outputs", recursive = TRUE)
}
# Save data frames as CSV files in their output folders
write.csv(df_lum, file.path(outputs_folder,"CorPGA_Lum.csv"), row.names = FALSE)
write.csv(df_OD, file.path(outputs_folder, "CorPGA_OD.csv"), row.names = FALSE)
# Save the combined plot as a PNG file
ggsave(file.path(outputs_folder, "CorPGA_GlycogenPlots.png"), combined_plot, width = 6, height = 3, dpi = 600)
# Save the combined plot as a SVG file
ggsave(file.path(outputs_folder, "CorPGA_GlycogenPlots.svg"), combined_plot, width = 6, height = 3, dpi = 600)