HNOBac Manuscript
  1. Methods
  2. HEK-Blue
  • Introduction
  • Methods
    • RNASeq
    • Cell Counts & TEER
    • CFUs 48h
    • CFUs Epithelial Lines
    • LDH 48h
    • LDH Epithelial Lines
    • Cytokines
    • HEK-Blue
  • R Session Info

Table of contents

  • Data Input and Selection
    • File Paths
  • Standard Curve
    • Plot
  • Saving files
  • Results Plots
    • Function for each location
    • Apply to each location

HEK-Blue

library(tidyverse)
library(ggtext)

Data Input and Selection

File Paths

# Folder paths (relative to the working directory)
input_path <- "data/input_data/HekBlue/"
metadata_path <- "data/metadata/HekBlue"

# Output folders: processed data frames and HEK-Blue figures
dataframes_folder <- "data/dataframes"
outputs_folder <- "data/outputs/HekBlue"

# Create the output folders when they are missing. The checks use the variables
# above (single source of truth for each path) and dir.exists() rather than
# file.exists(), so a regular file with the same name is not mistaken for an
# already existing folder.
if (!dir.exists(dataframes_folder)) {
  dir.create(dataframes_folder, recursive = TRUE)
}
if (!dir.exists(outputs_folder)) {
  dir.create(outputs_folder, recursive = TRUE)
}

# Load data and metadata
# Hek_Original: raw plate readings; it is split below by its `type` column.
# BacVia_order: lookup table whose `Bac.Via` and `Bac.Via_label` columns are
# used later to set factor level order.
Hek_Original <- read_csv(file.path(input_path, "HekData_IL1R.csv"))
BacVia_order <- read_csv(file.path(metadata_path, "Order_BacteriaViability.csv"))
# Split sample and standards dataframes using the `type` column
data_samples <- Hek_Original %>%
  filter(type == "sample")

data_standards <- Hek_Original %>%
  filter(type == "standard")

# Standards: in these rows the `bacteria` column is used as the standard ID and
# the `treatment` column as its concentration, so both are renamed. Columns that
# are not needed for the standard curve are dropped and the remaining variables
# are coerced to the types the model needs.
# The table is deliberately left ungrouped: nothing downstream needs groups, and
# a group_by() that is never ungrouped silently changes how later verbs behave.
data_standards <- data_standards %>%
  rename(STD = bacteria, pg.ml = treatment) %>%
  select(-well, -sample, -line, -time, -location, -viability, -`Analyte Sample`) %>%
  mutate(
    STD = as.factor(STD),
    Abs = as.numeric(Abs),
    pg.ml = as.numeric(pg.ml)
  )

# Samples: keep the 48h time point only and replace the short codes for the
# line and the sampling location with their display names
data_samples <- data_samples %>%
  filter(time == "48") %>%
  mutate(
    line = fct_recode(as.factor(line), "HNO204" = "B", "HNO919" = "C"),
    location = fct_recode(location, "Apical" = "Ap", "Basal" = "Baso")
  )

Standard Curve

# Keep only the six standards used for the curve, then add the log10 of their
# concentration, which is the predictor of the linear fit
curve_rows <- filter(
  data_standards,
  is.element(STD, c("STD1", "STD2", "STD3", "STD4", "STD5", "STD6"))
)
data_standards <- mutate(curve_rows, log_pg.ml = log10(pg.ml))
rm(curve_rows)
# Standard curve: absorbance (response) as a linear function of the log10
# concentration of the standards (predictor)
lmStandard <- lm(formula = Abs ~ log_pg.ml, data = data_standards)

# Fitted coefficients: element 1 is the intercept, element 2 the slope
coefficients <- coef(lmStandard)

# Invert the fitted line to turn each sample absorbance into a log10
# concentration, back-transform it to pg/mL, and store a copy that is capped
# at 500 (LOD_pg.ml never exceeds 500)
data_samples <- data_samples %>%
  mutate(
    log_pg.ml = (Abs - coefficients[1]) / coefficients[2],
    pg.ml = 10^log_pg.ml,
    LOD_pg.ml = if_else(pg.ml > 500, 500, pg.ml)
  )
# Add column with sample type information (NB control vs. bacterial sample). Calculate averages.
# - `via` is "control" for rows where bacteria == "NB", otherwise the row's
#   `viability` value.
# - Within each date/location/line/time/bacteria/via group, `concentration_avg`
#   is the mean of the interpolated pg.ml values (NAs ignored) and `Bac.Via` is
#   the combined "<bacteria>.<via>" factor.
# - distinct(..., .keep_all = TRUE) then keeps only the first row of each group,
#   so every other column (e.g. Abs, log_pg.ml, pg.ml, LOD_pg.ml) carries that
#   first row's value, not a group average.
# NOTE(review): the plots further down use LOD_pg.ml rather than
# concentration_avg. Confirm that showing the first row of each group (instead
# of the group mean) is intended.
data_samples_avg <- data_samples %>% 
  mutate(via = ifelse(bacteria == "NB", "control", viability)) %>%
  group_by(date, location, line, time, bacteria, via) %>% 
  mutate(concentration_avg = mean(pg.ml, na.rm = TRUE),
         Bac.Via = interaction(bacteria, via)) %>%
  distinct(date, location, line, time, bacteria, via, .keep_all = TRUE) %>% 
  ungroup()
# Factoring variables with the right levels
# Convert every character column to a factor. across(where(...)) is the current
# dplyr idiom; mutate_if() is superseded.
data_samples_avg <- data_samples_avg %>%
  mutate(across(where(is.character), factor))

# Attach the display labels from the metadata table. merge() performs an inner
# join here, so rows whose Bac.Via value is not listed in BacVia_order are
# dropped from the result.
data_samples_avg <- merge(data_samples_avg, BacVia_order, by = "Bac.Via")

# Order the factor levels as they appear in the metadata table; this controls
# the order in which the categories are drawn on plot axes and legends
data_samples_avg$Bac.Via <- factor(data_samples_avg$Bac.Via, levels = BacVia_order$Bac.Via)
data_samples_avg$Bac.Via_label <- factor(data_samples_avg$Bac.Via_label, levels = BacVia_order$Bac.Via_label)

Plot

# Extract coefficients and R-squared value, formatted for the plot label
lm_eq <- paste("y =", round(coef(lmStandard)[2], 2), "x +", round(coef(lmStandard)[1], 2))
r_squared <- round(summary(lmStandard)$r.squared, 3)

# Create the ggplot object: standards in black with the fitted line (dotted),
# interpolated samples overlaid in pink
pg.ml_plot <- ggplot(data_standards, aes(x = log_pg.ml, y = Abs)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE, color = "black", linetype = "dotted", linewidth = 0.8) +
  geom_point(data = data_samples_avg, aes(x = log_pg.ml, y = Abs), color = "pink") +
  labs(x = "log pg/mL", y = "Abs") +
  ggtitle("Standard Curve with sample data") +
  # annotate() draws the label exactly once. geom_text() with fixed x/y would
  # inherit the plot data and draw one overlapping copy per standards row.
  # na.rm = TRUE keeps the label position defined when some samples are NA.
  annotate("text",
           x = min(data_samples_avg$log_pg.ml, na.rm = TRUE),
           y = max(data_samples_avg$Abs, na.rm = TRUE),
           label = paste("Equation: ", lm_eq, "\nR-squared: ", r_squared),
           hjust = 0, vjust = 1, color = "black") +
  theme_bw() +
  theme(panel.grid = element_blank(),
        text = element_text(size = 14))

pg.ml_plot

Saving files

# Save the averaged sample table as a CSV file in the dataframes folder
write_csv(data_samples_avg, file.path(dataframes_folder, "HekBlue_values.csv"))

# Save the same table as an R object (RDS keeps the factor columns and their
# level order, which a CSV does not)
saveRDS(data_samples_avg, file.path(dataframes_folder, "HekBlue_values.rds"))

# Use this to read the final object. The path is built from dataframes_folder
# so it always points at the file written above.
data_samples_avg <- readRDS(file.path(dataframes_folder, "HekBlue_values.rds"))

Results Plots

Function for each location

# Function to analyze each location: plots the HEK-Blue results for one
# sampling location and saves the figure.
#
# Arguments:
#   data          Data frame with at least the columns `location`,
#                 `Bac.Via_label`, `LOD_pg.ml` and `line`.
#   each_location Value of `location` to plot; it is also used as the plot
#                 title and in the output file names.
#   output_dir    Folder that receives the PNG and RDS files. Defaults to the
#                 global `outputs_folder`, the location used before this
#                 argument existed.
#
# Returns the ggplot object (visibly, so a top-level call also displays it).
# Side effects: writes plotHekBlue_<each_location>.png and
# plotHekBlue_<each_location>.rds into `output_dir`.
analysis_function <- function(data, each_location, output_dir = outputs_folder) {

  # Subset the data to the selected location
  data_subset <- data %>%
    filter(location == each_location)

  # Seven colours, matched in order to the levels of Bac.Via_label; entries
  # 2-4 are repeated as 5-7. Shared by the fill and colour scales.
  group_colours <- c("#5b5b5b", "#800080", "#1E90FF", "#927ed1",
                     "#800080", "#1E90FF", "#927ed1")

  # Plot
  location_plot <- ggplot() +
    # Dashed reference line at 500, the value LOD_pg.ml is capped at.
    # `linewidth` replaces `size`, which is deprecated for lines.
    geom_hline(yintercept = 500, linetype = "dashed", color = "gray", linewidth = 1) +

    geom_jitter(data = data_subset,
                aes(x = Bac.Via_label, y = LOD_pg.ml, fill = Bac.Via_label,
                    color = Bac.Via_label, shape = line),
                width = 0.4, size = 3, alpha = 0.75, show.legend = TRUE) +

    scale_fill_manual(values = group_colours) +
    scale_color_manual(values = group_colours) +
    scale_shape_manual(values = c(22, 24)) +

    # Add sections: vertical dotted separators after the 1st and 4th category.
    # Both ends of each separator share the same x, so the lines are truly
    # vertical (the second one previously ran from 4.5 to 4.45).
    # NOTE(review): the original comment named the groups "live vs. IRR" while
    # the label below reads "Dead"; confirm the intended wording.
    annotate("segment",
             x = c(1.45, 4.45), xend = c(1.45, 4.45), y = Inf, yend = -90,
             colour = "grey70", linetype = "dotted") +
    # Section labels, drawn below the panel under categories 3 and 6
    annotate("text",
             x = 1:7, y = -80,
             size = 5,
             label = c("", "", "Live", "", "", "Dead", "")) +

    # The y axis starts at 0; clip = "off" lets the labels and separator ends
    # placed at negative y remain visible below the panel
    coord_cartesian(ylim = c(0, NA), clip = "off") +

    # General style
    labs(title = as.character(each_location),
         x = "",
         y = "pg/mL active IL-1α & IL-1β",
         fill = "Bacteria", color = "Bacteria", shape = "HNO Line") +
    theme_bw() +
    theme(panel.grid = element_blank(),
          legend.text = element_markdown(),
          text = element_text(size = 20),
          axis.text.y = element_text(color = "black"),
          axis.text.x = element_markdown(angle = 0))

  # Save files: a PNG for viewing and an RDS of the ggplot object for reuse
  file_stem <- file.path(output_dir, paste0("plotHekBlue_", each_location))
  ggsave(filename = paste0(file_stem, ".png"), plot = location_plot, width = 11, height = 10)
  saveRDS(location_plot, paste0(file_stem, ".rds"))

  location_plot
}

Apply to each location

Apical

# Build the Apical plot, save it as PNG and RDS, and display the returned plot
analysis_function(data_samples_avg, each_location = "Apical")

Basal

# Build the Basal plot, save it as PNG and RDS, and display the returned plot
analysis_function(data_samples_avg, each_location = "Basal")

Cytokines
R Session Info
Source Code
---
execute:
  message: FALSE
  warning: FALSE
---

# HEK-Blue {.unnumbered}

```{r}
library(tidyverse)
library(ggtext)
```

## Data Input and Selection

### File Paths

```{r}
# Folder paths (relative to the working directory)
input_path <- "data/input_data/HekBlue/"
metadata_path <- "data/metadata/HekBlue"

# Output folders: processed data frames and HEK-Blue figures
dataframes_folder <- "data/dataframes"
outputs_folder <- "data/outputs/HekBlue"

# Create the output folders when they are missing. The checks use the variables
# above (single source of truth for each path) and dir.exists() rather than
# file.exists(), so a regular file with the same name is not mistaken for an
# already existing folder.
if (!dir.exists(dataframes_folder)) {
  dir.create(dataframes_folder, recursive = TRUE)
}
if (!dir.exists(outputs_folder)) {
  dir.create(outputs_folder, recursive = TRUE)
}

# Load data and metadata
# Hek_Original: raw plate readings; it is split below by its `type` column.
# BacVia_order: lookup table whose `Bac.Via` and `Bac.Via_label` columns are
# used later to set factor level order.
Hek_Original <- read_csv(file.path(input_path, "HekData_IL1R.csv"))
BacVia_order <- read_csv(file.path(metadata_path, "Order_BacteriaViability.csv"))
```

```{r}
# Split sample and standards dataframes using the `type` column
data_samples <- Hek_Original %>%
  filter(type == "sample")

data_standards <- Hek_Original %>%
  filter(type == "standard")

# Standards: in these rows the `bacteria` column is used as the standard ID and
# the `treatment` column as its concentration, so both are renamed. Columns that
# are not needed for the standard curve are dropped and the remaining variables
# are coerced to the types the model needs.
# The table is deliberately left ungrouped: nothing downstream needs groups, and
# a group_by() that is never ungrouped silently changes how later verbs behave.
data_standards <- data_standards %>%
  rename(STD = bacteria, pg.ml = treatment) %>%
  select(-well, -sample, -line, -time, -location, -viability, -`Analyte Sample`) %>%
  mutate(
    STD = as.factor(STD),
    Abs = as.numeric(Abs),
    pg.ml = as.numeric(pg.ml)
  )

# Samples: keep the 48h time point only and replace the short codes for the
# line and the sampling location with their display names
data_samples <- data_samples %>%
  filter(time == "48") %>%
  mutate(
    line = fct_recode(as.factor(line), "HNO204" = "B", "HNO919" = "C"),
    location = fct_recode(location, "Apical" = "Ap", "Basal" = "Baso")
  )
```

## Standard Curve

```{r}
# Keep only the six standards used for the curve, then add the log10 of their
# concentration, which is the predictor of the linear fit
curve_rows <- filter(
  data_standards,
  is.element(STD, c("STD1", "STD2", "STD3", "STD4", "STD5", "STD6"))
)
data_standards <- mutate(curve_rows, log_pg.ml = log10(pg.ml))
rm(curve_rows)
```

```{r}
# Standard curve: absorbance (response) as a linear function of the log10
# concentration of the standards (predictor)
lmStandard <- lm(formula = Abs ~ log_pg.ml, data = data_standards)

# Fitted coefficients: element 1 is the intercept, element 2 the slope
coefficients <- coef(lmStandard)

# Invert the fitted line to turn each sample absorbance into a log10
# concentration, back-transform it to pg/mL, and store a copy that is capped
# at 500 (LOD_pg.ml never exceeds 500)
data_samples <- data_samples %>%
  mutate(
    log_pg.ml = (Abs - coefficients[1]) / coefficients[2],
    pg.ml = 10^log_pg.ml,
    LOD_pg.ml = if_else(pg.ml > 500, 500, pg.ml)
  )
```

```{r}
# Add column with sample type information (NB control vs. bacterial sample). Calculate averages.
# - `via` is "control" for rows where bacteria == "NB", otherwise the row's
#   `viability` value.
# - Within each date/location/line/time/bacteria/via group, `concentration_avg`
#   is the mean of the interpolated pg.ml values (NAs ignored) and `Bac.Via` is
#   the combined "<bacteria>.<via>" factor.
# - distinct(..., .keep_all = TRUE) then keeps only the first row of each group,
#   so every other column (e.g. Abs, log_pg.ml, pg.ml, LOD_pg.ml) carries that
#   first row's value, not a group average.
# NOTE(review): the plots further down use LOD_pg.ml rather than
# concentration_avg. Confirm that showing the first row of each group (instead
# of the group mean) is intended.
data_samples_avg <- data_samples %>% 
  mutate(via = ifelse(bacteria == "NB", "control", viability)) %>%
  group_by(date, location, line, time, bacteria, via) %>% 
  mutate(concentration_avg = mean(pg.ml, na.rm = TRUE),
         Bac.Via = interaction(bacteria, via)) %>%
  distinct(date, location, line, time, bacteria, via, .keep_all = TRUE) %>% 
  ungroup()
```

```{r}
# Factoring variables with the right levels
# Convert every character column to a factor. across(where(...)) is the current
# dplyr idiom; mutate_if() is superseded.
data_samples_avg <- data_samples_avg %>%
  mutate(across(where(is.character), factor))

# Attach the display labels from the metadata table. merge() performs an inner
# join here, so rows whose Bac.Via value is not listed in BacVia_order are
# dropped from the result.
data_samples_avg <- merge(data_samples_avg, BacVia_order, by = "Bac.Via")

# Order the factor levels as they appear in the metadata table; this controls
# the order in which the categories are drawn on plot axes and legends
data_samples_avg$Bac.Via <- factor(data_samples_avg$Bac.Via, levels = BacVia_order$Bac.Via)
data_samples_avg$Bac.Via_label <- factor(data_samples_avg$Bac.Via_label, levels = BacVia_order$Bac.Via_label)
```

### Plot

```{r}
#| message: FALSE

# Extract coefficients and R-squared value, formatted for the plot label
lm_eq <- paste("y =", round(coef(lmStandard)[2], 2), "x +", round(coef(lmStandard)[1], 2))
r_squared <- round(summary(lmStandard)$r.squared, 3)

# Create the ggplot object: standards in black with the fitted line (dotted),
# interpolated samples overlaid in pink
pg.ml_plot <- ggplot(data_standards, aes(x = log_pg.ml, y = Abs)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE, color = "black", linetype = "dotted", linewidth = 0.8) +
  geom_point(data = data_samples_avg, aes(x = log_pg.ml, y = Abs), color = "pink") +
  labs(x = "log pg/mL", y = "Abs") +
  ggtitle("Standard Curve with sample data") +
  # annotate() draws the label exactly once. geom_text() with fixed x/y would
  # inherit the plot data and draw one overlapping copy per standards row.
  # na.rm = TRUE keeps the label position defined when some samples are NA.
  annotate("text",
           x = min(data_samples_avg$log_pg.ml, na.rm = TRUE),
           y = max(data_samples_avg$Abs, na.rm = TRUE),
           label = paste("Equation: ", lm_eq, "\nR-squared: ", r_squared),
           hjust = 0, vjust = 1, color = "black") +
  theme_bw() +
  theme(panel.grid = element_blank(),
        text = element_text(size = 14))

pg.ml_plot
```

## Saving files

```{r}
# Save the averaged sample table as a CSV file in the dataframes folder
write_csv(data_samples_avg, file.path(dataframes_folder, "HekBlue_values.csv"))

# Save the same table as an R object (RDS keeps the factor columns and their
# level order, which a CSV does not)
saveRDS(data_samples_avg, file.path(dataframes_folder, "HekBlue_values.rds"))

# Use this to read the final object. The path is built from dataframes_folder
# so it always points at the file written above.
data_samples_avg <- readRDS(file.path(dataframes_folder, "HekBlue_values.rds"))
```

## Results Plots

### Function for each location

```{r}
# Function to analyze each location: plots the HEK-Blue results for one
# sampling location and saves the figure.
#
# Arguments:
#   data          Data frame with at least the columns `location`,
#                 `Bac.Via_label`, `LOD_pg.ml` and `line`.
#   each_location Value of `location` to plot; it is also used as the plot
#                 title and in the output file names.
#   output_dir    Folder that receives the PNG and RDS files. Defaults to the
#                 global `outputs_folder`, the location used before this
#                 argument existed.
#
# Returns the ggplot object (visibly, so a top-level call also displays it).
# Side effects: writes plotHekBlue_<each_location>.png and
# plotHekBlue_<each_location>.rds into `output_dir`.
analysis_function <- function(data, each_location, output_dir = outputs_folder) {

  # Subset the data to the selected location
  data_subset <- data %>%
    filter(location == each_location)

  # Seven colours, matched in order to the levels of Bac.Via_label; entries
  # 2-4 are repeated as 5-7. Shared by the fill and colour scales.
  group_colours <- c("#5b5b5b", "#800080", "#1E90FF", "#927ed1",
                     "#800080", "#1E90FF", "#927ed1")

  # Plot
  location_plot <- ggplot() +
    # Dashed reference line at 500, the value LOD_pg.ml is capped at.
    # `linewidth` replaces `size`, which is deprecated for lines.
    geom_hline(yintercept = 500, linetype = "dashed", color = "gray", linewidth = 1) +

    geom_jitter(data = data_subset,
                aes(x = Bac.Via_label, y = LOD_pg.ml, fill = Bac.Via_label,
                    color = Bac.Via_label, shape = line),
                width = 0.4, size = 3, alpha = 0.75, show.legend = TRUE) +

    scale_fill_manual(values = group_colours) +
    scale_color_manual(values = group_colours) +
    scale_shape_manual(values = c(22, 24)) +

    # Add sections: vertical dotted separators after the 1st and 4th category.
    # Both ends of each separator share the same x, so the lines are truly
    # vertical (the second one previously ran from 4.5 to 4.45).
    # NOTE(review): the original comment named the groups "live vs. IRR" while
    # the label below reads "Dead"; confirm the intended wording.
    annotate("segment",
             x = c(1.45, 4.45), xend = c(1.45, 4.45), y = Inf, yend = -90,
             colour = "grey70", linetype = "dotted") +
    # Section labels, drawn below the panel under categories 3 and 6
    annotate("text",
             x = 1:7, y = -80,
             size = 5,
             label = c("", "", "Live", "", "", "Dead", "")) +

    # The y axis starts at 0; clip = "off" lets the labels and separator ends
    # placed at negative y remain visible below the panel
    coord_cartesian(ylim = c(0, NA), clip = "off") +

    # General style
    labs(title = as.character(each_location),
         x = "",
         y = "pg/mL active IL-1α & IL-1β",
         fill = "Bacteria", color = "Bacteria", shape = "HNO Line") +
    theme_bw() +
    theme(panel.grid = element_blank(),
          legend.text = element_markdown(),
          text = element_text(size = 20),
          axis.text.y = element_text(color = "black"),
          axis.text.x = element_markdown(angle = 0))

  # Save files: a PNG for viewing and an RDS of the ggplot object for reuse
  file_stem <- file.path(output_dir, paste0("plotHekBlue_", each_location))
  ggsave(filename = paste0(file_stem, ".png"), plot = location_plot, width = 11, height = 10)
  saveRDS(location_plot, paste0(file_stem, ".rds"))

  location_plot
}
```


### Apply to each location

#### Apical

```{r}
# Build the Apical plot, save it as PNG and RDS, and display the returned plot
analysis_function(data_samples_avg, each_location = "Apical")
```

#### Basal

```{r}
# Build the Basal plot, save it as PNG and RDS, and display the returned plot
analysis_function(data_samples_avg, each_location = "Basal")
```