single-nuclei RNAseq dataset | 424 unique samples from DLPFC
# New df containing only interested PRS values
prs = prs_combined[,c("projid",prs_id)]
colnames(prs) = c("projid","prs")
# Getting only the volunteers (projid) present on single nuclei data (n=424)
donors_in_common = intersect(colnames(celltype_exp$ext),prs$projid)
# Spliting data into deciles
PRS_quantiles = prs %>%
filter(projid %in% donors_in_common) %>%
mutate(prs_quantile = as.factor(cut(prs,quantile(prs, probs = 0:10 / 10, na.rm = T),include.lowest = T, labels = F)))
# table(PRS_quantiles$prs_quantile, useNA = "ifany")
# Show PRS distribution in each of the ten quantile
(p1 = ggplot(PRS_quantiles, aes(x = prs, fill = prs_quantile)) +
geom_histogram(alpha = 1, bins = 100) +
theme_classic() +
scale_fill_viridis_d() +
labs(x = "PRS", y = "Density") +
theme(
legend.position = "none"
))
min_quantile = min(as.numeric(PRS_quantiles$prs_quantile), na.rm = T)
max_quantile = max(as.numeric(PRS_quantiles$prs_quantile), na.rm = T)
# Make a long table of expression of cytokines in each cell type and PRS group
celltype_exp_prs_long = data.frame()
for(cell_i in maj_celltypes){
celltype_exp_prs_long = bind_rows(celltype_exp_prs_long,
celltype_exp[[cell_i]] %>% as.data.frame() %>% rownames_to_column("ensembl") %>%
filter(ensembl %in% cytokines_list$ensembl) %>%
pivot_longer(cols = -ensembl, names_to = "projid", values_to = "expression") %>%
mutate(celltype = cell_i) %>%
left_join(PRS_quantiles, by = "projid") %>% na.omit())
}
celltype_exp_prs_long = celltype_exp_prs_long %>% left_join(cytokines_list, by = "ensembl")
library(dplyr)
library(furrr)
library(data.table)
library(parallel)
library(future)
library(future.apply)
nperm = 1000
set.seed(123)
seeds <- sample(1:10000, nperm)
# Permute cell-type labels within each individual while keeping PRS and expression values fixed
permute_prs <- function(celltype_exp_prs_long, seeds, n_cores = NULL) {
# Set up parallel processing
if (is.null(n_cores)) {
n_cores <- min(detectCores() - 1, length(seeds))
}
# Convert to data.table for faster operations
dt <- as.data.table(celltype_exp_prs_long)
dt[, ...1 := NULL] # Remove unnecessary column
# Define function for each permutation using data.table
process_permutation_dt <- function(seed_val, data) {
set.seed(seed_val)
# Create copy and permute
dt_perm <- copy(data)
dt_perm[, prs_perm := sample(prs), by = ensembl]
# Calculate correlations
perm_cor_dt <- dt_perm[, .(
correlation = cor(prs_perm, expression, method = "spearman", use = "pairwise.complete.obs")
), by = .(ensembl, celltype)]
perm_cor_dt[, permutation := seed_val]
return(perm_cor_dt)
}
# Use parallel processing with data.table
plan(multisession, workers = n_cores)
result_list <- future_map(seeds, ~process_permutation_dt(.x, dt), .options = furrr_options(seed = TRUE))
plan(sequential)
# Combine results
rbindlist(result_list)
}
prs_correlation_prs_permuted = permute_prs(celltype_exp_prs_long, seeds)
avg_prs_correlation_prs_permuted = prs_correlation_prs_permuted[, .(avg_correlation = mean(correlation)), by = .(celltype, permutation)]
prs_correlation_celltype_observed <- celltype_exp_prs_long %>%
group_by(celltype,ensembl) %>%
summarise(observed_correlation = cor(prs, expression, method = "spearman", use = "pairwise.complete.obs"), .groups = 'drop')
avg_prs_correlation_celltype_observed = prs_correlation_celltype_observed %>% group_by(celltype) %>%
reframe(avg_observed_correlation = mean(observed_correlation))
avg_pval_df <- as.data.frame(avg_prs_correlation_prs_permuted) %>%
left_join(avg_prs_correlation_celltype_observed) %>%
group_by(celltype) %>%
summarise(pval = sum(avg_correlation >= avg_observed_correlation) / n()) %>%
mutate(cell_type = case_when(
celltype == "ext" ~ "Excitatory Neurons",
celltype == "inh" ~ "Inhibitory Neurons",
celltype == "oli" ~ "Oligodendrocytes",
celltype == "mic" ~ "Microglia",
celltype == "ast" ~ "Astrocytes",
celltype == "end" ~ "Endothelial Cells",
celltype == "opc" ~ "OPCs",
TRUE ~ celltype # keep original value if none match
))
maj_cell_avg_correlations = avg_prs_correlation_celltype_observed %>% mutate(permutation = "observed") %>%
rename(avg_correlation = avg_observed_correlation) %>%
bind_rows(avg_prs_correlation_prs_permuted %>% mutate(permutation = "permuted")) %>%
left_join(avg_pval_df, by = "celltype")
# Plot mean permuted distributions vs observed correlation of PRS vs cytokine expression (facet by celltype)
(p4 = maj_cell_avg_correlations %>% filter(permutation != "observed") %>%
ggplot(aes(avg_correlation)) +
geom_histogram() +
geom_vline(data = maj_cell_avg_correlations %>% filter(permutation == "observed"),
aes(xintercept = avg_correlation), color = "red") +
geom_text(data = avg_pval_df,
aes(x = -Inf, y = Inf,
label = paste0("P = ",format(pval, digits=1, nsmall=3))), hjust = -0.1, vjust = 1.5, size = 3.5) +
facet_wrap(~cell_type, scales = "free_y", nrow = 1) +
scale_x_continuous(n.breaks = 3) +
theme_classic()+
# ggeasy::easy_rotate_x_labels(angle = 25, side = c("right")) +
labs(x = "Average correlation", y = "Distribution")
)
assoc_results_all = bind_rows(assoc_results_ADPRS_all,assoc_results_APOE_all)
assoc_results_all$predictor <- factor(
assoc_results_all$predictor,
levels = assoc_results_all %>%
arrange(outcome, cell_type, predictor) %>%
pull(predictor) %>%
unique()
)
assoc_results_all$outcome[assoc_results_all$outcome == "prs"] <- "AD PRS"
assoc_results_all$outcome[assoc_results_all$outcome == "apoe_any4"] <- "APOE ε4"
# Define the color order for cell types
cell_type_order <- c("Excitatory Neurons", "Inhibitory Neurons", "Microglia",
"Astrocytes", "Oligodendrocytes", "OPCs", "Endothelial Cells")
# Reorder the data by cell_type according to the color order
assoc_results_all$cell_type <- factor(assoc_results_all$cell_type, levels = cell_type_order)
(p_assoc <- ggplot(assoc_results_all, aes(fill=cell_type, x=cell_type, y=log10_p)) +
geom_bar(stat = "identity", position = position_dodge2(preserve = "single")) +
geom_hline(yintercept = max(assoc_results_all$log10_p[assoc_results_all$fdr>0.05]), linetype="dashed") +
theme_classic() +
easy_remove_x_axis(what = "text") +
easy_labs(x = "Cytokines",
subtitle = "Cytokine expression associated with AD PRS and APOE ε4",
fill = "Cell Type",
y = bquote(-log[10](italic(P)-value))) +
geom_text(data = subset(assoc_results_all, fdr<=0.05),
aes(label = predictor),
position=position_dodge(width=0.9), vjust=-0.25, size = 3.5, fontface = "italic") +
facet_wrap(~outcome, scales = "free", ncol = 2) +
scale_y_continuous(expand = expansion(mult = c(0, 0.2))) +
scale_fill_manual(values = c(
"Excitatory Neurons" = "#00008B",
"Inhibitory Neurons" = "#CC3311",
"Microglia" = "#FF8C00",
"Astrocytes" = "#9932CC",
"Oligodendrocytes" = "#698b22",
"OPCs" = "#008b45",
"Endothelial Cells" = "#8a6407"
)))
cairo_pdf("/pastel/projects/cytokines_Juliana/JNI_revision_I/PRS_APOE_barplot.pdf", width = 10, height = 3)
p_assoc
dev.off()
## png
## 2
p_SPP1_PRS = top_cytokines_data_plus_with_stats %>% filter(cell_symbol == "mic:SPP1") %>%
ggplot(aes(x = prs, y = expression, color = cell_type)) +
geom_point(alpha = 0.3) +
geom_smooth(method = "lm", se = T, color = "black") +
geom_text(aes(x = Inf, y = -Inf, label = label),
hjust = 1.1, vjust = -0.8, size = 4.5, color = "black", check_overlap = T) +
facet_wrap(symbol ~ cell_type, scales = "free", ncol = 5, nrow = 2) +
theme_classic() +
scale_x_continuous(n.breaks = 3) +
# ggeasy::easy_rotate_x_labels(angle = 25, side = c("right")) +
theme(legend.position = "none") +
labs(x = "PRS", y = "Expression") +
theme(
legend.position = "none"
) +
scale_color_manual(values = c(
"Excitatory Neurons" = "#00008B",
"Inhibitory Neurons" = "#CC3311",
"Microglia" = "#FF8C00",
"Astrocytes" = "#9932CC",
"Oligodendrocytes" = "#698b22",
"OPCs" = "#008b45",
"Endothelial Cells" = "#8a6407"
))
p_SPP1_APOE = na.omit(top_cytokines_APOE_data_plus_with_stats) %>%filter(cell_symbol == "mic:SPP1") %>%
ggplot(aes(x = apoe_any4, y = expression, color = cell_type)) +
geom_boxplot(notch=F, fill="white", color="black", outlier.shape=NA, lwd=0.4, alpha=1, width=.7, fatten = 3) +
ggbeeswarm::geom_quasirandom(alpha=.3, width=.2, show.legend=F, size=2) +
geom_pwc(method = "t_test", label.y.npc = "top", label.size = 5, hide.ns = F) +
stat_summary(fun = median, fun.min = median, fun.max = median,
geom = "crossbar", width = .7, color = "black") +
geom_text(aes(x = Inf, y = Inf, label = label),
hjust = 1.1, vjust = 1.2, size = 4.5, color = "black", check_overlap = T) +
facet_wrap( ~ symbol + cell_type, scales = "free", nrow = 2, ncol = 5) +
# facet_wrap(~ celltype+symbol, scales = "free_y") +
scale_y_continuous(expand = expansion(mult = c(0, 0.2))) +
scale_color_manual(values = c(
"Excitatory Neurons" = "#00008B",
"Inhibitory Neurons" = "#CC3311",
"Microglia" = "#FF8C00",
"Astrocytes" = "#9932CC",
"Oligodendrocytes" = "#698b22",
"OPCs" = "#008b45",
"Endothelial Cells" = "#8a6407"
)) +
xlab("APOE e4") +
ylab("Expression") +
theme_classic() +
theme(legend.position = "none")
(SPP1_plot = ggarrange(p_SPP1_PRS, p_SPP1_APOE, ncol = 2, nrow = 1))
cairo_pdf("/pastel/projects/cytokines_Juliana/JNI_revision_I//PRS_SPP1_plot.pdf", width = 5, height = 3)
SPP1_plot
dev.off()
## png
## 2
p_IL15_PRS = top_cytokines_data_plus_with_stats %>% filter(cell_symbol == "mic:IL15") %>%
ggplot(aes(x = prs, y = expression, color = cell_type)) +
geom_point(alpha = 0.3) +
geom_smooth(method = "lm", se = T, color = "black") +
geom_text(aes(x = Inf, y = -Inf, label = label),
hjust = 1.1, vjust = -0.8, size = 4.5, color = "black", check_overlap = T) +
facet_wrap(symbol ~ cell_type, scales = "free", ncol = 5, nrow = 2) +
theme_classic() +
scale_x_continuous(n.breaks = 3) +
# ggeasy::easy_rotate_x_labels(angle = 25, side = c("right")) +
theme(legend.position = "none") +
labs(x = "PRS", y = "Expression") +
theme(
legend.position = "none"
) +
scale_color_manual(values = c(
"Excitatory Neurons" = "#00008B",
"Inhibitory Neurons" = "#CC3311",
"Microglia" = "#FF8C00",
"Astrocytes" = "#9932CC",
"Oligodendrocytes" = "#698b22",
"OPCs" = "#008b45",
"Endothelial Cells" = "#8a6407"
))
p_IL15_APOE = na.omit(top_cytokines_APOE_data_plus_with_stats) %>%filter(cell_symbol == "mic:IL15") %>%
ggplot(aes(x = apoe_any4, y = expression, color = cell_type)) +
geom_boxplot(notch=F, fill="white", color="black", outlier.shape=NA, lwd=0.4, alpha=1, width=.7, fatten = 3) +
ggbeeswarm::geom_quasirandom(alpha=.3, width=.2, show.legend=F, size=2) +
geom_pwc(method = "t_test", label.y.npc = "top", label.size = 5, hide.ns = F) +
stat_summary(fun = median, fun.min = median, fun.max = median,
geom = "crossbar", width = .7, color = "black") +
geom_text(aes(x = Inf, y = Inf, label = label),
hjust = 1.1, vjust = 1.2, size = 4.5, color = "black", check_overlap = T) +
facet_wrap( ~ symbol + cell_type, scales = "free", nrow = 2, ncol = 5) +
# facet_wrap(~ celltype+symbol, scales = "free_y") +
scale_y_continuous(expand = expansion(mult = c(0, 0.2))) +
scale_color_manual(values = c(
"Excitatory Neurons" = "#00008B",
"Inhibitory Neurons" = "#CC3311",
"Microglia" = "#FF8C00",
"Astrocytes" = "#9932CC",
"Oligodendrocytes" = "#698b22",
"OPCs" = "#008b45",
"Endothelial Cells" = "#8a6407"
)) +
xlab("APOE e4") +
ylab("Expression") +
theme_classic() +
theme(legend.position = "none")
(IL15_plot = ggarrange(p_IL15_PRS, p_IL15_APOE, ncol = 2, nrow = 1))
cairo_pdf("/pastel/projects/cytokines_Juliana/JNI_revision_I//PRS_IL15_plot.pdf", width = 5, height = 3)
IL15_plot
dev.off()
## png
## 2
p_IL17D_PRS = top_cytokines_data_plus_with_stats %>% filter(cell_symbol == "ast:IL17D") %>%
ggplot(aes(x = prs, y = expression, color = cell_type)) +
geom_point(alpha = 0.3) +
geom_smooth(method = "lm", se = T, color = "black") +
geom_text(aes(x = Inf, y = -Inf, label = label),
hjust = 1.1, vjust = -0.8, size = 4.5, color = "black", check_overlap = T) +
facet_wrap(symbol ~ cell_type, scales = "free", ncol = 5, nrow = 2) +
theme_classic() +
scale_x_continuous(n.breaks = 3) +
# ggeasy::easy_rotate_x_labels(angle = 25, side = c("right")) +
theme(legend.position = "none") +
labs(x = "PRS", y = "Expression") +
theme(
legend.position = "none"
) +
scale_color_manual(values = c(
"Excitatory Neurons" = "#00008B",
"Inhibitory Neurons" = "#CC3311",
"Microglia" = "#FF8C00",
"Astrocytes" = "#9932CC",
"Oligodendrocytes" = "#698b22",
"OPCs" = "#008b45",
"Endothelial Cells" = "#8a6407"
))
p_IL17D_APOE = na.omit(top_cytokines_APOE_data_plus_with_stats) %>%filter(cell_symbol == "ast:IL17D") %>%
ggplot(aes(x = apoe_any4, y = expression, color = cell_type)) +
geom_boxplot(notch=F, fill="white", color="black", outlier.shape=NA, lwd=0.4, alpha=1, width=.7, fatten = 3) +
ggbeeswarm::geom_quasirandom(alpha=.3, width=.2, show.legend=F, size=2) +
geom_pwc(method = "t_test", label.y.npc = "top", label.size = 5, hide.ns = F) +
stat_summary(fun = median, fun.min = median, fun.max = median,
geom = "crossbar", width = .7, color = "black") +
geom_text(aes(x = Inf, y = Inf, label = label),
hjust = 1.1, vjust = 1.2, size = 4.5, color = "black", check_overlap = T) +
facet_wrap( ~ symbol + cell_type, scales = "free", nrow = 2, ncol = 5) +
# facet_wrap(~ celltype+symbol, scales = "free_y") +
scale_y_continuous(expand = expansion(mult = c(0, 0.2))) +
scale_color_manual(values = c(
"Excitatory Neurons" = "#00008B",
"Inhibitory Neurons" = "#CC3311",
"Microglia" = "#FF8C00",
"Astrocytes" = "#9932CC",
"Oligodendrocytes" = "#698b22",
"OPCs" = "#008b45",
"Endothelial Cells" = "#8a6407"
)) +
xlab("APOE e4") +
ylab("Expression") +
theme_classic() +
theme(legend.position = "none")
(IL17D_plot = ggarrange(p_IL17D_PRS, p_IL17D_APOE, ncol = 2, nrow = 1))
cairo_pdf("/pastel/projects/cytokines_Juliana/JNI_revision_I//PRS_IL17D_plot.pdf", width = 5, height = 3)
IL17D_plot
dev.off()
## png
## 2
cairo_pdf("/pastel/projects/cytokines_Juliana/JNI_revision_I//PRS_p1.pdf", width = 4, height = 3)
p1
dev.off()
## png
## 2
cairo_pdf("/pastel/projects/cytokines_Juliana/JNI_revision_I//PRS_p2.pdf", width = 12, height = 6)
p2
## `geom_smooth()` using formula = 'y ~ x'
## png
## 2
cairo_pdf("/pastel/projects/cytokines_Juliana/JNI_revision_I//PRS_p3.pdf", width = 12, height = 6)
p3
## Warning: Computation failed in `stat_pwc()`.
## Caused by error in `mutate()`:
## ℹ In argument: `data = map(.data$data, .f, ...)`.
## Caused by error in `map()`:
## ℹ In index: 1.
## Caused by error in `t.test.default()`:
## ! not enough 'y' observations
## Warning: Computation failed in `stat_pwc()`.
## Caused by error in `mutate()`:
## ℹ In argument: `data = map(.data$data, .f, ...)`.
## Caused by error in `map()`:
## ℹ In index: 1.
## Caused by error in `t.test.default()`:
## ! not enough 'y' observations
## Warning: Computation failed in `stat_pwc()`.
## Caused by error in `mutate()`:
## ℹ In argument: `data = map(.data$data, .f, ...)`.
## Caused by error in `map()`:
## ℹ In index: 1.
## Caused by error in `t.test.default()`:
## ! not enough 'y' observations
## Warning: Computation failed in `stat_pwc()`.
## Caused by error in `mutate()`:
## ℹ In argument: `data = map(.data$data, .f, ...)`.
## Caused by error in `map()`:
## ℹ In index: 1.
## Caused by error in `t.test.default()`:
## ! not enough 'y' observations
## Warning: Computation failed in `stat_pwc()`.
## Caused by error in `mutate()`:
## ℹ In argument: `data = map(.data$data, .f, ...)`.
## Caused by error in `map()`:
## ℹ In index: 1.
## Caused by error in `t.test.default()`:
## ! not enough 'y' observations
## Warning: Computation failed in `stat_pwc()`.
## Caused by error in `mutate()`:
## ℹ In argument: `data = map(.data$data, .f, ...)`.
## Caused by error in `map()`:
## ℹ In index: 1.
## Caused by error in `t.test.default()`:
## ! not enough 'y' observations
## Warning: Computation failed in `stat_pwc()`.
## Caused by error in `mutate()`:
## ℹ In argument: `data = map(.data$data, .f, ...)`.
## Caused by error in `map()`:
## ℹ In index: 1.
## Caused by error in `t.test.default()`:
## ! not enough 'y' observations
## Warning: Computation failed in `stat_pwc()`.
## Caused by error in `mutate()`:
## ℹ In argument: `data = map(.data$data, .f, ...)`.
## Caused by error in `map()`:
## ℹ In index: 1.
## Caused by error in `t.test.default()`:
## ! not enough 'y' observations
## Warning: Computation failed in `stat_pwc()`.
## Caused by error in `mutate()`:
## ℹ In argument: `data = map(.data$data, .f, ...)`.
## Caused by error in `map()`:
## ℹ In index: 1.
## Caused by error in `t.test.default()`:
## ! not enough 'y' observations
## png
## 2
cairo_pdf("/pastel/projects/cytokines_Juliana/JNI_revision_I//PRS_p4.pdf", width = 15, height = 2.5)
p4
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## png
## 2
## R version 4.1.2 (2021-11-01)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: CentOS Stream 8
##
## Matrix products: default
## BLAS/LAPACK: /usr/lib64/libopenblasp-r0.3.15.so
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] parallel stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] future.apply_1.11.3 furrr_0.3.1 future_1.34.0
## [4] openxlsx_4.2.7.1 doParallel_1.0.17 iterators_1.0.14
## [7] foreach_1.5.2 fgsea_1.20.0 readxl_1.4.5
## [10] RColorBrewer_1.1-3 circlize_0.4.16 gtools_3.9.5
## [13] patchwork_1.3.0 ggsci_3.2.0 scales_1.3.0
## [16] ggbeeswarm_0.7.2 ggpubr_0.6.0 ggeasy_0.1.5
## [19] data.table_1.17.6 lubridate_1.9.3 forcats_1.0.0
## [22] stringr_1.5.1 dplyr_1.1.4 purrr_1.0.4
## [25] readr_2.1.5 tidyr_1.3.1 tibble_3.3.0
## [28] ggplot2_3.5.1 tidyverse_2.0.0
##
## loaded via a namespace (and not attached):
## [1] nlme_3.1-153 bit64_4.6.0-1 tools_4.1.2
## [4] backports_1.5.0 bslib_0.8.0 R6_2.6.1
## [7] vipor_0.4.7 mgcv_1.9-1 colorspace_2.1-1
## [10] withr_3.0.2 tidyselect_1.2.1 gridExtra_2.3
## [13] bit_4.6.0 compiler_4.1.2 cli_3.6.5
## [16] labeling_0.4.3 sass_0.4.9 digest_0.6.37
## [19] rmarkdown_2.29 pkgconfig_2.0.3 htmltools_0.5.8.1
## [22] parallelly_1.39.0 fastmap_1.2.0 rlang_1.1.6
## [25] GlobalOptions_0.1.2 rstudioapi_0.17.1 shape_1.4.6.1
## [28] jquerylib_0.1.4 farver_2.1.2 generics_0.1.4
## [31] jsonlite_2.0.0 BiocParallel_1.28.3 vroom_1.6.5
## [34] zip_2.3.1 car_3.1-3 magrittr_2.0.3
## [37] Formula_1.2-5 Matrix_1.6-5 Rcpp_1.0.14
## [40] munsell_0.5.1 abind_1.4-8 lifecycle_1.0.4
## [43] stringi_1.8.7 yaml_2.3.10 carData_3.0-5
## [46] grid_4.1.2 listenv_0.9.1 crayon_1.5.3
## [49] lattice_0.20-45 cowplot_1.1.3 splines_4.1.2
## [52] hms_1.1.3 knitr_1.49 pillar_1.10.2
## [55] ggsignif_0.6.4 codetools_0.2-18 fastmatch_1.1-4
## [58] glue_1.8.0 evaluate_1.0.1 vctrs_0.6.5
## [61] tzdb_0.4.0 cellranger_1.1.0 gtable_0.3.6
## [64] cachem_1.1.0 xfun_0.49 broom_1.0.7
## [67] rstatix_0.7.2 viridisLite_0.4.2 beeswarm_0.4.0
## [70] timechange_0.3.0 globals_0.16.3