Bulk RNAseq dataset | 1,210 unique samples | DLPFC region | ROSMAP cohort

Cytokine families annotation can be found here

# data_bulk <- load ("C:/Users/beker/OneDrive/Documentos/Mestrado/GitHub/Cytokines/bulk_RNAseq/bulk_DLPFC_2022.Rdata")
data_bulk <- load ("C:/Users/beker/OneDrive/Documentos/Mestrado/GitHub/Cytokines/bulk_RNAseq/bulk_DLPFC_2022.Rdata")
num_genes_bulk <- nrow(exprData_DLPFC)
num_samples_bulk <- ncol(exprData_DLPFC)
message(paste0("Number of genes in bulk RNA-seq dataset: ", num_genes_bulk))

## Number of genes in bulk RNA-seq dataset: 17309

message(paste0("Number of samples in bulk RNA-seq dataset: ", num_samples_bulk))

## Number of samples in bulk RNA-seq dataset: 1210

# upload list of cytokines
file_path <- "C:/Users/beker/OneDrive/Documentos/Mestrado/GitHub/Cytokines/list_cytokines/T049/new_cytokines_families_T049.xlsx"
cytokines <- read_excel(file_path)

Regressions

Input: Average expression by family.

Numbers and colors : -log10(nominal pvalue)

cutpoints:

< 0.001 = ***

0.01 = **

0.05 = *

0.1 = .

1 = ” ”

# Heatmap 
to_show = colnames(res_test$matrix_pvalue)
transpose = T
show_only_significant = F; signif_cutoff = c("***","**","*")

matrix_rsquared = res_test$matrix_rsquared
matrix_pvalue = res_test$matrix_pvalue # final matrix with the pvalues 

# Reorder heatmap row names to paper 
row_newOrder <- c("gpath","tangles_sqrt","amyloid_sqrt","ad_dementia_status","ci_status","cogng_demog_slope","cogng_path_slope")
matrix_rsquared = matrix_rsquared[row_newOrder, ]
matrix_pvalue = matrix_pvalue[row_newOrder, ]

matrix_rsquared_to_plot = matrix_rsquared[,to_show]
matrix_pvalue_to_plot = matrix_pvalue[,to_show]

# Adjust P-values by each phenotype separately.
adj_matrix_pvalue_to_plot = matrix_pvalue_to_plot
for(i in 1:ncol(matrix_pvalue_to_plot)){
  adj_matrix_pvalue_to_plot[,i] = p.adjust(matrix_pvalue_to_plot[,i], method = "bonferroni")
}
adj_matrix_pvalue_to_plot.signif <- symnum(x = as.matrix(adj_matrix_pvalue_to_plot), corr = FALSE, na = FALSE,
                                           cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1), 
                                           symbols = c("***", "**", "*", ".", " "))

log_matrix_pvalue_to_plot = -log10(matrix_pvalue_to_plot)
dimnames(log_matrix_pvalue_to_plot) = dimnames(log_matrix_pvalue_to_plot)

if(show_only_significant){
  if(is.numeric(signif_cutoff)){
    to_keep = colSums(adj_matrix_pvalue_to_plot <= signif_cutoff) > 0
  }else{
    to_keep = rep(F,ncol(adj_matrix_pvalue_to_plot.signif))
    for(cut_i in signif_cutoff){
      to_keep = to_keep | colSums(adj_matrix_pvalue_to_plot.signif == cut_i) > 0 # change for the significance you want 
    }
  }
  log_matrix_pvalue_to_plot = log_matrix_pvalue_to_plot[,to_keep]
  adj_matrix_pvalue_to_plot.signif = adj_matrix_pvalue_to_plot.signif[,to_keep]
}

matrix_pvalue_to_plot_labels = formatC(log_matrix_pvalue_to_plot, format = "f", digits = 2)
log_matrix_pvalue_to_plot_t = t(log_matrix_pvalue_to_plot)

if(transpose){
  log_matrix_pvalue_to_plot_t = t(log_matrix_pvalue_to_plot_t)
  matrix_pvalue_to_plot_labels = t(matrix_pvalue_to_plot_labels)
  adj_matrix_pvalue_to_plot.signif = t(adj_matrix_pvalue_to_plot.signif)
}

# Colored by -log10(pvalue)
# Numbers inside cell = -log10(pvalue): nominal
#
#
# New column names
new_column_names <- c(
  "gpath" = "Global AD burden",
  "tangles_sqrt" = "PHFtau tangle density",
  "amyloid_sqrt" = "Amyloid-β load",
  "ad_dementia_status" = "AD diagnosis",
  "ci_status" = "Mild cognitive impairment",
  "cogng_demog_slope" = "Cognitive decline",
  "cogng_path_slope" = "Resilience"
)

rownames(log_matrix_pvalue_to_plot_t) <- new_column_names[rownames(log_matrix_pvalue_to_plot_t)]

# Group colors
group_colors <- c("Pathology" = "#800074", "Cognition" = "#298c8c")

# Y lab annotation
group_ylab <- factor(
  c("Pathology","Pathology","Pathology","Cognition","Cognition","Cognition","Cognition"),
  levels = c("Pathology", "Cognition")
)

# Lateral annotation
row_anno <- rowAnnotation(
  Group = group_ylab,
  col = list(Group = group_colors),
  show_annotation_name = TRUE,
  annotation_name_side = "top",
  annotation_name_gp = gpar(fontsize = 9),
  width = unit(1, "mm")
)


heatmap_plot <- Heatmap(log_matrix_pvalue_to_plot_t, name = "-log10(P-value)",
                        cell_fun = function(j, i, x, y, width, height, fill) {
                          if(as.character(t(adj_matrix_pvalue_to_plot.signif)[i,j]) == " "){
                            grid.text( t(matrix_pvalue_to_plot_labels)[i,j], x, y, 
                                       gp = gpar(fontsize = 8))
                          }else{
                            grid.text(paste0( t(matrix_pvalue_to_plot_labels)[i,j],"\n", t(adj_matrix_pvalue_to_plot.signif)[i,j] ), x, y, 
                                      gp = gpar(fontsize = 8))
                          }
                        },
                        col = colorRampPalette(rev(brewer.pal(n = 7, name ="RdYlBu")))(100),
                        row_names_side = "right", show_row_names = T,
                        cluster_rows = F, cluster_columns = F,
                        column_names_gp = gpar(fontsize = 9),
                        row_names_gp = gpar(fontsize = 9),
                        border = T,
                        show_row_dend = F, show_column_dend = F, rect_gp = gpar(col = "white", lwd = 1),
                        column_names_rot = 40,
                        column_title = "Bulk RNAseq data")

# Heatmap + lateral annotation
final_heatmap <- row_anno + heatmap_plot
draw(final_heatmap)

# Saving as PDF
pdf(file = "C:/Users/beker/OneDrive/Documentos/Mestrado/GitHub/Cytokines/bulk_RNAseq/T051/reg_cytokines_bulk_family_T051.pdf", width = 5, height = 3)
draw(final_heatmap)
dev.off()

## png 
##   2

# Saving as PNG
png(file = "C:/Users/beker/OneDrive/Documentos/Mestrado/GitHub/Cytokines/bulk_RNAseq/T051/reg_cytokines_bulk_family_T051.png", width = 1425, height = 850, res = 300)
draw(final_heatmap)
dev.off()

## png 
##   2

Top results

Nominal pvalue

createDT(res_test$matrix_pvalue)

Session info

sessionInfo()

## R version 4.3.2 (2023-10-31 ucrt)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 11 x64 (build 26100)
## 
## Matrix products: default
## 
## 
## locale:
## [1] LC_COLLATE=Portuguese_Brazil.utf8  LC_CTYPE=Portuguese_Brazil.utf8   
## [3] LC_MONETARY=Portuguese_Brazil.utf8 LC_NUMERIC=C                      
## [5] LC_TIME=Portuguese_Brazil.utf8    
## 
## time zone: America/Sao_Paulo
## tzcode source: internal
## 
## attached base packages:
## [1] grid      stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] performance_0.12.2    lmerTest_3.1-3        lme4_1.1-35.5        
##  [4] Matrix_1.6-5          readxl_1.4.3          RColorBrewer_1.1-3   
##  [7] circlize_0.4.16       ComplexHeatmap_2.18.0 ggsignif_0.6.4       
## [10] ggeasy_0.1.4          ggpubr_0.6.0          lubridate_1.9.3      
## [13] forcats_1.0.0         stringr_1.5.1         purrr_1.0.2          
## [16] tidyr_1.3.1           tibble_3.2.1          tidyverse_2.0.0      
## [19] ggplot2_3.5.0         readr_2.1.5           rstatix_0.7.2        
## [22] dplyr_1.1.4          
## 
## loaded via a namespace (and not attached):
##  [1] tidyselect_1.2.1    fastmap_1.2.0       digest_0.6.36      
##  [4] timechange_0.3.0    lifecycle_1.0.4     cluster_2.1.4      
##  [7] magrittr_2.0.3      compiler_4.3.2      rlang_1.1.3        
## [10] sass_0.4.9          tools_4.3.2         utf8_1.2.4         
## [13] yaml_2.3.10         knitr_1.48          htmlwidgets_1.6.4  
## [16] plyr_1.8.9          abind_1.4-5         numDeriv_2016.8-1.1
## [19] withr_3.0.1         BiocGenerics_0.48.1 stats4_4.3.2       
## [22] fansi_1.0.6         colorspace_2.1-0    scales_1.3.0       
## [25] iterators_1.0.14    MASS_7.3-60         insight_0.20.3     
## [28] cli_3.6.2           rmarkdown_2.27      crayon_1.5.3       
## [31] generics_0.1.3      rstudioapi_0.16.0   reshape2_1.4.4     
## [34] tzdb_0.4.0          rjson_0.2.21        minqa_1.2.7        
## [37] cachem_1.1.0        splines_4.3.2       parallel_4.3.2     
## [40] cellranger_1.1.0    matrixStats_1.3.0   vctrs_0.6.5        
## [43] boot_1.3-28.1       jsonlite_1.8.8      carData_3.0-5      
## [46] car_3.1-2           IRanges_2.36.0      hms_1.1.3          
## [49] GetoptLong_1.0.5    S4Vectors_0.40.2    clue_0.3-65        
## [52] crosstalk_1.2.1     magick_2.8.4        foreach_1.5.2      
## [55] jquerylib_0.1.4     glue_1.7.0          nloptr_2.1.1       
## [58] codetools_0.2-19    DT_0.33             stringi_1.8.3      
## [61] gtable_0.3.5        shape_1.4.6.1       munsell_0.5.1      
## [64] pillar_1.9.0        htmltools_0.5.8.1   R6_2.5.1           
## [67] doParallel_1.0.17   evaluate_0.24.0     lattice_0.21-9     
## [70] highr_0.11          png_0.1-8           backports_1.4.1    
## [73] broom_1.0.6         bslib_0.8.0         Rcpp_1.0.12        
## [76] nlme_3.1-163        xfun_0.46           pkgconfig_2.0.3    
## [79] GlobalOptions_0.1.2

Linear and/or logistic regression - Bulk RNAseq filtered by cytokines families

Juliana Beker Godoy, MSc
Graduate Program in Bioinformatics
Professional and Technological Education Sector
Federal University of Parana (UFPR)
Curitiba, Brazil

2025-06-01

Regressions

Top results

Nominal pvalue

Session info

Linear and/or logistic regression - Bulk RNAseq filtered by cytokines families

Juliana Beker Godoy, MSc Graduate Program in Bioinformatics Professional and Technological Education Sector Federal University of Parana (UFPR) Curitiba, Brazil

2025-06-01

Regressions

Top results

Nominal pvalue

Session info

Juliana Beker Godoy, MSc
Graduate Program in Bioinformatics
Professional and Technological Education Sector
Federal University of Parana (UFPR)
Curitiba, Brazil