visualize data human LN without CD patients

Author

Mechthild Lütge

Load packages

## load packages 
suppressPackageStartupMessages({
  library(dplyr)
  library(reshape2)
  library(ggplot2)
  library(cowplot)
  library(purrr)
  library(Seurat)
  library(tidyverse)
  library(ggpubr)
  library(runSeurat3)
  library(here)
  library(ggsci)
  library(pheatmap)
  library(scater)
})

load seurat object

basedir <- here()

## load seurat object from previous analysis
seurat <- readRDS(file=paste0(basedir, "/data/AllPatCM_LNFSCMerged_seurat.rds"))

remove CD pat

dim(seurat)
[1]  36098 117137
seurat <- subset(seurat, cond %in% c("UCD", "CDsusp"), invert=T)
seurat <- subset(seurat, patient %in% c("ucd003"), invert=T)
dim(seurat)
[1] 36098 62604
## reprocess
res <- c(0.8,0.6,0.4,0.25)

seurat <- NormalizeData(object = seurat)
seurat <- FindVariableFeatures(object = seurat)
seurat <- ScaleData(object = seurat, verbose = FALSE)
seurat <- RunPCA(object = seurat, npcs = 30, verbose = FALSE)
seurat <- RunTSNE(object = seurat, reduction = "pca", dims = 1:20)
seurat <- RunUMAP(object = seurat, reduction = "pca", dims = 1:20)
seurat <- FindNeighbors(object = seurat, reduction = "pca", 
                        dims = 1:20)
for (i in 1:length(res)) {
    seurat <- FindClusters(object = seurat, resolution = res[i], 
                           random.seed = 1234)
    }
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 62604
Number of edges: 2073920

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.9061
Number of communities: 24
Elapsed time: 17 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 62604
Number of edges: 2073920

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.9177
Number of communities: 20
Elapsed time: 18 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 62604
Number of edges: 2073920

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.9322
Number of communities: 17
Elapsed time: 17 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 62604
Number of edges: 2073920

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.9468
Number of communities: 12
Elapsed time: 16 seconds
## remove contaminating epithelial cells from UPENN
seurat <- subset(seurat, RNA_snn_res.0.25 %in% c("10"), invert=T)
dim(seurat)
[1] 36098 61459
seurat <- RunPCA(object = seurat, npcs = 30, verbose = FALSE)
seurat <- RunTSNE(object = seurat, reduction = "pca", dims = 1:20)
seurat <- RunUMAP(object = seurat, reduction = "pca", dims = 1:20)

colPal <- c(pal_nejm()(7),pal_futurama()(12))[1:length(levels(seurat))]
names(colPal) <- levels(seurat)

assign groups

seurat$grp <- "FRC"
seurat$grp[which(seurat$seurat_clusters %in% c("3", "6"))] <- "BEC"
seurat$grp[which(seurat$seurat_clusters %in% c("7", "9"))] <- "LEC"
seurat$grp[which(seurat$seurat_clusters %in% c("1"))] <- "SMC"

visualize data

clustering

## visualize input data
DimPlot(seurat, reduction = "umap", cols=colPal)+
  theme_bw() +
  theme(axis.text = element_blank(), axis.ticks = element_blank(), 
        panel.grid.minor = element_blank()) +
  xlab("UMAP1") +
  ylab("UMAP2")

patient

colPat <- c(pal_jco()(10),pal_futurama()(12))[1:length(unique(seurat$patient))]
names(colPat) <- unique(seurat$patient)

## visualize input data
DimPlot(seurat, reduction = "umap", cols=colPat, group.by = "patient")+
  theme_bw() +
  theme(axis.text = element_blank(), axis.ticks = element_blank(), 
        panel.grid.minor = element_blank()) +
  xlab("UMAP1") +
  ylab("UMAP2")

cond

colTon <- pal_igv()(length(unique(seurat$cond)))
names(colTon) <- unique(seurat$cond)

## visualize input data
DimPlot(seurat, reduction = "umap", cols=colTon, group.by = "cond")+
  theme_bw() +
  theme(axis.text = element_blank(), axis.ticks = element_blank(), 
        panel.grid.minor = element_blank()) +
  xlab("UMAP1") +
  ylab("UMAP2")

grp

colGrp <- pal_uchicago()(length(unique(seurat$grp)))
names(colGrp) <- unique(seurat$grp)

## visualize input data
DimPlot(seurat, reduction = "umap", cols=colGrp, group.by = "grp")+
  theme_bw() +
  theme(axis.text = element_blank(), axis.ticks = element_blank(), 
        panel.grid.minor = element_blank()) +
  xlab("UMAP1") +
  ylab("UMAP2")

origin

colOri <- pal_npg()(length(unique(seurat$origin)))
names(colOri) <- unique(seurat$origin)

## visualize input data
DimPlot(seurat, reduction = "umap", cols=colOri, group.by = "origin")+
  theme_bw() +
  theme(axis.text = element_blank(), axis.ticks = element_blank(), 
        panel.grid.minor = element_blank()) +
  xlab("UMAP1") +
  ylab("UMAP2")

cnt tables

col Grp

grps_per_Pat <- data.frame(table(seurat$patient, seurat$grp))
colnames(grps_per_Pat) <- c("patient", "group", "cnt")

ggplot(grps_per_Pat, aes(x = patient, y = cnt)) +
  geom_bar(aes(color = group, fill = group, alpha = 0.8),
           stat = "identity") +
  scale_color_manual(values = colGrp) +
  scale_fill_manual(values = colGrp) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

col Pat

ggplot(grps_per_Pat, aes(x = patient, y = cnt)) +
  geom_bar(aes(color = patient, fill = patient),
           stat = "identity") +
  scale_color_manual(values = colPat) +
  scale_fill_manual(values = colPat) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

col Cond

grps_per_Pat <- data.frame(table(seurat$patient, seurat$cond))
colnames(grps_per_Pat) <- c("patient", "cond", "cnt")

ggplot(grps_per_Pat, aes(x = patient, y = cnt)) +
  geom_bar(aes(color = cond, fill = cond),
           stat = "identity") +
  scale_color_manual(values = colTon) +
  scale_fill_manual(values = colTon) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

col dataset

grps_per_Pat <- data.frame(table(seurat$patient, seurat$dataset))
colnames(grps_per_Pat) <- c("patient", "dataset", "cnt")
colDat <- c(pal_npg()(10),pal_futurama()(12))[1:length(unique(seurat$dataset))]
names(colDat) <- unique(seurat$dataset)

ggplot(grps_per_Pat, aes(x = patient, y = cnt)) +
  geom_bar(aes(color = dataset, fill = dataset),
           stat = "identity") +
  scale_color_manual(values = colDat) +
  scale_fill_manual(values = colDat) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

vis cluster characterization

avgHeatmap <- function(seurat, selGenes, colVecIdent, colVecCond=NULL,
                       ordVec=NULL, gapVecR=NULL, gapVecC=NULL,cc=FALSE,
                       cr=FALSE, condCol=FALSE){
  
  selGenes <- selGenes$gene
  
  ## assay data
  clusterAssigned <- as.data.frame(Idents(seurat)) %>%
  dplyr::mutate(cell=rownames(.))
  colnames(clusterAssigned)[1] <- "ident"
  seuratDat <- GetAssayData(seurat)
  
  ## genes of interest
  genes <- data.frame(gene=rownames(seurat)) %>% 
    mutate(geneID=gsub("^.*\\.", "", gene)) %>% filter(geneID %in% selGenes)

  ## matrix with averaged cnts per ident
  logNormExpres <- as.data.frame(t(as.matrix(
    seuratDat[which(rownames(seuratDat) %in% genes$gene),])))
  logNormExpres <- logNormExpres %>% dplyr::mutate(cell=rownames(.)) %>%
    dplyr::left_join(.,clusterAssigned, by=c("cell")) %>%
    dplyr::select(-cell) %>% dplyr::group_by(ident) %>%
    dplyr::summarise_all(mean)
  logNormExpresMa <- logNormExpres %>% dplyr::select(-ident) %>% as.matrix()
  rownames(logNormExpresMa) <- logNormExpres$ident
  logNormExpresMa <- t(logNormExpresMa)
  rownames(logNormExpresMa) <- gsub("^.*?\\.","",rownames(logNormExpresMa))
  
  ## remove genes if they are all the same in all groups
  ind <- apply(logNormExpresMa, 1, sd) == 0
  logNormExpresMa <- logNormExpresMa[!ind,]
  genes <- genes[!ind,]

  ## color columns according to cluster
  annotation_col <- as.data.frame(gsub("(^.*?_)","",
                                       colnames(logNormExpresMa)))%>%
    dplyr::mutate(celltype=gsub("(_.*$)","",colnames(logNormExpresMa)))
  colnames(annotation_col)[1] <- "col1"
  annotation_col <- annotation_col %>%
    dplyr::mutate(cond = gsub("(^[0-9]_?)","",col1)) %>%
    dplyr::select(cond, celltype)
  rownames(annotation_col) <- colnames(logNormExpresMa) 

  ann_colors = list(
      cond = colVecCond,
      celltype=colVecIdent)
  if(is.null(ann_colors$cond)){
    annotation_col$cond <- NULL
  }
  
  ## adjust order
  logNormExpresMa <- logNormExpresMa[selGenes,]
  if(is.null(ordVec)){
    ordVec <- levels(seurat)
  }
  logNormExpresMa <- logNormExpresMa[,ordVec]

  ## scaled row-wise
  pheatmap(logNormExpresMa, scale="row" ,treeheight_row = 0, cluster_rows = cr, 
         cluster_cols = cc,
         color = colorRampPalette(c("#2166AC", "#F7F7F7", "#B2182B"))(50),
         annotation_col = annotation_col, cellwidth=15, cellheight=10,
         annotation_colors = ann_colors, gaps_row = gapVecR, gaps_col = gapVecC)
}
selMarker <- list(LEC = c("PROX1", "PECAM1"),                  
                  BEC = c("CD34", "ENG", "CDH5"),
                  SMC = c("ACTA2"),
                  FRC = c("PDPN", "CCL19", "CCL21", "CLU", "CXCL13",
                          "CD34", "PI16"))

avg heatmap

selGenes <- data.frame(gene=unlist(selMarker)) %>%
  rownames_to_column(var="grp") %>% mutate(Grp=gsub(".{1}$", "", grp))
grpCnt <- selGenes %>% group_by(Grp) %>% summarise(cnt=n())
gapR <- data.frame(Grp=unique(selGenes$Grp)) %>% 
  left_join(.,grpCnt, by="Grp") %>% mutate(cumSum=cumsum(cnt)) 
ordVec <- levels(seurat)

pOut <- avgHeatmap(seurat = seurat, selGenes = selGenes,
                  colVecIdent = colPal, 
                  ordVec=ordVec,
                  gapVecR=gapR$cumSum, gapVecC=NULL,cc=T,
                  cr=F, condCol=F)

save seurat object

saveRDS(seurat, file=paste0(basedir,
                            "/data/AllPatWithoutCM_LNFSCMerged_seurat.rds"))

session info

sessionInfo()
R version 4.3.0 (2023-04-21)
Platform: x86_64-apple-darwin20 (64-bit)
Running under: macOS Ventura 13.4.1

Matrix products: default
BLAS:   /Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/lib/libRblas.0.dylib 
LAPACK: /Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/lib/libRlapack.dylib;  LAPACK version 3.11.0

locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

time zone: Europe/Berlin
tzcode source: internal

attached base packages:
[1] stats4    stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
 [1] scater_1.28.0               scuttle_1.10.3              SingleCellExperiment_1.22.0
 [4] SummarizedExperiment_1.30.2 Biobase_2.60.0              GenomicRanges_1.52.1       
 [7] GenomeInfoDb_1.36.4         IRanges_2.36.0              S4Vectors_0.40.1           
[10] BiocGenerics_0.48.0         MatrixGenerics_1.12.3       matrixStats_1.2.0          
[13] pheatmap_1.0.12             ggsci_3.0.1                 here_1.0.1                 
[16] runSeurat3_0.1.0            ggpubr_0.6.0                lubridate_1.9.3            
[19] forcats_1.0.0               stringr_1.5.1               readr_2.1.5                
[22] tidyr_1.3.1                 tibble_3.2.1                tidyverse_2.0.0            
[25] Seurat_5.0.2                SeuratObject_5.0.1          sp_2.1-3                   
[28] purrr_1.0.2                 cowplot_1.1.3               ggplot2_3.5.0              
[31] reshape2_1.4.4              dplyr_1.1.4                

loaded via a namespace (and not attached):
  [1] RcppAnnoy_0.0.22          splines_4.3.0             later_1.3.2              
  [4] bitops_1.0-7              polyclip_1.10-6           fastDummies_1.7.3        
  [7] lifecycle_1.0.4           rstatix_0.7.2             rprojroot_2.0.4          
 [10] globals_0.16.2            lattice_0.22-5            MASS_7.3-60.0.1          
 [13] backports_1.4.1           magrittr_2.0.3            plotly_4.10.4            
 [16] rmarkdown_2.26            yaml_2.3.8                httpuv_1.6.14            
 [19] sctransform_0.4.1         spam_2.10-0               spatstat.sparse_3.0-3    
 [22] reticulate_1.35.0         pbapply_1.7-2             RColorBrewer_1.1-3       
 [25] abind_1.4-5               zlibbioc_1.46.0           Rtsne_0.17               
 [28] RCurl_1.98-1.14           GenomeInfoDbData_1.2.10   ggrepel_0.9.5            
 [31] irlba_2.3.5.1             listenv_0.9.1             spatstat.utils_3.0-4     
 [34] goftest_1.2-3             RSpectra_0.16-1           spatstat.random_3.2-3    
 [37] fitdistrplus_1.1-11       parallelly_1.37.1         DelayedMatrixStats_1.22.6
 [40] leiden_0.4.3.1            codetools_0.2-19          DelayedArray_0.26.7      
 [43] tidyselect_1.2.0          farver_2.1.1              viridis_0.6.5            
 [46] ScaledMatrix_1.8.1        spatstat.explore_3.2-6    jsonlite_1.8.8           
 [49] BiocNeighbors_1.18.0      ellipsis_0.3.2            progressr_0.14.0         
 [52] ggridges_0.5.6            survival_3.5-8            tools_4.3.0              
 [55] ica_1.0-3                 Rcpp_1.0.12               glue_1.7.0               
 [58] gridExtra_2.3             xfun_0.42                 withr_3.0.0              
 [61] fastmap_1.1.1             fansi_1.0.6               rsvd_1.0.5               
 [64] digest_0.6.34             timechange_0.3.0          R6_2.5.1                 
 [67] mime_0.12                 colorspace_2.1-0          scattermore_1.2          
 [70] tensor_1.5                spatstat.data_3.0-4       utf8_1.2.4               
 [73] generics_0.1.3            data.table_1.15.2         httr_1.4.7               
 [76] htmlwidgets_1.6.4         S4Arrays_1.0.6            uwot_0.1.16              
 [79] pkgconfig_2.0.3           gtable_0.3.4              lmtest_0.9-40            
 [82] XVector_0.40.0            htmltools_0.5.7           carData_3.0-5            
 [85] dotCall64_1.1-1           scales_1.3.0              png_0.1-8                
 [88] knitr_1.45                rstudioapi_0.15.0         tzdb_0.4.0               
 [91] nlme_3.1-164              zoo_1.8-12                KernSmooth_2.23-22       
 [94] vipor_0.4.7               parallel_4.3.0            miniUI_0.1.1.1           
 [97] pillar_1.9.0              grid_4.3.0                vctrs_0.6.5              
[100] RANN_2.6.1                promises_1.2.1            BiocSingular_1.16.0      
[103] car_3.1-2                 beachmat_2.16.0           xtable_1.8-4             
[106] cluster_2.1.6             beeswarm_0.4.0            evaluate_0.23            
[109] cli_3.6.2                 compiler_4.3.0            rlang_1.1.3              
[112] crayon_1.5.2              future.apply_1.11.1       ggsignif_0.6.4           
[115] labeling_0.4.3            ggbeeswarm_0.7.2          plyr_1.8.9               
[118] stringi_1.8.3             BiocParallel_1.34.2       viridisLite_0.4.2        
[121] deldir_2.0-4              munsell_0.5.0             lazyeval_0.2.2           
[124] spatstat.geom_3.2-9       Matrix_1.6-5              RcppHNSW_0.6.0           
[127] hms_1.1.3                 patchwork_1.2.0           sparseMatrixStats_1.12.2 
[130] future_1.33.1             shiny_1.8.0               ROCR_1.0-11              
[133] igraph_2.0.2              broom_1.0.5              
date()
[1] "Wed Mar 13 22:26:46 2024"