visualize data human LN without CD patients
Load packages
## load packages
suppressPackageStartupMessages({
library(dplyr)
library(reshape2)
library(ggplot2)
library(cowplot)
library(purrr)
library(Seurat)
library(tidyverse)
library(ggpubr)
library(runSeurat3)
library(here)
library(ggsci)
library(pheatmap)
library(scater)
})
load seurat object
<- here()
basedir
## load seurat object from previous analysis
<- readRDS(file=paste0(basedir, "/data/AllPatCM_LNFSCMerged_seurat.rds")) seurat
remove CD pat
dim(seurat)
[1] 36098 117137
<- subset(seurat, cond %in% c("UCD", "CDsusp"), invert=T)
seurat <- subset(seurat, patient %in% c("ucd003"), invert=T)
seurat dim(seurat)
[1] 36098 62604
## reprocess
<- c(0.8,0.6,0.4,0.25)
res
<- NormalizeData(object = seurat)
seurat <- FindVariableFeatures(object = seurat)
seurat <- ScaleData(object = seurat, verbose = FALSE)
seurat <- RunPCA(object = seurat, npcs = 30, verbose = FALSE)
seurat <- RunTSNE(object = seurat, reduction = "pca", dims = 1:20)
seurat <- RunUMAP(object = seurat, reduction = "pca", dims = 1:20)
seurat <- FindNeighbors(object = seurat, reduction = "pca",
seurat dims = 1:20)
for (i in 1:length(res)) {
<- FindClusters(object = seurat, resolution = res[i],
seurat random.seed = 1234)
}
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 62604
Number of edges: 2073920
Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.9061
Number of communities: 24
Elapsed time: 17 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 62604
Number of edges: 2073920
Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.9177
Number of communities: 20
Elapsed time: 18 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 62604
Number of edges: 2073920
Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.9322
Number of communities: 17
Elapsed time: 17 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 62604
Number of edges: 2073920
Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.9468
Number of communities: 12
Elapsed time: 16 seconds
## remove contaminating epithelial cells from UPENN
<- subset(seurat, RNA_snn_res.0.25 %in% c("10"), invert=T)
seurat dim(seurat)
[1] 36098 61459
<- RunPCA(object = seurat, npcs = 30, verbose = FALSE)
seurat <- RunTSNE(object = seurat, reduction = "pca", dims = 1:20)
seurat <- RunUMAP(object = seurat, reduction = "pca", dims = 1:20)
seurat
<- c(pal_nejm()(7),pal_futurama()(12))[1:length(levels(seurat))]
colPal names(colPal) <- levels(seurat)
assign groups
$grp <- "FRC"
seurat$grp[which(seurat$seurat_clusters %in% c("3", "6"))] <- "BEC"
seurat$grp[which(seurat$seurat_clusters %in% c("7", "9"))] <- "LEC"
seurat$grp[which(seurat$seurat_clusters %in% c("1"))] <- "SMC" seurat
visualize data
clustering
## visualize input data
DimPlot(seurat, reduction = "umap", cols=colPal)+
theme_bw() +
theme(axis.text = element_blank(), axis.ticks = element_blank(),
panel.grid.minor = element_blank()) +
xlab("UMAP1") +
ylab("UMAP2")
patient
<- c(pal_jco()(10),pal_futurama()(12))[1:length(unique(seurat$patient))]
colPat names(colPat) <- unique(seurat$patient)
## visualize input data
DimPlot(seurat, reduction = "umap", cols=colPat, group.by = "patient")+
theme_bw() +
theme(axis.text = element_blank(), axis.ticks = element_blank(),
panel.grid.minor = element_blank()) +
xlab("UMAP1") +
ylab("UMAP2")
cond
<- pal_igv()(length(unique(seurat$cond)))
colTon names(colTon) <- unique(seurat$cond)
## visualize input data
DimPlot(seurat, reduction = "umap", cols=colTon, group.by = "cond")+
theme_bw() +
theme(axis.text = element_blank(), axis.ticks = element_blank(),
panel.grid.minor = element_blank()) +
xlab("UMAP1") +
ylab("UMAP2")
grp
<- pal_uchicago()(length(unique(seurat$grp)))
colGrp names(colGrp) <- unique(seurat$grp)
## visualize input data
DimPlot(seurat, reduction = "umap", cols=colGrp, group.by = "grp")+
theme_bw() +
theme(axis.text = element_blank(), axis.ticks = element_blank(),
panel.grid.minor = element_blank()) +
xlab("UMAP1") +
ylab("UMAP2")
origin
<- pal_npg()(length(unique(seurat$origin)))
colOri names(colOri) <- unique(seurat$origin)
## visualize input data
DimPlot(seurat, reduction = "umap", cols=colOri, group.by = "origin")+
theme_bw() +
theme(axis.text = element_blank(), axis.ticks = element_blank(),
panel.grid.minor = element_blank()) +
xlab("UMAP1") +
ylab("UMAP2")
cnt tables
col Grp
<- data.frame(table(seurat$patient, seurat$grp))
grps_per_Pat colnames(grps_per_Pat) <- c("patient", "group", "cnt")
ggplot(grps_per_Pat, aes(x = patient, y = cnt)) +
geom_bar(aes(color = group, fill = group, alpha = 0.8),
stat = "identity") +
scale_color_manual(values = colGrp) +
scale_fill_manual(values = colGrp) +
theme_bw() +
theme(axis.text.x = element_text(angle = 90, hjust = 1))
col Pat
ggplot(grps_per_Pat, aes(x = patient, y = cnt)) +
geom_bar(aes(color = patient, fill = patient),
stat = "identity") +
scale_color_manual(values = colPat) +
scale_fill_manual(values = colPat) +
theme_bw() +
theme(axis.text.x = element_text(angle = 90, hjust = 1))
col Cond
<- data.frame(table(seurat$patient, seurat$cond))
grps_per_Pat colnames(grps_per_Pat) <- c("patient", "cond", "cnt")
ggplot(grps_per_Pat, aes(x = patient, y = cnt)) +
geom_bar(aes(color = cond, fill = cond),
stat = "identity") +
scale_color_manual(values = colTon) +
scale_fill_manual(values = colTon) +
theme_bw() +
theme(axis.text.x = element_text(angle = 90, hjust = 1))
col dataset
<- data.frame(table(seurat$patient, seurat$dataset))
grps_per_Pat colnames(grps_per_Pat) <- c("patient", "dataset", "cnt")
<- c(pal_npg()(10),pal_futurama()(12))[1:length(unique(seurat$dataset))]
colDat names(colDat) <- unique(seurat$dataset)
ggplot(grps_per_Pat, aes(x = patient, y = cnt)) +
geom_bar(aes(color = dataset, fill = dataset),
stat = "identity") +
scale_color_manual(values = colDat) +
scale_fill_manual(values = colDat) +
theme_bw() +
theme(axis.text.x = element_text(angle = 90, hjust = 1))
vis cluster characterization
<- function(seurat, selGenes, colVecIdent, colVecCond=NULL,
avgHeatmap ordVec=NULL, gapVecR=NULL, gapVecC=NULL,cc=FALSE,
cr=FALSE, condCol=FALSE){
<- selGenes$gene
selGenes
## assay data
<- as.data.frame(Idents(seurat)) %>%
clusterAssigned ::mutate(cell=rownames(.))
dplyrcolnames(clusterAssigned)[1] <- "ident"
<- GetAssayData(seurat)
seuratDat
## genes of interest
<- data.frame(gene=rownames(seurat)) %>%
genes mutate(geneID=gsub("^.*\\.", "", gene)) %>% filter(geneID %in% selGenes)
## matrix with averaged cnts per ident
<- as.data.frame(t(as.matrix(
logNormExpres which(rownames(seuratDat) %in% genes$gene),])))
seuratDat[<- logNormExpres %>% dplyr::mutate(cell=rownames(.)) %>%
logNormExpres ::left_join(.,clusterAssigned, by=c("cell")) %>%
dplyr::select(-cell) %>% dplyr::group_by(ident) %>%
dplyr::summarise_all(mean)
dplyr<- logNormExpres %>% dplyr::select(-ident) %>% as.matrix()
logNormExpresMa rownames(logNormExpresMa) <- logNormExpres$ident
<- t(logNormExpresMa)
logNormExpresMa rownames(logNormExpresMa) <- gsub("^.*?\\.","",rownames(logNormExpresMa))
## remove genes if they are all the same in all groups
<- apply(logNormExpresMa, 1, sd) == 0
ind <- logNormExpresMa[!ind,]
logNormExpresMa <- genes[!ind,]
genes
## color columns according to cluster
<- as.data.frame(gsub("(^.*?_)","",
annotation_col colnames(logNormExpresMa)))%>%
::mutate(celltype=gsub("(_.*$)","",colnames(logNormExpresMa)))
dplyrcolnames(annotation_col)[1] <- "col1"
<- annotation_col %>%
annotation_col ::mutate(cond = gsub("(^[0-9]_?)","",col1)) %>%
dplyr::select(cond, celltype)
dplyrrownames(annotation_col) <- colnames(logNormExpresMa)
= list(
ann_colors cond = colVecCond,
celltype=colVecIdent)
if(is.null(ann_colors$cond)){
$cond <- NULL
annotation_col
}
## adjust order
<- logNormExpresMa[selGenes,]
logNormExpresMa if(is.null(ordVec)){
<- levels(seurat)
ordVec
}<- logNormExpresMa[,ordVec]
logNormExpresMa
## scaled row-wise
pheatmap(logNormExpresMa, scale="row" ,treeheight_row = 0, cluster_rows = cr,
cluster_cols = cc,
color = colorRampPalette(c("#2166AC", "#F7F7F7", "#B2182B"))(50),
annotation_col = annotation_col, cellwidth=15, cellheight=10,
annotation_colors = ann_colors, gaps_row = gapVecR, gaps_col = gapVecC)
}
<- list(LEC = c("PROX1", "PECAM1"),
selMarker BEC = c("CD34", "ENG", "CDH5"),
SMC = c("ACTA2"),
FRC = c("PDPN", "CCL19", "CCL21", "CLU", "CXCL13",
"CD34", "PI16"))
avg heatmap
<- data.frame(gene=unlist(selMarker)) %>%
selGenes rownames_to_column(var="grp") %>% mutate(Grp=gsub(".{1}$", "", grp))
<- selGenes %>% group_by(Grp) %>% summarise(cnt=n())
grpCnt <- data.frame(Grp=unique(selGenes$Grp)) %>%
gapR left_join(.,grpCnt, by="Grp") %>% mutate(cumSum=cumsum(cnt))
<- levels(seurat)
ordVec
<- avgHeatmap(seurat = seurat, selGenes = selGenes,
pOut colVecIdent = colPal,
ordVec=ordVec,
gapVecR=gapR$cumSum, gapVecC=NULL,cc=T,
cr=F, condCol=F)
save seurat object
saveRDS(seurat, file=paste0(basedir,
"/data/AllPatWithoutCM_LNFSCMerged_seurat.rds"))
session info
sessionInfo()
R version 4.3.0 (2023-04-21)
Platform: x86_64-apple-darwin20 (64-bit)
Running under: macOS Ventura 13.4.1
Matrix products: default
BLAS: /Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/lib/libRblas.0.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/lib/libRlapack.dylib; LAPACK version 3.11.0
locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
time zone: Europe/Berlin
tzcode source: internal
attached base packages:
[1] stats4 stats graphics grDevices utils datasets methods base
other attached packages:
[1] scater_1.28.0 scuttle_1.10.3 SingleCellExperiment_1.22.0
[4] SummarizedExperiment_1.30.2 Biobase_2.60.0 GenomicRanges_1.52.1
[7] GenomeInfoDb_1.36.4 IRanges_2.36.0 S4Vectors_0.40.1
[10] BiocGenerics_0.48.0 MatrixGenerics_1.12.3 matrixStats_1.2.0
[13] pheatmap_1.0.12 ggsci_3.0.1 here_1.0.1
[16] runSeurat3_0.1.0 ggpubr_0.6.0 lubridate_1.9.3
[19] forcats_1.0.0 stringr_1.5.1 readr_2.1.5
[22] tidyr_1.3.1 tibble_3.2.1 tidyverse_2.0.0
[25] Seurat_5.0.2 SeuratObject_5.0.1 sp_2.1-3
[28] purrr_1.0.2 cowplot_1.1.3 ggplot2_3.5.0
[31] reshape2_1.4.4 dplyr_1.1.4
loaded via a namespace (and not attached):
[1] RcppAnnoy_0.0.22 splines_4.3.0 later_1.3.2
[4] bitops_1.0-7 polyclip_1.10-6 fastDummies_1.7.3
[7] lifecycle_1.0.4 rstatix_0.7.2 rprojroot_2.0.4
[10] globals_0.16.2 lattice_0.22-5 MASS_7.3-60.0.1
[13] backports_1.4.1 magrittr_2.0.3 plotly_4.10.4
[16] rmarkdown_2.26 yaml_2.3.8 httpuv_1.6.14
[19] sctransform_0.4.1 spam_2.10-0 spatstat.sparse_3.0-3
[22] reticulate_1.35.0 pbapply_1.7-2 RColorBrewer_1.1-3
[25] abind_1.4-5 zlibbioc_1.46.0 Rtsne_0.17
[28] RCurl_1.98-1.14 GenomeInfoDbData_1.2.10 ggrepel_0.9.5
[31] irlba_2.3.5.1 listenv_0.9.1 spatstat.utils_3.0-4
[34] goftest_1.2-3 RSpectra_0.16-1 spatstat.random_3.2-3
[37] fitdistrplus_1.1-11 parallelly_1.37.1 DelayedMatrixStats_1.22.6
[40] leiden_0.4.3.1 codetools_0.2-19 DelayedArray_0.26.7
[43] tidyselect_1.2.0 farver_2.1.1 viridis_0.6.5
[46] ScaledMatrix_1.8.1 spatstat.explore_3.2-6 jsonlite_1.8.8
[49] BiocNeighbors_1.18.0 ellipsis_0.3.2 progressr_0.14.0
[52] ggridges_0.5.6 survival_3.5-8 tools_4.3.0
[55] ica_1.0-3 Rcpp_1.0.12 glue_1.7.0
[58] gridExtra_2.3 xfun_0.42 withr_3.0.0
[61] fastmap_1.1.1 fansi_1.0.6 rsvd_1.0.5
[64] digest_0.6.34 timechange_0.3.0 R6_2.5.1
[67] mime_0.12 colorspace_2.1-0 scattermore_1.2
[70] tensor_1.5 spatstat.data_3.0-4 utf8_1.2.4
[73] generics_0.1.3 data.table_1.15.2 httr_1.4.7
[76] htmlwidgets_1.6.4 S4Arrays_1.0.6 uwot_0.1.16
[79] pkgconfig_2.0.3 gtable_0.3.4 lmtest_0.9-40
[82] XVector_0.40.0 htmltools_0.5.7 carData_3.0-5
[85] dotCall64_1.1-1 scales_1.3.0 png_0.1-8
[88] knitr_1.45 rstudioapi_0.15.0 tzdb_0.4.0
[91] nlme_3.1-164 zoo_1.8-12 KernSmooth_2.23-22
[94] vipor_0.4.7 parallel_4.3.0 miniUI_0.1.1.1
[97] pillar_1.9.0 grid_4.3.0 vctrs_0.6.5
[100] RANN_2.6.1 promises_1.2.1 BiocSingular_1.16.0
[103] car_3.1-2 beachmat_2.16.0 xtable_1.8-4
[106] cluster_2.1.6 beeswarm_0.4.0 evaluate_0.23
[109] cli_3.6.2 compiler_4.3.0 rlang_1.1.3
[112] crayon_1.5.2 future.apply_1.11.1 ggsignif_0.6.4
[115] labeling_0.4.3 ggbeeswarm_0.7.2 plyr_1.8.9
[118] stringi_1.8.3 BiocParallel_1.34.2 viridisLite_0.4.2
[121] deldir_2.0-4 munsell_0.5.0 lazyeval_0.2.2
[124] spatstat.geom_3.2-9 Matrix_1.6-5 RcppHNSW_0.6.0
[127] hms_1.1.3 patchwork_1.2.0 sparseMatrixStats_1.12.2
[130] future_1.33.1 shiny_1.8.0 ROCR_1.0-11
[133] igraph_2.0.2 broom_1.0.5
date()
[1] "Wed Mar 13 22:26:46 2024"