visualize DE genes FRCs
Load packages
## load packages
suppressPackageStartupMessages({
library(dplyr)
library(reshape2)
library(ggplot2)
library(cowplot)
library(purrr)
library(Seurat)
library(tidyverse)
library(ggpubr)
library(runSeurat3)
library(here)
library(ggsci)
library(pheatmap)
library(scater)
})
load seurat object
<- here()
basedir
<- readRDS(file= paste0(basedir,
seurat "/data/AllPatWithoutCM_FRConly_intOrig_seurat.rds"))
## remove cluster with cycling cells
<- subset(seurat, integrated_snn_res.0.25 == "8", invert=T)
seurat
DefaultAssay(object = seurat) <- "RNA"
$intCluster <- as.character(seurat$integrated_snn_res.0.25)
seuratIdents(seurat) <- seurat$intCluster
## set col palettes
<- c(pal_uchicago()(6), "#6692a3", "#3b7f60")
colPal names(colPal) <- c("0", "1", "2", "3", "4", "5", "6", "7")
<- c(pal_nejm()(7),pal_futurama()(12))[1:length(unique(seurat$patient))]
colPat names(colPat) <- unique(seurat$patient)
<- c("#6692a3","#971c1c","#d17d67")
colCond names(colCond) <- unique(seurat$cond)
<- pal_uchicago()(length(unique(seurat$grp)))
colGrp names(colGrp) <- unique(seurat$grp)
<- pal_npg()(length(unique(seurat$origin)))
colOri names(colOri) <- unique(seurat$origin)
<- colCond
colTon
## all activated in one grp
$cond2 <- seurat$cond
seurat$cond2[which(seurat$cond %in% c("chronic", "acute"))] <- "activated"
seurat
<- c("#6692a3","#971c1c")
colCond2 names(colCond2) <- c("resting", "activated")
visualize data
clustering
## visualize input data
DimPlot(seurat, reduction = "umap", cols=colPal)+
theme_bw() +
theme(axis.text = element_blank(), axis.ticks = element_blank(),
panel.grid.minor = element_blank()) +
xlab("UMAP1") +
ylab("UMAP2")
clustering split by cond2
DimPlot(seurat, reduction = "umap", cols=colPal, pt.size=0.5,
split.by = "cond2")+
theme_void()
patient
## visualize input data
DimPlot(seurat, reduction = "umap", cols=colPat, group.by = "patient")+
theme_bw() +
theme(axis.text = element_blank(), axis.ticks = element_blank(),
panel.grid.minor = element_blank()) +
xlab("UMAP1") +
ylab("UMAP2")
cond
## visualize input data
DimPlot(seurat, reduction = "umap", cols=colCond, group.by = "cond")+
theme_bw() +
theme(axis.text = element_blank(), axis.ticks = element_blank(),
panel.grid.minor = element_blank()) +
xlab("UMAP1") +
ylab("UMAP2")
cond2
## visualize input data
DimPlot(seurat, reduction = "umap", cols=colCond2, group.by = "cond2",
shuffle = T)+
theme_bw() +
theme(axis.text = element_blank(), axis.ticks = element_blank(),
panel.grid.minor = element_blank()) +
xlab("UMAP1") +
ylab("UMAP2")
DimPlot(seurat, reduction = "umap", cols=colCond2, group.by = "cond2",
pt.size=0.5, shuffle = T)+
theme_void()
grp
## visualize input data
DimPlot(seurat, reduction = "umap", cols=colGrp, group.by = "grp")+
theme_bw() +
theme(axis.text = element_blank(), axis.ticks = element_blank(),
panel.grid.minor = element_blank()) +
xlab("UMAP1") +
ylab("UMAP2")
origin
## visualize input data
DimPlot(seurat, reduction = "umap", cols=colOri, group.by = "origin")+
theme_bw() +
theme(axis.text = element_blank(), axis.ticks = element_blank(),
panel.grid.minor = element_blank()) +
xlab("UMAP1") +
ylab("UMAP2")
relative cluster abundances per cond2
<- data.frame(table(seurat$cond2, seurat$intCluster))
clustCond colnames(clustCond) <- c("cond", "intCluster", "cnt")
<- data.frame(table(seurat$intCluster))
clustTot colnames(clustTot) <- c("intCluster", "tot")
<- clustCond %>% left_join(., clustTot, by = "intCluster") %>%
clustDat mutate(relAb = cnt/tot * 100)
ggbarplot(clustDat, x="intCluster", y="relAb",
fill="cond",
colour= "cond",
palette = colCond2,
xlab = "cluster",
ylab = "percentage")
## relative abundance per cond
<- data.frame(table(seurat$cond2, seurat$intCluster))
clustCond colnames(clustCond) <- c("cond", "intCluster", "cnt")
<- data.frame(table(seurat$cond2))
condTot colnames(condTot) <- c("cond", "tot")
<- data.frame(col=colPal) %>%
colPaldat rownames_to_column(var = "intCluster")
<- clustCond %>% left_join(., condTot, by = "cond") %>%
clustDat2 mutate(relAb = cnt/tot * 100) %>%
left_join(., colPaldat, by = "intCluster")
::kable(clustDat2) knitr
cond | intCluster | cnt | tot | relAb | col |
---|---|---|---|---|---|
activated | 0 | 6373 | 25449 | 25.042241 | #800000FF |
resting | 0 | 6049 | 18322 | 33.014955 | #800000FF |
activated | 1 | 3969 | 25449 | 15.595898 | #767676FF |
resting | 1 | 4080 | 18322 | 22.268311 | #767676FF |
activated | 2 | 3164 | 25449 | 12.432709 | #FFA319FF |
resting | 2 | 3773 | 18322 | 20.592730 | #FFA319FF |
activated | 3 | 4244 | 25449 | 16.676490 | #8A9045FF |
resting | 3 | 1560 | 18322 | 8.514354 | #8A9045FF |
activated | 4 | 2767 | 25449 | 10.872726 | #155F83FF |
resting | 4 | 724 | 18322 | 3.951534 | #155F83FF |
activated | 5 | 1917 | 25449 | 7.532712 | #C16622FF |
resting | 5 | 986 | 18322 | 5.381509 | #C16622FF |
activated | 6 | 1528 | 25449 | 6.004165 | #6692a3 |
resting | 6 | 760 | 18322 | 4.148019 | #6692a3 |
activated | 7 | 1487 | 25449 | 5.843059 | #3b7f60 |
resting | 7 | 390 | 18322 | 2.128589 | #3b7f60 |
lapply(names(colCond2), function(co){
<- clustDat2 %>% filter(cond==co)
clustDat2sel pie(clustDat2sel$relAb,
labels = clustDat2sel$intCluster,
col = clustDat2sel$col,
main = co)
symbols(0, 0, circles = 1, add=TRUE, bg="white")
})
[[1]]
NULL
[[2]]
NULL
lapply(names(colCond2), function(co){
<- clustDat2 %>% filter(cond==co)
clustDat2sel pie(clustDat2sel$relAb,
labels = clustDat2sel$intCluster,
col = clustDat2sel$col,
main = co)
})
[[1]]
NULL
[[2]]
NULL
### different sorting stratgies for the first samples..
<- subset(seurat, patient %in% c("ucd010","ucd13","ucd14",
seuratSub "P_20220201","P_20220202",
"P_20200220"))
<- data.frame(table(seuratSub$cond2, seuratSub$intCluster))
clustCond colnames(clustCond) <- c("cond", "intCluster", "cnt")
<- data.frame(table(seuratSub$intCluster))
clustTot colnames(clustTot) <- c("intCluster", "tot")
<- clustCond %>% left_join(., clustTot, by = "intCluster") %>%
clustDat mutate(relAb = cnt/tot * 100)
ggbarplot(clustDat, x="intCluster", y="relAb",
fill="cond",
colour= "cond",
palette = colCond2,
xlab = "cluster",
ylab = "percentage")
## relative abundance per cond
<- data.frame(table(seuratSub$cond2, seuratSub$intCluster))
clustCond colnames(clustCond) <- c("cond", "intCluster", "cnt")
<- data.frame(table(seuratSub$cond2))
condTot colnames(condTot) <- c("cond", "tot")
<- data.frame(col=colPal) %>%
colPaldat rownames_to_column(var = "intCluster")
<- clustCond %>% left_join(., condTot, by = "cond") %>%
clustDat2 mutate(relAb = cnt/tot * 100) %>%
left_join(., colPaldat, by = "intCluster")
::kable(clustDat2) knitr
cond | intCluster | cnt | tot | relAb | col |
---|---|---|---|---|---|
activated | 0 | 3464 | 18575 | 18.648721 | #800000FF |
resting | 0 | 2189 | 7849 | 27.888903 | #800000FF |
activated | 1 | 2327 | 18575 | 12.527591 | #767676FF |
resting | 1 | 1431 | 7849 | 18.231622 | #767676FF |
activated | 2 | 1990 | 18575 | 10.713324 | #FFA319FF |
resting | 2 | 1941 | 7849 | 24.729265 | #FFA319FF |
activated | 3 | 4021 | 18575 | 21.647375 | #8A9045FF |
resting | 3 | 728 | 7849 | 9.275067 | #8A9045FF |
activated | 4 | 2704 | 18575 | 14.557201 | #155F83FF |
resting | 4 | 412 | 7849 | 5.249076 | #155F83FF |
activated | 5 | 1664 | 18575 | 8.958277 | #C16622FF |
resting | 5 | 498 | 7849 | 6.344757 | #C16622FF |
activated | 6 | 1344 | 18575 | 7.235532 | #6692a3 |
resting | 6 | 477 | 7849 | 6.077207 | #6692a3 |
activated | 7 | 1061 | 18575 | 5.711978 | #3b7f60 |
resting | 7 | 173 | 7849 | 2.204102 | #3b7f60 |
lapply(names(colCond2), function(co){
<- clustDat2 %>% filter(cond==co)
clustDat2sel pie(clustDat2sel$relAb,
labels = clustDat2sel$intCluster,
col = clustDat2sel$col,
main = paste0(co, "- only same sorting"))
symbols(0, 0, circles = 1, add=TRUE, bg="white")
})
[[1]]
NULL
[[2]]
NULL
lapply(names(colCond2), function(co){
<- clustDat2 %>% filter(cond==co)
clustDat2sel pie(clustDat2sel$relAb,
labels = clustDat2sel$intCluster,
col = clustDat2sel$col,
main = paste0(co, "- only same sorting"))
})
[[1]]
NULL
[[2]]
NULL
relative cluster abundances per patient
<- data.frame(table(seurat$patient, seurat$intCluster))
clustCond colnames(clustCond) <- c("patient", "intCluster", "cnt")
<- data.frame(table(seurat$patient))
clustTot colnames(clustTot) <- c("patient", "tot")
<- data.frame(seurat$patient, seurat$cond2) %>% unique()
patCond colnames(patCond) <- c("patient", "cond")
<- clustCond %>% left_join(., clustTot, by = "patient") %>%
clustDat mutate(relAb = cnt/tot * 100) %>% left_join(., patCond, by = "patient")
ggbarplot(clustDat, x="patient", y="relAb",
width = NULL,
fill="intCluster",
colour= "intCluster",
palette = colPal,
xlab = "patient",
ylab = "percentage",
legend = "right",
legend.title = "") +
rotate_x_text(angle = 90) +
facet_grid(~cond, scales = "free", space = "free")
vis overall DE genes
marker genes
Idents(seurat) <- seurat$cond2
<- FindAllMarkers(object = seurat, assay ="RNA",
seurat_markers_all only.pos = TRUE, min.pct = 0.25,
logfc.threshold = 0.25,
test.use = "wilcox")
avg heatmap
<- levels(seurat)
cluster <- seurat_markers_all %>% group_by(cluster) %>%
selGenesAll top_n(-15, p_val_adj) %>%
top_n(15, avg_log2FC)
<- selGenesAll %>% mutate(geneIDval=gsub("^.*\\.", "", gene)) %>% filter(nchar(geneIDval)>1)
selGenesAll
$clust_plus_cond <- paste0(seurat$intCluster, "_", seurat$cond2)
seuratIdents(seurat) <- seurat$clust_plus_cond
<- avgHeatmap(seurat = seurat, selGenes = selGenesAll,
pOut colVecIdent = colPal,
ordVec=levels(seurat),
gapVecR=NULL, gapVecC=NULL,cc=FALSE,
cr=T, condCol=T, colVecCond = colCond2)
sc Heatmap
DefaultAssay(object = seurat) <- "RNA"
<- ScaleData(seurat, features = rownames(seurat))
seurat
### plot DE genes top 25 avg logFC
<- seurat_markers_all %>% group_by(cluster) %>%
DEGenesAll top_n(30, avg_log2FC)
<- DEGenesAll %>% mutate(label=gsub("^.*\\.", "", gene))
selFeatures DoHeatmap(seurat, features = selFeatures$gene, group.by = "cond2",
group.colors = colCond2, slot = 'scale.data', label = F,
disp.min = -0.5, disp.max = 1.5) +
scale_fill_continuous(type = "viridis") +
scale_y_discrete(breaks=selFeatures$gene, labels=selFeatures$label)
cw DE genes
Idents(seurat) <- seurat$cond2
<- unique(seurat$intCluster)
grpVec
<- lapply(grpVec, function(grp){
clustDE <- unique(seurat$intCluster)[which(
grpSub unique(seurat$intCluster)==grp)]
<- subset(seurat, intCluster == grpSub)
seuratSub <-FindAllMarkers(seuratSub, only.pos=T, logfc.threshold = 0.1,
DEgenes min.pct = 0.01)
if(nrow(DEgenes)>1){
<- DEgenes %>% filter(p_val_adj<0.01) %>%
DEgenes mutate(group=paste0(grp, "_", cluster)) %>%
mutate(geneID=gsub(".*\\.", "", gene)) %>%
filter(nchar(geneID)>1)
}
})
names(clustDE) <- grpVec
<- data.frame(do.call("rbind", clustDE))
clustDE_Dat
write.table(clustDE_Dat,
file=paste0(basedir, "/data/AllPatWithoutCM_FRConly_intOrig_cond2",
"_cwDEGenes.txt"),
row.names = FALSE, col.names = TRUE, quote = FALSE, sep = "\t")
avg Heatmap top cwDE genes
5
1
2
0
4
7
3
6
scatterplot cw DE genes cond 2
## scatterplot
## in red are all genes with an adjusted p_val < 0.01 and avg_logFC >0.25
################################################################################
### include only genes expressed in at least 10 % of cells in one condition ####
################################################################################
<- unique(seurat$integrated_snn_res.0.25)
grpVec
lapply(grpVec, function(x){
<- subset(seurat, integrated_snn_res.0.25 == x)
allID Idents(allID) <-allID$cond2
<- AverageExpression(allID, group.by="cond2",
avg.allID show.progress = FALSE)
<- data.frame(log1p(avg.allID$RNA)) %>%
avg.allID rownames_to_column(var="gene")
## expression frequency
<- as.data.frame(GetAssayData(allID, assay = "RNA",
GeneFreqSel slot = "data")) %>%
::rownames_to_column(var="gene") %>% mutate(count=rowSums(.!=0)-1) %>%
tibblemutate(countFreq=count/ncol(allID)) %>%
::select(gene,count, countFreq) %>%
dplyr::filter(countFreq>=0.1)
dplyr
<- clustDE_Dat %>% mutate(grp=gsub("_.*", "", group)) %>%
signGenes2 ::filter(grp == x) %>%
dplyr::filter(p_val_adj<0.01 & (avg_log2FC > 0.25))
dplyr
<- signGenes2 %>% group_by(.,cluster) %>%
genes.to.label slice_max(avg_log2FC, n=15) %>% mutate(labelNam=geneID)
<- avg.allID %>% mutate(labelNam=gsub("^.*\\.", "", gene)) %>%
avg.allID mutate(colourGrp=ifelse(gene %in% genes.to.label$gene, "lab",
ifelse(gene%in%signGenes2$gene, "sig", "notSig"))) %>%
::filter(gene%in%GeneFreqSel$gene)
dplyr
<- ggscatter(avg.allID, x="resting", y="activated",
p_allID color = "colourGrp",
palette = c("#660000", "#808080", "#FF3333"),
label="labelNam",
label.select = genes.to.label$labelNam,
repel = F,
label.rectangle=F,
xlab="resting",
ylab = "activated",
title=paste0("average expression - cluster ", x),
legend="none",
font.label=10,
alpha=0.9,
size=0.5)
p_allID
})
[[1]]
[[2]]
[[3]]
[[4]]
[[5]]
[[6]]
[[7]]
[[8]]
sc heatmap across all top cw DE genes
<- rep(colPal, each=2)
colPal2 names(colPal2) <- as.vector(t(outer(names(colPal), names(colCond2), paste,
sep="_")))
### plot DE genes top 15 avg logFC
<- clustDE_Dat %>% group_by(group) %>%
DEGenesAll top_n(15, avg_log2FC)
<- DEGenesAll %>% mutate(label=gsub("^.*\\.", "", gene))
selFeatures DoHeatmap(seurat, features = unique(selFeatures$gene),
group.by = "clust_plus_cond",
group.colors = colPal2, slot = 'scale.data', label = F,
disp.min = -0.5, disp.max = 1.5) +
scale_fill_continuous(type = "viridis") +
scale_y_discrete(breaks=selFeatures$gene, labels=selFeatures$label)
session info
sessionInfo()
R version 4.3.0 (2023-04-21)
Platform: x86_64-apple-darwin20 (64-bit)
Running under: macOS Ventura 13.4.1
Matrix products: default
BLAS: /Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/lib/libRblas.0.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/lib/libRlapack.dylib; LAPACK version 3.11.0
locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
time zone: Europe/Berlin
tzcode source: internal
attached base packages:
[1] stats4 stats graphics grDevices utils datasets methods base
other attached packages:
[1] scater_1.28.0 scuttle_1.10.3 SingleCellExperiment_1.22.0
[4] SummarizedExperiment_1.30.2 Biobase_2.60.0 GenomicRanges_1.52.1
[7] GenomeInfoDb_1.36.4 IRanges_2.36.0 S4Vectors_0.40.1
[10] BiocGenerics_0.48.0 MatrixGenerics_1.12.3 matrixStats_1.2.0
[13] pheatmap_1.0.12 ggsci_3.0.1 here_1.0.1
[16] runSeurat3_0.1.0 ggpubr_0.6.0 lubridate_1.9.3
[19] forcats_1.0.0 stringr_1.5.1 readr_2.1.5
[22] tidyr_1.3.1 tibble_3.2.1 tidyverse_2.0.0
[25] Seurat_5.0.2 SeuratObject_5.0.1 sp_2.1-3
[28] purrr_1.0.2 cowplot_1.1.3 ggplot2_3.5.0
[31] reshape2_1.4.4 dplyr_1.1.4
loaded via a namespace (and not attached):
[1] RcppAnnoy_0.0.22 splines_4.3.0 later_1.3.2
[4] bitops_1.0-7 polyclip_1.10-6 fastDummies_1.7.3
[7] lifecycle_1.0.4 rstatix_0.7.2 rprojroot_2.0.4
[10] globals_0.16.2 lattice_0.22-5 MASS_7.3-60.0.1
[13] backports_1.4.1 magrittr_2.0.3 limma_3.56.2
[16] plotly_4.10.4 rmarkdown_2.26 yaml_2.3.8
[19] httpuv_1.6.14 sctransform_0.4.1 spam_2.10-0
[22] spatstat.sparse_3.0-3 reticulate_1.35.0 pbapply_1.7-2
[25] RColorBrewer_1.1-3 abind_1.4-5 zlibbioc_1.46.0
[28] Rtsne_0.17 presto_1.0.0 RCurl_1.98-1.14
[31] GenomeInfoDbData_1.2.10 ggrepel_0.9.5 irlba_2.3.5.1
[34] listenv_0.9.1 spatstat.utils_3.0-4 goftest_1.2-3
[37] RSpectra_0.16-1 spatstat.random_3.2-3 fitdistrplus_1.1-11
[40] parallelly_1.37.1 DelayedMatrixStats_1.22.6 leiden_0.4.3.1
[43] codetools_0.2-19 DelayedArray_0.26.7 tidyselect_1.2.0
[46] farver_2.1.1 viridis_0.6.5 ScaledMatrix_1.8.1
[49] spatstat.explore_3.2-6 jsonlite_1.8.8 BiocNeighbors_1.18.0
[52] ellipsis_0.3.2 progressr_0.14.0 ggridges_0.5.6
[55] survival_3.5-8 tools_4.3.0 ica_1.0-3
[58] Rcpp_1.0.12 glue_1.7.0 gridExtra_2.3
[61] xfun_0.42 withr_3.0.0 fastmap_1.1.1
[64] fansi_1.0.6 rsvd_1.0.5 digest_0.6.34
[67] timechange_0.3.0 R6_2.5.1 mime_0.12
[70] colorspace_2.1-0 scattermore_1.2 tensor_1.5
[73] spatstat.data_3.0-4 utf8_1.2.4 generics_0.1.3
[76] data.table_1.15.2 httr_1.4.7 htmlwidgets_1.6.4
[79] S4Arrays_1.0.6 uwot_0.1.16 pkgconfig_2.0.3
[82] gtable_0.3.4 lmtest_0.9-40 XVector_0.40.0
[85] htmltools_0.5.7 carData_3.0-5 dotCall64_1.1-1
[88] scales_1.3.0 png_0.1-8 knitr_1.45
[91] rstudioapi_0.15.0 tzdb_0.4.0 nlme_3.1-164
[94] zoo_1.8-12 KernSmooth_2.23-22 vipor_0.4.7
[97] parallel_4.3.0 miniUI_0.1.1.1 pillar_1.9.0
[100] grid_4.3.0 vctrs_0.6.5 RANN_2.6.1
[103] promises_1.2.1 BiocSingular_1.16.0 car_3.1-2
[106] beachmat_2.16.0 xtable_1.8-4 cluster_2.1.6
[109] beeswarm_0.4.0 evaluate_0.23 cli_3.6.2
[112] compiler_4.3.0 rlang_1.1.3 crayon_1.5.2
[115] future.apply_1.11.1 ggsignif_0.6.4 labeling_0.4.3
[118] ggbeeswarm_0.7.2 plyr_1.8.9 stringi_1.8.3
[121] BiocParallel_1.34.2 viridisLite_0.4.2 deldir_2.0-4
[124] munsell_0.5.0 lazyeval_0.2.2 spatstat.geom_3.2-9
[127] Matrix_1.6-5 RcppHNSW_0.6.0 hms_1.1.3
[130] patchwork_1.2.0 sparseMatrixStats_1.12.2 future_1.33.1
[133] shiny_1.8.0 ROCR_1.0-11 igraph_2.0.2
[136] broom_1.0.5
date()
[1] "Wed Mar 13 19:07:39 2024"