Merge all samples

Author

Mechthild Lütge

Published

November 14, 2022

Load packages

suppressPackageStartupMessages({
  library(tidyverse)
  library(Seurat)
  library(magrittr)
  library(dplyr)
  library(purrr)
  library(ggplot2)
  library(here)
  library(runSeurat3)
})

Set base directory and load metadata

# Project base directory as resolved by here(); all data paths below are
# built relative to it.
basedir <- here()
# Per-sample metadata table; the first row of the file holds the column
# names. The columns used further down are `dataset`, `cond`, `age` and
# `location`.
metaDat <- read_tsv(file.path(basedir, "data", "metadata.txt"),
                    col_names = TRUE)

load and assign samples

# Load the stored object of one sample and annotate it with sample metadata.
#
# Arguments:
#   smpNam     - sample name; used as (the start of) a regular expression to
#                find the file "<smpNam>.*_seurat.rds" inside
#                "<basedirSmp>/data/samples/".
#   basedirSmp - project base directory.
#   smpCond    - value stored in the `cond` field of the loaded object.
#   smpAge     - value stored in the `age` field of the loaded object.
#   smpLoc     - value stored in the `location` field of the loaded object.
#
# Returns the loaded object with the three fields set.
# Stops with an informative error unless exactly one file matches the sample
# name (previously zero or several matches surfaced as an obscure readRDS()
# failure).
assignSamples <- function(smpNam, basedirSmp, smpCond, smpAge, smpLoc){
  smpDir <- paste0(basedirSmp, "/data/samples/")
  smpNamFull <- list.files(path = smpDir,
                           pattern = paste0(smpNam, ".*_seurat.rds"))

  # Exactly one file must match, otherwise the sample is ambiguous or missing.
  if (length(smpNamFull) != 1) {
    matched <- if (length(smpNamFull) > 0) {
      paste0(": ", paste(smpNamFull, collapse = ", "))
    } else {
      ""
    }
    stop("Expected exactly one '_seurat.rds' file for sample '", smpNam,
         "' in ", smpDir, ", found ", length(smpNamFull), matched,
         call. = FALSE)
  }

  seuratSmp <- readRDS(paste0(smpDir, smpNamFull))
  seuratSmp$cond <- smpCond
  seuratSmp$age <- smpAge
  seuratSmp$location <- smpLoc
  return(seuratSmp)
}


####################################################################

# Load every sample listed in the metadata table and merge them one after
# another into a single object called `seurat`.
if (length(metaDat$dataset) == 0) {
  stop("metaDat lists no datasets; nothing to merge.", call. = FALSE)
}

# Start from an explicit empty state instead of testing exists("seurat"):
# a `seurat` object left over from an earlier run would otherwise be merged
# into silently.
seurat <- NULL
for (i in seq_along(metaDat$dataset)) {
  seuratX <- assignSamples(smpNam = metaDat$dataset[i],
                           basedirSmp = basedir,
                           smpCond = metaDat$cond[i],
                           smpAge = metaDat$age[i],
                           smpLoc = metaDat$location[i])
  if (is.null(seurat)) {
    # first sample: nothing to merge with yet
    seurat <- seuratX
  } else {
    seurat <- merge(x = seurat, y = seuratX, project = "LNdev")
  }
}

# the per-sample object is no longer needed once everything is merged
remove(seuratX)

filter cells

# dimensions of the merged object before filtering
dim(seurat)
[1]  31899 115092
# Keep only cells in which none of the four marker genes
# (Ptprc, Cd79a, Cd3e, L1cam) has a non-zero value.
seurat <- subset(
  seurat,
  subset = ENSMUSG00000026395.Ptprc == 0 &
    ENSMUSG00000003379.Cd79a == 0 &
    ENSMUSG00000032093.Cd3e == 0 &
    ENSMUSG00000031391.L1cam == 0
)

# dimensions of the object after removing marker-positive cells
dim(seurat)
[1]  31899 108387
## reprocess
# Clustering resolutions, run in the order given.
res <- c(0.8, 0.6, 0.25, 0.4)

# Normalisation, variable features, scaling and PCA (30 components).
seurat <- NormalizeData(seurat)
seurat <- FindVariableFeatures(seurat)
seurat <- ScaleData(seurat, verbose = FALSE)
seurat <- RunPCA(seurat, npcs = 30, verbose = FALSE)

# Embeddings and neighbour graph, all built on the first 20 PCs.
seurat <- RunTSNE(seurat, reduction = "pca", dims = 1:20)
seurat <- RunUMAP(seurat, reduction = "pca", dims = 1:20)
seurat <- FindNeighbors(seurat, reduction = "pca", dims = 1:20)

# One clustering per resolution, each with the same fixed seed.
for (clustRes in res) {
  seurat <- FindClusters(seurat, resolution = clustRes, random.seed = 1234)
}
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 108387
Number of edges: 3407294

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.9161
Number of communities: 27
Elapsed time: 43 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 108387
Number of edges: 3407294

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.9275
Number of communities: 23
Elapsed time: 40 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 108387
Number of edges: 3407294

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.9535
Number of communities: 16
Elapsed time: 41 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 108387
Number of edges: 3407294

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.9416
Number of communities: 20
Elapsed time: 38 seconds
# Label every cell by its Rosa26eyfp.Rosa26eyfp value:
# "pos" when the value is > 0, "neg" otherwise.
# WhichCells() returns the names of the matching cells directly, so no
# subsetted copy of the whole object has to be created and kept in memory
# just to read its column names.
eyfpPos <- WhichCells(seurat, expression = Rosa26eyfp.Rosa26eyfp > 0)

seurat$EYFP <- "neg"
seurat$EYFP[which(colnames(seurat) %in% eyfpPos)] <- "pos"

# cells per dataset, split by EYFP label
(table(seurat$dataset, seurat$EYFP))
                                                                 
                                                                    neg   pos
  1_20210401_Mu_Ccl19tTA_EYFP_E18_iLN_fibroblasts_V3               4459    60
  1_20210721_Mu_Ccl19tTA_EYFP_E18_iLN_fibroblasts_V3              10002   121
  10_20210120_Ccl19tTA_EYFP_P7_iLN_fibroblasts_V3                  1571   716
  11_20210120_Ccl19tTA_EYFP_P7_mLN_fibroblasts_V3                  5787   762
  12_20210121_Ccl19tTA_EYFP_adult_iLN_fibroblasts_V3                825  1165
  12_20210628_Mu_Ccl19tTA_EYFP_P7_mLN_EYFPpos_V3                    123  1002
  13_20210121_Ccl19tTA_EYFP_adult_mLN_fibroblasts_V3               3118   665
  13_20210628_Mu_Ccl19tTA_EYFP_P7_iLN_EYFPpos_and_neg_V3             30    14
  2_20201027_Ccl19tTA_EYFP_LTbR_8w_iLN_fibroblasts_V3               474   530
  2_20210216_Mu_Ccl19tTA_EYFP_Fatemap_E17to7wk_iLN_fibroblasts_V3   340    72
  2_20210401_Mu_Ccl19tTA_EYFP_E18_mLN_fibroblasts_V3               4456   759
  2_20210721_Mu_Ccl19tTA_EYFP_E18_mLN_fibroblasts_V3               5490  1751
  3_20201027_Ccl19tTA_EYFP_LTbR_8w_mLN_fibroblasts_V3              1977  1071
  3_20210216_Mu_Ccl19tTA_EYFP_Fatemap_E17to7wk_mLN_fibroblasts_V3  2882   837
  3_20210722_Mu_Ccl19tTA_LTbR_Fatemap_E18to7wk_iLN_fibroblasts_V3  1936   106
  3_20211108_Mu_Ccl19tTA_EYFP_E18_iLN_fib_V3                        128     6
  316781_05-5_20230427_Mu_Ccl19-EYFP_iLN_stroma                     843  1981
  316781_06-6_20230427_Mu_Ccl19-EYFP_periLN_stroma                 2487  3585
  4_20210216_Mu_Ccl19tTA_LTbR_Fatemap_E17to7wk_mLN_fibroblasts_V3  3330   141
  4_20210722_Mu_Ccl19tTA_LTbR_Fatemap_E18to7wk_mLN_fibroblasts_V3  4227   575
  6_20210518_Mu_Ccl19-tTA_EYFP_LTbR_8w_mLN_fibroblasts_V3          3875  2579
  o27244_1_05-5_20211214_Mu_Ccl19tTA_EYFP_3wk_iLN_fib_V3            565  4398
  o27244_1_06-6_20211214_Mu_Ccl19tTA_EYFP_3wk_mLN_fib_V3           1087  6241
  o27936_1_1-1_20220215_Mu_Ccl19iEYFP_FM18_Ad_mLN_GEM              3036  1482
  o27936_1_2-2_20220215_Mu_Ccl19iEYFP_FM18_Ad_iLN_GEM               596   125
  o28429_1_03-3_20220405_Mu_Ccl19iEYFP_8wk_mLN_V3                  1009  3378
  o28429_1_04-4_20220405_Mu_Ccl19iEYFP_8wk_iLN_V3                   772   928
  o298321_14-14_20221025_Mu_Ccl19iEYFP_E18_pLN_GEM                 6613    58
  o304531_11-11_20221221_Mu_Ccl19iEYFP_Adult_iLN_EYFPposneg_GEM     640   601

save seurat

# Write the merged, filtered and EYFP-labelled object to the data folder.
mergedOut <- file.path(basedir, "data", "AllSamplesMerged_seurat.rds")
saveRDS(seurat, file = mergedOut)

EYFP only

# Restrict the object to the cells labelled EYFP "pos".
seurat <- subset(seurat, subset = EYFP == "pos")

## reprocess
# Clustering resolutions, run in the order given.
res <- c(0.8, 0.6, 0.25, 0.4)

# Normalisation, variable features, scaling and PCA (30 components).
seurat <- NormalizeData(seurat)
seurat <- FindVariableFeatures(seurat)
seurat <- ScaleData(seurat, verbose = FALSE)
seurat <- RunPCA(seurat, npcs = 30, verbose = FALSE)

# Embeddings and neighbour graph, all built on the first 20 PCs.
seurat <- RunTSNE(seurat, reduction = "pca", dims = 1:20)
seurat <- RunUMAP(seurat, reduction = "pca", dims = 1:20)
seurat <- FindNeighbors(seurat, reduction = "pca", dims = 1:20)

# One clustering per resolution, each with the same fixed seed.
for (clustRes in res) {
  seurat <- FindClusters(seurat, resolution = clustRes, random.seed = 1234)
}
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 35709
Number of edges: 1157449

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.8891
Number of communities: 21
Elapsed time: 7 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 35709
Number of edges: 1157449

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.9043
Number of communities: 17
Elapsed time: 6 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 35709
Number of edges: 1157449

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.9400
Number of communities: 13
Elapsed time: 7 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 35709
Number of edges: 1157449

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.9232
Number of communities: 14
Elapsed time: 7 seconds

Save EYFP-only object

# Write the reprocessed EYFP-positive object to the data folder.
eyfpOut <- file.path(basedir, "data", "EYFPonly_seurat.rds")
saveRDS(seurat, file = eyfpOut)

Session info

# print R version, platform and attached/loaded package versions
sessionInfo()
R version 4.3.0 (2023-04-21)
Platform: x86_64-apple-darwin20 (64-bit)
Running under: macOS Ventura 13.4.1

Matrix products: default
BLAS:   /Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/lib/libRblas.0.dylib 
LAPACK: /Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/lib/libRlapack.dylib;  LAPACK version 3.11.0

locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

time zone: Europe/Berlin
tzcode source: internal

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
 [1] runSeurat3_0.1.0   here_1.0.1         magrittr_2.0.3     Seurat_5.0.2      
 [5] SeuratObject_5.0.1 sp_2.1-3           lubridate_1.9.3    forcats_1.0.0     
 [9] stringr_1.5.1      dplyr_1.1.4        purrr_1.0.2        readr_2.1.5       
[13] tidyr_1.3.1        tibble_3.2.1       ggplot2_3.5.0      tidyverse_2.0.0   

loaded via a namespace (and not attached):
  [1] RColorBrewer_1.1-3          rstudioapi_0.15.0          
  [3] jsonlite_1.8.8              spatstat.utils_3.0-4       
  [5] rmarkdown_2.26              zlibbioc_1.46.0            
  [7] vctrs_0.6.5                 ROCR_1.0-11                
  [9] spatstat.explore_3.2-6      RCurl_1.98-1.14            
 [11] S4Arrays_1.0.6              htmltools_0.5.7            
 [13] sctransform_0.4.1           parallelly_1.37.1          
 [15] KernSmooth_2.23-22          htmlwidgets_1.6.4          
 [17] ica_1.0-3                   plyr_1.8.9                 
 [19] plotly_4.10.4               zoo_1.8-12                 
 [21] igraph_2.0.2                mime_0.12                  
 [23] lifecycle_1.0.4             pkgconfig_2.0.3            
 [25] Matrix_1.6-5                R6_2.5.1                   
 [27] fastmap_1.1.1               GenomeInfoDbData_1.2.10    
 [29] MatrixGenerics_1.12.3       fitdistrplus_1.1-11        
 [31] future_1.33.1               shiny_1.8.0                
 [33] digest_0.6.34               colorspace_2.1-0           
 [35] S4Vectors_0.40.1            patchwork_1.2.0            
 [37] rprojroot_2.0.4             tensor_1.5                 
 [39] RSpectra_0.16-1             irlba_2.3.5.1              
 [41] GenomicRanges_1.52.1        progressr_0.14.0           
 [43] fansi_1.0.6                 spatstat.sparse_3.0-3      
 [45] timechange_0.3.0            httr_1.4.7                 
 [47] polyclip_1.10-6             abind_1.4-5                
 [49] compiler_4.3.0              bit64_4.0.5                
 [51] withr_3.0.0                 fastDummies_1.7.3          
 [53] MASS_7.3-60.0.1             DelayedArray_0.26.7        
 [55] tools_4.3.0                 lmtest_0.9-40              
 [57] httpuv_1.6.14               future.apply_1.11.1        
 [59] goftest_1.2-3               glue_1.7.0                 
 [61] nlme_3.1-164                promises_1.2.1             
 [63] grid_4.3.0                  Rtsne_0.17                 
 [65] cluster_2.1.6               reshape2_1.4.4             
 [67] generics_0.1.3              gtable_0.3.4               
 [69] spatstat.data_3.0-4         tzdb_0.4.0                 
 [71] data.table_1.15.2           hms_1.1.3                  
 [73] XVector_0.40.0              utf8_1.2.4                 
 [75] BiocGenerics_0.48.0         spatstat.geom_3.2-9        
 [77] RcppAnnoy_0.0.22            ggrepel_0.9.5              
 [79] RANN_2.6.1                  pillar_1.9.0               
 [81] vroom_1.6.5                 spam_2.10-0                
 [83] RcppHNSW_0.6.0              later_1.3.2                
 [85] splines_4.3.0               lattice_0.22-5             
 [87] bit_4.0.5                   survival_3.5-8             
 [89] deldir_2.0-4                tidyselect_1.2.0           
 [91] SingleCellExperiment_1.22.0 miniUI_0.1.1.1             
 [93] pbapply_1.7-2               knitr_1.45                 
 [95] gridExtra_2.3               IRanges_2.36.0             
 [97] SummarizedExperiment_1.30.2 scattermore_1.2            
 [99] stats4_4.3.0                xfun_0.42                  
[101] Biobase_2.60.0              matrixStats_1.2.0          
[103] pheatmap_1.0.12             stringi_1.8.3              
[105] lazyeval_0.2.2              yaml_2.3.8                 
[107] evaluate_0.23               codetools_0.2-19           
[109] cli_3.6.2                   uwot_0.1.16                
[111] xtable_1.8-4                reticulate_1.35.0          
[113] munsell_0.5.0               GenomeInfoDb_1.36.4        
[115] Rcpp_1.0.12                 globals_0.16.2             
[117] spatstat.random_3.2-3       png_0.1-8                  
[119] parallel_4.3.0              ellipsis_0.3.2             
[121] dotCall64_1.1-1             bitops_1.0-7               
[123] listenv_0.9.1               viridisLite_0.4.2          
[125] scales_1.3.0                ggridges_0.5.6             
[127] crayon_1.5.2                leiden_0.4.3.1             
[129] rlang_1.1.3                 cowplot_1.1.3              
# print the current date and time of this run
date()
[1] "Wed Apr  3 11:47:31 2024"