Detailed walk-through of de novo CIDER (dnCIDER) on pancreas data • CIDER

Introduction

This vignette performs dnCIDER on a cross-species pancreas dataset. It is aimed to show the underneath structure of dnCIDER compared to the other high level vignette.

Set up

In addition to CIDER, we will load the following packages:

library(CIDER)
library(Seurat)
#> Attaching SeuratObject
library(parallel)
library(cowplot)

Load example data

The example data can be downloaded from https://figshare.com/s/d5474749ca8c711cc205.

Pancreatic cell data\(^1\) contain cells from human (8241 cells) and mouse (1886 cells).

load("../data/pancreas_counts.RData") # count matrix
load("../data/pancreas_meta.RData") # meta data/cell information
seu <- CreateSeuratObject(counts = pancreas_counts, meta.data = pancreas_meta)
table(seu$Batch)
#> 
#> human mouse 
#>  8241  1886

Perform initial clustering

seu_list <- Seurat::SplitObject(seu, split.by = "Batch")
seu_list <- mclapply(seu_list, function(x) {
  x <- NormalizeData(x, normalization.method = "LogNormalize", 
                     scale.factor = 10000, verbose = FALSE)
  x <- FindVariableFeatures(x, selection.method = "vst", 
                            nfeatures = 2000, verbose = FALSE)
  x <- ScaleData(x, verbose = FALSE, vars.to.regress = "Sample")
  x <- RunPCA(x, features = VariableFeatures(object = x), verbose = FALSE)  
  x <- FindNeighbors(x, dims = 1:15, verbose = FALSE)
  x <- FindClusters(x, resolution = 0.6, verbose = FALSE)
  return(x)
})

knitr::kable(table(seu_list[[2]]$Group, seu_list[[2]]$seurat_clusters))

	0	1	2	3	4	5	6	7	8	9	10	11	12	13	14	15	16	17	18
acinar	0	0	823	0	0	0	1	1	2	102	0	0	0	0	0	0	0	3	0
activated_stellate	0	0	0	1	0	0	0	0	269	0	0	0	0	0	0	0	1	4	0
alpha	1067	3	0	0	0	665	0	0	0	0	251	243	1	6	0	0	1	0	4
b_cell	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
beta	1	886	2	812	740	1	0	0	0	0	2	0	3	2	1	0	4	0	1
delta	0	3	2	1	3	0	0	0	0	0	1	0	215	1	216	0	148	0	2
ductal	1	1	6	0	0	0	469	386	0	169	0	0	1	0	0	0	0	0	0
endothelial	0	0	0	0	0	0	0	0	1	0	0	0	0	0	0	212	0	1	0
epsilon	0	0	0	0	0	0	0	0	0	0	0	0	8	8	0	0	1	0	0
gamma	1	1	0	0	0	0	0	0	0	0	1	0	3	210	0	0	25	0	0
immune_other	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
macrophage	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	39
mast	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	25
quiescent_stellate	1	0	0	1	0	0	1	0	4	0	0	0	0	0	0	0	0	153	0
schwann	0	0	0	0	0	0	0	0	10	0	0	0	0	0	0	0	0	0	0
t_cell	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	7

seu_list <- mclapply(seu_list, RunTSNE, dims = 1:15)
p1 <- scatterPlot(seu_list[[1]], "tsne", colour.by = "seurat_clusters")
p2 <- scatterPlot(seu_list[[2]], "tsne", colour.by = "seurat_clusters")
cowplot::plot_grid(p1,p2)

dist_coef <- getDistMat(seu_list, downsampling.size = 50)
#> 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |===================================                                   |  50%
  |                                                                            
  |======================================================================| 100%

par(mfrow = c(length(seu_list),1))
for(i in which(sapply(dist_coef, function(x) return(!is.null(x))))){
  tmp <- dist_coef[[i]] + t(dist_coef[[i]])
  diag(tmp) <- 1
  pheatmap::pheatmap(tmp, display_numbers = TRUE)
}

for(seu_itor in 1:2){
  tmp <- dist_coef[[seu_itor]] + t(dist_coef[[seu_itor]])
  diag(tmp) <- 1
  tmp <- 1 - tmp
  hc <- hclust(as.dist(tmp), method = "average")
  hres <- cutree(hc, h = 0.4)
  df_hres <- data.frame(hres)
  df_hres$hres <- paste0(df_hres$hres, "_", unique(seu_list[[seu_itor]]$Batch))
  seu_list[[seu_itor]]$inicluster_tmp <- paste0(seu_list[[seu_itor]]$seurat_clusters, "_", seu_list[[seu_itor]]$Batch)
  seu_list[[seu_itor]]$inicluster <- df_hres$hres[match(seu_list[[seu_itor]]$inicluster_tmp,rownames(df_hres))]
}

# plot(as.dendrogram(hc), horiz = T)

p1 <- scatterPlot(seu_list[[1]], "tsne", "inicluster")
p2 <- scatterPlot(seu_list[[2]], "tsne", "inicluster")
plot_grid(p1,p2)

scatterPlot(seu_list[[2]], "tsne", "Group")

Calculate of IDER similarity matrix

res <- unlist(lapply(seu_list, function(x) return(x$inicluster)))
res_names <- unlist(lapply(seu_list, function(x) return(colnames(x))))
seu$initial_cluster <- res[match(colnames(seu), res_names)]

ider <- getIDEr(seu, 
                group.by.var = "initial_cluster",
                batch.by.var = "Batch",
                downsampling.size = 35, 
                use.parallel = FALSE, verbose = FALSE)

net <- plotNetwork(seu, ider, colour.by = "Group" , vertex.size = 0.6, weight.factor = 5)

hc <- hclust(as.dist(1-(ider[[1]] + t(ider[[1]])))/2)
plot(as.dendrogram(hc), horiz = TRUE)

Perform final Clustering

seu <- finalClustering(seu, ider, cutree.h = 0.35) # final clustering

seu <- NormalizeData(seu, verbose = FALSE)
seu <- FindVariableFeatures(seu, selection.method = "vst", 
                            nfeatures = 2000, verbose = FALSE)
seu <- ScaleData(seu, verbose = FALSE)
seu <- RunPCA(seu, npcs = 20, verbose = FALSE)
seu <- RunTSNE(seu, reduction = "pca", dims = 1:12)

plot_list <- list()
plot_list[[1]] <- scatterPlot(seu, "tsne", colour.by = "CIDER_cluster", title = "asCIDER clustering results") 
plot_list[[2]] <- scatterPlot(seu, "tsne", colour.by = "Group", title = "Ground truth of cell populations") 
plot_grid(plotlist = plot_list, ncol = 2)

Technical

sessionInfo()
#> R version 4.1.2 (2021-11-01)
#> Platform: x86_64-apple-darwin17.0 (64-bit)
#> Running under: macOS Big Sur 10.16
#> 
#> Matrix products: default
#> BLAS:   /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRblas.0.dylib
#> LAPACK: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRlapack.dylib
#> 
#> locale:
#> [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
#> 
#> attached base packages:
#> [1] parallel  stats     graphics  grDevices utils     datasets  methods  
#> [8] base     
#> 
#> other attached packages:
#> [1] cowplot_1.1.1      SeuratObject_4.0.4 Seurat_4.1.0       CIDER_0.99.1      
#> 
#> loaded via a namespace (and not attached):
#>   [1] systemfonts_1.0.2     plyr_1.8.6            igraph_1.2.8         
#>   [4] lazyeval_0.2.2        splines_4.1.2         listenv_0.8.0        
#>   [7] scattermore_0.7       ggplot2_3.4.2         digest_0.6.28        
#>  [10] foreach_1.5.1         htmltools_0.5.2       viridis_0.6.2        
#>  [13] fansi_0.5.0           magrittr_2.0.1        memoise_2.0.0        
#>  [16] tensor_1.5            cluster_2.1.2         doParallel_1.0.16    
#>  [19] ROCR_1.0-11           limma_3.50.0          globals_0.16.1       
#>  [22] matrixStats_0.61.0    pkgdown_2.0.7         spatstat.sparse_2.0-0
#>  [25] colorspace_2.0-2      ggrepel_0.9.3         textshaping_0.3.6    
#>  [28] xfun_0.28             dplyr_1.1.2           crayon_1.5.2         
#>  [31] jsonlite_1.7.2        spatstat.data_2.1-0   survival_3.2-13      
#>  [34] zoo_1.8-9             iterators_1.0.13      glue_1.6.2           
#>  [37] polyclip_1.10-0       gtable_0.3.0          leiden_0.3.9         
#>  [40] kernlab_0.9-29        future.apply_1.8.1    abind_1.4-5          
#>  [43] scales_1.2.1          pheatmap_1.0.12       DBI_1.1.1            
#>  [46] edgeR_3.36.0          miniUI_0.1.1.1        Rcpp_1.0.7           
#>  [49] viridisLite_0.4.0     xtable_1.8-4          reticulate_1.22      
#>  [52] spatstat.core_2.3-1   htmlwidgets_1.5.4     httr_1.4.2           
#>  [55] RColorBrewer_1.1-2    ellipsis_0.3.2        ica_1.0-2            
#>  [58] farver_2.1.0          pkgconfig_2.0.3       sass_0.4.0           
#>  [61] uwot_0.1.10           deldir_1.0-6          locfit_1.5-9.4       
#>  [64] utf8_1.2.2            labeling_0.4.2        tidyselect_1.2.0     
#>  [67] rlang_1.1.1           reshape2_1.4.4        later_1.3.0          
#>  [70] munsell_0.5.0         tools_4.1.2           cachem_1.0.6         
#>  [73] cli_3.4.1             dbscan_1.1-8          generics_0.1.1       
#>  [76] ggridges_0.5.3        evaluate_0.14         stringr_1.5.0        
#>  [79] fastmap_1.1.0         yaml_2.2.1            ragg_1.1.3           
#>  [82] goftest_1.2-3         knitr_1.36            fs_1.5.0             
#>  [85] fitdistrplus_1.1-6    purrr_1.0.1           RANN_2.6.1           
#>  [88] pbapply_1.5-0         future_1.28.0         nlme_3.1-153         
#>  [91] mime_0.12             compiler_4.1.2        rstudioapi_0.13      
#>  [94] plotly_4.10.0         png_0.1-7             spatstat.utils_2.2-0 
#>  [97] tibble_3.2.1          bslib_0.3.1           stringi_1.7.5        
#> [100] highr_0.9             desc_1.4.0            lattice_0.20-45      
#> [103] Matrix_1.3-4          vctrs_0.6.2           pillar_1.9.0         
#> [106] lifecycle_1.0.3       spatstat.geom_2.4-0   lmtest_0.9-39        
#> [109] jquerylib_0.1.4       RcppAnnoy_0.0.19      data.table_1.14.2    
#> [112] irlba_2.3.3           httpuv_1.6.3          patchwork_1.1.1      
#> [115] R6_2.5.1              promises_1.2.0.1      KernSmooth_2.23-20   
#> [118] gridExtra_2.3         parallelly_1.32.1     codetools_0.2-18     
#> [121] MASS_7.3-54           rprojroot_2.0.2       withr_2.5.0          
#> [124] sctransform_0.3.3     mgcv_1.8-38           grid_4.1.2           
#> [127] rpart_4.1-15          tidyr_1.3.0           rmarkdown_2.11       
#> [130] Rtsne_0.15            shiny_1.7.1

References

Baron, M. et al. A Single-Cell Transcriptomic Map of the Human and Mouse Pancreas Reveals Inter- and Intra-cell Population Structure. Cell Syst 3, 346–360.e4 (2016).
Satija R, et al. Spatial reconstruction of single-cell gene expression data. Nature Biotechnology 33, 495-502 (2015).