PK VokTªw—¥ÿS: ÿS: pseudotime_monocle.html
# R Options
options(stringsAsFactors=FALSE,
dplyr.summarise.inform=FALSE,
knitr.table.format="html",
kableExtra_view_html=TRUE,
future.globals.maxSize=2000000000, mc.cores=1,
future.fork.enable=TRUE, future.plan="multicore",
future.rng.onMisuse="ignore")
# Required libraries
library(monocle3)
library(Seurat)
library(SeuratWrappers)
library(ggplot2)
library(patchwork)
library(magrittr)
library(biomaRt)
library(dplyr)
library(tidyr)
# Load Seurat S4 object; output of scrnaseq script https://github.com/ktrns/scrnaseq
load("/mnt/ngsnfs/single_cell_dev/users/kosankem/Scripts/testset/scrnaseq.RData")
# Knitr default options
::opts_chunk$set(echo=TRUE, # output code
knitrcache=FALSE, # do not cache results
message=TRUE, # show messages
warning=TRUE, # show warnings
tidy=FALSE, # do not auto-tidy-up code
fig.width=10, # default fig width in inches
class.source='fold-hide', # by default collapse code blocks
dev=c('png', 'pdf'), # create figures in png and pdf; the first device (png) will be used for HTML output
dev.args=list(png=list(type="cairo"), # png: use cairo - works on cluster, supports anti-aliasing (more smooth)
pdf=list(bg="white")), # pdf: use cairo - works on cluster, supports anti-aliasing (more smooth)
dpi=96, # figure resolution
fig.retina=2 # retina multiplier
)
# displaying and testing different roots is possible, but only the last one is used in subsequent plots; At least one root needs to be provided!
$trajectory_root = c("CORO1A")
param# Choose clusters that should be displayed in trajectory (all clusters are used in calculation); NULL if all clusters should be displayed
#param$trajectory_cluster = NULL
$trajectory_cluster = c("2", "4")
paramif (is.null(param$trajectory_cluster)) {
$trajectory_cluster = levels(sc$seurat_clusters)
param
} # individual genes that should be plotted in feature plots; NULL if no target genes are specified
#param$plot_genes = NULL
$plot_genes = c("CD3E", "CD4", "EOMES", "CD22", "CD1D") param
# Feature plots and Violinplots
# The height of 1 row (= 1 plot) is fixed to 5
= max(3.5, 3.5 + 3 * length(param$trajectory_root))
fig_height_plots
# Dimplots (for multiple conditions)
= max(2.5, 2.5 * round(length(param$plot_genes)/3))
height_per_plot_genes
# Dotplots
# We fix the height of each row in a plot to the same height
= max(0.2, 0.2 * 2 * length(param$trajectory_cluster))
height_per_row = max(3, 3 + height_per_row) fig_height_dotplots
Constructing single-cell trajectories for the samples sample1, sample2 of the project pbmc_small (from publicly available single cell RNA-seq data).
During many biological processes, cells transition from one functional “state†to another or differentiate towards a required functional end state. Such events are characterized by the modulation of specific gene expression programs over time. By use of single cell expression signatures, it is, thus, possible to track down a cell’s ‘position’ within such a predetermined developmental path. The R packaged Monocle3 offers a strategy to explore respective expression changes when cells pass through such a ‘pseudotime’ matrix and to construct and visualize accordingly derived single-cell trajectories.
Here we use the S4 class object generated in the Main-Report “Single-cell RNA-seq data analysis†and convert it to a SingleCellExperiment class object for analysis of pseudotime trajectory of cells.
# Convert S4 class object to SingleCellExperiment class
= as.cell_data_set(sc) cds
## Warning: Monocle 3 trajectories require cluster partitions, which Seurat does
## not calculate. Please run 'cluster_cells' on your cell_data_set object
# Add gene_short_name to cds object
rowData(cds)$gene_short_name = rownames(cds)
# Transfer clusters from sc to cds object
= cluster_cells(cds)
cds @clusters$UMAP$clusters = sc@meta.data$seurat_clusters
cdsnames(cds@clusters$UMAP$clusters) = rownames(sc@meta.data)
# Print both objects' information
sc
## An object of class Seurat
## 2092 features across 416 samples within 1 assay
## Active assay: RNA (2092 features, 2092 variable features)
## 2 dimensional reductions calculated: pca, umap
cds
## class: cell_data_set
## dim: 2092 416
## metadata(1): citations
## assays(3): counts logcounts scaledata
## rownames(2092): FAM41C B3GALT6 ... MT-CO2 MT-ND4L
## rowData names(1): gene_short_name
## colnames(416): GCTTCACCAACACGAG-1 TAGTGCACAGAGGAAA-1 ...
## TTCCTTCAGGTAGTCG-2 TCGATTTGTACGCTTA-2
## colData names(17): orig.ident nCount_RNA ... ident Size_Factor
## reducedDimNames(2): PCA UMAP
## mainExpName: RNA
## altExpNames(0):
While in some situations cells may continuously transition from one state to the next along one trajectory, in other cases multiple distinct trajectories might reflect the experimental situation more precisely. For example, different cell types have different initial transcriptomes and response to a stimulus by moving along distinct trajectories. Such distinct trajectories are identified as different “partitions†through the clustering procedure.
# Plot cells coloured by sample
= DimPlot(sc, group.by = "orig.ident", cols = param$col_samples, pt.size = param$pt_size) +
p1 AddStyle(title="Coloured by sample", legend_title = "", xlab = "UMAP 1", ylab = "UMAP 2") +
theme(text = element_text(size = 12),
axis.line = element_line(size = 0.5, color = "grey50"),
legend.position = "bottom") +
NoGrid()
# Plot cells coloured by cluster
= plot_cells(cds, show_trajectory_graph = FALSE, label_cell_groups = FALSE, cell_size = param$pt_size) +
p2 AddStyle(title="Coloured by cluster", xlab = "UMAP 1", ylab = "UMAP 2") +
scale_color_manual(values = param$col_clusters) +
theme(text = element_text(size = 12),
axis.line = element_line(size = 0.5, color = "grey50"),
legend.position = "bottom") +
NoGrid()
# Plot cells coloured by partition
= plot_cells(cds, color_cells_by = "partition", show_trajectory_graph = FALSE, label_cell_groups = FALSE, cell_size = param$pt_size) +
p3 AddStyle(title="Coloured by partition", xlab = "UMAP 1", ylab = "UMAP 2") +
theme(text = element_text(size = 12), legend.title = element_blank(),
axis.line = element_line(size = 0.5, color = "grey50"),
legend.position = "bottom") +
NoGrid()
= patchwork::wrap_plots(p1, p2, p3, ncol = 3)
p p
Next, we fit a principal graph of gene expression changes within each partition and place each cell at its proper position according to its progress through the respective trajectory. To place the cells in order, a “root†of the trajectory has to be defined as the beginning of the biological process. That means, here we have set one gene (CORO1A) as root, which is (exclusively) expressed at the early state of the biological process. The progress along the trajectory an individual cell has made is measured in pseudotime. Pseudotime is an abstract unit that indicates the distance between a cell and the start of the trajectory. Here we are displaying three different visualisations for the pseudotime and cell trajectory. Labeled black circles indicate branches of the trajectory tree to different outcomes (i.e. cell fate), while the white circled number represents the root. Gray colored cells have infinite pseudotime because they were not reachable from the chosen root.
# Learn the trajectory graph
= learn_graph(cds, close_loop = TRUE) cds
# Plot trajectory with root and colored by clusters
= plot_cells(cds, label_groups_by_cluster = FALSE, label_leaves = FALSE, label_branch_points = FALSE, graph_label_size=3, group_label_size = 0, cell_size = param$pt_size) +
p1 AddStyle(legend_title = "cluster", xlab = "UMAP 1", ylab = "UMAP 2") +
scale_color_manual(values = param$col_clusters) +
theme(text = element_text(size = 12),
axis.line = element_line(size = 0.5, color = "grey50"),
legend.position = "bottom") +
NoGrid()
= NULL
p_list
for (i in param$trajectory_root) {
# Integrate root to trajectory
= which.max(unlist(FetchData(sc, i)))
max.root = colnames(sc)[max.root]
max.root = order_cells(cds, root_cells = max.root)
cds
# Extract the pseudotime and partition values form the SingleCellExperiment object and add them to the Seurat object
= AddMetaData(sc, metadata = cds@principal_graph_aux@listData$UMAP$pseudotime, col.name = "pseudotime")
sc = AddMetaData(sc, metadata = cds@clusters@listData$UMAP$partitions, col.name = "partition")
sc
# Plot trajectory with root and colored by pseudotime
= plot_cells(cds, color_cells_by = "pseudotime", label_cell_groups = FALSE, label_leaves = FALSE, label_branch_points = TRUE, graph_label_size=3, cell_size = param$pt_size) +
p2 theme(text = element_text(size = 12),
axis.line = element_line(size = 0.5, color = "grey50")) +
NoGrid() +
RestoreLegend(position = "right")
# Plot trajectory with root and colored by pseudotime as FeaturePlot with Seurat object
= FeaturePlot(sc, features = "pseudotime", pt.size = param$pt_size) +
p3 AddStyle(title=i, legend_title = "pseudotime", xlab = "UMAP 1", ylab = "UMAP 2") +
scale_color_viridis_c() +
theme(text = element_text(size = 12),
axis.line = element_line(size = 0.5, color = "grey50")) +
NoGrid() +
RestoreLegend(position = "right")
= p2 + p3
p_list[[i]]
}
= patchwork::wrap_plots(p1 + plot_spacer())
p1 = p1 + p_list + plot_layout(ncol=1) +
p AddStyle(title="Pseudotime trajectory")
p
The clusters 2, 4, as part of one trajectory, were manually chosen and shown in the following. We display the relative counts of cells that were detected in the same state (same pseudotime point) along the cell trajectory.
# Test whether cells at similar positions on the trajectory have correlated expression
= FetchData(object = sc, vars = c("pseudotime", "seurat_clusters", "orig.ident"))
pseudotime_values = filter(pseudotime_values, seurat_clusters %in% param$trajectory_cluster)
pseudotime_values
= ggplot(pseudotime_values, aes(x = pseudotime, fill = seurat_clusters)) +
p1 geom_density(alpha=0.7) +
theme_classic() +
AddStyle(legend_title = "cluster") +
NoGrid() +
theme(axis.text.y = element_blank(), axis.ticks.y = element_blank(), axis.line = element_line(size = 0.5)) +
scale_fill_manual(values = param$col_clusters)
= ggplot(pseudotime_values, aes(x = pseudotime, fill = seurat_clusters)) +
p2 geom_density() +
facet_grid(seurat_clusters ~ .) +
theme_classic() +
theme(axis.title.y = element_blank(), axis.text.y = element_blank(), axis.ticks.y = element_blank(), axis.line = element_line(size = 0.5)) +
scale_fill_manual(values = param$col_clusters) +
NoLegend()
= ggplot(pseudotime_values, aes(x = pseudotime, fill = orig.ident)) +
p3 geom_density(alpha=0.7) +
theme_classic() +
AddStyle(legend_title = "sample", col = param$col_samples) +
NoGrid() +
theme(axis.text.y = element_blank(), axis.ticks.y = element_blank(), axis.line = element_line(size = 0.5)) +
scale_fill_manual(values = param$col_samples)
= patchwork::wrap_plots((p1 / p3) | p2)
p p
To identify the genes that are regulated over the course of the trajectory, we test with the Moran’s I test whether cells at similar positions on the trajectory have also correlated expression of individual genes.
# Test whether cells at similar positions on the trajectory have correlated expression
= graph_test(cds, neighbor_graph="principal_graph", cores=4)
morans_test_res write.table(x = morans_test_res, file = paste0(param$path_out, "/morans_test_result.xlsx"))
The plots display the dynamics of gene expression as a function of pseudotime for the top 12 genes with the highest correlation of expression to trajectory position.
#
attach(morans_test_res)
= morans_test_res[order(q_value, -morans_I),]
morans_test_res_sort detach(morans_test_res)
= row.names(head(morans_test_res_sort, 12))
top12 = row.names(head(morans_test_res_sort, 50))
top50 = row.names(head(morans_test_res_sort, 100))
top100
= NULL
p_list for (i in top12) {
= FeatureScatter(subset(sc, idents = param$trajectory_cluster), feature1 = "pseudotime", feature2 = i, cols = param$col_clusters) +
p geom_smooth(method = "loess", color="black") +
AddStyle(title = "") +
theme(text = element_text(size = 12),
axis.line = element_line(size = 0.5, color = "grey50")) +
NoGrid() +
NoLegend()
= p
p_list[[i]]
}= patchwork::wrap_plots(p_list, ncol = 3)
p_list p_list
#
= levels(sc$orig.ident)
orig_ident_levels = list()
sc_subsets
for (h in orig_ident_levels) {
= subset(x = sc, subset = orig.ident == h)
sc_subsets[[h]]
}
= NULL
p_list for (i in top12) {
= purrr::map(list_names(sc_subsets), function(n) {
p_samples = Seurat::FeatureScatter(object = subset(sc_subsets[[n]], idents = param$trajectory_cluster), feature1 = "pseudotime", feature2 = i, cols = param$col_clusters) +
p geom_smooth(method = "loess", color="black") +
AddStyle(title = "") +
theme(text = element_text(size = 12),
axis.line = element_line(size = 0.5, color = "grey50")) +
NoGrid() +
NoLegend()
})= patchwork::wrap_plots(p_samples, ncol=2)
p
= p
p_list[[i]]
}= patchwork::wrap_plots(p_list, ncol = 2)
p_list p_list
The dotplot displays the average gene expression per cluster for the top 50 genes with the highest correlation of expression to trajectory position.
= DotPlot(sc, features = top50, group.by = "seurat_clusters", split.by = "orig.ident", cols = param$col_samples, idents = param$trajectory_cluster) +
p RotatedAxis() +
theme(axis.text.x.bottom = element_text(size = 8)) +
RestoreLegend()
p
## Warning: Removed 8 rows containing missing values (geom_point).
The heatmap displays the gene expression for the top 50 genes with the highest correlation of expression to trajectory position.
= DoHeatmap(subset(sc, idents = param$trajectory_cluster), features = top100, group.colors=param$col_clusters)
p p
Visualization of gene expression of individual target genes.
if (!is.null(param$plot_genes)) {
# Plot pseudotime for target gene
= plot_cells(cds, genes = param$plot_genes, scale_to_range = TRUE, label_cell_groups=FALSE, label_leaves = FALSE, graph_label_size = 3, cell_size = param$pt_size) +
p theme(text = element_text(size = 12), axis.line = element_line(size = 0.5, color = "grey50"), plot.title = element_text(size = 12)) +
NoGrid() +
RestoreLegend(position = "right")
pelse {message("No target gene selected")} }
# Plot feature plot for target gene
= NULL
p_list for (i in param$plot_genes) {
= FeaturePlot(sc, features = i, pt.size = param$pt_size) +
p AddStyle(legend_title = "expression", xlab = "UMAP 1", ylab = "UMAP 2", title = i) +
theme(text = element_text(size = 12), axis.line = element_line(size = 0.5, color = "grey50"), plot.title = element_text(size = 10, vjust = 0, hjust = 0.5)) +
NoGrid() +
RestoreLegend(position = "right")
= p
p_list[[i]]
}
= patchwork::wrap_plots(p_list, ncol = 3)
p p
The following parameters were used to run the workflow.
= ScrnaseqParamsInfo(params=param)
out
::kable(out, align="l") %>%
knitr::kable_styling(bootstrap_options=c("striped", "hover"), full_width=FALSE, position="left") kableExtra
Name | Value |
---|---|
project_id | pbmc_small (from publicly available single cell RNA-seq data) |
path_data | name:sample1, sample2; type:10x, 10x; path:/mnt/ngsnfs/single_cell_dev/users/kosankem/Scripts/testset/counts/sample1/, /mnt/ngsnfs/single_cell_dev/users/kosankem/Scripts/testset/counts/sample2/; stats:NA, NA; suffix:-1, -2 |
assay_raw | RNA |
path_out | /mnt/ngsnfs/single_cell_dev/users/kosankem/Scripts/testset/results/ |
mart_dataset | hsapiens_gene_ensembl |
annot_version | 98 |
annot_main | ensembl=ensembl_gene_id, symbol=external_gene_name, entrez=entrezgene_accession |
mart_attributes | ensembl_gene_id, external_gene_name, entrezgene_accession, chromosome_name, start_position, end_position, percentage_gene_gc_content, gene_biotype, strand, description |
mt | ^MT- |
cell_filter | sample1:nFeature_RNA=c(200, 8000), percent_mt=c(NA, 15); sample2:nFeature_RNA=c(200, 8000), percent_mt=c(NA, 15) |
feature_filter | sample1:min_counts=1, min_cells=5; sample2:min_counts=1, min_cells=5 |
samples_min_cells | 10 |
norm | RNA |
cc_remove | FALSE |
cc_remove_all | FALSE |
cc_rescore_after_merge | FALSE |
integrate_samples | method:merge |
pc_n | 10 |
cluster_resolution | 0.8 |
cluster_resolution_test | 0.4, 0.6, 1.2 |
marker_padj | 0.05 |
marker_log2FC | 1 |
marker_pct | 0.25 |
enrichr_site | Enrichr |
enrichr_padj | 0.05 |
enrichr_dbs | GO_Biological_Process_2021, KEGG_2021_Human, WikiPathway_2021_Human |
col | palevioletred |
col_palette_samples | ggsci::pal_rickandmorty |
col_palette_clusters | ggsci::pal_igv |
pt_size | 1 |
path_to_git | . |
debugging_mode | default_debugging |
file_annot | /mnt/ngsnfs/single_cell_dev/users/kosankem/Scripts/testset/results//annotation/hsapiens_gene_ensembl.v98.annot.txt |
file_cc_genes | /mnt/ngsnfs/single_cell_dev/users/kosankem/Scripts/testset/results//annotation/cell_cycle_markers.xlsx |
downsample_cells_n | |
biomart_mirror | www |
samples_to_drop | |
col_samples | sample1=#FAFD7CFF, sample2=#82491EFF |
col_clusters | 1=#5050FFFF, 2=#CE3D32FF, 3=#749B58FF, 4=#F0E685FF |
trajectory_root | CORO1A |
trajectory_cluster | 2, 4 |
plot_genes | CD3E, CD4, EOMES, CD22, CD1D |
This report was created with generated using the scrnaseq_add_condition_comparison script. Software versions were collected at run time.
= ScrnaseqSessionInfo(param$path_to_git)
out
::kable(out, align="l") %>%
knitr::kable_styling(bootstrap_options=c("striped", "hover")) kableExtra
Name | Version |
---|---|
ktrns/scrnaseq | 62ed87b7523480a9cf0876855fd2ce0ca242a331 |
R | R version 4.1.2 (2021-11-01) |
Platform | x86_64-pc-linux-gnu (64-bit) |
Operating system | Ubuntu 20.04.3 LTS |
Packages | abind1.4-5, AnnotationDbi1.54.1, assertthat0.2.1, Biobase2.52.0, BiocFileCache2.0.0, BiocGenerics0.38.0, BiocManager1.30.16, biomaRt2.48.3, Biostrings2.60.2, bit4.0.4, bit644.0.5, bitops1.0-7, blob1.2.2, boot1.3-28, bslib0.3.0, cachem1.0.6, class7.3-20, classInt0.4-3, cli3.0.1, cluster2.1.2, codetools0.2-18, colorspace2.0-2, cowplot1.1.1, crayon1.4.1, curl4.3.2, data.table1.14.2, DBI1.1.1, dbplyr2.1.1, DelayedArray0.18.0, deldir0.2-10, digest0.6.28, dplyr1.0.7, e10711.7-9, ellipsis0.3.2, evaluate0.14, fansi0.5.0, farver2.1.0, fastmap1.1.0, filelock1.0.2, fitdistrplus1.1-6, future1.22.1, future.apply1.8.1, generics0.1.0, GenomeInfoDb1.28.4, GenomeInfoDbData1.2.6, GenomicRanges1.44.0, ggplot23.3.5, ggrepel0.9.1, ggridges0.5.3, globals0.14.0, glue1.4.2, goftest1.2-2, gridExtra2.3, gtable0.3.0, highr0.9, hms1.1.1, htmltools0.5.2, htmlwidgets1.5.4, httpuv1.6.3, httr1.4.2, ica1.0-2, igraph1.2.6, IRanges2.26.0, irlba2.3.3, jquerylib0.1.4, jsonlite1.7.2, kableExtra1.3.4, KEGGREST1.32.0, KernSmooth2.23-20, knitr1.36, labeling0.4.2, later1.3.0, lattice0.20-45, lazyeval0.2.2, leiden0.3.9, leidenbase0.1.3, lifecycle1.0.1, listenv0.8.0, lmtest0.9-38, magrittr2.0.1, MASS7.3-55, Matrix1.4-0, MatrixGenerics1.4.3, matrixStats0.61.0, memoise2.0.0, mgcv1.8-38, mime0.12, miniUI0.1.1.1, monocle31.0.0, munsell0.5.0, nlme3.1-155, parallelly1.28.1, patchwork1.1.1, pbapply1.5-0, pbmcapply1.5.0, pillar1.6.3, pkgconfig2.0.3, plotly4.9.4.1, plyr1.8.6, png0.1-7, polyclip1.10-0, prettyunits1.1.1, progress1.2.2, promises1.2.0.1, proxy0.4-26, purrr0.3.4, R.methodsS31.8.1, R.oo1.24.0, R.utils2.11.0, R62.5.1, RANN2.6.1, rappdirs0.3.3, raster3.5-11, RColorBrewer1.1-2, Rcpp1.0.7, RcppAnnoy0.0.19, RCurl1.98-1.5, remotes2.4.1, reshape21.4.4, reticulate1.22, rlang0.4.11, rmarkdown2.11, ROCR1.0-11, rpart4.1.16, RSQLite2.2.8, rstudioapi0.13, rsvd1.0.5, Rtsne0.15, rvest1.0.1, s21.0.7, S4Vectors0.30.2, sass0.4.0, scales1.1.1, scattermore0.7, sctransform0.3.2, sessioninfo1.1.1, Seurat4.0.4, SeuratObject4.0.2, SeuratWrappers0.3.0, sf1.0-5, shiny1.7.0, SingleCellExperiment1.14.1, slam0.1-50, sp1.4-6, spatstat.core2.3-0, spatstat.data2.1-0, spatstat.geom2.2-2, spatstat.sparse2.0-0, spatstat.utils2.2-0, spData2.0.1, spdep1.2-1, stringi1.7.4, stringr1.4.0, SummarizedExperiment1.22.0, survival3.2-13, svglite2.0.0, systemfonts1.0.2, tensor1.5, terra1.5-12, tibble3.1.4, tidyr1.1.4, tidyselect1.1.1, units0.7-2, utf81.2.2, uwot0.1.10, vctrs0.3.8, viridis0.6.1, viridisLite0.4.0, webshot0.5.2, withr2.4.2, wk0.6.0, xfun0.26, XML3.99-0.8, xml21.3.2, xtable1.8-4, XVector0.32.0, yaml2.2.1, zlibbioc1.38.0, zoo1.8-9 |