How to perform sequence alignment
2024-06-04
seqAlign.Rmd
Installing Rust
First you need to have an updated Rust installation. Go to this site to learn how to install Rust.
Installing rustytools
You will need to have the devtools package installed…
devtools::install_github("furlan-lab/rustytools", force=T)
Running a tool to perform sequence alignment
Loading libraries
rm(list=ls())
suppressPackageStartupMessages({
library(rustytools)
})
Fast pairwise alignment
Using rust we have implemented an excellent pairwise alignment algorith from the rust bio package written by Johannes Köster, Vadim Nazarov, and Patrick Marks. We can quickly perform a pairwise alignment from all the VDJ-B sequences from 10X with the adaptive clonoseq values obtained from a patient with suspected relapse of B-ALL. The align function allows use to score the sequence similarity. Using this approach we can easily identify the leukemic clones in the UMAP embedding.
We also see that these cells do not express CDKN2A unlike the healthy lymphod precursors, consistent with the clinical results.
library(Seurat)
library(scCustomize)
seu<-readRDS("/Users/sfurlan/Library/CloudStorage/OneDrive-SharedLibraries-FredHutchinsonCancerCenter/Furlan_Lab - General/experiments/patient_marrows/5336/cds/240416_cds.RDS")
if(grepl("^gizmo", Sys.info()["nodename"])){
ROOT_DIR2<-"/fh/fast/furlan_s/grp/data/ddata/BM_data"
} else {
ROOT_DIR2<-"/Users/sfurlan/Library/CloudStorage/OneDrive-SharedLibraries-FredHutchinsonCancerCenter/Furlan_Lab - General/datasets/Healthy_BM_greenleaf"
}
#reference dataset
seur<-readRDS(file.path(ROOT_DIR2, "230329_rnaAugmented_seurat.RDS"))
DimPlot(seu, group.by = "vmR_pred", cols = seur@misc$colors)
#DimPlot(seu, group.by = "seurat_clusters", cols= as.character(pals::polychrome()))
seu$leuk_cdr3_nt<-NA
seu$leuk_cdr3_nt[seu$seurat_clusters %in% c(11,13)]<-seu$cdr3_nt[seu$seurat_clusters %in% c(11,13)]
DimPlot(seu, group.by = "leuk_cdr3_nt")+NoLegend()
putative<-names(table(seu$leuk_cdr3_nt)[order(-table(seu$leuk_cdr3_nt))])
#from adaptive clonoseq
cloneC<-"CAGGAACACCTCCATAAGCACAGCCTACATGGAGCTGAGCAGCCTGAGATCTGAGGACACGGCCGTGTATTACTGTGCGAGAGGCCTAACCCACACCCACCCCCTACTTATTGTAGTAGTACCAGCTGCTATGACTACTGGGGCCAGGGAACC"
cloneB<-"CGCGGACAAATCCACGAGCACAGCCTACATGGAGCTGAGCAGCCTGAGATCTGAGGACACGGCCGTGTATTACTGTGCGCGGCGGACTCCGTATTACTATGGTTCGGGGAGTTATACTACTACGGTATGGACGTCTGGGGCCAAGGGACC"
cloneD <- "GATGTTGGGGTTTATTACTGCATGCAAGGTACACACTGGCCCAACCTAGTGGCAGCCCAGGG"
cloneE <- "CTGATTATTACTGTGAGACCGGGACCAAGC"
seu$cloneC_score<-sapply(seu$cdr3_nt, function(seq) align(seq, cloneC, atype = "local", verbose = F))
seu$cloneB_score<-sapply(seu$cdr3_nt, function(seq) align(seq, cloneB, atype = "local", verbose = F))
seu$cloneD_score<-sapply(seu$cdr3_nt, function(seq) align(seq, cloneD, atype = "local", verbose = F))
seu$cloneE_score<-sapply(seu$cdr3_nt, function(seq) align(seq, cloneE, atype = "local", verbose = F))
FeaturePlot(seu, features = c("cloneB_score", "cloneC_score", "cloneD_score", "cloneE_score"), order = T, keep.scale="all")
Biostrings::pairwiseAlignment(putative[1], cloneC, type="global-local")
## Global-Local PairwiseAlignmentsSingleSubject (1 of 1)
## pattern: TGTGCGAGATCAGAGAGGCCTAACCCACACCC...TTGTAGTAGTACCAGCTGCTATGACTACTGG
## subject: [74] TGTGCGAGA-------GGCCTAACCCACACCC...TTGTAGTAGTACCAGCTGCTATGACTACTGG
## score: 96.75946
Biostrings::pairwiseAlignment(putative[2], cloneB, type="global-local")
## Global-Local PairwiseAlignmentsSingleSubject (1 of 1)
## pattern: TGTGCGAGCTCAGGGGTATACCAC-----GCG...GGAGTTATACTACTACGGTATGGACGTCTGG
## subject: [50] TCTGAGGACACGGCCGTGTATTACTGTGCGCG...GGAGTTATACTACTACGGTATGGACGTCTGG
## score: 49.77611
FeaturePlot_scCustom(seu, features = "CDKN2A")
Appendix
## R version 4.3.2 (2023-10-31)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS Ventura 13.5.2
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.11.0
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## time zone: America/Los_Angeles
## tzcode source: internal
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] scCustomize_2.1.1 Seurat_5.0.0 SeuratObject_5.0.1 sp_2.1-3
## [5] rustytools_0.0.1
##
## loaded via a namespace (and not attached):
## [1] RcppAnnoy_0.0.22 splines_4.3.2 later_1.3.2
## [4] bitops_1.0-7 tibble_3.2.1 polyclip_1.10-6
## [7] janitor_2.2.0 fastDummies_1.7.3 lifecycle_1.0.4
## [10] pbmcapply_1.5.1 globals_0.16.2 lattice_0.21-9
## [13] MASS_7.3-60 magrittr_2.0.3 plotly_4.10.4
## [16] sass_0.4.8 rmarkdown_2.25 jquerylib_0.1.4
## [19] yaml_2.3.8 httpuv_1.6.14 sctransform_0.4.1
## [22] spam_2.10-0 spatstat.sparse_3.0-3 reticulate_1.35.0
## [25] cowplot_1.1.3 pbapply_1.7-2 RColorBrewer_1.1-3
## [28] ade4_1.7-22 lubridate_1.9.3 abind_1.4-5
## [31] zlibbioc_1.48.0 Rtsne_0.17 GenomicRanges_1.54.1
## [34] purrr_1.0.2 BiocGenerics_0.48.1 RCurl_1.98-1.14
## [37] circlize_0.4.16 GenomeInfoDbData_1.2.11 IRanges_2.36.0
## [40] S4Vectors_0.40.2 ggrepel_0.9.5 irlba_2.3.5.1
## [43] listenv_0.9.1 spatstat.utils_3.0-4 goftest_1.2-3
## [46] RSpectra_0.16-1 spatstat.random_3.2-2 fitdistrplus_1.1-11
## [49] parallelly_1.37.0 pkgdown_2.0.7 leiden_0.4.3.1
## [52] codetools_0.2-19 tidyselect_1.2.0 shape_1.4.6.1
## [55] farver_2.1.1 UCSC.utils_1.1.0 matrixStats_1.2.0
## [58] stats4_4.3.2 spatstat.explore_3.2-6 jsonlite_1.8.8
## [61] ellipsis_0.3.2 progressr_0.14.0 ggridges_0.5.6
## [64] survival_3.5-7 systemfonts_1.0.5 tools_4.3.2
## [67] ragg_1.2.7 ica_1.0-3 Rcpp_1.0.12
## [70] glue_1.7.0 gridExtra_2.3 xfun_0.42
## [73] GenomeInfoDb_1.41.1 dplyr_1.1.4 withr_3.0.0
## [76] fastmap_1.1.1 fansi_1.0.6 digest_0.6.34
## [79] timechange_0.3.0 R6_2.5.1 mime_0.12
## [82] ggprism_1.0.4 textshaping_0.3.7 colorspace_2.1-0
## [85] scattermore_1.2 tensor_1.5 spatstat.data_3.0-4
## [88] utf8_1.2.4 tidyr_1.3.1 generics_0.1.3
## [91] data.table_1.15.0 httr_1.4.7 htmlwidgets_1.6.4
## [94] uwot_0.1.16 pkgconfig_2.0.3 gtable_0.3.4
## [97] lmtest_0.9-40 XVector_0.42.0 htmltools_0.5.7
## [100] dotCall64_1.1-1 scales_1.3.0 png_0.1-8
## [103] snakecase_0.11.1 knitr_1.45 rstudioapi_0.15.0
## [106] reshape2_1.4.4 nlme_3.1-163 cachem_1.0.8
## [109] zoo_1.8-12 GlobalOptions_0.1.2 stringr_1.5.1
## [112] KernSmooth_2.23-22 parallel_4.3.2 miniUI_0.1.1.1
## [115] vipor_0.4.7 ggrastr_1.0.2 desc_1.4.3
## [118] pillar_1.9.0 grid_4.3.2 vctrs_0.6.5
## [121] RANN_2.6.1 promises_1.2.1 xtable_1.8-4
## [124] cluster_2.1.4 beeswarm_0.4.0 paletteer_1.6.0
## [127] evaluate_0.23 cli_3.6.2 compiler_4.3.2
## [130] crayon_1.5.2 rlang_1.1.3 future.apply_1.11.1
## [133] labeling_0.4.3 rematch2_2.1.2 plyr_1.8.9
## [136] forcats_1.0.0 fs_1.6.3 ggbeeswarm_0.7.2
## [139] stringi_1.8.3 viridisLite_0.4.2 deldir_2.0-2
## [142] Biostrings_2.70.2 munsell_0.5.0 lazyeval_0.2.2
## [145] spatstat.geom_3.2-8 Matrix_1.6-5 RcppHNSW_0.6.0
## [148] patchwork_1.2.0 future_1.33.1 ggplot2_3.5.0
## [151] seqinr_4.2-36 shiny_1.8.0 highr_0.10
## [154] ROCR_1.0-11 igraph_2.0.2 memoise_2.0.1
## [157] bslib_0.6.1