Skip to contents

Motivation

This is a highly opinionated example of how to perform RNA-Seq pre-processing and quantification, focusing on the estimation of transcript abundance.

For this workflow we are going to use the condathis R package. This package allows you to run command line tools like Salmon (Patro et al. 2017) from within R.

All those tools can be run from the command line directly.

Note for MacOS users

As of 2024-06-09, Bioconda does not support the Arm64 architecture of the Apple Silicon CPUs.

One way to work around this is to pass the argument platform = "osx-64" to condathis::create_env(), which creates the environment for the Intel architecture so that it runs through Rosetta 2. The same option is available on the CLI by adding --platform osx-64 to the conda create command.

Note for Windows users

As of 2024-06-09, Bioconda does not support native Windows installations. Therefore this vignette can only be run on Windows using a VM or a container. The best approach is to run it under the Windows Subsystem for Linux, if you have it available.

Install Salmon with {condathis}

# pak::pkg_install("local::~/projects/condathis")
# Install {condathis} from GitHub only when it is not already available.
if (!rlang::is_installed("condathis")) {
  pak::pkg_install("github::luciorq/condathis")
}
library("condathis")

# When the environment is not registered but its directory is still on disk,
# delete the leftover directory before the environment is created below.
if (!condathis::env_exists(env_name = "salmon-env")) {
  if (fs::dir_exists(fs::path(condathis::get_install_dir(), "envs", "salmon-env"))) {
    fs::dir_delete(fs::path(condathis::get_install_dir(), "envs", "salmon-env"))
  }
}

# Workaround for ARM CPU based MacOS
# `get_sys_arch()` returns a string of the form "<OS>-<CPU>", so it has to be
# compared explicitly: `isTRUE()` applied directly to a string is always FALSE,
# which would silently skip the workaround on Apple Silicon.
if (isTRUE(condathis::get_sys_arch() == "Darwin-arm64")) {
  platform_var <- "osx-64"
} else {
  platform_var <- NULL
}

# Create the environment with a pinned Salmon version; `platform` stays NULL
# unless the Apple Silicon workaround above applies.
condathis::create_env(
  packages = "salmon=1.10.3",
  env_name = "salmon-env",
  method = "native",
  platform = platform_var
)

Check Salmon version:

# Ask the Salmon binary inside "salmon-env" to report its version.
condathis::run("salmon", "--version", env_name = "salmon-env")
salmon 1.10.3

This is equivalent to running in the CLI.

# if MacOS with arm64 CPU add `--platform osx-64`
conda create -n salmon-env \
  -c bioconda -c conda-forge -c defaults \
  salmon;

Running Salmon with {condathis}

Salmon index

For the CLI command.

Salmon index command:

# Replace the <...> placeholders with real paths before running.
salmon index \
  --transcripts <TRANSCRIPTOME_FASTA> \
  --index <SALMON_INDEX_DIR> \
  --kmerLen 15 \
  --threads 4 \
  --keepDuplicates;
# Working directory for this vignette, located under the R session temp dir.
base_dir <- fs::path_temp("isoformic")

# Reference release to fetch; reused below to build the file and index names.
reference_version <- "46"

# Fetch the reference into `base_dir`.
# NOTE(review): the indexing step expects
# "gencode.v<version>.transcripts.fa.gz" to exist there afterwards -- confirm
# that `download_reference()` writes that file name.
download_reference(
  organism = "human",
  reference = "gencode",
  version = reference_version,
  file_type = "fasta",
  output_path = base_dir
)

Using condathis

# Input transcriptome FASTA and output index directory, both under `base_dir`
# and both named after the reference version.
txome_fasta_path <- fs::path(
  base_dir,
  paste0("gencode.v", reference_version, ".transcripts.fa.gz")
)
salmon_index_path <- fs::path(
  base_dir,
  paste0("salmon_index_gencode_v", reference_version)
)

# Make sure the index directory exists before Salmon is run.
# (The original condition was missing its closing parenthesis and did not
# parse.)
if (!fs::dir_exists(salmon_index_path)) {
  fs::dir_create(salmon_index_path)
}

# Thread count; also reused by the quantification step.
num_threads <- 4

# Same arguments as the CLI `salmon index` command shown above, executed
# inside the "salmon-env" environment.
condathis::run(
  "salmon", "index",
  "--transcripts", txome_fasta_path,
  "--index", salmon_index_path,
  "--kmerLen", 15,
  "--threads", num_threads,
  "--keepDuplicates",
  env_name = "salmon-env"
)

Salmon Quant

Salmon quant command:

# Template for the `salmon quant` call; it is not runnable as written.
# Both the <...> tokens and the {...} tokens are placeholders that must be
# substituted first.
# NOTE(review): the {threads}, {output.quant_dir} and {log} tokens look like
# workflow-manager wildcards -- confirm where this template comes from.
# stderr is merged into stdout and appended to the log through `tee -a`.
salmon \
  quant \
  --libType A \
  --index <SALMON_INDEX_PATH> \
  --mates1 <FASTQ_R1> \
  --mates2 <FASTQ_R2> \
  --output <SALMON_OUTPUT_PATH> \
  --threads {threads} \
  --softclip \
  --softclipOverhangs \
  --disableChainingHeuristic \
  --dumpEq \
  --dumpEqWeights \
  --posBias \
  --seqBias \
  --gcBias \
  --useVBOpt \
  --rangeFactorizationBins 8 \
  --thinningFactor 100 \
  --validateMappings \
  --writeMappings={output.quant_dir}/txome_align.sam \
  --minScoreFraction 0.65 \
  --numGibbsSamples 100 2>&1 | tee -a {log};

Using condathis

# Directory that receives the quantification output; created on first use.
salmon_quant_path <- fs::path(base_dir, "salmon_quant")
if (!fs::dir_exists(salmon_quant_path)) {
  fs::dir_create(salmon_quant_path)
}

# Paired-end input files: element 1 is R1, element 2 is R2.
reads_path <- c(
  fs::path("data-raw", "sample_R1.fastq.gz"),
  fs::path("data-raw", "sample_R2.fastq.gz")
)

# Run `salmon quant` inside the "salmon-env" environment. The arguments are
# forwarded to the command line in the order given here.
condathis::run(
  "salmon", "quant",
  # Library type, index, reads and output location.
  "--libType", "A",
  "--index", salmon_index_path,
  "--mates1", reads_path[[1]],
  "--mates2", reads_path[[2]],
  "--output", salmon_quant_path,
  # Remaining Salmon options, passed through verbatim.
  "--numGibbsSamples", 20,
  "--posBias",
  "--seqBias",
  "--gcBias",
  "--threads", num_threads,
  "--softclip",
  "--softclipOverhangs",
  "--disableChainingHeuristic",
  "--dumpEq",
  "--useVBOpt",
  "--validateMappings",
  "--minAssignedFrags", 1,
  "--minScoreFraction", "0.65",
  env_name = "salmon-env"
)

Session Information

# Print the R version, platform details and package versions of this session,
# for reproducibility.
sessioninfo::session_info()
#> ─ Session info ───────────────────────────────────────────────────────────────
#>  setting  value
#>  version  R version 4.4.0 (2024-04-24)
#>  os       macOS Sonoma 14.1.2
#>  system   aarch64, darwin20
#>  ui       X11
#>  language en
#>  collate  en_US.UTF-8
#>  ctype    en_US.UTF-8
#>  tz       America/New_York
#>  date     2024-06-11
#>  pandoc   3.1.13 @ /opt/homebrew/bin/ (via rmarkdown)
#> 
#> ─ Packages ───────────────────────────────────────────────────────────────────
#>  package     * version    date (UTC) lib source
#>  bslib         0.7.0      2024-03-29 [2] RSPM
#>  cachem        1.1.0      2024-05-16 [2] RSPM
#>  cli           3.6.2      2023-12-11 [2] RSPM
#>  desc          1.4.3      2023-12-10 [2] RSPM
#>  digest        0.6.35     2024-03-11 [2] RSPM
#>  evaluate      0.23       2023-11-01 [2] RSPM
#>  fastmap       1.2.0      2024-05-15 [2] RSPM
#>  fs            1.6.4      2024-04-25 [2] RSPM
#>  htmltools     0.5.8.1    2024-04-04 [2] RSPM
#>  htmlwidgets   1.6.4      2023-12-06 [2] RSPM
#>  isoformic   * 0.1.0.9006 2024-06-11 [1] local
#>  jquerylib     0.1.4      2021-04-26 [2] RSPM
#>  jsonlite      1.8.8      2023-12-04 [2] RSPM
#>  knitr         1.47       2024-05-29 [2] CRAN (R 4.4.0)
#>  lifecycle     1.0.4      2023-11-07 [2] RSPM
#>  magrittr      2.0.3      2022-03-30 [2] RSPM
#>  memoise       2.0.1      2021-11-26 [2] RSPM
#>  pkgdown       2.0.9      2024-04-18 [2] RSPM
#>  purrr         1.0.2      2023-08-10 [2] RSPM
#>  R6            2.5.1      2021-08-19 [2] RSPM
#>  ragg          1.3.2      2024-05-15 [2] RSPM
#>  rlang         1.1.3      2024-01-10 [2] RSPM
#>  rmarkdown     2.27       2024-05-17 [2] RSPM
#>  sass          0.4.9      2024-03-15 [2] RSPM
#>  sessioninfo   1.2.2      2021-12-06 [2] RSPM
#>  systemfonts   1.1.0      2024-05-15 [2] RSPM
#>  textshaping   0.4.0      2024-05-24 [2] RSPM
#>  vctrs         0.6.5      2023-12-01 [2] RSPM
#>  xfun          0.44       2024-05-15 [2] RSPM
#>  yaml          2.3.8      2023-12-11 [2] RSPM
#> 
#>  [1] /private/var/folders/2q/937_bkg10svdwx1x00prs9nm0000gn/T/Rtmp28DMZh/temp_libpath2f624fd5eade
#>  [2] /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library
#> 
#> ──────────────────────────────────────────────────────────────────────────────

References

Patro, R, G Duggal, MI Love, RA Irizarry, and C Kingsford. 2017. “Salmon Provides Fast and Bias-Aware Quantification of Transcript Expression.” Nature Methods 14: 417–19. https://doi.org/10.1038/nmeth.4197.