Version Compatibility

Reference examples tested with: DESeq2 1.42+, Salmon 1.10+, edgeR 4.0+, kallisto 0.50+, scanpy 1.10+

Before using code patterns, verify installed versions match. If versions differ:

R: packageVersion('<pkg>') then ?function_name to verify parameters

If code throws an error (in R typically "could not find function" or "unused argument"; in Python ImportError, AttributeError, or TypeError), introspect the installed package and adapt the example to match the actual API rather than retrying.

tximport Workflow

"Import Salmon/kallisto results into DESeq2" → Summarize transcript-level abundance estimates to gene-level counts with proper length-offset correction for use in DESeq2 or edgeR.

R: tximport::tximport(files, type='salmon', tx2gene=tx2gene)

Import transcript-level estimates from Salmon, kallisto, or other quantifiers into R for gene-level differential expression analysis.

Basic tximport

Goal: Import transcript-level quantifications from Salmon or kallisto into R as gene-level counts with proper length-offset correction for DESeq2 or edgeR.

Approach: Create a transcript-to-gene mapping from a GTF or biomaRt, then run tximport on the quantification files to produce gene-level estimated counts together with an average-transcript-length matrix that DESeq2 or edgeR uses as a length offset.

library(tximport)

# Define sample files: a named character vector, one quant.sf per sample.
# The names become the column names of every matrix tximport returns, so they
# must line up one-to-one with the sample metadata used downstream
# (four samples: two control, two treated).
files <- c(
    sample1 = 'sample1_quant/quant.sf',
    sample2 = 'sample2_quant/quant.sf',
    sample3 = 'sample3_quant/quant.sf',
    sample4 = 'sample4_quant/quant.sf'
)
# Fail early with a clear message rather than a read error inside tximport
stopifnot(all(file.exists(files)))

# Load transcript-to-gene mapping
# tximport uses column position, not column names: transcript ID first,
# gene ID second
tx2gene <- read.csv('tx2gene.csv')  # columns: TXNAME, GENEID

# Import at gene level
txi <- tximport(files, type = 'salmon', tx2gene = tx2gene)

Creating tx2gene Mapping

From GTF (using GenomicFeatures)

library(GenomicFeatures)

# Build a transcript database from the annotation file, then look up the gene
# ID for every transcript name it contains.
# NOTE(review): recent Bioconductor releases move makeTxDbFromGFF() to the
# txdbmaker package - confirm against the installed version.
txdb <- makeTxDbFromGFF('annotation.gtf')
tx_names <- keys(txdb, keytype = 'TXNAME')
tx2gene <- select(
    txdb,
    keys = tx_names,
    columns = 'GENEID',
    keytype = 'TXNAME'
)

From Ensembl (using biomaRt)

library(biomaRt)

# Versioned transcript and gene IDs, requested in tx2gene column order
# (transcript first, gene second)
ensembl_attrs <- c('ensembl_transcript_id_version', 'ensembl_gene_id_version')

mart <- useMart('ensembl', dataset = 'hsapiens_gene_ensembl')
tx2gene <- getBM(attributes = ensembl_attrs, mart = mart)

# Rename to the TXNAME / GENEID convention used by the other mapping recipes
names(tx2gene) <- c('TXNAME', 'GENEID')

From Salmon quant.sf

# Derive the mapping from the transcript names Salmon wrote into quant.sf.
# This only works when the names are GENCODE-style, '|'-delimited FASTA headers
# (transcript_id|gene_id|...), i.e. the index was built from a GENCODE FASTA
# without --gencode (which truncates names at the first '|').
# Stripping the version from the transcript ID does NOT yield a gene ID: it
# would map every transcript to itself and nothing would be summarised.
quant <- read.table('sample1_quant/quant.sf', header = TRUE,
                    stringsAsFactors = FALSE)
name_fields <- strsplit(quant$Name, '|', fixed = TRUE)
if (any(lengths(name_fields) < 2)) {
    stop('quant.sf transcript names carry no gene ID; ',
         'build tx2gene from the GTF or biomaRt instead', call. = FALSE)
}
tx2gene <- data.frame(
    # Full name, so it matches the IDs tximport reads from quant.sf verbatim
    # (when importing with ignoreAfterBar = TRUE, use the first field instead)
    TXNAME = quant$Name,
    # Second header field is the gene ID
    GENEID = vapply(name_fields, `[`, character(1), 2)
)

Import Types

Gene-Level Summarization (Default)

# Collapse transcript-level estimates onto genes using the tx2gene table
txi <- tximport(files = files, type = 'salmon', tx2gene = tx2gene)
# Result: counts, abundance (TPM), and length, each at gene level

Transcript-Level (No Summarization)

# txOut = TRUE skips gene summarization, so no tx2gene table is needed
txi <- tximport(files = files, type = 'salmon', txOut = TRUE)
# Result: counts, abundance, and length, each at transcript level

Scaled TPM (for visualization)

# Gene-level counts generated from abundance: with 'scaledTPM', txi$counts is
# TPM scaled up to each sample's library size. txi$abundance holds TPM whether
# or not this option is set, so it is not needed just to obtain TPM values.
# Per the tximport vignette, counts produced this way should be used without
# an additional length offset downstream.
txi <- tximport(files, type = 'salmon', tx2gene = tx2gene,
                countsFromAbundance = 'scaledTPM')

Source-Specific Import

Salmon

# Salmon: each entry of `files` is the path to one sample's quant.sf
txi <- tximport(files = files, type = 'salmon', tx2gene = tx2gene)

kallisto

# kallisto: same call shape, different parser
# NOTE(review): `files` must point at kallisto output (abundance.h5 or
# abundance.tsv) rather than Salmon quant.sf paths - confirm before running
txi <- tximport(files = files, type = 'kallisto', tx2gene = tx2gene)

RSEM

# RSEM: summarize transcript-level RSEM estimates to genes via tx2gene.
# NOTE(review): `files` must point at RSEM transcript-level output
# (*.isoforms.results), not Salmon quant.sf paths - confirm before running.
txi <- tximport(files, type = 'rsem', tx2gene = tx2gene)

StringTie

# StringTie: summarize transcript-level StringTie estimates to genes via tx2gene.
# NOTE(review): `files` must point at StringTie t_data.ctab files, not Salmon
# quant.sf paths - confirm before running.
txi <- tximport(files, type = 'stringtie', tx2gene = tx2gene)

Using with DESeq2

library(DESeq2)

# Create sample metadata: one label per entry of `files`, in the same order.
# Levels sort alphabetically, so 'control' is the reference level.
condition <- factor(c('control', 'control', 'treated', 'treated'))
if (length(condition) != length(files)) {
    # Without this check data.frame() fails with an opaque row-names error
    stop('Expected one condition label per sample: got ', length(condition),
         ' labels for ', length(files), ' quantification files',
         call. = FALSE)
}
coldata <- data.frame(condition = condition, row.names = names(files))

# Create DESeqDataSet from tximport
# (rows of colData must be in the same order as the columns of txi$counts)
dds <- DESeqDataSetFromTximport(txi, colData = coldata, design = ~ condition)

# Filter low counts: keep genes with at least 10 reads summed over all samples
dds <- dds[rowSums(counts(dds)) >= 10, ]

# Run DESeq2
dds <- DESeq(dds)
res <- results(dds)

Using with edgeR

library(edgeR)

# Create DGEList with offset
cts <- txi$counts
normMat <- txi$length

# Centre the length factors on each gene's geometric mean so that dividing by
# them does not change the overall magnitude of the counts
normMat <- normMat / exp(rowMeans(log(normMat)))

# Per-sample log effective library size, computed on the length-scaled counts
o <- log(calcNormFactors(cts / normMat)) + log(colSums(cts / normMat))

y <- DGEList(cts)
# Offset = log length factor (gene x sample) + log effective library size
# (per sample); the double transpose adds `o` column-wise
y$offset <- t(t(log(normMat)) + o)

# Design matrix for the comparison of interest. `coldata` is the sample
# metadata data frame (one row per sample, with a `condition` column);
# its rows must be in the same order as the columns of `cts`.
stopifnot(identical(rownames(coldata), colnames(cts)))
design <- model.matrix(~ condition, data = coldata)

# Continue with edgeR analysis
y <- estimateDisp(y, design)

tximeta: Metadata-Aware Import

tximeta automatically attaches transcript and gene information from the original annotation.

library(tximeta)

# First time: link transcriptome to annotation
# (associates the Salmon index with the FASTA and GTF it was built from, so
# later imports can attach the matching annotation)
makeLinkedTxome(
    indexDir = 'salmon_index',
    source = 'Ensembl',
    organism = 'Homo sapiens',
    release = '110',
    genome = 'GRCh38',
    fasta = 'transcripts.fa',
    gtf = 'annotation.gtf'
)

# Import with full metadata
# tximeta expects the columns `files` (paths) and `names` (sample IDs); any
# other column is carried along as sample metadata.
# `condition` is a factor, matching the DESeq2 section, so DESeq2 does not
# have to convert a character design variable itself.
condition <- factor(c('control', 'control', 'treated', 'treated'))
if (length(condition) != length(files)) {
    stop('Expected one condition label per sample: got ', length(condition),
         ' labels for ', length(files), ' quantification files',
         call. = FALSE)
}
coldata <- data.frame(
    files = files,
    names = names(files),
    condition = condition
)

se <- tximeta(coldata)

# Summarize to gene level
gse <- summarizeToGene(se)

# Convert to DESeqDataSet
dds <- DESeqDataSet(gse, design = ~ condition)

tximport Output Structure

# The tximport result is a list; inspect its components by name
names(txi)
# [1] "abundance"           "counts"              "length"
# [4] "countsFromAbundance"

# All three matrices are genes x samples (transcripts x samples if txOut = TRUE)
# abundance: TPM values (genes x samples)
# counts: estimated counts (genes x samples)
# length: effective gene lengths (genes x samples), i.e. the average
#         transcript length per gene, weighted by transcript abundance
# countsFromAbundance: the countsFromAbundance setting used for the import

Handling Version Numbers

# Remove version from transcript IDs in the mapping table
# (e.g. ENST00000456328.2 -> ENST00000456328)
tx2gene$TXNAME <- gsub('\\.\\d+$', '', tx2gene$TXNAME)

# Ignore versions (and anything after a '|') in the quantification files
# during import. Per the tximport documentation these flags clean the IDs read
# from `files` only; tx2gene is not modified, so its TXNAME column must already
# be version-free (e.g. via the line above) for the IDs to match.
txi <- tximport(files, type = 'salmon', tx2gene = tx2gene,
                ignoreTxVersion = TRUE, ignoreAfterBar = TRUE)

Related Skills

rna-quantification/alignment-free-quant - Upstream Salmon/kallisto
differential-expression/deseq2-basics - DESeq2 analysis
differential-expression/edger-basics - edgeR analysis
genome-intervals/gtf-gff-handling - GTF annotation parsing

bio-rna-quantification-tximport-workflow | Safety 95 | Repository

Package Files

Version Compatibility

tximport Workflow

Basic tximport

Creating tx2gene Mapping

From GTF (using GenomicFeatures)

From Ensembl (using biomaRt)

From Salmon quant.sf

Import Types

Gene-Level Summarization (Default)

Transcript-Level (No Summarization)

Scaled TPM (for visualization)

Source-Specific Import

Salmon

kallisto

RSEM

StringTie

Using with DESeq2

Using with edgeR

tximeta: Metadata-Aware Import

tximport Output Structure

Handling Version Numbers

Related Skills

Install

AI Quality Score

Metadata

Tags

bio-rna-quantification-tximport-workflow | Safety 95 | Repository | Share | Favorite skill

Package Files

Version Compatibility

tximport Workflow

Basic tximport

Creating tx2gene Mapping

From GTF (using GenomicFeatures)

From Ensembl (using biomaRt)

From Salmon quant.sf

Import Types

Gene-Level Summarization (Default)

Transcript-Level (No Summarization)

Scaled TPM (for visualization)

Source-Specific Import

Salmon

kallisto

RSEM

StringTie

Using with DESeq2

Using with edgeR

tximeta: Metadata-Aware Import

tximport Output Structure

Handling Version Numbers

Related Skills

Install

AI Quality Score

Metadata

Tags

bio-rna-quantification-tximport-workflow | Safety 95 | Repository