Skip to contents

Overview

This tutorial walks through a minimal, end-to-end workflow for differential expression (DE) with devil on a public scRNA-seq dataset. You will: (1) load data, (2) filter cells/genes, (3) build a design, (4) fit the model, (5) specify contrasts, and (6) visualize results.

If your study has multiple patients/donors, devil can compute clustered (patient-aware) standard errors via a cluster argument.

Prerequisites

# If needed:
# install.packages("BiocManager")
# BiocManager::install(c("scRNAseq","SingleCellExperiment"))

library(devil)
library(scRNAseq)
library(SingleCellExperiment)
library(SummarizedExperiment)
library(Matrix)
library(dplyr)

Load and inspect data

We’ll use the Baron pancreas dataset from scRNAseq.

sce <- scRNAseq::BaronPancreasData()  # SingleCellExperiment
sce
#> class: SingleCellExperiment 
#> dim: 20125 8569 
#> metadata(0):
#> assays(1): counts
#> rownames(20125): A1BG A1CF ... ZZZ3 pk
#> rowData names(0):
#> colnames(8569): human1_lib1.final_cell_0001 human1_lib1.final_cell_0002
#>   ... human4_lib3.final_cell_0700 human4_lib3.final_cell_0701
#> colData names(2): donor label
#> reducedDimNames(0):
#> mainExpName: NULL
#> altExpNames(0):

Extract counts and metadata using accessors:

counts <- SummarizedExperiment::assay(sce, "counts")
meta   <- as.data.frame(SummarizedExperiment::colData(sce))

cat("Genes:", nrow(counts), "\nCells:", ncol(counts), "\n")
#> Genes: 20125 
#> Cells: 8569
stopifnot("label" %in% colnames(meta))
head(meta$label)
#> [1] "acinar" "acinar" "acinar" "acinar" "acinar" "acinar"

Tip: If you have a patient/donor column (often donor or patient), keep it, we’ll optionally pass it to cluste= later.

Light filtering

Keep the three most abundant cell types; filter lowly expressed genes.

# keep 3 largest cell types
top3 <- names(sort(table(meta$label), decreasing = TRUE))[1:3]
keep_cells <- meta$label %in% top3
counts <- counts[, keep_cells, drop = FALSE]
meta   <- meta[ keep_cells, , drop = FALSE]

# gene filter: expressed (>=1 UMI) in >= 1% of kept cells
min_cells <- max(1, floor(0.01 * ncol(counts)))
keep_genes <- Matrix::rowSums(counts >= 1) >= min_cells
counts <- counts[keep_genes, , drop = FALSE]

cat("After filtering — Genes:", nrow(counts), "Cells:", ncol(counts), "\n")
#> After filtering — Genes: 11951 Cells: 5928
table(meta$label)
#> 
#>  alpha   beta ductal 
#>   2326   2525   1077

Optionally restrict to highly expressed genes for a faster demo (skip for real analyses):

# demo mode: top 3000 genes by total counts
if (nrow(counts) > 3000) {
  ord <- order(Matrix::rowSums(counts), decreasing = TRUE)
  counts <- counts[ord[seq_len(3000)], ]
}

Design matrix

Build a no-intercept design so each coefficient corresponds to a cell type.

meta$label <- droplevels(factor(meta$label))
design <- model.matrix(~ 0 + label, data = meta)
colnames(design) <- gsub("^label", "", colnames(design))
colnames(design)
#> [1] "alpha"  "beta"   "ductal"

(Optional) Cluster variable for patient-aware SEs, if available:

cluster <- NULL
if ("donor" %in% names(meta))   cluster <- factor(meta$donor)
if (is.null(cluster) && "patient" %in% names(meta)) cluster <- factor(meta$patient)

Fit the model

fit_devil() expects a counts matrix (genes × cells), a design (cells × covariates). The parameters size_factors=TRUE computes internally a size factor that will scale expression based on the library size of each cell.

fit <- devil::fit_devil(
  input_matrix        = as.matrix(counts),
  design_matrix       = design,
  overdispersion      = TRUE,
  offset              = 1e-6,
  size_factors        = TRUE,
  parallel.cores      = 1,
  verbose             = TRUE
)

Specify contrasts

With a no-intercept design, each column is a cell-type mean on the log scale.
To test “beta vs ductal”, define the contrast (+1 * beta) + (-1 * ductal) and zero elsewhere.

make_contrast <- function(design, from, to) {
  stopifnot(from %in% colnames(design), to %in% colnames(design))
  c <- rep(0, ncol(design)); names(c) <- colnames(design)
  c[from] <-  1
  c[to]   <- -1
  as.numeric(c)
}

contrast <- make_contrast(design, from = "beta", to = "ductal")
contrast
#> [1]  0  1 -1

If your labels differ, update from/to accordingly—use colnames(design) to see available levels.

Test for differential expression

Run a Wald test with optional clustered SEs if cluster exists.

test <- devil::test_de(
  fit,
  contrast = contrast,
  max_lfc  = 20,      # Cap extreme fold changes
  cluster  = cluster  # NULL if not present; enables patient-aware SE if provided
)

# Add gene names if missing
if (!("name" %in% colnames(test))) {
  if (!is.null(rownames(counts))) {
    test$name <- rownames(counts)
  } else {
    test$name <- as.character(seq_len(nrow(test)))
  }
}

Quick peek at the top hits:

test %>%
  arrange(adj_pval, desc(abs(lfc))) %>%
  select(name, lfc, pval, adj_pval) %>%
  head(10)
#> # A tibble: 10 × 4
#>    name     lfc      pval  adj_pval
#>    <chr>  <dbl>     <dbl>     <dbl>
#>  1 CCL2   -9.47 0         0        
#>  2 TFPI2  -7.66 0         0        
#>  3 G6PC2   5.94 0         0        
#>  4 CPE     4.59 0         0        
#>  5 RBM47  -1.44 0         0        
#>  6 PGLS   -1.34 0         0        
#>  7 SSNA1  -1.11 0         0        
#>  8 DAZAP2 -1.32 1.98e-323 7.41e-321
#>  9 SPINT1 -3.12 3.13e-314 1.04e-311
#> 10 GJD2    6.10 1.24e-307 3.72e-305

Visualize results

devil::plot_volcano(
  test,
  lfc_cut    = 1,
  pval_cut   = 0.05,
  labels     = TRUE,
  point_size = 1.8, 
  title = "beta vs ductal"
)
Volcano plot of DE genes

Volcano plot of DE genes

Notes & tips

  • Designs with covariates. No-intercept designs (~ 0 + label) make contrasts more intuitive but aren’t always necessary. You can obviously add more covariates (e.g., age, sex). Keep cluster = donor for valid patient-level uncertainty.
  • Multiple testing Use adj_pval (FDR-corrected) rather than raw p-values for significance calls.
  • Speed. For large datasets, run on GPU if available.
  • Interpretation. lfc is on the log2 scale. lfc = 1 means 2-fold higher expression.

Session info

sessionInfo()
#> R version 4.5.1 (2025-06-13)
#> Platform: x86_64-pc-linux-gnu
#> Running under: Ubuntu 24.04.2 LTS
#> 
#> Matrix products: default
#> BLAS:   /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3 
#> LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.26.so;  LAPACK version 3.12.0
#> 
#> locale:
#>  [1] LC_CTYPE=C.UTF-8       LC_NUMERIC=C           LC_TIME=C.UTF-8       
#>  [4] LC_COLLATE=C.UTF-8     LC_MONETARY=C.UTF-8    LC_MESSAGES=C.UTF-8   
#>  [7] LC_PAPER=C.UTF-8       LC_NAME=C              LC_ADDRESS=C          
#> [10] LC_TELEPHONE=C         LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C   
#> 
#> time zone: UTC
#> tzcode source: system (glibc)
#> 
#> attached base packages:
#> [1] stats4    stats     graphics  grDevices utils     datasets  methods  
#> [8] base     
#> 
#> other attached packages:
#>  [1] dplyr_1.1.4                 Matrix_1.7-3               
#>  [3] scRNAseq_2.22.0             SingleCellExperiment_1.30.1
#>  [5] SummarizedExperiment_1.38.1 Biobase_2.68.0             
#>  [7] GenomicRanges_1.60.0        GenomeInfoDb_1.44.1        
#>  [9] IRanges_2.42.0              S4Vectors_0.46.0           
#> [11] BiocGenerics_0.54.0         generics_0.1.4             
#> [13] MatrixGenerics_1.20.0       matrixStats_1.5.0          
#> [15] devil_0.1.0                
#> 
#> loaded via a namespace (and not attached):
#>   [1] DBI_1.2.3                 bitops_1.0-9             
#>   [3] httr2_1.2.1               rlang_1.1.6              
#>   [5] magrittr_2.0.3            gypsum_1.4.0             
#>   [7] compiler_4.5.1            RSQLite_2.4.2            
#>   [9] DelayedMatrixStats_1.30.0 GenomicFeatures_1.60.0   
#>  [11] png_0.1-8                 systemfonts_1.2.3        
#>  [13] vctrs_0.6.5               ProtGenerics_1.40.0      
#>  [15] pkgconfig_2.0.3           crayon_1.5.3             
#>  [17] fastmap_1.2.0             dbplyr_2.5.0             
#>  [19] XVector_0.48.0            labeling_0.4.3           
#>  [21] utf8_1.2.6                Rsamtools_2.24.0         
#>  [23] rmarkdown_2.29            UCSC.utils_1.4.0         
#>  [25] ragg_1.4.0                bit_4.6.0                
#>  [27] xfun_0.53                 cachem_1.1.0             
#>  [29] jsonlite_2.0.0            blob_1.2.4               
#>  [31] rhdf5filters_1.20.0       DelayedArray_0.34.1      
#>  [33] Rhdf5lib_1.30.0           BiocParallel_1.42.1      
#>  [35] parallel_4.5.1            R6_2.6.1                 
#>  [37] RColorBrewer_1.1-3        bslib_0.9.0              
#>  [39] rtracklayer_1.68.0        jquerylib_0.1.4          
#>  [41] Rcpp_1.1.0                knitr_1.50               
#>  [43] tidyselect_1.2.1          abind_1.4-8              
#>  [45] yaml_2.3.10               codetools_0.2-20         
#>  [47] curl_6.4.0                lattice_0.22-7           
#>  [49] alabaster.sce_1.8.0       tibble_3.3.0             
#>  [51] withr_3.0.2               KEGGREST_1.48.1          
#>  [53] evaluate_1.0.4            desc_1.4.3               
#>  [55] BiocFileCache_2.16.1      alabaster.schemas_1.8.0  
#>  [57] ExperimentHub_2.16.1      Biostrings_2.76.0        
#>  [59] pillar_1.11.0             BiocManager_1.30.26      
#>  [61] filelock_1.0.3            RCurl_1.98-1.17          
#>  [63] ggplot2_3.5.2             BiocVersion_3.21.1       
#>  [65] ensembldb_2.32.0          scales_1.4.0             
#>  [67] sparseMatrixStats_1.20.0  alabaster.base_1.8.1     
#>  [69] alabaster.ranges_1.8.0    glue_1.8.0               
#>  [71] alabaster.matrix_1.8.0    lazyeval_0.2.2           
#>  [73] tools_4.5.1               AnnotationHub_3.16.1     
#>  [75] BiocIO_1.18.0             GenomicAlignments_1.44.0 
#>  [77] fs_1.6.6                  XML_3.99-0.18            
#>  [79] rhdf5_2.52.1              grid_4.5.1               
#>  [81] AnnotationDbi_1.70.0      GenomeInfoDbData_1.2.14  
#>  [83] HDF5Array_1.36.0          restfulr_0.0.16          
#>  [85] cli_3.6.5                 rappdirs_0.3.3           
#>  [87] textshaping_1.0.1         S4Arrays_1.8.1           
#>  [89] AnnotationFilter_1.32.0   gtable_0.3.6             
#>  [91] alabaster.se_1.8.0        sass_0.4.10              
#>  [93] digest_0.6.37             SparseArray_1.8.1        
#>  [95] farver_2.1.2              rjson_0.2.23             
#>  [97] memoise_2.0.1             htmltools_0.5.8.1        
#>  [99] pkgdown_2.1.3             lifecycle_1.0.4          
#> [101] h5mread_1.0.1             httr_1.4.7               
#> [103] bit64_4.6.0-1