Skip to contents
library(lineaGT)
#> Warning: replacing previous import 'cli::num_ansi_colors' by
#> 'crayon::num_ansi_colors' when loading 'VIBER'
#> Warning: replacing previous import 'cli::num_ansi_colors' by
#> 'crayon::num_ansi_colors' when loading 'easypar'
#>  Loading ctree, 'Clone trees in cancer'. Support : <https://caravagn.github.io/ctree/>
#> Warning: replacing previous import 'crayon::%+%' by 'ggplot2::%+%' when loading
#> 'VIBER'
#>  Loading VIBER, 'Variational inference for multivariate Binomial mixtures'. Support : <https://caravagn.github.io/VIBER/>
#>  Loading lineaGT, 'Lineage inference from gene therapy'. Support : <https://caravagnalab.github.io/lineaGT/>
#> ! The 'lineagt-env' environment is already loaded!
library(magrittr)

The coverage dataset can be filtered by calling the filter_dataset() function.

data(cov.df.example)
data(vaf.df.example)
cov.example.filt = cov.df.example %>%
  filter_dataset(min_cov=5, min_frac=0.05)
#>  Filtering the input dataset with minimum coverage 5 and minimum clusters frac…
#>  Filtering the input dataset with minimum coverage 5 and minimum clusters frac…
#> 

cov.example.filt
#> # A tibble: 264 × 4
#>    IS    timepoints lineage coverage
#>    <chr> <chr>      <chr>      <int>
#>  1 IS100 t1         l1             0
#>  2 IS100 t2         l1           418
#>  3 IS100 t1         l2             0
#>  4 IS100 t2         l2            74
#>  5 IS101 t1         l1           502
#>  6 IS101 t2         l1           186
#>  7 IS101 t1         l2            62
#>  8 IS101 t2         l2           640
#>  9 IS11  t1         l1           128
#> 10 IS11  t2         l1           196
#> # ℹ 254 more rows

Fitting the model

x = fit(
  cov.df = cov.example.filt,
  vaf.df = vaf.df.example,
  steps = 500,
  # n_runs = 1,
  k_interval = c(5, 15),
  timepoints_to_int = unlist(list("t1"=60, "t2"=150))
  )
#> 
[36mℹ
[39m Starting lineaGT model selection to retrieve the optimal number of clones
#> 
[32m✔
[39m Starting lineaGT model selection to retrieve the optimal number of clones ...…
#> 
#> 
[36mℹ
[39m Fitting model to cluster ISs
#> 
[32m✔
[39m Found 8 clones of ISs!
#> 
#> 
[36mℹ
[39m Fitting model to cluster mutations
#> 
[36mℹ
[39m Starting clustering of clone C0 mutations
#> 
[44m
[37m [ VIBER - variational fit ] 
#> 
[39m
[49m
#> 
[36mℹ
[39m Input n = 3, with k < 3. Dirichlet concentration 
[32m
[32mα = 1e-06
[32m
[39m.
#> 
[36mℹ
[39m Starting clustering of clone C0 mutations

[36mℹ
[39m Beta (a_0, b_0) = (1, 1); q_i = prior. Optimise: ε = 1e-10 or 5000 steps, r = 10 starts.
#> 
[36mℹ
[39m Starting clustering of clone C0 mutations
#> 
[32m✔
[39m 
[1mVIBER fit
[22m completed in 0.03 mins (status: 
[32mconverged
[39m)
#> 
[36mℹ
[39m Starting clustering of clone C0 mutations
#> ── 
[30m
[43m[ VIBER ] My VIBER model
[49m
[39m n = 3 (w = 4 dimensions). Fit with 
[32mk = 3
[39m clusters. ─
#> 
[36mℹ
[39m Starting clustering of clone C0 mutations
• Clusters: π = 67% [
[33mC1
[39m] and 33% [
[33mC2
[39m], with π > 0.
#> 
[36mℹ
[39m Starting clustering of clone C0 mutations
• Binomials: θ = <0.09, 0.19, 0.01, 0> [
[33mC1
[39m] and <0.01, 0.36, 0.03, 0.4> [
[33mC2
[39m].
#> 
[36mℹ
[39m Starting clustering of clone C0 mutations

[36mℹ
[39m Score(s): ELBO = -1461.846. Fit 
[32mconverged
[39m in 6 steps, ε = 1e-10.
#> 
[36mℹ
[39m Starting clustering of clone C0 mutations

[32m✔
[39m Reduced to k = 2 (from 3) selecting VIBER cluster(s) with π > 0.166666666666667, and Binomial p > 0 in w > 0 dimension(s).
#> 
[36mℹ
[39m Starting clustering of clone C0 mutations

[32m✔
[39m Starting clustering of clone C0 mutations ... done
#> 
#> 
[36mℹ
[39m Fitting model to cluster mutations

[36mℹ
[39m Starting phylogeny inference of clone C0
#> 
[44m
[37m [ ctree ~ clone trees generator for C0 ] 
#> 
[39m
[49m
#> # A tibble: 3 × 8
#>   cluster   t1.l1 t2.l1  t1.l2 t2.l2 nMuts is.clonal is.driver
#>   <chr>     <dbl> <dbl>  <dbl> <dbl> <dbl> <lgl>     <lgl>
#> 1 S1      0.0856  0.190 0      0         2 FALSE     FALSE
#> 2 S2      0.00686 0.362 0.0286 0.396     1 FALSE     TRUE
#> 3 C0      1       1     1      1         1 TRUE      FALSE
#> 
[32m✔
[39m Trees per region 
[32m
[32m1, 2, 1, 1
[32m
[39m
#> 
[36mℹ
[39m Starting phylogeny inference of clone C0

[36mℹ
[39m Total 
[32m
[32m2
[32m
[39m tree structures - search is 
[1mexahustive
[22m
#> 
[36mℹ
[39m Starting phylogeny inference of clone C0
#> 
[36mℹ
[39m Starting phylogeny inference of clone C0
── Ranking trees 
#> 
[36mℹ
[39m Starting phylogeny inference of clone C0

[32m✔
[39m 
[32m
[32m2
[32m
[39m  trees with non-zero score, storing 
[1m2
[22m
#> 
[36mℹ
[39m Starting phylogeny inference of clone C0

[32m✔
[39m Starting phylogeny inference of clone C0 ... done
#> 
#> 
[36mℹ
[39m Fitting model to cluster mutations

[36mℹ
[39m Starting clustering of clone C1 mutations
#> 
[44m
[37m [ VIBER - variational fit ] 
#> 
[39m
[49m
#> 
[36mℹ
[39m Input n = 8, with k < 8. Dirichlet concentration 
[32m
[32mα = 1e-06
[32m
[39m.
#> 
[36mℹ
[39m Starting clustering of clone C1 mutations

[36mℹ
[39m Beta (a_0, b_0) = (1, 1); q_i = prior. Optimise: ε = 1e-10 or 5000 steps, r = 10 starts.
#> 
[36mℹ
[39m Starting clustering of clone C1 mutations
#> 
[32m✔
[39m 
[1mVIBER fit
[22m completed in 0.04 mins (status: 
[32mconverged
[39m)
#> 
[36mℹ
[39m Starting clustering of clone C1 mutations
#> ── 
[30m
[43m[ VIBER ] My VIBER model
[49m
[39m n = 8 (w = 4 dimensions). Fit with 
[32mk = 8
[39m clusters. ─
#> 
[36mℹ
[39m Starting clustering of clone C1 mutations
• Clusters: π = 50% [
[33mC3
[39m], 25% [
[33mC8
[39m], 13% [
[33mC6
[39m], and 13% [
[33mC7
[39m], with π > 0.
#> 
[36mℹ
[39m Starting clustering of clone C1 mutations
• Binomials: θ = <0, 0.1, 0.01, 0.1> [
[33mC3
[39m], <0.18, 0, 0.01, 0> [
[33mC8
[39m], <0.2, 0.01,
#> 0.02, 0.23> [
[33mC6
[39m], and <0.43, 0.01, 0.02, 0.32> [
[33mC7
[39m].
#> 
[36mℹ
[39m Starting clustering of clone C1 mutations

[36mℹ
[39m Score(s): ELBO = -4630.119. Fit 
[32mconverged
[39m in 9 steps, ε = 1e-10.
#> 
[36mℹ
[39m Starting clustering of clone C1 mutations

[32m✔
[39m Reduced to k = 4 (from 8) selecting VIBER cluster(s) with π > 0.0625, and Binomial p > 0 in w > 0 dimension(s).
#> 
[36mℹ
[39m Starting clustering of clone C1 mutations

[32m✔
[39m Starting clustering of clone C1 mutations ... done
#> 
#> 
[36mℹ
[39m Fitting model to cluster mutations

[36mℹ
[39m Starting phylogeny inference of clone C1
#> 
[44m
[37m [ ctree ~ clone trees generator for C1 ] 
#> 
[39m
[49m
#> # A tibble: 5 × 8
#>   cluster    t1.l1   t2.l1   t1.l2  t2.l2 nMuts is.clonal is.driver
#>   <chr>      <dbl>   <dbl>   <dbl>  <dbl> <dbl> <lgl>     <lgl>
#> 1 S1      0.000808 0.102   0.00619 0.0989     4 FALSE     TRUE
#> 2 S2      0.198    0.00557 0.0170  0.225      1 FALSE     FALSE
#> 3 S3      0.430    0.00560 0.0171  0.318      1 FALSE     FALSE
#> 4 S4      0.184    0.00281 0       0          2 FALSE     FALSE
#> 5 C1      1        1       1       1          1 TRUE      FALSE
#> 
[32m✔
[39m Trees per region 
[32m
[32m6, 1, 2, 5
[32m
[39m
#> 
[36mℹ
[39m Starting phylogeny inference of clone C1

[36mℹ
[39m Total 
[32m
[32m18
[32m
[39m tree structures - search is 
[1mexahustive
[22m
#> 
[36mℹ
[39m Starting phylogeny inference of clone C1
#> 
[36mℹ
[39m Starting phylogeny inference of clone C1
── Ranking trees 
#> 
[36mℹ
[39m Starting phylogeny inference of clone C1

[32m✔
[39m 
[32m
[32m18
[32m
[39m  trees with non-zero score, storing 
[1m18
[22m
#> 
[36mℹ
[39m Starting phylogeny inference of clone C1

[32m✔
[39m Starting phylogeny inference of clone C1 ... done
#> 
#> 
[36mℹ
[39m Fitting model to cluster mutations

[36mℹ
[39m Starting clustering of clone C4 mutations
#> 
[44m
[37m [ VIBER - variational fit ] 
#> 
[39m
[49m
#> 
[36mℹ
[39m Input n = 6, with k < 6. Dirichlet concentration 
[32m
[32mα = 1e-06
[32m
[39m.
#> 
[36mℹ
[39m Starting clustering of clone C4 mutations

[36mℹ
[39m Beta (a_0, b_0) = (1, 1); q_i = prior. Optimise: ε = 1e-10 or 5000 steps, r = 10 starts.
#> 
[36mℹ
[39m Starting clustering of clone C4 mutations
#> 
[32m✔
[39m 
[1mVIBER fit
[22m completed in 0.04 mins (status: 
[32mconverged
[39m)
#> 
[36mℹ
[39m Starting clustering of clone C4 mutations
#> ── 
[30m
[43m[ VIBER ] My VIBER model
[49m
[39m n = 6 (w = 4 dimensions). Fit with 
[32mk = 6
[39m clusters. ─
#> 
[36mℹ
[39m Starting clustering of clone C4 mutations
• Clusters: π = 33% [
[33mC3
[39m], 17% [
[33mC1
[39m], 17% [
[33mC4
[39m], 17% [
[33mC5
[39m], and 17% [
[33mC6
[39m], with π >
#> 0.
#> 
[36mℹ
[39m Starting clustering of clone C4 mutations
• Binomials: θ = <0.22, 0, 0.01, 0> [
[33mC3
[39m], <0.36, 0, 0.02, 0.28> [
[33mC1
[39m], <0, 0,
#> 0.02, 0.29> [
[33mC4
[39m], <0.19, 0.22, 0.3, 0.2> [
[33mC5
[39m], and <0, 0, 0.02, 0> [
[33mC6
[39m].
#> 
[36mℹ
[39m Starting clustering of clone C4 mutations

[36mℹ
[39m Score(s): ELBO = -3580.958. Fit 
[32mconverged
[39m in 6 steps, ε = 1e-10.
#> 
[36mℹ
[39m Starting clustering of clone C4 mutations

[32m✔
[39m Reduced to k = 5 (from 6) selecting VIBER cluster(s) with π > 0.0833333333333333, and Binomial p > 0 in w > 0 dimension(s).
#> 
[36mℹ
[39m Starting clustering of clone C4 mutations

[32m✔
[39m Starting clustering of clone C4 mutations ... done
#> 
#> 
[36mℹ
[39m Fitting model to cluster mutations

[36mℹ
[39m Starting phylogeny inference of clone C4
#> 
[44m
[37m [ ctree ~ clone trees generator for C4 ] 
#> 
[39m
[49m
#> # A tibble: 6 × 8
#>   cluster t1.l1   t2.l1  t1.l2 t2.l2 nMuts is.clonal is.driver
#>   <chr>   <dbl>   <dbl>  <dbl> <dbl> <dbl> <lgl>     <lgl>
#> 1 S1      0.358 0.00473 0.0170 0.275     1 FALSE     FALSE
#> 2 S2      0.219 0.00239 0      0         2 FALSE     FALSE
#> 3 S3      0     0       0.0186 0.292     1 FALSE     FALSE
#> 4 S4      0.192 0.222   0.296  0.198     1 FALSE     FALSE
#> 5 S5      0     0       0      0         1 FALSE     TRUE
#> 6 C4      1     1       1      1         1 TRUE      FALSE
#> 
[32m✔
[39m Trees per region 
[32m
[32m5, 1, 6, 5
[32m
[39m
#> 
[36mℹ
[39m Starting phylogeny inference of clone C4

[36mℹ
[39m Total 
[32m
[32m48
[32m
[39m tree structures - search is 
[1mexahustive
[22m
#> 
[36mℹ
[39m Starting phylogeny inference of clone C4

[31m✖
[39m Starting phylogeny inference of clone C4 ... failed
#> 
#> 
[36mℹ
[39m Fitting model to cluster mutations
#> <subscriptOutOfBoundsError in model[var, ]: subscript out of bounds>
#> 
[36mℹ
[39m Starting clustering of clone C7 mutations
#> 
[44m
[37m [ VIBER - variational fit ] 
#> 
[39m
[49m
#> 
[36mℹ
[39m Input n = 2, with k < 2. Dirichlet concentration 
[32m
[32mα = 1e-06
[32m
[39m.
#> 
[36mℹ
[39m Starting clustering of clone C7 mutations

[36mℹ
[39m Beta (a_0, b_0) = (1, 1); q_i = prior. Optimise: ε = 1e-10 or 5000 steps, r = 10 starts.
#> 
[36mℹ
[39m Starting clustering of clone C7 mutations
#> 
[32m✔
[39m 
[1mVIBER fit
[22m completed in 0.03 mins (status: 
[32mconverged
[39m)
#> 
[36mℹ
[39m Starting clustering of clone C7 mutations
#> ── 
[30m
[43m[ VIBER ] My VIBER model
[49m
[39m n = 2 (w = 4 dimensions). Fit with 
[32mk = 2
[39m clusters. ─
#> 
[36mℹ
[39m Starting clustering of clone C7 mutations
• Clusters: π = 50% [
[33mC1
[39m] and 50% [
[33mC2
[39m], with π > 0.
#> 
[36mℹ
[39m Starting clustering of clone C7 mutations
• Binomials: θ = <0.4, 0, 0.31, 0.35> [
[33mC1
[39m] and <0.01, 0.11, 0, 0> [
[33mC2
[39m].
#> 
[36mℹ
[39m Starting clustering of clone C7 mutations

[36mℹ
[39m Score(s): ELBO = -1584.479. Fit 
[32mconverged
[39m in 5 steps, ε = 1e-10.
#> 
[36mℹ
[39m Starting clustering of clone C7 mutations

[32m✔
[39m Starting clustering of clone C7 mutations ... done
#> 
#> 
[36mℹ
[39m Fitting model to cluster mutations

[36mℹ
[39m Starting phylogeny inference of clone C7
#> 
[44m
[37m [ ctree ~ clone trees generator for C7 ] 
#> 
[39m
[49m
#> # A tibble: 3 × 8
#>   cluster  t1.l1   t2.l1 t1.l2 t2.l2 nMuts is.clonal is.driver
#>   <chr>    <dbl>   <dbl> <dbl> <dbl> <dbl> <lgl>     <lgl>
#> 1 S1      0.396  0.00234 0.308 0.348     1 FALSE     TRUE
#> 2 S2      0.0104 0.112   0     0         1 FALSE     FALSE
#> 3 C7      1      1       1     1         1 TRUE      FALSE
#> 
[32m✔
[39m Trees per region 
[32m
[32m2, 1, 1, 1
[32m
[39m
#> 
[36mℹ
[39m Starting phylogeny inference of clone C7

[36mℹ
[39m Total 
[32m
[32m2
[32m
[39m tree structures - search is 
[1mexahustive
[22m
#> 
[36mℹ
[39m Starting phylogeny inference of clone C7
#> 
[36mℹ
[39m Starting phylogeny inference of clone C7
── Ranking trees 
#> 
[36mℹ
[39m Starting phylogeny inference of clone C7

[32m✔
[39m 
[32m
[32m2
[32m
[39m  trees with non-zero score, storing 
[1m2
[22m
#> 
[36mℹ
[39m Starting phylogeny inference of clone C7

[32m✔
[39m Starting phylogeny inference of clone C7 ... done
#> 
#> 
[36mℹ
[39m Fitting model to cluster mutations

[32m✔
[39m Fitting model to cluster mutations ... done
#> 
#> 
[36mℹ
[39m Fitting model to estimate population growth rates
#> 
[36mℹ
[39m Starting growth models inference of clone C0
#> 
[32m✔
[39m Starting growth models inference of clone C0 ... done
#> 
#> 
[36mℹ
[39m Fitting model to estimate population growth rates

[36mℹ
[39m Starting growth models inference of clone C1
#> 
[32m✔
[39m Starting growth models inference of clone C1 ... done
#> 
#> 
[36mℹ
[39m Fitting model to estimate population growth rates

[36mℹ
[39m Starting growth models inference of clone C2
#> 
[32m✔
[39m Starting growth models inference of clone C2 ... done
#> 
#> 
[36mℹ
[39m Fitting model to estimate population growth rates

[36mℹ
[39m Starting growth models inference of clone C3
#> 
[32m✔
[39m Starting growth models inference of clone C3 ... done
#> 
#> 
[36mℹ
[39m Fitting model to estimate population growth rates

[36mℹ
[39m Starting growth models inference of clone C4
#> 
[32m✔
[39m Starting growth models inference of clone C4 ... done
#> 
#> 
[36mℹ
[39m Fitting model to estimate population growth rates

[36mℹ
[39m Starting growth models inference of clone C5
#> 
[32m✔
[39m Starting growth models inference of clone C5 ... done
#> 
#> 
[36mℹ
[39m Fitting model to estimate population growth rates

[36mℹ
[39m Starting growth models inference of clone C6
#> 
[32m✔
[39m Starting growth models inference of clone C6 ... done
#> 
#> 
[36mℹ
[39m Fitting model to estimate population growth rates

[36mℹ
[39m Starting growth models inference of clone C7
#> 
[32m✔
[39m Starting growth models inference of clone C7 ... done
#> 
#> 
[36mℹ
[39m Fitting model to estimate population growth rates

[32m✔
[39m Fitting model to estimate population growth rates ... done

Printing the fitted object displays the following information about the data:

  • lineages and timepoints present in the data,

  • number of integration sites,

  • number of inferred clones of ISs, estimated via model selection on the input range of number of clusters,

  • for each clone, the number of assigned ISs and the mean coverage, per timepoint and lineage.

data(x.example)
x.example
#> ── [ lineaGT ]  ──── Python: /usr/share/miniconda/envs/lineagt-env/bin/python ──
#> → Lineages: l1 and l2.
#> → Timepoints: t1 and t2.
#> → Number of Insertion Sites: 66.
#> 
#> ── Optimal IS model with k = 8.
#> 
#>     C4 (19 ISs) : l1 [285, 209]; l2 [ 51, 492] 
#>     C1 (15 ISs) : l1 [245, 177]; l2 [ 23, 289] 
#>      C0 (6 ISs) : l1 [145, 240]; l2 [ 32, 373] 
#>      C2 (6 ISs) : l1 [  1, 547]; l2 [  1, 388] 
#>      C3 (6 ISs) : l1 [ 92, 109]; l2 [245, 751] 
#>      C5 (6 ISs) : l1 [  0, 551]; l2 [  1, 828] 
#>      C6 (4 ISs) : l1 [330,  16]; l2 [ 17,  38] 
#>      C7 (4 ISs) : l1 [  0, 426]; l2 [  1, 198]