Skip to contents
library(dplyr)
n_clocks=3
n_events=8
purity=0.9 
coverage=100 
epsilon=0.20 
seed = 123 
tolerance = 0.0001
max_attempts = 2
INIT = TRUE
min_mutations_number = 3


cat("coverage: ", coverage)

mu = 1e-4 # mutation rate
cat("mutation rate: ", mu)

w = 1e-2 # cell division rate
cat("cell division rate: ", w)

l = 2e7 # length of the segment
cat("length of the segment: ", l)

time_interval = 7
cat("time_interval: ", time_interval)


res_simulate <- tickTack::get_simulation_tickTack(number_clocks=n_clocks, 
                                               number_events=n_events, 
                                               purity=purity, 
                                               coverage=coverage, 
                                               epsilon=epsilon, 
                                               seed = seed)
  
data_simulation = as.data.frame(res_simulate$data_simulation) %>% mutate(chr=   1:length(res_simulate$data_simulation$taus))
x = res_simulate$x

df = x$cna %>% left_join(data_simulation)

# timing inference ticktack hierarchical

x <- tickTack::fit_h(x, max_attempts=max_attempts, INIT=INIT, tolerance = tolerance)


results_simulated <- x$results_timing
results_model_selection <- tickTack::model_selection_h(results_simulated)
best_K <- results_model_selection$best_K
data_simulation$taus
## [1] 0.1028646 0.1028646 0.4348927 0.4348927 0.9849570 0.9849570 0.9849570
## [8] 0.4348927

Results

The results object that is returned together with the CNAqc input object contains four components: data, draws_and_summary, log_lik_matrix_list and elbo_iterations.

# View summary for a specific K, here K = 2
results <- x$results

Interpreting the output

We can inspect the main output of interest to understand the timing of clonal peaks. results$draws_and_summary contains: - draws the draws from the approximate posterior distribution of the taus and weights; - summary a summary with the main statistics of the approximate posterior distributions; - summarized_results represents the clock assignment, a tibble with the estimate of taus for each segment with a copy number event that has been included in the hierarchical inference

# View summary for a specific K, here K = 2
results$draws_and_summary[[2]]$summary
## # A tibble: 18 × 7
##    variable    mean  median      sd     mad       q5     q95
##    <chr>      <dbl>   <dbl>   <dbl>   <dbl>    <dbl>   <dbl>
##  1 tau[1]   0.990   0.991   0.00300 0.00268 0.985    0.994  
##  2 tau[2]   0.0882  0.0873  0.0192  0.0198  0.0603   0.121  
##  3 w[1,1]   0.0178  0.0106  0.0230  0.00981 0.00187  0.0605 
##  4 w[2,1]   0.00321 0.00193 0.00425 0.00182 0.000331 0.00989
##  5 w[3,1]   0.297   0.297   0.0621  0.0648  0.196    0.396  
##  6 w[4,1]   0.224   0.221   0.0267  0.0267  0.183    0.271  
##  7 w[5,1]   0.996   0.998   0.00694 0.00180 0.989    1.00   
##  8 w[6,1]   0.994   0.996   0.00713 0.00328 0.980    0.999  
##  9 w[7,1]   0.969   0.976   0.0248  0.0168  0.921    0.993  
## 10 w[8,1]   0.245   0.245   0.0245  0.0258  0.207    0.286  
## 11 w[1,2]   0.982   0.989   0.0230  0.00981 0.939    0.998  
## 12 w[2,2]   0.997   0.998   0.00425 0.00182 0.990    1.00   
## 13 w[3,2]   0.703   0.703   0.0621  0.0648  0.604    0.804  
## 14 w[4,2]   0.776   0.779   0.0267  0.0267  0.729    0.817  
## 15 w[5,2]   0.00351 0.00187 0.00694 0.00180 0.000318 0.0108 
## 16 w[6,2]   0.00602 0.00351 0.00713 0.00327 0.000712 0.0201 
## 17 w[7,2]   0.0314  0.0243  0.0248  0.0168  0.00723  0.0794 
## 18 w[8,2]   0.755   0.755   0.0245  0.0258  0.714    0.793
# View detailed summarized results for a specific K, here K = 2
results$draws_and_summary[[2]]$summarized_results
## # A tibble: 8 × 8
##   segment_original_indx segment_name segment_id karyotype   chr clock_mean
##                   <int> <chr>             <dbl> <chr>     <int>      <dbl>
## 1                     1 1_1_2e+07             1 2:1           1     0.0873
## 2                     2 2_1_2e+07             2 2:0           2     0.0873
## 3                     3 3_1_2e+07             3 2:1           3     0.0873
## 4                     4 4_1_2e+07             4 2:2           4     0.0873
## 5                     5 5_1_2e+07             5 2:2           5     0.991 
## 6                     6 6_1_2e+07             6 2:2           6     0.991 
## 7                     7 7_1_2e+07             7 2:1           7     0.991 
## 8                     8 8_1_2e+07             8 2:2           8     0.0873
## # ℹ 2 more variables: clock_low <dbl>, clock_high <dbl>

Obtain the best K with model_selection_h

W e can run the model_selection_h function to obtain the scores for each inference performed with a different K and take the one with best ICL score if the BIC score prefer 2 components instead of 1, otherwise choose 1 as best K. The function takes as input the results and n_components and outputs the best_K and the corresponding best_fit together with the model_selection_tibble and the entropy_list used to evaluate the ICL score.

results_model_selection <- tickTack::model_selection_h(results, n_components = 0)

best_K <- results_model_selection$best_K
model_selection_tibble <- results_model_selection$model_selection_tibble
entropy <- results_model_selection$entropy_list
print(best_K)

Visulizing the output

The results can be viewed is genome-wise perspective using the tickTack::plot_timing_h function.

tickTack::plot_timing_h(results, best_K)

Visualize distributions of draws from the approximate posterior

The approximate posterior distributions can be viewed using the tickTack::plot_posterior_clocks_h and tickTack::plot_posterior_weights_h functions, that internally use functions from Bayesplot.

posterior_clocks <- tickTack::plot_posterior_clocks_h(results, best_K)
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
posterior_weights <- tickTack::plot_posterior_weights_h(results, best_K)

Visualize the behavior of the ELBO during the inference

K = nrow(results_model_selection$model_selection_tibble)

p_elbo <- list()
for (i in 1:K){
  p_elbo[[i]] <- tickTack::plot_elbo_h(results$elbo_iterations[[i]]) + ggplot2::ggtitle(paste0("K = ", i))
}
p_elbo <- gridExtra::grid.arrange(grobs = p_elbo, ncol = 2)  #add global title

p_elbo
## TableGrob (2 x 2) "arrange": 3 grobs
##   z     cells    name           grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (1-1,2-2) arrange gtable[layout]
## 3 3 (2-2,1-1) arrange gtable[layout]

Visualize all the inference results for each K

plot_model_selection_inference <- list()
for (i in 1:K){
  plot_model_selection_inference[[i]] <- tickTack::plot_timing_h(results, i) + ggplot2::ggtitle(paste0("K = ", i))
}
plot_model_selection_inference <- gridExtra::grid.arrange(grobs = plot_model_selection_inference, ncol = 2) #add global title

plot_model_selection_inference
## TableGrob (2 x 2) "arrange": 3 grobs
##   z     cells    name           grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (1-1,2-2) arrange gtable[layout]
## 3 3 (2-2,1-1) arrange gtable[layout]