knitr::opts_chunk$set(echo = TRUE)
library(revolver)

Since the new release of REVOLVER (>= 0.3), we have implemented the internal structure of the package objects using the tidy approach, and created different types of getters which can be used to access:

  • the data used to build the cohort;

  • the trees available for each patient;

  • the clusters computed by REVOLVER;

  • the results from the jackknife computations used by REVOLVER to determine the stability of the results;

  • other summary features (statistics, in broad sense).

In this vignette, we show the getters using one of the cohort objects released in the evoverse.datasets R package.

# Data released in the 'evoverse.datasets' package
data('TRACERx_NEJM_2017_REVOLVER', package = 'evoverse.datasets')

Access patient-level data

Several types of getters can be used to perform queries on the data. All functions follow a common parametrization pattern, as they require

  • x a REVOLVER cohort object;
  • patients a list of patient IDs that will be used to subset the outputs (all by default).
# Access all data for a patient
Data(TRACERx_NEJM_2017_REVOLVER, 'CRUK0001')
## # A tibble: 7 x 12
##   id    Misc  patientID variantID is.driver is.clonal cluster cluster_size CCF  
##   <chr> <chr> <chr>     <chr>     <lgl>     <lgl>     <chr>          <int> <chr>
## 1 __mu… CRUK… CRUK0001  NF1       TRUE      FALSE     1                  1 R1:0…
## 2 __mu… CRUK… CRUK0001  ARHGAP35  TRUE      FALSE     2                  1 R1:0…
## 3 __mu… CRUK… CRUK0001  TP53      TRUE      TRUE      3                  4 R1:0…
## 4 __mu… CRUK… CRUK0001  MGA       TRUE      TRUE      3                  4 R1:0…
## 5 __mu… CRUK… CRUK0001  WRN       TRUE      TRUE      3                  4 R1:0…
## 6 __mu… Anno… CRUK0001  EGFR      TRUE      TRUE      3                  4 R1:0…
## 7 __mu… CRUK… CRUK0001  PASK      TRUE      FALSE     5                  1 R1:0…
## # … with 3 more variables: R1 <dbl>, R2 <dbl>, R3 <dbl>
# Access only the drivers for a patient
Drivers(TRACERx_NEJM_2017_REVOLVER, 'CRUK0001')
## # A tibble: 7 x 12
##   id    Misc  patientID variantID is.driver is.clonal cluster cluster_size CCF  
##   <chr> <chr> <chr>     <chr>     <lgl>     <lgl>     <chr>          <int> <chr>
## 1 __mu… CRUK… CRUK0001  NF1       TRUE      FALSE     1                  1 R1:0…
## 2 __mu… CRUK… CRUK0001  ARHGAP35  TRUE      FALSE     2                  1 R1:0…
## 3 __mu… CRUK… CRUK0001  TP53      TRUE      TRUE      3                  4 R1:0…
## 4 __mu… CRUK… CRUK0001  MGA       TRUE      TRUE      3                  4 R1:0…
## 5 __mu… CRUK… CRUK0001  WRN       TRUE      TRUE      3                  4 R1:0…
## 6 __mu… Anno… CRUK0001  EGFR      TRUE      TRUE      3                  4 R1:0…
## 7 __mu… CRUK… CRUK0001  PASK      TRUE      FALSE     5                  1 R1:0…
## # … with 3 more variables: R1 <dbl>, R2 <dbl>, R3 <dbl>
# Access the name of the clonal cluster for this patient
Clonal_cluster(TRACERx_NEJM_2017_REVOLVER, 'CRUK0001')
## [1] "3"
# Get the list of truncal (i.e., clonal) mutations in a patient
Truncal(TRACERx_NEJM_2017_REVOLVER, 'CRUK0001')
## # A tibble: 4 x 12
##   id    Misc  patientID variantID is.driver is.clonal cluster cluster_size CCF  
##   <chr> <chr> <chr>     <chr>     <lgl>     <lgl>     <chr>          <int> <chr>
## 1 __mu… CRUK… CRUK0001  TP53      TRUE      TRUE      3                  4 R1:0…
## 2 __mu… CRUK… CRUK0001  MGA       TRUE      TRUE      3                  4 R1:0…
## 3 __mu… CRUK… CRUK0001  WRN       TRUE      TRUE      3                  4 R1:0…
## 4 __mu… Anno… CRUK0001  EGFR      TRUE      TRUE      3                  4 R1:0…
## # … with 3 more variables: R1 <dbl>, R2 <dbl>, R3 <dbl>
# Get the list of subclonal mutations in a patient
Subclonal(TRACERx_NEJM_2017_REVOLVER, 'CRUK0001')
## # A tibble: 3 x 12
##   id    Misc  patientID variantID is.driver is.clonal cluster cluster_size CCF  
##   <chr> <chr> <chr>     <chr>     <lgl>     <lgl>     <chr>          <int> <chr>
## 1 __mu… CRUK… CRUK0001  NF1       TRUE      FALSE     1                  1 R1:0…
## 2 __mu… CRUK… CRUK0001  ARHGAP35  TRUE      FALSE     2                  1 R1:0…
## 3 __mu… CRUK… CRUK0001  PASK      TRUE      FALSE     5                  1 R1:0…
## # … with 3 more variables: R1 <dbl>, R2 <dbl>, R3 <dbl>
# Access the names of the samples for a patient
Samples(TRACERx_NEJM_2017_REVOLVER, 'CRUK0001')
## [1] "R1" "R2" "R3"
# Return the CCF entry for all the mutations of a patient.
CCF(TRACERx_NEJM_2017_REVOLVER, 'CRUK0001')
## # A tibble: 7 x 8
##   id            variantID is.driver is.clonal cluster    R1    R2    R3
##   <chr>         <chr>     <lgl>     <lgl>     <chr>   <dbl> <dbl> <dbl>
## 1 __mut_id_756  NF1       TRUE      FALSE     1        0.86  0     0   
## 2 __mut_id_1225 ARHGAP35  TRUE      FALSE     2        0.19  0     0.95
## 3 __mut_id_1350 TP53      TRUE      TRUE      3        0.99  0.99  1   
## 4 __mut_id_1466 MGA       TRUE      TRUE      3        0.99  0.99  1   
## 5 __mut_id_1519 WRN       TRUE      TRUE      3        0.97  0.98  0.99
## 6 __mut_id_1540 EGFR      TRUE      TRUE      3        0.99  0.99  1   
## 7 __mut_id_1796 PASK      TRUE      FALSE     5        0.82  0     0.71
# Return the CCF entry for all the clones of a patient, the overall CCF
# values are obtained by REVOLVER from the average of CCF values across clones.
CCF_clusters(TRACERx_NEJM_2017_REVOLVER, 'CRUK0001')
## # A tibble: 4 x 7
##   cluster nMuts is.driver is.clonal    R1    R2    R3
##   <chr>   <int> <lgl>     <lgl>     <dbl> <dbl> <dbl>
## 1 3           4 TRUE      TRUE       0.99  0.99  1   
## 2 1           1 TRUE      FALSE      0.86  0     0   
## 3 2           1 TRUE      FALSE      0.19  0     0.95
## 4 5           1 TRUE      FALSE      0.82  0     0.71

Access the trees in the cohort

Trees have getters similar to those for the data, and the getters distinguish between trees before and after the fit.

Note: The tree fits might be slightly different from the trees before the fit, because the Information Transfer of the trees before the fit is not expanded. Keep this in mind when comparing trees.

You can extract the tree of a patient, before its fit. This can be one specific tree (in terms of its rank), or all of them at once. Trees before the fit are indexed by their rank, which is obtained by ordering the scores computed from the tree structures before the fit.

These getters, for instance Phylo, take as parameters

  • x the cohort object;
  • p the patient identifier;
  • rank the rank of the tree to extract;
  • data to decide whether one wants the trees before the fit (trees), or the actual tree fits (fits).

By logic, if you are asking for the fit trees (data = 'fits'), the rank parameter is not considered (because there is only one top-scoring tree fit by REVOLVER).

# Access the top-rank tree for a patient
Phylo(TRACERx_NEJM_2017_REVOLVER, 'CRUK0001', rank = 1)
##  [ ctree - ctree rank 1/3 for CRUK0001 ] 
## 
## # A tibble: 4 x 7
##   cluster nMuts is.driver is.clonal    R1    R2    R3
##   <chr>   <int> <lgl>     <lgl>     <dbl> <dbl> <dbl>
## 1 3           4 TRUE      TRUE       0.99  0.99  1   
## 2 1           1 TRUE      FALSE      0.86  0     0   
## 3 2           1 TRUE      FALSE      0.19  0     0.95
## 4 5           1 TRUE      FALSE      0.82  0     0.71
## 
## Tree shape (drivers annotated)  
## 
##   \-GL
##    \-3 [R2] :: TP53, MGA, WRN, EGFR
##     |-1 :: NF1
##     | \-5 :: PASK
##     \-2 :: ARHGAP35
## 
## Information transfer  
## 
##    GL ---> TP53 
##    GL ---> MGA 
##    GL ---> WRN 
##    GL ---> EGFR 
##    TP53 ---> NF1 
##    MGA ---> NF1 
##    WRN ---> NF1 
##    EGFR ---> NF1 
##    TP53 ---> ARHGAP35 
##    MGA ---> ARHGAP35 
##    WRN ---> ARHGAP35 
##    EGFR ---> ARHGAP35 
##    NF1 ---> PASK 
## 
## Tree score 0.111111111111111
# Access all trees for a patient. We use CRUK0002 because it has only 2 trees
Phylo(TRACERx_NEJM_2017_REVOLVER, 'CRUK0002', rank = NULL)
## $`1`
##  [ ctree - ctree rank 1/2 for CRUK0002 ] 
## 
## # A tibble: 4 x 7
##   cluster nMuts is.driver is.clonal    R1    R2    R3
##   <chr>   <int> <lgl>     <lgl>     <dbl> <dbl> <dbl>
## 1 1           3 TRUE      FALSE      0     0.92  0   
## 2 2           2 TRUE      TRUE       0.99  0.98  0.99
## 3 5           1 TRUE      FALSE      0.78  0     0   
## 4 6           1 TRUE      FALSE      0.96  0.03  0.98
## 
## Tree shape (drivers annotated)  
## 
##   \-GL
##    \-2 :: MET, TERT
##     |-6 :: EP300
##     | \-5 :: NF1
##     \-1 :: RB1, IKZF1, KRAS
## 
## Information transfer  
## 
##    MET ---> RB1 
##    MET ---> IKZF1 
##    MET ---> KRAS 
##    TERT ---> RB1 
##    TERT ---> IKZF1 
##    TERT ---> KRAS 
##    GL ---> MET 
##    GL ---> TERT 
##    EP300 ---> NF1 
##    MET ---> EP300 
##    TERT ---> EP300 
## 
## Tree score 0.75 
## 
## $`2`
##  [ ctree - ctree rank 2/2 for CRUK0002 ] 
## 
## # A tibble: 4 x 7
##   cluster nMuts is.driver is.clonal    R1    R2    R3
##   <chr>   <int> <lgl>     <lgl>     <dbl> <dbl> <dbl>
## 1 1           3 TRUE      FALSE      0     0.92  0   
## 2 2           2 TRUE      TRUE       0.99  0.98  0.99
## 3 5           1 TRUE      FALSE      0.78  0     0   
## 4 6           1 TRUE      FALSE      0.96  0.03  0.98
## 
## Tree shape (drivers annotated)  
## 
##   \-GL
##    \-2 :: MET, TERT
##     \-1 :: RB1, IKZF1, KRAS
##      \-6 :: EP300
##       \-5 :: NF1
## 
## Information transfer  
## 
##    MET ---> RB1 
##    MET ---> IKZF1 
##    MET ---> KRAS 
##    TERT ---> RB1 
##    TERT ---> IKZF1 
##    TERT ---> KRAS 
##    GL ---> MET 
##    GL ---> TERT 
##    EP300 ---> NF1 
##    RB1 ---> EP300 
##    IKZF1 ---> EP300 
##    KRAS ---> EP300 
## 
## Tree score 0.0833333333333333

Notice that in the printing of a tree to screen you can immediately see the Information Transfer (IT) for the driver genes. In general, you can access the IT of a tree with another getter, which takes as extra parameter type in order to return either the transfer across drivers, or across clones annotated in a tree.

# Information Transfer for the drivers, top-ranking tree
ITransfer(TRACERx_NEJM_2017_REVOLVER, "CRUK0001", rank = 1, type = 'drivers')
## # A tibble: 13 x 2
##    from  to      
##    <chr> <chr>   
##  1 GL    TP53    
##  2 GL    MGA     
##  3 GL    WRN     
##  4 GL    EGFR    
##  5 TP53  NF1     
##  6 MGA   NF1     
##  7 WRN   NF1     
##  8 EGFR  NF1     
##  9 TP53  ARHGAP35
## 10 MGA   ARHGAP35
## 11 WRN   ARHGAP35
## 12 EGFR  ARHGAP35
## 13 NF1   PASK
# Information Transfer for the clones, top-ranking tree
ITransfer(TRACERx_NEJM_2017_REVOLVER, "CRUK0001", rank = 1, type = 'clones')
## # A tibble: 4 x 2
##   from  to   
##   <chr> <chr>
## 1 GL    3    
## 2 3     1    
## 3 3     2    
## 4 1     5

Fit trees can be accessed using the data argument. Essentially this is like before, but does not require specifying a rank parameter.

# Access the fit tree for a patient
Phylo(TRACERx_NEJM_2017_REVOLVER, 'CRUK0001', data = 'fits')
##  [ ctree - ctree rank 1/3 for CRUK0001 - Information Transfer expanded via Transfer Learning ] 
## 
## # A tibble: 4 x 7
##   cluster nMuts is.driver is.clonal    R1    R2    R3
##   <chr>   <int> <lgl>     <lgl>     <dbl> <dbl> <dbl>
## 1 3           4 TRUE      TRUE       0.99  0.99  1   
## 2 1           1 TRUE      FALSE      0.86  0     0   
## 3 2           1 TRUE      FALSE      0.19  0     0.95
## 4 5           1 TRUE      FALSE      0.82  0     0.71
## 
## Tree shape (drivers annotated)  
## 
##   \-GL
##    \-3 [R2] :: TP53, MGA, WRN, EGFR
##     |-1 :: NF1
##     | \-5 :: PASK
##     \-2 :: ARHGAP35
## 
## Information transfer  
## 
##    GL ---> EGFR 
##    GL ---> WRN 
##    GL ---> MGA 
##    EGFR ---> TP53 
##    WRN ---> TP53 
##    TP53 ---> NF1 
##    MGA ---> NF1 
##    TP53 ---> ARHGAP35 
##    MGA ---> ARHGAP35 
##    NF1 ---> PASK 
## 
## Tree score 0.111111111111111
# Information Transfer for the drivers, top-ranking tree. Notice that this is different
# from the result of the above call, because the transfer after fitting is expanded
ITransfer(TRACERx_NEJM_2017_REVOLVER, "CRUK0001", rank = 1, type = 'drivers', data = 'fits')
## # A tibble: 10 x 2
##    from  to      
##    <chr> <chr>   
##  1 GL    EGFR    
##  2 GL    WRN     
##  3 GL    MGA     
##  4 EGFR  TP53    
##  5 WRN   TP53    
##  6 TP53  NF1     
##  7 MGA   NF1     
##  8 TP53  ARHGAP35
##  9 MGA   ARHGAP35
## 10 NF1   PASK

Access clusters and jackknife statistics

Once REVOLVER clusters have been computed, a tibble is available that maps each patient to a cluster.

# Hard clustering assignments
Cluster(TRACERx_NEJM_2017_REVOLVER)
## # A tibble: 99 x 2
##    patientID cluster
##    <chr>     <chr>  
##  1 CRUK0001  C2     
##  2 CRUK0002  C1     
##  3 CRUK0003  C2     
##  4 CRUK0004  C2     
##  5 CRUK0005  C3     
##  6 CRUK0006  C3     
##  7 CRUK0007  C2     
##  8 CRUK0008  C1     
##  9 CRUK0009  C3     
## 10 CRUK0010  C2     
## # … with 89 more rows
# Specify a patient
Cluster(TRACERx_NEJM_2017_REVOLVER, "CRUK0001")
## # A tibble: 1 x 2
##   patientID cluster
##   <chr>     <chr>  
## 1 CRUK0001  C2

Once the cluster jackknife statistics have been computed, they can be extracted from the cohort object.

# A matrix that reports the probability that every pair of patients is assigned to 
# the same cluster across the jackknife resamples
Jackknife_patient_coclustering(TRACERx_NEJM_2017_REVOLVER) %>% head
##          CRUK0001 CRUK0002 CRUK0003 CRUK0004 CRUK0005 CRUK0006 CRUK0007
## CRUK0001     0.00        0     0.55     0.75      0.0      0.0     0.65
## CRUK0002     0.00        0     0.00     0.00      0.0      0.0     0.00
## CRUK0003     0.55        0     0.00     0.45      0.0      0.0     0.70
## CRUK0004     0.75        0     0.45     0.00      0.0      0.0     0.55
## CRUK0005     0.00        0     0.00     0.00      0.0      0.7     0.00
## CRUK0006     0.00        0     0.00     0.00      0.7      0.0     0.00
##          CRUK0008 CRUK0009 CRUK0010 CRUK0011 CRUK0012 CRUK0013 CRUK0014
## CRUK0001     0.00     0.00     0.75     0.00     0.75     0.00      0.0
## CRUK0002     0.85     0.00     0.00     0.05     0.00     0.75      0.0
## CRUK0003     0.00     0.00     0.75     0.00     0.75     0.00      0.0
## CRUK0004     0.00     0.00     0.60     0.00     0.60     0.00      0.0
## CRUK0005     0.00     0.90     0.00     0.00     0.00     0.00      0.1
## CRUK0006     0.00     0.75     0.00     0.00     0.00     0.00      0.1
##          CRUK0015 CRUK0016 CRUK0017 CRUK0018 CRUK0019 CRUK0020 CRUK0021
## CRUK0001     0.90     0.00      0.0     0.00     0.75     0.00     0.80
## CRUK0002     0.00     0.05      0.0     0.05     0.00     0.00     0.00
## CRUK0003     0.60     0.00      0.0     0.00     0.65     0.00     0.55
## CRUK0004     0.75     0.00      0.0     0.00     0.55     0.00     0.70
## CRUK0005     0.00     0.00      0.7     0.00     0.00     0.05     0.00
## CRUK0006     0.00     0.00      0.9     0.00     0.00     0.05     0.00
##          CRUK0022 CRUK0023 CRUK0024 CRUK0025 CRUK0026 CRUK0027 CRUK0028
## CRUK0001     0.95        0     0.05      0.0     0.90      0.0     0.65
## CRUK0002     0.00        0     0.70      0.0     0.00      0.0     0.00
## CRUK0003     0.60        0     0.05      0.0     0.55      0.0     0.65
## CRUK0004     0.80        0     0.05      0.0     0.75      0.0     0.55
## CRUK0005     0.00        0     0.00      0.1     0.00      0.1     0.00
## CRUK0006     0.00        0     0.00      0.1     0.00      0.1     0.00
##          CRUK0029 CRUK0030 CRUK0031 CRUK0032 CRUK0033 CRUK0034 CRUK0035
## CRUK0001     0.00     0.00      0.0     0.00      0.0     0.00     0.00
## CRUK0002     0.00     0.00      0.6     0.75      0.8     0.05     0.00
## CRUK0003     0.00     0.00      0.0     0.00      0.0     0.00     0.00
## CRUK0004     0.00     0.00      0.0     0.00      0.0     0.00     0.00
## CRUK0005     0.90     0.05      0.0     0.00      0.0     0.00     0.90
## CRUK0006     0.75     0.05      0.0     0.00      0.0     0.00     0.75
##          CRUK0036 CRUK0037 CRUK0038 CRUK0039 CRUK0040 CRUK0041 CRUK0042
## CRUK0001     0.00     0.00     0.00     0.00        0     0.75     0.00
## CRUK0002     0.35     0.05     0.05     0.05        0     0.00     0.05
## CRUK0003     0.05     0.00     0.00     0.00        0     0.70     0.00
## CRUK0004     0.00     0.00     0.00     0.00        0     0.60     0.00
## CRUK0005     0.00     0.00     0.00     0.00        0     0.00     0.00
## CRUK0006     0.00     0.00     0.00     0.00        0     0.00     0.00
##          CRUK0043 CRUK0044 CRUK0045 CRUK0046 CRUK0047 CRUK0048 CRUK0049
## CRUK0001     0.00     0.00     0.00      0.0     0.00     0.85     0.10
## CRUK0002     0.75     0.05     0.85      0.8     0.65     0.00     0.00
## CRUK0003     0.00     0.00     0.00      0.0     0.00     0.50     0.05
## CRUK0004     0.00     0.00     0.00      0.0     0.00     0.70     0.10
## CRUK0005     0.00     0.00     0.00      0.0     0.00     0.00     0.00
## CRUK0006     0.00     0.00     0.00      0.0     0.00     0.00     0.00
##          CRUK0050 CRUK0051 CRUK0052 CRUK0054 CRUK0055 CRUK0056 CRUK0057
## CRUK0001     0.00     0.05     0.00     0.65      0.0      0.0     0.00
## CRUK0002     0.85     0.00     0.00     0.00      0.8      0.8     0.65
## CRUK0003     0.00     0.00     0.00     0.60      0.0      0.0     0.00
## CRUK0004     0.00     0.05     0.00     0.55      0.0      0.0     0.00
## CRUK0005     0.00     0.00     0.05     0.00      0.0      0.0     0.00
## CRUK0006     0.00     0.00     0.05     0.00      0.0      0.0     0.00
##          CRUK0058 CRUK0059 CRUK0060 CRUK0061 CRUK0062 CRUK0063 CRUK0064
## CRUK0001     0.95     0.00      0.0     0.00        0     0.00     0.00
## CRUK0002     0.00     0.05      0.7     0.75        0     0.00     0.00
## CRUK0003     0.60     0.00      0.0     0.00        0     0.05     0.00
## CRUK0004     0.80     0.00      0.0     0.00        0     0.00     0.00
## CRUK0005     0.00     0.00      0.0     0.00        0     0.00     0.80
## CRUK0006     0.00     0.00      0.0     0.00        0     0.00     0.65
##          CRUK0065 CRUK0066 CRUK0067 CRUK0068 CRUK0069 CRUK0070 CRUK0071
## CRUK0001        0     0.00     0.00        0     0.00     0.00     0.00
## CRUK0002        0     0.05     0.00        0     0.00     0.00     0.05
## CRUK0003        0     0.00     0.05        0     0.00     0.00     0.05
## CRUK0004        0     0.00     0.00        0     0.00     0.00     0.00
## CRUK0005        0     0.00     0.00        0     0.85     0.65     0.00
## CRUK0006        0     0.00     0.00        0     0.75     0.50     0.00
##          CRUK0072 CRUK0073 CRUK0074 CRUK0075 CRUK0076 CRUK0077 CRUK0078
## CRUK0001     0.75      0.0        0     0.00        0     0.00     0.00
## CRUK0002     0.00      0.6        0     0.05        0     0.00     0.05
## CRUK0003     0.50      0.0        0     0.00        0     0.00     0.05
## CRUK0004     0.60      0.0        0     0.00        0     0.00     0.00
## CRUK0005     0.00      0.0        0     0.00        0     0.70     0.00
## CRUK0006     0.00      0.0        0     0.00        0     0.85     0.00
##          CRUK0079 CRUK0080 CRUK0081 CRUK0082 CRUK0083 CRUK0084 CRUK0085
## CRUK0001        0     0.80     0.00        0     0.00     0.00     0.00
## CRUK0002        0     0.00     0.05        0     0.00     0.75     0.65
## CRUK0003        0     0.50     0.00        0     0.05     0.00     0.00
## CRUK0004        0     0.65     0.00        0     0.00     0.00     0.00
## CRUK0005        0     0.00     0.00        0     0.00     0.00     0.00
## CRUK0006        0     0.00     0.00        0     0.00     0.00     0.00
##          CRUK0086 CRUK0087 CRUK0088 CRUK0089 CRUK0090 CRUK0091 CRUK0092
## CRUK0001     0.00     0.00     0.00     0.00     0.00     0.00     0.00
## CRUK0002     0.00     0.00     0.00     0.15     0.15     0.05     0.25
## CRUK0003     0.00     0.00     0.00     0.05     0.00     0.00     0.05
## CRUK0004     0.00     0.00     0.00     0.00     0.00     0.00     0.00
## CRUK0005     0.85     0.25     0.75     0.00     0.00     0.10     0.05
## CRUK0006     0.70     0.15     0.60     0.00     0.00     0.10     0.05
##          CRUK0093 CRUK0094 CRUK0095 CRUK0096 CRUK0097 CRUK0098 CRUK0099
## CRUK0001        0     0.00     0.00     0.00     0.00     0.00     0.00
## CRUK0002        0     0.75     0.70     0.05     0.00     0.00     0.00
## CRUK0003        0     0.00     0.00     0.00     0.00     0.00     0.00
## CRUK0004        0     0.00     0.00     0.00     0.00     0.00     0.00
## CRUK0005        0     0.00     0.05     0.00     0.75     0.90     0.65
## CRUK0006        0     0.00     0.05     0.00     0.60     0.75     0.85
##          CRUK0100
## CRUK0001        0
## CRUK0002        0
## CRUK0003        0
## CRUK0004        0
## CRUK0005        0
## CRUK0006        0
# A vector with the median stability per cluster
Jackknife_cluster_stability(TRACERx_NEJM_2017_REVOLVER)
##    C2    C1    C3    C4    C7    C5    C6   C11    C8   C10    C9 
## 0.700 0.750 0.750 0.850 0.600 0.700 0.550 0.425 0.550 0.375 0.600
# A tibble reporting the probability of detecting (at least in one patient) a trajectory 
# across the jackknife resamples, and the average number of patients where the trajectory
# is found across all resamples.
Jackknife_trajectories_stability(TRACERx_NEJM_2017_REVOLVER)
## # A tibble: 316 x 4
##    from     to    prob_resamp num_patients
##    <chr>    <chr>       <dbl>        <dbl>
##  1 ARHGAP35 TERT            1       0.0112
##  2 BRAF     TERT            1       0.0112
##  3 CCND1    UBR5            1       0.0225
##  4 CDKN2A   CYLD            1       0.0112
##  5 CDKN2A   PTPRC           1       0.0225
##  6 EGFR     CIC             1       0.0112
##  7 EP300    NF1             1       0.0225
##  8 FANCC    CYLD            1       0.0112
##  9 FAS      FLT4            1       0.0112
## 10 FAT1     CYLD            1       0.0112
## # … with 306 more rows

Summary statistics

A number of different types of Stats_* functions can be used to access cohort-level statistics.

Summaries for patients’ data

You can get a broad set of summary statistics for a custom set of patients. The statistics that are available in summarised format are patient-level (mutational burden, drivers etc.), and driver-level (frequency, clonality etc.).

# This returns patient-level statistics like the number of biopsies, overall mutations, drivers,
# clones with drivers, truncal and subclonal mutations.
# 
# This is also a synonym for `Stats(TRACERx_NEJM_2017_REVOLVER)`
Stats_cohort(TRACERx_NEJM_2017_REVOLVER)
## # A tibble: 99 x 7
##    patientID numBiopsies numMutations numDriverMutations numClonesWithDriver
##    <chr>           <int>        <int>              <int>               <int>
##  1 CRUK0001            3            7                  7                   4
##  2 CRUK0002            3            7                  7                   4
##  3 CRUK0003            5            4                  4                   2
##  4 CRUK0004            4            4                  4                   2
##  5 CRUK0005            4            6                  6                   2
##  6 CRUK0006            2            6                  6                   3
##  7 CRUK0007            2            3                  3                   1
##  8 CRUK0008            2            6                  6                   2
##  9 CRUK0009            4            7                  7                   2
## 10 CRUK0010            2            3                  3                   1
## # … with 89 more rows, and 2 more variables: numTruncalMutations <int>,
## #   numSubclonalMutations <int>
# This returns driver-level statistics like the number of times the driver is clonal,
# subclonal, or found in general, and each quantity normalized by cohort size (i.e., as a proportion)
Stats_drivers(TRACERx_NEJM_2017_REVOLVER)
## # A tibble: 79 x 7
##    variantID numClonal p_clonal numSubclonal p_subclonal N_tot  p_tot
##    <chr>         <dbl>    <dbl>        <dbl>       <dbl> <dbl>  <dbl>
##  1 TP53             53   0.535             3      0.0303    56 0.566 
##  2 KRAS             24   0.242             4      0.0404    28 0.283 
##  3 EGFR             21   0.212             1      0.0101    22 0.222 
##  4 PIK3CA           20   0.202             1      0.0101    21 0.212 
##  5 CDKN2A           14   0.141             0      0         14 0.141 
##  6 SOX2             14   0.141             0      0         14 0.141 
##  7 KEAP1            12   0.121             0      0         12 0.121 
##  8 TERT             11   0.111             2      0.0202    13 0.131 
##  9 FGFR1             9   0.0909            0      0          9 0.0909
## 10 STK11             8   0.0808            0      0          8 0.0808
## # … with 69 more rows

The list of all patients in the cohort is accessible as TRACERx_NEJM_2017_REVOLVER$patients, and these functions can be run on a smaller subset of patients.

Stats_cohort(TRACERx_NEJM_2017_REVOLVER, patients = TRACERx_NEJM_2017_REVOLVER$patients[1:5])
## # A tibble: 5 x 7
##   patientID numBiopsies numMutations numDriverMutations numClonesWithDriver
##   <chr>           <int>        <int>              <int>               <int>
## 1 CRUK0001            3            7                  7                   4
## 2 CRUK0002            3            7                  7                   4
## 3 CRUK0003            5            4                  4                   2
## 4 CRUK0004            4            4                  4                   2
## 5 CRUK0005            4            6                  6                   2
## # … with 2 more variables: numTruncalMutations <int>,
## #   numSubclonalMutations <int>

Summaries for trees and fits

There are getters for summary statistics that work for trees and fits, with the same principles of the getters for the data discussed above.

# This returns patient-level statistics for the trees available in a patient. The tibble reports
# whether the patient has trees annotated, the total number of trees, their minimum and maximum
# scores, and the total number of different combinations of Information Transfer for
# the available trees.
Stats_trees(TRACERx_NEJM_2017_REVOLVER)
## # A tibble: 99 x 6
##    patientID hasTrees numTrees maxScore minScore combInfTransf
##    <chr>     <lgl>       <int>    <dbl>    <dbl>         <int>
##  1 CRUK0001  TRUE            3    0.111   0.111              3
##  2 CRUK0002  TRUE            2    0.75    0.0833             2
##  3 CRUK0003  TRUE            1    1       1                  1
##  4 CRUK0004  TRUE            1    1       1                  1
##  5 CRUK0005  TRUE            1    1       1                  1
##  6 CRUK0006  TRUE            2    0.667   0.167              2
##  7 CRUK0007  TRUE            1    1       1                  1
##  8 CRUK0008  TRUE            1    1       1                  1
##  9 CRUK0009  TRUE            1    1       1                  1
## 10 CRUK0010  TRUE            1    1       1                  1
## # … with 89 more rows
# This returns the same table as above, but with some extended information on the fits (like the fit rank, etc)
Stats_fits(TRACERx_NEJM_2017_REVOLVER)
## # A tibble: 99 x 9
##    patientID hasTrees numTrees maxScore minScore combInfTransf Solution
##    <chr>     <lgl>       <int>    <dbl>    <dbl>         <int>    <int>
##  1 CRUK0001  TRUE            3    0.111   0.111              3        1
##  2 CRUK0002  TRUE            2    0.75    0.0833             2        1
##  3 CRUK0003  TRUE            1    1       1                  1        1
##  4 CRUK0004  TRUE            1    1       1                  1        1
##  5 CRUK0005  TRUE            1    1       1                  1        1
##  6 CRUK0006  TRUE            2    0.667   0.167              2        1
##  7 CRUK0007  TRUE            1    1       1                  1        1
##  8 CRUK0008  TRUE            1    1       1                  1        1
##  9 CRUK0009  TRUE            1    1       1                  1        1
## 10 CRUK0010  TRUE            1    1       1                  1        1
## # … with 89 more rows, and 2 more variables: converged <lgl>, penalty <dbl>

Index of Divergent Evolutionary Trajectories

The index of Divergent Evolutionary Trajectories (DET) is a measure derived from Shannon’s entropy to determine, for any driver event X, how heterogeneous the trajectories that lead to X are.

DET_index(TRACERx_NEJM_2017_REVOLVER)
## Registered S3 method overwritten by 'vegan':
##   method     from      
##   rev.hclust dendextend
## # A tibble: 79 x 4
##    driver diversity     N DET_index
##    <chr>      <dbl> <int>     <dbl>
##  1 APC            0     1         0
##  2 BRAF           0     1         0
##  3 CDKN2A         0     1         0
##  4 CHEK2          0     1         0
##  5 CIC            0     1         0
##  6 CMTR2          0     1         0
##  7 COL2A1         0     1         0
##  8 CREBBP         0     1         0
##  9 CUX1           0     1         0
## 10 DICER1         0     1         0
## # … with 69 more rows

Other features

A number of different features in matrix format can be extracted using the get_features function.

features = get_features(TRACERx_NEJM_2017_REVOLVER)

# Matrix of the mean CCF (or binary value) for each driver across all of a patient's biopsies
features$Matrix_mean_CCF %>% print
## # A tibble: 99 x 80
##    patientID   APC ARHGAP35 ARID1B ARID2 ASXL1   ATM  BAP1  BRAF  CBLB CCND1
##    <chr>     <dbl>    <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
##  1 CRUK0001      0    0.38       0  0        0     0     0 0         0     0
##  2 CRUK0002      0    0          0  0        0     0     0 0         0     0
##  3 CRUK0003      0    0          0  0        0     0     0 0         0     0
##  4 CRUK0004      0    0          0  0        0     0     0 0         0     0
##  5 CRUK0005      0    0          0  0        0     0     0 0.99      0     0
##  6 CRUK0006      0    0          0  0        0     0     0 0         0     0
##  7 CRUK0007      0    0          0  0        0     0     0 0         0     0
##  8 CRUK0008      0    0          0  0.43     0     0     0 0         0     0
##  9 CRUK0009      0    0.988      0  0        0     0     0 0.988     0     0
## 10 CRUK0010      0    0          0  0        0     0     0 0         0     0
## # … with 89 more rows, and 69 more variables: CDKN2A <dbl>, CHEK2 <dbl>,
## #   CIC <dbl>, CMTR2 <dbl>, COL2A1 <dbl>, COL5A2 <dbl>, CREBBP <dbl>,
## #   CTNNB1 <dbl>, CUX1 <dbl>, CYLD <dbl>, DICER1 <dbl>, DNM2 <dbl>, EGFR <dbl>,
## #   EP300 <dbl>, FANCC <dbl>, FANCM <dbl>, FAS <dbl>, FAT1 <dbl>, FBXW7 <dbl>,
## #   FGFR1 <dbl>, FLT4 <dbl>, GATA3 <dbl>, IKZF1 <dbl>, KEAP1 <dbl>,
## #   KMT2C <dbl>, KMT2D <dbl>, KRAS <dbl>, LATS1 <dbl>, MAP3K1 <dbl>, MET <dbl>,
## #   MGA <dbl>, MLH1 <dbl>, MYC <dbl>, NCOA6 <dbl>, NCOR1 <dbl>, NF1 <dbl>,
## #   NFE2L2 <dbl>, NOTCH1 <dbl>, NOTCH2 <dbl>, NRAS <dbl>, PASK <dbl>,
## #   PDGFRA <dbl>, PHOX2B <dbl>, PIK3CA <dbl>, PLXNB2 <dbl>, POLE <dbl>,
## #   PRDM1 <dbl>, PRF1 <dbl>, PTEN <dbl>, PTPRC <dbl>, RAD21 <dbl>, RASA1 <dbl>,
## #   RB1 <dbl>, RNF43 <dbl>, SERPINB13 <dbl>, SETD2 <dbl>, SGK223 <dbl>,
## #   SMAD4 <dbl>, SMARCA4 <dbl>, SOX2 <dbl>, SPEN <dbl>, STK11 <dbl>,
## #   TERT <dbl>, TP53 <dbl>, TSC2 <dbl>, U2AF1 <dbl>, UBR5 <dbl>, WRN <dbl>,
## #   WT1 <dbl>
# Matrix of the occurrence of drivers across all patients
features$Matrix_drivers %>% print
## # A tibble: 99 x 80
##    patientID   APC ARHGAP35 ARID1B ARID2 ASXL1   ATM  BAP1  BRAF  CBLB CCND1
##    <chr>     <dbl>    <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
##  1 CRUK0001      0        1      0     0     0     0     0     0     0     0
##  2 CRUK0002      0        0      0     0     0     0     0     0     0     0
##  3 CRUK0003      0        0      0     0     0     0     0     0     0     0
##  4 CRUK0004      0        0      0     0     0     0     0     0     0     0
##  5 CRUK0005      0        0      0     0     0     0     0     1     0     0
##  6 CRUK0006      0        0      0     0     0     0     0     0     0     0
##  7 CRUK0007      0        0      0     0     0     0     0     0     0     0
##  8 CRUK0008      0        0      0     1     0     0     0     0     0     0
##  9 CRUK0009      0        1      0     0     0     0     0     1     0     0
## 10 CRUK0010      0        0      0     0     0     0     0     0     0     0
## # … with 89 more rows, and 69 more variables: CDKN2A <dbl>, CHEK2 <dbl>,
## #   CIC <dbl>, CMTR2 <dbl>, COL2A1 <dbl>, COL5A2 <dbl>, CREBBP <dbl>,
## #   CTNNB1 <dbl>, CUX1 <dbl>, CYLD <dbl>, DICER1 <dbl>, DNM2 <dbl>, EGFR <dbl>,
## #   EP300 <dbl>, FANCC <dbl>, FANCM <dbl>, FAS <dbl>, FAT1 <dbl>, FBXW7 <dbl>,
## #   FGFR1 <dbl>, FLT4 <dbl>, GATA3 <dbl>, IKZF1 <dbl>, KEAP1 <dbl>,
## #   KMT2C <dbl>, KMT2D <dbl>, KRAS <dbl>, LATS1 <dbl>, MAP3K1 <dbl>, MET <dbl>,
## #   MGA <dbl>, MLH1 <dbl>, MYC <dbl>, NCOA6 <dbl>, NCOR1 <dbl>, NF1 <dbl>,
## #   NFE2L2 <dbl>, NOTCH1 <dbl>, NOTCH2 <dbl>, NRAS <dbl>, PASK <dbl>,
## #   PDGFRA <dbl>, PHOX2B <dbl>, PIK3CA <dbl>, PLXNB2 <dbl>, POLE <dbl>,
## #   PRDM1 <dbl>, PRF1 <dbl>, PTEN <dbl>, PTPRC <dbl>, RAD21 <dbl>, RASA1 <dbl>,
## #   RB1 <dbl>, RNF43 <dbl>, SERPINB13 <dbl>, SETD2 <dbl>, SGK223 <dbl>,
## #   SMAD4 <dbl>, SMARCA4 <dbl>, SOX2 <dbl>, SPEN <dbl>, STK11 <dbl>,
## #   TERT <dbl>, TP53 <dbl>, TSC2 <dbl>, U2AF1 <dbl>, UBR5 <dbl>, WRN <dbl>,
## #   WT1 <dbl>
# Clonal-driver occurrence matrix for the whole cohort (one row per patient,
# one column per driver)
print(features$Matrix_clonal_drivers)
## # A tibble: 99 x 80
##    patientID   APC ARHGAP35 ARID1B ARID2 ASXL1   ATM  BAP1  BRAF  CBLB CCND1
##    <chr>     <dbl>    <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
##  1 CRUK0001      0        0      0     0     0     0     0     0     0     0
##  2 CRUK0002      0        0      0     0     0     0     0     0     0     0
##  3 CRUK0003      0        0      0     0     0     0     0     0     0     0
##  4 CRUK0004      0        0      0     0     0     0     0     0     0     0
##  5 CRUK0005      0        0      0     0     0     0     0     1     0     0
##  6 CRUK0006      0        0      0     0     0     0     0     0     0     0
##  7 CRUK0007      0        0      0     0     0     0     0     0     0     0
##  8 CRUK0008      0        0      0     0     0     0     0     0     0     0
##  9 CRUK0009      0        1      0     0     0     0     0     1     0     0
## 10 CRUK0010      0        0      0     0     0     0     0     0     0     0
## # … with 89 more rows, and 69 more variables: CDKN2A <dbl>, CHEK2 <dbl>,
## #   CIC <dbl>, CMTR2 <dbl>, COL2A1 <dbl>, COL5A2 <dbl>, CREBBP <dbl>,
## #   CTNNB1 <dbl>, CUX1 <dbl>, CYLD <dbl>, DICER1 <dbl>, DNM2 <dbl>, EGFR <dbl>,
## #   EP300 <dbl>, FANCC <dbl>, FANCM <dbl>, FAS <dbl>, FAT1 <dbl>, FBXW7 <dbl>,
## #   FGFR1 <dbl>, FLT4 <dbl>, GATA3 <dbl>, IKZF1 <dbl>, KEAP1 <dbl>,
## #   KMT2C <dbl>, KMT2D <dbl>, KRAS <dbl>, LATS1 <dbl>, MAP3K1 <dbl>, MET <dbl>,
## #   MGA <dbl>, MLH1 <dbl>, MYC <dbl>, NCOA6 <dbl>, NCOR1 <dbl>, NF1 <dbl>,
## #   NFE2L2 <dbl>, NOTCH1 <dbl>, NOTCH2 <dbl>, NRAS <dbl>, PASK <dbl>,
## #   PDGFRA <dbl>, PHOX2B <dbl>, PIK3CA <dbl>, PLXNB2 <dbl>, POLE <dbl>,
## #   PRDM1 <dbl>, PRF1 <dbl>, PTEN <dbl>, PTPRC <dbl>, RAD21 <dbl>, RASA1 <dbl>,
## #   RB1 <dbl>, RNF43 <dbl>, SERPINB13 <dbl>, SETD2 <dbl>, SGK223 <dbl>,
## #   SMAD4 <dbl>, SMARCA4 <dbl>, SOX2 <dbl>, SPEN <dbl>, STK11 <dbl>,
## #   TERT <dbl>, TP53 <dbl>, TSC2 <dbl>, U2AF1 <dbl>, UBR5 <dbl>, WRN <dbl>,
## #   WT1 <dbl>
# Subclonal-driver occurrence matrix for the whole cohort (one row per
# patient, one column per driver)
print(features$Matrix_subclonal_drivers)
## # A tibble: 99 x 80
##    patientID   APC ARHGAP35 ARID1B ARID2 ASXL1 ATM   BAP1   BRAF CBLB  CCND1
##    <chr>     <dbl> <chr>    <chr>  <chr> <chr> <chr> <chr> <dbl> <chr> <chr>
##  1 CRUK0001      0 1        0      0     0     0     0         0 0     0    
##  2 CRUK0002      0 0        0      0     0     0     0         0 0     0    
##  3 CRUK0003      0 0        0      0     0     0     0         0 0     0    
##  4 CRUK0004      0 0        0      0     0     0     0         0 0     0    
##  5 CRUK0005      0 0        0      0     0     0     0         0 0     0    
##  6 CRUK0006      0 0        0      0     0     0     0         0 0     0    
##  7 CRUK0008      0 0        0      1     0     0     0         0 0     0    
##  8 CRUK0009      0 0        0      0     0     0     0         0 0     0    
##  9 CRUK0011      0 0        0      0     0     0     0         0 0     0    
## 10 CRUK0013      0 0        0      0     0     0     0         0 0     0    
## # … with 89 more rows, and 69 more variables: CDKN2A <dbl>, CHEK2 <dbl>,
## #   CIC <chr>, CMTR2 <dbl>, COL2A1 <dbl>, COL5A2 <chr>, CREBBP <dbl>,
## #   CTNNB1 <chr>, CUX1 <dbl>, CYLD <chr>, DICER1 <dbl>, DNM2 <chr>, EGFR <chr>,
## #   EP300 <chr>, FANCC <chr>, FANCM <chr>, FAS <chr>, FAT1 <chr>, FBXW7 <dbl>,
## #   FGFR1 <dbl>, FLT4 <chr>, GATA3 <dbl>, IKZF1 <chr>, KEAP1 <dbl>,
## #   KMT2C <dbl>, KMT2D <chr>, KRAS <chr>, LATS1 <chr>, MAP3K1 <chr>, MET <dbl>,
## #   MGA <chr>, MLH1 <chr>, MYC <dbl>, NCOA6 <chr>, NCOR1 <chr>, NF1 <chr>,
## #   NFE2L2 <chr>, NOTCH1 <chr>, NOTCH2 <dbl>, NRAS <chr>, PASK <chr>,
## #   PDGFRA <dbl>, PHOX2B <dbl>, PIK3CA <chr>, PLXNB2 <chr>, POLE <dbl>,
## #   PRDM1 <dbl>, PRF1 <dbl>, PTEN <dbl>, PTPRC <chr>, RAD21 <dbl>, RASA1 <dbl>,
## #   RB1 <chr>, RNF43 <chr>, SERPINB13 <dbl>, SETD2 <chr>, SGK223 <dbl>,
## #   SMAD4 <chr>, SMARCA4 <dbl>, SOX2 <dbl>, SPEN <dbl>, STK11 <dbl>,
## #   TERT <chr>, TP53 <chr>, TSC2 <dbl>, U2AF1 <dbl>, UBR5 <chr>, WRN <dbl>,
## #   WT1 <dbl>
# Occurrence matrix of the inferred trajectories for the whole cohort (one row
# per patient, one column per `A --> B` trajectory)
print(features$Matrix_trajectories)
## # A tibble: 99 x 263
##    patientID `ARHGAP35 --> T… `ARID1B --> ASX… `ARID1B --> COL… `ARID1B --> KRA…
##    <chr>                <dbl>            <dbl>            <dbl>            <dbl>
##  1 CRUK0001                 0                0                0                0
##  2 CRUK0002                 0                0                0                0
##  3 CRUK0003                 0                0                0                0
##  4 CRUK0004                 0                0                0                0
##  5 CRUK0005                 0                0                0                0
##  6 CRUK0006                 0                0                0                0
##  7 CRUK0007                 0                0                0                0
##  8 CRUK0008                 0                0                0                0
##  9 CRUK0009                 1                0                0                0
## 10 CRUK0010                 0                0                0                0
## # … with 89 more rows, and 258 more variables: ARID2 --> KRAS <dbl>,
## #   ATM --> CCND1 <dbl>, ATM --> MGA <dbl>, ATM --> NCOR1 <dbl>,
## #   BAP1 --> PIK3CA <dbl>, BAP1 --> RB1 <dbl>, BAP1 --> TP53 <dbl>,
## #   BRAF --> TERT <dbl>, CBLB --> ARID1B <dbl>, CBLB --> DNM2 <dbl>,
## #   CBLB --> LATS1 <dbl>, CBLB --> PTPRC <dbl>, CCND1 --> ARID1B <dbl>,
## #   CCND1 --> FAS <dbl>, CCND1 --> IKZF1 <dbl>, CCND1 --> UBR5 <dbl>,
## #   CDKN2A --> COL5A2 <dbl>, CDKN2A --> CTNNB1 <dbl>, CDKN2A --> KMT2D <dbl>,
## #   CDKN2A --> NF1 <dbl>, CDKN2A --> NFE2L2 <dbl>, CDKN2A --> PTPRC <dbl>,
## #   CDKN2A --> TP53 <dbl>, CMTR2 --> NRAS <dbl>, CMTR2 --> UBR5 <dbl>,
## #   COL2A1 --> BAP1 <dbl>, COL2A1 --> NCOR1 <dbl>, COL5A2 --> CBLB <dbl>,
## #   COL5A2 --> NCOR1 <dbl>, COL5A2 --> NFE2L2 <dbl>, CREBBP --> TP53 <dbl>,
## #   CYLD --> FANCM <dbl>, DICER1 --> PLXNB2 <dbl>, DNM2 --> CBLB <dbl>,
## #   DNM2 --> NFE2L2 <dbl>, EGFR --> ARHGAP35 <dbl>, EGFR --> CTNNB1 <dbl>,
## #   EGFR --> RB1 <dbl>, EGFR --> TP53 <dbl>, EP300 --> CYLD <dbl>,
## #   EP300 --> NF1 <dbl>, FANCC --> CYLD <dbl>, FANCM --> FAT1 <dbl>,
## #   FAS --> FLT4 <dbl>, FAT1 --> ARID1B <dbl>, FAT1 --> CYLD <dbl>,
## #   FAT1 --> DNM2 <dbl>, FAT1 --> KRAS <dbl>, FAT1 --> LATS1 <dbl>,
## #   FAT1 --> NFE2L2 <dbl>, FAT1 --> PTPRC <dbl>, FBXW7 --> EP300 <dbl>,
## #   FGFR1 --> COL5A2 <dbl>, FGFR1 --> KRAS <dbl>, FGFR1 --> NF1 <dbl>,
## #   FGFR1 --> NFE2L2 <dbl>, FGFR1 --> PLXNB2 <dbl>, GL --> APC <dbl>,
## #   GL --> ARHGAP35 <dbl>, GL --> ARID2 <dbl>, GL --> ASXL1 <dbl>,
## #   GL --> ATM <dbl>, GL --> BAP1 <dbl>, GL --> BRAF <dbl>, GL --> CCND1 <dbl>,
## #   GL --> CDKN2A <dbl>, GL --> CHEK2 <dbl>, GL --> CMTR2 <dbl>,
## #   GL --> COL2A1 <dbl>, GL --> COL5A2 <dbl>, GL --> CREBBP <dbl>,
## #   GL --> CTNNB1 <dbl>, GL --> CUX1 <dbl>, GL --> DICER1 <dbl>,
## #   GL --> EGFR <dbl>, GL --> FANCM <dbl>, GL --> FAT1 <dbl>,
## #   GL --> FBXW7 <dbl>, GL --> FGFR1 <dbl>, GL --> GATA3 <dbl>,
## #   GL --> KEAP1 <dbl>, GL --> KMT2C <dbl>, GL --> KMT2D <dbl>,
## #   GL --> KRAS <dbl>, GL --> LATS1 <dbl>, GL --> MAP3K1 <dbl>,
## #   GL --> MET <dbl>, GL --> MGA <dbl>, GL --> MYC <dbl>, GL --> NCOA6 <dbl>,
## #   GL --> NF1 <dbl>, GL --> NOTCH2 <dbl>, GL --> NRAS <dbl>,
## #   GL --> PASK <dbl>, GL --> PDGFRA <dbl>, GL --> PHOX2B <dbl>,
## #   GL --> PIK3CA <dbl>, GL --> POLE <dbl>, GL --> PRDM1 <dbl>,
## #   GL --> PRF1 <dbl>, …