Skip to contents

Detect largest clusters from a time sequence of predictor statistics

Usage

extract_empirical_clusters(
  empirical_statistics,
  threshold,
  binned = FALSE,
  top_n = Inf
)

Arguments

empirical_statistics

A predictor-by-time matrix of empirical timewise statistics.

threshold

The threshold value that the statistic must pass to contribute to cluster mass. Interpretation depends on the choice of statistic (more below):

  • If statistic = "t", the threshold for the t-value (beta/std.err) from the regression model.

  • If statistic = "chisq", the threshold for the p-value of chi-squared statistics from likelihood ratio tests.

binned

Whether the data has been aggregated/collapsed into time bins. Defaults to FALSE, which requires a cluster to span at least two time points. If TRUE, allows length-1 clusters to exist.

top_n

How many clusters to return, in the order of the size of the cluster-mass statistic. Defaults to Inf, which returns all detected clusters.

Value

An empirical_clusters object.

Examples

# \donttest{

library(dplyr, warn.conflicts = FALSE)

# Specification object
spec <- make_jlmer_spec(
  weight ~ 1 + Diet, filter(ChickWeight, Time <= 20),
  subject = "Chick", time = "Time"
)
spec
#> ── jlmer specification ───────────────────────────────────────── <jlmer_spec> ──
#> Formula: weight ~ 1 + Diet2 + Diet3 + Diet4
#> Predictors:
#>   Diet: Diet2, Diet3, Diet4
#> Groupings:
#>   Subject: Chick
#>   Trial:
#>   Time: Time
#> Data:
#> # A tibble: 533 × 6
#>   weight Diet2 Diet3 Diet4 Chick  Time
#>    <dbl> <dbl> <dbl> <dbl> <ord> <dbl>
#> 1     42     0     0     0 1         0
#> 2     51     0     0     0 1         2
#> 3     59     0     0     0 1         4
#> # ℹ 530 more rows
#> ────────────────────────────────────────────────────────────────────────────────

# Empirical clusters are derived from the timewise statistics
empirical_statistics <- compute_timewise_statistics(spec)
empirical_clusters <- extract_empirical_clusters(empirical_statistics, threshold = 2)
empirical_clusters
#> ── Empirical clusters (t > 2) ────────────────────────── <empirical_clusters> ──
#> Diet2
#>   [4, 8]: 8.496
#> Diet3
#>   [2, 20]: 34.628
#> Diet4
#>   [2, 20]: 39.371
#> ────────────────────────────────────────────────────────────────────────────────

# Collect as dataframe with `tidy()`
empirical_clusters_df <- tidy(empirical_clusters)
empirical_clusters_df
#> # A tibble: 3 × 6
#>   predictor id    start   end length sum_statistic
#>   <chr>     <fct> <dbl> <dbl>  <dbl>         <dbl>
#> 1 Diet2     1         4     8      3          8.50
#> 2 Diet3     1         2    20     10         34.6 
#> 3 Diet4     1         2    20     10         39.4 

# Changing the `threshold` value identifies different clusters
extract_empirical_clusters(empirical_statistics, threshold = 1)
#> ── Empirical clusters (t > 1) ────────────────────────── <empirical_clusters> ──
#> Diet2
#>   [2, 20]: 19.427
#> Diet3
#>   [2, 20]: 34.628
#> Diet4
#>   [2, 20]: 39.371
#> ────────────────────────────────────────────────────────────────────────────────

# A predictor can have zero or multiple clusters associated with it
extract_empirical_clusters(empirical_statistics, threshold = 3)
#> ── Empirical clusters (t > 3) ────────────────────────── <empirical_clusters> ──
#> Diet3
#>   [4, 8]: 12.719
#>   [16, 20]: 10.449
#> Diet4
#>   [2, 12]: 29.659
#> ────────────────────────────────────────────────────────────────────────────────
#> ! No clusters found for Diet2

# }