Skip to contents

Calculate bootstrapped p-values of cluster-mass statistics

Usage

calculate_clusters_pvalues(
  empirical_clusters,
  null_cluster_dists,
  add1 = FALSE
)

clusters_are_comparable(empirical_clusters, null_cluster_dists, error = FALSE)

Arguments

empirical_clusters

A empirical_clusters object

null_cluster_dists

A null_cluster_dists object

add1

Whether to add 1 to the numerator and denominator when calculating the p-value. Use TRUE to effectively count the observed statistic as part of the permuted null distribution (recommended with larger nsim prior to publishing results).

error

Whether to throw an error if incompatible

Value

An empirical_clusters object augmented with p-values.

Examples

# \donttest{

library(dplyr, warn.conflicts = FALSE)

# Specification object
spec <- make_jlmer_spec(
  weight ~ 1 + Diet, filter(ChickWeight, Time <= 20),
  subject = "Chick", time = "Time"
)
spec
#> ── jlmer specification ───────────────────────────────────────── <jlmer_spec> ──
#> Formula: weight ~ 1 + Diet2 + Diet3 + Diet4
#> Predictors:
#>   Diet: Diet2, Diet3, Diet4
#> Groupings:
#>   Subject: Chick
#>   Trial:
#>   Time: Time
#> Data:
#> # A tibble: 533 × 6
#>   weight Diet2 Diet3 Diet4 Chick  Time
#>    <dbl> <dbl> <dbl> <dbl> <ord> <dbl>
#> 1     42     0     0     0 1         0
#> 2     51     0     0     0 1         2
#> 3     59     0     0     0 1         4
#> # ℹ 530 more rows
#> ────────────────────────────────────────────────────────────────────────────────

# Make empirical clusters
empirical_statistics <- compute_timewise_statistics(spec)
empirical_clusters <- extract_empirical_clusters(empirical_statistics, threshold = 2)
empirical_clusters
#> ── Empirical clusters (t > 2) ────────────────────────── <empirical_clusters> ──
#> Diet2
#>   [4, 8]: 8.496
#> Diet3
#>   [2, 20]: 34.628
#> Diet4
#>   [2, 20]: 39.371
#> ────────────────────────────────────────────────────────────────────────────────

# Make null cluster-mass distribution
reset_rng_state()
null_statistics <- permute_timewise_statistics(spec, nsim = 100)
null_cluster_dists <- extract_null_cluster_dists(null_statistics, threshold = 2)

# Significance test the empirical cluster(s) from each predictor against the simulated null
calculate_clusters_pvalues(empirical_clusters, null_cluster_dists)
#> ── Empirical clusters (t > 2) ────────────────────────── <empirical_clusters> ──
#> Diet2
#>   [4, 8]: 8.496 (p=0.0200)
#> Diet3
#>   [2, 20]: 34.628 (p=0.0000)
#> Diet4
#>   [2, 20]: 39.371 (p=0.0000)
#> ────────────────────────────────────────────────────────────────────────────────

# Set `add1 = TRUE` to normalize by adding 1 to numerator and denominator
calculate_clusters_pvalues(empirical_clusters, null_cluster_dists, add1 = TRUE)
#> ── Empirical clusters (t > 2) ────────────────────────── <empirical_clusters> ──
#> Diet2
#>   [4, 8]: 8.496 (p=0.0297)
#> Diet3
#>   [2, 20]: 34.628 (p=0.0099)
#> Diet4
#>   [2, 20]: 39.371 (p=0.0099)
#> ────────────────────────────────────────────────────────────────────────────────

# This sequence of procedures is equivalent to `clusterpermute()`
reset_rng_state()
clusterpermute(spec, threshold = 2, nsim = 100, progress = FALSE)
#> $null_cluster_dists
#> ── Null cluster-mass distribution (t > 2) ────────────── <null_cluster_dists> ──
#> Diet2 (n = 100)
#>   Mean (SD): 0.266 (2.13)
#>   Coverage intervals: 95% [0.000, 6.579]
#> Diet3 (n = 100)
#>   Mean (SD): -0.124 (2.75)
#>   Coverage intervals: 95% [-4.777, 5.707]
#> Diet4 (n = 100)
#>   Mean (SD): 0.007 (2.50)
#>   Coverage intervals: 95% [0.000, 0.000]
#> ────────────────────────────────────────────────────────────────────────────────
#> 
#> $empirical_clusters
#> ── Empirical clusters (t > 2) ────────────────────────── <empirical_clusters> ──
#> Diet2
#>   [4, 8]: 8.496 (p=0.0297)
#> Diet3
#>   [2, 20]: 34.628 (p=0.0099)
#> Diet4
#>   [2, 20]: 39.371 (p=0.0099)
#> ────────────────────────────────────────────────────────────────────────────────

# The empirical clusters and the null cluster-mass distribution must be comparable
empirical_clusters2 <- extract_empirical_clusters(empirical_statistics, threshold = 3)
# For example, below code errors because thresholds are different (2 vs. 3)
try( calculate_clusters_pvalues(empirical_clusters2, null_cluster_dists) )
#> Error in clusters_are_comparable(empirical_clusters, null_cluster_dists,  : 
#>   Cluster-mass statistics between empirical and null are not comparable.
#>  `empirical_clusters` and `null_cluster_dists` must share the same `statistic`
#>   and `threshold`.
#>  threshold: empirical uses 3 but null uses 2.

# Check for compatibility with `clusters_are_comparable()`
clusters_are_comparable(empirical_clusters, null_cluster_dists)
#> [1] TRUE
clusters_are_comparable(empirical_clusters2, null_cluster_dists)
#> [1] FALSE

# }