Cracking open ggplot internals with {ggtrace}

June Choe

University of Pennsylvania

@yjunechoe

experiment_data
# A tibble: 2,879 × 4
  subject trial condition response_time
    <int> <int> <chr>             <dbl>
1       1     1 A                  2903
2       1     2 A                  2713
3       1     3 A                  2118
# … with 2,876 more rows

my_boxplot <- ggplot(experiment_data) +
  geom_boxplot(
    aes(x = condition, y = response_time, fill = condition)
  )
my_boxplot

+ layer( … )

+ layer( … )

my_boxplot + 
  geom_label(
    aes(x = condition,
        y = stage(start = response_time, after_stat = ymax),
        label = after_stat(ymax)),
    stat = "boxplot", size = 5,
    data = ~ .x %>% filter(condition == "B")
  )
my_boxplot + 
  geom_label(
    aes(x = condition,
        y = stage(start = response_time, after_stat = ymax),
        label = after_stat(ymax)),
    stat = "boxplot", size = 5,
    data = ~ .x %>% filter(condition == "B")
  )

stage() and after_stat() are “new” in {ggplot2} v3.3.0 (March 2020)

geom_boxplot(
  aes(x = condition, y = response_time, fill = condition)
)
geom_label(
  aes(x = condition,
      y = stage(start = response_time, after_stat = ymax),
      label = after_stat(ymax)),
  stat = "boxplot", size = 5,
  data = . %>% filter(condition == "B")
)
geom_boxplot(
  aes(x = condition, y = response_time, fill = condition)
)
geom_label(
  aes(x = condition,
      y = stage(start = response_time, after_stat = ymax),
      label = after_stat(ymax)),
  stat = "boxplot", size = 5,
  data = . %>% filter(condition == "B")
)
geom_boxplot(
  aes(x = condition, y = response_time, fill = condition)
)
geom_label(
  aes(x = condition,
      y = stage(start = response_time, after_stat = ymax),
      label = after_stat(ymax)),
  stat = "boxplot", size = 5,
  data = . %>% filter(condition == "B")
)
geom_boxplot(
  aes(x = condition, y = response_time, fill = condition)
)
geom_label(
  aes(x = condition,
      y = stage(start = response_time, after_stat = ymax),
      label = after_stat(ymax)),
  stat = "boxplot", size = 5,
  data = . %>% filter(condition == "B")
)
geom_boxplot(
  aes(x = condition, y = response_time, fill = condition)
)
geom_label(
  aes(x = condition,
      y = stage(start = response_time, after_stat = ymax),
      label = after_stat(ymax)),
  stat = "boxplot", size = 5,
  data = . %>% filter(condition == "B")
)
geom_boxplot(
  aes(x = condition, y = response_time, fill = condition)
)
geom_label(
  aes(x = condition,
      y = stage(start = response_time, after_stat = ymax),
      label = after_stat(ymax)),
  stat = "boxplot", size = 5,
  data = . %>% filter(condition == "B")
)
geom_boxplot(
  aes(x = condition, y = response_time, fill = condition)
)
geom_label(
  aes(x = condition,
      y = stage(start = response_time, after_stat = ymax),
      label = after_stat(ymax)),
  stat = "boxplot", size = 5,
  data = . %>% filter(condition == "B")
)
geom_boxplot(
  aes(x = condition, y = response_time, fill = condition)
)
geom_label(
  aes(x = condition,
      y = stage(start = response_time, after_stat = ymax),
      label = after_stat(ymax)),
  stat = "boxplot", size = 5,
  data = . %>% filter(condition == "B")
)

ggplot internals for users

ggplot internals as data wrangling

Outline:

  1. Reframing of ggplot internals as a data wrangling pipeline

  2. Walkthrough with a barplot example

  3. Building up the boxplot annotation layer

1) Layers as dataframes

ggplot2::layer_data(plot = my_boxplot, i = 1L)
     fill ymin  lower middle   upper ymax         outliers notchupper
1 #F8766D 1982 2263.0   2536 2803.25 3111                    2558.494
2 #00BFC4 1221 1670.5   2002 2380.50 3012 3590, 3712, 3963   2031.572
  notchlower x flipped_aes PANEL group ymin_final ymax_final  xmin
1   2513.506 1       FALSE     1     1       1982       3111 0.625
2   1972.428 2       FALSE     1     2       1221       3963 1.625
   xmax xid newx new_width weight colour size alpha shape linetype
1 1.375   1    1      0.75      1 grey20  0.5    NA    19    solid
2 2.375   2    2      0.75      1 grey20  0.5    NA    19    solid

Input

experiment_data
  subject trial condition response_time
1       1     1         A          2903
2       1     2         A          2713
3       1     3         A          2118
4       1     4         A          2596
5       1     5         A          2191
 [ reached 'max' / getOption("max.print") -- omitted 2874 rows ]

Output

ggplot2::layer_data(plot = my_boxplot, i = 1L)
     fill ymin  lower middle   upper ymax         outliers notchupper
1 #F8766D 1982 2263.0   2536 2803.25 3111                    2558.494
2 #00BFC4 1221 1670.5   2002 2380.50 3012 3590, 3712, 3963   2031.572
  notchlower x flipped_aes PANEL group ymin_final ymax_final  xmin
1   2513.506 1       FALSE     1     1       1982       3111 0.625
2   1972.428 2       FALSE     1     2       1221       3963 1.625
   xmax xid newx new_width weight colour size alpha shape linetype
1 1.375   1    1      0.75      1 grey20  0.5    NA    19    solid
2 2.375   2    2      0.75      1 grey20  0.5    NA    19    solid

Output

ggplot2::layer_data(plot = my_boxplot, i = 1L)
     fill ymin  lower middle   upper ymax         outliers notchupper
1 #F8766D 1982 2263.0   2536 2803.25 3111                    2558.494
2 #00BFC4 1221 1670.5   2002 2380.50 3012 3590, 3712, 3963   2031.572
  notchlower x flipped_aes PANEL group ymin_final ymax_final  xmin
1   2513.506 1       FALSE     1     1       1982       3111 0.625
2   1972.428 2       FALSE     1     2       1221       3963 1.625
   xmax xid newx new_width weight colour size alpha shape linetype
1 1.375   1    1      0.75      1 grey20  0.5    NA    19    solid
2 2.375   2    2      0.75      1 grey20  0.5    NA    19    solid
geom_label(
  aes(x = condition,
      y = stage(start = response_time, after_stat = ymax),
      label = after_stat(ymax)),
  stat = "boxplot", size = 5,
  data = . %>% filter(condition == "B")
)
geom_label(
  aes(x = condition,
      y = stage(start = response_time, after_stat = ymax),
      label = after_stat(ymax)),
  stat = "boxplot", size = 5,
  data = . %>% filter(condition == "B")
)
geom_label(
  aes(x = condition,
      y = stage(start = response_time, after_stat = ymax),
      label = after_stat(ymax)),
  stat = "boxplot", size = 5,
  data = . %>% filter(condition == "B")
)
geom_label(
  aes(x = condition,
      y = stage(start = response_time, after_stat = ymax),
      label = after_stat(ymax)),
  stat = "boxplot", size = 5,
  data = . %>% filter(condition == "B")
)
geom_label(
  aes(x = condition,
      y = stage(start = response_time, after_stat = ymax),
      label = after_stat(ymax)),
  stat = "boxplot", size = 5,
  data = . %>% filter(condition == "B")
)
Before Stat
After Stat
Before Geom
After Scale
geom_label(
  aes(x = condition,
      y = stage(start = response_time, after_stat = ymax),
      label = after_stat(ymax)),
  stat = "boxplot", size = 5,
  data = . %>% filter(condition == "B")
)

Snapshots of layer data

Input (user-supplied data)

  1. Before Stat: validates the layer’s choice of stat

  2. After Stat: context for after_stat() mappings

  3. Before Geom: validates the layer’s choice of geom

  4. After Scale: context for after_scale() mappings

Output (drawing-ready data)

👻 ggproto methods 👻

ggplot_internals <- function(data) {
  data %>% 
    ggproto$method() %>% 
    ggproto$method() %>% 
    ggproto$method() %>% 
    ggproto$method() %>% 
    ggproto$method() %>% 
    ggproto$method() %>% 
    ...
}
Before Stat
After Stat
Before Geom
After Scale

This is a pseudo-code abstraction of ggplot2:::ggplot_build.ggplot()

👻 ggproto methods 👻

ggplot_internals <- function(data) {
  data %>% 
    ... %>% 
    Layer$compute_statistic() %>% 
    ... %>% 
    Layer$compute_geom_1() %>% 
    ... %>% 
    Layer$compute_geom_2() %>% 
    ...
}
Before Stat
After Stat
Before Geom
After Scale

This is a pseudo-code abstraction of ggplot2:::ggplot_build.ggplot()

👻 ggproto methods 👻

ggplot_internals <- function(data) {
  data %>% 
    ... %>% 
    Layer$compute_statistic()
  
  
  
  
  
}
Before Stat
After Stat
Before Geom
After Scale

This is a pseudo-code abstraction of ggplot2:::ggplot_build.ggplot()

👻 ggproto methods 👻

ggplot_internals <- function(data) {
  data %>% 
    ... %>% 
    Layer$compute_statistic() %>% 
    ...
  
  
  
  
}
Before Stat
After Stat
Before Geom
After Scale

This is a pseudo-code abstraction of ggplot2:::ggplot_build.ggplot()

👻 ggproto methods 👻

ggplot_internals <- function(data) {
  data %>% 
    ... %>% 
    Layer$compute_statistic() %>% 
    ... %>% 
    Layer$compute_geom_1()
  
  
  
}
Before Stat
After Stat
Before Geom
After Scale

This is a pseudo-code abstraction of ggplot2:::ggplot_build.ggplot()

👻 ggproto methods 👻

ggplot_internals <- function(data) {
  data %>% 
    ... %>% 
    Layer$compute_statistic() %>% 
    ... %>% 
    Layer$compute_geom_1() %>% 
    ... %>% 
    Layer$compute_geom_2() %>% 
    ...
}
Before Stat
After Stat
Before Geom
After Scale

This is a pseudo-code abstraction of ggplot2:::ggplot_build.ggplot()

👻 ggproto methods 👻

ggplot2:::Layer$compute_statistic
<ggproto method>
  <Wrapper function>
    function (...) f(..., self = self)

  <Inner function (f)>
    function (self, data, layout) {
      if (empty(data)) return(new_data_frame())
      self$computed_stat_params <- self$stat$setup_params(data, self$stat_params)
      data <- self$stat$setup_data(data, self$computed_stat_params)
      self$stat$compute_layer(data, self$computed_stat_params, layout)
    }

👻 ggproto methods 👻

ggplot2:::Layer$compute_statistic
<ggproto method>
  <Wrapper function>
    function (...) f(..., self = self)

  <Inner function (f)>
    function (self, data, layout) {
      if (empty(data)) return(new_data_frame())
      self$computed_stat_params <- self$stat$setup_params(data, self$stat_params)
      data <- self$stat$setup_data(data, self$computed_stat_params)
      self$stat$compute_layer(data, self$computed_stat_params, layout)
    }

👻 ggproto methods 👻

ggplot2:::Layer$compute_statistic

├───Layer$compute_statistic()
│   │
│   ├───StatBoxplot$setup_params()
│   │
│   ├───StatBoxplot$setup_data()
│   │
│   └───Stat$compute_layer()
│       │
│       └───Stat$compute_panel()
│           │
│           └───StatBoxplot$compute_group()

Introducing {ggtrace}

Toolkit to Inspect, Capture, and Highjack the internals

Workflow functions ggtrace_{action}_{value}():

  • x: The ggplot object

  • method: The ggproto method

  • cond: When to interact with the method

If cond is an integer N, targets the Nth time method is called.

Introducing {ggtrace}

Toolkit to Inspect, Capture, and Highjack the internals

Workflow functions ggtrace_{action}_{value}():

“While rendering x, inspect method when cond is met.”

👻 ggproto methods 👻

ggplot_internals <- function(data) {
  data %>% 
    ... %>% 
    Layer$compute_statistic() %>% 
    ... %>% 
    Layer$compute_geom_1() %>% 
    ... %>% 
    Layer$compute_geom_2() %>% 
    ...
}
Before Stat
After Stat
Before Geom
After Scale

This is a pseudo-code abstraction of ggplot2:::ggplot_build.ggplot()

ggtrace::ggtrace_inspect_args(
  x = my_boxplot,
  method = ggplot2:::Layer$compute_statistic,
  cond = 1
)$data
  x    y fill PANEL group
1 1 2903    A     1     1
2 1 2713    A     1     1
3 1 2118    A     1     1
 [ reached 'max' / getOption("max.print") -- omitted 2876 rows ]
Before Stat

Default value of cond is 1, so it can be omitted here

👻 ggproto methods 👻

ggplot_internals <- function(data) {
  data %>% 
    ... %>% 
    Layer$compute_statistic() %>% 
    ... %>% 
    Layer$compute_geom_1() %>% 
    ... %>% 
    Layer$compute_geom_2() %>% 
    ...
}
Before Stat
After Stat
Before Geom
After Scale

This is a pseudo-code abstraction of ggplot2:::ggplot_build.ggplot()

ggtrace::ggtrace_inspect_return(
  x = my_boxplot,
  method = ggplot2:::Layer$compute_statistic,
  cond = 1
)
  ymin  lower middle   upper ymax         outliers notchupper
1 1982 2263.0   2536 2803.25 3111                    2558.494
2 1221 1670.5   2002 2380.50 3012 3590, 3712, 3963   2031.572
  notchlower x width relvarwidth flipped_aes fill PANEL group
1   2513.506 1  0.75    37.94733       FALSE    A     1     1
2   1972.428 2  0.75    37.93415       FALSE    B     1     2
After Stat

Default value of cond is 1, so it can be omitted here

ggplot internals for users

Outline:

  1. Reframing of ggplot internals as a data wrangling pipeline

  2. Walkthrough with a barplot example

  3. Building up the boxplot annotation layer

Walkthrough with a barplot

penguins_sm <- palmerpenguins::penguins %>% 
  select(species, bill_length_mm) %>% 
  filter(!is.na(bill_length_mm))
penguins_sm
# A tibble: 342 × 2
  species bill_length_mm
  <fct>            <dbl>
1 Adelie            39.1
2 Adelie            39.5
3 Adelie            40.3
4 Adelie            36.7
5 Adelie            39.3
# … with 337 more rows

my_barplot <- ggplot(penguins_sm) + 
  geom_bar(aes(x = species, fill = species))
my_barplot

It always happens in steps

geom_bar(aes(x = species, fill = species))

=

geom_bar(
  aes(
    x = species, fill = species,
    y = after_stat(count)
  ),
  stat = "count"
)
penguins_sm
  species bill_length_mm
1  Adelie           39.1
2  Adelie           39.5
3  Adelie           40.3
 [ reached 'max' / getOption("max.print") -- omitted 339 rows ]
Input
ggplot2::layer_data(plot = my_barplot, i = 1L)
     fill   y count prop x flipped_aes PANEL group ymin ymax xmin
1 #F8766D 151   151    1 1       FALSE     1     1    0  151 0.55
2 #00BA38  68    68    1 2       FALSE     1     2    0   68 1.55
3 #619CFF 123   123    1 3       FALSE     1     3    0  123 2.55
  xmax colour size linetype alpha
1 1.45     NA  0.5        1    NA
2 2.45     NA  0.5        1    NA
3 3.45     NA  0.5        1    NA
Output
Input
Before Stat
After Stat
Before Geom
After Scale
Output

1) Before Stat

ggtrace_inspect_args(
  x = my_barplot,
  method = ggplot2:::Layer$compute_statistic)$data
  x   fill PANEL group
1 1 Adelie     1     1
2 1 Adelie     1     1
3 1 Adelie     1     1
 [ reached 'max' / getOption("max.print") -- omitted 339 rows ]
Before Stat
After Stat
Before Geom
After Scale

1) Before Stat

ggtrace_inspect_args(
  x = my_barplot,
  method = ggplot2:::Layer$compute_statistic)$data
  x   fill PANEL group
1 1 Adelie     1     1
2 1 Adelie     1     1
3 1 Adelie     1     1
 [ reached 'max' / getOption("max.print") -- omitted 339 rows ]
Before Stat
After Stat
Before Geom
After Scale

1) Before Stat

ggtrace_inspect_args(
  x = ggplot(penguins_sm) +
        geom_bar(
          aes(x = species, fill = species),
          data = ~ .x %>% filter(species != "Adelie")
        ),
  method = ggplot2:::Layer$compute_statistic)$data
  x   fill PANEL group
1 2 Gentoo     1     2
2 2 Gentoo     1     2
3 2 Gentoo     1     2
 [ reached 'max' / getOption("max.print") -- omitted 188 rows ]
Before Stat
After Stat
Before Geom
After Scale

1) Before Stat

ggtrace_inspect_args(
  x = my_barplot,
  method = ggplot2:::Layer$compute_statistic)$data
  x   fill PANEL group
1 1 Adelie     1     1
2 1 Adelie     1     1
3 1 Adelie     1     1
 [ reached 'max' / getOption("max.print") -- omitted 339 rows ]
Before Stat
After Stat
Before Geom
After Scale

1) Before Stat

ggtrace_inspect_args(
  x = my_barplot,
  method = ggplot2:::Layer$compute_statistic)$data
  x   fill PANEL group
1 1 Adelie     1     1
2 1 Adelie     1     1
3 1 Adelie     1     1
 [ reached 'max' / getOption("max.print") -- omitted 339 rows ]

Before Stat
After Stat
Before Geom
After Scale

1) Before Stat

ggplot(penguins_sm) +
  geom_bar(aes(x = species, y = bill_length_mm))
Error in `f()`:
! stat_count() can only have an x or y aesthetic.
Before Stat
After Stat
Before Geom
After Scale

Error messages will be much nicer in {ggplot2} v3.4.0, with {cli} integration

1) Before Stat

ggtrace_inspect_args(error = TRUE,
  x = ggplot(penguins_sm) +
        geom_bar(aes(x = species, y = bill_length_mm)),
  method = ggplot2:::Layer$compute_statistic)$data
Error in `f()`:
! stat_count() can only have an x or y aesthetic.

  x    y PANEL group
1 1 39.1     1     1
2 1 39.5     1     1
3 1 40.3     1     1
 [ reached 'max' / getOption("max.print") -- omitted 339 rows ]
Before Stat
After Stat
Before Geom
After Scale

2) After Stat

ggtrace_inspect_return(
  x = my_barplot,
  method = ggplot2:::Layer$compute_statistic)
  count prop x width flipped_aes      fill PANEL group
1   151    1 1   0.9       FALSE    Adelie     1     1
2    68    1 2   0.9       FALSE Chinstrap     1     2
3   123    1 3   0.9       FALSE    Gentoo     1     3
Before Stat
After Stat
Before Geom
After Scale

2) After Stat

ggtrace_inspect_return(
  x = my_barplot,
  method = ggplot2:::Layer$compute_statistic)
  count prop x width flipped_aes      fill PANEL group
1   151    1 1   0.9       FALSE    Adelie     1     1
2    68    1 2   0.9       FALSE Chinstrap     1     2
3   123    1 3   0.9       FALSE    Gentoo     1     3
Before Stat
After Stat
Before Geom
After Scale

2) After Stat

geom_bar(aes(x = species, fill = species,
             y = after_stat(count)),
         stat = "count")
Before Stat
After Stat
Before Geom
After Scale

after_stat(var) supersedes the stat(var) and ..var.. syntax

2) After Stat

geom_bar(aes(x = species, fill = species,
             y = after_stat(count)),
         stat = "count")
ggtrace_inspect_return(
  my_barplot, ggplot2:::Layer$compute_statistic) %>% 
  mutate(y = count, .before = 1L)
    y count prop x width flipped_aes      fill PANEL group
1 151   151    1 1   0.9       FALSE    Adelie     1     1
2  68    68    1 2   0.9       FALSE Chinstrap     1     2
3 123   123    1 3   0.9       FALSE    Gentoo     1     3
Before Stat
After Stat
Before Geom
After Scale

FYI - mutate(..., .before = 1L) moves new columns to the front

2) After Stat

geom_bar(aes(x = species, fill = species,
             y = after_stat( count/sum(count) )),
         stat = "count")
ggtrace_inspect_return(
  my_barplot, ggplot2:::Layer$compute_statistic) %>% 
  mutate(y = count/sum(count), .before = 1L)
          y count prop x width flipped_aes      fill PANEL group
1 0.4415205   151    1 1   0.9       FALSE    Adelie     1     1
2 0.1988304    68    1 2   0.9       FALSE Chinstrap     1     2
3 0.3596491   123    1 3   0.9       FALSE    Gentoo     1     3
Before Stat
After Stat
Before Geom
After Scale

FYI - mutate(..., .before = 1L) moves new columns to the front

2) After Stat

ggplot(penguins_sm) +
  geom_bar(aes(x = species, fill = species,
               y = after_stat( count/sum(count) )),
           stat = "count")
Before Stat
After Stat
Before Geom
After Scale

3) Before Geom

ggtrace_inspect_args(
  x = my_barplot,
  method = ggplot2:::Layer$compute_geom_1)$data
    y count prop x width flipped_aes      fill PANEL group
1 151   151    1 1   0.9       FALSE    Adelie     1     1
2  68    68    1 2   0.9       FALSE Chinstrap     1     2
3 123   123    1 3   0.9       FALSE    Gentoo     1     3
Before Stat
After Stat
Before Geom
After Scale

3) Before Geom

ggtrace_inspect_args(
  x = my_barplot,
  method = ggplot2:::Layer$compute_geom_1)$data
    y count prop x width flipped_aes      fill PANEL group
1 151   151    1 1   0.9       FALSE    Adelie     1     1
2  68    68    1 2   0.9       FALSE Chinstrap     1     2
3 123   123    1 3   0.9       FALSE    Gentoo     1     3

Before Stat
After Stat
Before Geom
After Scale

3) Before Geom

ggtrace_inspect_args(error = TRUE,
  x = ggplot(penguins_sm) +
        geom_bar(aes(x = species, y = NULL)),
  method = ggplot2:::Layer$compute_geom_1)$data
Error in `check_required_aesthetics()`:
! geom_bar requires the following missing aesthetics: y
  count prop x width flipped_aes PANEL group
1   151    1 1   0.9       FALSE     1     1
2    68    1 2   0.9       FALSE     1     2
3   123    1 3   0.9       FALSE     1     3
Before Stat
After Stat
Before Geom
After Scale

4) After Scale

ggtrace_inspect_return(
  x = my_barplot,
  method = ggplot2:::Layer$compute_geom_2)
     fill   y count prop x flipped_aes PANEL group ymin ymax xmin
1 #F8766D 151   151    1 1       FALSE     1     1    0  151 0.55
2 #00BA38  68    68    1 2       FALSE     1     2    0   68 1.55
3 #619CFF 123   123    1 3       FALSE     1     3    0  123 2.55
  xmax colour size linetype alpha
1 1.45     NA  0.5        1    NA
2 2.45     NA  0.5        1    NA
3 3.45     NA  0.5        1    NA
Before Stat
After Stat
Before Geom
After Scale

In {ggplot2} v3.4.0, size aesthetic for lines will be renamed to linewidth

4) After Scale

ggtrace_inspect_return(
  x = my_barplot,
  method = ggplot2:::Layer$compute_geom_2)
     fill   y count prop x flipped_aes PANEL group ymin ymax xmin
1 #F8766D 151   151    1 1       FALSE     1     1    0  151 0.55
2 #00BA38  68    68    1 2       FALSE     1     2    0   68 1.55
3 #619CFF 123   123    1 3       FALSE     1     3    0  123 2.55
  xmax colour size linetype alpha
1 1.45     NA  0.5        1    NA
2 2.45     NA  0.5        1    NA
3 3.45     NA  0.5        1    NA
Before Stat
After Stat
Before Geom
After Scale

In {ggplot2} v3.4.0, size aesthetic for lines will be renamed to linewidth

4) After Scale

ggtrace_inspect_return(
  x = my_barplot,
  method = ggplot2:::Layer$compute_geom_2)
     fill   y count prop x flipped_aes PANEL group ymin ymax xmin
1 #F8766D 151   151    1 1       FALSE     1     1    0  151 0.55
2 #00BA38  68    68    1 2       FALSE     1     2    0   68 1.55
3 #619CFF 123   123    1 3       FALSE     1     3    0  123 2.55
  xmax colour size linetype alpha
1 1.45     NA  0.5        1    NA
2 2.45     NA  0.5        1    NA
3 3.45     NA  0.5        1    NA
Before Stat
After Stat
Before Geom
After Scale

In {ggplot2} v3.4.0, size aesthetic for lines will be renamed to linewidth

4) After Scale

ggplot(penguins_sm) +
  geom_bar(aes(x = species, fill = species),
           color = "grey")

Before Stat
After Stat
Before Geom
After Scale

4) After Scale

ggplot(penguins_sm) +
  geom_bar(aes(x = species, fill = species,
               size = after_scale( size * 5 )),
           color = "grey")

after_scale() is “new” in {ggplot2} v3.3.0 (March 2020)

Before Stat
After Stat
Before Geom
After Scale

4) After Scale

library(colorspace) # for the `darken()` function
ggplot(penguins_sm) +
  geom_bar(aes(x = species, fill = species,
               color = after_scale( darken(fill, .5) ),
               size = after_scale( size * 5 )))

Before Stat
After Stat
Before Geom
After Scale

Snapshots of my_barplot’s bar layer

Input (user-supplied data)

  1. Before Stat has x, allows "count" stat

  2. After Stat has count, allows y = after_stat(count)

  3. Before Geom has x and y, allows "bar" geom

  4. After Scale allows after_scale() mappings (unused)

Output (drawing-ready data)

ggplot internals for users

Outline:

  1. Reframing of ggplot internals as a data wrangling pipeline

  2. Walkthrough with a barplot example

  3. Building up the boxplot annotation layer

my_boxplot

my_boxplot + ?

Step-by-step

  1. Use a label to draw a boxplot variable
geom_label(
  stat = "boxplot"
)

Step-by-step

  1. Use a label to draw a boxplot variable
geom_label(
  stat = "boxplot"
)
stat_boxplot(
  geom = "label"
)

They’re interchangeable - your choice of emphasis!

Step-by-step

  1. Plot just the responses from condition B
geom_label(
  stat = "boxplot",
  data = ~ .x %>% filter(condition == "B")
)

Step-by-step

  1. Give the stat the aesthetics it needs to start
geom_label(
  aes(
    x = condition,
    y = response_time
  ),
  stat = "boxplot",
  data = ~ .x %>% filter(condition == "B")
)

Step-by-step

  1. Make sure the geom gets the aesthetics it needs later
geom_label(
  aes(
    x = condition,
    y = response_time,
    label = after_stat(ymax)
  ),
  stat = "boxplot",
  data = ~ .x %>% filter(condition == "B")
)
my_boxplot_annotated <- my_boxplot +
  geom_label(
    aes(
      x = condition,
      y = response_time,
      label = after_stat(ymax)
    ),
    stat = "boxplot",
  data = ~ .x %>% filter(condition == "B")
  )
my_boxplot_annotated
Error in `check_required_aesthetics()`:
! geom_label requires the following missing aesthetics: y

Debugging with {ggtrace}

ggtrace_inspect_args(error = TRUE,
  x = my_boxplot_annotated,
  method = ggplot2:::Layer$compute_geom_1, cond = 2)$data
Error in `check_required_aesthetics()`:
! geom_label requires the following missing aesthetics: y
  label ymin  lower middle  upper ymax         outliers notchupper
1  3012 1221 1670.5   2002 2380.5 3012 3590, 3712, 3963   2031.572
  notchlower x width relvarwidth flipped_aes PANEL group
1   1972.428 2  0.75    37.93415       FALSE     1     1

Debugging with {ggtrace}

ggtrace_inspect_args(error = TRUE,
  x = my_boxplot_annotated,
  method = ggplot2:::Layer$compute_geom_1, cond = 2)$data
Error in `check_required_aesthetics()`:
! geom_label requires the following missing aesthetics: y
  label ymin  lower middle  upper ymax         outliers notchupper
1  3012 1221 1670.5   2002 2380.5 3012 3590, 3712, 3963   2031.572
  notchlower x width relvarwidth flipped_aes PANEL group
1   1972.428 2  0.75    37.93415       FALSE     1     1

Multiple mappings with stage()

my_boxplot_annotated <- my_boxplot +
  geom_label(
    aes(
      x = condition,
      y = stage(start = response_time, after_stat = ymax),
      label = after_stat(ymax)
    ),
    stat = "boxplot", size = 5,
    data = ~ .x %>% filter(condition == "B")
  )
ggtrace_inspect_args(
  x = my_boxplot_annotated,
  method = ggplot2:::Layer$compute_statistic, cond = 2)$data
  x    y PANEL group
1 2 2884     1     1
2 2 2299     1     1
3 2 2734     1     1
 [ reached 'max' / getOption("max.print") -- omitted 1436 rows ]
Before Stat
ggtrace_inspect_args(
  x = my_boxplot_annotated,
  method = ggplot2:::Layer$compute_geom_1, cond = 2)$data
     y label ymin  lower middle  upper ymax         outliers
1 3012  3012 1221 1670.5   2002 2380.5 3012 3590, 3712, 3963
  notchupper notchlower x width relvarwidth flipped_aes PANEL group
1   2031.572   1972.428 2  0.75    37.93415       FALSE     1     1
Before Geom

my_boxplot_annotated

ggplot internals for users

{ggtrace} helps us learn the internals as users:

  1. Layers have an underlying dataframe representation

  2. Internals is data wrangling to make them “drawing-ready”

  • Aesthetic mappings are like mutate() calls, scheduled to apply to the data at different stage()’s of the pipeline.

  • Every layer has a stat and a geom - they step in to modify the data, as long as the required columns are present.

Capture workflow

stat_transform_layer2 <- ggtrace_capture_fn(
  my_boxplot_annotated, ggplot2:::Layer$compute_statistic, cond = 2)
formals(stat_transform_layer2)$data
  x    y PANEL group
1 2 2884     1     1
2 2 2299     1     1
 [ reached 'max' / getOption("max.print") -- omitted 1437 rows ]
stat_transform_layer2()
  ymin  lower middle  upper ymax         outliers notchupper
1 1221 1670.5   2002 2380.5 3012 3590, 3712, 3963   2031.572
  notchlower x width relvarwidth flipped_aes PANEL group
1   1972.428 2  0.75    37.93415       FALSE     1     1

Highjack workflow

ggtrace_highjack_return(
  my_boxplot, ggplot2:::Layer$compute_statistic, cond = 1,
  value = quote( returnValue() %>% mutate(outliers = NULL) ))

Thank you!

remotes::install_github("yjunechoe/ggtrace") # v5.1

More on ggplot internals: