## Rows: 200
## Columns: 8
## $ trt <chr> "Drug A", "Drug B", "Drug A", "Drug A", "Drug A", "Drug B", "…
## $ age <dbl> 23, 9, 31, NA, 51, 39, 37, 32, 31, 34, 42, 63, 54, 21, 48, 71…
## $ marker <dbl> 0.160, 1.107, 0.277, 2.067, 2.767, 0.613, 0.354, 1.739, 0.144…
## $ stage <fct> T1, T2, T1, T3, T4, T4, T1, T1, T1, T3, T1, T3, T4, T4, T1, T…
## $ grade <fct> II, I, II, III, III, I, II, I, II, I, III, I, III, I, I, III,…
## $ response <int> 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0…
## $ death <int> 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0…
## $ ttdeath <dbl> 24.00, 24.00, 24.00, 17.64, 16.43, 15.64, 24.00, 18.43, 24.00…
Introduction to {gtsummary}
Note
This lecture used materials from the {gtsummary} package homepage.
Trial data
A dataset containing the baseline characteristics of 200 patients who received Drug A or Drug B. The dataset also contains the outcome of interest: tumor response to the treatment.
# See the labels of the variables if available
labelled::get_variable_labels(trial)
## $trt
## [1] "Chemotherapy Treatment"
##
## $age
## [1] "Age"
##
## $marker
## [1] "Marker Level (ng/mL)"
##
## $stage
## [1] "T Stage"
##
## $grade
## [1] "Grade"
##
## $response
## [1] "Tumor Response"
##
## $death
## [1] "Patient Died"
##
## $ttdeath
## [1] "Months to Death/Censor"
Summary Table
Use tbl_summary() to summarize a data frame.
# summarize some variables
trial |>
select(age, grade, response) |>
tbl_summary()

| Characteristic | N = 200¹ |
|---|---|
| Age | 47 (38, 57) |
| Unknown | 11 |
| Grade | |
| I | 68 (34%) |
| II | 68 (34%) |
| III | 64 (32%) |
| Tumor Response | 61 (32%) |
| Unknown | 7 |
| ¹ Median (Q1, Q3); n (%) | |
- Statistics are `median (IQR)` for continuous, `n (%)` for categorical/dichotomous
- Variables coded `0/1`, `TRUE/FALSE`, `Yes/No` are treated as dichotomous
- `NA` values are listed under “Unknown”
- Label attributes are printed automatically
trial |>
select(age, trt, grade, response) |>
tbl_summary(
by = trt, # split table by group
missing = "no" # don't list missing data separately
) |>
add_n() |> # add column with total number of non-missing observations
add_p() |> # test for a difference between groups
modify_header(label = "**Variable**") # update the column header

| Variable | N | Drug A N = 98¹ | Drug B N = 102¹ | p-value² |
|---|---|---|---|---|
| Age | 189 | 46 (37, 60) | 48 (39, 56) | 0.7 |
| Grade | 200 | | | 0.9 |
| I | | 35 (36%) | 33 (32%) | |
| II | | 32 (33%) | 36 (35%) | |
| III | | 31 (32%) | 33 (32%) | |
| Tumor Response | 193 | 28 (29%) | 33 (34%) | 0.5 |
| ¹ Median (Q1, Q3); n (%) | | | | |
| ² Wilcoxon rank sum test; Pearson’s Chi-squared test | | | | |
trial |>
select(trt, marker, response) |>
tbl_summary(
by = trt,
statistic = list(marker ~ "{mean} ({sd})", response ~ "{p}%"),
missing = "no"
) |>
add_difference()

| Characteristic | Drug A N = 98¹ | Drug B N = 102¹ | Difference² | 95% CI² | p-value² |
|---|---|---|---|---|---|
| Marker Level (ng/mL) | 1.02 (0.89) | 0.82 (0.83) | 0.20 | -0.05, 0.44 | 0.12 |
| Tumor Response | 29% | 34% | -4.2% | -18%, 9.9% | 0.6 |
| Abbreviation: CI = Confidence Interval | | | | | |
| ¹ Mean (SD); % | | | | | |
| ² Welch Two Sample t-test; 2-sample test for equality of proportions with continuity correction | | | | | |
trial |>
select(age, grade, trt, response) |>
tbl_summary(by = trt, missing = "no") |>
add_overall() |>
add_n() |>
add_p()

| Characteristic | N | Overall N = 200¹ | Drug A N = 98¹ | Drug B N = 102¹ | p-value² |
|---|---|---|---|---|---|
| Age | 189 | 47 (38, 57) | 46 (37, 60) | 48 (39, 56) | 0.7 |
| Grade | 200 | | | | 0.9 |
| I | | 68 (34%) | 35 (36%) | 33 (32%) | |
| II | | 68 (34%) | 32 (33%) | 36 (35%) | |
| III | | 64 (32%) | 31 (32%) | 33 (32%) | |
| Tumor Response | 193 | 61 (32%) | 28 (29%) | 33 (34%) | 0.5 |
| ¹ Median (Q1, Q3); n (%) | | | | | |
| ² Wilcoxon rank sum test; Pearson’s Chi-squared test | | | | | |
Cross-tabulation
tbl_cross() is a wrapper for tbl_summary() for \(n \times m\) tables
Regression models
Use tbl_regression() to easily and beautifully display regression model results in a table.
mod1 <- glm(response ~ trt + age + grade, trial, family = binomial)
tbl_regression(mod1, exponentiate = TRUE)

| Characteristic | OR | 95% CI | p-value |
|---|---|---|---|
| Chemotherapy Treatment | | | |
| Drug A | — | — | |
| Drug B | 1.13 | 0.60, 2.13 | 0.7 |
| Age | 1.02 | 1.00, 1.04 | 0.10 |
| Grade | | | |
| I | — | — | |
| II | 0.85 | 0.39, 1.85 | 0.7 |
| III | 1.01 | 0.47, 2.15 | >0.9 |
| Abbreviations: CI = Confidence Interval, OR = Odds Ratio | | | |
Univariate models
# Univariate Regression with `tbl_uvregression()`
t1 <- trial |>
tbl_uvregression(
method = glm,
y = response,
method.args = list(family = binomial),
exponentiate = TRUE
)

Side-by-side Regression Models
You can also present side-by-side regression model results using tbl_merge()
# Multivariate Regression
t2 <- trial |>
glm(formula = response ~ trt + age + grade, family = binomial) |>
tbl_regression(exponentiate = TRUE)

# merge tables
t3 <-
tbl_merge(tbls = list(t1, t2),
tab_spanner = c("**Univariate**", "**Multivariate**"))
t3

| | **Univariate** | | | | **Multivariate** | | |
|---|---|---|---|---|---|---|---|
| Characteristic | N | OR | 95% CI | p-value | OR | 95% CI | p-value |
| Chemotherapy Treatment | 193 | | | | | | |
| Drug A | | — | — | | — | — | |
| Drug B | | 1.21 | 0.66, 2.24 | 0.5 | 1.13 | 0.60, 2.13 | 0.7 |
| Age | 183 | 1.02 | 1.00, 1.04 | 0.10 | 1.02 | 1.00, 1.04 | 0.10 |
| Marker Level (ng/mL) | 183 | 1.35 | 0.94, 1.93 | 0.10 | | | |
| T Stage | 193 | | | | | | |
| T1 | | — | — | | | | |
| T2 | | 0.63 | 0.27, 1.46 | 0.3 | | | |
| T3 | | 1.13 | 0.48, 2.68 | 0.8 | | | |
| T4 | | 0.83 | 0.36, 1.92 | 0.7 | | | |
| Grade | 193 | | | | | | |
| I | | — | — | | — | — | |
| II | | 0.95 | 0.45, 2.00 | 0.9 | 0.85 | 0.39, 1.85 | 0.7 |
| III | | 1.10 | 0.52, 2.29 | 0.8 | 1.01 | 0.47, 2.15 | >0.9 |
| Patient Died | 193 | 0.38 | 0.20, 0.71 | 0.003 | | | |
| Months to Death/Censor | 193 | 1.10 | 1.03, 1.18 | 0.006 | | | |
| Abbreviations: CI = Confidence Interval, OR = Odds Ratio | | | | | | | |
Save Individual Tables
{gtsummary} tables can also be saved directly to file as an image, HTML, Word, RTF, or LaTeX file.