Load packages

# Load main package
library(ggforestplot)

# Load adjunct packages
library(ggplot2)
library(ggstance)
library(scales)

Input file

Please download the required input files for this tutorial here: https://drive.google.com/file/d/1xo_KNfNWqmz0Q8hHcqAtzVYPuQGHEF3j/view?usp=drive_link
The input file requires the following information represented by separate columns.

# Read example regression output
# Figure 3c
path <- "data/"
file <- "fig_3c.txt"
df <- read.table(paste(path, file, sep="/"), sep="\t", header=TRUE, stringsAsFactors=FALSE)

head(df)

##                       model outcome n_case n_control        beta
## 1 Not adjusted for ancestry      CH   3816    120843  0.02900114
## 2 Not adjusted for ancestry  DNMT3A   1886    120843  0.03989228
## 3 Not adjusted for ancestry    TET2   1143    120843  0.03575867
## 4 Not adjusted for ancestry   ASXL1    348    120843  0.01772667
## 5 Not adjusted for ancestry   PPM1D    209    120843 -0.05494472
## 6 Not adjusted for ancestry    TP53    112    120843 -0.02701751
##   beta_95ci_lower beta_95ci_upper odds_ratio odds_ratio_95ci_lower
## 1      0.01664923     0.041353054  1.0294258             1.0167886
## 2      0.02274808     0.057036490  1.0406987             1.0230088
## 3      0.01377815     0.057739184  1.0364057             1.0138735
## 4     -0.02191158     0.057364919  1.0178847             0.9783267
## 5     -0.10628718    -0.003602263  0.9465375             0.8991664
## 6     -0.09676130     0.042726283  0.9733442             0.9077727
##   odds_ratio_95ci_upper         pval pval_adj_bonferroni pval_threshold
## 1             1.0422200 4.186597e-06        6.698555e-05           TRUE
## 2             1.0586944 5.099343e-06        8.158949e-05           TRUE
## 3             1.0594386 1.429654e-03        2.287447e-02           TRUE
## 4             1.0590422 3.807397e-01        1.000000e+00             NA
## 5             0.9964042 3.594788e-02        5.751661e-01           TRUE
## 6             1.0436522 4.476915e-01        1.000000e+00             NA

outcome. This is the main variable and will be represented on the y-axis. This is typically the outcome or predictor in a statistical model.
model. This is the sub-categories of the y-axis variables (optional).
beta or odds_ratio. This is the point estimate of the statistical model. For example, this would be odds ratio from a logistic regression model or beta coefficient from a linear regression model.
beta_95ci_lower or odds_ratio_95ci_lower. This is the lower bound of the point estimate. This may be the lower limit of the confidence interval, which in turn may be derived from the standard error.
beta_95ci_upper or odds_ratio_95ci_upper. This is the upper bound of the point estimate. This may be the upper limit of the confidence interval, which in turn may be derived from the standard error.
pval. P-values.
pval_threshold. Are the p-values statistically significant? A threshold of 0.05 was used here where TRUE represents statistically significant p-values while NA represents non-statistically significant p-values.

Example (1): Basic plot

# Read summary statistics
path <- "data/"
file <- "fig_1c.txt"
df <- read.table(paste(path, file, sep=""), sep="\t", header=TRUE, stringsAsFactors=FALSE)

# Check input file
head(df)

##   outcome n_case_mcps n_control_mcps pct_case_mcps n_case_ukb n_control_ukb
## 1      CH        4247         132112    3.11457256      20354        393676
## 2  DNMT3A        2087         132112    1.55515317      12103        393676
## 3    TET2        1289         132112    0.96625962       5050        393676
## 4   ASXL1         394         132112    0.29734503       1923        393676
## 5   PPM1D         237         132112    0.17907200        681        393676
## 6    TP53         122         132112    0.09226069        427        393676
##   pct_case_ukb      beta beta_95ci_lower beta_95ci_upper odds_ratio
## 1    4.9160689 0.5264336     0.490849494       0.5620177   1.692884
## 2    2.9826580 0.6881036     0.639806543       0.7364007   1.989938
## 3    1.2665339 0.3905781     0.324047196       0.4571090   1.477835
## 4    0.4860983 0.5677214     0.451266565       0.6841763   1.764243
## 5    0.1726862 0.1692847     0.001801182       0.3367683   1.184457
## 6    0.1083473 0.2616044     0.045416557       0.4777923   1.299013
##   odds_ratio_95ci_lower odds_ratio_95ci_upper          pval pval_adj_bonferroni
## 1              1.633703              1.754208 7.311364e-185       1.169818e-183
## 2              1.896114              2.088405 1.336305e-171       2.138088e-170
## 3              1.382713              1.579501  1.224446e-30        1.959113e-29
## 4              1.570300              1.982138  1.235046e-21        1.976073e-20
## 5              1.001803              1.400415  4.758246e-02        7.613194e-01
## 6              1.046464              1.612511  1.770382e-02        2.832612e-01
##   pval_threshold
## 1           TRUE
## 2           TRUE
## 3           TRUE
## 4           TRUE
## 5           TRUE
## 6           TRUE

# Set factor levels
levels <- df$outcome
levels <- rev(levels)
df$outcome <- factor(df$outcome, levels=levels)

# Definition
data <- df
x <- data$odds_ratio
y <- data$outcome
xmin <- data$odds_ratio_95ci_lower
xmax <- data$odds_ratio_95ci_upper
maintitle <- ""
xtitle <- "OR [95% CI]"
ytitle <- ""

# Check range
print(min(xmin))

## [1] 0.5173003

print(max(xmax))

## [1] 3.04654

# Plot
plot <- ggplot(data, aes(x=x, y=y)) +
        theme_forest() +
        scale_color_manual(values="blue") +
        scale_fill_manual(values="blue") +
        geom_stripes() +
        geom_vline(xintercept=1, linetype="solid", linewidth=0.4, colour="black") +
        geom_effect(aes(xmin=xmin, xmax=xmax, colour="blue"),
                    position=position_dodgev(height = 0.5)) +
        scale_shape_manual(values = c(21L, 22L, 23L, 24L, 25L)) +
        scale_x_continuous(breaks = c(seq(0, 3, 0.25)),
                           labels = round(seq(0, 3, 0.25), digits=2)
                           ) +
        guides(colour = guide_legend(reverse=TRUE), shape=guide_legend(reverse=TRUE)) +
        labs(title=maintitle, x=xtitle, y=ytitle) +
        theme(axis.title.x=element_text(size=14, colour="black"),
              axis.text.x=element_text(size=14, colour="black"),
              axis.text.y=element_text(size=10, colour="black"),
              #axis.text.y=element_blank(),
              legend.position="none"
              )

# Display plot
plot

# Plot needs to be saved in PNG, not PDF
# The grey stripes will not appear in PDF format
# Please adjust the plot dimensions to your liking
path <- "outcome/"
file <- "fig_1c_v1.png"
ggsave(paste(path, file, sep=""), plot, width=6.5, height=5)

Example (2): Data points reflect statistical significance

Point estimates reflect statistical significance using the filled option under the geom_effect() function.

# Definition
pval.threshold <- data$pval_threshold

# Plot
plot <- ggplot(data, aes(x=x, y=y)) +
        theme_forest() +
        scale_color_manual(values="blue") +
        scale_fill_manual(values="blue") +
        geom_stripes() +
        geom_vline(xintercept=1, linetype="solid", linewidth=0.4, colour="black") +
        geom_effect(aes(xmin=xmin, xmax=xmax, colour="blue", filled=pval.threshold),
                    position=position_dodgev(height = 0.5)) +
        scale_shape_manual(values = c(21L, 22L, 23L, 24L, 25L)) +
        scale_x_continuous(breaks = c(seq(0, 3, 0.25)),
                           labels = round(seq(0, 3, 0.25), digits=2)
                           ) +
        guides(colour = guide_legend(reverse=TRUE), shape=guide_legend(reverse=TRUE)) +
        labs(title=maintitle, x=xtitle, y=ytitle) +
        theme(axis.title.x=element_text(size=14, colour="black"),
              axis.text.x=element_text(size=14, colour="black"),
              axis.text.y=element_text(size=10, colour="black"),
              #axis.text.y=element_blank(),
              legend.position="none"
              )

# Display plot
plot

# Plot needs to be saved in PNG, not PDF
# The grey stripes will not appear in PDF format
# Please adjust the plot dimensions to your liking
path <- "outcome/"
file <- "fig_1c_v2.png"
ggsave(paste(path, file, sep=""), plot, width=6.5, height=5)

Example (3): X-axis in log10 unit

Useful to present the point estimates in log10 units when the values are very large.
This can be done using the trans option under the scale_x_continuous() function.

# Read summary statistics
path <- "data/"
file <- "fig_4b.txt"
df <- read.table(paste(path, file, sep=""), sep="\t", header=TRUE, stringsAsFactors=FALSE)

# Check input file
head(df)

##   cohort phenotype              ID        ID_ORIGINAL CHROM   GENPOS ALLELE0
## 1   MCPS        CH 14-95571114-C-T chr14:95571114:C:T    14 95571114       C
## 2   MCPS    DNMT3A 14-95571114-C-T chr14:95571114:C:T    14 95571114       C
## 3   MCPS      TET2 14-95571114-C-T chr14:95571114:C:T    14 95571114       C
## 4   MCPS     ASXL1 14-95571114-C-T chr14:95571114:C:T    14 95571114       C
## 5   MCPS     PPM1D 14-95571114-C-T chr14:95571114:C:T    14 95571114       C
## 6   MCPS      TP53 14-95571114-C-T chr14:95571114:C:T    14 95571114       C
##   ALLELE1    A1FREQ A1FREQ_CASES A1FREQ_CONTROLS MAC_CASES MAC_CONTROLS
## 1    TRUE 0.0106430   0.01655370        0.010453       141         2763
## 2    TRUE 0.0103485   0.00373723        0.010453        16         2763
## 3    TRUE 0.0106375   0.02955790        0.010453        76         2763
## 4    TRUE 0.0104852   0.02128390        0.010453        17         2763
## 5    TRUE 0.0104583   0.01340890        0.010453         6         2763
## 6    TRUE 0.0104627   0.02105260        0.010453         5         2763
##       INFO      N TEST      BETA        SE ODDS_RATIO ODDS_RATIO_95CI_LOWER
## 1 0.857562 136401  ADD  0.601168 0.0962676  1.8242483             1.5105646
## 2 0.854193 134241  ADD -1.169870 0.2895340  0.3104073             0.1759850
## 3 0.856513 133441  ADD  1.279530 0.1254240  3.5949497             2.8114461
## 4 0.855473 132546  ADD  0.939617 0.2570630  2.5590011             1.5461586
## 5 0.855194 132389  ADD  0.359399 0.4986750  1.4324682             0.5390192
## 6 0.855317 132274  ADD  1.174940 0.6773150  3.2379487             0.8584753
##   ODDS_RATIO_95CI_UPPER     CHISQ         PVAL    LOG10P EXTRA pval_threshold
## 1             2.2030714 33.821500 6.040877e-09  8.218900    NA           TRUE
## 2             0.5475052 23.229500 1.437772e-06  5.842310    NA           TRUE
## 3             4.5968029 75.477700 3.695728e-18 17.432300    NA           TRUE
## 4             4.2353268 10.235800 1.377400e-03  2.860940    NA           TRUE
## 5             3.8068497  0.519421 4.710890e-01  0.326897    NA             NA
## 6            12.2127123  3.009170 8.279422e-02  1.082000    NA             NA

# Set factor levels
levels <- df$phenotype
levels <- rev(levels)
df$phenotype <- factor(df$phenotype, levels=levels)

# Definition
data <- df
x <- data$ODDS_RATIO
y <- data$phenotype
xmin <- data$ODDS_RATIO_95CI_LOWER
xmax <- data$ODDS_RATIO_95CI_UPPER
pval.threshold <- data$pval_threshold
maintitle <- ""
xtitle <- "OR [95% CI]"
ytitle <- ""

# Check range
print(min(xmin))

## [1] 0.175985

print(max(xmax))

## [1] 187.0767

# Plot
plot <- ggplot(data, aes(x=x, y=y)) +
        theme_forest() +
        scale_color_manual(values=c("red")) +
        scale_fill_manual(values=c("red")) +
        geom_stripes() +
        geom_vline(xintercept=1, linetype="solid", linewidth=0.4, colour="black") +
        geom_effect(aes(xmin=xmin, xmax=xmax, colour="red", filled=pval.threshold),
                    position=position_dodgev(height = 0.5)) +
        scale_shape_manual(values = c(21L, 22L, 23L, 24L, 25L)) +
        scale_x_continuous(trans = pseudo_log_trans(base=10, sigma=0.1),
                           breaks = c(0.1, 0, 1, 10, 100),
                           labels = c(0.1, 0, 1, 10, 100)
                           ) +
        guides(colour = guide_legend(reverse=TRUE), shape=guide_legend(reverse=TRUE)) +
        labs(title=maintitle, x=xtitle, y=ytitle) +
        theme(axis.title.x=element_text(size=14, colour="black"),
              axis.text.x=element_text(size=14, colour="black"),
              axis.text.y=element_text(size=10, colour="black"),
              #axis.text.y=element_blank(),
              legend.position="none"
              )

# Display plot
plot

# Plot needs to be saved in PNG, not PDF
# The grey stripes will not appear in PDF format
# Please adjust the plot dimensions to your liking
path <- "outcome/"
file <- "fig_4b.png"
ggsave(paste(path, file, sep=""), plot, width=5, height=4)

Example (4): Plotting sub-categories

The sub-categories may be defined in the color option under the geom_effect() function.
The vertical distance between the data points for a given main variable may be defined in the height option under the position_dodgev() function.

# Read summary statistics
path <- "data/"
file <- "fig_3c.txt"
df <- read.table(paste(path, file, sep=""), sep="\t", header=TRUE, stringsAsFactors=FALSE)

# Check input file
head(df)

##                       model outcome n_case n_control        beta
## 1 Not adjusted for ancestry      CH   3816    120843  0.02900114
## 2 Not adjusted for ancestry  DNMT3A   1886    120843  0.03989228
## 3 Not adjusted for ancestry    TET2   1143    120843  0.03575867
## 4 Not adjusted for ancestry   ASXL1    348    120843  0.01772667
## 5 Not adjusted for ancestry   PPM1D    209    120843 -0.05494472
## 6 Not adjusted for ancestry    TP53    112    120843 -0.02701751
##   beta_95ci_lower beta_95ci_upper odds_ratio odds_ratio_95ci_lower
## 1      0.01664923     0.041353054  1.0294258             1.0167886
## 2      0.02274808     0.057036490  1.0406987             1.0230088
## 3      0.01377815     0.057739184  1.0364057             1.0138735
## 4     -0.02191158     0.057364919  1.0178847             0.9783267
## 5     -0.10628718    -0.003602263  0.9465375             0.8991664
## 6     -0.09676130     0.042726283  0.9733442             0.9077727
##   odds_ratio_95ci_upper         pval pval_adj_bonferroni pval_threshold
## 1             1.0422200 4.186597e-06        6.698555e-05           TRUE
## 2             1.0586944 5.099343e-06        8.158949e-05           TRUE
## 3             1.0594386 1.429654e-03        2.287447e-02           TRUE
## 4             1.0590422 3.807397e-01        1.000000e+00             NA
## 5             0.9964042 3.594788e-02        5.751661e-01           TRUE
## 6             1.0436522 4.476915e-01        1.000000e+00             NA

# Set factor levels
    # Main variable
    levels <- unique(df$outcome)
    levels <- rev(levels)
    df$outcome <- factor(df$outcome, levels=levels)

    # Sub-categories
    levels <- c("Not adjusted for ancestry", "Adjusted for ancestry")
    levels <- rev(levels)
    df$model <- factor(df$model, levels=levels)

# Definition
data <- df
x <- data$beta
y <- data$outcome
z <- data$model
xmin <- data$beta_95ci_lower
xmax <- data$beta_95ci_upper
pval.threshold <- data$pval_threshold
maintitle <- ""
xtitle <- "Beta [95% CI]"
ytitle <- ""
legendtitle <- "model"

# Check range
print(min(xmin))

## [1] -0.3369658

print(max(xmax))

## [1] 0.242598

# Plot
plot <- ggplot(data, aes(x=x, y=y)) +
        theme_forest() +
        scale_color_manual(values=c("magenta", "red")) +
        scale_fill_manual(values=c("magenta", "red")) +
        geom_stripes() +
        geom_vline(xintercept=0, linetype="solid", linewidth=0.4, colour="black") +
        geom_effect(aes(xmin=xmin, xmax=xmax, colour=z, filled=pval.threshold),
                    position=position_dodgev(height = 1)) +
        scale_shape_manual(values = c(21L, 22L, 23L, 24L, 25L)) +
        guides(colour = guide_legend(reverse=TRUE), shape=guide_legend(reverse=TRUE)) +
        labs(title=maintitle, x=xtitle, y=ytitle, color=legendtitle) +
        theme(axis.title.x=element_text(size=14, colour="black"),
              axis.text.x=element_text(size=14, colour="black"),
              axis.text.y=element_text(size=10, colour="black")
              #axis.text.y=element_blank(),
              #legend.position="none"
              )

# Display plot
plot

# Plot needs to be saved in PNG, not PDF
# The grey stripes will not appear in PDF format
# Please adjust the plot dimensions to your liking
path <- "outcome/"
file <- "fig_3c.png"
ggsave(paste(path, file, sep=""), plot, width=5.5, height=6)

ggforestplot (modified)

Sean Wen

Load packages

Input file

Example (1): Basic plot

Example (2): Data points reflect statistical significance

Example (3): X-axis in log10 unit

Example (4): Plotting sub-categories