# Load main package
library(ggforestplot)
# Load adjunct packages
library(ggplot2)
library(ggstance)
library(scales)
Please download the required input files for this tutorial here: https://drive.google.com/file/d/1xo_KNfNWqmz0Q8hHcqAtzVYPuQGHEF3j/view?usp=drive_link
The input file requires the following information represented by separate columns.
# Read example regression output
# Figure 3c
path <- "data/"
file <- "fig_3c.txt"
df <- read.table(paste(path, file, sep="/"), sep="\t", header=TRUE, stringsAsFactors=FALSE)
head(df)
## model outcome n_case n_control beta
## 1 Not adjusted for ancestry CH 3816 120843 0.02900114
## 2 Not adjusted for ancestry DNMT3A 1886 120843 0.03989228
## 3 Not adjusted for ancestry TET2 1143 120843 0.03575867
## 4 Not adjusted for ancestry ASXL1 348 120843 0.01772667
## 5 Not adjusted for ancestry PPM1D 209 120843 -0.05494472
## 6 Not adjusted for ancestry TP53 112 120843 -0.02701751
## beta_95ci_lower beta_95ci_upper odds_ratio odds_ratio_95ci_lower
## 1 0.01664923 0.041353054 1.0294258 1.0167886
## 2 0.02274808 0.057036490 1.0406987 1.0230088
## 3 0.01377815 0.057739184 1.0364057 1.0138735
## 4 -0.02191158 0.057364919 1.0178847 0.9783267
## 5 -0.10628718 -0.003602263 0.9465375 0.8991664
## 6 -0.09676130 0.042726283 0.9733442 0.9077727
## odds_ratio_95ci_upper pval pval_adj_bonferroni pval_threshold
## 1 1.0422200 4.186597e-06 6.698555e-05 TRUE
## 2 1.0586944 5.099343e-06 8.158949e-05 TRUE
## 3 1.0594386 1.429654e-03 2.287447e-02 TRUE
## 4 1.0590422 3.807397e-01 1.000000e+00 NA
## 5 0.9964042 3.594788e-02 5.751661e-01 TRUE
## 6 1.0436522 4.476915e-01 1.000000e+00 NA
outcome. This is the main variable and will be
represented on the y-axis. This is typically the outcome or predictor in
a statistical model.model. This is the sub-categories of the y-axis
variables (optional).beta or odds_ratio. This is the point
estimate of the statistical model. For example, this would be odds ratio
from a logistic regression model or beta coefficient from a linear
regression model.beta_95ci_lower or odds_ratio_95ci_lower.
This is the lower bound of the point estimate. This may be the lower
limit of the confidence interval, which in turn may be derived from the
standard error.beta_95ci_upper or odds_ratio_95ci_upper.
This is the upper bound of the point estimate. This may be the upper
limit of the confidence interval, which in turn may be derived from the
standard error.pval. P-values.pval_threshold. Are the p-values statistically
significant? A threshold of 0.05 was used here where TRUE
represents statistically significant p-values while NA
represents non-statistically significant p-values.# Read summary statistics
path <- "data/"
file <- "fig_1c.txt"
df <- read.table(paste(path, file, sep=""), sep="\t", header=TRUE, stringsAsFactors=FALSE)
# Check input file
head(df)
## outcome n_case_mcps n_control_mcps pct_case_mcps n_case_ukb n_control_ukb
## 1 CH 4247 132112 3.11457256 20354 393676
## 2 DNMT3A 2087 132112 1.55515317 12103 393676
## 3 TET2 1289 132112 0.96625962 5050 393676
## 4 ASXL1 394 132112 0.29734503 1923 393676
## 5 PPM1D 237 132112 0.17907200 681 393676
## 6 TP53 122 132112 0.09226069 427 393676
## pct_case_ukb beta beta_95ci_lower beta_95ci_upper odds_ratio
## 1 4.9160689 0.5264336 0.490849494 0.5620177 1.692884
## 2 2.9826580 0.6881036 0.639806543 0.7364007 1.989938
## 3 1.2665339 0.3905781 0.324047196 0.4571090 1.477835
## 4 0.4860983 0.5677214 0.451266565 0.6841763 1.764243
## 5 0.1726862 0.1692847 0.001801182 0.3367683 1.184457
## 6 0.1083473 0.2616044 0.045416557 0.4777923 1.299013
## odds_ratio_95ci_lower odds_ratio_95ci_upper pval pval_adj_bonferroni
## 1 1.633703 1.754208 7.311364e-185 1.169818e-183
## 2 1.896114 2.088405 1.336305e-171 2.138088e-170
## 3 1.382713 1.579501 1.224446e-30 1.959113e-29
## 4 1.570300 1.982138 1.235046e-21 1.976073e-20
## 5 1.001803 1.400415 4.758246e-02 7.613194e-01
## 6 1.046464 1.612511 1.770382e-02 2.832612e-01
## pval_threshold
## 1 TRUE
## 2 TRUE
## 3 TRUE
## 4 TRUE
## 5 TRUE
## 6 TRUE
# Set factor levels
levels <- df$outcome
levels <- rev(levels)
df$outcome <- factor(df$outcome, levels=levels)
# Definition
data <- df
x <- data$odds_ratio
y <- data$outcome
xmin <- data$odds_ratio_95ci_lower
xmax <- data$odds_ratio_95ci_upper
maintitle <- ""
xtitle <- "OR [95% CI]"
ytitle <- ""
# Check range
print(min(xmin))
## [1] 0.5173003
print(max(xmax))
## [1] 3.04654
# Plot
plot <- ggplot(data, aes(x=x, y=y)) +
theme_forest() +
scale_color_manual(values="blue") +
scale_fill_manual(values="blue") +
geom_stripes() +
geom_vline(xintercept=1, linetype="solid", linewidth=0.4, colour="black") +
geom_effect(aes(xmin=xmin, xmax=xmax, colour="blue"),
position=position_dodgev(height = 0.5)) +
scale_shape_manual(values = c(21L, 22L, 23L, 24L, 25L)) +
scale_x_continuous(breaks = c(seq(0, 3, 0.25)),
labels = round(seq(0, 3, 0.25), digits=2)
) +
guides(colour = guide_legend(reverse=TRUE), shape=guide_legend(reverse=TRUE)) +
labs(title=maintitle, x=xtitle, y=ytitle) +
theme(axis.title.x=element_text(size=14, colour="black"),
axis.text.x=element_text(size=14, colour="black"),
axis.text.y=element_text(size=10, colour="black"),
#axis.text.y=element_blank(),
legend.position="none"
)
# Display plot
plot
# Plot needs to be saved in PNG, not PDF
# The grey stripes will not appear in PDF format
# Please adjust the plot dimensions to your liking
path <- "outcome/"
file <- "fig_1c_v1.png"
ggsave(paste(path, file, sep=""), plot, width=6.5, height=5)
Point estimates reflect statistical significance using the
filled option under the geom_effect()
function.
# Definition
pval.threshold <- data$pval_threshold
# Plot
plot <- ggplot(data, aes(x=x, y=y)) +
theme_forest() +
scale_color_manual(values="blue") +
scale_fill_manual(values="blue") +
geom_stripes() +
geom_vline(xintercept=1, linetype="solid", linewidth=0.4, colour="black") +
geom_effect(aes(xmin=xmin, xmax=xmax, colour="blue", filled=pval.threshold),
position=position_dodgev(height = 0.5)) +
scale_shape_manual(values = c(21L, 22L, 23L, 24L, 25L)) +
scale_x_continuous(breaks = c(seq(0, 3, 0.25)),
labels = round(seq(0, 3, 0.25), digits=2)
) +
guides(colour = guide_legend(reverse=TRUE), shape=guide_legend(reverse=TRUE)) +
labs(title=maintitle, x=xtitle, y=ytitle) +
theme(axis.title.x=element_text(size=14, colour="black"),
axis.text.x=element_text(size=14, colour="black"),
axis.text.y=element_text(size=10, colour="black"),
#axis.text.y=element_blank(),
legend.position="none"
)
# Display plot
plot
# Plot needs to be saved in PNG, not PDF
# The grey stripes will not appear in PDF format
# Please adjust the plot dimensions to your liking
path <- "outcome/"
file <- "fig_1c_v2.png"
ggsave(paste(path, file, sep=""), plot, width=6.5, height=5)
trans option under the
scale_x_continuous() function.# Read summary statistics
path <- "data/"
file <- "fig_4b.txt"
df <- read.table(paste(path, file, sep=""), sep="\t", header=TRUE, stringsAsFactors=FALSE)
# Check input file
head(df)
## cohort phenotype ID ID_ORIGINAL CHROM GENPOS ALLELE0
## 1 MCPS CH 14-95571114-C-T chr14:95571114:C:T 14 95571114 C
## 2 MCPS DNMT3A 14-95571114-C-T chr14:95571114:C:T 14 95571114 C
## 3 MCPS TET2 14-95571114-C-T chr14:95571114:C:T 14 95571114 C
## 4 MCPS ASXL1 14-95571114-C-T chr14:95571114:C:T 14 95571114 C
## 5 MCPS PPM1D 14-95571114-C-T chr14:95571114:C:T 14 95571114 C
## 6 MCPS TP53 14-95571114-C-T chr14:95571114:C:T 14 95571114 C
## ALLELE1 A1FREQ A1FREQ_CASES A1FREQ_CONTROLS MAC_CASES MAC_CONTROLS
## 1 TRUE 0.0106430 0.01655370 0.010453 141 2763
## 2 TRUE 0.0103485 0.00373723 0.010453 16 2763
## 3 TRUE 0.0106375 0.02955790 0.010453 76 2763
## 4 TRUE 0.0104852 0.02128390 0.010453 17 2763
## 5 TRUE 0.0104583 0.01340890 0.010453 6 2763
## 6 TRUE 0.0104627 0.02105260 0.010453 5 2763
## INFO N TEST BETA SE ODDS_RATIO ODDS_RATIO_95CI_LOWER
## 1 0.857562 136401 ADD 0.601168 0.0962676 1.8242483 1.5105646
## 2 0.854193 134241 ADD -1.169870 0.2895340 0.3104073 0.1759850
## 3 0.856513 133441 ADD 1.279530 0.1254240 3.5949497 2.8114461
## 4 0.855473 132546 ADD 0.939617 0.2570630 2.5590011 1.5461586
## 5 0.855194 132389 ADD 0.359399 0.4986750 1.4324682 0.5390192
## 6 0.855317 132274 ADD 1.174940 0.6773150 3.2379487 0.8584753
## ODDS_RATIO_95CI_UPPER CHISQ PVAL LOG10P EXTRA pval_threshold
## 1 2.2030714 33.821500 6.040877e-09 8.218900 NA TRUE
## 2 0.5475052 23.229500 1.437772e-06 5.842310 NA TRUE
## 3 4.5968029 75.477700 3.695728e-18 17.432300 NA TRUE
## 4 4.2353268 10.235800 1.377400e-03 2.860940 NA TRUE
## 5 3.8068497 0.519421 4.710890e-01 0.326897 NA NA
## 6 12.2127123 3.009170 8.279422e-02 1.082000 NA NA
# Set factor levels
levels <- df$phenotype
levels <- rev(levels)
df$phenotype <- factor(df$phenotype, levels=levels)
# Definition
data <- df
x <- data$ODDS_RATIO
y <- data$phenotype
xmin <- data$ODDS_RATIO_95CI_LOWER
xmax <- data$ODDS_RATIO_95CI_UPPER
pval.threshold <- data$pval_threshold
maintitle <- ""
xtitle <- "OR [95% CI]"
ytitle <- ""
# Check range
print(min(xmin))
## [1] 0.175985
print(max(xmax))
## [1] 187.0767
# Plot
plot <- ggplot(data, aes(x=x, y=y)) +
theme_forest() +
scale_color_manual(values=c("red")) +
scale_fill_manual(values=c("red")) +
geom_stripes() +
geom_vline(xintercept=1, linetype="solid", linewidth=0.4, colour="black") +
geom_effect(aes(xmin=xmin, xmax=xmax, colour="red", filled=pval.threshold),
position=position_dodgev(height = 0.5)) +
scale_shape_manual(values = c(21L, 22L, 23L, 24L, 25L)) +
scale_x_continuous(trans = pseudo_log_trans(base=10, sigma=0.1),
breaks = c(0.1, 0, 1, 10, 100),
labels = c(0.1, 0, 1, 10, 100)
) +
guides(colour = guide_legend(reverse=TRUE), shape=guide_legend(reverse=TRUE)) +
labs(title=maintitle, x=xtitle, y=ytitle) +
theme(axis.title.x=element_text(size=14, colour="black"),
axis.text.x=element_text(size=14, colour="black"),
axis.text.y=element_text(size=10, colour="black"),
#axis.text.y=element_blank(),
legend.position="none"
)
# Display plot
plot
# Plot needs to be saved in PNG, not PDF
# The grey stripes will not appear in PDF format
# Please adjust the plot dimensions to your liking
path <- "outcome/"
file <- "fig_4b.png"
ggsave(paste(path, file, sep=""), plot, width=5, height=4)
color option
under the geom_effect() function.height option under the
position_dodgev() function.# Read summary statistics
path <- "data/"
file <- "fig_3c.txt"
df <- read.table(paste(path, file, sep=""), sep="\t", header=TRUE, stringsAsFactors=FALSE)
# Check input file
head(df)
## model outcome n_case n_control beta
## 1 Not adjusted for ancestry CH 3816 120843 0.02900114
## 2 Not adjusted for ancestry DNMT3A 1886 120843 0.03989228
## 3 Not adjusted for ancestry TET2 1143 120843 0.03575867
## 4 Not adjusted for ancestry ASXL1 348 120843 0.01772667
## 5 Not adjusted for ancestry PPM1D 209 120843 -0.05494472
## 6 Not adjusted for ancestry TP53 112 120843 -0.02701751
## beta_95ci_lower beta_95ci_upper odds_ratio odds_ratio_95ci_lower
## 1 0.01664923 0.041353054 1.0294258 1.0167886
## 2 0.02274808 0.057036490 1.0406987 1.0230088
## 3 0.01377815 0.057739184 1.0364057 1.0138735
## 4 -0.02191158 0.057364919 1.0178847 0.9783267
## 5 -0.10628718 -0.003602263 0.9465375 0.8991664
## 6 -0.09676130 0.042726283 0.9733442 0.9077727
## odds_ratio_95ci_upper pval pval_adj_bonferroni pval_threshold
## 1 1.0422200 4.186597e-06 6.698555e-05 TRUE
## 2 1.0586944 5.099343e-06 8.158949e-05 TRUE
## 3 1.0594386 1.429654e-03 2.287447e-02 TRUE
## 4 1.0590422 3.807397e-01 1.000000e+00 NA
## 5 0.9964042 3.594788e-02 5.751661e-01 TRUE
## 6 1.0436522 4.476915e-01 1.000000e+00 NA
# Set factor levels
# Main variable
levels <- unique(df$outcome)
levels <- rev(levels)
df$outcome <- factor(df$outcome, levels=levels)
# Sub-categories
levels <- c("Not adjusted for ancestry", "Adjusted for ancestry")
levels <- rev(levels)
df$model <- factor(df$model, levels=levels)
# Definition
data <- df
x <- data$beta
y <- data$outcome
z <- data$model
xmin <- data$beta_95ci_lower
xmax <- data$beta_95ci_upper
pval.threshold <- data$pval_threshold
maintitle <- ""
xtitle <- "Beta [95% CI]"
ytitle <- ""
legendtitle <- "model"
# Check range
print(min(xmin))
## [1] -0.3369658
print(max(xmax))
## [1] 0.242598
# Plot
plot <- ggplot(data, aes(x=x, y=y)) +
theme_forest() +
scale_color_manual(values=c("magenta", "red")) +
scale_fill_manual(values=c("magenta", "red")) +
geom_stripes() +
geom_vline(xintercept=0, linetype="solid", linewidth=0.4, colour="black") +
geom_effect(aes(xmin=xmin, xmax=xmax, colour=z, filled=pval.threshold),
position=position_dodgev(height = 1)) +
scale_shape_manual(values = c(21L, 22L, 23L, 24L, 25L)) +
guides(colour = guide_legend(reverse=TRUE), shape=guide_legend(reverse=TRUE)) +
labs(title=maintitle, x=xtitle, y=ytitle, color=legendtitle) +
theme(axis.title.x=element_text(size=14, colour="black"),
axis.text.x=element_text(size=14, colour="black"),
axis.text.y=element_text(size=10, colour="black")
#axis.text.y=element_blank(),
#legend.position="none"
)
# Display plot
plot
# Plot needs to be saved in PNG, not PDF
# The grey stripes will not appear in PDF format
# Please adjust the plot dimensions to your liking
path <- "outcome/"
file <- "fig_3c.png"
ggsave(paste(path, file, sep=""), plot, width=5.5, height=6)