?function_name
.# Load MARVEL package
library(MARVEL)
# Load adjunct packages for selected MARVEL features
# General data processing, plotting
library(ggnewscale)
library(ggrepel)
library(parallel)
library(reshape2)
library(stringr)
library(textclean)
# Dimension reduction analysis
library(factoextra)
library(FactoMineR)
# Modality analysis
library(fitdistrplus)
# Differential splicing analysis
library(kSamples)
library(twosamples)
# Gene ontology analysis
library(AnnotationDbi)
library(clusterProfiler)
library(org.Hs.eg.db)
library(org.Mm.eg.db)
# Nonsense-mediated decay (NMD) analysis
library(Biostrings)
library(BSgenome)
library(BSgenome.Hsapiens.NCBI.GRCh38)
# Load adjunct packages for this tutorial
library(data.table)
library(ggplot2)
library(gridExtra)
sample.id
while all other columns are
optional.path <- "Data/SJ/"
file <- "SJ_phenoData.txt"
df.pheno <- read.table(paste(path, file, sep=""), sep="\t", header=TRUE, stringsAsFactors=FALSE, na.strings="NA")
head(df.pheno)
## sample.id cell.type sample.type qc.seq
## 1 ERR1562083 Unknown Single Cell pass
## 2 ERR1562084 iPSC Single Cell pass
## 3 ERR1562085 iPSC Single Cell pass
## 4 ERR1562086 iPSC Single Cell pass
## 5 ERR1562087 iPSC Single Cell pass
## 6 ERR1562088 iPSC Single Cell pass
coord.intron
.SJ
is created here to contain the splice
junction count files (SJ.out.tab) generated from 1st pass mode to be
used for 2nd pass mode.# STAR in 1st pass mode
STAR --runThreadN 16 \
--genomeDir GRCh38_GENCODE_genome_STAR_indexed \
--readFilesCommand zcat \
--readFilesIn ERR1562083_1_val_1.fq.gz ERR1562083_2_val_2.fq.gz \
--outFileNamePrefix SJ/ERR1562083. \
--outSAMtype None
# STAR in 2nd pass mode
STAR --runThreadN 16 \
--genomeDir GRCh38_GENCODE_genome_STAR_indexed \
--readFilesCommand zcat \
--readFilesIn ERR1562083_1_val_1.fq.gz ERR1562083_2_val_2.fq.gz \
--outFileNamePrefix ERR1562083. \
--sjdbFileChrStartEnd SJ/*SJ.out.tab \
--outSAMtype BAM SortedByCoordinate \
--outSAMattributes NH HI AS nM XS \
--quantMode TranscriptomeSAM
path <- "Data/SJ/"
file <- "SJ.txt"
sj <- as.data.frame(fread(paste(path, file, sep=""), sep="\t", header=TRUE, stringsAsFactors=FALSE, na.strings="NA"))
sj[!is.na(sj[,2]), ][1:5,1:5]
## coord.intron ERR1562083 ERR1562084 ERR1562085 ERR1562086
## 9 chr1:100007082:100022621 0 NA NA NA
## 10 chr1:100007091:100024554 0 NA NA NA
## 18 chr1:100007157:100011364 12 3 5 9
## 23 chr1:100007601:100023781 1 NA NA NA
## 39 chr1:100011534:100015301 15 NA 8 12
tran_id
and
gene_id
.--b1
and --b2
. This is because rMATS requires
these specification for statistical testing of splicing events between
the two samples. But here, we will only be using the splicing events
detected (fromGTF.SE.txt, fromGTF.MXE.txt, fromGTF.RI.txt,
fromGTF.A5SS.txt, fromGTF.A3SS.txt), but not the statistical test
results, from this step for our downstream analysis.rmats \
--b1 path_to_BAM_sample_1.txt \
--b2 path_to_BAM_sample_2.txt \
--gtf gencode.v31.annotation.gtf \
--od rMATS/ \
--tmp rMATS/ \
-t paired \
--readLength 125 \
--variable-read-length \
--nthread 8 \
--statoff
tran_id
) as per here: https://wenweixiong.github.io/Splicing_Nomenclature.
Example data and codes to do this available here: https://drive.google.com/file/d/1SIR1-GLXU8Qp0Wh4PkoABliPmW91peu2/view?usp=sharing.# SE
path <- "Data/rMATS/SE/"
file <- "SE_featureData.txt"
df.feature.se <- read.table(paste(path, file, sep=""), sep="\t", header=TRUE, stringsAsFactors=FALSE, na.strings="NA")
head(df.feature.se)
## tran_id
## 1 chrX:156022314:156022459:+@chrX:156022699:156022834:+@chrX:156023012:156023209
## 2 chrX:154227192:154227360:+@chrX:154228828:154228993:+@chrX:154230548:154230598
## 3 chrX:154190054:154190222:+@chrX:154191688:154191853:+@chrX:154193408:154193458
## 4 chrX:154152940:154153108:+@chrX:154154574:154154739:+@chrX:154156294:154156344
## 5 chrX:152918983:152919081:+@chrX:152920328:152920411:+@chrX:152920707:152920748
## 6 chrX:152922173:152922256:+@chrX:152922720:152922809:+@chrX:152928575:152928661
## gene_id gene_short_name gene_type
## 1 ENSG00000182484.15 WASH6P transcribed_unprocessed_pseudogene
## 2 ENSG00000166160.9 OPN1MW2 protein_coding
## 3 ENSG00000268221.5 OPN1MW protein_coding
## 4 ENSG00000102076.9 OPN1LW protein_coding
## 5 ENSG00000147394.18 ZNF185 protein_coding
## 6 ENSG00000147394.18 ZNF185 protein_coding
# MXE
path <- "Data/rMATS/MXE/"
file <- "MXE_featureData.txt"
df.feature.mxe <- read.table(paste(path, file, sep=""), sep="\t", header=TRUE, stringsAsFactors=FALSE, na.strings="NA")
# RI
path <- "Data/rMATS/RI/"
file <- "RI_featureData.txt"
df.feature.ri <- read.table(paste(path, file, sep=""), sep="\t", header=TRUE, stringsAsFactors=FALSE, na.strings="NA")
# A5SS
path <- "Data/rMATS/A5SS/"
file <- "A5SS_featureData.txt"
df.feature.a5ss <- read.table(paste(path, file, sep=""), sep="\t", header=TRUE, stringsAsFactors=FALSE, na.strings="NA")
# A3SS
path <- "Data/rMATS/A3SS/"
file <- "A3SS_featureData.txt"
df.feature.a3ss <- read.table(paste(path, file, sep=""), sep="\t", header=TRUE, stringsAsFactors=FALSE, na.strings="NA")
# Merge
df.feature.list <- list(df.feature.se, df.feature.mxe, df.feature.ri, df.feature.a5ss, df.feature.a3ss)
names(df.feature.list) <- c("SE", "MXE", "RI", "A5SS", "A3SS")
RI_featureData.txt
generated from rMATS in the previous
step.bedtools coverage \
-g GRCh38.primary_assembly.genome_bedtools.txt \
-split \
-sorted \
-a RI_Coordinates.bed \
-b ERR1562083.Aligned.sortedByCoord.out.bam > \
ERR1562083.txt \
-d
path <- "Data/MARVEL/PSI/RI/"
file <- "Counts_by_Region.txt"
df.intron.counts <- as.data.frame(fread(paste(path, file, sep=""), sep="\t", header=TRUE, stringsAsFactors=FALSE, na.strings="NA"))
df.intron.counts[1:5,1:5]
## coord.intron ERR1562083 ERR1562084 ERR1562085 ERR1562086
## 1 chr1:13375:13452 0 0 0 0
## 2 chr1:146510:146641 0 150 0 129
## 3 chr1:498457:498683 678 0 0 0
## 4 chr1:764801:765143 5375 4544 6794 1591
## 5 chr1:804967:806385 794 0 0 0
rsem-calculate-expression --bam \
--paired-end \
-p 8 \
ERR1562083.Aligned.toTranscriptome.out.bam \
GRCh38_GENCODE_genome_RSEM_indexed/gencode.v31 \
ERR1562083
path <- "Data/RSEM/"
file <- "TPM.txt"
df.tpm <- read.table(paste(path, file, sep=""), sep="\t", header=TRUE, stringsAsFactors=FALSE)
df.tpm[1:5,1:5]
## gene_id ERR1562083 ERR1562084 ERR1562085 ERR1562086
## 1 ENSG00000000003.14 343.26 163.45 210.43 190.46
## 2 ENSG00000000005.6 5.69 0.00 0.00 0.00
## 3 ENSG00000000419.12 288.02 155.26 42.49 238.67
## 4 ENSG00000000457.14 1.58 8.71 0.00 1.06
## 5 ENSG00000000460.17 41.23 28.53 100.17 66.43
gene_id
, gene_short_name
, and
gene_type
. All other columns are optional.path <- "Data/RSEM/"
file <- "TPM_featureData.txt"
df.tpm.feature <- read.table(paste(path, file, sep=""), sep="\t", header=TRUE, stringsAsFactors=FALSE)
head(df.tpm.feature)
## gene_id gene_short_name gene_type
## 1 ENSG00000000003.14 TSPAN6 protein_coding
## 2 ENSG00000000005.6 TNMD protein_coding
## 3 ENSG00000000419.12 DPM1 protein_coding
## 4 ENSG00000000457.14 SCYL3 protein_coding
## 5 ENSG00000000460.17 C1orf112 protein_coding
## 6 ENSG00000000938.13 FGR protein_coding
path <- "Data/GTF/"
file <- "gencode.v31.annotation.gtf"
gtf <- as.data.frame(fread(paste(path, file, sep=""), sep="\t", header=FALSE, stringsAsFactors=FALSE, na.strings="NA", quote="\""))
marvel <- CreateMarvelObject(SpliceJunction=sj,
SplicePheno=df.pheno,
SpliceFeature=df.feature.list,
IntronCounts=df.intron.counts,
GeneFeature=df.tpm.feature,
Exp=df.tpm,
GTF=gtf
)
# Detect AFE
marvel <- DetectEvents(MarvelObject=marvel,
min.cells=50,
min.expr=1,
track.progress=FALSE,
EventType="AFE"
)
# Detect ALE
marvel <- DetectEvents(MarvelObject=marvel,
min.cells=50,
min.expr=1,
track.progress=FALSE,
EventType="ALE"
)
min.cells
option. Minimum number of cells expressing
the gene for the gene to be included for splicing detection. Expressed
genes are defined as genes with minimum expression specified in
min.expr
.min.expr
option. Normalised gene expression value,
above which, the gene is considered to be expressed.track.progress
option. For the brevity of the tutorial,
we did not track the progress of splicing detection. But users are
advised to set this option to TRUE
when running these steps
on their own devices.CoverageThreshold
option.# Check splicing junction data
marvel <- CheckAlignment(MarvelObject=marvel, level="SJ")
# Validate, filter, compute SE splicing events
marvel <- ComputePSI(MarvelObject=marvel,
CoverageThreshold=10,
UnevenCoverageMultiplier=10,
EventType="SE"
)
# Validate, filter, compute MXE splicing events
marvel <- ComputePSI(MarvelObject=marvel,
CoverageThreshold=10,
UnevenCoverageMultiplier=10,
EventType="MXE"
)
# Validate, filter, compute RI splicing events
marvel <- ComputePSI(MarvelObject=marvel,
CoverageThreshold=10,
EventType="RI",
thread=4
)
# Validate, filter, compute A5SS splicing events
marvel <- ComputePSI(MarvelObject=marvel,
CoverageThreshold=10,
EventType="A5SS"
)
# Validate, filter, compute A3SS splicing events
marvel <- ComputePSI(MarvelObject=marvel,
CoverageThreshold=10,
EventType="A3SS"
)
# Validate, filter, compute AFE splicing events
marvel <- ComputePSI(MarvelObject=marvel,
CoverageThreshold=10,
EventType="AFE"
)
# Validate, filter, compute ALE splicing events
marvel <- ComputePSI(MarvelObject=marvel,
CoverageThreshold=10,
EventType="ALE"
)
CoverageThreshold
.
This option indicates the minimum number of splice junction reads
supporting the splicing events, above which, the PSI will be computed.
PSI of splicing events below this threshold will be coded as
NA
.UnevenCoverageMultiplier
Specific to computing SE and
MXE. Two splice junctions are used to compute to inclusion of SE and
MXE. This option represent the ratio of read coverage of one splice
junction over the other. The threshold specified here, above which, the
PSI will be coded as NA
.thread
Specific to computing RI. Number of cores to
use. This is depended on the user’s device.read.length
Specific to computing RI. If the values in
df.intron.counts
represent number of reads, then this
option should reflect the sequencing read length, e.g., 150 etc.. If the
values in df.intron.counts
represent total intronic
coverage (here), then this option should be set to 1
(default).# Define sample IDs to subset
# cell.type
table(df.pheno$cell.type)
##
## Endoderm iPSC Unknown
## 57 84 51
index.1 <- which(df.pheno$cell.type %in% c("iPSC", "Endoderm"))
# sample.type
table(df.pheno$sample.type)
##
## Single Cell
## 192
# qc.seq
table(df.pheno$qc.seq)
##
## fail pass
## 51 141
index.2 <- which(df.pheno$qc.seq=="pass")
# sample IDs
index <- Reduce(intersect, list(index.1, index.2))
sample.ids <- df.pheno[index, "sample.id"]
length(sample.ids)
## [1] 136
# Subset relevant samples
marvel <- SubsetSamples(MarvelObject=marvel,
sample.ids=sample.ids
)
marvel <- TransformExpValues(MarvelObject=marvel,
offset=1,
transformation="log2",
threshold.lower=1
)
# Check splicing data
marvel <- CheckAlignment(MarvelObject=marvel, level="splicing")
# Check gene data
marvel <- CheckAlignment(MarvelObject=marvel, level="gene")
# Cross-check splicing and gene data
marvel <- CheckAlignment(MarvelObject=marvel, level="splicing and gene")
MATCHED
flags are reported. If any
NOT MATCHED
flags are reported, please double-check the
input file requirements.# Save object
path <- "Data/R object/"
file <- "MARVEL.RData"
save(marvel, file=paste(path, file, sep=""))
# Load object
# path <- "Data/R Object/"
# file <- "MARVEL.RData"
# load(paste(path, file, sep=""))
# Retrieve sample metadata
df.pheno <- marvel$SplicePheno
# Define sample ids
sample.ids <- df.pheno[which(df.pheno$cell.type=="iPSC"), "sample.id"]
# Tabulate expressed events
marvel <- CountEvents(MarvelObject=marvel,
sample.ids=sample.ids,
min.cells=25
)
# Output (1): Plot
marvel$N.Events$Plot
# Output (2): Table
marvel$N.Events$Table
## event_type freq pct
## 1 SE 5270 40.1523810
## 3 RI 2030 15.4666667
## 6 AFE 1789 13.6304762
## 5 A3SS 1746 13.3028571
## 4 A5SS 1494 11.3828571
## 7 ALE 724 5.5161905
## 2 MXE 72 0.5485714
min.cells
option. Here, we required the splicing event
to be expressed in at least 25 cells for the splicing event to be
included for analysis.# Retrieve sample metadata
df.pheno <- marvel$SplicePheno
# Define sample ids
sample.ids <- df.pheno[which(df.pheno$cell.type=="Endoderm"), "sample.id"]
# Tabulate expressed events
marvel <- CountEvents(MarvelObject=marvel,
sample.ids=sample.ids,
min.cells=25
)
# Output (1): Plot
marvel$N.Events$Plot
# Output (2): Table
marvel$N.Events$Table
## event_type freq pct
## 1 SE 1966 37.0384326
## 3 RI 943 17.7656368
## 6 AFE 818 15.4107008
## 5 A3SS 656 12.3587038
## 4 A5SS 625 11.7746797
## 7 ALE 279 5.2562170
## 2 MXE 21 0.3956292
# Retrieve sample metadata
df.pheno <- marvel$SplicePheno
# Define sample IDs
sample.ids <- df.pheno[which(df.pheno$cell.type=="iPSC"), "sample.id"]
# Assign modality
marvel <- AssignModality(MarvelObject=marvel,
sample.ids=sample.ids,
min.cells=25,
seed=1
)
marvel$Modality$Results[1:5, c("tran_id", "event_type", "gene_id", "gene_short_name", "modality.bimodal.adj")]
## tran_id
## 1 chrX:155090784:155090839:+@chrX:155099315:155099389:+@chrX:155116057:155116188
## 2 chrX:155090784:155090839:+@chrX:155116057:155116188:+@chrX:155116711:155116754
## 3 chrX:155033403:155033553:+@chrX:155046509:155046584:+@chrX:155051670:155051801
## 4 chrX:155046509:155046584:+@chrX:155047369:155047391:+@chrX:155051670:155051801
## 5 chrX:154399338:154399396:+@chrX:154399487:154399594:+@chrX:154399803:154399941
## event_type gene_id gene_short_name modality.bimodal.adj
## 1 SE ENSG00000185515.14 BRCC3 Excluded.Dispersed
## 2 SE ENSG00000185515.14 BRCC3 Included.Primary
## 3 SE ENSG00000165775.18 FUNDC2 Included.Dispersed
## 4 SE ENSG00000165775.18 FUNDC2 Excluded.Primary
## 5 SE ENSG00000147403.16 RPL10 Included.Primary
# Tabulate modality proportion (overall)
marvel <- PropModality(MarvelObject=marvel,
modality.column="modality.bimodal.adj",
modality.type="extended",
event.type=c("SE", "MXE", "RI", "A5SS", "A3SS", "AFE", "ALE"),
across.event.type=FALSE
)
marvel$Modality$Prop$DoughnutChart$Plot
marvel$Modality$Prop$DoughnutChart$Table
## modality freq pct
## 5 Included.Primary 2500 19.0476190
## 4 Included.Dispersed 3606 27.4742857
## 3 Excluded.Primary 2932 22.3390476
## 2 Excluded.Dispersed 3658 27.8704762
## 1 Bimodal 58 0.4419048
## 6 Middle 99 0.7542857
## 7 Multimodal 272 2.0723810
min.cells
option. Here, we required the splicing event
to be expressed in at least 25 cells for the splicing event to be
included for modality assignment. This value should match that
previously defined in CountEvents
function.# Tabulate modality proportion (by event type)
marvel <- PropModality(MarvelObject=marvel,
modality.column="modality.bimodal.adj",
modality.type="extended",
event.type=c("SE", "MXE", "RI", "A5SS", "A3SS", "AFE", "ALE"),
across.event.type=TRUE,
prop.test="chisq",
prop.adj="fdr",
xlabels.size=8
)
marvel$Modality$Prop$BarChart$Plot
head(marvel$Modality$Prop$BarChart$Table)
## modality freq pct event_type
## 5 Included (Primary) 1184 22.4667932 SE
## 4 Included (Dispersed) 1685 31.9734345 SE
## 3 Excluded (Primary) 1049 19.9051233 SE
## 2 Excluded (Dispersed) 1260 23.9089184 SE
## 1 Bimodal 17 0.3225806 SE
## 6 Middle 15 0.2846300 SE
# Retrieve sample metadata
df.pheno <- marvel$SplicePheno
# Define sample IDs
sample.ids <- df.pheno[which(df.pheno$cell.type=="Endoderm"), "sample.id"]
# Assign modality
marvel <- AssignModality(MarvelObject=marvel,
sample.ids=sample.ids,
min.cells=25,
seed=1
)
marvel$Modality$Results[1:5, c("tran_id", "event_type", "gene_id", "gene_short_name", "modality.bimodal.adj")]
## tran_id
## 1 chrX:155033403:155033553:+@chrX:155046509:155046584:+@chrX:155051670:155051801
## 2 chrX:155046509:155046584:+@chrX:155047369:155047391:+@chrX:155051670:155051801
## 3 chrX:154399338:154399396:+@chrX:154399487:154399594:+@chrX:154399803:154399941
## 4 chrX:154399803:154399941:+@chrX:154400464:154400626:+@chrX:154400702:154402339
## 5 chrX:154379197:154379566:+@chrX:154379690:154379794:+@chrX:154379942:154380019
## event_type gene_id gene_short_name modality.bimodal.adj
## 1 SE ENSG00000165775.18 FUNDC2 Included.Dispersed
## 2 SE ENSG00000165775.18 FUNDC2 Excluded.Dispersed
## 3 SE ENSG00000147403.16 RPL10 Included.Primary
## 4 SE ENSG00000147403.16 RPL10 Included.Primary
## 5 SE ENSG00000102119.10 EMD Included.Primary
# Tabulate modality proportion (overall)
marvel <- PropModality(MarvelObject=marvel,
modality.column="modality.bimodal.adj",
modality.type="extended",
event.type=c("SE", "MXE", "RI", "A5SS", "A3SS", "AFE", "ALE"),
across.event.type=FALSE
)
marvel$Modality$Prop$DoughnutChart$Plot
marvel$Modality$Prop$DoughnutChart$Table
## modality freq pct
## 5 Included.Primary 1251 23.5681989
## 4 Included.Dispersed 1069 20.1394122
## 3 Excluded.Primary 1598 30.1055011
## 2 Excluded.Dispersed 1238 23.3232856
## 1 Bimodal 53 0.9984928
## 6 Middle 24 0.4521477
## 7 Multimodal 75 1.4129616
# Tabulate modality proportion (by event type)
marvel <- PropModality(MarvelObject=marvel,
modality.column="modality.bimodal.adj",
modality.type="extended",
event.type=c("SE", "MXE", "RI", "A5SS", "A3SS", "AFE", "ALE"),
across.event.type=TRUE,
prop.test="chisq",
prop.adj="fdr",
xlabels.size=8
)
marvel$Modality$Prop$BarChart$Plot
head(marvel$Modality$Prop$BarChart$Table)
## modality freq pct event_type
## 5 Included (Primary) 586 29.8067141 SE
## 4 Included (Dispersed) 459 23.3468973 SE
## 3 Excluded (Primary) 533 27.1108850 SE
## 2 Excluded (Dispersed) 371 18.8708037 SE
## 1 Bimodal 8 0.4069176 SE
## 6 Middle 1 0.0508647 SE
# Define cell groups
# Retrieve sample metadata
df.pheno <- marvel$SplicePheno
# Cell group 1 (reference)
cell.group.g1 <- df.pheno[which(df.pheno$cell.type=="iPSC"), "sample.id"]
# Cell group 2
cell.group.g2 <- df.pheno[which(df.pheno$cell.type=="Endoderm"), "sample.id"]
# DE analysis
marvel <- CompareValues(MarvelObject=marvel,
cell.group.g1=cell.group.g1,
cell.group.g2=cell.group.g2,
min.cells=3,
method="wilcox",
method.adjust="fdr",
level="gene",
show.progress=FALSE
)
marvel$DE$Exp$Table[1:5, ]
## gene_id gene_short_name gene_type n.cells.g1 n.cells.g2
## 1 ENSG00000141448.10 GATA6 protein_coding 0 52
## 2 ENSG00000273706.4 LHX1 protein_coding 1 52
## 3 ENSG00000250361.8 GYPB protein_coding 3 52
## 4 ENSG00000266010.2 GATA6-AS1 lncRNA 1 51
## 5 ENSG00000158815.11 FGF17 protein_coding 0 50
## mean.g1 mean.g2 log2fc statistic p.val p.val.adj
## 1 0.00000000 6.051803 6.051803 NA 3.364948e-28 7.086244e-24
## 2 0.02387774 6.346353 6.322476 NA 7.065377e-28 7.439489e-24
## 3 0.06427675 12.180694 12.116417 NA 2.412354e-27 1.693392e-23
## 4 0.01578723 7.528112 7.512325 NA 3.652428e-27 1.922912e-23
## 5 0.00000000 5.835133 5.835133 NA 9.209198e-27 3.001489e-23
min.cells
option. Here, we required the gene to be
expressed in at least 3 cells in either iPSCs or endoderm cells for the
gene to be included for analysis.show.progress
option. For the brevity of the tutorial,
we did not track the progress of differential expression analysis. But
users are advised to set this option to TRUE
when running
this step on their own devices.# Plot DE results
marvel <- PlotDEValues(MarvelObject=marvel,
pval=0.10,
log2fc=0.5,
point.size=0.1,
xlabel.size=8,
level="gene.global",
anno=FALSE
)
marvel$DE$Exp.Global$Plot
marvel$DE$Exp.Global$Summary
## sig freq
## 1 up 1383
## 2 down 6864
## 3 n.s. 12812
head(marvel$DE$Exp.Global$Table[,c("gene_id", "gene_short_name", "sig")])
## gene_id gene_short_name sig
## 1 ENSG00000141448.10 GATA6 up
## 2 ENSG00000273706.4 LHX1 up
## 3 ENSG00000250361.8 GYPB up
## 4 ENSG00000266010.2 GATA6-AS1 up
## 5 ENSG00000158815.11 FGF17 up
## 6 ENSG00000185155.11 MIXL1 up
pval
option. The adjusted p-value, below which, the
gene is considered to be differentially expressed.log2fc
option. The absolute log2 fold change, above
which, the gene is considered to be differentially expressed.# Plot DE results with annotation of selected genes
# Retrieve DE output table
results <- marvel$DE$Exp$Table
# Retrieve top genes
index <- which(results$log2fc > 8 | results$log2fc < -7)
gene_short_names <- results[index, "gene_short_name"]
# Plot
marvel <- PlotDEValues(MarvelObject=marvel,
pval=0.10,
log2fc=0.5,
point.size=0.1,
xlabel.size=10,
level="gene.global",
anno=TRUE,
anno.gene_short_name=gene_short_names
)
marvel$DE$Exp.Global$Plot
marvel <- CompareValues(MarvelObject=marvel,
cell.group.g1=cell.group.g1,
cell.group.g2=cell.group.g2,
min.cells=25,
method=c("ad", "dts"),
method.adjust="fdr",
level="splicing",
event.type=c("SE", "MXE", "RI", "A5SS", "A3SS", "ALE", "AFE"),
show.progress=FALSE
)
head(marvel$DE$PSI$Table[["ad"]])
## tran_id
## 11000 chr13:43059394:43059714:+@chr13:43062190:43062295
## 2660 chr15:24962114:24962209:+@chr15:24967029:24967152:+@chr15:24967932:24968082
## 3100 chr17:8383254:8382781|8383157:-@chr17:8382143:8382315
## 4100 chr17:8383157:8383193|8382781:8383164:-@chr17:8382143:8382315
## 5100 chr10:78037194:78037304:+@chr10:78037439:78037441:+@chr10:78040204:78040225
## 6100 chr10:78037194:78037304:+@chr10:78037439|78040204:78040225
## event_type gene_id gene_short_name gene_type n.cells.g1
## 11000 RI ENSG00000120675.6 DNAJC15 protein_coding 67
## 2660 SE ENSG00000128739.22 SNRPN protein_coding 83
## 3100 A5SS ENSG00000161970.15 RPL26 protein_coding 83
## 4100 AFE ENSG00000161970.15 RPL26 protein_coding 83
## 5100 SE ENSG00000138326.20 RPS24 protein_coding 82
## 6100 A3SS ENSG00000138326.20 RPS24 protein_coding 83
## n.cells.g2 mean.g1 mean.g2 mean.diff statistic p.val
## 11000 53 0.01407511 12.2010454 12.186970 66.959 1.4350e-37
## 2660 45 10.03557547 0.3400754 -9.695500 63.851 8.5431e-36
## 3100 53 5.32707355 0.5447294 -4.782344 59.893 1.4504e-33
## 4100 53 5.32707355 0.5447294 -4.782344 59.893 1.4504e-33
## 5100 52 13.01291792 1.2559445 -11.756973 57.853 1.9554e-32
## 6100 53 13.46476002 1.4915462 -11.973214 56.514 1.1045e-31
## p.val.adj modality.bimodal.adj.g1 modality.bimodal.adj.g2
## 11000 7.246750e-34 Excluded.Primary Excluded.Dispersed
## 2660 2.157133e-32 Excluded.Dispersed Excluded.Primary
## 3100 1.831130e-30 Excluded.Dispersed Excluded.Primary
## 4100 1.831130e-30 Excluded.Dispersed Excluded.Primary
## 5100 1.974954e-29 Excluded.Dispersed Excluded.Dispersed
## 6100 9.296208e-29 Excluded.Dispersed Excluded.Dispersed
## n.cells.outliers.g1 n.cells.outliers.g2 outliers
## 11000 7 50 FALSE
## 2660 82 3 FALSE
## 3100 79 5 FALSE
## 4100 79 5 FALSE
## 5100 81 9 FALSE
## 6100 82 10 FALSE
head(marvel$DE$PSI$Table[["dts"]])
## tran_id
## 1 chr8:73026940:73027052:+@chr8:73030336:73030395:+@chr8:73032042:73032106
## 5 chr17:76557764:76557857:+@chr17:76558945:76559043:+@chr17:76561288:76561398
## 24 chr10:78037194:78037304:+@chr10:78040204:78040225:+@chr10:78040615:78040747
## 26 chr17:32365330:32365487:+@chr17:32366665:32366757:+@chr17:32367772:32368014
## 29 chr2:27772674:27772692:+@chr2:27774424:27774530:+@chr2:27779433:27779734
## 31 chr20:49279104:49279208:+@chr20:49280485:49280570:+@chr20:49289045:49290860
## event_type gene_id gene_short_name gene_type n.cells.g1
## 1 SE ENSG00000147601.14 TERF1 protein_coding 83
## 5 SE ENSG00000163597.15 SNHG16 lncRNA 76
## 24 SE ENSG00000138326.20 RPS24 protein_coding 83
## 26 SE ENSG00000010244.18 ZNF207 protein_coding 70
## 29 SE ENSG00000243147.8 MRPL33 protein_coding 80
## 31 SE ENSG00000177410.13 ZFAS1 lncRNA 74
## n.cells.g2 mean.g1 mean.g2 mean.diff statistic p.val p.val.adj
## 1 30 39.37718 61.67573 22.29855 7.924382 5e-04 5e-04
## 5 29 42.93249 56.31129 13.37880 4.262573 5e-04 5e-04
## 24 53 46.65793 66.30364 19.64571 4.296221 5e-04 5e-04
## 26 28 38.46449 72.84364 34.37915 5.646416 5e-04 5e-04
## 29 47 49.55403 36.58367 -12.97037 3.661446 5e-04 5e-04
## 31 42 58.18767 74.35116 16.16349 3.245651 5e-04 5e-04
## modality.bimodal.adj.g1 modality.bimodal.adj.g2 n.cells.outliers.g1
## 1 Middle Included.Dispersed 0
## 5 Multimodal Bimodal 0
## 24 Middle Middle 0
## 26 Multimodal Included.Dispersed 0
## 29 Middle Multimodal 0
## 31 Middle Included.Dispersed 0
## n.cells.outliers.g2 outliers
## 1 0 FALSE
## 5 0 FALSE
## 24 0 FALSE
## 26 0 FALSE
## 29 0 FALSE
## 31 0 FALSE
min.cells
option. Here, we required the splicing event
to be expressed in at least 25 cells in both iPSCs and endoderm cells
for the splicing event to be included for analysis.method
option. We recommend Anderson-Darling
(ad
) and D Test Statistics (dts
) for comparing
the overall PSI distribution between two cell populations.show.progress
option. For the brevity of the tutorial,
we did not track the progress of differential expression analysis. But
users are advised to set this option to TRUE
when running
this step on their own devices.marvel <- PlotDEValues(MarvelObject=marvel,
method="ad",
pval=0.10,
level="splicing.distance",
anno=FALSE
)
marvel$DE$PSI$Plot[["ad"]]
method
option. Plot results for ad
statistical test.pval
option. The adjusted p-value, below which, the
splicing event is considered to be differentially spliced.level
option. When set to
"splicing.distance"
, the distance statistic will be used to
plot the DE results. Only applicable when method
set to
"ad"
or "dts"
. When set to
splicing.mean
. The typical volcano plot is returned, and
the delta
option may be used.delta
option. when level
set to
"splicing.mean"
, the absolute differences in mean PSI
values between the two cell populations, above which, the splicing event
is considered to be differentially spliced.marvel <- CompareValues(MarvelObject=marvel,
cell.group.g1=cell.group.g1,
cell.group.g2=cell.group.g2,
psi.method=c("ad", "dts"),
psi.pval=c(0.10, 0.10),
psi.delta=0,
method.de.gene="wilcox",
method.adjust.de.gene="fdr",
downsample=FALSE,
show.progress=FALSE,
level="gene.spliced"
)
head(marvel$DE$Exp.Spliced$Table)
## gene_id gene_short_name gene_type n.cells.g1 n.cells.g2
## 1 ENSG00000147601.14 TERF1 protein_coding 83 51
## 2 ENSG00000115541.11 HSPE1 protein_coding 83 52
## 3 ENSG00000154277.12 UCHL1 protein_coding 83 43
## 4 ENSG00000119705.9 SLIRP protein_coding 83 53
## 5 ENSG00000132341.12 RAN protein_coding 83 53
## 6 ENSG00000121570.12 DPPA4 protein_coding 83 53
## mean.g1 mean.g2 log2fc statistic p.val p.val.adj
## 1 10.909400 5.946858 -4.962541 NA 2.104188e-22 1.224696e-19
## 2 11.364996 8.638485 -2.726511 NA 3.121495e-22 1.224696e-19
## 3 9.372543 5.405119 -3.967423 NA 5.400597e-22 1.224696e-19
## 4 11.127065 9.387294 -1.739771 NA 8.128340e-22 1.224696e-19
## 5 10.403278 8.829252 -1.574026 NA 8.128340e-22 1.224696e-19
## 6 8.320448 5.694548 -2.625900 NA 9.254628e-22 1.224696e-19
psi.method
, psi.pval
, and
psi.delta
options. For defining differentially spliced
events whose corresponding genes will be included for differential gene
expression analysis.method.de.gene
and method.adjust.de.gene
options. The statistical test and multiple testing method for
differential gene expression analysis.# Plot: No annotation
marvel <- PlotDEValues(MarvelObject=marvel,
method=c("ad", "dts"),
psi.pval=c(0.10, 0.10),
psi.delta=0,
gene.pval=0.10,
gene.log2fc=0.5,
point.size=0.1,
xlabel.size=8,
level="gene.spliced",
anno=FALSE
)
marvel$DE$Exp.Spliced$Plot
marvel$DE$Exp.Spliced$Summary
## sig freq
## 1 up 69
## 2 down 394
## 3 n.s. 331
method
option. Merge results from ad
and
dts
statistical tests.pval.psi
option. The adjusted p-value, below which, the
splicing event is considered to be differentially spliced.delta.psi
option. The absolute difference in mean PSI
values between the two cell populations, above which, the splicing event
is considered to be differentially spliced.gene.pval
option. The adjusted p-value, below which,
the gene is considered to be differentially expressed.gene.log2fc
option. The absolute log2 fold change,
above which, the gene is considered to be differentially expressed.# Plot: Annotate top genes
results <- marvel$DE$Exp.Spliced$Table
index <- which((results$log2fc > 3 | results$log2fc < -3) & -log10(results$p.val.adj) > 15)
gene_short_names <- results[index, "gene_short_name"]
marvel <- PlotDEValues(MarvelObject=marvel,
method=c("ad", "dts"),
psi.pval=c(0.10, 0.10),
psi.delta=0,
gene.pval=0.10,
gene.log2fc=0.5,
point.size=0.1,
xlabel.size=8,
level="gene.spliced",
anno=TRUE,
anno.gene_short_name=gene_short_names
)
marvel$DE$Exp.Spliced$Plot
# Define sample groups
# Retrieve sample metadata
df.pheno <- marvel$SplicePheno
# Group 1
sample.ids.1 <- df.pheno[which(df.pheno$cell.type=="iPSC"), "sample.id"]
# Group 2
sample.ids.2 <- df.pheno[which(df.pheno$cell.type=="Endoderm"), "sample.id"]
# Merge
cell.group.list <- list("iPSC"=sample.ids.1,
"Endoderm"=sample.ids.2
)
# Retrieve DE genes
# Retrieve DE result table
results.de.exp <- marvel$DE$Exp$Table
# Retrieve relevant gene_ids
index <- which(results.de.exp$p.val.adj < 0.10 & abs(results.de.exp$log2fc) > 0.5)
gene_ids <- results.de.exp[index, "gene_id"]
# Reduce dimension
marvel <- RunPCA(MarvelObject=marvel,
cell.group.column="cell.type",
cell.group.order=c("iPSC", "Endoderm"),
cell.group.colors=NULL,
min.cells=25,
features=gene_ids,
point.size=2.5,
level="gene"
)
marvel$PCA$Exp$Plot
min.cells
option. Here, we required the gene to be
expressed in at least 25 cells across the overall cell populations
defined in cell.group.list
for the gene to be included for
analysis.feature
option. Gene IDs to be used for dimension
reduction.# Retrieve DE tran_ids
method <- c("ad", "dts")
tran_ids.list <- list()
for(i in 1:length(method)) {
results.de.psi <- marvel$DE$PSI$Table[[method[i]]]
index <- which(results.de.psi$p.val.adj < 0.10 & results.de.psi$outlier==FALSE)
tran_ids <- results.de.psi[index, "tran_id"]
tran_ids.list[[i]] <- tran_ids
}
tran_ids <- unique(unlist(tran_ids.list))
# Reduce dimension
marvel <- RunPCA(MarvelObject=marvel,
cell.group.column="cell.type",
cell.group.order=c("iPSC", "Endoderm"),
cell.group.colors=NULL,
min.cells=25,
features=tran_ids,
point.size=2.5,
level="splicing",
method.impute="random",
seed=1
)
## [1] "136 of 136 cells specified were found and retained"
## [1] "1554 of 1554 splicing events expressed in at least 25 cells and are retained"
## [1] "Performing pre-flight checks..."
marvel$PCA$PSI$Plot
min.cells
option. Here, we required the splicing event
to be expressed in at least 25 cells across the overall cell populations
defined in cell.group.list
for the splicing event to be
included for analysis.feature
option. Splicing events to be used for
dimension reduction.method.impute
option. Method to impute missing PSI
values. Indicate "random"
to to randomly assign any values
between 0-100 to missing values (Song et el., 2017). Indicate
"population.mean"
to use the mean PSI value of each cell
group to impute the missing values found in the corresponding cell group
(Huang et al., 2021).seed
option. Only applicable when
method.impute
option set to "random"
. This
option ensures that the randomly imputed values will always be
reproducible.# Retrieve relevant gene_ids
results.de.exp <- marvel$DE$Exp$Table
index <- which(results.de.exp$p.val.adj < 0.10 & abs(results.de.exp$log2fc) > 0.5)
gene_ids <- results.de.exp[-index, "gene_id"]
# Reduce dimension
marvel <- RunPCA(MarvelObject=marvel,
cell.group.column="cell.type",
cell.group.order=c("iPSC", "Endoderm"),
cell.group.colors=NULL,
min.cells=25,
features=gene_ids,
point.size=2.5,
level="gene"
)
marvel$PCA$Exp$Plot
# Retrieve non-DE gene_ids
results.de.exp <- marvel$DE$Exp$Table
index <- which(results.de.exp$p.val.adj > 0.10 )
gene_ids <- results.de.exp[, "gene_id"]
# Retrieve tran_ids
df.feature <- do.call(rbind.data.frame, marvel$SpliceFeatureValidated)
df.feature <- df.feature[which(df.feature$gene_id %in% gene_ids), ]
# Reduce dimension: All DE splicing events
tran_ids <- df.feature$tran_id
marvel <- RunPCA(MarvelObject=marvel,
cell.group.column="cell.type",
cell.group.order=c("iPSC", "Endoderm"),
cell.group.colors=NULL,
min.cells=25,
features=tran_ids,
point.size=2.5,
level="splicing",
method.impute="random",
seed=1
)
## [1] "136 of 136 cells specified were found and retained"
## [1] "16423 of 47936 splicing events expressed in at least 25 cells and are retained"
## [1] "Performing pre-flight checks..."
plot.all <- marvel$PCA$PSI$Plot
# Reduce dimension: SE
tran_ids <- df.feature[which(df.feature$event_type=="SE"), "tran_id"]
marvel <- RunPCA(MarvelObject=marvel,
cell.group.column="cell.type",
cell.group.order=c("iPSC", "Endoderm"),
cell.group.colors=NULL,
min.cells=25,
features=tran_ids,
point.size=2.5,
level="splicing",
method.impute="random",
seed=1
)
## [1] "136 of 136 cells specified were found and retained"
## [1] "6690 of 20458 splicing events expressed in at least 25 cells and are retained"
## [1] "Performing pre-flight checks..."
plot.se <- marvel$PCA$PSI$Plot
# Reduce dimension: MXE
tran_ids <- df.feature[which(df.feature$event_type=="MXE"), "tran_id"]
marvel <- RunPCA(MarvelObject=marvel,
cell.group.column="cell.type",
cell.group.order=c("iPSC", "Endoderm"),
cell.group.colors=NULL,
min.cells=25,
features=tran_ids,
point.size=2.5,
level="splicing",
method.impute="random",
seed=1
)
## [1] "136 of 136 cells specified were found and retained"
## [1] "97 of 1278 splicing events expressed in at least 25 cells and are retained"
## [1] "Performing pre-flight checks..."
plot.mxe <- marvel$PCA$PSI$Plot
# Reduce dimension: RI
tran_ids <- df.feature[which(df.feature$event_type=="RI"), "tran_id"]
marvel <- RunPCA(MarvelObject=marvel,
cell.group.column="cell.type",
cell.group.order=c("iPSC", "Endoderm"),
cell.group.colors=NULL,
min.cells=25,
features=tran_ids,
point.size=2.5,
level="splicing",
method.impute="random",
seed=1
)
## [1] "136 of 136 cells specified were found and retained"
## [1] "2518 of 7317 splicing events expressed in at least 25 cells and are retained"
## [1] "Performing pre-flight checks..."
plot.ri <- marvel$PCA$PSI$Plot
# Reduce dimension: A5SS
tran_ids <- df.feature[which(df.feature$event_type=="A5SS"), "tran_id"]
marvel <- RunPCA(MarvelObject=marvel,
cell.group.column="cell.type",
cell.group.order=c("iPSC", "Endoderm"),
cell.group.colors=NULL,
min.cells=25,
features=tran_ids,
point.size=2.5,
level="splicing",
method.impute="random",
seed=1
)
## [1] "136 of 136 cells specified were found and retained"
## [1] "1856 of 5153 splicing events expressed in at least 25 cells and are retained"
## [1] "Performing pre-flight checks..."
plot.a5ss <- marvel$PCA$PSI$Plot
# Reduce dimension: A3SS
tran_ids <- df.feature[which(df.feature$event_type=="A3SS"), "tran_id"]
marvel <- RunPCA(MarvelObject=marvel,
cell.group.column="cell.type",
cell.group.order=c("iPSC", "Endoderm"),
cell.group.colors=NULL,
min.cells=25,
features=tran_ids,
point.size=2.5,
level="splicing",
method.impute="random",
seed=1
)
## [1] "136 of 136 cells specified were found and retained"
## [1] "2154 of 5840 splicing events expressed in at least 25 cells and are retained"
## [1] "Performing pre-flight checks..."
plot.a3ss <- marvel$PCA$PSI$Plot
# Reduce dimension: AFE
tran_ids <- df.feature[which(df.feature$event_type=="AFE"), "tran_id"]
marvel <- RunPCA(MarvelObject=marvel,
cell.group.column="cell.type",
cell.group.order=c("iPSC", "Endoderm"),
cell.group.colors=NULL,
min.cells=25,
features=tran_ids,
point.size=2.5,
level="splicing",
method.impute="random",
seed=1
)
## [1] "136 of 136 cells specified were found and retained"
## [1] "2199 of 5818 splicing events expressed in at least 25 cells and are retained"
## [1] "Performing pre-flight checks..."
plot.afe <- marvel$PCA$PSI$Plot
# Reduce dimension:
tran_ids <- df.feature[which(df.feature$event_type=="ALE"), "tran_id"]
marvel <- RunPCA(MarvelObject=marvel,
cell.group.column="cell.type",
cell.group.order=c("iPSC", "Endoderm"),
cell.group.colors=NULL,
min.cells=25,
features=tran_ids,
point.size=2.5,
level="splicing",
method.impute="random",
seed=1
)
## [1] "136 of 136 cells specified were found and retained"
## [1] "909 of 2072 splicing events expressed in at least 25 cells and are retained"
## [1] "Performing pre-flight checks..."
plot.ale <- marvel$PCA$PSI$Plot
# Arrange and view plots
# Read plots from right to left for each row
grid.arrange(plot.all, plot.se,
plot.mxe, plot.ri,
plot.a5ss, plot.a3ss,
plot.afe, plot.ale,
nrow=4)
PlotValues
for plotting selected splicing events.# Define sample groups
# Retrieve sample metadata
df.pheno <- marvel$SplicePheno
# Group 1
sample.ids.1 <- df.pheno[which(df.pheno$cell.type=="iPSC"), "sample.id"]
# Group 2
sample.ids.2 <- df.pheno[which(df.pheno$cell.type=="Endoderm"), "sample.id"]
# Merge
cell.group.list <- list("iPSC"=sample.ids.1,
"Endoderm"=sample.ids.2
)
# Assign modality dynamics
marvel <- ModalityChange(MarvelObject=marvel,
method=c("ad", "dts"),
psi.pval=c(0.10, 0.10)
)
marvel$DE$Modality$Plot
head(marvel$DE$Modality$Table)
## tran_id
## 1 chr13:43059394:43059714:+@chr13:43062190:43062295
## 2 chr15:24962114:24962209:+@chr15:24967029:24967152:+@chr15:24967932:24968082
## 3 chr17:8383254:8382781|8383157:-@chr17:8382143:8382315
## 4 chr17:8383157:8383193|8382781:8383164:-@chr17:8382143:8382315
## 5 chr10:78037194:78037304:+@chr10:78037439:78037441:+@chr10:78040204:78040225
## 6 chr10:78037194:78037304:+@chr10:78037439|78040204:78040225
## event_type gene_id gene_short_name gene_type
## 1 RI ENSG00000120675.6 DNAJC15 protein_coding
## 2 SE ENSG00000128739.22 SNRPN protein_coding
## 3 A5SS ENSG00000161970.15 RPL26 protein_coding
## 4 AFE ENSG00000161970.15 RPL26 protein_coding
## 5 SE ENSG00000138326.20 RPS24 protein_coding
## 6 A3SS ENSG00000138326.20 RPS24 protein_coding
## modality.bimodal.adj.g1 modality.bimodal.adj.g2 modality.change
## 1 Excluded.Primary Excluded.Dispersed Implicit
## 2 Excluded.Dispersed Excluded.Primary Implicit
## 3 Excluded.Dispersed Excluded.Primary Implicit
## 4 Excluded.Dispersed Excluded.Primary Implicit
## 5 Excluded.Dispersed Excluded.Dispersed Restricted
## 6 Excluded.Dispersed Excluded.Dispersed Restricted
marvel$DE$Modality$Plot.Stats
## modality.change freq pct
## 1 Explicit 161 10.36036
## 2 Implicit 280 18.01802
## 3 Restricted 1113 71.62162
method
option. The statistical tests used earlier for
differential splicing analysis. Here, we combined the differentially
spliced events from both ad
and dts
tests.pval
option. The adjusted p-value, below which, the
splicing event is considered to be differentially spliced. The numeric
vector should be the same length as the method
option.# Example 1
tran_id <- "chr3:129171771:129171634|129171655:-@chr3:129171446:129171538"
marvel <- PlotValues(MarvelObject=marvel,
cell.group.list=cell.group.list,
feature=tran_id,
xlabels.size=5,
level="splicing",
min.cells=25
)
plot.1 <- marvel$adhocPlot$PSI
# Example 2
tran_id <- "chr11:134253299:134253370|134253227:134253248:-@chr11:134252840:134252916"
marvel <- PlotValues(MarvelObject=marvel,
cell.group.list=cell.group.list,
feature=tran_id,
xlabels.size=5,
level="splicing",
min.cells=25
)
plot.2 <- marvel$adhocPlot$PSI
# Example 3
tran_id <- "chr16:29454016:29454113:-@chr16:29446922|29447026:29446802"
marvel <- PlotValues(MarvelObject=marvel,
cell.group.list=cell.group.list,
feature=tran_id,
xlabels.size=5,
level="splicing",
min.cells=25
)
plot.3 <- marvel$adhocPlot$PSI
# Example 4
tran_id <- "chr6:31535485:31535367:-@chr6:31532911:31532780"
marvel <- PlotValues(MarvelObject=marvel,
cell.group.list=cell.group.list,
feature=tran_id,
xlabels.size=5,
level="splicing",
min.cells=25
)
plot.4 <- marvel$adhocPlot$PSI
# Arrange and view plots
# Read plots from right to left for each row
grid.arrange(plot.1, plot.2,
plot.3, plot.4,
nrow=1)
# Example 1
tran_id <- "chr6:21596763:21598248:+@chr6:21598321:21599378"
marvel <- PlotValues(MarvelObject=marvel,
cell.group.list=cell.group.list,
feature=tran_id,
xlabels.size=5,
level="splicing",
min.cells=25
)
plot.1 <- marvel$adhocPlot$PSI
# Example 2
tran_id <- "chr12:56158684:56158711:+@chr12:56159587|56159975:56160148"
marvel <- PlotValues(MarvelObject=marvel,
cell.group.list=cell.group.list,
feature=tran_id,
xlabels.size=5,
level="splicing",
min.cells=25
)
plot.2 <- marvel$adhocPlot$PSI
# Example 3
tran_id <- "chr2:37196505:37196520:+@chr2:37198494:37198672:+@chr2:37199704:37199819"
marvel <- PlotValues(MarvelObject=marvel,
cell.group.list=cell.group.list,
feature=tran_id,
xlabels.size=5,
level="splicing",
min.cells=25
)
plot.3 <- marvel$adhocPlot$PSI
# Example 4
tran_id <- "chr6:170583188:170583057:-@chr6:170582265:170581792"
marvel <- PlotValues(MarvelObject=marvel,
cell.group.list=cell.group.list,
feature=tran_id,
xlabels.size=5,
level="splicing",
min.cells=25
)
plot.4 <- marvel$adhocPlot$PSI
# Arrange and view plots
# Read plots from right to left for each row
grid.arrange(plot.1, plot.2,
plot.3, plot.4,
nrow=1)
# Example 1
tran_id <- "chr3:109337464:109337652:-@chr3:109333920|109333993:109333870"
marvel <- PlotValues(MarvelObject=marvel,
cell.group.list=cell.group.list,
feature=tran_id,
xlabels.size=5,
level="splicing",
min.cells=25
)
plot.1 <- marvel$adhocPlot$PSI
# Example 2
tran_id <- "chr17:81510833:81510480:-@chr17:81510329:81509971"
marvel <- PlotValues(MarvelObject=marvel,
cell.group.list=cell.group.list,
feature=tran_id,
xlabels.size=5,
level="splicing",
min.cells=25
)
plot.2 <- marvel$adhocPlot$PSI
# Example 3
tran_id <- "chr15:60397943:60398043:-@chr15:60397258:60397338:-@chr15:60386028:60386086"
marvel <- PlotValues(MarvelObject=marvel,
cell.group.list=cell.group.list,
feature=tran_id,
xlabels.size=5,
level="splicing",
min.cells=25
)
plot.3 <- marvel$adhocPlot$PSI
# Example 4
tran_id <- "chr19:18575057:18575151:+@chr19:18577003:18577550"
marvel <- PlotValues(MarvelObject=marvel,
cell.group.list=cell.group.list,
feature=tran_id,
xlabels.size=5,
level="splicing",
min.cells=25
)
plot.4 <- marvel$adhocPlot$PSI
# Arrange and view plots
# Read plots from right to left for each row
grid.arrange(plot.1, plot.2,
plot.3, plot.4,
nrow=1)
PlotValues
for plotting selected splicing events and
genes.CompareValues
with the level
option set to gene.spliced
needs to be executed prior to proceeding with gene-splicing dynamics
analysis below. Kindly refer to
Differential (spliced) gene analysis
section of this
tutorial.marvel <- IsoSwitch(MarvelObject=marvel,
method=c("ad", "dts"),
psi.pval=c(0.10, 0.10),
psi.delta=0,
gene.pval=0.10,
gene.log2fc=0.5
)
marvel$DE$Cor$Plot
head(marvel$DE$Cor$Table)
## gene_id gene_short_name gene_type cor
## 20 ENSG00000109475.16 RPL34 protein_coding Iso-Switch
## 29 ENSG00000198467.15 TPM2 protein_coding Iso-Switch
## 30 ENSG00000111237.18 VPS29 protein_coding Iso-Switch
## 38 ENSG00000168028.14 RPSA protein_coding Iso-Switch
## 42 ENSG00000182774.13 RPS17 protein_coding Iso-Switch
## 54 ENSG00000167526.13 RPL13 protein_coding Iso-Switch
marvel$DE$Cor$Plot.Stats
## cor freq pct
## 1 Coordinated 192 24.18136
## 2 Opposing 158 19.89924
## 3 Iso-Switch 331 41.68766
## 4 Complex 113 14.23174
method
option. Merge results from ad
and
dts
statistical test.pval.psi
option. The adjusted p-value, below which, the
splicing event is considered to be differentially spliced.delta.psi
option. The absolute difference in mean PSI
values between the two cell populations, above which, the splicing event
is considered to be differentially spliced.gene.pval
option. The adjusted p-value, below which,
the gene is considered to be differentially expressed.gene.log2fc
option. The absolute log2 fold change,
above which, the gene is considered to be differentially expressed.# Define cell groups
# Retrieve sample metadata
df.pheno <- marvel$SplicePheno
# Group 1
sample.ids.1 <- df.pheno[which(df.pheno$cell.type=="iPSC"), "sample.id"]
# Group 2
sample.ids.2 <- df.pheno[which(df.pheno$cell.type=="Endoderm"), "sample.id"]
# Merge
cell.group.list <- list("iPSC"=sample.ids.1,
"Endoderm"=sample.ids.2
)
# Example 1
# Gene
df.feature <- marvel$GeneFeature
gene_id <- df.feature[which(df.feature$gene_short_name=="DHX9"), "gene_id"]
marvel <- PlotValues(MarvelObject=marvel,
cell.group.list=cell.group.list,
feature=gene_id,
maintitle="gene_short_name",
xlabels.size=7,
level="gene"
)
plot.1_gene <- marvel$adhocPlot$Exp
# Splicing
tran_id <- "chr1:182839347:182839456|182839734:182839935:+@chr1:182842545:182842677"
marvel <- PlotValues(MarvelObject=marvel,
cell.group.list=cell.group.list,
feature=tran_id,
xlabels.size=7,
level="splicing",
min.cells=25
)
plot.1_splicing <- marvel$adhocPlot$PSI
# Example 2
# Gene
df.feature <- marvel$GeneFeature
gene_id <- df.feature[which(df.feature$gene_short_name=="DNAJC15"), "gene_id"]
marvel <- PlotValues(MarvelObject=marvel,
cell.group.list=cell.group.list,
feature=gene_id,
maintitle="gene_short_name",
xlabels.size=7,
level="gene"
)
plot.2_gene <- marvel$adhocPlot$Exp
# Splicing
tran_id <- "chr13:43059394:43059714:+@chr13:43062190:43062295"
marvel <- PlotValues(MarvelObject=marvel,
cell.group.list=cell.group.list,
feature=tran_id,
xlabels.size=7,
level="splicing",
min.cells=25
)
plot.2_splicing <- marvel$adhocPlot$PSI
# Arrange and view plots
# Read plots from right to left for each row
grid.arrange(plot.1_gene, plot.1_splicing,
plot.2_gene, plot.2_splicing,
nrow=2)
# Example 1
# Gene
df.feature <- marvel$GeneFeature
gene_id <- df.feature[which(df.feature$gene_short_name=="BCLAF1"), "gene_id"]
marvel <- PlotValues(MarvelObject=marvel,
cell.group.list=cell.group.list,
feature=gene_id,
maintitle="gene_short_name",
xlabels.size=7,
level="gene"
)
plot.1_gene <- marvel$adhocPlot$Exp
# Splicing
tran_id <- "chr6:136276508:136276468:-@chr6:136275989:136275843"
marvel <- PlotValues(MarvelObject=marvel,
cell.group.list=cell.group.list,
feature=tran_id,
xlabels.size=7,
level="splicing",
min.cells=25
)
plot.1_splicing <- marvel$adhocPlot$PSI
# Example 2
# Gene
df.feature <- marvel$GeneFeature
gene_id <- df.feature[which(df.feature$gene_short_name=="SUB1"), "gene_id"]
marvel <- PlotValues(MarvelObject=marvel,
cell.group.list=cell.group.list,
feature=gene_id,
maintitle="gene_short_name",
xlabels.size=7,
level="gene"
)
plot.2_gene <- marvel$adhocPlot$Exp
# Splicing
tran_id <- "chr5:32601005:32601113:+@chr5:32601801:32603088"
marvel <- PlotValues(MarvelObject=marvel,
cell.group.list=cell.group.list,
feature=tran_id,
xlabels.size=7,
level="splicing",
min.cells=25
)
plot.2_splicing <- marvel$adhocPlot$PSI
# Arrange and view plots
# Read plots from right to left for each row
grid.arrange(plot.1_gene, plot.1_splicing,
plot.2_gene, plot.2_splicing,
nrow=2)
# Example 1
# Gene
df.feature <- marvel$GeneFeature
gene_id <- df.feature[which(df.feature$gene_short_name=="CELF1"), "gene_id"]
marvel <- PlotValues(MarvelObject=marvel,
cell.group.list=cell.group.list,
feature=gene_id,
maintitle="gene_short_name",
xlabels.size=7,
level="gene"
)
plot.1_gene <- marvel$adhocPlot$Exp
# Splicing
tran_id <- "chr11:47477297:47477425:-@chr11:47476956|47476959:47476846"
marvel <- PlotValues(MarvelObject=marvel,
cell.group.list=cell.group.list,
feature=tran_id,
xlabels.size=7,
level="splicing",
min.cells=25
)
plot.1_splicing <- marvel$adhocPlot$PSI
# Example 2
# Gene
df.feature <- marvel$GeneFeature
gene_id <- df.feature[which(df.feature$gene_short_name=="TPM2"), "gene_id"]
marvel <- PlotValues(MarvelObject=marvel,
cell.group.list=cell.group.list,
feature=gene_id,
maintitle="gene_short_name",
xlabels.size=7,
level="gene"
)
plot.2_gene <- marvel$adhocPlot$Exp
# Splicing
tran_id <- "chr9:35685269:35685339:-@chr9:35685064:35685139:-@chr9:35684732:35684807:-@chr9:35684488:35684550"
marvel <- PlotValues(MarvelObject=marvel,
cell.group.list=cell.group.list,
feature=tran_id,
xlabels.size=7,
level="splicing",
min.cells=25
)
plot.2_splicing <- marvel$adhocPlot$PSI
# Arrange and view plots
# Read plots from right to left for each row
grid.arrange(plot.1_gene, plot.1_splicing,
plot.2_gene, plot.2_splicing,
nrow=2)
# Example 1
# Gene
df.feature <- marvel$GeneFeature
gene_id <- df.feature[which(df.feature$gene_short_name=="TERF1"), "gene_id"]
marvel <- PlotValues(MarvelObject=marvel,
cell.group.list=cell.group.list,
feature=gene_id,
maintitle="gene_short_name",
xlabels.size=6,
level="gene"
)
plot.1_gene <- marvel$adhocPlot$Exp
# Splicing (1)
tran_id <- "chr8:73026940:73027052:+@chr8:73030336:73030395:+@chr8:73032042:73032106"
marvel <- PlotValues(MarvelObject=marvel,
cell.group.list=cell.group.list,
feature=tran_id,
xlabels.size=6,
level="splicing",
min.cells=25
)
plot.1_splicing.1 <- marvel$adhocPlot$PSI
# Splicing (2)
tran_id <- "chr8:73008864:73009205|73012857:73012931:+@chr8:73013895:73013990"
marvel <- PlotValues(MarvelObject=marvel,
cell.group.list=cell.group.list,
feature=tran_id,
xlabels.size=6,
level="splicing",
min.cells=25
)
plot.1_splicing.2 <- marvel$adhocPlot$PSI
# Example 2
# Gene
df.feature <- marvel$GeneFeature
gene_id <- df.feature[which(df.feature$gene_short_name=="SNRPN"), "gene_id"]
marvel <- PlotValues(MarvelObject=marvel,
cell.group.list=cell.group.list,
feature=gene_id,
maintitle="gene_short_name",
xlabels.size=6,
level="gene"
)
plot.2_gene <- marvel$adhocPlot$Exp
# Splicing (1)
tran_id <- "chr15:24967932:24968082:+@chr15:24974311|24974398:24974456"
marvel <- PlotValues(MarvelObject=marvel,
cell.group.list=cell.group.list,
feature=tran_id,
xlabels.size=6,
level="splicing",
min.cells=25,
scale.y.log=TRUE
)
plot.2_splicing.1 <- marvel$adhocPlot$PSI
# Splicing (2)
tran_id <- "chr15:24962114:24962209:+@chr15:24967029:24967240:+@chr15:24967932:24968082"
marvel <- PlotValues(MarvelObject=marvel,
cell.group.list=cell.group.list,
feature=tran_id,
xlabels.size=7,
level="splicing",
min.cells=25,
scale.y.log=TRUE
)
plot.2_splicing.2<- marvel$adhocPlot$PSI
# Read plots from right to left for each row
grid.arrange(plot.1_gene, plot.1_splicing.1, plot.1_splicing.2,
plot.2_gene, plot.2_splicing.1, plot.2_splicing.2,
nrow=2)
marvel <- BioPathways(MarvelObject=marvel,
method=c("ad", "dts"),
pval=0.10,
species="human"
)
head(marvel$DE$BioPathways$Table)
## ID Description GeneRatio BgRatio
## GO:0002181 GO:0002181 cytoplasmic translation 88/725 156/18870
## GO:0008380 GO:0008380 RNA splicing 82/725 478/18870
## GO:0042254 GO:0042254 ribosome biogenesis 63/725 325/18870
## GO:0071826 GO:0071826 protein-RNA complex organization 49/725 243/18870
## GO:0022618 GO:0022618 protein-RNA complex assembly 48/725 235/18870
## GO:0009060 GO:0009060 aerobic respiration 43/725 197/18870
## enrichment pvalue p.adjust qvalue
## GO:0002181 14.682228 2.189603e-83 9.152542e-80 8.389638e-80
## GO:0008380 4.464983 6.333591e-31 1.323720e-27 1.213383e-27
## GO:0042254 5.045347 6.142723e-27 4.279430e-24 3.922721e-24
## GO:0071826 5.248361 5.935633e-22 3.544421e-19 3.248978e-19
## GO:0022618 5.316273 8.859458e-22 4.629067e-19 4.243214e-19
## GO:0009060 5.681148 8.281736e-21 3.846406e-18 3.525792e-18
## geneID
## GO:0002181 RPL26/RPS24/RPS14/RPL8/RPL30/RPL34/RPL35A/RPL21/RPS19/RPL9/RPS15A/RPL29/RPS27A/RPSA/RPS17/PABPC1/RPL13/RPL38/PKM/CSDE1/RPL31/RPL19/RPL17/RPL23A/RPS23/RPS6/CNBP/HNRNPD/DHX9/RPL3/EIF4A2/RPL14/RPS13/RPS15/RPS25/RPL39/UBA52/RPS11/RPS2/RPL41/RPL35/RPS29/RPL22L1/EIF4A1/RPL24/RPL7A/RPL4/RPL36A/RPL12/RPL10A/RPS12/RPS16/RPS20/RPS28/EIF3M/RPL13A/RPL37A/EIF3K/RPS9/RPL18/RPL10/RPL37/RPLP0/RBM4/EIF3E/EIF3G/DENR/RPLP2/RPS21/RPL36/EIF2S3/RACK1/EIF5/RPLP1/ZC3H15/RWDD1/RPL22/RPL28/RPS3/RPS7/RPL5/RPL18A/RPS27/RPS3A/RPL27/RPL27A/EIF3H/RPS26
## GO:0008380 SNRPN/HNRNPC/PABPC1/UBL5/SNRPD2/PRMT1/PRPF40A/RBM39/DHX9/DDX39B/HNRNPK/NPM1/RPS13/TMBIM6/NONO/BUD31/MAGOHB/HNRNPA1/LSM7/PPIG/HNRNPR/HNRNPH1/HNRNPH3/SNRPG/ENY2/SRRM1/C1QBP/ARL6IP4/SRSF7/NCL/SRSF3/TIA1/SAP18/LSM2/RBM8A/CELF1/HNRNPA2B1/SRSF5/SRSF10/TRA2B/SF1/TAF9/RBM4/RBFOX2/PRPF8/SUPT20H/LARP7/SRSF2/HNRNPM/ZBTB8OS/DDX5/SRPK1/SLC38A2/SREK1IP1/SF3B1/LUC7L3/HNRNPF/RALY/SF3B6/SRSF11/SNRPA1/NSRP1/TAF15/SFPQ/HNRNPA3/SNRPB2/ACIN1/DDX17/HNRNPL/SNU13/CLNS1A/FXR1/PPP4R2/RBMX/SNRPD1/STRAP/MAGOH/KHDRBS1/SNRPB/CWC15/LSM6/RPS26
## GO:0042254 RPL26/RPS24/RPS14/RPL35A/RPS19/RPS15A/RPSA/RPS17/RPL38/RPL23A/RPS23/NOP58/RPS6/RPL14/NPM1/RPS13/RPS15/RPS25/EIF6/NOP56/RPS11/TRMT112/RPL35/FCF1/SERBP1/RPL24/RPL7A/C1QBP/RPL10A/RPS12/RPS16/SDAD1/RPS28/RPS9/C1D/RPL10/RPLP0/RBIS/EXOSC8/PA2G4/RPS21/METTL5/RIOK3/DCAF13/FBL/RPL7L1/EIF5/ZNHIT3/RAN/DDX3X/NHP2/EXOSC3/NPM3/DDX17/SNU13/RPS7/EXOSC1/RPL5/RPS27/RPS3A/RPL27/MRPS7/LSM6
## GO:0071826 RPS14/RPS19/RPSA/RPL38/SNRPD2/RPL23A/DHX9/DDX39B/RPS15/EIF6/RPL24/SNRPG/RPS28/EIF3M/RPL13A/LSM2/PTGES3/CELF1/EIF3K/CPSF6/SRSF5/RPL10/SRSF10/RPLP0/SF1/TAF9/EIF3E/PRPF8/EIF3G/DENR/EIF2S3/SRPK1/VCP/SF3B1/EIF5/LUC7L3/ZNHIT3/SF3B6/SNRPA1/SNRPB2/SNU13/CLNS1A/RPL5/SNRPD1/RPS27/STRAP/MRPS7/SNRPB/EIF3H
## GO:0022618 RPS14/RPS19/RPSA/RPL38/SNRPD2/RPL23A/DHX9/DDX39B/RPS15/EIF6/RPL24/SNRPG/RPS28/EIF3M/RPL13A/LSM2/PTGES3/CELF1/EIF3K/CPSF6/SRSF5/RPL10/SRSF10/RPLP0/SF1/TAF9/EIF3E/PRPF8/EIF3G/DENR/EIF2S3/SRPK1/SF3B1/EIF5/LUC7L3/ZNHIT3/SF3B6/SNRPA1/SNRPB2/SNU13/CLNS1A/RPL5/SNRPD1/RPS27/STRAP/MRPS7/SNRPB/EIF3H
## GO:0009060 DNAJC15/ATP5PF/BLOC1S1/COX7A2/COX7C/UQCRQ/COX4I1/COX6C/ATP5F1D/NDUFA3/DGUOK/MDH1/UQCC2/NDUFA11/NDUFB2/IDH1/IDH3B/NDUFB4/ATP5PO/SHMT2/PARK7/NDUFB6/UQCRH/VCP/ATP5MF/NDUFS4/ARL2/UQCRC1/NDUFAB1/ACLY/NDUFB1/SDHC/COX5A/ATP5PB/STOML2/NDUFV2/ATP5F1C/ATP5ME/PDHB/NDUFB10/NDUFA5/NDUFB3/ATP5F1B
## Count
## GO:0002181 88
## GO:0008380 82
## GO:0042254 63
## GO:0071826 49
## GO:0022618 48
## GO:0009060 43
method
option. Merge results from ad
and
dts
statistical test.pval
option. The adjusted p-value, below which, the
splicing event is considered to be differentially spliced.species
option. MARVEL also supports GO analysis of
"mouse"
.custom.genes
option. In lieu of specifying genes with
the method
and pval
options, users may specify
any custom set of genes using this option.# Plot top pathways
df <- marvel$DE$BioPathways$Table
go.terms <- df$Description[c(1:10)]
marvel <- BioPathways.Plot(MarvelObject=marvel,
go.terms=go.terms,
y.label.size=10
)
marvel$DE$BioPathways$Plot
BioPathways
function. Simply specify
the custom set of pathways using the go.terms
option of the
BioPathways.Plot
function.# Parse GTF
marvel <- ParseGTF(MarvelObject=marvel)
# Find PTCs
marvel <- FindPTC(MarvelObject=marvel,
method=c("ad", "dts"),
pval=c(0.10, 0.10),
delta=5
)
method
option. Merge results from ad
and
dts
statistical tests.pval
option. The adjusted p-value, below which, the
splicing event is included for NMD analysis.delta
option. The difference in mean PSI values of
endoderm cells vs iPSCs, above which, the the splicing event is included
for NMD analysis.# Show novel SJ and non-CDS transcripts
marvel <- PropPTC(MarvelObject=marvel,
xlabels.size=8,
show.NovelSJ.NoCDS=TRUE,
prop.test="chisq"
)
marvel$NMD$PTC.Prop$Plot
marvel$NMD$PTC.Prop$Table[1:5, ]
## tran_id
## 1 chr10:78037194:78037304:+@chr10:78040204:78040225:+@chr10:78040615:78040747
## 2 chr10:78037194:78037304:+@chr10:78040204:78040225:+@chr10:78040615:78040747
## 3 chr10:78037194:78037304:+@chr10:78040204:78040225:+@chr10:78040615:78040747
## 4 chr12:56158362:56158711:+@chr12:56159587:56159730:+@chr12:56159975:56160148
## 5 chr8:73026940:73027052:+@chr8:73030336:73030395:+@chr8:73032042:73032106
## gene_id gene_short_name transcript_id aa.length
## 1 ENSG00000138326.20 RPS24 ENST00000465692.2 NA
## 2 ENSG00000138326.20 RPS24 ENST00000482069.5 NA
## 3 ENSG00000138326.20 RPS24 ENST00000613865.5 140
## 4 ENSG00000092841.18 MYL6 ENST00000548580.5 151
## 5 ENSG00000147601.14 TERF1 ENST00000522018.1 NA
## stop.position.aa sj.last.position.aa ptc.sj.last.distance NMD event_type
## 1 NA NA NA No CDS SE
## 2 NA NA NA No CDS SE
## 3 131 138 21 No PTC SE
## 4 -1 143 NA No PTC SE
## 5 NA NA NA No CDS SE
## strand
## 1 +
## 2 +
## 3 +
## 4 +
## 5 +
marvel$NMD$PTC.Prop$Plot.Stats
## event_type Novel SJ No CDS No PTC PTC
## SE SE 6 (3.7%) 94 (57.3%) 42 (25.6%) 22 (13.4%)
## RI RI 22 (7.8%) 158 (56.2%) 14 (5%) 87 (31%)
## A5SS A5SS 3 (6.5%) 21 (45.7%) 12 (26.1%) 10 (21.7%)
## A3SS A3SS 0 (0%) 57 (50.4%) 33 (29.2%) 23 (20.4%)
## pval
## SE 1.40997591213549e-11
## RI
## A5SS
## A3SS
Novel SJ
refers to alternative exons not found in the
GTF provided (novel). Therefore, the effect of these novel alternative
exons on PTC introduction cannot be assessed.No CDS
refers to non-proten-coding isoforms in which a
match is found for the alternative exon. CDS stands for coding
sequence.No PTC
refers to proten-coding isoforms that have no
PTC introduced by the alternative exon.PTC
refers to proten-coding isoforms that have PTC
introduced by the alternative exon.No PTC
and
PTC
) will be brought forward for NMD prediction, let’s
focus on this two categories.# Do not show novel SJ and non-CDS transcripts
marvel <- PropPTC(MarvelObject=marvel,
xlabels.size=8,
show.NovelSJ.NoCDS=FALSE,
prop.test="chisq"
)
marvel$NMD$PTC.Prop$Plot
marvel$NMD$PTC.Prop$Table[1:5, ]
## tran_id
## 3 chr10:78037194:78037304:+@chr10:78040204:78040225:+@chr10:78040615:78040747
## 4 chr12:56158362:56158711:+@chr12:56159587:56159730:+@chr12:56159975:56160148
## 6 chr8:73026940:73027052:+@chr8:73030336:73030395:+@chr8:73032042:73032106
## 7 chr17:32365330:32365487:+@chr17:32366665:32366757:+@chr17:32367772:32368014
## 9 chr12:53467221:53467293:+@chr12:53467805:53467843:+@chr12:53468777:53468832
## gene_id gene_short_name transcript_id aa.length
## 3 ENSG00000138326.20 RPS24 ENST00000613865.5 140
## 4 ENSG00000092841.18 MYL6 ENST00000548580.5 151
## 6 ENSG00000147601.14 TERF1 ENST00000276602.10 439
## 7 ENSG00000010244.18 ZNF207 ENST00000394673.6 494
## 9 ENSG00000197111.15 PCBP2 ENST00000437231.5 331
## stop.position.aa sj.last.position.aa ptc.sj.last.distance NMD event_type
## 3 131 138 21 No PTC SE
## 4 -1 143 NA No PTC SE
## 6 -1 382 NA No PTC SE
## 7 -1 442 NA No PTC SE
## 9 -1 320 NA No PTC SE
## strand
## 3 +
## 4 +
## 6 +
## 7 +
## 9 +
marvel$NMD$PTC.Prop$Plot.Stats
## event_type No PTC PTC pval
## SE SE 42 (65.6%) 22 (34.4%) 4.99818097649527e-12
## RI RI 14 (13.9%) 87 (86.1%)
## A5SS A5SS 12 (54.5%) 10 (45.5%)
## A3SS A3SS 33 (58.9%) 23 (41.1%)
marvel <- CompareExpr(MarvelObject=marvel,
xlabels.size=8
)
marvel$NMD$NMD.Expr$Plot
head(marvel$NMD$NMD.Expr$Table)
## gene_id gene_short_name log2fc event_type NMD
## 1 ENSG00000138326.20 RPS24 -0.1750992 SE FALSE
## 2 ENSG00000092841.18 MYL6 0.1434717 SE FALSE
## 3 ENSG00000147601.14 TERF1 -4.9625414 SE FALSE
## 4 ENSG00000010244.18 ZNF207 -0.5135040 SE FALSE
## 5 ENSG00000197111.15 PCBP2 0.7654579 SE FALSE
## 6 ENSG00000177410.13 ZFAS1 0.8550282 SE FALSE
marvel$NMD$NMD.Expr$Plot.Stats
## event_type nmd.null.cells nmd.cells nmd.null.log2fc.mean nmd.log2fc.mean
## 1 SE 50 10 -0.4208627 -0.3992539
## 2 RI 36 22 -0.2204057 -0.9991883
## 3 A5SS 15 4 -0.9150688 -0.6457604
## 4 A3SS 20 11 -0.8115343 -0.7094088
## p.val
## 1 0.866110652
## 2 0.001074357
## 3 0.410732714
## 4 0.854991268
CompareValues
with the level
option set to gene.spliced
needs to be executed prior to proceeding with volcano plotting below.
Kindly refer to ifferential (spliced) gene analysis
section
of this tutorial.# Volcano plot: Annotate NMD events
marvel <- AnnoVolcanoPlot(MarvelObject=marvel,
anno=FALSE,
xlabel.size=7
)
marvel$NMD$AnnoVolcanoPlot$Plot
# Volcano plot: Annotate candidate NMD genes
results <- marvel$NMD$AnnoVolcanoPlot$Table
index <- which(results$log2fc < 0 & -log10(results$p.val.adj) > 10)
gene_short_names <- results[index, "gene_short_name"]
marvel <- AnnoVolcanoPlot(MarvelObject=marvel,
anno=TRUE,
anno.gene_short_name=gene_short_names,
point.size=1.0,
label.size=2.5,
xlabel.size=7
)
marvel$NMD$AnnoVolcanoPlot$Plot