Global variables
#set the global ggplot theme
theme_set(theme_bw() + theme(axis.text = element_text(size=12),
axis.title = element_text(size=14),
plot.title = element_text(size = 15, hjust =0.5, face="bold")))
#only use the filtered data for main analysis
load("../../var/CPS1000_mainAnalysis.RData")
#choose the first sample
viabMat <- arrange(pheno1000_main, screenDate) %>%
filter(diagnosis == "CLL") %>%
distinct(patientID, Drug, concIndex, .keep_all = TRUE) %>%
filter(! Drug %in% c("DMSO","PBS")) %>%
mutate(id = paste0(Drug,"_",concIndex)) %>%
select(patientID, id, normVal.adj.sigm) %>%
spread(key = patientID, value = normVal.adj.sigm) %>%
data.frame() %>% column_to_rownames("id") %>%
as.matrix()
# load the most up to date datasets
loadData("ddsrna", "../../var")
## [1] "Loading data ../../var/ddsrna_180717.RData"
#only CLL samples
dds <- dds[,dds$diag %in% "CLL"]
dds <- estimateSizeFactors(dds)
#filter out none protein coding genes
dds<-dds[rowData(dds)$biotype %in% "protein_coding",]
dds <- dds[! rowData(dds)$symbol %in% c("",NA),]
#filter out low count genes
minrs <- 100
rs <- rowSums(counts(dds, normalized = TRUE))
dds<-dds[ rs >= minrs, ]
#variance stabilize the data
#(includes normalizing for library size and dispsersion estimation)
dds<-vst(dds)
exprMat <- assay(dds)
#filter out low variable genes
ntop<-5000
sds <- genefilter::rowSds(exprMat)
exprMat<-exprMat[order(sds,decreasing = T)[1:ntop],]
#scale and center RNAseq data
exprMat <- jyluMisc::mscale(exprMat, center = TRUE, scale = TRUE, useMad = FALSE)
## [1] "Loading data ../../var/patmeta_181120.RData"
Generate object
cpsData <- list(drugs = viabMat,
rna = exprMat,
gene = genData,
meth = methData)
# check dimensionalities, samples are columns, features are rows
lapply(cpsData, dim)
## $drugs
## [1] 315 275
##
## $rna
## [1] 5000 206
##
## $gene
## [1] 110 275
##
## $meth
## [1] 5000 196
# Create MultiAssayExperiment object
mae_cps <- MultiAssayExperiment(
experiments = cpsData
)
# Build the MOFA object
MOFAobject <- createMOFAobject(mae_cps)
## Creating MOFA object from a MultiAssayExperiment object...
MOFAobject
## Untrained MOFA model with the following characteristics:
## Number of views: 4
## View names: drugs rna gene meth
## Number of features per view: 315 5000 110 5000
## Number of samples: 349
Overview of data
plotTilesData(MOFAobject)
Define data options
DataOptions <- getDefaultDataOptions()
DataOptions$scaleViews <- FALSE
Define model options
ModelOptions <- getDefaultModelOptions(MOFAobject)
ModelOptions$numFactors <- 30
ModelOptions$sparsity <- FALSE
ModelOptions
## $likelihood
## drugs rna gene meth
## "gaussian" "gaussian" "bernoulli" "gaussian"
##
## $numFactors
## [1] 30
##
## $sparsity
## [1] FALSE
Define training options
TrainOptions <- getDefaultTrainOptions()
# Automatically drop factors that explain less than 2% of variance in all omics
TrainOptions$DropFactorThreshold <- 0.05
#TrainOptions$seed <- 2018
TrainOptions
## $maxiter
## [1] 5000
##
## $tolerance
## [1] 0.1
##
## $DropFactorThreshold
## [1] 0.05
##
## $verbose
## [1] FALSE
##
## $seed
## NULL
MOFAobject <- prepareMOFA(
MOFAobject,
DataOptions = DataOptions,
ModelOptions = ModelOptions,
TrainOptions = TrainOptions
)
## Checking data options...
## Checking training options...
## Checking model options...
Save mofa input
save(MOFAobject, file = "mofaIn_noSparsity.RData")
Run mofa and save results
MOFAobject <- runMOFA(MOFAobject)
save(MOFAobject, file = "mofaRes.RData")
Load MOFA model
load("./mofaRes.RData")
# Calculate the variance explained (R2) per factor in each view
r2 <- calculateVarianceExplained(MOFAobject)
r2$R2Total
## drugs gene meth rna
## 0.2921765 0.2260048 0.2770163 0.3658035
# Variance explained by each factor in each view
head(r2$R2PerFactor)
## drugs gene meth rna
## LF1 1.004250e-01 1.769737e-01 1.867190e-01 0.08157859
## LF2 4.367826e-02 4.899725e-02 6.867120e-03 0.05042249
## LF3 1.186995e-01 7.417882e-06 6.575994e-03 0.01544383
## LF4 2.661160e-02 8.728655e-06 1.681635e-02 0.02560368
## LF5 6.861238e-07 5.402758e-07 1.730650e-04 0.06527149
## LF6 8.287495e-04 1.343789e-06 8.027143e-05 0.06065077
# Plot it
plotVarianceExplained(MOFAobject)
Get weights and factors
allWeights <- getWeights(MOFAobject,
views = "all",
factors = "all",
as.data.frame = TRUE) %>% as.tibble()
allFactors <- getFactors(
MOFAobject,
factors = "all",
as.data.frame = TRUE
) %>% as.tibble()
allFactors <- left_join(allFactors,
select(patMeta, Patient.ID, IGHV.status, trisomy12),
by = c(sample = "Patient.ID"))
## Warning: Column `sample`/`Patient.ID` joining factor and character vector,
## coercing into character vector
Plot the separation of the samples by the first two factor
plotTab <- filter(allFactors, factor %in% c("LF1","LF2")) %>%
spread(key =factor, value = value) %>% mutate(trisomy12 = factor(trisomy12)) %>%
filter(!is.na(IGHV.status), !is.na(trisomy12))
p <- ggplot(plotTab, aes(x=LF1, y=LF2, color = trisomy12,
shape = IGHV.status, label = sample)) +
geom_point() +
scale_shape_manual(values = c(M = 16, U =1))
p
#ggplotly(p)
Load survival data
load("../../var/survival_CPS1000.RData")
#first sample of each patient
survT <- arrange(survT, sampleID) %>%
distinct(patientID, .keep_all = TRUE)
function for cox regression
com <- function(response, time, endpoint, scale =FALSE) {
if (scale) {
#calculate z-score
response <- (response - mean(response, na.rm = TRUE))/sd(response, na.rm=TRUE)
}
surv <- coxph(Surv(time, endpoint) ~ response)
tibble(p = summary(surv)[[7]][,5],
HR = summary(surv)[[7]][,2],
lower = summary(surv)[[8]][,3],
higher = summary(surv)[[8]][,4])
}
Cox regression test
testTab <- left_join(allFactors, survT, by = c(sample = "patientID"))
#for OS
resOS <- filter(testTab, !is.na(OS)) %>%
group_by(factor) %>%
do(com(.$value, .$OS, .$died, TRUE)) %>% ungroup() %>%
arrange(p) %>% mutate(p.adj = p.adjust(p, method = "BH")) %>%
mutate(Endpoint = "OS")
resOS
## # A tibble: 10 x 7
## factor p HR lower higher p.adj Endpoint
## <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 LF1 0.0000103 2.15 1.53 3.02 0.000103 OS
## 2 LF7 0.000312 0.601 0.456 0.793 0.00156 OS
## 3 LF9 0.00409 1.40 1.11 1.76 0.0136 OS
## 4 LF10 0.0751 1.39 0.967 2.00 0.188 OS
## 5 LF2 0.175 0.758 0.507 1.13 0.318 OS
## 6 LF3 0.191 0.802 0.577 1.12 0.318 OS
## 7 LF4 0.552 0.900 0.637 1.27 0.655 OS
## 8 LF8 0.586 1.10 0.784 1.54 0.655 OS
## 9 LF5 0.589 0.909 0.643 1.29 0.655 OS
## 10 LF6 0.786 0.951 0.661 1.37 0.786 OS
#for TTT
resTTT <- filter(testTab, !is.na(TTT)) %>%
group_by(factor) %>%
do(com(.$value, .$TTT, .$treatedAfter, TRUE)) %>% ungroup() %>%
arrange(p) %>% mutate(p.adj = p.adjust(p, method = "BH")) %>%
mutate(Endpoint = "TTT")
resTTT
## # A tibble: 10 x 7
## factor p HR lower higher p.adj Endpoint
## <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 LF7 7.45e-12 0.424 0.331 0.542 7.45e-11 TTT
## 2 LF1 1.77e- 8 2.11 1.63 2.73 8.84e- 8 TTT
## 3 LF4 5.53e- 3 1.33 1.09 1.62 1.84e- 2 TTT
## 4 LF3 2.69e- 2 0.751 0.583 0.968 6.73e- 2 TTT
## 5 LF9 3.90e- 2 1.31 1.01 1.69 7.81e- 2 TTT
## 6 LF8 7.29e- 2 0.784 0.602 1.02 1.22e- 1 TTT
## 7 LF2 1.05e- 1 0.769 0.560 1.06 1.49e- 1 TTT
## 8 LF6 1.40e- 1 1.22 0.937 1.58 1.75e- 1 TTT
## 9 LF5 3.95e- 1 1.13 0.852 1.50 4.38e- 1 TTT
## 10 LF10 8.34e- 1 1.03 0.768 1.39 8.34e- 1 TTT
Plot p value and hazard ratio
plotTab <- bind_rows(resOS, resTTT) %>%
filter(factor %in% unique(filter(.,p.adj < 0.05)$factor))
ggplot(plotTab, aes(x=factor, y = HR, col = Endpoint, dodge = Endpoint)) +
geom_hline(yintercept = 1, linetype = "dotted") +
geom_point(position = position_dodge(width=0.8)) +
geom_errorbar(position = position_dodge(width =0.8),
aes(ymin = lower, ymax = higher), width = 0.3) +
geom_text(position = position_dodge(width =0.8),
aes(y = higher + 0.5, label = sprintf("p=%1.2f",p))) +
ylim(-0.5,4) +
coord_flip() + theme_bw()
Function for KM plot
km <- function(response, time, endpoint, titlePlot = "KM plot", pval = NULL, stat = "maxstat") {
#function for km plot
survS <- data.frame(time = time,
endpoint = endpoint)
if (stat == "maxstat") {
ms <- maxstat.test(Surv(time, endpoint) ~ response,
data = survS,
smethod = "LogRank",
minprop = 0.2,
maxprop = 0.8,
alpha = NULL)
survS$group <- factor(ifelse(response >= ms$estimate, "high", "low"))
} else if (stat == "median") {
med <- median(response, na.rm = TRUE)
survS$group <- factor(ifelse(response >= med, "high", "low"))
} else if (stat == "binary") {
survS$group <- factor(response)
}
if (is.null(pval)) pval = TRUE
p <- ggsurvplot(survfit(Surv(time, endpoint) ~ group, data = survS),
data = survS, pval = TRUE, conf.int = FALSE, palette = "Set1",
ylab = "Fraction", xlab = "Time (years)", title = titlePlot,
ggtheme = theme_bw() + theme(plot.title = element_text(hjust =0.5)))$plot
p
}
KM plot for overall survival
facList <- c("LF1","LF4","LF7","LF9")
plotList <- lapply(facList, function(x) {
eachTab <- filter(testTab, factor == x) %>%
select(value, OS, died) %>% filter(!is.na(OS))
km(eachTab$value, eachTab$OS, eachTab$died, sprintf("%s VS Overall Survival", x), stat = "maxstat")
})
grid.arrange(grobs = plotList, ncol = 2)
KM plot for TTT
facList <- c("LF1","LF4","LF7","LF9")
plotList <- lapply(facList, function(x) {
eachTab <- filter(testTab, factor == x) %>%
select(value, TTT, treatedAfter) %>% filter(!is.na(TTT))
km(eachTab$value, eachTab$TTT, eachTab$treatedAfter, sprintf("%s VS Time to next treatment", x), stat = "maxstat")
})
grid.arrange(grobs = plotList, ncol = 2)
plotTopWeights(MOFAobject, "gene", 7)
Function to plot customized MOFA examine heatmap
plotMofaHeatmap <- function(mofaObj, view, factor, features, geneAnno = NULL, censor = c(-5,5)) {
# get plot features
weightTab <- getWeights(mofaObj, views = view, factors = factor)
seleFeatures <- rownames(weightTab[[1]])[order(abs(weightTab[[1]][,1]), decreasing = TRUE)[seq(1,features)]]
#get original data matrix for plot
plotMat <- getTrainData(
mofaObj,
as.data.frame = FALSE,
views = view)
plotMat <- plotMat[[1]][seleFeatures,]
plotMat <- plotMat[,complete.cases(t(plotMat))]
if (!is.null(censor)) {
plotMat[plotMat > censor[2]] <- censor[2]
plotMat[plotMat < censor[1]] <- censor[1]
}
#get factor annotation
facAnno <- getFactors(
mofaObj,
factors = factor)
facAnno <- data.frame(facAnno[colnames(plotMat),,drop=FALSE])
plotMat <- plotMat[,order(facAnno[,1])]
if (!is.null(geneAnno)) {
facAnno <- cbind(facAnno, geneAnno[match(rownames(facAnno), rownames(geneAnno)),,drop=FALSE])
}
pheatmap(plotMat, cluster_cols = FALSE,
annotation_col = facAnno, labels_col = "")
}
Plot heatmap
p53Anno <- data.frame(row.names = patMeta$Patient.ID,
IGHV = patMeta$IGHV.status,
TP53 = as.factor(patMeta$TP53),
treatment = patMeta$treatment)
plotMofaHeatmap(MOFAobject, "drugs", 7, 20, geneAnno = p53Anno, censor = c(-4,4))
Gene enrichment analysis
inputTab <- getWeights(MOFAobject, views = "rna", factor = 7, as.data.frame = TRUE) %>%
mutate(symbol = rowData(dds)[match(feature, rownames(dds)),]$symbol) %>%
filter(!is.na(symbol)) %>%
arrange(desc(abs(value))) %>% distinct(symbol, .keep_all = TRUE) %>%
dplyr::rename(stat = value) %>% select(symbol, stat) %>%
data.frame() %>% column_to_rownames("symbol")
enRes <- list()
enRes[["Hallmark"]] <- runGSEA(inputTab, gmtFile = "../../data/commonFiles/h.all.v5.1.symbols_customized.gmt", GSAmethod = "page")
## Loading required package: piano
## Warning: package 'piano' was built under R version 3.5.1
enRes[["KEGG"]] <- runGSEA(inputTab, gmtFile = "../../data/commonFiles/c2.cp.kegg.v5.1.symbols.gmt", GSAmethod = "page")
p <- plotEnrichmentBar(enRes, pCut = 0.01)
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
plot(p)
plotTopWeights(MOFAobject, "gene",4)
Plot heatmap
geneAnno <- data.frame(row.names = patMeta$Patient.ID, SF3B1 = as.factor(patMeta$SF3B1),
treatment = patMeta$treatment)
plotMofaHeatmap(MOFAobject, "drugs", 4, 20, geneAnno = geneAnno)
Gene enrichment analysis
inputTab <- getWeights(MOFAobject, views = "rna", factor =4, as.data.frame = TRUE) %>%
mutate(symbol = rowData(dds)[match(feature, rownames(dds)),]$symbol) %>%
filter(!is.na(symbol)) %>%
arrange(desc(abs(value))) %>% distinct(symbol, .keep_all = TRUE) %>%
dplyr::rename(stat = value) %>% select(symbol, stat) %>%
data.frame() %>% column_to_rownames("symbol")
enRes <- list()
enRes[["Hallmark"]] <- runGSEA(inputTab, gmtFile = "../../data/commonFiles/h.all.v5.1.symbols_customized.gmt", GSAmethod = "page")
enRes[["KEGG"]] <- runGSEA(inputTab, gmtFile = "../../data/commonFiles/c2.cp.kegg.v5.1.symbols.gmt", GSAmethod = "page")
p <- plotEnrichmentBar(enRes, pCut = 0.01)
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
plot(p)
plotTopWeights(MOFAobject, "gene", 9)
Plot heatmap
geneAnno <- data.frame(row.names = patMeta$Patient.ID,
IGHV = as.factor(patMeta$IGHV.status),
del11q = as.factor(patMeta$del11q),
treatment = patMeta$treatment)
plotMofaHeatmap(MOFAobject, "drugs", 9, 20, geneAnno = geneAnno)
Gene enrichment analysis
inputTab <- getWeights(MOFAobject, views = "rna", factor = 9, as.data.frame = TRUE) %>%
mutate(symbol = rowData(dds)[match(feature, rownames(dds)),]$symbol) %>%
filter(!is.na(symbol)) %>%
arrange(desc(abs(value))) %>% distinct(symbol, .keep_all = TRUE) %>%
dplyr::rename(stat = value) %>% select(symbol, stat) %>%
data.frame() %>% column_to_rownames("symbol")
enRes <- list()
enRes[["Hallmark"]] <- runGSEA(inputTab, gmtFile = "../../data/commonFiles/h.all.v5.1.symbols_customized.gmt", GSAmethod = "page")
enRes[["KEGG"]] <- runGSEA(inputTab, gmtFile = "../../data/commonFiles/c2.cp.kegg.v5.1.symbols.gmt", GSAmethod = "page")
p <- plotEnrichmentBar(enRes, pCut = 0.01)
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
plot(p)
clusters <- clusterSamples(MOFAobject,
k=4,
factors=c(1,4,7,9))
lfMat <- getFactors(MOFAobject, factors = c(1,4,7,9))
colAnno <- data.frame(row.names = names(clusters),
clusters = factor(clusters),
TP53 = patMeta[match(names(clusters), patMeta$Patient.ID),]$TP53,
IGHV = patMeta[match(names(clusters), patMeta$Patient.ID),]$IGHV.status,
SF3B1 = patMeta[match(names(clusters), patMeta$Patient.ID),]$SF3B1,
trisomy12 = patMeta[match(names(clusters), patMeta$Patient.ID),]$trisomy12)
#using ggplot color for group annotation
gg_color_hue <- function(n) {
hues = seq(15, 375, length = n + 1)
hcl(h = hues, l = 65, c = 100)[1:n]
}
#reorder by cluster
plotMat <- t(lfMat)
plotMat <- plotMat[,rownames(colAnno[order(colAnno$clusters),])]
annoColor <- list(clusters = structure(RColorBrewer::brewer.pal(4, "Set1"), names = seq(4)))
pheatmap(plotMat, clustering_method = "complete", annotation_col = colAnno,
cluster_rows = FALSE, cluster_cols = FALSE,
show_colnames = FALSE, annotation_colors = annoColor)
Survial analysis using clusters from MOFA
testTab <- survT %>% mutate(cluster = factor(clusters[patientID]))
#OS
com(testTab$cluster, testTab$OS, testTab$died, FALSE)
## # A tibble: 3 x 4
## p HR lower higher
## <dbl> <dbl> <dbl> <dbl>
## 1 0.0844 0.456 0.187 1.11
## 2 0.0000844 0.0842 0.0245 0.289
## 3 0.591 1.23 0.579 2.61
#TTT
com(testTab$cluster, testTab$TTT, testTab$treatedAfter, FALSE)
## # A tibble: 3 x 4
## p HR lower higher
## <dbl> <dbl> <dbl> <dbl>
## 1 0.710 0.886 0.469 1.68
## 2 0.000000435 0.130 0.0592 0.287
## 3 0.218 1.59 0.760 3.32
km(testTab$cluster, testTab$TTT, testTab$treatedAfter,
sprintf("%s VS Time to next treatment", "cluster"), stat = "binary")
km(testTab$cluster, testTab$OS, testTab$died, sprintf("%s VS Overall survival", "cluster"), stat = "binary")