Detect differentially expressed proteins

Process datasets

We are interested in changes specific to the intermediate (IP) group, i.e. changes that do not follow the LP-IP-HP gradient.

Process proteomics data

# Protein abundance matrix from the "count" assay (no imputation applied)
protMat <- assay(protCLL, "count")

Get methylation cluster information

# Per-sample design table: one row per column of protMat, holding the
# methylation cluster looked up in patMeta by patient ID. "IP" is listed
# first so that it is the first (reference) level of the factor.
designMat <- data.frame(
  row.names = colnames(protMat),
  Mclust = factor(
    patMeta[match(colnames(protMat), patMeta$Patient.ID), ]$Methylation_Cluster,
    levels = c("IP", "LP", "HP")
  )
)

# Drop samples whose cluster is NA (no match in patMeta, or a value outside
# the three levels). drop = FALSE keeps the one-column data frame and its
# row names intact.
designMat <- designMat[!is.na(designMat$Mclust), , drop = FALSE]

# Restrict the protein matrix to the annotated samples, in the same order;
# drop = FALSE keeps it a matrix even if only one sample remains.
protMat <- protMat[, rownames(designMat), drop = FALSE]

How many samples have methylation cluster information?

[1] 44

Numbers of samples in each cluster


 8 21 15 

Differential expression using proDA

Fit the probabilistic dropout model

# Fit the proDA model with methylation cluster (Mclust) as the only covariate;
# sample annotations come from designMat.
fit <- proDA(
  protMat,
  design = ~ Mclust,
  col_data = designMat
)

Test for differentially expressed proteins

# Test the "MclustLP" and "MclustHP" coefficients of the fitted model and
# stack the two result tables into one data frame.
# Columns are renamed to limma-style names, the HGNC symbol is attached from
# rowData(protCLL), and rows are sorted by raw p-value.
# The `Gene` column records which group ("LP" or "HP") the row refers to; it
# is used further down as the key when spreading logFC into LP/HP columns.
resList <- lapply(c("LP", "HP"), function(n) {
  contra <- paste0("Mclust", n)
  test_diff(fit, contra) %>%
    dplyr::rename(
      id = name, logFC = diff, t = t_statistic,
      P.Value = pval, adj.P.Val = adj_pval
    ) %>%
    mutate(name = rowData(protCLL[id, ])$hgnc_symbol) %>%
    select(name, id, logFC, t, P.Value, adj.P.Val) %>%
    arrange(P.Value) %>%
    mutate(Gene = n)
}) %>%
  bind_rows()

Identifying IP group specific changes (25% FDR)

# Keep results with adjusted p-value <= 0.25, place the LP and HP log fold
# changes side by side (one row per protein), and retain proteins whose two
# fold changes have the same sign (product > 0). Rows where the product is NA
# are removed by filter().
ipChange <- resList %>%
  filter(adj.P.Val <= 0.25) %>%
  select(name, id, logFC, Gene) %>%
  spread(key = Gene, value = logFC) %>%
  filter(HP * LP > 0)

How many proteins show IP-specific changes at 25% FDR?

[1] 7

Plot IP-specific changes

# Long-format table (one row per protein x sample) for the proteins in
# ipChange, annotated with methylation cluster, IGHV status and gene symbol.
# drop = FALSE keeps the subset a matrix even when ipChange has a single row;
# without it the row would collapse to a vector and the reshape below would
# treat sample names as protein ids.
plotTab <- protMat[ipChange$id, , drop = FALSE] %>%
  data.frame() %>%
  rownames_to_column("id") %>%
  gather(key = "patID", value = "expr", -id) %>%
  mutate(
    Mclust = designMat[patID, ],
    IGHV = protCLL[, patID]$IGHV.status,
    name = rowData(protCLL[id, ])$hgnc_symbol
  ) %>%
  # Re-level so the x axis follows the LP-IP-HP order
  mutate(Mclust = factor(Mclust, c("LP", "IP", "HP")))

# Box plot of protein abundance per methylation cluster, one panel per
# protein, with individual samples overlaid and coloured by IGHV status.
ggplot(plotTab, aes(x = Mclust, y = expr, fill = Mclust)) +
  geom_boxplot() +
  geom_point(aes(col = IGHV)) +
  # `scales` is spelled out in full instead of relying on partial matching
  facet_wrap(~name, scales = "free") +
  #theme(legend.position = "none") +
  xlab("Methylation Cluster")
Warning: Removed 4 rows containing non-finite values (stat_boxplot).
Warning: Removed 4 rows containing missing values (geom_point).

Version Author Date
b8e0823 Junyan Lu 2020-03-10

Do the RNA expressions of those 7 genes also correlate with methylation cluster?

# Remove genes with zero counts across all samples
dds <- dds[rowSums(counts(dds)) > 0, ]

# Attach the methylation cluster to each RNA-seq sample via its patient ID
dds$MClust <- patMeta[match(dds$PatID, patMeta$Patient.ID), ]$Methylation_Cluster

# Keep the genes found in ipChange and the CLL samples that have a
# methylation cluster annotation
ddsSub <- dds[
  rowData(dds)$symbol %in% ipChange$name,
  dds$diag %in% "CLL" & !is.na(dds$MClust)
]

# Long-format table of normalized counts (one row per gene x sample),
# flagging samples that are also columns of protCLL
plotTab <- counts(ddsSub, normalized = TRUE) %>%
  data.frame() %>%
  rownames_to_column("id") %>%
  gather(key = "patID", value = "count", -id) %>%
  mutate(
    MClust = ddsSub[, patID]$MClust,
    proteomicSample = ifelse(patID %in% colnames(protCLL), "yes", "no"),
    symbol = rowData(ddsSub[id, ])$symbol
  ) %>%
  # Re-level so the x axis follows the LP-IP-HP order
  mutate(MClust = factor(MClust, levels = c("LP", "IP", "HP")))
# Normalized RNA counts (log10 y axis) per methylation cluster, one panel per
# gene symbol. Samples flagged as proteomicSample == "yes" are drawn in opaque
# red, all others in semi-transparent grey.
ggplot(plotTab, aes(x = MClust, y = count, fill = MClust)) +
  geom_boxplot(outlier.shape = NA) +
  ggbeeswarm::geom_beeswarm(
    aes(col = proteomicSample, alpha = proteomicSample)
  ) +
  scale_y_log10() +
  # `scales` is spelled out in full instead of relying on partial matching
  facet_wrap(~symbol, scales = "free", ncol = 2) +
  scale_color_manual(values = c(yes = "red", no = "grey50")) +
  scale_alpha_manual(values = c(yes = 1, no = 0.5)) +
  xlab("Methylation Cluster")

Version Author Date
b8e0823 Junyan Lu 2020-03-10

Only six of the seven genes have RNA-seq expression data, and none of them shows a trend similar to that observed in the proteomic data.

Enrichment analysis

Select proteins with IP specific changes (raw p < 0.05)

# Same selection as for ipChange, but thresholded on the raw p-value (< 0.05):
# spread the LP and HP log fold changes into columns and keep proteins whose
# two fold changes have the same sign (product > 0).
protList <- resList %>%
  filter(P.Value < 0.05) %>%
  select(name, id, logFC, Gene) %>%
  spread(key = Gene, value = logFC) %>%
  filter(HP * LP > 0)

Rank proteins by the difference to HP and LP group

# Score each protein by the mean of its HP and LP log fold changes and store
# the scores in a one-column data frame with the gene symbol as row name
inputTab <- protList %>%
  mutate(stat = (HP + LP) / 2) %>%
  select(name, stat) %>%
  data.frame() %>%
  column_to_rownames("name")

Enrichment analysis using PAGE

# Gene set collections (GMT files) used for the enrichment analysis
gmts <- list(
  H = "../data/gmts/h.all.v6.2.symbols.gmt",
  KEGG = "../data/gmts/c2.cp.kegg.v6.2.symbols.gmt"
)

# Run the "page" method once per collection and gather the results in a
# named list
enRes <- list()
enRes$HALLMARK <- runGSEA(inputTab, gmts[["H"]], "page")
enRes$KEGG <- runGSEA(inputTab, gmts[["KEGG"]], "page")

# Bar plot of enriched sets, thresholded at 0.05 with ifFDR = FALSE
p <- plotEnrichmentBar(enRes, pCut = 0.05, ifFDR = FALSE)
Coordinate system already present. Adding new coordinate system, which will replace the existing one.
Coordinate system already present. Adding new coordinate system, which will replace the existing one.

Version Author Date
b8e0823 Junyan Lu 2020-03-10

