Last updated: 2022-11-17

Load packages and dataset

Load processed data




Use different normalization method

#variance stabilizing normalization
ppe.vst <- preprocessPhos(maeData, missCut = 0.5, transform = "vst")
[1] "Number of proteins and samples:"
[1] 3787   96
#log2 + median normalization
ppe.log2Med <- preprocessPhos(maeData, missCut = 0.5, transform = "log2", normalize = TRUE)
[1] "Number of proteins and samples:"
[1] 3787   96
#only log2 transformation
ppe.log2Only <- preprocessPhos(maeData, missCut = 0.5, transform ="log2", normalize = FALSE)
[1] "Number of proteins and samples:"
[1] 3787   96
#log2 transformation + normalization based on precursor quantity
ppe.pre <- preprocessPhos(maeData, missCut = 0.5, transform ="log2", normalize = TRUE, usePrecursor = TRUE)
[1] "Number of proteins and samples:"
[1] 3787   96

Mean SD plots





log2 only


#### Use precursor


Distribution after normalization


# Per-sample box plots of the first assay of ppe.vst (missing values dropped),
# filled by time point and faceted by drug.
countMat <- assay(ppe.vst)
annoTab <- colData(ppe.vst)[,c("sample","time","drug")] %>% as_tibble()
countTab <- countMat %>% as_tibble(rownames = "id") %>% 
    pivot_longer(-id) %>%
    # restored truncated condition: keep only non-missing measurements
    filter(!is.na(value)) %>%
    left_join(annoTab, by = c(name = "sample"))
ggplot(countTab, aes(x=name, y=value)) +
    geom_boxplot(aes(fill = time)) +
    facet_wrap(~drug, scales = "free_x") +
    theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))


# Per-sample box plots of the first assay of ppe.log2Med (missing values
# dropped), filled by time point and faceted by drug.
countMat <- assay(ppe.log2Med)
annoTab <- colData(ppe.log2Med)[,c("sample","time","drug")] %>% as_tibble()
countTab <- countMat %>% as_tibble(rownames = "id") %>% 
    pivot_longer(-id) %>%
    # restored truncated condition: keep only non-missing measurements
    filter(!is.na(value)) %>%
    left_join(annoTab, by = c(name = "sample"))
ggplot(countTab, aes(x=name, y=value)) +
    geom_boxplot(aes(fill = time)) +
    facet_wrap(~drug, scales = "free_x") +
    theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))

#### only transformation

# Per-sample box plots of the first assay of ppe.log2Only (missing values
# dropped), filled by time point and faceted by drug.
countMat <- assay(ppe.log2Only)
annoTab <- colData(ppe.log2Only)[,c("sample","time","drug")] %>% as_tibble()
countTab <- countMat %>% as_tibble(rownames = "id") %>% 
    pivot_longer(-id) %>%
    # restored truncated condition: keep only non-missing measurements
    filter(!is.na(value)) %>%
    left_join(annoTab, by = c(name = "sample"))
ggplot(countTab, aes(x=name, y=value)) +
    geom_boxplot(aes(fill = time)) +
    facet_wrap(~drug, scales = "free_x") +
    theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))

use precursor

# Per-sample box plots of the first assay of ppe.pre (missing values dropped),
# filled by time point and faceted by drug.
countMat <- assay(ppe.pre)
annoTab <- colData(ppe.pre)[,c("sample","time","drug")] %>% as_tibble()
countTab <- countMat %>% as_tibble(rownames = "id") %>% 
    pivot_longer(-id) %>%
    # restored truncated condition: keep only non-missing measurements
    filter(!is.na(value)) %>%
    left_join(annoTab, by = c(name = "sample"))
ggplot(countTab, aes(x=name, y=value)) +
    geom_boxplot(aes(fill = time)) +
    facet_wrap(~drug, scales = "free_x") +
    theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))



# PCA on the (up to) 2000 most variable rows of the imputed ppe.vst assay.
exprMat <- assays(ppe.vst)[["imputed"]]
sds <- genefilter::rowSds(exprMat)
# head() avoids NA row indices when fewer than 2000 rows are available
exprMat <- exprMat[head(order(sds, decreasing = TRUE), 2000),]

smpAnno <- colData(ppe.vst) %>%
    as_tibble(rownames = "id")

pcRes <- prcomp(t(exprMat), scale. = TRUE, center = TRUE)
# The pipe was left dangling; the PCA plots use drug/time/replicate, so the
# sample annotation is joined on by sample id.
pcTab <- pcRes$x[,1:10] %>% 
    as_tibble(rownames = "id") %>%
    left_join(smpAnno, by = "id")

PC1 versus PC2

# PC1 vs PC2: colour = drug, point size = time, shape = replicate, labelled by id.
ggplot(pcTab, aes(x=PC1, y=PC2)) +
    geom_point(aes(col = drug, size = factor(time),
               shape = replicate)) +
    ggrepel::geom_text_repel(aes(label = id))

PC3 versus PC4

# PC3 vs PC4: colour = drug, point size = time, shape = replicate, labelled by id.
ggplot(pcTab, aes(x=PC3, y=PC4)) +
    geom_point(aes(col = drug, size = factor(time),
               shape = replicate)) +
    ggrepel::geom_text_repel(aes(label = id))


# PCA on the (up to) 2000 most variable rows of the imputed ppe.log2Med assay.
exprMat <- assays(ppe.log2Med)[["imputed"]]
sds <- genefilter::rowSds(exprMat)
# head() avoids NA row indices when fewer than 2000 rows are available
exprMat <- exprMat[head(order(sds, decreasing = TRUE), 2000),]

smpAnno <- colData(ppe.log2Med) %>%
    as_tibble(rownames = "id")

pcRes <- prcomp(t(exprMat), scale. = TRUE, center = TRUE)
# The pipe was left dangling; the PCA plots use drug/time/replicate, so the
# sample annotation is joined on by sample id.
pcTab <- pcRes$x[,1:10] %>% 
    as_tibble(rownames = "id") %>%
    left_join(smpAnno, by = "id")

PC1 versus PC2

# PC1 vs PC2: colour = drug, point size = time, shape = replicate, labelled by id.
ggplot(pcTab, aes(x=PC1, y=PC2)) +
    geom_point(aes(col = drug, size = factor(time),
               shape = replicate)) +
    ggrepel::geom_text_repel(aes(label = id))

PC3 versus PC4

# PC3 vs PC4: colour = drug, point size = time, shape = replicate, labelled by id.
ggplot(pcTab, aes(x=PC3, y=PC4)) +
    geom_point(aes(col = drug, size = factor(time),
               shape = replicate)) +
    ggrepel::geom_text_repel(aes(label = id))

log2 only

# PCA on the (up to) 2000 most variable rows of the imputed ppe.log2Only assay.
exprMat <- assays(ppe.log2Only)[["imputed"]]
sds <- genefilter::rowSds(exprMat)
# head() avoids NA row indices when fewer than 2000 rows are available
exprMat <- exprMat[head(order(sds, decreasing = TRUE), 2000),]

smpAnno <- colData(ppe.log2Only) %>%
    as_tibble(rownames = "id")

pcRes <- prcomp(t(exprMat), scale. = TRUE, center = TRUE)
# The pipe was left dangling; the PCA plots use drug/time/replicate, so the
# sample annotation is joined on by sample id.
pcTab <- pcRes$x[,1:10] %>% 
    as_tibble(rownames = "id") %>%
    left_join(smpAnno, by = "id")

PC1 versus PC2

# PC1 vs PC2: colour = drug, point size = time, shape = replicate, labelled by id.
ggplot(pcTab, aes(x=PC1, y=PC2)) +
    geom_point(aes(col = drug, size = factor(time),
               shape = replicate)) +
    ggrepel::geom_text_repel(aes(label = id))

PC3 versus PC4

# PC3 vs PC4: colour = drug, point size = time, shape = replicate, labelled by id.
ggplot(pcTab, aes(x=PC3, y=PC4)) +
    geom_point(aes(col = drug, size = factor(time),
               shape = replicate)) +
    ggrepel::geom_text_repel(aes(label = id))

Use precursor

# PCA on the (up to) 2000 most variable rows of the imputed ppe.pre assay.
exprMat <- assays(ppe.pre)[["imputed"]]
sds <- genefilter::rowSds(exprMat)
# head() avoids NA row indices when fewer than 2000 rows are available
exprMat <- exprMat[head(order(sds, decreasing = TRUE), 2000),]

smpAnno <- colData(ppe.pre) %>%
    as_tibble(rownames = "id")

pcRes <- prcomp(t(exprMat), scale. = TRUE, center = TRUE)
# The pipe was left dangling; the PCA plots use drug/time/replicate, so the
# sample annotation is joined on by sample id.
pcTab <- pcRes$x[,1:10] %>% 
    as_tibble(rownames = "id") %>%
    left_join(smpAnno, by = "id")

PC1 versus PC2

# PC1 vs PC2: colour = drug, point size = time, shape = replicate, labelled by id.
ggplot(pcTab, aes(x=PC1, y=PC2)) +
    geom_point(aes(col = drug, size = factor(time),
               shape = replicate)) +
    ggrepel::geom_text_repel(aes(label = id))

PC3 versus PC4

# PC3 vs PC4: colour = drug, point size = time, shape = replicate, labelled by id.
ggplot(pcTab, aes(x=PC3, y=PC4)) +
    geom_point(aes(col = drug, size = factor(time),
               shape = replicate)) +
    ggrepel::geom_text_repel(aes(label = id))

Hierarchical clustering


# Clustered heat map for ppe.vst. The matrix is built from ppe.vst itself
# (up to 2000 most variable imputed rows) instead of reusing whatever
# `exprMat` was left over from the last PCA chunk.
heatMat <- assays(ppe.vst)[["imputed"]]
heatMat <- heatMat[head(order(genefilter::rowSds(heatMat), decreasing = TRUE), 2000),]
colAnno <- colData(ppe.vst)[,c("cellLine","drug","time")] %>% data.frame()
exprMat.scaled <- jyluMisc::mscale(heatMat, center = TRUE, scale = TRUE, censor = 5)
pheatmap::pheatmap(exprMat.scaled, annotation_col = colAnno, clustering_method = "ward.D2",
                   color = colorRampPalette(c("blue","white","red"))(100), 
                   breaks = seq(-5,5, length.out = 101), 
                   show_rownames = FALSE)


# Clustered heat map for ppe.log2Med. The matrix is built from ppe.log2Med
# itself (up to 2000 most variable imputed rows) instead of reusing whatever
# `exprMat` was left over from the last PCA chunk.
heatMat <- assays(ppe.log2Med)[["imputed"]]
heatMat <- heatMat[head(order(genefilter::rowSds(heatMat), decreasing = TRUE), 2000),]
colAnno <- colData(ppe.log2Med)[,c("cellLine","drug","time")] %>% data.frame()
exprMat.scaled <- jyluMisc::mscale(heatMat, center = TRUE, scale = TRUE, censor = 5)
pheatmap::pheatmap(exprMat.scaled, annotation_col = colAnno, clustering_method = "ward.D2",
                   color = colorRampPalette(c("blue","white","red"))(100), 
                   breaks = seq(-5,5, length.out = 101), 
                   show_rownames = FALSE)

log2 only

# Clustered heat map for ppe.log2Only. The matrix is built from ppe.log2Only
# itself (up to 2000 most variable imputed rows) instead of reusing whatever
# `exprMat` was left over from the last PCA chunk.
heatMat <- assays(ppe.log2Only)[["imputed"]]
heatMat <- heatMat[head(order(genefilter::rowSds(heatMat), decreasing = TRUE), 2000),]
colAnno <- colData(ppe.log2Only)[,c("cellLine","drug","time")] %>% data.frame()
exprMat.scaled <- jyluMisc::mscale(heatMat, center = TRUE, scale = TRUE, censor = 5)
pheatmap::pheatmap(exprMat.scaled, annotation_col = colAnno, clustering_method = "ward.D2",
                   color = colorRampPalette(c("blue","white","red"))(100), 
                   breaks = seq(-5,5, length.out = 101), 
                   show_rownames = FALSE)


# Clustered heat map for ppe.pre. The matrix is built explicitly from ppe.pre
# (up to 2000 most variable imputed rows) so the chunk no longer depends on
# `exprMat` happening to hold the ppe.pre data from the last PCA chunk.
heatMat <- assays(ppe.pre)[["imputed"]]
heatMat <- heatMat[head(order(genefilter::rowSds(heatMat), decreasing = TRUE), 2000),]
colAnno <- colData(ppe.pre)[,c("cellLine","drug","time")] %>% data.frame()
exprMat.scaled <- jyluMisc::mscale(heatMat, center = TRUE, scale = TRUE, censor = 5)
pheatmap::pheatmap(exprMat.scaled, annotation_col = colAnno, clustering_method = "ward.D2",
                   color = colorRampPalette(c("blue","white","red"))(100), 
                   breaks = seq(-5,5, length.out = 101), 
                   show_rownames = FALSE)

Remove one potential outlier

# Keep every column of maeData whose `sample` value is not
# "cell5_combo_24h_Rep2" (the suspected outlier).
maeSub <- maeData[, maeData$sample != "cell5_combo_24h_Rep2"]

#variance stabilizing normalization
ppe.vst <- preprocessPhos(maeSub, missCut = 0.5, transform = "vst")
[1] "Number of proteins and samples:"
[1] 3879   95
#log2 + median normalization
ppe.log2Med <- preprocessPhos(maeSub, missCut = 0.5, transform = "log2", normalize = TRUE)
[1] "Number of proteins and samples:"
[1] 3879   95
#only log2 transformation
ppe.log2Only <- preprocessPhos(maeSub, missCut = 0.5, transform ="log2", normalize = FALSE)
[1] "Number of proteins and samples:"
[1] 3879   95
#log2 transformation + normalization based on precursor quantity
# Use the outlier-filtered maeSub, like the three calls above, so that all four
# normalizations are compared on the same set of samples.
# NOTE: the dimensions printed below were recorded when this call still used maeData.
ppe.pre <- preprocessPhos(maeSub, missCut = 0.5, transform ="log2", normalize = TRUE, usePrecursor = TRUE)
[1] "Number of proteins and samples:"
[1] 3787   96



# PCA on the (up to) 2000 most variable rows of the imputed ppe.vst assay.
exprMat <- assays(ppe.vst)[["imputed"]]
sds <- genefilter::rowSds(exprMat)
# head() avoids NA row indices when fewer than 2000 rows are available
exprMat <- exprMat[head(order(sds, decreasing = TRUE), 2000),]

smpAnno <- colData(ppe.vst) %>%
    as_tibble(rownames = "id")

pcRes <- prcomp(t(exprMat), scale. = TRUE, center = TRUE)
# The pipe was left dangling; the PCA plots use drug/time/replicate, so the
# sample annotation is joined on by sample id.
pcTab <- pcRes$x[,1:10] %>% 
    as_tibble(rownames = "id") %>%
    left_join(smpAnno, by = "id")

PC1 versus PC2

# PC1 vs PC2: colour = drug, point size = time, shape = replicate, labelled by id.
ggplot(pcTab, aes(x=PC1, y=PC2)) +
    geom_point(aes(col = drug, size = factor(time),
               shape = replicate)) +
    ggrepel::geom_text_repel(aes(label = id))

PC3 versus PC4

# PC3 vs PC4: colour = drug, point size = time, shape = replicate, labelled by id.
ggplot(pcTab, aes(x=PC3, y=PC4)) +
    geom_point(aes(col = drug, size = factor(time),
               shape = replicate)) +
    ggrepel::geom_text_repel(aes(label = id))


# PCA on the (up to) 2000 most variable rows of the imputed ppe.log2Med assay.
exprMat <- assays(ppe.log2Med)[["imputed"]]
sds <- genefilter::rowSds(exprMat)
# head() avoids NA row indices when fewer than 2000 rows are available
exprMat <- exprMat[head(order(sds, decreasing = TRUE), 2000),]

smpAnno <- colData(ppe.log2Med) %>%
    as_tibble(rownames = "id")

pcRes <- prcomp(t(exprMat), scale. = TRUE, center = TRUE)
# The pipe was left dangling; the PCA plots use drug/time/replicate, so the
# sample annotation is joined on by sample id.
pcTab <- pcRes$x[,1:10] %>% 
    as_tibble(rownames = "id") %>%
    left_join(smpAnno, by = "id")

PC1 versus PC2

# PC1 vs PC2: colour = drug, point size = time, shape = replicate, labelled by id.
ggplot(pcTab, aes(x=PC1, y=PC2)) +
    geom_point(aes(col = drug, size = factor(time),
               shape = replicate)) +
    ggrepel::geom_text_repel(aes(label = id))

PC3 versus PC4

# PC3 vs PC4: colour = drug, point size = time, shape = replicate, labelled by id.
ggplot(pcTab, aes(x=PC3, y=PC4)) +
    geom_point(aes(col = drug, size = factor(time),
               shape = replicate)) +
    ggrepel::geom_text_repel(aes(label = id))

log2 only

# PCA on the (up to) 2000 most variable rows of the imputed ppe.log2Only assay.
exprMat <- assays(ppe.log2Only)[["imputed"]]
sds <- genefilter::rowSds(exprMat)
# head() avoids NA row indices when fewer than 2000 rows are available
exprMat <- exprMat[head(order(sds, decreasing = TRUE), 2000),]

smpAnno <- colData(ppe.log2Only) %>%
    as_tibble(rownames = "id")

pcRes <- prcomp(t(exprMat), scale. = TRUE, center = TRUE)
# The pipe was left dangling; the PCA plots use drug/time/replicate, so the
# sample annotation is joined on by sample id.
pcTab <- pcRes$x[,1:10] %>% 
    as_tibble(rownames = "id") %>%
    left_join(smpAnno, by = "id")

PC1 versus PC2

# PC1 vs PC2: colour = drug, point size = time, shape = replicate, labelled by id.
ggplot(pcTab, aes(x=PC1, y=PC2)) +
    geom_point(aes(col = drug, size = factor(time),
               shape = replicate)) +
    ggrepel::geom_text_repel(aes(label = id))

PC3 versus PC4

# PC3 vs PC4: colour = drug, point size = time, shape = replicate, labelled by id.
ggplot(pcTab, aes(x=PC3, y=PC4)) +
    geom_point(aes(col = drug, size = factor(time),
               shape = replicate)) +
    ggrepel::geom_text_repel(aes(label = id))

Use precursor

# PCA on the (up to) 2000 most variable rows of the imputed ppe.pre assay.
exprMat <- assays(ppe.pre)[["imputed"]]
sds <- genefilter::rowSds(exprMat)
# head() avoids NA row indices when fewer than 2000 rows are available
exprMat <- exprMat[head(order(sds, decreasing = TRUE), 2000),]

smpAnno <- colData(ppe.pre) %>%
    as_tibble(rownames = "id")

pcRes <- prcomp(t(exprMat), scale. = TRUE, center = TRUE)
# The pipe was left dangling; the PCA plots use drug/time/replicate, so the
# sample annotation is joined on by sample id.
pcTab <- pcRes$x[,1:10] %>% 
    as_tibble(rownames = "id") %>%
    left_join(smpAnno, by = "id")

PC1 versus PC2

# PC1 vs PC2: colour = drug, point size = time, shape = replicate, labelled by id.
ggplot(pcTab, aes(x=PC1, y=PC2)) +
    geom_point(aes(col = drug, size = factor(time),
               shape = replicate)) +
    ggrepel::geom_text_repel(aes(label = id))

PC3 versus PC4

# PC3 vs PC4: colour = drug, point size = time, shape = replicate, labelled by id.
ggplot(pcTab, aes(x=PC3, y=PC4)) +
    geom_point(aes(col = drug, size = factor(time),
               shape = replicate)) +
    ggrepel::geom_text_repel(aes(label = id))

Hierarchical clustering


# Clustered heat map for ppe.vst. The matrix is built from ppe.vst itself
# (up to 2000 most variable imputed rows) instead of reusing whatever
# `exprMat` was left over from the last PCA chunk.
heatMat <- assays(ppe.vst)[["imputed"]]
heatMat <- heatMat[head(order(genefilter::rowSds(heatMat), decreasing = TRUE), 2000),]
colAnno <- colData(ppe.vst)[,c("cellLine","drug","time")] %>% data.frame()
exprMat.scaled <- jyluMisc::mscale(heatMat, center = TRUE, scale = TRUE, censor = 5)
pheatmap::pheatmap(exprMat.scaled, annotation_col = colAnno, clustering_method = "ward.D2",
                   color = colorRampPalette(c("blue","white","red"))(100), 
                   breaks = seq(-5,5, length.out = 101), 
                   show_rownames = FALSE)


# Clustered heat map for ppe.log2Med. The matrix is built from ppe.log2Med
# itself (up to 2000 most variable imputed rows) instead of reusing whatever
# `exprMat` was left over from the last PCA chunk.
heatMat <- assays(ppe.log2Med)[["imputed"]]
heatMat <- heatMat[head(order(genefilter::rowSds(heatMat), decreasing = TRUE), 2000),]
colAnno <- colData(ppe.log2Med)[,c("cellLine","drug","time")] %>% data.frame()
exprMat.scaled <- jyluMisc::mscale(heatMat, center = TRUE, scale = TRUE, censor = 5)
pheatmap::pheatmap(exprMat.scaled, annotation_col = colAnno, clustering_method = "ward.D2",
                   color = colorRampPalette(c("blue","white","red"))(100), 
                   breaks = seq(-5,5, length.out = 101), 
                   show_rownames = FALSE)

log2 only

# Clustered heat map for ppe.log2Only. The matrix is built from ppe.log2Only
# itself (up to 2000 most variable imputed rows) instead of reusing whatever
# `exprMat` was left over from the last PCA chunk.
heatMat <- assays(ppe.log2Only)[["imputed"]]
heatMat <- heatMat[head(order(genefilter::rowSds(heatMat), decreasing = TRUE), 2000),]
colAnno <- colData(ppe.log2Only)[,c("cellLine","drug","time")] %>% data.frame()
exprMat.scaled <- jyluMisc::mscale(heatMat, center = TRUE, scale = TRUE, censor = 5)
pheatmap::pheatmap(exprMat.scaled, annotation_col = colAnno, clustering_method = "ward.D2",
                   color = colorRampPalette(c("blue","white","red"))(100), 
                   breaks = seq(-5,5, length.out = 101), 
                   show_rownames = FALSE)


# Clustered heat map for ppe.pre. The matrix is built explicitly from ppe.pre
# (up to 2000 most variable imputed rows) so the chunk no longer depends on
# `exprMat` happening to hold the ppe.pre data from the last PCA chunk.
heatMat <- assays(ppe.pre)[["imputed"]]
heatMat <- heatMat[head(order(genefilter::rowSds(heatMat), decreasing = TRUE), 2000),]
colAnno <- colData(ppe.pre)[,c("cellLine","drug","time")] %>% data.frame()
exprMat.scaled <- jyluMisc::mscale(heatMat, center = TRUE, scale = TRUE, censor = 5)
pheatmap::pheatmap(exprMat.scaled, annotation_col = colAnno, clustering_method = "ward.D2",
                   color = colorRampPalette(c("blue","white","red"))(100), 
                   breaks = seq(-5,5, length.out = 101), 
                   show_rownames = FALSE)

Compare the reproducibility of replicates when using different normalization methods

Function to calculate reproducibility of replicates

# Summarise replicate variability for one preprocessed object.
#
# x: object accepted by jyluMisc::sumToTidy(); the tidy table is expected to
#    contain the columns sampleCondi, rowID and Intensity.
# Returns a tibble with one row per (sampleCondi, rowID) holding the standard
# deviation (sdVal) and mean (meanVal) of Intensity, plus meanRnk, the rank of
# meanVal within each sampleCondi.
getSdTab <- function(x) {
  jyluMisc::sumToTidy(x) %>%
    group_by(sampleCondi, rowID) %>%
    summarise(sdVal = sd(Intensity, na.rm = TRUE),
              meanVal = mean(Intensity, na.rm = TRUE)) %>%
    # NOTE(review): the filter condition was truncated in the source; dropping
    # rows without a computable SD is the presumed intent -- confirm.
    filter(!is.na(sdVal)) %>%
    # rank(), not order(): order() returns the sorting permutation, not ranks
    mutate(meanRnk = rank(meanVal, ties.method = "first"))
}
# Compute the replicate SD table for each normalization, tag each table with
# its method name in `norm`, and stack them into one long table.
sdTab.vst <- getSdTab(ppe.vst) %>% mutate(norm = "vst")
sdTab.log2Med <- getSdTab(ppe.log2Med) %>% mutate(norm = "log2Med")
sdTab.log2Only <- getSdTab(ppe.log2Only) %>% mutate(norm = "log2Only")
sdTab.pre <- getSdTab(ppe.pre) %>% mutate(norm = "precursor")
sumTab <- bind_rows(sdTab.vst, sdTab.log2Med, sdTab.log2Only, sdTab.pre) 

Add annotations

# One annotation row per sampleCondi (first occurrence kept), taken from ppe.vst.
colTab <- colData(ppe.vst) %>% as_tibble() %>%
  distinct(sampleCondi,.keep_all = TRUE)
# NOTE(review): no `by` is given, so the join uses every column name shared by
# both tables -- presumably only sampleCondi; confirm.
sumTab <- left_join(sumTab, colTab)

Overall distribution

# Overlaid histograms of the replicate SD, one semi-transparent layer per
# normalization method (the dangling `+` at the end of the chain was removed).
ggplot(sumTab, aes(x=sdVal, fill = norm)) +
  geom_histogram(position = "identity", alpha=0.5, color = "grey50")

Per sample

# Violin plots of the replicate SD per sample condition, filled by
# normalization method and faceted by drug.
ggplot(sumTab, aes(x=sampleCondi, y = sdVal, fill = norm)) +
  geom_violin() +
  # `scales` spelled out instead of relying on partial argument matching
  facet_wrap(~drug, scales = "free_x") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))

Mean versus SD

# Mean-versus-SD relationship: hexagonal density with a smoothed trend,
# one panel per normalization method stacked in a single column.
ggplot(data = sumTab, mapping = aes(x = meanVal, y = sdVal)) +
  geom_hex() +
  geom_smooth() +
  facet_wrap(~norm, ncol = 1)

Rank of sd for each normalization method

# Rank the normalization methods within each (sampleCondi, rowID) group by
# their replicate SD (1 = smallest SD). rank() replaces order(): order()
# returns the permutation that sorts the values, not the rank of each value.
ordTab <- group_by(sumTab, sampleCondi, rowID) %>%
  mutate(index = rank(sdVal, ties.method = "first"))
# Number of features at each rank position, per normalization method.
ggplot(ordTab, aes(x=index, fill = norm)) +
  geom_bar(position = "dodge", alpha=0.5, color = "grey50") 

For each sample, which normalization method gives the best reproducibility for replicates

# After sorting by index, keep the first row per distinct combination and count
# the rows per sampleCondi / norm / drug.
# NOTE(review): ordTab is presumably still grouped by (sampleCondi, rowID), so
# distinct() keeps one row (lowest index) per feature and condition -- confirm.
ordPerTab <- arrange(ordTab, index) %>% distinct(sampleCondi, drug,.keep_all = TRUE) %>%
  group_by(sampleCondi, norm, drug) %>% summarise(n=length(rowID))
ggplot(ordPerTab, aes(x=sampleCondi, y = n, fill = norm)) +
  # the plot had no geom layer and would render empty panels; bars of the
  # counts are the presumed intent
  geom_col() +
  facet_wrap(~drug, scales = "free_x") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))

It seems log2 + median scaling can get the best reproducibility among replicates. No normalization is the worst.



Use different normalization method

#variance stabilizing normalization
ppe.vst <- preprocessProteome(maeData, missCut = 0.5, transform = "vst")
[1] "Number of proteins and samples:"
[1] 7608   96
#log2 + median normalization
ppe.log2Med <- preprocessProteome(maeData, missCut = 0.5, transform = "log2", normalize = TRUE)
[1] "Number of proteins and samples:"
[1] 7608   96
#only log2 transformation
ppe.log2Only <- preprocessProteome(maeData, missCut = 0.5, transform ="log2", normalize = FALSE)
[1] "Number of proteins and samples:"
[1] 7608   96
#log2 transformation + normalization based on precursor quantity
ppe.pre <- preprocessProteome(maeData, missCut = 0.5, transform ="log2", normalize = TRUE, usePrecursor = TRUE)
[1] "Number of proteins and samples:"
[1] 7608   96

Mean SD plots





log2 only


#### Use precursor


Distribution after normalization


# Per-sample box plots of the first assay of ppe.vst (missing values dropped),
# filled by time point and faceted by drug.
countMat <- assay(ppe.vst)
annoTab <- colData(ppe.vst)[,c("sample","time","drug")] %>% as_tibble()
countTab <- countMat %>% as_tibble(rownames = "id") %>% 
    pivot_longer(-id) %>%
    # restored truncated condition: keep only non-missing measurements
    filter(!is.na(value)) %>%
    left_join(annoTab, by = c(name = "sample"))
ggplot(countTab, aes(x=name, y=value)) +
    geom_boxplot(aes(fill = time)) +
    facet_wrap(~drug, scales = "free_x") +
    theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))


# Per-sample box plots of the first assay of ppe.log2Med (missing values
# dropped), filled by time point and faceted by drug.
countMat <- assay(ppe.log2Med)
annoTab <- colData(ppe.log2Med)[,c("sample","time","drug")] %>% as_tibble()
countTab <- countMat %>% as_tibble(rownames = "id") %>% 
    pivot_longer(-id) %>%
    # restored truncated condition: keep only non-missing measurements
    filter(!is.na(value)) %>%
    left_join(annoTab, by = c(name = "sample"))
ggplot(countTab, aes(x=name, y=value)) +
    geom_boxplot(aes(fill = time)) +
    facet_wrap(~drug, scales = "free_x") +
    theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))

#### only transformation

# Per-sample box plots of the first assay of ppe.log2Only (missing values
# dropped), filled by time point and faceted by drug.
countMat <- assay(ppe.log2Only)
annoTab <- colData(ppe.log2Only)[,c("sample","time","drug")] %>% as_tibble()
countTab <- countMat %>% as_tibble(rownames = "id") %>% 
    pivot_longer(-id) %>%
    # restored truncated condition: keep only non-missing measurements
    filter(!is.na(value)) %>%
    left_join(annoTab, by = c(name = "sample"))
ggplot(countTab, aes(x=name, y=value)) +
    geom_boxplot(aes(fill = time)) +
    facet_wrap(~drug, scales = "free_x") +
    theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))

use precursor

# Per-sample box plots of the first assay of ppe.pre (missing values dropped),
# filled by time point and faceted by drug.
countMat <- assay(ppe.pre)
annoTab <- colData(ppe.pre)[,c("sample","time","drug")] %>% as_tibble()
countTab <- countMat %>% as_tibble(rownames = "id") %>% 
    pivot_longer(-id) %>%
    # restored truncated condition: keep only non-missing measurements
    filter(!is.na(value)) %>%
    left_join(annoTab, by = c(name = "sample"))
ggplot(countTab, aes(x=name, y=value)) +
    geom_boxplot(aes(fill = time)) +
    facet_wrap(~drug, scales = "free_x") +
    theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))



# PCA on the (up to) 2000 most variable rows of the imputed ppe.vst assay.
exprMat <- assays(ppe.vst)[["imputed"]]
sds <- genefilter::rowSds(exprMat)
# head() avoids NA row indices when fewer than 2000 rows are available
exprMat <- exprMat[head(order(sds, decreasing = TRUE), 2000),]

smpAnno <- colData(ppe.vst) %>%
    as_tibble(rownames = "id")

pcRes <- prcomp(t(exprMat), scale. = TRUE, center = TRUE)
# The pipe was left dangling; the PCA plots use drug/time/replicate, so the
# sample annotation is joined on by sample id.
pcTab <- pcRes$x[,1:10] %>% 
    as_tibble(rownames = "id") %>%
    left_join(smpAnno, by = "id")

PC1 versus PC2

# PC1 vs PC2: colour = drug, point size = time, shape = replicate, labelled by id.
ggplot(pcTab, aes(x=PC1, y=PC2)) +
    geom_point(aes(col = drug, size = factor(time),
               shape = replicate)) +
    ggrepel::geom_text_repel(aes(label = id))

PC3 versus PC4

# PC3 vs PC4: colour = drug, point size = time, shape = replicate, labelled by id.
ggplot(pcTab, aes(x=PC3, y=PC4)) +
    geom_point(aes(col = drug, size = factor(time),
               shape = replicate)) +
    ggrepel::geom_text_repel(aes(label = id))


# PCA on the (up to) 2000 most variable rows of the imputed ppe.log2Med assay.
exprMat <- assays(ppe.log2Med)[["imputed"]]
sds <- genefilter::rowSds(exprMat)
# head() avoids NA row indices when fewer than 2000 rows are available
exprMat <- exprMat[head(order(sds, decreasing = TRUE), 2000),]

smpAnno <- colData(ppe.log2Med) %>%
    as_tibble(rownames = "id")

pcRes <- prcomp(t(exprMat), scale. = TRUE, center = TRUE)
# The pipe was left dangling; the PCA plots use drug/time/replicate, so the
# sample annotation is joined on by sample id.
pcTab <- pcRes$x[,1:10] %>% 
    as_tibble(rownames = "id") %>%
    left_join(smpAnno, by = "id")

PC1 versus PC2

# PC1 vs PC2: colour = drug, point size = time, shape = replicate, labelled by id.
ggplot(pcTab, aes(x=PC1, y=PC2)) +
    geom_point(aes(col = drug, size = factor(time),
               shape = replicate)) +
    ggrepel::geom_text_repel(aes(label = id))

PC3 versus PC4

# PC3 vs PC4: colour = drug, point size = time, shape = replicate, labelled by id.
ggplot(pcTab, aes(x=PC3, y=PC4)) +
    geom_point(aes(col = drug, size = factor(time),
               shape = replicate)) +
    ggrepel::geom_text_repel(aes(label = id))

log2 only

# PCA on the (up to) 2000 most variable rows of the imputed ppe.log2Only assay.
exprMat <- assays(ppe.log2Only)[["imputed"]]
sds <- genefilter::rowSds(exprMat)
# head() avoids NA row indices when fewer than 2000 rows are available
exprMat <- exprMat[head(order(sds, decreasing = TRUE), 2000),]

smpAnno <- colData(ppe.log2Only) %>%
    as_tibble(rownames = "id")

pcRes <- prcomp(t(exprMat), scale. = TRUE, center = TRUE)
# The pipe was left dangling; the PCA plots use drug/time/replicate, so the
# sample annotation is joined on by sample id.
pcTab <- pcRes$x[,1:10] %>% 
    as_tibble(rownames = "id") %>%
    left_join(smpAnno, by = "id")

PC1 versus PC2

# PC1 vs PC2: colour = drug, point size = time, shape = replicate, labelled by id.
ggplot(pcTab, aes(x=PC1, y=PC2)) +
    geom_point(aes(col = drug, size = factor(time),
               shape = replicate)) +
    ggrepel::geom_text_repel(aes(label = id))

PC3 versus PC4

# PC3 vs PC4: colour = drug, point size = time, shape = replicate, labelled by id.
ggplot(pcTab, aes(x=PC3, y=PC4)) +
    geom_point(aes(col = drug, size = factor(time),
               shape = replicate)) +
    ggrepel::geom_text_repel(aes(label = id))

Use precursor

# PCA on the (up to) 2000 most variable rows of the imputed ppe.pre assay.
exprMat <- assays(ppe.pre)[["imputed"]]
sds <- genefilter::rowSds(exprMat)
# head() avoids NA row indices when fewer than 2000 rows are available
exprMat <- exprMat[head(order(sds, decreasing = TRUE), 2000),]

smpAnno <- colData(ppe.pre) %>%
    as_tibble(rownames = "id")

pcRes <- prcomp(t(exprMat), scale. = TRUE, center = TRUE)
# The pipe was left dangling; the PCA plots use drug/time/replicate, so the
# sample annotation is joined on by sample id.
pcTab <- pcRes$x[,1:10] %>% 
    as_tibble(rownames = "id") %>%
    left_join(smpAnno, by = "id")

PC1 versus PC2

# PC1 vs PC2: colour = drug, point size = time, shape = replicate, labelled by id.
ggplot(pcTab, aes(x=PC1, y=PC2)) +
    geom_point(aes(col = drug, size = factor(time),
               shape = replicate)) +
    ggrepel::geom_text_repel(aes(label = id))

PC3 versus PC4

# PC3 vs PC4: colour = drug, point size = time, shape = replicate, labelled by id.
ggplot(pcTab, aes(x=PC3, y=PC4)) +
    geom_point(aes(col = drug, size = factor(time),
               shape = replicate)) +
    ggrepel::geom_text_repel(aes(label = id))

Hierarchical clustering


# Clustered heat map for ppe.vst. The matrix is built from ppe.vst itself
# (up to 2000 most variable imputed rows) instead of reusing whatever
# `exprMat` was left over from the last PCA chunk.
heatMat <- assays(ppe.vst)[["imputed"]]
heatMat <- heatMat[head(order(genefilter::rowSds(heatMat), decreasing = TRUE), 2000),]
colAnno <- colData(ppe.vst)[,c("cellLine","drug","time")] %>% data.frame()
exprMat.scaled <- jyluMisc::mscale(heatMat, center = TRUE, scale = TRUE, censor = 5)
pheatmap::pheatmap(exprMat.scaled, annotation_col = colAnno, clustering_method = "ward.D2",
                   color = colorRampPalette(c("blue","white","red"))(100), 
                   breaks = seq(-5,5, length.out = 101), 
                   show_rownames = FALSE)


# Clustered heat map for ppe.log2Med. The matrix is built from ppe.log2Med
# itself (up to 2000 most variable imputed rows) instead of reusing whatever
# `exprMat` was left over from the last PCA chunk.
heatMat <- assays(ppe.log2Med)[["imputed"]]
heatMat <- heatMat[head(order(genefilter::rowSds(heatMat), decreasing = TRUE), 2000),]
colAnno <- colData(ppe.log2Med)[,c("cellLine","drug","time")] %>% data.frame()
exprMat.scaled <- jyluMisc::mscale(heatMat, center = TRUE, scale = TRUE, censor = 5)
pheatmap::pheatmap(exprMat.scaled, annotation_col = colAnno, clustering_method = "ward.D2",
                   color = colorRampPalette(c("blue","white","red"))(100), 
                   breaks = seq(-5,5, length.out = 101), 
                   show_rownames = FALSE)

log2 only

# Clustered heat map for ppe.log2Only. The matrix is built from ppe.log2Only
# itself (up to 2000 most variable imputed rows) instead of reusing whatever
# `exprMat` was left over from the last PCA chunk.
heatMat <- assays(ppe.log2Only)[["imputed"]]
heatMat <- heatMat[head(order(genefilter::rowSds(heatMat), decreasing = TRUE), 2000),]
colAnno <- colData(ppe.log2Only)[,c("cellLine","drug","time")] %>% data.frame()
exprMat.scaled <- jyluMisc::mscale(heatMat, center = TRUE, scale = TRUE, censor = 5)
pheatmap::pheatmap(exprMat.scaled, annotation_col = colAnno, clustering_method = "ward.D2",
                   color = colorRampPalette(c("blue","white","red"))(100), 
                   breaks = seq(-5,5, length.out = 101), 
                   show_rownames = FALSE)


# Clustered heat map for ppe.pre. The matrix is built explicitly from ppe.pre
# (up to 2000 most variable imputed rows) so the chunk no longer depends on
# `exprMat` happening to hold the ppe.pre data from the last PCA chunk.
heatMat <- assays(ppe.pre)[["imputed"]]
heatMat <- heatMat[head(order(genefilter::rowSds(heatMat), decreasing = TRUE), 2000),]
colAnno <- colData(ppe.pre)[,c("cellLine","drug","time")] %>% data.frame()
exprMat.scaled <- jyluMisc::mscale(heatMat, center = TRUE, scale = TRUE, censor = 5)
pheatmap::pheatmap(exprMat.scaled, annotation_col = colAnno, clustering_method = "ward.D2",
                   color = colorRampPalette(c("blue","white","red"))(100), 
                   breaks = seq(-5,5, length.out = 101), 
                   show_rownames = FALSE)

Compare the reproducibility of replicates when using different normalization methods

# Compute the replicate SD table for each normalization, tag each table with
# its method name in `norm`, and stack them into one long table.
sdTab.vst <- getSdTab(ppe.vst) %>% mutate(norm = "vst")
sdTab.log2Med <- getSdTab(ppe.log2Med) %>% mutate(norm = "log2Med")
sdTab.log2Only <- getSdTab(ppe.log2Only) %>% mutate(norm = "log2Only")
sdTab.pre <- getSdTab(ppe.pre) %>% mutate(norm = "precursor")
sumTab <- bind_rows(sdTab.vst, sdTab.log2Med, sdTab.log2Only, sdTab.pre) 

Add annotations

# One annotation row per sampleCondi (first occurrence kept), taken from ppe.vst.
colTab <- colData(ppe.vst) %>% as_tibble() %>%
  distinct(sampleCondi,.keep_all = TRUE)
# NOTE(review): no `by` is given, so the join uses every column name shared by
# both tables -- presumably only sampleCondi; confirm.
sumTab <- left_join(sumTab, colTab)

Overall distribution

# Overlaid histograms of the replicate SD, one semi-transparent layer per
# normalization method (the dangling `+` at the end of the chain was removed).
ggplot(sumTab, aes(x=sdVal, fill = norm)) +
  geom_histogram(position = "identity", alpha=0.5, color = "grey50")

Per sample

# Violin plots of the replicate SD per sample condition, filled by
# normalization method and faceted by drug.
ggplot(sumTab, aes(x=sampleCondi, y = sdVal, fill = norm)) +
  geom_violin() +
  # `scales` spelled out instead of relying on partial argument matching
  facet_wrap(~drug, scales = "free_x") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))

Mean versus SD

# Mean-versus-SD relationship: hexagonal density with a smoothed trend,
# one panel per normalization method stacked in a single column.
ggplot(data = sumTab, mapping = aes(x = meanVal, y = sdVal)) +
  geom_hex() +
  geom_smooth() +
  facet_wrap(~norm, ncol = 1)

Rank of sd for each normalization method

# Rank the normalization methods within each (sampleCondi, rowID) group by
# their replicate SD (1 = smallest SD). rank() replaces order(): order()
# returns the permutation that sorts the values, not the rank of each value.
ordTab <- group_by(sumTab, sampleCondi, rowID) %>%
  mutate(index = rank(sdVal, ties.method = "first"))
# Number of features at each rank position, per normalization method.
ggplot(ordTab, aes(x=index, fill = norm)) +
  geom_bar(position = "dodge", alpha=0.5, color = "grey50") 

For each sample, which normalization method gives the best reproducibility for replicates

# After sorting by index, keep the first row per distinct combination and count
# the rows per sampleCondi / norm / drug.
# NOTE(review): ordTab is presumably still grouped by (sampleCondi, rowID), so
# distinct() keeps one row (lowest index) per feature and condition -- confirm.
ordPerTab <- arrange(ordTab, index) %>% distinct(sampleCondi, drug,.keep_all = TRUE) %>%
  group_by(sampleCondi, norm, drug) %>% summarise(n=length(rowID))
ggplot(ordPerTab, aes(x=sampleCondi, y = n, fill = norm)) +
  # the plot had no geom layer and would render empty panels; bars of the
  # counts are the presumed intent
  geom_col() +
  facet_wrap(~drug, scales = "free_x") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))

It seems log2 + median scaling can get the best reproducibility among replicates. No normalization is the worst.

Phosphoproteomic with proteomic regressed out


# Preprocess the "PhosReg" assay of maeData with normalize = TRUE and no
# transformation (transform = "none").
ppeSub <- preprocessPhos(maeData, normalize = TRUE, transform = "none", assayName = "PhosReg")
[1] "Number of proteins and samples:"
[1] 3574   96

Distribution after normalization

# Per-sample box plots of the first assay of ppeSub (missing values dropped),
# filled by time point and faceted by drug.
countMat <- assay(ppeSub)
annoTab <- colData(ppeSub)[,c("sample","time","drug")] %>% as_tibble()
countTab <- countMat %>% as_tibble(rownames = "id") %>% 
    pivot_longer(-id) %>%
    # restored truncated condition: keep only non-missing measurements
    filter(!is.na(value)) %>%
    left_join(annoTab, by = c(name = "sample"))
ggplot(countTab, aes(x=name, y=value)) +
    geom_boxplot(aes(fill = time)) +
    facet_wrap(~drug, scales = "free_x") +
    theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))


# PCA on the (up to) 1000 most variable rows of the imputed ppeSub assay.
exprMat <- assays(ppeSub)[["imputed"]]
sds <- genefilter::rowSds(exprMat)
# head() avoids NA row indices when fewer than 1000 rows are available
exprMat <- exprMat[head(order(sds, decreasing = TRUE), 1000),]

smpAnno <- colData(ppeSub) %>%
    as_tibble(rownames = "id")

pcRes <- prcomp(t(exprMat), scale. = TRUE, center = TRUE)
# The pipe was left dangling; the PCA plots use drug/time/replicate, so the
# sample annotation is joined on by sample id.
pcTab <- pcRes$x[,1:10] %>% 
    as_tibble(rownames = "id") %>%
    left_join(smpAnno, by = "id")

PC1 versus PC2

# PC1 vs PC2: colour = drug, point size = time, shape = replicate, labelled by id.
ggplot(pcTab, aes(x=PC1, y=PC2)) +
    geom_point(aes(col = drug, size = factor(time),
               shape = replicate)) +
    ggrepel::geom_text_repel(aes(label = id))

PC3 versus PC4

# PC3 vs PC4: colour = drug, point size = time, shape = replicate, labelled by id.
ggplot(pcTab, aes(x=PC3, y=PC4)) +
    geom_point(aes(col = drug, size = factor(time),
               shape = replicate)) +
    ggrepel::geom_text_repel(aes(label = id))

Hierarchical clustering

# Heat map of `exprMat` after jyluMisc::mscale (centered, scaled, censor = 5),
# clustered with ward.D2 and annotated with cell line, drug and time.
colAnno <- data.frame(colData(ppeSub)[, c("cellLine", "drug", "time")])
exprMat.scaled <- jyluMisc::mscale(exprMat, center = TRUE, scale = TRUE,
                                   censor = 5)
pheatmap::pheatmap(
    exprMat.scaled,
    annotation_col = colAnno,
    clustering_method = "ward.D2",
    color = colorRampPalette(c("blue", "white", "red"))(100),
    breaks = seq(-5, 5, length.out = 101),
    show_rownames = FALSE
)

Remove one potential outlier

# Drop the column named "cell5_combo_24h_Rep2" (the suspected outlier).
ppeSub <- ppeSub[, colnames(ppeSub)!="cell5_combo_24h_Rep2"]

Redo PCA

# PCA on the (up to) 1000 most variable rows of the imputed ppeSub assay.
exprMat <- assays(ppeSub)[["imputed"]]
sds <- genefilter::rowSds(exprMat)
# head() avoids NA row indices when fewer than 1000 rows are available
exprMat <- exprMat[head(order(sds, decreasing = TRUE), 1000),]

smpAnno <- colData(ppeSub) %>%
    as_tibble(rownames = "id")

pcRes <- prcomp(t(exprMat), scale. = TRUE, center = TRUE)
# The pipe was left dangling; the PCA plots use drug/time/replicate, so the
# sample annotation is joined on by sample id.
pcTab <- pcRes$x[,1:10] %>% 
    as_tibble(rownames = "id") %>%
    left_join(smpAnno, by = "id")

PC1 versus PC2

# PC1 vs PC2: colour = drug, point size = time, shape = replicate, labelled by id.
ggplot(pcTab, aes(x=PC1, y=PC2)) +
    geom_point(aes(col = drug, size = factor(time),
               shape = replicate)) +
    ggrepel::geom_text_repel(aes(label = id))

PC3 versus PC4

# PC3 vs PC4: colour = drug, point size = time, shape = replicate, labelled by id.
ggplot(pcTab, aes(x=PC3, y=PC4)) +
    geom_point(aes(col = drug, size = factor(time),
               shape = replicate)) +
    ggrepel::geom_text_repel(aes(label = id))

Hierarchical clustering

# Heat map of `exprMat` after jyluMisc::mscale (centered, scaled, censor = 5),
# clustered with ward.D2 and annotated with cell line, drug and time.
colAnno <- data.frame(colData(ppeSub)[, c("cellLine", "drug", "time")])
exprMat.scaled <- jyluMisc::mscale(exprMat, center = TRUE, scale = TRUE,
                                   censor = 5)
pheatmap::pheatmap(
    exprMat.scaled,
    annotation_col = colAnno,
    clustering_method = "ward.D2",
    color = colorRampPalette(c("blue", "white", "red"))(100),
    breaks = seq(-5, 5, length.out = 101),
    show_rownames = FALSE
)

Use ratio between phosphoproteome and proteome


# Preprocess the "PhosRatio" assay of maeData with normalize = TRUE and no
# transformation (transform = "none").
ppeSub <- preprocessPhos(maeData, normalize = TRUE, transform = "none", assayName = "PhosRatio")
[1] "Number of proteins and samples:"
[1] 3574   96

Distribution after normalization

# Per-sample box plots of the first assay of ppeSub (missing values dropped),
# filled by time point and faceted by drug.
countMat <- assay(ppeSub)
annoTab <- colData(ppeSub)[,c("sample","time","drug")] %>% as_tibble()
countTab <- countMat %>% as_tibble(rownames = "id") %>% 
    pivot_longer(-id) %>%
    # restored truncated condition: keep only non-missing measurements
    filter(!is.na(value)) %>%
    left_join(annoTab, by = c(name = "sample"))
ggplot(countTab, aes(x=name, y=value)) +
    geom_boxplot(aes(fill = time)) +
    facet_wrap(~drug, scales = "free_x") +
    theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))


# PCA on the (up to) 1000 most variable rows of the imputed ppeSub assay.
exprMat <- assays(ppeSub)[["imputed"]]
sds <- genefilter::rowSds(exprMat)
# head() avoids NA row indices when fewer than 1000 rows are available
exprMat <- exprMat[head(order(sds, decreasing = TRUE), 1000),]

smpAnno <- colData(ppeSub) %>%
    as_tibble(rownames = "id")

pcRes <- prcomp(t(exprMat), scale. = TRUE, center = TRUE)
# The pipe was left dangling; the PCA plots use drug/time/replicate, so the
# sample annotation is joined on by sample id.
pcTab <- pcRes$x[,1:10] %>% 
    as_tibble(rownames = "id") %>%
    left_join(smpAnno, by = "id")

PC1 versus PC2

# PC1 vs PC2: colour = drug, point size = time, shape = replicate, labelled by id.
ggplot(pcTab, aes(x=PC1, y=PC2)) +
    geom_point(aes(col = drug, size = factor(time),
               shape = replicate)) +
    ggrepel::geom_text_repel(aes(label = id))

PC3 versus PC4

# PC3 vs PC4: colour = drug, point size = time, shape = replicate, labelled by id.
ggplot(pcTab, aes(x=PC3, y=PC4)) +
    geom_point(aes(col = drug, size = factor(time),
               shape = replicate)) +
    ggrepel::geom_text_repel(aes(label = id))

Hierarchical clustering

# Heat map of `exprMat` after jyluMisc::mscale (centered, scaled, censor = 5),
# clustered with ward.D2 and annotated with cell line, drug and time.
colAnno <- data.frame(colData(ppeSub)[, c("cellLine", "drug", "time")])
exprMat.scaled <- jyluMisc::mscale(exprMat, center = TRUE, scale = TRUE,
                                   censor = 5)
pheatmap::pheatmap(
    exprMat.scaled,
    annotation_col = colAnno,
    clustering_method = "ward.D2",
    color = colorRampPalette(c("blue", "white", "red"))(100),
    breaks = seq(-5, 5, length.out = 101),
    show_rownames = FALSE
)

Remove one potential outlier

# Drop the column named "cell5_combo_24h_Rep2" (the suspected outlier).
ppeSub <- ppeSub[, colnames(ppeSub)!="cell5_combo_24h_Rep2"]

Redo PCA

# PCA on the (up to) 1000 most variable rows of the imputed ppeSub assay.
exprMat <- assays(ppeSub)[["imputed"]]
sds <- genefilter::rowSds(exprMat)
# head() avoids NA row indices when fewer than 1000 rows are available
exprMat <- exprMat[head(order(sds, decreasing = TRUE), 1000),]

smpAnno <- colData(ppeSub) %>%
    as_tibble(rownames = "id")

pcRes <- prcomp(t(exprMat), scale. = TRUE, center = TRUE)
# The pipe was left dangling; the PCA plots use drug/time/replicate, so the
# sample annotation is joined on by sample id.
pcTab <- pcRes$x[,1:10] %>% 
    as_tibble(rownames = "id") %>%
    left_join(smpAnno, by = "id")

PC1 versus PC2

# PC1 vs PC2: colour = drug, point size = time, shape = replicate, labelled by id.
ggplot(pcTab, aes(x=PC1, y=PC2)) +
    geom_point(aes(col = drug, size = factor(time),
               shape = replicate)) +
    ggrepel::geom_text_repel(aes(label = id))

PC3 versus PC4

# PC3 vs PC4: colour = drug, point size = time, shape = replicate, labelled by id.
ggplot(pcTab, aes(x=PC3, y=PC4)) +
    geom_point(aes(col = drug, size = factor(time),
               shape = replicate)) +
    ggrepel::geom_text_repel(aes(label = id))

Hierarchical clustering

# Heat map of `exprMat` after jyluMisc::mscale (centered, scaled, censor = 5),
# clustered with ward.D2 and annotated with cell line, drug and time.
colAnno <- data.frame(colData(ppeSub)[, c("cellLine", "drug", "time")])
exprMat.scaled <- jyluMisc::mscale(exprMat, center = TRUE, scale = TRUE,
                                   censor = 5)
pheatmap::pheatmap(
    exprMat.scaled,
    annotation_col = colAnno,
    clustering_method = "ward.D2",
    color = colorRampPalette(c("blue", "white", "red"))(100),
    breaks = seq(-5, 5, length.out = 101),
    show_rownames = FALSE
)

