1 Data

library(readxl)
library(SingleCellExperiment)
library(scater)
library(ggplot2)
library(ggthemes)
library(ggpubr)
library(moon)
library(rcartocolor)
library(gridExtra)
library(RColorBrewer)
library(pheatmap)
library(ggrepel)
library(dplyr)
ggplot2::theme_set(theme_bw() + theme_yx())
theme_black <- function() {
    theme_base() + 
        theme(panel.background = element_rect(fill = "black", 
                                              colour = NA),
              plot.background =  element_rect(fill = "black", 
                                              colour = NA),
              legend.key = element_rect(fill = "black", 
                                        colour = NA),
              legend.background = element_rect(fill = "black", color = "black"),
              legend.text = element_text(color = "white"),
              strip.background = element_rect(fill = "black", 
                                              colour = NA),
              text = element_text(color = "white"))
}
sce_subset <- readRDS("data/NanoStringDaveLin/sce_WT_MOR28.rds")

colData_cartridge_2019 <- colData(sce_subset)
annotation_col <- colData_cartridge_2019[, c("Sex", "Genotype", "Sample.Type", "Age", "OR.Type")]
age_color <- carto_pal(7, "Sunset")
names(age_color) <- names(table(annotation_col$Age))
or_color <- tableau_color_pal("Superfishel Stone")(3)[c(1, 3)]
names(or_color) <- names(table(annotation_col$OR.Type))
Genotype_color <-  carto_pal(2, "Bold")[c(1:2)]
names(Genotype_color) <- names(table(annotation_col$Genotype))
names(age_color) <- names(table(annotation_col$Age))
Sex_color <- RColorBrewer::brewer.pal(2, "Set1")
names(Sex_color) <- names(table(annotation_col$Sex))
sample_color <- RColorBrewer::brewer.pal(2, "Set3")
names(sample_color) <- names(table(annotation_col$Sample.Type))

Cluster_color <- carto_pal(2, "Vivid")
names(Cluster_color) <- c(1, 2)

anno_colors <- list(Age = age_color,
                    Genotype = Genotype_color[c(1:2)],
                    cluster = Cluster_color[c(1:2)],
                    Sex = Sex_color[c(1:2)],
                    Sample.Type = sample_color[c(1:2)],
                    OR.Type = or_color)

2 Subset of WT & MOR28 in 2019 data

sce_subset$cluster <- factor(sce_subset$cluster)
set.seed(2020)
sce_subset <- runPCA(sce_subset)
print("percent of variacne")
## [1] "percent of variacne"
print(attr(reducedDim(sce_subset), "percentVar"))
##  [1] 62.0507039  6.3727729  4.3372164  3.5675338  2.8184664  2.4638722
##  [7]  2.1677750  1.9433015  1.8248348  1.6659495  1.6232741  1.3039883
## [13]  1.2345340  1.1963799  1.0828636  0.9519746  0.8808929  0.8271117
## [19]  0.7526504  0.5586262  0.3752780
set.seed(2020)
sce_subset <- runTSNE(sce_subset, dimred = "PCA")

2.1 PCA analysis

df_toPlot <- moon::makeMoonDF(sce_subset)
g1 <- ggplot(df_toPlot, aes(x = PCA1, y = PCA2, color = Sex)) +
    geom_point(size = 4) +
    scale_color_manual(values = anno_colors$Sex) +
    theme(aspect.ratio = 1)

g2 <- ggplot(df_toPlot, aes(x = PCA2, y = PCA3, color = Sex)) +
    geom_point(size = 4) +
    scale_color_manual(values = anno_colors$Sex) +
    theme(aspect.ratio = 1)

g3 <- ggplot(df_toPlot, aes(x = PCA3, y = PCA4, color = Sex)) +
    geom_point(size = 4) +
    scale_color_manual(values = anno_colors$Sex) +
    theme(aspect.ratio = 1)

ggarrange(g1, g2, g3, ncol = 3, nrow = 1, align = "hv", common.legend = TRUE)

ggsave("figures/DaveLinNanostring/cartridge_2019_PCA_WT_MOR28_bySex.pdf", width = 10, height = 4)
g1 <- ggplot(df_toPlot, aes(x = PCA1, y = PCA2, color = Sex)) +
    geom_point(size = 4) +
    scale_color_manual(values = anno_colors$Sex) +
    theme(aspect.ratio = 1)

g2 <- ggplot(df_toPlot, aes(x = PCA1, y = PCA2, color = cluster)) +
    geom_point(size = 4) +
    scale_color_manual(values = anno_colors$cluster) +
    theme(aspect.ratio = 1)

g3 <- ggplot(df_toPlot, aes(x = PCA2, y = PCA3, color = Sex)) +
    geom_point(size = 4) +
    scale_color_manual(values = anno_colors$Sex) +
    theme(aspect.ratio = 1)

g4 <- ggplot(df_toPlot, aes(x = PCA2, y = PCA3, color = cluster)) +
    geom_point(size = 4) +
    scale_color_manual(values = anno_colors$cluster) +
    theme(aspect.ratio = 1)

ggarrange(g1, g2, g3, g4, ncol = 2, nrow = 2, align = "hv")

ggsave("figures/DaveLinNanostring/cartridge_2019_PCA_WT_MOR28_bySex_PCA.pdf", width = 8, height = 7)
g1 <- ggplot(df_toPlot, aes(x = PCA1, y = PCA2, color = logcounts(sce_subset)["Cdh8", ])) +
    geom_point(size = 4) +
    scale_color_viridis_c() +
    theme(aspect.ratio = 1) +
    labs(color = "", title = "Cdh8")


g2 <- ggplot(df_toPlot, aes(x = PCA1, y = PCA2, color = logcounts(sce_subset)["Pcdhga6", ])) +
    geom_point(size = 4) +
    scale_color_viridis_c() +
    theme(aspect.ratio = 1) +
    labs(color = "", title = "Pcdhga6")

g3 <- ggplot(df_toPlot, aes(x = PCA1, y = PCA2, color = logcounts(sce_subset)["Pcdh17", ])) +
    geom_point(size = 4) +
    scale_color_viridis_c() +
    theme(aspect.ratio = 1) +
    labs(color = "", title = "Pcdh17")

g4 <- ggplot(df_toPlot, aes(x = PCA1, y = PCA2, color = logcounts(sce_subset)["Pcdh19", ])) +
    geom_point(size = 4) +
    scale_color_viridis_c() +
    theme(aspect.ratio = 1) +
    labs(color = "", title = "Pcdh19")

ggarrange(g1, g2, g3, g4, ncol = 2, nrow = 2, align = "hv")

ggsave("figures/DaveLinNanostring/cartridge_2019_PCA_WT_MOR28_PCA_selected.pdf", width = 8, height = 7)

2.2 DE analysis between Cluster

library(limma)
x <- logcounts(sce_subset)

design <- model.matrix(~ sce_subset$cluster)
# contr.matrix <- makeContrasts(
#   cellTypes = cluster2 - cluster1,
#   levels = colnames(design))

vfit <- lmFit(x, design, trend = TRUE, robust = TRUE)
#vfit <- contrasts.fit(vfit, contrasts = contr.matrix)
efit <- eBayes(vfit)
top_cluster_genes <- topTable(efit, n = Inf)
DT::datatable(round(top_cluster_genes, 3))
top_cluster_genes$name <- rownames(top_cluster_genes)
ggplot(top_cluster_genes, aes(x = logFC, y = -log10(P.Value))) +
    geom_point() +
    geom_point(data = top_cluster_genes %>% dplyr::filter(abs(logFC) > 1, P.Value < 0.05), 
               aes(x = logFC, y = -log10(P.Value)), color = "red") +
    geom_text_repel(data = top_cluster_genes %>% dplyr::filter(abs(logFC) > 1, P.Value < 0.05), 
                    aes(x = logFC, y = -log10(P.Value), label = name)) +
    theme(aspect.ratio = 1)

ggsave("figures/DaveLinNanostring/cartridge_2019_WT_MOR28_byCluster_volcano.pdf", 
       width = 9, height = 7)


selected_genes <- top_cluster_genes %>% dplyr::filter(abs(logFC) > 4, 
                                                      adj.P.Val < 0.01) %>% rownames()

rownames(df_toPlot) <- colnames(sce_subset)

pheatmap(logcounts(sce_subset)[selected_genes, order(sce_subset$Sex)],
         clustering_method = "ward.D2",
         annotation_col = df_toPlot[, c("cluster", "Sex"), drop = FALSE],
         annotation_colors = anno_colors,
         # annotation_row = annotation_col,
         # main = "Single cell vs Bulk correlation",
         #file = "figures/DaveLinNanostring/cartridge_2019_WT_MOR28_byCluster_heatmap_selected.pdf",
         width = 8,
         height = 6,
         #cluster_cols = FALSE,
         border_color = NA)

hclust_res <- hclust(dist(t(logcounts(sce_subset))))

pheatmap(logcounts(sce_subset)[, order(sce_subset$Sex, hclust_res$order)],
         clustering_method = "ward.D2",
         annotation_col = df_toPlot[, c("Sex", "cluster"), drop = FALSE],
         annotation_colors = anno_colors,
         # annotation_row = annotation_col,
         # main = "Single cell vs Bulk correlation",
         #file = "figures/DaveLinNanostring/cartridge_2019_WT_MOR28_bySex_heatmap_all.pdf",
         width = 8,
         height = 20,
         #cluster_cols = FALSE,
         border_color = NA)

2.3 DE analysis between sex

x <- logcounts(sce_subset)

design <- model.matrix(~ sce_subset$Sex)
# contr.matrix <- makeContrasts(
#   cellTypes = cluster2 - cluster1,
#   levels = colnames(design))

vfit <- lmFit(x, design, trend = TRUE, robust = TRUE)
#vfit <- contrasts.fit(vfit, contrasts = contr.matrix)
efit <- eBayes(vfit)
top_sex_genes <- topTable(efit, n = Inf)
DT::datatable(round(top_sex_genes, 3))
top_sex_genes$name <- rownames(top_sex_genes)
ggplot(top_sex_genes, aes(x = logFC, y = -log10(P.Value))) +
    geom_point() +
    geom_point(data = top_sex_genes %>% dplyr::filter(abs(logFC) > 1, P.Value < 0.05), 
               aes(x = logFC, y = -log10(P.Value)), color = "red") +
    geom_text_repel(data = top_sex_genes %>% dplyr::filter(abs(logFC) > 1, P.Value < 0.05), 
                    aes(x = logFC, y = -log10(P.Value), label = name)) +
    theme(aspect.ratio = 1)

ggsave("figures/DaveLinNanostring/cartridge_2019_WT_MOR28_bySex_volcano.pdf", width = 9, height = 7)


g <- lapply(top_sex_genes %>% dplyr::filter(abs(logFC) > 1, P.Value < 0.05) %>% rownames(),
            function(x) {
                ggplot(df_toPlot, aes(x = Sex, y = logcounts(sce_subset)[x, ], color = Sex)) +
                    geom_boxplot() +
                    scale_color_manual(values = anno_colors$Sex) +
                    theme(aspect.ratio = 1) +
                    ylab(x)
            })
ggarrange(plotlist = g, ncol = 4, nrow = 2, align = "hv", common.legend = TRUE)

ggsave("figures/DaveLinNanostring/cartridge_2019_WT_MOR28_bySex_boxplot.pdf", width = 10, height = 8)

g <- lapply(top_sex_genes %>% dplyr::filter(abs(logFC) > 1, P.Value < 0.05) %>% rownames(),
            function(x) {
                ggplot(df_toPlot, aes(x = Sex, y = logcounts(sce_subset)[x, ], color = Sex)) +
                    geom_boxplot() +
                    scale_color_manual(values = anno_colors$Sex) +
                    theme(aspect.ratio = 1) +
                    ylab(x) + facet_wrap(~cluster)
            })
ggarrange(plotlist = g, ncol = 2, nrow = 4, align = "hv", common.legend = TRUE)

ggsave("figures/DaveLinNanostring/cartridge_2019_WT_MOR28_bySexCluster_boxplot.pdf", 
       width = 10, height = 12)
selected_genes <- top_sex_genes %>% dplyr::filter(abs(logFC) > 0.5, P.Value < 0.05) %>% rownames()

rownames(df_toPlot) <- colnames(sce_subset)

pheatmap(logcounts(sce_subset)[selected_genes, order(sce_subset$Sex)],
         clustering_method = "ward.D2",
         annotation_col = df_toPlot[, "Sex", drop = FALSE],
         annotation_colors = anno_colors,
         # annotation_row = annotation_col,
         # main = "Single cell vs Bulk correlation",
         #file = "figures/DaveLinNanostring/cartridge_2019_WT_MOR28_bySex_heatmap_selected.pdf",
         width = 8,
         height = 6,
         cluster_cols = FALSE,
         border_color = NA)

hclust_res <- hclust(dist(t(logcounts(sce_subset))))

pheatmap(logcounts(sce_subset)[, order(sce_subset$Sex, hclust_res$order)],
         clustering_method = "ward.D2",
         annotation_col = df_toPlot[, "Sex", drop = FALSE],
         annotation_colors = anno_colors,
         # annotation_row = annotation_col,
         # main = "Single cell vs Bulk correlation",
         #file = "figures/DaveLinNanostring/cartridge_2019_WT_MOR28_bySex_heatmap_all.pdf",
         width = 8,
         height = 20,
         cluster_cols = FALSE,
         border_color = NA)

2.4 DE analysis between sex + cluster

x <- logcounts(sce_subset)

design <- model.matrix(~ sce_subset$Sex + sce_subset$cluster)
# contr.matrix <- makeContrasts(
#   cellTypes = cluster2 - cluster1,
#   levels = colnames(design))

vfit <- lmFit(x, design, trend = TRUE, robust = TRUE)
#vfit <- contrasts.fit(vfit, contrasts = contr.matrix)
efit <- eBayes(vfit)
top_sex_genes_model2 <- topTable(efit, n = Inf, coef = 2)
DT::datatable(round(top_sex_genes_model2, 3))
g <- lapply(top_sex_genes_model2 %>% dplyr::filter(abs(logFC) > 1, P.Value < 0.1) %>% 
                rownames(),
            function(x) {
                ggplot(df_toPlot, aes(x = Sex, y = logcounts(sce_subset)[x, ], color = Sex)) +
                    geom_boxplot() +
                    scale_color_manual(values = anno_colors$Sex) +
                    theme(aspect.ratio = 1) +
                    ylab(x)
            })
ggarrange(plotlist = g, ncol = 4, nrow = 2, align = "hv", common.legend = TRUE)

g <- lapply(top_sex_genes_model2 %>% dplyr::filter(abs(logFC) > 1, P.Value < 0.1) %>% 
                rownames(),
            function(x) {
                ggplot(df_toPlot, aes(x = Sex, y = logcounts(sce_subset)[x, ], color = Sex)) +
                    geom_boxplot() +
                    scale_color_manual(values = anno_colors$Sex) +
                    theme(aspect.ratio = 1) +
                    ylab(x) + facet_wrap(~cluster)
            })
ggarrange(plotlist = g, ncol = 4, nrow = 2, align = "hv", common.legend = TRUE)

2.5 Selected gene visualisation

selected_genes <- c("Pcdhga6", "Pcdhga11", "Pcdh1", "Pcdh7", "Pcdh9")

g <- lapply(selected_genes,
            function(x) {
                ggplot(df_toPlot, aes(x = Sex, y = logcounts(sce_subset)[x, ], color = Sex)) +
                    geom_boxplot() +
                    scale_color_manual(values = anno_colors$Sex) +
                    theme(aspect.ratio = 1) +
                    ylab(x) 
            })
ggarrange(plotlist = g, ncol = 3, nrow = 2, align = "hv", common.legend = TRUE)

g <- lapply(selected_genes,
            function(x) {
                ggplot(df_toPlot, aes(x = Sex, y = logcounts(sce_subset)[x, ], color = Sex)) +
                    geom_boxplot() +
                    scale_color_manual(values = anno_colors$Sex) +
                    theme(aspect.ratio = 1) +
                    ylab(x) + facet_wrap(~cluster)
            })
ggarrange(plotlist = g, ncol = 2, nrow = 3, align = "hv", common.legend = TRUE)

hclust_res <- hclust(dist(t(logcounts(sce_subset)[selected_genes, ])))

pheatmap(logcounts(sce_subset)[selected_genes, 
                               order(sce_subset$Sex)],
         clustering_method = "ward.D2",
         annotation_col = df_toPlot[, c("Sex", "cluster"), drop = FALSE],
         annotation_colors = anno_colors,
         # annotation_row = annotation_col,
         # main = "Single cell vs Bulk correlation",
         #file = "figures/DaveLinNanostring/cartridge_2019_WT_MOR28_bySex_heatmap_all.pdf",
         width = 8,
         height = 20,
         cluster_rows = FALSE,
         # cluster_cols = FALSE,
         border_color = NA)

selected_genes <- c("Pcdh8", "Pcdh9", "Pcdhga6", "Pcdh17", "Pcdhga8",
                    "Cdh9", "Pcdhga11")

g <- lapply(selected_genes,
            function(x) {
                ggplot(df_toPlot, aes(x = Sex, y = logcounts(sce_subset)[x, ], color = Sex)) +
                    geom_boxplot() +
                    scale_color_manual(values = anno_colors$Sex) +
                    theme(aspect.ratio = 1) +
                    ylab(x) 
            })
ggarrange(plotlist = g, ncol = 4, nrow = 2, align = "hv", common.legend = TRUE)

g <- lapply(selected_genes,
            function(x) {
                ggplot(df_toPlot, aes(x = Sex, y = logcounts(sce_subset)[x, ], color = Sex)) +
                    geom_boxplot() +
                    scale_color_manual(values = anno_colors$Sex) +
                    theme(aspect.ratio = 1) +
                    ylab(x) + facet_wrap(~cluster)
            })
ggarrange(plotlist = g, ncol = 2, nrow = 4, align = "hv", common.legend = TRUE)

hclust_res <- hclust(dist(t(logcounts(sce_subset)[selected_genes, ])))

pheatmap(logcounts(sce_subset)[selected_genes, 
                               order(sce_subset$Sex)],
         clustering_method = "ward.D2",
         annotation_col = df_toPlot[, c("Sex", "cluster"), drop = FALSE],
         annotation_colors = anno_colors,
         # annotation_row = annotation_col,
         # main = "Single cell vs Bulk correlation",
         #file = "figures/DaveLinNanostring/cartridge_2019_WT_MOR28_bySex_heatmap_all.pdf",
         width = 8,
         height = 20,
         cluster_rows = FALSE,
         # cluster_cols = FALSE,
         border_color = NA)

pheatmap(logcounts(sce_subset)[grep("Pcdh|Cdh|Cels", rownames(sce_subset), value = TRUE), order(sce_subset$Sex)],
         clustering_method = "ward.D2",
         annotation_col = df_toPlot[, c("Sex"), drop = FALSE],
         annotation_colors = anno_colors,
         # annotation_row = annotation_col,
         # main = "Single cell vs Bulk correlation",
         # file = "figures/DaveLinNanostring/cartridge_2019_WT_MOR28_bySex_heatmap_selected_cadherins.pdf",
         width = 8,
         height = 15,
         cluster_cols = FALSE,
         border_color = NA)

pheatmap(logcounts(sce_subset)[grep("Pcdh|Cdh|Cels", rownames(sce_subset), value = TRUE), order(sce_subset$Sex)],
         clustering_method = "ward.D2",
         annotation_col = df_toPlot[, c("Sex", "cluster"), drop = FALSE],
         annotation_colors = anno_colors,
         # annotation_row = annotation_col,
         # main = "Single cell vs Bulk correlation",
         # file = "figures/DaveLinNanostring/cartridge_2019_WT_MOR28_bySex_heatmap_selected_cadherins_withCluster.pdf",
         width = 8,
         height = 15,
         #cluster_cols = FALSE,
         border_color = NA)

3 Session Info

sessionInfo()
## R version 4.2.1 (2022-06-23)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Catalina 10.15.7
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_AU.UTF-8/en_AU.UTF-8/en_AU.UTF-8/C/en_AU.UTF-8/en_AU.UTF-8
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] limma_3.52.4                dplyr_1.0.10               
##  [3] ggrepel_0.9.1               pheatmap_1.0.12            
##  [5] RColorBrewer_1.1-3          gridExtra_2.3              
##  [7] rcartocolor_2.0.0           moon_0.1.0                 
##  [9] ggpubr_0.4.0                ggthemes_4.2.4             
## [11] scater_1.24.0               ggplot2_3.4.0              
## [13] scuttle_1.6.3               SingleCellExperiment_1.18.1
## [15] SummarizedExperiment_1.26.1 Biobase_2.56.0             
## [17] GenomicRanges_1.48.0        GenomeInfoDb_1.32.4        
## [19] IRanges_2.30.1              S4Vectors_0.34.0           
## [21] BiocGenerics_0.42.0         MatrixGenerics_1.8.1       
## [23] matrixStats_0.62.0          readxl_1.4.1               
## 
## loaded via a namespace (and not attached):
##  [1] bitops_1.0-7              tools_4.2.1              
##  [3] backports_1.4.1           bslib_0.4.0              
##  [5] DT_0.26                   utf8_1.2.2               
##  [7] R6_2.5.1                  irlba_2.3.5.1            
##  [9] vipor_0.4.5               DBI_1.1.3                
## [11] colorspace_2.0-3          withr_2.5.0              
## [13] tidyselect_1.2.0          compiler_4.2.1           
## [15] textshaping_0.3.6         cli_3.4.1                
## [17] BiocNeighbors_1.14.0      DelayedArray_0.22.0      
## [19] labeling_0.4.2            sass_0.4.2               
## [21] scales_1.2.1              systemfonts_1.0.4        
## [23] stringr_1.4.1             digest_0.6.30            
## [25] rmarkdown_2.17            XVector_0.36.0           
## [27] pkgconfig_2.0.3           htmltools_0.5.3          
## [29] sparseMatrixStats_1.8.0   highr_0.9                
## [31] fastmap_1.1.0             htmlwidgets_1.5.4        
## [33] rlang_1.0.6               rstudioapi_0.14          
## [35] DelayedMatrixStats_1.18.2 farver_2.1.1             
## [37] jquerylib_0.1.4           generics_0.1.3           
## [39] jsonlite_1.8.2            crosstalk_1.2.0          
## [41] BiocParallel_1.30.4       car_3.1-1                
## [43] RCurl_1.98-1.9            magrittr_2.0.3           
## [45] BiocSingular_1.12.0       GenomeInfoDbData_1.2.8   
## [47] Matrix_1.5-1              Rcpp_1.0.9               
## [49] ggbeeswarm_0.6.0          munsell_0.5.0            
## [51] fansi_1.0.3               abind_1.4-5              
## [53] viridis_0.6.2             lifecycle_1.0.3          
## [55] stringi_1.7.8             yaml_2.3.6               
## [57] carData_3.0-5             zlibbioc_1.42.0          
## [59] Rtsne_0.16                grid_4.2.1               
## [61] parallel_4.2.1            lattice_0.20-45          
## [63] cowplot_1.1.1             beachmat_2.12.0          
## [65] knitr_1.40                pillar_1.8.1             
## [67] ggsignif_0.6.4            codetools_0.2-18         
## [69] ScaledMatrix_1.4.1        glue_1.6.2               
## [71] evaluate_0.17             vctrs_0.5.1              
## [73] cellranger_1.1.0          gtable_0.3.1             
## [75] purrr_0.3.5               tidyr_1.2.1              
## [77] assertthat_0.2.1          cachem_1.0.6             
## [79] xfun_0.34                 rsvd_1.0.5               
## [81] broom_1.0.1               rstatix_0.7.1            
## [83] ragg_1.2.3                viridisLite_0.4.1        
## [85] tibble_3.1.8              beeswarm_0.4.0