-
Notifications
You must be signed in to change notification settings - Fork 2
/
Figure1.r
103 lines (75 loc) · 2.99 KB
/
Figure1.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
#!/usr/bin/Rscript
# or open it with jupyter notebook
# Title: "NATURE BIOTECHNOLOGY README"
# Author: "Peng"
# load previous seurat object
# load clustering results
library("Seurat")
library(dplyr)
library(tidyr)
s.tmp <- readRDS(file="1228_5_CpGs_70cell_3000HVG_50pc_res1_s.tmp.with.normalization.clustering.rds")
s.tmp
chr.in <- sapply(rownames(s.tmp),function(x){
as.character(unlist(strsplit(x,"-"))[1])
})
chr.in.uniq <- unique(chr.in)
chr.in.uniq
CG_final <- readRDS(file="0111_ourData_1mb_bin_5hmCG.rds")
# using 5 CpG here
mat_CpG <- CG_final$rate
mat_CpG[CG_final$nsite < 5] <- NA
# replcate CpH- to CpG to match the orignal data set
colnames(mat_CpG) <- gsub(".gz","",colnames(mat_CpG))
#colnames(mat_CpH) <- gsub("CpG_NeuN","NeuN",colnames(mat_CpG))
dim(mat_CpG)
na_num_per_bin <- apply(mat_CpG,1,function(x){
sum(is.na(x))
})
keep.features <- names(na_num_per_bin)[na_num_per_bin < ncol(mat_CpG) * (1-0.6)] # less than 60%
length(keep.features) # 2610
# impute by rowMeans
mat_CpG2 <- mat_CpG[keep.features,]
m <- mat_CpG2
## from https://stackoverflow.com/questions/6918086/replace-na-values-by-row-means
k <- which(is.na(m), arr.ind=TRUE)
m[k] <- rowMeans(m, na.rm=TRUE)[k[,1]]
mat_CpG3 <- m
dim(mat_CpG3)
mat_CpG4 <- mat_CpG3[,rownames(s.tmp@meta.data)]
s.tmp[["CpG"]] <- CreateAssayObject(counts = mat_CpG3[,rownames(s.tmp@meta.data)])
DefaultAssay(s.tmp) <- "CpG"
s.tmp <- NormalizeData(s.tmp,assay = "CpG")
s.tmp <- FindVariableFeatures(s.tmp,assay = "CpG")
s.tmp <- ScaleData(s.tmp, assay = "CpG",features = rownames(s.tmp))
# add annotation here
s.tmp@meta.data$Cluster <- plyr::mapvalues(s.tmp@meta.data$`RNA_snn_res.1`,c(0,1,2),c("0_Ex","1_nonNeu","2_Inh"))
s.tmp@meta.data$Cluster2 <- plyr::mapvalues(s.tmp@meta.data$Cluster,
c("0_Ex","1_nonNeu","2_Inh"),
c("Neu","nonNeu","Neu"))
plot_list <- lapply(chr.in.uniq[1:19],function(i){
j <- paste0(i,"-")
features.in <- grep(j,rownames(s.tmp),value=T)
# order features by first
coord.start <- sapply(features.in,function(x){
as.integer(unlist(strsplit(x,"-"))[2])
})
features.in2 <- names(sort(coord.start))
mat <- as.matrix(s.tmp@assays$CpG@counts[features.in2,])
if (nrow(mat) > 1){
mat[mat > .4] <- .4
NeuID <- rownames(s.tmp@meta.data)[s.tmp@meta.data$Cluster2 == "Neu"]
nonNeuID <- rownames(s.tmp@meta.data)[s.tmp@meta.data$Cluster2 == "nonNeu"]
# cellid as row, 1kb bin as column
mat <- t(mat)
mat.p1 <- mat[NeuID,]
mat.p1 <- mat.p1[order(rowSums(mat.p1),decreasing = T),]
mat.p2 <- mat[nonNeuID,]
mat.p2 <- mat.p2[order(rowSums(mat.p2),decreasing = T),]
mat2 <- rbind(mat.p1,mat.p2)
hf <- pheatmap::pheatmap(mat2,scale = "none",cluster_rows = F,cluster_cols = F,
show_colnames = F, show_rownames = F,
gaps_row = length(NeuID),
main = i)
return(hf[[4]])
}
})