127 lines
3.8 KiB
R
127 lines
3.8 KiB
R
|
|
||
|
# Simulated sample annotation for 20 samples: the first 10 are labelled
# "Tumor", the last 10 "Control". The seed is fixed so that every run of the
# script regenerates exactly the same data set.
set.seed(888)

type <- c(rep("Tumor", 10), rep("Control", 10))

# Random gender per sample; two randomly chosen entries are then set to NA so
# that the annotation contains missing values.
gender <- sample(c("F", "M"), 20, replace = TRUE)
gender[sample(1:20, 2)] <- NA

# Age drawn uniformly between 30 and 80.
age <- runif(20, min = 30, max = 80)

# Two independent logical mutation flags; TRUE is drawn with probability 0.2
# (mut1) and 0.3 (mut2). `prob` is spelled out in full instead of relying on
# partial argument matching (`p =`).
mutation <- data.frame(
  mut1 = sample(c(TRUE, FALSE), 20, replace = TRUE, prob = c(0.2, 0.8)),
  mut2 = sample(c(TRUE, FALSE), 20, replace = TRUE, prob = c(0.3, 0.7))
)

# One row per sample. stringsAsFactors = FALSE keeps `type` and `gender` as
# character columns on R versions before 4.0 as well.
anno <- data.frame(type = type, gender = gender, age = age, mutation,
                   stringsAsFactors = FALSE)
|
||
|
|
||
|
# Colour mapping per annotation: a list keyed by annotation name, each entry a
# named character vector mapping a level to its colour.
# NOTE(review): `anno` stores the mutation flags as columns `mut1`/`mut2`,
# while the colours here are keyed `mutation` -- confirm the plotting code
# applies this entry to both columns.
anno_col <- list(
  type = c(Tumor = "red", Control = "blue"),
  gender = c(F = "pink", M = "darkgreen"),
  mutation = setNames(c("black", "#EEEEEE"), c("TRUE", "FALSE"))
)
|
||
|
|
||
|
######################################
|
||
|
# generate methylation matrix
|
||
|
# Draw `k` uniform random values centred on `mean`.
#
# The values span an interval of total width min(1 - mean, mean) around
# `mean`, i.e. mean +/- half of that width.
#
# k:    number of values to draw.
# mean: centre of the interval.
# Returns a numeric vector of length `k`.
rand_meth <- function(k, mean) {
  # Draw the noise first: `mean` is only evaluated afterwards, so a `mean`
  # argument that itself consumes random numbers keeps its place in the
  # random stream.
  noise <- runif(k) - 0.5
  spread <- min(1 - mean, mean)
  noise * spread + mean
}
|
||
|
|
||
|
# Baseline value for each of the 1000 rows: 300 rows centred on 0.3 followed
# by 700 rows centred on 0.7.
mean_meth <- c(rand_meth(300, 0.3), rand_meth(700, 0.7))

# For every row simulate 20 values: 10 around the baseline `m`, then 10 around
# a baseline shifted by 0.2. Low baselines (< 0.3) are shifted up, high ones
# (> 0.7) are shifted down, and the remaining ones in a random direction.
meth_rows <- lapply(mean_meth, function(m) {
  first_group <- rand_meth(10, m)
  second_group <- if (m < 0.3) {
    rand_meth(10, m + 0.2)
  } else if (m > 0.7) {
    rand_meth(10, m - 0.2)
  } else {
    # Keep sample() inside the argument: R evaluates arguments lazily, so
    # hoisting it would change the order in which random numbers are consumed
    # and therefore the generated data.
    rand_meth(10, m + sample(c(1, -1), 1) * 0.2)
  }
  c(first_group, second_group)
})

# Stack the per-row vectors into a 1000 x 20 numeric matrix without row names;
# columns are named sample1 ... sample20.
mat_meth <- do.call(rbind, meth_rows)
rm(meth_rows)
colnames(mat_meth) <- paste0("sample", 1:20)
|
||
|
|
||
|
######################################
|
||
|
# generate directions for methylation
|
||
|
# Label each row "hyper" when the mean of columns 1-10 exceeds the mean of
# columns 11-20, and "hypo" otherwise (columns 1-10 presumably correspond to
# the "Tumor" entries of `type`, columns 11-20 to "Control").
direction <- local({
  first_half_mean <- rowMeans(mat_meth[, 1:10])
  second_half_mean <- rowMeans(mat_meth[, 11:20])
  ifelse(first_half_mean - second_half_mean > 0, "hyper", "hypo")
})
|
||
|
library(circlize)
|
||
|
|
||
|
#######################################
|
||
|
# generate expression matrix
|
||
|
# Derive one expression row from every methylation row: add Gaussian noise,
# standardise the row with scale(), and flip the sign so that the expression
# values run opposite to the methylation values.
mat_expr <- t(apply(mat_meth, 1, function(meth_row) {
  # Per-row noise level, between 0.1 and 0.3.
  noise_sd <- abs(runif(1) - 0.5) * 0.4 + 0.1
  noisy <- meth_row + rnorm(length(meth_row), sd = noise_sd)
  -scale(noisy)
}))

# Same row/column names as the methylation matrix.
dimnames(mat_expr) <- dimnames(mat_meth)
|
||
|
|
||
|
#############################################################
|
||
|
# p-values of the correlation test between methylation and expression (one per row)
|
||
|
# For every row, test the correlation between its methylation and expression
# values (cor.test() with its default settings) and keep the p-value.
# vapply() pins the result to a numeric vector with one entry per row, even
# when the matrix has zero rows (sapply() would return an empty list there).
cor_pvalue <- vapply(seq_len(nrow(mat_meth)), function(i) {
  cor.test(mat_meth[i, ], mat_expr[i, ])$p.value
}, numeric(1))
|
||
|
|
||
|
#####################################################
|
||
|
# gene type of each row
|
||
|
# Assign a random gene type to every row of `mat_meth`. The weights make
# "protein_coding" six times as likely as each of the other types.
# The level names (including the spelling "psedo-gene") are part of the saved
# data and are kept unchanged.
gene_type <- sample(
  x = c("protein_coding", "lincRNA", "microRNA", "psedo-gene", "others"),
  size = nrow(mat_meth),
  replace = TRUE,
  prob = c(6, 1, 1, 1, 1)
)
|
||
|
|
||
|
#################################################
|
||
|
# annotation to genes
|
||
|
# Assign a genomic annotation label to every row based on its baseline `m`:
#   m > 0.6 : "intragenic" with probability 0.8
#   m < 0.4 : "TSS" with probability 0.4
#   otherwise (or when the draw fails): "intergenic"
# Exactly one random number is consumed for rows with m > 0.6 or m < 0.4 and
# none for the rest. vapply() guarantees a character vector result.
anno_gene <- vapply(mean_meth, function(m) {
  label <- "intergenic"
  if (m > 0.6) {
    if (runif(1) < 0.8) label <- "intragenic"
  } else if (m < 0.4) {
    if (runif(1) < 0.4) label <- "TSS"
  }
  label
}, character(1))

# Colour for each annotation label.
anno_gene_col <- c(intragenic = "blue", TSS = "red", intergenic = "grey")
|
||
|
|
||
|
############################################
|
||
|
# distance to genes
|
||
|
# Simulate a rounded distance value for every row based on its baseline `m`.
# Each value is uniform on [center - width / 2, center + width / 2]:
#   m < 0.3        : center 500,    width 1000   (prob. 0.5) or 10000
#   0.3 <= m < 0.6 : center 50000,  width 100000 (prob. 0.8),
#                    else center 500000, width 1000000
#   m >= 0.6       : center 500000, width 1000000
# vapply() guarantees a numeric vector result.
tss_dist <- local({
  # One uniform draw around `center`, rounded to a whole number.
  draw_around <- function(center, width) {
    round((runif(1) - 0.5) * width + center)
  }

  vapply(mean_meth, function(m) {
    # In the first two branches the condition draw always precedes the value
    # draw, so the random stream is consumed in a fixed order.
    if (m < 0.3) {
      if (runif(1) < 0.5) draw_around(500, 1000) else draw_around(500, 10000)
    } else if (m < 0.6) {
      if (runif(1) < 0.8) {
        draw_around(50000, 100000)
      } else {
        draw_around(500000, 1000000)
      }
    } else {
      draw_around(500000, 1000000)
    }
  }, numeric(1))
})
|
||
|
|
||
|
|
||
|
#######################################
|
||
|
# annotation to chromatin states (tss, enhancer, repressive)
|
||
|
# Random "tss" value for a row with baseline `m`.
#
# Returns a uniform random number with probability 0.25 when m < 0.4 and with
# probability 0.1 otherwise; in all other cases returns 0.
rand_tss <- function(m) {
  hit_prob <- if (m < 0.4) 0.25 else 0.1
  # First draw decides whether a value is emitted, second draw is the value.
  if (runif(1) < hit_prob) runif(1) else 0
}
|
||
|
# Random "enhancer" value for a row with baseline `m`.
#
# Returns a uniform random number with probability 0.6 when m < 0.4 and with
# probability 0.1 otherwise; in all other cases returns 0.
rand_enhancer <- function(m) {
  hit_prob <- if (m < 0.4) 0.6 else 0.1
  # First draw decides whether a value is emitted, second draw is the value.
  if (runif(1) < hit_prob) runif(1) else 0
}
|
||
|
# Random "repressive" value for a row with baseline `m`.
#
# Returns 0 without consuming any random number when m is not above 0.4.
# Otherwise returns a uniform random number with probability 0.8 and 0 in the
# remaining cases.
rand_repressive <- function(m) {
  if (!(m > 0.4)) {
    return(0)
  }
  if (runif(1) < 0.8) runif(1) else 0
}
|
||
|
# One row per entry of `mean_meth`, with a random value for each of the three
# states. The columns are generated in the order tss, enhancer, repressive,
# which fixes the order in which random numbers are consumed.
# vapply() guarantees every column is a numeric vector.
anno_states <- data.frame(
  tss = vapply(mean_meth, rand_tss, numeric(1)),
  enhancer = vapply(mean_meth, rand_enhancer, numeric(1)),
  repressive = vapply(mean_meth, rand_repressive, numeric(1))
)
|
||
|
|
||
|
# Write all simulated objects into a single .RData file; the path is relative,
# so the file lands in the current working directory.
save(
  list = c(
    "mat_meth", "mat_expr", "anno", "anno_col", "anno_states",
    "cor_pvalue", "direction", "anno_gene", "gene_type", "tss_dist"
  ),
  file = "random_meth_expr_data.RData"
)
|