# 数据库代码公开:XSum分析cMap数据库
# 加载包
library(tidyverse)
library(AnnotationDbi)
# Load the CMap signatures. load() restores the objects stored in the .RData
# file into the current environment; the code below relies on it providing
# `CMAP.genePerturbations`.
load("CMAP_gene_signatures.RData")

# Build a table holding the `tstat` column plus an ENTREZID column parsed from
# the row names.
# dplyr::select() is namespace-qualified on purpose: AnnotationDbi is attached
# after the tidyverse and masks select() with an S4 generic that has no method
# for data frames.
camp_signature <- CMAP.genePerturbations %>%
  as.data.frame() %>% # convert to a data frame if it is not one already
  dplyr::select(tstat) %>%
  rownames_to_column("rowname") %>%
  # Row names are split on "."; `NA` in `into` tells separate() to omit the
  # first piece, and the second piece is kept as ENTREZID.
  separate(rowname, into = c(NA, "ENTREZID"), sep = "\\.")

# Convert Entrez IDs to gene symbols with bitr(). The call is
# namespace-qualified and the annotation database is passed by name because
# this script attaches neither clusterProfiler nor org.Hs.eg.db.
SYMBOL <- clusterProfiler::bitr(
  camp_signature$ENTREZID,
  fromType = "ENTREZID",
  toType = "SYMBOL",
  OrgDb = "org.Hs.eg.db"
)

# Attach the signature values to each mapped gene. SYMBOL is the left table,
# so only IDs returned by bitr() are kept.
camp_signature <- left_join(SYMBOL, camp_signature, by = "ENTREZID")

# Use the gene symbols as row names (any existing row names are discarded).
# NOTE(review): ENTREZID stays in the result as a character column next to
# `tstat`; confirm that downstream numeric code expects or excludes it.
# NOTE(review): column_to_rownames() fails on duplicated symbols; verify the
# mapping is one-to-one for this data.
camp_signature <- camp_signature %>%
  remove_rownames() %>%
  column_to_rownames(var = "SYMBOL")
#' Keep only the N most extreme values at each end of every column
#'
#' Each numeric column is ranked with `order()`. The `N` smallest and the `N`
#' largest entries are kept as they are; every entry ranked in between is
#' replaced by 0. Non-numeric columns of a data frame are returned untouched.
#' `NA` values sort last under `order()`, so they are ranked with the largest
#' values.
#'
#' @param logFC_Matrix A data frame (or tibble) or a numeric matrix with one
#'   row per gene and one column per signature.
#' @param N Number of entries to keep at each end of the ranking. A single
#'   non-negative number.
#' @return An object of the same type, dimensions and row/column names as
#'   `logFC_Matrix`. If it has `2 * N` rows or fewer there is no middle part
#'   to blank out and the input is returned unchanged.
eXtremeLogFC <- function(logFC_Matrix, N = 500) {
  stopifnot(is.numeric(N), length(N) == 1L, !is.na(N), N >= 0)

  is_df <- is.data.frame(logFC_Matrix)
  is_num_matrix <- is.matrix(logFC_Matrix) && is.numeric(logFC_Matrix)
  if (!is_df && !is_num_matrix) {
    stop(
      "`logFC_Matrix` must be a data frame or a numeric matrix.",
      call. = FALSE
    )
  }

  numOfGenes <- nrow(logFC_Matrix)

  # `(N + 1):(numOfGenes - N)` would count downwards here and zero entries
  # that must be kept, so bail out before building the rank range.
  if (numOfGenes <= 2 * N) {
    return(logFC_Matrix)
  }
  middle_ranks <- seq.int(N + 1, numOfGenes - N)

  # Zero the entries whose rank falls strictly between the two extremes.
  zero_middle <- function(values) {
    values[order(values)[middle_ranks]] <- 0
    values
  }

  if (is_df) {
    numeric_cols <- which(vapply(logFC_Matrix, is.numeric, logical(1)))
    for (j in numeric_cols) {
      logFC_Matrix[[j]] <- zero_middle(logFC_Matrix[[j]])
    }
  } else {
    for (j in seq_len(ncol(logFC_Matrix))) {
      logFC_Matrix[, j] <- zero_middle(logFC_Matrix[, j])
    }
  }

  logFC_Matrix
}
# Reduce the signature table to its 500 most extreme entries at each end;
# eXtremeLogFC() sets everything ranked in between to 0.
XLogFC <- eXtremeLogFC(camp_signature, N = 500)
XSum = data.frame(score = XSum(XLogFC,
Up_gene,
Down_gene)