数据库代码公开:GSVA评估通路活性
#选定肿瘤
CancerName = "LUAD"
#设置种子以达到可重复性
set.seed(18931226)
#引用R包
library(GSVA)
#获得要提取的肿瘤的样本名
rawAnno = read.delim("merged_sample_quality_annotations.tsv",sep = "\t",row.names = NULL,check.names = F,stringsAsFactors = F,header = T)
rawAnno$simple_barcode = substr(rawAnno$aliquot_barcode,1,15)
samAnno = rawAnno[!duplicated(rawAnno$simple_barcode),c("cancer type", "simple_barcode")]
samAnno = samAnno[which(samAnno$`cancer type` = CancerName),]
#读取泛癌基因表达谱
panexpr = fread("EBPlusPlusAdjustPANCAN_IlluminaHiSeq_RNASeqV2.geneExp.grsw.impute.log2(Xsum1).tsv",
sep = "\t",
stringsAsFactors = F,
check.names = F,
header = T)
panexpr = as.data.frame(panexpr)
rownames(panexpr) = panexpr[,1]
panexpr = panexpr[,-1]
#对目标肿瘤的样本与泛癌表达谱里的所有样本取交集
sample = samAnno[which(samAnno$`cancer type` != ""),"simple_barcode"]
samesample = intersect(colnames(panexpr), sample)
expr = panexpr[,samesample]
#个人习惯,重新整理表达谱,将肿瘤样本放在前面
tumorsample <- colnames(expr)[substr(colnames(expr),14,14) == "0"]
normalsample <- colnames(expr)[substr(colnames(expr),14,14) == "1"]
expr = expr[,c(tumorsample,normalsample)]
#定义需要评估的基因列表
Potential_signature <- list(pos.comp = geneList)
#有四个参数可以评估GSVA
`combined z-scores` <- gsva(as.matrix(expr),
gset.idx.list = Potential_signature,
method = "zscore")
`GSVA scores` <- gsva(as.matrix(expr),
gset.idx.list = Potential_signature,
method = "gsva")
`ssGSEA scores` <- gsva(as.matrix(expr),
gset.idx.list = Potential_signature,
method = "ssgsea")
`PLAGE scores` <- gsva(as.matrix(expr),
gset.idx.list = Potential_signature,
method = "plage")