数据库代码公开:GSVA评估通路活性

作者:半步博导 时间:2024年6月29日 21:56 阅读量:171


#选定肿瘤
CancerName = "LUAD"
#设置种子以达到可重复性
set.seed(18931226)
#引用R包
library(GSVA)
#获得要提取的肿瘤的样本名
rawAnno = read.delim("merged_sample_quality_annotations.tsv",sep = "\t",row.names = NULL,check.names = F,stringsAsFactors = F,header = T)
rawAnno$simple_barcode = substr(rawAnno$aliquot_barcode,1,15)
samAnno = rawAnno[!duplicated(rawAnno$simple_barcode),c("cancer type", "simple_barcode")]
samAnno = samAnno[which(samAnno$`cancer type` = CancerName),]
#读取泛癌基因表达谱
panexpr = fread("EBPlusPlusAdjustPANCAN_IlluminaHiSeq_RNASeqV2.geneExp.grsw.impute.log2(Xsum1).tsv",
                 sep = "\t",
                 stringsAsFactors = F,
                 check.names = F,
                 header = T)
panexpr = as.data.frame(panexpr)
rownames(panexpr) = panexpr[,1]
panexpr = panexpr[,-1]
#对目标肿瘤的样本与泛癌表达谱里的所有样本取交集
sample = samAnno[which(samAnno$`cancer type` != ""),"simple_barcode"] 
samesample = intersect(colnames(panexpr), sample) 
expr = panexpr[,samesample] 
#个人习惯,重新整理表达谱,将肿瘤样本放在前面
tumorsample <- colnames(expr)[substr(colnames(expr),14,14) == "0"] 
normalsample <- colnames(expr)[substr(colnames(expr),14,14) == "1"] 
expr = expr[,c(tumorsample,normalsample)]
#定义需要评估的基因列表
Potential_signature <- list(pos.comp = geneList)
#有四个参数可以评估GSVA
`combined z-scores` <- gsva(as.matrix(expr), 
                              gset.idx.list = Potential_signature, 
                              method = "zscore")
`GSVA scores` <- gsva(as.matrix(expr), 
                      gset.idx.list = Potential_signature, 
                      method = "gsva")
`ssGSEA scores` <- gsva(as.matrix(expr), 
                        gset.idx.list = Potential_signature, 
                        method = "ssgsea")
`PLAGE scores` <- gsva(as.matrix(expr), 
                       gset.idx.list = Potential_signature, 
                       method = "plage")