数据库代码公开:limma包差异分析基因高表达与低表达组差异基因,以中位值为cutoff

作者:半步博导 时间:2024年6月29日 22:20 阅读量:63

 
CancerName = "LUAD"
geneName = "MKI67"
#引用R包
suppressPackageStartupMessages(library(limma))
#读取表达输入文件,并对输入文件处理
data = read.table(paste0("/mnt/ExpBig/","Expression.Firehose.",CancerName,".txt"),sep="\t",header=TRUE,check.names=FALSE)
data = as.matrix(data)
rownames(data) = data[,1]
data = data[,-1]
class(data) = "numeric"
#只提取肿瘤样本进行分析
tumorsample = colnames(data)[substr(colnames(data),14,14) == "0"]
data = data[,tumorsample]
#对表达谱进行重新整理,把低表达的样本放在前面
Low = data[,data[geneName,] <= median(data[geneName,]),drop=F]
High = data[,data[geneName,] > median(data[geneName,]),drop=F]
data = cbind(Low,High)
LowSampleNumber = ncol(Low)
HighSampleNumber = ncol(High)
#设计比较组
Type = c(rep("con",LowSampleNumber),#低表达组命名为con,意为对照组
         rep("treat",HighSampleNumber)#低表达组命名为treat,意为训练组
)
design <- model.matrix(~0 + factor(Type))
colnames(design) <- levels(factor(Type))
rownames(design) <- colnames(data)
design
colnames(Low)
colnames(High)
contrast.matrix <- makeContrasts(treat-con, 
                                 levels = design)
fit <- lmFit(data, design)
fit2 <- contrasts.fit(fit, contrast.matrix)
fit2 <- eBayes(fit2)
x <- topTable(fit2, coef = 1, 
              n = Inf, 
              adjust.method = "BH", 
              sort.by = "P")
head(x)
write.csv(x, "TCGA_Limma_results.csv", quote = F)