数据库代码公开:卡方检验免疫亚型

作者:半步博导 时间:2024年6月30日 03:53 阅读量:37

 
#泛癌免疫亚型以PDCD1为例
geneName = "PDCD1"
#读取基因表达数据
data = read.table(paste0("Expression.",geneName,".zscore.txt"),sep="\t",header=T,check.names=F)
#读取免疫分型文件
Subtype = read.table("Subtype_Immune_Model_Based.txt", header=T, sep="\t", check.names=F, row.names=1)
#基因表达数据与免疫分型文件的共同样本
sameSample = intersect(row.names(Subtype), row.names(data))
Subtype = Subtype[sameSample,]
Subtype=gsub(".+Immune |\\)","",Subtype)
data = data[sameSample,]
#合并基因表达数据与免疫亚型
data = cbind(Subtype, as.data.frame(data))
#观察Subtype的各个类型的数量
SubtypeTable = table(data$Subtype)
#数量超过6的Subtype的名字
SubtypeName = names(SubtypeTable[SubtypeTable>6])
#提取SubtypeName后续分析
data = data[which(data[,"Subtype"] %in% SubtypeName),]
cliName = colnames(data)[1]
colnames(data)[1] = "Subtype"
data$Expression <- factor(ifelse(data[,geneName] > median(data[,geneName]),"high","low"), levels = c("high","low"))
#计算免疫分型在高低表达组之间的数量
Subtype.groups = sort(unique(data$Subtype))
Subtype.groups.table = table(data$Subtype)
Subtype.groups.num = length(Subtype.groups)
Subtype.groups.samples.number = nrow(data)
Subtype.groups.low.number = sum(data$Expression == "low")
Subtype.groups.high.number = sum(data$Expression == "high")
Tabledata = table(data$Clinical, 
                      data$Expression)
#卡方检验,观察免疫分型在高低表达组之间的数量是否具有差异
chisq_test = chisq.test(Tabledata)
chisq_test_pvalue = chisq_test$p.value