gene27434.3774925.586773208.02683.488383e-264.513967e-23 gene120324.7343835.098148192.93784.359649e-254.231040e-22 gene491-2.73391010.412673190.98396.104188e-254.739291e-22 gene89412.9971856.839106177.76146.332836e-244.097345e-21 gene2611-2.8469247.216173174.73321.099339e-236.096619e-21 gene62422.5291259.897771169.26583.022914e-231.466869e-20 gene72523.7323156.137670188.20943.890569e-231.678132e-20 gene61252.8754236.569935160.31891.656083e-226.428914e-20
查看差异表达基因原始的 CMP
>top<-rownames(topTags(qlf)) >cpm(y)[top,]
CA_1CA_2CA_3CC_1CC_2CC_3
gene70241711.3830021405.8618991480.12111533.1141837.1604029.62696 gene661217.55864912.10384826.585753403.99298582.457961044.35046 gene27434.6823061.8155775.96823062.9169487.26431114.34156 gene120321.7558652.4207702.71283265.6764647.5987275.45617
gene4912811.1397272059.4696692222.351938444.83381385.38258253.68087 gene894123.99682024.81288824.415488131.35291244.67410225.90560 gene2611245.821088310.463691225.16505243.0484326.3045539.81123 gene6242231.188880299.570228298.4115151348.298991343.619882191.93237 gene72529.36461313.3142325.42566492.71970108.55847181.92807 gene612523.41153214.52461729.841152145.70239160.75005185.16852
查看上调和下调基因的数目
>summary(dt<-decideTestsDGE(qlf)) [,1] -1536 02793 1553
挑选出差异表达基因的名字
>isDE<-as.logical(dt) >DEnames<-rownames(y)[isDE] >head(DEnames)
[1]\
差异表达基因画图
>plotSmear(qlf,de.tags=DEnames) >abline(h=c(-1,1),col=\)
DESeq2 包的安装
?
## try http:// if https:// URLs are not supported >source(\)
安装:
>biocLite(\)
数据导入
导入count 矩阵,导入数据的方式很多这里直接导入 count 矩阵 ? count 结果如下:
?
library(DESeq2)
sampleNames<-c(\,\,\,\,\,\) mydata<-read.table(\,header=TRUE,quote='\\t',skip=1) names(mydata)[7:12]<-sampleNames countMatrix<-as.matrix(mydata[7:12]) rownames(countMatrix)<-mydata$Geneid
table2<-data.frame(name=c(\,\,\,\,\,\),condition=(\,\,\,\,\,\)) rownames(table2)<-sampleNames head(countMatrix)
CA_1CA_2CA_3CC_1CC_2CC_3 gene1314000000 gene1315000000 gene1316000000 gene1317000000 gene1318000000 gene1319000000
?
把 count 矩阵转化为 DESeq2 的数据格式
>dds<-DESeqDataSetFromMatrix(countMatrix,colData=table2,design=~condition) >dds
class:DESeqDataSet dim:142176 metadata(0): assays(1):counts
rownames(14217):gene1314gene1315...gene6710gene6709 rowRangesmetadatacolumnnames(0): colnames(6):CA_1CA_2...CC_2CC_3 colDatanames(2):namecondition
过滤
?
过滤掉那些 count 结果都为 0 的数据,这些没有表达的基因对结果的分析没有用
dds<-dds[rowSums(counts(dds))>1,] dds
class:DESeqDataSet dim:41906 metadata(0): assays(1):counts
rownames(4190):gene1321gene1322...gene6712gene6710 rowRangesmetadatacolumnnames(0): colnames(6):CA_1CA_2...CC_2CC_3 colDatanames(2):namecondition
PCA分析
rld<-rlog(dds)
plotPCA(rld,intgroup=c(\,\))
?
当然也可以使用 ggplot2 来画 PCA 图
library(ggplot2) rld <- rlog(dds)
data <- plotPCA(rld, intgroup=c(\percentVar <- round(100 * attr(data, \
p<- ggplot(data, aes(PC1, PC2, color=condition, shape=name)) + geom_point(size=3) +
xlab(paste0(\ylab(paste0(\p
?
注意在进行 PCA 分析前不要 library(DESeq) 否则无法进行 PCA 分析