Bioinformatics Asked on April 4, 2021
I am working with a RNA-seq data set in maize that has a relatively complex design. There are two levels of treatment A (nitrogen fertilizer level in the field, high or low), two levels of treatment B (nitrogen nutrients in in vitro cultures, high and low) and two levels of treatment C (two time points of sampling), all with 3 reps.
> library(edgeR)
> load("KC_Raw.RData")
> y <- DGEList(counts = KCraw.data[,2:25])
> keep <- rowSums(cpm(y) > 10) >= 3
> targets <- data.frame(rownames=colnames(KCraw.data)[2:25] ,
+ Time=rep(c(rep("2DIC",12),rep("5DIC",12))) ,
+ FieldN=rep(c(rep("FH",6), rep("FL",6)),2) ,
+ CultureN=rep(c(rep("CL",3),rep("CH",3)),4))
> Group <- factor(paste(targets$FieldN,targets$Time,targets$CultureN,sep="."))
> targets <- cbind(targets,Group=Group)
> targets
rownames Time FieldN CultureN Group
1 KC1_H2L 2DIC FH CL FH.2DIC.CL
2 KC2_H2L 2DIC FH CL FH.2DIC.CL
3 KC3_H2L 2DIC FH CL FH.2DIC.CL
4 KC4_H2H 2DIC FH CH FH.2DIC.CH
5 KC5_H2H 2DIC FH CH FH.2DIC.CH
6 KC6_H2H 2DIC FH CH FH.2DIC.CH
7 KC7_L2L 2DIC FL CL FL.2DIC.CL
8 KC8_L2L 2DIC FL CL FL.2DIC.CL
9 KC9_L2L 2DIC FL CL FL.2DIC.CL
10 KC10_L2H 2DIC FL CH FL.2DIC.CH
11 KC11_L2H 2DIC FL CH FL.2DIC.CH
12 KC12_L2H 2DIC FL CH FL.2DIC.CH
13 KC13_H5L 5DIC FH CL FH.5DIC.CL
14 KC14_H5L 5DIC FH CL FH.5DIC.CL
15 KC15_H5L 5DIC FH CL FH.5DIC.CL
16 KC16_H5H 5DIC FH CH FH.5DIC.CH
17 KC17_H5H 5DIC FH CH FH.5DIC.CH
18 KC18_H5H 5DIC FH CH FH.5DIC.CH
19 KC19_L5L 5DIC FL CL FL.5DIC.CL
20 KC20_L5L 5DIC FL CL FL.5DIC.CL
21 KC21_L5L 5DIC FL CL FL.5DIC.CL
22 KC22_L5H 5DIC FL CH FL.5DIC.CH
23 KC23_L5H 5DIC FL CH FL.5DIC.CH
24 KC24_L5H 5DIC FL CH FL.5DIC.CH
I have used edgeR in R to calculate differential expression for contrasts involving 3 reps at one treatment combination to 3 reps at another treatment combination, for example
> y <- DGEList(counts = KCraw.data[keep,2:25], group = Group)
> y <- calcNormFactors(y)
>
> TMM <- KCraw.data[keep,2:25]
> for (i in 1:24) {
+ TMM[,i] <- TMM[,i] / (y$samples$lib.size[i] * y$samples$norm.factors[i]) * 1e6
+ }
>
> y <- DGEList(counts = TMM,group = Group)
>
> design <- model.matrix(~0+Group)
> colnames(design) <- levels(Group)
> y <- calcNormFactors(y,method = "TMM")
> y <- estimateDisp(y,design)
> fitQL <- glmQLFit(y,design)
> fit <- glmFit(y,design)
> myKC.contrasts <- makeContrasts(
+ H2H.H2L = FH.2DIC.CH - FH.2DIC.CL,
+ L2H.L2L = FL.2DIC.CH - FL.2DIC.CL,
+ H2H.L2H = FH.2DIC.CH - FL.2DIC.CH,
+ H2L.L2L = FH.2DIC.CL - FL.2DIC.CL,
+ H5H.H5L = FH.5DIC.CH - FH.5DIC.CL,
+ L5H.L5L = FL.5DIC.CH - FL.5DIC.CL,
+ H5H.L5H = FH.5DIC.CH - FL.5DIC.CH,
+ H5L.L5L = FH.5DIC.CL - FL.5DIC.CL,
+ H2H.L2L = FH.2DIC.CH - FL.2DIC.CL,
+ H5H.L5L = FH.5DIC.CH - FL.5DIC.CL,
+ H5L.H2L = FH.5DIC.CL - FH.2DIC.CL,
+ H5H.H2H = FH.5DIC.CH - FH.2DIC.CH,
+ L5L.L2L = FL.5DIC.CL - FL.2DIC.CL,
+ L5H.L2H = FL.5DIC.CH - FL.2DIC.CH,
+ levels=design)
> design
FH.2DIC.CH FH.2DIC.CL FH.5DIC.CH FH.5DIC.CL FL.2DIC.CH FL.2DIC.CL FL.5DIC.CH FL.5DIC.CL
1 0 1 0 0 0 0 0 0
2 0 1 0 0 0 0 0 0
3 0 1 0 0 0 0 0 0
4 1 0 0 0 0 0 0 0
5 1 0 0 0 0 0 0 0
6 1 0 0 0 0 0 0 0
7 0 0 0 0 0 1 0 0
8 0 0 0 0 0 1 0 0
9 0 0 0 0 0 1 0 0
10 0 0 0 0 1 0 0 0
11 0 0 0 0 1 0 0 0
12 0 0 0 0 1 0 0 0
13 0 0 0 1 0 0 0 0
14 0 0 0 1 0 0 0 0
15 0 0 0 1 0 0 0 0
16 0 0 1 0 0 0 0 0
17 0 0 1 0 0 0 0 0
18 0 0 1 0 0 0 0 0
19 0 0 0 0 0 0 0 1
20 0 0 0 0 0 0 0 1
21 0 0 0 0 0 0 0 1
22 0 0 0 0 0 0 1 0
23 0 0 0 0 0 0 1 0
24 0 0 0 0 0 0 1 0
attr(,"assign")
[1] 1 1 1 1 1 1 1 1
attr(,"contrasts")
attr(,"contrasts")$Group
[1] "contr.treatment"
> myKC.contrasts
Contrasts
Levels H2H.H2L L2H.L2L H2H.L2H H2L.L2L H5H.H5L L5H.L5L H5H.L5H H5L.L5L H2H.L2L H5H.L5L H5L.H2L H5H.H2H L5L.L2L
FH.2DIC.CH 1 0 1 0 0 0 0 0 1 0 0 -1 0
FH.2DIC.CL -1 0 0 1 0 0 0 0 0 0 -1 0 0
FH.5DIC.CH 0 0 0 0 1 0 1 0 0 1 0 1 0
FH.5DIC.CL 0 0 0 0 -1 0 0 1 0 0 1 0 0
FL.2DIC.CH 0 1 -1 0 0 0 0 0 0 0 0 0 0
FL.2DIC.CL 0 -1 0 -1 0 0 0 0 -1 0 0 0 -1
FL.5DIC.CH 0 0 0 0 0 1 -1 0 0 0 0 0 0
FL.5DIC.CL 0 0 0 0 0 -1 0 -1 0 -1 0 0 1
Contrasts
Levels L5H.L2H
FH.2DIC.CH 0
FH.2DIC.CL 0
FH.5DIC.CH 0
FH.5DIC.CL 0
FL.2DIC.CH -1
FL.2DIC.CL 0
FL.5DIC.CH 1
FL.5DIC.CL 0
After analyzing these contrasts, I wanted to estimate some sort of simple effect, such as the culture media nitrogen level. To do this, I ran the following code.
> myKC.contrasts <- cbind(myKC.contrasts,
+ Development = c(1,1,-1,-1,1,1,-1,-1),
+ FieldN = c(1,1,1,1,-1,-1,-1,-1),
+ CultureN = c(1,-1,1,-1,1,-1,1,-1)
+ )
> myKC.contrasts
H2H.H2L L2H.L2L H2H.L2H H2L.L2L H5H.H5L L5H.L5L H5H.L5H H5L.L5L H2H.L2L H5H.L5L H5L.H2L H5H.H2H L5L.L2L
FH.2DIC.CH 1 0 1 0 0 0 0 0 1 0 0 -1 0
FH.2DIC.CL -1 0 0 1 0 0 0 0 0 0 -1 0 0
FH.5DIC.CH 0 0 0 0 1 0 1 0 0 1 0 1 0
FH.5DIC.CL 0 0 0 0 -1 0 0 1 0 0 1 0 0
FL.2DIC.CH 0 1 -1 0 0 0 0 0 0 0 0 0 0
FL.2DIC.CL 0 -1 0 -1 0 0 0 0 -1 0 0 0 -1
FL.5DIC.CH 0 0 0 0 0 1 -1 0 0 0 0 0 0
FL.5DIC.CL 0 0 0 0 0 -1 0 -1 0 -1 0 0 1
L5H.L2H Development FieldN CultureN
FH.2DIC.CH 0 1 1 1
FH.2DIC.CL 0 1 1 -1
FH.5DIC.CH 0 -1 1 1
FH.5DIC.CL 0 -1 1 -1
FL.2DIC.CH -1 1 -1 1
FL.2DIC.CL 0 1 -1 -1
FL.5DIC.CH 1 -1 -1 1
FL.5DIC.CL 0 -1 -1 -1
Once I rerun the analysis for the CultureN contrast and look at the result for a particular gene, I see that it’s estimated log2FC is equal to the sum of every simple contrast.
> lrt <- glmQLFTest(fitQL, contrast=myKC.contrasts[,"CultureN"])
> topTags(lrt,n=nrow(y$counts))["GRMZM2G445575",]
Coefficient: 1*FH.2DIC.CH -1*FH.2DIC.CL 1*FH.5DIC.CH -1*FH.5DIC.CL 1*FL.2DIC.CH -1*FL.2DIC.CL 1*FL.5DIC.CH -1*FL.5DIC.CL
logFC logCPM F PValue FDR
GRMZM2G445575 -6.63617 5.417106 151.5261 3.691525e-11 2.825777e-08
# FC is a data frame of the logFC of each constrast in columns for each gene in rows
> sum(FC["GRMZM2G445575",c("H2H.H2L","L2H.L2L","H5H.H5L","L5H.L5L")])
[1] -6.636197
My first question is if this analysis is a valid way of summarizing the simple effects of each treatment.
I would like to be able to also include the effects of the H2H.L2L and H5H.L5L contrast in the FieldN and CultureN comparison, but I am not sure how to do this, or if this would be valid because each of these contrasts includes treatments that have different levels of two treatment factors.
I think the problem is in the design. There is no room for error or variation from a common ground.
I'm not sure of the solution, but I think you'll need to delete a column to give the design a degree of freedom, and probably add the intercept. You'll need to adjust the contrasts accordingly.
However, I would recommend to ask this at support.bioconductor.org. There are more experts on linear modelling and contrasts there than me. (If you ask there make it easier to copy and paste your code)
Answered by llrs on April 4, 2021
Get help from others!
Recent Questions
Recent Answers
© 2024 TransWikia.com. All rights reserved. Sites we Love: PCI Database, UKBizDB, Menu Kuliner, Sharing RPP