2009-08-07

Additive vs. dominance two-allele genetic model by DIC

Two-allele model has alleles A1 and A2 and genotypes A1A1, A1A2, and A2A2. This model can be fitted using only additive effect (restricted model) or additve+dominance effect. (full model) The later model has one parameter more than the former. I was using this model lately and got weird DIC results - the full model had less number of parameters. Bellow is a simulation (using R and BUGS), that shows expected DIC results.
## Needed packages and functions
library(package="e1071")
library(package="doBy")
library(package="R2WinBUGS")

rvalue <- function(n, p, value, mean=0, sdE=0)
{
if(!is.matrix(value)) value <- as.matrix(value)

## Frequencies
k <- length(p)
q <- 1 - p
P <- p * p
Q <- q * q
H <- 2 * p * q

## Allocate matrix of arbitrary values by loci
x <- matrix(data=1, nrow=n, ncol=k)
y <- matrix(data=1, nrow=n, ncol=k)

## Simulate arbitrary values
for(i in 1:k) {
x[, i] <- rdiscrete(n=n, probs=c(P[i], H[i], Q[i]))
y[, i] <- value[x[, i], i]
}
x <- x - 1 ## convert 1:3 to 0:2, i.e., number of A1 alleles

## Sum all values
y <- rowSums(y)

## Add mean and normal deviate (error)
y <- y + rnorm(n=n, mean=mean, sd=sdE)

return(as.data.frame(cbind(y, x)))
}

prepairData <- function(x, dominance=FALSE)
{
ret <- list()
ret$n <- nrow(x)
ret$y <- x$y
if(!dominance) {
ret$x <- x$V2 - 1 ## scale to the mean
} else {
ret$x <- x$V2 + 1 ## BUGS does not allow zero indexing!
}
ret
}

### SIMULATE DATA
###-----------------------------------------------------------------------

## --- Purely additive ---

set.seed(seed=19791123)

## Model parameters
n <- 1000
mu <- 100
sigma <- 1
value <- c(-1, 0, 1)
p <- 0.5

## Simulate
podatkiA <- rvalue(n=n, p=p, value=value, mean=mu, sdE=sigma)

par(mfrow=c(2, 1))
hist(podatkiA$y, xlab="Phenotype")

plot(podatkiA$y ~ jitter(podatkiA$V2), xlab="Genotype", ylab="Phenotype")

tmpA1 <- lm(podatkiA$y ~ podatkiA$V2)
abline(coef(tmpA1), lwd="2", col="blue")
tmpA2 <- summaryBy(y ~ V2, data=podatkiA)
points(tmpA2$y.mean ~ tmpA2$V2, pch=19, col="red")

## --- Additive + Dominance ---

set.seed(seed=19791123)

## Model parameters
n <- 1000
mu <- 100
sigma <- 1
value <- c(-1, 1, -1)
p <- 0.5

## Simulate
podatkiD <- rvalue(n=n, p=p, value=value, mean=mu, sdE=sigma)

par(mfrow=c(2, 1))
hist(podatkiD$y, xlab="Phenotype")

plot(podatkiD$y ~ jitter(podatkiD$V2), xlab="Genotype", ylab="Phenotype")

tmpD1 <- lm(podatkiD$y ~ podatkiD$V2)
abline(coef(tmpD1), lwd="2", col="blue")
tmpD2 <- summaryBy(y ~ V2, data=podatkiD)
points(tmpD2$y.mean ~ tmpD2$V2, pch=19, col="red")

### ESTIMATE MODEL PARAMETERS AND DIC
###-----------------------------------------------------------------------

modelA <- function()
{
## --- Additive model (regression on gene content) ---
## Phenotypes
for(i in 1:n) {
y[i] ~ dnorm(mu[i], tau2)
mu[i] <- a + b * x[i]
}
## Location prior
a ~ dnorm(0, 1.0E-6)
b ~ dnorm(0, 1.0E-6)
## Variance prior
lsigma ~ dunif(-10, 10) ## sigma between 4.539993e-05 and 2.202647e+04
tau2 <- pow(sigma, -2)
sigma <- exp(lsigma)
}

modelD <- function()
{
## --- Additive + Dominance model (genotype as a factor) ---
## Phenotypes
for(i in 1:n) {
y[i] ~ dnorm(mu[i], tau2)
mu[i] <- g[x[i]]
}
## Location prior
g[1] ~ dnorm(0, 1.0E-6)
g[2] ~ dnorm(0, 1.0E-6)
g[3] ~ dnorm(0, 1.0E-6)
## Variance prior
lsigma ~ dunif(-10, 10) ## sigma between 4.539993e-05 and 2.202647e+04
tau2 <- pow(sigma, -2)
sigma <- exp(lsigma)
}

tmpAA <- prepairData(x=podatkiA)
tmpAD <- prepairData(x=podatkiA, dominance=TRUE)
tmpDA <- prepairData(x=podatkiD)
tmpDD <- prepairData(x=podatkiD, dominance=TRUE)

initsA <- function() list(a=rnorm(n=1, mean=mu), b=rnorm(n=1), lsigma=0)
initsD <- function() list(g=rnorm(n=3, mean=mu), lsigma=0)

(fitAA <- bugs(model=modelA, data=tmpAA, inits=initsA,
n.chains=3, n.burnin=5000, n.iter=10000, n.thin=10,
bugs.seed=19791123, parameters=c("a", "b", "sigma"),
bugs.directory="C:/Programs/BUGS/WinBUGS14_orig"))
## mean sd 2.5% 25% 50% 75% 97.5% Rhat n.eff
## a 99.9 0.0 99.9 99.9 100 100 100.0 1 1500
## b 1.0 0.0 0.9 1.0 1 1 1.1 1 1500
## sigma 1.0 0.0 1.0 1.0 1 1 1.0 1 400
## deviance 2828.3 2.4 2826.0 2826.0 2828 2829 2834.0 1 1500
## pD = 2.9 and DIC = 2831.2

(fitAD <- bugs(model=modelD, data=tmpAD, inits=initsD,
n.chains=3, n.burnin=5000, n.iter=10000, n.thin=10,
bugs.seed=19791123, parameters=c("g", "sigma"),
bugs.directory="C:/Programs/BUGS/WinBUGS14_orig"))
## mean sd 2.5% 25% 50% 75% 97.5% Rhat n.eff
## g[1] 98.9 0.1 98.8 98.9 98.9 99 99.1 1 660
## g[2] 99.9 0.0 99.8 99.9 99.9 100 100.0 1 1500
## g[3] 101.0 0.1 100.8 100.9 101.0 101 101.1 1 1500
## sigma 1.0 0.0 1.0 1.0 1.0 1 1.0 1 1300
## deviance 2829.2 2.9 2826.0 2827.0 2829.0 2831 2837.0 1 1500
## pD = 4.0 and DIC = 2833.2

## pD seem OK and DIC agrees with simulated data - it is worse
## for AD model for data following additive model

(fitDA <- bugs(model=modelA, data=tmpDA, inits=initsA,
n.chains=3, n.burnin=5000, n.iter=10000, n.thin=10,
bugs.seed=19791123, parameters=c("a", "b", "sigma"),
bugs.directory="C:/Programs/BUGS/WinBUGS14_orig"))
## mean sd 2.5% 25% 50% 75% 97.5% Rhat n.eff
## a 100.0 0.0 99.9 100.0 100.0 100.0 100.1 1 570
## b 0.0 0.1 -0.1 0.0 0.0 0.1 0.2 1 1500
## sigma 1.4 0.0 1.3 1.4 1.4 1.4 1.5 1 1300
## deviance 3512.9 2.6 3510.0 3511.0 3512.0 3514.0 3520.0 1 1500
## pD = 3.0 and DIC = 3515.9

(fitDD <- bugs(model=modelD, data=tmpDD, inits=initsD,
n.chains=3, n.burnin=5000, n.iter=10000, n.thin=10,
bugs.seed=19791123, parameters=c("g", "sigma"),
bugs.directory="C:/Programs/BUGS/WinBUGS14_orig"))
## mean sd 2.5% 25% 50% 75% 97.5% Rhat n.eff
## g[1] 98.9 0.1 98.8 98.9 99.0 99 99.1 1 1500
## g[2] 100.9 0.1 100.8 100.9 100.9 101 101.0 1 1500
## g[3] 99.0 0.1 98.8 98.9 99.0 99 99.1 1 1500
## sigma 1.0 0.0 1.0 1.0 1.0 1 1.0 1 530
## deviance 2829.2 2.9 2826.0 2827.0 2829.0 2831 2837.0 1 1500
## pD = 4.1 and DIC = 2833.3

## pD seems OK and DIC also agrees with the model --> dominance
## model is better for this dataset

No comments: