2009-08-26

EAAP Talk: Inference of genotype probabilities and derived statistics for PrP locus in sheep

I had a talk (see bellow) today at 60th EAAP meeting in Barcelona at the Sheep and Goats free communications session. Details of the talk are in the paper (a shortened version will appear on the EAAP site in the following days, but you might prefer the long version instead). I got some nice responses which is great. I have to mention that EAAP granted me a scholarship to present our results. This is greatly appreciated. I will post more about the trip to Barcelona in few days.
Inference of genotype probabilities and derived statistics for PrP locus in sheep

2009-08-08

What is so civil about the war, anyway?

I am a fan of Guns & Roses band. Well, this dates back to being a teenager, but I still like their songs. Today I was in mood and listen to a bunch of their songs, and it only occured to me now about the meaning of what Axl says at the end of "Ciwili war" song. He says. "What is so civil about the war, anyway?". Good point!

Open access

PhD comics has a very nice comic on "Nature vs Scince vs Open Access". It is a nice comic that points out some problems with current system of publishig our work.

2009-08-07

Additive vs. dominance two-allele genetic model by DIC

Two-allele model has alleles A1 and A2 and genotypes A1A1, A1A2, and A2A2. This model can be fitted using only additive effect (restricted model) or additve+dominance effect. (full model) The later model has one parameter more than the former. I was using this model lately and got weird DIC results - the full model had less number of parameters. Bellow is a simulation (using R and BUGS), that shows expected DIC results.
## Needed packages and functions
library(package="e1071")
library(package="doBy")
library(package="R2WinBUGS")

rvalue <- function(n, p, value, mean=0, sdE=0)
{
if(!is.matrix(value)) value <- as.matrix(value)

## Frequencies
k <- length(p)
q <- 1 - p
P <- p * p
Q <- q * q
H <- 2 * p * q

## Allocate matrix of arbitrary values by loci
x <- matrix(data=1, nrow=n, ncol=k)
y <- matrix(data=1, nrow=n, ncol=k)

## Simulate arbitrary values
for(i in 1:k) {
x[, i] <- rdiscrete(n=n, probs=c(P[i], H[i], Q[i]))
y[, i] <- value[x[, i], i]
}
x <- x - 1 ## convert 1:3 to 0:2, i.e., number of A1 alleles

## Sum all values
y <- rowSums(y)

## Add mean and normal deviate (error)
y <- y + rnorm(n=n, mean=mean, sd=sdE)

return(as.data.frame(cbind(y, x)))
}

prepairData <- function(x, dominance=FALSE)
{
ret <- list()
ret$n <- nrow(x)
ret$y <- x$y
if(!dominance) {
ret$x <- x$V2 - 1 ## scale to the mean
} else {
ret$x <- x$V2 + 1 ## BUGS does not allow zero indexing!
}
ret
}

### SIMULATE DATA
###-----------------------------------------------------------------------

## --- Purely additive ---

set.seed(seed=19791123)

## Model parameters
n <- 1000
mu <- 100
sigma <- 1
value <- c(-1, 0, 1)
p <- 0.5

## Simulate
podatkiA <- rvalue(n=n, p=p, value=value, mean=mu, sdE=sigma)

par(mfrow=c(2, 1))
hist(podatkiA$y, xlab="Phenotype")

plot(podatkiA$y ~ jitter(podatkiA$V2), xlab="Genotype", ylab="Phenotype")

tmpA1 <- lm(podatkiA$y ~ podatkiA$V2)
abline(coef(tmpA1), lwd="2", col="blue")
tmpA2 <- summaryBy(y ~ V2, data=podatkiA)
points(tmpA2$y.mean ~ tmpA2$V2, pch=19, col="red")

## --- Additive + Dominance ---

set.seed(seed=19791123)

## Model parameters
n <- 1000
mu <- 100
sigma <- 1
value <- c(-1, 1, -1)
p <- 0.5

## Simulate
podatkiD <- rvalue(n=n, p=p, value=value, mean=mu, sdE=sigma)

par(mfrow=c(2, 1))
hist(podatkiD$y, xlab="Phenotype")

plot(podatkiD$y ~ jitter(podatkiD$V2), xlab="Genotype", ylab="Phenotype")

tmpD1 <- lm(podatkiD$y ~ podatkiD$V2)
abline(coef(tmpD1), lwd="2", col="blue")
tmpD2 <- summaryBy(y ~ V2, data=podatkiD)
points(tmpD2$y.mean ~ tmpD2$V2, pch=19, col="red")

### ESTIMATE MODEL PARAMETERS AND DIC
###-----------------------------------------------------------------------

modelA <- function()
{
## --- Additive model (regression on gene content) ---
## Phenotypes
for(i in 1:n) {
y[i] ~ dnorm(mu[i], tau2)
mu[i] <- a + b * x[i]
}
## Location prior
a ~ dnorm(0, 1.0E-6)
b ~ dnorm(0, 1.0E-6)
## Variance prior
lsigma ~ dunif(-10, 10) ## sigma between 4.539993e-05 and 2.202647e+04
tau2 <- pow(sigma, -2)
sigma <- exp(lsigma)
}

modelD <- function()
{
## --- Additive + Dominance model (genotype as a factor) ---
## Phenotypes
for(i in 1:n) {
y[i] ~ dnorm(mu[i], tau2)
mu[i] <- g[x[i]]
}
## Location prior
g[1] ~ dnorm(0, 1.0E-6)
g[2] ~ dnorm(0, 1.0E-6)
g[3] ~ dnorm(0, 1.0E-6)
## Variance prior
lsigma ~ dunif(-10, 10) ## sigma between 4.539993e-05 and 2.202647e+04
tau2 <- pow(sigma, -2)
sigma <- exp(lsigma)
}

tmpAA <- prepairData(x=podatkiA)
tmpAD <- prepairData(x=podatkiA, dominance=TRUE)
tmpDA <- prepairData(x=podatkiD)
tmpDD <- prepairData(x=podatkiD, dominance=TRUE)

initsA <- function() list(a=rnorm(n=1, mean=mu), b=rnorm(n=1), lsigma=0)
initsD <- function() list(g=rnorm(n=3, mean=mu), lsigma=0)

(fitAA <- bugs(model=modelA, data=tmpAA, inits=initsA,
n.chains=3, n.burnin=5000, n.iter=10000, n.thin=10,
bugs.seed=19791123, parameters=c("a", "b", "sigma"),
bugs.directory="C:/Programs/BUGS/WinBUGS14_orig"))
## mean sd 2.5% 25% 50% 75% 97.5% Rhat n.eff
## a 99.9 0.0 99.9 99.9 100 100 100.0 1 1500
## b 1.0 0.0 0.9 1.0 1 1 1.1 1 1500
## sigma 1.0 0.0 1.0 1.0 1 1 1.0 1 400
## deviance 2828.3 2.4 2826.0 2826.0 2828 2829 2834.0 1 1500
## pD = 2.9 and DIC = 2831.2

(fitAD <- bugs(model=modelD, data=tmpAD, inits=initsD,
n.chains=3, n.burnin=5000, n.iter=10000, n.thin=10,
bugs.seed=19791123, parameters=c("g", "sigma"),
bugs.directory="C:/Programs/BUGS/WinBUGS14_orig"))
## mean sd 2.5% 25% 50% 75% 97.5% Rhat n.eff
## g[1] 98.9 0.1 98.8 98.9 98.9 99 99.1 1 660
## g[2] 99.9 0.0 99.8 99.9 99.9 100 100.0 1 1500
## g[3] 101.0 0.1 100.8 100.9 101.0 101 101.1 1 1500
## sigma 1.0 0.0 1.0 1.0 1.0 1 1.0 1 1300
## deviance 2829.2 2.9 2826.0 2827.0 2829.0 2831 2837.0 1 1500
## pD = 4.0 and DIC = 2833.2

## pD seem OK and DIC agrees with simulated data - it is worse
## for AD model for data following additive model

(fitDA <- bugs(model=modelA, data=tmpDA, inits=initsA,
n.chains=3, n.burnin=5000, n.iter=10000, n.thin=10,
bugs.seed=19791123, parameters=c("a", "b", "sigma"),
bugs.directory="C:/Programs/BUGS/WinBUGS14_orig"))
## mean sd 2.5% 25% 50% 75% 97.5% Rhat n.eff
## a 100.0 0.0 99.9 100.0 100.0 100.0 100.1 1 570
## b 0.0 0.1 -0.1 0.0 0.0 0.1 0.2 1 1500
## sigma 1.4 0.0 1.3 1.4 1.4 1.4 1.5 1 1300
## deviance 3512.9 2.6 3510.0 3511.0 3512.0 3514.0 3520.0 1 1500
## pD = 3.0 and DIC = 3515.9

(fitDD <- bugs(model=modelD, data=tmpDD, inits=initsD,
n.chains=3, n.burnin=5000, n.iter=10000, n.thin=10,
bugs.seed=19791123, parameters=c("g", "sigma"),
bugs.directory="C:/Programs/BUGS/WinBUGS14_orig"))
## mean sd 2.5% 25% 50% 75% 97.5% Rhat n.eff
## g[1] 98.9 0.1 98.8 98.9 99.0 99 99.1 1 1500
## g[2] 100.9 0.1 100.8 100.9 100.9 101 101.0 1 1500
## g[3] 99.0 0.1 98.8 98.9 99.0 99 99.1 1 1500
## sigma 1.0 0.0 1.0 1.0 1.0 1 1.0 1 530
## deviance 2829.2 2.9 2826.0 2827.0 2829.0 2831 2837.0 1 1500
## pD = 4.1 and DIC = 2833.3

## pD seems OK and DIC also agrees with the model --> dominance
## model is better for this dataset