Gregor Gorjanc (gg)

> x <- 1:10
> trimSum(x, n=5)
[1]  1  2  3  4 45
> trimSum(x, n=5, right=FALSE)
[1] 21  7  8  9 10

> x[9] <- NA
> trimSum(x, n=5)
[1]  1  2  3  4 NA
> trimSum(x, n=5, na.rm=TRUE)
[1]  1  2  3  4 36

rem --- Prepend the Rtools, Perl and MinGW bin directories to PATH ---
set PATH=c:\Programs\R\Rtools\bin;c:\Programs\R\Rtools\perl\bin;c:\Programs\R\Rtools\MinGW\bin;%PATH%

rem --- Start a Command Prompt that uses the PATH set above ---
cmd
> fit <- lmer(y ~ effect1 + ....)
> summary(fit)
Error: cannot allocate vector of size 130.4 Mb
In addition: There were 22 warnings (use warnings() to see them)
> warnings()
1: In slot(from, what) <- slot(value, what) ... :
Reached total allocation of 1535Mb: see help(memory.size) ...
> memory.limit()
[1] 1535.875
> memory.limit(size=1800)
> summary(fit)

\documentclass{article}
\usepackage{Sweave}
\begin{document}

<<setup>>=
n <- 10
s <- 15
@

Let us first simulate \Sexpr{n} values from a normal distribution and add a \Sexpr{s} sec pause to show the effect of caching.

<<simulate, cache=true>>=
x <- rnorm(n)
Sys.sleep(s)
@

Now print the values:

<<print, results=verbatim>>=
print(x)
@

\end{document}
Run Sweave and postprocess with LaTeX directly from the command line
- cache mode via cacheSweave R package

R version 2.8.0 (2008-10-20)
Copyright (C) 2008 The R Foundation for Statistical Computing
ISBN 3-900051-07-0

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.

R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.

Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.

> library(package='cacheSweave'); Sweave(file='test.Rnw', driver=cacheSweaveDriver);
Loading required package: filehash
filehash: Simple key-value database (2.0 2008-08-03)
Loading required package: stashR
A Set of Tools for Administering SHared Repositories (0.3-2 2008-04-30)
Writing to file test.tex
Processing code chunks ...
1 : echo term verbatim (label=setup)
2 : echo term verbatim (label=simulate)
3 : echo term verbatim (label=print)

You can now run LaTeX on 'test.tex'

## Draw 100 values from the standard normal distribution and append
## a single outlier (100) in one step
x <- c(rnorm(n=100), 100)
## The ordinary mean is pulled towards the outlier
mean(x)
## [1] 1.018909
## The trimmed mean drops 1% of the observations from each end first
mean(x, trim=0.01)
## [1] 0.04981092
## Trimmed standard deviation: sd() of 'x' after dropping a fraction 'trim'
## of the observations from each end of the sorted data (the same trimming
## scheme as mean(x, trim=)).
##
## Arguments:
##   x      numeric or logical vector (complex is accepted only with trim = 0
##          and is then passed to sd() as is)
##   trim   single number; fraction of observations removed from EACH end
##   na.rm  if TRUE, missing values are removed before anything else
##   ...    not used; kept so existing calls with extra arguments still work
##
## Value:
##   a single number; NA_real_ (with a warning) for unsupported input types,
##   NA_real_ when trimming is requested and 'x' still contains missing
##   values, and 0 when trim >= 0.5 (at most one observation would remain)
sd.trim <- function(x, trim=0, na.rm=FALSE, ...)
{
  if(!is.numeric(x) && !is.complex(x) && !is.logical(x)) {
    warning("argument is not numeric or logical: returning NA")
    return(NA_real_)
  }
  if(na.rm) x <- x[!is.na(x)]
  if(!is.numeric(trim) || length(trim) != 1)
    stop("'trim' must be numeric of length one")
  n <- length(x)
  if(trim > 0 && n > 0) {
    if(is.complex(x)) stop("trimmed sd are not defined for complex data")
    ## sort.int() drops NAs silently, while lo/hi below are computed from the
    ## full length; without this guard missing values were quietly ignored
    ## even though na.rm=FALSE
    if(any(is.na(x))) return(NA_real_)
    if(trim >= 0.5) return(0)
    ## Positions of the first and last observation that are kept
    lo <- floor(n * trim) + 1
    hi <- n + 1 - lo
    ## A partial sort is enough: only the elements at lo and hi must be in
    ## their final place for x[lo:hi] to hold the central observations
    x <- sort.int(x, partial = unique(c(lo, hi)))[lo:hi]
  }
  sd(x)
}

## Days in milk: first value, last value and step of the grid on which
## the curve is evaluated (used in the seq() call below)
dimMin <- 1
dimMax <- 300
dimBy <- 0.5

## Covariates of a lactation curve for days in milk 'x' scaled by the
## constant 'c': x/c, (x/c)^2, log(c/x) and log(c/x)^2 (the name suggests the
## Ali & Schaeffer regression). Returns a matrix with one row per element of
## 'x' and four unnamed columns.
AliSch <- function(x, c=305)
{
  ratio <- x / c
  logInv <- log(c / x)
  ## deparse.level=0 keeps the columns unnamed, as when expressions are bound
  cbind(ratio, ratio * ratio, logInv, logInv * logInv, deparse.level=0)
}

## Grid of days in milk, intercept column and lactation-curve covariates
x <- seq(from=dimMin, to=dimMax, by=dimBy)
XInt <- rep_len(1, length(x))
XDim <- AliSch(x=x)

## Design matrix
X <- cbind(XInt, XDim)

## Parameter estimates - from one analysis in goats
int <- 2800.222
aliSch <-  c(-1678.68038, 219.29628, 53.04481, -55.26959)
b <- matrix(c(int, aliSch), ncol=1)

## Estimate (divided by 1000 to match the kg scale of the y axis label)
yHat <- (X %*% b) / 1000

## Plot

## Inspect the peak of the curve to choose the y axis limits
max(yHat)
## 2.47
yMax <- 3
yMin <- 1.5

## Open PDF
## pdf(file="lact_curve.pdf", width=5, height=3.2, pointsize=11)

par(bty="l",               # L-shaped box instead of a full frame
    mar=c(3.2, 3, .5, .5), # margins around the plot region
    mgp=c(2, 0.7, 0),      # placement of axis titles and axis labels
    pty="m")               # use the maximal plotting region

plot(x=x, y=yHat, type="l", lwd=2, ylim=c(yMin, yMax),
     axes=FALSE,
     xlab="Days in milk", ylab="Daily milk yield (kg)")
box()

text(x=0, y=yMin - 0.15, xpd=TRUE,
     labels="Parturition")

## Test-day records: a filled dot with a larger open circle around it
xP <- c(80, 120, 145, 180, 210, 240, 265)
local({
  yP <- yHat[x %in% xP]
  points(x=xP, y=yP, cex=2, pch=19)
  points(x=xP, y=yP, cex=3, pch=21)
})

text(x=200, y=yMax - 0.8, labels="Test-day records")

## Weaning and drying off are each marked by a dashed vertical line, a
## double-headed arrow of +/- 30 days on the x axis and a label below it;
## the period labels are drawn in between to keep the drawing order
local({
  markEvent <- function(day, label) {
    abline(v=day, lwd=2, lty=2, col="gray")
    arrows(x0=day - 30, x1=day + 30, y0=yMin + 0.005, y1=yMin + 0.005,
           lty=1, lwd=2, code=3, length=0.15, col="gray")
    text(x=day, y=yMin - 0.15, xpd=TRUE,
         labels=label)
  }

  markEvent(day=75, label="Weaning")
  text(x=30, y=yMax - 0.2,
       labels="Suckling period")

  markEvent(day=275, label="Drying off")
  text(x=175, y=yMax - 0.2,
       labels="Milking period")
})

## Close the plot
## dev.off()

## Plot a normal density, shade its upper k fraction and annotate it.

## Define the mean and the standard deviation
mu <- 100
sigma <- 12

## Define the fraction of the upper area (tail) to shade
k <- 0.05

## Define the grid: mu +/- 5 standard deviations in steps of 0.01 sd
x <- (seq(-5, 5, 0.01) * sigma) + mu

## Compute the density of normal distribution over the grid
y <- dnorm(x=x, mean=mu, sd=sigma)

## Compute the "threshold" for upper K % of the distribution
## (note: this name masks base::t() as a variable in this script)
t <- qnorm(p=1-k, mean=mu, sd=sigma)

## Open PDF
## pdf(file="normal.pdf", width=5, height=3, pointsize=12)

## Open Windows metafile --> good for inclusion into MS Office documents
## win.metafile(filename="normal.wmf", width=5, height=3, pointsize=12)

par(mar=c(5, 4, 1, 1) + 0.1, # c(bottom, left, top, right)
    bty="l",
    pty="m")
plot(y ~ x, type="l", axes=FALSE,
     xlab="Standardized breeding value",
     ylab="Distribution", lwd=3)
## Mark the mean and the "threshold"
abline(v=c(mu, t), lwd=2, lty=2)
abline(h=0, lwd=3)
axis(1)
axis(2, labels=FALSE, tick=FALSE)
box()

## Add shaded polygon: density curve above the threshold, closed along y = 0
testK <- x >= t
xK <- x[testK]
yK <- y[testK]
polygon(x=c(xK, rev(xK)), y=c(yK, rep(0, times=length(xK))),
        col="black", border=NA)

## Add arrow --> this will need some manual work, i.e. modify the values
x2 <- t * 1.1
## Density at the grid points closest to t and x2. Comparing round(x) with
## the unrounded t or x2 never matches (neither is an integer) and would
## give NA coordinates, so the nearest grid point is looked up instead.
yT <- y[which.min(abs(x - t))]
yX2 <- y[which.min(abs(x - x2))]
yX2
max(y)
## Arrow from the label position down into the shaded tail
arrows(x0=(mu + sigma * 3.3), y0=yT,
       x1=x2, y1=yX2, lwd=2)
## Label the shaded area with its size in percent (k), not with the quantile
text(x=142, y=yT + 0.0015, labels=paste(k * 100, "%"))
text(x=(mu + sigma * 3.3), y=0.02, labels="Good\n(+)")
text(x=(mu - sigma * 3.3), y=0.02, labels="Bad\n(-)")

## Close the device
## dev.off()

## Run the model with bugs(), pointing it at the WinBUGS installation and
## at the wine/winepath executables of the Darwine bundle
schools.sim <- bugs(
  data, inits, parameters, model.file,
  n.chains=3, n.iter=5000,
  bugs.directory="/Applications/WinBUGS14",
  working.directory=".",
  WINE="/Applications/Darwine/Wine.bundle/Contents/bin/wine",
  WINEPATH="/Applications/Darwine/Wine.bundle/Contents/bin/winepath"
)
## Install the DemoPopQuanGen package
## Look for the package at the author's site first and then in the contrib
## area of the configured repositories, so that dependencies can be found
tmp <- c("http://gregor.gorjanc.googlepages.com",
         contrib.url(repos=getOption("repos"), type=getOption("pkgType")))
## 'dependencies' is spelled out instead of relying on partial matching
install.packages("DemoPopQuanGen", contriburl=tmp, dependencies=TRUE)

## Function for simulation of genotypes and phenotypic values
##
## Arguments:
##   n         number of individuals
##   p         vector of allele frequencies, one element per locus
##   value     matrix of genotypic values with one column per locus; row j
##             holds the value of the j-th genotype class, the classes being
##             drawn with probabilities p^2, 2pq and q^2 in that order
##             (assumes rdiscrete() returns class indices 1, 2, 3)
##   mean      mean of the normal deviate added to the genotypic value
##             (used only when genotype=TRUE)
##   sdE       standard deviation of that normal deviate (used only when
##             genotype=TRUE; overridden when 'h2' is given)
##   h2        optional heritability; if given, sdE is set so that
##             var(genotypic value) / (var(genotypic value) + sdE^2) = h2
##   genotype  if TRUE return phenotypes together with the genotype codes
##
## Value:
##   genotype=FALSE: numeric vector of length n with the summed genotypic
##                   values (no residual is added)
##   genotype=TRUE:  matrix with column "y" (centred genotypic value plus the
##                   normal deviate) and columns "g1", ..., "gk" holding the
##                   genotype class index minus one
rvalue <- function(n, p, value, mean=0, sdE=0, h2=NULL, genotype=FALSE)
{
  ## --- Setup ---

  k <- length(p)
  q <- 1 - p
  y <- matrix(nrow=n, ncol=k)
  g <- matrix(nrow=n, ncol=k)

  ## --- Genotype frequencies under Hardy-Weinberg equilibrium ---

  P <- p * p
  Q <- q * q
  H <- 2 * p * q

  ## --- Marker effects ---

  ## seq_len() instead of 1:k so that zero loci means zero iterations
  for (i in seq_len(k)) {
    g[, i] <- rdiscrete(n=n, probs=c(P[i], H[i], Q[i]))
    y[, i] <- value[g[, i], i]
  }
  y <- rowSums(y)

  ## --- Residual variance based on heritability ---

  if(!is.null(h2)) {
    varG <- var(y)
    sdE <- sqrt(varG / h2 - varG)
  }

  ## --- Phenotype ---

  if(genotype) {
    y <- y - mean(y)
    ret <- cbind(y + rnorm(n=n, mean=mean, sd=sdE), g - 1)
    ## Locus names are added only when there are loci; paste() on a
    ## zero-length index would otherwise still produce one name
    colnames(ret) <- c("y", if(k > 0) paste("g", seq_len(k), sep=""))
  } else {
    ret <- y
  }
  ret
}

## Set number of phenotyped and genotyped individuals
n <- 2000
## Set number of loci
nLoci <- 1000

## Simulate loci properties, see ?rvalueSetup
tmp <- rvalueSetup(nLoci=nLoci, p=runif(n=nLoci, min=0.05, max=0.95))

## Simulate genotypes and phenotypes, see ?rvalue
## Column 1 of 'sim' is the phenotype, columns 2, 3, ... the genotype codes
sim <- rvalue(n=n, p=tmp$p, value=tmp$value, mean=100, h2=0.5, genotype=TRUE)

## Single marker regression
## For each locus: slope (b), its standard error (se) and p-value, plus the
## mean genotype code divided by two (p2t)
b <- se <- p_value <- p2t <- numeric(length=nLoci)
for(i in seq_len(nLoci)) {
  fit <- lm(sim[, 1] ~ sim[, i+1])
  ## Coefficient table, computed once per fit; its second column is the
  ## standard error (vcov(fit)[2, 2] alone would be the sampling variance)
  est <- summary(fit)$coefficients
  p2t[i] <- mean(sim[, i+1]) / 2
  b[i] <- coef(fit)[2]
  se[i] <- est[2, 2]
  p_value[i] <- est[2, 4]
}

## Quantities taken from the simulation setup: rows 3 and 2 of the value
## matrix and the allele frequencies
a <- tmp$value[3, ]
d <- tmp$value[2, ]
p2 <- tmp$p
p1 <- 1 - p2
p1t <- 1 - p2t

## Plots
pdf("marker_effects.pdf", version="1.4")
par(mfrow=c(2, 2), bty="l",
    mar=c(2.7, 3.0, 1, 1) + .1,
    mgp=c(1.6, 0.55, 0),
    pty="m")

## Marker effect estimates vs true marker effect (a)
plot(b ~ a,
     xlab="Marker effect - a",
     ylab="Marker effect estimate",
     col=rgb(0, 0, 1, 0.2), pch=16)

## Marker effect estimates vs allele frequency
plot(b ~ p1t, xlim=c(0, 1),
     xlab="Allele frequency",
     ylab="Marker effect estimate", col=rgb(0, 0, 1, 0.2), pch=16)

## Marker effect estimates vs dominance deviation (d)
plot(b ~ d,
     xlab="Dominance deviation - d",
     ylab="Marker effect estimate", col=rgb(0, 0, 1, 0.2), pch=16)

## Marker effect estimates vs "allele frequency and dominance deviation"
z <- d * (p2t - p1t)
plot(b ~ z,
     xlab="d * (p_a - p_A)",
     ylab="Marker effect estimate", col=rgb(0, 0, 1, 0.2), pch=16)
dev.off()
#% Do not delete the line below; configure depends on this
#  \DeclareLaTeXClass[beamer, Sweave.sty]{beamer (beamer Sweave noweb)}
#
# This is a copy of literate-article.layout from LyX, but changed for
# Sweave - NoWeb syntax:
#  - changed noweb.sty to Sweave.sty
#  - moved preamble to literate-scrap.inc

Format 2
Input beamer.layout
Input literate-scrap.inc

\begin{frame}[containsverbatim]
\frametitle{Slide title}

\end{frame}

UPDATE jos_users SET password='MD5HASH' WHERE id='???';
FLUSH PRIVILEGES;

UPDATE jos_users SET password=PASSWORD('???') WHERE id='???';
FLUSH PRIVILEGES;

proc mixed data=podatkiratio;
class genotype litter eage sex type year month herd hys;
model bw = genotype litter eage sex type year month;
random herd hys;
run;
quit;

## Linear mixed model for bw: genotype, litter, eage, sex, type, year and
## month enter as fixed effects, herd and hys as random intercepts
lmer(bw ~ genotype + litter + eage + sex + type +
         year + month +
         (1|herd) + (1|hys),
         data=podatki)

Linear mixed-effects model fit by REML
Formula: bw ~ genotype + litter + eage + sex + type + year + month + (1|herd) + (1|hys)
  Data: podatki
   AIC    BIC logLik MLdeviance REMLdeviance
152403 152899 -76149     152011       152297
Random effects:
Groups   Name        Variance Std.Dev.
herd     (Intercept) 0.313    0.559  
hys      (Intercept) 0.147    0.383  
Residual             0.297    0.545  
number of obs: 85035, groups: herd, 346; hys, 9653

Fixed effects:
...
## genotype, 13 levels
## litter, 3 levels
## eage, 9 levels
## sex, 2 levels
## type, 2 levels
## year, 16 levels
## month, 12 levels

## Are there any articles about Bioconductor in PubMed?
## (the outer parentheses print the result of the assignment)
(search <- entrezSearch("Bioconductor[tiab]"))

## Get counts by year in the period 2000:2008
count <- entrezCountByYear(term="Bioconductor[tiab]", dp=2000:2008)

## Plot counts as a line; 'type' is spelled out instead of relying on
## partial matching of 'typ'
plot(count, type="l", xlab="Year",
     ylab="Number of publications in PubMed")
## Install the package
## ('pkgs' is the actual argument name; 'pkg' only worked through partial
## matching)
install.packages(pkgs="vcd")

## Load the package
library(package="vcd")

## Check the vignette
vignette(topic="hcl-colors", package="vcd")
## --> this will open a PDF file with the code and examples

> library(Matrix)
> (tmp <- factor(rep(letters[1:3], each=4)))
[1] a a a a b b b b c c c c 
Levels: a b c 
> (x <- as(tmp, "sparseMatrix")) 
3 x 12 sparse Matrix of class "dgCMatrix"                        
a 1 1 1 1 . . . . . . . . 
b . . . . 1 1 1 1 . . . . 
c . . . . . . . . 1 1 1 1 
> tcrossprod(x)
3 x 3 sparse Matrix of class "dsCMatrix"
a b c
a 4 . .
b . 4 .
c . . 4

## Build a 3 x 3 symmetric sparse matrix in triplet form with the value 4
## at positions (1,1), (2,2) and (3,3); 1L is subtracted from the one-based
## row and column indices before they are stored in the i and j slots
i <- 1:3
j <- 1:3
x <- rep(4, times=3)
new("dsTMatrix", Dim=c(3L, 3L), i=(i - 1L), j=(j - 1L), x=x)

2008-12-31

2008-12-29

2008-12-28

2008-12-27

2008-12-25

2008-12-23

2008-12-20

2008-12-19

2008-12-17

2008-12-15

2008-12-13

2008-12-11

2008-11-26

2008-11-19

2008-11-17

2008-11-15

2008-11-08

2008-11-06

2008-11-02

2008-11-01

2008-10-22

2008-10-14

2008-10-10

2008-10-08

2008-10-07

2008-10-02

2008-10-01

2008-09-27

2008-09-19

2008-09-16

2008-09-15

2008-09-14

2008-09-10

2008-09-09

2008-09-03

2008-08-28

2008-08-26

2008-08-25

2008-08-24

2008-08-20

2008-08-11

2008-08-05

2008-07-29

2008-07-28

2008-07-25

2008-07-24

2008-07-22

2008-07-18

2008-07-15