It is quite some time since I downloaded the CFC program from

here, see also

here. However, I only tried to use it recently and found out that is quite handy. The program allows user to calculate:

- inbreeding coefficients,
- relationships (numerator or the "whole" relationship coefficient),
- ancestral decomposition of inbreeding coefficients,
- ancestral decomposition of the average coancestry,
- optimization of matings to minimize the average inbreeding in the next generation,
- probabilities of gene origin,
- numerator relationship matrix, its decomposition and its inverse, and

- recode the identifications from 1 to n (number of all individuals) in such a way that ancestors appear always before descendants.

A nice list of possible tasks for anyone working in genetics. It's GUI is very easy to use. It is a bit clumsy if you want to import the output to other program, say

R. Therefore, I wrote two read.* functions that can import the output of numerator relationship matrix and its inverse into R.

read.CFCSparseA <- function(file, fileF=NULL, x, id, idI, out="dsCMatrix")

{

## ToDo: wrap into few lines - does it make any difference

## Read the numerator relationship matrix (A) from the CFC program

## - output in sparse format (triplets)

##

## file - character, name of the file with sparse matrix output from the CFC

## program (see bellow) non-inbred

## fileF - character, name of the file with inbreeding coefficients (see bellow)

## if NULL individuals are assumed

## x - data.frame, a pedigree with columns id (original identification) and

## idI (integer recoded identification)

## id - character, column name with original identification

## idI - character, column name with integer recoded identification

## out - character, "format" of the output: "triplet" (a data.frame) or

## "matrix" (a dsCMatrix)

##

## The file should contain only lines with the matrix elements i.e. without

## the header and footer lines. Edit the original output file to get file

## like this one (the first column is original subject identification, the

## second column is original subject identification, and the third column is

## non-zero matrix element):

##

## C A 0.5

## C B 0.5

## E A 0.25

## E B 0.25

## E D 0.5

## E C 0.5

##

## The fileF should have two columns: the first with original identification

## and the second with inbreeding coefficient of the individual:

##

## E 0.25

## F 0.05

##

require(Matrix)

## --- Check ---

## id and idI must be in the pedigree

tmp <- c(id, idI)

if(sum(tmp %in% names(x)) < 2) {

stop("wrong specification of id and/or idI column(s)")

}

## Output

tmp <- c("triplet", "dsCMatrix")

if(any(!(out %in% tmp))) {

stop(paste("out must be one of:", paste(tmp, collapse=", ")))

}

## Number of individuals

n <- as.integer(nrow(x))

## --- Import the datafile ---

tmp <- read.table(file=file)

## --- Match recoded identifications ---

tmp <- merge(tmp, x[, c(id, idI)], by.x="V1", by.y=id)

names(tmp)[names(tmp) %in% idI] <- "i"

tmp <- merge(tmp, x[, c(id, idI)], by.x="V2", by.y=id)

names(tmp)[names(tmp) %in% idI] <- "j"

names(tmp)[3] <- "x"

tmp[, 1] <- NULL

tmp[, 1] <- NULL

## --- Add diagonals = 1 + f ---

if(is.null(fileF)) { ## Assume that there is no inbreeding: diag(A) = 1

inbCoef <- x[, idI, drop=FALSE]

names(inbCoef) <- "i"

inbCoef$j <- inbCoef$i

inbCoef$x <- 1

tmp <- rbind(inbCoef[, c("x", "i", "j")], tmp)

} else { ## Get inbreeding coefficients: diag(A) = 1 + F

inbCoef <- read.table(file=fileF)[, 1:2]

names(inbCoef) <- c(id, "F")

inbCoef <- merge(inbCoef, x[, c(id, idI)], by.x=id, by.y=id, all.y=TRUE)

inbCoef[is.na(inbCoef[, "F"]), "F"] <- 0

inbCoef$j <- inbCoef[, idI]

inbCoef$F <- 1 + inbCoef[, "F"]

inbCoef[, 1] <- NULL

names(inbCoef) <- c("x", "i", "j")

tmp <- rbind(inbCoef, tmp)

}

## --- Return ---

if(out == "triplet") { ## Triplets: i, j, x

tmp[, c("i", "j", "x")]

} else { ## Symetric sparse matrix

as(new("dsTMatrix", Dim=c(n, n), uplo="L",

i=(tmp$i - 1L), j=(tmp$j - 1L), x=tmp$x), "dsCMatrix")

}

}

read.CFCSparseAInv <- function(file, out="dsCMatrix")

{

## ToDo: wrap into few lines - does it make any difference

## Read inverse of the numerator relationship matrix (A) from the CFC program

## - output in sparse format (triplets)

##

## file - character, name of the file with sparse matrix output from the CFC

## program

## out - character, "format" of the output: "triplet" (a data.frame) or

## "matrix" (a dsCMatrix)

##

## The file should contain only lines with the matrix elements i.e. without

## the header and footer lines. Edit the original output file to get file

## like this one (the first column is original subject identification, the

## second column is row index and the rest are column indexes, followed by a

## non-zero matrix element in the format columnIndex:non-zeroElement, ...):

##

## A, 1, 1:1.5,

## B, 2, 1:0.5, 2:1.5,

## D, 3, 3:1.5,

## F, 4, 4:1,

## C, 5, 1:-1, 2:-1, 3:0.5, 5:2.5,

## E, 6, 3:-1, 5:-1, 6:2,

require(Matrix)

## --- Check ---

tmp <- c("triplet", "dsCMatrix")

if(any(!(out %in% tmp))) {

stop(paste("out must be one of:", paste(tmp, collapse=", ")))

}

## --- Import and transformation ---

## A long "massage"

## 6. Split at each comma

tmp <- strsplit(split=",",

## 5. Remove the comma at the end

x=sub(pattern=",$", replacement="",

## 4. Change any colon to comma

x=gsub(pattern=":", replacement=",", fixed=TRUE,

## 3. Remove the first column (Individual ID) - ".," anything up to the first (since we use sub()) coma

x=sub(pattern=".,", replacement="",

## 2. Remove any spaces - " "

x=gsub(pattern=" ", replacement="", fixed=TRUE,

## 1. Read the file line by line

x=readLines(con=file))))))

## Number of rows = number of individuals

r <- as.integer(length(tmp))

## Length of each row

n <- sapply(tmp, length)

## Number of triplets by row - first element is a row index followed by a

## column index and a non-zero value

t <- (n - 1) %/% 2

## Number of all triplets; according to my tests, inverse of A has always at

## least one element per individual i.e. 1 for individuals without any relatives

N <- sum(t)

## Remove row index values

tmp <- lapply(tmp, function(z) z[-1])

## --- Build triplets ---

ret <- matrix(nrow=3, ncol=N)

## Row index

ret[1, ] <- rep(1:length(t), times=t)

## Column index and non-zero values

end <- cumsum(t)

start <- end - t + 1

i <- 1

while(i < (r + 1)) { ## Loop over all (r) individuals

ret[2:3, start[i]:end[i]] <- tmp[[i]]

i <- i + 1

}

## Modify for output

ret <- as.data.frame(t(matrix(as.numeric(ret), nrow=3, ncol=N, byrow=FALSE)))

names(ret) <- c("i", "j", "x")

ret[, 1:2] <- lapply(ret[, 1:2], as.integer)

## --- Return ---

if(out == "triplet") { ## Triplets: i, j, x

ret

} else { ## Symetric sparse matrix

as(new("dsTMatrix", Dim=c(r, r),

i=(ret$j - 1L), j=(ret$i - 1L), x=ret$x), "dsCMatrix")

}

}