Page 1 of 1

Detect dominance effect using RepeatABLE package

Posted: Thu May 05, 2016 3:08 pm
by joshuahane
Dear all,

I was running GWAS using repeatABEL package since I got a lot repeat measurements and want to use it to increase the power. However, the model used in this package (function rGLS) can only detect the additive effect since we treated the SNP dosage as linear covariate. Is there any code that I can treat the SNP dosage as categorical variable in the RepeatABLE package so I can detect the dominance effect?

Thanks in advance!

Re: Detect dominance effect using RepeatABLE package

Posted: Wed May 18, 2016 9:03 am
by larsronn
Dear all,
Testing dominance effects was not implemented as an option in the RepeatABEL package. However, below I show how a model including dominance effects can be fitted for a specific marker (SNP). It uses the hglm function and a likelihood ratio test to compute the P value.

Use it with care and please let me know if you run into problems. I will consider to include it in a future version of RepeatABEL.

(Note also that my MEE paper is accepted and should be cited, DOI: 10.1111/2041-210X.12535, when the RepeatABEL package is used).

Cheers,
Lars

library(RepeatABEL)
#Below there are two useful functions and an example for testing a model with dominance effect
get.XyZ <- function(formula.FixedEffects = y ~ 1, genabel.data, phenotype.data, id.name = "id", GRM = NULL) {
if (class(genabel.data) != "gwaa.data")
stop("The input of genabel.data is not a GenABEL object")
if (is.null(genabel.data@phdata$id))
stop("IDs not given as id in the phdata list")
if (is.null(GRM)) GRM <- compute.GRM(genabel.data)
trait <- all.vars(formula.FixedEffects)[1]
y.all <- phenotype.data[, names(phenotype.data) %in% trait]
phenotype.data <- phenotype.data[!is.na(y.all), ]
id1 <- phenotype.data[, names(phenotype.data) %in% id.name]
id2 <- genabel.data@phdata$id
test1 <- id1 %in% id2
test2 <- id2 %in% id1
genabel.data <- genabel.data[test2, ]
phenotype.data <- phenotype.data[test1, ]
id1 <- phenotype.data[, names(phenotype.data) %in% id.name]
id2 <- genabel.data@phdata$id
N = length(id2)
n = length(id1)
indx <- numeric(n)
for (i in 1:N) {
indx <- indx + i * (id1 %in% id2[i])
}
Z.indx <- diag(N)[indx, ]
y <- phenotype.data[, names(phenotype.data) %in% trait]
X <- model.matrix(formula.FixedEffects, data = phenotype.data)
eig <- eigen(GRM)
non_zero.eigenvalues <- eig$values > (1e-06)
eig$values[!non_zero.eigenvalues] <- 0
#print("GRM ready")
Z.GRM <- (eig$vectors %*% diag(sqrt(eig$values)))[indx, ]
Z <- cbind(Z.GRM, Z.indx)
list(y=y, X=X, Z.GRM=Z.GRM, Z.indx=Z.indx, indx=indx)
}

dominance.test <- function(formula.FixedEffects = y ~ 1, genabel.data, phenotype.data, id.name = "id", GRM = NULL, snpname) {
if ( length(snpname) > 1 )
stop("Only one SNP at a time can be tested, ie the length of snpname must be 1.")
if (class(genabel.data) != "gwaa.data")
stop("The input of genabel.data is not a GenABEL object")
if (is.null(genabel.data@phdata$id))
stop("IDs not given as id in the phdata list")
XyZ <- get.XyZ(formula.FixedEffects, genabel.data, phenotype.data, id.name, GRM)
SNP.matrix <- as.double(genabel.data)
k <- which.max( colnames(SNP.matrix) == snpname)
SNP <- SNP.matrix[XyZ$indx, k]
missing.genotype <- is.na(SNP)
X.SNP <- model.matrix( ~ 0 + factor(SNP) )[ , 1:2]
X0 <- XyZ$X[!missing.genotype, ]
if (is.null(dim(X0)) ) X0 <- matrix(X0, length(X0), 1)
X1 <- cbind( XyZ$X[!missing.genotype, ], X.SNP )
cat("\n Genotype frequencies")
print(table(SNP.matrix[ ,k]))
if ( min( eigen( crossprod( X1 ) )$values ) < 1e-8 ) {
cat("Reverted to additive model for marker", colnames(SNP.matrix)[k], "\n")
X.SNP <- model.matrix( ~ 0 + SNP )
X1 <- cbind( XyZ$X[!missing.genotype, ], X.SNP )
}
y <- XyZ$y[!missing.genotype]
Z.GRM <- XyZ$Z.GRM[!missing.genotype, ]
Z.indx <- XyZ$Z.indx[!missing.genotype, ]
mod1 <- hglm( y = y, X = X1, Z = cbind(Z.GRM, Z.indx), RandC = c(ncol(Z.GRM), ncol(Z.indx)) , calc.like = TRUE, maxit = 200)
mod0 <- hglm( y = y, X = X0, Z = cbind(Z.GRM, Z.indx), RandC = c(ncol(Z.GRM), ncol(Z.indx)) , calc.like = TRUE, maxit = 200)
LRT <- 2 * (mod1$like$pvh - mod0$like$pvh) #Likelihood ratio test for the two alternative marginal likelihoods
Pvalue <- 1 - pchisq(LRT, df = 2)
log10p <- -log10( Pvalue )
cat("The -log10 P-value is", round(log10p, 3), "for marker", colnames(SNP.matrix)[k], "\n")
return(Pvalue) #The function returns the P value
}


######### EXAMPLE #####
data(Phen.Data) #Phenotype data with repeated observations
data(gen.data) #GenABEL object including IDs and marker genotypes

#Test a model with dominance effect for a specific SNP
dom.test <- dominance.test(y ~ sex + age, genabel.data = gen.data, phenotype.data = Phen.Data, snpname = "rs6484850")