Документ взят из кэша поисковой машины. Адрес оригинального документа : http://herba.msu.ru/shipunov/school/biol_240/en/r/mice.r
Дата изменения: Sun Sep 20 05:13:35 2015
Дата индексирования: Mon Apr 11 03:52:41 2016
Кодировка:

# saved from the older version of "mice"
.packageName <- "mice"
.First.lib <- function(library, pkg) {
require(MASS)
require(nnet)
}

"check.imputationMethod" <-
function(imputationMethod, defaultImputationMethod, visitSequence, data, nmis, nvar)
{
#
# check imputationMethod, set defaults if appropriate
# stops the program if an error is found
#
if(all(imputationMethod == "")) {
# the default argument
for(j in visitSequence) {
if(length(unique(data[, j])) == 2)
stop(paste("Column ", j, " is constant")) # NA is also a unique level
else if(is.numeric(data[, j]))
imputationMethod[j] <- defaultImputationMethod[1]
else if(length(levels(data[, j])) == 2)
imputationMethod[j] <- defaultImputationMethod[2]
else if(length(levels(data[, j])) > 2)
imputationMethod[j] <- defaultImputationMethod[3]
}
}
#
# expand user's imputation method to all visited columns
#
if(length(imputationMethod) == 1) {
# single string supplied by user
if(is.passive(imputationMethod)) stop("Cannot have a passive imputationMethod for every column.")
imputationMethod <- rep(imputationMethod, nvar)
}
#
# if user specifies multiple methods, check the length of the argument
#
if(length(imputationMethod) != nvar) stop(paste("The number of elements in imputationMethod (", length(imputationMethod),
") does not match the number of columns (", nvar, ").")) #
# check whether the elementary imputation methods are actually available on the search path
#
active <- !is.passive(imputationMethod) & nmis > 0
fullNames <- paste("impute", imputationMethod[active], sep = ".")
notFound <- !sapply(fullNames, exists, mode = "function")
if(any(notFound)) stop(paste("The following functions were not found:", paste(fullNames[notFound], collapse = ", "))) #
#
return(imputationMethod)
}
"check.predictorMatrix" <-
function(predictorMatrix, nmis, nvar)
{
# checks the predictorMatrix
# it can change the predictormatrix
# stops the program if an error is found
#
if(!is.matrix(predictorMatrix)) stop("Argument predictorMatrix should be a square matrix.")
if(nvar != nrow(predictorMatrix) | nvar != ncol(predictorMatrix))
stop("Argument predictorMatrix has improper dimensions.")
if(sum(diag(predictorMatrix) != 0))
stop("The diagonal of predictorMatrix may contain only zeroes.")
for(j in 1:nvar) {
if(!any(predictorMatrix[j, ]) & any(predictorMatrix[, j]) & nmis[j] > 0)
stop(paste("Column ", j, " is used, has missing values, but is not imputed"))
if(nmis[j] == 0 & any(predictorMatrix[j, ])) {
# warning(paste("Row ",j," of predictorMatrix is set to zero"))
predictorMatrix[j, ] <- rep(0, nvar)
}
}
return(predictorMatrix)
}
"check.visitSequence" <-
function(visitSequence, nmis, nvar)
{
# checks the visitSequence array
# stops the program if an error is found
#
# if (length(unique(visitSequence))!=length(visitSequence)) stop("Indices in visitSequence are not unique")
if(all(nmis[visitSequence] == 0)) stop(paste("No missing values present in ", expression(data)))
flags <- nmis == 0 & is.element(1:nvar, visitSequence)
if(any(flags))
stop(paste("Columns ", paste((1:nvar)[flags], collapse = 1, sep = ","), " requested to be imputed, but contain no missing values."))
}
"complete" <-
function(x, action = 1)
{
# complete
# Takes an object of class "mids", fills in the missing data, and
# return the completed data in the format specified by action.
# If action is a scalar between 1 and x$m, the function
# returns the data with the action's imputation filled in.
# action can also be one of the following strings:
# "long" produces a long matrix with nrow(x)*x$m
# rows, containing all imputed data plus
# two additional variables ".id" (containing the
# row.names and ".imp" (containing the imputation
# number).
# "broad" produces a broad matrix with ncol(x)*x$m
# columns. The first ncol(x) columns contain the first
# imputed data matrix. Column names are changed to reflect the
# imputation number.
# "repeated" produces a broad matrix with ncol(x)*x$m
# columns. The first x$m columns give the filled-in
# first variable. Column names are changed to reflect the
# imputation number.
#
# Authors: S van Buuren, K Oudshoorn
# Copyright (c) 1999 TNO Prevention and Health
# Adapted for data frames 15 okt 99
#
if(!is.mids(x)) stop("Input data must have class 'mids'.")
if(is.numeric(action) && action >= 1 && action <= x$m) {
data <- x$data
mis <- is.na(data)
#R specific change
ind <- (1:ncol(data))[apply(mis,2,sum) > 0]
for(j in ind) {
data[mis[, j], j] <- x$imp[[j]][, action]
}
return(data)
}
code <- pmatch(action, c("long", "broad", "repeated"))
if(!is.na(code) && code == 1) {
# long
m <- x$m
nr <- nrow(x$data)
nc <- ncol(x$data)
data <- as.data.frame(matrix(0, nrow = nr * m, ncol = nc + 2))
for(j in nr * 1:m) {
data[(j + 1 - nr):j, 1:nc] <- Recall(x, j/nr) # recursive
}
for(j in 1:nc) {
levels(data[, j]) <- levels(x$data[, j])
}
data[, nc + 1] <- rep(row.names(x$data), m)
data[, nc + 2] <- rep(1:m, rep(nr, m))
names(data) <- c(names(x$data), ".id", ".imp")
return(data)
}
if(!is.na(code) && code == 2) {
# broad
m <- x$m
nr <- nrow(x$data)
nc <- ncol(x$data)
data <- as.data.frame(matrix(nrow = nr, ncol = nc * m))
for(j in nc * 1:m)
data[, (j + 1 - nc):j] <- Recall(x, j/nc) # recursive
names(data) <- paste(rep(names(x$data), m), rep(1:m, rep(nc, m)), sep = ".")
return(data)
}
if(!is.na(code) && code == 3) {
# repeated
data <- Recall(x, "broad")
data <- data[, order(rep(1:ncol(x$data), x$m))]
return(data)
}
stop("Argument action not recognized. \n")
}
"data.frame.to.matrix" <-
function(x)
{
# adapted version of as.matrix.data.frame
# converts factor values to codes instead of labels
# return numeric matrix
# SvB, 12-7-99
X <- x
DD <- dim(X)
dn <- dimnames(X)
collabs <- as.list(dn[[2]])
class(X) <- NULL
p <- DD[2]
n <- DD[1]
non.numeric <- non.atomic <- F
for(j in 1:p) {
xj <- X[[j]]
if(length(dj <- dim(xj)) == 2 && dj[2] > 1) {
if(inherits(xj, "data.frame"))
xj <- X[[j]] <- as.matrix(X[[j]])
dnj <- dimnames(xj)[[2]]
collabs[[j]] <- paste(collabs[[j]], if(length(dnj)) dnj else seq(1:dj[2]), sep = ".")
}
if(length(levels(xj)) > 0 || !(is.numeric(xj) || is.complex(xj)) || inherits(xj, "dates"))
non.numeric <- TRUE
if(!is.atomic(xj))
non.atomic <- TRUE
}
if(non.atomic) {
for(j in 1:p) {
xj <- X[[j]]
if(is.recursive(xj)) {
}
else X[[j]] <- as.list(as.vector(xj))
}
}
else if(non.numeric) {
for(j in 1:p) {
xj <- X[[j]]
if(length(levels(xj))) {
X[[j]] <- as.vector(codes(xj))
}
else X[[j]] <- format(xj)
}
}
if(length(unique(sapply(X, function(Xi)
if(is.matrix(Xi)) nrow(Xi) else length(Xi)))) != 1)
stop("Illegal data frame: lengths of columns differ")
X <- unlist(X, recursive = F, use.names = F)
X <- as.numeric(X)
dim(X) <- c(n, length(X)/n)
dimnames(X) <- list(dn[[1]], unlist(collabs, use.names = F))
class(X) <- "matrix"
X
}
"glm.mids" <-
function(formula = formula(data), family = gaussian, data = sys.parent(), weights, subset, na.action, start = eta, control = glm.control(...), method = "glm.fit",
model = F, x = F, y = T, contrasts = NULL, ...)
{
# adapted 04/02/00
# repeated complete data regression (glm) on a mids data set
#
call <- match.call()
if(!is.mids(data))
stop("The data must have class mids")
analyses <- as.list(1:data$m) #
# do the repated analysis, store the result
#
for(i in 1:data$m) {
data.i <- complete(data, i)
analyses[[i]] <- glm(formula, data = data.i, ...)
}
#
# return the complete data analyses as a list of length nimp
#
object <- list(call = call, call1 = data$call, nmis = data$nmis, analyses = analyses)
class(object) <- c("mira", "glm", "lm")
return(object)
}
"impute.lda" <-
function(y, ry, x)
{
# impute.lda
#
# Imputation of categorical response variables by linear discriminant
# analysis. This function uses the Venables/Ripley functions
# lda and predict.lda to compute posterior probabilities for
# each incomplete case, and draws the imputations from this
# posterior.
#
# Authors: Stef van Buuren, Karin Oudshoorn, 11 okt 1999
#
if(!exists("lda")) library(MASS)
fy <- as.factor(y)
nc <- length(levels(fy))
fit <- lda(x, fy, subset = ry)
post <- predict(fit, x[!ry, , drop = F])$posterior
un <- rep(runif(sum(!ry)), each = nc)
idx <- 1 + apply(un > apply(post, 1, cumsum), 2, sum)
return(as.numeric(levels(fy)[idx]))
}
"impute.logreg" <-
function(y, ry, x)
{
# impute.logreg
#
# Imputation for binary response variables by the Bayesian
# logistic regression model. See Rubin (1987, p. 169-170) for
# a description of the method.
#
# The method consists of the following steps:
# 1. Fit a logit, and find (bhat, V(bhat))
# 2. Draw BETA from N(bhat, V(bhat))
# 3. Compute predicted scores for m.d., i.e. logit-1(X BETA)
# 4. Compare the score to a random (0,1) deviate, and impute.
#
# Authors: Stef van Buuren, Karin Oudshoorn, 1998
#
#
x <- cbind(1, as.matrix(x))
fit <- glm.fit(x[ry, ], y[ry], family = binomial(link = logit)
,control=glm.control(maxit = 10, trace = F, epsilon = 0.1))
# R-change PM
fit.sum <- summary.glm(fit)
beta <- coef(fit)
rv <- t(chol(fit.sum$cov.unscaled))
beta.star <- beta + rv %*% rnorm(ncol(rv))
p <- 1/(1 + exp( - (x[!ry, ] %*% beta.star)))
vec <- (runif(nrow(p)) <= p)
vec[vec] <- 1
if(is.factor(y)) {
vec <- factor(vec, c(0, 1), levels(y))
}
return(vec)
}
"impute.logreg2" <-
function(y, ry, x)
{
# impute.logreg2
#
# Imputation for binary response variables by the Bayesian
# logistic regression model. See Rubin (1987, p. 169-170) for
# a description of the method.
#
# This routine uses direct minimization of the likelihood.
#
# The method consists of the following steps:
# 1. Fit a logit, and find (bhat, V(bhat))
# 2. Draw BETA from N(bhat, V(bhat))
# 3. Compute predicted scores for m.d., i.e. logit-1(X BETA)
# 4. Compare the score to a random (0,1) deviate, and impute.
#
# Authors: Stef van Buuren, Karin Oudshoorn, 1998
#
#
x <- cbind(1, as.matrix(x))
dimnames(x)[[2]][1] <- "(Intercept)"
xobs <- x[ry, ]
yobs <- as.vector(as.numeric(y[ry])) #names(y) <- "y"
fit1 <- logitreg(xobs, yobs, intercept = F)
p <- 1/(1 + exp( - (x[ry, ] %*% coef(fit1))))
fit <- glm.fit(x[ry, ], y[ry], family = binomial(link = logit), start = p, maxit = 1, trace = F)
fit.sum <- summary.glm(fit)
beta <- coef(fit)
rv <- t(chol(fit.sum$cov.unscaled))
beta.star <- beta + rv %*% rnorm(ncol(rv))
p <- 1/(1 + exp( - (x[!ry, ] %*% beta.star)))
vec <- (runif(nrow(p)) <= p)
vec[vec] <- 1
if(is.factor(y)) {
vec <- factor(vec, c(0, 1), levels(y))
}
return(vec)
}
"impute.mean" <-
function(y, ry, x = NULL)
{
# impute.mean
#
# Unconditional mean imputation
#
return(rep(mean(y[ry]), times = sum(!ry)))
}
"impute.norm" <-
function(y, ry, x)
{
# Bayesian normal imputation of y given x according to Rubin p. 167
# x is complete.
#
# TNO Prevention and Health
# authors: S. van Buuren and K. Oudshoorn
#
x <- cbind(1, as.matrix(x))
parm <- norm.draw(y, ry, x)
return(x[!ry, ] %*% parm$beta + rnorm(sum(!ry)) * parm$sigma)
}
"impute.passive" <-
function(data, func)
{
#
# Special elementary imputation method for transformed data.
return(model.frame(func, data))
}
"impute.pmm" <-
function(y, ry, x)
{
# Imputation of y by predictive mean matching, based on
# Rubin (p. 168, formulas a and b).
# The procedure is as follows:
# 1. Draw beta and sigma from the proper posterior
# 2. Compute predicted values for yobs and ymis
# 3. For each ymis, find the observation with closest predicted value,
# and take its observed y as the imputation.
# NOTE: The matching is on yhat, NOT on y, which deviates from formula b.
# ry=T if y observed, ry=F if y missing
#
# Authors: S. van Buuren and K. Oudshoorn
# 2 dec SvB
#
x <- cbind(1, as.matrix(x))
parm <- norm.draw(y, ry, x)
yhat <- x %*% parm$beta
return(apply(as.array(yhat[!ry]), 1, pmm.match, yhat = yhat[ry], y = y[ry]))
}
"impute.polyreg" <-
function(y, ry, x)
{
# impute.polyreg
#
# Imputation for categorical response variables by the Bayesian
# polytomous regression model. See J.P.L. Brand (1999), Chapter 4,
# Appendix B.
#
# The method consists of the following steps:
# 1. Fit categorical response as a multinomial model
# 2. Compute predicted categories
# 3. Add appropriate noise to predictions.
#
# This algorithm uses the function multinom from the libraries nnet and MASS
# (Venables and Ripley).
#
# Authors: Stef van Buuren, Karin Oudshoorn, 2000
#
#
#assign("data", cbind.data.frame(y, x), where = 1)
if(!exists("multinom"))
library(nnet)
if(!exists("vcov"))
library(MASS)
fit <- multinom(formula.data.frame(data), data = data[ry, ], maxit = 200, trace = F)
post <- predict.multinom(fit, data[!ry, ], type = "probs")
#remove("data", where = 1)
fy <- as.factor(y)
nc <- length(levels(fy))
un <- rep(runif(sum(!ry)), each = nc)
idx <- 1 + apply(un > apply(post, 1, cumsum), 2, sum)
return(levels(fy)[idx])
}
"impute.sample" <-
function(y, ry, x = NULL)
{
# impute.sample
#
# Generates random sample from the observed y's
#
return(sample(y[ry], size = sum(!ry), replace = T))
}
"is.mids" <-
function(data)
{
inherits(data, "mids")
}
"is.mipo" <-
function(data)
{
inherits(data, "mipo")
}
"is.mira" <-
function(data)
{
inherits(data, "mira")
}
"is.passive" <-
function(string)
{
return("~" == substring(string, 1, 1))
}

"lm.mids" <-
function(formula, data, weights, subset, na.action, method = "qr", model = F, x = F, y = F, contrasts = NULL, ...)
{
# adapted 28/1/00
# repeated complete data regression (lm) on a mids data set
#
call <- match.call()
if(!is.mids(data))
stop("The data must have class mids")
analyses <- as.list(1:data$m) #
# do the repated analysis, store the result
#
for(i in 1:data$m) {
data.i <- complete(data, i)
analyses[[i]] <- lm(formula, data = data.i, ...)
}
#
# return the complete data analyses as a list of length nimp
#
object <- list(call = call, call1 = data$call, nmis = data$nmis, analyses = analyses)
class(object) <- c("mira", "lm")
return(object)
}
"logitreg" <-
function(x, y, wt = rep(1, length(y)), intercept = T, start, trace = T, ...)
{
# logistic regression according to Venables & Ripley, p. 293
fmin <- function(x, y, w, beta)
{
eta <- x %*% beta
p <- plogis(eta)
g <- -2 * dlogis(eta) * ifelse(y, 1/p, -1/(1 - p))
value <- -2 * w * ifelse(y, log(p), log(1 - p))
attr(value, "gradient") <- x * g * w
value
}
if(is.null(dim(x)))
dim(x) <- c(length(x), 1)
dn <- dimnames(x)[[2]]
if(!length(dn))
dn <- paste("Var", 1:ncol(x), sep = "")
p <- ncol(x) + intercept
if(intercept) {
x <- cbind(1, x)
dn <- c("(Intercept)", dn)
}
if(missing(start))
start <- rep(0, p)
fit <- ms( ~ fmin(x, y, wt, beta), start = list(beta = start), trace = trace, ...)
names(fit$par) <- dn
invisible(fit)
}

"md.pattern" <-
function(x)
{
# md.pattern
#
# computes the missing data pattern in the data
# x can be a vector, matrix or data frame
# NA's indicate missing data
# based on Schafer's prelim.norm function
# SvB, 13-7-99
#
if(!(is.matrix(x) | is.data.frame(x))) stop("Data should be a matrix or dataframe")
if(ncol(x) < 2)
stop("Data should have at least two columns")
if(is.data.frame(x))
x <- data.frame.to.matrix(x)
n <- nrow(x)
p <- ncol(x)
storage.mode(x) <- "single" # find missingness patterns
r <- 1 * is.na(x)
nmis <- as.integer(apply(r, 2, sum))
names(nmis) <- dimnames(x)[[2]] # index the missing data patterns
mdp <- as.integer((r %*% (2^((1:ncol(x)) - 1))) + 1) # do row sort
ro <- order(mdp)
##print(x[ro, ]);print(n);print(p)
x <- matrix(x[ro, ], n, p)
mdp <- mdp[ro]
r <- matrix(r[ro, ], n, p)
ro <- order(ro) # compress missing data patterns
mdpst <- as.integer(seq(along = mdp)[!duplicated(mdp)])
mdp <- unique(mdp)
npatt <- length(mdpst) # create r-matrix for display purposes
r <- 1 - r
r <- matrix(r[mdpst, ], npatt, p)
if(npatt == 1)
tmp <- format(n)
if(npatt > 1)
tmp <- format(c(mdpst[2:npatt], n + 1) - mdpst)
dimnames(r) <- list(tmp, dimnames(x)[[2]])
storage.mode(r) <- "integer" # center and scale the columns of x
#
if(npatt > 1)
nmdp <- as.integer(c(mdpst[-1], n + 1) - mdpst)
if(npatt == 1) nmdp <- n #
# sort the rows and columns according to the marginals
co <- order(nmis)
ro2 <- order(nmis.row <- p - as.integer(apply(r, 1, sum)))
r <- rbind(r[ro2, co], nmis[co])
r <- cbind(r, c(nmis.row[ro2], sum(nmis)))
r
}
"mice" <-
function(data,
m = 5,
imputationMethod = vector("character", length = ncol(data)),
predictorMatrix = (1 - diag(1, ncol(data))),
visitSequence = (1:ncol(data))[apply(is.na(data), 2, any)],
defaultImputationMethod = c("pmm", "logreg", "polyreg"),
maxit = 5,
diagnostics = T,
printFlag = T,
seed = 0)
{
call <- match.call()
set.seed(seed)
if(!(is.matrix(data) | is.data.frame(data)))
stop("Data should be a matrix or dataframe")
if((nvar <- ncol(data)) < 2)
stop("Data should contain at least two columns")
data <- as.data.frame(data)
nmis <- apply(is.na(data), 2, sum)
if(sum(nmis) == 0)
stop("No missing values found")
varnames <- dimnames(data)[[2]]
#
#If a column consists of entirely NA's, don't let it be a factor.
#
#data[,nmis==nrow(data)] <- as.numeric(NA)
#the above statement creates problems if data is a matrix (28/1/00)
#
#Perform various validity checks on the specified arguments
#
check.visitSequence(visitSequence, nmis, nvar)
predictorMatrix <- check.predictorMatrix(predictorMatrix, nmis, nvar)
imputationMethod <- check.imputationMethod(imputationMethod,
defaultImputationMethod,
visitSequence,
data, nmis, nvar)
#
# Pad the imputation model with dummy variables for the factors
#
p <- padModel(data, imputationMethod, predictorMatrix, visitSequence, nmis, nvar)
if(sum(duplicated(names(p$data))) > 0) stop("Column names of padded data should be unique")
# Initialize response matrix r, imputation array imp,
# as well as some other stuff
#
r <- (!is.na(p$data))
imp <- vector("list", ncol(p$data))
if(m > 0) {
#
# Initializes the imputed values before entering the main
# iteration loop.
#
for(j in visitSequence) {
imp[[j]] <- as.data.frame(matrix(0, nrow = sum(!r[, j]), ncol = m))
dimnames(imp[[j]]) <- list(row.names(data)[r[, j] == F], 1:m)
y <- data[, j]
ry <- r[, j]
for(i in 1:m) {
if(nmis[j] < nrow(data) - 1) {
# if (is.passive(imputationMethod[j]))
# p$data[ry,j] <- data[ry,j] <- model.frame(imputationMethod[j],data[ry,])
imp[[j]][, i] <- impute.sample(y, ry)
}
else imp[[j]][, i] <- rnorm(nrow(data))
}
}
}
q <- sampler(p, data, m, imp, r, visitSequence, maxit, printFlag) # restore the original NA's in the data
for(j in p$visitSequence)
p$data[(!r[, j]), j] <- NA # delete data and imputations of automatic dummy variables
imp <- q$imp[1:nvar]
names(imp) <- varnames
dimnames(predictorMatrix) <- list(varnames, varnames)
names(imputationMethod) <- varnames
names(visitSequence) <- varnames[visitSequence] # save, and return
midsobj <- list(
call = call,
data = as.data.frame(p$data[, 1:nvar]),
m = m,
nmis = nmis,
imp = imp,
imputationMethod = imputationMethod,
predictorMatrix = predictorMatrix,
visitSequence = visitSequence,
seed = seed,
iteration = q$iteration,
lastSeedValue = .Random.seed,
chainMean = q$chainMean,
chainVar = q$chainVar)
if(diagnostics)
midsobj <- c(midsobj, list(pad = p))
class(midsobj) <- "mids"
return(midsobj)
}
"mice.mids" <-
function(obj, maxit = 1, diagnostics = T, printFlag = T)
{
# MICE.MIDS -
# MICE algorithm that takes mids object as input, iterates maxit
# iteration and produces another mids object as output.
if(!is.mids(obj)) stop("Object should be of type mids.")
if(maxit < 1) return(obj) #
call <- match.call()
nvar <- ncol(obj$data)
sumIt <- obj$iteration + maxit
varnames <- dimnames(obj$data)[[2]] #
#
if(is.null(obj$pad))
p <- padModel(obj$data, obj$imputationMethod, obj$predictorMatrix, obj$visitSequence, obj$nmis, nvar)
else p <- obj$pad
r <- (!is.na(p$data))
imp <- vector("list", ncol(p$data))
for(j in obj$visitSequence)
imp[[j]] <- obj$imp[[j]]
#?? ueberpruefen!!!
#assign(".Random.seed", obj$lastSeedValue, w = 1) #
#
q <- sampler(p, obj$data, obj$m, imp, r, obj$visitSequence, maxit, printFlag) # restore the original NA's in the data
for(j in p$visitSequence)
p$data[(!r[, j]), j] <- NA # delete data and imputations of automatic dummy variables
data <- p$data[, 1:nvar]
imp <- q$imp[1:nvar]
names(imp) <- varnames # combine with previous chainMean and chainVar
chainMean <- chainVar <- array(0, dim = c(length(obj$visitSequence), sumIt, obj$m), dimnames = list(varnames[obj$visitSequence], 1:sumIt, paste("Chain", 1:
obj$m)))
for(j in 1:length(obj$visitSequence)) {
if(!is.factor(obj$data[, obj$visitSequence[j]])) {
if(obj$iteration == 0) {
chainMean[j, , ] <- q$chainMean[j, , ]
chainVar[j, , ] <- q$chainVar[j, , ]
}
else {
chainMean[j, , ] <- rbind(obj$chainMean[j, , ], q$chainMean[j, , ])
chainVar[j, , ] <- rbind(obj$chainVar[j, , ], q$chainVar[j, , ])
}
}
}
# save, and return
midsobj <- list(call = call, data = as.data.frame(data), m = obj$m, nmis = obj$nmis, imp = imp, imputationMethod = obj$imputationMethod, predictorMatrix
= obj$predictorMatrix, visitSequence = obj$visitSequence, seed = obj$seed, iteration = sumIt, lastSeedValue = .Random.seed, chainMean = chainMean,
chainVar = chainVar)
if(diagnostics)
midsobj <- c(midsobj, list(pad = p))
class(midsobj) <- "mids"
return(midsobj)
}
"mids" <-
"Use '?mids' to access the help file"
"mipo" <-
"Use '?mipo' to access the help file"
"mira" <-
"Use '?mira' to access the help file"

"padModel" <-
function(data, imputationMethod, predictorMatrix, visitSequence, nmis, nvar)
{
# Called by mice().
# Augments the imputation model by including dummy variables. Adapts data, predictorMatrix,
# imputationMethod and visitSequence.
# Returns a list whose components make up the padded model.
#
categories <- data.frame(yes.no.categorical = factor(rep(F, nvar),
levels = c("TRUE", "FALSE")),
number.of.dummies = rep(0, nvar),
yes.no.dummy = factor(
rep(F, nvar), levels = c("TRUE", "FALSE")), corresponding.column.dummy = rep(0, nvar))
# zero is default in corresponding.column.dummy for no dummy variable
for(j in 1:nvar) {
if(is.factor(data[, j]) && any(predictorMatrix[, j])) {
categories[j, 1] <- T
data[, j] <- C(data[, j], treatment) #assign contrast-attribute, choice treatment, to factor
n.dummy <- length(levels(data[, j])) - 1
categories[j, 2] <- n.dummy
predictorMatrix <- rbind(predictorMatrix, matrix(0, ncol = ncol(predictorMatrix), nrow = n.dummy))
predictorMatrix <- cbind(predictorMatrix, matrix(rep(predictorMatrix[, j], times = n.dummy), ncol = n.dummy))
predictorMatrix[1:nvar, j] <- rep(0, times = nvar)
if(any(predictorMatrix[j, ])) {
predictorMatrix[(ncol(predictorMatrix) - n.dummy + 1):ncol(predictorMatrix), j] <- rep(1, times = n.dummy)
#in predictorMatrix only F/T is allowed due to other part program.
visitSequence <- append(visitSequence, ncol(predictorMatrix) - n.dummy + 1, (1:length(visitSequence))[visitSequence == j])
}
data <- (cbind(data, matrix(0, ncol = n.dummy, nrow = nrow(data))))
data[is.na(data[, j]), (ncol(predictorMatrix) - n.dummy + 1):ncol(predictorMatrix)] <- NA
#R-change PM
cat.column <- data[!is.na(data[, j]), j]
data[!is.na(data[, j]), (ncol(predictorMatrix) - n.dummy + 1):ncol(predictorMatrix)] <- model.matrix( ~ cat.column - 1)[, -1]
names(data)[(ncol(predictorMatrix) - n.dummy + 1):ncol(predictorMatrix)] <- paste(attr(data, "names")[j], "d", (1:n.dummy), sep = ".")
imputationMethod <- c(imputationMethod, rep("dummy", n.dummy))
categories <- rbind(categories, data.frame(yes.no.categorical = rep(F, n.dummy), number.of.dummies = rep(0, n.dummy), yes.no.dummy = rep(T,
n.dummy), corresponding.column.dummy = rep(j, n.dummy)))
}
}
varnames <- dimnames(data)[[2]]
dimnames(predictorMatrix) <- list(varnames, varnames)
names(imputationMethod) <- varnames
names(visitSequence) <- varnames[visitSequence]
dimnames(categories)[[1]] <- dimnames(data)[[2]]
return(list(data = as.data.frame(data), predictorMatrix = predictorMatrix, imputationMethod = imputationMethod, visitSequence = visitSequence, categories
= categories))
}
"plot.mids" <-
function(imp)
{
names <- dimnames(imp$chainVar[, , 1])[[1]]
s.mat <- sqrt(imp$chainVar)
m.mat <- imp$chainMean
par(mfrow = c((dim(m.mat)[1]), 2))
ylimits1.min <- apply(m.mat, 1, min)
ylimits1.max <- apply(m.mat, 1, max)
ylimits2.min <- apply(s.mat, 1, min)
ylimits2.max <- apply(s.mat, 1, max)
for(m in 1:(dim(m.mat)[1])) {
plot(1:(dim(m.mat)[2]), m.mat[m, , 1], type = "n", ylim = c(ylimits1.min[m], ylimits1.max[m]), xlab = "iteration", ylab = "mean")
title(names[m])
for(i in 1:(dim(m.mat)[3]))
lines(1:(dim(m.mat)[2]), m.mat[m, , i], col = i)
plot(1:(dim(s.mat)[2]), s.mat[m, , 1], type = "n", ylim = c(ylimits2.min[m], ylimits2.max[m]), xlab = "iteration", ylab = "sd")
title(names[m])
for(i in 1:(dim(s.mat)[3]))
lines(1:(dim(s.mat)[2]), s.mat[m, , i], col = i)
}
}
"pool" <-
function(object, method = "smallsample")
{
# General pooling function for multiple imputation parameters
# object: an object of class mira (Multiple Imputed Repeated Analysis)
#
# Note: This code will generally produce valid results for any complete
# data analysis for which an appropriate Var-Cov matrix of the estimates
# can be found. (This is done here by Harrell's function Varcov).
# In R with VR vcov in MASS.
#
# Stef van Buuren, Karin Oudshoorn, July 1999.
#
# Check the arguments
#

call <- match.call()
if(!is.mira(object))
stop("The object must have class 'mira'")
if((m <- length(object$analyses)) < 2)
stop("At least two imputations are needed for pooling.\n")
#
# Set up arrays
#
analyses <- object$analyses
k <- length(coef(analyses[[1]]))
names <- names(coef(analyses[[1]]))
qhat <- matrix(NA, nrow = m, ncol = k, dimnames = list(1:m, names))
u <- array(NA, dim = c(m, k, k), dimnames = list(1:m, names, names)) #
# Fill arrays
#
for(i in 1:m) {
fit <- analyses[[i]]
qhat[i, ] <- coef(fit)
u[i, , ] <- vcov(fit)
}
#
# Compute within, between and total variances
#
qbar <- apply(qhat, 2, mean) # (3.1.2)
ubar <- apply(u, c(2, 3), mean) # (3.1.3)
e <- qhat - matrix(qbar, nrow = m, ncol = k, byrow = T)
b <- (t(e) %*% e)/(m - 1) # (3.1.4)
t <- ubar + (1 + 1/m) * b # (3.1.5)
#
# compute scalar inference quantities
#
r <- (1 + 1/m) * diag(b/ubar) # (3.1.7)
f <- (1 + 1/m) * diag(b/t) # fraction of missing information
df <- (m - 1) * (1 + 1/r)^2 # (3.1.6)
if(method == "smallsample") {
# Barnard-Rubin adjustment
dfc <- fit$df.residual
df <- dfc/((1 - (f/(m + 1)))/(1 - f) + dfc/df)
}
names(r) <- names(df) <- names(f) <- names
fit <- list(call = call, call1 = object$call, call2 = object$call1, nmis = object$nmis, m = m, qhat = qhat, u = u, qbar = qbar, ubar = ubar, b = b, t = t,
r = r, df = df, f = f)
class(fit) <- c("mipo", class(object))
return(fit)
}
"print.mids" <-
function(x, ...)
{
if(is.mids(x)) {
cat("Multiply imputed data set")
cat("\nCall:\n")
print(x$call)
cat("Number of multiple imputations: ", x$m)
cat("\nMissing cells per column:\n")
print(x$nmis)
cat("Imputation methods:\n")
print(x$imputationMethod)
cat("VisitSequence:\n")
print(x$visitSequence)
cat("PredictorMatrix:\n")
print(x$predictorMatrix)
cat("Random generator seed value: ", x$seed, "\n")
}
else print(x)
invisible()
}
"print.mipo" <-
function(x, ...)
{
if(!is.null(x$call)) {
cat("Call: ")
dput(x$call)
}
cat("\nPooled coefficients:\n")
print(x$qbar, ...) # cat("Relative increase in variance due to nonresponse per parameter:", "\n")
# print(x$r)
cat("\nFraction of information about the coefficients missing due to nonresponse:", "\n")
print(x$f)
invisible(x)
}
"print.mira" <-
function(object, ...)
{
## prints the mira object; A mira object
## is in fact a list, so it will be printed as such.
## KO, 3/2/00
if(is.mira(object)) print.listof(object) else print(object)
invisible()
}
"sampler" <-
function(p, data, m, imp, r, visitSequence, maxit, printFlag)
{
#
# The sampler controls the actual Gibbs sampling iteration scheme
# This function is called by mice and mice.mids
#
# Authors: S van Buuren, K Oudshoorn
# Copyright (c) 2000 TNO Prevention and Health
# set up array for convergence checking
if(maxit > 0) chainVar <- chainMean <-
array(0,
dim = c(length(visitSequence), maxit, m),
dimnames = list(dimnames(data)[[2]][visitSequence], 1:maxit,
paste("Chain", 1:m))) else chainVar <- chainMean <- NULL
if(maxit < 1)
iteration <- 0
else {
if(printFlag)
cat("\n iter imp variable")
for(k in 1:maxit) {
#begin k loop : iteration loop
iteration <- k
for(i in 1:m) {
#begin i loop : repeated imputation loop
if(printFlag) cat("\n ", iteration, " ", i) # fill the data with the last set of imputations
for(j in visitSequence)
p$data[!r[, j], j] <- imp[[j]][, i] # augment the data with the actual dummy variables
for(j in setdiff(p$visitSequence, visitSequence)) {
#R -change PM
cat.columns <- p$data[, p$categories[j, 4]]
p$data[, (j:(j + p$categories[p$categories[j, 4], 2] - 1))] <- matrix((model.matrix( ~ cat.columns - 1)[, -1]), ncol = p$
categories[p$categories[j, 4], 2], nrow = nrow(p$data))
}
#
# iterate once over the variables of the augmented model
#
for(j in p$visitSequence) {
if(printFlag)
cat(" ", dimnames(p$data)[[2]][j])
theMethod <- p$imputationMethod[j]
if((!is.passive(theMethod)) & theMethod != "dummy") {
# for a true imputation method
imp[[j]][, i] <- do.call(paste("impute", theMethod, sep = "."), args = list(p$data[, j], r[, j], p$data[, p$predictorMatrix[j,
] == 1, drop = F]))
p$data[!r[, j], j] <- imp[[j]][, i]
}
else if(is.passive(theMethod)) {
#R specfi change
imp[[j]][, i] <- model.frame(as.formula(theMethod), p$data[!r[, j], ])
p$data[!r[, j], j] <- imp[[j]][, i]
}
else if(theMethod == "dummy") {
cat.columns <- p$data[, p$categories[j, 4]]
p$data[, (j:(j + p$categories[p$categories[j, 4], 2] - 1))] <- matrix((model.matrix( ~ cat.columns - 1)[, -1]), ncol = p$
categories[p$categories[j, 4], 2], nrow = nrow(p$data))
remove("cat.columns")
}
}
# end j loop
}
# end i loop
for(j in 1:length(visitSequence)) {
if(!is.factor(data[, visitSequence[j]])) {
#R-specific Change PM
#chainVar[j, k, ] <- colVars(imp[[visitSequence[j]]])
#chainMean[j, k, ] <- colMeans(as.matrix(imp[[visitSequence[j]]]))
chainVar[j, k, ] <- apply(imp[[visitSequence[j]]],2,var)
chainMean[j, k, ] <- apply(as.matrix(imp[[visitSequence[j]]]),2,mean)
}
}
}
# end iteration loop
if(printFlag)
cat("\n")
}
return(list(iteration = iteration, imp = imp, chainMean = chainMean, chainVar = chainVar))
}
"summary.mids" <-
function(x, ...)
{
if(is.mids(x))
print.list(x)
else print(x)
invisible()
}
"summary.mipo" <-
function(x)
{
#
# summary method for the pooled analysis results
#
# x: object of class mipo
#
table <- array(x$qbar, dim = c(length(x$qbar), 9))
dimnames(table) <- list(labels(x$qbar), c("est", "se", "t", "df", "Pr(>|t|)", "lo 95", "hi 95", "missing", "fmi"))
table[, 2] <- sqrt(diag(x$t))
table[, 3] <- table[, 1]/table[, 2]
table[, 4] <- x$df
table[, 5] <- if(all(x$df) > 0) 2 * (1 - pt(abs(table[, 3]), x$df)) else NA
table[, 6] <- table[, 1] - qt(0.975, x$df) * table[, 2]
table[, 7] <- table[, 1] + qt(0.975, x$df) * table[, 2]
table[, 8] <- x$nmis[names(x$qbar)]
table[, 9] <- x$f
return(table)
}
"summary.mira" <-
function(miraobject, correlation = T)
{
# This summary function is for a mira object.
# Then the seperate analyses are of class lm (glm), it calls
# sequentially summary.lm (summary.glm) for all analyses.
# KO, 4/2/00
for(i in (1:length(miraobject$analyses))) {
cat("\n", "##summary analyses of imputation", i, ":\n")
print(summary(miraobject$analyses[[i]]))
}
}

norm.draw <-
function(y, ry, x)
{
# norm.draw
# Draws values of beta and sigma for Bayesian linear regrssion imputation
# of y given x according to Rubin p. 167
# x is complete.
#
# TNO Prevention and Health
# authors: S. van Buuren and K. Oudshoorn
#

# adapted 17/12 nrow(x) should be sum(ry)
xobs <- x[ry, ]
yobs <- y[ry]
v <- solve(t(xobs) %*% xobs)
coef <- t(yobs %*% xobs %*% v)
residuals <- yobs - xobs %*% coef
sigma.star <- sqrt(sum((residuals)^2)/rgamma(1, sum(ry) - ncol(x)))
beta.star <- coef + (t(chol((v + t(v))/2)) %*% rnorm(ncol(x))) * sigma.star
parm <- list(beta.star, sigma.star)
names(parm) <- c("beta", "sigma")
return(parm)
}

pmm.match <-
function(z, yhat = yhat, y = y)
{
# Auxilary function for impute.pmm.
# z = target predictive value (scalar)
# yobs = array of yhat, to be matched against z
# y = array of donor data values, same length as yobs.
# Returns the donor value for which abs(z-yobs) is minimal.
# In case of multiple matches, a random draw is made.
d <- abs(yhat - z)
m <- y[d == min(d)]
if(length(m) > 1)
m <- sample(m, 1)
return(m)
}