######################################################
### R functions used to calculate Empirical Bayes
### Estimates of rates based on vector of counts. The rates
### refer to Age Specific Fertility Rates and the vector
### entries are the age classes.
###
### We have data from many areas and borrow strength from
### other areas and other age classes.
###
### References
### * Marshall (1991), Applied Statistics (now, JRSS-C),40(2), 283-294.
### * Longford (1999), J. Royal Statistical Society A, 162:227-245
### * Assuncao, Schmertmann, Potter, & Cavenaghi (2005), Demography 42(3):537-558
###
### This program was written by
### Renato Martins Assuncao
### Belo Horizonte, September 4, 2002
###
### Edited & simplified by Carl Schmertmann, July 2009
###
######################################################
### HOW TO USE THIS CODE
### EITHER
### 1. Paste this ascii file in the commands window
### of Splus or R, OR
### 2. Use the command below in the commands window
### source("path//of//this//file//eb_functions.R")
######################################################
########################################################
#-----------------------------------
# construct.nhoods function
#
# Inputs:
# cox and coy are vectors of length N with x and y coordinates of each place i=1..N
# events is an N x K matrix of event counts for N places and K groups (usually age groups)
# expos is an N x K matrix of exposure for N places and K groups
# pop.min is a scalar constant with the desired minimum population in a local neighborhood
# ngb.min is a scalar constant with the desired minimum number of places in a neighborhood
#
# Output:
# a list of N arrays, one per place i=1..N
# the i-th list element is an A[i] x K x 2 array, where A[i] is the number of
# places in i's neighborhood, K is the number of groups, and the final dimension is (events or exposure)
#
# for ex., if nhood.list is the output and place 34 has 19 neighbors, then
# nhood.list[[34]] is a 19 x K x 2 array
# with nhood.list[[34]][,,1] = a 19 x K event count matrix and
# nhood.list[[34]][,,2] = a 19 x K exposure matrix
#
# Note: With several thousand places this function may take several minutes to run. It may be
# best to construct the neighborhoods once and save the results with dput() for reuse.
#-----------------------------------
construct.nhoods <- function(cox,coy,events,expos,pop.min,ngb.min){
narea <- nrow(events)
K <- ncol(events)
list.nhood <- vector("list",narea)
for(i in 1:narea){
dist <- sqrt( (cox[i]-cox)^2 + (coy[i]-coy)^2 ) # Euclidean (flat-Earth) distance from i to 1..narea
pop <- apply(expos, 1, sum)
odist <- order(dist) #indices of areas, from closest to farthest from i (odist[1] will = i)
pop.acum <- cumsum(pop[odist]) # j-th element of this vector is the cumulative popn
# of the j closest areas to i
# where the first-closest is i itself
nn <- max(ngb.min, 1 + sum(pop.acum < pop.min)) # number of neighbors >= ngb.min
nhood <- odist[1:nn] # keep the indices for i's neighborhood
# nhood[1] will = i always
## minor Cleanup: if a neighborhood has zero events in any age category,
## expand the neighborhood by multiples of pop.min until there is at least one event
poptest <- pop.min
needs.more <- ( any( apply(events[nhood,],2,sum) == 0))
while (needs.more) {
poptest <- poptest + pop.min
nn <- sum(pop.acum < poptest) + 1 # size of enlarged neighborhood
nhood <- odist[1:nn] # indices of enlarged neighborhood
needs.more <- (any(apply(events[nhood,],2,sum) == 0)) # do we still need larger?
}
EV <- events[nhood,]
EX <- expos[nhood,]
list.nhood[[i]] <- array(c(EV, EX), c(nn,K,2),
dimnames=list(nhood,NULL,c("event","expos")))
}
return(list.nhood)
} # end construct.nhoods()
#------------------------------------------
# SigmaHat function
#
# Inputs:
# Expos is an A x K matrix of exposure in A places and K groups
# Event is an A x K matrix of event counts in A places and K groups
# Output:
# K x K matrix estimate of the covariance of the true rate vectors theta
# across the A places
#
# The notation follows the Assuncao et al. 2005 Demography article, p. 544
#------------------------------------------
SigmaHat <- function(Expos, Event){
## Number of areas
A <- nrow(Expos)
## Number of components or sub-populations
K <- ncol(Expos)
## Vec 1xK with total sub-pop (pop of each component)
Ktot <- apply(Expos,2,sum)
## P is an A x K matrix whose (s,k) element is the neighborhood share of the s-th region for the k-th group
## For ex., P[2,4] = .55 means that 55% of the neighborhood's group-4 exposure was in region 2
P <- sweep(Expos, 2, Ktot, "/")
## vec 1 x K with neighborhood crude rates
mu.hood <- apply(Event,2,sum)/Ktot
## Implementation, pt. 1
## Matrix A x K with Area direct ML rates (per 1)
## if there are any cells with zero population at risk, define rate =0
mu.local <- Event/Expos
mu.local[Expos==0] <- 0
## estimated avg sample noise for each component in the neighborhood
## in the notation of the 2005 Demography paper, this is sum over s { P[s] * Omega[s] }
OmegaBar <- matrix(0,K,K)
for (i in 1:A) {
OmegaHat <- diag( mu.hood / Expos[i,] )
OmegaBar <- OmegaBar + diag(P[i,]) %*% OmegaHat
}
## estimated sampling variance of ML estimators (NOT true rates yet) in nhood
Qhat <- matrix(0,K,K)
for (i in 1:A) {
Phalf <- diag(sqrt(P[i,]))
devn <- mu.local[i,]-mu.hood
Qhat <- Qhat + Phalf %*% outer(devn,devn) %*% Phalf
}
Num <- Qhat - OmegaBar
Sigma2 <- matrix(0,K,K)
for (i in 1:K){
for (j in i:K){
den <- sum( sqrt( P[,i] * P[,j]) )
Sigma2[i,j] <- Sigma2[j,i] <- Num[i,j]/den
}
}
return(Sigma2)
} # end SigmaHat()
#-----------------------------------
# AdjustMatrix function
#
# Input:
# a symmetric matrix A
# Output:
# a symmetric matrix with the same eigenvectors as A, but with eigenvalues in
# the unit interval. (This makes sure that output is positive semi-definite, and constrains
# the amount of shrinkage).
#-----------------------------------
AdjustMatrix <- function(MAT)
{
e <- eigen(MAT)
adjMAT <- e$vec %*% diag( pmin( pmax(0,e$val), 1) ) %*% t(e$vec)
return(adjMAT)
} # end AdjustMatrix()
##############################################################
## Empirical Bayes and Maximum Likelihood
## estimation functions
##############################################################
#-----------------------------------
# EB function
#-----------------------------------
EB <- function(evex.array, univar=F, global=F) {
##
## evex.array is a A x K x 2 array
## evex[,,1] or [,,"event"] AxK matrix with event counts (events)
## evex[,,2] or [,,"expos"] AxK matrix with number of person-years (exposure)
##
## the A rows correspond to A areas in a neighborhood
##
## if global=F, function returns a single Kx1 vector for area #1 (focal area of nhood)
## if global=T, function returns an AxK matrix for all A areas (all shrunk to the same
## nhood means)
##
## split array into events and exposure (use dimnames if available. Otherwise
## assume that first component is events and second is expos
nonames <- length(dimnames(evex.array)[[3]]) == 0
if (nonames) {
Event <- evex.array[,,1]
Expos <- evex.array[,,2]
} else {
Event <- evex.array[,,"event"]
Expos <- evex.array[,,"expos"]
}
## Number of areas
A <- nrow(Expos)
## Number of components or sub-populations
K <- ncol(Expos)
## Vec 1xK with total sub-pop (pop of each component)
Ktot <- apply(Expos,2,sum)
## A x K matrix of area crude rates (per 1)
mu.local <- Event/Expos
## 1 x K vector of neighborhood crude rates
mu.hood <- apply(Event,2,sum)/Ktot
## Matrix Sigma
Sigma2 <- SigmaHat(Expos, Event)
## if these are univariate estimates, assume covariances are zero
if (univar) Sigma2 <- diag( diag(Sigma2) )
## adjust if necessary to make PSD (eigenvalues in [0,1])
Sigma2 <- AdjustMatrix(Sigma2)
## A or 1 vectors of output, depending on value of global
Aout <- ifelse(global, A, 1)
## EB rates
EBrates <- matrix(0,Aout,K)
for(i in 1:Aout){
## Weight matrix for each area
W <- Sigma2 %*% solve(Sigma2 + diag(as.numeric(mu.hood/Expos[i,])))
## EBrates of each area (with a floor of zero)
EBrates[i,] <- mu.hood + W %*% (mu.local[i,] - mu.hood)
EBrates[i,] <- pmax(0, EBrates[i,])
}
dimnames(EBrates) <- list( NULL ,paste("eb",seq(K),sep="") )
return(EBrates)
} # end EB()
#-----------------------------------
# ML function
#-----------------------------------
ML <- function(evex.array) {
##
## evex.array is a A x K x 2 array
## evex[,,1] or [,,"event"] AxK matrix with event counts (events)
## evex[,,2] or [,,"expos"] AxK matrix with number of person-years (exposure)
##
## the A rows correspond to A areas in a neighborhood
## split array into events and exposure (use dimnames if available. Otherwise
## assume that first component is events and second is expos
nonames <- length(dimnames(evex.array)[[3]]) == 0
if (nonames) {
Event <- evex.array[,,1]
Expos <- evex.array[,,2]
} else {
Event <- evex.array[,,"event"]
Expos <- evex.array[,,"expos"]
}
## ML rates
MLrates <- Event/Expos
dimnames(MLrates) <- list( NULL ,paste("ml",seq(ncol(Event)),sep="") )
return(MLrates)
} # end ML()