#-- housekeeping

rm(list=ls())
graphics.off()
memory.limit(4000)

windows(record=T)

set.seed(031)

incsv  = "brazil_2000_municipal_fertility_data.csv"
outcsv = "brazil_2000_municipal_fertility_estimates.csv"

### read the Census long-form data

data <- as.matrix(read.csv(incsv, row.names=34, as.is=TRUE) )

coord   <- data[, c("X_COORD","Y_COORD")]
women   <- data[, paste("wwnw00",1:7,sep="") ]
births  <- data[, paste("cblynw00",1:7,sep="") ]
ceb     <- data[, paste("cebnw00",1:7,sep="") ]
munic   <- as.matrix(read.csv("municipio_names.csv"))[,"name"]

rawf = births/women
rawp = ceb/women

M <- length(munic)

### read the smoothed Empirical Bayes fx and Px schedules
fB = dget("fB.dput")
PB = dget("PB.dput")

#########################################################
# Read calibrated interpolation constants
# from Schmertmann (2012), "Calibrated spline estimation 
#  of detailed fertility schedules from abridged data"
#
# MPIDR Working Paper WP-2012-022 (July 2012).
# Rostock, Max Planck Institute for Demographic Research
#########################################################

Const = dget("calibrated spline Cdata.dput")

## grab the right subset of the C contants for calculating moments, then
##  rearrange for easy multiplication
L = seq(15,45,5)
M1 = t(Const["g=7",paste(L),paste(L+2.5),"1" ])   ## M1 %*% f5 gives first   moments integral{ a * f(a) | a < x }
M0 = t(Const["g=7",paste(L),paste(L+2.5),"0" ])   ## M0 %*% f5 gives zero-th moments integral{ f(a) | a < x }

### Construct Brass Manual X multipliers

Brass.W <- matrix(c(2.5430, -0.1760,  0.0120,  0.0120,  0.0120,  0.0120,  0.0120,
                    5.0805,  3.4015, -0.6735,  0.0805,  0.0805,  0.0805,  0.0805,
                    5.0725,  5.0725,  3.3375, -0.5545,  0.0725,  0.0725,  0.0725,
                    5.0145,  5.0145,  5.0145,  3.4565, -0.5485,  0.0145,  0.0145,
                    5.0030,  5.0030,  5.0030,  5.0030,  3.5210, -0.7600,  0.0030,
                    4.9995,  4.9995,  4.9995,  4.9995,  4.9995,  3.8615, -2.4815,
                    4.9990,  4.9990,  4.9990,  4.9990,  4.9990,  5.0150,  3.8270), 
                   nrow=7,ncol=7, byrow=T)

### calculate Census and EB TFRs

tfr.Cens   = apply(5 * rawf, 1, sum)
tfr.EB     = apply(5 * fB, 1, sum)

r = tfr.EBB = tfr.PF2 = tfr.PF23 = ( NA * tfr.EB )   # matrices for holding results

### function to display data and regression fit for one municipio

show.regression = function(mm,xx,yy,sdev,tfr,r,sub=1:7) {

  plot( 2000+xx , exp(yy), main=paste(mm,munic[mm]), 
        xlim=2000+c(-25,0),ylim=c(1.5, 9.5), type="n", bty="l",
             xlab="Year",ylab="TFR")

  text( 2000+xx[sub], exp(yy[sub]), paste(seq(15,45,5)[sub]), cex=1.40)
##  segments( 2000+xx[sub], exp(yy[sub]-2*sdev), 2000+xx[sub], exp(yy[sub]+2*sdev), lwd=2, col="grey")

  tt = 2000+seq(-25,0,.25) 
  lines(tt, tfr * exp(r*(tt-2000)), type="l", lwd=4, col="red")
 
  text(2000, tfr.EB[mm], "EB", col="black", cex=1.30)  # EB
  text(2000, tfr.Cens[mm], "C", col="royalblue", cex=1.30)  # Census

  abline(h=0:10,v=seq(1970,2000,5), lty=2, col="grey")
  abline(v=2000, lwd=3)

}

########################## loop over municipios and calculate EBB estimators ####################

## a few randomly selected municipios for plotting
sel = c(sample(M, 25), which(apply(women,1,sum) > 3500))

sub = 2:7  # which age groups will be used in regression

for (mm in 1:M) {

  ### (mux - x) values, estimated from calibrated spline interpolation

  xx = (M1 %*% fB[mm,]) / (M0 %*% fB[mm,])   - seq(17.5, 47.5, 5)

  PP     = PB[mm,]
  FF     = M0 %*% fB[mm,]              ## estimated using calibrated spline interpolation
  FBrass = Brass.W %*% fB[mm,]         ## estimated using Brass Manual X procedure 
  yy     = log(tfr.EB[mm] * PP/FF)

  ww  = 1 / ( 1/PP + 1/FF -1/tfr.EB[mm] )  ## (approx) inverse variance weights

  fit = lm( yy ~ xx, weights=ww, subset=sub)

  tfr.EBB[mm] = exp(fit$coef[1])
  r[mm]       = fit$coef[2]

  tfr.PF2[mm]  = tfr.EB[mm] * PP[2]/FBrass[2]
  tfr.PF23[mm] = tfr.EB[mm] * mean( PP[2:3]/FBrass[2:3])

  ### display a few demonstration plots during the calculations
  if (mm %in% sel) { 
    nbar = sum(women[mm,])/35
    sdev = sqrt(ww/nbar)
    show.regression(mm,xx,yy,sdev,tfr=tfr.EBB[mm],r=r[mm], sub=sub) 
  }

} # for mm


##-- Append new calculations to the input data, and save

## neighborhood data and averages

## read the neighborhood lists (they are *slightly* different for f and P, and remember
##   that the first element in each neighborhood vector is the focal municipio

## for ex., the fhood[[2]] is a 28x7x2 array for the neighborhood centered on
##  municipio number 2, Ariquemes (RO). For each of the 28 municipios in Ariquemes's 
##  neighborhood [2 30 19 40 ...] it contains a 7 age group x {events,expos} matrix of 
##  weighted counts of births and women

### function to calculate neighborhood-level rates by age from an element of fhood or phood
calcRates = function(hoodData) {
  tmp = apply( hoodData, c(2,3), sum)
  return( tmp[,"event"]/tmp[,"expos"] )
} #calcRates

### read the lists of (wtd) neighborhood data for P and f
fff = dget("nhood.fdata.dput")
PPP = dget("nhood.pdata.dput")

### calculate all the neighborhood-level rates for f and P
###  results will be M x 7 matrices
f.hood = t( sapply( fff, calcRates) )
P.hood = t( sapply( PPP, calcRates) )

rm(fff,PPP)  # we no longer need these big variables

#--------------------------------------------------------------------
# Construct the big output file 
#  First column is municipio name
#--------------------------------------------------------------------
out = data.frame(munic=munic)

#--- append columns with municipio information
keepers = c("state","meso","micro","region","mun6d00",
            "X_COORD","Y_COORD","wwnw154900")

for (v in keepers) out[[v]] = data[,v]

#--- rename the column with unwtd sample size
names(out)[match("wwnw154900",names(out))] = "n"    

#---append columns for local ML estimates of f
for (i in 1:7) {
  colname = paste("f.Cens",i,sep="")
  out[[colname]] = round( rawf[,i], 4)
}

#--- append columns for neighborhood means of f
for (i in 1:7) {
  colname = paste("f.hood",i,sep="")
  out[[colname]] = round( f.hood[,i], 4)
}

#--- append columns for EB estimates of f
for (i in 1:7) {
  colname = paste("f.EB",i,sep="")
  out[[colname]] = round( fB[,i], 4)
}

#---append columns for local ML estimates of P
for (i in 1:7) {
  colname = paste("P.Cens",i,sep="")
  out[[colname]] = round( rawp[,i], 4)
}

#--- append columns for neighborhood means of P
for (i in 1:7) {
  colname = paste("P.hood",i,sep="")
  out[[colname]] = round(P.hood[,i], 4)
}

#--- append columns for EB estimates of P
for (i in 1:7) {
  colname = paste("P.EB",i,sep="")
  out[[colname]] = round(PB[,i], 4)
}


#--- append columns with TFR estimates
keepers = c("tfr.Cens","tfr.EB","tfr.EBB","tfr.PF2","tfr.PF23","r")

for (v in keepers) { out[[v]] = round(get(v),4) }

#--- write the summary data to the results file

write.csv( out, file=outcsv)