#-- housekeeping rm(list=ls()) graphics.off() memory.limit(4000) windows(record=T) set.seed(031) incsv = "brazil_2000_municipal_fertility_data.csv" outcsv = "brazil_2000_municipal_fertility_estimates.csv" ### read the Census long-form data data <- as.matrix(read.csv(incsv, row.names=34, as.is=TRUE) ) coord <- data[, c("X_COORD","Y_COORD")] women <- data[, paste("wwnw00",1:7,sep="") ] births <- data[, paste("cblynw00",1:7,sep="") ] ceb <- data[, paste("cebnw00",1:7,sep="") ] munic <- as.matrix(read.csv("municipio_names.csv"))[,"name"] rawf = births/women rawp = ceb/women M <- length(munic) ### read the smoothed Empirical Bayes fx and Px schedules fB = dget("fB.dput") PB = dget("PB.dput") ######################################################### # Read calibrated interpolation constants # from Schmertmann (2012), "Calibrated spline estimation # of detailed fertility schedules from abridged data" # # MPIDR Working Paper WP-2012-022 (July 2012). # Rostock, Max Planck Institute for Demographic Research ######################################################### Const = dget("calibrated spline Cdata.dput") ## grab the right subset of the C contants for calculating moments, then ## rearrange for easy multiplication L = seq(15,45,5) M1 = t(Const["g=7",paste(L),paste(L+2.5),"1" ]) ## M1 %*% f5 gives first moments integral{ a * f(a) | a < x } M0 = t(Const["g=7",paste(L),paste(L+2.5),"0" ]) ## M0 %*% f5 gives zero-th moments integral{ f(a) | a < x } ### Construct Brass Manual X multipliers Brass.W <- matrix(c(2.5430, -0.1760, 0.0120, 0.0120, 0.0120, 0.0120, 0.0120, 5.0805, 3.4015, -0.6735, 0.0805, 0.0805, 0.0805, 0.0805, 5.0725, 5.0725, 3.3375, -0.5545, 0.0725, 0.0725, 0.0725, 5.0145, 5.0145, 5.0145, 3.4565, -0.5485, 0.0145, 0.0145, 5.0030, 5.0030, 5.0030, 5.0030, 3.5210, -0.7600, 0.0030, 4.9995, 4.9995, 4.9995, 4.9995, 4.9995, 3.8615, -2.4815, 4.9990, 4.9990, 4.9990, 4.9990, 4.9990, 5.0150, 3.8270), nrow=7,ncol=7, byrow=T) ### calculate Census and EB TFRs tfr.Cens = apply(5 * rawf, 1, sum) tfr.EB = apply(5 * fB, 1, sum) r = tfr.EBB = tfr.PF2 = tfr.PF23 = ( NA * tfr.EB ) # matrices for holding results ### function to display data and regression fit for one municipio show.regression = function(mm,xx,yy,sdev,tfr,r,sub=1:7) { plot( 2000+xx , exp(yy), main=paste(mm,munic[mm]), xlim=2000+c(-25,0),ylim=c(1.5, 9.5), type="n", bty="l", xlab="Year",ylab="TFR") text( 2000+xx[sub], exp(yy[sub]), paste(seq(15,45,5)[sub]), cex=1.40) ## segments( 2000+xx[sub], exp(yy[sub]-2*sdev), 2000+xx[sub], exp(yy[sub]+2*sdev), lwd=2, col="grey") tt = 2000+seq(-25,0,.25) lines(tt, tfr * exp(r*(tt-2000)), type="l", lwd=4, col="red") text(2000, tfr.EB[mm], "EB", col="black", cex=1.30) # EB text(2000, tfr.Cens[mm], "C", col="royalblue", cex=1.30) # Census abline(h=0:10,v=seq(1970,2000,5), lty=2, col="grey") abline(v=2000, lwd=3) } ########################## loop over municipios and calculate EBB estimators #################### ## a few randomly selected municipios for plotting sel = c(sample(M, 25), which(apply(women,1,sum) > 3500)) sub = 2:7 # which age groups will be used in regression for (mm in 1:M) { ### (mux - x) values, estimated from calibrated spline interpolation xx = (M1 %*% fB[mm,]) / (M0 %*% fB[mm,]) - seq(17.5, 47.5, 5) PP = PB[mm,] FF = M0 %*% fB[mm,] ## estimated using calibrated spline interpolation FBrass = Brass.W %*% fB[mm,] ## estimated using Brass Manual X procedure yy = log(tfr.EB[mm] * PP/FF) ww = 1 / ( 1/PP + 1/FF -1/tfr.EB[mm] ) ## (approx) inverse variance weights fit = lm( yy ~ xx, weights=ww, subset=sub) tfr.EBB[mm] = exp(fit$coef[1]) r[mm] = fit$coef[2] tfr.PF2[mm] = tfr.EB[mm] * PP[2]/FBrass[2] tfr.PF23[mm] = tfr.EB[mm] * mean( PP[2:3]/FBrass[2:3]) ### display a few demonstration plots during the calculations if (mm %in% sel) { nbar = sum(women[mm,])/35 sdev = sqrt(ww/nbar) show.regression(mm,xx,yy,sdev,tfr=tfr.EBB[mm],r=r[mm], sub=sub) } } # for mm ##-- Append new calculations to the input data, and save ## neighborhood data and averages ## read the neighborhood lists (they are *slightly* different for f and P, and remember ## that the first element in each neighborhood vector is the focal municipio ## for ex., the fhood[[2]] is a 28x7x2 array for the neighborhood centered on ## municipio number 2, Ariquemes (RO). For each of the 28 municipios in Ariquemes's ## neighborhood [2 30 19 40 ...] it contains a 7 age group x {events,expos} matrix of ## weighted counts of births and women ### function to calculate neighborhood-level rates by age from an element of fhood or phood calcRates = function(hoodData) { tmp = apply( hoodData, c(2,3), sum) return( tmp[,"event"]/tmp[,"expos"] ) } #calcRates ### read the lists of (wtd) neighborhood data for P and f fff = dget("nhood.fdata.dput") PPP = dget("nhood.pdata.dput") ### calculate all the neighborhood-level rates for f and P ### results will be M x 7 matrices f.hood = t( sapply( fff, calcRates) ) P.hood = t( sapply( PPP, calcRates) ) rm(fff,PPP) # we no longer need these big variables #-------------------------------------------------------------------- # Construct the big output file # First column is municipio name #-------------------------------------------------------------------- out = data.frame(munic=munic) #--- append columns with municipio information keepers = c("state","meso","micro","region","mun6d00", "X_COORD","Y_COORD","wwnw154900") for (v in keepers) out[[v]] = data[,v] #--- rename the column with unwtd sample size names(out)[match("wwnw154900",names(out))] = "n" #---append columns for local ML estimates of f for (i in 1:7) { colname = paste("f.Cens",i,sep="") out[[colname]] = round( rawf[,i], 4) } #--- append columns for neighborhood means of f for (i in 1:7) { colname = paste("f.hood",i,sep="") out[[colname]] = round( f.hood[,i], 4) } #--- append columns for EB estimates of f for (i in 1:7) { colname = paste("f.EB",i,sep="") out[[colname]] = round( fB[,i], 4) } #---append columns for local ML estimates of P for (i in 1:7) { colname = paste("P.Cens",i,sep="") out[[colname]] = round( rawp[,i], 4) } #--- append columns for neighborhood means of P for (i in 1:7) { colname = paste("P.hood",i,sep="") out[[colname]] = round(P.hood[,i], 4) } #--- append columns for EB estimates of P for (i in 1:7) { colname = paste("P.EB",i,sep="") out[[colname]] = round(PB[,i], 4) } #--- append columns with TFR estimates keepers = c("tfr.Cens","tfr.EB","tfr.EBB","tfr.PF2","tfr.PF23","r") for (v in keepers) { out[[v]] = round(get(v),4) } #--- write the summary data to the results file write.csv( out, file=outcsv)