set.seed(135) x1<-rgamma(50000,6,1) x2<-rgamma(50000,2,1) #independent x1, x2 e<-10*rnorm(50000,1) y<-10+20*x1+e p<-cbind(y,x1,x2) # population values for y, x1, x2 cbind(sum(y),sum(x1), sum(x2)) #Population totals for y, x1, x2 cbind(var(y),var(x1),var(x2),var(e)) #population variance for y, x1, x2, e r12<-(cor(x1,x2))^2 #coefficient of determination for x1,x2 ry1<-cor(y,x1)^2 #coefficient of determination for y,x1 ry2<-cor(y,x2)^2 #coefficient of determination for y,x2 r<-(ry1+ry2-2*sqrt(ry1*ry2*r12))/(1-r12) #coefficient of determination for y cbind(r12,ry1,ry2,r) plot(x1,y); --------------------------------- s<-p[sample(50000,5000),]; #simple random sample of size n Y<-s[,1]; #sample values of y X1<-s[,2]; #sample values of x1 X2<-s[,3]; #sample values of x2 par(mfrow=c(3,2)); hist(y,breaks=30, prob="TRUE", col="lightgreen", border="blue") hist(Y,breaks=30, prob="TRUE", col="lightgreen", border="blue") hist(x1,breaks=30, prob="TRUE", col="lightgreen", border="blue") hist(X1,breaks=30, prob="TRUE", col="lightgreen", border="blue") hist(x2,breaks=30, prob="TRUE", col="lightgreen", border="blue") hist(X2,breaks=30, prob="TRUE", col="lightgreen", border="blue") par(mfrow=c(3,2)); plot(density(y),col="darkgreen" );plot(density(Y),col="darkgreen" ) plot(density(x1),col="darkgreen" );plot(density(X1),col="darkgreen" ) plot(density(x2),col="darkgreen" );plot(density(X2),col="darkgreen" ) --------------------------------------------- CALIBRATION with x1 set.seed(135) x1<-rgamma(50000,6,1) x2<-rgamma(50000,2,1) #independent x1, x2 e<-2*rnorm(50000,1) y<-10+5*x1+e p<-cbind(y,x1,x2) # population values for y, x1, x2 cbind(sum(y),sum(x1), sum(x2)) #Population totals for y, x1, x2 cbind(var(y),var(x1),var(x2),var(e)) #population variance for y, x1, x2, e r12<-(cor(x1,x2))^2 #coefficient of determination for x1,x2 ry1<-cor(y,x1)^2 #coefficient of determination for y,x1 ry2<-cor(y,x2)^2 #coefficient of determination for y,x2 r<-(ry1+ry2-2*sqrt(ry1*ry2*r12))/(1-r12) #coefficient of determination for y cbind(r12,ry1,ry2,r) #plot(x1,y); N<-50000;n<-5000 w<-rep(N/n,times=n) F<-function(N,n){ s<-p[sample(N,n),]; Y<-s[,1]; X1<-s[,2]; X2<-s[,3]; c<-w+X1*(sum(x1)-t(X1)%*%w)/crossprod(X1,X1); #GR weights T<-t(X1)%*%c; HT<-crossprod(Y,w); #HT estimator of sum(y) GR<-crossprod(Y,c); #GR estimator of sum(y) HTx1<-crossprod(X1,w); #HT estimator of sum(x1) RE<-HT*(sum(x1)/HTx1); A<-c(T,HT,GR,RE); } system.time(R<-replicate(15000,F(50000,5000 ))) B<-mean(R[1,])-sum(x1) #check of calibration constraints RB_GR<-100*(mean(R[3,])-sum(y))/sum(y) #relative bias of GR estimator RB_RE<-100*(mean(R[4,])-sum(y))/sum(y) #relative bias of RE estimator vR2<-var(R[2,]) #var(HT) vR3<-var(R[3,]) #var(GR) vR4<-var(R[4,]) #var(RE) RV_HTGR<-100*(vR3-vR2)/vR2 #relative variance of GR,HT estimators RV_HTRE<-100*(vR4-vR2)/vR2 #relative variance of RE,HT estimators cbind(B) #Output cbind(RB_GR,RB_RE) #Output cbind(RV_HTGR,RV_HTRE) #Output --------------------------------------------------------------------------------- CALIBRATION with x1,x2 set.seed(135) x1<-rgamma(50000,6,1) x2<-rgamma(50000,2,1) #independent x1, x2 e<-3*rnorm(50000,1) y<-10+5*x1+5*x2+e # population values for y p<-cbind(y,x1,x2) cbind(sum(y),sum(x1), sum(x2)) #Population totals for y, x1, x2 cbind(var(y),var(x1),var(x2),var(e)) #population variance for y, x1, x2, e r12<-(cor(x1,x2))^2 #coefficient of determination for x1,x2 ry1<-cor(y,x1)^2 #coefficient of determination for y,x1 ry2<-cor(y,x2)^2 #coefficient of determination for y,x2 r<-(ry1+ry2-2*sqrt(ry1*ry2*r12))/(1-r12) #coefficient of determination for y cbind(r12,ry1,ry2,r) N<-50000;n<-500 w<-rep(N/n,times=n) CT<-rbind(sum(x1),sum(x2)) F<-function(N,n){ s<-p[sample(N,n),]; Y<-s[,1]; X1<-s[,2]; X2<-s[,3]; X12<-cbind(X1,X2); c<-w+X12%*%solve(t(X12)%*%X12)%*%(CT-t(X12)%*%w); # GR weights T<-t(X12)%*%c; HT<-crossprod(Y,w); #HT estimator of sum(y) GR<-crossprod(Y,c); #GR estimator of sum(y) A<-c(T,HT,GR); } system.time(R<-replicate(15000,F(50000,500))) B1<-mean(R[1,])-sum(x1) #check of calibration constraints B2<-mean(R[2,])-sum(x2) #check of calibration constraints RB_GR<-100*(mean(R[4,])-sum(y))/sum(y) #relative bias of GR estimator vR3<-var(R[3,]) #var(HT) vR4<-var(R[4,]) #var(GR) RV_HTGR<-100*(vR4-vR3)/vR3 #relative variance of GR,HT estimators cbind(B1,B2) #Output cbind(RB_GR) #Output cbind(RV_HTGR) #Output --------------------------------------------------------