# clear Global Environment rm(list = ls()) #============================================================== #============================================================== # PANEL DATA MODELS #============================================================== #============================================================== #============================================================== # Install packages #============================================================== install.packages("plm") library(plm) install.packages("AER") library(AER) #============================================================== data<- read.table("C:/Vrontos/mathimata/A-mathimata-2021-2022/BSc-Econometrics/Vrontos/Ergastirio/R-Panel-models/panel_data.txt") dim(data) head(data) # Assign names at the variables names(data)<-c('country','time','y','cf1','cf2','sf1','sf2') names(data) dim(data) head(data) data$country<-factor(data$country) data$time<-factor(data$time) # some plots boxplot(split(data$y,data$country), names=c('GRE','FR','GER','IT','SP','UK')) with(data, plot(data$cf1, data$y, main = 'CF1 vs GI')) #install.packages("car") library(car) X11(); scatterplot(y~time|country, boxplots=FALSE, smooth=TRUE, reg.line=FALSE, data=data) #install.packages("foreign") library(foreign) #install.packages("gplots") library(gplots) plotmeans(y ~ country, main="Heterogeineity across countries", data=data) # plotmeans draw a 95% confidence interval around the means X11(); plotmeans(y ~ time, main="Heterogeineity across years", data=data) coplot(y ~ time|country, type="l", data=data) # Lines #coplot(y ~ time|country, type="b", data=data) # Points and lines #============================================================== # RUN MODEL 1: POOL MODEL #============================================================== pool1 <- lm(y ~ cf1 + sf1, data = data) summary(pool1) # Call: # lm(formula = y ~ cf1 + sf1, data = data) # # Residuals: # Min 1Q Median 3Q Max # -0.108727 -0.007470 -0.000141 0.007833 0.096343 # # Coefficients: # Estimate Std. Error t value Pr(>|t|) # (Intercept) -5.332e-03 1.357e-03 -3.929 8.87e-05 *** # cf1 9.213e-01 2.083e-02 44.240 < 2e-16 *** # sf1 7.788e-05 2.169e-05 3.592 0.000338 *** # --- # Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 # # Residual standard error: 0.01507 on 1707 degrees of freedom # Multiple R-squared: 0.5458, Adjusted R-squared: 0.5452 # F-statistic: 1025 on 2 and 1707 DF, p-value: < 2.2e-16 pool2 <- plm(y ~ cf1 + sf1, data = data, model="pooling") summary(pool2) # Pooling Model # # Call: # plm(formula = y ~ cf1 + sf1, data = data, model = "pooling") # # Balanced Panel: n = 6, T = 285, N = 1710 # # Residuals: # Min. 1st Qu. Median 3rd Qu. Max. # -0.10872684 -0.00747020 -0.00014105 0.00783289 0.09634300 # # Coefficients: # Estimate Std. Error t-value Pr(>|t|) # (Intercept) -5.3319e-03 1.3570e-03 -3.9291 8.868e-05 *** # cf1 9.2133e-01 2.0826e-02 44.2396 < 2.2e-16 *** # sf1 7.7885e-05 2.1686e-05 3.5916 0.000338 *** # --- # Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 # # Total Sum of Squares: 0.85402 # Residual Sum of Squares: 0.38792 # R-Squared: 0.54577 # Adj. R-Squared: 0.54524 # F-statistic: 1025.5 on 2 and 1707 DF, p-value: < 2.22e-16 #============================================================== # RUN MODEL 2: FIXED EFFECT MODEL #============================================================== fixed1 <- lm(y ~ cf1 + sf1 + country , data = data) summary(fixed1) # Call: # lm(formula = y ~ cf1 + sf1 + country, data = data) # # Residuals: # Min 1Q Median 3Q Max # -0.108680 -0.007308 -0.000310 0.007779 0.097484 # # Coefficients: # Estimate Std. Error t value Pr(>|t|) # (Intercept) -6.520e-03 1.593e-03 -4.093 4.47e-05 *** # cf1 9.209e-01 2.085e-02 44.175 < 2e-16 *** # sf1 8.133e-05 2.203e-05 3.691 0.00023 *** # country2 1.390e-03 1.264e-03 1.100 0.27155 # country3 1.929e-03 1.269e-03 1.521 0.12852 # country4 8.801e-04 1.268e-03 0.694 0.48759 # country5 9.604e-04 1.264e-03 0.760 0.44740 # country6 7.232e-04 1.265e-03 0.572 0.56755 # --- # Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 # # Residual standard error: 0.01509 on 1702 degrees of freedom # Multiple R-squared: 0.5465, Adjusted R-squared: 0.5446 # F-statistic: 293 on 7 and 1702 DF, p-value: < 2.2e-16 #============================================================== # RUN MODEL 2: FIXED EFFECT MODEL #============================================================== fixed2 <- plm(y ~ cf1 + sf1, data = data, model = 'within', index = c('country')) summary(fixed2) # Oneway (individual) effect Within Model # # Call: # plm(formula = y ~ cf1 + sf1, data = data, model = "within", # index = c("country")) # # Balanced Panel: n = 6, T = 285, N = 1710 # # Residuals: # Min. 1st Qu. Median 3rd Qu. Max. # -0.10868005 -0.00730801 -0.00031041 0.00777941 0.09748397 # # Coefficients: # Estimate Std. Error t-value Pr(>|t|) # cf1 9.2089e-01 2.0847e-02 44.175 < 2.2e-16 *** # sf1 8.1326e-05 2.2034e-05 3.691 0.0002304 *** # --- # Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 # # Total Sum of Squares: 0.8536 # Residual Sum of Squares: 0.38733 # R-Squared: 0.54624 # Adj. R-Squared: 0.54437 # F-statistic: 1024.44 on 2 and 1702 DF, p-value: < 2.22e-16 fixef(fixed2) # 1 2 3 4 5 6 #-0.0065198 -0.0051299 -0.0045907 -0.0056396 -0.0055594 -0.0057965 pFtest(fixed2, pool2) #If a=0.05 > p-value then the fixed effects model is a better choice # dfRest = NT-k-1 = 1710-3 = 1707 # dfUnrest = NT-N-k = 1710-6-2 = 1702 # df_Restr - df_Unrest = (NT-3) - (NT-N-2) = NT-3-NT+N+2 = N-1 = 6-1 = 5 # F test for individual effects # # data: y ~ cf1 + sf1 # F = 0.52233, df1 = 5, df2 = 1702, p-value = 0.7596 # alternative hypothesis: significant effects m3 <- lm(y ~ cf1 + sf1 + country*cf1 + country*sf1 , data = data) summary(m3) m4 <- lm(y ~ cf1 + sf1 + country + country*cf1 + country*sf1, data = data) summary(m4) #============================================================== # RUN MODEL 3: RANDOM EFFECTS MODEL #============================================================== random <- plm(y ~ cf1+sf1, data=data, index=c("country"), model="random") summary(random) # Oneway (individual) effect Random Effect Model # (Swamy-Arora's transformation) # # Call: # plm(formula = y ~ cf1 + sf1, data = data, model = "random", # index = c("country")) # # Balanced Panel: n = 6, T = 285, N = 1710 # # Effects: # var std.dev share # idiosyncratic 0.0002276 0.0150855 1 # individual 0.0000000 0.0000000 0 # theta: 0 # # Residuals: # Min. 1st Qu. Median 3rd Qu. Max. # -0.10872684 -0.00747020 -0.00014105 0.00783289 0.09634300 # # Coefficients: # Estimate Std. Error z-value Pr(>|z|) # (Intercept) -5.3319e-03 1.3570e-03 -3.9291 8.528e-05 *** # cf1 9.2133e-01 2.0826e-02 44.2396 < 2.2e-16 *** # sf1 7.7885e-05 2.1686e-05 3.5916 0.0003287 *** # --- # Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 # # Total Sum of Squares: 0.85402 # Residual Sum of Squares: 0.38792 # R-Squared: 0.54577 # Adj. R-Squared: 0.54524 # Chisq: 2051 on 2 DF, p-value: < 2.22e-16 #To decide between fixed or random effects you can run a Hausman test #where the null hypothesis is that the preferred model is random effects #vs the alternative the fixed effects (see Green, 2008, chapter 9). #Run a fixed effects model and save the estimates, #then run a random model and save the estimates, #then perform the test. If the p-value is significant #(for example a=0.05> pv) then use fixed effects, if not use random effects. phtest(fixed2, random) # Hausman Test # # data: y ~ cf1 + sf1 # chisq = 0.77834, df = 2, p-value = 0.6776 # alternative hypothesis: one model is inconsistent #============================================================== #==============================================================