# clear Global Environment rm(list = ls()) #============================================================== #============================================================== # EXAMPLE 1: Consumption versus Income #============================================================== #============================================================== #============================================================== # Data Entry #============================================================== income <- c(881.8,2469.3,1134.5,3032.8,2122.9,1884.7,1786.5,1982.7,1256.3,2253.4, 2633.2,1866.8,1468.5,2202.5,971.1,1214.3,1419.5,1530.3,1464.7,2301.7, 1630.3,1022.4,2440.3,2014.7,1873.4,3057.3,965.7,2183.6,1943.9,630.4, 1059.9,2561.2,742.3,3274.4,2164.4,1533.7,1297.2,1972.9,1237.1,1145.6, 2279.3,1768.0,1140.5,1188.1,1352.5,1403.6,1196.5,1281.3,2001.2,1896.2) income consumption <- c(801.2,2120.1,950.8,2541.6,1962.0,1669.9,1266.0,1403.6,977.2,1983.8, 2226.2,1682.0,1368.2,1975.7,804.9,1066.5,1372.5,1096.2,1411.7,1891.7, 1437.3,834.5,2190.6,1680.1,1708.7,2538.5,1064.9,1835.0,1697.7,506.5, 804.8,2147.4,702.4,2774.7,1923.6,1413.5,991.4,1359.7,915.3,1057.7, 2037.4,1674.1,1180.7,1166.8,1161.4,1083.0,1367.7,1045.1,1734.4,1595.5) consumption city <- c(1,2,1,2,2,2,1,1,1,2,2,2,2,2,1,1,2,1,2,2,2,1,2,2,2,2,2,2,2,1, 1,2,1,2,2,2,1,1,1,2,2,2,2,2,1,1,2,1,2,2) #============================================================== #============================================================== # PLOTS AND CORRELATIONS #============================================================== # Simple Scatterplot plot(income,consumption,main="consumption vs income", xlab="income", ylab="consumption") # ======================================== # ESTIMATE SIMPLE LINEAR REGRESSION MODEL: # y = a + bx + u # ======================================== fit<-lm(consumption ~ income) summary(fit) #Call: # lm(formula = consumption ~ income) #Residuals: # Min 1Q Median 3Q Max #-330.22 -66.75 11.47 91.24 316.65 #Coefficients: # Estimate Std. Error t value Pr(>|t|) #(Intercept) 66.50939 54.84543 1.213 0.231 #income 0.82285 0.02999 27.436 <2e-16 *** # --- # Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 # #Residual standard error: 130.1 on 48 degrees of freedom #Multiple R-squared: 0.9401, Adjusted R-squared: 0.9388 #F-statistic: 752.8 on 1 and 48 DF, p-value: < 2.2e-16 #===================== # Plot the fitted line #===================== plot(income, consumption, main="consumption vs income and fitted line", xlab="income", ylab="consumption") abline(fit, col="red") # regression fitted line # same plot with another way plot(income, consumption, main="consumption vs income, fitted line", xlab="income", ylab="consumption") abline(a=coef(fit)[1], b=coef(fit)[2], col="blue") # regression fitted line # ==================================================== # ESTIMATE LINEAR REGRESSION MODEL WITH DUMMY VARIABLE # (test if the constant is different) # Model 2: y = a + bX + gz + u # ==================================================== # plot the data plot(income, consumption, pch=city, main="consumption vs income", xlab="income", ylab="consumption") # Construct dummy variable dummyz <- city==2 cbind(city,dummyz) # or using command for, and command if z <- NULL for (i in 1:length(city)) { if (city[i]==2) z[i] <- 1 else z[i] <- 0 } z cbind(city,z) # Estimate model M2: y = a + bX + gz + u fit2<-lm(consumption ~ income + z) summary(fit2) #Call: # lm(formula = consumption ~ income + z) # #Residuals: # Min 1Q Median 3Q Max #-126.028 -39.801 3.657 43.889 155.766 # #Coefficients: # Estimate Std. Error t value Pr(>|t|) #(Intercept) 97.88728 29.39477 3.33 0.0017 ** # income 0.70345 0.01932 36.42 < 2e-16 *** # z 272.36597 24.69108 11.03 1.23e-14 *** # --- # Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 # #Residual standard error: 69.41 on 47 degrees of freedom #Multiple R-squared: 0.9833, Adjusted R-squared: 0.9826 #F-statistic: 1384 on 2 and 47 DF, p-value: < 2.2e-16 # usefull plot plot(income, consumption, pch=z, main="consumption vs income", xlab="income", ylab="consumption") abline(a=fit2$coefficients[1],b=fit2$coefficients[2],col=1) abline(a=fit2$coefficients[1]+fit2$coefficients[3],b=fit2$coefficients[2],col=2) # ==================================================== # ESTIMATE LINEAR REGRESSION MODEL WITH DUMMY VARIABLE # (test if the slope is different) # Model 3: y = a + bX + d(zx) + u # ==================================================== # Construct interaction variable zx <- z*income zx # Estimate model M3: y = a + bX + d(zx) + u fit3<-lm(consumption ~ income + zx) summary(fit3) #Call: # lm(formula = consumption ~ income + zx) # #Residuals: # Min 1Q Median 3Q Max #-133.451 -33.290 -5.202 32.912 188.441 # #Coefficients: # Estimate Std. Error t value Pr(>|t|) #(Intercept) 290.88749 32.52130 8.945 1.03e-11 *** # income 0.55372 0.02635 21.017 < 2e-16 *** # zx 0.18876 0.01532 12.323 2.50e-16 *** # --- # Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 # #Residual standard error: 63.93 on 47 degrees of freedom #Multiple R-squared: 0.9858, Adjusted R-squared: 0.9852 #F-statistic: 1635 on 2 and 47 DF, p-value: < 2.2e-16 # ==================================== # Model 4: y = a + bX + gz + d(zx) + u # ==================================== fit4<-lm(consumption ~ income + z + zx) summary(fit4) #Call: # lm(formula = consumption ~ income + z + zx) # #Residuals: # Min 1Q Median 3Q Max #-108.302 -39.688 -5.888 36.877 176.529 # #Coefficients: # Estimate Std. Error t value Pr(>|t|) #(Intercept) 243.24425 53.99953 4.505 4.54e-05 *** # income 0.58848 0.04102 14.345 < 2e-16 *** # z 74.56259 67.55380 1.104 0.27544 #zx 0.14145 0.04550 3.109 0.00322 ** # --- # Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 # #Residual standard error: 63.78 on 46 degrees of freedom #Multiple R-squared: 0.9862, Adjusted R-squared: 0.9853 #F-statistic: 1096 on 3 and 46 DF, p-value: < 2.2e-16 # # ================================================