%% MATLAB Session 2 - OLS models and residual diagnostics
% 2026
% Goal: Estimate OLS models, interpret regression output,
% and perform residual diagnostics.

clear;                  % Clears all variables from the workspace
close all;              % Closes all open figure windows
clc;                    % Clears the Command Window

%% 1. Load data

scriptDir = fileparts(mfilename('fullpath'));   % Finds the folder where the current script is saved

if isempty(scriptDir)                           % Checks whether scriptDir is empty, for example if the script has no saved path
    scriptDir = pwd;                            % If scriptDir is empty, uses the current working directory instead
end                                             % Ends the if statement

load(fullfile(scriptDir,'wage1-matlab.mat'))    % Loads the file wage1-matlab.mat from the script folder

load('wage1-matlab.mat')    % Loads the file wage1-matlab.mat from the script folder

%% 2. Scatterplot: education and wage

figure                                          % Opens a new figure window
scatter(educ,wage)                              % Creates a scatterplot of wage against education
title('Scatterplot: education and wage')         % Adds a title to the plot
xlabel('Years of education')                    % Adds a label to the horizontal axis
ylabel('Hourly earnings')                       % Adds a label to the vertical axis
grid on                                         % Adds a grid to the plot
lsline                                          % Adds the least-squares regression line to the scatterplot

%% 3. First regression: wage on educ

% Model: wage = b0 + b1*educ + u
% The first model regresses wage on education.

my_model = fitlm(educ,wage)                     % Estimates a simple OLS regression with wage as the dependent variable and educ as the explanatory variable

myres = my_model.Residuals.Raw;                 % Stores the raw residuals from the regression
myfit = predict(my_model,educ);                 % Computes the fitted values of wage from the estimated model

figure                                          % Opens a new figure window

subplot(2,2,1)                                  % Divides the figure into a 2 x 2 grid and selects the 1st plot
plot(myres)                                     % Plots the residuals
title('Residuals')                              % Adds a title to the residual plot
grid on                                         % Adds a grid to the plot

subplot(2,2,2)                                  % Selects the 2nd plot in the 2 x 2 grid
ksdensity(myres)                                % Plots the kernel density estimate of the residuals
title('Kernel density')                         % Adds a title to the density plot
grid on                                         % Adds a grid to the plot

subplot(2,2,3)                                  % Selects the 3rd plot in the 2 x 2 grid
scatter(myfit,myres)                            % Creates a scatterplot of residuals against fitted values
title('Residuals vs fitted')                    % Adds a title to the plot
xlabel('Fitted values')                         % Adds a label to the horizontal axis
ylabel('Residuals')                             % Adds a label to the vertical axis
grid on                                         % Adds a grid to the plot

subplot(2,2,4)                                  % Selects the 4th plot in the 2 x 2 grid
autocorr(myres)                                 % Plots the autocorrelation function of the residuals
title('Autocorrelation')                        % Adds a title to the autocorrelation plot

[h_ks1,p_value_ks1] = kstest((myres-mean(myres))/std(myres));   % Performs the Kolmogorov-Smirnov normality test on standardized residuals
p_value_ks1                                     % Displays the p-value of the Kolmogorov-Smirnov test

[h_JB1,p_value_JB1] = jbtest(myres);            % Performs the Jarque-Bera normality test on the residuals
p_value_JB1                                     % Displays the p-value of the Jarque-Bera test

%% 4. Second regression: wage on educ and tenure

% Model: wage = b0 + b1*educ + b2*tenure + u
% The second model regresses wage on education and tenure.

close all;                                      % Closes all open figure windows

my_model2 = fitlm([educ, tenure], wage)         % Estimates a multiple OLS regression with wage as the dependent variable and educ and tenure as regressors

myres = my_model2.Residuals.Raw;                % Stores the raw residuals from the second regression
myfit = predict(my_model2,[educ, tenure]);      % Computes the fitted values of wage from the second model

figure                                          % Opens a new figure window

subplot(2,2,1)                                  % Divides the figure into a 2 x 2 grid and selects the 1st plot
plot(myres)                                     % Plots the residuals
title('Residuals')                              % Adds a title to the residual plot
grid on                                         % Adds a grid to the plot

subplot(2,2,2)                                  % Selects the 2nd plot in the 2 x 2 grid
ksdensity(myres)                                % Plots the kernel density estimate of the residuals
title('Kernel density')                         % Adds a title to the density plot
grid on                                         % Adds a grid to the plot

subplot(2,2,3)                                  % Selects the 3rd plot in the 2 x 2 grid
scatter(myfit,myres)                            % Creates a scatterplot of residuals against fitted values
title('Residuals vs fitted')                    % Adds a title to the plot
xlabel('Fitted values')                         % Adds a label to the horizontal axis
ylabel('Residuals')                             % Adds a label to the vertical axis
grid on                                         % Adds a grid to the plot

subplot(2,2,4)                                  % Selects the 4th plot in the 2 x 2 grid
autocorr(myres)                                 % Plots the autocorrelation function of the residuals
title('Autocorrelation')                        % Adds a title to the autocorrelation plot

[h_ks2,p_value_ks2] = kstest((myres-mean(myres))/std(myres));   % Performs the Kolmogorov-Smirnov normality test on standardized residuals
p_value_ks2                                     % Displays the p-value of the Kolmogorov-Smirnov test

[h_JB2,p_value_JB2] = jbtest(myres);            % Performs the Jarque-Bera normality test on the residuals
p_value_JB2                                     % Displays the p-value of the Jarque-Bera test

%% 5. Third regression: log(wage) on educ and tenure

% Model: log(wage) = b0 + b1*educ + b2*tenure + u
% The third model regresses the logarithm of wage on education and tenure.

close all;                                      % Closes all open figure windows

my_model3 = fitlm([educ, tenure], log(wage))    % Estimates a multiple OLS regression with log(wage) as the dependent variable

myres = my_model3.Residuals.Raw;                % Stores the raw residuals from the third regression
myfit = predict(my_model3,[educ, tenure]);      % Computes the fitted values of log(wage) from the third model

figure                                          % Opens a new figure window

subplot(2,2,1)                                  % Divides the figure into a 2 x 2 grid and selects the 1st plot
plot(myres)                                     % Plots the residuals
title('Residuals')                              % Adds a title to the residual plot
grid on                                         % Adds a grid to the plot

subplot(2,2,2)                                  % Selects the 2nd plot in the 2 x 2 grid
ksdensity(myres)                                % Plots the kernel density estimate of the residuals
title('Kernel density')                         % Adds a title to the density plot
grid on                                         % Adds a grid to the plot

subplot(2,2,3)                                  % Selects the 3rd plot in the 2 x 2 grid
scatter(myfit,myres)                            % Creates a scatterplot of residuals against fitted values
title('Residuals vs fitted')                    % Adds a title to the plot
xlabel('Fitted values')                         % Adds a label to the horizontal axis
ylabel('Residuals')                             % Adds a label to the vertical axis
grid on                                         % Adds a grid to the plot

subplot(2,2,4)                                  % Selects the 4th plot in the 2 x 2 grid
autocorr(myres)                                 % Plots the autocorrelation function of the residuals
title('Autocorrelation')                        % Adds a title to the autocorrelation plot

[h_ks3,p_value_ks3] = kstest((myres-mean(myres))/std(myres));   % Performs the Kolmogorov-Smirnov normality test on standardized residuals
p_value_ks3                                     % Displays the p-value of the Kolmogorov-Smirnov test

[h_JB3,p_value_JB3] = jbtest(myres);            % Performs the Jarque-Bera normality test on the residuals
p_value_JB3                                     % Displays the p-value of the Jarque-Bera test

%% 6. Extract information from the final OLS model

my_model3.Coefficients                          % Displays the table of estimated coefficients, standard errors, t-statistics, and p-values

my_model3.Coefficients(1,1)                     % Displays the element in row 1 and column 1 of the coefficient table, corresponding to the estimated intercept

my_model3.Coefficients.pValue                   % Displays the p-values of the estimated coefficients in the final model

my_model3.Rsquared                              % Displays the R-squared measures of the final model

my_model3.Rsquared.Ordinary                     % Displays the ordinary R-squared of the final model

coefCI(my_model3)                               % Computes and displays confidence intervals for the estimated coefficients