%% MATLAB Session 3 - Hypothesis testing, prediction, dummies, simulations
% 2026
% Goal: Use estimated models for tests, predictions, dummy variables,
% and Monte Carlo simulations.

clear;                          % Clears all variables from the workspace
close all;                      % Closes all open figure windows
clc;                            % Clears the Command Window

%% 1. Load data and estimate the base model
% This model is used as input for the applications below.

scriptDir = fileparts(mfilename('fullpath'));   % Finds the folder where the current script is saved

if isempty(scriptDir)                           % Checks whether scriptDir is empty
    scriptDir = pwd;                            % If scriptDir is empty, uses the current working directory
end                                             % Ends the if statement

load(fullfile(scriptDir,'wage1-matlab.mat'))    % Loads the dataset from the same folder as the script

% Alternative 1: if the data file is inside a subfolder called "data"
% load(fullfile('data','wage1-matlab.mat'))

% Alternative 2: if the data file is in another folder, use the full path
% load('C:\Users\ Set your Current Folder - working directory\wage1-matlab.mat')

% Alternative 3: if the data file is in the Current Folder
% load('wage1-matlab.mat')

my_model3 = fitlm([educ, tenure], log(wage))    % Estimates an OLS model with log(wage) as dependent variable and educ, tenure as regressors


% logwage = log(wage);
% data = table(logwage, educ, tenure);
% my_model3 = fitlm(data, 'logwage ~ educ + tenure')

%% 2. Hypothesis testing with coefTest

% Performs the overall F-test of the regression
[p,F] = coefTest(my_model3)                     

% Tests whether the coefficients of educ and tenure are jointly equal to zero
% H0: b1 = b2 = 0
[p2,F2] = coefTest(my_model3,[0 1 0; 0 0 1])    

% Tests whether the coefficient of educ is equal to zero
% H0: b1 = 0
[p3,F3] = coefTest(my_model3,[0 1 0])           

% Tests whether the coefficient of tenure is equal to zero
% H0: b2 = 0
[p4,F4] = coefTest(my_model3,[0 0 1])    

% Tests whether the coefficient of educ is equal to 0.0865
% H0: b1 = 0.0865
[p5,F5] = coefTest(my_model3,[0 1 0],0.0865)    

% Tests whether the coefficient of tenure is equal to 0.0258
% H0: b2 = 0.0258
[p6,F6] = coefTest(my_model3,[0 0 1],0.0258)    

% Tests whether the coefficients of educ and tenure are equal
% H0: b1 - b2 = 0
[p7,F7] = coefTest(my_model3,[0 1 -1])         

% Tests whether the coefficient of educ equals three times the coefficient of tenure
% H0: b1 - 3*b2 = 0
[p8,F8] = coefTest(my_model3,[0 1 -3])          

%% 3. Fitted values

x = 1:length(log(wage));                        % Creates an index for the observations

figure                                          % Opens a new figure window
plot(x, log(wage), x, my_model3.Fitted)         % Plots actual log(wage) and fitted log(wage)
legend('log(wage)','log(wage) fitted')             % Adds a legend to distinguish actual and fitted values
grid on                                         % Adds a grid to the plot
title('Actual and fitted log(wage)')             % Adds a title to the plot
xlabel('Observation')                           % Adds a label to the horizontal axis
ylabel('log(wage)')                             % Adds a label to the vertical axis

%% 4. Predictions

% Defines a new observation with educ = 1 and tenure = 1
Xnew = [1 1];    
% Predicts log(wage) and gives a prediction interval for the new observation
[pred,predCI] = predict(my_model3,Xnew)  
% Converts the predicted log(wage) into predicted wage
exp(pred)                                       

% Verify manually. The first 1 is for the intercept.

% Manually computes the predicted wage using the estimated coefficients
exp([1 1 1]*my_model3.Coefficients.Estimate)   
% Many predictions simultaneously.
% Defines two new observations for prediction
Xnew2 = [1 1; 2 2];  
% Predicts log(wage) for both new observations
[pred,predCI] = predict(my_model3,Xnew2)   
[pred,predCI] = predict(my_model3,Xnew2,'Alpha',0.05)

% Converts the predicted log(wage) values into predicted wage values
exp(pred)                                       

%% 5. Correct predictions for y when the model is for log(y)

temp = fitlm(exp(my_model3.Fitted),wage,'Intercept',false);   % Estimates a no-intercept auxiliary regression of wage on exp(fitted log wage)

corrected_prediction = temp.Coefficients.Estimate(1)*exp(pred)   % Applies a correction factor to transform log-model predictions into wage predictions

%% 6. Dummy variables and interaction groups

close all;                                      % Closes all open figure windows

female_married    = female.*married;           % Creates a dummy equal to 1 for married women and 0 otherwise
female_notmarried = female.*(1-married);        % Creates a dummy equal to 1 for unmarried women and 0 otherwise
male_notmarried   = (1-female).*(1-married);    % Creates a dummy equal to 1 for unmarried men and 0 otherwise

my_model4 = fitlm([educ, tenure, female_married, female_notmarried, male_notmarried], log(wage))   % Estimates a log-wage model with education, tenure, and group dummies

% Reference category: male and married, because this category is omitted.
my_model4.Coefficients                          % Displays the estimated coefficients of the model with dummies

%% 7. Monte Carlo: sampling distribution of the sample mean

clearvars -except scriptDir                     % Clears all variables except scriptDir
close all;                                      % Closes all open figure windows
clc;                                            % Clears the Command Window

mu = 0;                                         % Sets the population mean of the normal distribution
sigma = 1;                                      % Sets the population standard deviation of the normal distribution
nSim = 1000;                                    % Sets the number of Monte Carlo simulations
vectorN = [10, 25, 100, 200];                   % Defines the different sample sizes
averageMatrix = zeros(nSim,length(vectorN));    % Creates a matrix to store the simulated sample means

for j = 1:length(vectorN)                       % Loops over the different sample sizes
    N = vectorN(j);                             % Selects the current sample size

    for i = 1:nSim                              % Repeats the simulation nSim times
        sample = normrnd(mu,sigma,N,1);         % Draws a random sample from a normal distribution
        averageMatrix(i,j) = mean(sample);      % Stores the sample mean from the current simulation
    end                                         % Ends the inner simulation loop

end                                             % Ends the loop over sample sizes

hFig = figure;                                  % Opens a new figure window and stores its handle
set(hFig,'Position',[300 300 1000 700])         % Sets the size and position of the figure window

for j = 1:length(vectorN)                       % Loops over the different sample sizes

    subplot(2,2,j)                              % Divides the figure into a 2 x 2 grid and selects plot j
    histogram(averageMatrix(:,j),20)            % Plots a histogram of the simulated sample means
    title(['N = ' num2str(vectorN(j))])          % Adds a title showing the sample size

    xlabel(['sample mean sd = ' num2str(std(averageMatrix(:,j)),3) ...
        ', theoretical sd = ' num2str(sigma/sqrt(vectorN(j)),3)])   % Adds empirical and theoretical standard deviations to the x-axis label

    xlim([-1 1])                                % Sets the limits of the horizontal axis
    grid on                                     % Adds a grid to the plot

end                                             % Ends the plotting loop

%% 8. Monte Carlo estimate of pi

clear;                                          % Clears all variables from the workspace
close all;                                      % Closes all open figure windows
clc;                                            % Clears the Command Window

% The original example used N = 100000000. A smaller N runs faster in class.

N = 1000000;                                    % Sets the number of random points to simulate
x = rand(1,N);                                  % Generates N random x-coordinates from the uniform distribution on (0,1)
y = rand(1,N);                                  % Generates N random y-coordinates from the uniform distribution on (0,1)

counter = (x.^2 + y.^2) <= 1;                  % Identifies points that fall inside the quarter circle of radius 1
N1 = sum(counter);                              % Counts the number of points inside the quarter circle

pi_estimate = (N1/N)*4                          % Estimates pi using the ratio of points inside the quarter circle
true_pi = pi                                    % Displays MATLAB's built-in value of pi

%% 9. Dice simulation using randi

clear;                                          % Clears all variables from the workspace
close all;                                      % Closes all open figure windows
clc;                                            % Clears the Command Window

dice = randi([1 6],100,1)                       % Simulates 100 dice rolls with outcomes from 1 to 6

figure                                          % Opens a new figure window
histogram(dice,1:7)                             % Plots a histogram of the dice outcomes
title('Dice simulation with randi')              % Adds a title to the plot
xlabel('Dice outcome')                          % Adds a label to the horizontal axis
ylabel('Frequency')                             % Adds a label to the vertical axis
grid on                                         % Adds a grid to the plot

%% 10. Dice simulation without randi

clear;                                          % Clears all variables from the workspace
close all;                                      % Closes all open figure windows
clc;                                            % Clears the Command Window

random = rand(100,1);                           % Generates 100 random numbers from the uniform distribution on (0,1)
dice = ones(size(random));                      % Initializes all dice outcomes as 1

for i = 1:length(random)                        % Loops over all simulated random numbers

    if random(i) > 1/6                          % Checks whether the random number is above 1/6
        dice(i) = 2;                            % Assigns the outcome 2
    end                                         % Ends the first if statement

    if random(i) > 2/6                          % Checks whether the random number is above 2/6
        dice(i) = 3;                            % Assigns the outcome 3
    end                                         % Ends the second if statement

    if random(i) > 3/6                          % Checks whether the random number is above 3/6
        dice(i) = 4;                            % Assigns the outcome 4
    end                                         % Ends the third if statement

    if random(i) > 4/6                          % Checks whether the random number is above 4/6
        dice(i) = 5;                            % Assigns the outcome 5
    end                                         % Ends the fourth if statement

    if random(i) > 5/6                          % Checks whether the random number is above 5/6
        dice(i) = 6;                            % Assigns the outcome 6
    end                                         % Ends the fifth if statement

end                                             % Ends the loop over simulated random numbers

dice                                            % Displays the simulated dice outcomes

figure                                          % Opens a new figure window
histogram(dice,1:7)                             % Plots a histogram of the simulated dice outcomes
title('Dice simulation without randi')           % Adds a title to the plot
xlabel('Dice outcome')                          % Adds a label to the horizontal axis
ylabel('Frequency')                             % Adds a label to the vertical axis
grid on                                         % Adds a grid to the plot