# clear Global Environment
rm(list = ls())

#==============================================================
#==============================================================
# TESTS FOR STRUCTURAL BREAK POINT MODELS
#==============================================================
#==============================================================

#==============================================================
# Install packages
#==============================================================
install.packages("strucchange")
library(strucchange)

#==============================================================
# Nile data (single breakpoint): 
# the annual flows drop in 1898 
# because the first Ashwan dam was built
#==============================================================
data(Nile)
Nile
plot(Nile)

#==============================================================
# BAI-PERRON METHOD 
# FOR BREAK POINT OPTIMAL SEGMENT PARTITION
#==============================================================
# The foundation for estimating breaks in time series regression 
# models was given by Bai (1994) and was extended to 
# multiple breaks by Bai (1997a) and Bai & Perron (1998). 
# breakpoints implements the algorithm described in 
# Bai & Perron (2003) for simultaneous estimation 
# of multiple breakpoints. The distribution function 
# used for the confidence intervals for the breakpoints 
# is given in Bai (1997b). The ideas behind this implementation 
# are described in Zeileis et al. (2003)

bp <- breakpoints(Nile ~ 1)
breakpoints(bp)
#Optimal 2-segment partition: 
#  
#  Call:
#  breakpoints.breakpointsfull(obj = bp)
#
#Breakpoints at observation number:
#  28 
#
#Corresponding to breakdates:
#  1898 

summary(bp)
#Optimal (m+1)-segment partition: 
#  
#  Call:
#  breakpoints.formula(formula = Nile ~ 1)
#
#Breakpoints at observation number:
#  
#m = 1      28         
#m = 2      28       83
#m = 3      28    68 83
#m = 4      28 45 68 83
#m = 5   15 30 45 68 83
#
#Corresponding to breakdates:
#  
#m = 1        1898               
#m = 2        1898           1953
#m = 3        1898      1938 1953
#m = 4        1898 1915 1938 1953
#m = 5   1885 1900 1915 1938 1953
#
#Fit:
#  
#  m       0       1       2       3       4       5      
#RSS 2835157 1597457 1552924 1538097 1507888 1659994
#BIC    1318    1270    1276    1285    1292    1311

# BIC chooses one breakpoint
plot(bp)

# fit null hypothesis model and model with 1 breakpoint
fm0 <- lm(Nile ~ 1)
summary(fm0)
#Call:
#  lm(formula = Nile ~ 1)
#
#Residuals:
#  Min      1Q  Median      3Q     Max 
#-463.35 -120.85  -25.85  113.15  450.65 
#
#Coefficients:
#  Estimate Std. Error t value Pr(>|t|)    
#(Intercept)   919.35      16.92   54.33   <2e-16 ***
#  ---
#  Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#
#Residual standard error: 169.2 on 99 degrees of freedom
#
fm0
#Call:
#  lm(formula = Nile ~ 1)
#
#Coefficients:
#  (Intercept)  
#919.3 


fm1 <- lm(Nile ~ breakfactor(bp, breaks = 1))
summary(fm1)
#Call:
#   lm(formula = Nile ~ breakfactor(bp, breaks = 1))
# 
# Residuals:
#   Min      1Q  Median      3Q     Max 
# -393.97  -93.47   -2.97   70.03  320.03 
# 
# Coefficients:
#   Estimate Std. Error t value Pr(>|t|)
# (Intercept)                          1097.75      24.13  45.497  < 2e-16
# breakfactor(bp, breaks = 1)segment2  -247.78      28.44  -8.714 7.44e-14
# 
# (Intercept)                         ***
#   breakfactor(bp, breaks = 1)segment2 ***
#   ---
#   Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# 
# Residual standard error: 127.7 on 98 degrees of freedom
# Multiple R-squared:  0.4366,	Adjusted R-squared:  0.4308 
# F-statistic: 75.93 on 1 and 98 DF,  p-value: 7.439e-14

fm1

# Call:
#   lm(formula = Nile ~ breakfactor(bp, breaks = 1))
# 
# Coefficients:
#   (Intercept)  breakfactor(bp, breaks = 1)segment2  
# 1097.7                               -247.8 


# plot data and fitted models
plot(Nile)
lines(ts(fitted(fm0), start = 1871), col = 3)
lines(ts(fitted(fm1), start = 1871), col = 4)
lines(bp)

# confidence interval
ci <- confint(bp)
ci
lines(ci)

#==============================================================
# F-tests 
#==============================================================
# test the null hypothesis that the annual flow remains constant
# over the years
fs <- Fstats(Nile ~ 1)
summary(fs)
fs$Fstats
fs$breakpoint

# visualize the breakpoint 
plot(Nile)
lines(breakpoints(fs))

# plot the F statistic
plot(fs, alpha = 0.05)
lines(breakpoints(fs))
#bd=boundary(fs,alpha=0.05)
#bd

# plot the corresponding p values
plot(fs, pval = TRUE)

# perform the aveF test
sctest(fs, type = "aveF")
# aveF test
# 
# data:  fs
# ave.F = 21.215, p-value < 2.2e-16

# perform the supF test
sctest(fs, type = "supF")
# supF test
# 
# data:  fs
# sup.F = 75.93, p-value = 2.22e-16

# perform the expF test
sctest(fs, type = "expF")
# expF test
# 
# data:  fs
# exp.F = 33.759, p-value < 2.2e-16


#==============================================================
# FLUCTUATION TESTS
# Empirical Fluctuation Processes (efp function)
# CUSUM, MOSUM tests based on recursive or OLS residuals 
#==============================================================
# type: "Rec-CUSUM", "OLS-CUSUM", "Rec-MOSUM", "OLS-MOSUM" 
# the function efp will return a one-dimensional empirical 
# process of sums of residuals. Either it will be based 
# on recursive residuals or on OLS residuals and the 
# process will contain CUmulative SUMs or MOving SUMs 
# of residuals in a certain data window. 
# For the MOSUM and ME processes all estimations are done 
# for the observations in a moving data window, whose size 
# is determined by h and which is shifted over the whole sample.

# If type is either "RE" or "ME" a k-dimensional process 
# will be returned, if k is the number of regressors in 
# the model, as it is based on recursive OLS estimates 
# of the regression coefficients or moving OLS estimates 
# respectively. 

# =================
# Recursive CUSUM
# =================
# Run recursive CUSUM test
rc <- efp(Nile ~ 1, type = "Rec-CUSUM")

# calculate corresponding test statistic
sctest(rc)
# Recursive CUSUM test
# 
# data:  rc
# S = 2.0669, p-value = 7.487e-08

sctest(rc, alt = TRUE)
# Recursive CUSUM test with alternative boundaries
# 
# data:  rc
# S = 6.0333, p-value = 0.001

# plot the test statistic and doundaries 
plot(rc, alpha = 0.05) # linear boundaries
plot(rc, alpha = 0.05, alt.boundary = TRUE) # alternarive boundaries

# or
bd=boundary(rc,alpha=0.05)
bd
bdalt=boundary(rc,alpha=0.05,alt.boundary=TRUE)
bdalt
plot(rc)
lines(bd)
lines(-bd)
lines(bdalt)
lines(-bdalt)

# =================
# OLS CUSUM
# =================
# Run OLS CUSUM test
oc <- efp(Nile ~ 1, type = "OLS-CUSUM")

# calculate corresponding test statistic
sctest(oc)
# OLS-based CUSUM test
# 
# data:  oc
# S0 = 2.9518, p-value = 5.409e-08

sctest(oc, alt = TRUE)
# OLS-based CUSUM test with alternative boundaries
# 
# data:  oc
# S0 = 6.5741, p-value = 1e-04

# plot the test statistic and doundaries 
plot(oc, alpha = 0.05) # linear boundaries
plot(oc, alpha = 0.05, alt.boundary = TRUE) # alternarive boundaries

# =================
# Recursive MOSUM
# =================
# Run recursive MOSUM test
rm <- efp(Nile ~ 1, type = "Rec-MOSUM")
# calculate corresponding test statistic
sctest(rm)
# Recursive MOSUM test
# 
# data:  rm
# M = 2.1, p-value = 0.01

# plot the test statistic and doundaries 
plot(rm, alpha = 0.05) # linear boundaries

# =================
# OLS MOSUM
# =================
# Run OLS MOSUM test
om <- efp(Nile ~ 1, type = "OLS-MOSUM")
# calculate corresponding test statistic
sctest(om)
# OLS-based MOSUM test
# 
# data:  om
# M0 = 1.5309, p-value = 0.01

# plot the test statistic and doundaries 
plot(om, alpha = 0.05) # linear boundaries

# ===================
# Recursive estimates
# ===================
# Run recursive estimates test
re <- efp(Nile ~ 1, type = "RE")

# calculate corresponding test statistic
sctest(re)
# RE test (recursive estimates test)
# 
# data:  re
# RE = 2.9518, p-value = 5.409e-08

sctest(re, alt = TRUE)
# RE test (recursive estimates test) with alternative boundaries
# 
# data:  re
# RE = 6.5741, p-value = 1e-04

# plot the test statistic and doundaries 
par(mfrow=c(2,1))
plot(re, alpha = 0.05) # linear boundaries
plot(re, alt = TRUE, alpha = 0.05) # alternative boundaries

# ===================
# Moving estimates
# ===================
# Run moving estimates test
me <- efp(Nile ~ 1, type = "ME")

# calculate corresponding test statistic
sctest(me)
# ME test (moving estimates test)
# 
# data:  me
# ME = 1.5309, p-value = 0.01

# plot the test statistic and doundaries 
plot(me, alpha = 0.05) # linear boundaries

# ===================================================
# All plots
par(mfrow=c(4,2))
plot(fs)

plot(Nile)
lines(ts(fitted(fm0), start = 1871), col = 3)
lines(ts(fitted(fm1), start = 1871), col = 4)
lines(bp)

plot(rc)

plot(oc)

plot(rm)

plot(om)

plot(re)

plot(me)
# ===================================================
# ===================================================
#model=dataCI$y~dataCI$V1+dataCI$V2+dataCI$V3
#summary(model)
#rc=efp(model,type="Rec-CUSUM",data=dataCI)
# ===================================================



# ===================================================
# ===================================================
# EXAMPLE 2: BAI-PERRON DATA
# ===================================================
# ===================================================
# read the data
bpdata<- read.table("C:/Vrontos/mathimata/A-mathimata-2021-2022/MSc-Ecomomics/Quantitative-Methods/Course_Slides_Vrontos/Chapter5-Structural-Break-Point-Models/data_Bai_Perron.txt")
bpdata
par(mfrow=c(1,1))
plot(bpdata$V1, type='l')


#==============================================================
# BAI-PERRON TEST
#==============================================================
bp1 <- breakpoints(bpdata$V1 ~ 1)
breakpoints(bp1)
# Optimal 3-segment partition: 
#   
#   Call:
#   breakpoints.breakpointsfull(obj = bp1)
# 
# Breakpoints at observation number:
#   47 79 
# 
# Corresponding to breakdates:
#   0.4563107 0.7669903 

summary(bp1)
# Optimal (m+1)-segment partition: 
#   
#   Call:
#   breakpoints.formula(formula = bpdata$V1 ~ 1)
# 
# Breakpoints at observation number:
#   
# m = 1               79
# m = 2         47    79
# m = 3      24 47    79
# m = 4      24 47 64 79
# m = 5   16 31 47 64 79
# 
# Corresponding to breakdates:
#   
# m = 1                                                                           0.766990291262136
# m = 2                                       0.456310679611651                   0.766990291262136
# m = 3                     0.233009708737864 0.456310679611651                   0.766990291262136
# m = 4                     0.233009708737864 0.456310679611651 0.621359223300971 0.766990291262136
# m = 5   0.155339805825243 0.300970873786408 0.456310679611651 0.621359223300971 0.766990291262136
# 
# Fit:
#   
#   m   0      1      2      3      4      5     
# RSS 1214.9  645.0  456.0  445.2  444.9  449.6
# BIC  555.7  499.8  473.3  480.1  489.3  499.7

# BIC chooses two breakpoints
plot(bp1)

# fit null hypothesis model and model with 2 breakpoints
fm0 <- lm(bpdata$V1 ~ 1)
summary(fm0)
fm0
# Call:
#   lm(formula = bpdata$V1 ~ 1)
# 
# Coefficients:
#   (Intercept)  
# 1.375  

fm1 <- lm(bpdata$V1 ~ breakfactor(bp1, breaks = 2))
summary(fm1)
fm1
# Call:
#   lm(formula = bpdata$V1 ~ breakfactor(bp1, breaks = 2))
# 
# Coefficients:
#   (Intercept)  breakfactor(bp1, breaks = 2)segment2  breakfactor(bp1, breaks = 2)segment3  
# 1.355                                -3.151                                 4.288  

# Interpret the model coefficients
mean(bpdata$V1[1:47])
#[1] 1.355037
mean(bpdata$V1[48:79])
#[1] -1.796138
1.355-3.151
#[1] -1.796
length(bpdata$V1)
#[1] 103
mean(bpdata$V1[80:103])
#[1] 5.64289
1.355+4.288
#[1] 5.643

# plot data and fitted models
plot(bpdata$V1, type = 'l')
lines(fitted(fm0), col = 3)
lines(fitted(fm1), col = 4)
abline(v=bp1$breakpoints[1])
abline(v=bp1$breakpoints[2])