# Model evaluation: Class work
### remove all objects from workspace
rm(list=ls())
### set working directory
setwd("L:/My Documents/")
data = read.csv(file = "exercise #2 data file.csv")
names(data) = c("Obs", "Pred")
#Always plot your data first to get a feel for the data
plot(data$Pred, data$Obs, type='p')
abline (0, 1)
obs=data$Obs
pred=data$Pred
#You need a funciton to write the equation.
#MSEP
if(length(obs) != length(pred)) stop("The length of obs and pred are not equal or incorrect names")
#Compute MSPE
MSPE = sum((obs-pred)^2)/length(pred)
#Compute root MSPE
RMSPE = sqrt(MSPE)
#population standard deviation
STDEVP <- sd(obs)*sqrt((length(obs)-1)^2/length(obs)^2)
#MSPE to Standard deviation of observed values ration (RSR)
RSR = RMSPE/STDEVP
#Compute scale shift for concordance correlation coefficient
v = sd(obs)/sd(pred)
#Compute location shift for concordance correlation coefficient
u = (mean(obs)-mean(pred))/(sqrt(sd(obs)*sd(pred)))
#Compute bias correction factor concordance correlation coefficient
Cb = 2/(v + 1/v + u^2)
#Compute correlation coefficient for concordance correlation coefficient
SEobs = sqrt(sum((obs-mean(obs))^2)/length(obs))
SEpred = sqrt(sum((pred-mean(pred))^2)/length(pred))
r = sum((pred - mean(pred))*(obs - mean(obs)))/(length(obs)*SEobs*SEpred)
#simpler way to calculate r
cor(obs,pred)
#Concordance correlation coefficient
CCC = (2*cov(obs,pred))/
(sd(obs)^2+sd(pred)^2 + (mean(obs)-mean(pred))^2)
#simpler way to calculate ccc
CCC1 = r * Cb
#Results in a matrix
Results = matrix(c(MSPE, RMSPE, RSR, Cb, r, CCC), byrow = TRUE, ncol = 6)
colnames(Results) = c("MSPE", "RMSPE", "RSR", "Cb", "r", "CCC")
Results
#If you have time breakdown MSPE
#Errors in Central Tendency, or mean bias
ECT = (mean(pred)-mean(obs))^2
#Errors Due to Regression
ER = (SEpred - r*SEobs)^2
#Errors due to Disturbances
ED = (1-r^2)*SEobs^2
#Percent of MSPE
ECTP = ECT*100/MSPE
ERP = ER*100/MSPE
EDP = ED*100/MSPE
Output = matrix(c(ECT, ER, ED, ECTP, ERP, EDP), byrow = FALSE, ncol = 2)
rownames(Output) = c("Errors in Central Tendency, or mean bias", "Errors Due to Regression", "Errors due to Disturbances")
colnames(Output) = c("Absolute Value", "Percent of MSPE" )
Output
#If you have time, do the residual analysis and test if slope is different from 1