# Data set: load() restores the objects saved in the remote .rda file into the
# current workspace. The rest of the script reads an object named IDH2014 and
# indexes it like a data frame, so that object is presumably what this file
# contains (confirm with ls() after loading).
# NOTE(review): requires network access; the file is fetched over plain HTTP.
load(url("http://pagines.uab.cat/plopez/sites/pagines.uab.cat.plopez/files/IDH2014.rda"))

# install.packages("Hmisc")
library(Hmisc)
library(Deducer)

# Short aliases for the columns used in the models below
Y  <- IDH2014$Lifeexpectancy    # response in every model
X1 <- IDH2014$GNIpercapita2011  # first predictor
X2 <- IDH2014$Schooling         # second predictor (multiple regression)

########## Regression analysis 1: simple regression on the original variables

# Scatter plots of Y against X1: base graphics first, then three ggplot
# versions with a fitted lm line (plain, labelled by HDI rank, labelled by
# country name). The x/y aesthetics are declared once per plot and inherited
# by every layer.
plot(Y ~ X1)
ggplot(mapping = aes(x = X1, y = Y)) +
  geom_point() +
  geom_smooth(method = "lm")
ggplot(mapping = aes(x = X1, y = Y)) +
  geom_point() +
  geom_smooth(method = "lm") +
  geom_text(aes(label = IDH2014$HDIrank), parse = FALSE)
ggplot(mapping = aes(x = X1, y = Y)) +
  geom_point() +
  geom_smooth(method = "lm") +
  geom_text(
    aes(label = IDH2014$Country),
    parse = FALSE, size = 4, angle = 35, hjust = 0, vjust = 1, alpha = 0.5
  )

# Correlation between Y and X1 (several alternatives)

# Deducer. Arguments after `test` are meant for the test function (cor.test
# here). The last one used to be spelled "aternative", which does not match
# cor.test's `alternative` argument; the result only looked right because
# "two.sided" is also the default.
cor.matrix(variables = d(Y, X1), test = cor.test, method = "pearson",
           alternative = "two.sided")
cor(Y, X1, use = "complete.obs")  # base R: coefficient only, complete cases
cor.test(Y, X1)                   # base R: coefficient with significance test
rcorr(Y, X1)                      # Hmisc

# Simple linear regression of Y on X1
AR <- lm(Y ~ X1)
summary(AR)
# Type III tests are provided by car::Anova(). stats::anova() has no `type`
# argument: it treats every extra argument as another model to compare, so
# anova(AR, type = "III") fails instead of returning a Type III table.
car::Anova(AR, type = "III")
plot(AR)  # diagnostic plots

# Store per-case diagnostics as columns of IDH2014: Cook's distance, raw,
# standardized and studentized residuals, and predicted values.
# Each vector is written through its own names so that every value lands on
# the row it belongs to (assumes names(tmp) match the row names of IDH2014 --
# confirm).
tmp <- cooks.distance(AR)
IDH2014[names(tmp), "cooks"] <- tmp
tmp <- residuals(AR)
IDH2014[names(tmp), "Residuals"] <- tmp
tmp <- rstandard(AR)
IDH2014[names(tmp), "resid.standardized"] <- tmp
tmp <- rstudent(AR)
IDH2014[names(tmp), "resid.studentized"] <- tmp
IDH2014[["predicted.link"]] <- predict(AR, newdata = IDH2014)
# Open IDH2014 in the data editor.
# NOTE(review): the value returned by edit() is not assigned, so changes made
# in the editor are not kept -- confirm this is only meant for inspection.
edit(IDH2014)
# Extreme cases: standardized residual more than 3 units away from zero
IDH2014.sub <- subset(IDH2014, abs(resid.standardized) > 3)
edit(IDH2014.sub)

########## Regression analysis 2: simple regression after a log transformation

LX1 <- log(X1)  # natural logarithm of X1

# Same three scatter plots as in analysis 1, now against log(X1). The x/y
# aesthetics are declared once per plot and inherited by every layer.
ggplot(mapping = aes(x = LX1, y = Y)) +
  geom_point() +
  geom_smooth(method = "lm")
ggplot(mapping = aes(x = LX1, y = Y)) +
  geom_point() +
  geom_smooth(method = "lm") +
  geom_text(aes(label = IDH2014$HDIrank), parse = FALSE)
ggplot(mapping = aes(x = LX1, y = Y)) +
  geom_point() +
  geom_smooth(method = "lm") +
  geom_text(
    aes(label = IDH2014$Country),
    parse = FALSE, size = 4, angle = 35, hjust = 0, vjust = 1, alpha = 0.5
  )

# Correlation between Y and log(X1), with Deducer. The last argument used to
# be spelled "aternative", which does not match cor.test's `alternative`
# argument; the result only looked right because "two.sided" is the default.
cor.matrix(variables = d(Y, LX1), test = cor.test, method = "pearson",
           alternative = "two.sided")

# Simple linear regression of Y on log(X1)
AR <- lm(Y ~ LX1)
summary(AR)
# Type III tests are provided by car::Anova(). stats::anova() has no `type`
# argument: it treats every extra argument as another model to compare, so
# anova(AR, type = "III") fails instead of returning a Type III table.
car::Anova(AR, type = "III")
plot(AR)  # diagnostic plots

# Store per-case diagnostics of the current model (AR) as columns of IDH2014,
# overwriting any columns of the same name: Cook's distance, raw, standardized
# and studentized residuals, and predicted values.
# Each vector is written through its own names so that every value lands on
# the row it belongs to (assumes names(tmp) match the row names of IDH2014 --
# confirm).
tmp <- cooks.distance(AR)
IDH2014[names(tmp), "cooks"] <- tmp
tmp <- residuals(AR)
IDH2014[names(tmp), "Residuals"] <- tmp
tmp <- rstandard(AR)
IDH2014[names(tmp), "resid.standardized"] <- tmp
tmp <- rstudent(AR)
IDH2014[names(tmp), "resid.studentized"] <- tmp
IDH2014[["predicted.link"]] <- predict(AR, newdata = IDH2014)
# Open IDH2014 in the data editor.
# NOTE(review): the value returned by edit() is not assigned, so changes made
# in the editor are not kept -- confirm this is only meant for inspection.
edit(IDH2014)
# Extreme cases: standardized residual more than 3 units away from zero, in
# either direction. The filter used to keep only residuals above +3, which
# silently dropped cases lying far below the regression line and did not
# match the two-sided rule applied to the first model.
IDH2014.sub <- subset(IDH2014, abs(resid.standardized) > 3)
edit(IDH2014.sub)

########## Regression analysis 3: multiple regression

# Static 3D scatter plot of Y against log(X1) and X2
# install.packages("scatterplot3d")
library(scatterplot3d)
G3D <- scatterplot3d(Y ~ LX1 + X2, pch = 16)

# Fit the two-predictor model and draw its regression plane on the plot
AR <- lm(Y ~ LX1 + X2)
G3D$plane3d(AR)

# Rotating 3D plots
# install.packages("rgl")
library(rgl)
# plot3d() takes its arguments in x, y, z order. The response goes last so it
# is on the z axis, as in scatterplot3d(Y ~ LX1 + X2); the call used to pass Y
# first, which put the response on the x axis.
plot3d(LX1, X2, Y)
# install.packages("Rcmdr")
# NOTE(review): attaching Rcmdr also starts its GUI; current releases of car
# appear to export scatter3d() themselves -- confirm whether Rcmdr is needed.
library(Rcmdr)
# Attached without `pos`: the hard-coded pos = 38 was a search-path position
# from one particular session and has no meaning in another.
library(nlme)
library(mgcv)
scatter3d(Y ~ LX1 + X2, surface = FALSE, residuals = TRUE, bg = "white",
          axis.scales = TRUE, grid = TRUE, ellipsoid = FALSE)
scatter3d(Y ~ LX1 + X2)

# Multiple linear regression of Y on log(X1) and X2
AR <- lm(Y ~ LX1 + X2)
summary(AR)
# Type III tests are provided by car::Anova(). stats::anova() has no `type`
# argument: it treats every extra argument as another model to compare, so
# anova(AR, type = "III") fails instead of returning a Type III table.
car::Anova(AR, type = "III")
plot(AR)  # diagnostic plots

# Regression on standardized variables: scale() centres each variable and
# divides it by its standard deviation.
YS   <- scale(Y)
LX1S <- scale(LX1)
X2S  <- scale(X2)
ARS  <- lm(YS ~ LX1S + X2S)
summary(ARS)

# Relative importance of each predictor
# install.packages("relaimpo")
library(relaimpo)
calc.relimp(ARS)

# Diagnostics: collinearity (VIF, variance inflation factor), outlier test
# and influential observations
library(car)
vif(ARS)
outlierTest(ARS)
summary(influence.measures(ARS))
