### LOGISTIC REGRESSION

## Reading data
# read.csv2() reads with ';' as field separator and ',' as decimal mark.
DISDATA <- read.csv2("arthrosis.csv", header = TRUE)
# NOTE(review): attach() is discouraged because it hides where AGE / DISEASE
# come from; it is kept so that code referring to those columns by bare name
# keeps resolving.
attach(DISDATA)
str(DISDATA)

## A first look at the data
print(paste("Number of observations =", nrow(DISDATA)), quote = FALSE)

# Scatter of the outcome against age; axis labels are set explicitly so they
# read the same as when the bare column names were plotted.
plot(DISDATA$AGE, DISDATA$DISEASE, xlab = "AGE", ylab = "DISEASE")

# Counts, relative frequencies and odds of each DISEASE category.
# The counts are deliberately NOT stored in `T`: that would mask the built-in
# shorthand for TRUE for the rest of the session.
DIS.COUNTS <- table(DISDATA$DISEASE)
PT <- prop.table(DIS.COUNTS)
O <- round(PT / (1 - PT), digits = 3)
cbind(N_obs. = DIS.COUNTS, Frequency = PT, Odds = O)

## Fitting the model
# Logistic regression of DISEASE on AGE. The data frame is passed explicitly so
# the fit does not depend on what happens to be attached to the search path.
DISDATA.GLM <- glm(DISEASE ~ AGE, family = binomial(), data = DISDATA)
anova(DISDATA.GLM, test = "Chisq") # analysis-of-deviance table, chi-square test
summary(DISDATA.GLM)

# 95% CI for the coefficients; computed once and reused below on the
# exponentiated scale instead of profiling the model twice.
COEF.CI <- confint(DISDATA.GLM)
COEF.CI

# Hosmer-Lemeshow goodness-of-fit test on observed vs. fitted values
library(ResourceSelection)
hoslem.test(DISDATA.GLM$y, fitted(DISDATA.GLM))

library(epicalc)
logistic.display(DISDATA.GLM) # displays OR and p Wald test, log-likelihood and AIC
exp(coef(DISDATA.GLM)) # exponentiated coefficients
# 95% CI for exponentiated coefficients.
# NOTE(review): the original note said these differ from SAS output --
# presumably because SAS reports Wald limits by default; confirm.
exp(COEF.CI)

## Diagnostics
# Draw the diagnostic plots of the fitted model on a single 2 x 2 page
# (filled column by column), then restore the one-figure layout so that later
# plots are not squeezed into a quarter-page cell.
layout(matrix(c(1, 2, 3, 4), 2, 2))
plot(DISDATA.GLM)
layout(1)

# Influence measures: computed once, shown in full and then summarised.
INFL <- influence.measures(DISDATA.GLM)
INFL
summary(INFL)

RD <- resid(DISDATA.GLM, type = "deviance") # deviance residuals
RP <- resid(DISDATA.GLM, type = "pearson") # Pearson residuals
H <- hatvalues(DISDATA.GLM) # hat values; stored but not shown in the table below

# Take AGE / DISEASE from the model frame so the rows are exactly the
# observations used in the fit and therefore line up with the residuals.
MF <- model.frame(DISDATA.GLM)
cbind(AGE = MF$AGE, DISEASE = MF$DISEASE, Deviance_res = RD, Pearson_res = RP)

## Prediction
# Fitted probabilities of DISEASE for the observations used in the fit.
FIT.PROB <- predict(DISDATA.GLM, type = "response")
PRED.PROB <- round(FIT.PROB, digits = 3) # rounded for display only

# Classify on the UNROUNDED probabilities with the same rule as the
# classification table below (probability > 0.5 -> "1"). Cutting the rounded
# values produced NA for probabilities that round to 0 and could disagree
# with the table for values just above 0.5.
IS.CASE <- FIT.PROB > 0.5
PRED.DIS <- factor(as.integer(IS.CASE), levels = c(0, 1), labels = c("0", "1"))

# AGE / DISEASE come from the model frame so they match the fitted values
# row for row.
MF <- model.frame(DISDATA.GLM)
data.frame(AGE = MF$AGE, DISEASE = MF$DISEASE, PRED.PROB, PRED.DIS)
plot(MF$AGE, PRED.PROB, col = "blue", xlab = "AGE", ylab = "PRED.PROB")

# Classification table (observed vs. predicted at the 0.5 cut-off).
# Both factors carry both levels explicitly, so the table is always 2 x 2 even
# when every observation is predicted into the same class; relabelling the
# columns of a one-column table would fail.
STATUS.LABELS <- c("Healthy", "Arthrosis")
OBSERVED <- factor(DISDATA.GLM$y, levels = c(0, 1), labels = STATUS.LABELS)
PREDICTED <- factor(IS.CASE, levels = c(FALSE, TRUE), labels = STATUS.LABELS)
TSN <- table(Observed = OBSERVED, Predicted = PREDICTED)
TSN

## ROC curve (library epicalc needed)
# lroc() is called once and its result reused: every call redraws the curve, so
# calling it again just to extract the tables would replace the annotated plot.
ROC <- lroc(DISDATA.GLM, auc.coords = c(.5, .1))
ROC

# Pair each predicted-probability cut-point with the corresponding row of the
# diagnostic table. The number of rows is taken from the predicted table
# instead of being hard-coded, so this works for any number of distinct
# fitted values.
N.CUT <- nrow(ROC$predicted.table)
PSS <- data.frame(
  ROC$predicted.table[, 1],
  ROC$diagnostic.table[seq_len(N.CUT), , drop = FALSE]
)
PSSN <- PSS
colnames(PSSN) <- c("Prob.cutpoint", "1-Specificity", "Sensitivity")
PSSN

q()