### REGRESION LINEAL SIMPLE 
library(MASS)
library(lmtest)
library(nortest)
### Base de datos (parcial) de 7 bebes recien nacidos.
# Una variable respuesta: Talla_hoy (en cm),
# 4 Covariables, continuas: Edad_dias, Talla_nacer, Peso_nacer y Torax_nacer.  
# Datos tomados de Walpole y Myers (2017).
setwd("C:/Users/Nathalia/Documents/Maestria_Estadística")
babies <- read_delim("Tallas.babies.csv - Tallas.babies.csv.csv", delim = ";", escape_double = FALSE, trim_ws = TRUE)
babies
attach(babies)
head(babies)
summary(babies)
dim(babies)
##### MODELO DE RLS CON TORAX
baby.fit11<-lm(babies$Talla.hoy~babies$Torax_nacer) # 1) AJUSTAR EL MODELO 
baby.fit11     ### 2) COEFICIENTES ESTIMADOS bo y b1
resid.torax<-residuals(baby.fit11)  ### Residuales (errores estimados)
resid.torax
########## COMIENZO A VALIDAR SUPUESTOS.
shapiro.test(residuals(baby.fit11)) ## normalidad 
lillie.test(residuals(baby.fit11)) ### normalidad
bptest(Talla.hoy~ Torax_nacer, data=babies) ### Homocedasticidad Breusch-Pagan 
gqtest(lm(babies$Talla.hoy~ babies$Torax_nacer)) ### Homocedasticidad Goldfeld-Quandt
dwtest(babies$Talla.hoy ~ residuals(baby.fit11)) # Durbin Watson independencia
### EN RESUMEN: no hay normalidad, no hay varianza constante, si independencia
### en nuestra estrategia: detenemos el proceso y volvemos al modelo teórico 1. 

########## INTENTO OTRO MODELO DE RLS datos babies.
plot(babies$Peso_nacer, babies$Talla.hoy)
baby.fit12<-lm(babies$Talla.hoy~babies$Peso_nacer) # 1) AJUSTAR EL MODELO 
baby.fit12     ### 2) COEFICIENTES ESTIMADOS bo y b1
resid.peso<-residuals(baby.fit12)  ### Residuales (errores estimados)
resid.peso
shapiro.test(residuals(baby.fit12)) ## test normalidad 
lillie.test(residuals(baby.fit12)) ### test normalidad
bptest(babies$Talla.hoy~ babies$Peso_nacer) ### Test Homocedasticidad Breusch-Pagan 
gqtest(lm(babies$Talla.hoy~ babies$Peso_nacer)) ### Test Homocedasticidad Goldfeld-Quandt
dwtest(babies$Talla.hoy ~ residuals(baby.fit12)) # Test Durbin Watson independencia
### EN RESUMEN: Se cumplen los tres supuestos !!!!!
### ?qu? sigue en nuestra estrategia?   Tiene sentido Tabla ANOVA.
##### ?Existe modelo de RLS?
qf(0.99,1,5) #F tabulado con alfa de 0,01, 1 grado de libertad del error y 5 grados de libertad de la regresión
anova(baby.fit12) ### SI SE PUEDE INTERPRETAR LA PENDIENTE.
summary(baby.fit12)
summary(Peso_nacer) ### ?tiene sentido interpretar bo estimado?

##### INTENTO OTRO MODELO DE RLS datos babies. Talla_nacer.
## plot(Peso_nacer, Talla_hoy)
baby.fit13<-lm(babies$Talla.hoy~babies$Talla_nacer) # 1) AJUSTAR EL MODELO 
baby.fit13   ### 2) COEFICIENTES ESTIMADOS bo y b1
resid.3<-residuals(baby.fit13)  ### Residuales (errores estimados)
resid.3
shapiro.test(residuals(baby.fit13)) ## test normalidad 
lillie.test(residuals(baby.fit13)) ### test normalidad
bptest(babies$Talla.hoy~ babies$Talla_nacer) ### Test Homocedasticidad Breusch-Pagan 
gqtest(lm(babies$Talla.hoy~ babies$Talla_nacer)) ### Test Homocedasticidad Goldfeld-Quandt
dwtest(babies$Talla.hoy ~ residuals(baby.fit13)) # Test Durbin Watson independencia
summary(baby.fit13)
anova(baby.fit13)

### DATA: ROAD
data(trees)
data(road)
summary(road)
attach(road)  #Para poder usar las columnas de datos, sin citar los datos
help(road)

data(trees)
summary(trees)
attach(trees)  #Para poder usar las columnas de datos, sin citar los datos
help(trees)
trees.fit11<-lm(Volume~Girth) # 1) AJUSTAR EL MODELO 
trees.fit11     ### 2) COEFICIENTES ESTIMADOS bo y b1
resid.Girth<-residuals(trees.fit11)  ### Residuales (errores estimados)
resid.Girth
shapiro.test(residuals(trees.fit11)) ## test normalidad 
lillie.test(residuals(trees.fit11)) ### test normalidad
bptest(Volume~Girth) ### Test Homocedasticidad Breusch-Pagan 
gqtest(lm(Volume~Girth)) ### Test Homocedasticidad Goldfeld-Quandt
dwtest(Girth ~ residuals(trees.fit11)) # Test Durbin Watson independencia
# Si cumple normalidad, no cumple homocedasticidad, no cumple independencia. Conclusión: Se debe construir otro modelo

attach(trees)
mod_y<-Height
mod_x<-Girth
mod<-lm(mod_y~mod_x) # 1) AJUSTAR EL MODELO 
mod     ### 2) COEFICIENTES ESTIMADOS bo y b1
resid.mod_x<-residuals(mod)  ### Residuales (errores estimados)
resid.mod_x
shapiro.test(residuals(mod)) ## test normalidad 
lillie.test(residuals(mod)) ### test normalidad
bptest(mod_y~mod_x) ### Test Homocedasticidad Breusch-Pagan 
gqtest(lm(mod_y~mod_x)) ### Test Homocedasticidad Goldfeld-Quandt
dwtest(mod_x ~ residuals(mod)) # Test Durbin Watson independencia
# Si cumple normalidad, no cumple homocedasticidad, no cumple independencia. Conclusión: Se debe construir otro modelo

data(trees)
data(road)
mod_y<-Height #dependiente
mod_x<-Girth #independiente
mod<-lm(mod_y~mod_x) # 1) AJUSTAR EL MODELO 
mod     ### 2) COEFICIENTES ESTIMADOS bo y b1
resid.mod_x<-residuals(mod)  ### Residuales (errores estimados)
resid.mod_x
shapiro.test(residuals(mod)) ## test normalidad 
lillie.test(residuals(mod)) ### test normalidad
bptest(mod_y~mod_x) ### Test Homocedasticidad Breusch-Pagan 
gqtest(lm(mod_y~mod_x)) ### Test Homocedasticidad Goldfeld-Quandt
dwtest(mod_y ~ residuals(mod)) # Test Durbin Watson independencia
# Si cumple normalidad, si cumple homocedasticidad, no cumple independencia. Conclusión: Se debe construir otro modelo
summary(mod)
anova(mod)

######


data(road)
boxplot(road
        4)

summary(road)
attach(road)  #Para poder usar las columnas de datos, sin citar los datos
help(road)

accidentes=road[-9,c(1,3)] #matriz sin la línea 9
attach(accidentes)

library(MASS)
library(MVN)
round(cor(road),3)
library(Hmisc)
rcorr(road)
cor_x<-deaths
cor_y<-popden
par(mfrow=c(1,1))
plot(cor_x,cor_y, main="x vs y")
plot(cor_y,cor_x, main="y vs x")
baby
vecbi = data.frame (cbind(cor_x, cor_y))
vecbi

mvn(data = vecbi, univariateTest = "SW", desc = TRUE)
mvn(data = vecbi, mvnTest = "mardia", desc = TRUE) 

#cor.test(cor_x, cor_y, method="pearson")
cor.test(cor_x, cor_y, method="spearman")

cov(accidentes)