## raul = read.csv(file.choose(), header = TRUE) www = "http://www.biostatisticaumg.it/dataset/raul.csv" raul = read.csv(www, header = TRUE) attach(raul) names(raul) table(Esito) uno = lm(Ldhtre ~ Ldhuno) due = lm(Ldhtre ~ Ldhuno * Esito) plot(Ldhuno, Ldhtre, bg = "gray", pch = 21) abline(uno) plot(Ldhuno, Ldhtre) abline(27.1098, -0.1686, col = "cyan", lwd = 3) abline(27.1098, -0.1686, lty = 3) abline((27.1098+0.2673), (-0.1686+0.2642), col = "red", lwd = 3) abline((27.1098+0.2673), (-0.1686+0.2642), lty = 3) points(Ldhuno[Esito == "benigno"], Ldhtre[Esito == "benigno"], bg = "cyan", pch = 21) points(Ldhuno[Esito == "maligno"], Ldhtre[Esito == "maligno"], bg = "red", pch = 21) # fresher = read.csv(file.choose(), header = TRUE ) www = "http://www.biostatisticaumg.it/dataset/fresher.csv" fresher = read.csv( www, header = TRUE ) attach(fresher) str(fresher) # mostriamo che il t test e la retta di regressione # con il comando lm "sono la stessa minestra" # calcoliamo la media dei pesi tapply(weight, gender, mean) # calcoliamo la differenza tra le medie dei pesi 70.2 - 56.6 # facciamo il t test: t.test(weight ~ gender) # decidiamo [p = 10^-11] che # la differenza tra le medie dei pesi # c'e' di sicuro [altamente significativa, ***] tapply(weight, gender, mean) var.test(weight ~ gender) fligner.test(weight ~ gender) t.test(weight ~ gender, var.equal = TRUE) summary(lm(weight ~ gender)) relation1 = weight ~ height lm(relation1) model1 = lm(relation1) summary(model1) resid(model1) summary(resid(model1)) sum(resid(model1)^2) deviance(model1) sd(resid(model1)) summary(model1)$sigma fitted(model1) as.numeric(fitted(model1)) + as.numeric(resid(model1)) weight io = fresher[58,] io peso = weight[58] statu = height[58] plot(height, weight, pch = 23, bg = "orange", col = "orange") abline(model) m = summary(model1)$coefficient[2] q = summary(model1)$coefficient[1] pesostim = m*statu+q 78 - pesostim numericasuali = rnorm(65, mean = 0, sd = 6.46) pesofittizio = -83.9 + 0.854 * height + numericasuali par(mfrow = c(1,2)) plot(height, weight) plot(height, pesofittizio) names(raul) table(Esito) formula1 = Ldhtre ~ Ldhuno formula2 = Ldhtre ~ Ldhuno * Esito formula3 = Ldhtre ~ Esito modello1 = lm(formula1) modello1 summary(modello1)$sigma numericasuali = rnorm(1610, mean = 0, sd = 2.5) LDH3fittizio = 28.8 - 0.223 * Ldhuno + numericasuali par(mfrow = c(1,2)) plot(Ldhuno, Ldhtre) plot(Ldhuno, LDH3fittizio) par(mfrow = c(1,2)) plot(Ldhuno, Ldhtre) plot(Ldhuno, trunc(LDH3fittizio)) model1 = lm(relation1, data = fresher) model1 names(fresher) formulasciocca = weight ~ day modellosciocco = lm(formulasciocca) modellosciocco mean(weight) plot(formulasciocca) abline(modellosciocco) relation0 = weight ~ 1 modellonullo = lm(relation0) summary(modellonullo) mean(weight) sd(weight) sd(weight)/sqrt(65) (b = summary(model1)$coefficient[2]) b * sd(height) / sd(weight) cor(height, weight) cor(height, weight)^2 summary(model1)$r.squared install.packages("rsq") library(rsq) rsq.kl(model1) deviance(model1) model0 = lm(weight ~ 1) deviance(model0) 1 - deviance(model1)/deviance(model0) summary(model0) ## qui figura sigmamodel1 = summary(model1)$sigma sigmamodel1 sigmaMLmodel1 = sigmamodel1 * sqrt( (65 - 2) / 65 ) sigmaMLmodel1 dnorm(6.5, mean = 0, sd = sigmaMLmodel1) sum(log(dnorm(resid(model1), mean = 0, sd = sigmaMLmodel1))) logLik(model1) -2 * logLik(model1) 2 * ( 3 - logLik(model1) ) AIC(model1) ## - - - - - - - - - - - - - - - - - - ## ## - - - - - - - - - - - - - - - - - - ## ## SECTION 3: DIAGNOSTIC OF ## THE LINEAR MODEL ## - - - - - - - - - - - - - - - - - - ## ## - - - - - - - - - - - - - - - - - - ## attach(airquality) ipotesi1 = Ozone ~ Solar.R + Wind + Temp modellodiscutibile = lm(ipotesi1) summary(modellodiscutibile) par(mfrow = c(2,2)) plot(modellodiscutibile) ipotesi2 = Ozone ~ Solar.R + Wind + Temp + I(Temp^2) modellomigliore = lm(ipotesi2) summary(modellomigliore) par(mfrow = c(2,2)) plot(modellomigliore) cbind(Temp, I(Temp^2))[1:5,] (Temp * Temp)[1:5] detach(airquality) differenzaresa = c(106, -20, 101, -33, 72, -36, 62, 38, -70, 127, 24) t.test(differenzaresa) ipotesinulla = differenzaresa ~ 1 modelloGosset = lm(ipotesinulla) summary(modelloGosset) sd(differenzaresa) sd(differenzaresa)/sqrt(11) ipotesibiomarker = Ca125 ~ Esito modellobiomarker = lm(ipotesibiomarker) summary(modellobiomarker) par(mfrow = c(2,2)) plot(modellobiomarker) relation2 = weight ~ gender model2 = lm(relation2) summary(model2) ## - - - - - - - - - - - - - - - - - - ## ## - - - - - - - - - - - - - - - - - - ## ## ANCOVA ## - - - - - - - - - - - - - - - - - - ## ## - - - - - - - - - - - - - - - - - - ## relationcross = weight ~ height * gender relationplus = weight ~ height + gender modelcross = lm(relationcross) summary(modelcross) -18.5041 + -25.9617 0.4505 + 0.1925 modelplus = lm(relationplus) summary(modelplus) -35.3725 + 7.1965 AIC(model0, model2, model1, modelplus, modelcross) ## - - - - - - - - - - - - - - - - - - ## ## - - - - - - - - - - - - - - - - - - ## ## ANOVA ## - - - - - - - - - - - - - - - - - - ## ## - - - - - - - - - - - - - - - - - - ## levels(gym) relation3 = weight ~ gym t.test(relation3) anovamodel = aov(relation3) summary(anovamodel) t.test(weight ~ gender)$p.value t.test(weight ~ smoke)$p.value tapply(weight, gender, var) tapply(weight, smoke, var) var(weight) summary(lm(weight ~ gender))$coefficient summary(lm(weight ~ smoke))$coefficient relation = weight ~ gender boxplot(relation) boxplot(weight ~ gender) oldfashioned = aov(relation) summary(oldfashioned) oldfashioned summary(oldfashioned) oldfashioned var(weight)*(length(weight)-1)-deviance(oldfashioned) deviance(oldfashioned) summary(oldfashioned) summary.lm(oldfashioned) summary.lm(oldfashioned)$sigma 6.459431 6.459431 relation3 = weight ~ gym anovamodel = aov(relation3) summary(anovamodel) library(MASS) boxcox(anovamodel) weightboxcox = sqrt(sqrt(weight)) pairwise.t.test(weightboxcox, gym, p.adj="bonferroni") TukeyHSD(anovamodel) il1b = tooth$il1b library(multcomp) library(sandwich) toothmodel = lm(tooth$areainfl ~ tooth$il1b) posthoc = glht(toothmodel, linfct = mcp(il1b = "Tukey"), vcov = sandwich) summary(posthoc) library(multcomp) library(sandwich) posthoc = glht(anovamodel, linfct = mcp(gym = "Tukey"), vcov = sandwich) summary(posthoc) #######{The Anova with Model Selection} relation3 = weight ~ gym linearmodel = lm(relation3) AIC(linearmodel) summary(linearmodel) relation4 = weight ~ smoke relation2 = weight ~ gender boxplot(relation4) tapply(weight, smoke, mean) tapply(weight, smoke, sd) tapply(weight, gender, mean) tapply(weight, gender, sd) mean(weight) sd(weight) table(smoke) table(gender) length(fresher) relation3 = weight ~ gym model3 = lm(relation3) AIC(model0, model3) gymOS = gym levels(gymOS) levels(gymOS)[2] = "occasionalsporty" levels(gymOS)[3] = "occasionalsporty" levels(gymOS) relation3bis = weight ~ gymOS model3bis = lm(relation3bis) AIC(model0, model3bis, model3) gymNS = gym levels(gymNS)[1] = "notsporty" levels(gymNS)[3] = "notsporty" relation3ter = weight ~ gymNS model3ter = lm(relation3ter) AIC(model0, model3bis, model3, model3ter) ####### uno = lm(Ldhtre ~ Ldhuno) due = lm(Ldhtre ~ Ldhuno * Esito) plot(Ldhuno, Ldhtre, bg = "gray", pch = 21) abline(uno) plot(Ldhuno, Ldhtre) abline(27.1098, -0.1686, col = "cyan", lwd = 3) abline(27.1098, -0.1686, lty = 3) abline((27.1098+0.2673), (-0.1686+0.2642), col = "red", lwd = 3) abline((27.1098+0.2673), (-0.1686+0.2642), lty = 3) points(Ldhuno[Esito == "benigno"], Ldhtre[Esito == "benigno"], bg = "cyan", pch = 21) points(Ldhuno[Esito == "maligno"], Ldhtre[Esito == "maligno"], bg = "red", pch = 21) uno = lm(Ldhtre ~ Ldhuno) due = lm(Ldhtre ~ Ldhuno * Esito) AIC(uno, due) names(fresher) maximalrel = weight ~ gender + height + shoesize + smoke + gym + systolic + diastolic maximalmodel = lm(maximalrel) step(maximalmodel) formulaTemporanea01 = weight ~ height + shoesize + gym modelloTemporaneo01 = lm(formulaTemporanea01) formulaTemporanea02 = weight ~ height + shoesize + gymOS modelloTemporaneo02 = lm(formulaTemporanea02) AIC(modelloTemporaneo01, modelloTemporaneo02) formulaTemporanea03 = weight ~ height * shoesize + gym formulaTemporanea04 = weight ~ height + shoesize * gym formulaTemporanea05 = weight ~ height * gym + shoesize modelloTemporaneo03 = lm(formulaTemporanea03) modelloTemporaneo04 = lm(formulaTemporanea04) modelloTemporaneo05 = lm(formulaTemporanea05) AIC(modelloTemporaneo01, modelloTemporaneo03, modelloTemporaneo04, modelloTemporaneo05) formulaTemporanea06 = weight ~ height + shoesize + gym + I(height^2) formulaTemporanea07 = weight ~ height + shoesize + gym + I(shoesize^2) modelloTemporaneo06 = lm(formulaTemporanea06) modelloTemporaneo07 = lm(formulaTemporanea07) AIC(modelloTemporaneo01, modelloTemporaneo06, modelloTemporaneo07) formulaTemporanea08 = weight ~ height + shoesize + gym - 1 modelloTemporaneo08 = lm(formulaTemporanea08) AIC(modelloTemporaneo01, modelloTemporaneo08) par(mfrow = c(2,2)) plot(modelloTemporaneo01) modminadeg = lm(weight ~ height + shoesize + gym) summary(modminadeg) forzaheight = lm(weight ~ height ) forzashoesize = lm(weight ~ shoesize ) forzagym = lm(weight ~ gym) AIC(forzaheight, forzashoesize, forzagym, modminadeg) modminadeg modminadeg$coefficients[1] sum(modminadeg$coefficients * c(1, 182, 43, 0, 0)) table(gender, shoesize > 40) 75.3 - 4.5 75.3 + 4.5 75.3 - 9 75.3 + 9