Data and Code for Modern Regression Techniques
This page is so you can copy and paste commands rather than having to retype them. Explanations are all in the book. The location of the data is shown in the read.table command. If you run the whole thing at once (not sure why, but?), you should set par(ask=T), and change back to par(ask=F) afterwards. This will prompt you for each new graph. EXCEPT, lattice graphs will still write over traditional graphs (at least in R2.5.0). We assume you run these in order, but if you do not, you may need to run install.packages before loading some packages. Also, some packages may write over function names from other packages, and may even write over variable names (we think this only happens with mgcv in this code). This may happen with variables with names that are used elsewhere in R. So, this happened one time running the memory recognition example with the variable time (which is also the name of a function for time series analysis). Therefore, we used memrec$time and the remaining code worked. If you want the Figures to look the same as in the text you will need to the scale the graphics window appropriately. These commands load the latest versions of many of the packages. Thus, if they change some of the code may stop working. Email danw@sussex.ac.uk if this happens and he will try to fix the code.
The data can be read from the CD or web page. In the book several methods are described so that you do not have to write out the URL every time (also so that the command to read the data did not take up multiple lines). The simplest is changing RProfile.site. A function could also be written so that R automatically reads data from there. In the book we assigned the URL name to webreg. This was so we could still show how to use the read.table command since when you want to access your own data they will not be on the web page. We just make this assignment once here so in case you are doing something different you can just skip this command.
#####################
webreg <- "http://www.sagepub.co.uk//wrightandlondon//"
# Chapter 1. Loading libraries and some univariate statistics (i.e., skewness and histograms)
# Example 1. Length of a chile.
library("foreign")
chile <- read.table(paste(webreg,"chile.dat",sep=""), header=T)
attach(chile)
names(chile)
chile$HEAT
install.packages("e1071")
library(e1071)
skewness(LENGTH)
library(boot)
lengthboot <- boot(LENGTH,function(x,i) skewness(x[i]), R=1000)
boot.ci(lengthboot)
lnlength <- log(LENGTH)
skewness(lnlength)
par(mfrow=c(1,3))
hist(LENGTH)
hist(log(LENGTH))
hist(log(LENGTH+2.54))
par(mfrow=c(1,1))
detach(chile)
# Chapter 2.
set.seed = 121
x1 <- rnorm(100,10,10)
x2 <- rbinom(100,1,.5)
y <- x1 + x2*40 + rnorm(100,0,10)
reg1 <- lm(y~x1)
reg2 <- lm(y~x2)
par(mfrow=c(1,2))
plot(x1,y)
abline(reg1)
plot(x2,y)
abline(reg2)
par(mfrow=c(1,1))
par(mfrow=c(2,2))
plot(reg1)
reg3 <- lm(y~x1+x2)
plot(reg3)
summary(reg3)
reg4 <- lm(y~x1+x2-1)
reg5 <- lm(y~x1*x2)
anova(reg4,reg5)
# Example 2. Dissociation and suggestibility
suggest <- c(12, 2, -2, 5, 10,-13, 10, -3, 4, 9, 13, 6, 18, 12, 14, -6, 0, 5, 6, 19, -5, 8, 5, 14, -1, -2, 10, 17, 2, 10, -1, 21, 14, 4, 20, 24, 5,-12, 8, 7, 0, 2, 7, -1, 12, 4, 0, 19, 8,-12)
DES <-c(45.00,49.28,38.57,55.71,48.21,14.60,12.86,43.50,27.10,46.40,46.40, 35.00,55.00,52.85,46.40,18.21,36.79,48.93,45.50,22.10,31.43,32.50,52.14, 18.21,22.50,27.14,16.67,14.64,39.64,16.78,34.28,69.64,38.90,23.20,41.67, 62.86, 47.00, 22.86,31.42,48.21,18.57,48.21,40.00,35.00,57.85,50.35,41.07, 45.00,48.00,49.28)
reg <- lm(suggest~DES)
summary(reg)
plot(DES,suggest,xlab="Dissociation score (0-100)",ylab="Suggestibility (-25-25)")
abline(reg)
text(65,-12,"r = .31")
dataset1 <- cbind(DES,suggest)
write.table(dataset1,"c:\\temp\\dataset1.dat")
library("foreign")
write.foreign(as.data.frame(dataset1),"c:\\temp\\dataspss.dat","c:\\temp\\dataspss.sps",package="SPSS")
detach(dataset1)
# Chapter 3.
# Example 3. Cognitive dissonance.
control <- c( 0,-3,3,2,-2,-1,2,3,-3,-5,2,-3,3,0,-2,-2,-2,-2,-1,2)
dollar1 <- c(3,1,1,3,2,3,3,2,2,2,2,2,-4,4,0,-3,4,1,1,-2)
dollar20 <- c(1,2,3,0,1,3,0,-2,2,1,0,0,-1,-2,-1,-4,-3,-1,0,0)
enjoy <- c(control,dollar1,dollar20)
group <- as.factor(rep(c("control","$1","$20"),each=20))
tapply(enjoy,group,mean)
tapply(enjoy,group,sd)
plot(group,enjoy,ylab="Enjoyment (-5 to 5)",xlab="Condition",cex.lab=1.3)
anova1 <- aov(enjoy~group)
summary(anova1)
anova2 <- lm(enjoy~group)
summary(anova2)
anova(anova2)
contrasts(group)
wcontrol <- c(1,0,0,0,1,0)
dim(wcontrol) <- c(3,2)
contrasts(group) <- wcontrol
contrasts(group)
summary(lm(enjoy~group))
# Example 4. Children's forgetting I.
lordex <- read.table(paste(webreg,"lordex.txt",sep=""), header=T)
attach(lordex)
names(lordex)
par(mfrow=c(1,2))
hist(Final,main="Untransformed Final")
library(e1071)
library(boot)
skewness(Final)
skewboot <- boot(Final, function(x,i) skewness(x[i]), R=1000)
boot.ci(skewboot,type="bca")
newfinal <- sqrt(Final + .5)
hist(newfinal, main=expression(sqrt(Final + .5)))
par(mfrow=c(1,1))
skewness(newfinal)
skewboot <- boot(newfinal, function(x,i) skewness(x[i]), R=1000)
boot.ci(skewboot,type="bca")
hist(AGEMOS, breaks=c(seq(36,120,12)),xlab="Age in months")
AGEYRS <- trunc(AGEMOS/12)
AGEYRSfac <- as.factor(AGEYRS)
AGE2 <- cut(AGEYRS, br=c(0,6.5,10))
reg1 <- lm(newfinal~AGEMOS)
summary(reg1)
reg2 <- lm(newfinal~AGEYRS)
summary(reg2)
par(mfrow=c(1,2))
plot(AGEMOS,newfinal,ylab=expression(sqrt(Final + .5)),xlab="Age in months")
lines(sort(AGEMOS),predict(reg1)[order(AGEMOS)],col="red")
lines(sort(AGEMOS),predict(reg2)[order(AGEMOS)],col="blue")
reg3 <- lm(newfinal ~ AGEYRSfac)
summary(reg3)
tapply(newfinal,AGEYRSfac,mean)
anova(reg2,reg3)
t.test(newfinal~AGE2,var.equal=T)
reg4 <- lm(newfinal~AGE2)
summary(reg4)
plot(AGEMOS,newfinal,ylab=expression(sqrt(Final + .5)),xlab="Age in months")
lines(sort(AGEMOS),predict(reg3)[order(AGEMOS)],col="red")
lines(sort(AGEMOS),predict(reg4)[order(AGEMOS)],col="blue")
par(mfrow=c(1,1))
Diff <- Final - Initial
dreg1 <- lm(Diff~AGEMOS)
dreg2 <- lm(Diff~AGEYRS)
dreg3 <- lm(Diff~AGEYRSfac)
dreg4 <- lm(Diff~AGE2)
par(mfrow=c(1,2))
plot(AGEMOS,Diff, ylab="Final - Initial", xlab="Age in months")
lines(sort(AGEMOS),predict(dreg1)[order(AGEMOS)],col="red")
lines(sort(AGEMOS),predict(dreg2)[order(AGEMOS)],col="blue")
plot(AGEMOS,Diff,ylab="Final - Initial", xlab="Age in months")
lines(sort(AGEMOS),predict(dreg3)[order(AGEMOS)],col="red")
lines(sort(AGEMOS),predict(dreg4)[order(AGEMOS)],col="blue")
par(mfrow=c(1,1))
tapply(Diff,AGEYRSfac,mean)
summary(dreg1)
detach(lordex)
# Chapter 4
# Example 4. Children's forgetting II
lordex <- read.table(paste(webreg,"lordex.txt",sep=""), header=T)
attach(lordex)
lm1 <- lm(Final ~ Initial)
summary(lm1)
lm2 <- lm(Final ~ Initial + AGEMOS)
summary(lm2)
lm3 <- lm(Final ~ Initial*AGEMOS)
summary(lm3)
anova(lm1,lm2,lm3)
summary(lm2)
summary(lm3)
library(lattice)
AGEYRS <- trunc(AGEMOS/12)
scatmat <- xyplot(Final~Initial | as.factor(AGEYRS))
print(scatmat)
tapply(Initial, AGEYRS, sd)
tapply(Final, AGEYRS, sd)
sexit <- split(Initial,AGEYRS)
sfoll <- split(Final,AGEYRS)
predmod4 <- split(lm(Final~Initial*as.factor(AGEYRS))$fitted.values,AGEYRS)
plot(Initial,Final,pch=19,cex=.5,col="black", xlab="Initial interview", ylab="Final interview",xlim=c(0,25),ylim=c(0,10),cex.lab=1.3,las=1,font.lab=1.5)
abline(0,1,lty = 3)
for (i in 1:6) lines(sexit[[i]],predmod4[[i]], col="black")
meanexit <- c(mean(sexit[[1]]),mean(sexit[[2]]), mean(sexit[[3]]), mean(sexit[[4]]), mean(sexit[[5]]), mean(sexit[[6]]))
meanfoll<- c(mean(sfoll[[1]]),mean(sfoll[[2]]), mean(sfoll[[3]]), mean(sfoll[[4]]), mean(sfoll[[5]]), mean(sfoll[[6]]))
lines(meanexit,meanfoll,col="grey",lwd=4,lend="round")
text(12,10,"Final = Initial",pos=4)
arrows(12,10,10,10,length=.1)
text(c(13,21,12.1,0,0,4.4),c(5.5,4.5,2.2,3.7,1.5,.5),c("9","7","8","6","5","4"))
points(meanexit,meanfoll,pch=19)
lines(c(14,16.7), c(9.2,9.2),col="grey",lwd=4)
text(16.7,9.2,"Age group means", pos=4)
text(14,8.5,"Means increase with age\nfor both interviews",pos=4)
text(-.3,9.5,"Intercepts higher for\nolder children\n (Ancova)",pos=4)
text(14,1,"Older children further away\nfrom dashed line (Anova).", pos=4)
detach(lordex)
# Mediation example
set.seed(143)
leaflet <- rep(c(0,1),each=50)
fairskin <- rbinom(100,1,.5)
likely <- rbinom(100,10,.20 + .2*leaflet + .2*fairskin)
plan <- rbinom(100,7,likely/15+leaflet*.2)
summary(lm(likely~leaflet))$coef
summary(lm(plan~leaflet))$coef
summary(lm(plan~likely))$coef
summary(lm(plan~likely+leaflet))$coef
(1.92*.45)/sqrt(.37^2*.45^2 + .06^2*1.92^2+.37^2*.06^2)
sobel1 <- function(a,b,sea,seb){
sobel <- (a*b)/sqrt(sea^2*b^2+seb^2*a^2+sea^2*seb^2)
paste("Sobel z =", format(sobel,nsmall=2,digits=3))}
sobel1(1.92,.45,.37,.06)
sobel1(1.920,.454,.367,.057)
# source(paste(webreg,"mediator.R",sep=""))
# This will work once Dan returns to Brighton and updates the webpage.
# You need to run the function first
## Simple Mediation Function with Figure of effects
mediator <- function(x,y,m, ...){
# Put in experimental variable x, outcome variable y, then mediator m.
# Some of this is written so that it works in a mediator
# function for more complex problems.
reg0 <- lm(y~x)
reg1 <- lm(m~x)
reg2 <- lm(y~m+x)
c <- summary(reg0)$coefficients[2,1]
sc <- summary(reg0)$coefficients[2,2]
a <- summary(reg1)$coefficients[2,1]
sa <- summary(reg1)$coefficients[2,2]
b <- summary(reg2)$coefficients[2,1]
sb <- summary(reg2)$coefficients[2,2]
cp <- summary(reg2)$coefficients[3,1]
scp <- summary(reg2)$coefficients[3,2]
sobel <- (a*b)/sqrt(b^2*sa^2 + a^2*sb^2 + sa^2*sb^2)
psobel <- format(2*(1-pnorm(abs(sobel))),digits=2,nsmall=2)
plot(c(0,100),c(0,110),col="white",ann=F,tck=0,col.axis="white")
rect(c(10,10,70,70,40),c(10,50,10,50,80),c(30,30,90,90,60),c(30,70,30,70,100))
arrows(c(30,30,20,60),c(20,60,70,90),c(70,70,40,80),c(20,60,90,70),length=.15)
text(c(20,20,80,80,50),c(20,60,20,60,90),c("X","X","Y","Y","M"),cex=2)
text(30,80,paste(format(a,digits=2,nsmall=2)),pos=2,cex=1.3)
text(70,80,paste(format(b,digits=2,nsmall=2)),pos=4,cex=1.3)
text(50,20,paste(format(c,digits=2,nsmall=2)),pos=3,cex=1.3)
text(50,60,paste(format(cp,digits=2,nsmall=2)),pos=3,cex=1.3)
text(50,2,paste("Sobel z = ",format(sobel,digits=2,nsmall=2),"; p = ",psobel),cex=1.3)
print(paste("Sobel z =", format(sobel,digits=3,nsmall=2),", p =", psobel))
invisible(c(sobel,psobel))
}
mediator(leaflet,plan,likely)
# Chapter 5.
# Example 6. Fathers' ptsd.
maleptsd <- read.table(paste(webreg,"maleptsd.dat",sep=""), header=T)
attach(maleptsd)
ptsd <- log(PTSD + 1)
preds <- cbind(OVER2,OVER3,OVER5,BOND,POSIT,NEG,CONTR,SUP,CONS,AFF)
ptsdmat <- cor(preds)
print(ptsdmat,digits=2)
print(sd(preds),digits=2)
spmat <- cor(preds,method="spearman")
print(spmat,digits=2)
cor.test(OVER2,OVER3)
new <- diag(sd(preds)) + upper.tri(spmat)*spmat + lower.tri(ptsdmat)*ptsdmat
print(new,digits=2)
library(car)
spm(preds)
allvars <-
lm(ptsd~preds)
summary(allvars)
install.packages("leaps")
library(leaps)
x1 <- regsubsets(preds,ptsd)
summary(x1)
par(mfrow=c(2,2))
plot(x1,scale=c("r2"))
title("Default for leaps")
plot(x1,scale=c("adjr2"))
title("Default for leaps")
x2 <- leaps(preds,ptsd,nbest=1, method="r2")
x3 <- leaps(preds,ptsd,nbest=1, method="adjr2")
plot(x2$size-1,x2$r2,xlab="Number of predictors",ylab=expression(R^2))
lines(spline(x2$size-1,x2$r2))
plot(x3$size-1,x3$adjr2, xlab="Number of predictors",ylab=expression(adj.R^2))
lines(spline(x3$size-1,x3$adjr2))
par(mfrow=c(1,1))
plot(x3$size-1,x3$adjr2,xlab="# of predictors not including B0", ylab="Adj. R-squared")
lines(spline(x3$size-1,x3$adjr2))
m2 <- max(x3$adjr2)
m1 <- which.max(x3$adjr2)
arrows(m1,m2-.02,m1,m2)
text(m1,m2-.023,expression(paste("max ",r[adj]^2 == 0.2)))
text(m1,m2-.031, paste("with",format(m1,digits=1),"variables"),pos=3)
par(mfrow=c(1,1))
x3$which[which.max(x3$adjr2),]
labels(preds[1,])
summary(lm(ptsd~preds[,c(1,2,4,6,8,10)]))
library(MASS)
lm.ridge(ptsd~preds,lambda=seq(0,100,by=1))-> x
plot(x)
title("Ridge Regression")
abline(h=0)
abline(v=50,lty=3)
x$coef[,50]
install.packages("lars")
library("lars")
lasso1 <- lars(preds,ptsd)
plot(lasso1)
predict(lasso1,s=7,mode="step", type="coefficient")$coefficients
lassocv <- cv.lars(preds,ptsd)
frac <- lassocv$fraction[which.min(lassocv$cv)]
predict(lasso1,s=frac,mode="fraction", type="coefficient")$coefficients
install.packages("pls")
library("pls")
pcr1 <- pcr(ptsd~preds,ncomp=10)
plot(1:10,summary(pcr1)[1,],col="blue",ylab="Cumulative variance accounted for",xlab="Number of components",ylim=c(0,100),pch=19,cex.lab=1.3)
lines(1:10,summary(pcr1)[1,],col="blue",lwd=1.5)
points(1:10,summary(pcr1)[2,],col="red",pch=19)
lines(1:10,summary(pcr1)[2,],col="red",lwd=1.5)
text(4,85,"variance of predictors",col="blue",pos=4,cex=1.2)
text(5,35,"variance of PTSD",col="red",pos=4,cex=1.2)
pcr1$loadings
pca1 <- princomp(preds)
pcrreg <- lm(ptsd ~ pca1$scores[,1]+ pca1$scores[,2])
summary(pcrreg)
plsr1 <- plsr(ptsd~preds,ncomp=10)
plot(1:10,summary(plsr1)[1,],col="blue",ylab="Cumulative variance accounted for",xlab="Number of components",ylim=c(0,100),pch=19,cex.lab=1.3)
lines(1:10,summary(plsr1)[1,],col="blue",lwd=1.5)
points(1:10,summary(plsr1)[2,],col="red",pch=19)
lines(1:10,summary(plsr1)[2,],col="red",lwd=1.5)
text(4.3,85,"variance of predictors",col="blue",pos=4,cex=1.2)
text(5,35,"variance of PTSD",col="red",pos=4,cex=1.2)
summary(plsr1)
plsr1$loadings
plsreg <- lm(ptsd ~ plsr1$scores[,1])
summary(plsreg)
detach(maleptsd)
# Chapter 6.
plot(0:40,dpois(0:40,1),xlab="Variable",ylab="",ylim=c(0,.5),col="white")
for (i in c(1,2,5,10,20)) lines(spline(0:40,dpois(0:40,i)))
text(1, .37, expression(lambda," = 1"),pos=4)
text(2.4, .26, expression(lambda," = 2"),pos=4)
text(4, .20, expression(lambda," = 5"), pos=4)
text(6.8, .14, expression(lambda," = 10"), pos=4)
text(17, .11, expression(lambda," = 20"), pos=4)
# Example 7. Associations with test score.
glmexample <- read.table(paste(webreg,"glmexample.dat",sep=""),header=T)
attach(glmexample)
socreg <- glm(social~test)
summary(socreg)
detreg <- glm(detent~test,binomial)
summary(detreg)
x <- cbind(math,10-math)
mathreg <- glm(x~test,binomial)
summary(mathreg)
bookreg <- glm(books~test,poisson)
summary(bookreg)
par(mfrow=c(2,2))
plot(test,social)
lines(test,predict(socreg,type="response"))
plot(test,detent)
lines(test,predict(detreg,type="response"))
plot(test,math/10)
lines(test,predict(mathreg,type="response"))
plot(test,books)
lines(test,predict(bookreg,type="response"))
par(mfrow=c(1,1))
det2 <- glm(detent~test+social,binomial)
anova(detreg,det2,test="Chi")
det3 <- glm(detent~test*social,binomial)
anova(det2,det3,test="Chi")
socialpoly <- glm(social~poly(test,2))
plot(test,social)
lines(test,predict(socialpoly,type="response"))
detach(glmexample)
# Example 8. Manipulation reasonable doubt.
juryrd <- read.table(paste(webreg,"juryrd.dat",sep=""),header=T)
attach(juryrd)
names(juryrd)
par(mfrow=c(1,2))
hist(BELIEF)
library(e1071)
skewness(BELIEF)
lb <- format(skewness(BELIEF) - sqrt(6/length(BELIEF))*1.96,digit=3)
ub <- format(skewness(BELIEF) + sqrt(6/length(BELIEF))*1.96,digit=3)
print(paste("Lower Bound = ", lb, "Upper Bound = ", ub))
library(boot)
beliefboot <- boot(BELIEF,function(x,i) skewness(x[i]), R=1000)
boot.ci(beliefboot)
shapiro.test(BELIEF)
ks.test(BELIEF,"pnorm")
beliefsq <- BELIEF^2
hist(beliefsq)
par(mfrow=c(1,1))
bboot2 <- boot(beliefsq,function(x,i) skewness(x[i]), R=1000)
boot.ci(bboot2)
reg1 <- lm(beliefsq~FORM)
summary(reg1)
t.test(beliefsq~FORM)
wbeliefsq <- split(beliefsq,FORM)
wilcox.test(wbeliefsq[[1]],wbeliefsq[[2]])
reg2 <- glm(GUILTY~beliefsq, binomial)
reg3 <- glm(GUILTY~beliefsq+FORM, binomial)
anova(reg2,reg3)
1-pchisq(15.351,1)
summary(reg3)
plot(BELIEF,predict(reg3,type="response"),ylab="Probability of a guilty verdict")
beliefs <- split(BELIEF,FORM)
predicts <- split(predict(reg3,type="response"),FORM)
o1 <- order(beliefs[[1]])
o2 <- order(beliefs[[2]])
lines(beliefs[[1]][o1],predicts[[1]][o1])
lines(beliefs[[2]][o2],predicts[[2]][o2])
abline(h=.5,lty=3)
ld1 <- sqrt(-(reg3$coef[1]/reg3$coef[2]))
ld2 <- sqrt(-((reg3$coef[1]+reg3$coef[3])/reg3$coef[2]))
abline(v=ld1,lty=3)
abline(v=ld2,lty=3)
paste("Control LD50 =", format(ld1,dig=2), "Instruction LD50 =",format(ld2,dig=2))
anova(reg3,glm(GUILTY~FORM*beliefsq,binomial),test="Chi")
table(GUILTY,FORM)
chisq.test(GUILTY,FORM,correct=F)
cells <- c(54,44,32,42)
imagine <- c(0,1,0,1)
guilty <- c(0,0,1,1)
glm(cells~imagine+guilty,poisson)
detach(juryrd)
# Chapter 7.
set.seed=2007
x <- (1:100)/50
y <- 4 + 2*x - 3*x^2 + x^3 + rnorm(100,0,.3)
par(mfrow=c(1,4))
plot(x,y, main="0 degree", xlab="", ylab="", pch=19, axes=F)
box()
abline(h=mean(y),lwd=1.5)
for (i in 1:3) {
plot(x,y, main=paste(i,"degree"), xlab="", ylab="", pch=19, axes=F)
box()
lines(x,predict(lm(y~poly(x,i))),lwd=1.5)}
par(mfrow=c(1,1))
anova(lm(y~1),lm(y~poly(x,1)),lm(y~poly(x,2)),lm(y~poly(x,3)))
qreg1 <- lm(y[1:50]~poly(x[1:50],1))
qreg2 <- lm(y[51:100]~poly(x[51:100],1))
par(mfrow=c(1,2))
plot(x,y, main="Piecewise linear", xlab="", ylab="", pch=19, axes=F)
box()
lines(x[1:50],predict(qreg1),lwd=1.5)
lines(x[51:100],predict(qreg2),lwd=1.5)
plot(x,y, main="Cubic spline with 1 knot", xlab="", ylab="", pch=19, axes=F)
box()
library(splines)
lines(x,predict(lm(y~bs(x,degree=3,df=4))),lwd=1.5)
par(mfrow=c(1,1))
# Example 9. Academics' salaries
years <- c(1973,1975,1977,1979,1981,1983,1985,1986,1987,1988,1989,1990,1991,1992,1993,1994,1995,1996,1997,1998,1999,2000,2001,2002,2003,2004,2005,2006)
salary <- c(-3.0,-8.0,-1.7,-10.2,-4.0,3.7,5.2,4.8,0.5,1.4,1.5,-0.7, 0.4,-0.4,0.3,0.7,0.4,-0.3,1.6,2.0,1.0,0.1,2.2,0.6,0.2,-0.5,-0.3,1.3)
plot(years,salary,xlab="Year",ylab="Salary change in real terms",pch=20)
abline(h=0,lty=2)
salary1 <- lm(salary ~ bs(years,degree=1))
summary(salary1)
par(mfrow=c(2,3))
plot(years,salary,xlab="Years",ylab="Salary change",pch = 19)
abline(h=0,lty=2)
lines(years, fitted(salary1))
salary2 <- lm(salary ~ bs(years,degree=1,df=2))
plot(years,salary,xlab="Years",ylab="Salary change",pch = 19)
abline(h=0,lty=2)
lines(years,fitted(salary2))
salary3 <- lm(salary ~ bs(years,degree=1,knots=1986))
plot(years,salary,xlab="Years",ylab="Salary change",pch = 19)
abline(h=0,lty=2)
lines(years,fitted(salary3))
anova(salary1, salary2, salary3)
salary4 <- lm(salary ~ bs(years,degree=2,df=2))
plot(years,salary,xlab="Years",ylab="Salary change",pch = 19)
abline(h=0,lty=2)
lines(years,fitted(salary4))
salary5 <- lm(salary ~ bs(years,degree=2,df=3))
plot(years,salary,xlab="Years",ylab="Salary change",pch = 19)
abline(h=0,lty=2)
lines(years,fitted(salary5))
salary6 <- lm(salary ~ bs(years,degree=2,knots=1986))
plot(years,salary,xlab="Years",ylab="Salary change",pch = 19)
abline(h=0,lty=2)
lines(years,fitted(salary6))
par(mfrow=c(1,1))
anova(salary4,salary5,salary6)
anova(salary2,salary5)
# Example 10. Experience, Gender, Education, and Salary
cp85 <- read.table(paste(webreg,"cp85.dat",sep=""), header=T)
library(splines)
attach(cp85)
names(cp85)
wage1 <- lm(LNWAGE~bs(EXPER,df=1,degree=1) + bs(EDUC,df=4) + FEMALE)
summary(wage1)
anova(wage1)
experq <- quantile(EXPER)
minimales <- data.frame(EXPER=rep(experq,each=21),EDUC=rep(0:20,5),FEMALE=rep(0,105))
minifemales <- data.frame(EXPER=rep(experq,each=21),EDUC=rep(0:20,5),FEMALE=rep(1,105))
par(mfrow=c(1,2))
plot(minimales$EDUC,predict(wage1,minimales),xlab="Years in education", ylab="Predicted ln(wage)",pch=19,cex=.5,ylim=c(.5,3))
lines(0:20, predict(wage1,minimales[1:21,]),lwd=.5)
lines(0:20, predict(wage1,minimales[22:42,]),lwd=1)
lines(0:20, predict(wage1,minimales[43:63,]),lwd=1.5)
lines(0:20, predict(wage1,minimales[64:84,]),lwd=2)
lines(0:20, predict(wage1,minimales[85:105,]),lwd=2.5)
text(6,2.5,"More experience")
text(15,1.5,"Less experience")
title("For males")
plot(minifemales$EDUC,predict(wage1,minifemales),xlab="Years in education", ylab="Predicted ln(wage)",pch=19,cex=.5,ylim=c(.5,3))
lines(0:20, predict(wage1,minifemales[1:21,]),lwd=.5)
lines(0:20, predict(wage1,minifemales[22:42,]),lwd=1)
lines(0:20, predict(wage1,minifemales[43:63,]),lwd=1.5)
lines(0:20, predict(wage1,minifemales[64:84,]),lwd=2)
lines(0:20, predict(wage1,minifemales[85:105,]),lwd=2.5)
text(6,2.5,"More experience")
text(15,1.2,"Less experience")
title("For females")
par(mfrow=c(1,1))
install.packages("gam")
library(gam)
wage1a <- gam(LNWAGE~bs(EXPER,df=1,degree=1) + bs(EDUC,df=4) + FEMALE)
summary(wage1a)
par(mfrow=c(1,3))
plot(wage1a, se=T)
wage2a <- gam(LNWAGE~bs(EXPER,df=2,degree=1) + bs(EDUC,df=4) + FEMALE)
anova(wage1a,wage2a,test="F")
plot(wage2a,se=T)
summary(lm(LNWAGE ~ bs(EXPER, degree=3, df=4)+bs(EDUC,degree=3, df=4) + FEMALE))
plot(gam(LNWAGE~bs(EXPER,degree=3,df=4)+bs(EDUC,degree=3,df=4)+ FEMALE),se=T)
par(mfrow=c(1,1))
detach(cp85)
# Example 11. Detecting deception.
set.seed(406)
age <- runif(1000,3,23)
truth <- rbinom(1000,1,.5)
cbca <- round(-3*truth+2*truth*log(age)+.2*age+rbinom(1000,25,.7))
summary(glm(truth~age,family=binomial))
library(gam)
plot(gam(truth~bs(age,df=4),family=binomial),se=T)
gam0 <- gam(truth~bs(cbca, df=4), family=binomial)
gam1 <- gam(truth ~ bs(cbca, df=4) + bs(age, df=4), family=binomial)
anova(gam0,gam1)
inter <- cbca*age
gam2 <- gam(truth~bs(cbca,df=4)+bs(age,df=4)+bs(inter,df=4), family=binomial)
anova(gam1,gam2)
par(mfrow=c(3,3))
truth2 <- factor(truth,label=c("false","true"))
plot(gam0, se=T)
plot(age,cbca,pch=".")
plot(truth2,cbca,ylab="cbca")
plot(gam1, se=T)
plot(truth2,age,ylab="age")
plot(gam2, se = T)
par(mfrow=c(1,1))
# Here is the code for making the generalized additive model for the
# London et al. (in press) data. Other models were examined.
attach(lordex)
gam1 <- gam(Final ~ Initial + bs(AGEMOS,df=4),poisson)
plot(gam1,se=T,terms="bs(AGEMOS,
df = 4)",1,pch=19,xlab="Age in months",ylab="Predicted
conditional recall",cex.lab=1.5,cex.axis=1.3)
# Chapter 8.
# Example 12. Getting children to exercise
exer <- read.table(paste(webreg,"exercise.dat",sep=""),header=T)
attach(exer)
cond <- factor(wcond, labels=c("Control","Leaftet", "L+quiz","L+plan"))
install.packages("nlme")
library("nlme")
model1 <- lm(sqw2 ~ cond)
summary(model1)
plot(cond,sqw2,xlab="Conditions",ylab=expression(sqrt(exercise + .5)))
mexer <- aggregate(sqw2,list(class),mean)[,2]
mcond <- aggregate(wcond,list(class),mean)[,2]
model2 <- lm(mexer~factor(mcond))
summary(model2)
points(mcond,mexer,pch=19,cex=1.3)
model3 <- lm(sqw2 ~ factor(class) + cond)
summary(model3)
model4 <- lme(sqw2 ~ 1, random = ~1|class,method="ML")
model4a <- lme(sqw2 ~ cond, random = ~1|class,method="ML")
anova(model4,model4a)
summary(model4a)
newcontrasts <- c(-3, 1, 1, 1, 0, -2, 1, 1, 0, 0, -1, 1)
dim(newcontrasts) <- c(4,3)
contrasts(cond) <- newcontrasts
contrasts(cond)
model4b <- lme(sqw2 ~ cond, random = ~1|class,method="ML")
summary(model4b)
model5 <- lme(sqw2 ~ sqw1, random = ~1|class,method="ML")
model5a <- lme(sqw2 ~ sqw1 + cond, random = ~1|class,method="ML")
model5b <- lme(sqw2 ~ sqw1*cond, random = ~1|class,method="ML")
anova(model5,model5a,model5b)
par(mar=c(5,5,4,2))
plot(jitter(sqw1),jitter(sqw2), xlab=expression(sqrt(pre-exercise + .5)), ylab=expression(sqrt(post-exercise + .5)),pch=wcond,col=wcond)
legend(3,1.3,c(levels(cond)),pch=1:4,col=1:4)
sexer1 <- split(sqw1,cond)
spred <- split(model5b$fitted[,1],cond)
for (i in 1:4) lines(sexer1[[i]],spred[[i]],col=i)
par(mar=c(5,4,4,2))
library(lattice)
xyplot(sqw2~sqw1 | class)
intervals(model5a)
varprop <- function(m1,m2)
{v1 <- sum(as.numeric(VarCorr(m1)[,1]))
v2 <- sum(as.numeric(VarCorr(m2)[,1]))
rsq <- (v1-v2)/v1
return(rsq)
}
model50 <- lme(sqw2 ~ 1, random = ~1|class,method="ML")
varprop(model50,model5)
varprop(model50,model5a)
varprop(model50,model5b)
detach(exer)
# Example 13. Response times and memory recognition accuracy
memrec <- read.table(paste(webreg,"memrec.dat",sep=""),header=T)
attach(memrec)
par(mfrow=c(2,1))
hist(time,xlab="Response time (in msec)",main="Untransformed variable")
library(e1071)
text(6000,850,paste("skewness = ",format(skewness(time),digits=2)),pos=4)
lntime <- log(time)
hist(lntime,xlab="ln(response time in msec)",main="Transformed variable")
text(8.5,400,paste("skewness = ",format(skewness(lntime),digits=2)),pos=4)
par(mfrow=c(1,1))
install.packages("lme4")
library(lme4)
model1 <- lmer(saysold ~ faceold + (1|partno), family=binomial,method="Laplace")
model2 <- update(model1, .~. + facewhite)
model3 <- update(model2, .~. + facewhite:faceold)
model4 <- update(model3, .~. + lntime)
model5 <- update(model4, .~. + lntime:faceold)
model6 <- update(model5, .~. + lntime:facewhite)
model7 <- update(model6, .~. + lntime:faceold:facewhite)
anova(model1,model2,model3,model4,model5,model6,model7)
model5
mod5 <- -5.8387 + faceold*10.7439+facewhite*-1.0359+lntime*0.6486+faceold*facewhite*0.9677+faceold*lntime*-1.1774
predprob <- exp(mod5)/(1+exp(mod5))
rightprob <- predprob*faceold + (1-faceold)*(1-predprob)
par(mfrow=c(1,1))
plot(memrec$time,rightprob,pch=20,xlab="Time in msec",ylab="Probability of a correct response",cex.lab=1.3)
stime <- split(time,facewhite+2*faceold)
srightprob <- split(rightprob,facewhite+2*faceold)
for (i in 1:4) lines(sort(stime[[i]]),srightprob[[i]][order(stime[[i]])],col=i,lwd=1.5)
text(12000,.8,"previously uncseen White faces",cex=1.3)
model5b <- update(model5, .~. -(1|partno) + (faceold|partno))
anova(model5,model5b)
model5b
# The mgcv package overwrites stuff in gam and some other things, so if you run this
# and then the models above you may get some funny answers
install.packages("mgcv")
library(mgcv)
par(mfrow=c(2,2))
ssaysold <- split(saysold,facewhite+2*faceold)
slntime <- split(lntime,facewhite+2*faceold)
spartno <- split(partno,facewhite+2*faceold)
for (i in 1:4) {
part <- spartno[[i]]
time <- slntime[[i]]
sold <- ssaysold[[i]]
gammy <- gamm(sold ~ s(time),random=list(part=~1),family=binomial)
plot(gammy$gam,xlab="ln(time in msec)")
if (i == 1) title("New Black faces")
if (i == 2) title("New White faces")
if (i == 3) title("Old Black faces")
if (i == 4) title("Old White faces")
}
par(mfrow=c(1,1))
detach(memrec)
# Chapter 9. Robust regression
# Example 14. Crime in Sussex.
crime <- read.table(paste(webreg,"sussexcrime.dat",sep=""),header=T)
crime <- crime[order(crime[,9]),1:10]
attach(crime)
par(mfrow=c(1,3))
plot(theft, Drugs, xlab="Theft offences",ylab="Drug offences",pch=19,main="Scatterplot of raw data",cex.lab=1.3)
text(2900,300,"Regency",pos=3,cex=1.3)
plot(rank(theft), rank(Drugs) ,xlim=c(0,300),ylim=c(0,300), xlab="Rank of theft offences",ylab="Rank of drug offences",pch=19,main="Scatterplot of ranked data",cex.lab=1.3)
text(180,270,"Regency",pos=3,cex=1.3)
arrows(230,285, 255,265, length=.07)
plot(log(theft+1), log(Drugs+1), xlab="Log of theft offences + 1",ylab="Log of drug offences + 1",pch=19,main="Scatterplot of logged data",cex.lab=1.3)
text(6.7,5.4,"Regency",pos=3,cex=1.3)
par(mfrow=c(1,1))
cor.test(theft,Drugs)
cor.test(theft,Drugs,method="spearman")
library(boot)
bootspear <- function(x,i) cor.test(x$Drugs[i],x$theft[i],method="spearman")$estimate
thevars <- as.data.frame(cbind(Drugs,theft))
spearboot <- boot(thevars,bootspear, R=1000)
boot.ci(spearboot)
cor.test(rank(theft),rank(Drugs))
lnreg <- lm(log(Drugs+1)~log(theft+1))
par(mfrow=c(1,2))
plot(log(theft+1), log(Drugs+1), xlab="LN of theft offences",ylab="LN of drug offences",pch=19,main="Scatterplot of logged data",cex.lab=1.3)
lines(log(theft+1),predict(lnreg))
plot(theft, Drugs, xlab="Theft offences",ylab="Drug offences",pch=19,main="Scatterplot of raw data",cex.lab=1.3)
lines(theft,exp(predict(lnreg))-1)
abline(lm(Drugs~theft),col="red")
par(mfrow=c(1,1))
detach(crime)
# Example 15. Children's well being
children <- read.table(paste(webreg,"unicef.txt",sep=""),header=T)
attach(children)
cor.test(risks,health)
cor.test(risks,health,method="spearman")
source("ftp://ftp.usc.edu/pub/wilcox/Rallfunv1.v4")
source("ftp://ftp.usc.edu/pub/wilcox/Rallfunv2.v4")
scor(risks,health,xlab="Risk rank",ylab="Health rank")
text(c(2,8,4),c(15,18,19),c("Poland","Greece","Ireland"),pos=4)
detach(children)
# Example 16. Food and drink intake.
anscombe <- read.table(paste(webreg,"animal.dat",sep=""),header=T)
attach(anscombe)
cbind(anscombe[1:11,], anscombe[12:22,], anscombe[23:33,], anscombe[34:44,])
meansd <- rbind(tapply(Food,Type,mean),tapply(Food,Type,sd), tapply(Drink,Type,mean),tapply(Drink,Type,sd))
row.names(meansd) <- c("Food mean","Food sd","Drink mean","Drink sd")
meansd
summary(aov(Food~Type))
summary(aov(Drink~Type))
Foodg <- split(Food,Type)
Drinkg <- split(Drink,Type)
for (i in 1:4) {print(paste("Group =",names(Drinkg)[i]))
print(summary(lm(Drinkg[[i]]~Foodg[[i]])))}
library(lattice)
xyplot(Drink~Food|Type)
par(mfrow=c(2,2))
for (i in 1:4) {
x <- lm(Drinkg[[i]]~Foodg[[i]])
plot(Foodg[[i]],Drinkg[[i]],main=(paste(names(Foodg)[i])),xlab="Food intake",ylab="Drink intake",ylim=c(0,15),xlim=c(0,20))
abline(x)}
par(mfrow=c(1,1))
library(MASS)
birds <- rlm(Drinkg[[1]]~Foodg[[1]])
summary(birds)
insect <- rlm(Drinkg[[2]]~Foodg[[2]])
summary(insect)
mammals <- rlm(Drinkg[[3]]~Foodg[[3]])
summary(mammals)
mammals2 <- lm(Drinkg[[3]] ~ poly(Foodg[[3]],2))
summary(mammals2)
reptiles <- rlm(Drinkg[[4]]~Foodg[[4]])
summary(reptiles)
detach(anscombe)
# Chapter 10. Conclusion
set.seed(47)
genoheight <- runif(100,60,80)
parentheight <- genoheight + runif(100,-10,10)
kidheight <- genoheight + runif(100,-10,10)
plot(parentheight,kidheight,pch=20,xlab="Parent's height in inches",ylab="Child's height in inches")
abline(0,1)
abline(h=mean(kidheight),v=mean(parentheight),lty="dashed")
text(55,84,"Most
kids of short\nparents above\nthe diagonal", pos=4)
text(73,57,"Most
kids of tall\nparents below diagonal", pos=4)
arrows(58,54,58,58,length=.1)
text(54,53, "kids = parents",pos=4)
cor.test(parentheight,kidheight)
etasq
<- function(aovob){
effects <- dim(aovob)[1] - 1
for (i in 1:effects){
partialeta <- format(aovob[i,2]/(aovob[i,2] + aovob[effects+1,2]),
nsmall=2, digits=2)
eta <-
format(aovob[i,2]/sum(aovob[,2]),nsmall=2,digits=2)
print(paste("Partial eta sq. =", partialeta,", eta squared
=",