ModReg Page

 

 

 

Data and Code for Modern Regression Techniques

 

 

 

    This page is so you can copy and paste commands rather than having to retype them. Explanations are all in the book.  The location of the data is shown in the read.table command. If you run the whole thing at once (not sure why, but?), you should set par(ask=T), and change back to par(ask=F) afterwards. This will prompt you for each new graph. EXCEPT, lattice graphs will still write over traditional graphs (at least in R2.5.0). We assume you run these in order, but if you do not, you may need to run install.packages before loading some packages. Also, some packages may write over function names from other packages, and may even write over variable names (we think this only happens with mgcv in this code). This may happen with variables with names that are used elsewhere in R. So, this happened one time running the memory recognition example with the variable time (which is also the name of a function for time series analysis). Therefore, we used memrec$time and the remaining code worked. If you want the Figures to look the same as in the text you will need to the scale the graphics window appropriately. These commands load the latest versions of many of the packages. Thus, if they change some of the code may stop working. Email danw@sussex.ac.uk if this happens and he will try to fix the code.

 

The data can be read from the CD or web page. In the book several methods are described so that you do not have to write out the URL every time (also so that the command to read the data did not take up multiple lines). The simplest is changing RProfile.site. A function could also be written so that R automatically reads data from there. In the book we assigned the URL name to webreg. This was so we could still show how to use the read.table command since when you want to access your own data they will not be on the web page. We just make this assignment once here so in case you are doing something different you can just skip this command.

 

#####################

 

webreg <- "http://www.sagepub.co.uk//wrightandlondon//"

 

 

#  Chapter 1. Loading libraries and some univariate statistics (i.e., skewness and histograms)

#  Example 1. Length of a chile.

library("foreign")

chile <- read.table(paste(webreg,"chile.dat",sep=""), header=T)

attach(chile)

names(chile)

chile$HEAT

install.packages("e1071")

library(e1071)

skewness(LENGTH)

library(boot)

lengthboot <- boot(LENGTH,function(x,i) skewness(x[i]), R=1000)

boot.ci(lengthboot)

lnlength <- log(LENGTH)

skewness(lnlength)

par(mfrow=c(1,3))

hist(LENGTH)

hist(log(LENGTH))

hist(log(LENGTH+2.54))

par(mfrow=c(1,1))

detach(chile)

 

#  Chapter 2.

set.seed = 121

x1 <- rnorm(100,10,10)

x2 <- rbinom(100,1,.5)

y <- x1 + x2*40 + rnorm(100,0,10)

reg1 <- lm(y~x1)  

reg2 <- lm(y~x2)

par(mfrow=c(1,2))

plot(x1,y)

abline(reg1) 

plot(x2,y)

abline(reg2)

par(mfrow=c(1,1))

par(mfrow=c(2,2))

plot(reg1)

reg3 <- lm(y~x1+x2)

plot(reg3)

summary(reg3)

reg4 <- lm(y~x1+x2-1)

reg5 <- lm(y~x1*x2)

anova(reg4,reg5)

 

# Example 2. Dissociation and suggestibility

suggest <- c(12, 2, -2, 5, 10,-13, 10, -3, 4, 9, 13, 6, 18, 12, 14, -6, 0, 5, 6, 19, -5, 8, 5, 14, -1, -2, 10, 17, 2, 10, -1, 21, 14, 4, 20, 24, 5,-12, 8, 7, 0, 2, 7, -1, 12, 4, 0, 19, 8,-12)

DES <-c(45.00,49.28,38.57,55.71,48.21,14.60,12.86,43.50,27.10,46.40,46.40, 35.00,55.00,52.85,46.40,18.21,36.79,48.93,45.50,22.10,31.43,32.50,52.14, 18.21,22.50,27.14,16.67,14.64,39.64,16.78,34.28,69.64,38.90,23.20,41.67, 62.86, 47.00, 22.86,31.42,48.21,18.57,48.21,40.00,35.00,57.85,50.35,41.07, 45.00,48.00,49.28)

reg <- lm(suggest~DES)

summary(reg)

plot(DES,suggest,xlab="Dissociation score (0-100)",ylab="Suggestibility (-25-25)")

abline(reg)

text(65,-12,"r = .31")

dataset1 <- cbind(DES,suggest)

write.table(dataset1,"c:\\temp\\dataset1.dat")

library("foreign")

write.foreign(as.data.frame(dataset1),"c:\\temp\\dataspss.dat","c:\\temp\\dataspss.sps",package="SPSS")

detach(dataset1)

 

# Chapter 3.

# Example 3. Cognitive dissonance.

control <- c( 0,-3,3,2,-2,-1,2,3,-3,-5,2,-3,3,0,-2,-2,-2,-2,-1,2)

dollar1 <- c(3,1,1,3,2,3,3,2,2,2,2,2,-4,4,0,-3,4,1,1,-2)

dollar20 <- c(1,2,3,0,1,3,0,-2,2,1,0,0,-1,-2,-1,-4,-3,-1,0,0)

enjoy <- c(control,dollar1,dollar20)

group <- as.factor(rep(c("control","$1","$20"),each=20))

tapply(enjoy,group,mean)

tapply(enjoy,group,sd)

plot(group,enjoy,ylab="Enjoyment (-5 to 5)",xlab="Condition",cex.lab=1.3)

anova1 <- aov(enjoy~group)

summary(anova1)

anova2 <- lm(enjoy~group)

summary(anova2)

anova(anova2)

contrasts(group)

wcontrol <- c(1,0,0,0,1,0)

dim(wcontrol) <- c(3,2)

contrasts(group) <- wcontrol

contrasts(group)

summary(lm(enjoy~group))

 

# Example 4. Children's forgetting I.

lordex <- read.table(paste(webreg,"lordex.txt",sep=""), header=T)

attach(lordex)

names(lordex)

par(mfrow=c(1,2))

hist(Final,main="Untransformed Final")

library(e1071)

library(boot)

skewness(Final)

skewboot <- boot(Final, function(x,i) skewness(x[i]), R=1000)

boot.ci(skewboot,type="bca")

newfinal <- sqrt(Final + .5)

hist(newfinal, main=expression(sqrt(Final + .5)))

par(mfrow=c(1,1))

skewness(newfinal)

skewboot <- boot(newfinal, function(x,i) skewness(x[i]), R=1000)

boot.ci(skewboot,type="bca")

hist(AGEMOS, breaks=c(seq(36,120,12)),xlab="Age in months")

AGEYRS <- trunc(AGEMOS/12)

AGEYRSfac <- as.factor(AGEYRS)

AGE2 <- cut(AGEYRS, br=c(0,6.5,10))

reg1 <- lm(newfinal~AGEMOS)

summary(reg1)

reg2 <- lm(newfinal~AGEYRS)

summary(reg2)

par(mfrow=c(1,2))

plot(AGEMOS,newfinal,ylab=expression(sqrt(Final + .5)),xlab="Age in months")

lines(sort(AGEMOS),predict(reg1)[order(AGEMOS)],col="red")

lines(sort(AGEMOS),predict(reg2)[order(AGEMOS)],col="blue")

reg3 <- lm(newfinal ~ AGEYRSfac)

summary(reg3)

tapply(newfinal,AGEYRSfac,mean)

anova(reg2,reg3)

t.test(newfinal~AGE2,var.equal=T)

reg4 <- lm(newfinal~AGE2)

summary(reg4)

plot(AGEMOS,newfinal,ylab=expression(sqrt(Final + .5)),xlab="Age in months")

lines(sort(AGEMOS),predict(reg3)[order(AGEMOS)],col="red")

lines(sort(AGEMOS),predict(reg4)[order(AGEMOS)],col="blue")

par(mfrow=c(1,1))

Diff  <- Final - Initial

dreg1 <- lm(Diff~AGEMOS)

dreg2 <- lm(Diff~AGEYRS)

dreg3 <- lm(Diff~AGEYRSfac)

dreg4 <- lm(Diff~AGE2)

par(mfrow=c(1,2))

plot(AGEMOS,Diff, ylab="Final - Initial", xlab="Age in months")

lines(sort(AGEMOS),predict(dreg1)[order(AGEMOS)],col="red")

lines(sort(AGEMOS),predict(dreg2)[order(AGEMOS)],col="blue")

plot(AGEMOS,Diff,ylab="Final - Initial", xlab="Age in months")

lines(sort(AGEMOS),predict(dreg3)[order(AGEMOS)],col="red")

lines(sort(AGEMOS),predict(dreg4)[order(AGEMOS)],col="blue")

par(mfrow=c(1,1))

tapply(Diff,AGEYRSfac,mean)

summary(dreg1)

detach(lordex)

 

# Chapter 4

# Example 4. Children's forgetting II

lordex <- read.table(paste(webreg,"lordex.txt",sep=""), header=T)

attach(lordex)

lm1 <- lm(Final ~ Initial)

summary(lm1)

lm2 <- lm(Final ~ Initial + AGEMOS)

summary(lm2)

lm3 <- lm(Final ~ Initial*AGEMOS)

summary(lm3)

anova(lm1,lm2,lm3)

summary(lm2)

summary(lm3)

library(lattice)

AGEYRS <- trunc(AGEMOS/12)

scatmat <- xyplot(Final~Initial | as.factor(AGEYRS))

print(scatmat)

tapply(Initial, AGEYRS, sd)

tapply(Final, AGEYRS, sd)

sexit <- split(Initial,AGEYRS)

sfoll <- split(Final,AGEYRS)

predmod4 <- split(lm(Final~Initial*as.factor(AGEYRS))$fitted.values,AGEYRS)

plot(Initial,Final,pch=19,cex=.5,col="black", xlab="Initial interview", ylab="Final interview",xlim=c(0,25),ylim=c(0,10),cex.lab=1.3,las=1,font.lab=1.5)

abline(0,1,lty = 3)

for (i in 1:6) lines(sexit[[i]],predmod4[[i]], col="black")

meanexit <- c(mean(sexit[[1]]),mean(sexit[[2]]), mean(sexit[[3]]), mean(sexit[[4]]), mean(sexit[[5]]), mean(sexit[[6]]))

meanfoll<- c(mean(sfoll[[1]]),mean(sfoll[[2]]), mean(sfoll[[3]]), mean(sfoll[[4]]), mean(sfoll[[5]]), mean(sfoll[[6]]))

lines(meanexit,meanfoll,col="grey",lwd=4,lend="round")

text(12,10,"Final = Initial",pos=4)

arrows(12,10,10,10,length=.1)

text(c(13,21,12.1,0,0,4.4),c(5.5,4.5,2.2,3.7,1.5,.5),c("9","7","8","6","5","4"))

points(meanexit,meanfoll,pch=19)

lines(c(14,16.7), c(9.2,9.2),col="grey",lwd=4)

text(16.7,9.2,"Age group means", pos=4)

text(14,8.5,"Means increase with age\nfor both interviews",pos=4)

text(-.3,9.5,"Intercepts higher for\nolder children\n (Ancova)",pos=4)

text(14,1,"Older children further away\nfrom dashed line (Anova).", pos=4)

detach(lordex)

 

# Mediation example

set.seed(143)

leaflet <- rep(c(0,1),each=50)

fairskin <- rbinom(100,1,.5)

likely <- rbinom(100,10,.20 + .2*leaflet + .2*fairskin)

plan <- rbinom(100,7,likely/15+leaflet*.2)

summary(lm(likely~leaflet))$coef

summary(lm(plan~leaflet))$coef

summary(lm(plan~likely))$coef

summary(lm(plan~likely+leaflet))$coef

(1.92*.45)/sqrt(.37^2*.45^2 + .06^2*1.92^2+.37^2*.06^2)

sobel1 <- function(a,b,sea,seb){

  sobel <- (a*b)/sqrt(sea^2*b^2+seb^2*a^2+sea^2*seb^2)

  paste("Sobel z =", format(sobel,nsmall=2,digits=3))}

sobel1(1.92,.45,.37,.06)

sobel1(1.920,.454,.367,.057)

# source(paste(webreg,"mediator.R",sep=""))

# This will work once Dan returns to Brighton and updates the webpage.

# You need to run the function first

## Simple Mediation Function with Figure of effects

mediator <- function(x,y,m, ...){

  # Put in experimental variable x, outcome variable y, then mediator m.

  # Some of this is written so that it works in a mediator

  # function for more complex problems.

  reg0 <- lm(y~x)

  reg1 <- lm(m~x)

  reg2 <- lm(y~m+x)

  c <- summary(reg0)$coefficients[2,1]

  sc <- summary(reg0)$coefficients[2,2]

  a <- summary(reg1)$coefficients[2,1]

  sa <- summary(reg1)$coefficients[2,2]

  b <- summary(reg2)$coefficients[2,1]

  sb <- summary(reg2)$coefficients[2,2]

  cp <- summary(reg2)$coefficients[3,1]

  scp <- summary(reg2)$coefficients[3,2]

  sobel <- (a*b)/sqrt(b^2*sa^2 + a^2*sb^2 + sa^2*sb^2)

  psobel <- format(2*(1-pnorm(abs(sobel))),digits=2,nsmall=2)

  plot(c(0,100),c(0,110),col="white",ann=F,tck=0,col.axis="white")

  rect(c(10,10,70,70,40),c(10,50,10,50,80),c(30,30,90,90,60),c(30,70,30,70,100))

  arrows(c(30,30,20,60),c(20,60,70,90),c(70,70,40,80),c(20,60,90,70),length=.15)

  text(c(20,20,80,80,50),c(20,60,20,60,90),c("X","X","Y","Y","M"),cex=2)

  text(30,80,paste(format(a,digits=2,nsmall=2)),pos=2,cex=1.3)

  text(70,80,paste(format(b,digits=2,nsmall=2)),pos=4,cex=1.3)

  text(50,20,paste(format(c,digits=2,nsmall=2)),pos=3,cex=1.3)

  text(50,60,paste(format(cp,digits=2,nsmall=2)),pos=3,cex=1.3)

  text(50,2,paste("Sobel z = ",format(sobel,digits=2,nsmall=2),"; p = ",psobel),cex=1.3)

  print(paste("Sobel z =", format(sobel,digits=3,nsmall=2),", p =", psobel))

  invisible(c(sobel,psobel))

}

mediator(leaflet,plan,likely)

 

# Chapter 5.

# Example 6. Fathers' ptsd.

maleptsd <- read.table(paste(webreg,"maleptsd.dat",sep=""), header=T)

attach(maleptsd)

ptsd <- log(PTSD + 1)

preds <- cbind(OVER2,OVER3,OVER5,BOND,POSIT,NEG,CONTR,SUP,CONS,AFF)

ptsdmat <- cor(preds)

print(ptsdmat,digits=2)

print(sd(preds),digits=2)

spmat <- cor(preds,method="spearman")

print(spmat,digits=2)

cor.test(OVER2,OVER3)

new <- diag(sd(preds)) + upper.tri(spmat)*spmat + lower.tri(ptsdmat)*ptsdmat

print(new,digits=2)

library(car)

spm(preds)
allvars <- lm(ptsd~preds)

summary(allvars)

install.packages("leaps")

library(leaps)

x1 <- regsubsets(preds,ptsd)

summary(x1)

par(mfrow=c(2,2))

plot(x1,scale=c("r2"))

title("Default for leaps")

plot(x1,scale=c("adjr2"))

title("Default for leaps")

x2 <- leaps(preds,ptsd,nbest=1, method="r2")

x3 <- leaps(preds,ptsd,nbest=1, method="adjr2")

plot(x2$size-1,x2$r2,xlab="Number of predictors",ylab=expression(R^2))

lines(spline(x2$size-1,x2$r2))

plot(x3$size-1,x3$adjr2, xlab="Number of predictors",ylab=expression(adj.R^2))

lines(spline(x3$size-1,x3$adjr2))

par(mfrow=c(1,1))

plot(x3$size-1,x3$adjr2,xlab="# of predictors not including B0", ylab="Adj. R-squared")

lines(spline(x3$size-1,x3$adjr2))

m2 <- max(x3$adjr2)

m1 <- which.max(x3$adjr2)

arrows(m1,m2-.02,m1,m2)

text(m1,m2-.023,expression(paste("max ",r[adj]^2 == 0.2)))

text(m1,m2-.031, paste("with",format(m1,digits=1),"variables"),pos=3)

par(mfrow=c(1,1))

x3$which[which.max(x3$adjr2),]

labels(preds[1,])

summary(lm(ptsd~preds[,c(1,2,4,6,8,10)]))

library(MASS)

lm.ridge(ptsd~preds,lambda=seq(0,100,by=1))-> x

plot(x)

title("Ridge Regression")

abline(h=0)

abline(v=50,lty=3)

x$coef[,50]

install.packages("lars")

library("lars")

lasso1 <- lars(preds,ptsd)

plot(lasso1)

predict(lasso1,s=7,mode="step", type="coefficient")$coefficients

lassocv <- cv.lars(preds,ptsd)

frac <- lassocv$fraction[which.min(lassocv$cv)]

predict(lasso1,s=frac,mode="fraction", type="coefficient")$coefficients

install.packages("pls")

library("pls")

pcr1 <- pcr(ptsd~preds,ncomp=10)

plot(1:10,summary(pcr1)[1,],col="blue",ylab="Cumulative variance accounted for",xlab="Number of components",ylim=c(0,100),pch=19,cex.lab=1.3)

lines(1:10,summary(pcr1)[1,],col="blue",lwd=1.5)

points(1:10,summary(pcr1)[2,],col="red",pch=19)

lines(1:10,summary(pcr1)[2,],col="red",lwd=1.5)

text(4,85,"variance of predictors",col="blue",pos=4,cex=1.2)

text(5,35,"variance of PTSD",col="red",pos=4,cex=1.2)

pcr1$loadings

pca1 <- princomp(preds)

pcrreg <- lm(ptsd ~ pca1$scores[,1]+ pca1$scores[,2])

summary(pcrreg)

plsr1 <- plsr(ptsd~preds,ncomp=10)

plot(1:10,summary(plsr1)[1,],col="blue",ylab="Cumulative variance accounted for",xlab="Number of components",ylim=c(0,100),pch=19,cex.lab=1.3)

lines(1:10,summary(plsr1)[1,],col="blue",lwd=1.5)

points(1:10,summary(plsr1)[2,],col="red",pch=19)

lines(1:10,summary(plsr1)[2,],col="red",lwd=1.5)

text(4.3,85,"variance of predictors",col="blue",pos=4,cex=1.2)

text(5,35,"variance of PTSD",col="red",pos=4,cex=1.2)

summary(plsr1)

plsr1$loadings

plsreg <- lm(ptsd ~ plsr1$scores[,1])

summary(plsreg)

detach(maleptsd)

 

# Chapter 6.

plot(0:40,dpois(0:40,1),xlab="Variable",ylab="",ylim=c(0,.5),col="white")

for (i in c(1,2,5,10,20)) lines(spline(0:40,dpois(0:40,i)))

text(1, .37, expression(lambda,"   = 1"),pos=4)

text(2.4, .26, expression(lambda,"   = 2"),pos=4)

text(4, .20, expression(lambda,"   = 5"), pos=4)

text(6.8, .14, expression(lambda,"    = 10"), pos=4)

text(17, .11, expression(lambda,"    = 20"), pos=4)

 

# Example 7. Associations with test score.

glmexample <- read.table(paste(webreg,"glmexample.dat",sep=""),header=T)

attach(glmexample)

socreg <- glm(social~test)

summary(socreg)

detreg <- glm(detent~test,binomial)

summary(detreg)

x <- cbind(math,10-math)

mathreg <- glm(x~test,binomial)

summary(mathreg)

bookreg <- glm(books~test,poisson)

summary(bookreg)

par(mfrow=c(2,2))

plot(test,social)

lines(test,predict(socreg,type="response"))

plot(test,detent)

lines(test,predict(detreg,type="response"))

plot(test,math/10)

lines(test,predict(mathreg,type="response"))

plot(test,books)

lines(test,predict(bookreg,type="response"))

par(mfrow=c(1,1))

det2 <- glm(detent~test+social,binomial)

anova(detreg,det2,test="Chi")

det3 <- glm(detent~test*social,binomial)

anova(det2,det3,test="Chi")

socialpoly <- glm(social~poly(test,2))

plot(test,social)

lines(test,predict(socialpoly,type="response"))

detach(glmexample)

 

# Example 8. Manipulation reasonable doubt.

juryrd <- read.table(paste(webreg,"juryrd.dat",sep=""),header=T)

attach(juryrd)

names(juryrd)

par(mfrow=c(1,2))

hist(BELIEF)

library(e1071)

skewness(BELIEF)

lb <- format(skewness(BELIEF) - sqrt(6/length(BELIEF))*1.96,digit=3)

ub <- format(skewness(BELIEF) + sqrt(6/length(BELIEF))*1.96,digit=3)

print(paste("Lower Bound = ", lb, "Upper Bound = ", ub))

library(boot)

beliefboot <- boot(BELIEF,function(x,i) skewness(x[i]), R=1000)

boot.ci(beliefboot)

shapiro.test(BELIEF)

ks.test(BELIEF,"pnorm")

beliefsq <- BELIEF^2

hist(beliefsq)

par(mfrow=c(1,1))

bboot2 <- boot(beliefsq,function(x,i) skewness(x[i]), R=1000)

boot.ci(bboot2)

reg1 <- lm(beliefsq~FORM)

summary(reg1)

t.test(beliefsq~FORM)

wbeliefsq <- split(beliefsq,FORM)

wilcox.test(wbeliefsq[[1]],wbeliefsq[[2]])

reg2 <- glm(GUILTY~beliefsq, binomial)

reg3 <- glm(GUILTY~beliefsq+FORM, binomial)

anova(reg2,reg3)

1-pchisq(15.351,1)

summary(reg3)

plot(BELIEF,predict(reg3,type="response"),ylab="Probability of a guilty verdict")

beliefs <- split(BELIEF,FORM)

predicts <- split(predict(reg3,type="response"),FORM)

o1 <- order(beliefs[[1]])

o2 <- order(beliefs[[2]])

lines(beliefs[[1]][o1],predicts[[1]][o1])

lines(beliefs[[2]][o2],predicts[[2]][o2])

abline(h=.5,lty=3)

ld1 <- sqrt(-(reg3$coef[1]/reg3$coef[2]))

ld2 <- sqrt(-((reg3$coef[1]+reg3$coef[3])/reg3$coef[2]))

abline(v=ld1,lty=3)

abline(v=ld2,lty=3)

paste("Control LD50 =", format(ld1,dig=2), "Instruction LD50 =",format(ld2,dig=2))

anova(reg3,glm(GUILTY~FORM*beliefsq,binomial),test="Chi")

table(GUILTY,FORM)

chisq.test(GUILTY,FORM,correct=F)

cells <- c(54,44,32,42)

imagine <- c(0,1,0,1)

guilty <- c(0,0,1,1)

glm(cells~imagine+guilty,poisson)

detach(juryrd)

 

 

# Chapter 7.

set.seed=2007

x <- (1:100)/50

y <- 4 + 2*x - 3*x^2 + x^3 + rnorm(100,0,.3)

par(mfrow=c(1,4))

plot(x,y, main="0 degree", xlab="", ylab="", pch=19, axes=F)

box()

abline(h=mean(y),lwd=1.5)

for (i in 1:3) {

plot(x,y, main=paste(i,"degree"), xlab="", ylab="", pch=19, axes=F)

box()

lines(x,predict(lm(y~poly(x,i))),lwd=1.5)}

par(mfrow=c(1,1))

anova(lm(y~1),lm(y~poly(x,1)),lm(y~poly(x,2)),lm(y~poly(x,3)))

qreg1 <- lm(y[1:50]~poly(x[1:50],1))

qreg2 <- lm(y[51:100]~poly(x[51:100],1))

par(mfrow=c(1,2))

plot(x,y, main="Piecewise linear", xlab="", ylab="", pch=19, axes=F)

box()

lines(x[1:50],predict(qreg1),lwd=1.5)

lines(x[51:100],predict(qreg2),lwd=1.5)

plot(x,y, main="Cubic spline with 1 knot", xlab="", ylab="", pch=19, axes=F)

box()

library(splines)

lines(x,predict(lm(y~bs(x,degree=3,df=4))),lwd=1.5)

par(mfrow=c(1,1))

 

# Example 9. Academics' salaries

years <- c(1973,1975,1977,1979,1981,1983,1985,1986,1987,1988,1989,1990,1991,1992,1993,1994,1995,1996,1997,1998,1999,2000,2001,2002,2003,2004,2005,2006)

salary <- c(-3.0,-8.0,-1.7,-10.2,-4.0,3.7,5.2,4.8,0.5,1.4,1.5,-0.7,     0.4,-0.4,0.3,0.7,0.4,-0.3,1.6,2.0,1.0,0.1,2.2,0.6,0.2,-0.5,-0.3,1.3)

plot(years,salary,xlab="Year",ylab="Salary change in real terms",pch=20)

abline(h=0,lty=2)

salary1 <- lm(salary ~ bs(years,degree=1))

summary(salary1)

par(mfrow=c(2,3))

plot(years,salary,xlab="Years",ylab="Salary change",pch = 19)

abline(h=0,lty=2)

lines(years, fitted(salary1))

salary2 <- lm(salary ~ bs(years,degree=1,df=2))

plot(years,salary,xlab="Years",ylab="Salary change",pch = 19)

abline(h=0,lty=2)

lines(years,fitted(salary2))

salary3 <- lm(salary ~ bs(years,degree=1,knots=1986))

plot(years,salary,xlab="Years",ylab="Salary change",pch = 19)

abline(h=0,lty=2)

lines(years,fitted(salary3))

anova(salary1, salary2, salary3)

salary4 <- lm(salary ~ bs(years,degree=2,df=2))

plot(years,salary,xlab="Years",ylab="Salary change",pch = 19)

abline(h=0,lty=2)

lines(years,fitted(salary4))

salary5 <- lm(salary ~ bs(years,degree=2,df=3))

plot(years,salary,xlab="Years",ylab="Salary change",pch = 19)

abline(h=0,lty=2)

lines(years,fitted(salary5))

salary6 <- lm(salary ~ bs(years,degree=2,knots=1986))

plot(years,salary,xlab="Years",ylab="Salary change",pch = 19)

abline(h=0,lty=2)

lines(years,fitted(salary6))

par(mfrow=c(1,1))

anova(salary4,salary5,salary6)

anova(salary2,salary5)

 

# Example 10. Experience, Gender, Education, and Salary

cp85 <- read.table(paste(webreg,"cp85.dat",sep=""), header=T)

library(splines)

attach(cp85)

names(cp85)

wage1 <- lm(LNWAGE~bs(EXPER,df=1,degree=1) + bs(EDUC,df=4) + FEMALE)

summary(wage1)

anova(wage1)

experq <- quantile(EXPER)

minimales <- data.frame(EXPER=rep(experq,each=21),EDUC=rep(0:20,5),FEMALE=rep(0,105))

minifemales <- data.frame(EXPER=rep(experq,each=21),EDUC=rep(0:20,5),FEMALE=rep(1,105))

par(mfrow=c(1,2))

plot(minimales$EDUC,predict(wage1,minimales),xlab="Years in education", ylab="Predicted ln(wage)",pch=19,cex=.5,ylim=c(.5,3))

lines(0:20, predict(wage1,minimales[1:21,]),lwd=.5)

lines(0:20, predict(wage1,minimales[22:42,]),lwd=1)

lines(0:20, predict(wage1,minimales[43:63,]),lwd=1.5)

lines(0:20, predict(wage1,minimales[64:84,]),lwd=2)

lines(0:20, predict(wage1,minimales[85:105,]),lwd=2.5)

text(6,2.5,"More experience")

text(15,1.5,"Less experience")

title("For males")

plot(minifemales$EDUC,predict(wage1,minifemales),xlab="Years in education", ylab="Predicted ln(wage)",pch=19,cex=.5,ylim=c(.5,3))

lines(0:20, predict(wage1,minifemales[1:21,]),lwd=.5)

lines(0:20, predict(wage1,minifemales[22:42,]),lwd=1)

lines(0:20, predict(wage1,minifemales[43:63,]),lwd=1.5)

lines(0:20, predict(wage1,minifemales[64:84,]),lwd=2)

lines(0:20, predict(wage1,minifemales[85:105,]),lwd=2.5)

text(6,2.5,"More experience")

text(15,1.2,"Less experience")

title("For females")

par(mfrow=c(1,1))

install.packages("gam")

library(gam)

wage1a <- gam(LNWAGE~bs(EXPER,df=1,degree=1) + bs(EDUC,df=4) + FEMALE)

summary(wage1a)

par(mfrow=c(1,3))

plot(wage1a, se=T)

wage2a <- gam(LNWAGE~bs(EXPER,df=2,degree=1) + bs(EDUC,df=4) + FEMALE)

anova(wage1a,wage2a,test="F")

plot(wage2a,se=T)

summary(lm(LNWAGE ~ bs(EXPER, degree=3, df=4)+bs(EDUC,degree=3, df=4) + FEMALE))

plot(gam(LNWAGE~bs(EXPER,degree=3,df=4)+bs(EDUC,degree=3,df=4)+ FEMALE),se=T)

par(mfrow=c(1,1))

detach(cp85)

 

# Example 11. Detecting deception.

set.seed(406)

age <- runif(1000,3,23)

truth <- rbinom(1000,1,.5)

cbca <- round(-3*truth+2*truth*log(age)+.2*age+rbinom(1000,25,.7))

summary(glm(truth~age,family=binomial))

library(gam)

plot(gam(truth~bs(age,df=4),family=binomial),se=T)

gam0 <- gam(truth~bs(cbca, df=4), family=binomial)

gam1 <- gam(truth ~ bs(cbca, df=4) + bs(age, df=4), family=binomial)

anova(gam0,gam1)

inter <- cbca*age

gam2 <- gam(truth~bs(cbca,df=4)+bs(age,df=4)+bs(inter,df=4), family=binomial)

anova(gam1,gam2)

par(mfrow=c(3,3))

truth2 <- factor(truth,label=c("false","true"))

plot(gam0, se=T)

plot(age,cbca,pch=".")

plot(truth2,cbca,ylab="cbca")

plot(gam1, se=T)

plot(truth2,age,ylab="age")

plot(gam2, se = T)

par(mfrow=c(1,1))

 

# Here is the code for making the generalized additive model for the 

# London et al. (in press) data. Other models were examined. 

attach(lordex)

gam1 <- gam(Final ~ Initial + bs(AGEMOS,df=4),poisson)

plot(gam1,se=T,terms="bs(AGEMOS, df = 4)",1,pch=19,xlab="Age in months",ylab="Predicted conditional recall",cex.lab=1.5,cex.axis=1.3)

 

 

# Chapter 8.

# Example 12. Getting children to exercise

exer <- read.table(paste(webreg,"exercise.dat",sep=""),header=T)

attach(exer)

cond <- factor(wcond, labels=c("Control","Leaftet", "L+quiz","L+plan"))

install.packages("nlme")

library("nlme")

model1 <- lm(sqw2 ~ cond)

summary(model1)

plot(cond,sqw2,xlab="Conditions",ylab=expression(sqrt(exercise + .5)))

mexer <- aggregate(sqw2,list(class),mean)[,2]

mcond <- aggregate(wcond,list(class),mean)[,2]

model2 <- lm(mexer~factor(mcond))

summary(model2)

points(mcond,mexer,pch=19,cex=1.3)

model3 <- lm(sqw2 ~ factor(class) + cond)

summary(model3)

model4 <- lme(sqw2 ~ 1, random = ~1|class,method="ML")

model4a <- lme(sqw2 ~ cond, random = ~1|class,method="ML")

anova(model4,model4a)

summary(model4a)

newcontrasts <- c(-3, 1, 1, 1, 0, -2, 1, 1, 0, 0, -1, 1)

dim(newcontrasts) <- c(4,3)

contrasts(cond) <- newcontrasts

contrasts(cond)

model4b <- lme(sqw2 ~ cond, random = ~1|class,method="ML")

summary(model4b)

model5 <- lme(sqw2 ~ sqw1, random = ~1|class,method="ML")

model5a <- lme(sqw2 ~ sqw1 + cond, random = ~1|class,method="ML")

model5b <- lme(sqw2 ~ sqw1*cond, random = ~1|class,method="ML")

anova(model5,model5a,model5b)

par(mar=c(5,5,4,2))

plot(jitter(sqw1),jitter(sqw2), xlab=expression(sqrt(pre-exercise + .5)), ylab=expression(sqrt(post-exercise + .5)),pch=wcond,col=wcond)

legend(3,1.3,c(levels(cond)),pch=1:4,col=1:4)

sexer1 <- split(sqw1,cond)

spred <- split(model5b$fitted[,1],cond)

for (i in 1:4) lines(sexer1[[i]],spred[[i]],col=i)

par(mar=c(5,4,4,2))

library(lattice)

xyplot(sqw2~sqw1 | class)

intervals(model5a)

varprop <- function(m1,m2)

  {v1 <- sum(as.numeric(VarCorr(m1)[,1]))

    v2 <- sum(as.numeric(VarCorr(m2)[,1]))

   rsq <- (v1-v2)/v1

   return(rsq)

  }

model50 <- lme(sqw2 ~ 1, random = ~1|class,method="ML")

varprop(model50,model5)

varprop(model50,model5a)

varprop(model50,model5b)

detach(exer)

 

# Example 13. Response times and memory recognition accuracy

memrec <- read.table(paste(webreg,"memrec.dat",sep=""),header=T)

attach(memrec)

par(mfrow=c(2,1))

hist(time,xlab="Response time (in msec)",main="Untransformed variable")

library(e1071)

text(6000,850,paste("skewness = ",format(skewness(time),digits=2)),pos=4)

lntime <- log(time)

hist(lntime,xlab="ln(response time in msec)",main="Transformed variable")

text(8.5,400,paste("skewness = ",format(skewness(lntime),digits=2)),pos=4)

par(mfrow=c(1,1))

install.packages("lme4")

library(lme4)

model1 <- lmer(saysold ~ faceold + (1|partno), family=binomial,method="Laplace")

model2 <- update(model1, .~. + facewhite)

model3 <- update(model2, .~. + facewhite:faceold)

model4 <- update(model3, .~. + lntime)

model5 <- update(model4, .~. + lntime:faceold)

model6 <- update(model5, .~. + lntime:facewhite)

model7 <- update(model6, .~. + lntime:faceold:facewhite)

anova(model1,model2,model3,model4,model5,model6,model7)

model5

mod5 <-  -5.8387 + faceold*10.7439+facewhite*-1.0359+lntime*0.6486+faceold*facewhite*0.9677+faceold*lntime*-1.1774

predprob <- exp(mod5)/(1+exp(mod5))

rightprob <- predprob*faceold + (1-faceold)*(1-predprob)

par(mfrow=c(1,1))

plot(memrec$time,rightprob,pch=20,xlab="Time in msec",ylab="Probability of a correct response",cex.lab=1.3)

stime <- split(time,facewhite+2*faceold)

srightprob <- split(rightprob,facewhite+2*faceold)

for (i in 1:4) lines(sort(stime[[i]]),srightprob[[i]][order(stime[[i]])],col=i,lwd=1.5)

text(12000,.8,"previously uncseen White faces",cex=1.3)

model5b <- update(model5, .~. -(1|partno) + (faceold|partno))

anova(model5,model5b)

model5b

# The mgcv package overwrites stuff in gam and some other things, so if you run this

# and then the models above you may get some funny answers

install.packages("mgcv")

library(mgcv)

par(mfrow=c(2,2))

ssaysold <- split(saysold,facewhite+2*faceold)

slntime <- split(lntime,facewhite+2*faceold)

spartno <- split(partno,facewhite+2*faceold)

for (i in 1:4) {

   part <- spartno[[i]]

   time <- slntime[[i]]

   sold <- ssaysold[[i]]

   gammy <- gamm(sold ~ s(time),random=list(part=~1),family=binomial)

   plot(gammy$gam,xlab="ln(time in msec)")

   if (i == 1) title("New Black faces")

   if (i == 2) title("New White faces")

   if (i == 3) title("Old Black faces")

   if (i == 4) title("Old White faces")

}

par(mfrow=c(1,1))

detach(memrec)

 

# Chapter 9. Robust regression

# Example 14. Crime in Sussex.

crime <- read.table(paste(webreg,"sussexcrime.dat",sep=""),header=T)

crime <- crime[order(crime[,9]),1:10]

attach(crime)

par(mfrow=c(1,3))

plot(theft, Drugs, xlab="Theft offences",ylab="Drug offences",pch=19,main="Scatterplot of raw data",cex.lab=1.3)

text(2900,300,"Regency",pos=3,cex=1.3)

plot(rank(theft), rank(Drugs) ,xlim=c(0,300),ylim=c(0,300), xlab="Rank of theft offences",ylab="Rank of drug offences",pch=19,main="Scatterplot of ranked data",cex.lab=1.3)

text(180,270,"Regency",pos=3,cex=1.3)

arrows(230,285, 255,265, length=.07)

plot(log(theft+1), log(Drugs+1), xlab="Log of theft offences + 1",ylab="Log of drug offences + 1",pch=19,main="Scatterplot of logged data",cex.lab=1.3)

text(6.7,5.4,"Regency",pos=3,cex=1.3)

par(mfrow=c(1,1))

cor.test(theft,Drugs)

cor.test(theft,Drugs,method="spearman")

library(boot)

bootspear <- function(x,i) cor.test(x$Drugs[i],x$theft[i],method="spearman")$estimate

thevars <- as.data.frame(cbind(Drugs,theft))

spearboot <- boot(thevars,bootspear, R=1000)

boot.ci(spearboot)

cor.test(rank(theft),rank(Drugs))

lnreg <- lm(log(Drugs+1)~log(theft+1))

par(mfrow=c(1,2))

plot(log(theft+1), log(Drugs+1), xlab="LN of theft offences",ylab="LN of drug offences",pch=19,main="Scatterplot of logged data",cex.lab=1.3)

lines(log(theft+1),predict(lnreg))

plot(theft, Drugs, xlab="Theft offences",ylab="Drug offences",pch=19,main="Scatterplot of raw data",cex.lab=1.3)

lines(theft,exp(predict(lnreg))-1)

abline(lm(Drugs~theft),col="red")

par(mfrow=c(1,1))

detach(crime)

 

# Example 15. Children's well being

children <- read.table(paste(webreg,"unicef.txt",sep=""),header=T)

attach(children)

cor.test(risks,health)

cor.test(risks,health,method="spearman")

source("ftp://ftp.usc.edu/pub/wilcox/Rallfunv1.v4")

source("ftp://ftp.usc.edu/pub/wilcox/Rallfunv2.v4")

scor(risks,health,xlab="Risk rank",ylab="Health rank")

text(c(2,8,4),c(15,18,19),c("Poland","Greece","Ireland"),pos=4)

detach(children)

 

# Example 16. Food and drink intake.

anscombe <- read.table(paste(webreg,"animal.dat",sep=""),header=T)

attach(anscombe)

cbind(anscombe[1:11,], anscombe[12:22,], anscombe[23:33,], anscombe[34:44,])

meansd <- rbind(tapply(Food,Type,mean),tapply(Food,Type,sd), tapply(Drink,Type,mean),tapply(Drink,Type,sd))

row.names(meansd) <- c("Food mean","Food sd","Drink mean","Drink sd")

meansd

summary(aov(Food~Type))

summary(aov(Drink~Type))

Foodg <- split(Food,Type)

Drinkg <- split(Drink,Type)

for (i in 1:4) {print(paste("Group =",names(Drinkg)[i])) 

    print(summary(lm(Drinkg[[i]]~Foodg[[i]])))}

library(lattice)

xyplot(Drink~Food|Type)

par(mfrow=c(2,2))

for (i in 1:4) {

   x <- lm(Drinkg[[i]]~Foodg[[i]])

   plot(Foodg[[i]],Drinkg[[i]],main=(paste(names(Foodg)[i])),xlab="Food intake",ylab="Drink intake",ylim=c(0,15),xlim=c(0,20))

   abline(x)}

par(mfrow=c(1,1))

library(MASS)

birds <- rlm(Drinkg[[1]]~Foodg[[1]])

summary(birds)

insect <- rlm(Drinkg[[2]]~Foodg[[2]])

summary(insect)

mammals <- rlm(Drinkg[[3]]~Foodg[[3]])

summary(mammals)

mammals2 <- lm(Drinkg[[3]] ~ poly(Foodg[[3]],2))

summary(mammals2)

reptiles <- rlm(Drinkg[[4]]~Foodg[[4]])

summary(reptiles)

detach(anscombe)

 

 

# Chapter 10. Conclusion

set.seed(47)

genoheight <- runif(100,60,80)

parentheight <- genoheight + runif(100,-10,10)

kidheight <- genoheight + runif(100,-10,10)

plot(parentheight,kidheight,pch=20,xlab="Parent's height in inches",ylab="Child's height in inches")

abline(0,1)

abline(h=mean(kidheight),v=mean(parentheight),lty="dashed")

text(55,84,"Most kids of short\nparents above\nthe diagonal", pos=4)

text(73,57,"Most kids of tall\nparents below diagonal", pos=4)

arrows(58,54,58,58,length=.1)

text(54,53, "kids = parents",pos=4)

cor.test(parentheight,kidheight)

 

 

etasq <- function(aovob){

  effects <- dim(aovob)[1] - 1

  for (i in 1:effects){

    partialeta <- format(aovob[i,2]/(aovob[i,2] + aovob[effects+1,2]), nsmall=2, digits=2)

    eta <- format(aovob[i,2]/sum(aovob[,2]),nsmall=2,digits=2)

    print(paste("Partial eta sq. =", partialeta,", eta squared =",  eta,"for", dimnames(aovob[i,])[[1]]))} }