# Stats bf <- read.table("/home/units/r.data/butter-R/dat", header=TRUE) summary(bf) attach(bf) # Plots plot( Butterfat ~ Age ) plot( Butterfat ~ Breed ) table(Age, Breed ) tapply(Butterfat, list(Age, Breed), mean) # Try fitting a model bf.m <- lm( Butterfat ~ Age * Breed ) # Fits an interaction anova(bf.m) summary(bf.m) # Remove Age bf.m <- lm( Butterfat ~ Breed ) summary(bf.m) # Residual analysis bf.lm.res <- resid( bf.m) qqnorm( bf.lm.res) qqline( bf.lm.res ) plot(bf.m) # Appears that variance is related to the mean # so try a gamma glm bf.glm <- glm( Butterfat ~ Breed, family=Gamma(link=log) ) bf.glm.res <- resid( bf.glm, type="deviance") # Deviance is the default qqnorm( bf.glm.res ) qqline( bf.glm.res ) # Try another distribution bf.glm <- glm( Butterfat ~ Breed, family=inverse.gaussian(link=log) ) bf.glm.res <- resid( bf.glm, type="deviance") # Deviance is the default qqnorm( bf.glm.res ) qqline( bf.glm.res ) # Better option? mn <- tapply( Butterfat, list(Age, Breed), mean ) mn <- as.vector( mn ) vr <- as.vector( tapply( Butterfat, list(Age, Breed), var ) ) plot( log(vr) ~ log(mn) ) test <- lm( log(vr) ~ log(mn) ) abline( coef(test) ) # Perhaps a distribution with variance = mu^4.75 is appropriate # Beyond the scope of this Workshop # Some modern statistics data(cars) plot(cars, main = "lowess(cars)") lines(lowess(cars), col = 2) lines(lowess(cars, f=.2), col = 3) legend(5, 120, c(paste("f = ", c("2/3", ".2"))), lty = 1, col = (2:3) )