Initialization

Initialize R with the following commands:

options(digits=4,width=80)
options(contrasts=c("contr.sum","contr.poly") )  # set definition of contrasts
load(url("http://pnb.mcmaster.ca/bennett/psy710/datasets/cement.rda") )
load(url("http://pnb.mcmaster.ca/bennett/psy710/datasets/pigs.rda") )
load(url("http://pnb.mcmaster.ca/bennett/psy710/datasets/penicillin.rda") )
load(url("http://pnb.mcmaster.ca/bennett/psy710/datasets/pigment.rda") )

Useful R Commands

Here are some useful commands for analyzing data from a mixed, factorial design.

# ANOVA A: fixed; B: random
mixed.aov <- aov(Y ~ A + Error(B+A:B),data=df0)
summary(mixed.aov) # anova for fixed effects
library(VCA)
# each random effect enclosed with parentheses
anovaMM(Y ~ A + (B)+(A:B),Data=df0) # variance components & intraclass correlations
# linear mixed models:
library(lmerTest)
mixed.lme <- lmer(Y ~ A + (1|B) + (1|A:B),data=df0)
anova(mixed.lme) # fixed effects
ranova(mixed.lme) # random effects
VarCorr(mixed.lme) # variance components
# association strength
library(performance)
icc(mixed.lme,by_group=T) # intraclass correlations
library(effectsize)
omega_squared(mixed.lme) # for fixed effects

Example: Cement Data

This example is taken from section 4.16 in Sahai and Ageel (2000). An experiment was performed to evaluate the effect of the duration of aging the strength of cement. Three mixes of cement were prepared, and six samples were taken from each mix. The six samples (from each mix) were then divided into two groups that were aged either for two or seven days. Two-inch cubes were formed from each sample at the end of the aging period, and the weight that crushed each cube was recorded. The data are stored in the data frame cement. The independent variables are the fixed factor aging and the random factor mix; the dependent variable is yield. Note that you must convert aging and mix from character variables to factors before doing the analysis.

summary(cement)
##      yield          mix               aging          
##  Min.   : 524   Length:18          Length:18         
##  1st Qu.: 566   Class :character   Class :character  
##  Median : 795   Mode  :character   Mode  :character  
##  Mean   : 808                                        
##  3rd Qu.:1062                                        
##  Max.   :1092
# convert to factors!
cement$mix <- as.factor(cement$mix)
cement$aging <- as.factor(cement$aging)
xtabs(~mix+aging,cement)
##     aging
## mix  d2 d7
##   m1  3  3
##   m2  3  3
##   m3  3  3

Use a mixed-effects ANOVA to evaluate the effects of aging, mix and the interaction on yield.

options(contrasts=c("contr.sum","contr.poly"))
cem.aov.01 <- aov(yield~aging*mix,data=cement)
summary(cem.aov.01)
##             Df  Sum Sq Mean Sq F value  Pr(>F)    
## aging        1 1107072 1107072 2080.09 8.1e-15 ***
## mix          2    2957    1479    2.78    0.10    
## aging:mix    2    1126     563    1.06    0.38    
## Residuals   12    6387     532                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(F.aging <- 1107072/563)
## [1] 1966
(p.aging <- 1-pf(F.aging,1,2))
## [1] 0.0005082

Here is an alternate form of aov that calculates the F and p values for the fixed effect correctly. The random effects are included in Error.

# note that the random effects are in Error()
cement.aov.02 <- aov(yield~aging+Error(mix+mix:aging),data=cement)
summary(cement.aov.02)
## 
## Error: mix
##           Df Sum Sq Mean Sq F value Pr(>F)
## Residuals  2   2957    1479               
## 
## Error: mix:aging
##           Df  Sum Sq Mean Sq F value  Pr(>F)    
## aging      1 1107072 1107072    1966 0.00051 ***
## Residuals  2    1126     563                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Error: Within
##           Df Sum Sq Mean Sq F value Pr(>F)
## Residuals 12   6387     532

Here is the analysis using lmer. The random effects are enclosed in parentheses.

library(lmerTest)
cement.lme.01 <- lmer(yield~aging+(1|mix)+(1|mix:aging),data=cement)
anova(cement.lme.01)
## Type III Analysis of Variance Table with Satterthwaite's method
##        Sum Sq Mean Sq NumDF DenDF F value  Pr(>F)    
## aging 1046215 1046215     1     2    1966 0.00051 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ranova(cement.lme.01)
## ANOVA-like table for random-effects: Single term deletions
## 
## Model:
## yield ~ aging + (1 | mix) + (1 | mix:aging)
##                 npar logLik AIC   LRT Df Pr(>Chisq)
## <none>             5  -76.9 164                    
## (1 | mix)          4  -77.1 162 0.449  1       0.50
## (1 | mix:aging)    4  -76.9 162 0.003  1       0.96

Compute the variance components

Here are the ANOVA estimates of the variance components:

# compute from ANOVA table:
(var.comp.error <- 532 )
## [1] 532
(var.comp.mix <- (1479-532)/(3*2) ) # 3x2 observations per marginal mean
## [1] 157.8
(var.comp.aging_x_mix <- (563-532)/3 ) # 3 per cell
## [1] 10.33
(sd.values <- sqrt(c(var.comp.error,var.comp.aging_x_mix,var.comp.mix)) )
## [1] 23.065  3.215 12.563

Using anovaMM:

# using anovaMM
library(VCA)
cem.vca <- anovaMM(yield~aging+(mix)+(aging:mix),Data=cement)
print(cem.vca,digits=3)
## 
## 
## ANOVA-Type Estimation of Mixed Model:
## --------------------------------------
## 
##  [Fixed Effects]
## 
##     int agingd2 agingd7 
##    1056    -496       0 
## 
## 
##  [Variance Components]
## 
##   Name      DF     SS       MS       VC      %Total SD     CV[%]
## 1 total     10.675                   695.13  100    26.365 3.261
## 2 mix       2      2957.444 1478.722 152.593 21.952 12.353 1.528
## 3 aging:mix 2      1126.333 563.167  10.315  1.484  3.212  0.397
## 4 error     12     6386.667 532.222  532.222 76.564 23.07  2.854
## 
## Mean: 808.4 (N = 18) 
## 
## Experimental Design: balanced  |  Method: ANOVA

Using VarCorr:

VarCorr(cement.lme.01) # list variance components
##  Groups    Name        Std.Dev.
##  mix:aging (Intercept)  3.21   
##  mix       (Intercept) 12.35   
##  Residual              23.07

Question 1: Random Effects ANOVA (Penicillin)

The penicillin data frame contains the results from an experiment that used a crossed factorial design to estimate the effects of two random factors, plate and sample, on the potency of penicillin samples (as indexed by the diameter of a clear area on a petri dish). The factors of plate has 24 levels and sample has six levels. These factors are crossed, and there are two measures per condition.

sapply(penicillin,class)
##  diameter     plate    sample 
## "numeric"  "factor"  "factor"
xtabs(~sample+plate,penicillin)
##       plate
## sample a b c d e f g h i j k l m n o p q r s t u v w x
##      A 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
##      B 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
##      C 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
##      D 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
##      E 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
##      F 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
  1. Calculate the variance components and intraclass correlations for the random effects.
# ANOVA estimates of variance components
options(contrasts=c("contr.sum","contr.poly"))
pen.aov.01 <- aov(diameter ~ sample*plate,data=penicillin)
summary(pen.aov.01)
##               Df Sum Sq Mean Sq F value  Pr(>F)    
## sample         5    980   195.9   42.62 < 2e-16 ***
## plate         23    314    13.7    2.97 4.1e-05 ***
## sample:plate 115    575     5.0    1.09    0.31    
## Residuals    144    662     4.6                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# variance components using the formulae in section 10.3.3 of my notes:
( var.comp.resid <- 4.6)
## [1] 4.6
MS.interaction <- 5
var.comp.interact <- (MS.interaction - var.comp.resid)/2
var.comp.plate <- (13.7 - MS.interaction) / 12  # divide by number of observations
var.comp.sample <- (195.9-MS.interaction) / 48  # divide by number of observations
# variance components:
c(var.comp.plate,var.comp.sample,var.comp.interact,var.comp.resid)
## [1] 0.725 3.977 0.200 4.600
# standard deviations
sqrt(c(var.comp.plate,var.comp.sample,var.comp.interact,var.comp.resid))
## [1] 0.8515 1.9943 0.4472 2.1448
# variance components calculated using anovaMM listed in VC column
# %Total column shows intraclass correlation
library(VCA)
vca.res <- anovaMM(diameter ~ 1 + (plate) + (sample) + (plate:sample),Data=penicillin)
print(vca.res,digits=3) #  check column VC
## 
## 
## Result Variance Component Analysis:
## -----------------------------------
## 
##   Name         DF     SS      MS     VC    %Total SD    CV[%] 
## 1 total        26.073                9.498 100    3.082 13.339
## 2 plate        23     314.042 13.654 0.721 7.592  0.849 3.675 
## 3 sample       5      979.75  195.95 3.978 41.883 1.995 8.633 
## 4 plate:sample 115    575.083 5.001  0.202 2.124  0.449 1.944 
## 5 error        144    662     4.597  4.597 48.401 2.144 9.28  
## 
## Mean: 23.1 (N = 288) 
## 
## Experimental Design: balanced  |  Method: ANOVA
# variance components calculated using lmer:
penn.lme <- lmer(diameter ~ 1 + (1|plate) + (1|sample) + (1|plate:sample),data=penicillin)
VarCorr(penn.lme)
##  Groups       Name        Std.Dev.
##  plate:sample (Intercept) 0.449   
##  plate        (Intercept) 0.849   
##  sample       (Intercept) 1.995   
##  Residual                 2.144
# partial intraclass correlations:
pICC.plate <- 0.721/(0.721+4.597)
pICC.sample <- 3.978/(3.978+4.597)
pICC.interaction <-0.202/(0.202+4.597)
c(pICC.plate,pICC.sample,pICC.interaction)
## [1] 0.13558 0.46391 0.04209

# intraclass correlation
library(performance)
icc(penn.lme,by_group=T) # ICC, not partial ICC
## # ICC by Group
## 
## Group        |   ICC
## --------------------
## plate:sample | 0.021
## plate        | 0.076
## sample       | 0.419

Example: Raising Pigs

An experiment was done to evaluate the breeding value of a set of five sires in raising pigs. Each sire was mated to two dams selected randomly from a large group, and the average daily weight gains of two pigs from each litter were recorded. The experimental design is illustrated in Figure 1. In this study, sires is fixed, dams is random, and dams is nested within sires. The data are contained in the data frame pigs.

Figure 1: Design of pigs experiment.

Figure 1: Design of pigs experiment.

Analyze the effects of sires and dams on weight gain (wt).

names(pigs)
## [1] "sires" "dams"  "wt"
# n per cell:
xtabs(~sires+dams,pigs)
##      dams
## sires d1 d2 d3 d4 d5 d6 d7 d8 d9 d10
##    s1  2  2  0  0  0  0  0  0  0   0
##    s2  0  0  2  2  0  0  0  0  0   0
##    s3  0  0  0  0  2  2  0  0  0   0
##    s4  0  0  0  0  0  0  2  2  0   0
##    s5  0  0  0  0  0  0  0  0  2   2
# NOT a crossed factorial design: dams is NESTED within sires
# can't get interaction because design is not crossed:
options(contrasts=c("contr.sum","contr.poly"))
pigs.aov.01 <- aov(wt~sires+dams, data=pigs)
summary( pigs.aov.01 )
##             Df Sum Sq Mean Sq F value Pr(>F)  
## sires        4  0.100  0.0249    0.64  0.643  
## dams         5  0.564  0.1127    2.91  0.071 .
## Residuals   10  0.387  0.0387                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# sires=fixed; dams=random
# recompute F & p for fixed factor:
(F.sires <- .0249/.1127)
## [1] 0.2209
(p.sires <- 1-pf(F.sires,4,5))
## [1] 0.9157
# F & p values for nested random factor are unchanged:
F.dams <- 2.9124;
p.dams <- .07067
# using aov + Error:
pigs.aov.02 <- aov(wt~sires+Error(dams), data=pigs)
summary( pigs.aov.02 )
## 
## Error: dams
##           Df Sum Sq Mean Sq F value Pr(>F)
## sires      4  0.100  0.0249    0.22   0.92
## Residuals  5  0.564  0.1127               
## 
## Error: Within
##           Df Sum Sq Mean Sq F value Pr(>F)
## Residuals 10  0.387  0.0387
# calcluate F/p for random/nested factor:
( F.dams <- 0.1127/0.0387 )
## [1] 2.912
( p.dams <- 1 - pf(F.dams,5,10))
## [1] 0.07069
# using lmer:
require(lmerTest)
pigs.lme <- lmer(wt~sires + (1|dams), data=pigs)
anova(pigs.lme) # anova for fixed effects
## Type III Analysis of Variance Table with Satterthwaite's method
##       Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
## sires 0.0342 0.00856     4     5    0.22   0.92
ranova(pigs.lme) # anova-like table for random effects
## ANOVA-like table for random-effects: Single term deletions
## 
## Model:
## wt ~ sires + (1 | dams)
##            npar logLik  AIC  LRT Df Pr(>Chisq)
## <none>        7  -4.64 23.3                   
## (1 | dams)    6  -5.67 23.3 2.05  1       0.15

Compute the variance components.

# anova estimates of variance components:
# note that variance among dams is about the same the variance within litters
n <- 2; # 2 observations per cell
( var.comp.dams <- (.11271 - .0387) / n ) # pop "dam" variance
## [1] 0.03701
( var.comp.error <- .0387) # pop error variance
## [1] 0.0387
# anova estimates calculated using anovaMM
library(VCA)
vca.res <- anovaMM(wt~sires + (dams), Data=pigs)
print(vca.res,digits=3)
## 
## 
## ANOVA-Type Estimation of Mixed Model:
## --------------------------------------
## 
##  [Fixed Effects]
## 
##     int siress1 siress2 siress3 siress4 siress5 
##   2.570   0.097  -0.040   0.062  -0.100   0.000 
## 
## 
##  [Variance Components]
## 
##   Name  DF    SS    MS    VC    %Total SD    CV[%] 
## 1 total 8.521             0.076 100    0.275 10.689
## 2 dams  5     0.564 0.113 0.037 48.881 0.192 7.473 
## 3 error 10    0.387 0.039 0.039 51.119 0.197 7.643 
## 
## Mean: 2.574 (N = 20) 
## 
## Experimental Design: unbalanced  |  Method: ANOVA
# extract VCs from lme model:
vc.pigs <- VarCorr(pigs.lme) # list variance components
print(vc.pigs,comp="Variance")
##  Groups   Name        Variance
##  dams     (Intercept) 0.0370  
##  Residual             0.0387

Question 2: Nested ANOVA (Pigment)

A study examined the moisture content of pigment paste. Fifteen batches were selected randomly from a large set of batches, and two pigment samples were selected randomly from each batch. Finally, the moisture content of each sample was measured in two separate analyses. The data are stored in the data frame pigment.

summary(pigment)
##      batch        sample      moisture   
##  b1     : 4   s1     : 2   Min.   :13.0  
##  b2     : 4   s2     : 2   1st Qu.:24.0  
##  b3     : 4   s3     : 2   Median :27.0  
##  b4     : 4   s4     : 2   Mean   :26.8  
##  b5     : 4   s5     : 2   3rd Qu.:30.2  
##  b6     : 4   s6     : 2   Max.   :40.0  
##  (Other):36   (Other):48
# xtabs(~batch+sample,pigment)
  1. Use ANOVA to analyze the effects of batch and sample on moisture.
# ANOVA
# sample is nested within batch:
options(contrasts=c("contr.sum","contr.poly"))
pigment.aov.1 <- aov(moisture~batch+sample,data=pigment)
summary(pigment.aov.1)
##             Df Sum Sq Mean Sq F value Pr(>F)    
## batch       14   1211    86.5    94.4 <2e-16 ***
## sample      15    870    58.0    63.2 <2e-16 ***
## Residuals   30     27     0.9                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# need to recalculate F and p values for batch:
( F.batch <- 86.50/57.98 )
## [1] 1.492
( p.batch <- 1-pf(F.batch,14,15) )
## [1] 0.2256
# using lmer:
pigment.lme <- lmer(moisture~(1|batch)+(1|sample),data=pigment)
ranova(pigment.lme)
## ANOVA-like table for random-effects: Single term deletions
## 
## Model:
## moisture ~ (1 | batch) + (1 | sample)
##              npar logLik AIC  LRT Df Pr(>Chisq)    
## <none>          4   -146 300                       
## (1 | batch)     3   -146 299  0.6  1       0.45    
## (1 | sample)    3   -184 375 76.4  1     <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
  1. Calculate the variance components.
# anova variance components:
a <- 15 # number of batches
b <- 2 # samples per batch
n <-  2 # measures per sample
var.comp.error <- 0.92
( var.comp.sample <- (57.98-0.92) / n )
## [1] 28.53
( var.comp.batch <- (86.50-57.98) / (b*n) )
## [1] 7.13
sqrt(c(var.comp.sample,var.comp.batch,var.comp.error))
## [1] 5.3413 2.6702 0.9592
# using anovaMM:
library(VCA)
vca.res <- anovaMM(moisture~(batch)+(sample),Data=pigment)
print(vca.res,digits=3)
## 
## 
## Result Variance Component Analysis:
## -----------------------------------
## 
##   Name   DF     SS       MS     VC     %Total SD    CV[%] 
## 1 total  28.218                 36.578 100    6.048 22.581
## 2 batch  14     1210.933 86.495 7.128  19.487 2.67  9.968 
## 3 sample 15     869.75   57.983 28.533 78.007 5.342 19.944
## 4 error  30     27.5     0.917  0.917  2.506  0.957 3.575 
## 
## Mean: 26.78 (N = 60) 
## 
## Experimental Design: unbalanced  |  Method: ANOVA
# using varCorr:
library(lmerTest)
lme.vca <- VarCorr(pigment.lme)
print(lme.vca,comp=c("Variance","Std.Dev."),digits=3)
##  Groups   Name        Variance Std.Dev.
##  sample   (Intercept) 28.533   5.342   
##  batch    (Intercept)  7.128   2.670   
##  Residual              0.917   0.957
  1. Calculate measures of association strength for batch and sample.
# paritial ICC
( icc.partial.batch <- var.comp.batch/(var.comp.batch+var.comp.error) )
## [1] 0.8857
( icc.partial.sample <- var.comp.sample/(var.comp.sample+var.comp.error) )
## [1] 0.9688
# ICC (also, see %Total column of anovaMM output)
library(performance)
icc(pigment.lme,by_group=T)
## # ICC by Group
## 
## Group  |   ICC
## --------------
## sample | 0.780
## batch  | 0.195

References

Sahai, Hardeo, and Mohammed I. Ageel. 2000. The Analysis of Variance: Fixed, Random, and Mixed Models. Boston: Birkhä̈user.