Display system information and load tidyverse
and
faraway
packages
sessionInfo()
## R version 4.2.3 (2023-03-15)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS Ventura 13.3.1
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] pbkrtest_0.5.2 lme4_1.1-33 Matrix_1.5-4 faraway_1.0.8
## [5] lubridate_1.9.2 forcats_1.0.0 stringr_1.5.0 dplyr_1.1.2
## [9] purrr_1.0.1 readr_2.1.4 tidyr_1.3.0 tibble_3.2.1
## [13] ggplot2_3.4.2 tidyverse_2.0.0
##
## loaded via a namespace (and not attached):
## [1] tidyselect_1.2.0 xfun_0.39 bslib_0.4.2 splines_4.2.3
## [5] lattice_0.21-8 colorspace_2.1-0 vctrs_0.6.2 generics_0.1.3
## [9] htmltools_0.5.5 yaml_2.3.7 utf8_1.2.3 rlang_1.1.1
## [13] nloptr_2.0.3 jquerylib_0.1.4 pillar_1.9.0 glue_1.6.2
## [17] withr_2.5.0 lifecycle_1.0.3 munsell_0.5.0 gtable_0.3.3
## [21] evaluate_0.21 knitr_1.42 tzdb_0.3.0 fastmap_1.1.1
## [25] parallel_4.2.3 fansi_1.0.4 broom_1.0.4 Rcpp_1.0.10
## [29] backports_1.4.1 scales_1.2.1 cachem_1.0.8 jsonlite_1.8.4
## [33] hms_1.1.3 digest_0.6.31 stringi_1.7.12 grid_4.2.3
## [37] cli_3.6.1 tools_4.2.3 magrittr_2.0.3 sass_0.4.6
## [41] pkgconfig_2.0.3 MASS_7.3-60 timechange_0.2.0 minqa_1.2.5
## [45] rmarkdown_2.21 rstudioapi_0.14 R6_2.5.1 boot_1.3-28.1
## [49] nlme_3.1-162 compiler_4.2.3
library(tidyverse)
library(faraway)
library(precrec)
# install.packages("precrec")
psid
(Panel Study of Income Dynamics) data records the
income of 85 individuals, who were aged 25-39 in 1968 and had complete
data for at least 11 of the years between 1968 and 1990.psid <- as_tibble(psid) %>%
print(n = 30)
## # A tibble: 1,661 × 6
## age educ sex income year person
## <int> <int> <fct> <int> <int> <int>
## 1 31 12 M 6000 68 1
## 2 31 12 M 5300 69 1
## 3 31 12 M 5200 70 1
## 4 31 12 M 6900 71 1
## 5 31 12 M 7500 72 1
## 6 31 12 M 8000 73 1
## 7 31 12 M 8000 74 1
## 8 31 12 M 9600 75 1
## 9 31 12 M 9000 76 1
## 10 31 12 M 9000 77 1
## 11 31 12 M 23000 78 1
## 12 31 12 M 22000 79 1
## 13 31 12 M 8000 80 1
## 14 31 12 M 10000 81 1
## 15 31 12 M 21800 82 1
## 16 31 12 M 19000 83 1
## 17 29 16 M 7500 68 2
## 18 29 16 M 7300 69 2
## 19 29 16 M 9250 70 2
## 20 29 16 M 10300 71 2
## 21 29 16 M 10900 72 2
## 22 29 16 M 11900 73 2
## 23 29 16 M 13000 74 2
## 24 29 16 M 12900 75 2
## 25 29 16 M 19900 76 2
## 26 29 16 M 18900 77 2
## 27 29 16 M 19100 78 2
## 28 29 16 M 20300 79 2
## 29 29 16 M 31600 80 2
## 30 29 16 M 16600 81 2
## # ℹ 1,631 more rows
summary(psid)
## age educ sex income year
## Min. :25.00 Min. : 3.00 F:732 Min. : 3 Min. :68.00
## 1st Qu.:28.00 1st Qu.:10.00 M:929 1st Qu.: 4300 1st Qu.:73.00
## Median :34.00 Median :12.00 Median : 9000 Median :78.00
## Mean :32.19 Mean :11.84 Mean : 13575 Mean :78.61
## 3rd Qu.:36.00 3rd Qu.:13.00 3rd Qu.: 18050 3rd Qu.:84.00
## Max. :39.00 Max. :16.00 Max. :180000 Max. :90.00
## person
## Min. : 1.00
## 1st Qu.:20.00
## Median :42.00
## Mean :42.44
## 3rd Qu.:63.00
## Max. :85.00
psid %>%
filter(person <= 20) %>%
ggplot() +
geom_line(mapping = aes(x = year, y = income)) +
facet_wrap(~ person) +
labs(x = "Year", y = "Income")
psid %>%
filter(person <= 20) %>%
ggplot() +
geom_line(mapping = aes(x = year, y = income, group = person)) +
facet_wrap(~ sex) +
scale_y_log10()
year
by the median 78.library(lme4)
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
psid <- psid %>%
mutate(cyear = I(year - 78))
oldw <- getOption("warn")
options(warn = -1) # turn off warnings
ml <- lmList(log(income) ~ cyear | person, data = psid)
options(warn = oldw)
intercepts <- sapply(ml, coef)[1, ]
slopes <- sapply(ml, coef)[2, ]
tibble(int = intercepts,
slo = slopes) %>%
ggplot() +
geom_point(mapping = aes(x = int, y = slo)) +
labs(x = "Intercept", y = "Slope")
mmod <- lmer(log(income) ~ cyear * sex + age + educ + (cyear | person), data = psid)
summary(mmod)
## Linear mixed model fit by REML ['lmerMod']
## Formula: log(income) ~ cyear * sex + age + educ + (cyear | person)
## Data: psid
##
## REML criterion at convergence: 3819.8
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -10.2310 -0.2134 0.0795 0.4147 2.8254
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## person (Intercept) 0.2817 0.53071
## cyear 0.0024 0.04899 0.19
## Residual 0.4673 0.68357
## Number of obs: 1661, groups: person, 85
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 6.674211 0.543323 12.284
## cyear 0.085312 0.008999 9.480
## sexM 1.150312 0.121292 9.484
## age 0.010932 0.013524 0.808
## educ 0.104209 0.021437 4.861
## cyear:sexM -0.026306 0.012238 -2.150
##
## Correlation of Fixed Effects:
## (Intr) cyear sexM age educ
## cyear 0.020
## sexM -0.104 -0.098
## age -0.874 0.002 -0.026
## educ -0.597 0.000 0.008 0.167
## cyear:sexM -0.003 -0.735 0.156 -0.010 -0.011
Exercise: interpretation of fixed effects.
To test the fixed effects, we can use the Kenward-Roger adjusted F-test. For example, to test the interaction term
library(pbkrtest)
mmod <- lmer(log(income) ~ cyear * sex + age + educ + (cyear | person), data = psid, REML = TRUE)
mmodr <- lmer(log(income) ~ cyear + sex + age + educ + (cyear | person), data = psid, REML = TRUE)
KRmodcomp(mmod, mmodr)
## large : log(income) ~ cyear * sex + age + educ + (cyear | person)
## small : log(income) ~ cyear + sex + age + educ + (cyear | person)
## stat ndf ddf F.scaling p.value
## Ftest 4.6142 1.0000 81.3279 1 0.03468 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
The interaction term is marginally significant.
confint(mmod, method = "boot")
## Computing bootstrap confidence intervals ...
##
## 3 warning(s): Model failed to converge with max|grad| = 0.00255347 (tol = 0.002, component 1) (and others)
## 2.5 % 97.5 %
## .sig01 0.44203369 0.612002991
## .sig02 -0.09635989 0.449285497
## .sig03 0.03974560 0.058927318
## .sigma 0.65745803 0.707362223
## (Intercept) 5.49842638 7.787216349
## cyear 0.06820245 0.103892463
## sexM 0.91974547 1.383076952
## age -0.01773141 0.038756723
## educ 0.06064885 0.146991383
## cyear:sexM -0.05072448 -0.003343122
All variance component parameters are significant except for the covariance (correlation) between random intercept and slope.
(diagd <- fortify.merMod(mmod))
## # A tibble: 1,661 × 10
## age educ sex income year person cyear .fitted .resid .scresid
## <int> <int> <fct> <int> <int> <int> <I<dbl>> <dbl> <dbl> <dbl>
## 1 31 12 M 6000 68 1 -10 8.63 0.0672 0.0983
## 2 31 12 M 5300 69 1 -9 8.71 -0.132 -0.193
## 3 31 12 M 5200 70 1 -8 8.78 -0.226 -0.331
## 4 31 12 M 6900 71 1 -7 8.86 -0.0185 -0.0271
## 5 31 12 M 7500 72 1 -6 8.93 -0.0103 -0.0151
## 6 31 12 M 8000 73 1 -5 9.01 -0.0209 -0.0306
## 7 31 12 M 8000 74 1 -4 9.08 -0.0961 -0.141
## 8 31 12 M 9600 75 1 -3 9.16 0.0111 0.0162
## 9 31 12 M 9000 76 1 -2 9.23 -0.129 -0.188
## 10 31 12 M 9000 77 1 -1 9.31 -0.204 -0.298
## # ℹ 1,651 more rows
diagd %>%
ggplot(mapping = aes(sample = .resid)) +
stat_qq() + facet_grid(~sex)
diagd$edulevel <- cut(psid$educ,
c(0, 8.5, 12.5, 20),
labels=c("lessHS", "HS", "moreHS"))
diagd %>%
ggplot(mapping = aes(x = .fitted, y = .resid)) +
geom_point(alpha = 0.3) +
geom_hline(yintercept = 0) +
facet_grid(~ edulevel) +
labs(x = "Fitted", ylab = "Residuals")
vision
dataacuity
. Each individual is tested on left
and right eyes under 4 powers.vision <- as_tibble(vision) %>%
print(n = Inf)
## # A tibble: 56 × 4
## acuity power eye subject
## <dbl> <fct> <fct> <fct>
## 1 116 6/6 left 1
## 2 119 6/18 left 1
## 3 116 6/36 left 1
## 4 124 6/60 left 1
## 5 120 6/6 right 1
## 6 117 6/18 right 1
## 7 114 6/36 right 1
## 8 122 6/60 right 1
## 9 110 6/6 left 2
## 10 110 6/18 left 2
## 11 114 6/36 left 2
## 12 115 6/60 left 2
## 13 106 6/6 right 2
## 14 112 6/18 right 2
## 15 110 6/36 right 2
## 16 110 6/60 right 2
## 17 117 6/6 left 3
## 18 118 6/18 left 3
## 19 120 6/36 left 3
## 20 120 6/60 left 3
## 21 120 6/6 right 3
## 22 120 6/18 right 3
## 23 120 6/36 right 3
## 24 124 6/60 right 3
## 25 112 6/6 left 4
## 26 116 6/18 left 4
## 27 115 6/36 left 4
## 28 113 6/60 left 4
## 29 115 6/6 right 4
## 30 116 6/18 right 4
## 31 116 6/36 right 4
## 32 119 6/60 right 4
## 33 113 6/6 left 5
## 34 114 6/18 left 5
## 35 114 6/36 left 5
## 36 118 6/60 left 5
## 37 114 6/6 right 5
## 38 117 6/18 right 5
## 39 116 6/36 right 5
## 40 112 6/60 right 5
## 41 119 6/6 left 6
## 42 115 6/18 left 6
## 43 94 6/36 left 6
## 44 116 6/60 left 6
## 45 100 6/6 right 6
## 46 99 6/18 right 6
## 47 94 6/36 right 6
## 48 97 6/60 right 6
## 49 110 6/6 left 7
## 50 110 6/18 left 7
## 51 105 6/36 left 7
## 52 118 6/60 left 7
## 53 105 6/6 right 7
## 54 105 6/18 right 7
## 55 115 6/36 right 7
## 56 115 6/60 right 7
vision %>%
mutate(npower = rep(1:4, 14)) %>%
ggplot() +
geom_line(mapping = aes(x = npower, y = acuity, linetype = eye)) +
facet_wrap(~ subject) +
scale_x_continuous("Power", breaks = 1:4,
labels = c("6/6", "6/18", "6/36", "6/60"))
eyes
and
subject
, since we don’t believe the eye difference is
consistent across individuals.mmod <- lmer(acuity ~ power + (1 | subject) + (1 | subject:eye), data = vision)
summary(mmod)
## Linear mixed model fit by REML ['lmerMod']
## Formula: acuity ~ power + (1 | subject) + (1 | subject:eye)
## Data: vision
##
## REML criterion at convergence: 328.7
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.4240 -0.3232 0.0109 0.4408 2.4662
##
## Random effects:
## Groups Name Variance Std.Dev.
## subject:eye (Intercept) 10.27 3.205
## subject (Intercept) 21.53 4.640
## Residual 16.60 4.075
## Number of obs: 56, groups: subject:eye, 14; subject, 7
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 112.6429 2.2349 50.401
## power6/18 0.7857 1.5400 0.510
## power6/36 -1.0000 1.5400 -0.649
## power6/60 3.2857 1.5400 2.134
##
## Correlation of Fixed Effects:
## (Intr) pw6/18 pw6/36
## power6/18 -0.345
## power6/36 -0.345 0.500
## power6/60 -0.345 0.500 0.500
power
, we can use the
Kenward-Roger adjusted F-test. We find the power
is not
significant at the 0.05 level.mmod <- lmer(acuity ~ power + (1 | subject) + (1 | subject:eye), data = vision, REML = TRUE)
nmod <- lmer(acuity ~ (1 | subject) + (1 | subject:eye), data = vision, REML = TRUE)
KRmodcomp(mmod, nmod)
## large : acuity ~ power + (1 | subject) + (1 | subject:eye)
## small : acuity ~ (1 | subject) + (1 | subject:eye)
## stat ndf ddf F.scaling p.value
## Ftest 2.8263 3.0000 39.0000 1 0.05106 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
mmod_mle <- lmer(acuity ~ power + (1 | subject) + (1 | subject:eye), data = vision, REML = FALSE)
nmod_mle <- lmer(acuity ~ (1 | subject) + (1 | subject:eye), data = vision, REML = FALSE)
(lrtstat <- as.numeric(2 * (logLik(mmod_mle, REML = FALSE) - logLik(nmod_mle, REML = FALSE))))
## [1] 8.262412
pchisq(lrtstat, 3, lower.tail = FALSE)
## [1] 0.04088862
library(pbkrtest)
set.seed(123)
PBmodcomp(mmod, nmod) %>%
summary()
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.00254113 (tol = 0.002, component 1)
## Bootstrap test; time: 9.07 sec; samples: 1000; extremes: 51;
## large : acuity ~ power + (1 | subject) + (1 | subject:eye)
## acuity ~ (1 | subject) + (1 | subject:eye)
## stat df ddf p.value
## LRT 8.2624 3.0000 0.04089 *
## PBtest 8.2624 0.05195 .
## Gamma 8.2624 0.04870 *
## Bartlett 7.7466 3.0000 0.05155 .
## F 2.7541 3.0000 2.9092 0.21814
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
power
becomes significant.mmodr <- lmer(acuity ~ power + (1 | subject) + (1 | subject:eye), data = vision, REML = TRUE, subset = -43)
nmodr <- lmer(acuity ~ (1 | subject) + (1 | subject:eye), data = vision, REML = TRUE, subset = -43)
KRmodcomp(mmodr, nmodr)
## large : acuity ~ power + (1 | subject) + (1 | subject:eye)
## small : acuity ~ (1 | subject) + (1 | subject:eye)
## stat ndf ddf F.scaling p.value
## Ftest 3.5956 3.0000 38.0373 1 0.02212 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
PBmodcomp(mmodr, nmodr) %>%
summary()
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.00311466 (tol = 0.002, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.00210241 (tol = 0.002, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.00245455 (tol = 0.002, component 1)
## Bootstrap test; time: 9.13 sec; samples: 1000; extremes: 31;
## large : acuity ~ power + (1 | subject) + (1 | subject:eye)
## acuity ~ (1 | subject) + (1 | subject:eye)
## stat df ddf p.value
## LRT 10.2475 3.0000 0.01658 *
## PBtest 10.2475 0.03197 *
## Gamma 10.2475 0.03412 *
## Bartlett 9.0967 3.0000 0.02803 *
## F 3.4158 3.0000 2.8405 0.17782
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
mmod <- lmer(acuity ~ power + (1 | subject) + (1 | subject:eye), data = vision)
confint(mmod, method = "boot")
## Computing bootstrap confidence intervals ...
##
## 45 message(s): boundary (singular) fit: see help('isSingular')
## 2.5 % 97.5 %
## .sig01 1.822218e-07 5.569230
## .sig02 0.000000e+00 8.080920
## .sigma 3.147609e+00 4.995176
## (Intercept) 1.086538e+02 117.166997
## power6/18 -2.388635e+00 3.877868
## power6/36 -4.111418e+00 1.892004
## power6/60 3.254217e-01 6.605027
qqnorm(ranef(mmodr)$"subject:eye"[[1]], main = "")
plot(resid(mmodr) ~ fitted(mmodr), xlab = "Fitted", ylab = "Residuals")
abline(h=0)