# Plots from Johnson & Kuhn (2014)

This document (nearly) duplicates the examples from Johnson and Kuhn (2014). Minor differences are due to the use of R Markdown.

## Necessary Packages

library(ltbayes)
library(ggplot2)
library(gridExtra)
library(reshape2)

## Figure 1

Trace plot (a) and estimated posterior density (b) for $$\zeta$$ based on 5000 simulated realizations from the posterior distribution.

samp <- 5000
burn <- 1000

alph <- c(1.27,1.34,1.14,1,0.67)
beta <- c(1.19,0.59,0.15,-0.59,-2)
gamm <- c(0.1,0.15,0.15,0.2,0.1)

zeta <- postsamp(fmodel3pl, c(0,0,1,1,1),
apar = alph, bpar = beta, cpar = gamm,
control = list(nbatch = samp + burn, scale = 3))

zeta <- data.frame(sample = 1:samp,
zeta = zeta$batch[(burn + 1):(samp + burn)]) p <- ggplot(zeta, aes(x = sample, y = zeta)) p <- p + geom_line() + ylab(expression(zeta)) + xlab("Sample") p <- p + theme_bw() + theme(legend.key = element_blank()) p <- p + theme(axis.text.x = element_text(size = 10, color = "black")) p <- p + theme(text = element_text(size = 15)) p <- p + theme(axis.text.y = element_text(color = "black")) p <- p + ggtitle("(a)\n") p1 <- p p <- ggplot(zeta, aes(x = zeta)) + geom_density(adjust = 2) + coord_flip() p <- p + xlab(expression(zeta)) + ylab("Density") p <- p + theme_bw() + theme(legend.key = element_blank()) p <- p + theme(axis.text.x = element_text(size = 10, color = "black")) p <- p + theme(text = element_text(size = 15)) p <- p + theme(axis.text.y = element_text(color = "black")) p <- p + ggtitle("(b)\n") p2 <- p grid.arrange(p1, p2, nrow = 1) ## Figure 2 Violin plots for 5000 simulated realizations from the posterior distribution of $$\zeta$$ given each of 32 possible response patterns, stratified by sum score. Points and line segments within each violin plot represent the posterior mean and 95% credibility interval, respectively. y <- patterns(5, 2, 0:5) zeta <- vector("list", length = 32) for (i in 1:32) { zeta[[i]] <- postsamp(fmodel3pl, y[i,], apar = alph, bpar = beta, cpar = gamm, control = list(nbatch = samp + burn))$batch[(burn + 1):(samp + burn)]
}
zeta <- data.frame(zeta = unlist(zeta))
zeta$pattern <- rep(apply(y, 1, paste, collapse = ""), each = samp) zeta$sum <- rep(apply(y, 1, sum), each = samp)

p <- ggplot(zeta, aes(x = pattern, y = zeta))
p <- p + geom_violin(trim = FALSE)
p <- p + facet_grid(. ~ sum, scales = "free_x", space = "free_x")
p <- p + theme_bw() + theme(legend.key = element_blank())
p <- p + theme(text = element_text(size = 15)) + xlab("Response Pattern")
p <- p + theme(axis.text.x = element_text(size = 10, angle = 45,
vjust = 0.5, color = "black"))
p <- p + theme(axis.text.y = element_text(color = "black"))
p <- p + ylab(expression(zeta)) + xlab("Reponse Pattern")
p <- p + stat_summary(fun.y = mean,
fun.ymin = function(x) quantile(x, probs = 0.025),
fun.ymax = function(x) quantile(x, probs = 0.975), size = 0.5)

plot(p)

## Figure 3

Violin plots (a) and empirical cumulative distribution functions (b) for 5000 simulated realizations from the posterior distribution of $$\zeta$$ conditional on sum score. Points and line segments within each violin plot represent the posterior mean and 95% credibility interval, respectively.

zeta <- vector("list", length = 6)
for (i in 1:6) {
y <- patterns(5, 2, i-1)
zeta[[i]] <- postsamp(fmodel3pl, y, apar = alph, bpar = beta, cpar = gamm,
control = list(nbatch = samp + burn))
zeta[[i]] <- zeta[[i]]$batch[(burn+1):(samp + burn)] } zeta <- data.frame(zeta = unlist(zeta)) zeta$sum <- factor(rep(0:5, each = samp))

p <- ggplot(zeta, aes(x = zeta, color = sum, linetype = sum)) + stat_ecdf()
p <- p + ylab("") + xlim(c(-3,3)) + guides(color = guide_legend(title = "Sum"))
p <- p + theme_bw() + theme(legend.key = element_blank())
p <- p + theme(axis.text.x = element_text(size = 10, color = "black"))
p <- p + theme(text = element_text(size = 15))
p <- p + theme(axis.text.y = element_text(color = "black"))
p <- p + ylab("") + xlab(expression(zeta))
p <- p + guides(linetype = guide_legend(title = "Sum"))
p <- p + ggtitle("(b)\n")

p1 <- p

p <- ggplot(zeta, aes(x = sum, y = zeta))
p <- p + geom_violin(trim = FALSE)
p <- p + theme_bw() + theme(legend.key = element_blank())
p <- p + theme(text = element_text(size = 15)) + xlab("Sum Score")
p <- p + theme(axis.text.x = element_text(size = 10, color = "black"))
p <- p + theme(axis.text.y = element_text(color = "black")) + ylab(expression(zeta))
p <- p + stat_summary(fun.y = mean,
fun.ymin = function(x) quantile(x, probs = 0.025),
fun.ymax = function(x) quantile(x, probs = 0.975), size = 0.5)
p <- p + ggtitle("(a)\n")

p2 <- p

grid.arrange(p2, p1, nrow = 1)
## Warning: Removed 190 rows containing non-finite values (stat_ecdf).

## Figure 4

Estimated density functions and empirical cumulative distribution functions for the posterior distributions of $$\zeta$$ given $$\tilde{Y} < \tilde{y}$$ (lower interval) versus $$\tilde{Y} \ge \tilde{y}$$ (upper interval) for $$c = 1, 2, \dots, 5$$.

zetal <- vector("list", length = 5)
zetau <- vector("list", length = 5)
for (i in 1:5) {
zetal[[i]] <- postsamp(fmodel3pl, patterns(5, 2, 0:(i-1)),
apar = alph, bpar = beta, cpar = gamm,
control = list(nbatch = samp + burn))
zetau[[i]] <- postsamp(fmodel3pl, patterns(5, 2, i:5),
apar = alph, bpar = beta, cpar = gamm,
control = list(nbatch = samp + burn))
zetal[[i]] <- zetal[[i]]$batch[(burn+1):(samp + burn)] zetau[[i]] <- zetau[[i]]$batch[(burn+1):(samp + burn)]
}
zetal <- data.frame(zeta = unlist(zetal))
zetau <- data.frame(zeta = unlist(zetau))
zetal$interval <- "Lower" zetau$interval <- "Upper"
zetal$cut <- rep(c("y1","y2","y3","y4","y5"), each = samp) zetau$cut <- rep(c("y1","y2","y3","y4","y5"), each = samp)
zeta <- rbind(zetal, zetau)

p <- ggplot(zeta, aes(x = zeta)) + geom_density(aes(linetype = interval),
adjust = 2, show.legend = FALSE)
p <- p + facet_wrap(~ cut, ncol = 5) + xlim(c(-4,4)) + ylab("") + xlab(expression(zeta))
p <- p + stat_ecdf(aes(linetype = interval))
p <- p + theme_bw() + theme(legend.key = element_blank())
p <- p + theme(text = element_text(size = 15))
p <- p + theme(axis.text.x = element_text(size = 10, color = "black"))
p <- p + theme(axis.text.y = element_text(color = "black"))
p <- p + scale_linetype_manual(values = c(6,1), name = "Interval")

plot(p)

## Figure 5

Posterior probabilities, conditional on sum score ($$\tilde{y}$$), of $$\zeta$$ falling into one of four consecutive intervals defined as $$R_1 = \{\zeta | \zeta < Q_1\}$$, $$R_2 = \{\zeta|Q_1 < \zeta < Q_2\}$$, $$R_3 = \{\zeta|Q_2 < \zeta < Q_3\}$$, and $$R_4 = \{\zeta|\zeta > Q_4\}$$ where $$Q_1$$, $$Q_2$$, and $$Q_3$$ are the quartiles of the prior distribution of $$\zeta$$.

zeta <- vector("list", length = 6)
for (i in 1:6) {
y <- patterns(5, 2, i-1)
zeta[[i]] <- postsamp(fmodel3pl, y, apar = alph, bpar = beta, cpar = gamm,
control = list(nbatch = samp + burn))
zeta[[i]] <- zeta[[i]]$batch[(burn + 1):(samp + burn)] zeta[[i]] <- table(cut(zeta[[i]], c(-Inf, qnorm(c(0.25,0.5,0.75)), Inf))) } zeta <- data.frame(zeta = unlist(zeta)) zeta$quartile = factor(rep(1:4, 6))
levels(zeta$quartile) <- c("First","Second","Third","Fourth") zeta$zeta <- zeta$zeta/samp * 100 zeta$sum <- factor(rep(0:5, each = 4))
zeta$text <- paste(round(zeta$zeta, 1), "%", sep = "")
zeta$side <- factor(ifelse(zeta$zeta > 50, 1, 0))

p <- ggplot(zeta, aes(x = sum, y = quartile, fill = zeta)) + geom_tile()
p <- p + scale_fill_gradient(name = "Probability (%)", low = grey(0.9),
high = grey(0.1), breaks = c(0, 50, 100), limits = c(0, 100))
p <- p + geom_text(aes(label = text, color = side))
p <- p + scale_color_manual(values = c("black","white"), guide = FALSE)
p <- p + theme_bw() + coord_fixed()
p <- p + theme(text = element_text(size = 10))
p <- p + theme(axis.text = element_text(color = "black"))
p <- p + ylab("") + xlab("Sum")

plot(p)

## Figure 6

Posterior probabilities of the form $$P(\zeta_A > \zeta_B|\tilde{Y}_A = \tilde{y}_A, \tilde{Y}_B = \tilde{y}_B)$$ — i.e., the posterior probability that examinee A has a larger latent score than examinee B given their sum scores.

prob <- matrix(NA, 6, 6)
for (j in 1:6) {
for (i in j:6) {
if (i == j) {
prob[i,j] <- 0.5
}
else {
zeta1 <- postsamp(fmodel3pl, patterns(5, 2, i-1),
apar = alph, bpar = beta, cpar = gamm,
control = list(nbatch = samp + burn))
zeta2 <- postsamp(fmodel3pl, patterns(5, 2, j-1),
apar = alph, bpar = beta, cpar = gamm,
control = list(nbatch = samp + burn))
zeta1 <- zeta1$batch[(burn + 1):(samp + burn)] zeta2 <- zeta2$batch[(burn + 1):(samp + burn)]
prob[i,j] <- mean(zeta1 > zeta2)
prob[j,i] <- 1 - prob[i,j]
}
}
}
prob <- data.frame(p = as.vector(prob), sumr = factor(rep(0:5, 6)),
sumc = factor(rep(0:5, each = 6)))
prob$p <- prob$p * 100
prob$text <- paste(round(prob$p, 1), "%", sep = "")
prob$side <- factor(ifelse(prob$p > 50, 1, 0))

p <- ggplot(prob, aes(x = sumc, y = sumr, fill = p)) + geom_tile()
p <- p + scale_fill_gradient(name = "Probability (%)", low = grey(0.9),
high = grey(0.1), breaks = c(0, 50, 100), limits = c(0, 100))
p <- p + geom_text(aes(label = text, color = side))
p <- p + scale_color_manual(values = c("black","white"), guide = FALSE)
p <- p + theme_bw() + coord_fixed()
p <- p + theme(text = element_text(size = 10))
p <- p + theme(axis.text = element_text(color = "black"))
p <- p + ylab("Examinee A Sum") + xlab("Examinee B Sum")

plot(p)

## Figure 7

Profiles of the unnormalized log-posterior distribution of $$\zeta$$ given $$\tilde{Y} = 0, 1, \dots, 5$$ with normal and improper uniform prior distributions.

post <- vector("list", length = 6)
for (i in 1:6) {
y <- patterns(5, 2, i-1)
post[[i]] <- posttrace(fmodel3pl, y, apar = alph, bpar = beta, cpar = gamm,
zmin = -5, zmax = 5, length = 100)
post[[i]] <- cbind(post[[i]]$zeta, post[[i]]$post)
}
post <- data.frame(do.call("rbind", post))
names(post) <- c("zeta","post")
post$sum <- factor(rep(0:5, each = 100)) post.norm <- post post.norm$prior <- "normal"

post <- vector("list", length = 6)
for (i in 1:6) {
y <- patterns(5, 2, i-1)
post[[i]] <- posttrace(fmodel3pl, y, apar = alph, bpar = beta, cpar = gamm,
zmin = -5, zmax = 5, prior = function(z) 1, length = 100)
post[[i]] <- cbind(post[[i]]$zeta, post[[i]]$post)
}
post <- data.frame(do.call("rbind", post))
names(post) <- c("zeta","post")
post$sum <- factor(rep(0:5, each = 100)) post.unif <- post post.unif$prior = "uniform"

post <- rbind(post.norm, post.unif)

names(post)[4] <- "Prior"

p <- ggplot(post, aes(x = zeta, y = post, linetype = Prior))
p <- p + geom_line() + facet_wrap(~ sum)
p <- p + xlab(expression(zeta)) + ylab("Log-Posterior Density")
p <- p + theme_bw() + theme(legend.key = element_blank())
p <- p + theme(text = element_text(size = 15))
p <- p + theme(axis.text.x = element_text(size = 10, color = "black"))
p <- p + theme(axis.text.y = element_text(color = "black"))

plot(p)

## Figure 8

Profile likelihoods for $$\zeta$$ given $$\tilde{Y} = 2,3,4$$ with maximum likelihood estimates and profile likelihood confidence intervals.

post <- post[post$Prior == "uniform",] post <- post[post$sum %in% c(2,3,4),]

y2 <- profileci(fmodel3pl, patterns(5, 2, 2), apar = alph,
bpar = beta, cpar = gamm, lower = FALSE)
y3 <- profileci(fmodel3pl, patterns(5, 2, 3), apar = alph,
bpar = beta, cpar = gamm)
y4 <- profileci(fmodel3pl, patterns(5, 2, 4), apar = alph,
bpar = beta, cpar = gamm)

names(post)[3] <- "Sum"

p <- ggplot(post, aes(x = zeta, y = post, linetype = Sum))
p <- p + geom_line() + ylim(c(-14,0))
p <- p + xlab(expression(zeta)) + ylab("Log-Posterior Density")
p <- p + theme_bw() + theme(legend.key = element_blank())
p <- p + theme(text = element_text(size = 20))
p <- p + theme(axis.text.x = element_text(size = 15, color = "black"))
p <- p + theme(axis.text.y = element_text(color = "black"))
p <- p + annotate("segment", x = y2$zeta, xend = y2$zeta,
y = -14, yend = y2$post, linetype = 1) p <- p + annotate("segment", x = y2$upper, xend = y2$upper, y = -14, yend = y2$f.upper, linetype = 1)
p <- p + annotate("segment", x = y3$zeta, xend = y3$zeta,
y = -14, yend = y3$post, linetype = 3) p <- p + annotate("segment", x = y3$lower, xend = y3$lower, y = -14, yend = y3$f.lower, linetype = 3)
p <- p + annotate("segment", x = y3$upper, xend = y3$upper,
y = -14, yend = y3$f.upper, linetype = 3) p <- p + annotate("segment", x = y4$zeta, xend = y4$zeta, y = -14, yend = y4$post, linetype = 2)
p <- p + annotate("segment", x = y4$lower, xend = y4$lower,
y = -14, yend = y4$f.lower, linetype = 2) p <- p + annotate("segment", x = y4$upper, xend = y4$upper, y = -14, yend = y4$f.upper, linetype = 2)

plot(p)

## Figure 9

Item information functions for five items of a 3-parameter binary logistic model.

zeta <- seq(-3, 3, length = 100)
info <- matrix(NA, 100, 5)
for (j in 1:100) {
info[j,] <- information(fmodel3pl, c(0,1,1,1,1), zeta = zeta[j],
apar = alph, bpar = beta, cpar = gamm)$item } matplot(zeta, info, type = "l", ylab = "Information", bty = "n", xlab = expression(zeta)) legend(-3, 0.3, paste("Item", 1:5), lty = 1:5, col = 1:5) ## Figure 11 Posterior distribution of $$\zeta$$ for the hyperbolic cosine model given $$\tilde{y} = 1$$ (a), $$y' = (1,0,0,0,0)$$ (b), $$y' = (1,1,1,0,0)$$ ©, and $$y' = (0,1,1,1,0)$$ (d). fmodelhcm <- function(zeta, y, alph, beta, prior = dnorm, ...) { if (is.vector(y)) y <- matrix(y, 1, length(y)) m <- ncol(y) n <- nrow(y) prob <- matrix(NA, m, 2) yprb <- matrix(NA, n, m) prob[,1] <- 2*cosh(zeta - beta) prob[,2] <- exp(alph) prob <- sweep(prob, 1, apply(prob, 1, sum), "/") for (i in 1:n) { yprb[i,] <- prob[row(prob) == 1:m & col(prob) == y[i,] + 1] } return(list(post = log(sum(apply(yprb, 1, prod))) + log(prior(zeta, ...)), prob = prob)) } dmixnorm <- function(z, pi, m1, m2, s1, s2) { return(pi*dnorm(z, m1, s1) + (1 - pi)*dnorm(z, m2, s2)) } apar <- c(1,1,1,1,1) bpar <- c(-2,-1,0,1,2) n <- 10000 tmp.1 <- postsamp(fmodelhcm, patterns(5,2,1), alph = apar, beta = bpar, prior = dmixnorm, m1 = -2, m2 = 2, s1 = 2, s2 = 2, pi = 0.5, control = list(nbatch = n)) tmp.2 <- postsamp(fmodelhcm, c(1,0,0,0,0), alph = apar, beta = bpar, prior = dmixnorm, m1 = -2, m2 = 2, s1 = 2, s2 = 2, pi = 0.5, control = list(nbatch = n)) tmp.3 <- postsamp(fmodelhcm, c(1,1,1,0,0), alph = apar, beta = bpar, prior = dmixnorm, m1 = -2, m2 = 2, s1 = 2, s2 = 2, pi = 0.5, control = list(nbatch = n)) tmp.4 <- postsamp(fmodelhcm, c(0,1,1,1,0), alph = apar, beta = bpar, prior = dmixnorm, m1 = -2, m2 = 2, s1 = 2, s2 = 2, pi = 0.5, control = list(nbatch = n)) tmp <- data.frame(zeta = c(tmp.1$batch, tmp.2$batch, tmp.3$batch, tmp.4$batch), Pattern = rep(letters[1:4], times = c(n,n,n,n*5))) p <- ggplot(tmp, aes(x = zeta, color = Pattern, linetype = Pattern)) p <- p + geom_density(adjust = 2) p <- p + theme_bw() + theme(legend.key = element_blank()) p <- p + theme(text = element_text(size = 20)) p <- p + theme(axis.text.x = element_text(size = 15, color = "black")) p <- p + theme(axis.text.y = element_text(color = "black")) p <- p + xlab(expression(zeta)) + ylab("Posterior Density") plot(p) ## Figure 12 Item and test Fisher information functions for five items of a hyperbolic cosine model. zeta <- seq(-3, 3, length = 100) info <- matrix(NA, 100, 5+1) for (j in 1:100) { tmp <- information(fmodelhcm, c(0,0,0,0,0), zeta = zeta[j], alph = apar, beta = bpar) info[j,] <- c(tmp$test, tmp\$item)
}

info <- data.frame(cbind(zeta, info))
names(info) <- c("zeta","test",paste("item", 1:5))
info <- melt(info, id.vars = "zeta", variable.name = "Type", value.name = "information")

p <- ggplot(info, aes(x = zeta, y = information, color = Type, linetype = Type))
p <- p + geom_line() + ylab("Information") + xlab(expression(zeta))
p <- p + theme_bw() + theme(legend.key = element_blank())
p <- p + theme(text = element_text(size = 15))
p <- p + theme(axis.text = element_text(size = 15))

plot(p)

## References

Johnson, T. R. & Kuhn, K. M. (2015). Simulation-Based Bayesian Inference for Latent Traits of Item Response Models: Introduction to the ltbayes Package for R. Behavior Research Methods, 47, 1309–1327.