### functions for brute force AMOC changepoint / breakpoint modeling
## Laurel Brehm, Summer 2015
#######Basics
## Model fitting procedure for 'at most one change' with two discontinuous line segements
## Used to distinguish predictor category structure (discrete or gradient) for logistic and linear regression.
## Discrete models will be piecewise with one change, leading to big difference in intercept and small difference in slope on each side.
## Gradient models will not be piecewise: One line segment should have the highest likelihood. This can be confirmed with CIs for the sides of the model: Even the very best piecewise model taken from a gradient predictor should have no difference in intercept between sides.
## Citation for this analysis technique: Brehm & Goldrick (in prep). Distinguishing discrete and gradient category structure in language.
## Piecewise model fitting is based upon Crawley's R Book. (https://archive.org/details/TheRBook)
######Functions
## RunAMOC function: Fits a piecewise model on top of an LMER or GLM. Function fits a series of nested models varying on where a break is specified in a predictor. Breaks are found by grid search within an interval specified by the user. Function finds the breakpoint model with the maximum likelihood.
## adjABIC function: Adjusts degrees of freedom for breakpoint model to use AIC and BIC tests, and also performs likelihood testing with chi-squared.
## labeledSummary: outputs a slightly-prettier summary table for a piecewise model
runAMOC <- function(cmodel,breakfactor,onset,offset){
## cmodel = a zero-break comparison regression model. any type of regression model should work: I've tested on lmer and glmer.
## breakfactor = the factor to be searched for one breakpoint. input as *text*, with quotes around it
## onset = starting quantile for grid search
## offset = ending quantile for grid search
##
#extract the formula and data from comparison model
cFormula <- attributes(cmodel)$call
dataF <- attributes(cmodel)$frame
#extract the levels of the broken factor
#note that more variability on values of broken factor will lead to finer search resolution
cq <- quantile(dataF[,breakfactor],probs=c(onset,offset))
l2 <- dataF[,breakfactor]
cqs <- l2[ l2 > cq[1] & l2 < cq[2]]
cqs <- levels(as.factor(cqs))
breakTests <- length(cqs)
cp <- list()
logLs <- list()
warns <- list()
for (i in 1:breakTests){
bL <- eval(cqs[i])
bFormula <- cFormula
bFormula[2] <- sub(breakfactor,paste(breakfactor,'*((',breakfactor,' < ',bL,') + (',breakfactor,' >=',bL,'))',""),bFormula[2])
cp[[i]]<- list(eval(bFormula))
logLs[[i]]<- logLik(cp[[i]][[1]])
xt <- cp[[i]][[1]]@optinfo$conv$lme4$code
warns[[i]]<- ifelse(length(xt)==0,0,xt)
}
out1 <- rbind(cqs,unlist(logLs),unlist(warns),abs(rank(unlist(logLs))-(breakTests +1)))
row.names(out1)<- c(paste(breakfactor,'Level',""),'LogLik','Warnings','ModelRank')
breakP <- out1[1,which.max(out1[2,])]
outb <- out1[,which(warns==0)]
if (length(outb) <= 4){
breakNW <- outb[1]}
else {
breakNW <- outb[1,which.max(outb[2,])] }
outFormula <- cFormula
outFormula[2] <- sub(breakfactor,paste(breakfactor,'*((',breakfactor,' < ',breakP,') + (',breakfactor,' >= ',breakP,'))',""),cFormula[2])
outNWFormula <- cFormula
outNWFormula[2] <- sub(breakfactor,paste(breakfactor,'*((',breakfactor,' < ',breakNW,') + (',breakfactor,' >= ',breakNW,'))',""),cFormula[2])
out <- list(out1,breakP,outFormula,breakNW,outNWFormula)
names(out) <- c("Piecewise Models","Break Point: Max LL","Best LL Model Formula","Break Point: No warnings","Best No Warnings Model Formula")
out
}
## Convenience function to make model outputs easier to read
## Sorts piecewise output into overall effects (general intercepts/slopes), piecewise adjustments to intercepts (in right half of model), and piecewise adjustements to slopes (in right half of model)
## optional flag: add in confidence intervals that are calculated from model profile
## This can be slow, so it defaults to false
labeledSummary <- function(model,breakfactor,getCI=FALSE){
#model = piecewise model
#breakfactor = variable that takes a breakpoint in piecewise model
intBreak <- paste(breakfactor, " <",sep='')
slopeBreak <- paste(breakfactor,":",breakfactor," <",sep='')
cfm <- round(coef(summary(model)),4)[,1:3]
lbs <- dimnames(cfm)[[1]]
ints <- matrix(nrow=length(lbs),ncol=2)
for (i in 1:length(lbs)){
ints[i,1] <- as.numeric(grepl(intBreak,lbs[i])) + as.numeric(grepl(slopeBreak,lbs[i])) + 1
ints[i,2] <- lbs[i]
}
cfm <- cbind(ints[,1],cfm)
if (getCI==FALSE){
labs=rbind(c(0," "," "," "),c(1," "," "," "),c(2," "," "," "))
row.names(labs)=c(" Overall Effects"," Intercept Adjustments"," Slope Adjustments")
cfm <- rbind(cfm,labs)
cfm <- cfm[order(cfm[,1]),]
cfm <- as.table(cfm[,2:4])
print('Random Effects')
print(vc <- VarCorr(model), comp = c("Variance", "Std.Dev."))
print('Fixed Effects')
print(cfm)
}
else {
cis <- round(confint(model),4)
colnames(cis)[1:2]=c('2.5% CI','97.5% CI')
cfm <- cbind(cfm,cis[4:dim(cis)[1],])
labs= rbind(c(0," "," "," "," "," "),c(1," "," "," "," "," "),c(2," "," "," "," "," "))
row.names(labs)=c(" Overall Effects"," Intercept Adjustments"," Slope Adjustments")
cfm <- rbind(cfm,labs)
cfm <- cfm[order(cfm[,1]),]
cfm <- as.table(cfm[,c(2:6)])
print('Random Effects')
print(vc <- VarCorr(model), comp = c("Variance", "Std.Dev."))
print('Fixed Effects')
print(cfm)
}
}
## AIC and BIC adjustment for change point models, with likelihood ratio test
adjABIC <- function(modelBreak,modelComparison,nBreaks=1){
#modelBreak = model with a breakpoint
#modelComparison = simpler comparison model
#number of breaks = number of extra parameters to add for penalty
if( attributes(modelBreak)[11]$devcomp$dims[12] != 0)
warning("Fit model with REML = F")
oLogLik <- logLik(modelBreak)
oPars <- as.numeric(attributes(oLogLik)[3])
nObs <- as.numeric(attributes(oLogLik)[1])
oAIC <- AIC(modelBreak)
oBIC <- BIC(modelBreak)
aPars <- oPars + nBreaks
aAIC <- as.numeric(2*aPars - 2*oLogLik)
aBIC <- as.numeric(aPars*log(nObs)-2*oLogLik)
cLogLik <- logLik(modelComparison)
cPars <- as.numeric(attributes(cLogLik)[3])
cAIC <- AIC(modelComparison)
cBIC <- BIC(modelComparison)
dPars <- aPars - cPars
dBIC <- cBIC - aBIC
llratio <- as.numeric(2*(oLogLik-cLogLik))
llP <- 1 - pchisq(llratio,dPars)
xxx <- " "
out <- rbind(c(aPars,round(oLogLik,2),round(aAIC,2),round(aBIC,2),xxx,xxx,xxx,xxx),c(cPars,round(cLogLik,2),round(cAIC,2),round(cBIC,2),round(dBIC,2),round(llratio,2),dPars,round(llP,4)))
colnames(out)<- c("NumPars","LogLik","AIC","BIC","deltaBIC","LLRatio","ChiDF","Pr(>Chisq)")
rownames(out)<- c("BrokenAdjusted","NoBreakComparison")
as.data.frame(out)
}