R Scripts for graphing

Two parameter model: Continuous predictor

valuesToPredict = seq(min(d$education), max(d$education), length = 100) #if 100 doesn’t work, change to range of the variable
dIncome <- data.frame(education=valuesToPredict, prestige = mean(d$prestige), women = mean(d$women))
dIncome <- modelPredictions(m3, dIncome)

scatterplot <- ggplot() +
geom_point(data=d, aes(x = education, y = income), color = “black”) +
geom_smooth(aes(ymin = CILo, ymax = CIHi, x = education, y = Predicted),
data = dIncome, stat = “identity”, color=”red”) +
theme_bw(base_size = 14) +
scale_x_continuous(“Education”, breaks = seq(6, 16, by=2)) + # x-axis label and tick mark location. Use scale_x_continuous() because x is specified as a continuous variable in default aes settings
scale_y_continuous(“Income”, # y-axis label and tick mark location. Use scale_y_continuous() because y is specified as continuous variable in default aes settings
breaks = seq(600, 26000, by=2000)) +
labs(title = ‘Effect of Education on Income’)
scatterplot


 

Two parameter model: Dichotomous Predictor (error bars + raw data)

pYA <- data.frame(Condition = c(0, 1))
pYA <- modelPredictions(mA, pYA)
head(pYA)

# make a Condition variable that has a descriptive name for labels
d$ConditionStr=varRecode(d$Condition,c(0,1),c(‘Control’,’Training’))
pYA$ConditionStr=varRecode(pYA$Condition,c(0,1),c(‘Control’,’Training’))

# make a new variable from the “Predicted” value in pY so it matches the label in the data frame:
pYA$Wk4IAT = pYA$Predicted

barplot <- ggplot(d, aes(x = ConditionStr, y = Predicted)) +
geom_bar(mapping = aes(fill = as.factor(ConditionStr)),
data = pYA,
stat = “identity”,
width = 0.5) +
geom_errorbar(data = pYA, width=.25,
aes(y = Predicted, x = ConditionStr,
ymin = CILo,
ymax = CIHi), stat=”identity”, width=0.75) +
geom_point(data = d, aes(y = Wk4IAT, x = ConditionStr),colour=’darkgrey’,
position = position_jitter(w = 0.1, h = 0.1)) +
labs(y = ‘IAT Scores at Week 4’, x = ‘Condition’) +
ggtitle(‘Week 4 IAT by Condition’) +
theme_bw(base_size = 14) +
theme(legend.position=”none”)
barplot


Multiple regression: controlling for dichotomous variable

ConcernToPredict = seq(1,10, length = 200) # why did we use 1 and 10?
dNew <- expand.grid(ConcernM=ConcernToPredict, Sex=c(0, 1))
pY <- modelPredictions(m3, dNew)

head(pY)
tail(pY)
#CILo & CIHi are one standard error (+-1) confidence interval
#Predicted +- SE = CILo & CIHi –> error bands based on standard error

scatterplot <- ggplot(data=pY, aes(x = ConcernM, y = Predicted, color = as.factor(Sex))) +
geom_point(data=d, aes(x = ConcernM, y = Wk4IAT, color = as.factor(Sex))) +
geom_smooth(aes(ymin = CILo, ymax = CIHi),
data = pY, stat = “identity”, formula = y ~ x) +
labs(x = ‘Concern (Mean)’, # indicate a label for the x-axis
y = ‘Week 4 IAT Score’, title = ‘Effect of Concern on IAT Scores’) +
scale_color_discrete(name =”Gender”,
labels=c(“Female”, “Male”)) + # changing labels of legend. Use scale_color_discrete() because Condition is specifed as a factor (i.e., discrete variable) that’s set to color in default aes settings
theme_bw(base_size = 14) + # removing background of graph
theme(legend.position = c(0.8, 0.9), # positioning legend (play around with values)
legend.background = element_blank()) # removing background of legend
scatterplot


Scatter plot for interaction of 2 predictors (1 continusous, 1 dichotomous) – 2 regression lines

# Make dichotomous categories into strings
d$ConditionStr = varRecode(d$Condition, Old=c(0,1), New=c(‘Observation’, ‘Collaboration’))

# Model with string dichotomous variable
mIntPlot <- lm(Inferences ~ ConditionStr * Age, data=d)
modelSummary(mIntPlot)

# predict data from the model separately by group
yHatObs <- data.frame(Age=seq(min(d$Age),max(d$Age)),ConditionStr=’Observation’) # make a new data frame predictor set to a range of values from the min to max with >= length of actual data
yHatObs <- modelPredictions(mIntPlot, yHatObs) # predict values for the DV based on those values of the IV
head(yHatObs) # check out the results of what you did above and make sure it makes sense

yHatColl <- data.frame(Age=seq(min(d$Age),max(d$Age)),ConditionStr=’Collaboration’) # make a new data frame predictor set to a range of values from the min to max with >= length of actual data
yHatColl <- modelPredictions(mIntPlot, yHatColl) # predict values for the DV based on those values of the IV
head(yHatColl) # check out the results of what you did above and make sure it makes sense
scatterplot <- ggplot(data=d, aes(x = Age, y = Inferences)) +
geom_point(aes(color=ConditionStr, shape=ConditionStr)) + # set color AND shape by group
scale_colour_brewer(palette=”Set1″) + # use this function to change the color pallette; see help for options (or google it)
geom_smooth(aes(ymin = CILo, ymax = CIHi, x = Age, y = Predicted), # the predicted regression line for observation condition
data = yHatObs, stat = “identity”, color=”blue”) + # set data frame for observation group
geom_smooth(aes(ymin = CILo, ymax = CIHi, x = Age, y = Predicted), # the predicted regression line for collaboration condition
data = yHatColl, stat = “identity”, color=”red”) + # set data frame for collaboration group
theme_bw(base_size = 14) +
labs(x = ‘Age (months)’, # indicate a label for the x-axis
y = ‘Inferences (%)’) +
coord_cartesian(ylim = c(0,100), xlim = c(35,64)) + # specify the range of axes
theme(legend.position = c(.2,.93), # positioning legend (play around with values; (0,0) is bottom left, and (1,1) is top right)
legend.background = element_blank(), # removing background of legend
legend.title = element_blank()) # removing title of legend
scatterplot


Bar plot: Two categorical IVs Interaction

# Recode from numeric to actual names
d$SexStr <- varRecode(d$sexC, c(-0.5, 0.5), c(“Male”,”Female”))
d$ConditionStr <- varRecode(d$conditionC, c(-0.5, 0.5), c(“Deprived”,”Nondeprived”))

# Make the strings factor
d$SexStr=as.factor(d$SexStr)
d$ConditionStr=as.factor(d$ConditionStr)

# Re-run the model with IVs as factors and make sure that everything looks fine
mInterPlot <- lm(anxSTL ~ ConditionStr + SexStr + ConditionStr:SexStr, data=d)
modelSummary(mInterPlot)
# Yhat predictions for each ‘condition’ separately:

YhatDeprived <- data.frame(ConditionStr = ‘Deprived’, SexStr = c(‘Male’,’Female’)) # make a new data frame predictor set at the 2 levels
YhatDeprived <- modelPredictions(mInterPlot, YhatDeprived) # predict values for the DV based on those levels of the IV
YhatDeprived # check out the results of what you did above and make sure it makes sense

YhatNonDeprived <- data.frame(ConditionStr = ‘Nondeprived’, SexStr = c(‘Male’,’Female’)) # make a new data frame predictor set at the 2 levels
YhatNonDeprived <- modelPredictions(mInterPlot, YhatNonDeprived) # predict values for the DV based on those levels of the IV
YhatNonDeprived # check out the results of what you did above and make sure it makes sense

Yhats <- rbind(YhatDeprived,YhatNonDeprived) # Combine Yhats for each ‘condition’

# Graph

BarPlot=ggplot(Yhats, aes(x = ConditionStr, y = Predicted, fill = SexStr))+
geom_bar(stat=’identity’, position=position_dodge(.9))+
geom_errorbar(aes(ymax=CIHi, ymin=CILo), position=position_dodge(.9), width=0.25)+
theme_bw()+
theme(legend.title=element_blank(),
panel.grid.major=element_blank(),
panel.grid.minor=element_blank(),
panel.border=element_blank(),
axis.line=element_line(),
text=element_text(family=’Times’))+
ylab(‘Startle Response’) + # Y-axis title
xlab(‘Nicotine Deprivation’) + # X-axis title
scale_fill_grey() # Grayscale automatic colors

BarPlot


 

Interaction model with continuous variables (+1 SD, -1 SD)

# predict data for Exercise set at +1 and -1 S.D. on Age with error bands

varDescribe(d)
d$ExerciseF = if(d$Exercise < 16.06, “-1 SD”, NA)
d$ExerciseF = ifelse(d$Exercise > 43.70, “+1 SD”, NA)

mplot <- lm(Health ~ Age + Exercise + Age:Exercise, data=d)
modelSummary(mplot)

yHatm1sdU <- data.frame(Age=seq(min(d$Age),max(d$Age)),Exercise=16.06) # make a new data frame predictor set to a range of values from the min to max with >= length of actual data
yHatm1sdU <- modelPredictions(mplot, yHatm1sdU) # predict values for the DV based on those values of the IV
yHatm1sdU$ExerciseF = “-1 SD”

yHatp1sdU <- data.frame(Age=seq(min(d$Age),max(d$Age)),Exercise=43.70) # make a new data frame predictor set to a range of values from the min to max with >= length of actual data
yHatp1sdU <- modelPredictions(mplot, yHatp1sdU) # predict values for the DV based on those values of the IV
yHatp1sdU$ExerciseF = “+1 SD”

scatterplot <- ggplot(data=d, aes(x = Age, y = Health)) +
geom_point() + # set color AND shape by group
geom_smooth(aes(ymin = CILo, ymax = CIHi, x = Age, y = Predicted, color=ExerciseF), # the predicted regression line
data = yHatp1sdU, stat = “identity”) +
geom_smooth(aes(ymin = CILo, ymax = CIHi, x = Age, y = Predicted, color=ExerciseF), # the predicted regression line
data = yHatm1sdU, stat = “identity”) + # set data frame for blue collar group
scale_color_discrete(name=’Exercise’) +
theme_bw(base_size = 14)
scatterplot


Three-way interaction: continuous variables

# Recode Gender as factor
d$GenderF <- as.factor(varRecode(d$Gender, c(0,1), c(“Girls”,”Boys”)))

# Model for plotting
mInter = lm(Achieve ~ Hours * GenderF * Ready, data=d)
modelSummary(mInter)

# Prepare independent variables for ggplot
HoursToPredict = seq(min(d$Hours), max(d$Hours), length = 120)
Ready_lo <- mean(d$Ready) – sd(d$Ready)
Ready_hi <- mean(d$Ready) + sd(d$Ready)

# Use modelPredictions() to generate Y-hats
yHats <- expand.grid(Hours = HoursToPredict, Ready = c(Ready_lo, Ready_hi), GenderF=c(“Girls”,”Boys”)) # all IVs
yHats <- modelPredictions(mInter, yHats)
# Starting plot in which we graph regression lines
modelplot <- ggplot() +
geom_point(data=d, aes(x = Hours, y = Achieve), color = “black”) +
geom_smooth(aes(ymin = CILo, ymax = CIHi, x = Hours, y = Predicted,
color=as.factor(Ready), group = as.factor(Ready), lineGender=as.factor(Ready)),
# you can put in interactions in the color labeling!!! i.e. colour = orientation:direction
data = yHats, stat = “identity”) +
facet_wrap(“GenderF”) + # creates two panels based on perceived efficacy label
# Note: the facet_wrap function creates two panels in a single window in this case, because
# each predicted value corresponds to one of two values for eff, the mean + 1 SD or the mean – 1 SD.
scale_x_continuous(“Hours”, breaks = seq(0, 60, by=10)) + # x-axis label and tick mark location
scale_y_continuous(“Achieve”, # y-axis label and tick mark location
breaks = seq(0, 75, by=10)) +

scale_linetype_discrete(name =”Readiness”,
labels=c(“- 1SD”, “+ 1SD”)) + # changing labels of legend.
scale_color_discrete(name =”Readiness”,
labels=c(“- 1SD”, “+ 1SD”)) + # changing labels of legend.
theme_bw(base_size = 14) + # removing background of graph
theme(legend.position = c(0.92, 0.13), # positioning legend (play around with values)
legend.background = element_blank()) # removing background of legend

modelplot


Polynomial Regression (with regions of significance)

Yhat = seq(min(d$Years), max(d$Years), length = 60)
Yhat <- data.frame(Years=Yhat)
Yhat <- modelPredictions(mPoly, Yhat)

library(ggplot2)

scatterplot <- ggplot() +
geom_point(data=d, aes(x = Years, y = Productivity), color = “black”) + # RAw adjusted Data
geom_smooth(aes(ymin = CILo, ymax = CIHi, x = Years, y = Predicted), #model data
data = Yhat, stat = “identity”, color=”red”) +

theme_bw(base_size = 14) +

scale_x_continuous(“Years since PhD”, breaks = seq(0, 60, by=10)) + # x-axis label and tick mark location
scale_y_continuous(“Productivity”, # y-axis label and tick mark location
breaks = seq(-2, 4, by=0.5))

scatterplot
############################################################################################
# Regions of significance: This is the range of x values for which the slope is significant.
############################################################################################

# You’ll need the model covariance matrix:
vcov(mPoly) # Notice that I’m using the same model that went into ggplot

# You’ll have to calculate the coefficient variances.
# (The squared SEs for each parameter estimate)
# b0:
b0var = (0.1304008)^2
b1var = (0.0120446)^2
b3var = (0.0002392)^2

# Then plug all the information into: http://quantpsy.org/interact/mlr2.htm

# For an interactive model, you have the following parameters to plug into the web app:
# b0, b1, b2, b3 …. from: Yhat = b0 + b1 x + b2 z + b3 x z

# For a quadratic model, you do not have a b2, hence you plug zeros into the web app:
# b0, b1, 0, b3 ….. from Yhat = b0 + b1 x + b3 x^2
# Do it and see what you get.
# Is the whole range of x values englobing the data significant?

# Double Check the web input
# File: Web Input.png

# Double check the web output
# File: Quadratic Model & Regions of Significance.txt
# Redo the graph above, and shade to the area of significance

scatterplot <- ggplot() +
geom_point(data=d, aes(x = Years, y = Productivity), color = “black”) + # RAw adjusted Data
geom_smooth(aes(ymin = CILo, ymax = CIHi, x = Years, y = Predicted), #model data
data = Yhat, stat = “identity”, color=”red”) +

theme_bw(base_size = 14) +

scale_x_continuous(“Years since PhD”, breaks = seq(0, 60, by=10)) + # x-axis label and tick mark location
scale_y_continuous(“Productivity”, # y-axis label and tick mark location
breaks = seq(-2, 4, by=0.5)) +
geom_rect(aes(xmin=0.01756, xmax=59.9825, ymin=-Inf, ymax=Inf, alpha=0.2))
# alpha = transparency
# min & max for shading the canvas.

scatterplot