SDQ Predictive algorithm in R
Adapted from syntax available at the SDQ website.
The predictive algorithm is based on up to twelve input variables:
phyper = SDQ hyperactivity score from parent SDQ
thyper = SDQ hyperactivity score from teacher SDQ
shyper = SDQ hyperactivity score from self-report SDQ
pconduct = SDQ conduct problems score from parent SDQ
tconduct = SDQ conduct problems score from teacher SDQ
sconduct = SDQ conduct problems score from self-report SDQ
pemotion = SDQ emotional symptoms score from parent SDQ
temotion = SDQ emotional symptoms score from teacher SDQ
semotion = SDQ emotional symptoms score from self-report SDQ
pimpact = SDQ impact score from parent SDQ
timpact = SDQ impact score from teacher SDQ
simpact = SDQ impact score from self-report SDQ
The algorithm generates four output variables:
sdqed = prediction of an emotional disorder
(0 = unlikely, 1 = possible, 2 = probable)
sdqcd = prediction of a conduct disorder
(0 = unlikely, 1 = possible, 2 = probable)
sdqhk = prediction of a hyperactivity disorder
(0 = unlikely, 1 = possible, 2 = probable)
anydiag = prediction of any psychiatric disorder
(0 = unlikely, 1 = possible, 2 = probable)
Load required libraries
Note that the current scoring syntax is based on dplyr, which must be installed. I also recommend installing haven to read SPSS files into R. Use the links to read more about dplyr and haven.
library(dplyr)
library(haven)
Load your data into R
The syntax assumes that you have read your data into an object named sdq. This could be done with the command:
# Read the SPSS file "sdq.sav" into the object `sdq`; read_sav() comes from haven.
sdq <- read_sav("sdq.sav")
Note that read_sav is from haven and will fail if you have not installed it. Alternatively, you could use the package foreign, but this code is only tested with haven.
SDQ HYPERACTIVITY PREDICTION
# Hyperactivity prediction, built in a single mutate() pass:
#   phk / shk / thk = informant-level ratings derived from the parent,
#                     self-report and teacher hyperactivity and impact scores
#   pshk            = the parent rating, or the self-report rating where the
#                     parent rating is missing
#   sdqhk           = final prediction (0 = unlikely, 1 = possible, 2 = probable)
# case_when() takes the first rule that is TRUE; a rule that evaluates to NA
# never matches, so a rating stays NA when its score or its impact is missing.
sdq <- sdq %>%
  mutate(
    phk = case_when(
      (phyper >= 7 & pimpact >= 2) | (phyper >= 9 & pimpact >= 1) ~ 2,
      phyper >= 6 & pimpact >= 1 ~ 1,
      phyper >= 0 & pimpact >= 0 ~ 0
    ),
    shk = case_when(
      shyper >= 7 & simpact >= 2 ~ 2,
      shyper >= 6 & simpact >= 1 ~ 1,
      shyper >= 0 & simpact >= 0 ~ 0
    ),
    thk = case_when(
      thyper >= 7 & timpact >= 2 ~ 2,
      thyper >= 6 & timpact >= 1 ~ 1,
      thyper >= 0 & timpact >= 0 ~ 0
    ),
    # Parent rating first; fall back to self-report only where it is missing.
    pshk = coalesce(phk, shk),
    sdqhk = case_when(
      # Probable: parent/self rating is 2 and the teacher rating is at least 1.
      pshk == 2 & thk >= 1 ~ 2,
      # Possible: both ratings are at least 1.
      pshk >= 1 & thk >= 1 ~ 1,
      # Possible: no parent/self rating, teacher rating is at least 1.
      is.na(pshk) & thk >= 1 ~ 1,
      # Possible: parent/self rating is at least 1, no teacher rating.
      pshk >= 1 & !is.na(pshk) & is.na(thk) ~ 1,
      # Possible: one of the two ratings is 2 on its own.
      pshk == 2 | thk == 2 ~ 1,
      # Unlikely: at least one informant rating exists and nothing above matched.
      phk >= 0 | shk >= 0 | thk >= 0 ~ 0
    )
  )
SDQ CONDUCT PREDICTION
# Conduct disorder prediction (0 = unlikely, 1 = possible, 2 = probable).
# Rules are tried top to bottom and the first TRUE rule wins; rows where no
# rule is TRUE are left as NA.
sdq <- mutate(
  sdq,
  sdqcd = case_when(
    # No impact score from any informant: no prediction.
    is.na(pimpact) & is.na(timpact) & is.na(simpact) ~ NA_real_,
    # Probable: any informant exceeds both its conduct and impact cut-offs.
    (pconduct >= 5 & pimpact >= 2) |
      (tconduct >= 4 & timpact >= 2) |
      (sconduct >= 6 & simpact >= 2) ~ 2,
    # Possible: any conduct score reaches the lower cut-off.
    pconduct >= 4 | tconduct >= 3 | sconduct >= 5 ~ 1,
    # Unlikely: at least one conduct score is present.
    pconduct >= 0 | tconduct >= 0 | sconduct >= 0 ~ 0
  )
)
SDQ EMOTION PREDICTION
# Emotional disorder prediction (0 = unlikely, 1 = possible, 2 = probable).
# Stage 1 derives a preliminary sdqed flag plus one high-threshold flag per
# informant; stage 2 combines them with the conduct (sdqcd) and hyperactivity
# (sdqhk) predictions computed earlier in the script.
sdq <- sdq %>%
  mutate(
    # Preliminary flag: 1 when any informant reaches the lower cut-offs,
    # 0 when at least one emotion score is present, otherwise NA.
    sdqed = case_when(
      (semotion >= 6 & simpact >= 1) |
        (pemotion >= 5 & pimpact >= 1) |
        (temotion >= 5 & timpact >= 1) ~ 1,
      pemotion >= 0 | temotion >= 0 | semotion >= 0 ~ 0
    ),
    # High-threshold flags; a missing score or impact counts as 0, so the sum
    # below is never NA.
    pem = if_else(pemotion >= 6 & pimpact >= 2, 1, 0, missing = 0),
    tem = if_else(temotion >= 6 & timpact >= 2, 1, 0, missing = 0),
    sem = if_else(semotion >= 7 & simpact >= 2, 1, 0, missing = 0),
    # Number of informants meeting the high thresholds.
    allem = pem + tem + sem
  ) %>%
  mutate(
    sdqed_tmp = case_when(
      # No prediction when both conduct and hyperactivity predictions are NA.
      is.na(sdqcd) & is.na(sdqhk) ~ NA_real_,
      # Exactly one informant flag alongside a probable conduct or
      # hyperactivity prediction gives "possible" rather than "probable".
      allem == 1 & (sdqcd == 2 | sdqhk == 2) ~ 1,
      allem >= 1 ~ 2,
      sdqed == 1 ~ 1,
      TRUE ~ 0
    ),
    # Replace the preliminary flag with the final prediction.
    sdqed = sdqed_tmp
  )
SDQ ANY DISORDER PREDICTION
# Any-disorder prediction (0 = unlikely, 1 = possible, 2 = probable), taken
# from the three disorder-specific predictions. The first TRUE rule wins and
# rows where all three predictions are NA stay NA.
sdq <- mutate(
  sdq,
  anydiag = case_when(
    # Probable if any single prediction is probable.
    sdqed == 2 | sdqcd == 2 | sdqhk == 2 ~ 2,
    # Possible if any single prediction is at least possible.
    sdqed >= 1 | sdqcd >= 1 | sdqhk >= 1 ~ 1,
    # Unlikely if at least one prediction is available.
    sdqed >= 0 | sdqcd >= 0 | sdqhk >= 0 ~ 0
  )
)
CLEANING UP
# Drop the intermediate helper columns, then turn the four numeric
# predictions into ordered factors (Unlikely < Possible < Probable).
sdq <- sdq %>%
  select(-c(phk, shk, thk, pshk, pem, tem, sem, allem, sdqed_tmp)) %>%
  mutate(
    sdqhk = ordered(
      sdqhk,
      levels = c(0, 1, 2),
      labels = c("Unlikely", "Possible", "Probable")
    ),
    sdqcd = ordered(
      sdqcd,
      levels = c(0, 1, 2),
      labels = c("Unlikely", "Possible", "Probable")
    ),
    sdqed = ordered(
      sdqed,
      levels = c(0, 1, 2),
      labels = c("Unlikely", "Possible", "Probable")
    ),
    anydiag = ordered(
      anydiag,
      levels = c(0, 1, 2),
      labels = c("Unlikely", "Possible", "Probable")
    )
  )