Get and Prep Example Data

We’ll use the faketucky dataset to provide an example of our “restricted” dataset.

But first a quick rant about code style. Notice the reference to the function:

packageName::functionName()

This is how you should write code if you want to avoid headaches in the future, particularly when a function with the same name may exist in multiple packages. Chasing down which package a function comes from is rarely a good use of time, and a few extra keystrokes up front save a lot of effort later. In some languages this is the only way to do things, while others allow assigning aliases to packages/libraries (e.g., import pandas as pd; pd.read_stata()). That concludes my coding-related rant.

# Set the pseudo-random number seed for the sake of replicability
set.seed(7779311)

# Load the library we'll use to parse the Stata formatted file
library(haven)

# Load the library we'll use to do some data munging
library(dplyr)

# Storing the file location in a variable for code formatting purposes
filenm <- "https://github.com/OpenSDP/faketucky/raw/master/faketucky.dta"

# Lookup table of the 38 variables we keep: the short name we want (left) is
# paired with the original faketucky variable name (right). Driving both the
# column selection and the renaming from this single table means a column can
# never be mislabelled because of the order in which the reader returns it.
varmap <- c(
  stdid         = "sid",
  distid        = "first_dist_code",
  schcd         = "first_hs_code",
  altsch        = "first_hs_alt",
  urbanicity    = "first_hs_urbanicity",
  cohort        = "chrt_ninth",
  male          = "male",
  race          = "race_ethnicity",
  frleverhs     = "frpl_ever_in_hs",
  swdeverhs     = "sped_ever_in_hs",
  eleverhs      = "lep_ever_in_hs",
  tageverhs     = "gifted_ever_in_hs",
  alteverhs     = "ever_alt_sch_in_hs",
  mthss6        = "scale_score_6_math",
  rlass6        = "scale_score_6_read",
  mthss8        = "scale_score_8_math",
  rlass8        = "scale_score_8_read",
  pctabshs      = "pct_absent_in_hs",
  pctexcusedhs  = "pct_excused_in_hs",
  hsgpa         = "avg_gpa_hs",
  acteng11      = "scale_score_11_eng",
  actmth11      = "scale_score_11_math",
  actrla11      = "scale_score_11_read",
  actcmp11      = "scale_score_11_comp",
  evercollrdyhs = "collegeready_ever_in_hs",
  evercarrdyhs  = "careerready_ever_in_hs",
  aptakenever   = "ap_ever_take_class",
  lastobsyr     = "last_acadyr_observed",
  transfer      = "transferout",
  dropout       = "dropout",
  stillenrolled = "still_enrolled",
  gradontime    = "ontime_grad",
  gradcohort    = "chrt_grad",
  diploma       = "hs_diploma",
  yr1psenrany   = "enroll_yr1_any",
  yr1psenr2yr   = "enroll_yr1_2yr",
  yr1psenr4yr   = "enroll_yr1_4yr",
  yr2psenrany   = "enroll_yr2_any"
)

# Read the dataset into memory, keeping only the variables listed in varmap.
# unname() is used so the selection itself does not attempt any renaming.
df <- haven::read_dta(filenm, col_select = dplyr::all_of(unname(varmap)))

# Rename variables to less verbose but still clear names. The rename is done
# by name (new = old), so it does not depend on column position.
df <- dplyr::rename(df, dplyr::all_of(varmap))

# Create the unique school identifier by concatenating the dist & sch codes
df$schid <- paste0(df$distid, df$schcd)

# Get a random sample of 60 school IDs (drawn without replacement from the
# distinct IDs, so the result depends on the seed set above)
validSchools <- data.frame("schid" = sample(unique(df$schid), size = 60))

# Filter the data set to include only the random sample of school IDs. The
# join key is named explicitly so the join cannot silently pick up any other
# shared column and does not emit the "Joining with `by = ...`" message.
df <- dplyr::inner_join(df, validSchools, by = "schid")

# Variables that are categorical and should be stored as factors. Each name
# appears exactly once; the order here has no effect on the column order of df.
fctvars <- c("altsch", "cohort", "male", "swdeverhs", "eleverhs", "schid",
             "tageverhs", "alteverhs", "evercollrdyhs", "evercarrdyhs",
             "aptakenever", "transfer", "dropout", "stillenrolled",
             "gradontime", "diploma", "yr1psenrany", "yr1psenr2yr",
             "yr1psenr4yr", "yr2psenrany", "race", "urbanicity", "frleverhs",
             "lastobsyr", "gradcohort")

# Convert all of the listed columns in a single assignment: as.factor() is
# applied to each column in turn and the results are written back in place.
df[fctvars] <- lapply(df[fctvars], as.factor)

# Remove the district and school codes, which are subsumed in schid. The
# columns are dropped by name so this does not depend on their position.
# Note that the student ID (stdid) is NOT removed here; it is still the first
# column of df after this step.
df <- df[setdiff(names(df), c("distid", "schcd"))]

# Display a sample of the data
head(df, n = 20)
## # A tibble: 20 × 37
##    stdid altsch urbanicity cohort male  race        frleverhs swdeverhs eleverhs
##    <dbl> <fct>  <fct>      <fct>  <fct> <fct>       <fct>     <fct>     <fct>   
##  1    10 0      3          2010   0     White       1         0         0       
##  2  1622 0      3          2009   1     Multiple/N… 1         0         0       
##  3  1877 0      3          2009   1     White       1         0         0       
##  4  1941 0      3          2009   1     White       0         0         0       
##  5  2405 0      3          2010   0     White       0         0         0       
##  6  2551 0      3          2010   1     White       1         0         0       
##  7  2719 0      3          2010   0     White       1         0         0       
##  8  3044 0      3          2010   0     White       1         0         0       
##  9  3268 0      3          2010   0     White       0         0         0       
## 10  3442 0      3          2009   0     White       0         0         0       
## 11  3647 0      3          2010   0     White       0         0         0       
## 12  4489 0      3          2010   0     Multiple/N… 1         0         0       
## 13  4623 0      3          2009   1     White       1         0         0       
## 14  4913 0      3          2009   1     White       1         0         0       
## 15  5685 0      3          2010   0     White       0         0         0       
## 16  5754 0      3          2009   1     White       1         0         0       
## 17  6293 0      3          2009   0     White       1         0         0       
## 18  7010 0      3          2009   1     White       0         0         0       
## 19  7923 0      3          2010   0     White       1         0         0       
## 20  8343 0      3          2009   1     White       0         1         0       
## # ℹ 28 more variables: tageverhs <fct>, alteverhs <fct>, mthss6 <dbl>,
## #   rlass6 <dbl>, mthss8 <dbl>, rlass8 <dbl>, pctabshs <dbl>,
## #   pctexcusedhs <dbl>, hsgpa <dbl>, acteng11 <dbl>, actmth11 <dbl>,
## #   actrla11 <dbl>, actcmp11 <dbl>, evercollrdyhs <fct>, evercarrdyhs <fct>,
## #   aptakenever <fct>, lastobsyr <fct>, transfer <fct>, dropout <fct>,
## #   stillenrolled <fct>, gradontime <fct>, gradcohort <fct>, diploma <fct>,
## #   yr1psenrany <fct>, yr1psenr2yr <fct>, yr1psenr4yr <fct>, …

Simple Synthesis Example

Although this likely is not what you should do in practice, I’ll show the simplest possible example of generating a synthetic dataset:

# Loads the library that is used for synthetic data generation in a decent 
# amount of the SDC literature
library(synthpop)

# Simplest synthetic example: hand the whole data frame to syn() and accept
# every default (methods, visit sequence, and predictor matrix)
cake <- synthpop::syn(data = df)
## 
## Synthesis
## -----------
##  stdid altsch urbanicity cohort male race frleverhs swdeverhs eleverhs tageverhs
##  alteverhs mthss6 rlass6 mthss8 rlass8 pctabshs pctexcusedhs hsgpa acteng11 actmth11
##  actrla11 actcmp11 evercollrdyhs evercarrdyhs aptakenever lastobsyr transfer dropout stillenrolled gradontime
##  gradcohort diploma yr1psenrany yr1psenr2yr yr1psenr4yr yr2psenrany schid
# Print a summary of the object returned by synthpop::syn()
print(cake)
## Call:
## ($call) synthpop::syn(data = df)
## 
## Number of synthesised data sets: 
## ($m)  1 
## 
## First rows of synthesised data set: 
## ($syn)
##   stdid altsch urbanicity cohort male             race frleverhs swdeverhs
## 1 94426      0          3   2010    1            White         1         0
## 2  9490      0          9   2010    1            White         1         0
## 3 49128      0          6   2010    0            White         1         0
## 4 59084      0          1   2010    1 African-American         1         1
## 5 38865      0          9   2009    0 African-American         0         0
## 6 17927      0          9   2009    1                          0         0
##   eleverhs tageverhs alteverhs mthss6 rlass6 mthss8 rlass8  pctabshs
## 1        0         0         0    600    624     49     52  5.263636
## 2        0         0         0    623    636     75     64  2.815296
## 3        0         0         0    642    658     51     76 11.381429
## 4        0         0         1     NA     NA     NA     15 17.663366
## 5        0         1         0     NA     NA     23     48 10.941595
## 6        0         1         0     NA     NA     79     61  2.221751
##   pctexcusedhs   hsgpa acteng11 actmth11 actrla11 actcmp11 evercollrdyhs
## 1    3.9951377 3.59840       22       24       20       21             1
## 2    2.3564065 3.36000       29       28       27       27             1
## 3    5.0925927 3.12250       23       23       22       23             0
## 4    0.9827586 1.66275       NA       NA       NA       NA             0
## 5    7.9750371 2.27150       NA       NA       NA       NA             0
## 6    2.3103449 2.82925       22       24       21       21             0
##   evercarrdyhs aptakenever lastobsyr transfer dropout stillenrolled gradontime
## 1            1           1      2013        0       0             0          1
## 2            0           1      2013        0       0             0          1
## 3            0           1      2013        0       0             0          1
## 4            0           0      2013        1       0             0          0
## 5            0           0      2009        0       1             0          0
## 6            0           0      2012        0       0             0          1
##   gradcohort diploma yr1psenrany yr1psenr2yr yr1psenr4yr yr2psenrany   schid
## 1       2013       1           1           1           0           0 1751840
## 2       2013       1           1           0           1           0 2533120
## 3       2013       1           1           0           1           0 5177492
## 4       <NA>       0        <NA>        <NA>        <NA>        <NA> 6079352
## 5       <NA>       0        <NA>        <NA>        <NA>        <NA> 3013590
## 6       2012       1           1           1           0           1 2593656
## ...
## 
## Synthesising methods: 
## ($method)
##         stdid        altsch    urbanicity        cohort          male 
##      "sample"        "cart"        "cart"        "cart"        "cart" 
##          race     frleverhs     swdeverhs      eleverhs     tageverhs 
##        "cart"        "cart"        "cart"        "cart"        "cart" 
##     alteverhs        mthss6        rlass6        mthss8        rlass8 
##        "cart"        "cart"        "cart"        "cart"        "cart" 
##      pctabshs  pctexcusedhs         hsgpa      acteng11      actmth11 
##        "cart"        "cart"        "cart"        "cart"        "cart" 
##      actrla11      actcmp11 evercollrdyhs  evercarrdyhs   aptakenever 
##        "cart"        "cart"        "cart"        "cart"        "cart" 
##     lastobsyr      transfer       dropout stillenrolled    gradontime 
##        "cart"        "cart"        "cart"        "cart"        "cart" 
##    gradcohort       diploma   yr1psenrany   yr1psenr2yr   yr1psenr4yr 
##        "cart"        "cart"        "cart"        "cart"        "cart" 
##   yr2psenrany         schid 
##        "cart"        "cart" 
## 
## Order of synthesis: 
## ($visit.sequence)
##         stdid        altsch    urbanicity        cohort          male 
##             1             2             3             4             5 
##          race     frleverhs     swdeverhs      eleverhs     tageverhs 
##             6             7             8             9            10 
##     alteverhs        mthss6        rlass6        mthss8        rlass8 
##            11            12            13            14            15 
##      pctabshs  pctexcusedhs         hsgpa      acteng11      actmth11 
##            16            17            18            19            20 
##      actrla11      actcmp11 evercollrdyhs  evercarrdyhs   aptakenever 
##            21            22            23            24            25 
##     lastobsyr      transfer       dropout stillenrolled    gradontime 
##            26            27            28            29            30 
##    gradcohort       diploma   yr1psenrany   yr1psenr2yr   yr1psenr4yr 
##            31            32            33            34            35 
##   yr2psenrany         schid 
##            36            37 
## 
## Matrix of predictors: 
## ($predictor.matrix)
##               stdid altsch urbanicity cohort male race frleverhs swdeverhs
## stdid             0      0          0      0    0    0         0         0
## altsch            1      0          0      0    0    0         0         0
## urbanicity        1      1          0      0    0    0         0         0
## cohort            1      1          1      0    0    0         0         0
## male              1      1          1      1    0    0         0         0
## race              1      1          1      1    1    0         0         0
## frleverhs         1      1          1      1    1    1         0         0
## swdeverhs         1      1          1      1    1    1         1         0
## eleverhs          1      1          1      1    1    1         1         1
## tageverhs         1      1          1      1    1    1         1         1
## alteverhs         1      1          1      1    1    1         1         1
## mthss6            1      1          1      1    1    1         1         1
## rlass6            1      1          1      1    1    1         1         1
## mthss8            1      1          1      1    1    1         1         1
## rlass8            1      1          1      1    1    1         1         1
## pctabshs          1      1          1      1    1    1         1         1
## pctexcusedhs      1      1          1      1    1    1         1         1
## hsgpa             1      1          1      1    1    1         1         1
## acteng11          1      1          1      1    1    1         1         1
## actmth11          1      1          1      1    1    1         1         1
## actrla11          1      1          1      1    1    1         1         1
## actcmp11          1      1          1      1    1    1         1         1
## evercollrdyhs     1      1          1      1    1    1         1         1
## evercarrdyhs      1      1          1      1    1    1         1         1
## aptakenever       1      1          1      1    1    1         1         1
## lastobsyr         1      1          1      1    1    1         1         1
## transfer          1      1          1      1    1    1         1         1
## dropout           1      1          1      1    1    1         1         1
## stillenrolled     1      1          1      1    1    1         1         1
## gradontime        1      1          1      1    1    1         1         1
## gradcohort        1      1          1      1    1    1         1         1
## diploma           1      1          1      1    1    1         1         1
## yr1psenrany       1      1          1      1    1    1         1         1
## yr1psenr2yr       1      1          1      1    1    1         1         1
## yr1psenr4yr       1      1          1      1    1    1         1         1
## yr2psenrany       1      1          1      1    1    1         1         1
## schid             1      1          1      1    1    1         1         1
##               eleverhs tageverhs alteverhs mthss6 rlass6 mthss8 rlass8 pctabshs
## stdid                0         0         0      0      0      0      0        0
## altsch               0         0         0      0      0      0      0        0
## urbanicity           0         0         0      0      0      0      0        0
## cohort               0         0         0      0      0      0      0        0
## male                 0         0         0      0      0      0      0        0
## race                 0         0         0      0      0      0      0        0
## frleverhs            0         0         0      0      0      0      0        0
## swdeverhs            0         0         0      0      0      0      0        0
## eleverhs             0         0         0      0      0      0      0        0
## tageverhs            1         0         0      0      0      0      0        0
## alteverhs            1         1         0      0      0      0      0        0
## mthss6               1         1         1      0      0      0      0        0
## rlass6               1         1         1      1      0      0      0        0
## mthss8               1         1         1      1      1      0      0        0
## rlass8               1         1         1      1      1      1      0        0
## pctabshs             1         1         1      1      1      1      1        0
## pctexcusedhs         1         1         1      1      1      1      1        1
## hsgpa                1         1         1      1      1      1      1        1
## acteng11             1         1         1      1      1      1      1        1
## actmth11             1         1         1      1      1      1      1        1
## actrla11             1         1         1      1      1      1      1        1
## actcmp11             1         1         1      1      1      1      1        1
## evercollrdyhs        1         1         1      1      1      1      1        1
## evercarrdyhs         1         1         1      1      1      1      1        1
## aptakenever          1         1         1      1      1      1      1        1
## lastobsyr            1         1         1      1      1      1      1        1
## transfer             1         1         1      1      1      1      1        1
## dropout              1         1         1      1      1      1      1        1
## stillenrolled        1         1         1      1      1      1      1        1
## gradontime           1         1         1      1      1      1      1        1
## gradcohort           1         1         1      1      1      1      1        1
## diploma              1         1         1      1      1      1      1        1
## yr1psenrany          1         1         1      1      1      1      1        1
## yr1psenr2yr          1         1         1      1      1      1      1        1
## yr1psenr4yr          1         1         1      1      1      1      1        1
## yr2psenrany          1         1         1      1      1      1      1        1
## schid                1         1         1      1      1      1      1        1
##               pctexcusedhs hsgpa acteng11 actmth11 actrla11 actcmp11
## stdid                    0     0        0        0        0        0
## altsch                   0     0        0        0        0        0
## urbanicity               0     0        0        0        0        0
## cohort                   0     0        0        0        0        0
## male                     0     0        0        0        0        0
## race                     0     0        0        0        0        0
## frleverhs                0     0        0        0        0        0
## swdeverhs                0     0        0        0        0        0
## eleverhs                 0     0        0        0        0        0
## tageverhs                0     0        0        0        0        0
## alteverhs                0     0        0        0        0        0
## mthss6                   0     0        0        0        0        0
## rlass6                   0     0        0        0        0        0
## mthss8                   0     0        0        0        0        0
## rlass8                   0     0        0        0        0        0
## pctabshs                 0     0        0        0        0        0
## pctexcusedhs             0     0        0        0        0        0
## hsgpa                    1     0        0        0        0        0
## acteng11                 1     1        0        0        0        0
## actmth11                 1     1        1        0        0        0
## actrla11                 1     1        1        1        0        0
## actcmp11                 1     1        1        1        1        0
## evercollrdyhs            1     1        1        1        1        1
## evercarrdyhs             1     1        1        1        1        1
## aptakenever              1     1        1        1        1        1
## lastobsyr                1     1        1        1        1        1
## transfer                 1     1        1        1        1        1
## dropout                  1     1        1        1        1        1
## stillenrolled            1     1        1        1        1        1
## gradontime               1     1        1        1        1        1
## gradcohort               1     1        1        1        1        1
## diploma                  1     1        1        1        1        1
## yr1psenrany              1     1        1        1        1        1
## yr1psenr2yr              1     1        1        1        1        1
## yr1psenr4yr              1     1        1        1        1        1
## yr2psenrany              1     1        1        1        1        1
## schid                    1     1        1        1        1        1
##               evercollrdyhs evercarrdyhs aptakenever lastobsyr transfer dropout
## stdid                     0            0           0         0        0       0
## altsch                    0            0           0         0        0       0
## urbanicity                0            0           0         0        0       0
## cohort                    0            0           0         0        0       0
## male                      0            0           0         0        0       0
## race                      0            0           0         0        0       0
## frleverhs                 0            0           0         0        0       0
## swdeverhs                 0            0           0         0        0       0
## eleverhs                  0            0           0         0        0       0
## tageverhs                 0            0           0         0        0       0
## alteverhs                 0            0           0         0        0       0
## mthss6                    0            0           0         0        0       0
## rlass6                    0            0           0         0        0       0
## mthss8                    0            0           0         0        0       0
## rlass8                    0            0           0         0        0       0
## pctabshs                  0            0           0         0        0       0
## pctexcusedhs              0            0           0         0        0       0
## hsgpa                     0            0           0         0        0       0
## acteng11                  0            0           0         0        0       0
## actmth11                  0            0           0         0        0       0
## actrla11                  0            0           0         0        0       0
## actcmp11                  0            0           0         0        0       0
## evercollrdyhs             0            0           0         0        0       0
## evercarrdyhs              1            0           0         0        0       0
## aptakenever               1            1           0         0        0       0
## lastobsyr                 1            1           1         0        0       0
## transfer                  1            1           1         1        0       0
## dropout                   1            1           1         1        1       0
## stillenrolled             1            1           1         1        1       1
## gradontime                1            1           1         1        1       1
## gradcohort                1            1           1         1        1       1
## diploma                   1            1           1         1        1       1
## yr1psenrany               1            1           1         1        1       1
## yr1psenr2yr               1            1           1         1        1       1
## yr1psenr4yr               1            1           1         1        1       1
## yr2psenrany               1            1           1         1        1       1
## schid                     1            1           1         1        1       1
##               stillenrolled gradontime gradcohort diploma yr1psenrany
## stdid                     0          0          0       0           0
## altsch                    0          0          0       0           0
## urbanicity                0          0          0       0           0
## cohort                    0          0          0       0           0
## male                      0          0          0       0           0
## race                      0          0          0       0           0
## frleverhs                 0          0          0       0           0
## swdeverhs                 0          0          0       0           0
## eleverhs                  0          0          0       0           0
## tageverhs                 0          0          0       0           0
## alteverhs                 0          0          0       0           0
## mthss6                    0          0          0       0           0
## rlass6                    0          0          0       0           0
## mthss8                    0          0          0       0           0
## rlass8                    0          0          0       0           0
## pctabshs                  0          0          0       0           0
## pctexcusedhs              0          0          0       0           0
## hsgpa                     0          0          0       0           0
## acteng11                  0          0          0       0           0
## actmth11                  0          0          0       0           0
## actrla11                  0          0          0       0           0
## actcmp11                  0          0          0       0           0
## evercollrdyhs             0          0          0       0           0
## evercarrdyhs              0          0          0       0           0
## aptakenever               0          0          0       0           0
## lastobsyr                 0          0          0       0           0
## transfer                  0          0          0       0           0
## dropout                   0          0          0       0           0
## stillenrolled             0          0          0       0           0
## gradontime                1          0          0       0           0
## gradcohort                1          1          0       0           0
## diploma                   1          1          1       0           0
## yr1psenrany               1          1          1       1           0
## yr1psenr2yr               1          1          1       1           1
## yr1psenr4yr               1          1          1       1           1
## yr2psenrany               1          1          1       1           1
## schid                     1          1          1       1           1
##               yr1psenr2yr yr1psenr4yr yr2psenrany schid
## stdid                   0           0           0     0
## altsch                  0           0           0     0
## urbanicity              0           0           0     0
## cohort                  0           0           0     0
## male                    0           0           0     0
## race                    0           0           0     0
## frleverhs               0           0           0     0
## swdeverhs               0           0           0     0
## eleverhs                0           0           0     0
## tageverhs               0           0           0     0
## alteverhs               0           0           0     0
## mthss6                  0           0           0     0
## rlass6                  0           0           0     0
## mthss8                  0           0           0     0
## rlass8                  0           0           0     0
## pctabshs                0           0           0     0
## pctexcusedhs            0           0           0     0
## hsgpa                   0           0           0     0
## acteng11                0           0           0     0
## actmth11                0           0           0     0
## actrla11                0           0           0     0
## actcmp11                0           0           0     0
## evercollrdyhs           0           0           0     0
## evercarrdyhs            0           0           0     0
## aptakenever             0           0           0     0
## lastobsyr               0           0           0     0
## transfer                0           0           0     0
## dropout                 0           0           0     0
## stillenrolled           0           0           0     0
## gradontime              0           0           0     0
## gradcohort              0           0           0     0
## diploma                 0           0           0     0
## yr1psenrany             0           0           0     0
## yr1psenr2yr             0           0           0     0
## yr1psenr4yr             1           0           0     0
## yr2psenrany             1           1           0     0
## schid                   1           1           1     0

Why the example above is not generally a good idea

synthpop will make a lot of assumptions about your data based on arbitrary features, like the order of the variables in the data frame. In the example, every variable in the data frame will be used in the synthesis process and they will enter from the variable with the lowest index to the highest. This means that the underlying code base is going to start the synthesis process by resampling student IDs. We don’t want that to happen though. So, we need to either do some data munging to rearrange the order of the variables in the data frame or we need to pass some arguments to the function’s parameters to make it behave the way we want it to behave.

IMPORTANT: If you call the synthpop::syn() function with the default method and your session freezes or becomes unresponsive, try changing the method to "ctree". This recommendation comes from one of the authors of the software, and it worked for the example below (which is why that example uses method = "ctree").

# Order in which the variables should be visited during synthesis, grouped
# here by theme purely for readability
ord <- c(
  # school identifier and school/student characteristics
  "schid", "altsch", "urbanicity", "male", "race", "cohort",
  # program participation flags
  "frleverhs", "swdeverhs", "eleverhs", "tageverhs", "alteverhs",
  # middle grades test scores and high school attendance
  "mthss6", "rlass6", "mthss8", "rlass8", "pctabshs", "pctexcusedhs",
  # high school progression
  "aptakenever", "lastobsyr", "transfer", "dropout", "stillenrolled",
  # high school outcomes
  "hsgpa", "gradontime", "gradcohort", "diploma", "evercollrdyhs",
  "evercarrdyhs", "actmth11", "actrla11", "acteng11", "actcmp11",
  # postsecondary enrollment
  "yr1psenr2yr", "yr1psenr4yr", "yr1psenrany", "yr2psenrany"
)

# Build the synthetic dataset object from every column except the first one
# (the student ID), visiting the variables in the order given by ord, using
# the "ctree" method, and retaining the fitted models in the returned object.
cakier <- synthpop::syn(
  df[-c(1)],
  method = "ctree",
  visit.sequence = ord,
  models = TRUE
)
## 
## Synthesis
## -----------
##  schid altsch urbanicity male race cohort frleverhs swdeverhs eleverhs tageverhs
##  alteverhs mthss6 rlass6 mthss8 rlass8 pctabshs pctexcusedhs aptakenever lastobsyr transfer
##  dropout stillenrolled hsgpa gradontime gradcohort diploma evercollrdyhs evercarrdyhs actmth11 actrla11
##  acteng11 actcmp11 yr1psenr2yr yr1psenr4yr yr1psenrany yr2psenrany
# Print the synthesis object (call, first synthetic rows, methods, visit
# sequence, and predictor matrix)
print(cakier)
## Call:
## ($call) synthpop::syn(data = df[-c(1)], method = "ctree", visit.sequence = ord, 
##     models = TRUE)
## 
## Number of synthesised data sets: 
## ($m)  1 
## 
## First rows of synthesised data set: 
## ($syn)
##   altsch urbanicity cohort male                     race frleverhs swdeverhs
## 1      0          6   2010    1                    White         0         0
## 2      0          6   2009    1 Multiple/Native American         1         0
## 3      0          9   2010    1                    White         1         0
## 4      0         11   2010    1                    White         0         0
## 5      0          5   2009    1                    White         1         0
## 6      0          4   2009    0                    White         1         0
##   eleverhs tageverhs alteverhs mthss6 rlass6 mthss8 rlass8 pctabshs
## 1        0         0         0    639    630     53     52 2.011445
## 2        0         0         0     NA     NA     50     48 7.055163
## 3        0         0         1    627    625     48     44 2.074141
## 4        0         0         0    658    666     32     50 4.116643
## 5        0         0         0     NA     NA     43     35 2.487032
## 6        0         0         0     NA     NA     64     56 6.507123
##   pctexcusedhs    hsgpa acteng11 actmth11 actrla11 actcmp11 evercollrdyhs
## 1     1.607649 3.007350       19       22       19       18             1
## 2     2.982249 2.026625       16       18       18       18             0
## 3     1.841498 1.883952       NA       NA       NA       NA             0
## 4     3.868006 1.276900       NA       NA       NA       NA             0
## 5     1.629943 2.474250       19       15       16       16             0
## 6     4.864215 1.923000       13       16       21       17             0
##   evercarrdyhs aptakenever lastobsyr transfer dropout stillenrolled gradontime
## 1            0           0      2013        0       0             0          1
## 2            0           0      2012        0       0             0          1
## 3            0           0      2010        1       0             0          0
## 4            0           0      2012        1       0             0          0
## 5            0           0      2012        0       0             0          1
## 6            0           0      2012        0       0             0          1
##   gradcohort diploma yr1psenrany yr1psenr2yr yr1psenr4yr yr2psenrany   schid
## 1       2013       1           1           0           1           0 2112606
## 2       2012       1           1           1           0           0 2533120
## 3       <NA>       0        <NA>        <NA>        <NA>        <NA> 5778358
## 4       <NA>       0        <NA>        <NA>        <NA>        <NA> 4095680
## 5       2012       1           0           0           0           0 4245620
## 6       2012       1           0           0           0           0 1601256
## ...
## 
## Synthesising methods: 
## ($method)
##        altsch    urbanicity        cohort          male          race 
##       "ctree"       "ctree"       "ctree"       "ctree"       "ctree" 
##     frleverhs     swdeverhs      eleverhs     tageverhs     alteverhs 
##       "ctree"       "ctree"       "ctree"       "ctree"       "ctree" 
##        mthss6        rlass6        mthss8        rlass8      pctabshs 
##       "ctree"       "ctree"       "ctree"       "ctree"       "ctree" 
##  pctexcusedhs         hsgpa      acteng11      actmth11      actrla11 
##       "ctree"       "ctree"       "ctree"       "ctree"       "ctree" 
##      actcmp11 evercollrdyhs  evercarrdyhs   aptakenever     lastobsyr 
##       "ctree"       "ctree"       "ctree"       "ctree"       "ctree" 
##      transfer       dropout stillenrolled    gradontime    gradcohort 
##       "ctree"       "ctree"       "ctree"       "ctree"       "ctree" 
##       diploma   yr1psenrany   yr1psenr2yr   yr1psenr4yr   yr2psenrany 
##       "ctree"       "ctree"       "ctree"       "ctree"       "ctree" 
##         schid 
##      "sample" 
## 
## Order of synthesis: 
## ($visit.sequence)
##         schid        altsch    urbanicity          male          race 
##            36             1             2             4             5 
##        cohort     frleverhs     swdeverhs      eleverhs     tageverhs 
##             3             6             7             8             9 
##     alteverhs        mthss6        rlass6        mthss8        rlass8 
##            10            11            12            13            14 
##      pctabshs  pctexcusedhs   aptakenever     lastobsyr      transfer 
##            15            16            24            25            26 
##       dropout stillenrolled         hsgpa    gradontime    gradcohort 
##            27            28            17            29            30 
##       diploma evercollrdyhs  evercarrdyhs      actmth11      actrla11 
##            31            22            23            19            20 
##      acteng11      actcmp11   yr1psenr2yr   yr1psenr4yr   yr1psenrany 
##            18            21            33            34            32 
##   yr2psenrany 
##            35 
## 
## Matrix of predictors: 
## ($predictor.matrix)
##               altsch urbanicity cohort male race frleverhs swdeverhs eleverhs
## altsch             0          0      0    0    0         0         0        0
## urbanicity         1          0      0    0    0         0         0        0
## cohort             1          1      0    1    1         0         0        0
## male               1          1      0    0    0         0         0        0
## race               1          1      0    1    0         0         0        0
## frleverhs          1          1      1    1    1         0         0        0
## swdeverhs          1          1      1    1    1         1         0        0
## eleverhs           1          1      1    1    1         1         1        0
## tageverhs          1          1      1    1    1         1         1        1
## alteverhs          1          1      1    1    1         1         1        1
## mthss6             1          1      1    1    1         1         1        1
## rlass6             1          1      1    1    1         1         1        1
## mthss8             1          1      1    1    1         1         1        1
## rlass8             1          1      1    1    1         1         1        1
## pctabshs           1          1      1    1    1         1         1        1
## pctexcusedhs       1          1      1    1    1         1         1        1
## hsgpa              1          1      1    1    1         1         1        1
## acteng11           1          1      1    1    1         1         1        1
## actmth11           1          1      1    1    1         1         1        1
## actrla11           1          1      1    1    1         1         1        1
## actcmp11           1          1      1    1    1         1         1        1
## evercollrdyhs      1          1      1    1    1         1         1        1
## evercarrdyhs       1          1      1    1    1         1         1        1
## aptakenever        1          1      1    1    1         1         1        1
## lastobsyr          1          1      1    1    1         1         1        1
## transfer           1          1      1    1    1         1         1        1
## dropout            1          1      1    1    1         1         1        1
## stillenrolled      1          1      1    1    1         1         1        1
## gradontime         1          1      1    1    1         1         1        1
## gradcohort         1          1      1    1    1         1         1        1
## diploma            1          1      1    1    1         1         1        1
## yr1psenrany        1          1      1    1    1         1         1        1
## yr1psenr2yr        1          1      1    1    1         1         1        1
## yr1psenr4yr        1          1      1    1    1         1         1        1
## yr2psenrany        1          1      1    1    1         1         1        1
## schid              0          0      0    0    0         0         0        0
##               tageverhs alteverhs mthss6 rlass6 mthss8 rlass8 pctabshs
## altsch                0         0      0      0      0      0        0
## urbanicity            0         0      0      0      0      0        0
## cohort                0         0      0      0      0      0        0
## male                  0         0      0      0      0      0        0
## race                  0         0      0      0      0      0        0
## frleverhs             0         0      0      0      0      0        0
## swdeverhs             0         0      0      0      0      0        0
## eleverhs              0         0      0      0      0      0        0
## tageverhs             0         0      0      0      0      0        0
## alteverhs             1         0      0      0      0      0        0
## mthss6                1         1      0      0      0      0        0
## rlass6                1         1      1      0      0      0        0
## mthss8                1         1      1      1      0      0        0
## rlass8                1         1      1      1      1      0        0
## pctabshs              1         1      1      1      1      1        0
## pctexcusedhs          1         1      1      1      1      1        1
## hsgpa                 1         1      1      1      1      1        1
## acteng11              1         1      1      1      1      1        1
## actmth11              1         1      1      1      1      1        1
## actrla11              1         1      1      1      1      1        1
## actcmp11              1         1      1      1      1      1        1
## evercollrdyhs         1         1      1      1      1      1        1
## evercarrdyhs          1         1      1      1      1      1        1
## aptakenever           1         1      1      1      1      1        1
## lastobsyr             1         1      1      1      1      1        1
## transfer              1         1      1      1      1      1        1
## dropout               1         1      1      1      1      1        1
## stillenrolled         1         1      1      1      1      1        1
## gradontime            1         1      1      1      1      1        1
## gradcohort            1         1      1      1      1      1        1
## diploma               1         1      1      1      1      1        1
## yr1psenrany           1         1      1      1      1      1        1
## yr1psenr2yr           1         1      1      1      1      1        1
## yr1psenr4yr           1         1      1      1      1      1        1
## yr2psenrany           1         1      1      1      1      1        1
## schid                 0         0      0      0      0      0        0
##               pctexcusedhs hsgpa acteng11 actmth11 actrla11 actcmp11
## altsch                   0     0        0        0        0        0
## urbanicity               0     0        0        0        0        0
## cohort                   0     0        0        0        0        0
## male                     0     0        0        0        0        0
## race                     0     0        0        0        0        0
## frleverhs                0     0        0        0        0        0
## swdeverhs                0     0        0        0        0        0
## eleverhs                 0     0        0        0        0        0
## tageverhs                0     0        0        0        0        0
## alteverhs                0     0        0        0        0        0
## mthss6                   0     0        0        0        0        0
## rlass6                   0     0        0        0        0        0
## mthss8                   0     0        0        0        0        0
## rlass8                   0     0        0        0        0        0
## pctabshs                 0     0        0        0        0        0
## pctexcusedhs             0     0        0        0        0        0
## hsgpa                    1     0        0        0        0        0
## acteng11                 1     1        0        1        1        0
## actmth11                 1     1        0        0        0        0
## actrla11                 1     1        0        1        0        0
## actcmp11                 1     1        1        1        1        0
## evercollrdyhs            1     1        0        0        0        0
## evercarrdyhs             1     1        0        0        0        0
## aptakenever              1     0        0        0        0        0
## lastobsyr                1     0        0        0        0        0
## transfer                 1     0        0        0        0        0
## dropout                  1     0        0        0        0        0
## stillenrolled            1     0        0        0        0        0
## gradontime               1     1        0        0        0        0
## gradcohort               1     1        0        0        0        0
## diploma                  1     1        0        0        0        0
## yr1psenrany              1     1        1        1        1        1
## yr1psenr2yr              1     1        1        1        1        1
## yr1psenr4yr              1     1        1        1        1        1
## yr2psenrany              1     1        1        1        1        1
## schid                    0     0        0        0        0        0
##               evercollrdyhs evercarrdyhs aptakenever lastobsyr transfer dropout
## altsch                    0            0           0         0        0       0
## urbanicity                0            0           0         0        0       0
## cohort                    0            0           0         0        0       0
## male                      0            0           0         0        0       0
## race                      0            0           0         0        0       0
## frleverhs                 0            0           0         0        0       0
## swdeverhs                 0            0           0         0        0       0
## eleverhs                  0            0           0         0        0       0
## tageverhs                 0            0           0         0        0       0
## alteverhs                 0            0           0         0        0       0
## mthss6                    0            0           0         0        0       0
## rlass6                    0            0           0         0        0       0
## mthss8                    0            0           0         0        0       0
## rlass8                    0            0           0         0        0       0
## pctabshs                  0            0           0         0        0       0
## pctexcusedhs              0            0           0         0        0       0
## hsgpa                     0            0           1         1        1       1
## acteng11                  1            1           1         1        1       1
## actmth11                  1            1           1         1        1       1
## actrla11                  1            1           1         1        1       1
## actcmp11                  1            1           1         1        1       1
## evercollrdyhs             0            0           1         1        1       1
## evercarrdyhs              1            0           1         1        1       1
## aptakenever               0            0           0         0        0       0
## lastobsyr                 0            0           1         0        0       0
## transfer                  0            0           1         1        0       0
## dropout                   0            0           1         1        1       0
## stillenrolled             0            0           1         1        1       1
## gradontime                0            0           1         1        1       1
## gradcohort                0            0           1         1        1       1
## diploma                   0            0           1         1        1       1
## yr1psenrany               1            1           1         1        1       1
## yr1psenr2yr               1            1           1         1        1       1
## yr1psenr4yr               1            1           1         1        1       1
## yr2psenrany               1            1           1         1        1       1
## schid                     0            0           0         0        0       0
##               stillenrolled gradontime gradcohort diploma yr1psenrany
## altsch                    0          0          0       0           0
## urbanicity                0          0          0       0           0
## cohort                    0          0          0       0           0
## male                      0          0          0       0           0
## race                      0          0          0       0           0
## frleverhs                 0          0          0       0           0
## swdeverhs                 0          0          0       0           0
## eleverhs                  0          0          0       0           0
## tageverhs                 0          0          0       0           0
## alteverhs                 0          0          0       0           0
## mthss6                    0          0          0       0           0
## rlass6                    0          0          0       0           0
## mthss8                    0          0          0       0           0
## rlass8                    0          0          0       0           0
## pctabshs                  0          0          0       0           0
## pctexcusedhs              0          0          0       0           0
## hsgpa                     1          0          0       0           0
## acteng11                  1          1          1       1           0
## actmth11                  1          1          1       1           0
## actrla11                  1          1          1       1           0
## actcmp11                  1          1          1       1           0
## evercollrdyhs             1          1          1       1           0
## evercarrdyhs              1          1          1       1           0
## aptakenever               0          0          0       0           0
## lastobsyr                 0          0          0       0           0
## transfer                  0          0          0       0           0
## dropout                   0          0          0       0           0
## stillenrolled             0          0          0       0           0
## gradontime                1          0          0       0           0
## gradcohort                1          1          0       0           0
## diploma                   1          1          1       0           0
## yr1psenrany               1          1          1       1           0
## yr1psenr2yr               1          1          1       1           0
## yr1psenr4yr               1          1          1       1           0
## yr2psenrany               1          1          1       1           1
## schid                     0          0          0       0           0
##               yr1psenr2yr yr1psenr4yr yr2psenrany schid
## altsch                  0           0           0     1
## urbanicity              0           0           0     1
## cohort                  0           0           0     1
## male                    0           0           0     1
## race                    0           0           0     1
## frleverhs               0           0           0     1
## swdeverhs               0           0           0     1
## eleverhs                0           0           0     1
## tageverhs               0           0           0     1
## alteverhs               0           0           0     1
## mthss6                  0           0           0     1
## rlass6                  0           0           0     1
## mthss8                  0           0           0     1
## rlass8                  0           0           0     1
## pctabshs                0           0           0     1
## pctexcusedhs            0           0           0     1
## hsgpa                   0           0           0     1
## acteng11                0           0           0     1
## actmth11                0           0           0     1
## actrla11                0           0           0     1
## actcmp11                0           0           0     1
## evercollrdyhs           0           0           0     1
## evercarrdyhs            0           0           0     1
## aptakenever             0           0           0     1
## lastobsyr               0           0           0     1
## transfer                0           0           0     1
## dropout                 0           0           0     1
## stillenrolled           0           0           0     1
## gradontime              0           0           0     1
## gradcohort              0           0           0     1
## diploma                 0           0           0     1
## yr1psenrany             1           1           0     1
## yr1psenr2yr             0           0           0     1
## yr1psenr4yr             1           0           0     1
## yr2psenrany             1           1           0     1
## schid                   0           0           0     0

Improvements

The model above exerts some control over the order of the variables being synthesized. In this example, we resampled the unique school identifiers. Now, using only the schid values from the protected data, we’ll predict the value of altsch and use that model to project values of altsch in our synthetic data.
Next, the program will use both the schid and altsch variables to predict the urbanicity variable and will use that model to project synthetic values of urbanicity using the resampled schid and synthetic altsch values.
This continues one variable at a time until we get to the last variable in the visit.sequence.

You can synthesize the data in any order you want, but I selected this order based on the assumption that it is going to better reflect the underlying data generating process (DGP). You may have other assumptions and it may not affect the end result much at all.

Things to keep in mind

It isn’t discussed a ton in the statistical disclosure control (SDC) literature, but one challenge with this approach to synthesis is error propagation. Each model has its own amount of error. That error is injected into the projections. As the process progresses to the later variables in the data set, the error from earlier projections is compounded.

# Same synthesis as before, now with logical constraints supplied through
# rules/rvalues. Each rule is a condition on other variables, and the entry of
# the same name in rvalues is the value tied to that condition:
#   - yr1psenrany = 1 when yr1psenr2yr == 1 or yr1psenr4yr == 1
#   - diploma     = 1 when dropout, stillenrolled, and transfer are all 0
cakiest <- synthpop::syn(
  data = df[-c(1)],
  method = "ctree",
  visit.sequence = ord,
  rules = list(
    yr1psenrany = "yr1psenr2yr == 1 | yr1psenr4yr == 1",
    diploma = "dropout == 0 & stillenrolled == 0 & transfer == 0"
  ),
  rvalues = list(yr1psenrany = 1, diploma = 1),
  models = TRUE
)
## 
## Unexpected values (not obeying the rules) found for variable(s): diploma, yr1psenrany.
## Rules have been applied but make sure they are correct.
## 
## Synthesis
## -----------
##  schid altsch urbanicity male race cohort frleverhs swdeverhs eleverhs tageverhs
##  alteverhs mthss6 rlass6 mthss8 rlass8 pctabshs pctexcusedhs aptakenever lastobsyr transfer
##  dropout stillenrolled hsgpa gradontime gradcohort diploma evercollrdyhs evercarrdyhs actmth11 actrla11
##  acteng11 actcmp11 yr1psenr2yr yr1psenr4yr yr1psenrany yr2psenrany
# Print the constrained synthesis object (call, first synthetic rows, methods,
# visit sequence, and predictor matrix)
print(cakiest)
## Call:
## ($call) synthpop::syn(data = df[-c(1)], method = "ctree", visit.sequence = ord, 
##     rules = list(yr1psenrany = "yr1psenr2yr == 1 | yr1psenr4yr == 1", 
##         diploma = "dropout == 0 & stillenrolled == 0 & transfer == 0"), 
##     rvalues = list(yr1psenrany = 1, diploma = 1), models = TRUE)
## 
## Number of synthesised data sets: 
## ($m)  1 
## 
## First rows of synthesised data set: 
## ($syn)
##   altsch urbanicity cohort male  race frleverhs swdeverhs eleverhs tageverhs
## 1      0          9   2009    1 White         0         0        0         0
## 2      0          9   2010    1 White         1         0        0         1
## 3      0          6   2009    0 White         1         0        0         0
## 4      0          4   2009    0 White         0         0        0         0
## 5      0          7   2009    1 White         1         0        0         0
## 6      0          4   2010    0 White         1         0        0         0
##   alteverhs mthss6 rlass6 mthss8 rlass8 pctabshs pctexcusedhs    hsgpa acteng11
## 1         1     NA     NA     62     51 2.735127     1.855051 2.944000       16
## 2         0    655    637     64     61 7.919605     5.497127 3.354200       26
## 3         0     NA     NA     43     42 5.326531     2.899533 0.958220       12
## 4         1     NA     NA     NA     NA 2.513399     1.187948 3.175207       17
## 5         0     NA     NA     18     23 4.540000     3.037572 2.638000       16
## 6         1    658    648     43     45 9.957205     6.978968 2.819950       18
##   actmth11 actrla11 actcmp11 evercollrdyhs evercarrdyhs aptakenever lastobsyr
## 1       19       21       18             0            0           0      2012
## 2       24       26       23             1            0           1      2013
## 3       15       10       13             0            0           0      2012
## 4       17       21       18             0            0           0      2011
## 5       16       17       16             0            0           0      2012
## 6       17       20       18             0            0           0      2013
##   transfer dropout stillenrolled gradontime gradcohort diploma yr1psenrany
## 1        0       0             0          1       2012       1           1
## 2        0       0             0          1       2013       1           1
## 3        0       0             0          1       2012       1           0
## 4        0       1             0          0       <NA>       0        <NA>
## 5        0       0             0          1       2012       1           1
## 6        0       0             0          1       2013       1           0
##   yr1psenr2yr yr1psenr4yr yr2psenrany   schid
## 1           0           1           1 1601256
## 2           0           1           0 2533120
## 3           0           0           0 5447464
## 4        <NA>        <NA>        <NA> 2112606
## 5           1           0           1 3104610
## 6           0           0           0 4095680
## ...
## 
## Synthesising methods: 
## ($method)
##        altsch    urbanicity        cohort          male          race 
##       "ctree"       "ctree"       "ctree"       "ctree"       "ctree" 
##     frleverhs     swdeverhs      eleverhs     tageverhs     alteverhs 
##       "ctree"       "ctree"       "ctree"       "ctree"       "ctree" 
##        mthss6        rlass6        mthss8        rlass8      pctabshs 
##       "ctree"       "ctree"       "ctree"       "ctree"       "ctree" 
##  pctexcusedhs         hsgpa      acteng11      actmth11      actrla11 
##       "ctree"       "ctree"       "ctree"       "ctree"       "ctree" 
##      actcmp11 evercollrdyhs  evercarrdyhs   aptakenever     lastobsyr 
##       "ctree"       "ctree"       "ctree"       "ctree"       "ctree" 
##      transfer       dropout stillenrolled    gradontime    gradcohort 
##       "ctree"       "ctree"       "ctree"       "ctree"       "ctree" 
##       diploma   yr1psenrany   yr1psenr2yr   yr1psenr4yr   yr2psenrany 
##       "ctree"       "ctree"       "ctree"       "ctree"       "ctree" 
##         schid 
##      "sample" 
## 
## Order of synthesis: 
## ($visit.sequence)
##         schid        altsch    urbanicity          male          race 
##            36             1             2             4             5 
##        cohort     frleverhs     swdeverhs      eleverhs     tageverhs 
##             3             6             7             8             9 
##     alteverhs        mthss6        rlass6        mthss8        rlass8 
##            10            11            12            13            14 
##      pctabshs  pctexcusedhs   aptakenever     lastobsyr      transfer 
##            15            16            24            25            26 
##       dropout stillenrolled         hsgpa    gradontime    gradcohort 
##            27            28            17            29            30 
##       diploma evercollrdyhs  evercarrdyhs      actmth11      actrla11 
##            31            22            23            19            20 
##      acteng11      actcmp11   yr1psenr2yr   yr1psenr4yr   yr1psenrany 
##            18            21            33            34            32 
##   yr2psenrany 
##            35 
## 
## Matrix of predictors: 
## ($predictor.matrix)
##               altsch urbanicity cohort male race frleverhs swdeverhs eleverhs
## altsch             0          0      0    0    0         0         0        0
## urbanicity         1          0      0    0    0         0         0        0
## cohort             1          1      0    1    1         0         0        0
## male               1          1      0    0    0         0         0        0
## race               1          1      0    1    0         0         0        0
## frleverhs          1          1      1    1    1         0         0        0
## swdeverhs          1          1      1    1    1         1         0        0
## eleverhs           1          1      1    1    1         1         1        0
## tageverhs          1          1      1    1    1         1         1        1
## alteverhs          1          1      1    1    1         1         1        1
## mthss6             1          1      1    1    1         1         1        1
## rlass6             1          1      1    1    1         1         1        1
## mthss8             1          1      1    1    1         1         1        1
## rlass8             1          1      1    1    1         1         1        1
## pctabshs           1          1      1    1    1         1         1        1
## pctexcusedhs       1          1      1    1    1         1         1        1
## hsgpa              1          1      1    1    1         1         1        1
## acteng11           1          1      1    1    1         1         1        1
## actmth11           1          1      1    1    1         1         1        1
## actrla11           1          1      1    1    1         1         1        1
## actcmp11           1          1      1    1    1         1         1        1
## evercollrdyhs      1          1      1    1    1         1         1        1
## evercarrdyhs       1          1      1    1    1         1         1        1
## aptakenever        1          1      1    1    1         1         1        1
## lastobsyr          1          1      1    1    1         1         1        1
## transfer           1          1      1    1    1         1         1        1
## dropout            1          1      1    1    1         1         1        1
## stillenrolled      1          1      1    1    1         1         1        1
## gradontime         1          1      1    1    1         1         1        1
## gradcohort         1          1      1    1    1         1         1        1
## diploma            1          1      1    1    1         1         1        1
## yr1psenrany        1          1      1    1    1         1         1        1
## yr1psenr2yr        1          1      1    1    1         1         1        1
## yr1psenr4yr        1          1      1    1    1         1         1        1
## yr2psenrany        1          1      1    1    1         1         1        1
## schid              0          0      0    0    0         0         0        0
##               tageverhs alteverhs mthss6 rlass6 mthss8 rlass8 pctabshs
## altsch                0         0      0      0      0      0        0
## urbanicity            0         0      0      0      0      0        0
## cohort                0         0      0      0      0      0        0
## male                  0         0      0      0      0      0        0
## race                  0         0      0      0      0      0        0
## frleverhs             0         0      0      0      0      0        0
## swdeverhs             0         0      0      0      0      0        0
## eleverhs              0         0      0      0      0      0        0
## tageverhs             0         0      0      0      0      0        0
## alteverhs             1         0      0      0      0      0        0
## mthss6                1         1      0      0      0      0        0
## rlass6                1         1      1      0      0      0        0
## mthss8                1         1      1      1      0      0        0
## rlass8                1         1      1      1      1      0        0
## pctabshs              1         1      1      1      1      1        0
## pctexcusedhs          1         1      1      1      1      1        1
## hsgpa                 1         1      1      1      1      1        1
## acteng11              1         1      1      1      1      1        1
## actmth11              1         1      1      1      1      1        1
## actrla11              1         1      1      1      1      1        1
## actcmp11              1         1      1      1      1      1        1
## evercollrdyhs         1         1      1      1      1      1        1
## evercarrdyhs          1         1      1      1      1      1        1
## aptakenever           1         1      1      1      1      1        1
## lastobsyr             1         1      1      1      1      1        1
## transfer              1         1      1      1      1      1        1
## dropout               1         1      1      1      1      1        1
## stillenrolled         1         1      1      1      1      1        1
## gradontime            1         1      1      1      1      1        1
## gradcohort            1         1      1      1      1      1        1
## diploma               1         1      1      1      1      1        1
## yr1psenrany           1         1      1      1      1      1        1
## yr1psenr2yr           1         1      1      1      1      1        1
## yr1psenr4yr           1         1      1      1      1      1        1
## yr2psenrany           1         1      1      1      1      1        1
## schid                 0         0      0      0      0      0        0
##               pctexcusedhs hsgpa acteng11 actmth11 actrla11 actcmp11
## altsch                   0     0        0        0        0        0
## urbanicity               0     0        0        0        0        0
## cohort                   0     0        0        0        0        0
## male                     0     0        0        0        0        0
## race                     0     0        0        0        0        0
## frleverhs                0     0        0        0        0        0
## swdeverhs                0     0        0        0        0        0
## eleverhs                 0     0        0        0        0        0
## tageverhs                0     0        0        0        0        0
## alteverhs                0     0        0        0        0        0
## mthss6                   0     0        0        0        0        0
## rlass6                   0     0        0        0        0        0
## mthss8                   0     0        0        0        0        0
## rlass8                   0     0        0        0        0        0
## pctabshs                 0     0        0        0        0        0
## pctexcusedhs             0     0        0        0        0        0
## hsgpa                    1     0        0        0        0        0
## acteng11                 1     1        0        1        1        0
## actmth11                 1     1        0        0        0        0
## actrla11                 1     1        0        1        0        0
## actcmp11                 1     1        1        1        1        0
## evercollrdyhs            1     1        0        0        0        0
## evercarrdyhs             1     1        0        0        0        0
## aptakenever              1     0        0        0        0        0
## lastobsyr                1     0        0        0        0        0
## transfer                 1     0        0        0        0        0
## dropout                  1     0        0        0        0        0
## stillenrolled            1     0        0        0        0        0
## gradontime               1     1        0        0        0        0
## gradcohort               1     1        0        0        0        0
## diploma                  1     1        0        0        0        0
## yr1psenrany              1     1        1        1        1        1
## yr1psenr2yr              1     1        1        1        1        1
## yr1psenr4yr              1     1        1        1        1        1
## yr2psenrany              1     1        1        1        1        1
## schid                    0     0        0        0        0        0
##               evercollrdyhs evercarrdyhs aptakenever lastobsyr transfer dropout
## altsch                    0            0           0         0        0       0
## urbanicity                0            0           0         0        0       0
## cohort                    0            0           0         0        0       0
## male                      0            0           0         0        0       0
## race                      0            0           0         0        0       0
## frleverhs                 0            0           0         0        0       0
## swdeverhs                 0            0           0         0        0       0
## eleverhs                  0            0           0         0        0       0
## tageverhs                 0            0           0         0        0       0
## alteverhs                 0            0           0         0        0       0
## mthss6                    0            0           0         0        0       0
## rlass6                    0            0           0         0        0       0
## mthss8                    0            0           0         0        0       0
## rlass8                    0            0           0         0        0       0
## pctabshs                  0            0           0         0        0       0
## pctexcusedhs              0            0           0         0        0       0
## hsgpa                     0            0           1         1        1       1
## acteng11                  1            1           1         1        1       1
## actmth11                  1            1           1         1        1       1
## actrla11                  1            1           1         1        1       1
## actcmp11                  1            1           1         1        1       1
## evercollrdyhs             0            0           1         1        1       1
## evercarrdyhs              1            0           1         1        1       1
## aptakenever               0            0           0         0        0       0
## lastobsyr                 0            0           1         0        0       0
## transfer                  0            0           1         1        0       0
## dropout                   0            0           1         1        1       0
## stillenrolled             0            0           1         1        1       1
## gradontime                0            0           1         1        1       1
## gradcohort                0            0           1         1        1       1
## diploma                   0            0           1         1        1       1
## yr1psenrany               1            1           1         1        1       1
## yr1psenr2yr               1            1           1         1        1       1
## yr1psenr4yr               1            1           1         1        1       1
## yr2psenrany               1            1           1         1        1       1
## schid                     0            0           0         0        0       0
##               stillenrolled gradontime gradcohort diploma yr1psenrany
## altsch                    0          0          0       0           0
## urbanicity                0          0          0       0           0
## cohort                    0          0          0       0           0
## male                      0          0          0       0           0
## race                      0          0          0       0           0
## frleverhs                 0          0          0       0           0
## swdeverhs                 0          0          0       0           0
## eleverhs                  0          0          0       0           0
## tageverhs                 0          0          0       0           0
## alteverhs                 0          0          0       0           0
## mthss6                    0          0          0       0           0
## rlass6                    0          0          0       0           0
## mthss8                    0          0          0       0           0
## rlass8                    0          0          0       0           0
## pctabshs                  0          0          0       0           0
## pctexcusedhs              0          0          0       0           0
## hsgpa                     1          0          0       0           0
## acteng11                  1          1          1       1           0
## actmth11                  1          1          1       1           0
## actrla11                  1          1          1       1           0
## actcmp11                  1          1          1       1           0
## evercollrdyhs             1          1          1       1           0
## evercarrdyhs              1          1          1       1           0
## aptakenever               0          0          0       0           0
## lastobsyr                 0          0          0       0           0
## transfer                  0          0          0       0           0
## dropout                   0          0          0       0           0
## stillenrolled             0          0          0       0           0
## gradontime                1          0          0       0           0
## gradcohort                1          1          0       0           0
## diploma                   1          1          1       0           0
## yr1psenrany               1          1          1       1           0
## yr1psenr2yr               1          1          1       1           0
## yr1psenr4yr               1          1          1       1           0
## yr2psenrany               1          1          1       1           1
## schid                     0          0          0       0           0
##               yr1psenr2yr yr1psenr4yr yr2psenrany schid
## altsch                  0           0           0     1
## urbanicity              0           0           0     1
## cohort                  0           0           0     1
## male                    0           0           0     1
## race                    0           0           0     1
## frleverhs               0           0           0     1
## swdeverhs               0           0           0     1
## eleverhs                0           0           0     1
## tageverhs               0           0           0     1
## alteverhs               0           0           0     1
## mthss6                  0           0           0     1
## rlass6                  0           0           0     1
## mthss8                  0           0           0     1
## rlass8                  0           0           0     1
## pctabshs                0           0           0     1
## pctexcusedhs            0           0           0     1
## hsgpa                   0           0           0     1
## acteng11                0           0           0     1
## actmth11                0           0           0     1
## actrla11                0           0           0     1
## actcmp11                0           0           0     1
## evercollrdyhs           0           0           0     1
## evercarrdyhs            0           0           0     1
## aptakenever             0           0           0     1
## lastobsyr               0           0           0     1
## transfer                0           0           0     1
## dropout                 0           0           0     1
## stillenrolled           0           0           0     1
## gradontime              0           0           0     1
## gradcohort              0           0           0     1
## diploma                 0           0           0     1
## yr1psenrany             1           1           0     1
## yr1psenr2yr             0           0           0     1
## yr1psenr4yr             1           0           0     1
## yr2psenrany             1           1           0     1
## schid                   0           0           0     0

Further Improved Synthesis

The improved example has quite a bit more information specified. The models parameter will save the parameters for the models fitted to the data to generate the synthetic records. However, the last three parameters (visit.sequence, rules, and rvalues) are the most important for controlling how the data are synthesized. visit.sequence allows you to specify the order in which the variables are synthesized. This is particularly important because the first variable in the sequence has no predictors, so its synthetic values are simply resampled from the observed values.

Rules and Rule Values

Sometimes variables have logical constraints, and we want the data we synthesize to respect the same constraints. The rules and rvalues parameters allow us to specify the logical constraints and the values that should result when those constraints are met. In the first example, we have the following logical constraint specified as a rule:

yr1psenrany = "yr1psenr2yr == 1 | yr1psenr4yr == 1"

This means that the variable yr1psenrany is a function of a logical constraint on the yr1psenr2yr and yr1psenr4yr variables. In other words, if a student enrolls in a 2-year or 4-year post-secondary education program in the year after completing high school, it will affect the yr1psenrany value. When the logical constraint is true, we specify the value that the yr1psenrany variable should take in an argument passed to the rvalues parameter:

yr1psenrany = 1

So, if the synthetic student enrolls in a 2- or 4-year post-secondary institution, we want the synthetic value for the yr1psenrany variable to be equal to 1, indicating that the synthetic student enrolled in a post-secondary program in the year following high school.

Longitudinal Data

In general, I would recommend using other approaches for longitudinal data synthesis.
To use the approach here, you’d need to use a multivariate or wide data structure (e.g., one variable for each measure for each period of time). This increases error propagation and also requires additional effort to specify the order of the variables so that the appropriate temporal relationships are maintained.