// Multiple-imputation worked example.
// Reads midata, writes a log to miex.log, and then:
//   1. tests whether missingness is related to the other variables,
//   2. checks each conditional imputation model for misspecification,
//   3. checks convergence of the chained-equations process via a trace file,
//   4. runs the final imputation (5 imputations, by female),
//   5. compares imputed values with observed values,
//   6. saves the imputed data set as mi1.
// Side effects: overwrites miex.log, extrace.dta, mi1.dta and the exported
// .png graphs in the current working directory.

clear all
set more off
capture log close
log using miex.log, replace
use midata

// test missingness of data
// numvars = every variable in the data set; missvars = the range urban-wage
unab numvars: *
unab missvars: urban-wage
// create miss_<var> indicator variables for variables with missing values
misstable sum, gen(miss_)

foreach var of local missvars {
    // predictors are all variables except the one whose missingness is tested
    local covars: list numvars - var
    display _newline(3) "logit missingness of `var' on `covars'"
    logit miss_`var' `covars'
    foreach nvar of local covars {
        display _newline(3) "ttest of `nvar' by missingness of `var'"
        ttest `nvar', by(miss_`var')
    }
}

// set up trial imputation command just to get the individual regression commands
mi set wide
mi register imputed race-wage
mi register regular female
mi impute chained (logit) urban (mlogit) race (ologit) edu (regress) exp wage = i.female, dryrun

// test imputation model for race
mlogit race exp wage i.edu i.urban i.female
// test for misspecification by adding interactions
mlogit race (c.exp c.wage i.edu)##(i.female i.urban)

// test imputation model for exp
regress exp i.race wage i.edu i.urban i.female
// test for misspecification with rvfplot
// constraint line indicates exp>=0
rvfplot, ylabel(-40 -20 0 20 40)
graph export exp1.png, replace
predict exphat
predict expres, res
// a residual below -exphat corresponds to an observed exp below zero,
// so y = -exphat is the exp>=0 boundary in residual-vs-fitted space
gen y=-exphat
scatter expres exphat || line y exphat, legend(order(2 "Exp>=0 Constraint"))
graph export exp2.png, replace
drop expres exphat y
// test for misspecification by adding interactions
regress exp (i.race i.urban i.female)##(c.wage i.edu)

// test imputation model for wage
regress wage i.race exp i.edu i.urban i.female
// test for misspecification with rvfplot
// (unlike the exp model above, no wage>=0 constraint line is drawn here)
rvfplot
graph export wage.png, replace
// test interactions
regress wage (i.race i.urban i.female)##(c.exp i.edu)

// test imputation model for edu
ologit edu i.race exp wage i.urban i.female
// test for misspecification by adding interactions
ologit edu (i.race i.urban i.female)##(c.exp c.wage)

// test imputation model for urban
logit urban i.race exp wage i.edu i.female
// test for misspecification by adding interactions
logit urban (i.race i.female)##(c.exp c.wage i.edu)

// refine models after reviewing results
mi impute chained (logit) urban (mlogit) race (ologit) edu (pmm) exp wage, dryrun by(female)

// test new models for convergence
bysort female: reg exp i.urban i.race wage i.edu
by female: logit urban exp i.race wage i.edu
by female: mlogit race exp i.urban wage i.edu
by female: reg wage exp i.urban i.race i.edu
by female: ologit edu exp i.urban i.race wage
// for real work you would explore misspecification of refined models as well

// test convergence of imputation process
// since by() and savetrace() don't get along right now, we'll remove by(),
// then throw away these imputations and do them with by() but no savetrace().
preserve
mi impute chained (logit) urban (mlogit) race (ologit) edu (pmm) exp wage = female, ///
    add(5) rseed(88) savetrace(extrace, replace) burnin(100)
// load the trace file; -use- takes clear (not replace) to discard the data in
// memory, which is safe here because the original data are held by preserve
use extrace, clear
// one column per imputation (m) so each imputation becomes its own line
reshape wide *mean *sd, i(iter) j(m)
tsset iter
tsline exp_mean*, title("Mean of Imputed Values of Experience") ///
    note("Each line is for one imputation") legend(off)
graph export conv1.png, replace
tsline exp_sd*, title("Standard Deviation of Imputed Values of Experience") ///
    note("Each line is for one imputation") legend(off)
graph export conv2.png, replace
restore

// "real" imputation
// NOTE(review): female is both a predictor (= i.female) and the by() variable,
// so it is constant within each group; the dryrun of the refined models above
// omits it. Confirm whether "= i.female" is intended here.
mi impute chained (logit) urban (mlogit) race (ologit) edu (pmm) exp wage = i.female, ///
    add(5) rseed(88) by(female)

// check if imputed values match observed values
// m=0 is the observed data; m=1..5 are restricted to originally-missing cases
foreach var of varlist urban race edu {
    mi xeq 0: tab `var'
    mi xeq 1/5: tab `var' if miss_`var'
}
foreach var of varlist wage exp {
    mi xeq 0: sum `var'
    mi xeq 1/5: sum `var' if miss_`var'
    // within mi xeq, ";" separates multiple commands run on the same imputation
    mi xeq 0: kdensity `var'; graph export chk`var'0.png, replace
    forval i=1/5 {
        mi xeq `i': kdensity `var' if miss_`var'; graph export chk`var'`i'.png, replace
    }
}

save mi1,replace
log close