// midata.do
// Builds a simulated example data set for illustrating multiple imputation.
// Written by Russell Dimond for the SSCC 8/2012
//
// Outline:
//   1. simulate 3000 observations (female, race, urban, edu, exp, wage)
//   2. regress wage on the covariates using the complete data
//   3. set values of every variable except female to missing at random
//   4. rerun the same regression on the remaining complete cases
//   5. save the result as midata
//
// NOTE: the simulated values depend on the order of the random draws that
// follow -set seed-, so the statements below must not be reordered.

clear all
set more off
set seed 91238
set obs 3000

// ---- demographics ---------------------------------------------------------
generate byte female = runiform() > 0.5

// Stored as byte, so the fractional part is dropped: race is 0, 1 or 2.
generate byte race = runiform()*3

// The threshold for being urban is higher (0.4 instead of 0.3) when race==2.
generate byte urban = runiform() > 0.3 + 0.1*(race==2)

// ---- education ------------------------------------------------------------
label define edu                ///
    1 "< High School"           ///
    2 "High School"             ///
    3 "Bachelors"               ///
    4 "Advanced Degree"

// Everyone starts at level 1 and gets three independent chances to move up
// one level; each pass uses a fresh uniform draw per observation, so edu
// ends up between 1 and 4.
generate byte edu = 1
forvalues pass = 1/3 {
    replace edu = edu + 1 if runiform() < 0.3 + 0.1*race + 0.2*female - 0.3*(!urban & female)
}
label values edu edu

// ---- experience (negative values are set to 0) ----------------------------
generate exp = 20 - edu - 5*female + invnormal(runiform())*10
replace exp = 0 if exp < 0

// ---- wage (negative values are set to 0) ----------------------------------
// The coefficients in this formula are the 'right' answers that the
// regressions below are trying to recover.
generate wage = 30000                                       ///
    + (edu==2)*10000 + (edu==3)*30000 + (edu==4)*50000      ///
    + urban*5000                                            ///
    + exp*(2000 - 500*female)                               ///
    - female*edu*5000                                       ///
    - (race==1)*3000 - (race==2)*5000                       ///
    + invnormal(runiform())*30000
replace wage = 0 if wage < 0

// ---- regression on the complete data --------------------------------------
regress wage i.race urban female##(i.edu c.exp)

// ---- introduce missing values ---------------------------------------------
// Each value of each variable from race through wage is independently set to
// missing with probability 0.1. female was created first, so it lies outside
// the race-wage range and stays fully observed.
foreach v of varlist race-wage {
    replace `v' = . if runiform() < 0.1
}

// ---- complete cases analysis ----------------------------------------------
regress wage i.race urban female##(i.edu c.exp)

save midata, replace