/* c:\lotteries_b\1997\programs\sampling_merging.do */

/* In this program, we generate a "choice set" for each individual that
   consists of his/her actual choice and 9 alternatives randomly drawn from
   the collection of alternatives. */

/* First, some preliminaries that are common across the programs at which we
   are looking. */
clear
set memory 80m
set more off

/* Second, we read in the dataset created in
   "cleaning_generating_summarizing.do" and keep only the variables necessary
   for the random draws.  Recall, the "merge" command can be used to merge
   files with common variables; we will use this later to restore the
   variables that we discard in the "keep" command below.  For simplicity, we
   consider only rifle hunts, so we drop about 11,000 observations, leaving
   18,708 for analysis. */
use c:\lotteries_b\1997\intermediate\elk_res_1997_4.dta
keep choice_1 orig_zip lottery obsno
drop if obsno>18708

/* Third, we identify a unique alternative within each set of chosen
   alternatives.  We do this because we don't want any alternative to appear
   more than once in the set that we will randomly draw from.  A row is
   flagged unique=1 when the next row has a different value of "lottery",
   i.e. it is the last row of a run of equal values.
   NOTE(review): this presumes the data arrive already sorted by lottery;
   no sort is performed here -- confirm against the upstream program. */
generate unique=0 if (lottery[_n]==lottery[_n+1])
replace unique=1 if unique==.
save c:\lotteries_b\1997\intermediate\elk_res_1997_5.dta, replace
clear

/* Fourth, we now do the random draws using the "sample" command.  The
   process goes as follows: from the master dataset, we start with the first
   individual (obsno=1), drop all duplicate destinations (leaving a dataset
   with 136 destinations), "pull out" the destination that individual 1
   chose, and then randomly draw without replacement 9 alternatives from the
   remaining set of 135 alternatives.  The individual's actual choice is then
   included with the draws, leaving a dataset with 10 destinations.  This
   dataset, referred to as individual 1's "choice set", is saved, and we
   repeat the process for the remaining 18707 individuals.  The critical
   command that allows the process to be automatically repeated is the "for"
   command.  Note that "sample" is nested within the "for" command.  "For"
   won't repeat 18708 times, so we split the draws into chunks of 1000
   individuals.  The commands for the first 1000 individuals are below; I
   have excluded below the code for individuals 1001 to 18708.

   Two details of the loop body:
   - "sample 6.5 if obsno~=X" samples only among rows that are NOT the
     individual's own row; rows failing the "if" condition are always kept,
     so the actual choice survives the draw (6.5% of 135 rounds to 9).
   - "choice" is set from obsno==X, so the actual choice is flagged
     regardless of the row order that "sample" leaves behind. */

/* Individuals 1 to 1000 */
#delimit ;
for num 1/1000:
    use "c:\lotteries_b\1997\intermediate\elk_res_1997_5.dta" \
    drop if unique==0 & obsno~=X \
    drop if choice_1[_n-1]==choice_1[_n] \
    sample 6.5 if obsno~=X \
    generate id=X \
    generate choice=(obsno==X) \
    save "c:\lotteries_b\1997\intermediate\alt_X.dta", replace \
    clear ;
#delimit cr
clear

/* Fifth, we combine the 1000 files (each containing a choice set of 10
   alternatives) using the "append" command and save the resulting file.
   The 1000 files are then deleted using the "erase" command.  The files are
   read from the same directory (c:\lotteries_b\...) they were saved to in
   the loop above. */
use c:\lotteries_b\1997\intermediate\alt_1.dta
for num 2/1000: append using "c:\lotteries_b\1997\intermediate\alt_X.dta"
for num 1/1000: erase "c:\lotteries_b\1997\intermediate\alt_X.dta"
generate chunks=group(1000)
drop unique orig_zip
save c:\lotteries_b\1997\intermediate\sorted_draws0.dta, replace
clear

/* Sixth, although the draws commands for individuals 1001 to 18708 are
   excluded here, the code below is needed to merge the per-chunk files
   (sorted_draws0 through sorted_draws18), each of which contains up to 1000
   individuals.  This file will contain 18,708 x 10 = 187,080
   observations. */
use c:\lotteries_b\1997\intermediate\sorted_draws0.dta
for num 1/18: append using "c:\lotteries_b\1997\intermediate\sorted_drawsX.dta"
drop id
generate id=group(18708)
save c:\lotteries_b\1997\intermediate\sorted_draws.dta, replace
for num 0/18: erase "c:\lotteries_b\1997\intermediate\sorted_drawsX.dta"

/* So, the bulk of the "work" is complete; all that will remain before
   analysis will be to:
     i)   merge the file with the original file containing the individual
          characteristics (age and gender) and the outcomes of the lotteries;
     ii)  merge the file with the file containing mileage estimates between
          zipcodes;
     iii) merge the file with a file containing the destination
          characteristics. */

/* This collection of steps pertains to comment "i" above */
use c:\lotteries_b\1997\intermediate\elk_res_1997_5.dta
keep obsno orig_zip
rename obsno id
sort id
save c:\lotteries_b\1997\intermediate\elk_res_1997_6.dta, replace
clear

use c:\lotteries_b\1997\intermediate\sorted_draws.dta
sort id
merge id using c:\lotteries_b\1997\intermediate\elk_res_1997_6.dta, nokeep
drop _merge chunks
save c:\lotteries_b\1997\intermediate\sorted_draws.dta, replace
clear

/* This merges the drawn/purchased data with the random draws.  Note that
   elk_res_1997_5.dta is overwritten here, so the sampling loops above must
   all have finished before this point. */
use c:\lotteries_b\1997\intermediate\elk_res_1997_3.dta
keep choice_1 drawn purchase
generate obsno=group(_N)
drop if obsno>18708
keep obsno choice_1 drawn purchase
rename obsno id
sort id choice_1
save c:\lotteries_b\1997\intermediate\elk_res_1997_5.dta, replace
clear

use c:\lotteries_b\1997\intermediate\sorted_draws.dta
sort id choice_1
merge id choice_1 using c:\lotteries_b\1997\intermediate\elk_res_1997_5.dta, nokeep
/* _merge must be removed before the file is saved, otherwise it is still
   present when the next "merge" tries to create it. */
drop _merge
save c:\lotteries_b\1997\intermediate\sorted_draws.dta, replace

/* This collection of steps pertains to comment "ii" above */
use c:\lotteries_b\shared\elk_miles.dta
rename huntcode choice_1
sort choice_1 orig_zip
save c:\lotteries_b\1997\intermediate\elk_miles_2.dta, replace
clear

use c:\lotteries_b\1997\intermediate\sorted_draws.dta
sort choice_1 orig_zip
merge choice_1 orig_zip using c:\lotteries_b\1997\intermediate\elk_miles_2.dta, nokeep
drop _merge
rename miles miles_1
sort id obsno
save c:\lotteries_b\1997\intermediate\sorted_draws.dta, replace
clear

/* This collection of steps pertains to comment "iii" above */
use c:\lotteries_b\1997\data\hunts_97.dta
sort huntcode
rename huntcode choice_1
save c:\lotteries_b\1997\intermediate\hunts_1997_2.dta, replace

use c:\lotteries_b\1997\intermediate\sorted_draws.dta
sort choice_1
merge choice_1 using c:\lotteries_b\1997\intermediate\hunts_1997_2.dta, nokeep
sort id obsno
drop _merge
save c:\lotteries_b\1997\intermediate\sorted_draws, replace
clear

/* Individual characteristics (age and gender), keyed by id. */
use c:\lotteries_b\1997\intermediate\elk_res_1997_3.dta
drop choice_2-resident phone-purchase
generate obsno=group(_N)
drop if obsno>18708
keep obsno gender age
rename obsno id
sort id
save c:\lotteries_b\1997\intermediate\elk_res_1997_5.dta, replace
clear

use c:\lotteries_b\1997\intermediate\sorted_draws.dta
sort id
merge id using c:\lotteries_b\1997\intermediate\elk_res_1997_5.dta, nokeep
drop _merge
save c:\lotteries_b\1997\intermediate\10_sorted_draws.dta, replace

/* Some additional variables are generated */
generate outcome=1 if choice_1==drawn
replace outcome=0 if outcome==.
generate outcome2=1 if (purchase~=. & purchase~=0) & choice_1~=drawn
replace outcome2=0 if outcome2==.
generate male=1 if gender=="M"
replace male=0 if male==.
generate constant=1
generate prob_prc=licenses/apps95
replace prob_prc=1 if prob_prc>1
generate prob_tru=licenses/apps96
replace prob_tru=1 if prob_tru>1
generate harvest=charv/chunts
replace harvest=1 if harvest>1
/* The mileage variable was renamed to miles_1 after the mileage merge, and
   tcost below is built from miles_1, so the missing-distance fill has to be
   applied to miles_1. */
replace miles_1=925 if miles_1==.
generate age_sqd=age*age
/* _merge was already dropped before the save above, so it is not listed. */
drop gender drawn purchase
generate tcost=0.526*miles_1

generate site1 = (site_num==1)
generate site2 = (site_num==2)
generate site3 = (site_num==3)
generate site4 = (site_num==4)
generate site5 = (site_num==5)
generate site6 = (site_num==6)
generate site7 = (site_num==7)
generate site8 = (site_num==8)
generate site9 = (site_num==9)
generate site10 = (site_num==10)

/* Perceived Probabilities */
generate pp_tcost=tcost*prob_prc
generate pp_prob=prob_prc*prob_prc
generate pp_lics=licenses*prob_prc
generate pp_harv=harvest*prob_prc
generate pp_ne=ne*prob_prc
generate pp_nw=nw*prob_prc
generate pp_sw=sw*prob_prc
generate pp_se=se*prob_prc
generate pp_qual=quality*prob_prc
generate pp_bull=bull*prob_prc
generate pp_open=opening*prob_prc
generate pp_late=late*prob_prc
generate pp_holid=holiday*prob_prc
generate pp_age=age*prob_prc
generate pp_male=male*prob_prc

/* Actual Probabilities */
generate pt_tcost=tcost*prob_tru
generate pt_prob=prob_tru*prob_tru
generate pt_lics=licenses*prob_tru
generate pt_harv=harvest*prob_tru
generate pt_ne=ne*prob_tru
generate pt_nw=nw*prob_tru
generate pt_sw=sw*prob_tru
generate pt_se=se*prob_tru
generate pt_qual=quality*prob_tru
generate pt_bull=bull*prob_tru
generate pt_open=opening*prob_tru
generate pt_late=late*prob_tru
generate pt_holid=holiday*prob_tru
generate pt_age=age*prob_tru
generate pt_male=male*prob_tru

/* NOTE(review): this file is written under c:\lotteries\ rather than
   c:\lotteries_b\ like every other output of this program.  It is left
   unchanged because nothing here reads it back -- confirm whether the
   different directory is intentional. */
save c:\lotteries\1997\intermediate\10_sorted_draws_B.dta, replace

gsort id -choice
drop orig_zip marquez mba rifle bow muzzle handicap
save c:\lotteries_b\1997\intermediate\10_sorted_draws.dta, replace
drop choice_1

/* And as a final step, we save the file as a text file which can then be
   imported by just about any statistical software package.  Note that the
   number of rows in the dataset exceeds the number of rows that Excel can
   accommodate, so only about 1/3 of the data can be viewed with this
   package. */
outsheet using c:\lotteries_b\1997\data\10_sorted_draws.txt, nonames