* Set working directory (the path syntax differs between Windows and Mac/Unix) *
* Replace the placeholder paths below with your own directory before running *
* c(os) reports the operating system Stata is running on, so only the matching *
* cd command is executed and the do-file does not halt on the one that does not apply *

if c(os) == "Windows" {

	* For Windows (Again, use your own directory) *

	cd "c:\mydir\myfolder"
}
else {

	* For a Mac or Unix (Your directory will differ, remember) *

	cd "mydir/myfolder"
}

* Confirm which directory Stata is now working in *

pwd

* If you're having trouble... *

help cd

* Setting the seed allows us to replicate sampling procedures *

set seed 123456789

* Create 67 observations, 1 for each of AL's 67 counties *

set obs 67

* Create an index numbering each county from 1 to 67 *

gen county_index=_n

* Take a random sample of 10 counties *

sample 10, count

* Create string variable that labels which counties remain by cross-referencing AL's license plate #'s *
* The variable starts as the empty string, which is Stata's missing value for strings *

tab county_index

gen county_name=""
replace county_name="Montgomery" if county_index==3
replace county_name="Autauga" if county_index==4
replace county_name="Bibb" if county_index==7
replace county_name="Clay" if county_index==17
replace county_name="Covington" if county_index==23
replace county_name="Dallas" if county_index==27
replace county_name="Fayette" if county_index==32
replace county_name="Henry" if county_index==37
replace county_name="Marshall" if county_index==50
replace county_name="Randolph" if county_index==56

* The names above are hard-coded for one specific random draw, so stop here if any sampled county *
* was left unnamed; otherwise the variables filled in below would silently stay missing for it *

assert !missing(county_name)

* Indicate the year for each observation *

gen year=2018

* Add demographic data for % of each county that is black (https://www.census.gov/quickfacts/fact/table/US/PST045219) *

gen black=.
replace black=59.3 if county_name=="Montgomery"
replace black=20.1 if county_name=="Autauga"
replace black=21.3 if county_name=="Bibb"
replace black=13.8 if county_name=="Clay"
replace black=12.5 if county_name=="Covington"
replace black=70.7 if county_name=="Dallas"
replace black=11.7 if county_name=="Fayette"
replace black=25.9 if county_name=="Henry"
replace black=3.2 if county_name=="Marshall"
replace black=19.2 if county_name=="Randolph"

* Create variable for % of county voting GOP for governor in 2018 election (https://www.sos.alabama.gov/sites/default/files/voter-pdfs/2018/2018-Official-General-Election-Results-Certified-2018-11-27.pdf) *
* Each value is 100 times the GOP vote count divided by the sum of the three vote counts entered for that county *

gen pct_gop_gov=.
replace pct_gop_gov=100*(28491/(48722+28491+62)) if county_name=="Montgomery"
replace pct_gop_gov=100*(13994/(13994+5734+21)) if county_name=="Autauga"
replace pct_gop_gov=100*(5082/(5082+1799+15)) if county_name=="Bibb"
replace pct_gop_gov=100*(4164/(1241+4164+9)) if county_name=="Clay"
replace pct_gop_gov=100*(9852/(9852+2168+22)) if county_name=="Covington"
replace pct_gop_gov=100*(4917/(4917+10295+8)) if county_name=="Dallas"
replace pct_gop_gov=100*(5455/(5455+2025+9)) if county_name=="Fayette"
replace pct_gop_gov=100*(4655/(4655+2045+14)) if county_name=="Henry"
replace pct_gop_gov=100*(22031/(22031+5193+70)) if county_name=="Marshall"
replace pct_gop_gov=100*(5729/(5729+1859+12)) if county_name=="Randolph"

* Preview the data by printing its first 5 observations *

list in f/5

* Arrange the observations from lowest to highest GOP support, then inspect them in the Data Browser *

sort pct_gop_gov
browse

* Move county name to the front of the variable list, with % black right behind it *

order county_name black, first

* Create dichotomous variable taking value of "1" if county is majority black, "0" else, then tab results *
* Stata treats a missing number as larger than any real number, so black>=50 is true when black is missing; *
* the !missing() condition leaves such counties missing instead of coding them as "1" *

gen majority_black=.
replace majority_black=1 if black>=50 & !missing(black)
replace majority_black=0 if black<50
tab majority_black

* Report counties that are majority black *
* (majority_black!=0 is also true for missing values, so those are excluded explicitly) *

tab county_name if majority_black!=0 & !missing(majority_black)

* Alternatively... *

tab county_name if majority_black==1

* Attach descriptive value labels to the majority_black indicator created above *
* The label set is built one value at a time and then assigned to the variable *

label define race_label 0 "Majority White"
label define race_label 1 "Majority Black", add
label values majority_black race_label

tabulate majority_black

* Demonstrate creating a throwaway variable and then removing it again *

generate new_var = 10
drop new_var

* Demonstrate renaming a variable *

rename (county_index) (license_number)

* Undo the rename so the original name is restored *

rename (license_number) (county_index)

* Using grouping commands, create an index of counties by their majority black status *
* county_index in parentheses sorts the counties within each group without defining the groups, *
* so the numbering is the same on every run instead of depending on how the sort orders tied observations *

bysort majority_black (county_index): gen race_index=_n

* Using grouping commands, create a variable that counts the number of counties in each category of majority_black *
* count() tallies the nonmissing values of majority_black within each group *

bysort majority_black (county_index): egen race_count=count(majority_black)

* Save your current dataset in your working directory and then clear memory *

save unit2data1.dta, replace
clear

* Create a new dataset that contains the county seat of each of the 10 previously sampled counties *
* The same seed and the same sampling commands as before are used so that the same 10 counties are drawn *

set seed 123456789

set obs 67

* Create an index numbering each county from 1 to 67 *

gen county_index=_n

* Take a random sample of 10 counties *

sample 10, count

* Create string variable that labels which counties remain by cross-referencing AL's license plate #'s *
* Both string variables start as the empty string, which is Stata's missing value for strings *

tab county_index

gen county_name=""
replace county_name="Montgomery" if county_index==3
replace county_name="Autauga" if county_index==4
replace county_name="Bibb" if county_index==7
replace county_name="Clay" if county_index==17
replace county_name="Covington" if county_index==23
replace county_name="Dallas" if county_index==27
replace county_name="Fayette" if county_index==32
replace county_name="Henry" if county_index==37
replace county_name="Marshall" if county_index==50
replace county_name="Randolph" if county_index==56

gen county_seat=""
replace county_seat="Montgomery" if county_index==3
replace county_seat="Prattville" if county_index==4
replace county_seat="Centreville" if county_index==7
replace county_seat="Ashland" if county_index==17
replace county_seat="Andalusia" if county_index==23
replace county_seat="Selma" if county_index==27
replace county_seat="Fayette" if county_index==32
replace county_seat="Abbeville" if county_index==37
replace county_seat="Guntersville" if county_index==50
replace county_seat="Wedowee" if county_index==56

* The names and seats above are hard-coded for one specific random draw, so stop here if any *
* sampled county was left without a name or a seat rather than saving incomplete data *

assert !missing(county_name) & !missing(county_seat)

* save this dataset *

save unit2data2.dta, replace

* Combine the county-seat data in memory with the first saved dataset, matching observations on county_index *
* The match-result variable is given its standard name, _merge, explicitly; it is inspected and then removed *

merge 1:1 county_index using "unit2data1.dta", generate(_merge)
browse
drop _merge

save "unit2data2.dta", replace

* Let's add in data for the 2014 governor's election and drop unneeded variables (https://www.sos.alabama.gov/sites/default/files/voter-pdfs/2014/2014GeneralResults-WithoutWriteIn.pdf) *
* expand 2 duplicates every observation; generate() marks the added copies with 1 and the originals with 0, *
* so the copies are identified directly instead of by assuming they occupy rows 11 and up *

expand 2, generate(is_copy)
drop race_index race_count majority_black black
replace year=2014 if is_copy==1
replace pct_gop_gov=. if is_copy==1
drop is_copy

* Fill in the 2014 GOP share for each county, computed the same way as the 2018 share *

replace pct_gop_gov=100*(23811/(33366+23811+125)) if county_name=="Montgomery" & year==2014
replace pct_gop_gov=100*(9449/(9449+3646+27)) if county_name=="Autauga" & year==2014
replace pct_gop_gov=100*(3525/(1368+3525+7)) if county_name=="Bibb" & year==2014
replace pct_gop_gov=100*(3214/(3214+1223+3)) if county_name=="Clay" & year==2014
replace pct_gop_gov=100*(6155/(6155+1777+5)) if county_name=="Covington" & year==2014
replace pct_gop_gov=100*(4116/(4116+8456+18)) if county_name=="Dallas" & year==2014
replace pct_gop_gov=100*(4849/(4849+1966+9)) if county_name=="Fayette" & year==2014
replace pct_gop_gov=100*(3333/(3333+1780+3)) if county_name=="Henry" & year==2014
replace pct_gop_gov=100*(16523/(16523+3345+37)) if county_name=="Marshall" & year==2014
replace pct_gop_gov=100*(4076/(4076+1917+11)) if county_name=="Randolph" & year==2014

* Convert the data to "wide" format: one row per county, with a separate pct_gop_gov column for each year *

help reshape
reshape wide pct_gop_gov, j(year) i(county_index)
browse

* Convert back to "long" format (the command is shorter now because Stata remembers the previous reshape) *

reshape long

* Bring the first dataset back in and save (m:1 because each county now has several rows in memory but one row in the file) *
* The match-result variable is given its standard name, _merge, explicitly *

merge m:1 county_index using "unit2data1.dta", generate(_merge)

save "unit2data2.dta", replace




































