/*****************************************************************************************
* MERGING INDIVIDUAL FILES ACROSS WAVES INTO LONG FORMAT                                *
*                                                                                       *
* To match individual level files across two waves into a long format                   *
* do the following (for more waves add wave specific prefix in the foreach statement)   *
*****************************************************************************************/

// change current file location
cd "UK life satisfaction"

// assign global macro to refer to Understanding Society data
// (forward slashes work on every platform Stata runs on and can never
//  escape a following macro the way a backslash can)
global ukhls "Understanding Society/UKDA-6614-stata/stata/stata11_se"

// loop through each wave
// For fewer waves use only the wave prefix of the waves you need to merge
foreach w in a b c d e f g h i {
    // find the wave number (position of the prefix letter in the alphabet)
    local waveno = strpos("abcdefghijklmnopqrstuvwxyz", "`w'")

    // open the individual level file: folder carries the wave number,
    // file name carries the wave letter
    use pidp `w'_* using "$ukhls/ukhls_w`waveno'/`w'_indresp", clear

    // drop the wave prefix from all variables
    rename `w'_* *

    // create a wave variable
    gen wave = `waveno'

    // save one file for each wave
    save temp`w', replace
}

// open the file for the first wave (wave a_)
use tempa, clear

// loop through the remaining waves
foreach w in b c d e f g h i {
    // append the files for the second wave onwards
    append using temp`w'
}

// check how many observations are available from each wave
tab wave

// save the long file
save longfile, replace

* up to here code from Understanding Society team

* merge wave 10 (Covid sample, April 2020)
use "ca_indresp_w.dta", clear
gen wave = 10
rename ca_* *
generate intdaty_dv = 2020
save temp2020.dta, replace

use longfile, clear
append using temp2020

// erase temporary files
foreach w in a b c d e f g h i 2020 {
    erase temp`w'.dta
}

compress
rename pidp pid // need naming from SOEP
xtset pid wave
rename intdaty_dv syear
save longfile, replace

use longfile, clear

// make approx survey year from waves
recode wave (1=2010 "≈2010") (2=2011 "≈2011") (3=2012 "≈2012") ///
            (4=2013 "≈2013") (5=2014 "≈2014") (6=2015 "≈2015") ///
            (7=2016 "≈2016") (8=2017 "≈2017") (9=2018 "≈2018") ///
            (10=2020 "4.2020"), gen(approx_syear)

* Generate life satisfaction variable from 0-100: more = more satisfied with life
capture drop life_sat*
generate life_sat = scghq1_dv if scghq1_dv >= 0

// get the min and max of the raw score
sum life_sat

// rescale the raw score to the range 0-100
gen life_sat_rs = (life_sat - r(min)) / (r(max) - r(min)) * 100

// reverse the scale: subtract from the maximum of the rescaled score
sum life_sat_rs
gen life_sat100 = r(max) - life_sat_rs
label var life_sat100 "Well-being 0-100"

* weights used, for info see: Understanding Society –UK Household Longitudinal Study: Wave 1 -9, 2009-2018. User Guide. November 2019, page 75
* The weights are tried in the order below; each one only fills cases that are still missing.
generate xsect_weight = .
// weights for adult main interview (BHPS, GPS, EMBS and IEMBS, also reference weight for Covid 19 survey)
replace xsect_weight = indinui_xw if xsect_weight == . & indinui_xw >= 0
// replace with weights for adult main interview (BHPS, GPS, EMBS but WITHOUT IEMBS) if missing
replace xsect_weight = indinub_xw if xsect_weight == . & indinub_xw >= 0
// replace with weights for adult main interview (without additional sample, only for first wave)
replace xsect_weight = indinus_xw if xsect_weight == . & indinus_xw >= 0
// these are the weights for the covid-19 survey
replace xsect_weight = betaindin_xw if xsect_weight == . & betaindin_xw >= 0

// spread the Covid-survey weight over all observations of a person
bysort pid: egen weight_wave10_generalized = max(betaindin_xw)
label var weight_wave10_generalized "Weight Covid survey generalized to all cases of person"

// weight from the last wave a person is observed in, copied to all of that person's rows
bysort pid: egen max_wave = max(wave)
generate last_weight_pers_constant_temp = xsect_weight if wave == max_wave
bysort pid: egen last_weight_pers_constant = max(last_weight_pers_constant_temp)
label var last_weight_pers_constant "last weight for each person (time constant for xtreg)"
drop last_weight_pers_constant_temp

// GHQ items: set negative codes to missing, then build
//   lin_<item>: reversed 1-4 coding
//   cat_<item>: 0 for answers 1-2, 1 for answers 3-4
foreach var of varlist scghqa scghqb scghqc scghqd scghqe scghqf scghqg scghqh scghqi scghqj scghqk scghql {
    replace `var' = . if `var' < 0
    codebook `var'
    recode `var' (1=4 "better") (2=3 "same") (3=2 "worse") (4=1 "much worse"), gen(lin_`var')
    label var lin_`var' "`:var label `var''"
    recode `var' (1/2=0 "same") (3/4=1 "more") (else=.), gen(cat_`var')
    label var cat_`var' "increased problem: `:var label `var''"
}

// age for years where missing
replace age = syear - birthy if syear > 0 & birthy > 0 & age == .
replace age = . if age < 16

label var scghqk "ghq: believe worthless"
label var cat_scghqa "Problems concentrating"
label var cat_scghqb "Sleeplessness"
label var cat_scghqc "Not feeling useful"
label var cat_scghqd "Inability to decide"
label var cat_scghqe "Constantly under strain"
label var cat_scghqf "Problems overcoming difficulties"
label var cat_scghqg "Problems enjoying day-to-day activities"
label var cat_scghqh "Inability to face problems"
label var cat_scghqi "Unhappy or depressed"
label var cat_scghqj "Losing confidence"
label var cat_scghqk "Feeling worthless"
label var cat_scghql "General happiness lower"

compress
save modified_longfile, replace

********************************************************************************
* Calculations for blog post
// -use- takes -clear-, not -replace-; with -replace- Stata aborts with
// "option replace not allowed"
use modified_longfile, clear
drop if wave < 6 // only calculate change from last wave
*keep if weight_wave10_generalized!=.
* FE likelihood to have higher problems
* One fixed-effects logit per GHQ item; odds ratios by approximate survey year
* (base year 2018) are plotted and saved as cat_<item>.gph.
eststo clear
estimates clear
foreach var of varlist scghqg scghqc scghqa scghql scghqb scghqi scghqd scghqh scghqe scghqk scghqj scghqf {
    eststo `var': xtlogit cat_`var' ib2018.approx_syear c.age##c.age [weight=last_weight_pers_constant], fe
    coefplot, keep(*.approx_syear) title("`:var label cat_`var''") base ///
        xline(1, lstyle(dot)) mlabel format(%2.1g) mlabpos(1) ///
        saving(cat_`var', replace) plotregion(lwidth(none)) eform
}

graph combine ///
    cat_scghqg.gph ///
    cat_scghqc.gph ///
    cat_scghqa.gph ///
    cat_scghql.gph ///
    cat_scghqb.gph ///
    cat_scghqi.gph ///
    cat_scghqd.gph ///
    cat_scghqh.gph ///
    cat_scghqe.gph ///
    cat_scghqk.gph ///
    cat_scghqj.gph ///
    cat_scghqf.gph ///
    , imargin(vsmall) scale(.95) col(2) ysize(7)

* descriptive: share of population with problems
foreach var of varlist scghqa scghqb scghqc scghqd scghqe scghqf scghqg scghqh scghqi scghqj scghqk scghql {
    graph bar cat_`var' [weight=xsect_weight], over(approx_syear, label(alt)) ///
        blabel(total, format(%4.2f)) ytitle("`:var label cat_`var''") ///
        plotregion(lwidth(none)) saving(cat_`var'_desc, replace)
}

graph combine ///
    cat_scghqg_desc.gph ///
    cat_scghqc_desc.gph ///
    cat_scghqa_desc.gph ///
    cat_scghql_desc.gph ///
    cat_scghqb_desc.gph ///
    cat_scghqi_desc.gph ///
    cat_scghqd_desc.gph ///
    cat_scghqh_desc.gph ///
    cat_scghqe_desc.gph ///
    cat_scghqk_desc.gph ///
    cat_scghqj_desc.gph ///
    cat_scghqf_desc.gph ///
    , imargin(small) scale(1) col(3) ysize(7) title("Share of population saying (much) more:")

* overall decline in life satisfaction, note: weight_wave10_generalized makes pretty much no difference
// reload the full file; -use- takes -clear- (not -replace-) to discard the data in memory
use modified_longfile, clear
eststo: xtreg life_sat100 c.age##c.age ib2018.approx_syear [weight=last_weight_pers_constant], vce(robust) fe
margins, at(approx_syear=(2010 2011 2012 2013 2014 2015 2016 2017 2018 2020)) post
coefplot, mlabel format(%5.3g) mlabpos(1) vertical ylabel(,angle(0)) ///
    title(Well-being on scale from 0-100) plotregion(lwidth(none)) ///
    xlabel(1 "2010" 2 "2011" 3 "2012" 4 "2013" 5 "2014" 6 "2015" 7 "2016" 8 "2017" 9 "2018" 10 "04.2020") ///
    saving(long_term_general_change.gph, replace) ///
    title("Long term well being change and Covid-19 pandemic" "(net of general age-related changes)") ///
    recast(connect)

* compare effects to effect of being unemployed and receiving unemployment-related benefits
capture drop unempl_benefits
// receiving unemployment-related benefits, or national insurance credits?
generate unempl_benefits = btype1 if btype1 >= 0
// do not count as unemployed if in paid employment
replace unempl_benefits = 0 if employ == 1 & unempl_benefits != .
xtreg life_sat100 unempl_benefits, fe vce(robust)

* different effects within the pandemic
// effect of currently having symptoms that could be corona virus
reg life_sat100 i.hassymp if hassymp > 0, vce(robust)
// differences by the categories of testresult
reg life_sat100 i.testresult if testresult > 0, vce(robust)

* different subgroups
use modified_longfile, clear

* generate variables for different subgroups
generate general_population = 1

* Monthly earnings 04.2020
capture drop monthly_earnings_042020
generate monthly_earnings_042020 = netpay_answer if netpay_answer >= 0
// missing if no earning period specified or top-coded
replace monthly_earnings_042020 = . if netpay_period < 0 | netpay_period == 5
// change weekly to monthly earnings
replace monthly_earnings_042020 = monthly_earnings_042020/7*30 if netpay_period == 1
// change pay every two weeks to monthly earnings
replace monthly_earnings_042020 = monthly_earnings_042020/14*30 if netpay_period == 2
// change pay every year to monthly earnings
replace monthly_earnings_042020 = monthly_earnings_042020/12 if netpay_period == 4

// sum up average wage 2020
sum monthly_earnings_042020 [weight=betaindin_xw], d

// Split at 1450. The ranges deliberately share the endpoint: recode applies the
// first matching rule, so exactly 1450 stays in the lower group while every
// value above it (including non-integer amounts such as 1450.5, which the
// weekly/fortnightly conversion produces) lands in the upper group.
recode monthly_earnings_042020 (0/1450=0 "poorer 50%") (1450/max=1 "richer 50 percent"), gen(upper_50_percent_2020_temp)

* Monthly earnings 01.2020
capture drop monthly_earnings_012020
generate monthly_earnings_012020 = blpay_answer if blpay_answer >= 0
// NOTE(review): the pay-period codes must be read from the period variable, not
// from the amount itself; blpay_period is assumed to exist in parallel to
// netpay_period — confirm against the Covid-survey codebook.
// missing if no earning period specified or top-coded
replace monthly_earnings_012020 = . if blpay_period < 0 | blpay_period == 5
// change weekly to monthly earnings
replace monthly_earnings_012020 = monthly_earnings_012020/7*30 if blpay_period == 1
// change pay every two weeks to monthly earnings
replace monthly_earnings_012020 = monthly_earnings_012020/14*30 if blpay_period == 2
// change pay every year to monthly earnings
replace monthly_earnings_012020 = monthly_earnings_012020/12 if blpay_period == 4

generate earnings_lost_gained_04_since_01 = monthly_earnings_042020 - monthly_earnings_012020
label var earnings_lost_gained_04_since_01 "Earnings lost/gained from 01.2020 to 04.2020"

// 1 = lost at least 100 per month, 0 = everybody else with a valid difference.
// The ranges share the endpoint -100 (first rule wins), so no value between
// -100 and 1 is left with its raw amount.
recode earnings_lost_gained_04_since_01 (min/-100=1) (-100/max=0), gen(lost_inc_since_jan)

// person-constant versions of the 2020 indicators
bysort pid: egen lost_inc_since_jan_time_const = max(lost_inc_since_jan) // those who lost income since january
bysort pid: egen upper_50_percent = max(upper_50_percent_2020_temp)
recode upper_50_percent (1=0) (0=1), generate(lower_50_percent)
bysort pid: egen high_covid_risk = max(hrisk_dv)
bysort pid: egen vhigh_covid_risk = max(vhrisk_dv)

generate self_employed = 0
replace self_employed=1 if jbsemp==2 // variable before 2020
replace self_employed=1 if semp==2 // 2020-variable

* remaining subgroup indicators
bysort pid: egen living_with_partner = max(couple)
recode age (0/70=0) (70/max=1), gen(old)
recode age (0/30=1) (30/max=0), gen(young)
recode sex (2=1) (1=0) (else=.), generate(women)
recode child1 (2=0) (1=1) (else=.), gen(school_age_child2020_temp)
bysort pid: egen with_school_age_children = max(school_age_child2020_temp)

* lists reused by the loops below
local subgroups   self_employed living_with_partner old young high_covid_risk lower_50_percent women with_school_age_children
local ghq_items   scghqa scghqb scghqc scghqd scghqe scghqf scghqg scghqh scghqi scghqj scghqk scghql
local panel_order g c a l b i d h e k j f

* general indicator for different groups
* One fixed-effects regression per subgroup; the year coefficients (base 2018)
* are plotted and saved as <subgroup>.gph.
eststo clear
estimates clear
drop if approx_syear<2014

foreach subgroup of varlist general_population `subgroups' lost_inc_since_jan_time_const {
    eststo: xtreg life_sat100 c.age##c.age ib2018.approx_syear ///
        [weight=last_weight_pers_constant] if `subgroup'==1, vce(robust) fe
    *margins, at(approx_syear=(2015 2016 2017 2018 2020)) post
    // coefplot regression from margins
    coefplot, drop(_cons *age*) mlabel format(%5.2g) mlabpos(1) vertical base ///
        ylabel(,angle(0)) title(Well-being on scale from 0-100) ///
        plotregion(lwidth(none)) saving(`subgroup',replace) title(`subgroup') ///
        xlabel(,alternate) ylabel(-10(2)8) yline(0, lstyle(dot)) recast(connect)
}

graph combine ///
    general_population.gph ///
    young.gph ///
    self_employed.gph ///
    women.gph ///
    with_school_age_children.gph ///
    lower_50_percent.gph ///
    living_with_partner.gph ///
    old.gph ///
    high_covid_risk.gph ///
    , imargin(small) scale(.75) ///
    title("Well-being change and Covid-19 pandemic for subgroups" "(net of general age-related changes)") ///
    ysize(7)

* different subgroups different indicators
drop if wave<6 // only calculate change from last couple of waves

* FE likelihood to have higher problems
* For every subgroup and every GHQ item: restrict the data temporarily, fit the
* fixed-effects logit and save the plot as <subgroup>_cat_<item>.gph.
eststo clear
estimates clear

foreach group of varlist `subgroups' {
    foreach item of varlist `ghq_items' {
        preserve
            // only keep if part of group
            keep if `group'==1
            // only keep if var exists
            drop if `item'==.
            eststo `item': xtlogit cat_`item' ib2018.approx_syear c.age##c.age ///
                [weight=last_weight_pers_constant], fe or
            estimates store m`item'
            coefplot, keep(*.approx_syear) title("`:var label cat_`item''") base ///
                xline(1, lstyle(dot)) mlabel format(%2.1g) mlabpos(1) ///
                saving(`group'_cat_`item', replace) plotregion(lwidth(none)) eform
        restore
    }
}

* one combined figure per subgroup, panels in the fixed order g c a l b i d h e k j f
foreach group of local subgroups {
    local panels
    foreach letter of local panel_order {
        local panels `panels' `group'_cat_scghq`letter'.gph
    }
    graph combine `panels', imargin(vsmall) scale(.95) col(2) ysize(7) ///
        saving(`group'_group.gph, replace) title(`group')
}

* overall life satisfaction for different groups