/*=====================================================================================

 cex.do:

   Estimates taxes for households (i.e., "consumer units (CU)") in the Consumer 
   Expenditure Survey.

		
        Author: Lorenz Kueng
		
        First version: Nov 2010
        This  version: Jul 2014

=======================================================================================*/


*------- START OF PROGRAM --------------
* Open a named log for this run. Any log still open under the same name is closed first
* (the close is wrapped in -capture- so a missing log is not an error).
local filename "CEX_TAXSIM"
capture log close `filename'
log using "$datadir/log-files/`filename'.log", name(`filename') replace




    *===================
    * MTAB variables
    *===================


*-------------------
* Estimate tax year
*-------------------
* Maps each retained CU-interview to a calendar tax year:
*   - interviews 2 and 5 use their own latest reference month; interviews 3 and 4 are
*     shifted back by 3 and 6 months, respectively;
*   - if the (shifted) month falls in July-December the tax year is that calendar year,
*     if it falls in January-June the tax year is the previous calendar year.
* Result: tempfile `tax_year' with one record per (newid, INTNO) and the variable -year-.

use newid INTNO REF_YR REF_MO using "$datadir/stata/Interview/mtab/mtab_aggregate.dta", clear

gen    spending_month = ym(REF_YR, REF_MO)
format spending_month %tm

* month used for the tax-year assignment (missing for any INTNO other than 2-5)
gen     shifted_month = spending_month - 3*(INTNO - 2) if inlist(INTNO, 2, 3, 4)
replace shifted_month = spending_month                 if INTNO == 5
format  shifted_month %tm

* one record per CU-interview: the one with the latest spending month
gsort newid INTNO -spending_month
duplicates drop newid INTNO, force

* keep the earliest interview on file and interview 5 (if available)
bysort newid: egen first_intno = min(INTNO)
keep if INTNO == first_intno | INTNO == 5
drop first_intno
tabulate INTNO

* split the shifted month into calendar year and month-of-year
gen shifted_date = dofm(shifted_month)
gen tax_year     = yofd(shifted_date)
gen tax_month    = month(shifted_date)

* estimated tax year for TAXSIM (a missing month yields a missing year, since tax_year is then missing too)
gen year = cond(tax_month >= 7, tax_year, tax_year - 1)

keep newid INTNO year
sort newid INTNO
tempfile tax_year
save `tax_year', replace



*-----------------------------------------------------
* Estimate annual expenditures for some TAXSIM inputs
*-----------------------------------------------------
* Builds one record per CU (newid) with expenditure totals summed over all retained months,
* then scales selected categories to a 12-month equivalent. Result: tempfile `mtab'.

use newid INTNO proptax_annual rent_aspay food_aspay rent_paid occupexp int_taxded child_care adult_care health_* using "$datadir/stata/Interview/mtab/mtab_aggregate.dta", clear 
/* Note:
	proptax_annual = annualized property taxes (UCC=220211, 220212)
	rent_aspay     = rent received as pay (UCC=800710)
	food_aspay     = meals received as pay (UCC=800700)
	rent_paid      = rent (UCC=210110)
	occupexp       = occupational expenses (UCC=690310,690114,690116,900002 and 1/4th of 900001)
	int_taxded     = tax deductible interest: mortgages, home equity loans, home equity lines of credit, prepayment penalty charges (UCC=220311-220314,880110,880210,880310,220321,220322)
	child_care     = child care expenses (UCC=340210-340212,670310)
	adult_care     = adult day care centres and care for elderly, invalids, handicapped, etc. (UCC=340910,340906)
	health_*       = health care expenses (UCCs 580110	580111	580113	580112	580114	580210	580310	580311	580312	580901	580904	580906	580907	580400	580902	580903	580905	540000	570901	570903	560110	560210	560310	560320	560330	560400	560900	570230	570240	570110	570210	570111	570220	550110	550320	550330	550340)
*/

* drop observations with more or less than 3 months of expenditures per interview
duplicates tag newid INTNO, g(tag)
replace tag = tag+1 // -duplicates tag- counts the OTHER copies; +1 gives the number of monthly records per CU-interview
tab tag
keep if tag==3
drop tag INTNO

gen numbmonth = 1 // summed in the collapse below => number of months CU expenditures are observed
gen healthexp = health_dur + health_serv + health_insur

* AAA and zzz are all-missing sentinels: after -aorder- they bracket every other variable,
* so the range AAA-zzz selects all variables except newid for the collapse.
gen AAA=.
gen zzz=.
aorder
order newid
collapse (sum) AAA-zzz, by(newid)

* Scale the observed total to 12 months: a CU observed for numbmonth months contributes
* total * 12/numbmonth (FIX: the factor was inverted, numbmonth/12, which shrank the totals of
* CUs with fewer than 12 months instead of scaling them up).
* NOTE(review): proptax_annual is summed over months but not rescaled here - confirm this is intended.
foreach var in rent_aspay food_aspay rent_paid occupexp int_taxded child_care adult_care healthexp { 
  replace `var' = `var' * 12/numbmonth // estimated annual expenditures, taking into account that not all CUs observed for 12 months
}
tab  numbmonth
drop numbmonth AAA zzz

tempfile mtab
sort newid
save `mtab', replace




 

    *===================
    * MEMB variables
    *===================

use salaryxIMP nonfarmxIMP farmincxIMP socrrxIMP ssixIMP anfedtx ansltx cu_code AGE in_coll newid INTNO MEMBNO using "$datadir/stata/Interview/memb/memb.dta", clear // Note: *IMP variables contain some imputations following Jonathan Fisher, David S. Johnson and Timothy M. Smeeding "Inequality of Income and Consumption: Measuring the Trends in Inequality from 1985-2010 for the Same Individuals"

* Add tax year
* The tax-year file is split in two: interview-5 records (merged on newid and INTNO) and the
* remaining first-interview records (merged on newid only, filling -year- where still missing).
preserve 
	use `tax_year', clear
	keep if INTNO==5
	tempfile tax_year5
	save `tax_year5'
	duplicates report newid
	
	use `tax_year', clear
	drop if INTNO==5
	tempfile tax_year2
	save `tax_year2'
	duplicates report newid
	tab INTNO
restore
merge m:1 newid INTNO using `tax_year5', nogen
merge m:1 newid       using `tax_year2', nogen update // -update- only fills values that are missing in the master data


* Set missing values of in_coll to 3, "not at all enrolled"
replace in_coll = 3 if in_coll==.

* Construct indicator>0 if member paid taxes (or received refunds) to construct TAXSIM variable agex below.
* FIX: the indicator used to be 1 when the tax variable was missing or negative, i.e. it flagged
*      members with NO reported taxes and skipped members with positive amounts. It is now 1 only
*      for a reported, non-zero amount (positive = taxes paid, negative = refund).
* NOTE(review): intended rule inferred from the comment above - confirm against the CE coding of anfedtx/ansltx.
generate tax1 = !missing(anfedtx) & anfedtx != 0 // federal
generate tax2 = !missing(ansltx)  & ansltx  != 0 // state and local
generate tax  = tax1 + tax2 
drop anfedtx* ansltx* tax1 tax2

* Approximation of gross income (salaryx, nonfarmx, ... are abbreviations of the *IMP variables loaded above)
gen grossinc = salaryx + nonfarmx + farmincx + socrrx 
drop if grossinc ==.

* Construct age of reference person and spouse
* (Note: cu_code==1 and cu_code==2 are not unique within a CU, so a unique reference person
*  and spouse have to be selected first.)
preserve
	* Step 1: one record per member
	sort newid MEMBNO cu_code
	duplicates drop newid MEMBNO, force

	* diagnostics: how often is there more than one reference person / spouse per CU?
	gen cu1 = cu_code == 1 // reference person flag
	gen cu2 = cu_code == 2 // spouse flag
	duplicates report newid cu1
	duplicates report newid cu2

	* Step 2: within each relationship code keep the oldest member of the CU
	gsort newid cu_code -AGE
	duplicates drop newid cu_code, force
	duplicates report newid cu1
	duplicates report newid cu2

	* Step 3: age by relationship code, 0 when the CU has no such member (or the age is missing)
	*   1: ref person, 2: spouse, 6: sibling, 7: parent, 5: in-law, 8: other related, 9: unrelated
	foreach code in 1 2 6 7 5 8 9 {
		gen age`code' = cond(cu_code == `code' & AGE < ., AGE, 0)
	}
	keep newid age*
	collapse (max) age*, by(newid)

	* Step 4: a few CUs have no spouse (cu_code==2) but might still file jointly; in that case
	*         use the first available other adult in the order 6, 7, 5, 8, 9 as "spouse" age
	foreach code in 6 7 5 8 9 {
		replace age2 = age`code' if age2 == 0 & age`code' != 0
	}

	keep newid age1 age2 // age1 = age of reference person; age2 = age of spouse (if any)

	sort newid
	tempfile age_ref_spouse
	save `age_ref_spouse', replace
restore
merge m:1 newid using `age_ref_spouse', nogenerate


* Qualified Dependent Exemption AND Dependent Care Tax Credit. Source: IRS Publication 501: "Exemptions, Standard Deduction, and Filing Information"

* Gross income test (approximates the infeasible self-support test): a member passes if gross
* income is below the amount listed for the tax year. The table is built ONCE here and shared by
* qualchld, qualrel and depchild (it was previously pasted three times). Years outside 1978-2013
* (or a missing year) leave the amount missing, and the test then fails, exactly as before.
tempvar exemption incometest
generate `exemption' = .
replace  `exemption' =  750 if year==1978
replace  `exemption' = 1000 if year>=1979 & year<=1984
replace  `exemption' = 1040 if year==1985
replace  `exemption' = 1080 if year==1986
replace  `exemption' = 1900 if year==1987
replace  `exemption' = 1950 if year==1988
replace  `exemption' = 2000 if year==1989
replace  `exemption' = 2050 if year==1990
replace  `exemption' = 2150 if year==1991
replace  `exemption' = 2300 if year==1992
replace  `exemption' = 2350 if year==1993
replace  `exemption' = 2450 if year==1994
replace  `exemption' = 2500 if year==1995
replace  `exemption' = 2550 if year==1996
replace  `exemption' = 2650 if year==1997
replace  `exemption' = 2700 if year==1998
replace  `exemption' = 2750 if year==1999
replace  `exemption' = 2800 if year==2000
replace  `exemption' = 2900 if year==2001
replace  `exemption' = 3000 if year==2002
replace  `exemption' = 3050 if year==2003
replace  `exemption' = 3100 if year==2004
replace  `exemption' = 3200 if year==2005
replace  `exemption' = 3300 if year==2006
replace  `exemption' = 3400 if year==2007
replace  `exemption' = 3500 if year==2008
replace  `exemption' = 3650 if year==2009
replace  `exemption' = 3650 if year==2010
replace  `exemption' = 3700 if year==2011
replace  `exemption' = 3800 if year==2012
replace  `exemption' = 3900 if year==2013
* !missing() is required: in Stata a missing value compares larger than any number, so
* "grossinc < ." would otherwise be TRUE for years without a table entry.
generate byte `incometest' = (grossinc < `exemption') & !missing(`exemption')

* Qualified children:
generate qualchld = (cu_code==3 | cu_code==4 | cu_code==6 | cu_code==7) /// relationship test: 3= child, 4= grandchild, 6= sibling, 7= parent
                  & (AGE<age1 | AGE<age2)                               /// 1st age test     : must be younger than either head or spouse to qualify as a child
                  & (AGE<19 | (AGE<24 & in_coll==1) )                   /// 2nd age test     : must be either under age 19 or under 24 and a college student
                  & `incometest'                                        //  self-support test: approximate gross income test instead of infeasible self-support test

* Qualified relatives:
generate qualrel  = (qualchld!=1)             /// not a qualifying child
                  & (cu_code!=1 & cu_code!=2) /// not reference person or head of household or spouse
                  & `incometest'              //  self-support test


* Calculate TAXSIM input variables (depx, agex, depchild, pwages, swages): 

* Number of (qualified) dependents:
generate depx = qualrel + qualchld		
						
* Number of CU taxpayers over age 65:
* FIX: "AGE>65" alone is TRUE for a missing AGE (missing sorts above every number), which counted
*      members of unknown age as over 65; they are now excluded.
generate agex = (AGE>65) & !missing(AGE) & (tax>0)		
	 
* Number of children under age 17 (for child tax credit in TAXSIM. Starts in 1998 - Taxpayer Relief Act of 1997). Source: IRS Publication 972: "Child Tax Credit"					
generate depchild = (cu_code==3 | cu_code==4 | cu_code==6| cu_code==7) ///
                  & (AGE<17)  ///
                  & `incometest'

drop `exemption' `incometest'
						
* Earnings (wages, salaries and self-employment income) of a single member
local earnings salaryx + nonfarmx + farmincx

* Primary taxpayer (reference person): own earnings
gen pwages = `earnings' if cu_code == 1

* "Spouse": earnings of ALL other CU members are pooled into the secondary-earner income.
*  Note: if the primary taxpayer turns out to be single or head of household, this secondary
*        income is added to the primary taxpayer's income further below.
gen swages = `earnings' if cu_code != 1

* Missing components are marked with a large negative sentinel so that a CU total containing
* any missing component can be recognised (and reset to missing) after the sum below.
replace pwages = -999999999 if missing(pwages) & cu_code == 1 // observations with negative income will be dropped by TAXSIM
replace swages = -999999999 if missing(swages) & cu_code != 1
foreach v in socrrx ssix {
	replace `v' = -999999999 if `v' == .
}

* Aggregate members to one record per CU-interview
collapse (sum) pwages swages socrrx ssix depx agex depchild, by(newid INTNO)

* a sum below -100000000 can only arise from a sentinel => set the total back to missing
foreach v in pwages swages socrrx ssix {
	replace `v' = . if `v' < -100000000
}

* Impose TAXSIM's restrictions to prevent observations being dropped
replace depx     = min(depx, 15)
replace agex     = min(agex, 2)
replace depchild = min(depchild, depx)

sort newid INTNO
tempfile memb
save `memb', replace




    *================
    * Merge data sets
    *================

* Load FMLY variables (the second line of the varlist holds the contribution variables)
use newid INTNO MARITAL1 state_recoded *IMP yq_num ///
	alimox chldsupx collexpx cntrchrx cntrelgx cntedorx ///
	using "$datadir/stata/Interview/fmly/fmly.dta", clear

*----------------------------------------------
* Replace missing values of chldsupx with zeros
*----------------------------------------------
replace chldsupx = 0 if chldsupx == .

* Each merge below keeps matched records only, so the sample shrinks to CU-interviews that
* are present in every source.

* Merge MEMB variables
merge 1:1 newid INTNO using `memb', keep(match) nogenerate

* Merge MTAB variables. Note: This imposes the sample selection on the expenditure variables on the fmly and memb data, too.
merge m:1 newid using `mtab', keep(match) nogenerate

* Merge tax year. Note: This selects the first and last interview of each CU (see code above).
merge 1:1 newid INTNO using `tax_year', keep(match) nogenerate





	*=============================================================================================
	*  (i) Deal with STATE, and 
	* (ii) Set missing values to zero of variables that are discontinued
	*=============================================================================================

*-------------------------------------------------------------------------------------------
* (i) Replace missing state with 0 (no state tax calculations) or with -1 (all state taxes)
*-------------------------------------------------------------------------------------------
* state_recoded is the CE's STATE variable (FIPS codes), with all states that contain recoded
* values set to zero (codes R and RR).

* Mapping FIPS to SOI state codes
* SOI codes number the 50 states plus D.C. consecutively from 1 to 51 in the order listed
* below, so the SOI code is simply the position of the FIPS code in this list.
* (North Dakota, FIPS 38, and Wyoming, FIPS 56, are never sampled in the CE.)
local fips_codes                           ///
	 1  2  4  5  6  8  9 10 11 12          /// AL AK AZ AR CA CO CT DE DC FL    -> SOI  1-10
	13 15 16 17 18 19 20 21 22 23          /// GA HI ID IL IN IA KS KY LA ME    -> SOI 11-20
	24 25 26 27 28 29 30 31 32 33          /// MD MA MI MN MS MO MT NE NV NH    -> SOI 21-30
	34 35 36 37 38 39 40 41 42 44          /// NJ NM NY NC ND OH OK OR PA RI    -> SOI 31-40
	45 46 47 48 49 50 51 53 54 55 56       //  SC SD TN TX UT VT VA WA WV WI WY -> SOI 41-51

* default 0 = no state tax calculation (alternatively, set to -1 and take the average across all 51 states)
gen state = 0

local soi = 0
foreach fips of local fips_codes {
	local ++soi
	replace state = `soi' if state_recoded == `fips'
}

drop state_recoded


*---------------------------------------------------------------------------------------
* (ii) Contributions and other variables with missing values
*---------------------------------------------------------------------------------------
* Missing values of the variables below are replaced by zero.

local zero_fill foodsmpxIMP chdlmpxIMP
local zero_fill `zero_fill' alimox collexpx cntrchrx cntrelgx cntedorx // contributions

foreach v of local zero_fill {
	replace `v' = 0 if `v' == .
}

* check what is still missing
tabmiss
summarize





    *============================================
    * Construct the 21 TAXSIM input variables
    *============================================


* 1. tax year
*    -year-: constructed from MTAB files

* 2. state (SOI codes)
*    -state-: constructed from FMLY files

* 3. marital status
*    2 = married (assumed to file jointly); otherwise 3 = head of household if the CU has
*    dependents, and 1 = single filer in all remaining cases
gen     mstat = cond(MARITAL1 == 1, 2, cond(depx > 0, 3, 1))
replace mstat = 1 if mstat == . // impose TAXSIM's restrictions to prevent a crash

* 4. number of dependents
*    -depx-: constructed from MEMB files above

* 5. number of taxpayers age>65 (0,1, or 2)
*    -agex-: constructed from MEMB files above

* 6. wage and salary of taxpayer (incl. self-employment). Note: Will be dropped by TAXSIM if negative.
*    -pwages-: constructed from MEMB files above

* 7. wage and salary of spouse (incl. self-employment). Note: Will be dropped by TAXSIM if negative.
*    -swages-: constructed from MEMB files above

* Filers that are not married (e.g. head of household for tax purposes): all family earnings are
* attributed to the primary taxpayer and the secondary earner gets none.
replace pwages = pwages + swages if mstat != 2
replace swages = 0               if mstat != 2

* 8. dividends. Note: Qualified dividends only from 2003 on.
*    finincx = dividends, royalties, estates, trusts. We have to assume that those are ordinary, not qualified dividends.
gen dividends = finincx

* 9. interest and other property income
*    The terms come in three groups: income received (+), adjustments not reported elsewhere (-),
*    and refunds whose taxability depends on the itemization status (+).
gen otherprop =  ///
	  intearnx   /// (+) interest from savings accounts + bonds.  Note: We need to assume that interest is fully taxable.
	+ othrincx   /// (+) other income (scholarships, stipends,...)
	+ inclossa   /// (+) income from roomers and borders
	+ inclossb   /// (+) income from other rental units
	+ aliothx    /// (+) income from alimony. (CE: "total amount of income from regular contributions from alimony and other sources, such as from persons outside the CU received by all CU members")
	+ chdothx    /// (+) income from child support payments (CE: "total amount of income from child support payments, other than in a lump sum amount, received by ALL CU members")
	+ saleincx   /// (+) sale of goods
	+ setlinsx   /// (+) insurance settlement receipts
	+ lumpsumx   /// (+) estates, trusts,royalties,alimony,lump-sum payments,...
	+ ssoverpx   /// (+) social security refunds received from overpayment.  Note: There might be a tax penalty associated with this.
	+ insrfndx   /// (+) insurance policy refunds received
	+ rent_aspay /// (+) rent as pay (constructed from MTAB files)
	+ food_aspay /// (+) food as pay (constructed from MTAB files)
	- findretx   /// (-) contribution to IRA, KEOGH
	- collexpx   /// (-) student support paid
	- alimox     /// (-) alimony paid
	- chldsupx   /// (-) child support paid
	+ slrfundx   /// (+) state and local income tax refund received
	+ ptaxrfdx   //  (+) property tax refund received
* NOTE: slrfundx and ptaxrfdx are taxable income only for itemizers. They are excluded from income
*       in the second run of TAXSIM if the standard deduction was chosen.

* 10. taxable pensions
*     pensionx = pensions, private annuities, annuities from IRA,Keogh
gen pensions = pensionx

* 11. gross social security income
*     socrrx = social security and rail-road retirement income. Note: SOCRRX includes FRRETIRX.
gen gssi = socrrx

* 12. other non-taxable transfer income
gen transfers = ///
	  compensx /// workers' compensation + veterans' benefits
	+ welfarex /// public assistance + welfare
	+ foodsmpx /// food stamps and electronic benefits. Note: FOODSMPX combines JFDSTMPA and is missing 1982-86 (set to zero for those years).
	+ ssix     //  supplemental security income

* 13. rent paid
gen rentpaid = rent_paid

* 14. property and other taxes (part of itemized deductions)
*     proptax_annual = annual property taxes paid (constructed from MTAB files)
gen proptax = proptax_annual

* 15. additional personal itemized deductions (except mortgage, state and property tax)
*     Starts at zero: occupexp + healthexp will be added for the 2nd run of TAXSIM after adjustment
*     for AGI rules. Note: 'occupexp' are job expenses and 'healthexp' are medical and dental
*     expenses (both constructed from the MTAB files).
generate otheritem = 0

* 16. child care expenses
generate childcare =  ///
	  child_care /// babysitting and day-care centres, nursery, and preschools (constructed from MTAB files above)
	+ adult_care //  care for elderly, invalids, handicapped, and adult day care centre expense (constructed from MTAB files above)

* 17. unemployment compensation
generate ui = unemplx

* 18. number of dependents under age 17
*     -depchild-: constructed from MEMB files above

* 19. mortgage interest
*     int_taxded = interest on mortgage, home equity loan, home equity line of credit and prepayment
*                  penalty charges from (i) owned dwellings and (ii) owned vacation homes (constructed from MTAB files)
*     taxpropx   = personal property taxes for vehicles
*     Note: Due to serious sample breaks in the contribution variables (cntrchrx: charitable,
*           cntedorx: educational, cntrelgx: religious contributions) I do not include them.
generate mortgage = int_taxded + taxpropx

* 20. short-term capital gains
generate stcg = 0 // insufficient information in CE survey

* 21. long-term capital gains
generate ltcg = 0 // insufficient information in CE survey


* Keep identifiers, the 21 TAXSIM inputs, and the four variables needed to adjust the inputs
* before the 2nd run (slrfundx, ptaxrfdx, occupexp, healthexp)
local ids      newid INTNO
local inputs   year state mstat depx agex pwages swages dividends otherprop pensions gssi transfers ///
	rentpaid proptax otheritem childcare ui depchild mortgage stcg ltcg
local run2vars slrfundx ptaxrfdx occupexp healthexp
keep `ids' `inputs' `run2vars'


* check data
tabmiss
summarize

* optional: save data before 1st run of TAXSIM
compress
sort newid INTNO
save "$datadir/TAXSIM_input.dta", replace





    *==============
    * Impute Taxes
    *==============

*---------------------------------------------------------------
* 1st run of TAXSIM to determine AGI and itemization status
*---------------------------------------------------------------

* (Re-)install the taxsim9 command from the NBER; failures are shown but do not stop the program
capture noisily net from "http://www.nber.org/stata"
capture noisily net describe taxsim9
capture noisily net install taxsim9

* run TAXSIM; the time is displayed before and after the call
display c(current_time)
taxsim9, replace full
display c(current_time)

* rename the TAXSIM output variables
do "$datadir/do-files/subprograms/TAXSIMrename.do"



*-----------------------------------------------------------------------------------------------------------
* 2nd run of TAXSIM to calculate taxes: Use AGI from 1st run to adjust for AGI rules and itemization status
*-----------------------------------------------------------------------------------------------------------
* Only the part of health care and job expenses that exceeds a fraction of AGI is added to the
* itemized deductions, and tax refunds are removed from income for non-itemizers.
	
* healthcare expenses: deductible amount = max(healthexp - floor, 0), with a year-specific floor

generate AGItest82 = 0.03  * fedAGI // AGI rule for 1960-1982 Note: 'fedAGI' is TAXSIM variable v10
generate AGItest86 = 0.05  * fedAGI // AGI rule for 1983-1986
generate AGItest87 = 0.075 * fedAGI // AGI rule since 1987 

generate temp=.
replace  temp = healthexp - AGItest82 if healthexp >  AGItest82 & year<=1982
replace  temp =0                      if healthexp <= AGItest82 & year<=1982
replace  temp = healthexp - AGItest86 if healthexp >  AGItest86 & year> 1982 & year<=1986
replace  temp =0                      if healthexp <= AGItest86 & year> 1982 & year<=1986
replace  temp = healthexp - AGItest87 if healthexp >  AGItest87 & year> 1986
replace  temp =0                      if healthexp <= AGItest87 & year> 1986

replace healthexp = temp

drop temp


* job expenses: deductible amount = max(occupexp - 2% of AGI, 0)
* NOTE(review): the 2% floor is applied to all tax years here - confirm this is intended for early years.

generate AGItest = 0.02 * fedAGI
generate temp=.
replace  temp = occupexp-AGItest if occupexp >  AGItest
replace  temp =0                 if occupexp <= AGItest
* FIX: this line used to read "replace temp = occupexp", which overwrote the adjusted amount and
*      left occupexp untouched, so the AGI floor was never applied to job expenses.
replace  occupexp = temp

replace otheritem = otheritem + occupexp + healthexp

drop temp occupexp healthexp


* state and local tax refund and property tax refund income: taxable for itemizers only

replace otherprop = otherprop - (slrfundx + ptaxrfdx) if dedallowed==0 // Note: 'dedallowed' is TAXSIM variable v17 

drop slrfundx ptaxrfdx


* keep identifiers and the 21 TAXSIM inputs only (drops the output of the 1st run and the AGItest* helpers)
keep newid INTNO year ///
 state mstat depx agex pwages swages dividends otherprop pensions gssi transfers ///
 rentpaid proptax otheritem childcare ui depchild mortgage stcg ltcg  


* TAXSIM with AGI rules and itemization status
* (the time is displayed before and after the call)

display c(current_time)
taxsim9, replace full
display c(current_time)

* rename the TAXSIM output variables
do "$datadir/do-files/subprograms/TAXSIMrename.do"

* store the result of the 2nd run; it is merged with the 3rd run below
compress
sort newid INTNO
tempfile taxsim1
save `taxsim1', replace



*------------------------------------------------------------------------------------
* 3rd run of TAXSIM to calculate statutory federal marginal tax rates using plan(51)
*------------------------------------------------------------------------------------

* start again from the identifiers and the 21 TAXSIM inputs
local inputs year state mstat depx agex pwages swages dividends otherprop pensions gssi transfers ///
	rentpaid proptax otheritem childcare ui depchild mortgage stcg ltcg
keep newid INTNO `inputs'

* taxsim9_lorenz.ado is located in the subprograms folder, so run it from there and remove the
* files it leaves behind before returning to the data folder
cd "$datadir/do-files/subprograms"
display c(current_time)
taxsim9_lorenz, replace full plan(51) planval(1)
display c(current_time)
foreach leftover in "ftp.txt" "msg.txt" "taxsim_out.dta" {
	erase "`leftover'"
}
cd "$datadir"

* rename the TAXSIM output variables
do "$datadir/do-files/subprograms/TAXSIMrename_lorenz.do"

* combine with the result of the 2nd run
merge 1:1 taxsimid using `taxsim1', nogenerate
drop taxsimid



*---------------------------------------------------------------
* Merge imputed taxes with CE FMLY files
*---------------------------------------------------------------
* Taxes estimated from interview 5 are attached to the interview-5 FMLY record only. Taxes
* estimated from the CU's first interview are attached to all remaining FMLY records of the CU.

compress
sort newid INTNO

* (a) interview-5 estimates, keyed by newid and INTNO
preserve
	keep if INTNO == 5
	duplicates report newid
	sort newid INTNO
	tempfile taxsimY5
	save `taxsimY5', replace
restore

* (b) first-interview estimates, keyed by newid only
keep if INTNO != 5
duplicates report newid INTNO
drop INTNO
duplicates report newid
sort newid
tempfile taxsimY2
save `taxsimY2', replace


use "$datadir/stata/Interview/fmly/fmly.dta", clear

merge 1:1 newid INTNO using `taxsimY5', nogenerate
merge m:1 newid using `taxsimY2', nogenerate update // -update- only fills values that are still missing

compress
sort newid INTNO
save "$datadir/stata/Interview/fmly/fmly.dta", replace

* remove the files left behind by TAXSIM
foreach leftover in "ftp.txt" "msg.txt" "taxsim_out.dta" {
	erase "`leftover'"
}
capture erase "TAXSIM_input.dta"


*---------------------------------------------------------------
* Additional steps (optional)
*---------------------------------------------------------------

* report all tax rates in percentages
foreach rate in MTRstat rexem rxmp rchild raddcr {
	replace `rate' = `rate' * 100
}
replace MTRstat = MTRstat + rxmp // add 33% bubble for 1988-1990

* rename variables that have the same name in TAXSIM and the CE FMLY files
foreach v in year state proptax {
	rename `v' `v'_taxsim
}
label variable year_taxsim    "tax year [TAXSIM]"
label variable state_taxsim   "state [TAXSIM]"
label variable proptax_taxsim "(annual) proptax [TAXSIM]"

save "$datadir/stata/Interview/fmly/fmly.dta", replace

log close `filename'
*------- END OF PROGRAM --------------


***************************************************************************************************
* !!! NOTES: 
*      - State taxes for households with missing state identifier are currently set to zero
*      - Contributions are currently set to zero due to sample breaks 
*
*  Please send me an email if you know how to fix those issues.
*
* !!!
***************************************************************************************************
