/**
 * strel - Estimation of relative survival
 *
 * Version history:
 *   1.2 TB                   Jan 2007 - updated to Stata 8 syntax
 *   1.1 BR                   Oct 2006
 *   1.0 BR                   May 2003 - added period option
 *   5.7 (strel2) AS,MH,BS,AM Jul 2000
 */
*! v1.2.7 Oct 2008: Fixed problem with variance calculation and removed unnecessary 
*!                  division by sum(iwgt) when using standardisation
program define strel, eclass byable(recall) sortpreserve
    version 8.2
	if replay() {                  /* show last estimates */
		syntax [, *]
        if _by() { 
            error 190
        }
        if "`e(cmd2)'" != "strel" {
            error 301
        }
        ml display, `options'
        exit 
    }
    
    syntax anything(name=breaks id="numlist") using/ [if] [in] [iweight/], [ Mergeby(varlist) ///
        SHowrates at(numlist asc >=0) GRoup(integer 0) TRy(integer 3) Continue ///
        DIagdate(varname numeric) PERiod(numlist asc >1900 <2050 min=1 max=2 integer) ///
        HYbrid STANdardise(varlist min=1) STDprop(namelist min=1 max=1) BRenner ///
        SAving(string) replace level(integer 0) FINal DIVider SEParator(integer 0) ///
        noMODel * ]

    // Check data is st
    st_is 2 analysis

    // No age variable
    confirm new variable age rate

    // Add .dta extension to using filename if necessary
    cap confirm file `using' 
    if _rc {
        local using `using'.dta
    }
    
    // Sanity checks on options
    numlist "`breaks'", ascending range(>=0)
    confirm file `"`using'"'
    if "`saving'" != "" & "`replace'" == "" & _by()==0 {
        confirm new file "`saving'"
    }
    if `group' < 0 | `group' > 2 {
        di as error "Group must be 0, 1 or 2"
        exit 100
    }
    // period analysis
    if "`period'" != "" & "`diagdate'" == "" {
        di as error "You must specify date of diagnosis in diagdate() option with period approach"
        exit 100
    }
    if "`period'" == "" & "`diagdate'" != "" {
        di as error "You must specify period() option with diagdate() option"
        exit 198
    }
    if "`hybrid'" != "" & "`period'" == ""  {
        di as error "You must specify period() and diagdate() options with hybrid approach"
        exit 100
    }
    // Standardisation
    if "`brenner'" != "" & "`stdprop'" == "" & "`weight'" == ""  {
        di as error "Brenner standardisation requires weights to be specified using either [iweight] or stdprop() option"
        exit 100
    }
    if "`standardise'" == "" & "`brenner'" != ""  {
        di as error "Must use standardise() option with brenner"
        exit 100
    }
    if "`stdprop'" != "" {
        sum `stdprop', meanonly
        if r(min) < 0 | r(max) > 1 {
            di as error "`stdprop' must contain proportions"
            exit 450
        }
    }
    
    if "`standardise'" != "" & "`brenner'" == "" & `group' > 0  {
        di as error "Cannot regroup during traditional standardisation"
        exit 100
    }

    // Get default level
    if `level' == 0 {
        local level = c(level)
    }
    // Respect if, in and by
    marksample touse, novarlist strok

    preserve

    qui {
        tempvar in fup time key temp num den selogl loglam rhcens timep tag
    
        gen `time' = _t - _t0
        // Expand numlist
        numlist "`breaks'"
        local breaks `r(numlist)'
        
        // Drop obs to create sample
        keep if _st & `touse'
        if "`standardise'" != "" {
            foreach var of varlist `standardise' {
                drop if missing(`var')
            }
        }

        // Drop obs for period option
        if "`period'" != "" {
            // Get period start and end year
            tokenize "`period'"
            local yearstart `1'
            if "`2'" != "" {
                local yearend `2'
            }
            else {
                local yearend `1'
            }
            // Right-censor at end of period
            gen byte `rhcens' = int(`diagdate' + (_t - _t0)*365.25) > d(31dec`yearend') & ! missing(`diagdate', _t, _t0)
            replace _d = 0 if `rhcens'
            replace _t = _t0 + (d(31dec`yearend') - `diagdate')/365.25 if `rhcens'
            replace `time' = _t - _t0
            // Elapsed time (years) between diagnosis and beginning of period - used for left truncation
            gen `timep'=(d(1jan`yearstart') - `diagdate')/365.25
            if "`hybrid'" != "" {
                // Hybrid approach - detect last incident case
                sum `diagdate', meanonly
                if r(max) >= d(1jan`yearstart') {
                    nois di as text "NOTE: Hybrid approach requested but incident case found in `yearstart' or after - option hybrid ignored"
                    local hybrid
                }
                else {
                    // Include incident cases just before period
                    local hybridyear = `yearstart' - (`yearend' - `yearstart' + 1)
                    count if `diagdate' >= d(1jan`hybridyear')
                    nois di as text "Hybrid approach - including extra " r(N) " cases incident in `hybridyear' or after"
                    replace `timep'=(d(1jan`hybridyear') - `diagdate')/365.25 if `diagdate' >= d(1jan`hybridyear')
                }
            }
            replace `timep' = 0 if `timep' < 0
            // Drop subjects wholly excluded from period
            drop if `diagdate' >= d(31dec`yearend') | `time' <= `timep'
        }
        else {
            gen `timep' = 0
        }
        // No of subjects in analysis
        count
        local subjects = r(N)
        
        // Get external mortality rates
        gen age=int(_t)
        cap confirm var _origin
        if !_rc {
            replace age=int(_origin + _t)
        }
        replace age=99 if age>99 & !missing(age)
        sort age `mergeby'
        merge age `mergeby' using `"`using'"'
        count if _merge==1
        if r(N) > 0 {
            nois di as error "WARNING: " r(N) " records fail to find standard rate"
        }
        keep if _merge==3
        drop _merge

        // Calculate N, deaths and person years by interval
        egen double start=cut(`time'), at(`breaks')
        gen p_years = .
        gen subjects = .
        gen double end = .
        gen `in' = .
        foreach cutp of local breaks {
            if "`last'" != "" {
                gen `fup' = (min(`cutp', `time') - max(`last', `timep')) if `time' >= `last' & `cutp' > `timep'
                if "`standardise'" != "" & "`brenner'" == "" {
                    egen `temp'=sum(`fup'), by(`standardise')
                    replace p_years = `temp' if start == `last'
                    drop `temp'
                }
                else {
                    sum `fup', meanonly
                    replace p_years = r(sum) if start == `last'
                }
                replace `in' = `time' >= `last' & `cutp' > `timep'
                if "`standardise'" != "" & "`brenner'" == "" {
                    egen `temp'=sum(`in'), by(`standardise')
                    replace subjects = `temp' if start == `last'
                    drop `temp'
                }
                else {
                    sum `in', meanonly
                    replace subjects = r(sum) if start == `last'
                }
                replace end = `cutp' if start == `last'
                drop `fup'
            }
            local last `cutp'
        }
        // Standardisation
        if "`standardise'" != "" {
            tempvar iwgt
            if "`weight'" == "" {
                // Calculate iweights (stud[i])
                bysort `standardise': gen `iwgt' = _N / `subjects'
            }
            else {
                // Get iweights from user specified [iweight=]
                local iwgt `exp'
            }
            // Brenner approach
            if "`brenner'" != "" & "`stdprop'" != "" {
                // Weights modified by stdprop var (stand[i])
                tempvar stud
                gen `stud' = `iwgt'
                replace `iwgt' = `stdprop' / `stud'
                local wgtlist `stdprop' `stud'
                char `stud'[varname] stud
                format `stud' %5.4f
                //char `stand'[varname] stand
                //format `stand' %5.4f
            }
            if "`brenner'" == "" {
                // Stratify data when collapsing
                local _byvars `_byvars' `standardise'
            }
            // Check weights are constant within strata
            bysort `standardise': egen `temp' = sd(`iwgt')
            sum `temp', meanonly
            if r(max) > 0 {
                di as error "Weights are not constant within standardisation strata"
                exit 498
            }
            // Display weights
            char `iwgt'[varname] weight
            format `iwgt' %5.4f
            bysort `standardise': gen `tag' = _n==1
            if "`brenner'" == "" {
                nois di as text "Weights:"
            }
            else {
                nois di as text "Weights (Brenner):"
            }
            nois li `standardise' `wgtlist' `iwgt' if `tag', noobs subvarname
            if "`brenner'" == "" {
                // Check weights sum to 1 if using stratified standardisation
                sum `iwgt' if `tag', meanonly
                if abs(r(sum) - 1) > 0.01 {
                    di as error "Weights do not sum to 1 (sum to `r(sum)')"
                    exit 498
                }
            }
            local wgt [iw=`iwgt']
            gen __n=1
        }

        keep if _d > 0      /* keep deaths only */
        drop if mi(start)
        
        if "`brenner'" == "" {
            // Collapse and use fweights for speed
            collapse (count) __n=_d (mean) subjects p_years `iwgt', by(start end rate `_byvars') fast
            local wgt [fw=__n]
        }
        gen _width = end - start
        bys `_byvars' start: gen _py = p_years / (__n * _N)      /* allocate py for interval to those who died in the interval */
        egen interval = group(start)
        if "`model'" == "" {
            // Display ml output
            local output nois
        }
        if "`standardise'" != "" & "`brenner'" == "" {
            // Traditional standardisation - stratified
            egen __group=group(`standardise')
            sum __group, meanonly
            local ngroups = r(max)
            local estgroup 1
            gen alpha=.
            gen se=.
            // Store number of intervals
            tab interval
            local numint = r(r)
            while `estgroup' <= `ngroups' {
                local groupif "if __group == `estgroup'"
                `output' di as text _n "Strata `estgroup':" _c
                foreach var of varlist `standardise' {
                    sum `var' `groupif', meanonly
                    `output' di as text " `var' = " as result r(min) _c
                }
                `output' di
                // Estimate alpha with ml
                `output' _relestimate `wgt' `groupif', try(`try') group(0) `options'
                // Check for collinearity
                if e(df_m) != (`numint' - 1) {
                    di as error "Some intervals not estimated - please try wider intervals"
                    exit 498
                }
                local ++estgroup
                cap drop `temp'
                predict `temp' if e(sample)
                replace alpha=`temp' if !missing(`temp')
                drop `temp'
                predict `temp' if e(sample), stdp
                replace se=`temp' if !missing(`temp')
            }
        }
        else {
            // Estimate alpha with ml
            `output' _relestimate `wgt', try(`try') group(`group') `options'
            predict alpha
            predict se, stdp
        }
    
        // compute crude estimates
        collapse (sum) deaths=__n (mean) subjects p_years interval alpha se `iwgt', by(start end `indepvars' `_byvars')
        if "`standardise'" != "" & "`brenner'" == "" {
            local stdby bysort `standardise' (start):
        }
        gen _width = end - start
        local z = invnorm((100 + `level')/200)
        `stdby' gen Crude=exp(-sum(deaths/p_years*_width))*100
        `stdby' gen `loglam'=log(sum(deaths/p_years*_width))
        `stdby' gen `num'=sum( (deaths/(p_years^2))*(_width^2) )
        `stdby' gen `den'=sum( (deaths/p_years)*(_width) )
        gen `selogl'=sqrt(`num')/(`den')
        gen seCr=abs(-Crude+100*exp(-sum(deaths/p_years*_width)/exp(-`z'*`selogl')))/`z'
        gen Cr_lo=exp(-exp(`loglam'+`z'*`selogl'))*100
        gen Cr_up=exp(-exp(`loglam'-`z'*`selogl'))*100
        drop `num' `den' `loglam' `selogl'
        
        // compute relative estimates
        `stdby' gen RelS=exp(-sum(_width*alpha*(alpha>0))) *100
        `stdby' gen seRS=RelS*sqrt(sum(se*se*_width*_width))
        `stdby' gen `loglam'=log(sum(_width*alpha*(alpha>0)))
        `stdby' gen `num'=sum(se*se*_width*_width)
        `stdby' gen `den'=sum(_width*alpha*(alpha>0))
        gen `selogl'=sqrt(`num')/(`den')
        gen Re_lo=exp(-exp(`loglam'+`z'*`selogl'))  * 100
        gen Re_up=exp(-exp(`loglam'-`z'*`selogl'))  * 100
        drop `num' `den' `loglam' `selogl'

        if "`standardise'" != "" & "`brenner'" == "" {
            // Calculate weighted RS from stratified estimates
            replace Crude = Crude * `iwgt'
            replace seCr = seCr^2 * `iwgt'^2	/* estimation of the variance */
            replace RelS = RelS * `iwgt'
            replace seRS = seRS^2 * `iwgt'^2	/* estimation of the variance */
            collapse (sum) deaths subjects p_years Crude seCr Cr_lo Cr_up RelS seRS Re_lo Re_up `iwgt' (mean) interval alpha se, by(start end `indepvars')
    		replace seCr = sqrt(seCr)	/* calculation of the standardised standard error */
    		replace seRS = sqrt(seRS)	/* calculation of the standardised standard error */
            replace Cr_lo = Crude - `z' * seCr
            replace Cr_up = Crude + `z' * seCr
            replace Re_lo = RelS - `z' * seRS
            replace Re_up = RelS + `z' * seRS
        }
    
        gen str1 I="."
        format I %1s
        sort start
    
        // fixes 1st line not starting from zero
        gen fix=2 if _n==1&start>0
        if fix==2 {
            expand fix
            sort start
            replace start=0 in 1
            replace interval=. in 1
            replace end=start[2] in 1
            replace deaths=0 in 1
            replace se=0 in 1
            replace alpha=0 in 1
            replace Crude=99.99 in 1
            replace Cr_lo=. in 1
            replace Cr_up=. in 1
            replace RelS=99.99 in 1
            replace Re_lo=. in 1
            replace Re_up=. in 1
        }
        drop fix
    

        // to fix "running out of subjects" problem (if continue), and also "jumps"
        // between intervals (i.e. gaps) forces new lines into table
        gen double jump=start[_n+1] - end
        replace jump=0 if jump==.
        local tol 1e-8
        expand 2 if jump > `tol'
        sort start
        gen double `temp'=start
        replace `temp'=end[_n-1] if  jump>`tol'
        by start:replace interval=. if _n==2 & jump>`tol'
        by start:replace alpha=0 if _n==2 & jump>`tol'
        by start:replace deaths=0 if _n==2 & jump>`tol'
        by start:replace jump=0 if _n==1 & jump>`tol'
        replace start=`temp' if jump>`tol'
        replace end=start+jump if jump>`tol'

        local nbreaks : word count `breaks'
        local cutp : word `nbreaks' of `breaks'
        // if continue specified
        if "`continue'" != "" {
            gen lastline=_n==_N & end[_N]<`last'
            expand 2 if lastline
            sort start
            replace lastline=_n==_N & end[_N]<`last'
            replace interval=. if lastline
            replace alpha=0 if lastline
            replace deaths=0 if lastline
            replace start=end[_n-1] if lastline
            replace end=`last' if lastline
            
            // lines that straddle 1,5,10 - should ONLY occur when real data exhausted
            replace jump=(start<1.0 & end>1.0)
            expand 2 if jump
            sort start
            by start:replace end=1.0 if _n==1 & jump
            by start:replace start =1.0 if _n==2 & jump
            replace jump=(start<5.0 & end>5.0)
            expand 2 if jump
            sort start
            by start:replace end=5.0 if _n==1 & jump
            by start:replace start =5.0 if _n==2 & jump
            replace jump=(start<10.0 & end>10.0)
            expand 2 if jump
            sort start
            by start:replace end=10.0 if _n==1 & jump
            by start:replace start =10.0 if _n==2 & jump
        }
            
        format deaths %6.0f
        format start end Crude Cr_lo Cr_up RelS Re_lo Re_up %5.2f
        format alpha %7.4f
        replace Crude=99.99 if Crude>99.99&Crude<.
        replace RelS=99.99 if RelS>99.99&RelS<.
        replace Cr_up=99.99 if Cr_up>99.99&Cr_up<.
        replace Re_up=99.99 if Re_up>99.99&Re_up<.
        // Show estimates at chosen end-points only
        gen byte `key' = 1
        if "`at'" != "" {
            replace `key' = 0
            foreach endp of local at {
                replace `key' = 1 if end == `endp'
            }
        }
    
        // Saved results in e()
        ereturn local cmd2 = "strel"
        local n = 1
        sort end
        while end[`n'] != . {
            local i = round(end[`n']*12,1)
            ereturn scalar deaths`i' = deaths[`n']
            if "`standardise'" == "" {
                ereturn scalar alpha`i' = alpha[`n']
                ereturn scalar se_alpha`i' = se[`n']
            }
            ereturn scalar CS`i' = Crude[`n']
            ereturn scalar seCS`i' = seCr[`n']
            ereturn scalar CS_lo`i' = Cr_lo[`n']
            ereturn scalar CS_hi`i' = Cr_up[`n']
            ereturn scalar RS`i' = RelS[`n']
            ereturn scalar seRS`i' = seRS[`n']
            ereturn scalar RS_lo`i' = Re_lo[`n']
            ereturn scalar RS_hi`i' = Re_up[`n']
            local n = `n'+1
        }
    
        // B=* means extrapolated estimate or nc ...  now I
        // "lastline" is by definition extrapolated
        replace I="*" if deaths==0|alpha==.
    }   // end qui

    // ****** Output ********
    
    // Death rates table (optional)
    format start end %5.2f
    gen d_rate=deaths/p_years
    format p_years %10.2f
    format d_rate %12.6f
    if "`showrates'" != "" {
        di as text _n "Death rates by interval"
        list start end subjects deaths p_years d_rate if `key' & !mi(interval), noobs table
    }

    // Header
    sum deaths, meanonly
    ereturn scalar N = `subjects'
    ereturn scalar deaths = r(sum)
    di as text _n "Subjects   " as result `subjects'
    di as text    "Deaths     " as result r(sum)

    // Main results table display
    di as text _n "Table of crude and relative survival probabilities"
    di as text "(expressed as percentages with `level'% confidence intervals)"
    local dispvars start end interval deaths alpha Crude Cr_lo Cr_up RelS Re_lo Re_up I
    if "`standardise'" != "" {
        local dispvars start end interval deaths Crude Cr_lo Cr_up RelS Re_lo Re_up I
        di as text "Standardised by " as result "`standardise'" _c
        if "`brenner'" != "" {
            di as text " (Brenner method)"
        }
        else {
            di
        }
    }
    if "`period'" != "" {
        di as text "Based on period " as result `yearstart' as text " to " as result `yearend' as text " `hybrid'"
    }
    list `dispvars' if `key', `divider' separator(`separator') noobs table

    // Save the survival estimations in a file
    if `"`saving'"' != "" {
        qui {
            drop if start==.
            gen month = round(end*12,1)
            rename RelS RS
            rename Re_lo RS_lo
            rename Re_up RS_up
            label data "Survival estimates from strel"
            label variable start "Start time of interval"
            label variable end "End time of interval"
            label variable month "End time of interval (months)"
            label variable subjects "Subjects at start of interval"
            label variable deaths "Deaths during the interval"
            label variable p_years "Person years of observation during interval"
            label variable d_rate "Crude death rate during interval"
            label variable alpha "Estimated interval-specific excess mortality hazard"
            label variable RS "Relative survival"
            label variable seRS "Standard error of the relative survival"
            label variable RS_lo "Lower 95% boundary of the relative survival"
            label variable RS_up "Upper 95% boundary of the relative survival"
            format deaths %6.0f
            format RS seRS RS_lo RS_up %12.8f
            format alpha %11.8f
            compress
            sort month
            rename Crude CS
            rename seCr seCS
            rename Cr_lo CS_lo
            rename Cr_up CS_up
            label variable CS "Crude survival"
            label variable seCS "Standard error of the crude survival"
            label variable CS_lo "Lower 95% boundary of the crude survival"
            label variable CS_up "Upper 95% boundary of the crude survival"
            format CS seCS CS_lo CS_up %12.8f
            if "`standardise'" == "" {
                rename se se_alpha
                label variable se_alpha "Standard error of the excess mortality hazard"
                format se_alpha %12.8f
                keep  `_byvars' start end month subjects deaths p_years d_rate alpha se_alpha CS seCS CS_lo CS_up RS seRS RS_lo RS_up
                order `_byvars' start end month subjects deaths p_years d_rate alpha se_alpha CS seCS CS_lo CS_up RS seRS RS_lo RS_up
            }
            else {
                keep  start end month subjects deaths p_years d_rate CS seCS CS_lo CS_up RS seRS RS_lo RS_up
                order start end month subjects deaths p_years d_rate CS seCS CS_lo CS_up RS seRS RS_lo RS_up
            }
        }   // end qui
        if (_byindex() == 1) {
            save `"`saving'"', `replace'
        }
        else {
            // in second or later by group - add to saving file
            qui append using `"`saving'"'
            sort `_byvars'
            save `"`saving'"', replace
        }
    }
end

program define _relestimate
    syntax [varlist (default=none)] [fweight iweight] [if], try(integer) group(integer) *
    tempname minalpha
    if "`weight'" != "" {
        local wgt [`weight' `exp']
    }
    // Must have at least some pyears
    qui sum p_years `wgt', meanonly
    if r(sum) == 0 {
        di as error "No person-years found"
        exit 459
    }
    local converge 0
    while `converge' < `try' {
        sum interval, meanonly
        local first = r(min)
        local last = r(max)
        if r(min) != r(max) {
            // > one interval - create dummies
            xi i.interval
            local indepvars _Iinterval_*
        }
        else  {
            // no dummies
            local indepvars
        }
        ml model lf strel_ll (`indepvars') `if' `wgt', title(Relative survival: estimation of alpha)
        if "`final'" == "" {
            cap nois ml maximize, `options' search(quietly)
        }
        else {
            cap ml maximize, `options' search(quietly)
        }
        if _rc | (`group' == 2 & `try' - `converge' > 1) {
            // Problem with maximisation or check for negative alpha
            predict alpha
            sum alpha, meanonly
            if _rc | r(min) < 0 {
                nois di as text "(regrouping)"
                // look for interval with most neg alpha
                sum alpha, meanonly
                scalar `minalpha' = r(min)
                sum interval if alpha==`minalpha', meanonly
                if r(min) < `last' {
                    // join with next interval
                    replace interval = interval + 1 if interval == r(min)
                    nois di as text "Combining interval " r(min) " with " r(min) + 1
                }
                else if r(min) > `first' {
                    // join with previous
                    replace interval = interval - 1 if interval == r(min)
                    nois di as text "Combining interval " r(min) " with " r(min) - 1
                }
                else {
                    nois di as error "Only one group - cannot regroup again!"
                    exit 430
                }
            }
            else {
                local converge `try'    /* escape while loop */
            }
            drop alpha
        }
        else {
            local converge `try'    /* escape while loop */
        }
        local ++converge
    }
    if "`final'" != "" {
       nois ml display
    }
end
