function [bound,maxiter,maxsup]=optselect(pscore); % This version: December 03, 2008 % Calculates the optimal cutoff point for the propensity % score (to calculate Optimal Subpopulation Average Treatment % Effect, OSATE) based on the propensity score distribution, % as proposed by Crump, R.K., V.J. Hotz, G.W. Imbens and % O.A. Mitnik (2008) "Dealing with limited overlap in % estimation of average treatment effects", Biometrika. % The cutoff is calculated assuming homoskedasticity and a % weight function that gives equal weight to all observations. % % [bound,maxiter,maxsup]=optselect(pscore) % % Input: % pscore = propensity score vector. % % Outputs: % bound = optimal bound (cutoff point) of the % propensity score. % maxiter = indicator=1 if optimal cutoff could not % be calculated because the maximum number % of iterations (100) attained. % In that case bound is set to 0.1. % maxsup = indicator=1 if optimal cutoff is 0 % (whole support of prop. score distribution % to be used) % warning off MATLAB:divideByZero; ipscore=1./pscore; mscore=1-pscore; imscore=1./mscore; g=ipscore.*imscore; % Initial values for a_low avoids extremely high values of gamma_low function % and initial value of a_high makes sure is withing observed range of pscore a_low=0.0001; b_low=1-a_low; gamma_high=4; a_high=min(0.5-sqrt(0.25-1/(2*gamma_high)),max(pscore)); b_high=1-a_high; gamma_high=mean(g((pscore>=a_high)&(pscore<=b_high))); % Check if condition for optimally trimming the propensity score distribution holds % if it does not, then stops and reports 0 as optimal lower bound and 1 as optimal upper bound % (and sets "maxsup"=1) if max(g)<= 2*mean(g); % If this condition is satisfied, it is not necessary to calculate optimal % bound (optimal lower and upper bounds are 0 and 1) maxsup=1; % Indicator function "maxsup" is one when optimal PS support is maximum (0 to 1) bound=0; maxiter=0; return; % Ends this run of the function end; % If the function continues here is because optimal bound needs to be calculated maxsup=0; % Find the optimal bound checkk=0; icount=0; while checkk==0, icount=icount+1; difa=abs(a_high-a_low); gamma_low=mean(g((pscore>=a_low)&(pscore<=b_low))); a_low=0.5-sqrt(0.25-1/(2*gamma_low)); b_low=0.5+sqrt(0.25-1/(2*gamma_low)); a_high=0.5-sqrt(0.25-1/(2*gamma_high)); b_high=0.5+sqrt(0.25-1/(2*gamma_high)); gamma_high=mean(g((pscore>=a_high)&(pscore<=b_high))); % When a_low and a_high stop converging performs a grid seach between the values % of a_low and a_high to find a different starting point of the parameter that is % not moving if difa==abs(a_high-a_low) & icount>1; var_grid=[]; a_grid=[]; for grid=a_low:difa/100:a_high; q_grid=length(g((pscore>=grid)&(pscore<=(1-grid))))/length(pscore); gamma_grid=mean(g((pscore>=grid)&(pscore<=(1-grid)))); var_grid=[var_grid;gamma_grid/q_grid]; a_grid=[a_grid;grid]; end; a_low=a_grid(min(find(var_grid==min(var_grid)))); b_low=1-a_low; a_high=a_grid(max(find(var_grid==min(var_grid)))); b_high=1-a_high; gamma_low=mean(g((pscore>=a_low)&(pscore<=b_low))); gamma_high=mean(g((pscore>=a_high)&(pscore<=b_high))); end; q_low=length(g((pscore>=a_low)&(pscore<=b_low)))/length(pscore); q_high=length(g((pscore>=a_high)&(pscore<=b_high)))/length(pscore); var_low=gamma_low/q_low; var_high=gamma_high/q_high; % Stops when a_high and a_low are close enough OR imply exactly the same variance % OR a maximum number of iterations is attained checkk=(abs(a_high-a_low)<0.001)+(var_high==var_low)+(icount>100); end, bound=a_low; maxiter = icount>100; % Indicator for max # of iterations attained if icount>100, bound=0.1; end