$ontext
Logit,Probit Estimation -----------------------
Erwin Kalvelagen, Amsterdam Optimization
Data: http://pages.stern.nyu.edu/~wgreene/Text/tables/TableF21-1.txt
$offtext
* Use CPLEX as the solver for QCP models.
option qcp=cplex;

*-----------------------------------------------------------
* raw data from Greene
*-----------------------------------------------------------

sets
   i   'records'        /case1*case32/
   p0  'all variables'  /constant,grade,gpa,tuce,psi/
;

* One row per record. GAMS assigns each value to a column by its
* position under the column header, so keep the alignment intact.
* The 'constant' column is not part of the raw data; it is filled
* in later when the regressor matrix is formed.
table data(i,p0) 'raw data'
          GPA     TUCE    PSI    GRADE
case1     2.66    20      0      0
case2     2.89    22      0      0
case3     3.28    24      0      0
case4     2.92    12      0      0
case5     4.00    21      0      1
case6     2.86    17      0      0
case7     2.76    17      0      0
case8     2.87    21      0      0
case9     3.03    25      0      0
case10    3.92    29      0      1
case11    2.63    20      0      0
case12    3.32    23      0      0
case13    3.57    23      0      0
case14    3.26    25      0      1
case15    3.53    26      0      0
case16    2.74    19      0      0
case17    2.75    25      0      0
case18    2.83    19      0      0
case19    3.12    23      1      0
case20    3.16    25      1      1
case21    2.06    22      1      0
case22    3.62    28      1      1
case23    2.89    14      1      0
case24    3.51    26      1      0
case25    3.54    24      1      1
case26    2.83    27      1      1
case27    3.39    17      1      1
case28    2.67    24      1      0
case29    3.65    21      1      1
case30    4.00    23      1      1
case31    3.10    21      1      0
case32    2.39    19      1      1
;

display data;
*-----------------------------------------------------------
* extract data
* form y, x
*-----------------------------------------------------------

set p(p0) 'independent variables' /constant,gpa,tuce,psi/;

parameters
   y(i)    'dependent variable (grade)'
   X(i,p)  'independent variables'
;

* y is the GRADE column; x holds the regressors plus a column of ones
* for the intercept (the raw data has no 'constant' column).
y(i)            = data(i,'grade');
x(i,p)          = data(i,p);
x(i,'constant') = 1;
display y,x;

* check the assumption that y(i) is binary
abort$sum(i$(y(i)<>0 and y(i)<>1),1) "GRADE should be binary";

* the APE calculation for PSI assumes that psi is binary
abort$sum(i$(x(i,'psi')<>0 and x(i,'psi')<>1),1) "PSI should be binary";
*-----------------------------------------------------------
* solve OLS as QP
*-----------------------------------------------------------

* Collects the output of every estimation method in one table.
* Display layout: 3 decimals, 1 index in the rows, 2 in the columns.
parameter results(*,*,*) 'replicate Greene table 17.1 ';
option results:3:1:2;

variables
   sse       'sum of squared errors'
   coeff(p)  'estimated coefficients'
   e(i)      'error term'
;

equations
   obj     'objective'
   fit(i)  'linear fit'
;

* minimize the sum of squared residuals ...
obj..
   sse =e= sum(i, sqr(e(i)));

* ... where each residual is defined by the linear model
fit(i)..
   y(i) =e= sum(p, coeff(p)*x(i,p)) + e(i);

model ols / obj, fit /;
solve ols minimizing sse using qcp;

results(p,'OLS(QP1)','coeff') = coeff.l(p);
display results;
*-----------------------------------------------------------
* solve OLS as QP (alternative formulation)
*-----------------------------------------------------------

* Same least-squares problem, but the residuals are substituted
* directly into the objective, so no constraints remain.
equation unconobj 'unconstrained objective';

unconobj..
   sse =e= sum(i, sqr(y(i) - sum(p, coeff(p)*x(i,p))));

model ols2 / unconobj /;
solve ols2 minimizing sse using qcp;

results(p,'OLS(QP2)','coeff') = coeff.l(p);
display results;
*-----------------------------------------------------------
* solve OLS as a system of linear equations
*
* solve the normal equations
*
*    (X'X) b = X'y
*
*-----------------------------------------------------------

alias(p,pp);

* precompute the left-hand-side matrix X'X
parameter xx(p,pp) "inner product (X'X)";
xx(p,pp) = sum(i, x(i,p)*x(i,pp));

equation normal(p) 'normal equations';

* row p of (X'X) b = X'y; the right-hand side X'y is formed inline
normal(p)..
   sum(pp, xx(p,pp)*coeff(pp)) =e= sum(i, x(i,p)*y(i));

* square system (one equation per coefficient): solve as CNS
model ols3 / normal /;
solve ols3 using cns;

results(p,'OLS(NRML)','coeff') = coeff.l(p);
display results;
*-----------------------------------------------------------
* solve OLS using python/numpy
*-----------------------------------------------------------

parameter theta(p) 'estimated coefficients';

* Dollar control options must start in column 1 of their own line.
* Arguments: row set, column set, regressor matrix, dependent
* variable, and the parameter receiving the estimates.
$libinclude linalg ols i p x y theta

results(p,'OLS(py)','coeff') = theta(p);
display results;
*-----------------------------------------------------------
* Logit model 1 : optimization
*-----------------------------------------------------------

variable lnL 'log likelihood';
equation LogitObj 'log likelihood for Logit model';

* Logit log likelihood:
*    lnL = sum_i { y_i * x_i'b - log(1 + exp(x_i'b)) }
LogitObj..
   lnL =e= sum(i, y(i)*sum(p, coeff(p)*x(i,p))
                  - log[1 + exp(sum(p, coeff(p)*x(i,p)))] );

model logit1 /LogitObj/;

* reset levels (no cheating): do not start from the OLS solution
coeff.l(p) = 0;

solve logit1 using nlp maximizing lnL;

results(p,'LOGIT1','coeff') = coeff.l(p);
display results;
*-----------------------------------------------------------
* Logit model 2 : system of equations
*-----------------------------------------------------------

alias(p,pp);
equation LogitFirstOrder 'Logit first order conditions';

* Gradient of the logit log likelihood with respect to coeff(p),
* set to zero:
*    sum_i { y_i - exp(x_i'b)/(1+exp(x_i'b)) } * x_ip = 0
LogitFirstOrder(p)..
   sum(i, {y(i) - exp(sum(pp, coeff(pp)*x(i,pp)))
                  / [1 + exp(sum(pp, coeff(pp)*x(i,pp)))]} * x(i,p)) =e= 0;

model logit2 /LogitFirstOrder/;

* reset levels so the solve does not start from the previous solution
coeff.l(p) = 0;

solve logit2 using cns;

results(p,'LOGIT2','coeff') = coeff.l(p);
display results;
*-----------------------------------------------------------
* Logit APE
*-----------------------------------------------------------

* every regressor except the intercept
set pnoc(p) 'p except const';
pnoc(p) = not sameas(p,'constant');

* linear index x'b at the current coefficient levels
parameter xb(i) 'Xb (intermediate expression)';
Xb(i) = sum(p, coeff.l(p)*x(i,p));

* derivative-based APE: sample mean of f(x'b)*b, with f the logistic
* density. The PSI entry is overwritten further below.
results(pnoc,'LOGIT','APE') =
   sum(i, exp(Xb(i))/sqr(1+exp(Xb(i)))*coeff.l(pnoc))/card(i);

* linear index with PSI forced to 0 and to 1 for every record
parameter xb2(i,*) 'Xb2 (Xb with PSI=0 and PSI=1)';
xb2(i,'PSI=0') = sum(p$(not sameas(p,'PSI')), coeff.l(p)*x(i,p));
xb2(i,'PSI=1') = xb2(i,'PSI=0')+coeff.l('PSI');

* logistic cdf evaluated at both indices
parameter plogis(i,*) 'logistic distribution';
plogis(i,'PSI=1') = 1/(1+exp(-xb2(i,'PSI=1')));
plogis(i,'PSI=0') = 1/(1+exp(-xb2(i,'PSI=0')));

* PSI is a dummy: its APE is the mean difference in probabilities
results('PSI','LOGIT','APE') =
   sum(i,plogis(i,'PSI=1')-plogis(i,'PSI=0'))/card(i);

* sample mean of the density itself (the APE scale factor)
results("mean f(x'b)",'LOGIT','APE') =
   sum(i, exp(Xb(i))/sqr(1+exp(Xb(i))))/card(i);

display plogis,Xb2;
display results;
*-----------------------------------------------------------
* Probit model
*-----------------------------------------------------------

equation probitObj 'log likehood for Probit model';

* Probit log likelihood; errorf is the standard normal cdf:
*    lnL = sum_i { y_i*log(Phi(x_i'b)) + (1-y_i)*log(1-Phi(x_i'b)) }
probitObj..
   lnL =e= sum(i, y(i)*log(errorf(sum(p, coeff(p)*x(i,p))))
                  + (1-y(i))*log(1-errorf(sum(p, coeff(p)*x(i,p)))));

model probit /probitObj/;

* reset levels so the solve does not start from the previous solution
coeff.l(p) = 0;

solve probit maximizing lnL using nlp;

results(p,'PROBIT','coeff') = coeff.l(p);

* derivative-based APE: sample mean of phi(x'b)*b, with phi the
* standard normal density. The PSI entry is overwritten below.
results(pnoc,'PROBIT','APE') =
   sum(i, 1/sqrt(2*pi) * exp(-0.5*sqr(sum(p, coeff.l(p)*x(i,p))))
          * coeff.l(pnoc))/card(i);

* same as for logit APE: the index with PSI forced to 0 and to 1 must
* be recomputed from the probit coefficients
xb2(i,'PSI=0') = sum(p$(not sameas(p,'PSI')), coeff.l(p)*x(i,p));
xb2(i,'PSI=1') = xb2(i,'PSI=0')+coeff.l('PSI');

* PSI is a dummy: its APE is the mean difference in probabilities
results('PSI','PROBIT','APE') =
   sum(i, errorf(xb2(i,'PSI=1'))-errorf(xb2(i,'PSI=0')))/card(i);

* sample mean of the density itself (the APE scale factor)
results("mean f(x'b)",'PROBIT','APE') =
   sum(i, 1/sqrt(2*pi) * exp(-0.5*sqr(sum(p, coeff.l(p)*x(i,p)))))/card(i);

display results;
*-----------------------------------------------------------
* Complementary log log model
*-----------------------------------------------------------

equation compLogLogObj 'log likehood for comp log log model';

* Complementary log-log log likelihood with P(y=1) = 1-exp(-exp(x'b)):
*    lnL = sum_i { y_i*log(1-exp(-exp(x_i'b))) + (1-y_i)*log(exp(-exp(x_i'b))) }
* The second term is written as -exp(x_i'b), which is the same value
* but cannot hit log(0) when exp(-exp(x_i'b)) underflows.
compLogLogObj..
   lnL =e= sum(i, y(i)*log(1-exp(-exp(sum(p, coeff(p)*x(i,p)))))
                  - (1-y(i))*exp(sum(p, coeff(p)*x(i,p))));

model comploglog /compLogLogObj/;

* reset levels so the solve does not start from the previous solution
coeff.l(p) = 0;

solve comploglog maximizing lnL using nlp;

results(p,'CLOGLOG','coeff') = coeff.l(p);

* linear index x'b at the complementary log-log coefficients
Xb(i) = sum(p, coeff.l(p)*x(i,p));

* derivative-based APE: sample mean of f(x'b)*b with
* f(z) = exp(z)*exp(-exp(z)). The PSI entry is overwritten below.
results(pnoc,'CLOGLOG','APE') =
   sum(i, exp(Xb(i))*exp(-exp(Xb(i))) * coeff.l(pnoc))/card(i);

* same as for logit APE: the index with PSI forced to 0 and to 1 must
* be recomputed from the complementary log-log coefficients
xb2(i,'PSI=0') = sum(p$(not sameas(p,'PSI')), coeff.l(p)*x(i,p));
xb2(i,'PSI=1') = xb2(i,'PSI=0')+coeff.l('PSI');

* PSI is a dummy: mean of P(PSI=1) - P(PSI=0), where
* P = 1-exp(-exp(x'b)), so the difference reduces to the form below
results('PSI','CLOGLOG','APE') =
   sum(i, exp(-exp(xb2(i,'PSI=0')))-exp(-exp(xb2(i,'PSI=1'))))/card(i);

* sample mean of the density itself (the APE scale factor)
results("mean f(x'b)",'CLOGLOG','APE') =
   sum(i, exp(Xb(i))*exp(-exp(Xb(i))))/card(i);

display results;
|