Wednesday, July 8, 2009

SHAPLEY VALUE MACRO.sas

*SHAPLEY VALUE MACRO;
option nomprint nomlogic;
options compress=yes;
%let logoutput_clear=;
%let data=egtask.impw;
%let _dvar=PayrollHrs;
%let var=
Unit_HardA
Unit_HardB
Unit_HardC
Unit_Soft
;
*This macro, called combo(r), when execute will create a combination of n taken at a time r.;
%macro combo(r);
data combo&r.;
keep v1-v&r.;
array word $&maxl. w1-w&n. (&things.);
array rr (*) r1-r&r.;
array v $&maxl. v1-v&r.;
%do i=1 %to &r.;
%if &i.=1 %then %do;
do r&i.=1 to &n.-(&r.-&i.);
%end;
%else %do;
do r&i.=r%eval(&i.-1)+1 to &n.-(&r.-&i.);
%end;
%end;
do k=1 to &r.;
v(k)=word (rr(k));
end;
output;
%do i=1 %to &r.;
end;
%end;
run;
%mend combo;
%macro mrun;
%let i=1;
%let thing=;
%do %while (%scan(&var.,&i.) ne );
%let p&i.="%scan(&var.,&i.)";
%if &i.=1 %then %let thing=&&p&i..;
%else %let thing=&thing.,&&p&i..;
%let i=%eval(&i.+1);
%end;
%let things=&thing.;
%let n=%eval(&i.-1);
* Calculation the max length of independent variables;
%do m=1 %to &n.;
%let _length&m. = %length(%scan(&var.,&m.));
%end;
%do m=1 %to &n.;
%if &m.=1 %then %let copy=&&_length&m.; %else
%let copy=&copy., &&_length&m.;
%end;
%let maxl = %sysfunc(max(&copy.));
*end of calculation;
*executing the macro combo(r);
%do m=1 %to &n. %by 1;
%combo(&m.);
%end; *end of execution;
%if %sysfunc(exist(Varcombo)) %then %do;
proc datasets nolist;
delete Varcombo;
run;quit;
%end;
data Varcombo;
set
%do _p=1 %to &n.;
combo&_p.
%end;;
run;
proc datasets nolist;
delete
%do _q=1 %to &n.;
combo&_q.
%end;
; run; quit;
%mend mrun;
%macro metadata(ds);
%global _dset _nvars _nobs;
%let _dset = &ds;
%let _dsid = %sysfunc(open(&_dset.));
%let _nobs = %sysfunc(attrn(&_dsid.,NOBS));
%let _rc = %sysfunc(close(&_dsid.));
%mend metadata;
%macro runreg;
%let i=1;
%let thing=;
%do %while (%scan(&var.,&i.) ne );
%let p&i.="%scan(&var.,&i.)";
%if &i.=1 %then %let thing=&&p&i..;
%else %let thing=&thing.,&&p&i..;
%let i=%eval(&i.+1);
%end;
%let things=&thing.;
%let n=%eval(&i.-1);
* Calculation the max length of independent variables;
%do m=1 %to &n.;
%let _length&m. = %length(%scan(&var.,&m.));
%end;
%do m=1 %to &n.;
%if &m.=1 %then %let copy=&&_length&m.; %else
%let copy=&copy., &&_length&m.;
%end;
%let maxl = %sysfunc(max(&copy.));
*end of calculation;
%let _lnth=%eval(&n.*&maxl.);
%if %sysfunc(exist(final)) %then %do;
proc datasets nolist;
delete final;
run; quit;
%end;
%metadata(Varcombo);
proc datasets nolist;
delete final;
run;quit;
sasfile &data. open;
%do _a=1 %to &_nobs.;
data _null_;
set varcombo(firstobs=&_a. obs=&_a.);
%do _b=1 %to &n.;
call symput('_indv'left(&_b.),v&_b.);
%end;
run;
proc reg data=&data.;
model &_dvar. =
%do _x=1 %to &n.;
&&_indv&_x..
%end;/selection=rsquare;
ods output SubsetSelSummary=_tmp&_a.;
run;quit;
data _tmp&_a.(drop=Model Control);
length VarsInModel $&_lnth..;
set _tmp&_a. end=last; %put &_a.;
if last then output _tmp&_a.;
run;
proc append base=final data=_tmp&_a.;
run;
proc datasets nolist;
delete _tmp&_a.;
run; quit;
*clearing output and log;
%if %upcase(&logoutput_clear)=YES %then %do;
dm log 'clear'; dm output 'clear';
%end;
%end;
sasfile &data. close;
%mend runreg;
%macro Shapley;
%let i=1;
%let thing=;
%do %while (%scan(&var.,&i.) ne );
%let p&i.="%scan(&var.,&i.)";
%if &i.=1 %then %let thing=&&p&i..;
%else %let thing=&thing.,&&p&i..;
%let i=%eval(&i.+1);
%end;
%let things=&thing.;
%let n=%eval(&i.-1);
* Calculation the max length of independent variables;
%do m=1 %to &n.;
%let _length&m. = %length(%scan(&var.,&m.));
%end;
%do m=1 %to &n.;
%if &m.=1 %then %let copy=&&_length&m.; %else
%let copy=&copy., &&_length&m.;
%end;
%let maxl = %sysfunc(max(&copy.));
*end of calculation;
%let _lnth=%eval(&n.*&maxl.);
%if %sysfunc(exist(shapleyvalue)) %then %do;
proc datasets nolist;
delete shapleyvalue;
run;quit;
%end;
%do _t=1 %to &n.;
%let var&_t.=%upcase(%scan(&var.,&_t.));
data
present&&var&_t..(drop=var_absent rsq_absent RSquare VarsInModel)
absent&&var&_t..(drop=var_present rsq_present RSquare VarsInModel);
set final;
if index(upcase(VarsInModel),"&&var&_t")>0 then do;
vs=compress(tranwrd(upcase(VarsInModel),"&&var&_t",""));
var_present=VarsInModel; rsq_present=RSquare;
output present&&var&_t..;
end; else do;
vs=compress(tranwrd(upcase(VarsInModel),"&&var&_t",""));
var_absent=VarsInModel;rsq_absent=RSquare;
output absent&&var&_t..;
end;
run;
proc sort data=absent&&var&_t..; by vs; run;
proc sort data=present&&var&_t..; by vs; run;
data _tmp&&var&_t..;
merge present&&var&_t..(in=a drop=NumInModel)
absent&&var&_t..(in=b drop=Dependent);
by vs; if a;
run;
data _tmp&&var&_t..(drop=vs);
set _tmp&&var&_t..;
if rsq_absent=. then rsq_absent=0;
if NumInModel=. then NumInModel=0;
n_m_one=&n.-NumInModel-1;
m_fact=fact(NumInModel);
fact_n_m_one=fact(n_m_one);
product_fact=(m_fact*fact_n_m_one)/fact(&n.);
diff_rsq=(rsq_present-rsq_absent);
shapV=product_fact*diff_rsq;
run;
proc sql;
create table _temp&&var&_t.. as select "&&var&_t" as variable, sum(shapV) as impact
from _tmp&&var&_t..;
quit;
data _temp&&var&_t;
length variable $&_lnth.;
set _temp&&var&_t..;
run;
proc append base=shapleyvalue data=_temp&&var&_t..;
run;
proc datasets nolist;
delete
absent&&var&_t.. present&&var&_t.. _tmp&&var&_t.. _temp&&var&_t..;
run; quit;
%end;
proc datasets nolist;
delete Final varcombo;
run; quit;
%mend Shapley;
%mrun;
%runreg;
%Shapley;

1 comment:

  1. Hi, The above code produces error while executing call symput routine..Or am I missing anything? It produces the following error in log:
    "data set was already opened in memory by a prior SASFILE statement" and
    "The SYMPUT subroutine call has too many arguments"

    ReplyDelete