Creating a neat and reader-friendly SAS Chi-square test report

This macro performs chi-square tests. Both var and byvar should be categorical variables. The macro automatically converts numeric variables into character variables, so no warning or error is raised if you pass it a continuous variable. If the Fisher exact test takes too long, you can set fisher=0 to suppress it. If not specified, the fisher parameter defaults to 1.

Example:


/* Demo input: Customer plus two derived two-level grouping variables. */
data example(keep=Customer revenue_Group time_group);
    set SASHELP.electric;

    /* Revenue above 60 goes to group A; everything else (missing included) to B. */
    if Revenue<=60 then revenue_Group="B";
    else revenue_Group="A";

    /* Years before 2000 (missing included) go to group A; 2000 onward to B. */
    if Year>=2000 then time_group="B";
    else time_group="A";
run;

/* Raw cell counts for each grouping variable against Customer. */
proc freq data=example;
    tables time_group*Customer revenue_Group*Customer / norow nocol nopercent;
run;

/* Load the macro definition, then produce the report with Fisher's test enabled. */
%INCLUDE "/folders/myshortcuts/SASfolder/Tools/Macro_ChiSqOrFisher.sas" ;
%ChiSqOrFisher(table=example,var=time_group revenue_Group,Byvar=Customer,fisher=1);







Result:

Macro:


/*-----------------------------------------------------------------------------
 Macro   : ChiSqOrFisher
 Purpose : Cross-tabulate every variable in var= against Byvar= with PROC FREQ
           and print one row per variable holding the chi-square p-value and,
           when fisher=1, the Fisher exact p-value, a recommended test and the
           recommended p-value.

 Parameters
   table=  Input data set (required).
   var=    Space-separated list of row variables (required).
   Byvar=  Column variable crossed with each var (required).
   alpha=  Threshold below which a p-value is flagged with "*". Default 0.05.
   fisher= 1 (default) also requests the Fisher exact test; any other value
           skips it and reports the chi-square p-value only.

 Output  : PROC PRINT of WORK.final with columns var, chi_square,
           [fisher, RecommandTest, Recommanded_P_value,] Significant.

 Notes
   - Depends on the %Num2Str macro loaded from the hard-coded %INCLUDE path
     below; it is called to write the converted copy of table= to WORK.temp0.
   - Overwrites these WORK data sets: temp0, temp, temp1, temp2, Indice,
     combinedTable, combinedTable1, final.
   - Sets and then clears TITLE10; other title/footnote lines are untouched.
-----------------------------------------------------------------------------*/
%Macro ChiSqOrFisher(table=,var=,Byvar=,alpha=0.05,fisher=1);

    /* Without these three values the generated PROC FREQ code is invalid. */
    %if %length(&table)=0 or %length(&var)=0 or %length(&Byvar)=0 %then %do;
        %put ERROR: ChiSqOrFisher requires table=, var= and Byvar=.;
        %return;
    %end;

    /* Convert the analysis variables to character so they are treated as categories. */
    %INCLUDE "/folders/myshortcuts/SASfolder/Tools/NumToStr.sas" ;
    %Num2Str(table=&table,var=&var,newTable=temp0);

    /* Run PROC FREQ silently and capture its tables as data sets.               */
    /* The %IF lines only inject text; their semicolon ends the %IF, not the     */
    /* surrounding SAS statement.                                                */
    ods select none;
    ods output ChiSq=temp
        %if &fisher=1 %then FishersExact=temp1;
        CrossTabFreqs=temp2;
    PROC FREQ DATA=temp0;
        tables (&var)*&Byvar / chisq
            %if &fisher=1 %then exact;
            expect norow nopercent;
    RUN;
    ods output close;
    ods select all;

    /* Keep only real cells (rows that carry a Byvar level and an expected count). */
    data temp2;
        set temp2;
        where &Byvar is not missing and expected is not missing;
    run;

    /* indicator = position of the row's variable inside var=, stored right-aligned */
    /* so that a character sort restores the order the caller listed them in.       */
    data temp2;
        set temp2;
        indicator=put(0,8.);
        array x[*] &var;
        do i=1 to dim(x);
            if not missing(x[i]) then indicator=put(i,8.);
        end;
    run;

    /* One row per cross-tabulation: the cell with the smallest expected count. */
    proc sort data=temp2 out=Indice;
        by expected;
    run;
    /* EQUALS keeps the previous ordering inside each table, so NODUPKEY retains */
    /* the minimum-expected row even when NOSORTEQUALS is in effect.             */
    proc sort nodupkey equals data=Indice out=Indice;
        by table;
    run;

    /* Decide which test to recommend: Fisher when any expected count is below 5. */
    data indice;
        set indice;
        var=table;
        /* Placeholder assignment fixes the character length of RecommandTest. */
        RecommandTest="-------------";
        min_expected_count=expected;
        if min_expected_count<5 then RecommandTest="Fisher";
        else RecommandTest='ChiSq';
        keep var RecommandTest min_expected_count indicator;
    run;
    /* Re-read with RETAIN first purely to fix the column order. */
    data indice;
        retain var min_expected_count RecommandTest indicator;
        set indice;
    run;

    /* Chi-square p-value: keep the "Chi-Square" row of each table. */
    data temp;
        set temp;
        where Statistic ="Chi-Square";
    run;
    proc sort data=temp out=temp;
        by table;
    run;

    %if &fisher=1 %then %do;
        /* Fisher p-value: keep the row named XP2_FISH of each table. */
        data temp1;
            set temp1;
            where Name1 ="XP2_FISH";
        run;
        proc sort data=temp1 out=temp1;
            by table;
        run;
        /* Put both p-values on the same row per table. */
        data combinedTable;
            merge temp temp1;
            by Table;
        run;
    %end;
    %else %do;
        data combinedTable;
            set temp;
        run;
    %end;

    /* Give the p-value columns their report names. */
    data combinedTable1;
        set combinedTable;
        var=table;
        chi_square=prob;
        %if &fisher=1 %then fisher=nvalue1;;
        keep var chi_square %if &fisher=1 %then fisher;;
    run;

    /* Attach the recommendation; both inputs are ordered by table, hence by var. */
    data final;
        merge combinedTable1 indice;
        by var;
    run;

    /* Recommended p-value follows the recommended test. */
    %if &fisher=1 %then %do;
        data final;
            set final;
            if RecommandTest="Fisher" then Recommanded_P_value=fisher;
            else Recommanded_P_value=chi_square;
        run;
    %end;

    /* Restore the order in which the variables were listed in var=. */
    proc sort data=final out=final;
        by indicator;
    run;

    /* Reduce the table label "Table <var> * <Byvar>" to the variable name. */
    data final;
        set final;
        var1=SCAN(SCAN(var,1,"*"),2,' ');
        drop var;
        rename var1=var;
    run;

    /* Move var back to the first column. */
    data final;
        retain var;
        set final;
    run;

    /* Flag results below alpha; missing p-values are never flagged. */
    %if &fisher=1 %then %do;
        data final;
            set final;
            Significant=' ';
            if Recommanded_P_value<&alpha and not missing(Recommanded_P_value) then Significant="*";
        run;
    %end;
    %else %do;
        data final;
            set final;
            Significant=' ';
            if chi_square<&alpha and not missing(chi_square) then Significant="*";
            drop RecommandTest;
        run;
    %end;

    /* Helper columns are not part of the printed report. */
    data final;
        set final;
        drop indicator min_expected_count;
    run;

    /*==========Print=========*/
    title10 "The chi-square test for discrete variables versus variable &Byvar";
    proc print data=final label;
        label chi_square="Chi-Square test p-value";
        %if &fisher=1 %then %do;
            label fisher="Fisher exact test p-value";
            label   RecommandTest="Recommand Test";
            label Recommanded_P_value="Recommanded P value";
        %end;
    run;
    /* Clear only the title line set above so the caller's other titles survive. */
    title10;
%MEND ChiSqOrFisher;

Comments

Popular posts from this blog

Creating a neat and reader-friendly SAS ANOVA report