Pages

Monday, March 25, 2019

SAS Example : List the missing percentage of variables/columns of dataset

This post lists the missing percentage of all the variables in a dataset.



Below code to create sample data:


data sample;
input id name $;
cards;
1 anil
2 bob
3 arun
4 tarun
5 tom
5 paul
7 peter
8 krish
9 sai
10 gita
;
run;



Assign missing values. (name has 2 missing values and id has 1 missing value)


data sample1;
set sample;
if id>=9 then name='';
if name='tom' then id=.;
run;


Macro:


Below macro lists the missing percentage of variables in a dataset.


%macro list_misspct(input_dataset);

proc contents data = &input_dataset out = var_list noprint;
run;

proc sql noprint;
select count(*) into :total_obs from &input_dataset;
select count(*) into :var_cnt from var_list;
select name into :var_in separated by ' ' from var_list;
select type into :var_type separated by ' ' from var_list;
quit;

/*Creating dataset to hold variable names and their missing percentages*/

proc sql noprint; 
CREATE TABLE misspercentage
(Variable varchar(32),
missing_percentage int);
quit;

/*Computing missing percentage of variables*/

%do i=1 %to &var_cnt;

%let scan_var=%scan(&var_in,&i);
%let scan_type=%scan(&var_type,&i);

proc sql noprint;
create table miss_pct as
select "&scan_var" as variable,
sum(case when &scan_var is missing then 1 else 0 end) as miss_cnt,
((calculated miss_cnt/&total_obs)*100) as missing_percentage
from &input_dataset;
quit;

proc append base=misspercentage data=miss_pct(keep=variable missing_percentage) force;
run;

%end;

proc print data=misspercentage;run;

%mend list_misspct;

%list_misspct(sample1);




Note: Missing percentage calculation: count of missing observations/total no. of observation