/*--------------------------------------------------------------------
   Cluster analysis examples.

   1. Hierarchical clustering of a small 20-case (x, y) data set.
   2. Clustering pure random noise, to show that a cluster solution
      can always be extracted even when no structure exists.
   3. Clustering the nodes of a 15-node network from a node-by-node
      distance matrix, then
        - building a mixing matrix for the resulting clusters,
        - running MDS on the same distance matrix, and
        - writing a Pajek file that uses the MDS coordinates as the
          layout.

   The IML steps rely on modules (reach, mixmat, pajwrtc) that are
   %included from external files; their definitions are not part of
   this program.
--------------------------------------------------------------------*/

/* ---- Example 1: variable data for a 20 case example ------------- */
data a;
   input node x y;
   cards;
1 1 13
2 1 8.5
3 2 8.5
4 1.5 2.5
5 4 3
6 3 3.9
7 9.5 8.7
8 4 10
9 9 1
10 3 1.5
11 3.4 2.5
12 1.5 7
13 3 13
14 2 12
15 10 9.2
16 9.5 10
17 9.5 2.5
18 10 1.5
19 9.5 0
20 8 2.5
;
run;

/* Look at the raw points on the screen.  The PLOT request is
   vertical*horizontal, so y*x puts x on the horizontal axis (the same
   orientation used for the random-data plot further down). */
proc gplot data=a;
   plot y*x = node;
run;

/* Run a cluster analysis on the data: average linkage on the
   standardized variables. */
proc cluster data=a method=ave out=clustd std;
   var x y;
   id node;
run;

/* Cut the tree at 5 clusters and carry the original variables along. */
proc tree data=clustd ncl=5 out=cluvars;
   copy node x y;
run;

/* PROC TREE changes the order of the observations, so put them back
   in node order before listing them. */
proc sort data=cluvars;
   by node;
run;

proc print data=cluvars;
   var node x y cluster;
run;

/* ---- Example 2: random data -- you can get clusters out of nothing */
data random;
   do i = 1 to 20;
      /* a seed of 0 is not a fixed seed, so results differ per run */
      x = rannor(0);
      y = rannor(0);
      output;
   end;
run;

proc cluster data=random method=ward out=clustd std;
   var x y;
   id i;
run;

proc tree data=clustd ncl=5 out=ranclust;
   copy i x y;
run;

proc gplot data=ranclust;
   plot y*x = cluster;
run;

/* ---- Example 3: distance matrix example on networks -------------- */
proc iml;
   %include 'c:\moody\sas\programs\modules\reach.mod';

   /* blue eye example: 15 x 15 adjacency matrix, built row by row
      (row i gets a 1 in every listed column) */
   mat2 = j(15,15,0);
   mat2[1,{2 14 15}]=1;
   mat2[2,{1 15 13 10}]=1;
   mat2[3,{12 11 10}]=1;
   mat2[4,{9 5 8 6 15}]=1;
   mat2[5,{4 8 9 10 7}]=1;
   mat2[6,{4 8 7}]=1;
   mat2[7,{6 5}]=1;
   mat2[8,{6 4 9 5}]=1;
   mat2[9,{4 8 5 10}]=1;
   mat2[10,{5 9 2 12 3 11}]=1;
   mat2[11,{3 12 10}]=1;
   mat2[12,{3 14 13 10 11}]=1;
   mat2[13,{2 12}]=1;
   mat2[14,{1 15 12}]=1;
   mat2[15,{1 14 2 4}]=1;

   dmat = reach(mat2);        /* create the distance matrix */
   mattrib dmat format=1.0;
   print dmat;                /* just to look at it.  Do not do this
                                 for large networks! */

   /* add a row-id indicator as the first column of the distance
      matrix; it becomes COL1 in the data set created below */
   id = 1:nrow(dmat);
   id = id`;
   ddat = id||dmat;

   create ddat (type=dist) from ddat;   /* create a distance data set */
   append from ddat;

   /* not needed for the clustering itself; mat2 is stored so the later
      IML steps (mixing matrix, Pajek file) can load it again */
   reset storage = work.temp;
   store mat2;
quit;

proc cluster data=ddat method=ward out=clustd;
   id col1;
run;

proc tree data=clustd ncl=3 out=netclust;
   copy col1;
run;

proc freq data=netclust;
   tables cluster;
run;

proc print data=netclust;
   var col1 cluster;
run;

/* restore node order so that row i of netclust lines up with
   row i of mat2 */
proc sort data=netclust;
   by col1;
run;

/* To look at the mixing matrix for the clusters just created: */
proc iml;
   %include 'c:\moody\sas\programs\modules\mixmat.mod';

   reset storage = work.temp;
   load mat2;                 /* get the adjacency matrix back */

   use work.netclust;
   read all var{cluster} into cluster;

   mixclst = mixmat(mat2,cluster);
   print mixclst;
quit;

/* Get a plot based on the implicit dimensions underlying the network
   distance matrix. */
proc mds out=mds1 level=a data=ddat dim=2;
   id col1;
run;

data mds1;
   set mds1;
   where col1^=.;             /* need to drop one extra case */
run;

/* This IML program writes a PAJEK file with pre-specified layout
   coordinates. */
proc iml;
   %include 'c:\moody\sas\programs\modules\pajwrtc.mod';

   use work.mds1;
   read all var{dim1} into dim1;
   read all var{dim2} into dim2;
   read all var{col1} into id;

   /* Rescale each MDS dimension to the [0,1] interval: subtract the
      minimum (correct whatever its sign) and divide by the resulting
      maximum.  The guard leaves a constant dimension at 0 instead of
      dividing by zero. */
   dim1 = dim1 - dim1[><];
   if dim1[<>] > 0 then dim1 = dim1 / dim1[<>];
   dim2 = dim2 - dim2[><];
   if dim2[<>] > 0 then dim2 = dim2 / dim2[<>];

   reset storage = work.temp;
   load mat2;

   file 'c:\moody\classes\soc884\examples\blueeyt.net';
   /* NOTE(review): the meaning of the third argument (2) is defined by
      the pajwrtc module, which is not visible here -- confirm there */
   call pajwrtc(mat2,id,2,dim1,dim2);
quit;