/*
  This program uses the Add Health data to create a kids-by-extra-curricular
  activity network, using the PTG module stored in SPAN.

  Author: Moody
  Date:   Jan, 2001

  Outline:
    1. Read and inspect the school data set (s884dat).
    2. Drop implausible respondents and split selected sports by gender.
    3. In IML, build the group-to-group and student-to-student matrices
       from the person-to-group matrix.
    4. Write the networks and a sex partition out for PAJEK.
*/

/* Put the place where you copied s884dat.sd2 between the quote marks. */
libname ahdat 'c:\temp\';

/* Open a new working dataset, called 'a', read from the libname space. */
data a;
    set ahdat.s884dat;
run;

/* Sort the data by student id. */
proc sort data=a;
    by aidr;
run;

/* Look at the data. Make sure it is right. */
proc means data=a fw=5;
run;

/* Look at the distribution of activities by gender in this high school. */
proc freq data=a;
    tables s2*(s44a1-s44a33);
run;

/*
  Recode some of the sports by gender, since male and female teams are
  likely separate.
*/
data a;
    set a;

    /* Count how many of the 33 activity items the student marked (value 1). */
    numacts = 0;
    array acts{*} s44a1-s44a33;
    do _k = 1 to dim(acts);
        if acts{_k} = 1 then numacts = numacts + 1;
    end;

    /* These kids are likely fibbing. */
    if numacts > 10 then delete;

    /*
      Now recode by gender. For each listed sport, a new "...f" indicator is
      created (0 by default). When s2=2 and the original item is 1, the
      membership is moved from the original item to the "...f" item.
    */
    array mteam{*} s44a18  s44a19  s44a23  s44a24  s44a25  s44a26  s44a27;
    array fteam{*} s44a18f s44a19f s44a23f s44a24f s44a25f s44a26f s44a27f;
    do _k = 1 to dim(mteam);
        fteam{_k} = 0;
        if mteam{_k} = 1 & s2 = 2 then do;
            fteam{_k} = 1;
            mteam{_k} = 0;
        end;
    end;

    /* Recode the one female football player. */
    if s44a21 = 1 & s2 = 2 then s44a21 = 0;

    /* The loop index is bookkeeping only; keep it out of the output data. */
    drop _k;
run;

/*
  Use IML to create the person-to-person matrix from the person-to-group
  matrix.
*/
proc iml;
    %include 'c:\moody\sas\programs\modules\pajwrite.mod';
    %include 'c:\moody\sas\programs\modules\pajpart.mod';
    %include 'c:\moody\sas\programs\modules\ptg.mod';

    /* Read the data from work.a into IML. */
    use work.a;                              /* tell IML where to get the data */
    read all var{aidr} into aidr;            /* read the id variable into an id matrix */
    read all var{s2} into sex;
    read all var{s3} into grade;
    read all var{s44a3 s44a4 s44a6 s44a7 s44a8 s44a14 s44a15
                 s44a18 s44a18f s44a19 s44a19f s44a21
                 s44a23 s44a23f s44a24 s44a24f s44a25 s44a25f
                 s44a26 s44a26f s44a27 s44a27f
                 s44a30 s44a31 s44a32 s44a33} into teams;
    close work.a;                            /* all reads are done; release the dataset */

    /* One label per column of teams, in the same order as the var list above. */
    teamlab={latin spanish computer debate drama dance chorus
             baseball softball mbasket fbasket football
             msoccer fsoccer mswim fswim mtennis ftennis
             mtrack ftrack mvolley fvolley
             newspaper honor stdntcoun yearbook};

    clubnet=teams`*teams;                    /* group to group */
    stdntnet=teams*teams`;                   /* student to student */

    /* MATTRIB options are written without brackets (brackets in the syntax
       diagram only mark them as optional). */
    mattrib clubnet rowname=teamlab format=2.0;
    print clubnet;

    /*
      Because the overlap is so heavy in clubnet, I want to look only at the
      'strong' overlaps. Do this by taking only those cells that have greater
      than random chance overlap.
    */
    clubnet=clubnet-diag(clubnet);           /* zero the diagonal */
    c_odg=clubnet[,+];                       /* row marginal */
    c_idg=clubnet[+,];                       /* col marginal - in this case they are the same */
    expected=(c_odg*c_idg)/c_odg[+];         /* expected number of overlaps, by chance */
    clubnet2=clubnet#(clubnet>expected);     /* keep only cells above expectation */

    mattrib clubnet2 rowname=teamlab format=2.0;
    print clubnet2;

    /*
      Calculate density of the person-to-person matrix: the sum of the
      off-diagonal cells of stdntnet divided by n*(n-1). Because stdntnet
      holds teams*teams` rather than a 0/1 indicator, this is the mean
      off-diagonal cell value.
    */
    den=(stdntnet-diag(stdntnet))[+]/(nrow(stdntnet)*(nrow(stdntnet)-1));
    print den;

    /*
      Now write the nets out to PAJEK.
      NOTE(review): pajwrite/pajpart are defined in the included modules; the
      meaning of the third pajwrite argument (2) and how the modules treat the
      diagonal of stdntnet/stdnt2 are not visible here - confirm in the module.
    */
    file 'c:\temp\student_grp.net';
    call pajwrite(stdntnet,aidr,2);

    file 'c:\temp\grp2grp.net';
    call pajwrite(clubnet2,teamlab,2);

    stdnt2=stdntnet>1;                       /* must share 2 or more clubs */
    file 'c:\temp\stdnt2.net';
    call pajwrite(stdnt2,aidr,2);

    sex=choose(sex=.,0,sex);                 /* recode missing values to 0 */
    file 'c:\temp\schl_sex.clu';
    call pajpart(sex);

quit; /* exit IML */

/* Print the record for a single student id. */
proc print data=a;
    where aidr = 90575999;
run;