/* This program calculates Freeman's segregation index and the same-group
   odds ratio for an input network and a partition of its nodes.
   The statistics are reported for sex, race and grade.

   Outline:
     1. read and clean the survey data (DATA steps)
     2. build a race category variable
     3. in IML: build the adjacency matrix, drop nominated-but-unsampled
        ids, align the attribute vectors to the adjacency matrix, and
        compute the segregation statistics for each attribute */

libname ahdat 'i:\people\jwm\s884\data\'; /* put the place where you copied s884dat.sd2 between the quote marks */

data a;                 /* open a new working dataset, called 'a' */
    set ahdat.s884dat;  /* read the data from your libname space */
run;

proc sort data=a;
    by aidr;
run;

proc means fw=5;        /* look at the data. Make sure it is right */
run;

/* now we need to change some codes, to clean the data before we use it.
   do this with datasteps */
data a;
    set a;
    where aidr>90000000;      /* remove people who could not be nominated */

    s2=s2-1;                  /* currently 1 for male, 2 for female; change to: male=0, female=1 */
    if s3 > 12 then s3=.;     /* missing value codes */
    if s3 < 9 then s3=.;      /* nobody in this school should be in less than 9th grade, so clean it up */

    /* kids were allowed to nominate people in the school or not. But since
       we don't have any information on the people outside of the school, we
       need to change those to missing data for now. Because each person named
       up to 10 people, we could write 10 if-then statements, like those above.
       However, it is more efficient to let SAS do the same thing over many
       variables. You can do this with an ARRAY statement. */
    array frnds mf1aid mf2aid mf3aid mf4aid mf5aid
                ff1aid ff2aid ff3aid ff4aid ff5aid;
    do over frnds;
        if frnds = 77777777 then frnds = .;  /* nominations to another school */
        if frnds = 88888888 then frnds = .;  /* goes to sister school, not in directory */
        if frnds = 99999999 then frnds = .;  /* not found in the directory */
        if frnds = 99959995 then frnds = .;  /* bad nomination, miskeyed, etc. */
    end;

    /* lets create a race variable. For the purposes of this example, lets say that:
         a) if they say white only then they are white
         b) if they say black only then they are black
         c) if they say hispanic then they are hispanic (I do this because most
            hispanics choose 'other' as their race, and thus substantively
            consider themselves 'hispanic' not 'white-hispanic' etc.)
         d) if they choose asian as their only race, then they are asian
         e) if they choose other as their only race, they choose more than one
            race, or they choose am. indian, then they are 'other'. */
    numrace=(s6a+s6b+s6c+s6d+s6e);  /* since these are indicator variables, adding gives the number */

    racecat=0;
    if numrace=1 then do;
        if s6a=1 then racecat=1;            /* white */
        if s6b=1 then racecat=2;            /* black */
        if s6c=1 then racecat=3;            /* Asian */
                                            /* saving 4 for hispanic */
        if s6d=1 | s6e=1 then racecat=5;    /* other */
    end;
    else do;
        racecat=5;                          /* people with more than one race */
    end;

    if s4=1 then racecat=4;                 /* hispanic */

    /* people who gave us no race information & are not (known to be) hispanic.
       The test is "s4 ne 1" rather than "s4 > 1": a missing s4 compares lower
       than any number in SAS, so "s4 > 1" let respondents with no race AND no
       hispanic answer fall through to the 'other' category above. */
    if numrace=0 & s4 ne 1 then racecat=.;

    /* NOTE(review): if any one of s6a-s6e is missing, numrace is missing and
       the person lands in the else-branch above (racecat=5) - confirm that
       this is the intended treatment. */
run;

proc means;   /* look at it now, after having fixed the missing data & recoding */
run;

proc freq;    /* look at the distribution of some of the variables */
    tables s2 s3 racecat numrace;
run;

/* at this point, we want to read the nomination data and some of the
   characteristics into IML, so we can write it out to PAJEK and plot the
   school network */
proc iml;
    %include 'c:\moody\sas\programs\modules\pajwrite.mod';
    %include 'c:\moody\sas\programs\modules\pajpart.mod';
    %include 'c:\moody\sas\programs\modules\adj.mod';
    %include 'c:\moody\sas\programs\modules\mixmat.mod';
    %include 'c:\moody\sas\programs\modules\freeseg.mod';

    /* read the data from work.a into IML */
    use work.a;                             /* tell IML where to get the data */
    read all var{aidr} into aidr;           /* read the id variable into an id matrix */
    read all var{s2} into sex;
    read all var{s3} into grade;
    read all var{racecat} into race;
    read all var{mf1aid mf2aid mf3aid mf4aid mf5aid} into mfrnds;
    read all var{ff1aid ff2aid ff3aid ff4aid ff5aid} into ffrnds;
    close work.a;                           /* done reading, release the dataset */

    frnds=mfrnds||ffrnds;   /* this puts the male and female nominations into one matrix.
                               Could just as easily have read them all into frnds above */

    /* to create the adjacency matrix, I use a pre-coded function (adj.mod)
       that loops over all of the found ids and creates an adjacency matrix.
       Open that file in textpad and look at it. See if you can figure out
       how it works. */
    adjmat=adj(aidr,frnds);             /* this creates adjmat, which has N rows and N+1 columns.
                                           The extra column is an ID variable */
    adjid=adjmat[,1];                   /* pull off the id variable, which is in the first column */
    adjmat=adjmat[,2:ncol(adjmat)];     /* pull off the square adjacency matrix */

    /* Real data are messy. One of the complications with real data is that
       some people do not fill out your survey. That shows up as people who
       are nominated as friends, but who are not part of the survey sample.
       That is, a particular ID number can be named, even though nobody with
       that ID filled out the survey. We need to remove these people from the
       network. We do this by creating a variable called 'sampled' that =1 if
       the found ID is among the ids we read (AIDR) and 0 otherwise.

       While we are looking each id up, we also remember WHERE in AIDR it was
       found (attrrow). The attribute vectors (sex, grade, race) are in AIDR
       order, while the adjacency matrix is in ADJID order; attrrow lets us
       line the two up explicitly instead of relying on adj() returning ids
       in the same order as the dataset. */
    nfound=nrow(adjid);
    sampled=j(nfound,1,0);
    attrrow=j(nfound,1,0);
    do i=1 to nfound;                       /* look over every person in the network */
        iloc=loc(aidr=adjid[i]);            /* see if you can find them in the AID matrix */
        if type(iloc)='N' then do;          /* type returns either C=character, N=number, or U undefined */
            sampled[i]=1;                   /* if so, they were sampled, change value to 1 */
            attrrow[i]=iloc[1];             /* row of this person in aidr / sex / grade / race */
            free iloc;                      /* not really needed, but is good practice */
        end;
    end;

    keep=loc(sampled=1);                    /* keep will be a set of people who are in the dataset */
    adjid=adjid[keep,1];                    /* take just those rows of the id variable */
    adjmat=adjmat[keep,keep];               /* and just those rows and columns of the adj. matrix */

    /* put the attributes in the same order as the rows of adjmat */
    attrrow=attrrow[keep,1];
    sex=sex[attrrow,1];
    grade=grade[attrrow,1];
    race=race[attrrow,1];

    /* Subroutine for calculating the same-group odds ratio.
         mixmat : square mixing matrix (ties sent from row group to column group)
         postie : matrix of the same shape, passed in below as the
                  'possible number of ties' block returned by mixmat()
       Returns (a*d)/(c*b), or missing (.) when the denominator is zero.

       NOTE(review): cell_c and cell_d are labelled 'not frnds' but are taken
       straight from postie. If postie really holds POSSIBLE ties (which
       include the observed ones), the non-tie counts would be postie minus
       mixmat - confirm against mixmat.mod before changing. */
    start mix_or(mixmat,postie);
        cell_a=trace(mixmat);               /* same-group, frnds */
        cell_b=mixmat[+]-cell_a;            /* different group, frnds */
        cell_c=trace(postie);               /* not frnds, same group */
        cell_d=postie[+]-cell_c;            /* not frnds, dif group */
        denom=cell_c*cell_b;
        if denom=0 then mixor=.;            /* odds ratio undefined, do not divide by zero */
        else mixor=(cell_a*cell_d)/denom;
        return(mixor);
    finish;

    /* Subroutine that computes both segregation statistics for one attribute.
         sch_fseg : (output) Freeman segregation index from freeseg()
         sch_or   : (output) same-group odds ratio from mix_or()
         adjmat   : square adjacency matrix
         attr     : attribute vector, one row per row of adjmat
       Prints the raw mixmat() result, the mixing matrix and the expected
       matrix along the way. */
    start seg_stats(sch_fseg,sch_or,adjmat,attr);
        sch_mm=mixmat(adjmat,attr);
        print sch_mm;

        /* note that the matrix has the possible number of ties in the lower
           half, this is needed to calculate other statistics */
        ngrp=ncol(sch_mm);
        sch_pm=sch_mm[(ngrp+1):nrow(sch_mm),];  /* pull off the number of possible ties */
        sch_mm=sch_mm[1:ngrp,];                 /* now just the mixing matrix */

        /* get the expected value. This is easy to get as the matrix
           multiplication of the marginal totals divided by the sum of the
           table */
        expmat=(sch_mm[,+]*sch_mm[+,])/sch_mm[+];
        print sch_mm;
        print expmat;

        /* now get the segregation indices */
        sch_fseg=freeseg(sch_mm,expmat);
        sch_or=mix_or(sch_mm,sch_pm);
    finish;

    /* FOR SEX */
    run seg_stats(sch_fseg,sch_or,adjmat,sex);
    print "Segregation for SEX";
    print sch_fseg sch_or;

    /* FOR RACE */
    run seg_stats(sch_fseg,sch_or,adjmat,race);
    print "SEGREGATION FOR RACE:";
    print sch_fseg sch_or;

    /* for GRADE */
    run seg_stats(sch_fseg,sch_or,adjmat,grade);
    print 'Segregation for GRADE ';
    print sch_fseg sch_or;

quit; /* exit IML */