/*
 This program reads the GSS .xpt file and demonstrates how to calculate a set
 of measures on the ego-network variables.

 See codebook.txt for a description of what the variables mean, or for better
 information go to
     http://www.icpsr.umich.edu/GSS99/module/m-index.htm

 !!! BEFORE RUNNING THE PROGRAM, CHANGE THE DIRECTORY BELOW TO MATCH YOUR OWN SPACE !!!

 author: Jim Moody
 Date:   Dec. 18, 2002
*/

/* The one place to edit: the directory that holds gss85.xpt and gssformat.sas. */
%let gss_dir = c:\moody\classes\soc884\examples\gss;

/* Start by reading the raw GSS data into SAS. To do this, we need to tell SAS
   where the data live using a LIBNAME statement. Libraries are "shortcuts" or
   "tags" that tell SAS where to find data files. We name this library "in1". */
libname in1 xport "&gss_dir\gss85.xpt";

/* The GSS data are formatted, meaning that when you print or display the data
   you see labels (such as "white") instead of numbers. We have to read these
   formats in separately. The code in "gssformat.sas" does this, which we now
   include below. */
%include "&gss_dir\gssformat.sas";

/* Now read in the data. We are going to put the data in a working dataset
   called 'work.a' ("work" is the default library). */
data a;
    set in1.gss85;   /* the in1 here is the library we created above */
run;

/* Take a look at the data. */
proc contents data=a;
run;

/* Here I simply look at the distribution of numgiven (number of people they
   named) and the closeness rating. Note that the rclose# questions were only
   asked if eqclose = no. */
proc freq data=a;
    tables numgiven eqclose rclose1 rclose2 rclose3 rclose4 rclose5
           close12 close13;
run;

/* Let's look at the relation between ego's religion and alter1's religion.
   Ego's religion is in the RELIG variable, alter's is in RELIG<#>. The chisq
   option gives me fit statistics, of which Cramer's V is a good measure for
   nominal variables: 0 = no relation, 1 = perfect match. */
proc freq data=a;
    tables relig*RELIG1 / chisq;
run;

/* I want a measure of the type-of-tie heterogeneity of the alters. To get
   this, I am going to create a variable called alttype# that assigns people
   to 1 of 6 possible types: kin, friend, group member, neighbor, advisor,
   other. In reality these overlap, but I am going to make them trump in this
   order, for demonstration purposes.

   Note that I embed this in a macro, just to save typing. You could repeat
   this code one relation at a time if you wanted to. Macros are
   text-replacement sub programs, so the macro below replaces '&i' with '1',
   '2', etc. each time it loops.

   The macro only generates DATA step statements, so it must be CALLED inside
   a DATA step, after kin1-kin5 have been computed. It is DEFINED here, ahead
   of the step, so the definition does not interrupt the step itself. */
%macro loop;                 /* this defines a subroutine called 'loop' */
    %local i;                /* keep the loop index from clobbering a global i */
    %do i = 1 %to 5;         /* do over each alter */
        alttype&i = .;
        /* later assignments win, so the lowest-numbered type trumps */
        if other&i   = 1 then alttype&i = 6;
        if advisor&i = 1 then alttype&i = 5;
        if neighbr&i = 1 then alttype&i = 4;
        if memgrp&i  = 1 then alttype&i = 3;
        if friend&i  = 1 then alttype&i = 2;
        if kin&i     = 1 then alttype&i = 1;
    %end;
%mend loop;

/* The program below creates the ego-network variables. */
data a;
    set a;

    /* Calculate ego network density, coded as Marsden did. Ego network
       density is the average strength of ties among ego's alters. */

    /* First recode the closeness variables to match Marsden. I have done this
       using an ARRAY. ARRAYS give names to a list of variables (here all the
       pair-wise closeness variables). You can then repeat the instructions
       over each element of the array. */
    array altcls{*} close12 close13 close14 close15
                    close23 close24 close25
                    close34 close35
                    close45;

    do _idx = 1 to dim(altcls);
        /* 1 = especially close: stays 1, so no statement is needed */
        if altcls{_idx} = 2 then altcls{_idx} = .5;      /* neither close nor distant */
        else if altcls{_idx} = 3 then altcls{_idx} = 0;  /* strangers */
        /* any other value (including missing) is left untouched */
    end;

    /* Density is the mean of the pair-wise values, so calculate it using the
       mean function (which ignores missing values). */
    egoden = mean(of altcls{*});

    /* Calculate the proportion of ego's network who are kin. Do this by going
       through each of the alters and their relation to ego. If they fit any
       of the kin types, we code them as being kin. A SAS comparison evaluates
       to 1 (true) or 0 (false), so each kin# is a 0/1 flag. */
    numkin = 0;   /* initialize the variable to be 0 - no kin */
    kin1 = (spouse1=1 | parent1=1 | sibling1=1 | child1=1 | othfam1=1);
    kin2 = (spouse2=1 | parent2=1 | sibling2=1 | child2=1 | othfam2=1);
    kin3 = (spouse3=1 | parent3=1 | sibling3=1 | child3=1 | othfam3=1);
    kin4 = (spouse4=1 | parent4=1 | sibling4=1 | child4=1 | othfam4=1);
    kin5 = (spouse5=1 | parent5=1 | sibling5=1 | child5=1 | othfam5=1);
    numkin = kin1 + kin2 + kin3 + kin4 + kin5;   /* number of kin named */

    /* Proportion of the total network that R said was kin. It is missing for
       people who named nobody (numgiven = 0) and for people who are missing
       data on networks (numgiven = .). The explicit test avoids the
       division-by-zero notes the bare division would write to the log.
       NOTE(review): only alters 1-5 are described in this file; if numgiven
       can exceed 5, propkin understates the kin share - confirm against the
       codebook. */
    if missing(numgiven) or numgiven = 0 then propkin = .;
    else propkin = numkin / numgiven;

    /* Assign each alter to one tie type (alttype1-alttype5). */
    %loop;   /* this runs the subroutine defined above */

    /* Now I want to calculate heterogeneity. I like the heterogeneity index
       better than the IQV (they reduce to the same thing, differ only by a
       constant) because it is interpretable as the probability that any two
       randomly chosen people are of different categories. To do this, I need
       to calculate some more intermediate variables, to get the number of
       people in each type for R. */
    numfrnd = 0;
    numgrp  = 0;
    numadv  = 0;
    numnbr  = 0;
    numoth  = 0;

    array types{*} alttype1 alttype2 alttype3 alttype4 alttype5;
    do _idx = 1 to dim(types);
        select (types{_idx});
            when (2) numfrnd = numfrnd + 1;
            when (3) numgrp  = numgrp  + 1;
            when (4) numnbr  = numnbr  + 1;
            when (5) numadv  = numadv  + 1;
            when (6) numoth  = numoth  + 1;
            otherwise;   /* kin (already counted in numkin) or untyped alter */
        end;
    end;

    typecnt = numkin + numfrnd + numgrp + numnbr + numadv + numoth;

    /* Just to check. Should never be > 1. Left missing when numgiven is 0 or
       missing, again without tripping a division by zero. */
    if missing(numgiven) or numgiven = 0 then typerat = .;
    else typerat = typecnt / numgiven;

    typehet = .;
    /* Only makes sense for people with more than one alter, and only when
       every named alter was assigned a type. */
    if (numgiven > 1 & typecnt = numgiven) then do;
        typehet = 1 - ( ((numkin /numgiven)**2) + ((numfrnd/numgiven)**2)
                      + ((numgrp /numgiven)**2) + ((numnbr /numgiven)**2)
                      + ((numadv /numgiven)**2) + ((numoth /numgiven)**2) );
    end;

    drop _idx;   /* loop index only; keep it out of the output data set */
run;

/* Now look at the means for the variables. */
proc means data=a;
    var propkin numgiven typehet egoden
        close12 close13 close14 close15
        close23 close24 close25
        close34 close35
        close45;
run;

/* Now modify the program (or write a new one) to calculate
   (1) the proportion of ego's network that is the same race,
   (2) the same sex. */