* Were women discriminated against in admission to graduate studies at the University of California, Berkeley?

* D. A. Powers and Y. Xie (2008), in their book Statistical Methods for Categorical Data Analysis
  (San Diego: Academic Press), p. 102, present this example, taken from a study carried out
  at the University of California-Berkeley and referenced in:
  Bickel, P. J.; Hammel, E. A.; O'Connell, J. W. (1975). Sex Bias in Graduate Admissions: Data from Berkeley
  Science, 187 (4175): 398-404
  Freedman, D.; Pisani, R.; Purves, R. (1978). Statistics. New York: Norton.

* Output settings: show both codes and labels, for values and for variables.
* TNUMBERS and TVARS act on pivot tables, ONUMBERS and OVARS on the output outline.
SET
    TNUMBERS=BOTH
    TVARS=BOTH
    ONUMBERS=BOTH
    OVARS=BOTH.

* Generation and identification of the contingency-table data.
* Each inline data row is one cell of the three-way table, read in free format.
* A = admission outcome, S = sex of applicants, D = department (labelled further below).
* Frequencies = cell count, used later in this file as the case weight.
* NOTE(review): the cell A=2, S=2, D=1 is entered as 314, while commonly reproduced versions of the Berkeley table report 313 - confirm against Powers and Xie.
DATA LIST FREE / A S D (3F1.0) Frequencies (F3.0).
BEGIN DATA.
1	1	1	89
1	1	2	17
1	1	3	202
1	1	4	131
1	1	5	94
1	1	6	24
2	1	1	19
2	1	2	8
2	1	3	391
2	1	4	244
2	1	5	299
2	1	6	317
1	2	1	512
1	2	2	353
1	2	3	120
1	2	4	138
1	2	5	53
1	2	6	22
2	2	1	314
2	2	2	207
2	2	3	205
2	2	4	279
2	2	5	138
2	2	6	351
END DATA.

* Descriptive labels for the three classification variables.
VARIABLE LABELS
    A "Admission outcome"
    S "Sex of applicants"
    D "Department".
* Value labels. The S=1 label is plural (Women) so that it is consistent with Men.
VALUE LABELS
    A 1 'Admitted' 2 'Rejected'
   /S 1 'Women' 2 'Men'
   /D 1 'A' 2 'B' 3 'C' 4 'D' 5 'E' 6 'F'.
* Display A and S as one-digit integers.
FORMATS A S (F1.0).
* Every data row is a table cell, so cases are weighted by the cell count.
WEIGHT BY Frequencies.
* One-way frequency tables for every variable in the active dataset.
FREQUENCIES VARIABLES=ALL.
* Three-way table: admission outcome by sex, layered by department.
CROSSTABS
    /TABLES=A BY S BY D.
* Same information laid out with sex and department nested in the rows.
TABLES
    /TABLE=S > D BY A.
* Marginal and partial tables with column percentages, chi-square tests and phi.
* A BY S D requests two tables: A by S and A by D.
CROSSTABS
    /TABLES=A BY S D
    /TABLES=D BY S
    /TABLES=A BY S BY D
    /CELLS=COUNT COLUMN
    /STATISTICS=CHISQ PHI.

* Hierarchical log-linear model selection by backward elimination.
* The range (1 2) applies to A and S, the range (1 6) to D.
* DESIGN without effects requests the default (saturated) model as starting point.
HILOGLINEAR A S(1 2) D(1 6)
    /METHOD=BACKWARD
    /CRITERIA MAXSTEPS(10) P(.05) ITERATION(20) DELTA(.5)
    /PRINT=FREQ RESID ASSOCIATION
    /DESIGN.
* General log-linear fit of the same three-factor table, again with the default design.
GENLOG A S D
    /PRINT=FREQ RESID ESTIM
    /PLOT=NONE
    /DESIGN.

* Log-linear models with 3 variables. Computation of the dissimilarity index.

* Model A, S, D: main effects only (mutual independence of the three factors).
GENLOG A S D
    /PRINT=FREQ RESID ESTIM
    /PLOT=NONE
    /DESIGN A S D
    /SAVE=RESID ZRESID ADJRESID DEV PRED.
* Dissimilarity index: 100 times the sum of absolute residuals over twice the sum of predicted counts.
* C is a constant break variable, so AGGREGATE returns totals over all cases.
COMPUTE C=1.
COMPUTE ABS_RES_1=ABS(RES_1).
* The sums must run over cells (one case each), so weighting is switched off first.
WEIGHT OFF.
AGGREGATE
    /OUTFILE=* MODE=ADDVARIABLES
    /BREAK=C
    /ABS_RES_1_sum=SUM(ABS_RES_1)
    /PRE_1_sum=SUM(PRE_1).
* Restore the cell-count weight; the weight variable must be spelled out in full (Frequencies).
WEIGHT BY Frequencies.
COMPUTE ID=(ABS_RES_1_sum/(2*PRE_1_sum))*100.
* The index is identical on every case, so listing the first case is enough.
LIST ID /CASES=FROM 1 TO 1.
* Remove saved and working variables so that the next model can reuse the same names.
DELETE VARIABLES RES_1 ZRE_1 ADJ_1 DEV_1 PRE_1 ABS_RES_1 ABS_RES_1_sum PRE_1_sum ID.

* Model with main effects plus the admission-by-sex association (A*S).
GENLOG A S D
    /PRINT=FREQ RESID ESTIM
    /PLOT=NONE
    /DESIGN A S D A*S
    /SAVE=RESID ZRESID ADJRESID DEV PRED.
* Dissimilarity index: 100 times the sum of absolute residuals over twice the sum of predicted counts.
* C is a constant break variable, so AGGREGATE returns totals over all cases.
COMPUTE C=1.
COMPUTE ABS_RES_1=ABS(RES_1).
* The sums must run over cells (one case each), so weighting is switched off first.
WEIGHT OFF.
AGGREGATE
    /OUTFILE=* MODE=ADDVARIABLES
    /BREAK=C
    /ABS_RES_1_sum=SUM(ABS_RES_1)
    /PRE_1_sum=SUM(PRE_1).
* Restore the cell-count weight; the weight variable must be spelled out in full (Frequencies).
WEIGHT BY Frequencies.
COMPUTE ID=(ABS_RES_1_sum/(2*PRE_1_sum))*100.
* The index is identical on every case, so listing the first case is enough.
LIST ID /CASES=FROM 1 TO 1.
* Remove saved and working variables so that the next model can reuse the same names.
DELETE VARIABLES RES_1 ZRE_1 ADJ_1 DEV_1 PRE_1 ABS_RES_1 ABS_RES_1_sum PRE_1_sum ID.

* Model with main effects plus the admission-by-department association (A*D).
GENLOG A S D
    /PRINT=FREQ RESID ESTIM
    /PLOT=NONE
    /DESIGN A S D A*D
    /SAVE=RESID ZRESID ADJRESID DEV PRED.
* Dissimilarity index: 100 times the sum of absolute residuals over twice the sum of predicted counts.
* C is a constant break variable, so AGGREGATE returns totals over all cases.
COMPUTE C=1.
COMPUTE ABS_RES_1=ABS(RES_1).
* The sums must run over cells (one case each), so weighting is switched off first.
WEIGHT OFF.
AGGREGATE
    /OUTFILE=* MODE=ADDVARIABLES
    /BREAK=C
    /ABS_RES_1_sum=SUM(ABS_RES_1)
    /PRE_1_sum=SUM(PRE_1).
* Restore the cell-count weight; the weight variable must be spelled out in full (Frequencies).
WEIGHT BY Frequencies.
COMPUTE ID=(ABS_RES_1_sum/(2*PRE_1_sum))*100.
* The index is identical on every case, so listing the first case is enough.
LIST ID /CASES=FROM 1 TO 1.
* Remove saved and working variables so that the next model can reuse the same names.
DELETE VARIABLES RES_1 ZRE_1 ADJ_1 DEV_1 PRE_1 ABS_RES_1 ABS_RES_1_sum PRE_1_sum ID.

* Model with main effects plus the sex-by-department association (S*D).
GENLOG A S D
    /PRINT=FREQ RESID ESTIM
    /PLOT=NONE
    /DESIGN A S D S*D
    /SAVE=RESID ZRESID ADJRESID DEV PRED.
* Dissimilarity index: 100 times the sum of absolute residuals over twice the sum of predicted counts.
* C is a constant break variable, so AGGREGATE returns totals over all cases.
COMPUTE C=1.
COMPUTE ABS_RES_1=ABS(RES_1).
* The sums must run over cells (one case each), so weighting is switched off first.
WEIGHT OFF.
AGGREGATE
    /OUTFILE=* MODE=ADDVARIABLES
    /BREAK=C
    /ABS_RES_1_sum=SUM(ABS_RES_1)
    /PRE_1_sum=SUM(PRE_1).
* Restore the cell-count weight; the weight variable must be spelled out in full (Frequencies).
WEIGHT BY Frequencies.
COMPUTE ID=(ABS_RES_1_sum/(2*PRE_1_sum))*100.
* The index is identical on every case, so listing the first case is enough.
LIST ID /CASES=FROM 1 TO 1.
* Remove saved and working variables so that the next model can reuse the same names.
DELETE VARIABLES RES_1 ZRE_1 ADJ_1 DEV_1 PRE_1 ABS_RES_1 ABS_RES_1_sum PRE_1_sum ID.

* Model with A*S and A*D: sex and department are related only through admission.
GENLOG A S D
    /PRINT=FREQ RESID ESTIM
    /PLOT=NONE
    /DESIGN A S D A*S A*D
    /SAVE=RESID ZRESID ADJRESID DEV PRED.
* Dissimilarity index: 100 times the sum of absolute residuals over twice the sum of predicted counts.
* C is a constant break variable, so AGGREGATE returns totals over all cases.
COMPUTE C=1.
COMPUTE ABS_RES_1=ABS(RES_1).
* The sums must run over cells (one case each), so weighting is switched off first.
WEIGHT OFF.
AGGREGATE
    /OUTFILE=* MODE=ADDVARIABLES
    /BREAK=C
    /ABS_RES_1_sum=SUM(ABS_RES_1)
    /PRE_1_sum=SUM(PRE_1).
* Restore the cell-count weight; the weight variable must be spelled out in full (Frequencies).
WEIGHT BY Frequencies.
COMPUTE ID=(ABS_RES_1_sum/(2*PRE_1_sum))*100.
* The index is identical on every case, so listing the first case is enough.
LIST ID /CASES=FROM 1 TO 1.
* Remove saved and working variables so that the next model can reuse the same names.
DELETE VARIABLES RES_1 ZRE_1 ADJ_1 DEV_1 PRE_1 ABS_RES_1 ABS_RES_1_sum PRE_1_sum ID.

* Model with A*S and S*D: admission and department are related only through sex.
GENLOG A S D
    /PRINT=FREQ RESID ESTIM
    /PLOT=NONE
    /DESIGN A S D A*S S*D
    /SAVE=RESID ZRESID ADJRESID DEV PRED.
* Dissimilarity index: 100 times the sum of absolute residuals over twice the sum of predicted counts.
* C is a constant break variable, so AGGREGATE returns totals over all cases.
COMPUTE C=1.
COMPUTE ABS_RES_1=ABS(RES_1).
* The sums must run over cells (one case each), so weighting is switched off first.
WEIGHT OFF.
AGGREGATE
    /OUTFILE=* MODE=ADDVARIABLES
    /BREAK=C
    /ABS_RES_1_sum=SUM(ABS_RES_1)
    /PRE_1_sum=SUM(PRE_1).
* Restore the cell-count weight; the weight variable must be spelled out in full (Frequencies).
WEIGHT BY Frequencies.
COMPUTE ID=(ABS_RES_1_sum/(2*PRE_1_sum))*100.
* The index is identical on every case, so listing the first case is enough.
LIST ID /CASES=FROM 1 TO 1.
* Remove saved and working variables so that the next model can reuse the same names.
DELETE VARIABLES RES_1 ZRE_1 ADJ_1 DEV_1 PRE_1 ABS_RES_1 ABS_RES_1_sum PRE_1_sum ID.

* Model with A*D and S*D: no admission-by-sex association within departments.
GENLOG A S D
    /PRINT=FREQ RESID ESTIM
    /PLOT=NONE
    /DESIGN A S D A*D S*D
    /SAVE=RESID ZRESID ADJRESID DEV PRED.
* Dissimilarity index: 100 times the sum of absolute residuals over twice the sum of predicted counts.
* C is a constant break variable, so AGGREGATE returns totals over all cases.
COMPUTE C=1.
COMPUTE ABS_RES_1=ABS(RES_1).
* The sums must run over cells (one case each), so weighting is switched off first.
WEIGHT OFF.
AGGREGATE
    /OUTFILE=* MODE=ADDVARIABLES
    /BREAK=C
    /ABS_RES_1_sum=SUM(ABS_RES_1)
    /PRE_1_sum=SUM(PRE_1).
* Restore the cell-count weight; the weight variable must be spelled out in full (Frequencies).
WEIGHT BY Frequencies.
COMPUTE ID=(ABS_RES_1_sum/(2*PRE_1_sum))*100.
* The index is identical on every case, so listing the first case is enough.
LIST ID /CASES=FROM 1 TO 1.
* Remove saved and working variables so that the next model can reuse the same names.
DELETE VARIABLES RES_1 ZRE_1 ADJ_1 DEV_1 PRE_1 ABS_RES_1 ABS_RES_1_sum PRE_1_sum ID.

* Model with all two-way associations (A*S, A*D, S*D) and no three-way interaction.
GENLOG A S D
    /PRINT=FREQ RESID ESTIM
    /PLOT=NONE
    /DESIGN A S D A*S A*D S*D
    /SAVE=RESID ZRESID ADJRESID DEV PRED.
* Dissimilarity index: 100 times the sum of absolute residuals over twice the sum of predicted counts.
* C is a constant break variable, so AGGREGATE returns totals over all cases.
COMPUTE C=1.
COMPUTE ABS_RES_1=ABS(RES_1).
* The sums must run over cells (one case each), so weighting is switched off first.
WEIGHT OFF.
AGGREGATE
    /OUTFILE=* MODE=ADDVARIABLES
    /BREAK=C
    /ABS_RES_1_sum=SUM(ABS_RES_1)
    /PRE_1_sum=SUM(PRE_1).
* Restore the cell-count weight; the weight variable must be spelled out in full (Frequencies).
WEIGHT BY Frequencies.
COMPUTE ID=(ABS_RES_1_sum/(2*PRE_1_sum))*100.
* The index is identical on every case, so listing the first case is enough.
LIST ID /CASES=FROM 1 TO 1.
* Remove saved and working variables so that the next model can reuse the same names.
DELETE VARIABLES RES_1 ZRE_1 ADJ_1 DEV_1 PRE_1 ABS_RES_1 ABS_RES_1_sum PRE_1_sum ID.

* Model with every effect up to the three-way interaction A*S*D.
GENLOG A S D
    /PRINT=FREQ RESID ESTIM
    /PLOT=NONE
    /DESIGN A S D A*S A*D S*D A*S*D
    /SAVE=RESID ZRESID ADJRESID DEV PRED.
* Dissimilarity index: 100 times the sum of absolute residuals over twice the sum of predicted counts.
* C is a constant break variable, so AGGREGATE returns totals over all cases.
COMPUTE C=1.
COMPUTE ABS_RES_1=ABS(RES_1).
* The sums must run over cells (one case each), so weighting is switched off first.
WEIGHT OFF.
AGGREGATE
    /OUTFILE=* MODE=ADDVARIABLES
    /BREAK=C
    /ABS_RES_1_sum=SUM(ABS_RES_1)
    /PRE_1_sum=SUM(PRE_1).
* Restore the cell-count weight; the weight variable must be spelled out in full (Frequencies).
WEIGHT BY Frequencies.
COMPUTE ID=(ABS_RES_1_sum/(2*PRE_1_sum))*100.
* The index is identical on every case, so listing the first case is enough.
LIST ID /CASES=FROM 1 TO 1.
* Remove saved and working variables so that the next model can reuse the same names.
DELETE VARIABLES RES_1 ZRE_1 ADJ_1 DEV_1 PRE_1 ABS_RES_1 ABS_RES_1_sum PRE_1_sum ID.


* Analysis leaving department A out.
* The D range (2 6) restricts the model selection to departments coded 2 to 6 (B to F).
HILOGLINEAR A S(1 2) D(2 6)
    /METHOD=BACKWARD
    /CRITERIA MAXSTEPS(10) P(.05) ITERATION(20) DELTA(.5)
    /PRINT=FREQ RESID ASSOCIATION
    /DESIGN.
* Fit the A*D, S*D model with department A (D=1) excluded.
* TEMPORARY limits the case selection to the next procedure, which is GENLOG.
TEMPORARY.
SELECT IF (D NE 1).
GENLOG A S D
    /PRINT=FREQ RESID ESTIM
    /PLOT=NONE
    /DESIGN A S D A*D S*D
    /SAVE=RESID ZRESID ADJRESID DEV PRED.
* Dissimilarity index: 100 times the sum of absolute residuals over twice the sum of predicted counts.
* NOTE(review): department A cases presumably carry missing saved values and drop out of the sums - verify.
* C is a constant break variable, so AGGREGATE returns totals over all cases.
COMPUTE C=1.
COMPUTE ABS_RES_1=ABS(RES_1).
* The sums must run over cells (one case each), so weighting is switched off first.
WEIGHT OFF.
AGGREGATE
    /OUTFILE=* MODE=ADDVARIABLES
    /BREAK=C
    /ABS_RES_1_sum=SUM(ABS_RES_1)
    /PRE_1_sum=SUM(PRE_1).
* Restore the cell-count weight; the weight variable must be spelled out in full (Frequencies).
WEIGHT BY Frequencies.
COMPUTE ID=(ABS_RES_1_sum/(2*PRE_1_sum))*100.
LIST ID /CASES=FROM 1 TO 1.
* Remove saved and working variables so that the next model can reuse the same names.
DELETE VARIABLES RES_1 ZRE_1 ADJ_1 DEV_1 PRE_1 ABS_RES_1 ABS_RES_1_sum PRE_1_sum ID.

* Model 17 without department A.
* Option a) results when department A is not selected (its cases are filtered out).
* TEMPORARY limits the case selection to the next procedure, which is GENLOG.
TEMPORARY.
SELECT IF (D NE 1).
GENLOG A S D
    /MODEL=POISSON
    /PRINT=FREQ RESID ADJRESID ZRESID DEV DESIGN ESTIM CORR COV
    /PLOT=RESID(ADJRESID DEV) NORMPROB(ADJRESID DEV)
    /CRITERIA=CIN(95) ITERATE(20) CONVERGE(0.001) DELTA(.5)
    /DESIGN A S D A*D D*S
    /SAVE=RESID ZRESID ADJRESID DEV PRED.
* Dissimilarity index: 100 times the sum of absolute residuals over twice the sum of predicted counts.
COMPUTE C=1.
COMPUTE ABS_RES_1=ABS(RES_1).
* The sums must run over cells (one case each), so weighting is switched off first.
WEIGHT OFF.
* The aggregation is again restricted to departments other than A.
TEMPORARY.
SELECT IF (D NE 1).
AGGREGATE
    /OUTFILE=* MODE=ADDVARIABLES
    /BREAK=C
    /ABS_RES_1_sum=SUM(ABS_RES_1)
    /PRE_1_sum=SUM(PRE_1).
* Restore the cell-count weight; the weight variable must be spelled out in full (Frequencies).
WEIGHT BY Frequencies.
COMPUTE ID=(ABS_RES_1_sum/(2*PRE_1_sum))*100.
* Case 2 is listed because case 1 belongs to department A.
* NOTE(review): presumably the excluded cases receive no aggregated totals - verify.
LIST ID /CASES=FROM 2 TO 2.
* Remove saved and working variables so that the next model can reuse the same names.
DELETE VARIABLES RES_1 ZRE_1 ADJ_1 DEV_1 PRE_1 ABS_RES_1 ABS_RES_1_sum PRE_1_sum ID.

* Option b) dummy variable.
* X is 0 only for the cell A=1, S=1, D=1 (admitted women, department A) and 1 elsewhere.
COMPUTE X = 1.
IF (A EQ 1 AND S EQ 1 AND D EQ 1) X = 0.
FREQUENCIES VARIABLES=X.
* X enters the Poisson log-linear model as a covariate (WITH X) and as a design term.
GENLOG A S D WITH X
    /MODEL=POISSON
    /PRINT=FREQ RESID ADJRESID ZRESID DEV ESTIM CORR COV
    /PLOT=RESID(ADJRESID) NORMPROB(ADJRESID)
    /CRITERIA=CIN(95) ITERATE(20) CONVERGE(0.001) DELTA(.5)
    /DESIGN A S D X A*D D*S.

* Option b) results of Powers and Xie.
* D2 reverses the department coding, so department A becomes category 6.
* NOTE(review): presumably done so the parameter estimates line up with the book - confirm.
RECODE D
    (1=6) (2=5) (3=4) (4=3) (5=2) (6=1)
    INTO D2.
* Y is 0 only for the cell A=1, S=1, D2=6 (admitted women, department A) and 1 elsewhere.
COMPUTE Y = 1.
IF (A EQ 1 AND S EQ 1 AND D2 EQ 6) Y = 0.
FREQUENCIES VARIABLES=Y.
* Same model as with X, on the recoded department; only parameter estimates are printed.
GENLOG A S D2 WITH Y
    /MODEL=POISSON
    /PRINT=ESTIM
    /PLOT=NONE
    /CRITERIA=CIN(95) ITERATE(20) CONVERGE(0.001) DELTA(.5)
    /DESIGN A S D2 Y A*D2 D2*S.

* Option c) weighting of the cells.
* X is 0 for every cell of department A (D=1) and 1 for all other cells.
COMPUTE X=1.
IF (D=1) X=0.
FREQUENCIES X.
* X is passed as the cell structure variable.
* NOTE(review): cells with structure value 0 are presumably treated as structural zeros - verify.
GENLOG A S D
    /CSTRUCTURE=X
    /MODEL=POISSON
    /PRINT=FREQ RESID ADJRESID ZRESID DEV DESIGN ESTIM CORR COV
    /PLOT=RESID(ADJRESID DEV) NORMPROB(ADJRESID DEV)
    /DESIGN A S D A*D D*S
    /SAVE=RESID ZRESID ADJRESID DEV PRED.
* Dissimilarity index: 100 times the sum of absolute residuals over twice the sum of predicted counts.
* C is a constant break variable, so AGGREGATE returns totals over all cases.
COMPUTE C=1.
COMPUTE ABS_RES_1=ABS(RES_1).
* The sums must run over cells (one case each), so weighting is switched off first.
WEIGHT OFF.
AGGREGATE
    /OUTFILE=* MODE=ADDVARIABLES
    /BREAK=C
    /ABS_RES_1_sum=SUM(ABS_RES_1)
    /PRE_1_sum=SUM(PRE_1).
* Restore the cell-count weight; the weight variable must be spelled out in full (Frequencies).
WEIGHT BY Frequencies.
COMPUTE ID=(ABS_RES_1_sum/(2*PRE_1_sum))*100.
LIST ID /CASES=FROM 1 TO 1.
* Remove saved and working variables created for this model.
DELETE VARIABLES RES_1 ZRE_1 ADJ_1 DEV_1 PRE_1 ABS_RES_1 ABS_RES_1_sum PRE_1_sum ID.
