/******************************************************************** Title: What does a zero mean? Understanding false, random and structural zeros in ecology Authors: Blasco-Morenoa, A., Pérez-Casany, M., Puig, P., Morante, M. Castells, E. Journal: Methods in Ecology and Evolution Data accessibility: https://ddd.uab.cat/record/194390 ********************************************************************/ option nodate nonumber ls=100 ps=2000; *Import data; proc import out= WORK.dades datafile= "...\Senecio_data.txt" dbms=TAB REPLACE; getnames=YES; datarow=2; run; *Data formats; proc format; value pob 1="CB" 2="CP" 3="CT" 4="FM" 5="SS" 6="VF"; value esp 1="SV" 2="SL" 3="SI" 4="SP"; run; /* Categories of Location main factor: "CB" = Can Bosc "CP" = Can Perellada "CT" = Can Torrents "FM" = Fogueres "SS" = Santa Susanna "VF" = Vallfornells Categories of Species main factor: "SV" = S. vulgaris "SL" = S. lividus "SI" = S. inaequidens "SP" = S. pterophorus */ *Assign formats; data dades; set dades; if Location= "CB" then pob2=1; if Location= "CP" then pob2=2; if Location= "CT" then pob2=3; if Location= "FM" then pob2=4; if Location= "SS" then pob2=5; if Location= "VF" then pob2=6; espSI=0; espSP=0; espSV=0; espSL=0; if Species="SI" then do; espSI=1; esp2=3; end; if Species="SP" then do; espSP=1; esp2=4; end; if Species="SV" then do; espSV=1; esp2=1; end; if Species="SL" then do; espSL=1; esp2=2; end; pobCB=0; pobCP=0; pobCT=0; pobFM=0; pobSS=0; pobVF=0; if Location="CB" then pobCB=1; if Location="CP" then pobCP=1; if Location="CT" then pobCT=1; if Location="FM" then pobFM=1; if Location="SS" then pobSS=1; if Location="VF" then pobVF=1; log_TC = log(Total_heads); format pob2 pob.; format esp2 esp.; run; proc sort data=dades; by esp2 pob2; run; *esp2 = Species main factor; *pob2 = Locality main factor; /********************************************************************************************/ *POISSON MODEL; /********************************************************************************************/ * Species and Locality as main effects; proc genmod data=dades plots= RESCHI(index xbeta); class esp2 pob2; model Damaged = esp2 pob2 / offset=log_TC dist = poisson type3; lsmeans esp2 / diff adjust=tukey ilink; run; /********************************************************************************************/ *NEGATIVE BINOMIAL MODEL; /********************************************************************************************/ * Species and Locality as main effects; proc genmod data=dades plots= RESCHI(index xbeta); class esp2 pob2; model Damaged = esp2 pob2 / offset=log_TC dist = NEGBIN type3; lsmeans esp2 / diff adjust=tukey ilink; run; /********************************************************************************************/ *ZIP(1) MODEL; /********************************************************************************************/ *Species and Locality as main effects for count model + Intercept for zero model; *SAS 9.2 or upper; proc genmod data=dades plots= RESCHI(index xbeta) ; class esp2 pob2; model Damaged = esp2 pob2/ offset=log_TC dist = zip type3 covb expected SCORING=51; zeromodel / link= logit ; lsmeans esp2 / diff adjust=tukey ilink; run; *Manual programming alternative for SAS 9.13 or lower; proc nlmixed data = dades tech=NEWRAP; parms b0= -1.5020 pf=1 pe1=2.241209 pe2=2.196204 pe3=0.201141 pob1=-0.1763 pob2= -0.6227 pob3=-0.4531 pob4=0.3983 pob5= -1.0377 a0= -0.1003; bounds 0.99999 < pf < 1.0001; logit0 = a0; prob0 = 1/(1+exp(-logit0)); mu = exp(b0 + pe1*espSI + pe2*espSL + pe3*espSP + pf*log_TC + pob1*pobCB + pob2*pobCP + pob3*pobCT + pob4*pobFM + pob5*pobSS); if Damaged = 0 then ll = log(prob0 + (1- prob0)*exp(-mu)); else ll = Damaged*log(mu) + log(1- prob0) - mu - lgamma(Damaged + 1); model Damaged ~ general(ll); estimate 'inflation p' 1/(1+exp(-a0)); run; /********************************************************************************************/ *ZIP(2) MODEL; /********************************************************************************************/ *Species and Locality as main effects for count model + Species as main effects for zero model; *SAS 9.2 or upper; proc genmod data=dades plots= RESCHI(index xbeta); class esp2 pob2; model Damaged = esp2 pob2/ offset=log_TC dist = zip type3 covb expected SCORING=51; zeromodel esp2 / link= logit; lsmeans esp2 / diff adjust=tukey ilink; run; *Manual programming alternative for SAS 9.13 or lower; proc nlmixed data = dades tech=NEWRAP; parms b0= -1.4652 pf=1 pe1=-1.4461 pe2=0.8188 pe3=-0.7023 pob1=-0.1735 pob2=-0.6414 pob3= -0.4436 pob4= 0.4036 pob5=-1.0291 a0=1.2325 zpe1=-3.3601 zpe2=-2.1133 zpe3=0.4082 ; bounds 0.99999 < pf < 1.01; logit0 = a0 + zpe1*espSI + zpe2*espSL + zpe3*espSP; prob0 = 1/(1+exp(-logit0)); mu = exp(b0 + pe1*espSI + pe2*espSL + pe3*espSP + pf*log_TC + pob1*pobCB + pob2*pobCP + pob3*pobCT + pob4*pobFM + pob5*pobSS); if Damaged = 0 then ll = log(prob0 + (1- prob0)*exp(-mu)); else ll = Damaged*log(mu) + log(1- prob0) - mu - lgamma(Damaged + 1); model Damaged ~ general(ll); estimate 'Inflation SI' 1/(1+exp(-(a0 + zpe1))); estimate 'Inflation SL' 1/(1+exp(-(a0 + zpe2))); estimate 'Inflation SP' 1/(1+exp(-(a0 + zpe3))); estimate 'Inflation SV' 1/(1+exp(-a0)); run; /********************************************************************************************/ *ZINB(1) MODEL; /********************************************************************************************/ *Species and Locality as main effects for count model + Intercept for zero model; *SAS 9.22 or upper; proc genmod data=dades plots= RESCHI(index xbeta); class esp2 pob2; model Damaged = esp2 pob2/ offset=log_TC dist = zinb type3; zeromodel / link= logit; lsmeans esp2 / diff adjust=tukey ilink; run; *Manual programming alternative for SAS 9.2 or lower; proc nlmixed data = dades tech=NEWRAP; parms b0=-2.0481 pf=1 pe1=-0.230339 pe2=1.461367 pe3=0.264191 pob1=0.457879 pob2= -0.7904 pob3=-0.7280 pob4=-0.3412 pob5= -1.086 a0=-0.405262 alpha=0.602843; bounds 0.99 < pf < 1.000001; logit0 = a0; prob0 = 1/(1+exp(-logit0)); mu = exp(b0 + pe1*espSI + pe2*espSL + pe3*espSP + pf*log_TC + pob1*pobCB + pob2*pobCP + pob3*pobCT + pob4*pobFM + pob5*pobSS); m = 1/alpha; p = 1/(1+alpha*mu); if Damaged = 0 then ll = log(prob0 + (1-prob0)*(p**m)); else ll = log(1-prob0) + lgamma(m + Damaged) - lgamma(Damaged + 1) - lgamma(m) + m*log(p) + Damaged*log(1-p); model Damaged ~ general(ll); estimate 'inflation p' 1/(1+exp(-a0)); estimate 'Overdispersion' alpha; run; /********************************************************************************************/ *ZINB(2) MODEL; /********************************************************************************************/ *Species and Locality as main effects for count model + Species as main effects for zero model; *SAS 9.22 or upper; proc genmod data=dades plots= RESCHI(index xbeta); class esp2 pob2; model Damaged = esp2 pob2/ offset=log_TC dist = zinb type3; zeromodel esp2 / link= logit; lsmeans esp2 / diff adjust=tukey ilink; run; *Manual programming alternative for SAS 9.2 or lower; proc nlmixed data = dades tech=NEWRAP; parms b0= -1.7400 pf=1 pe1=-0.6754 pe2=1.0848 pe3=0.0983 pob1=-0.3990 pob2=-0.8718 pob3=-0.6880 pob4=-0.3448 pob5=-1.0759 a0=0.8127 zpe1=-3.9023 zpe2=-2.2445 zpe3=0.8046 alpha= 0.6399; bounds 0.99999 < pf < 1.00001; logit0 = a0 + zpe1*espSI + zpe2*espSL + zpe3*espSP; prob0 = 1/(1+exp(-logit0)); mu = exp(b0 + pe1*espSI + pe2*espSL + pe3*espSP + pf*log_TC + pob1*pobCB + pob2*pobCP + pob3*pobCT + pob4*pobFM + pob5*pobSS); m = 1/alpha; p = 1/(1+alpha*mu); if Damaged = 0 then ll = log(prob0 + (1-prob0)*(p**m)); else ll = log(1-prob0) + lgamma(m + Damaged) - lgamma(Damaged + 1) - lgamma(m) + m*log(p) + Damaged*log(1-p); model Damaged ~ general(ll); estimate 'Inflation SI' 1/(1+exp(-(a0 + zpe1))); estimate 'Inflation SL' 1/(1+exp(-(a0 + zpe2))); estimate 'Inflation SP' 1/(1+exp(-(a0 + zpe3))); estimate 'Inflation SV' 1/(1+exp(-a0)); estimate 'Overdispersion' alpha; run; /********************************************************************************************/ *VUONG TEST; /********************************************************************************************/ /* Macro vuong.sas: Tests for comparing nested and nonnested models Compare two nested or nonnested models fit by maximum likelihood. Nested models are compared using the likelihood ratio test. Nonnested models are compared using tests by Vuong or Clarke testing the hypothesis that both models are equally distant from the true model. */ %include "...\vuong.sas"; *ZIP(II) vs Poisson; proc genmod data=dades plots=RESCHI(index xbeta); model Damaged = espSI espSL espSP pobCB pobCP pobCT pobFM pobSS/ offset=log_TC dist =poisson type3; output out=respoi RESCHI=RESCHI PRED=PREDPOI XBETA =XBETA ; ods output ParameterEstimates=ParameterEstimates ; run; proc genmod data=respoi plots=RESCHI(index xbeta); model Damaged = espSI espSL espSP pobCB pobCP pobCT pobFM pobSS/ offset=log_TC dist = zip type3; zeromodel espSI espSL espSP / link= logit; output out=reszip RESCHI=RESCHI PRED=PREDZIP PZERO=PZEROZIP XBETA =XBETA; ods output ParameterEstimates=ParameterEstimates ZeroParameterEstimates=ZeroParameterEstimates; run; %vuong(data=reszip, response=Damaged, model1=zip, p1=predzip, dist1=zip, pzero1=PZEROZIP, model2=Poisson, p2=predpOI, dist2=poi, nparm1=13, nparm2=9) *ZINB(II) vs NB; proc genmod data=dades plots=RESCHI(index xbeta); model Damaged = espSI espSL espSP pobCB pobCP pobCT pobFM pobSS/ offset=log_TC dist =negbin type3 ; output out=resnb RESCHI=RESCHI PRED=PREDNB XBETA =XBETA; ods output ParameterEstimates=ParameterEstimates ; run; proc genmod data=resnb plots=RESCHI(index xbeta); model Damaged = espSI espSL espSP pobCB pobCP pobCT pobFM pobSS/ offset=log_TC dist = zinb type3; zeromodel espSI espSL espSP / link= logit; output out=reszinb RESCHI=RESCHI PRED=PREDZINB PZERO=PZEROZINB XBETA =XBETA; ods output ParameterEstimates=ParameterEstimates ZeroParameterEstimates=ZeroParameterEstimates; run; %vuong(data=reszinb, response=Damaged, model1=zinb, p1=predzinb, dist1=zinb, scale1=0.6399, pzero1=PZEROZINB, model2=NegBin, p2=PREDNB, scale2= 3.1236, dist2=nb, nparm1=14, nparm2=10)