######################################################################## ## ## ## PURPOSE: ## ## ## ## THIS PROGRAM IS FOR CALCULATING THE SAMPLE SIZE OF A PAIRED CASE- ## ## CONTROL STUDY TO TEST FOR INTERACTION BETWEEN A BINARY EXPOSURE ## ## VARIABLE AND ANOTHER RISK FACTOR WHICH IS NOT A MATCHING VARIABLE ## ## OF THE STUDY. ## ## ## ## NOTE: ## ## ## ## THIS PROGRAM GENERATES THE DATA FOR MAKING FIGURE 4 IN THE PAPER: ## ## Qiu, P., Moeschberger, M., Cooke, G., and Goldschmidt, P. (2000), ## ## ``Sample size to test for interaction between a specific exposure ## ## and a second risk factor in a pair-matched case-control study," ## ## Statistics in Medicine, vol.19, 923-935. ## ## ## ## ALL COMMENTS AND SUGGESTIONS SHOULD BE FORWARDED TO ## ## ## ## Peihua Qiu ## ## 313 Ford Hall ## ## 224 Church Street SE ## ## School of Statistics ## ## University of Minnesota ## ## Minneapolis, MN 55455 ## ## Office Phone: 612-625-5819 ## ## e-mail: qiu@stat.umn.edu ## ## ## ## @ COPYRIGHT 2001 BY PEIHUA QIU. ## ## ## ######################################################################## ## The following Splus function "powerf" is the averaged power function ## ## defined by equation (5) in the paper. It depends on the following ## ## parameters ## ## m: the number of discordant pairs ## ## pd1: the conditional probability of a (+,-) pair ## ## mu00,mu01,mu10,mu11,sigma02,sigma12,rho: means and variances defined ## ## on page 929 of the paper. ## powerf <- function(m,pd1,mu00,mu01,mu10,mu11,sigma02,sigma12,rho){ ## x10 is the number of (+,-) pairs ## ## x01 is the number of (-,+) pairs ## x10 <- seq(0,m) x01 <- m - x10 ## The next part is to compute the values of some parameters used ## ## in equation (18). ## nu1 <- (x10*mu11+(m-x10)*mu10)/m nu0 <- (x10*mu01+(m-x10)*mu00)/m eta1 <- sigma12 + (x10*mu11**2+(m-x10)*mu10**2)/m eta0 <- sigma02 + (x10*mu01**2+(m-x10)*mu00**2)/m eta10 <- rho*sqrt(sigma12*sigma02) + (x10*mu11*mu01+x01*mu10*mu00)/m delta <- (eta1*eta0+2*nu1*nu0*eta10-(nu0^2)*eta1-(nu1^2)*eta0-eta10^2)/ ((1-rho^2)*sigma12*sigma02) phi <- x10*x01/(m*(1-rho^2)*sigma12*sigma02)* (sigma02*(mu11-mu10)^2-2*rho*sqrt(sigma12*sigma02)* (mu11-mu10)*(mu01-mu00)+sigma12*(mu01-mu00)^2) ## power is define by formula (18). we assume that alpha=0.05 ## power <- 1 - pchisq(5.99*delta,2,phi) ## apower is define by formula (5). ## prob <- dbinom(x10,m,sum(pd1)/length(pd1)) apower <- sum(power*prob)/sum(prob) apower } ### Define the averaged power function with respect to m ### powerb <- function(n,p1){ ## a, b1 and b0 are the coefficients in model (15) in which the notations ## ## delta, theta1 and theta0 were used. ## a <- log(2.8) b1 <- log(1.414)/6.44 b0 <- log(1.414)/4.92 ## "homo" denotes homocysteine level. Next we generate random numbers ## ## of homocysteine level for both case and control populations. ## homo1 <- rlnorm(1000,2.514,.448) homo0 <- rlnorm(1000,2.299,.430) ## So we obtain 1000 estimators of pi_10^c (cf. (11)-(13)) ## pd1 <- exp(a+b1*(homo1-13.66)+b0*(homo0-10.93))/ (1+exp(a+b1*(homo1-13.66)+b0*(homo0-10.93))) pd0 <- 1-pd1 mu00 <- sum(homo0*pd0)/sum(pd0) mu01 <- sum(homo0*pd1)/sum(pd1) mu10 <- sum(homo1*pd0)/sum(pd0) mu11 <- sum(homo1*pd1)/sum(pd1) sigma02 <- sum(homo0^2)/length(homo0) - ((mu01**2)*sum(pd1) + (mu00**2)*sum(pd0))/length(homo0) sigma12 <- sum(homo1^2)/length(homo1) - ((mu11**2)*sum(pd1) + (mu10**2)*sum(pd0))/length(homo1) rho <- (sum(homo0*homo1)/length(homo1) - (mu01*mu11*sum(pd1) + mu00*mu10*sum(pd0))/length(homo1))/ sqrt(sigma02*sigma12) pi10c <- sum(pd1)/length(pd1) ## Then we can compute pi_d (cf. equation (14)) ## psi <- pi10c/(1-pi10c) # p1 <- psi*p0/(1-p0+psi*p0) p0 <- p1/(psi+p1*(1-psi)) pid <- p0*(1-p1)+p1*(1-p0) ## we then compute the averaged power by equation (6) temp1 <- 0 temp2 <- 0 for(i in 1:n){ temp1 <- temp1 + sum(powerf(i,pd1,mu00,mu01,mu10,mu11, sigma02,sigma12,rho)*dbinom(i,n,pid)) temp2 <- temp2 + dbinom(i,n,pid) } temp1/temp2 } ## Two cases p1=0.3 and p1=0.5 are considered. In the two cases, ## ## power values for several n (sample size) values are calculated ## ## and saved in variables power1 and power2, respectively. Then ## ## Figure 4 can be made based on these data. ## n <- seq(500,1100,50) power1 <- rep(0,length(n)) power2 <- rep(0,length(n)) for(i in 1:length(n)){ power1[i] <- powerb(n[i],0.3) power2[i] <- powerb(n[i],0.5) } write(rbind(n,power1,power2),"fig4.dat",ncol=3)