######################################################################## ## ## ## PURPOSE: ## ## ## ## THIS PROGRAM IS FOR CALCULATING THE SAMPLE SIZE OF A PAIRED CASE- ## ## CONTROL STUDY TO TEST FOR INTERACTION BETWEEN A BINARY EXPOSURE ## ## VARIABLE AND ANOTHER RISK FACTOR WHICH IS A MATCHING VARIABLE OF ## ## THE STUDY. ## ## ## ## NOTE: ## ## ## ## THIS PROGRAM GENERATES THE DATA FOR MAKING FIGURE 3 IN THE PAPER: ## ## Qiu, P., Moeschberger, M., Cooke, G., and Goldschmidt, P. (2000), ## ## ``Sample size to test for interaction between a specific exposure ## ## and a second risk factor in a pair-matched case-control study," ## ## Statistics in Medicine, vol.19, 923-935. ## ## ## ## ALL COMMENTS AND SUGGESTIONS SHOULD BE FORWARDED TO ## ## ## ## Peihua Qiu ## ## 313 Ford Hall ## ## 224 Church Street SE ## ## School of Statistics ## ## University of Minnesota ## ## Minneapolis, MN 55455 ## ## Office Phone: 612-625-5819 ## ## e-mail: qiu@stat.umn.edu ## ## ## ## @ COPYRIGHT 2001 BY PEIHUA QIU. ## ## ## ######################################################################## ## The following Splus function "powerf" is the averaged power function ## ## defined by equation (5) in the paper. It depends on the following ## ## parameters ## ## m: the number of discordant pairs ## ## pd1: the conditional probability of a (+,-) pair ## ## mu0,sigma02: mean and variance of the conditional distribution of ## ## the second risk factor in the (+,-) population. ## ## mu1,sigma12: mean and variance of the conditional distribution of ## ## the second risk factor in the (-,+) population. ## powerf <- function(m,pd1,mu0,mu1,sigma02,sigma12){ ## x10 is the number of (+,-) pairs ## ## x01 is the number of (-,+) pairs ## x10 <- seq(0,m) x01 <- m - x10 ## eta, tau2, sigmay2 are defined by the formulas right after ## ## equation (4) ## eta <- x10*x01*(mu1-mu0)/(x10+x01) tau2 <- x10*x01*(x01*sigma12+x10*sigma02)/(x10+x01)^2 sigmay2 <- (x10*sigma12+x01*sigma02)/(x10+x01) + (mu1-mu0)^2*x10*x01/(x10+x01)^2 ## power is define by formula (4). we assume that alpha=0.05 ## power <- 1 - pnorm(1.64*sqrt(sigmay2)*sqrt(x10*x01/(x10+x01))/ sqrt(tau2) - eta/sqrt(tau2) ) ## apower is define by formula (5). ## prob <- dbinom(x10[2:m],m,sum(pd1)/length(pd1))/ sum(dbinom(x10[2:m],m,sum(pd1)/length(pd1))) apower <- sum(power[2:m]*prob) apower } ### Define the averaged power function with respect to m ### powerb <- function(n,p1){ ## a and b are the coefficients in model (1) in which the notations ## ## delta and theta were used. ## a <- log(2.8) b <- log(2.0) ## we know that about 63.7% people smokes in a general population. ## ## 1000 random numbers are generated from a Bernoulli population. ## smoking <- rbinom(1000,1,.637) ## So we obtain 1000 estimators of pi_10^c (cf. (11)-(13)) ## pd1 <- exp(a+b*(smoking-.637))/(1+exp(a+b*(smoking-.637))) pd0 <- 1-pd1 mu0 <- sum(smoking*pd0)/sum(pd0) mu1 <- sum(smoking*pd1)/sum(pd1) sigma02 <- sum(smoking^2*pd0)/sum(pd0) - mu0^2 sigma12 <- sum(smoking^2*pd1)/sum(pd1) - mu1^2 pi10c <- sum(pd1)/length(pd1) ## Then we can compute pi_d (cf. equation (14)) ## psi <- pi10c/(1-pi10c) # p1 <- psi*p0/(1-p0+psi*p0) p0 <- p1/(psi+p1*(1-psi)) pid <- p0*(1-p1)+p1*(1-p0) ## we then compute the averaged power by equation (6) temp1 <- 0 temp2 <- 0 for(i in 2:(n-1)){ temp1 <- temp1 + sum(powerf(i,pd1,mu0,mu1,sigma02,sigma12)* dbinom(i,n,pid)) temp2 <- temp2 + dbinom(i,n,pid) } temp1/temp2 } ## Two cases p1=0.3 and p1=0.5 are considered. In the two cases, ## ## power values for several n (sample size) values are calculated ## ## and saved in variables power1 and power2, respectively. Then ## ## Figure 3 can be made based on these data. ## n <- seq(500,1100,50) power1 <- rep(0,length(n)) power2 <- rep(0,length(n)) for(i in 1:length(n)){ power1[i] <- powerb(n[i],0.3) power2[i] <- powerb(n[i],0.5) } write(rbind(n,power1,power2),"fig3.dat",ncol=3)