##################### # QERM 598 # # Lab 2 1.17.2007 # ##################### # Knee data example # 32 patients who recently had ACL surgery were randomly assigned to # one of two recovery treatment plans, A or B. # # The research question of interest is which treatment gets patients # to 75% of pre-surgery leg strength faster. The times (in weeks) # for the patients to reach this strength level are given here: # A: 2,3,3,4,5,6,7,8,10,11,13,16,17,19,20,20 # B: 2,2,3,3,4,4,6,6,6,7,9,10,10,12,19,39 # One M.D. collaborating on this research project is most interested # in the average recovery time before patients reach the # desired strength level. Her null and alternative hypotheses are: # H0: There is no difference in average recovery time between # the two treatments. # H1: There is a difference in average recovery time between # the two treatments. # # When we translate this to mathematical language, we have: # H0: mean(A data) - mean(B data) = 0 # H1: mean(A data) - mean(B data) != 0 # # Our test statistic will be: # t.0 = mean(A) - mean(B) # # Read the data into R and calculate the test statistic: a.times<-c(2,3,3,4,5,6,7,8,10,11,13,16,17,19,20,20) b.times<-c(2,2,3,3,4,4,6,6,6,7,9,10,10,12,19,39) t.0<-mean(a.times)-mean(b.times) t.0 # How can we generate a null distribution for this test statistic? # We will use a randomization test. all.times<-c(a.times,b.times) # combine time vectors num.sims<-999 # how many realizations to run null.test.stats<-rep(NA,num.sims) # storage vector for test stats under null for(sim in 1:num.sims){ # open loop to repeat num.sims times # permute the data randomly mixup.data<-sample(all.times) n<-length(mixup.data) # assume first half of values are treatment A a.times.null<-mixup.data[1:(n/2)] # assume second half of values are treatment B b.times.null<-mixup.data[(n/2+1):n] # calculate test statistic: t.0.null<-mean(a.times.null)-mean(b.times.null) # store the test statistic null.test.stats[sim]<-t.0.null } # append observed test statistic to complete sample: all.test.stats<-c(null.test.stats,t.0) # calculate p.value extreme.test.stats<-all.test.stats[all.test.stats>=t.0] p.value<-length(extreme.test.stats)/length(all.test.stats) p.value # What can we conclude from this test? ################################ # Second half: if time allows # ################################ # A second M.D. collaborator thinks there's more to the story. # She presents the following graph: par(mfrow=c(1,2)) boxplot(a.times,ylim=c(0,45)) boxplot(b.times,ylim=c(0,45)) # and makes the argument that the extremely long recovery time # of one patient in treatment B may be blurring the picture. # # She then argues that treatment B may be more effective in speeding # up recovery for the first 80% patients, and wants to run a different test # of the hypotheses: # H0: There is no difference in the average time for the two treatments # to get the first 80% of patients to the desired strength level. # H1: There is a difference in the average time for the two treatments # to get the first 80% of patients to the desired strength level. # # When we translate this to mathematical language, we have: # H0: mean(first 80% of A data) - mean(first 80% of B data) = 0 # H1: mean(first 80% of A data) - mean(first 80% of B data) != 0 # # Our test statistic will be the difference in the means from the # first 80% of each data set: a.80<-quantile(a.times,prob=.8) b.80<-quantile(b.times,prob=.8) a.first.80<-a.times[a.times=t.0] p.value<-length(extreme.test.stats)/length(all.test.stats) p.value # What can we conclude from this test?