TAGS en R

Implémentation de TAGS en R

(diagnostic values of Tests in the Absence of a Gold Standard)

1. Références

TAGS V.2.0 is a R program developed by R. Pouillot and G. Gerbier, Agence Française de Sécurité Sanitaire des Aliments, France. r.pouillot@afssa.fr Its purpose is to evaluate diagnostic tests in the absence of a gold standard, using Maximum Likelihood Estimation (Newton Raphson and Expectation Maximisation algorithms). For further details: see Pouillot R., Gerbier G. (2001) 'Tags' a program for validation of the diagnostic values of tests in the absence of a gold standard. Proceedings of the Society for Veterinary Epidemiology and Preventive Medecine, Noordwijkerhout, The netherlands, 28th - 30th March 2001: 37-48

2. Code R

#FONCTION TAGS TAGS <- function(jeu="") { ex.warn <- options(warn=-1) dat <- NULL # Fonctions internes sortie.BG <- function(dat){ BG <- array(c(dat$pinit[,1],as.vector(dat$ppinit)),dim=c(1,dat$pop+2*dat$test)) BG[1,1:(dat$pop+dat$test)] <- 1-BG[1,1:(dat$pop+dat$test)] dimnames(BG) <- (list(c("Best Guess"),c(paste(c("pre"),1:dat$pop,sep=""),paste(c("Sp"),1:dat$test,sep=""),paste(c("Se"),1:dat$test,sep="")))) BG} recap <- function(dat){ cat("\n\nDATA SUMMARY \n") cat(dat$pop,"Population(s); ",dat$test,"Tests; ",dat$nbref,"Reference Population(s) \n \n") cat("df:",dat$ddl,"; parameters:",dat$par,"\n \n") rec <- cbind(dat$a,dat$compte,dat$ref) dimnames(rec) <- list(c(1:dat$nombre),c(paste(c("test"),1:dat$test,sep=""), paste(c("pop"),1:dat$pop,sep=""),"RefInd","RefInf")) print(rec) cat("\n") rec} comb <- function (dat) { comb <- array(0,dim=c(dat$nombre,dat$test)) for (p in 1:(dat$nombre-1)){ q <- p for (t in 1:dat$test) { comb[p+1,t] <- q-2*floor(q/2) q <- floor(q/2) }} comb} test <- function(teste,inf=-Inf,sup=Inf){ if (is.na(teste) || teste<inf || teste>sup) {cat("Be serious ! let's try again !\n ") return(FALSE)} TRUE} ask <- function(nom.popu,dat){ mat <- rep(0,dat$nombre) cat("\n" ,nom.popu,"\n") for(q in 1:dat$nombre) { cat("Number of results",dat$a[q,],"in",nom.popu,": ") cond <- FALSE; while(cond==FALSE){mat[q] <- as.numeric(readline());cond <- test(mat[q],0,Inf)} } mat} # FONCTION LOAD.DATA load.data <- function(){ dat <- NULL cat("Enter a name for your data : ") dat$name <- readline() assign(dat$name, NULL,env = .GlobalEnv) cat("\n \nENTER YOUR data \n") cat("Number of tests (between 1 and 10) ? ") cond <- FALSE; while(cond==FALSE){dat$test <- as.numeric(readline());cond <- test(dat$test,1,10)} cat("Number of population without unknown status (between 1 and 10) ? ") cond <- FALSE; while(cond==FALSE){dat$pop <- as.numeric(readline());cond <- test(dat$pop,1,10)} cat("\nDo you have Reference Population(s) ? \n") cat("No : Enter 0 \n") cat("Yes, Disease free : Enter 1\n") cat("Yes, Infected : Enter 2\n") cat("Yes, one Disease-free and one Infected : Enter 3\n") cond <- FALSE; while(cond == FALSE) {askref <- readline();cond <- test(as.integer(askref), 0, 3)} dat$nbref <- switch(as.character(as.integer(askref)),"0"=0,"1"=1,"2"=1,"3"=2) dat$par <- 2*dat$test+dat$pop dat$ddl <- (dat$nbref+dat$pop)*(2**dat$test-1) if (dat$par > dat$ddl) stop(paste("df (",dat$ddl,") < parameters (",dat$par,") : Sorry, evaluation is impossible")) dat$nombre <- 2**dat$test dat$a <- comb(dat) dat$compte <-array(0,dim=c(dat$nombre,dat$pop)) for (p in 1:dat$pop){dat$compte[,p] <- ask(paste("population with unknown status",p),dat)} dat$ref <- array(0, dim=c(dat$nombre,2)) if (askref=="1" || askref=="3") dat$ref[,1] <- ask("reference Disease Free population",dat) if (askref=="2" || askref=="3") dat$ref[,2] <- ask("reference Infected population",dat) dat$ppinit <- matrix(rep(c(0.05,0.8),dat$test), ncol=2,byrow=T) dat$pinit <- matrix(rep(c(0.8,0.2),dat$pop), ncol=2,byrow=T) dat$N <- apply(dat$compte,2,sum) dat$Nref <- apply(dat$ref,2,sum) recap(dat) cat("\n Is it OK ?\n No : Enter 0\n Yes : Enter 1\n ") cond <- FALSE;while(cond == FALSE) {rep <- readline();cond <- test(as.integer(rep), 0, 1)} if (rep == 0) load.data() assign(dat$name, dat,env = .GlobalEnv) dat } if (jeu==""){ cat("\nTAGS V.2.0 is a R program developed by \nR. Pouillot and G. Gerbier, Agence Française de Sécurité Sanitaire des Aliments, France. r.pouillot@afssa.fr\n \nIts purpose is to evaluate diagnostic tests in the absence of a gold standard,\nusing Maximum Likelihood Estimation (Newton Raphson and Expectation Maximisation algorithms).\n\nFor further details: see \nPouillot R., Gerbier G. (2001) \n'Tags' a program for validation of the diagnostic values of tests in the absence of a gold standard. \nProceedings of the Society for Veterinary Epidemiology and Preventive Medecine, \nNoordwijkerhout, The netherlands, 28th - 30th March 2001: 37-48\n\nReference(s) population(s) data may be used (population(s) with a known infection status)\n\nEvaluation may be used as soon as df>=parameters\nA goodness-of-fit test and residual correlations are then provided\n\nThree sets of data may be used as examples : \nHui and Walter (Biometrics, 1980, 36:167-171): Enter hui \nSaegerman et al (Vet Record, 1999, 145:214-8): Enter sae \nHandelman's dentistry data (JADA, 1986, 113:751-754): Enter qu\nIf you want to enter new data: Enter new \nIf you want to use loaded data: Enter the name the program has already given to you \n\n") jeu <- readline("data Set ? ")} # JEU DE DONNEES PROPOSES if (jeu == "new" ) dat <- load.data() if (jeu=="hui"){ dat$test <- 2 dat$pop <- 2 dat$nombre <- 4 dat$a <- array(c(0,1,0,1,0,0,1,1),dim=c(4,2)) dat$compte <- array(c(528,4,9,14,367,31,37,887),dim=c(4,2)) dat$nbref <- 0 dat$ref <- array(0,dim=c(4,2)) dat$ppinit <- matrix(rep(c(0.05,0.8),2),ncol=2,byrow=T) dat$pinit <- matrix(rep(c(0.8,0.2),2),ncol=2,byrow=T) } if (jeu=="sae"){ dat$test <- 3 dat$pop <- 1 dat$nombre <- 8 dat$compte <- array(c(275,6,33,11,0.0,0.0,6,32),dim=c(8,1)) dat$nbref <- 1 dat$ref <- array(c(1144,0,22,3,2,0,0,0,rep(0,8)),dim=c(8,2)) dat$a <- array(c(0,1,0,1,0,1,0,1,0,0,1,1,0,0,1,1,0,0,0,0,1,1,1,1),dim=c(dat$nombre,dat$test)) dat$ppinit <- matrix(rep(c(0.05,0.8),3),ncol=2,byrow=T) dat$pinit <- matrix(rep(c(0.8,0.2),1),ncol=2,byrow=T) } if (jeu=="qu"){ dat$test <- 4 dat$pop <- 1 dat$nombre <- 16 dat$compte <- array(c(170,4,6,1,0,0,0,0,15,17,0,4,0,84,0,128),dim=c(16,1)) dat$nbref <- 0 dat$ref <- array(0,dim=c(16,2)) dat$a <-array(c(0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,0,1,1,0,0,1,1,0,0,1,1,0,0,1,1,0,0,0,0,1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1),dim=c(16,4)) dat$ppinit <- matrix(rep(c(0.05,0.8),dat$test),ncol=2,byrow=T) dat$pinit <- matrix(rep(c(0.8,0.2),dat$pop),ncol=2,byrow=T) } if (!is.element(jeu,c("new","sae","hui","qu"))){if (!exists(jeu)) return("This set of data does not exist") dat <- get(jeu)} cat("\nDefault Best Guess:\n") print(sortie.BG(dat)) cat("\nDo you have Best Guess?") cat("\nNo : Enter 0 \nYes : Enter 1\n ") cond <- FALSE;while(cond == FALSE) {rep <- readline();cond <- test(as.integer(rep), 0, 1)} if (rep == 1) { cat("Best Guess \n") for (p in 1:dat$pop){ cond <- FALSE; while(cond==FALSE) {dat$pinit[p,2] <- as.numeric(readline(cat("Prevalence in pop",p," (within ]0 - 1[) ? "))) cond <- test(dat$pinit[p,2],0.000001,.999999)} dat$pinit[p,1] <- 1-dat$pinit[p,2] } for (t in 1:dat$test){ cond <- FALSE; while(cond==FALSE) {dat$ppinit[t,1] <- 1-as.numeric(readline(cat("Specificity test",t," (within ]0 - 1[) ? "))) cond <- test(dat$ppinit[t,1],0.000001,.999999)} cond <- FALSE; while(cond==FALSE) {dat$ppinit[t,2] <- as.numeric(readline(cat("Sensitivity test",t," (within ]0 - 1[) ? "))) cond <- test(dat$ppinit[t,2],0.000001,.999999)} } } dat$ddl <- (dat$nbref+dat$pop)*(2**dat$test-1) dat$par <- 2*dat$test+dat$pop dat$N <- apply(dat$compte,2,sum) dat$Nref <- apply(dat$ref,2,sum) cat("\nWould you like Bootstrap Confidence intervals \n(CAUTION: this may be time consumming. It is recommended to ask for it in a second step!)\n") cat("No : Enter 0 \nYes : Enter 1\n ") cond <- FALSE; while(cond==FALSE){askboot <- readline();cond <- test(as.integer(askboot),0,1)} if (askboot==1){ cat("How many iterations? (between 50 and 5000)\n") cond <- FALSE; while(cond==FALSE){askboot <- as.numeric(readline());cond <- test(askboot,1,5000)} } # RECAPITULATIF DES DONNEES dat$rec <- recap(dat) print(sortie.BG(dat)) # FONCTION EVALUATE EM <- function(dat) { pp <- dat$ppinit p <-dat$pinit pitmoins <- rep(0,2*dat$pop+2*dat$test) for (i in 1:1000){ y <- Vref(p,pp) Vind <- y %*% t(p) x <- matrix(y[,1],nrow=dat$nombre,ncol=dat$pop) * matrix(p[,1],ncol=dat$pop,nrow=dat$nombre,byrow=T) / Vind * dat$compte x[is.na(x)] <- 0 p[,1] <- apply(x,2,sum,na.rm=T)/dat$N p[,2] <- 1 - p[,1] pp[,1] <- apply(t(dat$a) %*% (x+dat$ref[,1]),1,sum)/((sx<-sum(x))+dat$Nref[1]) pp[,2] <- apply(t(dat$a) %*% (dat$compte-x+dat$ref[,2]),1,sum)/(sum(dat$N)-sx+dat$Nref[2]) pit <- c(as.vector(p),as.vector(pp)) if (all.equal(pit,pitmoins)==T) break else pitmoins <- pit } if(sum(1-pp[,1],pp[,2]) < sum(pp[,1],1-pp[,2])){ pp <- cbind(pp[,2],pp[,1]) p <- cbind(p[,2],p[,1]) } list(p=p,pp=pp,i=i)} SortEM <- function(dat,p,pp,i){ ptot <- rbind(p,c(1,0),c(0,1)) ll <- sum(cbind(dat$compte,dat$ref)*log(Vref(ptot,pp) %*% t(ptot)),na.rm=T) sol <- array(c(as.vector(p[,1]),as.vector(pp)),dim=c(1,dat$pop+2*dat$test)) sol[1:(dat$pop+dat$test)] <- 1-sol[1:(dat$pop+dat$test)] dimnames(sol) <- list(c("Est"), c(paste(c("pre"),1:dat$pop,sep=""),paste(c("Sp"),1:dat$test,sep=""),paste(c("Se"),1:dat$test,sep=""))) list(Iterations=i,LogLikelihood=ll,Estimations=round(sol,4),p=p,pp=pp) } vv <- function(i,pp) {t(pp[,i]*t(dat$a))+t((1-pp[,i])*t(1-dat$a))} Vref <- function(p,pp){ z <- lapply(1:2,vv,pp) Vref <- lapply(z,apply,1,prod) matrix(c(Vref[[1]],Vref[[2]]),ncol=2) } #BOOTSTRAP bootfun <- function(boot1,iter=100,p,pp){ boot1$pinit <- p boot1$ppinit <- pp EstBoot <- array(0,dim=c(iter,boot1$par)) for (k in 1:iter){ boot <- boot1 for (i in 1:boot$pop){ ech <- c(1:boot$nombre,sample(1:boot$nombre, boot$N[i], replace=T, prob=boot$compte[,i]/boot$N[i])) boot$compte[,i] <- as.vector(table(ech))-1 } for (i in 1:2){ if (boot$Nref[i]!=0){ ech <- c(1:boot$nombre,sample(1:boot$nombre, boot$Nref[i], replace=T, prob=boot$ref[,i]/boot$Nref[i])) boot$ref[,i] <- as.vector(table(ech))-1 }} EMboot <- EM(boot) EstBoot[k,] <- as.vector(c(EMboot$p[,2],EMboot$pp)) } res <- apply(EstBoot,2,quantile,probs = c(0.025,0.975)) res[1:2,(boot$pop+1):(boot$pop+boot$test)] <- 1-res[2:1,(boot$pop+1):(boot$pop+boot$test)] dimnames(res) <- list(c("CIinf","CIsup"), c(paste(c("pre"),1:dat$pop,sep=""),paste(c("Sp"),1:dat$test,sep=""),paste(c("Se"),1:dat$test,sep=""))) round(res,4) } # MAXIMUM LIKELIHOOD ML <- function(dat,p,pp) { para <- logit(c(as.vector(p[,1]),as.vector(pp))) res.nlm <- nlm(llik, para, hessian=TRUE, ndigit=10) x <- res.nlm$hessian sol <- res.nlm$estimate if (prod(diag(qr(x)$qr))*(-1)^(ncol(x)-1)==0) {cat('The Matrix is singular : no SE available\n');SE <- rep(NaN,2*dat$test+dat$pop)} else {SE <- sqrt(diag(solve(x)))} p <- inv.logit(sol[1:dat$pop]) p <- cbind(p,(1-p)) pp <- inv.logit(sol[(dat$pop+1):(dat$pop+2*dat$test)]) pp <- array(pp,dim=c(dat$test,2)) sol <- rbind(sol,sol - 1.96*SE,sol + 1.96*SE) sol <- inv.logit(sol) sol[,1:(dat$pop+dat$test)] <- 1-sol[,1:(dat$pop+dat$test)] sol[3:2,1:(dat$pop+dat$test)] <- sol[2:3,1:(dat$pop+dat$test)] dimnames(sol) <- list(c("Est","CIinf","CIsup"), c(paste(c("pre"),1:dat$pop,sep=""),paste(c("Sp"),1:dat$test,sep=""),paste(c("Se"),1:dat$test,sep=""))) list(Iterations=res.nlm$iterations,LogLikelihood=-res.nlm$minimum,Estimations=round(sol,4),p=p,pp=pp) } # VRAISSEMBLANCE llik<-function(para) { p <- inv.logit(matrix(c(para[1:dat$pop],-para[1:dat$pop]),ncol=2,byrow=F)) p <- rbind(p,c(1,0),c(0,1)) pp <- inv.logit(array(para[(dat$pop+1):(dat$test*2+dat$pop)],dim=c(dat$test,2))) -sum(cbind(dat$compte,dat$ref)*log(Vref(p,pp) %*% t(p))) } logit <- function(p) {log(p/(1-p))} inv.logit <- function(x) {exp(x)/(1+exp(x))} # CHECK FOR CORRELATION check <- function(pest,ppest,dat){ if (dat$par >= dat$ddl){return(list(ResCor="The number of parameters = df: no meaningful residuals"))} if (dat$test == 1){return(list(ResCor="The number of test=1: no residuals"))} res <- NULL for (p in 1:dat$pop){ cor <- lab <- mue <- NULL muo <- apply(dat$compte[,p]*dat$a,2,sum)/sum(dat$compte[,p]) for (i in 1:dat$test) {mue[i] <- pest[p,1]*(ppest[i,1])+pest[p,2]*ppest[i,2]} for (i in 1:(dat$test-1)) { for (j in (i+1):dat$test) { obs <- sum(dat$a[,i]*dat$a[,j]*dat$compte[,p])/sum(dat$compte[,p]) obs <- (obs-muo[i]*muo[j])/sqrt(muo[i]*(1-muo[i])*muo[j]*(1-muo[j])) att <- pest[p,1]*ppest[i,1]*ppest[j,1]+pest[p,2]*ppest[i,2]*ppest[j,2] att <- (att-mue[i]*mue[j])/sqrt(mue[i]*(1-mue[i])*mue[j]*(1-mue[j])) cor <- c(cor,obs-att) lab <- c(lab,paste("Corr",i,"-",j,sep="")) }} res <- rbind(res,cor) } dimnames(res) <- list(paste("pop",1:dat$pop,":"),lab) list(ResCor=res,Commentary="The residuals should be randomly distributed around 0") } # LIKELIHOOD RATIO LR <- function(p,pp,L.ML,dat){ p <- rbind(p,c(1,0),c(0,1)) Vind <- Vref(p,pp) %*% t(p) Exp <- t(t(Vind[,1:dat$pop])*dat$N) Expref <- t(t(Vind[,(dat$pop+1):(dat$pop+2)])*dat$Nref) tot <- cbind(dat$rec,round(Exp,2),round(Expref,2)) dimnames(tot) <- list(c(1:dat$nombre),c(paste(c("test"),1:dat$test,sep=""),paste(c("pop"),1:dat$pop,sep=""),"RefInd","RefInf",paste(c("ExpPop"),1:dat$pop,sep=""),"ExpRefInd","ExpRefInf")) df <- dat$ddl-dat$par if (dat$par<dat$ddl) { L.Tot <- sum(dat$compte*log(t(t(dat$compte)/dat$N)),na.rm=TRUE)+sum(dat$ref*log(t(t(dat$ref)/dat$Nref)),na.rm=TRUE) Dev <- 2*(L.Tot-L.ML) LRes <- matrix(c(L.Tot,L.ML,Dev,df,p <- 1-pchisq(Dev,df)),nrow=1) dimnames(LRes) <- list(c(""),c("Max LogLikelihood: Achievable","Obtained","Deviance","d.f.","p value")) if (p < 0.05) com <-"The model does not fit: Assumptions may be not justified" else com<-"The model could fit" } if (dat$par>=dat$ddl) { LRes <- "NA" com <-"The number of parameters = df: no goodness-of-fit test possible"} list(Expected=tot,Test=LRes,Commentary=com) } # MODELISATION cat("\n EXPECTATION MAXIMISATION \n") resEMtp <- EM(dat) resEM <- SortEM(dat,resEMtp$p,resEMtp$pp,resEMtp$i) print(resEM[1:3]) cat("\n NEWTON-RAPHSON \n") p <- array(pmax(0.0001,pmin(.9999,resEM$p)),dim=dim(resEM$p)) pp <- array(pmax(0.0001,pmin(.9999,resEM$pp)),dim=dim(resEM$pp)) resML <- ML(dat,p,pp) print(resML[1:3]) cat("\nWARNING 1: test results are assumed to be independent conditional on infection or disease status\n") if ((dat$pop+dat$nbref)>1) cat("WARNING 2: tests are supposed to have constant sensitivity and specificity in all populations\n") cat("\nExpected Results (NR) and Goodness-of-fit test\n") resLR <- LR(resML$p,resML$pp,resML$LogLikelihood,dat) print(resLR) cat("\nResidual correlations between test\n") resCorr <- check(resML$p,resML$pp,dat) print(resCorr) cat("\n BOOTSTRAP CONFIDENCE INTERVALS :",askboot," samples \n") if(askboot!=0){ resboot <- bootfun(dat,askboot,resEMtp$p,resEMtp$pp) print(resboot)} options(warn=as.numeric(ex.warn)) if (jeu=="new"){ cat(paste("your data are stored in \"",dat$name,"\"\n",sep="")) cat("Note that you'll have to Save workspace image before leaving R if you want to use it in a new R session\n")} invisible(list(name=dat$name,resEM=resEM, resML=resML, resLR=resLR, resCorr=resCorr)) }

3. Exemple d'éxécution

> TAGS() Reference(s) population(s) data may be used (population(s) with a known infection status) Evaluation may be used as soon as df>=parameters A goodness-of-fit test and residual correlations are then provided Three sets of data may be used as examples : Hui and Walter (Biometrics, 1980, 36:167-171): Enter hui Saegerman et al (Vet Record, 1999, 145:214-8): Enter sae Handelman's dentistry data (JADA, 1986, 113:751-754): Enter qu If you want to enter new data: Enter new If you want to use loaded data: Enter the name the program has already given to you data Set ? hui Default Best Guess: pre1 pre2 Sp1 Sp2 Se1 Se2 Best Guess 0.2 0.2 0.95 0.95 0.8 0.8 Do you have Best Guess? No : Enter 0 Yes : Enter 1 0 Would you like Bootstrap Confidence intervals (CAUTION: this may be time consumming. It is recommended to ask for it in a second step!) No : Enter 0 Yes : Enter 1 1 How many iterations? (between 50 and 5000) 5000 DATA SUMMARY 2 Population(s); 2 Tests; 0 Reference Population(s) df: 6 ; parameters: 6 test1 test2 pop1 pop2 RefInd RefInf 1 0 0 528 367 0 0 2 1 0 4 31 0 0 3 0 1 9 37 0 0 4 1 1 14 887 0 0 pre1 pre2 Sp1 Sp2 Se1 Se2 Best Guess 0.2 0.2 0.95 0.95 0.8 0.8 EXPECTATION MAXIMISATION $Iterations [1] 27 $LogLikelihood [1] -1207.617 $Estimations pre1 pre2 Sp1 Sp2 Se1 Se2 Est 0.0268 0.7168 0.9933 0.9841 0.9661 0.9688 NEWTON-RAPHSON $Iterations [1] 1 $LogLikelihood [1] -1207.617 $Estimations pre1 pre2 Sp1 Sp2 Se1 Se2 Est 0.0268 0.7168 0.9933 0.9841 0.9661 0.9688 CIinf 0.0159 0.6911 0.9797 0.9684 0.9495 0.9540 CIsup 0.0450 0.7412 0.9978 0.9921 0.9774 0.9790 WARNING 1: test results are assumed to be independent conditional on infection or disease status WARNING 2: tests are supposed to have constant sensitivity and specificity in all populations Expected Results (NR) and Goodness-of-fit test $Expected test1 test2 pop1 pop2 RefInd RefInf ExpPop1 ExpPop2 ExpRefInd ExpRefInf 1 0 0 528 367 0 0 528 367 0 0 2 1 0 4 31 0 0 4 31 0 0 3 0 1 9 37 0 0 9 37 0 0 4 1 1 14 887 0 0 14 887 0 0 $Test [1] "NA" $Commentary [1] "The number of parameters = df: no goodness-of-fit test possible" Residual correlations between test $ResCor [1] "The number of parameters = df: no meaningful residuals" BOOTSTRAP CONFIDENCE INTERVALS : 5000 samples pre1 pre2 Sp1 Sp2 Se1 Se2 CIinf 0.0135 0.6917 0.9855 0.9727 0.9520 0.9565 CIsup 0.0414 0.7414 0.9994 0.9939 0.9792 0.9806

Retour à la page principale de (gH)