Code-source


00001     #  (gH)   -_-  countsites.r  ;  TimeStamp (unix) : 26 Juin 2013 vers 16:57     

00002          

00003     # source("statgh.r")     

00004          

00005     library(hash)     

00006     library(lubridate)     

00007          

00008     ##############################################################################     

00009          

countSites <- function(rep1="",rep2="") {

   # Counts the sites found in the .txt files of two directories and writes
   # the per-directory and merged count tables as CSV files.
   #
   # Arguments:
   #   rep1  directory of the first set of .txt files (columns prefixed "L_")
   #   rep2  directory of the second set of .txt files (columns prefixed "N_")
   #
   # Value:
   #   "" when only the help text is shown; otherwise the (invisible) result
   #   of the final cat() call.
   #
   # Side effects:
   #   writes longevity.csv, network.csv, global.csv and tglobal.csv in the
   #   current working directory and prints progress on the console.
   #
   # NOTE(review): dims() is not defined in this file; presumably it comes
   # from statgh.r (see the commented-out source() at the top) -- confirm.
   # as.duration() comes from lubridate, loaded at the top of the file.

   # Both directories are required: show the help as soon as one of them is
   # missing or empty (an empty name would otherwise be scanned as a directory).
   if (missing(rep1) || missing(rep2) || !nzchar(rep1) || !nzchar(rep2)) {
     cat(" countSites() : fusion de fichiers txt pour compter les sites \n\n")
     cat("syntaxe : countSites(rep1_longevity,rep2_network) \n")
     cat("exemple : countSites(\"results_longevity_module\",\"results_network\") \n")
     cat("\n")
     cat("le mieux est d'exécuter cette fonction dans le répertoire parent de ces deux répertoires\n\n")
     return("")
   } # end of help

   tdeb <- Sys.time()

   # ---------------------------------------------------------------------------

   cat("\n\n")
   cat("COMPTAGE DES SITES dans les répertoires ",rep1," et ",rep2)
   cat("\n\n")

   # ---------------------------------------------------------------------------

   # names of the four result files
   nomf1 <- "longevity.csv"
   nomf2 <- "network.csv"
   nomf3 <- "global.csv"
   nomf4 <- "tglobal.csv"

   # list of .txt files in each directory
   tdf1  <- compteFichiers(rep1)
   tdf2  <- compteFichiers(rep2)

   # ---------------------------------------------------------------------------

   # one count column per file, one row per site
   cnt1  <- compteSite(tdf1,rep1,"L")
   cnt2  <- compteSite(tdf2,rep2,"N")

   # ---------------------------------------------------------------------------

   cat("\n",format(Sys.time(),"%X")," Fusion...\n")

   # full outer join on the site name; a site absent from one side counts 0
   cnt3  <- merge(cnt1,cnt2,by="site",all=TRUE)
   cnt3[ is.na(cnt3) ] <- 0

   # transposed view: one row per file, one column per site
   # (drop=FALSE keeps the file name as row label even with a single file)
   cnt4  <- t(cnt3[,-1,drop=FALSE])
   colnames(cnt4)  <- cnt3[,1]

   # ---------------------------------------------------------------------------

   write.csv(x=cnt1,file=nomf1,row.names=FALSE)
   write.csv(x=cnt2,file=nomf2,row.names=FALSE)
   write.csv(x=cnt3,file=nomf3,row.names=FALSE)
   write.csv(x=cnt4,file=nomf4,row.names=TRUE)

   cat("\n")
   cat("FIN DE COMPTAGE DES SITES dans les répertoires ",rep1," et ",rep2,"\n\n")
   tfin <- Sys.time()
   cat(" durée : ",as.duration(tfin-tdeb)," secondes entre ",format(tdeb,"%X")," et ",format(tfin, "%X"))
   cat("\n")
   cat("\n")
   cat(" Fichiers résultats : ",nomf1,dims(cnt1),", ",nomf2,dims(cnt2),",\n ",nomf3,dims(cnt3)," et ",nomf4,dims(cnt4),"\n")

} # end of function countSites

00070          

00071     ##############################################################################     

00072     ##############################################################################     

00073          

fichiersTxt <- function(rep=".") {

   # Lists the .txt files of directory rep.
   #
   # Argument:
   #   rep  directory to scan (default: current directory)
   #
   # Value:
   #   a data frame with a single character column V1 holding the file names
   #   (without the directory part); zero rows when there is no .txt file.
   #
   # The listing is done with list.files() instead of a shell pipeline, so
   # there is no temporary file to clean up, no shell quoting problem with
   # the directory name, and file names containing spaces stay intact.

   noms <- list.files(path=rep,pattern="\\.txt$")

   # keep regular entries only, not sub-directories whose name ends in .txt
   noms <- noms[ !dir.exists(file.path(rep,noms)) ]

   # same ordering as "sort -n": leading number first (0 when there is none),
   # ties broken by the file name itself
   prefixe <- sub("^[[:space:]]*(-?[0-9]*\\.?[0-9]*).*$","\\1",noms)
   cle     <- suppressWarnings(as.numeric(prefixe))
   cle[ is.na(cle) ] <- 0
   noms    <- noms[ order(cle,noms) ]

   return(data.frame(V1=noms,stringsAsFactors=FALSE))

} # end of function fichiersTxt

00086          

00087     ##############################################################################     

00088          

compteFichiers <- function(rep) {

   # Fetches the list of .txt files of directory rep through fichiersTxt(),
   # reports how many there are on the console and hands the list back
   # unchanged.

   listeTxt <- fichiersTxt(rep)

   # file count, right-aligned on 4 characters
   nbAffiche <- sprintf("%4d",nrow(listeTxt))
   cat(" il y a ",nbAffiche," fichiers .txt dans ",rep,"\n")

   listeTxt

} # end of function compteFichiers

00100          

00101     ##############################################################################     

00102          

tdm2df <- function(tdm,nomcol) {

   # Turns a table whose row names are sites into a two-column data frame:
   # column "site" holds the row names of tdm, column nomcol holds the first
   # column of tdm.

   nbLignes <- nrow(tdm)

   # empty two-column frame with the right number of rows, filled below
   resultat        <- as.data.frame(matrix(NA,nrow=nbLignes,ncol=2))
   names(resultat) <- c("site",nomcol)

   resultat[,1] <- row.names(tdm)
   resultat[,2] <- tdm[,1]

   resultat

} # end of function tdm2df

00115          

00116     ##############################################################################     

00117          

compteSite <- function(tdf,rep,inirep) {

   # Builds the table of site counts for every file listed in tdf.
   #
   # Arguments:
   #   tdf     data frame whose first column holds the file names
   #   rep     directory containing these files
   #   inirep  prefix put in front of each file name to build its column name
   #
   # Value:
   #   a data frame with a "site" column plus one column "<inirep>_<file>"
   #   per file in which at least one site was seen; missing combinations
   #   are set to 0. When no file yields any site, a zero-row data frame
   #   with only the "site" column is returned.
   #
   # NOTE(review): analexies() is not defined in this file; presumably it
   # comes from statgh.r -- confirm. Only its $tmots component is used here.

   cat("\n")
   nbf <- nrow(tdf)
   cat(" Traitement des ",sprintf("%4d",nbf)," fichiers .txt dans ",rep,"\n")

   # stays NULL until a first file with sites is met, whatever its rank
   cntSites <- NULL

   # seq_len() so that an empty file list means zero iterations
   for (idf in seq_len(nbf)) {
       leFichier  <- tdf[idf,1]
       nomFichier <- file.path(rep,leFichier)
       cat("   fichier ",sprintf("%5d",idf)," / ",nbf," : ", leFichier,"\n")
       lesmots <- sitesVus(nomFichier)
       if (lesmots=="") {
          cat("           pas de site vu\n")
          next
       } # end if

       # count the words (sites) of this file and name the column after it
       tdmc      <- analexies(lesmots,FALSE,FALSE,FALSE)$tmots
       idFichier <- paste(inirep,"_",leFichier,sep="")
       tdmcDf    <- tdm2df(tdmc,idFichier)

       if (is.null(cntSites)) {
         cntSites <- tdmcDf
       } else {
         # full outer join: keep the sites of both sides
         cntSites <- merge(cntSites,tdmcDf,by="site",all=TRUE)
       } # end if
   } # end for idf
   cat("\n")

   # nothing was counted at all: return an empty but well-formed table
   if (is.null(cntSites)) {
      return(data.frame(site=character(0),stringsAsFactors=FALSE))
   } # end if

   # merge yields NA where a site is absent from a file; these are counts of 0
   cntSites[ is.na(cntSites) ] <- 0

   return(cntSites)

} # end of function compteSite

00153          

00154     ##############################################################################     

00155          

sitesVus <- function(unFichier) {

   # Extracts the site names from unFichier, a report produced by PLACE
   # (http://www.dna.affrc.go.jp/PLACE/index.html).
   #
   # Reading rules:
   #   - the first site comes after a line starting with "_____"
   #   - a line starting with "-----" ends the list of sites
   #   - empty lines are skipped
   #   - each site is the first space-separated word of its line
   #
   # Argument:
   #   unFichier  path of the file to read
   #
   # Value:
   #   a single string with every site preceded by one space
   #   (e.g. " SITE1 SITE2"), or "" when no site is found, including when
   #   the file is empty.

   lignes    <- readLines(unFichier)
   mots      <- character(length(lignes))   # preallocated, at most one word per line
   nbMots    <- 0
   dansSites <- FALSE

   # seq_along() so that an empty file means zero iterations
   for (idl in seq_along(lignes)) {
     laLigne <- lignes[idl]
     if (!nzchar(laLigne)) { next }

     debut <- substr(laLigne,1,5)

     # the closing rule is tested first so that it is never taken as a site
     if (debut=="-----") { dansSites <- FALSE }

     if (dansSites) {
       nbMots       <- nbMots + 1
       mots[nbMots] <- strsplit(laLigne," ")[[1]][1]
     } # end if

     # the opening rule is tested last so that it is not taken as a site
     if (debut=="_____") { dansSites <- TRUE }
   } # end for idl

   # the leading "" reproduces the space placed in front of every site
   return(paste(c("",mots[seq_len(nbMots)]),collapse=" "))

} # end of function sitesVus

00189          

00190     ##############################################################################     

00191          

00192     ##############################################################################     

00193     ##############################################################################     

00194          

# Script entry point. The two commented-out lines below are a disabled
# invocation on the "testmo" / "testne" directories, kept for reference.
00195     # countSites("testmo","testne")     

00196     # cat("\n")     

# Runs the count on the two result directories every time this file is sourced.
00197     countSites("results_longevity_module","results_network")     

00198
Listing du fichier countsites_v2.r