/*  (gH)   -_-  dico.rex  ;  TimeStamp (unix) : 01 Décembre 2012 vers 23:31 */

parse arg fn .

if fn="" then do /* aide éventuelle */
   say
   say " dico.rex ; création des dictionnaires alphabétiques et fréquentiels d'un texte "
   say
   say " syntaxe : regina dico.rex FICHIER "
   say " exemple : regina dico.rex texte.txt"
   say
   say " les fichiers produits sont dico.tpm et dico.tpo"
   say "  dico.tpm : tri par mot croissant "
   say "  dico.tpo : tri par fréquence décroissante " ;
   say "             puis par mot croissant an cas d'égalité."
   exit
end /* finsi */

rf = fileut("checkFile^"fn)
if rf=-1 then do
   say
   say "Fin du programme du à l'absence du fichier. "
   say
   exit
end /* fin si */

/* si on arrive ici c'est que le fichier existe */

fs1 = "dico.tpm"
rf = fileut("deleteFile^"fs1)

fs2 = "dico.tpo"
rf = fileut("deleteFile^"fs2)

say "Traitement des mots dans :" fn
/* parcours du fichier, comptage des mots */

nbl   = 0 /* lignes                */
nbm   = 0 /* mots en tout          */
nbmd  = 0 /* mots différents       */
nbo.  = 0 /* tableau des comptages */
tdm.  = "" /* tableau des mots "   */

do while lines(fn)>0
   lig  = linein(fn)
   nbl  = nbl + 1
   /* on vire la ponctuation */
   ligp = ""
   do ic=1 to length(lig)
      cc = substr(lig,ic,1)
      select
        when cc="'" then oc = " "
        when cc="," then oc = " "
        when cc="-" then oc = " "
        when cc="?" then oc = " "
        when cc="!" then oc = " "
        when cc="." then oc = " "
        when cc=";" then oc = " "
        otherwise        oc = cc
      end /* fin des cas */
      ligp = ligp || oc
   end /* fin pour im */
   /* donc maintenant on peut compter les mots */
   nbmc = words(ligp)
   nbm  = nbm + nbmc
   do im=1 to nbmc
      mot = word(ligp,im)
      nbo.mot = nbo.mot + 1
      if nbo.mot = 1 then do
         nbmd     = nbmd + 1
         tdm.nbmd = mot
      end /* fin si */
   end /* fin pour im */
end /* fin tant que */
rf = fileut("closeFile^"fn)

verif = 0 /* 0 en normal, 1 en mode debug */

if verif=1 then do idm=1 to nbmd
   mot = tdm.idm
   occ =nbo.mot
   say " mot " idm " = " mot " vu " occ " fois "
end /* fin pour idm */

say "On a vu " nbm " mot(s) sur " nbl "lignes et " nbmd " mots différents."

/* tri par mots : on les met bout à bout pour profiter de triPhrase */

tlm = "" /* tous les mots */

do idm=1 to nbmd
   tlm = tlm tdm.idm
end /* fin pour idm */
tlmt = rexxut("triPhrase^"tlm)

/* on écrit dans fs1 */

do idm=1 to words(tlmt)
   mot = word(tlmt,idm)
   occ = nbo.mot
   call lineout fs1 , rexxut("surncarg^30^"mot) format(occ,5)
end /* fin pour idm */
rf = fileut("closeFile^"fs1)

say "le fichier " fs1 " contient les mots par ordre alphabétique"

/* tri par occurences : on construit des occ_mot que l'on trie via triPhrase */

tlo = "" /* tous les mots */

do idm=1 to nbmd
   mot = tdm.idm
   occ = rexxut("formatZero^"nbo.mot"^6")
   tlo = tlo occ||"_"||mot
end /* fin pour idm */
tlot = rexxut("triPhrase^"tlo)

/* on écrit dans fs1 en retriant les ex-aequo */

nbop = 0  /* nb d'occurences précédent */
ldm  = "" /* liste des mots */
nbmo = words(tlot)
do idm=1 to nbmo
   om  = word(tlot,nbmo+1-idm)
   parse var om occ "_" mot
   if (idm=1) then do
     nbop = occ
     ldm  = mot
   end ; else do
     if occ=nbop then do
                     ldm = ldm mot
                 end ; else do
                   /* on affiche les précédents et on prépare la nouvelle liste */
                     call ecritMots fs2 nbop ldm
                     nbop = occ
                     ldm  = mot
                 end /* fin si */
   end /* finsi on est après le premier mot */
end /* fin pour idm */
/* il faut toujours afficher la dernière liste */
call ecritMots fs2 nbop ldm
rf = fileut("closeFile^"fs2)

say "le fichier " fs2 " contient les mots par ordre décroissant d'occurences"
say
exit

ecritMots: procedure
  parse arg fic nboc liste
  ldmt = rexxut("triPhrase^"liste)
  do jdm=1 to words(ldmt)
      motc = word(ldmt,jdm)
      call lineout fic , rexxut("surncarg^30^"motc) format(nboc,5)
  end /* finsi */
return