home *** CD-ROM | disk | FTP | other *** search
/ Crawly Crypt Collection 1 / crawlyvol1.bin / apps / science / clustalv / upgma.c < prev    next >
C/C++ Source or Header  |  1993-04-11  |  5KB  |  241 lines

  1. #include <stdio.h>
  2. #include <string.h>
  3. #include <stdlib.h>
  4. #include "clustalv.h"
  5.  
  6. /*
  7. *    Prototypes
  8. */
  9.  
  10. extern void *ckalloc(size_t);
  11. extern Boolean read_tree(int *,double *,int *,int *,int *);
  12. extern void warning(char *,...);
  13. void init_upgma(void);
  14. void upgma(int);
  15.  
  16. /*
  17. *    Global variables
  18. */
  19.  
  20. extern FILE *tree;
  21. extern char treename[];
  22. extern double **tmat;
  23.  
  24. static int *combine;
  25. static int *otree_array,*tree_array;
  26. double **smat;
  27. static int *group1,*group2;
  28.  
  29.  
  30. void init_upgma()
  31. {
  32.     register int i;
  33.     
  34.     combine = (int *)ckalloc( (MAXN+1) * sizeof (int) );
  35.     otree_array = (int *)ckalloc( (MAXN+1) * sizeof (int) );
  36.     
  37.     smat = (double **) ckalloc( (MAXN+1) * sizeof (double *) );
  38.     for(i=0;i<MAXN+1;i++)
  39.         smat[i] = (double *)ckalloc( (MAXN+1) * sizeof (double) );
  40.         
  41.     tree_array = (int *)ckalloc( (MAXN+1) * sizeof (int) );
  42.     group1 = (int *)ckalloc( (MAXN+1) * sizeof (int) );
  43.     group2 = (int *)ckalloc( (MAXN+1) * sizeof (int) );
  44. }
  45.  
  46. void upgma(int totseqs)
  47. {
  48.     int i,j,k,sub1,sub2,lowp,highp,ndone,chunks,comv,flag,gp2,iter,n,idummy;
  49.     int m,m2;
  50.     int bottom,top;
  51.     double score,med,acc,dummy;
  52.     
  53.     iter=0;
  54.     
  55.     for(i=1;i<=totseqs;++i) {
  56.         combine[i]=otree_array[i]=0;
  57.         tmat[i][i]=0.0;
  58.     }
  59.     
  60.     for(i=1;i<=totseqs;++i)
  61.         for(j=1;j<=totseqs;++j)
  62.             smat[i][j]=tmat[i][j];
  63.     
  64.     while(TRUE) {
  65.         score = 0.0;
  66.         sub1 = sub2 =0;
  67.     
  68.         for(i=1;i<=totseqs;++i)
  69.             if(combine[i]==0 || combine[i]==i)
  70.                 for(j=1;j<=totseqs;++j) {
  71.                     if(combine[j]!=0 && combine[j]!=j) continue;
  72.                     if(smat[i][j]> score) {
  73.                         score = smat[i][j];
  74.                         sub1 = i;
  75.                         sub2 = j;
  76.                     }
  77.                 }
  78.     
  79.     
  80.         bottom = (sub1<sub2) ? sub1 : sub2;
  81.         top = (sub1>sub2) ? sub1 : sub2;
  82.         for(i=1;i<=totseqs;++i)
  83.             tree_array[i]=0;
  84.     
  85.         if(combine[bottom]==0 && combine[top]==0) {
  86.             combine[bottom]=bottom;
  87.             combine[top]=bottom;
  88.             lowp=highp=0;
  89.             tree_array[bottom]=1;
  90.             tree_array[top]=2;
  91.         }
  92.         else if(combine[bottom]==0 && combine[top]>0) {
  93.             combine[bottom]=bottom;
  94.             lowp=0;
  95.             tree_array[bottom]=1;
  96.             for(i=top;i<=totseqs;++i)
  97.                 if(combine[i]==top) {
  98.                     combine[i]=bottom;
  99.                     tree_array[i]=2;
  100.                 }
  101.         }
  102.         else if(combine[bottom]>0 && combine[top]==0) {
  103.             highp=0;
  104.             for(i=bottom;i<=totseqs;++i)
  105.                 if(combine[i]==bottom)
  106.                     tree_array[i]=1;
  107.             combine[top]=bottom;
  108.             tree_array[top]=2;
  109.         }
  110.         else {
  111.             for(i=1;i<=totseqs;++i) {
  112.                 if(combine[i]==bottom)
  113.                     tree_array[i]=1;
  114.                 if(combine[i]==top) {
  115.                     combine[i]=bottom;
  116.                     tree_array[i]=2;
  117.                 }
  118.             }
  119.         }
  120.     
  121.         m=m2=ndone=0;
  122.         for(i=1;i<=totseqs;++i) {
  123.             if(tree_array[i]==1)
  124.                 ++m;
  125.             if(tree_array[i]==2)
  126.                 ++m2;
  127.             if(combine[i]==bottom) {
  128.                 ++ndone;
  129.                 group1[ndone]=i;
  130.             }
  131.         }
  132.         
  133.         chunks=0;
  134.         
  135.         if(ndone>2) {
  136.             flag=FALSE;
  137.             if(m>1) {
  138.                 tree=fopen(treename,"r");
  139.                 while(TRUE) {
  140.                     if(!read_tree(&n,&dummy,&idummy,&idummy,otree_array)) {
  141.                         flag=TRUE;
  142.                         break;
  143.                     }
  144.                     ++chunks;
  145.                     if(n!=m)
  146.                         continue;
  147.                     else {
  148.                         comv=0;
  149.                         for(i=1;i<=totseqs;++i)
  150.                             if(otree_array[i]>0 && tree_array[i]==1)
  151.                                 ++comv;
  152.                         if(comv!=m)
  153.                             continue;
  154.                         lowp=chunks;
  155.                         break;
  156.                     }
  157.                     break;
  158.                 }
  159.                 fclose(tree);
  160.             }
  161.             if(flag)
  162.                 warning("Dutch Elm Disease. Bad tree");
  163.             
  164.             flag=FALSE;
  165.             chunks=0;
  166.             if(m2>1) {
  167.                 tree=fopen(treename,"r");
  168.                 while(TRUE) {
  169.                     if(!read_tree(&n,&dummy,&idummy,&idummy,otree_array)) {
  170.                         flag=TRUE;
  171.                         break;
  172.                     }
  173.                     ++chunks;
  174.                     if(n!=m2)
  175.                         continue;
  176.                     else {
  177.                         comv=0;
  178.                         for(i=1;i<=totseqs;++i)
  179.                             if(otree_array[i]>0 && tree_array[i]==2)
  180.                                 ++comv;
  181.                         if(comv!=m2)
  182.                             continue;
  183.                         highp=chunks;
  184.                         break;
  185.                     }
  186.                     break;
  187.                 }
  188.                 fclose(tree);
  189.             }
  190.             if(flag)
  191.                 warning("Dutch Elm Disease. Bad tree");
  192.         }
  193.     
  194.         tree=fopen(treename,"a");
  195.         fprintf(tree," %7.1f %3d %3d %3d   ",score,lowp,highp,ndone);
  196.         for(i=1;i<=totseqs;++i)
  197.             fprintf(tree,"%1d",tree_array[i]);
  198.         fprintf(tree,"\n");
  199.         fclose(tree);
  200.     
  201.         for(i=1;i<=totseqs;++i) {
  202.             gp2=0;
  203.             if(combine[i]==bottom)
  204.                 continue;
  205.             else if(combine[i]==0) {
  206.                 gp2=1;
  207.                 group2[1]=i;
  208.             }
  209.             else if(combine[i]==i) {
  210.                 for(j=1;j<=totseqs;++j)
  211.                     if(combine[j]==i) {
  212.                         ++gp2;
  213.                         group2[gp2]=j;
  214.                     }
  215.             }
  216.             else continue;
  217.         
  218.             acc=0.0;
  219.             comv=0;
  220.             for(j=1;j<=gp2;++j)
  221.                 for(k=1;k<=ndone;++k) {
  222.                     acc += tmat[group2[j]][group1[k]];
  223.                     ++comv;
  224.                 }
  225.             med=acc/(double)comv;
  226.             smat[i][bottom]=med;
  227.             smat[bottom][i]=med;
  228.         }
  229.     
  230.         ++iter;
  231.         if(iter>800)
  232.             return;
  233.         flag=FALSE;
  234.         for(i=1;i<=totseqs;++i)
  235.             if(combine[i]!=1)
  236.                 flag=TRUE;
  237.         if(!flag)
  238.             break;
  239.     }
  240. }
  241.