#include #include #include #include #include "svm_common.h" /* for my_malloc */ /* if x=log(a) and y=log(b), compute log(a+b) : */ #define LOGP(x,y) (((x)>(y))?(x)+log1p(exp((y)-(x))):(y)+log1p(exp((x)-(y)))) #define LOG_0 -100 /* Structures */ /* node for the bayesian tree */ typedef struct node { double lp00 , lp01 , lp10 , lp11; /* the transition probabilities */ double lfx0 , lfx1 , lfy0 , lfy1; /* the phi values for x and y */ double lz0 , lz1; /* the zeta values */ int v , x , y; /* possible values for the node */ int invisible; /* 0 for a normal node 1 for an invisible node (used to simulate higher order trees from binary trees */ struct node *left , *right; } Node; /* Static variables */ static Node *phyloTree; /*********************/ /* Private Functions */ /*********************/ /*Node *nodeNew(p01,p11,invi,l,r) possible mistake in the initialization?? 2006/09/15 */ Node *nodeNew(p11,p01,invi,l,r) /* returns a pointer to a node. p01 and p11 are the probabilities of 1 conditionally to 0 and 1 respectively. */ double p01, p11; int invi; Node *l,*r; { /* Allocate memory with my_malloc */ Node *n = (Node *)my_malloc(sizeof(Node)); /* initialization of transition probabilities (take the log) */ n->lp01 = log(p01); n->lp00 = log(1-p01); n->lp11 = log(p11); n->lp10 = log(1-p11); /* phi and zeta are initialized to zero */ n->lfx0 = 0; n->lfx1 = 0; n->lfy0 = 0; n->lfy1 = 0; n->lz0 = 0; n->lz1 = 0; n->v = 0; n->x = 0; n->y = 0; /* children nodes are initialized */ n->left = l; n->right = r; /* visibility of the node */ n->invisible = invi; return n; } void nodeFree(n) /* Recursive memory release for a node and its descendants */ Node *n; { if (n != NULL) { nodeFree(n->left); nodeFree(n->right); free(n); } } /* Phylogenetic tree initialization */ void phyloTreeInit() { phyloTree=nodeNew(0.9,0.1,0,nodeNew(0.9,0.1,0,nodeNew(0.9,0.1,0,NULL,NULL),nodeNew(0.9,0.1,0,nodeNew(0.9,0.1,0,NULL,NULL),nodeNew(0.9,0.1,0,nodeNew(0.9,0.1,0,nodeNew(0.9,0.1,0,NULL,NULL),nodeNew(0.9,0.1,0,NULL,NULL)),nodeNew(0.9,0.1,1,nodeNew(0.9,0.1,0,NULL,NULL),nodeNew(0.9,0.1,1,nodeNew(0.9,0.1,0,NULL,NULL),nodeNew(0.9,0.1,0,NULL,NULL)))))),nodeNew(0.9,0.1,0,nodeNew(0.9,0.1,1,nodeNew(0.9,0.1,0,nodeNew(0.9,0.1,0,NULL,NULL),nodeNew(0.9,0.1,0,NULL,NULL)),nodeNew(0.9,0.1,1,nodeNew(0.9,0.1,0,NULL,NULL),nodeNew(0.9,0.1,0,nodeNew(0.9,0.1,0,NULL,NULL),nodeNew(0.9,0.1,0,NULL,NULL)))),nodeNew(0.9,0.1,1,nodeNew(0.9,0.1,0,nodeNew(0.9,0.1,0,NULL,NULL),nodeNew(0.9,0.1,0,nodeNew(0.9,0.1,0,NULL,NULL),nodeNew(0.9,0.1,0,nodeNew(0.9,0.1,0,NULL,NULL),nodeNew(0.9,0.1,0,NULL,NULL)))),nodeNew(0.9,0.1,1,nodeNew(0.9,0.1,0,nodeNew(0.9,0.1,0,NULL,NULL),nodeNew(0.9,0.1,0,NULL,NULL)),nodeNew(0.9,0.1,1,nodeNew(0.9,0.1,0,nodeNew(0.9,0.1,0,NULL,NULL),nodeNew(0.9,0.1,0,NULL,NULL)),nodeNew(0.9,0.1,1,nodeNew(0.9,0.1,1,nodeNew(0.9,0.1,0,NULL,NULL),nodeNew(0.9,0.1,0,NULL,NULL)),nodeNew(0.9,0.1,1,nodeNew(0.9,0.1,0,NULL,NULL),nodeNew(0.9,0.1,0,NULL,NULL)))))))); } int treeSize(n) /* returns the number of leaves in the tree rooted in *n */ Node *n; { if (n==NULL) return 0; if ((n->left == NULL) && (n->left == NULL)) return 1; /* in that case n is a leaf */ else return (treeSize(n->left) + treeSize(n->right)); } WORD *fillPhiX(n,current) /* fill the phi_x values recursively */ Node *n; WORD *current; { WORD *l; if (n == NULL) { /* this should not happen for complete trees.. */ perror("Error in leafFill (in kernel.c): I have reached an empty node, which means that either the phylogenetic tree has not been initialized yet, or it is incomplete."); exit(1); } if ((n->left == NULL) && (n->right == NULL)) { /* Case 1: n is a leaf */ /* the value of x at this node is taken from the array of WORD at the current position */ n->x = (int)(current->weight); if (n->x == 0) { /* if x is 0 on the leaf, use the following equations */ n->lfx0 = n->lp00; n->lfx1 = n->lp10; } else { /* if x is 1 on the leaf, use the following equations */ n->lfx0 = n->lp01; n->lfx1 = n->lp11; } /* return the position of the next x value for the next leaf */ return current+1; } else { /* Case 2: n is an internal node */ /* First compute the values on the descendants of the node: */ l = fillPhiX(n->left , current); l = fillPhiX(n->right , l); /* then compute the values of phi_x with the following equations : */ if (n->invisible) { /* for an invisible node phi is the product of the sons phi's */ n->lfx0 = n->left->lfx0 + n->right->lfx0; n->lfx1 = n->left->lfx1 + n->right->lfx1; } else { /* for a visible node use the following equations */ n->lfx0 = LOGP(n->lp00 + n->left->lfx0 + n->right->lfx0 , n->lp01 + n->left->lfx1 + n->right->lfx1); n->lfx1 = LOGP(n->lp10 + n->left->lfx0 + n->right->lfx0 , n->lp11 + n->left->lfx1 + n->right->lfx1); } /* return the index of the next x value */ return l; } } WORD *fillPhiY(n,current) /* fill the phi_y values recursively */ Node *n; WORD *current; { WORD *l; if (n == NULL) { /* this should not happen for complete trees.. */ perror("Error in leafFill (in kernel.c): I have reached an empty node, which means that either the phylogenetic tree has not been initialized yet, or it is incomplete."); exit(1); } if ((n->left == NULL) && (n->right == NULL)) { /* Case 1: n is a leaf */ /* the value of y at this node is taken from the array of WORD at the current position */ n->y = (int)(current->weight); if (n->y == 0) { /* if y is 0 on the leaf, use the following equations */ n->lfy0 = n->lp00; n->lfy1 = n->lp10; } else { /* if y is 1 on the leaf, use the following equations */ n->lfy0 = n->lp01; n->lfy1 = n->lp11; } /* return the position of the next y value for the next leaf */ return current+1; } else { /* Case 2: n is an internal node */ /* First compute the values on the descendants of the node: */ l = fillPhiY(n->left , current); l = fillPhiY(n->right , l); /* then compute the values of phi_y with the following equations : */ if (n->invisible) { /* for an invisible node phi is the product of the sons phi's */ n->lfy0 = n->left->lfy0 + n->right->lfy0; n->lfy1 = n->left->lfy1 + n->right->lfy1; } else { /* for a visible node use the following equations */ n->lfy0 = LOGP(n->lp00 + n->left->lfy0 + n->right->lfy0 , n->lp01 + n->left->lfy1 + n->right->lfy1); n->lfy1 = LOGP(n->lp10 + n->left->lfy0 + n->right->lfy0 , n->lp11 + n->left->lfy1 + n->right->lfy1); } /* return the index of the next y value */ return l; } } void updateZeta(n) /* compute the zeta value in the node n and its descendants from the current values of phi(x) and phi(y) */ Node *n; { double d1 , d2; Node *son; if (n == NULL) { /* this should not happen for complete trees.. */ perror("Error in leafFill (in kernel.c): I have reached an empty node, which means that either the phylogenetic tree has not been initialized yet, or it is incomplete."); exit(1); } if ((n->left == NULL) && (n->right == NULL)) { /* Case 1: n is a leaf */ if ((n->x == 0) && (n->y == 0)) n->lz0 = 0; else n->lz0 = LOG_0; if ((n->x == 1) && (n->y == 1)) n->lz1 = 0; else n->lz1 = LOG_0; return; } else { /* Case 2: n is not a leaf */ /* compute zeta on the descendance */ (void)updateZeta(n->left); (void)updateZeta(n->right); /* computation of log( zeta_0 ) */ son = n->left; if (son->invisible) { d1 = son->lz0; } else { d1 = LOGP(son->lp00 + son->lz0 , son->lp01 + son->lz1); d1 = LOGP(d1 , son->lfx0 + son->lfy0); } son = n->right; if (son->invisible) { d2 = son->lz0; } else { d2 = LOGP(son->lp00 + son->lz0 , son->lp01 + son->lz1); d2 = LOGP(d2 , son->lfx0 + son->lfy0); } n->lz0 = d1 + d2; /* computation of log( zeta_1 ) */ son = n->left; if (son->invisible) { d1 = son->lz1; } else { d1 = LOGP(son->lp10 + son->lz0 , son->lp11 + son->lz1); d1 = LOGP(d1 , son->lfx1 + son->lfy1); } son = n->right; if (son->invisible) { d2 = son->lz1; } else { d2 = LOGP(son->lp10 + son->lz0 , son->lp11 + son->lz1); d2 = LOGP(d2 , son->lfx1 + son->lfy1); } n->lz1 = d1 + d2; return; } } /********************/ /* Public functions */ /********************/ double custom_kernel(kernel_parm,a,b) /* plug in you favorite kernel */ KERNEL_PARM *kernel_parm; DOC *a,*b; { WORD *w; double s; /* For the first call of this function the phylogenetic tree must be initialized as a static variable */ if (phyloTree == NULL) { (void)phyloTreeInit(); if (verbosity >= 1) { fprintf(stdout,"Initializing the phylogenetic tree, with %d node\n",treeSize(phyloTree)); } } if ((a->words->wnum == 0) || (b->words->wnum == 0)) { /* nulle document has a nulle kenel */ return 0; } /* fprintf(stdout,"K(%d%d%d%d,%d%d%d%d) = ",(int)(a->words->weight),(int)((a->words + 1)->weight),(int)((a->words + 2)->weight),(int)((a->words + 3)->weight),(int)(b->words->weight),(int)((b->words + 1)->weight),(int)((b->words + 2)->weight),(int)((b->words + 3)->weight)); */ /* Compute phi(a) and phi(b) at each node of the phylo tree */ w = fillPhiX(phyloTree,a->words); w = fillPhiY(phyloTree,b->words); if (w->wnum) { fprintf(stdout,"Error: it seems that the phylogenetic profile does not fit into the phylogenetic tree... I am still at the coordonate %d\n",w->wnum); exit(1); } /* Compute the zeta values on each node */ updateZeta(phyloTree); /* return the kernel from the values of the counters at the root */ s = LOGP(phyloTree->lp00 + phyloTree->lz0 , phyloTree->lp01 + phyloTree->lz1); s = LOGP(s,phyloTree->lfx0 + phyloTree->lfy0); s = exp(s/10+3); /* fprintf(stdout,"%.10f\n",s); */ return s; }