Logo Search packages:      
Sourcecode: rpart version File versions


/* SCCS @(#)node.h      1.3 12/13/99 */
/*
** definition of a node in the tree
*/
/*
** FLOAT is the type used for continuous split points.  It defaults to
** float here unless something included earlier has already chosen a type.
*/
#ifndef FLOAT
#define FLOAT float   /*see comments in rpart.h */
#endif

/*
** EXTERN prefixes the globals declared in this header.  When MAINRP is
** defined it expands to nothing; otherwise it expands to "extern", so the
** same line is only a declaration.
*/
#ifdef MAINRP
#define EXTERN
#else
#define EXTERN extern
#endif

/*
** One split of a node, either a primary split or a surrogate.
** nextsplit points at another split record.
*/
struct split
{
    double improve;           /* primary: improvement index that decides the
                              ** winning split; surrogate: error rate when
                              ** guessing the primary with this surrogate */
    double adj;               /* surrogates only: adjusted agreement,
                              ** (agree - maj)/(1 - maj) */
    FLOAT spoint;             /* split point; only used for a continuous
                              ** variable */
    struct split *nextsplit;  /* another split record, reached from this one */
    int var_num;              /* variable number of the split */
    int count;                /* number of observations split using this
                              ** variable */
    int csplit[2];            /* continuous: csplit[0] is the direction
                              ** (1: <x to the left, -1: <x to the right);
                              ** categorical: labels LEFT, RIGHT, 0=missing,
                              ** and the actual length is longer than 2 for
                              ** a predictor with >2 levels */
};

/*
** One node of the tree.
** The struct is closed explicitly here so that the declarations following
** it stay at file scope rather than becoming members.
*/
struct node {
    double risk;              /* risk for the node, if it were terminal */
    double complexity;        /* complexity at which the node will collapse */
    double sum_wt;            /* sum of the weights for the node */
    struct split *primary;    /* primary split record(s) for this node */
    struct split *surrogate;  /* surrogate split record(s) for this node */
    struct node *rightson;    /* right child */
    struct node *leftson;     /* left child */
    int num_obs;              /* number of observations in the node */
    int lastsurrogate;        /* direction to send observations for which the
                              ** primary and all surrogates are missing */
    double response_est[2];   /* estimate of the response if the node were
                              ** terminal; actual length depends on the
                              ** splitting rule */
};
/*
** Global integer declared through the EXTERN macro.
** NOTE(review): presumably the number of bytes to allocate for one
** struct node, since response_est can be longer than declared -- confirm
** where the value is assigned.
*/
EXTERN int nodesize;

/*
** One entry of the cp table.  Each entry carries pointers to two other
** entries of the same type, forward and back.
** NOTE(review): the member descriptions below are inferred from the names
** only -- confirm against the code that fills the table.
*/
struct cptable
{
    double cp;                /* presumably the complexity parameter */
    double risk;              /* presumably the risk at this cp */
    double xrisk;             /* presumably the cross-validated risk */
    double xstd;              /* presumably the standard error of xrisk */
    int nsplit;               /* presumably the number of splits */
    struct cptable *forward;  /* presumably the following entry */
    struct cptable *back;     /* presumably the preceding entry */
};

/*
**  Split:
**      variable number of the split; 0 = no more surrogates (or primaries)
**      split point: the actual split point for a continuous
**      improve:  For primary splits, the improvement index returned by the
**                 bsplit routine.  This is the measure that determines the
**                 winning split.
**                For surrogate splits, this holds the error rate, i.e., the
**                 % incorrect guesses of the primary by using this surrogate.
**      count: The number of observations split using this variable.  For the
**             first primary, this will = the number of non-missing values.
**             For surrogates, it will be the number missing in the primary
**             and all earlier surrogates but not missing on this one.  (For
**             all primaries but the first, the number is theoretical).
**      adj:  Let "maj" be the %agreement for going with the majority,
**                and "agree" the %agreement for this surrogate.  The
**                adjusted value is (agree - maj)/(1-maj); the amount of
**                the potential improvement actually realized.  The denominator
**                for both percents depends on the sur_agree option.
**      csplit[0]:   For a continuous variable, we also need to know the
**                    direction of the split.  We use this "extra" variable
**                    as 1: <x to the left, -1: <x to the right.
**      csplit[]:    For a categorical, the labels are LEFT, RIGHT, and
**                    0=missing.  (Even if a particular category is not empty,
**                    there may be no subjects from that category present
**                    at a particular split further down the tree).
**  Node:
**      num_obs: Number of observations in the node.
**      response_est: From the eval routine.  Estimate of the response, if
**                      this node were terminal.
**      risk: From the eval routine. Estimate of risk, if this node were
**                      terminal.
**      complexity: On the way down, it holds equation 5.18.  On the way up
**              it holds a provisional C.P. (The actual C.P. for each node
**              will be the minimum of this number and the provisional C.P.
**              of all nodes above it.  One more pass downward can establish
**              the proper C.P.).
**      lastsurrogate: Which direction to send obs for which the primary and
**              all the surrogates are missing.  (The child with the greatest
**              sum of weights).
*/

Generated by  Doxygen 1.6.0   Back to index