Module artemis.importance_methods.model_specific

Expand source code
from artemis.importance_methods.model_specific._minimal_depth import MinimalDepthImportance
from artemis.importance_methods.model_specific._split_score import SplitScoreImportance

__all__ = ["SplitScoreImportance", "MinimalDepthImportance"]

Classes

class MinimalDepthImportance

Minimal Depth Feature Importance. It applies to tree-based models like Random Forests. It uses data calculated in ConditionalMinimalDepth method from interactions_methods module and so needs to be calculated together.

Importance of a feature is defined as the lowest depth of node using this feature as a split feature in a tree, averaged over all trees.

Attributes

method : str
Method name.
feature_importance : pd.DataFrame
Feature importance values.

References

Constructor for MinimalDepthImportance

Expand source code
class MinimalDepthImportance(FeatureImportanceMethod):
    """
    Minimal Depth Feature Importance.

    Applies to tree-based models such as Random Forests. It consumes data
    produced by the ConditionalMinimalDepth method from the
    `interactions_methods` module, so the two must be calculated together.

    The importance of a feature is the lowest depth of a node using that
    feature as its split feature within a tree, averaged over all trees.

    Attributes
    ----------
    method : str
        Method name.
    feature_importance : pd.DataFrame
        Feature importance values.

    References
    ----------
    - https://modeloriented.github.io/randomForestExplainer/
    - https://doi.org/10.1198/jasa.2009.tm08622
    """

    def __init__(self):
        """Constructor for MinimalDepthImportance"""
        super().__init__(ImportanceMethod.MINIMAL_DEPTH_IMPORTANCE)

    def importance(
        self,
        model,
        tree_id_to_depth_split: dict,
    ) -> pd.DataFrame:
        """Calculates Minimal Depth Feature Importance.

        Parameters
        ----------
        model : object
            Model for which importance will be calculated, should have predict method.
        tree_id_to_depth_split : dict
            Dictionary containing minimal depth of each node in each tree.

        Returns
        -------
        pd.DataFrame
            Result dataframe containing feature importance with columns: "Feature", "Importance"
        """
        _check_preconditions(self.method, tree_id_to_depth_split)

        column_names = _make_column_dict(model.feature_names_in_)

        # Gather, per feature, the depth of its first split node in every tree.
        depths_per_feature = defaultdict(list)
        for depth_tree, split_tree in tree_id_to_depth_split.values():
            for feature, node_ids in split_tree.items():
                depths_per_feature[feature].append(depth_tree[node_ids[0]])

        records = [
            {"Feature": column_names[feature], "Importance": np.mean(depths)}
            for feature, depths in depths_per_feature.items()
        ]

        self.feature_importance = pd.DataFrame.from_records(records).sort_values(
            by="Importance", ignore_index=True
        )

        return self.feature_importance

    @property
    def importance_ascending_order(self):
        # A lower minimal depth means a more important feature, hence ascending.
        return True

Ancestors

  • artemis.importance_methods._method.FeatureImportanceMethod

Instance variables

var importance_ascending_order
Expand source code
@property
def importance_ascending_order(self):
    """Whether lower values rank as more important (always True for minimal depth)."""
    return True

Methods

def importance(self, model, tree_id_to_depth_split: dict) ‑> pandas.core.frame.DataFrame

Calculates Minimal Depth Feature Importance.

Parameters

model : object
Model for which importance will be calculated, should have predict method.
tree_id_to_depth_split : dict
Dictionary containing minimal depth of each node in each tree.

Returns

pd.DataFrame
Result dataframe containing feature importance with columns: "Feature", "Importance"
Expand source code
def importance(
    self,
    model,
    tree_id_to_depth_split: dict,
) -> pd.DataFrame:
    """Calculates Minimal Depth Feature Importance.

    Parameters
    ----------
    model : object
        Model for which importance will be calculated, should have predict method.
    tree_id_to_depth_split : dict
        Dictionary containing minimal depth of each node in each tree.

    Returns
    -------
    pd.DataFrame
        Result dataframe containing feature importance with columns: "Feature", "Importance"
    """
    _check_preconditions(self.method, tree_id_to_depth_split)

    column_names = _make_column_dict(model.feature_names_in_)

    # Gather, per feature, the depth of its first split node in every tree.
    depths_per_feature = defaultdict(list)
    for depth_tree, split_tree in tree_id_to_depth_split.values():
        for feature, node_ids in split_tree.items():
            depths_per_feature[feature].append(depth_tree[node_ids[0]])

    records = [
        {"Feature": column_names[feature], "Importance": np.mean(depths)}
        for feature, depths in depths_per_feature.items()
    ]

    self.feature_importance = pd.DataFrame.from_records(records).sort_values(
        by="Importance", ignore_index=True
    )

    return self.feature_importance
class SplitScoreImportance

Split Score Feature Importance. It applies to gradient boosting tree-based models. It can reuse the unified trees structure calculated by the SplitScore method from interactions_methods module, so the two can be calculated together.

Importance of a feature is defined by the metric selected by user (default is sum of gains).

Attributes

method : str
Method name.
feature_importance : pd.DataFrame
Feature importance values.
selected_metric : str
Metric used for calculating importance.

References

Constructor for SplitScoreImportance

Expand source code
class SplitScoreImportance(FeatureImportanceMethod):
    """
    Split Score Feature Importance.
    It applies to gradient boosting tree-based models.
    It can reuse the unified trees structure calculated by the SplitScore method
    from the `interactions_methods` module, so the two can be calculated together.

    Importance of a feature is defined by the metric selected by user (default is sum of gains).

    Attributes
    ----------
    method : str
        Method name.
    feature_importance : pd.DataFrame
        Feature importance values.
    selected_metric : str
        Metric used for calculating importance.

    References
    ----------
    - https://modeloriented.github.io/EIX/
    """

    def __init__(self):
        """Constructor for SplitScoreImportance"""
        super().__init__(ImportanceMethod.SPLIT_SCORE_IMPORTANCE)
        # Set on each `importance` call; drives `importance_ascending_order`.
        self.selected_metric = None

    def importance(
            self,
            model,
            features: Optional[List[str]] = None,
            selected_metric: str = SplitScoreImportanceMetric.SUM_GAIN,
            show_progress: bool = False,
            trees_df: Optional[pd.DataFrame] = None,
    ):
        """Calculates Split Score Feature Importance.

        Parameters
        ----------
        model : object
            Model for which importance will be calculated, should have predict_proba or predict method, or predict_function should be provided.
        features : List[str], optional
            List of features for which importance will be calculated. If None, all features from X will be used. Default is None.
        selected_metric : str
            Metric used to calculate feature importance,
            one of ['sum_gain', 'sum_cover', 'mean_gain', 'mean_cover', 'mean_depth',
            'mean_weighted_depth', 'root_frequency', 'weighted_root_frequency'].
            Default is 'sum_gain'.
        show_progress : bool
            If True, progress bar will be shown. Default is False.
        trees_df : pd.DataFrame, optional
            DataFrame containing unified structure of the trained trees, can be precalculated by SplitScore method. Default is None.

        Returns
        -------
        pd.DataFrame
            Result dataframe containing feature importance with columns: "Feature", "Importance"
        """
        if trees_df is None:
            trees_df = model.trees_df

        # Node depths are required by the depth-based metrics; compute them
        # only if the unified trees structure does not carry them already.
        if trees_df["depth"].isnull().values.any():
            trees_df = _calculate_depth(trees_df, show_progress)
        self.full_result = _calculate_all_feature_importance(
            trees_df, features, selected_metric
        )
        self.feature_importance = _select_metric(self.full_result, selected_metric)
        self.selected_metric = selected_metric

        return self.feature_importance

    @property
    def importance_ascending_order(self):
        # For depth-based metrics a smaller value means a more important
        # feature, so those sort ascending; all other metrics sort descending.
        return self.selected_metric in [SplitScoreImportanceMetric.MEAN_DEPTH,
                                        SplitScoreImportanceMetric.MEAN_WEIGHTED_DEPTH]

Ancestors

  • artemis.importance_methods._method.FeatureImportanceMethod

Instance variables

var importance_ascending_order
Expand source code
@property
def importance_ascending_order(self):
    """True only for depth-based metrics, where smaller values mean more important."""
    depth_based_metrics = [SplitScoreImportanceMetric.MEAN_DEPTH,
                           SplitScoreImportanceMetric.MEAN_WEIGHTED_DEPTH]
    return self.selected_metric in depth_based_metrics

Methods

def importance(self, model, features: Optional[List[str]] = None, selected_metric: str = 'sum_gain', show_progress: bool = False, trees_df: Optional[pandas.core.frame.DataFrame] = None)

Calculates Split Score Feature Importance.

Parameters

model : object
Model for which importance will be calculated, should have predict_proba or predict method, or predict_function should be provided.
features : List[str], optional
List of features for which importance will be calculated. If None, all features from X will be used. Default is None.
selected_metric : str
Metric used to calculate feature importance, one of ['sum_gain', 'sum_cover', 'mean_gain', 'mean_cover', 'mean_depth', 'mean_weighted_depth', 'root_frequency', 'weighted_root_frequency']. Default is 'sum_gain'.
show_progress : bool
If True, progress bar will be shown. Default is False.
trees_df : pd.DataFrame, optional
DataFrame containing unified structure of the trained trees, can be precalculated by SplitScore method. Default is None.

Returns

pd.DataFrame
Result dataframe containing feature importance with columns: "Feature", "Importance"
Expand source code
def importance(
        self,
        model,
        features: Optional[List[str]] = None,
        selected_metric: str = SplitScoreImportanceMetric.SUM_GAIN,
        show_progress: bool = False,
        trees_df: Optional[pd.DataFrame] = None,
):
    """Calculates Split Score Feature Importance.

    Parameters
    ----------
    model : object
        Model for which importance will be calculated, should have predict_proba or predict method, or predict_function should be provided.
    features : List[str], optional
        List of features for which importance will be calculated. If None, all features from X will be used. Default is None.
    selected_metric : str
        Metric used to calculate feature importance,
        one of ['sum_gain', 'sum_cover', 'mean_gain', 'mean_cover', 'mean_depth',
        'mean_weighted_depth', 'root_frequency', 'weighted_root_frequency'].
        Default is 'sum_gain'.
    show_progress : bool
        If True, progress bar will be shown. Default is False.
    trees_df : pd.DataFrame, optional
        DataFrame containing unified structure of the trained trees, can be precalculated by SplitScore method. Default is None.

    Returns
    -------
    pd.DataFrame
        Result dataframe containing feature importance with columns: "Feature", "Importance"
    """
    if trees_df is None:
        trees_df = model.trees_df

    # Node depths are required by the depth-based metrics; compute them
    # only if the unified trees structure does not carry them already.
    if trees_df["depth"].isnull().values.any():
        trees_df = _calculate_depth(trees_df, show_progress)
    self.full_result = _calculate_all_feature_importance(
        trees_df, features, selected_metric
    )
    self.feature_importance = _select_metric(self.full_result, selected_metric)
    self.selected_metric = selected_metric

    return self.feature_importance