Module artemis.importance_methods.model_specific
Expand source code
from artemis.importance_methods.model_specific._minimal_depth import MinimalDepthImportance
from artemis.importance_methods.model_specific._split_score import SplitScoreImportance
__all__ = ["SplitScoreImportance", "MinimalDepthImportance"]
Classes
class MinimalDepthImportance
-
Minimal Depth Feature Importance. It applies to tree-based models like Random Forests. It uses data calculated in ConditionalMinimalDepth method from
interactions_methods
module and so needs to be calculated together. Importance of a feature is defined as the lowest depth of a node using this feature as a split feature in a tree, averaged over all trees.
Attributes
method
:str
- Method name.
feature_importance
:pd.DataFrame
- Feature importance values.
References
Constructor for MinimalDepthImportance
Expand source code
class MinimalDepthImportance(FeatureImportanceMethod):
    """Minimal Depth Feature Importance.

    Applies to tree-based models such as Random Forests. It uses data
    calculated by the ConditionalMinimalDepth method from the
    `interactions_methods` module and so needs to be calculated together.
    The importance of a feature is the lowest depth of a node using that
    feature as a split feature in a tree, averaged over all trees.

    Attributes
    ----------
    method : str
        Method name.
    feature_importance : pd.DataFrame
        Feature importance values.

    References
    ----------
    - https://modeloriented.github.io/randomForestExplainer/
    - https://doi.org/10.1198/jasa.2009.tm08622
    """

    def __init__(self):
        """Constructor for MinimalDepthImportance"""
        super().__init__(ImportanceMethod.MINIMAL_DEPTH_IMPORTANCE)

    def importance(
        self,
        model,
        tree_id_to_depth_split: dict,
    ) -> pd.DataFrame:
        """Calculates Minimal Depth Feature Importance.

        Parameters
        ----------
        model : object
            Model for which importance will be calculated, should have
            predict method.
        tree_id_to_depth_split : dict
            Dictionary containing minimal depth of each node in each tree.

        Returns
        -------
        pd.DataFrame
            Result dataframe containing feature importance with columns:
            "Feature", "Importance"
        """
        _check_preconditions(self.method, tree_id_to_depth_split)
        column_names = _make_column_dict(model.feature_names_in_)

        # For every tree, record the depth of the first node that splits
        # on each feature.
        depths_by_feature = defaultdict(list)
        for depth_tree, split_tree in tree_id_to_depth_split.values():
            for feature, node_ids in split_tree.items():
                depths_by_feature[feature].append(depth_tree[node_ids[0]])

        records = [
            {"Feature": column_names[feature], "Importance": np.mean(depths)}
            for feature, depths in depths_by_feature.items()
        ]
        self.feature_importance = pd.DataFrame.from_records(records).sort_values(
            by="Importance", ignore_index=True
        )
        return self.feature_importance

    @property
    def importance_ascending_order(self):
        # Lower depth means the feature is used closer to the root,
        # i.e. it is more important, so smaller values rank first.
        return True
Ancestors
- artemis.importance_methods._method.FeatureImportanceMethod
Instance variables
var importance_ascending_order
-
Expand source code
@property
def importance_ascending_order(self):
    """Minimal depth ranks features ascending: smaller depth = more important."""
    return True
Methods
def importance(self, model, tree_id_to_depth_split: dict) ‑> pandas.core.frame.DataFrame
-
Calculates Minimal Depth Feature Importance.
Parameters
model
:object
- Model for which importance will be calculated, should have predict method.
tree_id_to_depth_split
:dict
- Dictionary containing minimal depth of each node in each tree.
Returns
pd.DataFrame
- Result dataframe containing feature importance with columns: "Feature", "Importance"
Expand source code
def importance(
    self,
    model,
    tree_id_to_depth_split: dict,
) -> pd.DataFrame:
    """Calculates Minimal Depth Feature Importance.

    Parameters
    ----------
    model : object
        Model for which importance will be calculated, should have
        predict method.
    tree_id_to_depth_split : dict
        Dictionary containing minimal depth of each node in each tree.

    Returns
    -------
    pd.DataFrame
        Result dataframe containing feature importance with columns:
        "Feature", "Importance"
    """
    _check_preconditions(self.method, tree_id_to_depth_split)
    column_names = _make_column_dict(model.feature_names_in_)

    # For every tree, record the depth of the first node that splits
    # on each feature.
    depths_by_feature = defaultdict(list)
    for depth_tree, split_tree in tree_id_to_depth_split.values():
        for feature, node_ids in split_tree.items():
            depths_by_feature[feature].append(depth_tree[node_ids[0]])

    records = [
        {"Feature": column_names[feature], "Importance": np.mean(depths)}
        for feature, depths in depths_by_feature.items()
    ]
    self.feature_importance = pd.DataFrame.from_records(records).sort_values(
        by="Importance", ignore_index=True
    )
    return self.feature_importance
class SplitScoreImportance
-
Split Score Feature Importance. It applies to gradient boosting tree-based models. It can use data calculated in SplitScore method from
interactions_methods
module and so needs to be calculated together. Importance of a feature is defined by the metric selected by the user (default is sum of gains).
Attributes
method
:str
- Method name.
feature_importance
:pd.DataFrame
- Feature importance values.
selected_metric
:str
- Metric used for calculating importance.
References
Constructor for SplitScoreImportance
Expand source code
class SplitScoreImportance(FeatureImportanceMethod):
    """Split Score Feature Importance.

    Applies to gradient boosting tree-based models. It can use data
    calculated by the SplitScore method from the `interactions_methods`
    module and so needs to be calculated together. The importance of a
    feature is defined by the metric selected by the user (default is the
    sum of gains).

    Attributes
    ----------
    method : str
        Method name.
    feature_importance : pd.DataFrame
        Feature importance values.
    selected_metric : str
        Metric used for calculating importance.

    References
    ----------
    - https://modeloriented.github.io/EIX/
    """

    def __init__(self):
        """Constructor for SplitScoreImportance"""
        super().__init__(ImportanceMethod.SPLIT_SCORE_IMPORTANCE)
        # Remembered so `importance_ascending_order` reflects the metric
        # actually used in the most recent `importance` call.
        self.selected_metric = None

    def importance(
        self,
        model,
        features: Optional[List[str]] = None,
        selected_metric: str = SplitScoreImportanceMetric.SUM_GAIN,
        show_progress: bool = False,
        trees_df: Optional[pd.DataFrame] = None,
    ):
        """Calculates Split Score Feature Importance.

        Parameters
        ----------
        model : object
            Model for which importance will be calculated, should have
            predict_proba or predict method, or predict_function should
            be provided.
        features : List[str], optional
            List of features for which importance will be calculated.
            If None, all features will be used. Default is None.
        selected_metric : str
            Metric used to calculate feature importance, one of
            ['sum_gain', 'sum_cover', 'mean_gain', 'mean_cover',
            'mean_depth', 'mean_weighted_depth', 'root_frequency',
            'weighted_root_frequency']. Default is 'sum_gain'.
        show_progress : bool
            If True, progress bar will be shown. Default is False.
        trees_df : pd.DataFrame, optional
            DataFrame containing unified structure of the trained trees,
            can be precalculated by the SplitScore method. Default is None.

        Returns
        -------
        pd.DataFrame
            Result dataframe containing feature importance with columns:
            "Feature", "Importance"
        """
        if trees_df is None:
            trees_df = model.trees_df
        # Depth-based metrics need node depths; compute them lazily when
        # the provided (or cached) frame lacks them.
        if trees_df["depth"].isnull().values.any():
            trees_df = _calculate_depth(trees_df, show_progress)

        self.full_result = _calculate_all_feature_importance(
            trees_df, features, selected_metric
        )
        self.feature_importance = _select_metric(self.full_result, selected_metric)
        self.selected_metric = selected_metric
        return self.feature_importance

    @property
    def importance_ascending_order(self):
        # Depth-based metrics rank smaller values as more important.
        return self.selected_metric in [
            SplitScoreImportanceMetric.MEAN_DEPTH,
            SplitScoreImportanceMetric.MEAN_WEIGHTED_DEPTH,
        ]
Ancestors
- artemis.importance_methods._method.FeatureImportanceMethod
Instance variables
var importance_ascending_order
-
Expand source code
@property
def importance_ascending_order(self):
    """True for depth-based metrics, where smaller values mean more important."""
    depth_metrics = (
        SplitScoreImportanceMetric.MEAN_DEPTH,
        SplitScoreImportanceMetric.MEAN_WEIGHTED_DEPTH,
    )
    return self.selected_metric in depth_metrics
Methods
def importance(self, model, features: Optional[List[str]] = None, selected_metric: str = 'sum_gain', show_progress: bool = False, trees_df: Optional[pandas.core.frame.DataFrame] = None)
-
Calculates Split Score Feature Importance.
Parameters
model
:object
- Model for which importance will be calculated, should have predict_proba or predict method, or predict_function should be provided.
features
:List[str]
, optional — List of features for which importance will be calculated. If None, all features will be used. Default is None.
selected_metric
:str
- Metric used to calculate feature importance, one of ['sum_gain', 'sum_cover', 'mean_gain', 'mean_cover', 'mean_depth', 'mean_weighted_depth', 'root_frequency', 'weighted_root_frequency']. Default is 'sum_gain'.
show_progress
:bool
- If True, progress bar will be shown. Default is False.
trees_df
:pd.DataFrame
, optional — DataFrame containing unified structure of the trained trees, can be precalculated by the SplitScore method. Default is None.
Returns
pd.DataFrame
- Result dataframe containing feature importance with columns: "Feature", "Importance"
Expand source code
def importance(
    self,
    model,
    features: Optional[List[str]] = None,
    selected_metric: str = SplitScoreImportanceMetric.SUM_GAIN,
    show_progress: bool = False,
    trees_df: Optional[pd.DataFrame] = None,
):
    """Calculates Split Score Feature Importance.

    Parameters
    ----------
    model : object
        Model for which importance will be calculated, should have
        predict_proba or predict method, or predict_function should be
        provided.
    features : List[str], optional
        List of features for which importance will be calculated.
        If None, all features will be used. Default is None.
    selected_metric : str
        Metric used to calculate feature importance, one of
        ['sum_gain', 'sum_cover', 'mean_gain', 'mean_cover',
        'mean_depth', 'mean_weighted_depth', 'root_frequency',
        'weighted_root_frequency']. Default is 'sum_gain'.
    show_progress : bool
        If True, progress bar will be shown. Default is False.
    trees_df : pd.DataFrame, optional
        DataFrame containing unified structure of the trained trees,
        can be precalculated by the SplitScore method. Default is None.

    Returns
    -------
    pd.DataFrame
        Result dataframe containing feature importance with columns:
        "Feature", "Importance"
    """
    if trees_df is None:
        trees_df = model.trees_df
    # Depth-based metrics need node depths; compute them lazily when
    # the provided (or cached) frame lacks them.
    if trees_df["depth"].isnull().values.any():
        trees_df = _calculate_depth(trees_df, show_progress)

    self.full_result = _calculate_all_feature_importance(
        trees_df, features, selected_metric
    )
    self.feature_importance = _select_metric(self.full_result, selected_metric)
    self.selected_metric = selected_metric
    return self.feature_importance