Module artemis.additivity

Expand source code
# Re-export AdditivityMeter from the private implementation module so that it
# is importable directly from this package.
from ._additivity_meter import AdditivityMeter

# Names exported by `from <this package> import *`.
__all__ = ["AdditivityMeter"]

Classes

class AdditivityMeter (random_state: Optional[int] = None)

AdditivityMeter is a class that calculates the additivity index of a model.

Attributes

additivity_index : float
Additivity index of the model.
full_result : pd.DataFrame
Dataframe with the results of the additivity index calculation. It contains centered partial dependence values and prediction for every observation and feature.
preds : np.ndarray
Predictions for the sampled data.
model : object
Model for which additivity index is calculated.
X_sampled : pd.DataFrame
Sampled data used for calculation.
pd_calculator : PartialDependenceCalculator
Object used to calculate and store partial dependence values.
Expand source code
class AdditivityMeter:
    """
    AdditivityMeter is a class that calculates the additivity index of a model.

    Attributes
    -----------
    additivity_index : float
        Additivity index of the model. None until `fit` is called.
    full_result : pd.DataFrame
        Dataframe with the results of the additivity index calculation.
        It contains centered partial dependence values and centered prediction
        for every observation and feature. None until `fit` is called.
    preds : np.ndarray
        Predictions for the sampled data. None until `fit` is called.
    model : object
        Model for which additivity index is calculated.
    X_sampled : pd.DataFrame
        Sampled data used for calculation.
    pd_calculator : PartialDependenceCalculator
        Object used to calculate and store partial dependence values.
    predict_function : Callable
        Function used to obtain predictions from the model. None until `fit` is called.
    """

    def __init__(self, random_state: Optional[int] = None):
        """
        Creates an unfitted AdditivityMeter.

        Parameters
        -----------
        random_state : int, optional
            Seed passed to `np.random.default_rng`; the resulting generator is
            handed to the row-sampling helper in `fit`. Default is None.
        """
        self._random_generator = np.random.default_rng(random_state)
        # Every attribute assigned by `fit` is declared here under the same
        # name, so it can be read (as None) before fitting.
        self.additivity_index = None
        self.full_result = None
        self.preds = None
        self.model = None
        self.X_sampled = None
        self.pd_calculator = None
        self.predict_function = None

    def fit(
        self,
        model,
        X: pd.DataFrame,
        n: Optional[int] = None,
        predict_function: Optional[Callable] = None,
        show_progress: bool = False,
        batchsize: int = 2000,
        pd_calculator: Optional["PartialDependenceCalculator"] = None,
    ):
        """
        Calculates the additivity index of the given model.

        Parameters
        -----------
        model : object
            Model to calculate additivity index for, should have predict_proba or predict method,
            or predict_function should be provided.
        X : pd.DataFrame
            Data used to calculate the additivity index. If n is not None, n rows from X will be sampled.
        n : int, optional
            Number of samples to be used for calculation of the additivity index.
            If None, all rows from X will be used. Default is None.
        predict_function : Callable, optional
            Function used to predict model output. It should take the model and a dataset and return predictions.
            If None, `predict_proba` method will be used if it exists, otherwise `predict` method. Default is None.
        show_progress : bool
            If True, progress bar will be shown. Default is False.
        batchsize : int
            Batch size for calculating partial dependence. Prediction requests are collected until the batchsize
            is exceeded, then the model is queried for predictions jointly for many observations.
            It speeds up the operation of the method. Default is 2000.
        pd_calculator : PartialDependenceCalculator, optional
            PartialDependenceCalculator object containing partial dependence values for a given model and dataset.
            Providing this object speeds up the calculation as partial dependence values do not need to be
            recalculated. If None, it will be created from scratch. Default is None.

        Returns
        --------
        additivity_index : float
            Additivity index of the model. A value of 1 means that the model is additive;
            lower values mean that the model is less additive. The index is computed as
            1 - (residual sum of squares / total sum of squares) and is not clipped,
            so it is not guaranteed to be bounded below by 0.

        Raises
        --------
        ValueError
            If `pd_calculator` is provided but its model or its data differ from
            `model` or from the (sampled) data used by this method.
        """
        self.predict_function = get_predict_function(model, predict_function)
        self.model = model
        self.X_sampled = sample_if_not_none(self._random_generator, X, n)

        if pd_calculator is None:
            self.pd_calculator = PartialDependenceCalculator(
                self.model, self.X_sampled, self.predict_function, batchsize
            )
        else:
            # A pre-computed calculator is only reusable when it was built for
            # exactly this model and exactly these rows.
            if pd_calculator.model != self.model:
                raise ValueError(
                    "Model in PDP calculator is different than the model in the method."
                )
            if not pd_calculator.X.equals(self.X_sampled):
                raise ValueError(
                    "Data in PDP calculator is different than the data in the method."
                )
            self.pd_calculator = pd_calculator

        # Start from a copy of the data; the feature columns are overwritten
        # with centered partial dependence values in `_calculate_additivity`.
        self.full_result = self.X_sampled.copy()
        self.additivity_index = self._calculate_additivity(show_progress=show_progress)
        return self.additivity_index

    def _calculate_additivity(self, show_progress: bool):
        """
        Fills `preds` and `full_result` and returns the additivity index.

        The index is 1 - SSE / SST, where SSE is the squared difference between the
        predictions and the sum of first-order (single-feature) effects plus the mean
        prediction, and SST is the sum of squared centered predictions.

        Parameters
        -----------
        show_progress : bool
            Forwarded to the partial dependence calculator.

        Returns
        --------
        additivity_index : float
            NOTE(review): when all predictions are identical SST is 0 and the
            division below is not guarded.
        """
        self.pd_calculator.calculate_pd_single(
            show_progress=show_progress, desc=ProgressInfoLog.CALC_ADD
        )

        self.preds = self.predict_function(self.model, self.X_sampled)
        # Loop-invariant: compute the mean prediction once.
        mean_pred = np.mean(self.preds)

        for var in self.X_sampled.columns:
            # Presumably the partial dependence of `var` evaluated at each
            # observation's own value -- confirm against PartialDependenceCalculator.
            self.full_result[var] = (
                self.pd_calculator.get_pd_single(var, self.X_sampled[var].values) - mean_pred
            )

        self.full_result["centered_prediction"] = self.preds - mean_pred

        # All columns except the last ("centered_prediction") are feature effects.
        sum_first_order_effects = self.full_result.values[:, :-1].sum(axis=1) + mean_pred
        residual_ss = np.sum((self.preds - sum_first_order_effects) ** 2)
        total_ss = np.sum((self.full_result["centered_prediction"]) ** 2)
        return 1 - residual_ss / total_ss

Methods

def fit(self, model, X: pandas.core.frame.DataFrame, n: int = None, predict_function: Optional[Callable] = None, show_progress: bool = False, batchsize: int = 2000, pd_calculator: Optional[artemis._utilities.pd_calculator.PartialDependenceCalculator] = None)

Calculates the additivity index of the given model.

Parameters

model : object
Model to calculate additivity index for, should have predict_proba or predict method, or predict_function should be provided.
X : pd.DataFrame
Data used to calculate the additivity index. If n is not None, n rows from X will be sampled.
n : int, optional
Number of samples to be used for calculation of the additivity index. If None, all rows from X will be used. Default is None.
predict_function : Callable, optional
Function used to predict model output. It should take the model and a dataset and return predictions. If None, the predict_proba method will be used if it exists, otherwise the predict method. Default is None.
show_progress : bool
If True, progress bar will be shown. Default is False.
batchsize : int
Batch size for calculating partial dependence. Prediction requests are collected until the batchsize is exceeded, then the model is queried for predictions jointly for many observations. It speeds up the operation of the method. Default is 2000.
pd_calculator : PartialDependenceCalculator, optional
PartialDependenceCalculator object containing partial dependence values for a given model and dataset. Providing this object speeds up the calculation as partial dependence values do not need to be recalculated. If None, it will be created from scratch. Default is None.

Returns

additivity_index : float
Additivity index of the model. A value from the [0, 1] interval, where 1 means that the model is additive and 0 means that the model is not additive.
Expand source code
def fit(
    self,
    model,
    X: pd.DataFrame,
    n: Optional[int] = None,
    predict_function: Optional[Callable] = None,
    show_progress: bool = False,
    batchsize: int = 2000,
    pd_calculator: Optional["PartialDependenceCalculator"] = None,
):
    """
    Calculates the additivity index of the given model.

    Parameters
    -----------
    model : object
        Model to calculate additivity index for, should have predict_proba or predict method,
        or predict_function should be provided.
    X : pd.DataFrame
        Data used to calculate the additivity index. If n is not None, n rows from X will be sampled.
    n : int, optional
        Number of samples to be used for calculation of the additivity index.
        If None, all rows from X will be used. Default is None.
    predict_function : Callable, optional
        Function used to predict model output. It should take the model and a dataset and return predictions.
        If None, `predict_proba` method will be used if it exists, otherwise `predict` method. Default is None.
    show_progress : bool
        If True, progress bar will be shown. Default is False.
    batchsize : int
        Batch size for calculating partial dependence. Prediction requests are collected until the batchsize
        is exceeded, then the model is queried for predictions jointly for many observations.
        It speeds up the operation of the method. Default is 2000.
    pd_calculator : PartialDependenceCalculator, optional
        PartialDependenceCalculator object containing partial dependence values for a given model and dataset.
        Providing this object speeds up the calculation as partial dependence values do not need to be
        recalculated. If None, it will be created from scratch. Default is None.

    Returns
    --------
    additivity_index : float
        Additivity index of the model. A value of 1 means that the model is additive;
        lower values mean that the model is less additive. The index is computed as
        1 - (residual sum of squares / total sum of squares) and is not clipped,
        so it is not guaranteed to be bounded below by 0.

    Raises
    --------
    ValueError
        If `pd_calculator` is provided but its model or its data differ from
        `model` or from the (sampled) data used by this method.
    """
    self.predict_function = get_predict_function(model, predict_function)
    self.model = model
    self.X_sampled = sample_if_not_none(self._random_generator, X, n)

    if pd_calculator is None:
        self.pd_calculator = PartialDependenceCalculator(
            self.model, self.X_sampled, self.predict_function, batchsize
        )
    else:
        # A pre-computed calculator is only reusable when it was built for
        # exactly this model and exactly these rows.
        if pd_calculator.model != self.model:
            raise ValueError(
                "Model in PDP calculator is different than the model in the method."
            )
        if not pd_calculator.X.equals(self.X_sampled):
            raise ValueError(
                "Data in PDP calculator is different than the data in the method."
            )
        self.pd_calculator = pd_calculator

    # Start from a copy of the data; the feature columns are overwritten
    # with centered partial dependence values in `_calculate_additivity`.
    self.full_result = self.X_sampled.copy()
    self.additivity_index = self._calculate_additivity(show_progress=show_progress)
    return self.additivity_index