# Authors: The scikit-autoeval developers
# SPDX-License-Identifier: BSD-3-Clause
from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple, Union, cast

import numpy as np
from sklearn.base import clone
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

from skeval.base import BaseEvaluator
from skeval.utils import check_is_fitted
class RegressionEvaluator(BaseEvaluator):
"""Regression-based evaluator for classification models.
This evaluator estimates the performance of a classification model (e.g.,
accuracy, precision) without requiring labeled test data. It works by
training a meta-regressor (e.g., Random Forest) that learns to map
meta-features, extracted from a classifier's probability distributions,
to its true performance score on unseen data.
To use this evaluator, it must first be fitted on a collection of diverse
datasets using the same model type to learn this mapping robustly.

    Parameters
    ----------
    model : object
        An unfitted classifier instance that will be used as the base model.
        Clones of this model are trained during the `fit` process, and a model
        of the same type must later be fitted manually before `estimate`.
    scorer : callable or dict of str -> callable, default=accuracy_score
        The performance metric(s) to estimate. If a dictionary is provided, a
        separate meta-regressor is trained to estimate each metric.
    verbose : bool, default=False
        If True, prints informational messages while training the
        meta-regressors and during estimation.
    meta_regressor : object, default=None
        A regression model implementing `fit` and `predict`. If None, a
        `RandomForestRegressor` with 500 trees is used for each scorer.

    Attributes
    ----------
    meta_regressors_ : dict
        A dictionary mapping each scorer's name to its fitted meta-regressor
        instance. This attribute is populated after `fit` is called.

    Notes
    -----
    - The `fit` method expects lists of datasets (`x` and `y`), where each
      element is one dataset (features/labels) used to build the meta-dataset.
    - After training the meta-regressors, `fit` also fits `self.model` on the
      first dataset provided (``x[0], y[0]``). If you prefer a differently
      trained final model before calling `estimate`, manually set
      `evaluator.model` to your fitted estimator (see the sketch below).
    - The underlying classifier must implement `predict_proba` because the
      evaluator extracts meta-features from predicted probability
      distributions. This evaluator is designed for classification tasks.
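
    A minimal sketch of that override (``my_fitted_clf``, ``list_of_X``,
    ``list_of_y``, and ``X_unlabeled`` are illustrative placeholders)::

        evaluator.fit(list_of_X, list_of_y)
        evaluator.model = my_fitted_clf  # any fitted classifier with predict_proba
        scores = evaluator.estimate(X_unlabeled)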

    Examples
    --------
    >>> import pandas as pd
    >>> from sklearn.ensemble import RandomForestClassifier
    >>> from sklearn.metrics import accuracy_score, f1_score
    >>> from skeval.evaluators import RegressionEvaluator
    >>> from skeval.utils import get_cv_and_real_scores, print_comparison
    >>>
    >>> # 1. Load datasets and separate features from the target.
    >>> geriatrics = pd.read_csv("./skeval/datasets/geriatria-controle-alzheimerLabel.csv")
    >>> neurology = pd.read_csv("./skeval/datasets/neurologia-controle-alzheimerLabel.csv")
    >>> X1, y1 = geriatrics.drop(columns=["Alzheimer"]), geriatrics["Alzheimer"]
    >>> X2, y2 = neurology.drop(columns=["Alzheimer"]), neurology["Alzheimer"]
    >>>
    >>> # 2. Define the base model.
    >>> model = RandomForestClassifier(n_estimators=180, random_state=42)
    >>>
    >>> # 3. Define the scorers and the evaluator.
    >>> scorers = {
    ...     "accuracy": accuracy_score,
    ...     "f1_macro": lambda y, p: f1_score(y, p, average="macro"),
    ... }
    >>> evaluator = RegressionEvaluator(model=model, scorer=scorers, verbose=False)
    >>>
    >>> # 4. Fit the evaluator on multiple datasets.
    >>> evaluator = evaluator.fit([X1, X2], [y1, y2], n_splits=4)
    >>>
    >>> # 5. Estimate scores for a new, unlabeled dataset.
    >>> estimated_scores = evaluator.estimate(X2)
    >>>
    >>> # 6. Compare with cross-validation and real performance.
    >>> scores_dict = get_cv_and_real_scores(
    ...     model=model, scorers=scorers, train_data=(X1, y1), test_data=(X2, y2)
    ... )
    >>> print_comparison(
    ...     scorers, scores_dict["cv_scores"], estimated_scores, scores_dict["real_scores"]
    ... )
    """
def __init__(
self,
model: Any,
scorer: Union[
Callable[..., float], Dict[str, Callable[..., float]]
] = accuracy_score,
verbose: bool = False,
meta_regressor: Optional[Any] = None,
) -> None:
super().__init__(model=model, scorer=scorer, verbose=verbose)
        # Template meta-regressor; `fit` clones it once per scorer. Defaults
        # to a 500-tree random forest.
        self.meta_regressor = meta_regressor or RandomForestRegressor(
            n_estimators=500, random_state=42
        )
self.meta_regressors_: Dict[str, Any] = {}
def fit(
self, x: Sequence[Any], y: Sequence[Any], n_splits: int = 5
) -> "RegressionEvaluator":
"""Trains the internal meta-regressor(s) using a single model type.
This method builds a meta-dataset to train the evaluator. For each dataset,
it performs multiple random splits to increase the number of meta-examples.
Parameters
----------
x : list of array-like
A list of datasets (features) used to train the meta-model.
y : list of array-like
A list of labels corresponding to `x`.
n_splits : int, default=5
Number of random splits per dataset to generate multiple meta-examples.
Returns
-------
self : object
The fitted evaluator instance.
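
        Examples
        --------
        A minimal sketch with synthetic data (the logistic-regression base
        model and the data shapes are illustrative, not prescriptive):

        >>> import numpy as np
        >>> from sklearn.linear_model import LogisticRegression
        >>> from skeval.evaluators import RegressionEvaluator
        >>> rng = np.random.RandomState(0)
        >>> xs = [rng.rand(80, 4) for _ in range(3)]
        >>> ys = [rng.randint(0, 2, 80) for _ in range(3)]
        >>> ev = RegressionEvaluator(model=LogisticRegression())
        >>> ev = ev.fit(xs, ys, n_splits=3)  # 3 datasets x 3 splits = 9 meta-examples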
"""
scorers_names: List[str] = self._get_scorer_names()
meta_targets: Dict[str, List[float]] = {name: [] for name in scorers_names}
meta_features: List[np.ndarray] = []
for x_i, y_i in zip(x, y):
for split in range(n_splits):
feats, y_holdout_meta, y_pred_holdout = self._generate_meta_example(
x_i=x_i, y_i=y_i, split=split
)
meta_features.append(feats)
self._update_meta_targets(
y_true=y_holdout_meta,
y_pred=y_pred_holdout,
meta_targets=meta_targets,
scorers_names=scorers_names,
)
self.meta_regressors_ = {}
for name in scorers_names:
self.meta_regressors_[name] = self._fit_single_meta_regressor(
name, np.array(meta_features), meta_targets
)
if self.verbose:
print(f"[INFO] Meta-regressor for '{name}' has been trained.")
        # Fit the wrapped model on the first dataset so `estimate` can be used
        # right away; see the class Notes for how to override this.
        self.model.fit(x[0], y[0])
return self
def estimate(self, x_eval: Any) -> Dict[str, float]:
"""Estimates the performance of the current model on unlabeled data.
The model assigned to `self.model` must already be a fitted classifier
(manually trained by the user). This method extracts meta-features from
its predictions on the unlabeled data `x_eval` and uses the pre-trained
meta-regressor(s) to predict the performance scores.
Parameters
----------
x_eval : array-like of shape (n_samples, n_features)
The unlabeled input data.
Returns
-------
dict
A dictionary with the estimated scores, where keys are the names
of the scorers and values are the predicted performance scores.
Raises
------
RuntimeError
If the `estimate` method is called before the evaluator has been
fitted with the `fit` method.
ValueError
If `self.model` does not implement `predict_proba`.
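
        Examples
        --------
        A minimal sketch continuing from a fitted evaluator (``ev`` and
        ``x_new`` are illustrative placeholders):

        >>> scores = ev.estimate(x_new)  # doctest: +SKIP
        >>> list(scores) == ev._get_scorer_names()  # doctest: +SKIP
        True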
"""
        check_is_fitted(self.model)
        # `meta_regressors_` is initialized to an empty dict in `__init__`, so
        # test for emptiness rather than attribute existence.
        if not self.meta_regressors_:
            raise RuntimeError(
                "The evaluator has not been fitted yet. Call 'fit' before 'estimate'."
            )
feats = self._extract_metafeatures(estimator=self.model, x=x_eval)
        scores: Dict[str, float] = {}
        for name, reg in self.meta_regressors_.items():
            # `predict` returns a length-1 array for the single meta-example.
            estimated_score = float(reg.predict(feats)[0])
            scores[name] = estimated_score
if self.verbose:
print(f"[INFO] Estimated {name}: {estimated_score:.4f}")
return scores
def _extract_metafeatures(self, estimator: Any, x: Any) -> np.ndarray:
"""Extracts meta-features from a fitted model's predicted probabilities.
The extracted features include the mean and standard deviation of the
maximum prediction probabilities (confidence) and the mean and standard
deviation of the entropy of the probability distributions.
Parameters
----------
estimator : fitted classifier
The classifier from which to extract prediction probabilities.
x : array-like of shape (n_samples, n_features)
The input data.
Returns
-------
ndarray
A 2D numpy array of shape (1, n_meta_features) containing the
extracted features.
Raises
------
ValueError
If the provided estimator does not have a `predict_proba` method.
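
        Examples
        --------
        The per-sample quantities behind these statistics, sketched directly
        (the evaluator additionally adds a small epsilon inside the log):

        >>> import numpy as np
        >>> probas = np.array([[0.9, 0.1], [0.6, 0.4]])
        >>> np.max(probas, axis=1)  # per-sample confidence
        array([0.9, 0.6])
        >>> np.round(-np.sum(probas * np.log(probas), axis=1), 3)  # entropy
        array([0.325, 0.673])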
"""
if not hasattr(estimator, "predict_proba"):
raise ValueError("The estimator must implement predict_proba.")
probas = estimator.predict_proba(x)
max_probs = np.max(probas, axis=1)
        # Small epsilon guards against log(0) for hard 0/1 probabilities.
        eps = 1e-12
        entropy = -np.sum(probas * np.log(probas + eps), axis=1)
features = {
"mean_conf": np.mean(max_probs),
"std_conf": np.std(max_probs),
"mean_entropy": np.mean(entropy),
"std_entropy": np.std(entropy),
}
return np.array(list(features.values())).reshape(1, -1)
def _generate_meta_example(
self, x_i: Any, y_i: Any, split: int
) -> Tuple[np.ndarray, Any, np.ndarray]:
"""Generates a single meta-example from a dataset split."""
est = clone(self.model)
stratify_y = self._safe_stratify(y_i)
x_train, x_hold, y_train, y_hold = train_test_split(
x_i,
y_i,
test_size=0.33,
random_state=42 + split,
stratify=stratify_y,
)
est.fit(x_train, y_train)
feats = self._extract_metafeatures(estimator=est, x=x_hold)
y_pred = est.predict(x_hold)
return feats.flatten(), y_hold, y_pred
def _fit_single_meta_regressor(
self, name: str, meta_features: np.ndarray, meta_targets: Dict[str, List[float]]
) -> Any:
"""Fits a single meta-regressor for a given scorer."""
meta_y = np.array(meta_targets[name])
reg = clone(self.meta_regressor)
reg.fit(meta_features, meta_y)
return reg
def _update_meta_targets(
self,
y_true: Any,
y_pred: Any,
meta_targets: Dict[str, List[float]],
scorers_names: List[str],
) -> None:
"""Updates the meta-targets dictionary with new scores."""
if isinstance(self.scorer, dict):
for name in scorers_names:
# scorer[name] is callable
meta_targets[name].append(float(self.scorer[name](y_true, y_pred)))
else:
scorer_fn = cast(Callable[..., float], self.scorer)
meta_targets["score"].append(float(scorer_fn(y_true, y_pred)))
    def _safe_stratify(self, y: Optional[Any]) -> Optional[Any]:
        """Return `y` for stratified splitting, or None when stratification is
        unsafe (fewer than two classes, or any class with fewer than two
        samples)."""
if y is None:
return None
y = np.asarray(y)
uniques, counts = np.unique(y, return_counts=True)
if len(uniques) < 2:
return None
if np.min(counts) < 2:
return None
return y