# Authors: The scikit-autoeval developers
# SPDX-License-Identifier: BSD-3-Clause
from typing import Any, Dict, List, Optional, Sequence, Tuple, Callable, cast
import numpy as np
import pandas as pd
from sklearn.base import clone
from sklearn.model_selection import train_test_split
from skeval.evaluators.regression import RegressionEvaluator
class RegressionNoiseEvaluator(RegressionEvaluator):
"""Regression-based evaluator for classification models.
This evaluator estimates the performance of a classification model (e.g.,
accuracy, precision) without requiring labeled test data. It works by
training a meta-regressor (e.g., Random Forest) that learns to map
meta-features, extracted from a classifier's probability distributions,
to its true performance score on unseen data.
To use this evaluator, it must first be fitted on a collection of diverse
datasets using the same model type to learn this mapping robustly.
Parameters
----------
model : object
        An unfitted classifier instance that will be used as the base model.
        Clones of this model are trained during the `fit` process; `fit` also
        fits the model itself on the first provided dataset, and it can be
        replaced with a separately fitted estimator before calling `estimate`.
meta_regressor : object, default=None
A regression model implementing `fit` and `predict`. If None, a
`RandomForestRegressor` with 500 trees is used for each scorer.
scorer : callable or dict of str -> callable, default=accuracy_score
The performance metric(s) to estimate. If a dictionary is provided, a
separate meta-regressor will be trained to estimate each metric.
verbose : bool, default=False
If True, prints informational messages during the training of the
        meta-regressors and during estimation.

Attributes
----------
meta_regressors_ : dict
A dictionary mapping each scorer's name to its fitted meta-regressor
        instance. This attribute is populated after the `fit` method is called.

Examples
--------
>>> # ==============================================================
>>> # RegressionNoiseEvaluator Example
>>> # ==============================================================
>>> import pandas as pd
>>> from sklearn.metrics import accuracy_score, f1_score
>>> from sklearn.ensemble import RandomForestClassifier
>>>
>>> from skeval.evaluators import RegressionNoiseEvaluator
>>> from skeval.utils import get_cv_and_real_scores, print_comparison
>>>
    >>> def run_regression_noise_eval(verbose=False):
    ...     # 1. Load datasets
    ...     geriatrics = pd.read_csv("./skeval/datasets/geriatria-controle-alzheimerLabel.csv")
    ...     neurology = pd.read_csv("./skeval/datasets/neurologia-controle-alzheimerLabel.csv")
    ...
    ...     # 2. Separate features and target
    ...     X1, y1 = geriatrics.drop(columns=["Alzheimer"]), geriatrics["Alzheimer"]
    ...     X2, y2 = neurology.drop(columns=["Alzheimer"]), neurology["Alzheimer"]
    ...
    ...     # 3. Define the base model (preprocessing could be added via a Pipeline)
    ...     model = RandomForestClassifier(n_estimators=180, random_state=42)
    ...
    ...     # 4. Define scorers and evaluator
    ...     scorers = {
    ...         "accuracy": accuracy_score,
    ...         "f1_macro": lambda y, p: f1_score(y, p, average="macro"),
    ...     }
    ...     evaluator = RegressionNoiseEvaluator(model=model, scorer=scorers, verbose=False)
    ...
    ...     # 5. Fit the evaluator on multiple datasets (builds the noise-augmented meta-dataset)
    ...     evaluator.fit([X1, X2], [y1, y2], n_splits=5)
    ...
    ...     # 6. Estimate scores for the target dataset
    ...     estimated_scores = evaluator.estimate(X2)
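    ...     # NOTE: `estimated_scores` maps each scorer name to its estimate,
    ...     # e.g. {"accuracy": 0.87, "f1_macro": 0.85} (values illustrative).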
    ...
    ...     # 7. Cross-validation and real performance, for comparison
    ...     train_data = (X1, y1)
    ...     test_data = (X2, y2)
    ...     scores_dict = get_cv_and_real_scores(
    ...         model=model, scorers=scorers, train_data=train_data, test_data=test_data
    ...     )
    ...     cv_scores = scores_dict["cv_scores"]
    ...     real_scores = scores_dict["real_scores"]
    ...
    ...     if verbose:
    ...         print_comparison(scorers, cv_scores, estimated_scores, real_scores)
    ...     return {"cv": cv_scores, "estimated": estimated_scores, "real": real_scores}
    >>>
    >>> results = run_regression_noise_eval(verbose=True)
"""
def fit(
self,
x: Sequence[Any],
y: Sequence[Any],
n_splits: int = 5,
noise_cfg: Optional[Dict[str, int]] = None,
) -> "RegressionNoiseEvaluator":
"""Trains the internal meta-regressor(s) using a single model type.
This method builds a meta-dataset to train the evaluator. For each dataset,
it performs multiple random splits to increase the number of meta-examples.
Parameters
----------
x : list of array-like
A list of datasets (features) used to train the meta-model.
y : list of array-like
A list of labels corresponding to `x`.
n_splits : int, default=5
Number of random splits per dataset to generate multiple meta-examples.
        noise_cfg : dict, default=None
            Configuration for the feature-noise levels, with keys:

            - 'start': int, starting percentage of noise (inclusive).
            - 'end': int, ending percentage of noise (inclusive).
            - 'step': int, step size for noise percentage increments.
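
            For example, ``{"start": 20, "end": 60, "step": 20}`` produces
            meta-examples at 20%, 40% and 60% noise, while the default
            ``{"start": 10, "end": 100, "step": 10}`` sweeps ten levels.
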
Returns
-------
self : object
The fitted evaluator instance.

        Notes
        -----
        - This evaluator extends `RegressionEvaluator` by generating
          meta-examples that incorporate controlled feature noise. The
          `noise_cfg` parameter sets the percentage(s) of holdout examples
          whose feature values are permuted during meta-example generation.
        - The `fit` method expects `x` and `y` to be lists of datasets (as in
          `RegressionEvaluator`). Each dataset is processed with multiple
          splits and multiple noise levels to build a larger meta-dataset.
        - After training the meta-regressors, `fit` fits `self.model` on the
          first provided dataset (`x[0], y[0]`). To use a separately trained
          final model before calling `estimate`, set `evaluator.model` to your
          fitted estimator manually, as sketched in the example below.
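
        Examples
        --------
        A minimal sketch of a custom noise sweep; `model`, `scorers`, `X1`,
        `y1`, `X2`, and `y2` are assumed to be defined as in the class-level
        example, and `already_fitted_model` is hypothetical:

        >>> evaluator = RegressionNoiseEvaluator(model=model, scorer=scorers)
        >>> evaluator.fit(
        ...     [X1, X2], [y1, y2],
        ...     n_splits=3,
        ...     noise_cfg={"start": 20, "end": 80, "step": 20},  # 20%, 40%, 60%, 80%
        ... )  # doctest: +SKIP
        >>> # Optionally swap in a separately fitted model before estimating
        >>> evaluator.model = already_fitted_model  # doctest: +SKIP
        >>> scores = evaluator.estimate(X2)  # doctest: +SKIP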
"""
if noise_cfg is None:
noise_cfg = {"start": 10, "end": 100, "step": 10}
self._validate_noise_params(noise_cfg)
scorer_names = self._get_scorer_names()
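        # Accumulators for the meta-dataset: one meta-feature vector and one
        # target value per (dataset, split, noise level) combination.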
meta_cfg = {
"features": [],
"targets": {name: [] for name in scorer_names},
"scorer_names": scorer_names,
}
for x_i, y_i in zip(x, y):
self._process_single_dataset((x_i, y_i), noise_cfg, meta_cfg, n_splits)
self._train_meta_regressors(meta_cfg)
        # Fit the base model on the first dataset (original behavior preserved).
self.model.fit(x[0], y[0])
return self
def _validate_noise_params(self, noise_cfg: Dict[str, int]) -> None:
has_all_keys = all(key in noise_cfg for key in ["start", "end", "step"])
        if (
            not has_all_keys
            or noise_cfg["start"] < 0
            or noise_cfg["start"] > noise_cfg["end"]
            or noise_cfg["end"] > 100
            or noise_cfg["step"] <= 0
        ):
raise ValueError(
"Noise parameters must satisfy: "
"0 <= start <= end <= 100 and step > 0."
)
def _process_single_dataset(
self,
train_data: Tuple[Any, Any],
noise_cfg: Dict[str, int],
meta_cfg: Dict[str, Any],
n_splits: int = 5,
) -> None:
"""Processes one dataset by generating meta-examples."""
y_i = train_data[1]
unique_labels = np.unique(y_i)
stratify_y = y_i if len(unique_labels) > 1 else None
for split in range(n_splits):
self._process_single_split(
train_data, stratify_y, split, (noise_cfg, meta_cfg)
)
def _process_single_split(
self,
train_data: Tuple[Any, Any],
stratify_y: Optional[Any],
split: int,
cfg: Tuple[Dict[str, int], Dict[str, Any]],
) -> None:
"""Processes each train/holdout split."""
base_model = clone(self.model)
noise_cfg, meta_cfg = cfg
x_train, x_holdout, y_train, y_holdout = train_test_split(
train_data[0],
train_data[1],
test_size=0.33,
random_state=42 + split,
stratify=stratify_y,
)
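        # Hold out a third of the data; varying the seed per split yields
        # different partitions and hence more diverse meta-examples.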
        base_model.fit(x_train, y_train)
for noise_p in range(
noise_cfg["start"], noise_cfg["end"] + 1, noise_cfg["step"]
):
            generation_cfg = {
                "features": meta_cfg["features"],
                "targets": meta_cfg["targets"],
                "scorer_names": meta_cfg["scorer_names"],
                "noise_p": noise_p,
            }
self._generate_noise_meta_example(
base_model, (x_holdout, y_holdout), split, generation_cfg
)
def _generate_noise_meta_example(
self, base_model: Any, holdout: Tuple[Any, Any], split: int, cfg: Dict[str, Any]
) -> None:
"""Adds one meta-example (metafeatures + performance target)."""
x_holdout, y_holdout = holdout
# Ensure x_holdout is a DataFrame so column-wise permutation works
if not hasattr(x_holdout, "columns"):
x_holdout = pd.DataFrame(x_holdout)
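        # The first `noise_p` percent of holdout rows get permuted feature
        # values; the remaining rows are left untouched.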
n_noisy = int(len(x_holdout) * (cfg["noise_p"] / 100.0))
x_noisy = x_holdout.iloc[:n_noisy].copy()
x_clean = x_holdout.iloc[n_noisy:].copy()
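        # Seed the generator with both the noise level and the split index so
        # each meta-example is reproducible yet uses a distinct permutation.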
rng = np.random.default_rng(42 + cfg["noise_p"] + split)
for col in x_holdout.columns:
x_noisy[col] = rng.permutation(x_noisy[col].values)
x_concat = pd.concat([x_noisy, x_clean], axis=0)
        y_pred = base_model.predict(x_concat)
        # Extract meta-features from the *noisy* holdout: each noise level must
        # yield a distinct feature vector paired with its degraded score.
        # Reusing the clean-holdout meta-features would hand the meta-regressor
        # identical inputs with conflicting targets.
        metafeats = self._extract_metafeatures(base_model, x_concat)
        cfg["features"].append(metafeats.flatten())
self._store_meta_targets(cfg["targets"], cfg["scorer_names"], y_holdout, y_pred)
def _store_meta_targets(
self,
meta_targets: Dict[str, List[float]],
scorer_names: List[str],
y_true: Any,
y_pred: Any,
) -> None:
"""Stores score values for one meta-example."""
if isinstance(self.scorer, dict):
for name in scorer_names:
meta_targets[name].append(float(self.scorer[name](y_true, y_pred)))
else:
scorer_fn = cast(Callable[..., float], self.scorer)
meta_targets["score"].append(float(scorer_fn(y_true, y_pred)))
def _train_meta_regressors(self, meta_cfg: Dict[str, Any]) -> None:
"""Trains one regressor per scorer."""
meta_features = np.array(meta_cfg["features"])
self.meta_regressors_ = {}
for name in meta_cfg["scorer_names"]:
y_arr = np.array(meta_cfg["targets"][name])
reg = clone(self.meta_regressor)
reg.fit(meta_features, y_arr)
self.meta_regressors_[name] = reg
if self.verbose:
print(f"[INFO] Meta-regressor for '{name}' has been trained.")