"""
Testing for the partial dependence module.
"""
import re
import warnings
import numpy as np
import pytest
import sklearn
from sklearn.base import BaseEstimator, ClassifierMixin, clone, is_regressor
from sklearn.cluster import KMeans
from sklearn.compose import make_column_transformer
from sklearn.datasets import load_iris, make_classification, make_regression
from sklearn.dummy import DummyClassifier
from sklearn.ensemble import (
GradientBoostingClassifier,
GradientBoostingRegressor,
HistGradientBoostingClassifier,
HistGradientBoostingRegressor,
RandomForestRegressor,
)
from sklearn.exceptions import NotFittedError
from sklearn.impute import SimpleImputer
from sklearn.inspection import partial_dependence
from sklearn.inspection._partial_dependence import (
_grid_from_X,
_partial_dependence_brute,
_partial_dependence_recursion,
)
from sklearn.linear_model import LinearRegression, LogisticRegression, MultiTaskLasso
from sklearn.metrics import r2_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import (
OneHotEncoder,
PolynomialFeatures,
RobustScaler,
StandardScaler,
scale,
)
from sklearn.tree import DecisionTreeRegressor
from sklearn.tree.tests.test_tree import assert_is_subtree
from sklearn.utils._testing import assert_allclose, assert_array_equal
from sklearn.utils.fixes import _IS_32BIT
from sklearn.utils.validation import check_random_state
# toy sample
X = [[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]]
y = [-1, -1, -1, 1, 1, 1]

# (X, y), n_targets <-- as expected in the output of partial_dep()
binary_classification_data = (make_classification(n_samples=50, random_state=0), 1)
multiclass_classification_data = (
    make_classification(
        n_samples=50, n_classes=3, n_clusters_per_class=1, random_state=0
    ),
    3,
)
regression_data = (make_regression(n_samples=50, random_state=0), 1)
multioutput_regression_data = (
    make_regression(n_samples=50, n_targets=2, random_state=0),
    2,
)

# iris
iris = load_iris()

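# `partial_dependence` returns a Bunch whose keys are exercised throughout
# these tests: "average" (the PDP), "individual" (the ICE curves, when
# requested via `kind`), and "grid_values" (one array of grid points per
# requested feature).
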
@pytest.mark.parametrize(
    "Estimator, method, data",
    [
        (GradientBoostingClassifier, "auto", binary_classification_data),
        (GradientBoostingClassifier, "auto", multiclass_classification_data),
        (GradientBoostingClassifier, "brute", binary_classification_data),
        (GradientBoostingClassifier, "brute", multiclass_classification_data),
        (GradientBoostingRegressor, "auto", regression_data),
        (GradientBoostingRegressor, "brute", regression_data),
        (DecisionTreeRegressor, "brute", regression_data),
        (LinearRegression, "brute", regression_data),
        (LinearRegression, "brute", multioutput_regression_data),
        (LogisticRegression, "brute", binary_classification_data),
        (LogisticRegression, "brute", multiclass_classification_data),
        (MultiTaskLasso, "brute", multioutput_regression_data),
    ],
)
@pytest.mark.parametrize("grid_resolution", (5, 10))
@pytest.mark.parametrize("features", ([1], [1, 2]))
@pytest.mark.parametrize("kind", ("average", "individual", "both"))
@pytest.mark.parametrize("use_custom_values", [True, False])
def test_output_shape(
    Estimator, method, data, grid_resolution, features, kind, use_custom_values
):
    # Check that partial_dependence has consistent output shape for different
    # kinds of estimators:
    # - classifiers with binary and multiclass settings
    # - regressors
    # - multi-task regressors
    est = Estimator()
    if hasattr(est, "n_estimators"):
        est.set_params(n_estimators=2)  # speed up computations

    # n_targets corresponds to the number of classes (1 for binary
    # classification) or the number of tasks / outputs in multi-task settings.
    # It's equal to 1 for classical regression.
    (X, y), n_targets = data
    n_instances = X.shape[0]

    custom_values = None
    if use_custom_values:
        grid_resolution = 5
        custom_values = {f: X[:grid_resolution, f] for f in features}

    est.fit(X, y)
    result = partial_dependence(
        est,
        X=X,
        features=features,
        method=method,
        kind=kind,
        grid_resolution=grid_resolution,
        custom_values=custom_values,
    )
    pdp, axes = result, result["grid_values"]

    expected_pdp_shape = (n_targets, *[grid_resolution for _ in range(len(features))])
    expected_ice_shape = (
        n_targets,
        n_instances,
        *[grid_resolution for _ in range(len(features))],
    )
    if kind == "average":
        assert pdp.average.shape == expected_pdp_shape
    elif kind == "individual":
        assert pdp.individual.shape == expected_ice_shape
    else:  # 'both'
        assert pdp.average.shape == expected_pdp_shape
        assert pdp.individual.shape == expected_ice_shape

    expected_axes_shape = (len(features), grid_resolution)
    assert axes is not None
    assert np.asarray(axes).shape == expected_axes_shape

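# A worked example of the shape arithmetic checked above, using values from
# this parametrization: with n_targets=3 (multiclass data), n_instances=50,
# features=[1, 2] and grid_resolution=5, "average" has shape (3, 5, 5) and
# "individual" has shape (3, 50, 5, 5).
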
def test_grid_from_X():
    # tests for _grid_from_X: sanity check for output, and for shapes.

    # Make sure that the grid is a cartesian product of the input (it will use
    # the unique values instead of the percentiles)
    percentiles = (0.05, 0.95)
    grid_resolution = 100
    is_categorical = [False, False]
    X = np.asarray([[1, 2], [3, 4]])
    grid, axes = _grid_from_X(X, percentiles, is_categorical, grid_resolution, {})
    assert_array_equal(grid, [[1, 2], [1, 4], [3, 2], [3, 4]])
    assert_array_equal(axes, X.T)

    # test shapes of returned objects depending on the number of unique values
    # for a feature.
    rng = np.random.RandomState(0)
    grid_resolution = 15

    # n_unique_values > grid_resolution
    X = rng.normal(size=(20, 2))
    grid, axes = _grid_from_X(
        X,
        percentiles,
        is_categorical,
        grid_resolution=grid_resolution,
        custom_values={},
    )
    assert grid.shape == (grid_resolution * grid_resolution, X.shape[1])
    assert np.asarray(axes).shape == (2, grid_resolution)
    assert grid.dtype == X.dtype

    # n_unique_values < grid_resolution, will use actual values
    n_unique_values = 12
    X[n_unique_values - 1 :, 0] = 12345
    rng.shuffle(X)  # just to make sure the order is irrelevant
    grid, axes = _grid_from_X(
        X,
        percentiles,
        is_categorical,
        grid_resolution=grid_resolution,
        custom_values={},
    )
    assert grid.shape == (n_unique_values * grid_resolution, X.shape[1])
    # axes is a list of arrays of different shapes
    assert axes[0].shape == (n_unique_values,)
    assert axes[1].shape == (grid_resolution,)
    assert grid.dtype == X.dtype

    # Check that custom_values is used for the grid when given
    X = rng.normal(size=(20, 2))
    X[n_unique_values - 1 :, 0] = 12345
    col_1_range = [0, 2, 3]
    grid, axes = _grid_from_X(
        X,
        percentiles,
        is_categorical=is_categorical,
        grid_resolution=grid_resolution,
        custom_values={1: col_1_range},
    )
    assert grid.shape == (n_unique_values * len(col_1_range), X.shape[1])
    # axes is a list of arrays of different shapes
    assert axes[0].shape == (n_unique_values,)
    assert axes[1].shape == (len(col_1_range),)
    assert grid.dtype == X.dtype

    # Check that grid_resolution does not impact the custom values
    X = rng.normal(size=(20, 2))
    col_0_range = [0, 2, 3, 4, 5, 6]
    grid_resolution = 5
    grid, axes = _grid_from_X(
        X,
        percentiles,
        is_categorical=is_categorical,
        grid_resolution=grid_resolution,
        custom_values={0: col_0_range},
    )
    assert grid.shape == (grid_resolution * len(col_0_range), X.shape[1])
    # axes is a list of arrays of different shapes
    assert axes[0].shape == (len(col_0_range),)
    assert axes[1].shape == (grid_resolution,)
    assert grid.dtype == np.result_type(X, np.asarray(col_0_range).dtype)

    # A string column forces an object dtype grid
    X = np.array([[0, "a"], [1, "b"], [2, "c"]])
    grid, axes = _grid_from_X(
        X,
        percentiles,
        is_categorical=is_categorical,
        grid_resolution=grid_resolution,
        custom_values={1: ["a", "b", "c"]},
    )
    assert grid.dtype == object

@pytest.mark.parametrize(
    "grid_resolution",
    [
        2,  # since n_categories > 2, we should not use quantile resampling
        100,
    ],
)
def test_grid_from_X_with_categorical(grid_resolution):
    """Check that `_grid_from_X` always samples from the categories and does
    not depend on the percentiles.
    """
    pd = pytest.importorskip("pandas")
    percentiles = (0.05, 0.95)
    is_categorical = [True]
    X = pd.DataFrame({"cat_feature": ["A", "B", "C", "A", "B", "D", "E"]})
    grid, axes = _grid_from_X(
        X,
        percentiles,
        is_categorical,
        grid_resolution=grid_resolution,
        custom_values={},
    )
    assert grid.shape == (5, X.shape[1])
    assert axes[0].shape == (5,)

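# A worked example for the heterogeneous-type test below: "cat" has 5 unique
# categories and "num" has 5 unique values. The categorical axis always keeps
# all 5 categories; the numerical axis is resampled to 3 quantile points when
# grid_resolution=3 (grid of 5 * 3 = 15 rows), but falls back to the 5 actual
# values when grid_resolution=100 exceeds them (grid of 5 * 5 = 25 rows).
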
@pytest.mark.parametrize("grid_resolution", [3, 100])
def test_grid_from_X_heterogeneous_type(grid_resolution):
"""Check that `_grid_from_X` always sample from categories and does not
depend from the percentiles.
"""
pd = pytest.importorskip("pandas")
percentiles = (0.05, 0.95)
is_categorical = [True, False]
X = pd.DataFrame(
{
"cat": ["A", "B", "C", "A", "B", "D", "E", "A", "B", "D"],
"num": [1, 1, 1, 2, 5, 6, 6, 6, 6, 8],
}
)
nunique = X.nunique()
grid, axes = _grid_from_X(
X,
percentiles,
is_categorical,
grid_resolution=grid_resolution,
custom_values={},
)
if grid_resolution == 3:
assert grid.shape == (15, 2)
assert axes[0].shape[0] == nunique["num"]
assert axes[1].shape[0] == grid_resolution
else:
assert grid.shape == (25, 2)
assert axes[0].shape[0] == nunique["cat"]
assert axes[1].shape[0] == nunique["cat"]
@pytest.mark.parametrize(
    "grid_resolution, percentiles, err_msg",
    [
        (2, (0, 0.0001), "percentiles are too close"),
        (100, (1, 2, 3, 4), "'percentiles' must be a sequence of 2 elements"),
        (100, 12345, "'percentiles' must be a sequence of 2 elements"),
        (100, (-1, 0.95), r"'percentiles' values must be in \[0, 1\]"),
        (100, (0.05, 2), r"'percentiles' values must be in \[0, 1\]"),
        (100, (0.9, 0.1), r"percentiles\[0\] must be strictly less than"),
        (1, (0.05, 0.95), "'grid_resolution' must be strictly greater than 1"),
    ],
)
def test_grid_from_X_error(grid_resolution, percentiles, err_msg):
    X = np.asarray([[1, 2], [3, 4]])
    is_categorical = [False]
    with pytest.raises(ValueError, match=err_msg):
        _grid_from_X(X, percentiles, is_categorical, grid_resolution, custom_values={})

@pytest.mark.parametrize("target_feature", range(5))
@pytest.mark.parametrize(
"est, method",
[
(LinearRegression(), "brute"),
(GradientBoostingRegressor(random_state=0), "brute"),
(GradientBoostingRegressor(random_state=0), "recursion"),
(HistGradientBoostingRegressor(random_state=0), "brute"),
(HistGradientBoostingRegressor(random_state=0), "recursion"),
],
)
def test_partial_dependence_helpers(est, method, target_feature):
# Check that what is returned by _partial_dependence_brute or
# _partial_dependence_recursion is equivalent to manually setting a target
# feature to a given value, and computing the average prediction over all
# samples.
# This also checks that the brute and recursion methods give the same
# output.
# Note that even on the trainset, the brute and the recursion methods
# aren't always strictly equivalent, in particular when the slow method
# generates unrealistic samples that have low mass in the joint
# distribution of the input features, and when some of the features are
# dependent. Hence the high tolerance on the checks.
X, y = make_regression(random_state=0, n_features=5, n_informative=5)
# The 'init' estimator for GBDT (here the average prediction) isn't taken
# into account with the recursion method, for technical reasons. We set
# the mean to 0 to that this 'bug' doesn't have any effect.
y = y - y.mean()
# Clone is necessary to make the test thread-safe.
est = clone(est).fit(X, y)
# target feature will be set to .5 and then to 123
features = np.array([target_feature], dtype=np.intp)
grid = np.array([[0.5], [123]])
if method == "brute":
pdp, predictions = _partial_dependence_brute(
est, grid, features, X, response_method="auto"
)
else:
pdp = _partial_dependence_recursion(est, grid, features)
mean_predictions = []
for val in (0.5, 123):
X_ = X.copy()
X_[:, target_feature] = val
mean_predictions.append(est.predict(X_).mean())
pdp = pdp[0] # (shape is (1, 2) so make it (2,))
# allow for greater margin for error with recursion method
rtol = 1e-1 if method == "recursion" else 1e-3
assert np.allclose(pdp, mean_predictions, rtol=rtol)
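# For reference, the brute-force definition checked above: for a grid point v
# and target feature j, PD_j(v) = mean_i f(x_i with x_ij set to v), i.e. the
# prediction averaged over the samples after overriding feature j with v.
# That is exactly what the `mean_predictions` loop computes by hand.
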
@pytest.mark.parametrize("seed", range(1))
def test_recursion_decision_tree_vs_forest_and_gbdt(seed):
# Make sure that the recursion method gives the same results on a
# DecisionTreeRegressor and a GradientBoostingRegressor or a
# RandomForestRegressor with 1 tree and equivalent parameters.
rng = np.random.RandomState(seed)
# Purely random dataset to avoid correlated features
n_samples = 1000
n_features = 5
X = rng.randn(n_samples, n_features)
y = rng.randn(n_samples) * 10
# The 'init' estimator for GBDT (here the average prediction) isn't taken
# into account with the recursion method, for technical reasons. We set
# the mean to 0 to that this 'bug' doesn't have any effect.
y = y - y.mean()
# set max_depth not too high to avoid splits with same gain but different
# features
max_depth = 5
tree_seed = 0
forest = RandomForestRegressor(
n_estimators=1,
max_features=None,
bootstrap=False,
max_depth=max_depth,
random_state=tree_seed,
)
# The forest will use ensemble.base._set_random_states to set the
# random_state of the tree sub-estimator. We simulate this here to have
# equivalent estimators.
equiv_random_state = check_random_state(tree_seed).randint(np.iinfo(np.int32).max)
gbdt = GradientBoostingRegressor(
n_estimators=1,
learning_rate=1,
criterion="squared_error",
max_depth=max_depth,
random_state=equiv_random_state,
)
tree = DecisionTreeRegressor(max_depth=max_depth, random_state=equiv_random_state)
forest.fit(X, y)
gbdt.fit(X, y)
tree.fit(X, y)
# sanity check: if the trees aren't the same, the PD values won't be equal
try:
assert_is_subtree(tree.tree_, gbdt[0, 0].tree_)
assert_is_subtree(tree.tree_, forest[0].tree_)
except AssertionError:
# For some reason the trees aren't exactly equal on 32bits, so the PDs
# cannot be equal either. See
# https://github.com/scikit-learn/scikit-learn/issues/8853
assert _IS_32BIT, "this should only fail on 32 bit platforms"
return
grid = rng.randn(50).reshape(-1, 1)
for f in range(n_features):
features = np.array([f], dtype=np.intp)
pdp_forest = _partial_dependence_recursion(forest, grid, features)
pdp_gbdt = _partial_dependence_recursion(gbdt, grid, features)
pdp_tree = _partial_dependence_recursion(tree, grid, features)
np.testing.assert_allclose(pdp_gbdt, pdp_tree)
np.testing.assert_allclose(pdp_forest, pdp_tree)
@pytest.mark.parametrize(
    "est",
    (
        GradientBoostingClassifier(random_state=0),
        HistGradientBoostingClassifier(random_state=0),
    ),
)
@pytest.mark.parametrize("target_feature", (0, 1, 2, 3, 4, 5))
def test_recursion_decision_function(est, target_feature):
    # Make sure the recursion method (which implicitly uses decision_function)
    # gives the same result as the brute method with
    # response_method="decision_function"

    X, y = make_classification(n_classes=2, n_clusters_per_class=1, random_state=1)
    assert np.mean(y) == 0.5  # make sure the init estimator predicts 0 anyway
    est = clone(est).fit(X, y)

    preds_1 = partial_dependence(
        est,
        X,
        [target_feature],
        response_method="decision_function",
        method="recursion",
        kind="average",
    )
    preds_2 = partial_dependence(
        est,
        X,
        [target_feature],
        response_method="decision_function",
        method="brute",
        kind="average",
    )

    assert_allclose(preds_1["average"], preds_2["average"], atol=1e-7)

@pytest.mark.parametrize(
    "est",
    (
        LinearRegression(),
        GradientBoostingRegressor(random_state=0),
        HistGradientBoostingRegressor(
            random_state=0, min_samples_leaf=1, max_leaf_nodes=None, max_iter=1
        ),
        DecisionTreeRegressor(random_state=0),
    ),
)
@pytest.mark.parametrize("power", (1, 2))
def test_partial_dependence_easy_target(est, power):
    # If the target y only depends on one feature in an obvious way (linear or
    # quadratic) then the partial dependence for that feature should reflect
    # it.
    # We here fit a linear regression model (with polynomial features if
    # needed) and compute r_squared to check that the partial dependence
    # correctly reflects the target.

    rng = np.random.RandomState(0)
    n_samples = 200
    target_variable = 2
    X = rng.normal(size=(n_samples, 5))
    y = X[:, target_variable] ** power

    est = clone(est).fit(X, y)

    pdp = partial_dependence(
        est, features=[target_variable], X=X, grid_resolution=1000, kind="average"
    )

    new_X = pdp["grid_values"][0].reshape(-1, 1)
    new_y = pdp["average"][0]
    # add polynomial features if needed
    new_X = PolynomialFeatures(degree=power).fit_transform(new_X)

    lr = LinearRegression().fit(new_X, new_y)
    r2 = r2_score(new_y, lr.predict(new_X))

    assert r2 > 0.99

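# Why this works: since y = X[:, 2] ** power and the other features are
# independent noise, the partial dependence curve for feature 2 should be
# v ** power up to an additive constant, so fitting a degree-`power`
# polynomial to the curve should reach an R^2 close to 1.
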
@pytest.mark.parametrize(
    "Estimator",
    (
        sklearn.tree.DecisionTreeClassifier,
        sklearn.tree.ExtraTreeClassifier,
        sklearn.ensemble.ExtraTreesClassifier,
        sklearn.neighbors.KNeighborsClassifier,
        sklearn.neighbors.RadiusNeighborsClassifier,
        sklearn.ensemble.RandomForestClassifier,
    ),
)
def test_multiclass_multioutput(Estimator):
    # Make sure error is raised for multiclass-multioutput classifiers

    # make multiclass-multioutput dataset
    X, y = make_classification(n_classes=3, n_clusters_per_class=1, random_state=0)
    y = np.array([y, y]).T

    est = Estimator()
    est.fit(X, y)

    with pytest.raises(
        ValueError, match="Multiclass-multioutput estimators are not supported"
    ):
        partial_dependence(est, X, [0])

class NoPredictProbaNoDecisionFunction(ClassifierMixin, BaseEstimator):
    def fit(self, X, y):
        # simulate that we have some classes
        self.classes_ = [0, 1]
        return self

@pytest.mark.parametrize(
    "estimator, params, err_msg",
    [
        (
            KMeans(random_state=0, n_init="auto"),
            {"features": [0]},
            "'estimator' must be a fitted regressor or classifier",
        ),
        (
            LinearRegression(),
            {"features": [0], "response_method": "predict_proba"},
            "The response_method parameter is ignored for regressors",
        ),
        (
            GradientBoostingClassifier(random_state=0),
            {
                "features": [0],
                "response_method": "predict_proba",
                "method": "recursion",
            },
            "'recursion' method, the response_method must be 'decision_function'",
        ),
        (
            GradientBoostingClassifier(random_state=0),
            {"features": [0], "response_method": "predict_proba", "method": "auto"},
            "'recursion' method, the response_method must be 'decision_function'",
        ),
        (
            LinearRegression(),
            {"features": [0], "method": "recursion", "kind": "individual"},
            "The 'recursion' method only applies when 'kind' is set to 'average'",
        ),
        (
            LinearRegression(),
            {"features": [0], "method": "recursion", "kind": "both"},
            "The 'recursion' method only applies when 'kind' is set to 'average'",
        ),
        (
            LinearRegression(),
            {"features": [0], "method": "recursion"},
            "Only the following estimators support the 'recursion' method:",
        ),
        (
            LinearRegression(),
            {"features": [0, 1], "custom_values": {0: [1, 2, 3], 1: np.ones((3, 3))}},
            (
                "The custom grid for some features is not a one-dimensional array. "
                "Feature 1: 2 dimensions"
            ),
        ),
    ],
)
def test_partial_dependence_error(estimator, params, err_msg):
    X, y = make_classification(random_state=0)
    estimator = clone(estimator).fit(X, y)

    with pytest.raises(ValueError, match=err_msg):
        partial_dependence(estimator, X, **params)

@pytest.mark.parametrize(
    "estimator", [LinearRegression(), GradientBoostingClassifier(random_state=0)]
)
@pytest.mark.parametrize("features", [-1, 10000])
def test_partial_dependence_unknown_feature_indices(estimator, features):
    X, y = make_classification(random_state=0)
    estimator = clone(estimator).fit(X, y)

    err_msg = "all features must be in"
    with pytest.raises(ValueError, match=err_msg):
        partial_dependence(estimator, X, [features])

@pytest.mark.parametrize(
    "estimator", [LinearRegression(), GradientBoostingClassifier(random_state=0)]
)
def test_partial_dependence_unknown_feature_string(estimator):
    pd = pytest.importorskip("pandas")
    X, y = make_classification(random_state=0)
    df = pd.DataFrame(X)
    estimator = clone(estimator).fit(df, y)

    features = ["random"]
    err_msg = "A given column is not a column of the dataframe"
    with pytest.raises(ValueError, match=err_msg):
        partial_dependence(estimator, df, features)

@pytest.mark.parametrize(
    "estimator", [LinearRegression(), GradientBoostingClassifier(random_state=0)]
)
def test_partial_dependence_X_list(estimator):
    # check that array-like objects are accepted
    X, y = make_classification(random_state=0)
    estimator = clone(estimator).fit(X, y)
    partial_dependence(estimator, list(X), [0], kind="average")

def test_warning_recursion_non_constant_init():
    # make sure that passing a non-constant init parameter to a GBDT and using
    # the recursion method yields a warning.
    gbc = GradientBoostingClassifier(init=DummyClassifier(), random_state=0)
    gbc.fit(X, y)

    with pytest.warns(
        UserWarning, match="Using recursion method with a non-constant init predictor"
    ):
        partial_dependence(gbc, X, [0], method="recursion", kind="average")

    # the warning should be raised again on a second, identical call
    with pytest.warns(
        UserWarning, match="Using recursion method with a non-constant init predictor"
    ):
        partial_dependence(gbc, X, [0], method="recursion", kind="average")

def test_partial_dependence_sample_weight_of_fitted_estimator():
    # Test near perfect correlation between partial dependence and diagonal
    # when sample weights emphasize y = x predictions
    # non-regression test for #13193
    # TODO: extend to HistGradientBoosting once sample_weight is supported
    N = 1000
    rng = np.random.RandomState(123456)
    mask = rng.randint(2, size=N, dtype=bool)

    x = rng.rand(N)
    # set y = x on mask and y = -x outside
    y = x.copy()
    y[~mask] = -y[~mask]
    X = np.c_[mask, x]
    # sample weights to emphasize data points where y = x
    sample_weight = np.ones(N)
    sample_weight[mask] = 1000.0

    clf = GradientBoostingRegressor(n_estimators=10, random_state=1)
    clf.fit(X, y, sample_weight=sample_weight)

    pdp = partial_dependence(clf, X, features=[1], kind="average")

    assert np.corrcoef(pdp["average"], pdp["grid_values"])[0, 1] > 0.99

def test_hist_gbdt_sw_not_supported():
    # TODO: remove/fix when PDP supports HGBT with sample weights
    clf = HistGradientBoostingRegressor(random_state=1)
    clf.fit(X, y, sample_weight=np.ones(len(X)))

    with pytest.raises(
        NotImplementedError, match="does not support partial dependence"
    ):
        partial_dependence(clf, X, features=[1])

def test_partial_dependence_pipeline():
    # check that partial_dependence supports pipelines
    iris = load_iris()

    scaler = StandardScaler()
    clf = DummyClassifier(random_state=42)
    pipe = make_pipeline(scaler, clf)

    clf.fit(scaler.fit_transform(iris.data), iris.target)
    pipe.fit(iris.data, iris.target)

    features = 0
    pdp_pipe = partial_dependence(
        pipe, iris.data, features=[features], grid_resolution=10, kind="average"
    )
    pdp_clf = partial_dependence(
        clf,
        scaler.transform(iris.data),
        features=[features],
        grid_resolution=10,
        kind="average",
    )
    assert_allclose(pdp_pipe["average"], pdp_clf["average"])
    # the grid of the bare classifier lives in the scaled space; map it back
    # through the inverse of StandardScaler (x -> x * scale_ + mean_)
    assert_allclose(
        pdp_pipe["grid_values"][0],
        pdp_clf["grid_values"][0] * scaler.scale_[features] + scaler.mean_[features],
    )

@pytest.mark.parametrize(
    "features, grid_resolution, n_vals_expected",
    [
        (["a"], 10, 10),
        (["a"], 2, 2),
    ],
)
def test_partial_dependence_binary_model_grid_resolution(
    features, grid_resolution, n_vals_expected
):
    pd = pytest.importorskip("pandas")
    model = DummyClassifier()
    rng = np.random.RandomState(0)
    X = pd.DataFrame(
        {
            "a": rng.randint(0, 10, size=100).astype(np.float64),
            "b": rng.randint(0, 10, size=100).astype(np.float64),
        }
    )
    y = pd.Series(rng.randint(0, 2, size=100))
    model.fit(X, y)

    part_dep = partial_dependence(
        model,
        X,
        features=features,
        grid_resolution=grid_resolution,
        kind="average",
    )
    assert part_dep["average"].size == n_vals_expected

@pytest.mark.parametrize(
    "features, custom_values, n_vals_expected",
    [
        (["a"], {"a": [1.0, 2.0, 3.0, 4.0]}, 4),
        (["a"], {"a": [1.0, 2.0]}, 2),
        (["a"], {"a": [1.0]}, 1),
    ],
)
def test_partial_dependence_binary_model_custom_values(
    features, custom_values, n_vals_expected
):
    pd = pytest.importorskip("pandas")
    model = DummyClassifier()
    X = pd.DataFrame({"a": [1.0, 2.0, 3.0, 4.0], "b": [6.0, 7.0, 8.0, 9.0]})
    y = pd.Series([0, 1, 0, 1])
    model.fit(X, y)

    part_dep = partial_dependence(
        model,
        X,
        features=features,
        grid_resolution=3,
        custom_values=custom_values,
        kind="average",
    )
    assert part_dep["average"].size == n_vals_expected

@pytest.mark.parametrize(
    "features, custom_values, n_vals_expected",
    [
        (["b"], {"b": ["a", "b"]}, 2),
        (["b"], {"b": ["a"]}, 1),
        (["a", "b"], {"a": [1.0, 2.0], "b": ["a", "b"]}, 4),
    ],
)
def test_partial_dependence_pipeline_custom_values(
    features, custom_values, n_vals_expected
):
    pd = pytest.importorskip("pandas")
    pl = make_pipeline(
        SimpleImputer(strategy="most_frequent"), OneHotEncoder(), DummyClassifier()
    )
    X = pd.DataFrame({"a": [1.0, 2.0, 3.0, 4.0], "b": ["a", "b", "a", "b"]})
    y = pd.Series([0, 1, 0, 1])
    pl.fit(X, y)

    X_holdout = pd.DataFrame({"a": [1.0, 2.0, 3.0, 4.0], "b": ["a", "b", "a", None]})
    part_dep = partial_dependence(
        pl,
        X_holdout,
        features=features,
        grid_resolution=3,
        custom_values=custom_values,
        kind="average",
    )
    assert part_dep["average"].size == n_vals_expected

@pytest.mark.parametrize(
    "estimator",
    [
        LogisticRegression(max_iter=1000, random_state=0),
        GradientBoostingClassifier(random_state=0, n_estimators=5),
    ],
    ids=["estimator-brute", "estimator-recursion"],
)
@pytest.mark.parametrize(
    "preprocessor",
    [
        None,
        make_column_transformer(
            (StandardScaler(), [iris.feature_names[i] for i in (0, 2)]),
            (RobustScaler(), [iris.feature_names[i] for i in (1, 3)]),
        ),
        make_column_transformer(
            (StandardScaler(), [iris.feature_names[i] for i in (0, 2)]),
            remainder="passthrough",
        ),
    ],
    ids=["None", "column-transformer", "column-transformer-passthrough"],
)
@pytest.mark.parametrize(
    "features",
    [[0, 2], [iris.feature_names[i] for i in (0, 2)]],
    ids=["features-integer", "features-string"],
)
def test_partial_dependence_dataframe(estimator, preprocessor, features):
    # check that partial_dependence supports dataframes and pipelines
    # including a column transformer
    pd = pytest.importorskip("pandas")
    df = pd.DataFrame(scale(iris.data), columns=iris.feature_names)

    pipe = make_pipeline(preprocessor, clone(estimator))
    pipe.fit(df, iris.target)
    pdp_pipe = partial_dependence(
        pipe, df, features=features, grid_resolution=10, kind="average"
    )

    # the column transformer reorders the columns when transforming: the
    # requested columns (0 and 2) end up at indices 0 and 1 of the processed
    # array, so we remap the feature indices to be sure that we compute the
    # partial dependence of the right columns
    if preprocessor is not None:
        X_proc = clone(preprocessor).fit_transform(df)
        features_clf = [0, 1]
    else:
        X_proc = df
        features_clf = [0, 2]

    clf = clone(estimator).fit(X_proc, iris.target)
    pdp_clf = partial_dependence(
        clf,
        X_proc,
        features=features_clf,
        method="brute",
        grid_resolution=10,
        kind="average",
    )

    assert_allclose(pdp_pipe["average"], pdp_clf["average"])
    if preprocessor is not None:
        scaler = preprocessor.named_transformers_["standardscaler"]
        assert_allclose(
            pdp_pipe["grid_values"][1],
            pdp_clf["grid_values"][1] * scaler.scale_[1] + scaler.mean_[1],
        )
    else:
        assert_allclose(pdp_pipe["grid_values"][1], pdp_clf["grid_values"][1])

@pytest.mark.parametrize(
    "features, custom_values, expected_pd_shape",
    [
        (0, None, (3, 10)),
        (0, {0: [1.0, 2.0, 3.0]}, (3, 3)),
        (iris.feature_names[0], None, (3, 10)),
        (iris.feature_names[0], {iris.feature_names[0]: np.array([1.0, 2.0])}, (3, 2)),
        ([0, 2], None, (3, 10, 10)),
        ([0, 2], {2: [7, 8, 9, 10]}, (3, 10, 4)),
        ([iris.feature_names[i] for i in (0, 2)], None, (3, 10, 10)),
        (
            [iris.feature_names[i] for i in (0, 2)],
            {iris.feature_names[2]: [1, 2, 3, 10]},
            (3, 10, 4),
        ),
        # the integer key does not match the string feature names, so the
        # custom values are ignored and the default grid is used
        ([iris.feature_names[i] for i in (0, 2)], {2: [1, 2, 3, 10]}, (3, 10, 10)),
        (
            [iris.feature_names[i] for i in (0, 2, 3)],
            {iris.feature_names[2]: [1, 10]},
            (3, 10, 2, 10),
        ),
        ([True, False, True, False], None, (3, 10, 10)),
    ],
    ids=[
        "scalar-int",
        "scalar-int-custom-values",
        "scalar-str",
        "scalar-str-custom-values",
        "list-int",
        "list-int-custom-values",
        "list-str",
        "list-str-custom-values",
        "list-str-custom-values-incorrect",
        "list-str-three-features",
        "mask",
    ],
)
def test_partial_dependence_feature_type(features, custom_values, expected_pd_shape):
    # check all possible feature types supported in PDP
    pd = pytest.importorskip("pandas")
    df = pd.DataFrame(iris.data, columns=iris.feature_names)

    preprocessor = make_column_transformer(
        (StandardScaler(), [iris.feature_names[i] for i in (0, 2)]),
        (RobustScaler(), [iris.feature_names[i] for i in (1, 3)]),
    )
    pipe = make_pipeline(
        preprocessor, LogisticRegression(max_iter=1000, random_state=0)
    )
    pipe.fit(df, iris.target)
    pdp_pipe = partial_dependence(
        pipe,
        df,
        features=features,
        grid_resolution=10,
        kind="average",
        custom_values=custom_values,
    )
    assert pdp_pipe["average"].shape == expected_pd_shape
    assert len(pdp_pipe["grid_values"]) == len(pdp_pipe["average"].shape) - 1

@pytest.mark.parametrize(
    "estimator",
    [
        LinearRegression(),
        LogisticRegression(),
        GradientBoostingRegressor(),
        GradientBoostingClassifier(),
    ],
)
def test_partial_dependence_unfitted(estimator):
    X = iris.data
    preprocessor = make_column_transformer(
        (StandardScaler(), [0, 2]), (RobustScaler(), [1, 3])
    )
    pipe = make_pipeline(preprocessor, estimator)
    with pytest.raises(NotFittedError, match="is not fitted yet"):
        partial_dependence(pipe, X, features=[0, 2], grid_resolution=10)
    with pytest.raises(NotFittedError, match="is not fitted yet"):
        partial_dependence(estimator, X, features=[0, 2], grid_resolution=10)

@pytest.mark.parametrize(
    "Estimator, data",
    [
        (LinearRegression, multioutput_regression_data),
        (LogisticRegression, binary_classification_data),
    ],
)
def test_kind_average_and_average_of_individual(Estimator, data):
    est = Estimator()
    (X, y), n_targets = data
    est.fit(X, y)

    pdp_avg = partial_dependence(est, X=X, features=[1, 2], kind="average")
    pdp_ind = partial_dependence(est, X=X, features=[1, 2], kind="individual")
    avg_ind = np.mean(pdp_ind["individual"], axis=1)
    assert_allclose(avg_ind, pdp_avg["average"])

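# By definition, the PDP is the pointwise mean of the ICE curves: with
# "individual" of shape (n_targets, n_instances, *grid_shape), averaging over
# axis=1 (the instances) must reproduce "average", as checked above.
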
@pytest.mark.parametrize(
    "Estimator, data",
    [
        (LinearRegression, multioutput_regression_data),
        (LogisticRegression, binary_classification_data),
    ],
)
def test_partial_dependence_kind_individual_ignores_sample_weight(Estimator, data):
    """Check that `sample_weight` does not have any effect on reported ICE."""
    est = Estimator()
    (X, y), n_targets = data
    sample_weight = np.arange(X.shape[0])
    est.fit(X, y)

    pdp_nsw = partial_dependence(est, X=X, features=[1, 2], kind="individual")
    pdp_sw = partial_dependence(
        est, X=X, features=[1, 2], kind="individual", sample_weight=sample_weight
    )
    assert_allclose(pdp_nsw["individual"], pdp_sw["individual"])
    assert_allclose(pdp_nsw["grid_values"], pdp_sw["grid_values"])

@pytest.mark.parametrize(
    "estimator",
    [
        LinearRegression(),
        LogisticRegression(),
        RandomForestRegressor(),
        GradientBoostingClassifier(),
    ],
)
@pytest.mark.parametrize("non_null_weight_idx", [0, 1, -1])
def test_partial_dependence_non_null_weight_idx(estimator, non_null_weight_idx):
    """Check that passing a `sample_weight` that is zero everywhere except at
    one index makes the weighted average `partial_dependence` equal to the
    individual `partial_dependence` (ICE curve) of that index.
    """
    X, y = iris.data, iris.target
    preprocessor = make_column_transformer(
        (StandardScaler(), [0, 2]), (RobustScaler(), [1, 3])
    )
    pipe = make_pipeline(preprocessor, clone(estimator)).fit(X, y)

    sample_weight = np.zeros_like(y)
    sample_weight[non_null_weight_idx] = 1
    pdp_sw = partial_dependence(
        pipe,
        X,
        [2, 3],
        kind="average",
        sample_weight=sample_weight,
        grid_resolution=10,
    )
    pdp_ind = partial_dependence(pipe, X, [2, 3], kind="individual", grid_resolution=10)
    output_dim = 1 if is_regressor(pipe) else len(np.unique(y))
    for i in range(output_dim):
        assert_allclose(
            pdp_ind["individual"][i][non_null_weight_idx],
            pdp_sw["average"][i],
        )

@pytest.mark.parametrize(
    "Estimator, data",
    [
        (LinearRegression, multioutput_regression_data),
        (LogisticRegression, binary_classification_data),
    ],
)
def test_partial_dependence_equivalence_equal_sample_weight(Estimator, data):
    """Check that `sample_weight=None` is equivalent to having equal weights."""
    est = Estimator()
    (X, y), n_targets = data
    est.fit(X, y)

    sample_weight, params = None, {"X": X, "features": [1, 2], "kind": "average"}
    pdp_sw_none = partial_dependence(est, **params, sample_weight=sample_weight)
    sample_weight = np.ones(len(y))
    pdp_sw_unit = partial_dependence(est, **params, sample_weight=sample_weight)
    assert_allclose(pdp_sw_none["average"], pdp_sw_unit["average"])
    sample_weight = 2 * np.ones(len(y))
    pdp_sw_doubling = partial_dependence(est, **params, sample_weight=sample_weight)
    assert_allclose(pdp_sw_none["average"], pdp_sw_doubling["average"])

def test_partial_dependence_sample_weight_size_error():
    """Check that we raise an error when the size of `sample_weight` is not
    consistent with `X` and `y`.
    """
    est = LogisticRegression()
    (X, y), n_targets = binary_classification_data
    sample_weight = np.ones_like(y)
    est.fit(X, y)

    with pytest.raises(ValueError, match="sample_weight.shape =="):
        partial_dependence(
            est, X, features=[0], sample_weight=sample_weight[1:], grid_resolution=10
        )

def test_partial_dependence_sample_weight_with_recursion():
    """Check that we raise an error when `sample_weight` is provided with the
    `"recursion"` method.
    """
    est = RandomForestRegressor()
    (X, y), n_targets = regression_data
    sample_weight = np.ones_like(y)
    est.fit(X, y, sample_weight=sample_weight)

    with pytest.raises(ValueError, match="'recursion' method can only be applied when"):
        partial_dependence(
            est, X, features=[0], method="recursion", sample_weight=sample_weight
        )

def test_mixed_type_categorical():
    """Check that we raise a proper error when a column has mixed types and
    the sorting of `np.unique` will fail."""
    X = np.array(["A", "B", "C", np.nan], dtype=object).reshape(-1, 1)
    y = np.array([0, 1, 0, 1])

    from sklearn.preprocessing import OrdinalEncoder

    clf = make_pipeline(
        OrdinalEncoder(encoded_missing_value=-1),
        LogisticRegression(),
    ).fit(X, y)
    with pytest.raises(ValueError, match="The column #0 contains mixed data types"):
        partial_dependence(clf, X, features=[0])

def test_reject_array_with_integer_dtype():
    X = np.arange(8).reshape(4, 2)
    y = np.array([0, 1, 0, 1])
    clf = DummyClassifier()
    clf.fit(X, y)

    with pytest.warns(
        FutureWarning, match=re.escape("The column 0 contains integer data.")
    ):
        partial_dependence(clf, X, features=0)

    with pytest.warns(
        FutureWarning, match=re.escape("The column 1 contains integer data.")
    ):
        partial_dependence(clf, X, features=[1], categorical_features=[0])

    with pytest.warns(
        FutureWarning, match=re.escape("The column 0 contains integer data.")
    ):
        partial_dependence(clf, X, features=[0, 1])

    # The following should not raise as we do not compute numerical partial
    # dependence on integer columns.
    with warnings.catch_warnings():
        warnings.simplefilter("error")
        partial_dependence(clf, X, features=1, categorical_features=[1])

def test_reject_pandas_with_integer_dtype():
    pd = pytest.importorskip("pandas")
    X = pd.DataFrame(
        {
            "a": [1.0, 2.0, 3.0],
            "b": [1, 2, 3],
            "c": [1, 2, 3],
        }
    )
    y = np.array([0, 1, 0])
    clf = DummyClassifier()
    clf.fit(X, y)

    with pytest.warns(
        FutureWarning, match=re.escape("The column 'c' contains integer data.")
    ):
        partial_dependence(clf, X, features="c")

    with pytest.warns(
        FutureWarning, match=re.escape("The column 'c' contains integer data.")
    ):
        partial_dependence(clf, X, features=["a", "c"])

    # The following should not raise as we do not compute numerical partial
    # dependence on integer columns.
    with warnings.catch_warnings():
        warnings.simplefilter("error")
        partial_dependence(clf, X, features=["a"])
        partial_dependence(clf, X, features=["c"], categorical_features=["c"])

def test_partial_dependence_empty_categorical_features():
    """Check that we raise the proper exception when `categorical_features`
    is an empty list."""
    clf = make_pipeline(StandardScaler(), LogisticRegression())
    clf.fit(iris.data, iris.target)

    with pytest.raises(
        ValueError,
        match=re.escape(
            "Passing an empty list (`[]`) to `categorical_features` is not "
            "supported. Use `None` instead to indicate that there are no "
            "categorical features."
        ),
    ):
        partial_dependence(
            estimator=clf, X=iris.data, features=[0], categorical_features=[]
        )