You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1064 lines
42 KiB

import sys
import warnings
import numpy as np
import numpy.testing as npt
import pytest
from pytest import raises as assert_raises
from scipy.integrate import IntegrationWarning
import itertools
from scipy import stats
from .common_tests import (check_normalization, check_moment,
check_mean_expect,
check_var_expect, check_skew_expect,
check_kurt_expect, check_entropy,
check_private_entropy, check_entropy_vect_scale,
check_edge_support, check_named_args,
check_random_state_property,
check_meth_dtype, check_ppf_dtype,
check_cmplx_deriv,
check_pickling, check_rvs_broadcast,
check_freezing, check_munp_expect,)
from scipy.stats._distr_params import distcont
from scipy.stats._distn_infrastructure import rv_continuous_frozen
"""
Test all continuous distributions.
Parameters were chosen for those distributions that pass the
Kolmogorov-Smirnov test. This provides safe parameters for each
distributions so that we can perform further testing of class methods.
These tests currently check only/mostly for serious errors and exceptions,
not for numerically exact results.
"""
# Note that you need to add new distributions you want tested
# to _distr_params
DECIMAL = 5 # specify the precision of the tests # increased from 0 to 5
_IS_32BIT = (sys.maxsize < 2**32)
# Sets of tests to skip.
# Entries sorted by speed (very slow to slow).
# xslow took > 1s; slow took > 0.5s
xslow_test_cont_basic = {'studentized_range', 'kstwo', 'ksone', 'vonmises', 'kappa4',
'recipinvgauss', 'vonmises_line', 'gausshyper',
'rel_breitwigner', 'norminvgauss'}
slow_test_cont_basic = {'crystalball', 'powerlognorm', 'pearson3'}
# test_moments is already marked slow
xslow_test_moments = {'studentized_range', 'ksone', 'vonmises', 'vonmises_line',
'recipinvgauss', 'kstwo', 'kappa4'}
slow_fit_mle = {'exponweib', 'genexpon', 'genhyperbolic', 'johnsonsb',
'kappa4', 'powerlognorm', 'tukeylambda'}
xslow_fit_mle = {'gausshyper', 'ncf', 'ncx2', 'recipinvgauss', 'vonmises_line'}
xfail_fit_mle = {'ksone', 'kstwo', 'truncpareto', 'irwinhall'}
skip_fit_mle = {'levy_stable', 'studentized_range'} # far too slow (>10min)
slow_fit_mm = {'chi2', 'expon', 'lognorm', 'loguniform', 'powerlaw', 'reciprocal'}
xslow_fit_mm = {'argus', 'beta', 'exponpow', 'gausshyper', 'gengamma',
'genhalflogistic', 'geninvgauss', 'gompertz', 'halfgennorm',
'johnsonsb', 'kstwobign', 'ncx2', 'norminvgauss', 'trapezoid',
'truncnorm', 'truncweibull_min', 'wrapcauchy'}
xfail_fit_mm = {'alpha', 'betaprime', 'bradford', 'burr', 'burr12', 'cauchy',
'crystalball', 'dpareto_lognorm', 'exponweib', 'f', 'fisk',
'foldcauchy', 'genextreme', 'genpareto', 'halfcauchy', 'invgamma',
'irwinhall', 'jf_skew_t', 'johnsonsu', 'kappa3', 'kappa4', 'landau',
'levy', 'levy_l', 'loglaplace', 'lomax', 'mielke', 'ncf', 'nct',
'pareto', 'powerlognorm', 'powernorm', 'rel_breitwigner',
'skewcauchy', 't', 'truncexpon', 'truncpareto',
'tukeylambda', 'vonmises', 'vonmises_line'}
skip_fit_mm = {'genexpon', 'genhyperbolic', 'ksone', 'kstwo', 'levy_stable',
'recipinvgauss', 'studentized_range'} # far too slow (>10min)
# These distributions fail the complex derivative test below.
# Here 'fail' mean produce wrong results and/or raise exceptions, depending
# on the implementation details of corresponding special functions.
# cf https://github.com/scipy/scipy/pull/4979 for a discussion.
fails_cmplx = {'argus', 'beta', 'betaprime', 'cauchy', 'chi', 'chi2', 'cosine',
'dgamma', 'dpareto_lognorm', 'dweibull', 'erlang', 'f', 'foldcauchy',
'gamma', 'gausshyper', 'gengamma', 'genhyperbolic',
'geninvgauss', 'gennorm', 'genpareto',
'halfcauchy', 'halfgennorm', 'invgamma', 'irwinhall', 'jf_skew_t',
'ksone', 'kstwo', 'kstwobign', 'landau', 'levy_l', 'loggamma',
'logistic', 'loguniform', 'maxwell', 'nakagami',
'ncf', 'nct', 'ncx2', 'norminvgauss', 'pearson3',
'powerlaw', 'rdist', 'reciprocal', 'rice',
'skewnorm', 't', 'truncpareto', 'truncweibull_min',
'tukeylambda', 'vonmises', 'vonmises_line',
'rv_histogram_instance', 'truncnorm', 'studentized_range',
'johnsonsb', 'halflogistic', 'rel_breitwigner'}
# Slow test_method_with_lists
slow_with_lists = {'studentized_range'}
# rv_histogram instances, with uniform and non-uniform bins;
# stored as (dist, arg) tuples for cases_test_cont_basic
# and cases_test_moments.
histogram_test_instances = []
case1 = {'a': [1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 5, 6,
6, 6, 6, 7, 7, 7, 8, 8, 9], 'bins': 8} # equal width bins
case2 = {'a': [1, 1], 'bins': [0, 1, 10]} # unequal width bins
for case, density in itertools.product([case1, case2], [True, False]):
_hist = np.histogram(**case, density=density)
_rv_hist = stats.rv_histogram(_hist, density=density)
histogram_test_instances.append((_rv_hist, tuple()))
def cases_test_cont_basic():
for distname, arg in distcont[:] + histogram_test_instances:
if distname == 'levy_stable': # fails; tested separately
continue
if distname in slow_test_cont_basic:
yield pytest.param(distname, arg, marks=pytest.mark.slow)
elif distname in xslow_test_cont_basic:
yield pytest.param(distname, arg, marks=pytest.mark.xslow)
else:
yield distname, arg
@pytest.mark.parametrize('distname,arg', cases_test_cont_basic())
@pytest.mark.parametrize('sn', [500])
def test_cont_basic(distname, arg, sn, num_parallel_threads):
try:
distfn = getattr(stats, distname)
except TypeError:
distfn = distname
distname = 'rv_histogram_instance'
rng = np.random.default_rng(7654565)
rvs = distfn.rvs(size=sn, *arg, random_state=rng)
m, v = distfn.stats(*arg)
if distname not in {'laplace_asymmetric'}:
# TODO: multiple checks in this function are not robust, tweaking the
# seed above will make different distributions fail.
check_sample_meanvar_(m, v, rvs, rng)
check_cdf_ppf(distfn, arg, distname)
check_sf_isf(distfn, arg, distname)
check_cdf_sf(distfn, arg, distname)
check_ppf_isf(distfn, arg, distname)
check_pdf(distfn, arg, distname)
check_pdf_logpdf(distfn, arg, distname)
check_pdf_logpdf_at_endpoints(distfn, arg, distname)
check_cdf_logcdf(distfn, arg, distname)
check_sf_logsf(distfn, arg, distname)
check_ppf_broadcast(distfn, arg, distname)
alpha = 0.01
if distname == 'rv_histogram_instance':
check_distribution_rvs(distfn.cdf, arg, alpha, rvs)
elif distname != 'geninvgauss':
# skip kstest for geninvgauss since cdf is too slow; see test for
# rv generation in TestGenInvGauss in test_distributions.py
check_distribution_rvs(distname, arg, alpha, rvs)
locscale_defaults = (0, 1)
meths = [distfn.pdf, distfn.logpdf, distfn.cdf, distfn.logcdf,
distfn.logsf]
# make sure arguments are within support
spec_x = {'weibull_max': -0.5, 'levy_l': -0.5,
'pareto': 1.5, 'truncpareto': 3.2, 'tukeylambda': 0.3,
'rv_histogram_instance': 5.0}
x = spec_x.get(distname, 0.5)
if distname == 'invweibull':
arg = (1,)
elif distname == 'ksone':
arg = (3,)
check_named_args(distfn, x, arg, locscale_defaults, meths)
if num_parallel_threads == 1:
check_random_state_property(distfn, arg)
if distname in ['rel_breitwigner'] and _IS_32BIT:
# gh18414
pytest.skip("fails on Linux 32-bit")
else:
check_pickling(distfn, arg)
check_freezing(distfn, arg)
# Entropy
if distname not in ['kstwobign', 'kstwo', 'ncf']:
check_entropy(distfn, arg, distname)
if distfn.numargs == 0:
check_vecentropy(distfn, arg)
if (distfn.__class__._entropy != stats.rv_continuous._entropy
and distname != 'vonmises'):
check_private_entropy(distfn, arg, stats.rv_continuous)
with warnings.catch_warnings():
warnings.filterwarnings(
"ignore", "The occurrence of roundoff error", IntegrationWarning)
warnings.filterwarnings("ignore", "Extremely bad integrand", IntegrationWarning)
warnings.filterwarnings("ignore", "invalid value", RuntimeWarning)
check_entropy_vect_scale(distfn, arg)
check_retrieving_support(distfn, arg)
check_edge_support(distfn, arg)
check_meth_dtype(distfn, arg, meths)
check_ppf_dtype(distfn, arg)
if distname not in fails_cmplx:
check_cmplx_deriv(distfn, arg)
if distname != 'truncnorm':
check_ppf_private(distfn, arg, distname)
def cases_test_cont_basic_fit():
slow = pytest.mark.slow
xslow = pytest.mark.xslow
fail = pytest.mark.skip(reason="Test fails and may be slow.")
skip = pytest.mark.skip(reason="Test too slow to run to completion (>10m).")
for distname, arg in distcont[:] + histogram_test_instances:
for method in ["MLE", "MM"]:
for fix_args in [True, False]:
if method == 'MLE' and distname in slow_fit_mle:
yield pytest.param(distname, arg, method, fix_args, marks=slow)
continue
if method == 'MLE' and distname in xslow_fit_mle:
yield pytest.param(distname, arg, method, fix_args, marks=xslow)
continue
if method == 'MLE' and distname in xfail_fit_mle:
yield pytest.param(distname, arg, method, fix_args, marks=fail)
continue
if method == 'MLE' and distname in skip_fit_mle:
yield pytest.param(distname, arg, method, fix_args, marks=skip)
continue
if method == 'MM' and distname in slow_fit_mm:
yield pytest.param(distname, arg, method, fix_args, marks=slow)
continue
if method == 'MM' and distname in xslow_fit_mm:
yield pytest.param(distname, arg, method, fix_args, marks=xslow)
continue
if method == 'MM' and distname in xfail_fit_mm:
yield pytest.param(distname, arg, method, fix_args, marks=fail)
continue
if method == 'MM' and distname in skip_fit_mm:
yield pytest.param(distname, arg, method, fix_args, marks=skip)
continue
yield distname, arg, method, fix_args
def test_cont_basic_fit_cases():
# Distribution names should not be in multiple MLE or MM sets
assert (len(xslow_fit_mle.union(xfail_fit_mle).union(skip_fit_mle)) ==
len(xslow_fit_mle) + len(xfail_fit_mle) + len(skip_fit_mle))
assert (len(xslow_fit_mm.union(xfail_fit_mm).union(skip_fit_mm)) ==
len(xslow_fit_mm) + len(xfail_fit_mm) + len(skip_fit_mm))
@pytest.mark.parametrize('distname, arg, method, fix_args',
cases_test_cont_basic_fit())
@pytest.mark.parametrize('n_fit_samples', [200])
def test_cont_basic_fit(distname, arg, n_fit_samples, method, fix_args):
try:
distfn = getattr(stats, distname)
except TypeError:
distfn = distname
rng = np.random.RandomState(765456)
rvs = distfn.rvs(size=n_fit_samples, *arg, random_state=rng)
if fix_args:
check_fit_args_fix(distfn, arg, rvs, method)
else:
check_fit_args(distfn, arg, rvs, method)
@pytest.mark.parametrize('distname,arg', cases_test_cont_basic())
def test_rvs_scalar(distname, arg):
# rvs should return a scalar when given scalar arguments (gh-12428)
try:
distfn = getattr(stats, distname)
except TypeError:
distfn = distname
distname = 'rv_histogram_instance'
assert np.isscalar(distfn.rvs(*arg))
assert np.isscalar(distfn.rvs(*arg, size=()))
assert np.isscalar(distfn.rvs(*arg, size=None))
@pytest.mark.thread_unsafe(reason="global rng")
def test_levy_stable_random_state_property():
# levy_stable only implements rvs(), so it is skipped in the
# main loop in test_cont_basic(). Here we apply just the test
# check_random_state_property to levy_stable.
check_random_state_property(stats.levy_stable, (0.5, 0.1))
def cases_test_moments():
fail_normalization = set()
fail_higher = {'ncf'}
fail_moment = {'johnsonsu'} # generic `munp` is inaccurate for johnsonsu
for distname, arg in distcont[:] + histogram_test_instances:
if distname == 'levy_stable':
continue
if distname in xslow_test_moments:
yield pytest.param(distname, arg, True, True, True, True,
marks=pytest.mark.xslow(reason="too slow"))
continue
cond1 = distname not in fail_normalization
cond2 = distname not in fail_higher
cond3 = distname not in fail_moment
marks = list()
# Currently unused, `marks` can be used to add a timeout to a test of
# a specific distribution. For example, this shows how a timeout could
# be added for the 'skewnorm' distribution:
#
# marks = list()
# if distname == 'skewnorm':
# marks.append(pytest.mark.timeout(300))
yield pytest.param(distname, arg, cond1, cond2, cond3,
False, marks=marks)
if not cond1 or not cond2 or not cond3:
# Run the distributions that have issues twice, once skipping the
# not_ok parts, once with the not_ok parts but marked as knownfail
yield pytest.param(distname, arg, True, True, True, True,
marks=[pytest.mark.xfail] + marks)
@pytest.mark.slow
@pytest.mark.parametrize('distname,arg,normalization_ok,higher_ok,moment_ok,'
'is_xfailing',
cases_test_moments())
def test_moments(distname, arg, normalization_ok, higher_ok, moment_ok,
is_xfailing):
try:
distfn = getattr(stats, distname)
except TypeError:
distfn = distname
distname = 'rv_histogram_instance'
with warnings.catch_warnings():
warnings.filterwarnings(
"ignore",
"The integral is probably divergent, or slowly convergent.",
IntegrationWarning,
)
warnings.filterwarnings(
"ignore",
"The maximum number of subdivisions.",
IntegrationWarning
)
warnings.filterwarnings(
"ignore",
"The algorithm does not converge.",
IntegrationWarning
)
if is_xfailing:
warnings.simplefilter("ignore", IntegrationWarning)
m, v, s, k = distfn.stats(*arg, moments='mvsk')
with np.errstate(all="ignore"):
if normalization_ok:
check_normalization(distfn, arg, distname)
if higher_ok:
check_mean_expect(distfn, arg, m, distname)
check_skew_expect(distfn, arg, m, v, s, distname)
check_var_expect(distfn, arg, m, v, distname)
check_kurt_expect(distfn, arg, m, v, k, distname)
check_munp_expect(distfn, arg, distname)
check_loc_scale(distfn, arg, m, v, distname)
if moment_ok:
check_moment(distfn, arg, m, v, distname)
@pytest.mark.parametrize('dist,shape_args', distcont)
def test_rvs_broadcast(dist, shape_args):
if dist in ['gausshyper', 'studentized_range']:
pytest.skip("too slow")
if dist in ['rel_breitwigner'] and _IS_32BIT:
# gh18414
pytest.skip("fails on Linux 32-bit")
# If shape_only is True, it means the _rvs method of the
# distribution uses more than one random number to generate a random
# variate. That means the result of using rvs with broadcasting or
# with a nontrivial size will not necessarily be the same as using the
# numpy.vectorize'd version of rvs(), so we can only compare the shapes
# of the results, not the values.
# Whether or not a distribution is in the following list is an
# implementation detail of the distribution, not a requirement. If
# the implementation the rvs() method of a distribution changes, this
# test might also have to be changed.
shape_only = dist in ['argus', 'betaprime', 'dgamma', 'dpareto_lognorm', 'dweibull',
'exponnorm', 'genhyperbolic', 'geninvgauss', 'landau',
'levy_stable', 'nct', 'norminvgauss', 'rice',
'skewnorm', 'semicircular', 'gennorm', 'loggamma']
distfunc = getattr(stats, dist)
loc = np.zeros(2)
scale = np.ones((3, 1))
nargs = distfunc.numargs
allargs = []
bshape = [3, 2]
# Generate shape parameter arguments...
for k in range(nargs):
shp = (k + 4,) + (1,)*(k + 2)
allargs.append(shape_args[k]*np.ones(shp))
bshape.insert(0, k + 4)
allargs.extend([loc, scale])
# bshape holds the expected shape when loc, scale, and the shape
# parameters are all broadcast together.
check_rvs_broadcast(distfunc, dist, allargs, bshape, shape_only, 'd')
# Expected values of the SF, CDF, PDF were computed using
# mpmath with mpmath.mp.dps = 50 and output at 20:
#
# def ks(x, n):
# x = mpmath.mpf(x)
# logp = -mpmath.power(6.0*n*x+1.0, 2)/18.0/n
# sf, cdf = mpmath.exp(logp), -mpmath.expm1(logp)
# pdf = (6.0*n*x+1.0) * 2 * sf/3
# print(mpmath.nstr(sf, 20), mpmath.nstr(cdf, 20), mpmath.nstr(pdf, 20))
#
# Tests use 1/n < x < 1-1/n and n > 1e6 to use the asymptotic computation.
# Larger x has a smaller sf.
@pytest.mark.parametrize('x,n,sf,cdf,pdf,rtol',
[(2.0e-5, 1000000000,
0.44932297307934442379, 0.55067702692065557621,
35946.137394996276407, 5e-15),
(2.0e-9, 1000000000,
0.99999999061111115519, 9.3888888448132728224e-9,
8.6666665852962971765, 5e-14),
(5.0e-4, 1000000000,
7.1222019433090374624e-218, 1.0,
1.4244408634752704094e-211, 5e-14)])
def test_gh17775_regression(x, n, sf, cdf, pdf, rtol):
# Regression test for gh-17775. In scipy 1.9.3 and earlier,
# these test would fail.
#
# KS one asymptotic sf ~ e^(-(6nx+1)^2 / 18n)
# Given a large 32-bit integer n, 6n will overflow in the c implementation.
# Example of broken behaviour:
# ksone.sf(2.0e-5, 1000000000) == 0.9374359693473666
ks = stats.ksone
vals = np.array([ks.sf(x, n), ks.cdf(x, n), ks.pdf(x, n)])
expected = np.array([sf, cdf, pdf])
npt.assert_allclose(vals, expected, rtol=rtol)
# The sf+cdf must sum to 1.0.
npt.assert_equal(vals[0] + vals[1], 1.0)
# Check inverting the (potentially very small) sf (uses a lower tolerance)
npt.assert_allclose([ks.isf(sf, n)], [x], rtol=1e-8)
def test_rvs_gh2069_regression():
# Regression tests for gh-2069. In scipy 0.17 and earlier,
# these tests would fail.
#
# A typical example of the broken behavior:
# >>> norm.rvs(loc=np.zeros(5), scale=np.ones(5))
# array([-2.49613705, -2.49613705, -2.49613705, -2.49613705, -2.49613705])
rng = np.random.RandomState(123)
vals = stats.norm.rvs(loc=np.zeros(5), scale=1, random_state=rng)
d = np.diff(vals)
npt.assert_(np.all(d != 0), "All the values are equal, but they shouldn't be!")
vals = stats.norm.rvs(loc=0, scale=np.ones(5), random_state=rng)
d = np.diff(vals)
npt.assert_(np.all(d != 0), "All the values are equal, but they shouldn't be!")
vals = stats.norm.rvs(loc=np.zeros(5), scale=np.ones(5), random_state=rng)
d = np.diff(vals)
npt.assert_(np.all(d != 0), "All the values are equal, but they shouldn't be!")
vals = stats.norm.rvs(loc=np.array([[0], [0]]), scale=np.ones(5),
random_state=rng)
d = np.diff(vals.ravel())
npt.assert_(np.all(d != 0), "All the values are equal, but they shouldn't be!")
assert_raises(ValueError, stats.norm.rvs, [[0, 0], [0, 0]],
[[1, 1], [1, 1]], 1)
assert_raises(ValueError, stats.gamma.rvs, [2, 3, 4, 5], 0, 1, (2, 2))
assert_raises(ValueError, stats.gamma.rvs, [1, 1, 1, 1], [0, 0, 0, 0],
[[1], [2]], (4,))
def test_nomodify_gh9900_regression():
# Regression test for gh-9990
# Prior to gh-9990, calls to stats.truncnorm._cdf() use what ever was
# set inside the stats.truncnorm instance during stats.truncnorm.cdf().
# This could cause issues with multi-threaded code.
# Since then, the calls to cdf() are not permitted to modify the global
# stats.truncnorm instance.
tn = stats.truncnorm
# Use the right-half truncated normal
# Check that the cdf and _cdf return the same result.
npt.assert_almost_equal(tn.cdf(1, 0, np.inf),
0.6826894921370859)
npt.assert_almost_equal(tn._cdf([1], [0], [np.inf]),
0.6826894921370859)
# Now use the left-half truncated normal
npt.assert_almost_equal(tn.cdf(-1, -np.inf, 0),
0.31731050786291415)
npt.assert_almost_equal(tn._cdf([-1], [-np.inf], [0]),
0.31731050786291415)
# Check that the right-half truncated normal _cdf hasn't changed
npt.assert_almost_equal(tn._cdf([1], [0], [np.inf]),
0.6826894921370859) # Not 1.6826894921370859
npt.assert_almost_equal(tn.cdf(1, 0, np.inf),
0.6826894921370859)
# Check that the left-half truncated normal _cdf hasn't changed
npt.assert_almost_equal(tn._cdf([-1], [-np.inf], [0]),
0.31731050786291415) # Not -0.6826894921370859
npt.assert_almost_equal(tn.cdf(1, -np.inf, 0),
1) # Not 1.6826894921370859
npt.assert_almost_equal(tn.cdf(-1, -np.inf, 0),
0.31731050786291415) # Not -0.6826894921370859
def test_broadcast_gh9990_regression():
# Regression test for gh-9990
# The x-value 7 only lies within the support of 4 of the supplied
# distributions. Prior to 9990, one array passed to
# stats.reciprocal._cdf would have 4 elements, but an array
# previously stored by stats.reciprocal_argcheck() would have 6, leading
# to a broadcast error.
a = np.array([1, 2, 3, 4, 5, 6])
b = np.array([8, 16, 1, 32, 1, 48])
ans = [stats.reciprocal.cdf(7, _a, _b) for _a, _b in zip(a,b)]
npt.assert_array_almost_equal(stats.reciprocal.cdf(7, a, b), ans)
ans = [stats.reciprocal.cdf(1, _a, _b) for _a, _b in zip(a,b)]
npt.assert_array_almost_equal(stats.reciprocal.cdf(1, a, b), ans)
ans = [stats.reciprocal.cdf(_a, _a, _b) for _a, _b in zip(a,b)]
npt.assert_array_almost_equal(stats.reciprocal.cdf(a, a, b), ans)
ans = [stats.reciprocal.cdf(_b, _a, _b) for _a, _b in zip(a,b)]
npt.assert_array_almost_equal(stats.reciprocal.cdf(b, a, b), ans)
def test_broadcast_gh7933_regression():
# Check broadcast works
stats.truncnorm.logpdf(
np.array([3.0, 2.0, 1.0]),
a=(1.5 - np.array([6.0, 5.0, 4.0])) / 3.0,
b=np.inf,
loc=np.array([6.0, 5.0, 4.0]),
scale=3.0
)
def test_gh2002_regression():
# Add a check that broadcast works in situations where only some
# x-values are compatible with some of the shape arguments.
x = np.r_[-2:2:101j]
a = np.r_[-np.ones(50), np.ones(51)]
expected = [stats.truncnorm.pdf(_x, _a, np.inf) for _x, _a in zip(x, a)]
ans = stats.truncnorm.pdf(x, a, np.inf)
npt.assert_array_almost_equal(ans, expected)
def test_gh1320_regression():
# Check that the first example from gh-1320 now works.
c = 2.62
stats.genextreme.ppf(0.5, np.array([[c], [c + 0.5]]))
# The other examples in gh-1320 appear to have stopped working
# some time ago.
# ans = stats.genextreme.moment(2, np.array([c, c + 0.5]))
# expected = np.array([25.50105963, 115.11191437])
# stats.genextreme.moment(5, np.array([[c], [c + 0.5]]))
# stats.genextreme.moment(5, np.array([c, c + 0.5]))
def test_method_of_moments():
# example from https://en.wikipedia.org/wiki/Method_of_moments_(statistics)
x = [0, 0, 0, 0, 1]
a = 1/5 - 2*np.sqrt(3)/5
b = 1/5 + 2*np.sqrt(3)/5
# force use of method of moments (uniform.fit is overridden)
loc, scale = super(type(stats.uniform), stats.uniform).fit(x, method="MM")
npt.assert_almost_equal(loc, a, decimal=4)
npt.assert_almost_equal(loc+scale, b, decimal=4)
def check_sample_meanvar_(popmean, popvar, sample, rng):
if np.isfinite(popmean):
check_sample_mean(sample, popmean)
if np.isfinite(popvar):
check_sample_var(sample, popvar, rng)
def check_sample_mean(sample, popmean):
# Checks for unlikely difference between sample mean and population mean
prob = stats.ttest_1samp(sample, popmean).pvalue
assert prob > 0.01
def check_sample_var(sample, popvar, rng):
# check that population mean lies within the CI bootstrapped from the
# sample. This used to be a chi-squared test for variance, but there were
# too many false positives
res = stats.bootstrap(
(sample,),
lambda x, axis: x.var(ddof=1, axis=axis),
confidence_level=0.995,
rng=rng,
)
conf = res.confidence_interval
low, high = conf.low, conf.high
assert low <= popvar <= high
def check_cdf_ppf(distfn, arg, msg):
values = [0.001, 0.5, 0.999]
npt.assert_almost_equal(distfn.cdf(distfn.ppf(values, *arg), *arg),
values, decimal=DECIMAL, err_msg=msg +
' - cdf-ppf roundtrip')
def check_sf_isf(distfn, arg, msg):
npt.assert_almost_equal(distfn.sf(distfn.isf([0.1, 0.5, 0.9], *arg), *arg),
[0.1, 0.5, 0.9], decimal=DECIMAL, err_msg=msg +
' - sf-isf roundtrip')
def check_cdf_sf(distfn, arg, msg):
npt.assert_almost_equal(distfn.cdf([0.1, 0.9], *arg),
1.0 - distfn.sf([0.1, 0.9], *arg),
decimal=DECIMAL, err_msg=msg +
' - cdf-sf relationship')
def check_ppf_isf(distfn, arg, msg):
p = np.array([0.1, 0.9])
npt.assert_almost_equal(distfn.isf(p, *arg), distfn.ppf(1-p, *arg),
decimal=DECIMAL, err_msg=msg +
' - ppf-isf relationship')
def check_pdf(distfn, arg, msg):
# compares pdf at median with numerical derivative of cdf
median = distfn.ppf(0.5, *arg)
eps = 1e-6
pdfv = distfn.pdf(median, *arg)
if (pdfv < 1e-4) or (pdfv > 1e4):
# avoid checking a case where pdf is close to zero or
# huge (singularity)
median = median + 0.1
pdfv = distfn.pdf(median, *arg)
cdfdiff = (distfn.cdf(median + eps, *arg) -
distfn.cdf(median - eps, *arg))/eps/2.0
# replace with better diff and better test (more points),
# actually, this works pretty well
msg += ' - cdf-pdf relationship'
npt.assert_almost_equal(pdfv, cdfdiff, decimal=DECIMAL, err_msg=msg)
def check_pdf_logpdf(distfn, args, msg):
# compares pdf at several points with the log of the pdf
points = np.array([0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8])
vals = distfn.ppf(points, *args)
vals = vals[np.isfinite(vals)]
pdf = distfn.pdf(vals, *args)
logpdf = distfn.logpdf(vals, *args)
pdf = pdf[(pdf != 0) & np.isfinite(pdf)]
logpdf = logpdf[np.isfinite(logpdf)]
msg += " - logpdf-log(pdf) relationship"
npt.assert_almost_equal(np.log(pdf), logpdf, decimal=7, err_msg=msg)
def check_pdf_logpdf_at_endpoints(distfn, args, msg):
# compares pdf with the log of the pdf at the (finite) end points
points = np.array([0, 1])
vals = distfn.ppf(points, *args)
vals = vals[np.isfinite(vals)]
pdf = distfn.pdf(vals, *args)
logpdf = distfn.logpdf(vals, *args)
pdf = pdf[(pdf != 0) & np.isfinite(pdf)]
logpdf = logpdf[np.isfinite(logpdf)]
msg += " - logpdf-log(pdf) relationship"
npt.assert_almost_equal(np.log(pdf), logpdf, decimal=7, err_msg=msg)
def check_sf_logsf(distfn, args, msg):
# compares sf at several points with the log of the sf
points = np.array([0.0, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 1.0])
vals = distfn.ppf(points, *args)
vals = vals[np.isfinite(vals)]
sf = distfn.sf(vals, *args)
logsf = distfn.logsf(vals, *args)
sf = sf[sf != 0]
logsf = logsf[np.isfinite(logsf)]
msg += " - logsf-log(sf) relationship"
npt.assert_almost_equal(np.log(sf), logsf, decimal=7, err_msg=msg)
def check_cdf_logcdf(distfn, args, msg):
# compares cdf at several points with the log of the cdf
points = np.array([0, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 1.0])
vals = distfn.ppf(points, *args)
vals = vals[np.isfinite(vals)]
cdf = distfn.cdf(vals, *args)
logcdf = distfn.logcdf(vals, *args)
cdf = cdf[cdf != 0]
logcdf = logcdf[np.isfinite(logcdf)]
msg += " - logcdf-log(cdf) relationship"
npt.assert_almost_equal(np.log(cdf), logcdf, decimal=7, err_msg=msg)
def check_ppf_broadcast(distfn, arg, msg):
# compares ppf for multiple argsets.
num_repeats = 5
args = [] * num_repeats
if arg:
args = [np.array([_] * num_repeats) for _ in arg]
median = distfn.ppf(0.5, *arg)
medians = distfn.ppf(0.5, *args)
msg += " - ppf multiple"
npt.assert_almost_equal(medians, [median] * num_repeats, decimal=7, err_msg=msg)
def check_distribution_rvs(dist, args, alpha, rvs):
# dist is either a cdf function or name of a distribution in scipy.stats.
# args are the args for scipy.stats.dist(*args)
# alpha is a significance level, ~0.01
# rvs is array_like of random variables
# test from scipy.stats.tests
# this version reuses existing random variables
D, pval = stats.kstest(rvs, dist, args=args, N=1000)
if (pval < alpha):
# The rvs passed in failed the K-S test, which _could_ happen
# but is unlikely if alpha is small enough.
# Repeat the test with a new sample of rvs.
# Generate 1000 rvs, perform a K-S test that the new sample of rvs
# are distributed according to the distribution.
D, pval = stats.kstest(dist, dist, args=args, N=1000)
npt.assert_(pval > alpha, "D = " + str(D) + "; pval = " + str(pval) +
"; alpha = " + str(alpha) + "\nargs = " + str(args))
def check_vecentropy(distfn, args):
npt.assert_equal(distfn.vecentropy(*args), distfn._entropy(*args))
def check_loc_scale(distfn, arg, m, v, msg):
# Make `loc` and `scale` arrays to catch bugs like gh-13580 where
# `loc` and `scale` arrays improperly broadcast with shapes.
loc, scale = np.array([10.0, 20.0]), np.array([10.0, 20.0])
mt, vt = distfn.stats(*arg, loc=loc, scale=scale)
npt.assert_allclose(m*scale + loc, mt)
npt.assert_allclose(v*scale*scale, vt)
def check_ppf_private(distfn, arg, msg):
# fails by design for truncnorm self.nb not defined
ppfs = distfn._ppf(np.array([0.1, 0.5, 0.9]), *arg)
npt.assert_(not np.any(np.isnan(ppfs)), msg + 'ppf private is nan')
def check_retrieving_support(distfn, args):
loc, scale = 1, 2
supp = distfn.support(*args)
supp_loc_scale = distfn.support(*args, loc=loc, scale=scale)
npt.assert_almost_equal(np.array(supp)*scale + loc,
np.array(supp_loc_scale))
def check_fit_args(distfn, arg, rvs, method):
with np.errstate(all='ignore'), warnings.catch_warnings():
warnings.filterwarnings("ignore", category=RuntimeWarning,
message="The shape parameter of the erlang")
warnings.filterwarnings("ignore", category=RuntimeWarning,
message="floating point number truncated")
vals = distfn.fit(rvs, method=method)
vals2 = distfn.fit(rvs, optimizer='powell', method=method)
# Only check the length of the return; accuracy tested in test_fit.py
npt.assert_(len(vals) == 2+len(arg))
npt.assert_(len(vals2) == 2+len(arg))
def check_fit_args_fix(distfn, arg, rvs, method):
with np.errstate(all='ignore'), warnings.catch_warnings():
warnings.filterwarnings("ignore", category=RuntimeWarning,
message="The shape parameter of the erlang")
vals = distfn.fit(rvs, floc=0, method=method)
vals2 = distfn.fit(rvs, fscale=1, method=method)
npt.assert_(len(vals) == 2+len(arg))
npt.assert_(vals[-2] == 0)
npt.assert_(vals2[-1] == 1)
npt.assert_(len(vals2) == 2+len(arg))
if len(arg) > 0:
vals3 = distfn.fit(rvs, f0=arg[0], method=method)
npt.assert_(len(vals3) == 2+len(arg))
npt.assert_(vals3[0] == arg[0])
if len(arg) > 1:
vals4 = distfn.fit(rvs, f1=arg[1], method=method)
npt.assert_(len(vals4) == 2+len(arg))
npt.assert_(vals4[1] == arg[1])
if len(arg) > 2:
vals5 = distfn.fit(rvs, f2=arg[2], method=method)
npt.assert_(len(vals5) == 2+len(arg))
npt.assert_(vals5[2] == arg[2])
def cases_test_methods_with_lists():
for distname, arg in distcont:
if distname in slow_with_lists:
yield pytest.param(distname, arg, marks=pytest.mark.slow)
else:
yield distname, arg
@pytest.mark.parametrize('method', ['pdf', 'logpdf', 'cdf', 'logcdf',
'sf', 'logsf', 'ppf', 'isf'])
@pytest.mark.parametrize('distname, args', cases_test_methods_with_lists())
def test_methods_with_lists(method, distname, args):
# Test that the continuous distributions can accept Python lists
# as arguments.
dist = getattr(stats, distname)
f = getattr(dist, method)
if distname == 'invweibull' and method.startswith('log'):
x = [1.5, 2]
else:
x = [0.1, 0.2]
shape2 = [[a]*2 for a in args]
loc = [0, 0.1]
scale = [1, 1.01]
result = f(x, *shape2, loc=loc, scale=scale)
npt.assert_allclose(result,
[f(*v) for v in zip(x, *shape2, loc, scale)],
rtol=1e-14, atol=5e-14)
def test_burr_fisk_moment_gh13234_regression():
vals0 = stats.burr.moment(1, 5, 4)
assert isinstance(vals0, float)
vals1 = stats.fisk.moment(1, 8)
assert isinstance(vals1, float)
def test_moments_with_array_gh12192_regression():
# array loc and scalar scale
vals0 = stats.norm.moment(order=1, loc=np.array([1, 2, 3]), scale=1)
expected0 = np.array([1., 2., 3.])
npt.assert_equal(vals0, expected0)
# array loc and invalid scalar scale
vals1 = stats.norm.moment(order=1, loc=np.array([1, 2, 3]), scale=-1)
expected1 = np.array([np.nan, np.nan, np.nan])
npt.assert_equal(vals1, expected1)
# array loc and array scale with invalid entries
vals2 = stats.norm.moment(order=1, loc=np.array([1, 2, 3]),
scale=[-3, 1, 0])
expected2 = np.array([np.nan, 2., np.nan])
npt.assert_equal(vals2, expected2)
# (loc == 0) & (scale < 0)
vals3 = stats.norm.moment(order=2, loc=0, scale=-4)
expected3 = np.nan
npt.assert_equal(vals3, expected3)
assert isinstance(vals3, expected3.__class__)
# array loc with 0 entries and scale with invalid entries
vals4 = stats.norm.moment(order=2, loc=[1, 0, 2], scale=[3, -4, -5])
expected4 = np.array([10., np.nan, np.nan])
npt.assert_equal(vals4, expected4)
# all(loc == 0) & (array scale with invalid entries)
vals5 = stats.norm.moment(order=2, loc=[0, 0, 0], scale=[5., -2, 100.])
expected5 = np.array([25., np.nan, 10000.])
npt.assert_equal(vals5, expected5)
# all( (loc == 0) & (scale < 0) )
vals6 = stats.norm.moment(order=2, loc=[0, 0, 0], scale=[-5., -2, -100.])
expected6 = np.array([np.nan, np.nan, np.nan])
npt.assert_equal(vals6, expected6)
# scalar args, loc, and scale
vals7 = stats.chi.moment(order=2, df=1, loc=0, scale=0)
expected7 = np.nan
npt.assert_equal(vals7, expected7)
assert isinstance(vals7, expected7.__class__)
# array args, scalar loc, and scalar scale
vals8 = stats.chi.moment(order=2, df=[1, 2, 3], loc=0, scale=0)
expected8 = np.array([np.nan, np.nan, np.nan])
npt.assert_equal(vals8, expected8)
# array args, array loc, and array scale
vals9 = stats.chi.moment(order=2, df=[1, 2, 3], loc=[1., 0., 2.],
scale=[1., -3., 0.])
expected9 = np.array([3.59576912, np.nan, np.nan])
npt.assert_allclose(vals9, expected9, rtol=1e-8)
# (n > 4), all(loc != 0), and all(scale != 0)
vals10 = stats.norm.moment(5, [1., 2.], [1., 2.])
expected10 = np.array([26., 832.])
npt.assert_allclose(vals10, expected10, rtol=1e-13)
# test broadcasting and more
a = [-1.1, 0, 1, 2.2, np.pi]
b = [-1.1, 0, 1, 2.2, np.pi]
loc = [-1.1, 0, np.sqrt(2)]
scale = [-2.1, 0, 1, 2.2, np.pi]
a = np.array(a).reshape((-1, 1, 1, 1))
b = np.array(b).reshape((-1, 1, 1))
loc = np.array(loc).reshape((-1, 1))
scale = np.array(scale)
vals11 = stats.beta.moment(order=2, a=a, b=b, loc=loc, scale=scale)
a, b, loc, scale = np.broadcast_arrays(a, b, loc, scale)
for i in np.ndenumerate(a):
with np.errstate(invalid='ignore', divide='ignore'):
i = i[0] # just get the index
# check against same function with scalar input
expected = stats.beta.moment(order=2, a=a[i], b=b[i],
loc=loc[i], scale=scale[i])
np.testing.assert_equal(vals11[i], expected)
def test_broadcasting_in_moments_gh12192_regression():
vals0 = stats.norm.moment(order=1, loc=np.array([1, 2, 3]), scale=[[1]])
expected0 = np.array([[1., 2., 3.]])
npt.assert_equal(vals0, expected0)
assert vals0.shape == expected0.shape
vals1 = stats.norm.moment(order=1, loc=np.array([[1], [2], [3]]),
scale=[1, 2, 3])
expected1 = np.array([[1., 1., 1.], [2., 2., 2.], [3., 3., 3.]])
npt.assert_equal(vals1, expected1)
assert vals1.shape == expected1.shape
vals2 = stats.chi.moment(order=1, df=[1., 2., 3.], loc=0., scale=1.)
expected2 = np.array([0.79788456, 1.25331414, 1.59576912])
npt.assert_allclose(vals2, expected2, rtol=1e-8)
assert vals2.shape == expected2.shape
vals3 = stats.chi.moment(order=1, df=[[1.], [2.], [3.]], loc=[0., 1., 2.],
scale=[-1., 0., 3.])
expected3 = np.array([[np.nan, np.nan, 4.39365368],
[np.nan, np.nan, 5.75994241],
[np.nan, np.nan, 6.78730736]])
npt.assert_allclose(vals3, expected3, rtol=1e-8)
assert vals3.shape == expected3.shape
@pytest.mark.slow
def test_kappa3_array_gh13582():
# https://github.com/scipy/scipy/pull/15140#issuecomment-994958241
shapes = [0.5, 1.5, 2.5, 3.5, 4.5]
moments = 'mvsk'
res = np.array([[stats.kappa3.stats(shape, moments=moment)
for shape in shapes] for moment in moments])
res2 = np.array(stats.kappa3.stats(shapes, moments=moments))
npt.assert_allclose(res, res2)
@pytest.mark.xslow
def test_kappa4_array_gh13582():
h = np.array([-0.5, 2.5, 3.5, 4.5, -3])
k = np.array([-0.5, 1, -1.5, 0, 3.5])
moments = 'mvsk'
res = np.array([[stats.kappa4.stats(h[i], k[i], moments=moment)
for i in range(5)] for moment in moments])
res2 = np.array(stats.kappa4.stats(h, k, moments=moments))
npt.assert_allclose(res, res2)
# https://github.com/scipy/scipy/pull/15250#discussion_r775112913
h = np.array([-1, -1/4, -1/4, 1, -1, 0])
k = np.array([1, 1, 1/2, -1/3, -1, 0])
res = np.array([[stats.kappa4.stats(h[i], k[i], moments=moment)
for i in range(6)] for moment in moments])
res2 = np.array(stats.kappa4.stats(h, k, moments=moments))
npt.assert_allclose(res, res2)
# https://github.com/scipy/scipy/pull/15250#discussion_r775115021
h = np.array([-1, -0.5, 1])
k = np.array([-1, -0.5, 0, 1])[:, None]
res2 = np.array(stats.kappa4.stats(h, k, moments=moments))
assert res2.shape == (4, 4, 3)
def test_frozen_attributes(monkeypatch):
# gh-14827 reported that all frozen distributions had both pmf and pdf
# attributes; continuous should have pdf and discrete should have pmf.
message = "'rv_continuous_frozen' object has no attribute"
with pytest.raises(AttributeError, match=message):
stats.norm().pmf
with pytest.raises(AttributeError, match=message):
stats.norm().logpmf
monkeypatch.setattr(stats.norm, "pmf", "herring", raising=False)
frozen_norm = stats.norm()
assert isinstance(frozen_norm, rv_continuous_frozen)
assert not hasattr(frozen_norm, "pmf")
def test_skewnorm_pdf_gh16038():
rng = np.random.default_rng(0)
x, a = -np.inf, 0
npt.assert_equal(stats.skewnorm.pdf(x, a), stats.norm.pdf(x))
x, a = rng.random(size=(3, 3)), rng.random(size=(3, 3))
mask = rng.random(size=(3, 3)) < 0.5
a[mask] = 0
x_norm = x[mask]
res = stats.skewnorm.pdf(x, a)
npt.assert_equal(res[mask], stats.norm.pdf(x_norm))
npt.assert_equal(res[~mask], stats.skewnorm.pdf(x[~mask], a[~mask]))
# for scalar input, these functions should return scalar output
scalar_out = [['rvs', []], ['pdf', [0]], ['logpdf', [0]], ['cdf', [0]],
['logcdf', [0]], ['sf', [0]], ['logsf', [0]], ['ppf', [0]],
['isf', [0]], ['moment', [1]], ['entropy', []], ['expect', []],
['median', []], ['mean', []], ['std', []], ['var', []]]
scalars_out = [['interval', [0.95]], ['support', []], ['stats', ['mv']]]
@pytest.mark.parametrize('case', scalar_out + scalars_out)
def test_scalar_for_scalar(case):
# Some rv_continuous functions returned 0d array instead of NumPy scalar
# Guard against regression
method_name, args = case
method = getattr(stats.norm(), method_name)
res = method(*args)
if case in scalar_out:
assert isinstance(res, np.number)
else:
assert isinstance(res[0], np.number)
assert isinstance(res[1], np.number)
def test_scalar_for_scalar2():
# test methods that are not attributes of frozen distributions
res = stats.norm.fit([1, 2, 3])
assert isinstance(res[0], np.number)
assert isinstance(res[1], np.number)
res = stats.norm.fit_loc_scale([1, 2, 3])
assert isinstance(res[0], np.number)
assert isinstance(res[1], np.number)
res = stats.norm.nnlf((0, 1), [1, 2, 3])
assert isinstance(res, np.number)