import pytest
import itertools
import warnings
from scipy import stats
from scipy.stats import (betabinom, betanbinom, hypergeom, nhypergeom,
bernoulli, boltzmann, skellam, zipf, zipfian, binom,
nbinom, nchypergeom_fisher, nchypergeom_wallenius,
randint, poisson_binom)
import numpy as np
from numpy.testing import (
assert_almost_equal, assert_equal, assert_allclose
)
from scipy.special import binom as special_binom
from scipy.optimize import root_scalar
from scipy.integrate import quad
# The expected values were computed with Wolfram Alpha, using
# the expression CDF[HypergeometricDistribution[N, n, M], k].
@pytest.mark.parametrize('k, M, n, N, expected, rtol',
[(3, 10, 4, 5,
0.9761904761904762, 1e-15),
(107, 10000, 3000, 215,
0.9999999997226765, 1e-15),
(10, 10000, 3000, 215,
2.681682217692179e-21, 5e-11)])
def test_hypergeom_cdf(k, M, n, N, expected, rtol):
p = hypergeom.cdf(k, M, n, N)
assert_allclose(p, expected, rtol=rtol)
# The expected values were computed with Wolfram Alpha, using
# the expression SurvivalFunction[HypergeometricDistribution[N, n, M], k].
@pytest.mark.parametrize('k, M, n, N, expected, rtol',
[(25, 10000, 3000, 215,
0.9999999999052958, 1e-15),
(125, 10000, 3000, 215,
1.4416781705752128e-18, 5e-11)])
def test_hypergeom_sf(k, M, n, N, expected, rtol):
p = hypergeom.sf(k, M, n, N)
assert_allclose(p, expected, rtol=rtol)
def test_hypergeom_logpmf():
# symmetries test
# f(k,N,K,n) = f(n-k,N,N-K,n) = f(K-k,N,K,N-n) = f(k,N,n,K)
k = 5
N = 50
K = 10
n = 5
logpmf1 = hypergeom.logpmf(k, N, K, n)
logpmf2 = hypergeom.logpmf(n - k, N, N - K, n)
logpmf3 = hypergeom.logpmf(K - k, N, K, N - n)
logpmf4 = hypergeom.logpmf(k, N, n, K)
assert_almost_equal(logpmf1, logpmf2, decimal=12)
assert_almost_equal(logpmf1, logpmf3, decimal=12)
assert_almost_equal(logpmf1, logpmf4, decimal=12)
# test related distribution
# Bernoulli distribution if n = 1
k = 1
N = 10
K = 7
n = 1
hypergeom_logpmf = hypergeom.logpmf(k, N, K, n)
bernoulli_logpmf = bernoulli.logpmf(k, K/N)
assert_almost_equal(hypergeom_logpmf, bernoulli_logpmf, decimal=12)
def test_nhypergeom_pmf():
# test with hypergeom
M, n, r = 45, 13, 8
k = 6
NHG = nhypergeom.pmf(k, M, n, r)
HG = hypergeom.pmf(k, M, n, k+r-1) * (M - n - (r-1)) / (M - (k+r-1))
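    # The identity above holds because k successes before the r-th failure
    # means exactly k successes in the first k+r-1 draws, followed by a
    # failure on draw k+r, which occurs with probability
    # (M - n - (r-1)) / (M - (k+r-1)) given the remaining population.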
assert_allclose(HG, NHG, rtol=1e-10)
def test_nhypergeom_pmfcdf():
# test pmf and cdf with arbitrary values.
M = 8
n = 3
r = 4
support = np.arange(n+1)
pmf = nhypergeom.pmf(support, M, n, r)
cdf = nhypergeom.cdf(support, M, n, r)
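    # The expected fractions follow directly from the pmf
    # C(k+r-1, k) * C(M-r-k, n-k) / C(M, n); e.g. for k = 2:
    # C(5, 2) * C(2, 1) / C(8, 3) = 20/56 = 5/14.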
assert_allclose(pmf, [1/14, 3/14, 5/14, 5/14], rtol=1e-13)
assert_allclose(cdf, [1/14, 4/14, 9/14, 1.0], rtol=1e-13)
def test_nhypergeom_r0():
# test with `r = 0`.
M = 10
n = 3
r = 0
pmf = nhypergeom.pmf([[0, 1, 2, 0], [1, 2, 0, 3]], M, n, r)
assert_allclose(pmf, [[1, 0, 0, 1], [0, 0, 1, 0]], rtol=1e-13)
def test_nhypergeom_rvs_shape():
# Check that when given a size with more dimensions than the
# dimensions of the broadcast parameters, rvs returns an array
# with the correct shape.
x = nhypergeom.rvs(22, [7, 8, 9], [[12], [13]], size=(5, 1, 2, 3))
assert x.shape == (5, 1, 2, 3)
def test_nhypergeom_accuracy():
# Check that nhypergeom.rvs post-gh-13431 gives the same values as
# inverse transform sampling
rng = np.random.RandomState(0)
x = nhypergeom.rvs(22, 7, 11, size=100, random_state=rng)
rng = np.random.RandomState(0)
p = rng.uniform(size=100)
y = nhypergeom.ppf(p, 22, 7, 11)
assert_equal(x, y)
def test_boltzmann_upper_bound():
k = np.arange(-3, 5)
N = 1
p = boltzmann.pmf(k, 0.123, N)
expected = k == 0
assert_equal(p, expected)
lam = np.log(2)
N = 3
p = boltzmann.pmf(k, lam, N)
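    # With lam = log(2), the pmf is proportional to 2**-k on {0, 1, 2},
    # so the normalized values are 4/7, 2/7 and 1/7.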
expected = [0, 0, 0, 4/7, 2/7, 1/7, 0, 0]
assert_allclose(p, expected, rtol=1e-13)
c = boltzmann.cdf(k, lam, N)
expected = [0, 0, 0, 4/7, 6/7, 1, 1, 1]
assert_allclose(c, expected, rtol=1e-13)
def test_betabinom_a_and_b_unity():
# test limiting case that betabinom(n, 1, 1) is a discrete uniform
# distribution from 0 to n
n = 20
k = np.arange(n + 1)
p = betabinom(n, 1, 1).pmf(k)
expected = np.repeat(1 / (n + 1), n + 1)
assert_almost_equal(p, expected)
@pytest.mark.parametrize('dtypes', itertools.product(*[(int, float)]*3))
def test_betabinom_stats_a_and_b_integers_gh18026(dtypes):
# gh-18026 reported that `betabinom` kurtosis calculation fails when some
# parameters are integers. Check that this is resolved.
n_type, a_type, b_type = dtypes
n, a, b = n_type(10), a_type(2), b_type(3)
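    # The reference value is the exact excess kurtosis, -29/42.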
assert_allclose(betabinom.stats(n, a, b, moments='k'), -0.6904761904761907)
def test_betabinom_bernoulli():
# test limiting case that betabinom(1, a, b) = bernoulli(a / (a + b))
a = 2.3
b = 0.63
k = np.arange(2)
p = betabinom(1, a, b).pmf(k)
expected = bernoulli(a / (a + b)).pmf(k)
assert_almost_equal(p, expected)
def test_issue_10317():
    # gh-10317: nbinom.interval with p=1 should collapse to (0, 0)
    alpha, n, p = 0.9, 10, 1
    assert_equal(nbinom.interval(confidence=alpha, n=n, p=p), (0, 0))
def test_issue_11134():
    # gh-11134: binom.interval with p=0 should collapse to (0, 0)
    alpha, n, p = 0.95, 10, 0
    assert_equal(binom.interval(confidence=alpha, n=n, p=p), (0, 0))
def test_issue_7406():
    # gh-7406: binom.ppf with n=0 should be 0 for any q in (0, 1)
    rng = np.random.default_rng(4763112764)
    assert_equal(binom.ppf(rng.random(10), 0, 0.5), 0)
# Also check that endpoints (q=0, q=1) are correct
assert_equal(binom.ppf(0, 0, 0.5), -1)
assert_equal(binom.ppf(1, 0, 0.5), 0)
def test_issue_5122():
    # gh-5122: check binom.ppf with p=0 at q=0, at interior q, and at q=1
    rng = np.random.default_rng(8312492117)
    p = 0
n = rng.integers(100, size=10)
x = 0
ppf = binom.ppf(x, n, p)
assert_equal(ppf, -1)
x = np.linspace(0.01, 0.99, 10)
ppf = binom.ppf(x, n, p)
assert_equal(ppf, 0)
x = 1
ppf = binom.ppf(x, n, p)
assert_equal(ppf, n)
def test_issue_1603():
    # gh-1603: binom.ppf(0.01) should be 0 when p is very small
    assert_equal(binom(1000, np.logspace(-3, -100)).ppf(0.01), 0)
def test_issue_5503():
    # gh-5503: binom.cdf should remain accurate for very large n.
    # The CDF of a binomial at its mean tends to 1/2, with an
    # O(1/sqrt(n)) correction that stays within the tolerance below.
    p = 0.5
    x = np.logspace(3, 14, 12)
    assert_allclose(binom.cdf(x, 2*x, p), 0.5, atol=1e-2)
@pytest.mark.parametrize('x, n, p, cdf_desired', [
(300, 1000, 3/10, 0.51559351981411995636),
(3000, 10000, 3/10, 0.50493298381929698016),
(30000, 100000, 3/10, 0.50156000591726422864),
(300000, 1000000, 3/10, 0.50049331906666960038),
(3000000, 10000000, 3/10, 0.50015600124585261196),
(30000000, 100000000, 3/10, 0.50004933192735230102),
(30010000, 100000000, 3/10, 0.98545384016570790717),
(29990000, 100000000, 3/10, 0.01455017177985268670),
(29950000, 100000000, 3/10, 5.02250963487432024943e-28),
(300_000_000, 1_000_000_000, 3/10, 0.50001560012523938),
(3_000_000_000, 10_000_000_000, 3/10, 0.50000493319275)
])
def test_issue_5503pt2(x, n, p, cdf_desired):
assert_allclose(binom.cdf(x, n, p), cdf_desired)
def test_issue_5503pt3():
# From Wolfram Alpha: CDF[BinomialDistribution[1e12, 1e-12], 2]
assert_allclose(binom.cdf(2, 10**12, 10**-12), 0.91969860292869777384)
def test_issue_6682():
# Reference value from R:
# options(digits=16)
# print(pnbinom(250, 50, 32/63, lower.tail=FALSE))
assert_allclose(nbinom.sf(250, 50, 32./63.), 1.460458510976452e-35)
def test_issue_19747():
# test that negative k does not raise an error in nbinom.logcdf
result = nbinom.logcdf([5, -1, 1], 5, 0.5)
reference = [-0.47313352, -np.inf, -2.21297293]
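    # The values can be reproduced in R with:
    # options(digits=16)
    # pnbinom(c(5, -1, 1), size=5, prob=0.5, log.p=TRUE)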
assert_allclose(result, reference)
def test_boost_divide_by_zero_issue_15101():
n = 1000
p = 0.01
k = 996
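    # The exact pmf is binom(1000, 996) * 0.01**996 * 0.99**4 ~ 4e-1982,
    # far below the smallest positive double, so 0.0 is correct.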
assert_allclose(binom.pmf(k, n, p), 0.0)
def test_skellam_gh11474():
# test issue reported in gh-11474 caused by `cdfchn`
mu = [1, 10, 100, 1000, 5000, 5050, 5100, 5250, 6000]
cdf = skellam.cdf(0, mu, mu)
# generated in R
# library(skellam)
# options(digits = 16)
# mu = c(1, 10, 100, 1000, 5000, 5050, 5100, 5250, 6000)
# pskellam(0, mu, mu, TRUE)
cdf_expected = [0.6542541612768356, 0.5448901559424127, 0.5141135799745580,
0.5044605891382528, 0.5019947363350450, 0.5019848365953181,
0.5019750827993392, 0.5019466621805060, 0.5018209330219539]
assert_allclose(cdf, cdf_expected)
class TestZipfian:
def test_zipfian_asymptotic(self):
        # test limiting case that zipfian(a, n) -> zipf(a) as n -> oo
a = 6.5
N = 10000000
k = np.arange(1, 21)
assert_allclose(zipfian.pmf(k, a, N), zipf.pmf(k, a))
assert_allclose(zipfian.cdf(k, a, N), zipf.cdf(k, a))
assert_allclose(zipfian.sf(k, a, N), zipf.sf(k, a))
assert_allclose(zipfian.stats(a, N, moments='msvk'),
zipf.stats(a, moments='msvk'))
def test_zipfian_continuity(self):
        # test that zipfian(0.99999999, n) ~ zipfian(1.00000001, n)
# (a = 1 switches between methods of calculating harmonic sum)
alt1, agt1 = 0.99999999, 1.00000001
N = 30
k = np.arange(1, N + 1)
assert_allclose(zipfian.pmf(k, alt1, N), zipfian.pmf(k, agt1, N),
rtol=5e-7)
assert_allclose(zipfian.cdf(k, alt1, N), zipfian.cdf(k, agt1, N),
rtol=5e-7)
assert_allclose(zipfian.sf(k, alt1, N), zipfian.sf(k, agt1, N),
rtol=5e-7)
assert_allclose(zipfian.stats(alt1, N, moments='msvk'),
zipfian.stats(agt1, N, moments='msvk'), rtol=5e-7)
def test_zipfian_R(self):
# test against R VGAM package
# library(VGAM)
# k <- c(13, 16, 1, 4, 4, 8, 10, 19, 5, 7)
# a <- c(1.56712977, 3.72656295, 5.77665117, 9.12168729, 5.79977172,
# 4.92784796, 9.36078764, 4.3739616 , 7.48171872, 4.6824154)
# n <- c(70, 80, 48, 65, 83, 89, 50, 30, 20, 20)
# pmf <- dzipf(k, N = n, shape = a)
# cdf <- pzipf(k, N = n, shape = a)
# print(pmf)
# print(cdf)
rng = np.random.RandomState(0)
k = rng.randint(1, 20, size=10)
a = rng.rand(10)*10 + 1
n = rng.randint(1, 100, size=10)
pmf = [8.076972e-03, 2.950214e-05, 9.799333e-01, 3.216601e-06,
3.158895e-04, 3.412497e-05, 4.350472e-10, 2.405773e-06,
5.860662e-06, 1.053948e-04]
cdf = [0.8964133, 0.9998666, 0.9799333, 0.9999995, 0.9998584,
0.9999458, 1.0000000, 0.9999920, 0.9999977, 0.9998498]
        # skip the first point; the computed values are not accurate
        # for low a, n
assert_allclose(zipfian.pmf(k, a, n)[1:], pmf[1:], rtol=1e-6)
assert_allclose(zipfian.cdf(k, a, n)[1:], cdf[1:], rtol=5e-5)
rng = np.random.RandomState(0)
naive_tests = np.vstack((np.logspace(-2, 1, 10),
rng.randint(2, 40, 10))).T
@pytest.mark.parametrize("a, n", naive_tests)
def test_zipfian_naive(self, a, n):
# test against bare-bones implementation
@np.vectorize
def Hns(n, s):
"""Naive implementation of harmonic sum"""
return (1/np.arange(1, n+1)**s).sum()
@np.vectorize
def pzip(k, a, n):
"""Naive implementation of zipfian pmf"""
if k < 1 or k > n:
return 0.
else:
return 1 / k**a / Hns(n, a)
k = np.arange(n+1)
pmf = pzip(k, a, n)
cdf = np.cumsum(pmf)
mean = np.average(k, weights=pmf)
var = np.average((k - mean)**2, weights=pmf)
std = var**0.5
skew = np.average(((k-mean)/std)**3, weights=pmf)
kurtosis = np.average(((k-mean)/std)**4, weights=pmf) - 3
assert_allclose(zipfian.pmf(k, a, n), pmf)
assert_allclose(zipfian.cdf(k, a, n), cdf)
assert_allclose(zipfian.stats(a, n, moments="mvsk"),
[mean, var, skew, kurtosis])
    def test_pmf_integer_k(self):
        # test that int32 input for k gives the same output as float input
        k = np.arange(0, 1000)
        k_int32 = k.astype(np.int32)
        dist = zipfian(111, 22)
pmf = dist.pmf(k)
pmf_k_int32 = dist.pmf(k_int32)
assert_equal(pmf, pmf_k_int32)
# Reference values were computed with mpmath.
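    # A sketch of the kind of mpmath computation (zipfian_pmf is a
    # hypothetical helper; precision and summation strategy are
    # assumptions):
    # from mpmath import mp
    # mp.dps = 100
    # def zipfian_pmf(k, a, n):
    #     k, a = mp.mpf(k), mp.mpf(a)
    #     hsum = mp.nsum(lambda i: i**-a, [1, n])
    #     return float(k**-a / hsum)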
@pytest.mark.parametrize(
'k, a, n, ref',
[(3, 1 + 1e-12, 10, 0.11380571738244807),
(995, 1 + 1e-9, 1000, 0.0001342634472310051)]
)
def test_pmf_against_mpmath(self, k, a, n, ref):
p = zipfian.pmf(k, a, n)
        assert_allclose(p, ref, rtol=5e-16)
# Reference values were computed with mpmath.
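    # A sketch of an analogous mpmath reference (assumed form):
    # def zipfian_sf(k, a, n):
    #     a = mp.mpf(a)
    #     hsum = mp.nsum(lambda i: i**-a, [1, n])
    #     return float(mp.nsum(lambda i: i**-a, [k + 1, n]) / hsum)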
@pytest.mark.parametrize(
'k, a, n, ref',
[(4990, 1.25, 5000, 5.780138225335147e-05),
(9998, 3.5, 10000, 1.775352757966365e-14),
(50000, 3.5, 100000, 5.227803621367486e-13),
(10, 6.0, 100, 1.523108153557902e-06),
(95, 6.0, 100, 5.572438078308601e-12)]
)
def test_sf_against_mpmath(self, k, a, n, ref):
sf = zipfian.sf(k, a, n)
assert_allclose(sf, ref, rtol=8e-15)
# Reference values were computed with mpmath.
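    # A sketch of an mpmath reference (assumed form); the mean is the
    # ratio of generalized harmonic sums H(n, a-1) / H(n, a):
    # def zipfian_mean(a, n):
    #     a = mp.mpf(a)
    #     return float(mp.nsum(lambda i: i**(1 - a), [1, n])
    #                  / mp.nsum(lambda i: i**-a, [1, n]))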
@pytest.mark.parametrize(
'a, n, ref',
[(1 + 1e-9, 100, 19.27756356649707),
(1 + 1e-7, 100000, 8271.194454731552),
(1.001, 3, 1.6360225521183804)]
)
def test_mean_against_mpmath(self, a, n, ref):
m = zipfian.mean(a, n)
assert_allclose(m, ref, rtol=8e-15)
def test_ridiculously_large_n(self):
# This should return nan with no errors or warnings.
a = 2.5
n = 1e100
p = zipfian.pmf(10, a, n)
assert_equal(p, np.nan)
class TestNCH:
def setup_method(self):
rng = np.random.default_rng(7162434334)
shape = (2, 4, 3)
max_m = 100
m1 = rng.integers(1, max_m, size=shape) # red balls
m2 = rng.integers(1, max_m, size=shape) # white balls
N = m1 + m2 # total balls
n = randint.rvs(0, N, size=N.shape, random_state=rng) # number of draws
xl = np.maximum(0, n-m2) # lower bound of support
xu = np.minimum(n, m1) # upper bound of support
x = randint.rvs(xl, xu, size=xl.shape, random_state=rng)
odds = rng.random(x.shape)*2
self.x, self.N, self.m1, self.n, self.odds = x, N, m1, n, odds
# test output is more readable when function names (strings) are passed
@pytest.mark.parametrize('dist_name',
['nchypergeom_fisher', 'nchypergeom_wallenius'])
def test_nch_hypergeom(self, dist_name):
# Both noncentral hypergeometric distributions reduce to the
# hypergeometric distribution when odds = 1
dists = {'nchypergeom_fisher': nchypergeom_fisher,
'nchypergeom_wallenius': nchypergeom_wallenius}
dist = dists[dist_name]
x, N, m1, n = self.x, self.N, self.m1, self.n
assert_allclose(dist.pmf(x, N, m1, n, odds=1),
hypergeom.pmf(x, N, m1, n))
def test_nchypergeom_fisher_naive(self):
# test against a very simple implementation
x, N, m1, n, odds = self.x, self.N, self.m1, self.n, self.odds
@np.vectorize
def pmf_mean_var(x, N, m1, n, w):
# simple implementation of nchypergeom_fisher pmf
m2 = N - m1
xl = np.maximum(0, n-m2)
xu = np.minimum(n, m1)
def f(x):
t1 = special_binom(m1, x)
t2 = special_binom(m2, n - x)
return t1 * t2 * w**x
def P(k):
return sum(f(y)*y**k for y in range(xl, xu + 1))
P0 = P(0)
P1 = P(1)
P2 = P(2)
pmf = f(x) / P0
mean = P1 / P0
var = P2 / P0 - (P1 / P0)**2
return pmf, mean, var
pmf, mean, var = pmf_mean_var(x, N, m1, n, odds)
assert_allclose(nchypergeom_fisher.pmf(x, N, m1, n, odds), pmf)
assert_allclose(nchypergeom_fisher.stats(N, m1, n, odds, moments='m'),
mean)
assert_allclose(nchypergeom_fisher.stats(N, m1, n, odds, moments='v'),
var)
def test_nchypergeom_wallenius_naive(self):
# test against a very simple implementation
rng = np.random.RandomState(2)
shape = (2, 4, 3)
max_m = 100
m1 = rng.randint(1, max_m, size=shape)
m2 = rng.randint(1, max_m, size=shape)
N = m1 + m2
n = randint.rvs(0, N, size=N.shape, random_state=rng)
xl = np.maximum(0, n-m2)
xu = np.minimum(n, m1)
x = randint.rvs(xl, xu, size=xl.shape, random_state=rng)
w = rng.rand(*x.shape)*2
def support(N, m1, n, w):
m2 = N - m1
xl = np.maximum(0, n-m2)
xu = np.minimum(n, m1)
return xl, xu
@np.vectorize
def mean(N, m1, n, w):
m2 = N - m1
xl, xu = support(N, m1, n, w)
def fun(u):
return u/m1 + (1 - (n-u)/m2)**w - 1
return root_scalar(fun, bracket=(xl, xu)).root
with warnings.catch_warnings():
warnings.filterwarnings("ignore", category=RuntimeWarning,
message="invalid value encountered in mean")
assert_allclose(nchypergeom_wallenius.mean(N, m1, n, w),
mean(N, m1, n, w), rtol=2e-2)
@np.vectorize
def variance(N, m1, n, w):
m2 = N - m1
u = mean(N, m1, n, w)
a = u * (m1 - u)
b = (n-u)*(u + m2 - n)
return N*a*b / ((N-1) * (m1*b + m2*a))
with warnings.catch_warnings():
warnings.filterwarnings("ignore", category=RuntimeWarning,
message="invalid value encountered in mean")
assert_allclose(
nchypergeom_wallenius.stats(N, m1, n, w, moments='v'),
variance(N, m1, n, w),
rtol=5e-2
)
@np.vectorize
def pmf(x, N, m1, n, w):
m2 = N - m1
xl, xu = support(N, m1, n, w)
def integrand(t):
D = w*(m1 - x) + (m2 - (n-x))
res = (1-t**(w/D))**x * (1-t**(1/D))**(n-x)
return res
def f(x):
t1 = special_binom(m1, x)
t2 = special_binom(m2, n - x)
the_integral = quad(integrand, 0, 1,
epsrel=1e-16, epsabs=1e-16)
return t1 * t2 * the_integral[0]
return f(x)
pmf0 = pmf(x, N, m1, n, w)
pmf1 = nchypergeom_wallenius.pmf(x, N, m1, n, w)
atol, rtol = 1e-6, 1e-6
i = np.abs(pmf1 - pmf0) < atol + rtol*np.abs(pmf0)
assert i.sum() > np.prod(shape) / 2 # works at least half the time
# for those that fail, discredit the naive implementation
for N, m1, n, w in zip(N[~i], m1[~i], n[~i], w[~i]):
# get the support
m2 = N - m1
xl, xu = support(N, m1, n, w)
x = np.arange(xl, xu + 1)
# calculate sum of pmf over the support
# the naive implementation is very wrong in these cases
assert pmf(x, N, m1, n, w).sum() < .5
assert_allclose(nchypergeom_wallenius.pmf(x, N, m1, n, w).sum(), 1)
def test_wallenius_against_mpmath(self):
# precompute data with mpmath since naive implementation above
# is not reliable. See source code in gh-13330.
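        # A sketch of such a computation, using the same integral
        # representation as the naive implementation above (precision and
        # quadrature settings are assumptions; gh-13330 has the actual
        # script):
        # from mpmath import mp
        # mp.dps = 50
        # def wallenius_pmf(x, M, n, N, w):
        #     m2 = M - n
        #     D = w*(n - x) + (m2 - (N - x))
        #     def integrand(t):
        #         return (1 - t**(w/D))**x * (1 - t**(1/D))**(N - x)
        #     return float(mp.binomial(n, x) * mp.binomial(m2, N - x)
        #                  * mp.quad(integrand, [0, 1]))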
M = 50
n = 30
N = 20
odds = 2.25
# Expected results, computed with mpmath.
sup = np.arange(21)
pmf = np.array([3.699003068656875e-20,
5.89398584245431e-17,
2.1594437742911123e-14,
3.221458044649955e-12,
2.4658279241205077e-10,
1.0965862603981212e-08,
3.057890479665704e-07,
5.622818831643761e-06,
7.056482841531681e-05,
0.000618899425358671,
0.003854172932571669,
0.01720592676256026,
0.05528844897093792,
0.12772363313574242,
0.21065898367825722,
0.24465958845359234,
0.1955114898110033,
0.10355390084949237,
0.03414490375225675,
0.006231989845775931,
0.0004715577304677075])
mean = 14.808018384813426
var = 2.6085975877923717
# nchypergeom_wallenius.pmf returns 0 for pmf(0) and pmf(1), and pmf(2)
# has only three digits of accuracy (~ 2.1511e-14).
assert_allclose(nchypergeom_wallenius.pmf(sup, M, n, N, odds), pmf,
rtol=1e-13, atol=1e-13)
assert_allclose(nchypergeom_wallenius.mean(M, n, N, odds),
mean, rtol=1e-13)
assert_allclose(nchypergeom_wallenius.var(M, n, N, odds),
var, rtol=1e-11)
@pytest.mark.parametrize('dist_name',
['nchypergeom_fisher', 'nchypergeom_wallenius'])
def test_rvs_shape(self, dist_name):
# Check that when given a size with more dimensions than the
# dimensions of the broadcast parameters, rvs returns an array
# with the correct shape.
dists = {'nchypergeom_fisher': nchypergeom_fisher,
'nchypergeom_wallenius': nchypergeom_wallenius}
dist = dists[dist_name]
x = dist.rvs(50, 30, [[10], [20]], [0.5, 1.0, 2.0], size=(5, 1, 2, 3))
assert x.shape == (5, 1, 2, 3)
@pytest.mark.parametrize("mu, q, expected",
[[10, 120, -1.240089881791596e-38],
[1500, 0, -86.61466680572661]])
def test_nbinom_11465(mu, q, expected):
# test nbinom.logcdf at extreme tails
size = 20
n, p = size, size/(size+mu)
    # In R:
    # options(digits=16)
    # pnbinom(q, size=20, mu=mu, log.p=TRUE)
    # with (q, mu) = (120, 10) and (0, 1500)
assert_allclose(nbinom.logcdf(q, n, p), expected)
def test_gh_17146():
# Check that discrete distributions return PMF of zero at non-integral x.
# See gh-17146.
x = np.linspace(0, 1, 11)
p = 0.8
pmf = bernoulli(p).pmf(x)
i = (x % 1 == 0)
assert_allclose(pmf[-1], p)
assert_allclose(pmf[0], 1-p)
assert_equal(pmf[~i], 0)
class TestBetaNBinom:
@pytest.mark.parametrize('x, n, a, b, ref',
[[5, 5e6, 5, 20, 1.1520944824139114e-107],
[100, 50, 5, 20, 0.002855762954310226],
[10000, 1000, 5, 20, 1.9648515726019154e-05]])
def test_betanbinom_pmf(self, x, n, a, b, ref):
# test that PMF stays accurate in the distribution tails
# reference values computed with mpmath
# from mpmath import mp
# mp.dps = 500
# def betanbinom_pmf(k, n, a, b):
# k = mp.mpf(k)
# a = mp.mpf(a)
# b = mp.mpf(b)
# n = mp.mpf(n)
# return float(mp.binomial(n + k - mp.one, k)
# * mp.beta(a + n, b + k) / mp.beta(a, b))
assert_allclose(betanbinom.pmf(x, n, a, b), ref, rtol=1e-10)
@pytest.mark.parametrize('n, a, b, ref',
[[10000, 5000, 50, 0.12841520515722202],
[10, 9, 9, 7.9224400871459695],
[100, 1000, 10, 1.5849602176622748]])
def test_betanbinom_kurtosis(self, n, a, b, ref):
# reference values were computed via mpmath
# from mpmath import mp
# def kurtosis_betanegbinom(n, a, b):
# n = mp.mpf(n)
# a = mp.mpf(a)
# b = mp.mpf(b)
# four = mp.mpf(4.)
# mean = n * b / (a - mp.one)
# var = (n * b * (n + a - 1.) * (a + b - 1.)
# / ((a - 2.) * (a - 1.)**2.))
# def f(k):
# return (mp.binomial(n + k - mp.one, k)
# * mp.beta(a + n, b + k) / mp.beta(a, b)
# * (k - mean)**four)
# fourth_moment = mp.nsum(f, [0, mp.inf])
# return float(fourth_moment/var**2 - 3.)
assert_allclose(betanbinom.stats(n, a, b, moments="k"),
ref, rtol=3e-15)
class TestZipf:
def test_gh20692(self):
# test that int32 data for k generates same output as double
k = np.arange(0, 1000)
k_int32 = k.astype(np.int32)
dist = zipf(9)
pmf = dist.pmf(k)
pmf_k_int32 = dist.pmf(k_int32)
assert_equal(pmf, pmf_k_int32)
def test_gh20048():
    # gh-20048 reported an infinite loop in _drv2_ppfsingle;
    # check that the case identified there is resolved
class test_dist_gen(stats.rv_discrete):
def _cdf(self, k):
return min(k / 100, 0.99)
test_dist = test_dist_gen(b=np.inf)
message = "Arguments that bracket..."
with pytest.raises(RuntimeError, match=message):
test_dist.ppf(0.999)
class TestPoissonBinomial:
def test_pmf(self):
# Test pmf against R `poisbinom` to confirm that this is indeed the Poisson
# binomial distribution. Consistency of other methods and all other behavior
# should be covered by generic tests. (If not, please add a generic test.)
# Like many other distributions, no special attempt is made to be more
# accurate than the usual formulas provide, so we use default tolerances.
#
# library(poisbinom)
# options(digits=16)
# k = c(0, 1, 2, 3, 4)
# p = c(0.9480654803913988, 0.052428488100509374,
# 0.25863527358887417, 0.057764076043633206)
# dpoisbinom(k, p)
rng = np.random.default_rng(259823598254)
n = rng.integers(10) # 4
k = np.arange(n + 1)
p = rng.random(n) # [0.9480654803913988, 0.052428488100509374,
# 0.25863527358887417, 0.057764076043633206]
res = poisson_binom.pmf(k, p)
ref = [0.0343763443678060318, 0.6435428452689714307, 0.2936345519235536994,
0.0277036647503902354, 0.0007425936892786034]
assert_allclose(res, ref)
class TestRandInt:
def test_gh19759(self):
        # test zero PMF values within the support reported by gh-19759;
        # upper bounds near a + 2**31 exercise the int32 boundary
a = -354
max_range = abs(a)
all_b_1 = [a + 2 ** 31 + i for i in range(max_range)]
res = randint.pmf(325, a, all_b_1)
assert (res > 0).all()
ref = 1 / (np.asarray(all_b_1, dtype=np.float64) - a)
assert_allclose(res, ref)