import math
import random

from ..combinatorics.counting import Combination

class Distribution:
    '''
    Base class for probability distributions.

    Dev note: consider pushing shared behaviour into the Discrete and
    Continuous subclasses below. There are some slight differences that
    might matter to the API (e.g. "pdf" vs "pmf").
    '''
    def __init__(self, *params):
        self.params = params

    def pdf(self, x):
        '''return pdf(x) = density(x)'''
        pass

    def cdf(self, x):
        '''return cdf(x) = Pr(X <= x)'''
        pass

    def quantile(self, p):
        '''return cdf^{-1}(p), i.e. the x such that Pr(X <= x) == p'''
        pass

    def sample(self, n=1):
        '''
        n: number of samples; n == -1 yields an endless stream

        Dev note: consider returning something like a "Sample" object, which
        wraps the samples and provides convenient empirical estimates,
        e.g. MLE of the parameters (see the sketch after this class)
        '''
        if n == -1:
            while True:
                yield next(self.sample())

    # common moments (consider making these @property)
    def mean(self):
        '''distribution mean'''
        pass

    def variance(self):
        '''distribution variance'''
        pass

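# A possible shape for the "Sample" object mentioned in Distribution.sample's
# dev note above. This is a sketch, not part of the original API: the class name
# and its methods are assumptions about what "convenient empirical estimates"
# could look like (plain empirical mean/variance standing in for MLE-style
# summaries), e.g. Sample(Bernoulli(0.3).sample(100)).mean() would estimate p.
class Sample:
    '''Wraps a collection of draws and exposes simple empirical estimates.'''
    def __init__(self, draws):
        self.draws = list(draws)

    def mean(self):
        return sum(self.draws) / len(self.draws)

    def variance(self):
        m = self.mean()
        return sum((x - m)**2 for x in self.draws) / len(self.draws)
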
class DiscreteDistribution(Distribution):
    '''
    Discrete distribution base class
    '''
    pass

class ContinuousDistribution(Distribution):
    '''
    Continuous distribution base class
    '''
    pass

class Bernoulli(DiscreteDistribution):
    def __init__(self, p):
        self.p = p

    def pdf(self, x):
        return self.p**x * (1 - self.p)**(1 - x)

    def cdf(self, x):
        '''Pr(X <= x): 0 below the support, 1-p on [0, 1), 1 from 1 upward'''
        if x < 0:
            return 0
        return 1 if x >= 1 else 1 - self.p

    def sample(self, n=1):
        for _ in range(n):
            yield 1 if random.random() < self.p else 0

    def mean(self):
        return self.p

    def variance(self):
        return self.p * (1 - self.p)

class Binomial(DiscreteDistribution):
    '''
    Dev note: consider indexing probabilities at different values for later
    access. Precomputing could be cheap and save time later, or could be
    expensive and never really used; the options have to be compared.
    '''

    def __init__(self, n, p):
        self.n = n
        self.p = p
        self._cdf = {}

    def pdf(self, x):
        return Combination.nCk(self.n, x) * self.p**x * (1 - self.p)**(self.n - x)

    def cdf(self, x, index=False):
        '''iteratively (naively) compute P(X <= x)'''
        p = 0
        for i in range(int(x) + 1):
            if i == self.n:
                p = 1
            else:
                p += self.pdf(i)
            if index:
                self._cdf[i] = p
        return p

    def sample(self, n=1):
        '''
        Naive inversion sampling (for now); meant to be used with relatively
        small n. Consider Poisson sampling for sufficiently large n (see the
        sketch after this class).
        '''
        yield from super().sample(n)  # handles the n == -1 endless-stream case
        # index the entire cdf once, then invert it for each draw
        if self.n not in self._cdf:
            self.cdf(self.n, index=True)

        for _ in range(n):
            r = random.random()
            for x in self._cdf:
                if self._cdf[x] >= r:
                    break
            yield x

    def mean(self):
        return self.n * self.p

    def variance(self):
        return self.n * self.p * (1 - self.p)

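# Hypothetical illustration of the Poisson-sampling idea from Binomial.sample's
# dev note; not part of the original API. For large n and small p, Binomial(n, p)
# is approximately Poisson(n*p), so one option is to fall back to the Poisson
# class (defined further down in this module) once n crosses some threshold. The
# function name and the threshold value are assumptions.
def _binomial_sample_via_poisson(n_trials, p, size=1, threshold=1000):
    '''Sketch: sample Binomial(n_trials, p) via the Poisson(n_trials * p)
    approximation for large n_trials (only reasonable when p is small).'''
    if n_trials < threshold:
        yield from Binomial(n_trials, p).sample(size)
    else:
        yield from Poisson(n_trials * p).sample(size)
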
class Exponential(ContinuousDistribution): pass

class Normal(ContinuousDistribution): pass

class Poisson(DiscreteDistribution):
    def __init__(self, lmda):
        self.lmda = lmda

    def pdf(self, k):
        return self.lmda**k * math.exp(-self.lmda) / math.factorial(k)

    def cdf(self, x):
        '''P(X <= x) as a finite sum of the pmf'''
        if x < 0:
            return 0
        return sum(self.pdf(k) for k in range(int(x) + 1))

    def quantile(self, p):
        pass

    def sample(self, n=1):
        '''Knuth's method: count uniform draws until their running product
        falls below e^{-lambda}'''
        threshold = math.exp(-self.lmda)
        for _ in range(n):
            k, prod = 0, random.random()
            while prod > threshold:
                k += 1
                prod *= random.random()
            yield k

    def mean(self):
        return self.lmda

    def variance(self):
        return self.lmda

class Uniform(ContinuousDistribution):
    def __init__(self, a, b):
        self.a = a
        self.b = b
        self.lower = min(a, b)
        self.width = abs(a - b)

    def pdf(self, x):
        '''density is 1/width on the support, 0 elsewhere'''
        if self.lower <= x <= self.lower + self.width:
            return 1 / self.width
        return 0

    def cdf(self, x):
        '''Pr(X <= x), clamped to [0, 1] outside the support'''
        return min(max((x - self.lower) / self.width, 0), 1)

    def quantile(self, p):
        return self.lower + p * self.width

    def sample(self, n=1):
        yield from super().sample(n)  # handles the n == -1 endless-stream case
        for _ in range(n):
            yield random.random() * self.width + self.lower

    def mean(self):
        return (self.a + self.b) / 2

    def variance(self):
        return (self.b - self.a)**2 / 12

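# A minimal usage sketch, not part of the original module. Because of the relative
# import at the top, this only runs when the module is executed inside its package
# (e.g. `python -m <package>.distributions`); the module/package names here are
# assumptions.
if __name__ == '__main__':
    from itertools import islice

    for dist in (Bernoulli(0.3), Binomial(10, 0.5), Poisson(4), Uniform(0, 2)):
        draws = list(dist.sample(1000))
        empirical = sum(draws) / len(draws)
        print(type(dist).__name__, 'empirical mean:', empirical, 'theoretical:', dist.mean())

    # n == -1 yields an endless stream; take a few draws with islice
    print(list(islice(Uniform(0, 1).sample(-1), 5)))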