objectlib/objectlib/probability/distributions.py

import random
import math
from ..combinatorics.counting import Combination


class Distribution():
    '''
    Consider extending the base class into Continuous and Discrete subclasses.
    There are some slight differences that might matter to the API (e.g.
    "pdf" vs "pmf").
    '''
    def __init__(self, *params):
        self.params = params

    def pdf(self, x):
        '''return pdf(x) = density(x)'''
        pass

    def cdf(self, x):
        '''return cdf(x) = Pr(X <= x)'''
        pass

    def quantile(self, p):
        '''return cdf^{-1}(p), i.e. the x with Pr(X <= x) == p'''
        pass

    def sample(self, n=1):
        '''
        n: number of samples; n == -1 yields an endless stream.
        Dev note: should consider returning something like a "Sample" object,
        which wraps the samples and provides convenient empirical estimates,
        e.g. MLE of parameters (a rough sketch follows this class).
        '''
        if n == -1:
            # endless stream: repeatedly pull single draws from the subclass;
            # subclasses that support n == -1 first `yield from super().sample(n)`
            while True:
                yield next(self.sample())

    # common moments (consider making these @property)
    def mean(self):
        '''distribution mean'''
        pass

    def variance(self):
        '''distribution variance'''
        pass
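

# Rough sketch of the "Sample" object mentioned in the dev note of
# Distribution.sample above: a thin wrapper around drawn values with a couple
# of empirical summaries. The name `Sample` and its methods are assumptions,
# not part of the existing API.
class Sample():
    def __init__(self, values):
        self.values = list(values)

    def mean(self):
        '''empirical mean of the drawn values'''
        return sum(self.values) / len(self.values)

    def variance(self):
        '''empirical (1/n) variance of the drawn values'''
        m = self.mean()
        return sum((v - m)**2 for v in self.values) / len(self.values)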


class DiscreteDistribution(Distribution):
    '''
    Discrete distribution base class
    '''
    pass


class ContinuousDistribution(Distribution):
    '''
    Continuous distribution base class
    '''
    pass


class Bernoulli(DiscreteDistribution):
    def __init__(self, p):
        self.p = p

    def pdf(self, x):
        '''pmf of Bernoulli(p), x in {0, 1}'''
        return self.p**x * (1-self.p)**(1-x)

    def cdf(self, x):
        # Pr(X <= x): 0 below the support, 1-p on [0, 1), 1 from 1 upward
        if x < 0: return 0
        if x < 1: return 1 - self.p
        return 1

    def sample(self, n=1):
        for _ in range(n):
            yield 1 if random.random() < self.p else 0

    def mean(self):
        return self.p

    def variance(self):
        return self.p*(1-self.p)


class Binomial(DiscreteDistribution):
    '''
    Dev note: consider indexing probabilities at different values for later
    access. Precomputing could be cheap and save time later, or could be
    expensive and never really used; the options have to be compared.
    '''
    def __init__(self, n, p):
        self.n = n
        self.p = p
        self._cdf = {}

    def pdf(self, x):
        return Combination.nCk(self.n, x) * self.p**x * (1-self.p)**(self.n-x)

    def cdf(self, x, index=False):
        '''iteratively (naively) compute P(X <= x)'''
        p = 0
        for i in range(int(x)+1):
            if i == self.n:
                # guard against floating point drift at the top of the support
                p = 1
            else:
                p += self.pdf(i)
            if index:
                self._cdf[i] = p
        return p

    def sample(self, n=1):
        '''
        Naive implementation (for now); meant to be used with relatively small n.
        Consider Poisson sampling for sufficiently large n (one reading of that
        idea is sketched after this class).
        '''
        yield from super().sample(n)  # handles the n == -1 endless-stream case
        # index the entire cdf once, then invert it by linear search
        if self.n not in self._cdf:
            self.cdf(self.n, index=True)
        for _ in range(n):
            r = random.random()
            for x in self._cdf:
                if self._cdf[x] >= r:
                    break
            yield x

    def mean(self):
        return self.n*self.p

    def variance(self):
        return self.n*self.p*(1-self.p)
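

# One possible reading of the "Poisson sampling" note in Binomial.sample above:
# for large n and small p, Binomial(n, p) is well approximated by Poisson(n*p),
# whose sampler needs no cdf indexing. The helper name
# `binomial_sample_poisson_approx` is hypothetical, not part of the existing API.
def binomial_sample_poisson_approx(n_trials, p, n=1):
    # Poisson is defined further down in this module
    yield from Poisson(n_trials * p).sample(n)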


class Exponential(ContinuousDistribution): pass


class Normal(ContinuousDistribution): pass
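

# Inverse-transform sketch for the Exponential stub above, assuming a single
# rate parameter `lmda` (mirroring Poisson); kept under a separate, clearly
# hypothetical name so the stub itself stays untouched.
class _ExponentialSketch(ContinuousDistribution):
    def __init__(self, lmda):
        self.lmda = lmda

    def pdf(self, x):
        return self.lmda * math.exp(-self.lmda*x) if x >= 0 else 0

    def cdf(self, x):
        return 1 - math.exp(-self.lmda*x) if x >= 0 else 0

    def quantile(self, p):
        return -math.log(1 - p) / self.lmda

    def sample(self, n=1):
        yield from super().sample(n)  # handles the n == -1 endless-stream case
        for _ in range(n):
            # inverse transform: -ln(U)/lambda with U uniform on (0, 1]
            yield -math.log(1 - random.random()) / self.lmda

    def mean(self):
        return 1 / self.lmda

    def variance(self):
        return 1 / self.lmda**2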


class Poisson(DiscreteDistribution):
    def __init__(self, lmda):
        self.lmda = lmda

    def pdf(self, k):
        return self.lmda**k * math.exp(-self.lmda) / math.factorial(k)

    def cdf(self, x):
        pass

    def quantile(self, p):
        pass

    def sample(self, n=1):
        # Knuth's multiplicative method: count uniforms until their running
        # product drops below e^{-lmda}; fine for moderate lmda
        for _ in range(n):
            L = math.exp(-self.lmda)
            k, prod = 0, random.random()
            while prod > L:
                k += 1
                prod *= random.random()
            yield k

    def mean(self):
        return self.lmda

    def variance(self):
        return self.lmda


class Uniform(ContinuousDistribution):
    def __init__(self, a, b):
        self.a = a
        self.b = b
        self.lower = min(a, b)
        self.width = abs(a-b)

    def pdf(self, x):
        return 1 / self.width

    def cdf(self, x):
        return (x-self.lower) / self.width

    def quantile(self, p):
        return self.lower + p*self.width

    def sample(self, n=1):
        yield from super().sample(n)  # handles the n == -1 endless-stream case
        for _ in range(n):
            yield random.random()*self.width + self.lower

    def mean(self):
        return (self.a+self.b)/2

    def variance(self):
        return (self.b-self.a)**2/12
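

# Small usage sketch: draw samples and compare empirical and theoretical means.
# Assumes the package layout in the path above so the relative import of
# Combination resolves, e.g. run as `python -m objectlib.probability.distributions`.
if __name__ == '__main__':
    for dist in (Bernoulli(0.3), Binomial(10, 0.25), Uniform(0, 2)):
        draws = list(dist.sample(1000))
        emp_mean = sum(draws) / len(draws)
        print(type(dist).__name__, 'mean:', dist.mean(), 'empirical:', round(emp_mean, 3))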