initial commit

2024-05-15 20:36:13 -07:00 · 2024-05-15 20:36:13 -07:00 · d48cc8fbc8
commit d48cc8fbc8
24 changed files with 1015 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,18 @@
 # generic py
 __pycache__/
 .pytest_cache/
 *.egg-info/
 .ipynb_checkpoints/
 .pytest_cache/
 .python-version
 # vendor and build files
 dist/
 build/
 docs/_autoref/
 docs/_autosummary/
 docs/_build/
 # local
 notebooks/
 /Makefile
--- a/22
+++ b/22
@ -0,0 +1,22 @@
 MIT License
 Copyright (c) 2024 Sam Griesemer
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
 in the Software without restriction, including without limitation the rights
 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 copies of the Software, and to permit persons to whom the Software is
 furnished to do so, subject to the following conditions:
 The above copyright notice and this permission notice shall be included in all
 copies or substantial portions of the Software.
 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
--- a/README.md
+++ b/README.md
@ -0,0 +1,3 @@
 # objectlib
 A Python library of miscellaneous object types (primarily probabilistic and algorithmic) and
 serial streaming utilities.
--- a/objectlib/init.py
+++ b/objectlib/init.py
@ -0,0 +1,3 @@
 from . import combinatorics
 from . import evolution
 from . import probability
--- a/objectlib/combinatorics/init.py
+++ b/objectlib/combinatorics/init.py
@ -0,0 +1 @@
 from . import counting
--- a/objectlib/combinatorics/counting.py
+++ b/objectlib/combinatorics/counting.py
@ -0,0 +1,143 @@
 from math import factorial
 import itertools
 import random
 class Product:
    '''
    Cartesian product of iterables
    Every factor must support `len` (used by `count`) and `random.choice`
    (used when sampling), i.e. sequences such as lists, tuples and strings.
    '''
    def __init__(self, *data, repeat=1):
        '''
        TODO: address repeat variable in more efficient manner (i.e. dont repeat explicitly in memory)
        params: any number of iterables
        :repeat: number of times the whole group of factors is repeated
        -> data: tuple of all passed iterables, repeated `repeat` times
        '''
        self.data = data*repeat
    def count(self):
        '''Compute number of elements in product (1 for the empty product)'''
        count = 1
        for datum in self.data: count *= len(datum)
        return count
    def generate(self):
        '''Return iterator over every tuple of the Cartesian product'''
        return itertools.product(*self.data)
    def sample(self, m=1):
        '''Randomly generate m samples (with replacement) from the product'''
        for _ in range(m):
            yield tuple(random.choice(exp) for exp in self.data)
    def sample_without_replacement(self, m=1):
        '''
        Randomly generate m unique samples from the product. If m is
        greater than the number of possible samples, or m is negative,
        yield every element of the product exactly once (in random
        order) and then stop.
        Sampled tuples are tracked in a set, so factor elements must be
        hashable.
        TODO: can implement approaches described in blog analysis.
        Iterative resampling is cheap early, but expensive later.
        Can attempt to dynamically implement both strategies for
        optimal performance.
        '''
        total = self.count()
        # clamp the request so the rejection loop below always terminates
        target = total if (m < 0 or m > total) else m
        generated = set()
        while len(generated) < target:
            gtuple = tuple(random.choice(exp) for exp in self.data)
            if gtuple not in generated:
                generated.add(gtuple)
                yield gtuple
 class Permutation:
    '''
    Permutations of a sized sequence
    `data` must support `len` and `random.sample` (i.e. be a sequence).
    Counts assume the elements of `data` are distinct.
    '''
    def __init__(self, data):
        self.data = data
        self.n = len(data)
    @staticmethod
    def nPk(n, k):
        '''
        Compute nPk = n! / (n-k)! exactly
        Integer floor division is used because the quotient is always an
        integer; true division would round through a float and lose
        precision (or overflow) for large n.
        '''
        return factorial(n) // factorial(n-k)
    def count(self, k=None):
        '''Compute nPk explicitly on object data (None when k > n)'''
        if k is None: k = self.n
        if k > self.n: return None
        return Permutation.nPk(self.n, k)
    def generate(self, k=None):
        '''Return generator over all k-permutations of object data'''
        if k is None: k = self.n
        return itertools.permutations(self.data, k)
    def generate_with_repetition(self, k=None):
        '''Not implemented yet; returns None'''
        pass
    def sample(self, k=None, m=1):
        '''Return generator over m random samples from k-permutations of object data'''
        if k is None: k = self.n
        if k > self.n: return None
        for _ in range(m):
            yield tuple(random.sample(self.data, k))
    def sample_without_replacement(self, m=1, k=None):
        '''
        Randomly generate m unique k-permutations of object data
        :m: number of distinct permutations to yield
        :k: permutation length, defaults to the full length n
        Yields nothing when m exceeds the number of k-permutations.
        NOTE: if `data` contains repeated elements there are fewer distinct
        tuples than nPk, and a request close to nPk may never terminate.
        '''
        if k is None: k = self.n
        total = self.count(k)
        if total is None or m > total: return None
        # list (not set) so that unhashable elements keep working
        generated = []
        while len(generated) < m:
            # keyword arguments matter here: the first positional of
            # `sample` is k, not m
            gtuple = next(self.sample(k=k, m=1))
            if gtuple not in generated:
                generated.append(gtuple)
                yield gtuple
    def duplicates(self, k=None):
        '''Return the set of distinct k-permutations (None when k > n)'''
        if k is None: k = self.n
        if k > self.n: return None
        return set(self.generate(k))
 class Combination:
    '''
    Combinations of a sized, indexable sequence
    `data` must support `len` and integer indexing. Counts assume the
    elements of `data` are distinct.
    '''
    def __init__(self, data):
        self.data = data
        self.n = len(data)
    @staticmethod
    def nCk(n, k):
        '''
        Compute nCk = n! / (k! (n-k)!) exactly
        Integer floor division keeps the result exact; true division would
        round through a float for large n.
        '''
        return factorial(n) // (factorial(k)*factorial(n-k))
    def count(self, k=None):
        '''Compute nCk on object data (None when k > n)'''
        if k is None: k = self.n
        if k > self.n: return None
        return Combination.nCk(self.n, k)
    def generate(self, k=None):
        '''Return generator over all k-combinations of object data'''
        if k is None: k = self.n
        return itertools.combinations(self.data, k)
    def generate_with_repetition(self, k=None):
        '''Not implemented yet; returns None'''
        pass
    def sample(self, k=None, m=1):
        '''Return generator over m random samples from k-combinations of object data'''
        if k is None: k = self.n
        if k > self.n: return None
        for _ in range(m):
            # sorted indices keep elements in their original relative order,
            # giving each combination a single canonical tuple
            indices = sorted(random.sample(range(self.n), k))
            yield tuple(self.data[i] for i in indices)
    def sample_without_replacement(self, m=1, k=None):
        '''
        Randomly generate m unique k-combinations of object data
        :m: number of distinct combinations to yield
        :k: combination size, defaults to the full length n
        Yields nothing when m exceeds the number of k-combinations.
        '''
        if k is None: k = self.n
        total = self.count(k)
        if total is None or m > total: return None
        # list (not set) so that unhashable elements keep working
        generated = []
        while len(generated) < m:
            # keyword arguments matter here: the first positional of
            # `sample` is k, not m
            gtuple = next(self.sample(k=k, m=1))
            if gtuple not in generated:
                generated.append(gtuple)
                yield gtuple
    def duplicates(self, k=None):
        '''Return the set of distinct k-combinations (None when k > n)'''
        if k is None: k = self.n
        if k > self.n: return None
        return set(self.generate(k))
--- a/objectlib/evolution/init.py
+++ b/objectlib/evolution/init.py
@ -0,0 +1,6 @@
 from . import candidate
 from . import crossover
 from . import evolutionary
 from . import genetic
 from . import mutation
 from . import selection
--- a/objectlib/evolution/candidate.py
+++ b/objectlib/evolution/candidate.py
@ -0,0 +1,146 @@
 import string
 from ..sim.agent import Agent
 from ..combinatorics import counting
 from ..ml import nn
 class Candidate(Agent):
    '''
    Base candidate class for evolutionary algorithms
    NOTE: can always consider switching the constructor to
    by default take random attributes and yield a stochastic
    candidate. This might make sense if we are to strictly
    follow what is most commonly used. However, it's not
    obvious how to go from a specified genotype into a constructor
    expecting parameters for random generation; this would certainly
    be more sloppy than the current simple entry point constructor.
    UPDATE: candidates ARE agents. They NEED to be defined in the context
    of a gym environment, as there must be a way of evaluating the candidates
    in an objective manner. This environment can be completely static, but
    the point is that it provides context for evaluating fitness. Candidates
    are to inherit the same methods as any agent, but have the additional
    `genotype` attribute which holds their internal representation in the
    context of a genetic algorithm process.
    '''
    def __init__(self, genotype):
        '''Store the genotype after running the Agent initialiser'''
        super().__init__()
        self.genotype = genotype
    def __str__(self):
        '''String form of the phenotype produced by `epigenesis`'''
        # must return an actual str; returning the bound `__str__` method
        # makes str(candidate) raise TypeError
        return str(self.epigenesis())
    @classmethod
    def random(cls, genotype):
        '''
        Alternate constructor for random candidate construction, to be
        implemented by subclassing type if stochastic construction is
        needed. The base implementation does nothing and returns None.
        NOTE: This is currently the official way of creating random objects:
        define a standard constructor that sets internal variables based on
        given arguments. Then define class methods which take their own set
        of parameters and construct the main object by creating values and
        sending them to the constructor.
        '''
        pass
    def epigenesis(self):
        '''Process of turning genotype into phenotype (identity by default)'''
        return self.genotype
 class AlphaString(Candidate):
    '''Candidate whose genotype is a list of characters and whose phenotype is a string'''
    @classmethod
    def random(cls, length, alphabet=string.printable):
        '''
        Build an AlphaString with `length` characters drawn independently
        and uniformly from `alphabet`
        alphastr = AlphaString.random(length)
        alphastr = AlphaString.random(length, 'abc')
        :genotype: list (mutable)
        :phenotype: conversion to string
        '''
        # one product factor per character position
        product = counting.Product(alphabet, repeat=length)
        drawn = next(product.sample())
        return cls(list(drawn))
    def epigenesis(self):
        '''Concatenate the genotype characters into a single string'''
        separator = ''
        return separator.join(self.genotype)
 class BitString(AlphaString):
    '''AlphaString restricted to the binary alphabet '0'/'1' '''
    @classmethod
    def random(cls, length):
        '''Build a random bit string with `length` characters'''
        return super().random(length, alphabet='01')
 class NeuralNetwork(Candidate):
    '''
    NeuralNetwork candidate object for use in
    neuroevolution implementations. The genotype is the
    network's weight structure; the phenotype is the
    behaviour observed when the network runs inference.
    All evolution operations (as usual) are performed on
    the genotype level.
    :phenotype: output from inference and resulting behavior
    :genotype: internal network structure and weight values
    '''
    def __init__(self, genotype):
        '''
        Genotype expected to be of the form of the `.weights`
        attribute from the nn.NeuralNetwork class (i.e. a list
        of NumPy arrays). Also starts the `time_alive` counter at 0.
        '''
        super().__init__(genotype)
        self.time_alive = 0
    @classmethod
    def random(cls, layers, rng=1):
        '''
        Instantiate an nn.NeuralNetwork with the given layers
        (passing `rng` as its `epsilon` argument) and wrap its
        weights in a new candidate
        :layers: list of network layer size
        :rng: weights generated from [-rng, +rng]
        '''
        return cls(nn.NeuralNetwork(layers, epsilon=rng).weights)
    def epigenesis(self):
        '''
        Rebuild a live network from the genotype weights and
        return it; inference itself happens in `act`.
        TODO: consider whether a nn object should be kept in memory
        at all times with modifications made directly to its weights,
        instead of constructing a network from weights on every call.
        Also undecided: how incoming data should reach the network
        (set on the network, passed to the function, or set under the
        candidate object).
        '''
        return nn.NeuralNetwork.from_weights(self.genotype)
    def update(self):
        '''Advance the candidate's age by one step'''
        self.time_alive = self.time_alive + 1
    def act(self):
        '''Run the network on `self.state` and return the first prediction entry'''
        prediction = self.epigenesis().predict(self.state)
        return prediction[0]
--- a/objectlib/evolution/crossover.py
+++ b/objectlib/evolution/crossover.py
@ -0,0 +1,20 @@
 import random
 def single_point(parent1, parent2):
    '''
    Segment crossover for any two candidates whose genotypes are
    sequences of the same length
    A copy of parent1's genotype is taken and a randomly chosen
    contiguous slice of it is overwritten with the matching slice of
    parent2's genotype. (Despite the name, two cut points are drawn.)
    :parent1: candidate supplying the base genotype (needs `.copy()`)
    :parent2: candidate supplying the inserted slice
    -> new genotype (not a candidate); neither parent is modified
    '''
    child = parent1.genotype.copy()
    size = len(child)
    # cut points range over 0..size inclusive: the slice end is exclusive,
    # so `size` must be reachable for the last gene to ever be swapped
    # (this also makes an empty genotype valid input)
    begin = random.randint(0, size)
    end = random.randint(0, size)
    start, stop = min(begin, end), max(begin, end)
    child[start:stop] = parent2.genotype[start:stop]
    return child
 def multipoint(parent1, parent2):
    '''
    Placeholder for multi-point crossover; generalizes single point
    crossover and could make it redundant. Not implemented: returns None.
    '''
    return None
 def weight_slice(net1, net2):
    '''Placeholder crossover for networks: currently hands back net1 unchanged'''
    offspring = net1
    return offspring
--- a/objectlib/evolution/evolutionary.py
+++ b/objectlib/evolution/evolutionary.py
@ -0,0 +1,59 @@
 import random
 class Evolutionary:
    '''
    Base evolutionary algorithm class
    Holds the run configuration and the population; subclasses supply
    fitness, selection, crossover, mutation and the run loop.
    '''
    def __init__(self, population_size, num_generations, mutation_params, candidate, cand_params, num_offspring=1, gym=None):
        '''
        :population_size: number of candidates created by `create_population`
        :num_generations: number of generations to run
        :mutation_params: stored for use by the mutation step
        :candidate: candidate class, must expose a `random` constructor
        :cand_params: keyword arguments forwarded to `candidate.random`
        :num_offspring: offspring per generation
        :gym: optional environment in which candidates are registered
        '''
        # run configuration
        self.population_size = population_size
        self.num_generations = num_generations
        self.num_offspring = num_offspring
        self.mutation_params = mutation_params
        # candidate factory
        self.candidate = candidate
        self.cand_params = cand_params
        # environment and mutable state
        self.gym = gym
        self.population = []
        self.action = []
    def fitness(self, candidate):
        '''Score the quality of a candidate; must be overridden'''
        raise NotImplementedError
    def selection(self, population):
        '''Pick a parent for crossover; must be overridden'''
        raise NotImplementedError
    def crossover(self, parent1, parent2):
        '''Reproduce between two candidates; must be overridden'''
        raise NotImplementedError
    def mutation(self, candidate):
        '''Randomly mutate a candidate; must be overridden'''
        raise NotImplementedError
    def termination(self, population):
        '''
        Early-stop condition for the simulation. The default never
        triggers, so the simulation runs for all generations.
        '''
        return False
    def create_population(self):
        '''Append `population_size` random candidates, registering each with the gym if one is set'''
        for _ in range(self.population_size):
            member = self.candidate.random(**self.cand_params)
            self.population.append(member)
            if not self.gym:
                continue
            self.gym.register_agent(member)
    def run(self):
        '''
        Run evolutionary simulation, after class setup has been completed.
        Implementation will vary based on subclassing type. General approach
        will iterate until termination condition met, evaluating, selecting,
        breeding, and mutating a population of candidates. A generator
        yielding generation specific details is the encouraged form.
        '''
        raise NotImplementedError
--- a/objectlib/evolution/genetic.py
+++ b/objectlib/evolution/genetic.py
@ -0,0 +1,60 @@
 from . import evolutionary
 class GeneticAlgorithm(evolutionary.Evolutionary):
    '''
    Standard genetic algorithm (in a way, the genetic algo is itself
    an agent, taking states, maintaining internal representation,
    reacting and responding to the environment)
    '''
    def run(self):
        '''
        Generator over generations. Each step ranks the population by
        fitness, trims it to `population_size`, yields a summary dict
        (generation, best_candidate, best_fitness, worst_fitness), and
        then breeds `num_offspring` children for the next round.
        Stops early (with the best candidate as the generator's return
        value) when `termination` is satisfied.
        The gym is optional, matching the constructor default and
        `create_population`: every gym call is skipped when none is set.
        '''
        # initialize population of candidates
        self.create_population()
        if self.gym: self.gym.start()
        # begin generation loop
        for gen in range(self.num_generations):
            # execute actions and get new gym state
            if self.gym: self.gym.tick()
            # rank individuals based on current fitness
            self.population.sort(key=self.fitness, reverse=True)
            # balance population size
            self.population = self.population[:self.population_size]
            # maintain gym agent registry
            if self.gym:
                self.gym.update_agents(self.population)
                self.gym.refresh_state()
            # yield generation specific details
            top_candidate = self.population[0]
            bot_candidate = self.population[-1]
            yield {'generation'    : gen,
                   'best_candidate': str(top_candidate.epigenesis()),
                   'best_fitness'  : self.fitness(top_candidate),
                   'worst_fitness' : self.fitness(bot_candidate),}
                   #'state'         : self.gym.state}
            # check termination condition
            if self.termination(self.population):
                return self.population[0]
            # consider multiple offspring per generation
            for _ in range(self.num_offspring):
                # stochastically select parent candidates
                parent1 = self.selection(self.population)
                parent2 = self.selection(self.population)
                # create child candidate via crossover
                child_genotype = self.crossover(parent1, parent2)
                child = self.candidate(child_genotype)
                # perform (possible) mutations on child
                self.mutation(child, **self.mutation_params)
                # add child to population, gym for next round eval
                self.population.append(child)
                if self.gym: self.gym.register_agent(child)
--- a/objectlib/evolution/mutation.py
+++ b/objectlib/evolution/mutation.py
@ -0,0 +1,41 @@
 import random
 def mutation_decorator(mutate):
    '''
    Turn a mutation function into a probabilistic one
    The returned wrapper takes `(candidate, rate, **kwargs)` and calls
    `mutate(candidate, **kwargs)` with probability `rate`; otherwise the
    candidate is left untouched. The wrapper always returns None.
    '''
    from functools import wraps
    # wraps keeps the mutator's name and docstring on the wrapper
    @wraps(mutate)
    def wrapper(candidate, rate, **kwargs):
        if random.random() < rate:
            mutate(candidate, **kwargs)
    return wrapper
 def class_mutation_decorator(mutate):
    '''
    Method variant of `mutation_decorator`
    The returned wrapper takes `(self, candidate, rate, **kwargs)` and
    calls `mutate(self, candidate, **kwargs)` with probability `rate`.
    The wrapper always returns None.
    '''
    from functools import wraps
    # wraps keeps the mutator's name and docstring on the wrapper
    @wraps(mutate)
    def wrapper(self, candidate, rate, **kwargs):
        if random.random() < rate:
            mutate(self, candidate, **kwargs)
    return wrapper
@mutation_decorator
 def bitflip(candidate):
    '''
    Flip one randomly chosen bit of the candidate's genotype in place
    The genotype is expected to be a mutable sequence of '0'/'1'
    characters; the chosen entry is parsed as an int, XOR-ed with 1 and
    stored back as a string.
    '''
    bits = candidate.genotype
    position = random.randrange(len(bits))
    flipped = int(bits[position]) ^ 1
    bits[position] = str(flipped)
@mutation_decorator
 def alterchar(candidate):
    '''
    Shift one randomly chosen character of the genotype to the adjacent
    code point (one up or one down), in place
    '''
    chars = candidate.genotype
    position = random.randrange(len(chars))
    code = ord(chars[position])
    shift = random.choice([-1, 1])
    chars[position] = chr(code + shift)
@mutation_decorator
 def alter_weight(candidate, rng):
    '''
    Perturb a single weight of a NeuralNetwork genotype in place
    A layer is picked uniformly at random, then a row and column within
    it, and a value drawn uniformly from [-rng, rng] is added to that
    entry. Each layer must expose `.shape` and support `[i, j]` indexing
    (e.g. a 2-D NumPy array).
    '''
    layers = candidate.genotype
    which = random.randrange(len(layers))
    matrix = layers[which]
    dims = matrix.shape
    row = random.randrange(dims[0])
    col = random.randrange(dims[1])
    matrix[row, col] += random.uniform(-rng, rng)
--- a/objectlib/evolution/selection.py
+++ b/objectlib/evolution/selection.py
@ -0,0 +1,6 @@
 import random
 def roulette(population):
    '''
    Pick one member of the population at random, favouring low indices
    The product of two uniform draws is skewed towards 0, so entries near
    the front of the sequence are selected more often.
    '''
    skewed = random.random() * random.random()
    return population[int(skewed * len(population))]
--- a/objectlib/probability/init.py
+++ b/objectlib/probability/init.py
@ -0,0 +1,2 @@
 from . import distributions
 from . import sampling
--- a/objectlib/probability/distributions.py
+++ b/objectlib/probability/distributions.py
@ -0,0 +1,181 @@
 import random
 import math
 from ..combinatorics.counting import Combination
 class Distribution:
    '''
    Base class for probability distributions
    Consider extending base class to Continuous and Discrete subclasses.
    There are some slight differences that might matter to the API (e.g.
    "pdf" vs "pmf")
    '''
    def __init__(self, *params):
        '''Keep the positional parameters as the tuple `self.params`'''
        self.params = params
    def pdf(self, x):
        '''density at x; stub returning None'''
        return None
    def cdf(self, x):
        '''cumulative probability Pr(X <= x); stub returning None'''
        return None
    def quantile(self, x):
        '''inverse cdf evaluated at probability x; stub returning None'''
        return None
    def sample(self, n=1):
        '''
        n: number of samples
        The base generator yields nothing unless n == -1, in which case it
        yields one draw from `self.sample()` after another, forever. That
        only works when a subclass overrides `sample` to produce values.
        Dev note: should consider returning something like a "Sample" object, which wraps
        the samples and provides convenient empirical estimates e.g. MLE of parameters
        '''
        if n != -1:
            return
        while True:
            yield next(self.sample())
    # common moments (consider making @property)
    def mean(self):
        '''distribution mean; stub returning None'''
        return None
    def variance(self):
        '''distribution variance; stub returning None'''
        return None
 class DiscreteDistribution(Distribution):
    '''Marker base class for discrete distributions; adds nothing to Distribution'''
 class ContinuousDistribution(Distribution):
    '''Marker base class for continuous distributions; adds nothing to Distribution'''
 class Bernoulli(DiscreteDistribution):
    '''Bernoulli distribution: 1 with probability p, 0 with probability 1-p'''
    def __init__(self, p):
        ''':p: success probability'''
        super().__init__(p)
        self.p = p
    def pdf(self, x):
        '''Probability mass at x; zero anywhere but 0 and 1'''
        if x not in (0, 1): return 0
        return self.p**x * (1-self.p)**(1-x)
    def cdf(self, x):
        '''Pr(X <= x): 0 below the support, 1-p on [0, 1), 1 from 1 upwards'''
        if x < 0: return 0
        if x >= 1: return 1
        return 1-self.p
    def sample(self, n=1):
        '''Yield n draws (0 or 1); n == -1 yields draws forever'''
        # the base generator handles the endless n == -1 case
        yield from super().sample(n)
        for _ in range(n):
            yield 1 if random.random() < self.p else 0
    def mean(self):
        '''distribution mean, p'''
        return self.p
    def variance(self):
        '''distribution variance, p(1-p)'''
        return self.p*(1-self.p)
 class Binomial(DiscreteDistribution):
    '''
    Binomial distribution: number of successes in n trials with
    success probability p
    consider indexing probabilities at different
    values for later access. precompute could be cheap
    and save time later, or could be expensive and never
    really used. have to compare the options
    '''
    def __init__(self, n, p):
        '''
        :n: number of trials
        :p: success probability per trial
        '''
        super().__init__(n, p)
        self.n = n
        self.p = p
        # cache of cumulative probabilities, filled by cdf(..., index=True)
        self._cdf = {}
    def pdf(self, x):
        '''Probability mass at integer x; zero outside 0..n'''
        if x < 0 or x > self.n: return 0
        return Combination.nCk(self.n, x)*self.p**x*(1-self.p)**(self.n-x)
    def cdf(self, x, index=False):
        '''
        iteratively (naively) compute P(X <= x)
        :index: when True, store each partial sum in the `_cdf` cache
        '''
        p = 0
        # floor handles negative fractions correctly (int() truncates toward
        # zero); the cap at n keeps values beyond the support at exactly 1
        upper = min(math.floor(x), self.n)
        for i in range(upper+1):
            # pin the final value to 1 instead of trusting float accumulation
            if i == self.n: p = 1
            else: p += self.pdf(i)
            if index:
                self._cdf[i] = p
        return p
    def sample(self, n=1):
        '''
        Yield n draws by inverting the cached cdf; n == -1 yields forever
        naive implementation (for now). meant to be used
        with relatively small n. consider poisson
        sampling for sufficiently large n
        '''
        # the base generator handles the endless n == -1 case; it has to be
        # delegated to with `yield from`, a bare call does nothing
        yield from super().sample(n)
        # index entire cdf
        if self.n not in self._cdf:
            self.cdf(self.n, index=True)
        for _ in range(n):
            r = random.random()
            # first outcome whose cumulative probability reaches r
            for x in self._cdf:
                if self._cdf[x] >= r: break
            yield x
    def mean(self):
        '''distribution mean, np'''
        return self.n*self.p
    def variance(self):
        '''distribution variance, np(1-p)'''
        return self.n*self.p*(1-self.p)
 class Exponential(ContinuousDistribution):
    '''Placeholder for the exponential distribution; nothing implemented yet'''
 class Normal(ContinuousDistribution):
    '''Placeholder for the normal distribution; nothing implemented yet'''
 class Poisson(DiscreteDistribution):
    '''
    Poisson distribution with rate lmda
    Derives from the discrete base: a Poisson variable takes values on
    the non-negative integers.
    '''
    def __init__(self, lmda):
        ''':lmda: rate parameter (also the mean and the variance)'''
        super().__init__(lmda)
        self.lmda = lmda
    def pdf(self, k):
        '''Probability mass at integer k; zero for negative k'''
        if k < 0: return 0
        return self.lmda**k*math.exp(-self.lmda)/math.factorial(k)
    def cdf(self, x):
        '''Pr(X <= x), computed by naive summation of the mass function'''
        if x < 0: return 0
        return sum(self.pdf(k) for k in range(math.floor(x)+1))
    def quantile(self, x):
        '''Not implemented yet; returns None'''
        pass
    def sample(self, n=1):
        '''
        Yield n draws; n == -1 yields draws forever
        Uses the multiplication-of-uniforms method: count how many uniform
        draws can be multiplied together before the product falls to
        exp(-lmda) or below. Cost grows linearly with lmda, and
        exp(-lmda) underflows for very large lmda, so this is meant for
        small to moderate rates.
        '''
        # the base generator handles the endless n == -1 case
        yield from super().sample(n)
        threshold = math.exp(-self.lmda)
        for _ in range(n):
            k, prod = 0, random.random()
            while prod > threshold:
                k += 1
                prod *= random.random()
            yield k
    def mean(self):
        '''distribution mean, lmda'''
        return self.lmda
    def variance(self):
        '''distribution variance, lmda'''
        return self.lmda
 class Uniform(ContinuousDistribution):
    '''Continuous uniform distribution on the interval between a and b'''
    def __init__(self, a, b):
        '''
        :a, b: interval endpoints, accepted in either order
        '''
        super().__init__(a, b)
        self.a = a
        self.b = b
        self.lower = min(a,b)
        self.width = abs(a-b)
    def pdf(self, x):
        '''Density at x: 1/width inside the interval, zero outside'''
        if x < self.lower or x > self.lower+self.width: return 0
        return 1 / self.width
    def cdf(self, x):
        '''Pr(X <= x), clamped to 0 below the interval and 1 above it'''
        if x <= self.lower: return 0
        if x >= self.lower+self.width: return 1
        return (x-self.lower) / self.width
    def quantile(self, x):
        '''
        Inverse cdf: the point below which a fraction x of the mass lies
        Raises ValueError when x is not a probability in [0, 1].
        '''
        if x < 0 or x > 1:
            raise ValueError('quantile requires a probability in [0, 1]')
        return self.lower + x*self.width
    def sample(self, n=1):
        '''Yield n draws from the interval; n == -1 yields draws forever'''
        # the base generator handles the endless n == -1 case
        yield from super().sample(n)
        for _ in range(n):
            yield random.random()*self.width+self.lower
    def mean(self):
        '''distribution mean, (a+b)/2'''
        return (self.a+self.b)/2
    def variance(self):
        '''distribution variance, (b-a)^2/12'''
        return (self.b-self.a)**2/12
--- a/objectlib/probability/sampling.py
+++ b/objectlib/probability/sampling.py
@ -0,0 +1,23 @@
 import math
 import random
 def sample(population, k=1):
    '''
    Lazily draw k items from the population without replacement
    The draw works on a private copy, so the caller's collection is left
    untouched.
    :population: iterable of items to draw from
    :k: number of items to yield
    Raises IndexError once the population is exhausted, i.e. when more
    than len(population) items are requested.
    '''
    pool = list(population)
    for _ in range(k):
        if not pool:
            raise IndexError('sample size k exceeds population size')
        yield pool.pop(random.randrange(len(pool)))
 def inverse_transform(inv_cdf):
    '''
    Draw one sample by inverse transform sampling: evaluate the supplied
    inverse cdf at a uniform random number from [0, 1)
    '''
    return inv_cdf(random.random())
 #def discrete_inverse_transform(cdf):
 #  '''general naive implementation'''
 #  def inv(p):
 #    x = 0
 #    for i in range(x):
 #      p +=  
 def rejection_sampling():
    '''Placeholder for rejection sampling; not implemented, returns None'''
    return None
 def importance_sampling():
    '''Placeholder for importance sampling; not implemented, returns None'''
    return None
--- a/objectlib/utils/init.py
+++ b/objectlib/utils/init.py
--- a/objectlib/utils/dataloader.py
+++ b/objectlib/utils/dataloader.py
@ -0,0 +1,11 @@
 class DataLoader:
  '''Skeleton loader that records a dataset path and batch size, then calls `load`'''
  def __init__(self, path, batch_size):
    '''
    :path: location of the dataset
    :batch_size: number of items per batch
    '''
    self.path = path
    # store the argument; a bare attribute access here would raise
    # AttributeError on every construction
    self.batch_size = batch_size
    self.load()
  def load(self):
    '''Not implemented yet; intended to return an iterator over dataset files'''
    pass
    # return iterator over dataset files
--- a/objectlib/utils/generator.py
+++ b/objectlib/utils/generator.py
@ -0,0 +1,60 @@
 def exhaust(gen, func=None, interval=1, verbose=True, last=True):
    '''
    Exhaust given generator, applying given function (func)
    to items along the way, and printing them if verbose=True,
    at the specified interval. Return final element of the generator,
    or None if the generator produced nothing.
    :gen: iterable to consume
    :func: optional callable applied to every interval-th item
    :interval: process items whose index is a multiple of this value
    :verbose: print each processed item
    :last: also process the final item when it fell between intervals
    '''
    # sentinels so an empty generator does not leave the names unbound
    i, item = -1, None
    for i, item in enumerate(gen):
        if i % interval == 0:
            if func: func(item)
            if verbose: print(item)
    # nothing was produced: there is no final element to process
    if i < 0: return None
    # perform same actions for last item
    if last and i % interval != 0:
        if func: func(item)
        if verbose: print(item)
    return item
 async def async_exhaust(gen, func=None, interval=1, verbose=True, last=True):
    '''
    Coroutine variant of `exhaust`: consume a (synchronous) iterable,
    awaiting `func(item)` on items along the way, and printing them if
    verbose=True, at the specified interval. Return final element of the
    generator, or None if the generator produced nothing.
    :gen: regular iterable to consume (it is not iterated asynchronously)
    :func: optional callable returning an awaitable, applied to every
           interval-th item
    :interval: process items whose index is a multiple of this value
    :verbose: print each processed item
    :last: also process the final item when it fell between intervals
    '''
    # sentinels so an empty generator does not leave the names unbound
    i, item = -1, None
    for i, item in enumerate(gen):
        if i % interval == 0:
            if func: await func(item)
            if verbose: print(item)
    # nothing was produced: there is no final element to process
    if i < 0: return None
    # perform same actions for last item
    if last and i % interval != 0:
        if func: await func(item)
        if verbose: print(item)
    return item
 def chunk(gen, n, last=True):
    '''
    Regroup the items of <gen> into lists of <n> consecutive
    elements, yielded one list at a time. The trailing list may
    hold fewer than <n> items; pass last=False to drop such a
    partial chunk instead of yielding it.
    TODO: consider adding a time parameter as well,
    such that if <n> items have not arrived from the
    original generator in <t> seconds, return the
    current chunk. Could protect against long running,
    async generator processes (and may be useful for
    time dependent physics sims)
    '''
    batch = []
    for element in gen:
        batch.append(element)
        if len(batch) != n:
            continue
        yield batch
        # start a fresh list so the one already handed out is not reused
        batch = []
    if batch and last:
        yield batch
--- a/objectlib/utils/naming.py
+++ b/objectlib/utils/naming.py
@ -0,0 +1,2 @@
 def camel_to_snake(s):
    '''
    Convert a CamelCase name to snake_case
    Every uppercase character becomes an underscore followed by its
    lowercase form; all leading underscores are stripped from the result.
    '''
    pieces = []
    for char in s:
        pieces.append('_' + char.lower() if char.isupper() else char)
    return ''.join(pieces).lstrip('_')
--- a/objectlib/utils/options.py
+++ b/objectlib/utils/options.py
@ -0,0 +1,75 @@
 class Opt(dict):
    '''
    Dict of base option values whose `update` is governed by per-key patterns
    TODO: may want to be able to set values using standard dict API, so would have
    to redirect options set to the value dict
    TODO: could maybe throw specialized errors for patterns like require, but a key error
    will be thrown either way, which might be good enough
    Default pattern value is 'optional', as it seems to make the least assumptions
    about the nature of the parameter. When the base has some set values, but no pattern
    is given, we simply recover the default `update` behaviour of standard dicts. Here we
    iterate over the target keys, and if the key has no pattern, we set it directly to the
    base. This takes care of both keys that are in the base but we've left them to be taken
    care of optionally by default, and any other keys unknown to both the base and to
    patterns. The subset of the target keys without a pattern is the only group of unprocessed
    keys at that point in time.
    Note that dicts can use their `update` method on an Opt object, and of course vice versa.
    There is no point in using an Opt object if no pattern is specified, as all patterns
    not specified are assumed to be optional, which is exactly what regular dicts do.
    Why is "ignore" needed? If you don't want your defaults changed, why not just leave them
    out? Well this is a valid point, but in the case you are using the Opt object to set your
    class attributes, there are some you want to ensure _dont_ get set (which will overwrite
    your defaults you may have set outside the Opt object).
    Permission options include:
    - require: require provided dict specifies key, no base value needed
    - optional: key is optional in provided dict, will be used instead of any base values
    - merge: merge provided values with base values in expectable way
    - ignore: ignore provided values under this key in preference for base (if defined, doesn't need to be)
    '''
    def __init__(self, d):
        '''
        :d: mapping of base values
        '''
        # pass the mapping positionally so non-string keys are accepted too
        super().__init__(d)
        # start with no patterns so `update` behaves like dict.update until
        # `set_pattern` is called
        self.pattern = {}
    def set_pattern(self, pattern):
        '''
        :pattern: dict mapping keys to 'require', 'optional', 'merge' or 'ignore'
        '''
        self.pattern = pattern
    def update(self, target):
        '''
        Main purpose of this class. Update base values with target values according to the
        permissions set.
        :target: dict of provided values
        Raises KeyError when a key marked 'require' is missing from target.
        '''
        # execute update pattern
        for key, pattern in self.pattern.items():
            if pattern == 'require':
                self[key] = target[key]
            elif pattern == 'optional':
                self[key] = target.get(key, self.get(key, None))
            elif pattern == 'merge':
                self[key] = self.merge(self.get(key), target.get(key))
        # add key-value pairs that don't have a pattern, but may or may not already have
        # an entry in base. All keys with a pattern have already been processed (if they've
        # been ignored, and there wasn't an entry in base, then that key doesn't have a
        # representative in the base, but this is intentional)
        for key, value in target.items():
            if self.pattern.get(key) is None:
                self[key] = value
    def merge(self, val1, val2):
        '''
        Combine a base value with a provided value
        A missing (None) side yields the other side; two dicts are merged
        with val2 winning on shared keys; anything else is combined with `+`.
        Raises ValueError when both values are None and TypeError when the
        two values have different types.
        '''
        if val1 is None and val2 is None:
            raise ValueError('no values provided to merge')
        if val1 is None: return val2
        if val2 is None: return val1
        if type(val1) != type(val2):
            raise TypeError('mismatching types on merge')
        if isinstance(val1, dict):
            return {**val1, **val2}
        return val1 + val2
--- a/objectlib/utils/selection.py
+++ b/objectlib/utils/selection.py
@ -0,0 +1,84 @@
 class Collection():
    '''
    Keyed join between a list of data items and the objects created for them
    `append` creates one object per bound datum and records the pairing in
    `state`; `data` binds a new data list; `enter` and `exit` then report
    which data have no object yet and which objects have lost their datum.
    NOTE(review): appears modelled on a D3-style data join - confirm intent.
    '''
    def __init__(self, objs=None, data=None):
        '''
        :objs: list of already created objects (a fresh list when omitted)
        :data: list of bound data items (a fresh list when omitted)
        '''
        # None defaults: a literal [] default would be one list shared by
        # every instance, and `append` mutates it
        self.objs = [] if objs is None else objs
        self._data = [] if data is None else data
        # list of {'dat': datum, 'obj': object} pairs created by `append`
        self.state = []
        # key -> object, for every pair in `state`
        self.state_map = {}
        # key -> datum, for the currently bound data
        self.data_map = {}
        # default key: position in the list
        self.key = lambda d,i: i
    def data(self, data, key=None):
        '''
        Bind a new data list and rebuild both key indexes
        :data: list of data items
        :key: optional function (datum, index) -> key, replaces the current key
        '''
        self._data = data
        if key is not None:
            self.key = key
        # rebuild from scratch so keys from an earlier binding do not linger
        # and distort the enter/exit selections
        self.data_map = {self.key(d, i): d for i, d in enumerate(self._data)}
        self.state_map = {self.key(o['dat'], i): o['obj'] for i, o in enumerate(self.state)}
    def enter(self):
        '''
        check keys across data to state dicts; those keys in the data dict not in the
        state dict are in the enter selection
        -> new Collection holding the entering data
        '''
        entering = [v for k, v in self.data_map.items() if k not in self.state_map]
        return Collection(data=entering)
    def merge(self):
        '''Not implemented yet; returns self'''
        return self
    def update(self):
        '''
        check keys across data to state dicts; those keys in both the state dict and the
        data dict are in the update selection
        Not implemented yet; returns self.
        '''
        return self
    def exit(self):
        '''
        check keys across data to state dicts; those keys in the state dict not in the
        data dict are in the exit selection
        -> new Collection holding the exiting objects
        '''
        exiting = [v for k, v in self.state_map.items() if k not in self.data_map]
        return Collection(objs=exiting)
    def append(self, func):
        '''
        Takes a function and applies it to each bound datum as func(datum, index),
        storing the created object in `objs` and the pairing in `state`
        '''
        for i, d in enumerate(self._data):
            obj = func(d,i)
            self.objs.append(obj)
            self.state.append({'dat': d, 'obj': obj})
 #.select('group')
 #.data([])
 #.enter().append()
 #.update()
 #.exit().remove()
 #groups = {
 #    'group': Collection([1,2,3,4,5])
 #}
 #
 #col = groups['group']
 #col.data([1,2,3])
 #col.enter()
 #col.exit()
--- a/objectlib/utils/timing.py
+++ b/objectlib/utils/timing.py
--- a/pyproject.toml
+++ b/pyproject.toml
@ -0,0 +1,49 @@
 [build-system]
 requires = ["setuptools", "wheel", "setuptools-git-versioning>=2.0,<3"]
 build-backend = "setuptools.build_meta"
 [tool.setuptools-git-versioning]
 enabled = true
 [project]
 name = "objectlib"
 description = "Object serialization and streaming utilities"
 readme = "README.md"
 requires-python = ">=3.12"
 dynamic = ["version"]
 #license = {file = "LICENSE"}
 authors = [
  { name="Sam Griesemer", email="samgriesemer+git@gmail.com" },
 ]
 keywords = [""]
 classifiers = [
    "Programming Language :: Python :: 3.12",
    "License :: OSI Approved :: MIT License",
    "Operating System :: OS Independent",
    "Development Status :: 2 - Pre-Alpha",
    "Intended Audience :: Developers",
 ]
 dependencies = [
    "numpy",
    "colorama",
 ]
 [project.optional-dependencies]
 tests = ["pytest"]
 docs = [
    "sphinx",
    "sphinx-togglebutton",
    "sphinx-autodoc-typehints",
    "furo",
    "myst-parser",
 ]
 [project.urls]
 Homepage = "https://doc.olog.io/objectlib"
 Documentation = "https://doc.olog.io/objectlib"
 Repository = "https://git.olog.io/olog/objectlib"
 Issues = "https://git.olog.io/olog/objectlib/issues"
 [tool.setuptools.packages.find]
 include = ["objectlib*"] # pattern to match package names
		`@ -0,0 +1,2 @@`
							`from . import distributions`
							`from . import sampling`
		`@ -0,0 +1,2 @@`
							`def camel_to_snake(s):`
							`return ''.join(['_'+c.lower() if c.isupper() else c for c in s]).lstrip('_')`