From d48cc8fbc8b7a9b462fecad195202fed072cbd78 Mon Sep 17 00:00:00 2001 From: "Sam G." Date: Wed, 15 May 2024 20:36:13 -0700 Subject: [PATCH] initial commit --- .gitignore | 18 +++ LICENSE | 22 +++ README.md | 3 + objectlib/__init__.py | 3 + objectlib/combinatorics/__init__.py | 1 + objectlib/combinatorics/counting.py | 143 +++++++++++++++++++ objectlib/evolution/__init__.py | 6 + objectlib/evolution/candidate.py | 146 ++++++++++++++++++++ objectlib/evolution/crossover.py | 20 +++ objectlib/evolution/evolutionary.py | 59 ++++++++ objectlib/evolution/genetic.py | 60 ++++++++ objectlib/evolution/mutation.py | 41 ++++++ objectlib/evolution/selection.py | 6 + objectlib/probability/__init__.py | 2 + objectlib/probability/distributions.py | 181 +++++++++++++++++++++++++ objectlib/probability/sampling.py | 23 ++++ objectlib/utils/__init__.py | 0 objectlib/utils/dataloader.py | 11 ++ objectlib/utils/generator.py | 60 ++++++++ objectlib/utils/naming.py | 2 + objectlib/utils/options.py | 75 ++++++++++ objectlib/utils/selection.py | 84 ++++++++++++ objectlib/utils/timing.py | 0 pyproject.toml | 49 +++++++ 24 files changed, 1015 insertions(+) create mode 100644 .gitignore create mode 100644 LICENSE create mode 100644 README.md create mode 100644 objectlib/__init__.py create mode 100644 objectlib/combinatorics/__init__.py create mode 100644 objectlib/combinatorics/counting.py create mode 100644 objectlib/evolution/__init__.py create mode 100644 objectlib/evolution/candidate.py create mode 100644 objectlib/evolution/crossover.py create mode 100644 objectlib/evolution/evolutionary.py create mode 100644 objectlib/evolution/genetic.py create mode 100644 objectlib/evolution/mutation.py create mode 100644 objectlib/evolution/selection.py create mode 100644 objectlib/probability/__init__.py create mode 100644 objectlib/probability/distributions.py create mode 100644 objectlib/probability/sampling.py create mode 100644 objectlib/utils/__init__.py create mode 100644 
objectlib/utils/dataloader.py create mode 100644 objectlib/utils/generator.py create mode 100644 objectlib/utils/naming.py create mode 100644 objectlib/utils/options.py create mode 100644 objectlib/utils/selection.py create mode 100644 objectlib/utils/timing.py create mode 100644 pyproject.toml diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..2511f64 --- /dev/null +++ b/.gitignore @@ -0,0 +1,18 @@ +# generic py +__pycache__/ +.pytest_cache/ +*.egg-info/ +.ipynb_checkpoints/ +.pytest_cache/ +.python-version + +# vendor and build files +dist/ +build/ +docs/_autoref/ +docs/_autosummary/ +docs/_build/ + +# local +notebooks/ +/Makefile diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..93f98c5 --- /dev/null +++ b/LICENSE @@ -0,0 +1,22 @@ +MIT License + +Copyright (c) 2024 Sam Griesemer + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
class Product:
    '''
    Cartesian product of iterables.

    Every factor must support `len()` and integer indexing: `count`
    multiplies the factor lengths and `sample` draws from each factor with
    `random.choice`.
    '''
    def __init__(self, *data, repeat=1):
        '''
        params: any number of iterables
        repeat: number of times the whole sequence of factors is repeated
        -> data: tuple of all passed iterables, repeated `repeat` times

        TODO: address repeat variable in more efficient manner (i.e. dont
        repeat explicitly in memory)
        '''
        self.data = data*repeat

    def count(self):
        '''Compute number of elements in product (1 for the empty product)'''
        count = 1
        for datum in self.data:
            count *= len(datum)
        return count

    def generate(self):
        '''Return iterator over the full Cartesian product'''
        return itertools.product(*self.data)

    def sample(self, m=1):
        '''Randomly generate m samples (with replacement) from the product'''
        for _ in range(m):
            yield tuple(random.choice(exp) for exp in self.data)

    def sample_without_replacement(self, m=1):
        '''
        Randomly generate m unique samples from the product by rejection
        (resample until an unseen tuple appears).

        Yields nothing when m exceeds `count()`. A negative m yields unique
        samples until the product is exhausted. Sampled tuples are stored
        in a set, so the elements must be hashable.

        NOTE(review): `count()` counts positions, not distinct values; if a
        factor contains repeated values there are fewer distinct tuples than
        `count()` and the loop may not terminate -- confirm factors hold
        distinct elements.

        TODO: iterative resampling is cheap early, but expensive later.
        Can attempt to dynamically switch strategies for optimal performance.
        '''
        total = self.count()
        if m > total:
            return
        generated = set()
        # the second condition stops a negative m (which can never be
        # matched) once every element has been produced
        while len(generated) != m and len(generated) < total:
            gtuple = next(self.sample(1))
            if gtuple not in generated:
                generated.add(gtuple)
                yield gtuple


class Permutation:
    '''
    Permutations of a sequence.

    `data` must support `len()`; the sampling methods pass it to
    `random.sample`, which requires a sequence.
    '''
    def __init__(self, data):
        self.data = data
        self.n = len(data)

    @staticmethod
    def nPk(n, k):
        '''Compute nPk = n! / (n-k)! exactly'''
        # floor division keeps the arithmetic in exact integers; true
        # division goes through a float and loses precision for large n
        return factorial(n) // factorial(n-k)

    def count(self, k=None):
        '''Compute nPk on object data (k defaults to n); None when k > n'''
        if k is None: k = self.n
        if k > self.n: return None
        return Permutation.nPk(self.n, k)

    def generate(self, k=None):
        '''Return iterator over all k-permutations of object data'''
        if k is None: k = self.n
        return itertools.permutations(self.data, k)

    def generate_with_repetition(self, k=None):
        '''Return iterator over all length-k arrangements, elements may repeat'''
        if k is None: k = self.n
        return itertools.product(self.data, repeat=k)

    def sample(self, k=None, m=1):
        '''
        Return generator over m random samples from k-permutations of
        object data. Yields nothing when k > n.
        '''
        if k is None: k = self.n
        if k > self.n: return
        for _ in range(m):
            yield tuple(random.sample(self.data, k))

    def sample_without_replacement(self, m=1, k=None):
        '''
        Randomly generate m unique k-permutations of object data (k defaults
        to n). Yields nothing when m exceeds the number of k-permutations
        or when k > n.

        NOTE(review): assumes the elements of data are distinct; repeated
        values make fewer distinct tuples than `count(k)` reports.
        '''
        total = self.count(k)
        if total is None or m > total:
            return
        # a list (not a set) so that unhashable elements keep working
        generated = []
        while len(generated) < m:
            # k and m are passed explicitly: a lone positional argument
            # would bind to k and sample 1-permutations
            gtuple = next(self.sample(k, 1))
            if gtuple not in generated:
                generated.append(gtuple)
                yield gtuple

    def duplicates(self, k=None):
        '''Return the set of distinct k-permutation tuples; None when k > n'''
        if k is None: k = self.n
        if k > self.n: return None
        return set(self.generate(k))


class Combination:
    '''
    Combinations of a sequence.

    `data` must support `len()` and integer indexing (used by `sample`).
    '''
    def __init__(self, data):
        self.data = data
        self.n = len(data)

    @staticmethod
    def nCk(n, k):
        '''Compute nCk = n! / (k! (n-k)!) exactly'''
        # exact integer arithmetic, see Permutation.nPk
        return factorial(n) // (factorial(k)*factorial(n-k))

    def count(self, k=None):
        '''Compute nCk on object data (k defaults to n); None when k > n'''
        if k is None: k = self.n
        if k > self.n: return None
        return Combination.nCk(self.n, k)

    def generate(self, k=None):
        '''Return iterator over all k-combinations of object data'''
        if k is None: k = self.n
        return itertools.combinations(self.data, k)

    def generate_with_repetition(self, k=None):
        '''Return iterator over all k-combinations where elements may repeat'''
        if k is None: k = self.n
        return itertools.combinations_with_replacement(self.data, k)

    def sample(self, k=None, m=1):
        '''
        Return generator over m random samples from k-combinations of
        object data. Each sample keeps the elements in their original
        order. Yields nothing when k > n.
        '''
        if k is None: k = self.n
        if k > self.n: return
        for _ in range(m):
            indices = sorted(random.sample(range(self.n), k))
            yield tuple(self.data[i] for i in indices)

    def sample_without_replacement(self, m=1, k=None):
        '''
        Randomly generate m unique k-combinations of object data (k defaults
        to n). Yields nothing when m exceeds the number of k-combinations
        or when k > n.

        NOTE(review): assumes the elements of data are distinct.
        '''
        total = self.count(k)
        if total is None or m > total:
            return
        # a list (not a set) so that unhashable elements keep working
        generated = []
        while len(generated) < m:
            gtuple = next(self.sample(k, 1))
            if gtuple not in generated:
                generated.append(gtuple)
                yield gtuple

    def duplicates(self, k=None):
        '''Return the set of distinct k-combination tuples; None when k > n'''
        if k is None: k = self.n
        if k > self.n: return None
        return set(self.generate(k))
However, it's not + obvious how to go from specified genotype into a constructor + expecting parameters for random generation; this would certainly + be more sloppy than the current simple entry point constructor. + + Want to separate candidate from agent. Candidates dont need to be + defined in the context of a gym evnironment. They just hold a genotype + and inherit the basic functions seen in base + + UPDATE: candidates ARE agents. They NEED to be defined in the context + of a gym environment, as there must be a way of evaluating the candidates + in an objective manner. This environment can be completely static, but + the point is that it provides context for evaluating fitness. Candiates + are to inherit the same methods as any agent, but have the additional + `genotype` attribute which holds their internal representation in the + context of a genetic algorithm process. + ''' + def __init__(self, genotype): + super().__init__() + self.genotype = genotype + + def __str__(self): + return self.epigenesis().__str__ + + @classmethod + def random(cls, genotype): + ''' + Alternate constructor for random candidate construction, to be + implemented by subclassing type if stochastic construction. + + NOTE: This is currently the official way of creating random objects: + define a standard constructor that sets internal variables based on + given arguments. Then define class methods which take their own set + of parameters and construct the main object by creating values and + sending them to the constructor. This is, so far, the cleanest and + most extendible approach to constructor overloading I've seen, and + has since worked very well. 
class AlphaString(Candidate):
    '''Candidate whose genotype is a list of characters'''
    @classmethod
    def random(cls, length, alphabet=string.printable):
        '''
        Build an AlphaString with `length` characters, each drawn
        independently from `alphabet`

            alphastr = AlphaString.random(length)
            alphastr = AlphaString.random(length, 'abc')

        :genotype: list (mutable)
        :phenotype: conversion to string
        '''
        # one copy of the alphabet per position; a single draw from their
        # product is one random string
        factors = [alphabet for _ in range(length)]
        space = counting.Product(*factors)
        draw = next(space.sample())
        return cls(list(draw))

    def epigenesis(self):
        '''Join the genotype characters into the phenotype string'''
        separator = ''
        return separator.join(self.genotype)

class BitString(AlphaString):
    '''AlphaString restricted to the two-character alphabet 0/1'''
    @classmethod
    def random(cls, length):
        '''Build a random BitString with `length` bits'''
        return super().random(length, alphabet='01')
a list + of NumPy arrays) + DONT need if just going to set genotype + ''' + super().__init__(genotype) + self.time_alive = 0 + + @classmethod + def random(cls, layers, rng=1): + ''' + Take layers structure as input, instantiate neural + network with given layers, set random weights according + to [-rng, +rng] + + :layers: list of network layer size + :rng: weights generated from [-rng, +rng] + ''' + net = nn.NeuralNetwork(layers, epsilon=rng) + return cls(net.weights) + + def epigenesis(self): + ''' + Convert from network structure to observable actions + via inference on live neural network architecture using + genotype weights. This process requires a data point on + which to evaluate the network + + TODO: consider how this is being done; should a nn object + be kept in memory at all times and modifications be made + directly to its weights so come inference time everything is + ready to go? This seems a little bulky but may end up being + more efficient. Initializing a network each time from weights + though has a tiny overhead; it just sets the nn object's weights + and no additional computation is needed. + Also how are we going to pass the incoming data to the network + for the actual inference procedure? Should the data be set to + the network itself, passed to the function, or set under the + candidate object? 
def single_point(parent1, parent2):
    '''
    Segment crossover for two candidates whose genotypes are mutable
    sequences of the same length.

    The child starts as a copy of parent1's genotype; a randomly chosen
    contiguous segment [start, stop) is then overwritten with the same
    segment of parent2's genotype. The segment may be empty.

    :parent1: object with a `genotype` supporting `.copy()`, `len()` and
              slice assignment
    :parent2: object with a sliceable `genotype`
    :returns: the child genotype (parents are not modified)
    '''
    child = parent1.genotype.copy()
    size = len(child)
    # cut points range over [0, size]: the slice end is exclusive, so
    # stopping at size-1 would mean the last gene is never taken from
    # parent2 (and an empty genotype would make randint raise)
    begin = random.randint(0, size)
    end = random.randint(0, size)
    start, stop = min(begin, end), max(begin, end)
    child[start:stop] = parent2.genotype[start:stop]
    return child

def multipoint(parent1, parent2):
    '''
    Generalizes single point crossover, could make redundant.
    Not implemented yet: currently returns None.
    '''
    pass

def weight_slice(net1, net2):
    '''Placeholder crossover for networks: currently returns net1 unchanged'''
    return net1
NotImplementedError + + def mutation(self, candidate): + '''Method of random mutation in candidate''' + raise NotImplementedError + + def termination(self, population): + ''' + Termination condition for simulation + By default, return False so that simulation + runs for all generations + ''' + return False + + def create_population(self): + for _ in range(self.population_size): + # create random candidate from given params + cand = self.candidate.random(**self.cand_params) + + # add candidate to population + self.population.append(cand) + + # register agent in gym if applicable + if self.gym: self.gym.register_agent(cand) + + def run(self): + ''' + Run evolutionary simulation, after class setup has been completed. + Implementation will vary based on subclassing type. General approach + will iterate until termination condition met, evaluating, selection, + breeding, and mutating a population of candidates. Generator yielding + generation specific details is encouraged functional form. + ''' + raise NotImplementedError diff --git a/objectlib/evolution/genetic.py b/objectlib/evolution/genetic.py new file mode 100644 index 0000000..a17232d --- /dev/null +++ b/objectlib/evolution/genetic.py @@ -0,0 +1,60 @@ +from . 
class GeneticAlgorithm(evolutionary.Evolutionary):
    '''
    Standard genetic algorithm (in a way, the genetic algo is itself
    an agent, taking states, maintaining internal representation,
    reacting and responding to the environment)
    '''
    def run(self):
        '''
        Run the generation loop as a generator.

        Each generation: tick the gym, rank the population by fitness
        (best first), truncate to `population_size`, yield a summary dict,
        then breed `num_offspring` children for the next round. The
        generator ends early (with the best candidate as its return value)
        once `termination` is true.

        All gym calls are skipped when no gym was supplied: the base class
        defaults `gym` to None and already guards it in `create_population`.
        '''
        gym = self.gym

        # initialize population of candidates
        self.create_population()
        if gym: gym.start()

        # begin generation loop
        for gen in range(self.num_generations):
            # execute actions and get new gym state
            if gym: gym.tick()

            # rank individuals based on current fitness
            self.population.sort(key=self.fitness, reverse=True)

            # balance population size
            self.population = self.population[:self.population_size]

            # maintain gym agent registry
            if gym:
                gym.update_agents(self.population)
                gym.refresh_state()

            # yield generation specific details
            top_candidate = self.population[0]
            bot_candidate = self.population[-1]
            yield {'generation'    : gen,
                   'best_candidate': str(top_candidate.epigenesis()),
                   'best_fitness'  : self.fitness(top_candidate),
                   'worst_fitness' : self.fitness(bot_candidate),}

            # check termination condition
            if self.termination(self.population):
                return self.population[0]

            # consider multiple offspring per generation
            for _ in range(self.num_offspring):
                # stochastically select parent candidates
                parent1 = self.selection(self.population)
                parent2 = self.selection(self.population)

                # create child candidate via crossover
                child_genotype = self.crossover(parent1, parent2)
                child = self.candidate(child_genotype)

                # perform (possible) mutations on child
                self.mutation(child, **self.mutation_params)

                # add child to population, gym for next round eval
                self.population.append(child)
                if gym: gym.register_agent(child)
def mutation_decorator(mutate):
    '''
    Turn an in-place mutation `mutate(candidate, **kwargs)` into
    `wrapper(candidate, rate, **kwargs)`, which applies the mutation with
    probability `rate` and returns None.
    '''
    import functools  # local import keeps this block self-contained

    @functools.wraps(mutate)  # preserve the mutation's name and docstring
    def wrapper(candidate, rate, **kwargs):
        if random.random() < rate:
            mutate(candidate, **kwargs)
    return wrapper

def class_mutation_decorator(mutate):
    '''
    Method variant of `mutation_decorator`: wraps
    `mutate(self, candidate, **kwargs)` into
    `wrapper(self, candidate, rate, **kwargs)`.
    '''
    import functools  # local import keeps this block self-contained

    @functools.wraps(mutate)
    def wrapper(self, candidate, rate, **kwargs):
        if random.random() < rate:
            mutate(self, candidate, **kwargs)
    return wrapper

@mutation_decorator
def bitflip(candidate):
    '''in-place flip of one random bit in a genotype of '0'/'1' characters'''
    gene = candidate.genotype
    if len(gene) == 0:
        return  # nothing to flip; randint(0, -1) would raise
    rand = random.randint(0, len(gene)-1)
    gene[rand] = str(int(gene[rand])^1)

@mutation_decorator
def alterchar(candidate):
    '''in-place shift of one random character up or down by one code point'''
    gene = candidate.genotype
    if len(gene) == 0:
        return  # nothing to alter; randint(0, -1) would raise
    rand = random.randint(0, len(gene)-1)
    code = ord(gene[rand])
    # only shifts that stay inside the range chr() accepts
    shifts = [s for s in (-1, 1) if 0 <= code + s <= 0x10FFFF]
    gene[rand] = chr(code + random.choice(shifts))

@mutation_decorator
def alter_weight(candidate, rng):
    '''
    In-place perturbation of a single randomly chosen weight by a value
    drawn uniformly from [-rng, rng].

    The genotype is indexed as `weights[layer][i, j]` and each layer must
    expose a two-entry `.shape` -- presumably a list of 2-D NumPy arrays;
    confirm against the NeuralNetwork weights format.
    '''
    weights = candidate.genotype
    if len(weights) == 0:
        return  # no layers to perturb
    layer = random.randint(0,len(weights)-1)
    shape = weights[layer].shape
    i, j = random.randint(0,shape[0]-1), random.randint(0,shape[1]-1)
    weights[layer][i,j] += random.uniform(-rng, rng)
class Distribution():
    '''
    Base class for probability distributions; stores the constructor
    arguments in `params` and defines the method names subclasses fill in.

    Consider extending base class to Continuous and Discrete subclasses.
    There are some slight differences that might matter to the API (e.g.
    "pdf" vs "pmf")
    '''
    def __init__(self, *params):
        self.params = params

    def pdf(self, x):
        '''return pdf(x) = density(x)'''
        pass

    def cdf(self, x):
        '''return cdf(x) = Pr(X <= x)'''
        pass

    def quantile(self, x):
        '''return cdf^{-1}(p), i.e. the x with Pr(X <= x) == p'''
        pass

    def sample(self, n=1):
        '''
        n: number of samples; n == -1 means an endless stream

        The base implementation only handles the endless case, by repeatedly
        taking one item from the subclass's own `sample()`. Subclasses
        delegate here first (`yield from super().sample(n)`) and then yield
        their n finite samples.

        Dev note: should consider returning something like a "Sample" object, which wraps
        the samples and provides convenient empirical estimates e.g. MLE of parameters
        '''
        if n == -1:
            while True:
                yield next(self.sample())

    # common moments (consider making @property)
    def mean(self):
        '''distribution mean'''
        pass

    def variance(self):
        '''distribution variance'''
        pass

class DiscreteDistribution(Distribution):
    '''
    Discrete distribution base class
    '''
    pass

class ContinuousDistribution(Distribution):
    '''
    Continuous distribution base class
    '''
    pass

class Bernoulli(DiscreteDistribution):
    '''Bernoulli(p): value 1 with probability p, value 0 otherwise'''
    def __init__(self, p):
        super().__init__(p)
        self.p = p

    def pdf(self, x):
        '''probability mass at x; 0 outside the support {0, 1}'''
        if x == 1: return self.p
        if x == 0: return 1-self.p
        return 0

    def cdf(self, x):
        '''Pr(X <= x): a step function with jumps at 0 and 1'''
        if x < 0: return 0
        if x < 1: return 1-self.p
        return 1

    def sample(self, n=1):
        '''yield n draws from {0, 1} (endless when n == -1)'''
        yield from super().sample(n)
        for _ in range(n):
            yield 1 if random.random() < self.p else 0

    def mean(self):
        return self.p

    def variance(self):
        return self.p*(1-self.p)

class Binomial(DiscreteDistribution):
    '''
    Binomial(n, p): number of successes in n independent Bernoulli(p) trials

    consider indexing probabilities at different
    values for later access. precompute could be cheap
    and save time later, or could expensive and never
    really used. have to compare the options
    '''

    def __init__(self, n, p):
        super().__init__(n, p)
        self.n = n
        self.p = p
        self._cdf = {}  # cache filled by cdf(..., index=True): x -> Pr(X <= x)

    def pdf(self, x):
        '''probability mass at x; 0 outside the support {0, ..., n}'''
        if x != int(x) or not 0 <= x <= self.n:
            return 0
        x = int(x)
        # math.comb is exact integer arithmetic
        return math.comb(self.n, x)*self.p**x*(1-self.p)**(self.n-x)

    def cdf(self, x, index=False):
        '''
        iteratively (naively) compute P(X <= x); when `index` is true the
        partial sums are also stored in the internal cache
        '''
        if x < 0: return 0
        p = 0
        # values above n add no mass, so the loop never goes past n
        for i in range(min(int(x), self.n)+1):
            if i == self.n: p = 1  # pin the total to avoid float drift
            else: p += self.pdf(i)
            if index:
                self._cdf[i] = p
        return p

    def sample(self, n=1):
        '''
        yield n draws by inverting the cached cdf (endless when n == -1)

        naive implementation (for now). meant to be used
        with relatively small n. consider poisson
        sampling for sufficiently large n
        '''
        yield from super().sample(n)
        # index entire cdf
        if self.n not in self._cdf:
            self.cdf(self.n, index=True)

        for _ in range(n):
            r = random.random()
            # smallest x whose cumulative probability reaches r; always
            # found because the cached value at self.n is 1 and r < 1
            for x in range(self.n+1):
                if self._cdf[x] >= r: break
            yield x

    def mean(self):
        return self.n*self.p

    def variance(self):
        return self.n* self.p*(1-self.p)

class Exponential(ContinuousDistribution): pass

class Normal(ContinuousDistribution): pass

# Poisson is a distribution over non-negative integers, hence the discrete base
class Poisson(DiscreteDistribution):
    '''Poisson(lmda): counts with mean and variance lmda'''
    def __init__(self, lmda):
        super().__init__(lmda)
        self.lmda = lmda

    def pdf(self, k):
        '''probability mass at k; 0 outside the non-negative integers'''
        if k != int(k) or k < 0:
            return 0
        k = int(k)
        return self.lmda**k*math.e**(-self.lmda)/math.factorial(k)

    def cdf(self, x):
        '''naively sum the mass function up to floor(x)'''
        if x < 0: return 0
        return sum(self.pdf(k) for k in range(int(x)+1))

    def quantile(self, x):
        '''not implemented yet: returns None'''
        pass

    def sample(self, n=1):
        '''
        yield n draws (endless when n == -1) using Knuth's multiplication
        method: count how many uniforms can be multiplied together before
        the product drops to exp(-lmda) or below. Cost grows with lmda, and
        exp(-lmda) underflows for very large lmda.
        '''
        yield from super().sample(n)
        threshold = math.exp(-self.lmda)
        for _ in range(n):
            k = 0
            prod = random.random()
            while prod > threshold:
                k += 1
                prod *= random.random()
            yield k

    def mean(self):
        return self.lmda

    def variance(self):
        return self.lmda

class Uniform(ContinuousDistribution):
    '''Uniform distribution on the interval between a and b (either order)'''
    def __init__(self, a, b):
        super().__init__(a, b)
        self.a = a
        self.b = b
        self.lower = min(a,b)
        self.width = abs(a-b)

    def pdf(self, x):
        '''density at x: 1/width inside the interval, 0 outside (a == b divides by zero)'''
        if x < self.lower or x > self.lower+self.width:
            return 0
        return 1 / self.width

    def cdf(self, x):
        '''Pr(X <= x), clamped to [0, 1] outside the interval'''
        if x <= self.lower: return 0
        if x >= self.lower+self.width: return 1
        return (x-self.lower) / self.width

    def quantile(self, x):
        '''not implemented yet: returns None'''
        pass

    def sample(self, n=1):
        '''yield n draws from [lower, lower+width) (endless when n == -1)'''
        yield from super().sample(n)
        for _ in range(n):
            yield random.random()*self.width+self.lower

    def mean(self):
        return (self.a+self.b)/2

    def variance(self):
        return (self.b-self.a)**2/12
class DataLoader:
    '''
    Skeleton dataset loader: records where the data lives and the batch
    size, then calls `load()`.
    '''
    def __init__(self, path, batch_size):
        '''
        :path: location of the dataset (stored as given)
        :batch_size: number of items per batch (stored as given)
        '''
        self.path = path
        # store the argument; a bare `self.batch_size` expression would only
        # read a not-yet-existing attribute and raise AttributeError
        self.batch_size = batch_size

        self.load()

    def load(self):
        '''Placeholder: does nothing and returns None for now'''
        pass
        # return iterator over dataset files
async def async_exhaust(gen, func=None, interval=1, verbose=True, last=True):
    '''
    Exhaust given (synchronous) iterable, awaiting the coroutine function
    `func` on every `interval`-th item and printing that item when
    verbose=True. With last=True the final item is processed as well, even
    if it does not fall on the interval.

    Return the final element of the iterable, or None if it was empty.
    '''
    # sentinels so that an empty iterable does not leave the loop
    # variables unbound
    i, item = -1, None
    for i, item in enumerate(gen):
        if i % interval == 0:
            if func: await func(item)
            if verbose: print(item)

    # perform same actions for last item, unless the iterable was empty
    # (i still -1) or the loop already handled it
    if last and i >= 0 and i % interval != 0:
        if func: await func(item)
        if verbose: print(item)

    return item
def camel_to_snake(s):
    '''
    Convert a CamelCase name to snake_case: every uppercase character is
    lowercased and prefixed with an underscore, then all leading
    underscores are stripped from the result.
    '''
    pieces = []
    for ch in s:
        if ch.isupper():
            pieces.append('_' + ch.lower())
        else:
            pieces.append(ch)
    joined = ''.join(pieces)
    return joined.lstrip('_')
If you don't want your defaults changed, why not just leave them + out? Well this is a valid point, but in the case you are using the Opt object to set your + class attributes, there are some you want to ensure _dont_ get set (which will overwrite + your defaults you may have set outside the Opt object). + + Permission options include: + - require: require provided dict specifies key, no base value needed + - optional: key is optional in provided dict, will be used instad of any base values + - merge: merge provided values with base values in expectable way + - ignore: ignore provided values under this key in preference for base (if defined, doesn't need to be) + ''' + def __init__(self, d): + super().__init__(**d) + + def set_pattern(self, pattern): + self.pattern = pattern + + def update(self, target): + ''' + Main purpose of this class. Update base values with target values according to the + permissions set. + ''' + # execute update pattern + for key, pattern in self.pattern.items(): + if pattern == 'require': + self[key] = target[key] + elif pattern == 'optional': + self[key] = target.get(key, self.get(key, None)) + elif pattern == 'merge': + self[key] = self.merge(self.get(key), target.get(key)) + + # add key-value pairs that don't have a pattern, but may or may not already have + # an entry in base. 
class Collection():
    """
    Keyed join between a list of data items and the objects previously
    created for them (enter / update / exit selections).

    `append` creates one object per data item and records the pairing in
    `state`; `data` binds a new data list and rebuilds the key lookups that
    `enter` and `exit` compare.
    """

    def __init__(self, objs=None, data=None):
        # None defaults: a literal [] default would be one list shared by
        # every instance created without arguments
        self.objs = [] if objs is None else objs
        self._data = [] if data is None else data
        self.state = []  # list of {'dat': datum, 'obj': object} pairings

        self.state_map = {}  # key -> object, built from `state`
        self.data_map = {}   # key -> datum, built from `_data`

        # default key: position of the item
        self.key = lambda d,i: i

    def data(self, data, key=None):
        """
        Bind `data` as the current data list and rebuild both key lookups.

        :data: iterable of data items
        :key: optional `key(datum, index)` function; replaces the current
              key function when given
        """
        self._data = data

        if key is not None:
            self.key = key

        # rebuild from scratch so keys of a previously bound data list do
        # not linger and hide items from the exit selection
        self.data_map.clear()
        for i, d in enumerate(self._data):
            idx = self.key(d, i)
            self.data_map[idx] = d

        self.state_map.clear()
        for i, o in enumerate(self.state):
            idx = self.key(o['dat'], i)
            self.state_map[idx] = o['obj']

    def enter(self):
        '''
        check keys across data to state dicts; those keys in the data dict not in the
        state dict are in the enter selection (returned as a new Collection
        holding those data items)
        '''
        entering = [v for k, v in self.data_map.items() if k not in self.state_map]
        return Collection(data=entering)

    def merge(self):
        '''Placeholder: returns this collection unchanged'''
        return self

    def update(self):
        '''
        check keys across data to state dicts; those keys in both the state dict and the
        data dict are in the update selection (placeholder: returns this
        collection unchanged)
        '''
        return self

    def exit(self):
        '''
        check keys across data to state dicts; those keys in the state dict not in the
        data dict are in the exit selection (returned as a new Collection
        holding those objects)
        '''
        exiting = [v for k, v in self.state_map.items() if k not in self.data_map]
        return Collection(objs=exiting)

    def append(self, func):
        '''
        Call `func(datum, index)` for each bound data item, store the
        returned object in `objs` and record the datum/object pairing in
        `state`
        '''
        for i, d in enumerate(self._data):
            obj = func(d,i)
            self.objs.append(obj)
            self.state.append({'dat': d, 'obj': obj})
names