Compare commits
2 Commits
b726f495b6
...
master
| Author | SHA1 | Date | |
|---|---|---|---|
| c566dcf678 | |||
| 090c122c60 |
21
.gitignore
vendored
21
.gitignore
vendored
@@ -1,18 +1,19 @@
|
||||
# generic py
|
||||
# generic
|
||||
__pycache__/
|
||||
.pytest_cache/
|
||||
*.egg-info/
|
||||
.ipynb_checkpoints/
|
||||
.pytest_cache/
|
||||
.python-version
|
||||
|
||||
# vendor and build files
|
||||
# package-specific
|
||||
.ipynb_checkpoints/
|
||||
.pytest_cache/
|
||||
|
||||
# vendor/build files
|
||||
dist/
|
||||
build/
|
||||
docs/_autoref/
|
||||
docs/_autosummary/
|
||||
docs/_build/
|
||||
doc/_autoref/
|
||||
doc/_autosummary/
|
||||
doc/_build/
|
||||
|
||||
# local
|
||||
notebooks/
|
||||
# misc local
|
||||
/Makefile
|
||||
notebooks/
|
||||
|
||||
59
README.md
59
README.md
@@ -1,35 +1,36 @@
|
||||
# Overview
|
||||
`co3` is a lightweight Python ORM for hierarchical storage management. It implements a
|
||||
general type system for defining database components like relations, schemas, engines,
|
||||
etc. Objects inheriting from the `CO3` base class can then define data transformations
|
||||
that connect to database components, and can be automatically collected for coordinated
|
||||
database insertion.
|
||||
`co3` is a lightweight Python ORM for hierarchical storage management. It
|
||||
implements a general type system for defining database components like
|
||||
relations, schemas, engines, etc. Objects inheriting from the `CO3` base class
|
||||
can then define data transformations that connect to database components, and
|
||||
can be automatically collected for coordinated database insertion.
|
||||
|
||||
`co3` attempts to provide a general interface for interacting with storage media (e.g.,
|
||||
databases, pickled objects, VSS framework, in-memory key-value stores, etc). The following
|
||||
top-level classes capture the bulk of the operational model:
|
||||
`co3` attempts to provide a general interface for interacting with storage
|
||||
media (e.g., databases, pickled objects, VSS framework, in-memory key-value
|
||||
stores, etc). The following top-level classes capture the bulk of the
|
||||
operational model:
|
||||
|
||||
- **Database**: reference to a storage medium, with an `Accessor` for accessing data,
|
||||
`Manager` for managing database state, and an `Engine` for managing connections and
|
||||
external operations.
|
||||
- **Accessor**: provides access to stored items in a `Database`, typically via a supported
|
||||
`select` operation over known `Component` types
|
||||
- **Manager**: manages database storage state (e.g., supported inserts or database sync
|
||||
operations)
|
||||
- **Mapper**: associates `CO3` types with `Schema` components, and provides automatic
|
||||
collection and composition operations for supported items
|
||||
- **Collector**: collects data from defined `CO3` type transformations and prepares for
|
||||
`Database` insert operations
|
||||
- **Component**: atomic storage groups for databases (i.e., generalized notion of a
|
||||
"relation" in relational algebra).
|
||||
- **Database**: reference to a storage medium, with an `Accessor` for accessing
|
||||
data, `Manager` for managing database state, and an `Engine` for managing
|
||||
connections and external operations.
|
||||
- **Accessor**: provides access to stored items in a `Database`, typically via
|
||||
a supported `select` operation over known `Component` types
|
||||
- **Manager**: manages database storage state (e.g., supported inserts or
|
||||
database sync operations)
|
||||
- **Mapper**: associates `CO3` types with `Schema` components, and provides
|
||||
automatic collection and composition operations for supported items
|
||||
- **Collector**: collects data from defined `CO3` type transformations and
|
||||
prepares for `Database` insert operations
|
||||
- **Component**: atomic storage groups for databases (i.e., generalized notion
|
||||
of a "relation" in relational algebra).
|
||||
- **Indexer**: automatic caching of supported access queries to a `Database`
|
||||
- **Schema**: general schema analog for grouping related `Component` sets
|
||||
- **Differ**: facilitates set operations on results from selectable resources (e.g.,
|
||||
automatic comparison between file data on disk and file rows in a SQL database)
|
||||
- **Syncer**: generalized syncing procedure for items between data resources (e.g.,
|
||||
syncing new, modified, and deleted files from disk to a SQL database that stores file
|
||||
metadata).
|
||||
|
||||
The **CO3** an abstract base class then makes it easy to integrate this model with regular
|
||||
Python object hierarchies that can be mapped to a storage schema.
|
||||
- **Differ**: facilitates set operations on results from selectable resources
|
||||
(e.g., automatic comparison between file data on disk and file rows in a SQL
|
||||
database)
|
||||
- **Syncer**: generalized syncing procedure for items between data resources
|
||||
(e.g., syncing new, modified, and deleted files from disk to a SQL database
|
||||
that stores file metadata).
|
||||
|
||||
The **CO3** abstract base class then makes it easy to integrate this model
|
||||
with regular Python object hierarchies that can be mapped to a storage schema.
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
'''
|
||||
Provides access to an underlying schema through a supported set of operations. Class
|
||||
methods could be general, high-level SQL wrappers, or convenience functions for common
|
||||
schema-specific queries.
|
||||
Provides access to an underlying schema through a supported set of operations.
|
||||
Class methods could be general, high-level SQL wrappers, or convenience
|
||||
functions for common schema-specific queries.
|
||||
'''
|
||||
import time
|
||||
import inspect
|
||||
@@ -16,13 +16,14 @@ from co3.component import Component
|
||||
|
||||
class Accessor[C: Component](metaclass=ABCMeta):
|
||||
'''
|
||||
Access wrapper class for complex queries and easy integration with Composer tables.
|
||||
Implements high-level access to things like common constrained SELECT queries.
|
||||
Access wrapper class for complex queries and easy integration with Composer
|
||||
tables. Implements high-level access to things like common constrained
|
||||
SELECT queries.
|
||||
|
||||
Instance variables:
|
||||
access_log: time-indexed log of access queries performed
|
||||
'''
|
||||
def __init__(self):
|
||||
def __init__(self) -> None:
|
||||
self.access_log = {}
|
||||
|
||||
def log_access(self, stmt):
|
||||
|
||||
179
co3/co3.py
179
co3/co3.py
@@ -1,8 +1,9 @@
|
||||
'''
|
||||
CO3 is an abstract base class for scaffolding object hierarchies and managing operations
|
||||
with associated database schemas. It facilitates something like a "lightweight ORM" for
|
||||
classes/tables/states with fixed transformations of interest. The canonical use case is
|
||||
managing hierarchical document relations, format conversions, and syntactical components.
|
||||
CO3 is an abstract base class for scaffolding object hierarchies and managing
|
||||
operations with associated database schemas. It facilitates something like a
|
||||
"lightweight ORM" for classes/tables/states with fixed transformations of
|
||||
interest. The canonical use case is managing hierarchical document relations,
|
||||
format conversions, and syntactical components.
|
||||
|
||||
Generic collation syntax:
|
||||
|
||||
@@ -22,12 +23,13 @@ Generic collation syntax:
|
||||
|
||||
.. admonition:: On multi-key attachment
|
||||
|
||||
One possible quirk of the current collation registry scheme is the rather black and
|
||||
white nature of key attachment. You either specify a single key, possibly to several
|
||||
groups, or allow any key via passthrough under an implicit group. There's no explicit
|
||||
"multi-key" pattern to make use of here, be it through "restricted passthrough"
|
||||
(method still parameterized by the key, but only allows keys from a provided list) or
|
||||
just simple duplicated attachment. To demonstrate via the above example:
|
||||
One possible quirk of the current collation registry scheme is the rather
|
||||
black and white nature of key attachment. You either specify a single key,
|
||||
possibly to several groups, or allow any key via passthrough under an
|
||||
implicit group. There's no explicit "multi-key" pattern to make use of
|
||||
here, be it through "restricted passthrough" (method still parameterized by
|
||||
the key, but only allows keys from a provided list) or just simple
|
||||
duplicated attachment. To demonstrate via the above example:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
@@ -54,8 +56,8 @@ Generic collation syntax:
|
||||
|
||||
...
|
||||
|
||||
or with a central handler and separate collation points (at least when the key list is
|
||||
small):
|
||||
or with a central handler and separate collation points (at least when the
|
||||
key list is small):
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
@@ -71,38 +73,41 @@ Generic collation syntax:
|
||||
def key2(self):
|
||||
self._handle_supported_keys('key2')
|
||||
|
||||
The former scales better and allows general key rejection patterns if needed, while
|
||||
the latter integrates a bit better with the formal collation process, e.g., will
|
||||
throw ``ValueErrors`` based on key mismatches automatically.
|
||||
The former scales better and allows general key rejection patterns if
|
||||
needed, while the latter integrates a bit better with the formal collation
|
||||
process, e.g., will throw ``ValueErrors`` based on key mismatches
|
||||
automatically.
|
||||
'''
|
||||
import inspect
|
||||
import logging
|
||||
from collections import defaultdict
|
||||
from functools import wraps, partial
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def collate(key, groups=None):
|
||||
'''
|
||||
Collation decorator for CO3 subtype action registry.
|
||||
|
||||
Dynamic decorator; can be used as ``collate`` without any arguments, or with all. In
|
||||
the former case, ``key`` will be a function, so we check for this.
|
||||
Dynamic decorator; can be used as ``collate`` without any arguments, or
|
||||
with all. In the former case, ``key`` will be a function, so we check for
|
||||
this.
|
||||
|
||||
.. admonition:: Usage
|
||||
|
||||
Collation registration is the process of exposing various actions for use in
|
||||
**hierarchical collection** (see ``Mapper.collect``). Collation *keys* are unique
|
||||
identifiers of a particular action that emits data. Keys can belong to an arbitrary
|
||||
number of *groups*, which serve as semantically meaningful collections of similar
|
||||
actions. Group assignment also determines the associated *collation component*
|
||||
to be used as a storage target; the results of actions $K_G$ belonging to group
|
||||
$G$ will all be stored in the attached $G$-component. Specification of key-group
|
||||
relations can be done in a few ways:
|
||||
Collation registration is the process of exposing various actions for
|
||||
use in **hierarchical collection** (see ``Mapper.collect``). Collation
|
||||
*keys* are unique identifiers of a particular action that emits data.
|
||||
Keys can belong to an arbitrary number of *groups*, which serve as
|
||||
semantically meaningful collections of similar actions. Group
|
||||
assignment also determines the associated *collation component* to be
|
||||
used as a storage target; the results of actions $K_G$ belonging to
|
||||
group $G$ will all be stored in the attached $G$-component.
|
||||
Specification of key-group relations can be done in a few ways:
|
||||
|
||||
- Explicit key-group specification: a specific key and associated groups can be
|
||||
provided as arguments to the decorator:
|
||||
- Explicit key-group specification: a specific key and associated
|
||||
groups can be provided as arguments to the decorator:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
@@ -127,14 +132,14 @@ def collate(key, groups=None):
|
||||
...
|
||||
}
|
||||
|
||||
If ``groups`` is left unspecified, the key will be attached to the default
|
||||
``None`` group.
|
||||
If ``groups`` is left unspecified, the key will be attached to the
|
||||
default ``None`` group.
|
||||
|
||||
- Implicit key-group association: in some cases, you may want to support an entire
|
||||
"action class," and associate any operations under the class to the same storage
|
||||
component. Here we still use the notion of connecting groups to components, but
|
||||
allow the key to be dynamically specified and passed through to the collation
|
||||
method:
|
||||
- Implicit key-group association: in some cases, you may want to
|
||||
support an entire "action class," and associate any operations under
|
||||
the class to the same storage component. Here we still use the notion
|
||||
of connecting groups to components, but allow the key to be
|
||||
dynamically specified and passed through to the collation method:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
@@ -160,27 +165,31 @@ def collate(key, groups=None):
|
||||
|
||||
A few important notes:
|
||||
|
||||
- Implicit key-group specifications attach the *group* to a single method,
|
||||
whereas in the explicit case, groups can be affiliated with many keys. When
|
||||
explicitly provided, only those exact key values are supported. But in the
|
||||
implicit case, *any* key is allowed; the group still remains a proxy for the
|
||||
entire action class, but without needing to map from specifically stored key
|
||||
values. That is, the utility of the group remains consistent across implicit
|
||||
- Implicit key-group specifications attach the *group* to a single
|
||||
method, whereas in the explicit case, groups can be affiliated with
|
||||
many keys. When explicitly provided, only those exact key values
|
||||
are supported. But in the implicit case, *any* key is allowed; the
|
||||
group still remains a proxy for the entire action class, but
|
||||
without needing to map from specifically stored key values. That
|
||||
is, the utility of the group remains consistent across implicit
|
||||
and explicit cases, but stores the associations differently.
|
||||
- The ``None`` key, rather than point to a ``(<method>, <group-list>)`` tuple,
|
||||
instead points to a dictionary of ``group``-``method`` pairs. When attempting
|
||||
execute a key under a particular group, the group registry indicates
|
||||
whether the key is explicitly supported. If ``None`` is present for the group,
|
||||
then ``key_registry[None][<group-name>]`` can be used to recover the method
|
||||
implicitly affiliated with the key (along with any other key under the group).
|
||||
- When any method has been implicitly registered, *any* key (even when
|
||||
attempting to specify an explicit key) will match that group. This can
|
||||
effectively mean keys are not unique when an implicit group has been
|
||||
registered. There is a protection in place here, however; in methods like
|
||||
``CO3.collate`` and ``Mapper.collect``, an implicit group must be directly
|
||||
named in order for a given key to be considered. That is, when attempting
|
||||
collation outside specific group context, provided keys will only be
|
||||
considered against explicitly registered keys.
|
||||
- The ``None`` key, rather than point to a ``(<method>,
|
||||
<group-list>)`` tuple, instead points to a dictionary of
|
||||
``group``-``method`` pairs. When attempting to execute a key under a
|
||||
particular group, the group registry indicates whether the key is
|
||||
explicitly supported. If ``None`` is present for the group, then
|
||||
``key_registry[None][<group-name>]`` can be used to recover the
|
||||
method implicitly affiliated with the key (along with any other key
|
||||
under the group).
|
||||
- When any method has been implicitly registered, *any* key (even
|
||||
when attempting to specify an explicit key) will match that group.
|
||||
This can effectively mean keys are not unique when an implicit
|
||||
group has been registered. There is a protection in place here,
|
||||
however; in methods like ``CO3.collate`` and ``Mapper.collect``, an
|
||||
implicit group must be directly named in order for a given key to
|
||||
be considered. That is, when attempting collation outside specific
|
||||
group context, provided keys will only be considered against
|
||||
explicitly registered keys.
|
||||
'''
|
||||
func = None
|
||||
if inspect.isfunction(key):
|
||||
@@ -200,6 +209,7 @@ def collate(key, groups=None):
|
||||
|
||||
return decorator
|
||||
|
||||
|
||||
class FormatRegistryMeta(type):
|
||||
'''
|
||||
Metaclass handling collation registry at the class level.
|
||||
@@ -225,8 +235,8 @@ class FormatRegistryMeta(type):
|
||||
for _, method in methods:
|
||||
register_action(method)
|
||||
|
||||
# add final registered formats for the current class, overwriting any found in
|
||||
# superclass chain
|
||||
# add final registered formats for the current class, overwriting any
|
||||
# found in superclass chain
|
||||
for attr_name, attr_value in attrs.items():
|
||||
register_action(attr_value)
|
||||
|
||||
@@ -235,41 +245,49 @@ class FormatRegistryMeta(type):
|
||||
|
||||
return super().__new__(cls, name, bases, attrs)
|
||||
|
||||
|
||||
class CO3(metaclass=FormatRegistryMeta):
|
||||
'''
|
||||
Base class supporting the central "COllate, COllect, COmpose" paradigm.
|
||||
|
||||
- Collate: organize and transform conversion outputs, possibly across class components
|
||||
- Collect: gather core attributes, conversion data, and subcomponents for DB insertion
|
||||
- Compose: construct object-associated DB table references through the class hierarchy
|
||||
- Collate: organize and transform conversion outputs, possibly across class
|
||||
components
|
||||
- Collect: gather core attributes, conversion data, and subcomponents for
|
||||
DB insertion
|
||||
- Compose: construct object-associated DB table references through the
|
||||
class hierarchy
|
||||
|
||||
.. admonition:: on action groups
|
||||
|
||||
Group keys are simply named collections to make it easy for storage components to
|
||||
be attached to action subsets. They do _not_ augment the action registration
|
||||
namespace, meaning the action key should still be unique; the group key is purely
|
||||
auxiliary.
|
||||
Group keys are simply named collections to make it easy for storage
|
||||
components to be attached to action subsets. They do _not_ augment the
|
||||
action registration namespace, meaning the action key should still be
|
||||
unique; the group key is purely auxiliary.
|
||||
|
||||
Action methods can also be attached to several groups, in case there is
|
||||
overlapping utility within or across schemas or storage media. In this case, it
|
||||
becomes particularly critical to ensure registered ``collate`` methods really are
|
||||
just "gathering results" from possibly heavy-duty operations, rather than
|
||||
performing them when called, so as to reduce wasted computation.
|
||||
overlapping utility within or across schemas or storage media. In this
|
||||
case, it becomes particularly critical to ensure registered ``collate``
|
||||
methods really are just "gathering results" from possibly heavy-duty
|
||||
operations, rather than performing them when called, so as to reduce
|
||||
wasted computation.
|
||||
|
||||
.. admonition:: New: collation caching
|
||||
|
||||
To help facilitate the common pattern of storing collation results, a
|
||||
``collate_cache`` parameter has been added to store key-group indexed collation
|
||||
results. (Note: now requires explicit superclass instantiation.)
|
||||
``collate_cache`` parameter has been added to store key-group indexed
|
||||
collation results. (Note: now requires explicit superclass
|
||||
instantiation.)
|
||||
'''
|
||||
|
||||
def __init__(self):
|
||||
self._collate_cache = {}
|
||||
|
||||
@property
|
||||
def attributes(self):
|
||||
'''
|
||||
Method to define how a subtype's inserts should be handled under ``collect`` for
|
||||
canonical attributes, i.e., inserts to the type's table.
|
||||
Method to define how a subtype's inserts should be handled under
|
||||
``collect`` for canonical attributes, i.e., inserts to the type's
|
||||
table.
|
||||
'''
|
||||
return vars(self)
|
||||
|
||||
@@ -284,14 +302,15 @@ class CO3(metaclass=FormatRegistryMeta):
|
||||
def collation_attributes(self, key, group):
|
||||
'''
|
||||
Return "connective" collation component data, possibly dependent on
|
||||
instance-specific attributes and the action arguments. This is typically the
|
||||
auxiliary structure that may be needed to attach to responses from registered
|
||||
``collate`` calls to complete inserts.
|
||||
instance-specific attributes and the action arguments. This is
|
||||
typically the auxiliary structure that may be needed to attach to
|
||||
responses from registered ``collate`` calls to complete inserts.
|
||||
|
||||
Note: this method is primarily used by ``Mapper.collect()``, and is called just
|
||||
prior to collector send-off for collation inserts and injected alongside collation
|
||||
data. Common structure in collation components can make this function easy to
|
||||
define, independent of action group for instance.
|
||||
Note: this method is primarily used by ``Mapper.collect()``, and is
|
||||
called just prior to collector send-off for collation inserts and
|
||||
injected alongside collation data. Common structure in collation
|
||||
components can make this function easy to define, independent of action
|
||||
group for instance.
|
||||
'''
|
||||
return {}
|
||||
|
||||
@@ -350,5 +369,3 @@ class CO3(metaclass=FormatRegistryMeta):
|
||||
self._collate_cache[(key, group)] = result
|
||||
|
||||
return result
|
||||
|
||||
|
||||
|
||||
@@ -2,19 +2,21 @@ class Component[T]:
|
||||
'''
|
||||
Component
|
||||
|
||||
General wrapper for storage components to be used in various database contexts. Relations
|
||||
can be thought of generally as named data containers/entities serving as a fundamental
|
||||
abstractions within particular storage protocols.
|
||||
General wrapper for storage components to be used in various database
|
||||
contexts. Relations can be thought of generally as named data
|
||||
containers/entities serving as fundamental abstractions within particular
|
||||
storage protocols.
|
||||
'''
|
||||
def __init__(self, name, obj: T):
|
||||
|
||||
def __init__(self, name, obj: T) -> None:
|
||||
self.name = name
|
||||
self.obj = obj
|
||||
|
||||
def __str__(self):
|
||||
def __str__(self) -> str:
|
||||
return f'<Component ({self.__class__.__name__})> {self.name}'
|
||||
|
||||
def __repr__(self):
|
||||
def __repr__(self) -> str:
|
||||
return f'<Component ({self.__class__.__name__})> {self.name}'
|
||||
|
||||
def get_attributes(self):
|
||||
def get_attributes(self) -> dict:
|
||||
raise NotImplementedError
|
||||
|
||||
@@ -1,8 +1,9 @@
|
||||
'''
|
||||
Dev note:
|
||||
Any reason to have ComposeableComponents and Relations as separate types? The thought
|
||||
is that there may be some possible Component types we want to be able to Compose that
|
||||
wouldn't logically be Relations. But the gap here might be quite small
|
||||
Any reason to have ComposableComponents and Relations as separate types?
|
||||
The thought is that there may be some possible Component types we want to
|
||||
be able to Compose that wouldn't logically be Relations. But the gap here
|
||||
might be quite small
|
||||
'''
|
||||
|
||||
from typing import Self
|
||||
@@ -29,9 +30,9 @@ class ComposableComponent[T](Component[T], metaclass=ABCMeta):
|
||||
# relational databases
|
||||
class Relation[T](ComposableComponent[T]):
|
||||
'''
|
||||
Relation base for tabular components to be used in relation DB settings. Attempts to
|
||||
adhere to the set-theoretic base outlined in the relational model [1]. Some
|
||||
terminology:
|
||||
Relation base for tabular components to be used in relational DB settings.
|
||||
Attempts to adhere to the set-theoretic base outlined in the relational
|
||||
model [1]. Some terminology:
|
||||
|
||||
Relation: table-like container
|
||||
| -> Heading: set of attributes
|
||||
@@ -43,15 +44,16 @@ class Relation[T](ComposableComponent[T]):
|
||||
[1]: https://en.wikipedia.org/wiki/Relational_model#Set-theoretic_formulation
|
||||
|
||||
Note: development tasks
|
||||
As it stands, the Relation skeleton is incredibly lax compared to the properties and
|
||||
operations that should be formally available, according its pure relational algebra
|
||||
analog.
|
||||
As it stands, the Relation skeleton is incredibly lax compared to the
|
||||
properties and operations that should be formally available, according
|
||||
to its pure relational algebra analog.
|
||||
|
||||
Relations are also generic up to a type T, which ultimately serves as the base object
|
||||
for Relation instances. We aren't attempting to implement some generally useful
|
||||
table-like class here; instead we're just exposing a lightweight interface that's
|
||||
needed for a few CO3 contexts, and commonly off-loading most of the heavy-lifting to
|
||||
true relation objects like SQLAlchemy tables.
|
||||
Relations are also generic up to a type T, which ultimately serves as
|
||||
the base object for Relation instances. We aren't attempting to
|
||||
implement some generally useful table-like class here; instead we're
|
||||
just exposing a lightweight interface that's needed for a few CO3
|
||||
contexts, and commonly off-loading most of the heavy-lifting to true
|
||||
relation objects like SQLAlchemy tables.
|
||||
'''
|
||||
def compose(
|
||||
self,
|
||||
@@ -65,8 +67,8 @@ class SQLTable(Relation[SQLTableLike]):
|
||||
@classmethod
|
||||
def from_table(cls, table: sa.Table):
|
||||
'''
|
||||
Note that the sa.Table type is intentional here; not all matching types for
|
||||
SQLTableLike have a defined ``name`` property
|
||||
Note that the sa.Table type is intentional here; not all matching types
|
||||
for SQLTableLike have a defined ``name`` property
|
||||
'''
|
||||
return cls(table.name, table)
|
||||
|
||||
@@ -78,7 +80,8 @@ class SQLTable(Relation[SQLTableLike]):
|
||||
Provide column:default pairs for a provided SQLAlchemy table.
|
||||
|
||||
Parameters:
|
||||
include_all: whether to include all columns, even those without explicit defaults
|
||||
include_all: whether to include all columns, even those without
|
||||
explicit defaults
|
||||
'''
|
||||
default_values = {}
|
||||
for column in self.get_attributes():
|
||||
@@ -87,8 +90,8 @@ class SQLTable(Relation[SQLTableLike]):
|
||||
elif column.nullable:
|
||||
default_values[column.name] = None
|
||||
else:
|
||||
# assume empty string if include_all and col has no explicit default
|
||||
# and isn't nullable
|
||||
# assume empty string if include_all and col has no explicit
|
||||
# default and isn't nullable
|
||||
if include_all and column.name != 'id':
|
||||
default_values[column.name] = ''
|
||||
|
||||
|
||||
@@ -1,19 +1,20 @@
|
||||
'''
|
||||
Database
|
||||
|
||||
Central object for defining storage protocol-specific interfaces. The database wraps up
|
||||
central items for interacting with database resources, namely the Accessor and Manager
|
||||
objects.
|
||||
Central object for defining storage protocol-specific interfaces. The database
|
||||
wraps up central items for interacting with database resources, namely the
|
||||
Accessor and Manager objects.
|
||||
|
||||
The Database type hierarchy attempts to be exceedingly general; SQL-derivatives should
|
||||
subclass from the RelationalDatabase subtype, for example, which itself becomes a new
|
||||
generic via a type dependence on Relation.
|
||||
The Database type hierarchy attempts to be exceedingly general; SQL-derivatives
|
||||
should subclass from the RelationalDatabase subtype, for example, which itself
|
||||
becomes a new generic via a type dependence on Relation.
|
||||
|
||||
While relying no many constituent pieces, Databases intend to provide all needed objects
|
||||
under one roof. This includes the Engine (opens up connections to the database), Accessors
|
||||
(running select-like queries on DB data), Managers (updating DB state with sync
|
||||
insert-like actions), and Indexers (systematically caching Accessor queries). Generalized
|
||||
behavior is supported by explicitly leveraging the individual components. For example,
|
||||
While relying on many constituent pieces, Databases intend to provide all
|
||||
needed objects under one roof. This includes the Engine (opens up connections
|
||||
to the database), Accessors (running select-like queries on DB data), Managers
|
||||
(updating DB state with sync insert-like actions), and Indexers (systematically
|
||||
caching Accessor queries). Generalized behavior is supported by explicitly
|
||||
leveraging the individual components. For example,
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
@@ -28,9 +29,9 @@ behavior is supported by explicitly leveraging the individual components. For ex
|
||||
data
|
||||
)
|
||||
|
||||
The Database also supports a few directly callable methods for simplified interaction.
|
||||
These methods manage a connection context internally, passing them through the way they
|
||||
might otherwise be handled explicitly, as seen above.
|
||||
The Database also supports a few directly callable methods for simplified
|
||||
interaction. These methods manage a connection context internally, passing them
|
||||
through the way they might otherwise be handled explicitly, as seen above.
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
@@ -41,17 +42,18 @@ might otherwise be handled explicitly, as seen above.
|
||||
|
||||
.. admonition:: on explicit connection contexts
|
||||
|
||||
Older models supported Accessors/Managers that housed their own Engine instances, and
|
||||
when performing actions like ``insert``, the Engine would be passed all the way through
|
||||
until a Connection could be spawned, and in that context the single action would be
|
||||
made. This model forfeits a lot of connection control, preventing multiple actions
|
||||
under a single connection.
|
||||
Older models supported Accessors/Managers that housed their own Engine
|
||||
instances, and when performing actions like ``insert``, the Engine would be
|
||||
passed all the way through until a Connection could be spawned, and in that
|
||||
context the single action would be made. This model forfeits a lot of
|
||||
connection control, preventing multiple actions under a single connection.
|
||||
|
||||
The newer model now avoids directly allowing Managers/Accessors access to their own
|
||||
engines, and instead they expose methods that explicitly require Connection objects.
|
||||
This means a user can invoke these methods in their own Connection contexts (seen
|
||||
above) and group up operations as they please, reducing overhead. The Database then
|
||||
wraps up a few single-operation contexts where outer connection control is not needed.
|
||||
The newer model now avoids directly allowing Managers/Accessors access to
|
||||
their own engines, and instead they expose methods that explicitly require
|
||||
Connection objects. This means a user can invoke these methods in their own
|
||||
Connection contexts (seen above) and group up operations as they please,
|
||||
reducing overhead. The Database then wraps up a few single-operation
|
||||
contexts where outer connection control is not needed.
|
||||
'''
|
||||
import logging
|
||||
|
||||
@@ -68,27 +70,29 @@ class Database[C: Component]:
|
||||
'''
|
||||
Generic Database definition
|
||||
|
||||
Generic to both a Component (C), and an Engine resource type (R). The Engine's
|
||||
generic openness must be propagated here, as it's intended to be fully abstracted away
|
||||
under the Database roof. Note that we cannot explicitly use an Engine type in its
|
||||
place, as it obscures its internal resource type dependence when we need it for
|
||||
hinting here in ``__init__``.
|
||||
Generic to both a Component (C), and an Engine resource type (R). The
|
||||
Engine's generic openness must be propagated here, as it's intended to be
|
||||
fully abstracted away under the Database roof. Note that we cannot
|
||||
explicitly use an Engine type in its place, as it obscures its internal
|
||||
resource type dependence when we need it for hinting here in ``__init__``.
|
||||
|
||||
.. admonition:: Development TODO list
|
||||
|
||||
Decide on official ruling for assigning Schema objects, and verifying any
|
||||
attempted Component-based actions (e.g., inserts, selects) to belong to or be a
|
||||
composition of Components within an attached Schema. Reasons for: helps complete
|
||||
the sense of a "Database" here programmatically, incorporating a more structurally
|
||||
accurate representation of allowed operations, and prevent possible attribute and
|
||||
type collisions. Reasons against: generally not a huge concern to align Schemas as
|
||||
transactions will rollback, broadly increases a bit of bulk, and users often
|
||||
are expected to know which components belong to a particular DB. Leaning more to **for**,
|
||||
and would only apply to the directly supported method passthroughs (and thus would
|
||||
have no impact on independent methods like ``Accessor.raw_select``). Additionally,
|
||||
Decide on official ruling for assigning Schema objects, and verifying
|
||||
any attempted Component-based actions (e.g., inserts, selects) to
|
||||
belong to or be a composition of Components within an attached Schema.
|
||||
Reasons for: helps complete the sense of a "Database" here
|
||||
programmatically, incorporating a more structurally accurate
|
||||
representation of allowed operations, and prevent possible attribute
|
||||
and type collisions. Reasons against: generally not a huge concern to
|
||||
align Schemas as transactions will rollback, broadly increases a bit of
|
||||
bulk, and users are often expected to know which components belong to a
|
||||
particular DB. Leaning more to **for**, and would only apply to the
|
||||
directly supported method passthroughs (and thus would have no impact
|
||||
on independent methods like ``Accessor.raw_select``). Additionally,
|
||||
even if component clashes don't pose serious risk, it can be helpful to
|
||||
systematically address the cases where a misalignment is occurring (by having
|
||||
helpful ``verify`` methods that can be run before any actions).
|
||||
systematically address the cases where a misalignment is occurring (by
|
||||
having helpful ``verify`` methods that can be run before any actions).
|
||||
'''
|
||||
_accessor_cls: type[Accessor[C]] = Accessor[C]
|
||||
_manager_cls: type[Manager[C]] = Manager[C]
|
||||
|
||||
@@ -1,20 +1,18 @@
|
||||
import sqlalchemy as sa
|
||||
|
||||
from co3.database import Database, Engine
|
||||
|
||||
from co3.accessors.sql import RelationalAccessor, SQLAccessor
|
||||
from co3.managers.sql import RelationalManager, SQLManager
|
||||
|
||||
from co3.engines import SQLEngine
|
||||
from co3.database import Database
|
||||
from co3.components import Relation, SQLTable
|
||||
from co3.managers.sql import RelationalManager, SQLManager
|
||||
from co3.accessors.sql import RelationalAccessor, SQLAccessor
|
||||
|
||||
|
||||
class RelationalDatabase[C: Relation](Database[C]):
|
||||
'''
|
||||
accessor/manager assignments satisfy supertype's type settings;
|
||||
``TabularAccessor[Self, C]`` is of type ``type[RelationalAccessor[Self, C]]``
|
||||
(and yes, ``type[]`` specifies that the variable is itself being set to a type or a
|
||||
class, rather than a satisfying _instance_)
|
||||
``TabularAccessor[Self, C]`` is of type ``type[RelationalAccessor[Self,
|
||||
C]]`` (and yes, ``type[]`` specifies that the variable is itself being set
|
||||
to a type or a class, rather than a satisfying _instance_)
|
||||
'''
|
||||
_accessor_cls: type[RelationalAccessor[C]] = RelationalAccessor[C]
|
||||
_manager_cls: type[RelationalManager[C]] = RelationalManager[C]
|
||||
|
||||
@@ -11,12 +11,10 @@ logger = logging.getLogger(__name__)
|
||||
|
||||
class Indexer:
|
||||
'''
|
||||
Indexer class
|
||||
Indexer base class
|
||||
|
||||
Provides restricted access to an underlying Accessor to enable more efficient, superficial
|
||||
caching.
|
||||
|
||||
Cache clearing is to be handled by a wrapper class, like the Database.
|
||||
caching. Note that cache clearing is to be handled by a wrapper class, like the Database.
|
||||
|
||||
Caching occurs at the class level, with indexes prefixed by table's origin Composer.
|
||||
This means that cached selects/group-bys will be available regardless of the provided
|
||||
@@ -317,6 +315,7 @@ class Indexer:
|
||||
|
||||
return list(group_by_idx.values())
|
||||
|
||||
|
||||
class CacheBlock:
|
||||
'''
|
||||
Wraps up a set of query parameters for a specific entity, and provides cached access
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
import logging
|
||||
from contextlib import contextmanager
|
||||
|
||||
from co3.engine import Engine, Connection, Resource, Group
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -10,11 +12,14 @@ class Medium[R: Resource]:
|
||||
|
||||
A Resource space
|
||||
'''
|
||||
def __init__(self, scope):
|
||||
_engine_cls: type[Engine] = Engine
|
||||
|
||||
def __init__(self, *scope_args, **scope_kwargs):
    '''
    Build the Engine that backs this Medium.

    Every positional and keyword argument is forwarded unchanged to
    ``_engine_cls``; the resulting object is stored as ``self.engine``.

    Parameters:
        scope_args:   positional arguments passed through to ``_engine_cls``
        scope_kwargs: keyword arguments passed through to ``_engine_cls``
    '''
    # Forward the arguments actually received; the previous body referenced
    # ``engine_args``/``engine_kwargs``, which are not defined in this scope.
    self.engine = self._engine_cls(*scope_args, **scope_kwargs)
|
||||
|
||||
@contextmanager
|
||||
def connect(self, timeout=None):
|
||||
def connect(self, timeout=None) -> Connection:
|
||||
'''
|
||||
Open a connection to the database specified by the resource. Exactly what the
|
||||
returned connection looks like remains relatively unconstrained given the wide
|
||||
@@ -22,21 +27,59 @@ class Medium[R: Resource]:
|
||||
with-statement contexts, constituting an "interaction session" with the database
|
||||
(i.e., allowing several actions to be performed using the same connection).
|
||||
'''
|
||||
raise NotImplementedError
|
||||
return self.engine.connect(timeout=timeout)
|
||||
|
||||
def execute(self, query: Query[QL]):
|
||||
pass
|
||||
|
||||
|
||||
class BrowsableMedium[R: Resource](Medium[R]):
|
||||
def browse(self, uri: URI[R]):
|
||||
class ReadableMedium[R: Resource](Medium[R]):
|
||||
def _resolve_relative_uri(self, protocol, value):
|
||||
'''
|
||||
Subclass to implement: fetch child object for supported protocol, i.e., single
|
||||
component subpath
|
||||
'''
|
||||
...
|
||||
|
||||
def resolve_uri(self, uri: URI) -> ResourceCollection:
    '''
    Resolve ``uri`` into a ResourceCollection, one layer at a time.

    The leading protocol/component pair is handed to
    ``_resolve_relative_uri``. If ``uri.core()`` returns a truthy remainder,
    resolution of that remainder is delegated to the ``resolve_uri`` method of
    the object just obtained; otherwise the object itself must be a Resource
    and is returned wrapped in a single-item collection.

    Parameters:
        uri: object exposing ``protocols``, ``components`` and ``core()``

    Returns:
        The collection produced by the recursive call, or a one-element
        ``ResourceCollection`` holding the locally resolved object.

    Raises:
        AssertionError: if the leading protocol is not in
            ``self.supported_protocols``, or if resolution bottoms out on an
            object that is not a Resource.
    '''
    head_protocol = uri.protocols[0]
    assert head_protocol in self.supported_protocols

    obj = self._resolve_relative_uri(head_protocol, uri.components[0])

    # core the uri and recurse
    cored_uri = uri.core()
    if cored_uri:
        return obj.resolve_uri(cored_uri)

    # isinstance (not an exact type match) so Resource subclasses are accepted
    assert isinstance(obj, Resource)
    return ResourceCollection([obj])
|
||||
|
||||
def _to_uri_list(self, uri_like):
    '''
    Normalize a single URI-like value, or a list of them, into a list.

    Parameters:
        uri_like: one URI-like value or a list of such values

    Returns:
        ``uri_like`` itself when it is already a list, otherwise a new
        one-element list containing it.
    '''
    # NOTE(review): the original body ended in a ``...`` placeholder, so
    # further per-item normalization (presumably str -> URI) may still be
    # intended here -- confirm before relying on the element types.
    if isinstance(uri_like, list):
        return uri_like

    return [uri_like]
|
||||
|
||||
def browse(
    self,
    connection: Connection,
    uri: str | URI | list[str] | list[URI]
) -> ResourceCollection:
    '''
    Analog for Read (CRUD), SELECT (SQL), GET (REST)

    Normalizes ``uri`` to a list via ``_to_uri_list``, resolves every entry
    with ``resolve_uri``, and merges the results into one collection.

    Parameters:
        connection: accepted for interface purposes; not used by this body
        uri:        a single URI (or URI string), or a list of them

    Returns:
        A ``ResourceCollection`` extended with the result of each resolution.
    '''
    rc = ResourceCollection()

    # ``resolve_uri`` is the resolver defined alongside this method; a
    # distinct loop name keeps the ``uri`` parameter from being shadowed
    for single_uri in self._to_uri_list(uri):
        rc.extend(self.resolve_uri(single_uri))

    return rc
|
||||
|
||||
|
||||
class ABCDMedium[R: Resource](BrowsableMedium[R]):
|
||||
class WritableMedium[R: Resource](ReadableMedium[R]):
|
||||
def append(self, uri: URI[R], resource: R):
|
||||
'''
|
||||
Analog for Create (CRUD), INSERT (SQL), POST/PUT (REST)
|
||||
|
||||
6
co3/mediums/disk.py
Normal file
6
co3/mediums/disk.py
Normal file
@@ -0,0 +1,6 @@
|
||||
from co3 import Medium
|
||||
from co3.resources import INode
|
||||
|
||||
|
||||
class Disk(Medium[INode]):
    '''
    Medium specialized to ``INode`` resources.

    ``Medium`` is parametrized with the imported ``INode`` class. Writing
    ``class Disk[INode](Medium)`` would instead declare a fresh type parameter
    that merely shares the name ``INode`` and shadows the import.
    '''
|
||||
@@ -1,10 +1,15 @@
|
||||
from typing import Protocol
|
||||
from typing import BinaryIO
|
||||
|
||||
from co3 import URI, Medium
|
||||
|
||||
|
||||
class Resource:
|
||||
def content(self) -> BinaryIO:
|
||||
pass
|
||||
def __init__(
|
||||
self,
|
||||
context: Medium | list[Medium],
|
||||
uri: URI
|
||||
):
|
||||
self.uri = uri
|
||||
|
||||
class SelectableResource(Protocol):
|
||||
def select(self, component, *args, **kwargs):
|
||||
raise NotImplementedError
|
||||
def open(self) -> BinaryIO:
|
||||
pass
|
||||
|
||||
5
co3/resources/inode.py
Normal file
5
co3/resources/inode.py
Normal file
@@ -0,0 +1,5 @@
|
||||
from co3 import Resource
|
||||
|
||||
|
||||
class INode(Resource):
    '''
    Resource subclass that adds no attributes or methods of its own.
    '''
|
||||
21
co3/uri.py
21
co3/uri.py
@@ -1,4 +1,4 @@
|
||||
from urllib import parse
|
||||
from urllib.parse import urlparse
|
||||
|
||||
|
||||
class URI:
|
||||
@@ -13,3 +13,22 @@ class URN(URI):
|
||||
def __init__(self, url_str: str):
|
||||
self.url_str = url_str
|
||||
|
||||
|
||||
class CompositeURI(URI):
    '''
    URI whose scheme is a ``+``-joined stack of protocols.

    Attributes set in ``__init__``:
        protocols:  scheme pieces split on ``+`` and reversed, so the
                    right-most scheme comes first
        components: authority followed by the non-empty path segments, in
                    written order
    '''
    def __init__(self, url_str: str):
        '''
        Parse ``url_str`` into parallel ``protocols``/``components`` lists.

        Parameters:
            url_str: URI string, e.g. ``c+b+a://a/b/c``
        '''
        url_obj = urlparse(url_str)

        self.protocols = url_obj.scheme.split('+')[::-1]

        # Components come from the authority and path, not the scheme (the
        # previous body duplicated the ``protocols`` expression here).
        # NOTE(review): assumes the ``c+b+a://a/b/c`` layout, where the i-th
        # component pairs with the i-th reversed protocol -- TODO confirm.
        segments = [url_obj.netloc, *url_obj.path.split('/')]
        self.components = [seg for seg in segments if seg]

    def core(self, layers=1) -> 'CompositeURI':
        '''
        "Core" the innermost ``layers`` layers of the composite URI.

        Not implemented yet: the body is a stub and returns ``None``.
        '''
        pass

    def shed(self, layers=1) -> 'CompositeURI':
        '''
        "Shed" the outermost ``layers`` layers of the composite URI.

        Not implemented yet: the body is a stub and returns ``None``.
        '''
        pass
|
||||
|
||||
11
doc/reference/uri.md
Normal file
11
doc/reference/uri.md
Normal file
@@ -0,0 +1,11 @@
|
||||
Quick thoughts and ideals:
|
||||
|
||||
- Schemes are compositional, "wrapping" super contexts: `c+b+a://a/b/c`
|
||||
- The scheme communicates the target type (above is `c`)
|
||||
- URIs can be arbitrarily relative so long as they're resolved in the right contexts.
|
||||
Above, `c+b://b/c` can be resolved in the context of `a://a`
|
||||
- URIs are resolved by unwrapping schemes one at a time, starting from the innermost (right-most) scheme and working outward
|
||||
- URL params can apply only to the target type (this is the most consistent and probably
|
||||
not too restrictive)
|
||||
- Trajectories from one scheme to another can be inferred from the type hierarchy; there
|
||||
may be many
|
||||
@@ -1,27 +1,25 @@
|
||||
[build-system]
|
||||
requires = ["setuptools", "wheel", "setuptools-git-versioning>=2.0,<3"]
|
||||
requires = ["setuptools", "wheel"]
|
||||
build-backend = "setuptools.build_meta"
|
||||
|
||||
[tool.setuptools-git-versioning]
|
||||
enabled = true
|
||||
|
||||
[project]
|
||||
name = "co3"
|
||||
version = "0.6.3"
|
||||
description = "Lightweight Python ORM for hierarchical storage management"
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.12"
|
||||
dynamic = ["version"]
|
||||
#license = {file = "LICENSE"}
|
||||
authors = [
|
||||
{ name="Sam Griesemer", email="samgriesemer+git@gmail.com" },
|
||||
{ name="Sam Griesemer", email="git@olog.io" },
|
||||
]
|
||||
readme = "README.md"
|
||||
license = "MIT"
|
||||
keywords = ["database", "orm"]
|
||||
classifiers = [
|
||||
"Programming Language :: Python :: 3.12",
|
||||
"License :: OSI Approved :: MIT License",
|
||||
"Programming Language :: Python",
|
||||
"Operating System :: OS Independent",
|
||||
"Development Status :: 3 - Alpha",
|
||||
|
||||
"Intended Audience :: Developers",
|
||||
"Intended Audience :: End Users/Desktop",
|
||||
]
|
||||
dependencies = [
|
||||
"tqdm",
|
||||
@@ -31,16 +29,23 @@ dependencies = [
|
||||
"colorama",
|
||||
]
|
||||
|
||||
[project.scripts]
|
||||
symconf = "co3.__main__:main"
|
||||
|
||||
[project.optional-dependencies]
|
||||
tests = ["pytest"]
|
||||
docs = [
|
||||
dev = [
|
||||
"ipykernel",
|
||||
]
|
||||
doc = [
|
||||
"furo",
|
||||
"myst-parser",
|
||||
"sphinx",
|
||||
"sphinx-togglebutton",
|
||||
"sphinx-autodoc-typehints",
|
||||
"furo",
|
||||
"myst-parser",
|
||||
]
|
||||
jupyter = ["ipykernel"]
|
||||
test = [
|
||||
"pytest",
|
||||
]
|
||||
|
||||
[project.urls]
|
||||
Homepage = "https://doc.olog.io/co3"
|
||||
@@ -48,6 +53,26 @@ Documentation = "https://doc.olog.io/co3"
|
||||
Repository = "https://git.olog.io/olog/co3"
|
||||
Issues = "https://git.olog.io/olog/co3/issues"
|
||||
|
||||
[dependency-groups]
|
||||
dev = [
|
||||
"ipykernel",
|
||||
]
|
||||
|
||||
[tool.setuptools.packages.find]
|
||||
include = ["co3*"] # pattern to match package names
|
||||
include = ["co3*"]
|
||||
|
||||
[tool.ruff]
|
||||
line-length = 79
|
||||
|
||||
[tool.ruff.lint]
|
||||
select = ["ANN", "E", "F", "UP", "B", "SIM", "I", "C4", "PERF"]
|
||||
|
||||
[tool.ruff.lint.isort]
|
||||
length-sort = true
|
||||
order-by-type = false
|
||||
force-sort-within-sections = false
|
||||
|
||||
[tool.ruff.format]
|
||||
quote-style = "double"
|
||||
indent-style = "space"
|
||||
docstring-code-format = true
|
||||
|
||||
5
test/co3_medium_demo.py
Normal file
5
test/co3_medium_demo.py
Normal file
@@ -0,0 +1,5 @@
|
||||
from co3.mediums import Disk
|
||||
|
||||
|
||||
disk = Disk('disk:///')
|
||||
disk.browse('dir://home')
|
||||
Reference in New Issue
Block a user