Compare commits

...

2 Commits

Author SHA1 Message Date
c566dcf678 perform minor, partial reformatting 2026-01-02 17:26:28 -08:00
090c122c60 intermediate Medium/Resource refactoring 2024-12-28 18:34:40 -08:00
33 changed files with 1649 additions and 248 deletions

21
.gitignore vendored
View File

@@ -1,18 +1,19 @@
# generic py # generic
__pycache__/ __pycache__/
.pytest_cache/
*.egg-info/ *.egg-info/
.ipynb_checkpoints/
.pytest_cache/
.python-version .python-version
# vendor and build files # package-specific
.ipynb_checkpoints/
.pytest_cache/
# vendor/build files
dist/ dist/
build/ build/
docs/_autoref/ doc/_autoref/
docs/_autosummary/ doc/_autosummary/
docs/_build/ doc/_build/
# local # misc local
notebooks/
/Makefile /Makefile
notebooks/

View File

@@ -1,35 +1,36 @@
# Overview # Overview
`co3` is a lightweight Python ORM for hierarchical storage management. It implements a `co3` is a lightweight Python ORM for hierarchical storage management. It
general type system for defining database components like relations, schemas, engines, implements a general type system for defining database components like
etc. Objects inheriting from the `CO3` base class can then define data transformations relations, schemas, engines, etc. Objects inheriting from the `CO3` base class
that connect to database components, and can be automatically collected for coordinated can then define data transformations that connect to database components, and
database insertion. can be automatically collected for coordinated database insertion.
`co3` attempts to provide a general interface for interacting with storage media (e.g., `co3` attempts to provide a general interface for interacting with storage
databases, pickled objects, VSS framework, in-memory key-value stores, etc). The following media (e.g., databases, pickled objects, VSS framework, in-memory key-value
top-level classes capture the bulk of the operational model: stores, etc). The following top-level classes capture the bulk of the
operational model:
- **Database**: reference to a storage medium, with an `Accessor` for accessing data, - **Database**: reference to a storage medium, with an `Accessor` for accessing
`Manager` for managing database state, and an `Engine` for managing connections and data, `Manager` for managing database state, and an `Engine` for managing
external operations. connections and external operations.
- **Accessor**: provides access to stored items in a `Database`, typically via a supported - **Accessor**: provides access to stored items in a `Database`, typically via
`select` operation over known `Component` types a supported `select` operation over known `Component` types
- **Manager**: manages database storage state (e.g., supported inserts or database sync - **Manager**: manages database storage state (e.g., supported inserts or
operations) database sync operations)
- **Mapper**: associates `CO3` types with `Schema` components, and provides automatic - **Mapper**: associates `CO3` types with `Schema` components, and provides
collection and composition operations for supported items automatic collection and composition operations for supported items
- **Collector**: collects data from defined `CO3` type transformations and prepares for - **Collector**: collects data from defined `CO3` type transformations and
`Database` insert operations prepares for `Database` insert operations
- **Component**: atomic storage groups for databases (i.e., generalized notion of a - **Component**: atomic storage groups for databases (i.e., generalized notion
"relation" in relational algebra). of a "relation" in relational algebra).
- **Indexer**: automatic caching of supported access queries to a `Database` - **Indexer**: automatic caching of supported access queries to a `Database`
- **Schema**: general schema analog for grouping related `Component` sets - **Schema**: general schema analog for grouping related `Component` sets
- **Differ**: facilitates set operations on results from selectable resources (e.g., - **Differ**: facilitates set operations on results from selectable resources
automatic comparison between file data on disk and file rows in a SQL database) (e.g., automatic comparison between file data on disk and file rows in a SQL
- **Syncer**: generalized syncing procedure for items between data resources (e.g., database)
syncing new, modified, and deleted files from disk to a SQL database that stores file - **Syncer**: generalized syncing procedure for items between data resources
metadata). (e.g., syncing new, modified, and deleted files from disk to a SQL database
that stores file metadata).
The **CO3** abstract base class then makes it easy to integrate this model with regular
Python object hierarchies that can be mapped to a storage schema.
The **CO3** abstract base class then makes it easy to integrate this model
with regular Python object hierarchies that can be mapped to a storage schema.
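
To make the division of labor concrete, the following is a minimal sketch of how these pieces are intended to interact. The `Note` type, its fields, and the closing comments about `Mapper`/`Collector`/`Database` are illustrative assumptions based on the class descriptions above, not a verbatim excerpt of the package's API.

```python
from co3 import CO3, collate

class Note(CO3):
    def __init__(self, name, text):
        super().__init__()
        self.name = name
        self.text = text

    # a registered transformation; its output is routed to whatever Component
    # is attached to the 'conversions' group
    @collate('html', groups=['conversions'])
    def to_html(self):
        return {'html': f'<p>{self.text}</p>'}

# Roughly: a Mapper ties Note (and its collation groups) to Schema components,
# a Collector gathers attribute and collation inserts, and a Database commits
# them in one coordinated pass.
```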

View File

@@ -1,7 +1,7 @@
''' '''
Provides access to an underlying schema through a supported set of operations. Class Provides access to an underlying schema through a supported set of operations.
methods could be general, high-level SQL wrappers, or convenience functions for common Class methods could be general, high-level SQL wrappers, or convenience
schema-specific queries. functions for common schema-specific queries.
''' '''
import time import time
import inspect import inspect
@@ -16,13 +16,14 @@ from co3.component import Component
class Accessor[C: Component](metaclass=ABCMeta): class Accessor[C: Component](metaclass=ABCMeta):
''' '''
Access wrapper class for complex queries and easy integration with Composer tables. Access wrapper class for complex queries and easy integration with Composer
Implements high-level access to things like common constrained SELECT queries. tables. Implements high-level access to things like common constrained
SELECT queries.
Instance variables: Instance variables:
access_log: time-indexed log of access queries performed access_log: time-indexed log of access queries performed
''' '''
def __init__(self): def __init__(self) -> None:
self.access_log = {} self.access_log = {}
def log_access(self, stmt): def log_access(self, stmt):
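
The hunk ends before the body of `log_access`; purely as an illustration of the "time-indexed log of access queries" described above (an assumption, not the file's actual implementation), the method might look like:

```python
def log_access(self, stmt):
    # index the executed statement by wall-clock time
    self.access_log[time.time()] = str(stmt)
```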

View File

@@ -1,8 +1,9 @@
''' '''
CO3 is an abstract base class for scaffolding object hierarchies and managing operations CO3 is an abstract base class for scaffolding object hierarchies and managing
with associated database schemas. It facilitates something like a "lightweight ORM" for operations with associated database schemas. It facilitates something like a
classes/tables/states with fixed transformations of interest. The canonical use case is "lightweight ORM" for classes/tables/states with fixed transformations of
managing hierarchical document relations, format conversions, and syntactical components. interest. The canonical use case is managing hierarchical document relations,
format conversions, and syntactical components.
Generic collation syntax: Generic collation syntax:
@@ -22,12 +23,13 @@ Generic collation syntax:
.. admonition:: On multi-key attachment .. admonition:: On multi-key attachment
One possible quirk of the current collation registry scheme is the rather black and One possible quirk of the current collation registry scheme is the rather
white nature of key attachment. You either specify a single key, possibly to several black and white nature of key attachment. You either specify a single key,
groups, or allow any key via passthrough under an implicit group. There's no explicit possibly to several groups, or allow any key via passthrough under an
"multi-key" pattern to make use of here, be it through "restricted passthrough" implicit group. There's no explicit "multi-key" pattern to make use of
(method still parameterized by the key, but only allows keys from a provided list) or here, be it through "restricted passthrough" (method still parameterized by
just simple duplicated attachment. To demonstrate via the above example: the key, but only allows keys from a provided list) or just simple
duplicated attachment. To demonstrate via the above example:
.. code-block:: python .. code-block:: python
@@ -54,8 +56,8 @@ Generic collation syntax:
... ...
or with a central handler and separate collation points (at least when the key list is or with a central handler and separate collation points (at least when the
small): key list is small):
.. code-block:: python .. code-block:: python
@@ -71,38 +73,41 @@ Generic collation syntax:
def key2(self): def key2(self):
self._handle_supported_keys('key2') self._handle_supported_keys('key2')
The former scales better and allows general key rejection patterns if needed, while The former scales better and allows general key rejection patterns if
the latter integrates a bit better with the formal collation process, e.g., will needed, while the latter integrates a bit better with the formal collation
throw ``ValueErrors`` based on key mismatches automatically. process, e.g., will throw ``ValueErrors`` based on key mismatches
automatically.
''' '''
import inspect import inspect
import logging import logging
from collections import defaultdict from collections import defaultdict
from functools import wraps, partial
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
def collate(key, groups=None): def collate(key, groups=None):
''' '''
Collation decorator for CO3 subtype action registry. Collation decorator for CO3 subtype action registry.
Dynamic decorator; can be used as ``collate`` without any arguments, or with all. In Dynamic decorator; can be used as ``collate`` without any arguments, or
the former case, ``key`` will be a function, so we check for this. with all. In the former case, ``key`` will be a function, so we check for
this.
.. admonition:: Usage .. admonition:: Usage
Collation registration is the process of exposing various actions for use in Collation registration is the process of exposing various actions for
**hierarchical collection** (see ``Mapper.collect``). Collation *keys* are unique use in **hierarchical collection** (see ``Mapper.collect``). Collation
identifiers of a particular action that emits data. Keys can belong to an arbitrary *keys* are unique identifiers of a particular action that emits data.
number of *groups*, which serve as semantically meaningful collections of similar Keys can belong to an arbitrary number of *groups*, which serve as
actions. Group assignment also determines the associated *collation component* semantically meaningful collections of similar actions. Group
to be used as a storage target; the results of actions $K_G$ belonging to group assignment also determines the associated *collation component* to be
$G$ will all be stored in the attached $G$-component. Specification of key-group used as a storage target; the results of actions $K_G$ belonging to
relations can be done in a few ways: group $G$ will all be stored in the attached $G$-component.
Specification of key-group relations can be done in a few ways:
- Explicit key-group specification: a specific key and associated groups can be - Explicit key-group specification: a specific key and associated
provided as arguments to the decorator: groups can be provided as arguments to the decorator:
.. code-block:: python .. code-block:: python
@@ -127,14 +132,14 @@ def collate(key, groups=None):
... ...
} }
If ``groups`` is left unspecified, the key will be attached to the default If ``groups`` is left unspecified, the key will be attached to the
``None`` group. default ``None`` group.
- Implicit key-group association: in some cases, you may want to support an entire - Implicit key-group association: in some cases, you may want to
"action class," and associate any operations under the class to the same storage support an entire "action class," and associate any operations under
component. Here we still use the notion of connecting groups to components, but the class to the same storage component. Here we still use the notion
allow the key to be dynamically specified and passed through to the collation of connecting groups to components, but allow the key to be
method: dynamically specified and passed through to the collation method:
.. code-block:: python .. code-block:: python
@@ -160,27 +165,31 @@ def collate(key, groups=None):
A few important notes: A few important notes:
- Implicit key-group specifications attach the *group* to a single method, - Implicit key-group specifications attach the *group* to a single
whereas in the explicit case, groups can be affiliated with many keys. When method, whereas in the explicit case, groups can be affiliated with
explicitly provided, only those exact key values are supported. But in the many keys. When explicitly provided, only those exact key values
implicit case, *any* key is allowed; the group still remains a proxy for the are supported. But in the implicit case, *any* key is allowed; the
entire action class, but without needing to map from specifically stored key group still remains a proxy for the entire action class, but
values. That is, the utility of the group remains consistent across implicit without needing to map from specifically stored key values. That
is, the utility of the group remains consistent across implicit
and explicit cases, but stores the associations differently. and explicit cases, but stores the associations differently.
- The ``None`` key, rather than point to a ``(<method>, <group-list>)`` tuple, - The ``None`` key, rather than point to a ``(<method>,
instead points to a dictionary of ``group``-``method`` pairs. When attempting <group-list>)`` tuple, instead points to a dictionary of
to execute a key under a particular group, the group registry indicates ``group``-``method`` pairs. When attempting to execute a key under a
whether the key is explicitly supported. If ``None`` is present for the group, particular group, the group registry indicates whether the key is
then ``key_registry[None][<group-name>]`` can be used to recover the method explicitly supported. If ``None`` is present for the group, then
implicitly affiliated with the key (along with any other key under the group). ``key_registry[None][<group-name>]`` can be used to recover the
- When any method has been implicitly registered, *any* key (even when method implicitly affiliated with the key (along with any other key
attempting to specify an explicit key) will match that group. This can under the group).
effectively mean keys are not unique when an implicit group has been - When any method has been implicitly registered, *any* key (even
registered. There is a protection in place here, however; in methods like when attempting to specify an explicit key) will match that group.
``CO3.collate`` and ``Mapper.collect``, an implicit group must be directly This can effectively mean keys are not unique when an implicit
named in order for a given key to be considered. That is, when attempting group has been registered. There is a protection in place here,
collation outside specific group context, provided keys will only be however; in methods like ``CO3.collate`` and ``Mapper.collect``, an
considered against explicitly registered keys. implicit group must be directly named in order for a given key to
be considered. That is, when attempting collation outside specific
group context, provided keys will only be considered against
explicitly registered keys.
''' '''
func = None func = None
if inspect.isfunction(key): if inspect.isfunction(key):
@@ -200,6 +209,7 @@ def collate(key, groups=None):
return decorator return decorator
class FormatRegistryMeta(type): class FormatRegistryMeta(type):
''' '''
Metaclass handling collation registry at the class level. Metaclass handling collation registry at the class level.
@@ -225,8 +235,8 @@ class FormatRegistryMeta(type):
for _, method in methods: for _, method in methods:
register_action(method) register_action(method)
# add final registered formats for the current class, overwriting any found in # add final registered formats for the current class, overwriting any
# superclass chain # found in superclass chain
for attr_name, attr_value in attrs.items(): for attr_name, attr_value in attrs.items():
register_action(attr_value) register_action(attr_value)
@@ -235,41 +245,49 @@ class FormatRegistryMeta(type):
return super().__new__(cls, name, bases, attrs) return super().__new__(cls, name, bases, attrs)
class CO3(metaclass=FormatRegistryMeta): class CO3(metaclass=FormatRegistryMeta):
''' '''
Base class supporting the central "COllate, COllect, COmpose" paradigm. Base class supporting the central "COllate, COllect, COmpose" paradigm.
- Collate: organize and transform conversion outputs, possibly across class components - Collate: organize and transform conversion outputs, possibly across class
- Collect: gather core attributes, conversion data, and subcomponents for DB insertion components
- Compose: construct object-associated DB table references through the class hierarchy - Collect: gather core attributes, conversion data, and subcomponents for
DB insertion
- Compose: construct object-associated DB table references through the
class hierarchy
.. admonition:: on action groups .. admonition:: on action groups
Group keys are simply named collections to make it easy for storage components to Group keys are simply named collections to make it easy for storage
be attached to action subsets. They do _not_ augment the action registration components to be attached to action subsets. They do _not_ augment the
namespace, meaning the action key should still be unique; the group key is purely action registration namespace, meaning the action key should still be
auxiliary. unique; the group key is purely auxiliary.
Action methods can also be attached to several groups, in case there is Action methods can also be attached to several groups, in case there is
overlapping utility within or across schemas or storage media. In this case, it overlapping utility within or across schemas or storage media. In this
becomes particularly critical to ensure registered ``collate`` methods really are case, it becomes particularly critical to ensure registered ``collate``
just "gathering results" from possibly heavy-duty operations, rather than methods really are just "gathering results" from possibly heavy-duty
performing them when called, so as to reduce wasted computation. operations, rather than performing them when called, so as to reduce
wasted computation.
.. admonition:: New: collation caching .. admonition:: New: collation caching
To help facilitate the common pattern of storing collation results, a To help facilitate the common pattern of storing collation results, a
``collate_cache`` parameter has been added to store key-group indexed collation ``collate_cache`` parameter has been added to store key-group indexed
results. (Note: now requires explicit superclass instantiation.) collation results. (Note: now requires explicit superclass
instantiation.)
''' '''
def __init__(self): def __init__(self):
self._collate_cache = {} self._collate_cache = {}
@property @property
def attributes(self): def attributes(self):
''' '''
Method to define how a subtype's inserts should be handled under ``collect`` for Method to define how a subtype's inserts should be handled under
canonical attributes, i.e., inserts to the type's table. ``collect`` for canonical attributes, i.e., inserts to the type's
table.
''' '''
return vars(self) return vars(self)
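
As a hedged illustration of this hook (the `Document` type and its fields are hypothetical), a subtype can narrow the default `vars(self)` down to the columns it actually wants inserted:

```python
from co3 import CO3

class Document(CO3):
    def __init__(self, name, content):
        super().__init__()      # explicit init now required for collation caching
        self.name = name
        self.content = content
        self._scratch = None    # transient state, excluded from inserts below

    @property
    def attributes(self):
        # only these keys feed the canonical-attribute insert under ``collect``
        return {'name': self.name, 'content': self.content}
```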
@@ -284,14 +302,15 @@ class CO3(metaclass=FormatRegistryMeta):
def collation_attributes(self, key, group): def collation_attributes(self, key, group):
''' '''
Return "connective" collation component data, possibly dependent on Return "connective" collation component data, possibly dependent on
instance-specific attributes and the action arguments. This is typically the instance-specific attributes and the action arguments. This is
auxiliary structure that may be needed to attach to responses from registered typically the auxiliary structure that may be needed to attach to
``collate`` calls to complete inserts. responses from registered ``collate`` calls to complete inserts.
Note: this method is primarily used by ``Mapper.collect()``, and is called just Note: this method is primarily used by ``Mapper.collect()``, and is
prior to collector send-off for collation inserts and injected alongside collation called just prior to collector send-off for collation inserts and
data. Common structure in collation components can make this function easy to injected alongside collation data. Common structure in collation
define, independent of action group for instance. components can make this function easy to define, independent of action
group for instance.
''' '''
return {} return {}
@@ -350,5 +369,3 @@ class CO3(metaclass=FormatRegistryMeta):
self._collate_cache[(key, group)] = result self._collate_cache[(key, group)] = result
return result return result
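
The hunks above elide the docstring's own code blocks, so here is a hedged reconstruction of the two registration styles they describe (explicit key-group vs. implicit passthrough); the `Article` type, method names, and group names are assumptions, not code from the module:

```python
from co3 import CO3, collate

class Article(CO3):
    def __init__(self, text):
        super().__init__()
        self.text = text

    # explicit: exactly this key, attached to a named group
    @collate('summary', groups=['text'])
    def summary(self):
        return {'summary': self.text[:100]}

    # implicit: key left as None, so any key is passed through at collation
    # time; the 'formats' group acts as a proxy for the whole action class
    @collate(None, groups=['formats'])
    def export(self, key):
        if key == 'html':
            return {'content': f'<p>{self.text}</p>'}
        raise ValueError(f'unsupported format key: {key}')
```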

View File

@@ -2,19 +2,21 @@ class Component[T]:
''' '''
Component Component
General wrapper for storage components to be used in various database contexts. Relations General wrapper for storage components to be used in various database
can be thought of generally as named data containers/entities serving as fundamental contexts. Relations can be thought of generally as named data
abstractions within particular storage protocols. containers/entities serving as fundamental abstractions within particular
storage protocols.
''' '''
def __init__(self, name, obj: T):
def __init__(self, name, obj: T) -> None:
self.name = name self.name = name
self.obj = obj self.obj = obj
def __str__(self): def __str__(self) -> str:
return f'<Component ({self.__class__.__name__})> {self.name}' return f'<Component ({self.__class__.__name__})> {self.name}'
def __repr__(self): def __repr__(self) -> str:
return f'<Component ({self.__class__.__name__})> {self.name}' return f'<Component ({self.__class__.__name__})> {self.name}'
def get_attributes(self): def get_attributes(self) -> dict:
raise NotImplementedError raise NotImplementedError
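
A minimal hedged example of the wrapper pattern described above: a `Component` around a plain mapping (the `DictComponent` type is illustrative, not one of the package's shipped components):

```python
from co3.component import Component

class DictComponent(Component[dict]):
    def get_attributes(self) -> dict:
        # expose the wrapped mapping's keys as the component's attributes
        return dict(self.obj)

users = DictComponent('users', {'id': None, 'name': ''})
print(users)                    # <Component (DictComponent)> users
print(users.get_attributes())   # {'id': None, 'name': ''}
```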

View File

@@ -1,8 +1,9 @@
''' '''
Dev note: Dev note:
Any reason to have ComposeableComponents and Relations as separate types? The thought Any reason to have ComposableComponents and Relations as separate types?
is that there may be some possible Component types we want to be able to Compose that The thought is that there may be some possible Component types we want to
wouldn't logically be Relations. But the gap here might be quite small be able to Compose that wouldn't logically be Relations. But the gap here
might be quite small
''' '''
from typing import Self from typing import Self
@@ -29,9 +30,9 @@ class ComposableComponent[T](Component[T], metaclass=ABCMeta):
# relational databases # relational databases
class Relation[T](ComposableComponent[T]): class Relation[T](ComposableComponent[T]):
''' '''
Relation base for tabular components to be used in relational DB settings. Attempts to Relation base for tabular components to be used in relational DB settings.
adhere to the set-theoretic base outlined in the relational model [1]. Some Attempts to adhere to the set-theoretic base outlined in the relational
terminology: model [1]. Some terminology:
Relation: table-like container Relation: table-like container
| -> Heading: set of attributes | -> Heading: set of attributes
@@ -43,15 +44,16 @@ class Relation[T](ComposableComponent[T]):
[1]: https://en.wikipedia.org/wiki/Relational_model#Set-theoretic_formulation [1]: https://en.wikipedia.org/wiki/Relational_model#Set-theoretic_formulation
Note: development tasks Note: development tasks
As it stands, the Relation skeleton is incredibly lax compared to the properties and As it stands, the Relation skeleton is incredibly lax compared to the
operations that should be formally available, according to its pure relational algebra properties and operations that should be formally available, according
analog. to its pure relational algebra analog.
Relations are also generic up to a type T, which ultimately serves as the base object Relations are also generic up to a type T, which ultimately serves as
for Relation instances. We aren't attempting to implement some generally useful the base object for Relation instances. We aren't attempting to
table-like class here; instead we're just exposing a lightweight interface that's implement some generally useful table-like class here; instead we're
needed for a few CO3 contexts, and commonly off-loading most of the heavy-lifting to just exposing a lightweight interface that's needed for a few CO3
true relation objects like SQLAlchemy tables. contexts, and commonly off-loading most of the heavy-lifting to true
relation objects like SQLAlchemy tables.
''' '''
def compose( def compose(
self, self,
@@ -65,8 +67,8 @@ class SQLTable(Relation[SQLTableLike]):
@classmethod @classmethod
def from_table(cls, table: sa.Table): def from_table(cls, table: sa.Table):
''' '''
Note that the sa.Table type is intentional here; not all matching types for Note that the sa.Table type is intentional here; not all matching types
SQLTableLike have a defined ``name`` property for SQLTableLike have a defined ``name`` property
''' '''
return cls(table.name, table) return cls(table.name, table)
@@ -78,7 +80,8 @@ class SQLTable(Relation[SQLTableLike]):
Provide column:default pairs for a provided SQLAlchemy table. Provide column:default pairs for a provided SQLAlchemy table.
Parameters: Parameters:
include_all: whether to include all columns, even those without explicit defaults include_all: whether to include all columns, even those without
explicit defaults
''' '''
default_values = {} default_values = {}
for column in self.get_attributes(): for column in self.get_attributes():
@@ -87,8 +90,8 @@ class SQLTable(Relation[SQLTableLike]):
elif column.nullable: elif column.nullable:
default_values[column.name] = None default_values[column.name] = None
else: else:
# assume empty string if include_all and col has no explicit default # assume empty string if include_all and col has no explicit
# and isn't nullable # default and isn't nullable
if include_all and column.name != 'id': if include_all and column.name != 'id':
default_values[column.name] = '' default_values[column.name] = ''
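
The default-value logic above can be illustrated independently of `co3` with plain SQLAlchemy. The table below is hypothetical; the helper simply mirrors the rules described (explicit default if present, `None` if nullable, `''` otherwise when `include_all` is set and the column isn't `id`):

```python
import sqlalchemy as sa

metadata = sa.MetaData()
files = sa.Table(
    'files', metadata,
    sa.Column('id', sa.Integer, primary_key=True),
    sa.Column('path', sa.String, nullable=False),
    sa.Column('mtime', sa.Float, nullable=True),
    sa.Column('kind', sa.String, nullable=False, default='file'),
)

def column_defaults(table, include_all=True):
    defaults = {}
    for column in table.columns:
        if column.default is not None:
            defaults[column.name] = column.default.arg   # explicit default
        elif column.nullable:
            defaults[column.name] = None                 # nullable -> None
        elif include_all and column.name != 'id':
            defaults[column.name] = ''                   # fallback empty string
    return defaults

print(column_defaults(files))   # {'path': '', 'mtime': None, 'kind': 'file'}
```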

View File

@@ -1,19 +1,20 @@
''' '''
Database Database
Central object for defining storage protocol-specific interfaces. The database wraps up Central object for defining storage protocol-specific interfaces. The database
central items for interacting with database resources, namely the Accessor and Manager wraps up central items for interacting with database resources, namely the
objects. Accessor and Manager objects.
The Database type hierarchy attempts to be exceedingly general; SQL-derivatives should The Database type hierarchy attempts to be exceedingly general; SQL-derivatives
subclass from the RelationalDatabase subtype, for example, which itself becomes a new should subclass from the RelationalDatabase subtype, for example, which itself
generic via a type dependence on Relation. becomes a new generic via a type dependence on Relation.
While relying no many constituent pieces, Databases intend to provide all needed objects While relying on many constituent pieces, Databases intend to provide all
under one roof. This includes the Engine (opens up connections to the database), Accessors needed objects under one roof. This includes the Engine (opens up connections
(running select-like queries on DB data), Managers (updating DB state with sync to the database), Accessors (running select-like queries on DB data), Managers
insert-like actions), and Indexers (systematically caching Accessor queries). Generalized (updating DB state with sync insert-like actions), and Indexers (systematically
behavior is supported by explicitly leveraging the individual components. For example, caching Accessor queries). Generalized behavior is supported by explicitly
leveraging the individual components. For example,
.. code-block:: python .. code-block:: python
@@ -28,9 +29,9 @@ behavior is supported by explicitly leveraging the individual components. For ex
data data
) )
The Database also supports a few directly callable methods for simplified interaction. The Database also supports a few directly callable methods for simplified
These methods manage a connection context internally, passing them through the way they interaction. These methods manage a connection context internally, passing them
might otherwise be handled explicitly, as seen above. through the way they might otherwise be handled explicitly, as seen above.
.. code-block:: python .. code-block:: python
@@ -41,24 +42,25 @@ might otherwise be handled explicitly, as seen above.
.. admonition:: on explicit connection contexts .. admonition:: on explicit connection contexts
Older models supported Accessors/Managers that housed their own Engine instances, and Older models supported Accessors/Managers that housed their own Engine
when performing actions like ``insert``, the Engine would be passed all the way through instances, and when performing actions like ``insert``, the Engine would be
until a Connection could be spawned, and in that context the single action would be passed all the way through until a Connection could be spawned, and in that
made. This model forfeits a lot of connection control, preventing multiple actions context the single action would be made. This model forfeits a lot of
under a single connection. connection control, preventing multiple actions under a single connection.
The newer model now avoids directly allowing Managers/Accessors access to their own The newer model now avoids directly allowing Managers/Accessors access to
engines, and instead they expose methods that explicitly require Connection objects. their own engines, and instead they expose methods that explicitly require
This means a user can invoke these methods in their own Connection contexts (seen Connection objects. This means a user can invoke these methods in their own
above) and group up operations as they please, reducing overhead. The Database then Connection contexts (seen above) and group up operations as they please,
wraps up a few single-operation contexts where outer connection control is not needed. reducing overhead. The Database then wraps up a few single-operation
contexts where outer connection control is not needed.
''' '''
import logging import logging
from co3.engine import Engine from co3.engine import Engine
from co3.schema import Schema from co3.schema import Schema
from co3.manager import Manager from co3.manager import Manager
from co3.indexer import Indexer from co3.indexer import Indexer
from co3.accessor import Accessor from co3.accessor import Accessor
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@@ -68,27 +70,29 @@ class Database[C: Component]:
''' '''
Generic Database definition Generic Database definition
Generic to both a Component (C), and an Engine resource type (R). The Engine's Generic to both a Component (C), and an Engine resource type (R). The
generic openness must be propagated here, as it's intended to be fully abstracted away Engine's generic openness must be propagated here, as it's intended to be
under the Database roof. Note that we cannot explicitly use an Engine type in its fully abstracted away under the Database roof. Note that we cannot
place, as it obscures its internal resource type dependence when we need it for explicitly use an Engine type in its place, as it obscures its internal
hinting here in ``__init__``. resource type dependence when we need it for hinting here in ``__init__``.
.. admonition:: Development TODO list .. admonition:: Development TODO list
Decide on official ruling for assigning Schema objects, and verifying any Decide on official ruling for assigning Schema objects, and verifying
attempted Component-based actions (e.g., inserts, selects) to belong to or be a any attempted Component-based actions (e.g., inserts, selects) to
composition of Components within an attached Schema. Reasons for: helps complete belong to or be a composition of Components within an attached Schema.
the sense of a "Database" here programmatically, incorporating a more structurally Reasons for: helps complete the sense of a "Database" here
accurate representation of allowed operations, and prevent possible attribute and programmatically, incorporating a more structurally accurate
type collisions. Reasons against: generally not a huge concern to align Schemas as representation of allowed operations, and prevent possible attribute
transactions will rollback, broadly increases a bit of bulk, and users often and type collisions. Reasons against: generally not a huge concern to
expect to know which components belong to a particular DB. Leaning more to **for**, align Schemas as transactions will rollback, broadly increases a bit of
and would only apply to the directly supported method passthroughs (and thus would bulk, and users often expect to know which components belong to a
have no impact on independent methods like ``Accessor.raw_select``). Additionally, particular DB. Leaning more to **for**, and would only apply to the
directly supported method passthroughs (and thus would have no impact
on independent methods like ``Accessor.raw_select``). Additionally,
even if component clashes don't pose serious risk, it can be helpful to even if component clashes don't pose serious risk, it can be helpful to
systematically address the cases where a misalignment is occurring (by having systematically address the cases where a misalignment is occurring (by
helpful ``verify`` methods that can be run before any actions). having helpful ``verify`` methods that can be run before any actions).
''' '''
_accessor_cls: type[Accessor[C]] = Accessor[C] _accessor_cls: type[Accessor[C]] = Accessor[C]
_manager_cls: type[Manager[C]] = Manager[C] _manager_cls: type[Manager[C]] = Manager[C]
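
In use, the connection-context model the docstring describes might look roughly like the following. This is a sketch only: `db`, `files_table`, and the exact accessor/manager method signatures are assumptions drawn from the surrounding prose, not verified API.

```python
# several operations grouped under one explicitly managed connection, so they
# can share (and roll back with) the same transaction context
with db.engine.connect() as connection:
    rows = db.accessor.select(connection, files_table)
    db.manager.insert(connection, files_table, new_file_rows)

# one-off convenience: the Database passthrough manages the context internally
db.select(files_table)
```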

View File

@@ -1,20 +1,18 @@
import sqlalchemy as sa import sqlalchemy as sa
from co3.database import Database, Engine
from co3.accessors.sql import RelationalAccessor, SQLAccessor
from co3.managers.sql import RelationalManager, SQLManager
from co3.engines import SQLEngine from co3.engines import SQLEngine
from co3.database import Database
from co3.components import Relation, SQLTable from co3.components import Relation, SQLTable
from co3.managers.sql import RelationalManager, SQLManager
from co3.accessors.sql import RelationalAccessor, SQLAccessor
class RelationalDatabase[C: Relation](Database[C]): class RelationalDatabase[C: Relation](Database[C]):
''' '''
accessor/manager assignments satisfy supertype's type settings; accessor/manager assignments satisfy supertype's type settings;
``TabularAccessor[Self, C]`` is of type ``type[RelationalAccessor[Self, C]]`` ``TabularAccessor[Self, C]`` is of type ``type[RelationalAccessor[Self,
(and yes, ``type[]`` specifies that the variable is itself being set to a type or a C]]`` (and yes, ``type[]`` specifies that the variable is itself being set
class, rather than a satisfying _instance_) to a type or a class, rather than a satisfying _instance_)
''' '''
_accessor_cls: type[RelationalAccessor[C]] = RelationalAccessor[C] _accessor_cls: type[RelationalAccessor[C]] = RelationalAccessor[C]
_manager_cls: type[RelationalManager[C]] = RelationalManager[C] _manager_cls: type[RelationalManager[C]] = RelationalManager[C]
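
A small standalone illustration of the ``type[]`` distinction noted in the docstring (the `Widget` class is hypothetical): the class attribute stores a class object, and instances are constructed from it later.

```python
class Widget: ...

widget_cls: type[Widget] = Widget   # a class object, satisfies type[Widget]
widget: Widget = Widget()           # an instance, satisfies Widget
```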

View File

@@ -11,12 +11,10 @@ logger = logging.getLogger(__name__)
class Indexer: class Indexer:
''' '''
Indexer class Indexer base class
Provides restricted access to an underlying Accessor to enable more efficient, superficial Provides restricted access to an underlying Accessor to enable more efficient, superficial
caching. caching. Note that cache clearing is to be handled by a wrapper class, like the Database.
Cache clearing is to be handled by a wrapper class, like the Database.
Caching occurs at the class level, with indexes prefixed by table's origin Composer. Caching occurs at the class level, with indexes prefixed by table's origin Composer.
This means that cached selects/group-bys will be available regardless of the provided This means that cached selects/group-bys will be available regardless of the provided
@@ -261,12 +259,12 @@ class Indexer:
agg_on = agg_on_names agg_on = agg_on_names
index_on = index_on_names index_on = index_on_names
#print(f'rows_are_mappings: {rows_are_mappings}') # print(f'rows_are_mappings: {rows_are_mappings}')
#print(f'group_by: {group_by}') # print(f'group_by: {group_by}')
#print(f'agg_on: {agg_on}') # print(f'agg_on: {agg_on}')
#print(f'agg_on_names: {agg_on_names}') # print(f'agg_on_names: {agg_on_names}')
#print(f'index_on: {index_on}') # print(f'index_on: {index_on}')
#print(f'index_on_names: {index_on_names}') # print(f'index_on_names: {index_on_names}')
# "group by" block ID and wrangle the links into a list # "group by" block ID and wrangle the links into a list
group_by_idx = {} group_by_idx = {}
@@ -317,6 +315,7 @@ class Indexer:
return list(group_by_idx.values()) return list(group_by_idx.values())
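
As a standalone illustration of the class-level caching idea described in the file's docstring (the structure and keys here are assumptions, not the Indexer's actual internals), cached access amounts to memoizing select results per (composer, statement) key:

```python
class TinyIndex:
    _cache: dict = {}                       # class-level, shared across instances

    def __init__(self, accessor):
        self.accessor = accessor

    def cached_select(self, composer, stmt):
        key = (composer, str(stmt))         # prefix cache entries by origin composer
        if key not in self._cache:
            self._cache[key] = self.accessor.raw_select(stmt)
        return self._cache[key]
```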
class CacheBlock: class CacheBlock:
''' '''
Wraps up a set of query parameters for a specific entity, and provides cached access Wraps up a set of query parameters for a specific entity, and provides cached access

View File

@@ -1,6 +1,8 @@
import logging import logging
from contextlib import contextmanager from contextlib import contextmanager
from co3.engine import Engine, Connection, Resource, Group
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@@ -10,11 +12,14 @@ class Medium[R: Resource]:
A Resource space A Resource space
''' '''
def __init__(self, scope): _engine_cls: type[Engine] = Engine
def __init__(self, *scope_args, **scope_kwargs):
self.engine = self._engine_cls(*scope_args, **scope_kwargs)
pass pass
@contextmanager @contextmanager
def connect(self, timeout=None): def connect(self, timeout=None) -> Connection:
''' '''
Open a connection to the database specified by the resource. Exactly what the Open a connection to the database specified by the resource. Exactly what the
returned connection looks like remains relatively unconstrained given the wide returned connection looks like remains relatively unconstrained given the wide
@@ -22,21 +27,59 @@ class Medium[R: Resource]:
with-statement contexts, constituting an "interaction session" with the database with-statement contexts, constituting an "interaction session" with the database
(i.e., allowing several actions to be performed using the same connection). (i.e., allowing several actions to be performed using the same connection).
''' '''
raise NotImplementedError yield self.engine.connect(timeout=timeout)
def execute(self, query: Query[QL]): def execute(self, query: Query[QL]):
pass pass
class BrowsableMedium[R: Resource](Medium[R]): class ReadableMedium[R: Resource](Medium[R]):
def browse(self, uri: URI[R]): def _resolve_relative_uri(self, protocol, value):
'''
Subclass to implement: fetch child object for supported protocol, i.e., single
component subpath
'''
...
def resolve_uri(self, uri: URI) -> ResourceCollection:
assert uri.protocols[0] in self.supported_protocols
obj = self._resolve_relative_uri(uri.protocols[0], uri.components[0])
# core the uri and recurse
cored_uri = uri.core()
if cored_uri:
rc = obj.resolve_uri(cored_uri)
else:
assert type(obj) is Resource
rc = ResourceCollection([obj])
return rc
def _to_uri_list(self, uri_like):
if type(uri_like) is not list:
uri_like = [uri_like]
return uri_like
def browse(
self,
connection: Connection,
uri: str | URI | list[str] | list[URI]
) -> ResourceCollection:
''' '''
Analog for Read (CRUD), SELECT (SQL), GET (REST) Analog for Read (CRUD), SELECT (SQL), GET (REST)
''' '''
pass uris = self._to_uri_list(uri)
rc = ResourceCollection()
for uri in uris:
rc.extend(self.resolve_uri(uri))
return rc
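
To make the "core the URI and recurse" step concrete, here is a standalone toy that mirrors the resolution pattern above, independent of co3's actual types (the fake mediums and protocol names are invented for illustration):

```python
def resolve(layers, components, context):
    protocol, component = layers[0], components[0]
    obj = context[(protocol, component)]             # one relative resolution step
    if len(layers) == 1:
        return [obj]                                 # innermost target reached
    return resolve(layers[1:], components[1:], obj)  # "core" the URI and recurse

# 'dir+disk://...' reversed: resolve the disk layer first, then dir within it
fake_disk = {('dir', 'home'): 'contents-of-home'}
fake_root = {('disk', '/'): fake_disk}
print(resolve(['disk', 'dir'], ['/', 'home'], fake_root))   # ['contents-of-home']
```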
class ABCDMedium[R: Resource](BrowsableMedium[R]): class WritableMedium[R: Resource](ReadableMedium[R]):
def append(self, uri: URI[R], resource: R): def append(self, uri: URI[R], resource: R):
''' '''
Analog for Create (CRUD), INSERT (SQL), POST/PUT (REST) Analog for Create (CRUD), INSERT (SQL), POST/PUT (REST)

6
co3/mediums/disk.py Normal file
View File

@@ -0,0 +1,6 @@
from co3 import Medium
from co3.resources import INode
class Disk(Medium[INode]):
pass

View File

@@ -1,10 +1,15 @@
from typing import Protocol from typing import BinaryIO
from co3 import URI, Medium
class Resource: class Resource:
def content(self) -> BinaryIO: def __init__(
pass self,
context: Medium | list[Medium],
uri: URI
):
self.uri = uri
class SelectableResource(Protocol): def open(self) -> BinaryIO:
def select(self, component, *args, **kwargs): pass
raise NotImplementedError

5
co3/resources/inode.py Normal file
View File

@@ -0,0 +1,5 @@
from co3 import Resource
class INode(Resource):
pass

View File

@@ -1,4 +1,4 @@
from urllib import parse from urllib.parse import urlparse
class URI: class URI:
@@ -13,3 +13,22 @@ class URN(URI):
def __init__(self, url_str: str): def __init__(self, url_str: str):
self.url_str = url_str self.url_str = url_str
class CompositeURI(URI):
def __init__(self, url_str: str):
url_obj = urlparse(url_str)
self.protocols = url_obj.scheme.split('+')[::-1]
# assumed intent: path components, e.g. 'c+b+a://a/b/c' -> ['a', 'b', 'c']
self.components = [url_obj.netloc, *[p for p in url_obj.path.split('/') if p]]
def core(self, layers=1) -> 'CompositeURI':
'''
"Core" the innermost ``layers`` layers of the composite URI.
'''
pass
def shed(self, layers=1) -> 'CompositeURI':
'''
"Shed" the outermost ``layers`` layers of the composite URI.
'''
pass

11
doc/reference/uri.md Normal file
View File

@@ -0,0 +1,11 @@
Quick thoughts and ideals:
- Schemes are compositional, "wrapping" super contexts: `c+b+a://a/b/c` (see the parsing sketch after this list)
- The scheme communicates the target type (above is `c`)
- URIs can be arbitrarily relative so long as they're resolved in the right contexts.
Above, `c+b://b/c` can be resolved in the context of `a://a`
- URIs are resolved by unwrapping schemes and resolving in to out
- URL params can apply only to the target type (this is the most consistent and probably
not too restrictive)
- Trajectories from one scheme to another can be inferred from the type hierarchy; there
may be many
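
A runnable mini-sketch of the layered scheme described above (parsing only; the helper name is an assumption, and resolution order is just illustrated by the output):

```python
from urllib.parse import urlparse

def parse_composite(uri: str):
    parsed = urlparse(uri)
    protocols = parsed.scheme.split('+')[::-1]                        # innermost first
    components = [parsed.netloc, *[p for p in parsed.path.split('/') if p]]
    return list(zip(protocols, components))

# resolved in to out: first 'a://a', then 'b://b' within it, then 'c://c'
print(parse_composite('c+b+a://a/b/c'))
# [('a', 'a'), ('b', 'b'), ('c', 'c')]
```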

View File

@@ -1,27 +1,25 @@
[build-system] [build-system]
requires = ["setuptools", "wheel", "setuptools-git-versioning>=2.0,<3"] requires = ["setuptools", "wheel"]
build-backend = "setuptools.build_meta" build-backend = "setuptools.build_meta"
[tool.setuptools-git-versioning]
enabled = true
[project] [project]
name = "co3" name = "co3"
version = "0.6.3"
description = "Lightweight Python ORM for hierarchical storage management" description = "Lightweight Python ORM for hierarchical storage management"
readme = "README.md"
requires-python = ">=3.12" requires-python = ">=3.12"
dynamic = ["version"]
#license = {file = "LICENSE"}
authors = [ authors = [
{ name="Sam Griesemer", email="samgriesemer+git@gmail.com" }, { name="Sam Griesemer", email="git@olog.io" },
] ]
readme = "README.md"
license = "MIT"
keywords = ["database", "orm"] keywords = ["database", "orm"]
classifiers = [ classifiers = [
"Programming Language :: Python :: 3.12", "Programming Language :: Python",
"License :: OSI Approved :: MIT License",
"Operating System :: OS Independent", "Operating System :: OS Independent",
"Development Status :: 3 - Alpha", "Development Status :: 3 - Alpha",
"Intended Audience :: Developers", "Intended Audience :: Developers",
"Intended Audience :: End Users/Desktop",
] ]
dependencies = [ dependencies = [
"tqdm", "tqdm",
@@ -31,16 +29,23 @@ dependencies = [
"colorama", "colorama",
] ]
[project.scripts]
symconf = "co3.__main__:main"
[project.optional-dependencies] [project.optional-dependencies]
tests = ["pytest"] dev = [
docs = [ "ipykernel",
]
doc = [
"furo",
"myst-parser",
"sphinx", "sphinx",
"sphinx-togglebutton", "sphinx-togglebutton",
"sphinx-autodoc-typehints", "sphinx-autodoc-typehints",
"furo",
"myst-parser",
] ]
jupyter = ["ipykernel"] test = [
"pytest",
]
[project.urls] [project.urls]
Homepage = "https://doc.olog.io/co3" Homepage = "https://doc.olog.io/co3"
@@ -48,6 +53,26 @@ Documentation = "https://doc.olog.io/co3"
Repository = "https://git.olog.io/olog/co3" Repository = "https://git.olog.io/olog/co3"
Issues = "https://git.olog.io/olog/co3/issues" Issues = "https://git.olog.io/olog/co3/issues"
[dependency-groups]
dev = [
"ipykernel",
]
[tool.setuptools.packages.find] [tool.setuptools.packages.find]
include = ["co3*"] # pattern to match package names include = ["co3*"]
[tool.ruff]
line-length = 79
[tool.ruff.lint]
select = ["ANN", "E", "F", "UP", "B", "SIM", "I", "C4", "PERF"]
[tool.ruff.lint.isort]
length-sort = true
order-by-type = false
force-sort-within-sections = false
[tool.ruff.format]
quote-style = "double"
indent-style = "space"
docstring-code-format = true

5
test/co3_medium_demo.py Normal file
View File

@@ -0,0 +1,5 @@
from co3.mediums import Disk
disk = Disk('disk:///')
disk.browse('dir://home')

1256
uv.lock generated Normal file

File diff suppressed because it is too large