perform minor, partial reformatting

This commit is contained in:
2026-01-02 17:26:28 -08:00
parent 090c122c60
commit c566dcf678
25 changed files with 1431 additions and 140 deletions

21
.gitignore vendored
View File

@@ -1,18 +1,19 @@
# generic py
# generic
__pycache__/
.pytest_cache/
*.egg-info/
.ipynb_checkpoints/
.pytest_cache/
.python-version
# vendor and build files
# package-specific
.ipynb_checkpoints/
.pytest_cache/
# vendor/build files
dist/
build/
docs/_autoref/
docs/_autosummary/
docs/_build/
doc/_autoref/
doc/_autosummary/
doc/_build/
# local
notebooks/
# misc local
/Makefile
notebooks/

View File

View File

@@ -1,35 +1,36 @@
# Overview
`co3` is a lightweight Python ORM for hierarchical storage management. It implements a
general type system for defining database components like relations, schemas, engines,
etc. Objects inheriting from the `CO3` base class can then define data transformations
that connect to database components, and can be automatically collected for coordinated
database insertion.
`co3` is a lightweight Python ORM for hierarchical storage management. It
implements a general type system for defining database components like
relations, schemas, engines, etc. Objects inheriting from the `CO3` base class
can then define data transformations that connect to database components, and
can be automatically collected for coordinated database insertion.
`co3` attempts to provide a general interface for interacting with storage media (e.g.,
databases, pickled objects, VSS framework, in-memory key-value stores, etc). The following
top-level classes capture the bulk of the operational model:
`co3` attempts to provide a general interface for interacting with storage
media (e.g., databases, pickled objects, VSS framework, in-memory key-value
stores, etc.). The following top-level classes capture the bulk of the
operational model:
- **Database**: reference to a storage medium, with an `Accessor` for accessing data,
`Manager` for managing database state, and an `Engine` for managing connections and
external operations.
- **Accessor**: provides access to stored items in a `Database`, typically via a supported
`select` operation over known `Component` types
- **Manager**: manages database storage state (e.g., supported inserts or database sync
operations)
- **Mapper**: associates `CO3` types with `Schema` components, and provides automatic
collection and composition operations for supported items
- **Collector**: collects data from defined `CO3` type transformations and prepares for
`Database` insert operations
- **Component**: atomic storage groups for databases (i.e., generalized notion of a
"relation" in relational algebra).
- **Database**: reference to a storage medium, with an `Accessor` for accessing
data, `Manager` for managing database state, and an `Engine` for managing
connections and external operations.
- **Accessor**: provides access to stored items in a `Database`, typically via
a supported `select` operation over known `Component` types
- **Manager**: manages database storage state (e.g., supported inserts or
database sync operations)
- **Mapper**: associates `CO3` types with `Schema` components, and provides
automatic collection and composition operations for supported items
- **Collector**: collects data from defined `CO3` type transformations and
prepares for `Database` insert operations
- **Component**: atomic storage groups for databases (i.e., generalized notion
of a "relation" in relational algebra).
- **Indexer**: automatic caching of supported access queries to a `Database`
- **Schema**: general schema analog for grouping related `Component` sets
- **Differ**: facilitates set operations on results from selectable resources (e.g.,
automatic comparison between file data on disk and file rows in a SQL database)
- **Syncer**: generalized syncing procedure for items between data resources (e.g.,
syncing new, modified, and deleted files from disk to a SQL database that stores file
metadata).
The **CO3** an abstract base class then makes it easy to integrate this model with regular
Python object hierarchies that can be mapped to a storage schema.
- **Differ**: facilitates set operations on results from selectable resources
(e.g., automatic comparison between file data on disk and file rows in a SQL
database)
- **Syncer**: generalized syncing procedure for items between data resources
(e.g., syncing new, modified, and deleted files from disk to a SQL database
that stores file metadata).
The **CO3** abstract base class then makes it easy to integrate this model
with regular Python object hierarchies that can be mapped to a storage schema.

View File

@@ -1,7 +1,7 @@
'''
Provides access to an underlying schema through a supported set of operations. Class
methods could be general, high-level SQL wrappers, or convenience functions for common
schema-specific queries.
Provides access to an underlying schema through a supported set of operations.
Class methods could be general, high-level SQL wrappers, or convenience
functions for common schema-specific queries.
'''
import time
import inspect
@@ -16,13 +16,14 @@ from co3.component import Component
class Accessor[C: Component](metaclass=ABCMeta):
'''
Access wrapper class for complex queries and easy integration with Composer tables.
Implements high-level access to things like common constrained SELECT queries.
Access wrapper class for complex queries and easy integration with Composer
tables. Implements high-level access to things like common constrained
SELECT queries.
Instance variables:
access_log: time-indexed log of access queries performed
'''
def __init__(self):
def __init__(self) -> None:
self.access_log = {}
def log_access(self, stmt):

View File

@@ -2,19 +2,21 @@ class Component[T]:
'''
Component
General wrapper for storage components to be used in various database contexts. Relations
can be thought of generally as named data containers/entities serving as a fundamental
abstractions within particular storage protocols.
General wrapper for storage components to be used in various database
contexts. Relations can be thought of generally as named data
containers/entities serving as fundamental abstractions within particular
storage protocols.
'''
def __init__(self, name, obj: T):
def __init__(self, name, obj: T) -> None:
self.name = name
self.obj = obj
def __str__(self):
def __str__(self) -> str:
return f'<Component ({self.__class__.__name__})> {self.name}'
def __repr__(self):
def __repr__(self) -> str:
return f'<Component ({self.__class__.__name__})> {self.name}'
def get_attributes(self):
def get_attributes(self) -> dict:
raise NotImplementedError

View File

@@ -1,8 +1,9 @@
'''
Dev note:
Any reason to have ComposeableComponents and Relations as separate types? The thought
is that there may be some possible Component types we want to be able to Compose that
wouldn't logically be Relations. But the gap here might be quite small
Any reason to have ComposableComponents and Relations as separate types?
The thought is that there may be some possible Component types we want to
be able to Compose that wouldn't logically be Relations. But the gap here
might be quite small
'''
from typing import Self
@@ -29,9 +30,9 @@ class ComposableComponent[T](Component[T], metaclass=ABCMeta):
# relational databases
class Relation[T](ComposableComponent[T]):
'''
Relation base for tabular components to be used in relation DB settings. Attempts to
adhere to the set-theoretic base outlined in the relational model [1]. Some
terminology:
Relation base for tabular components to be used in relation DB settings.
Attempts to adhere to the set-theoretic base outlined in the relational
model [1]. Some terminology:
Relation: table-like container
| -> Heading: set of attributes
@@ -43,15 +44,16 @@ class Relation[T](ComposableComponent[T]):
[1]: https://en.wikipedia.org/wiki/Relational_model#Set-theoretic_formulation
Note: development tasks
As it stands, the Relation skeleton is incredibly lax compared to the properties and
operations that should be formally available, according its pure relational algebra
analog.
As it stands, the Relation skeleton is incredibly lax compared to the
properties and operations that should be formally available, according
to its pure relational algebra analog.
Relations are also generic up to a type T, which ultimately serves as the base object
for Relation instances. We aren't attempting to implement some generally useful
table-like class here; instead we're just exposing a lightweight interface that's
needed for a few CO3 contexts, and commonly off-loading most of the heavy-lifting to
true relation objects like SQLAlchemy tables.
Relations are also generic up to a type T, which ultimately serves as
the base object for Relation instances. We aren't attempting to
implement some generally useful table-like class here; instead we're
just exposing a lightweight interface that's needed for a few CO3
contexts, and commonly off-loading most of the heavy-lifting to true
relation objects like SQLAlchemy tables.
'''
def compose(
self,
@@ -65,8 +67,8 @@ class SQLTable(Relation[SQLTableLike]):
@classmethod
def from_table(cls, table: sa.Table):
'''
Note that the sa.Table type is intentional here; not all matching types for
SQLTableLike have a defined ``name`` property
Note that the sa.Table type is intentional here; not all matching types
for SQLTableLike have a defined ``name`` property
'''
return cls(table.name, table)
@@ -78,7 +80,8 @@ class SQLTable(Relation[SQLTableLike]):
Provide column:default pairs for a provided SQLAlchemy table.
Parameters:
include_all: whether to include all columns, even those without explicit defaults
include_all: whether to include all columns, even those without
explicit defaults
'''
default_values = {}
for column in self.get_attributes():
@@ -87,8 +90,8 @@ class SQLTable(Relation[SQLTableLike]):
elif column.nullable:
default_values[column.name] = None
else:
# assume empty string if include_all and col has no explicit default
# and isn't nullable
# assume empty string if include_all and col has no explicit
# default and isn't nullable
if include_all and column.name != 'id':
default_values[column.name] = ''

View File

@@ -1,19 +1,20 @@
'''
Database
Central object for defining storage protocol-specific interfaces. The database wraps up
central items for interacting with database resources, namely the Accessor and Manager
objects.
Central object for defining storage protocol-specific interfaces. The database
wraps up central items for interacting with database resources, namely the
Accessor and Manager objects.
The Database type hierarchy attempts to be exceedingly general; SQL-derivatives should
subclass from the RelationalDatabase subtype, for example, which itself becomes a new
generic via a type dependence on Relation.
The Database type hierarchy attempts to be exceedingly general; SQL-derivatives
should subclass from the RelationalDatabase subtype, for example, which itself
becomes a new generic via a type dependence on Relation.
While relying no many constituent pieces, Databases intend to provide all needed objects
under one roof. This includes the Engine (opens up connections to the database), Accessors
(running select-like queries on DB data), Managers (updating DB state with sync
insert-like actions), and Indexers (systematically caching Accessor queries). Generalized
behavior is supported by explicitly leveraging the individual components. For example,
While relying on many constituent pieces, Databases intend to provide all
needed objects under one roof. This includes the Engine (opens up connections
to the database), Accessors (running select-like queries on DB data), Managers
(updating DB state with sync insert-like actions), and Indexers (systematically
caching Accessor queries). Generalized behavior is supported by explicitly
leveraging the individual components. For example,
.. code-block:: python
@@ -28,9 +29,9 @@ behavior is supported by explicitly leveraging the individual components. For ex
data
)
The Database also supports a few directly callable methods for simplified interaction.
These methods manage a connection context internally, passing them through the way they
might otherwise be handled explicitly, as seen above.
The Database also supports a few directly callable methods for simplified
interaction. These methods manage a connection context internally, passing them
through the way they might otherwise be handled explicitly, as seen above.
.. code-block:: python
@@ -41,24 +42,25 @@ might otherwise be handled explicitly, as seen above.
.. admonition:: on explicit connection contexts
Older models supported Accessors/Managers that housed their own Engine instances, and
when performing actions like ``insert``, the Engine would be passed all the way through
until a Connection could be spawned, and in that context the single action would be
made. This model forfeits a lot of connection control, preventing multiple actions
under a single connection.
Older models supported Accessors/Managers that housed their own Engine
instances, and when performing actions like ``insert``, the Engine would be
passed all the way through until a Connection could be spawned, and in that
context the single action would be made. This model forfeits a lot of
connection control, preventing multiple actions under a single connection.
The newer model now avoids directly allowing Managers/Accessors access to their own
engines, and instead they expose methods that explicitly require Connection objects.
This means a user can invoke these methods in their own Connection contexts (seen
above) and group up operations as they please, reducing overhead. The Database then
wraps up a few single-operation contexts where outer connection control is not needed.
The newer model now avoids directly allowing Managers/Accessors access to
their own engines, and instead they expose methods that explicitly require
Connection objects. This means a user can invoke these methods in their own
Connection contexts (seen above) and group up operations as they please,
reducing overhead. The Database then wraps up a few single-operation
contexts where outer connection control is not needed.
'''
import logging
from co3.engine import Engine
from co3.schema import Schema
from co3.manager import Manager
from co3.indexer import Indexer
from co3.engine import Engine
from co3.schema import Schema
from co3.manager import Manager
from co3.indexer import Indexer
from co3.accessor import Accessor
logger = logging.getLogger(__name__)
@@ -68,27 +70,29 @@ class Database[C: Component]:
'''
Generic Database definition
Generic to both a Component (C), and an Engine resource type (R). The Engine's
generic openness must be propagated here, as it's intended to be fully abstracted away
under the Database roof. Note that we cannot explicitly use an Engine type in its
place, as it obscures its internal resource type dependence when we need it for
hinting here in ``__init__``.
Generic to both a Component (C), and an Engine resource type (R). The
Engine's generic openness must be propagated here, as it's intended to be
fully abstracted away under the Database roof. Note that we cannot
explicitly use an Engine type in its place, as it obscures its internal
resource type dependence when we need it for hinting here in ``__init__``.
.. admonition:: Development TODO list
Decide on official ruling for assigning Schema objects, and verifying any
attempted Component-based actions (e.g., inserts, selects) to belong to or be a
composition of Components within an attached Schema. Reasons for: helps complete
the sense of a "Database" here programmatically, incorporating a more structurally
accurate representation of allowed operations, and prevent possible attribute and
type collisions. Reasons against: generally not a huge concern to align Schemas as
transactions will rollback, broadly increases a bit of bulk, and users often
expected know which components belong to a particular DB. Leaning more to **for**,
and would only apply to the directly supported method passthroughs (and thus would
have no impact on independent methods like ``Accessor.raw_select``). Additionally,
Decide on official ruling for assigning Schema objects, and verifying
any attempted Component-based actions (e.g., inserts, selects) to
belong to or be a composition of Components within an attached Schema.
Reasons for: helps complete the sense of a "Database" here
programmatically, incorporating a more structurally accurate
representation of allowed operations, and prevent possible attribute
and type collisions. Reasons against: generally not a huge concern to
align Schemas as transactions will rollback, broadly increases a bit of
bulk, and users are often expected to know which components belong to
a particular DB. Leaning more to **for**, and would only apply to the
directly supported method passthroughs (and thus would have no impact
on independent methods like ``Accessor.raw_select``). Additionally,
even if component clashes don't pose serious risk, it can be helpful to
systematically address the cases where a misalignment is occurring (by having
helpful ``verify`` methods that can be ran before any actions).
systematically address the cases where a misalignment is occurring (by
having helpful ``verify`` methods that can be run before any actions).
'''
_accessor_cls: type[Accessor[C]] = Accessor[C]
_manager_cls: type[Manager[C]] = Manager[C]

View File

@@ -1,20 +1,18 @@
import sqlalchemy as sa
from co3.database import Database, Engine
from co3.accessors.sql import RelationalAccessor, SQLAccessor
from co3.managers.sql import RelationalManager, SQLManager
from co3.engines import SQLEngine
from co3.database import Database
from co3.components import Relation, SQLTable
from co3.managers.sql import RelationalManager, SQLManager
from co3.accessors.sql import RelationalAccessor, SQLAccessor
class RelationalDatabase[C: Relation](Database[C]):
'''
accessor/manager assignments satisfy supertype's type settings;
``TabluarAccessor[Self, C]`` is of type ``type[RelationalAccessor[Self, C]]``
(and yes, ``type[]`` specifies that the variable is itself being set to a type or a
class, rather than a satisfying _instance_)
``TabularAccessor[Self, C]`` is of type ``type[RelationalAccessor[Self,
C]]`` (and yes, ``type[]`` specifies that the variable is itself being set
to a type or a class, rather than a satisfying _instance_)
'''
_accessor_cls: type[RelationalAccessor[C]] = RelationalAccessor[C]
_manager_cls: type[RelationalManager[C]] = RelationalManager[C]

View File

@@ -1,27 +1,25 @@
[build-system]
requires = ["setuptools", "wheel", "setuptools-git-versioning>=2.0,<3"]
requires = ["setuptools", "wheel"]
build-backend = "setuptools.build_meta"
[tool.setuptools-git-versioning]
enabled = true
[project]
name = "co3"
version = "0.6.3"
description = "Lightweight Python ORM for hierarchical storage management"
readme = "README.md"
requires-python = ">=3.12"
dynamic = ["version"]
#license = {file = "LICENSE"}
authors = [
{ name="Sam Griesemer", email="samgriesemer+git@gmail.com" },
{ name="Sam Griesemer", email="git@olog.io" },
]
readme = "README.md"
license = "MIT"
keywords = ["database", "orm"]
classifiers = [
"Programming Language :: Python :: 3.12",
"License :: OSI Approved :: MIT License",
"Programming Language :: Python",
"Operating System :: OS Independent",
"Development Status :: 3 - Alpha",
"Intended Audience :: Developers",
"Intended Audience :: End Users/Desktop",
]
dependencies = [
"tqdm",
@@ -31,16 +29,23 @@ dependencies = [
"colorama",
]
[project.scripts]
symconf = "co3.__main__:main"
[project.optional-dependencies]
tests = ["pytest"]
docs = [
dev = [
"ipykernel",
]
doc = [
"furo",
"myst-parser",
"sphinx",
"sphinx-togglebutton",
"sphinx-autodoc-typehints",
"furo",
"myst-parser",
]
jupyter = ["ipykernel"]
test = [
"pytest",
]
[project.urls]
Homepage = "https://doc.olog.io/co3"
@@ -48,6 +53,26 @@ Documentation = "https://doc.olog.io/co3"
Repository = "https://git.olog.io/olog/co3"
Issues = "https://git.olog.io/olog/co3/issues"
[dependency-groups]
dev = [
"ipykernel",
]
[tool.setuptools.packages.find]
include = ["co3*"] # pattern to match package names
include = ["co3*"]
[tool.ruff]
line-length = 79
[tool.ruff.lint]
select = ["ANN", "E", "F", "UP", "B", "SIM", "I", "C4", "PERF"]
[tool.ruff.lint.isort]
length-sort = true
order-by-type = false
force-sort-within-sections = false
[tool.ruff.format]
quote-style = "double"
indent-style = "space"
docstring-code-format = true

1256
uv.lock generated Normal file

File diff suppressed because it is too large Load Diff