perform minor, partial reformatting
This commit is contained in:
21
.gitignore
vendored
21
.gitignore
vendored
@@ -1,18 +1,19 @@
|
||||
# generic py
|
||||
# generic
|
||||
__pycache__/
|
||||
.pytest_cache/
|
||||
*.egg-info/
|
||||
.ipynb_checkpoints/
|
||||
.pytest_cache/
|
||||
.python-version
|
||||
|
||||
# vendor and build files
|
||||
# package-specific
|
||||
.ipynb_checkpoints/
|
||||
.pytest_cache/
|
||||
|
||||
# vendor/build files
|
||||
dist/
|
||||
build/
|
||||
docs/_autoref/
|
||||
docs/_autosummary/
|
||||
docs/_build/
|
||||
doc/_autoref/
|
||||
doc/_autosummary/
|
||||
doc/_build/
|
||||
|
||||
# local
|
||||
notebooks/
|
||||
# misc local
|
||||
/Makefile
|
||||
notebooks/
|
||||
|
||||
59
README.md
59
README.md
@@ -1,35 +1,36 @@
|
||||
# Overview
|
||||
`co3` is a lightweight Python ORM for hierarchical storage management. It implements a
|
||||
general type system for defining database components like relations, schemas, engines,
|
||||
etc. Objects inheriting from the `CO3` base class can then define data transformations
|
||||
that connect to database components, and can be automatically collected for coordinated
|
||||
database insertion.
|
||||
`co3` is a lightweight Python ORM for hierarchical storage management. It
|
||||
implements a general type system for defining database components like
|
||||
relations, schemas, engines, etc. Objects inheriting from the `CO3` base class
|
||||
can then define data transformations that connect to database components, and
|
||||
can be automatically collected for coordinated database insertion.
|
||||
|
||||
`co3` attempts to provide a general interface for interacting with storage media (e.g.,
|
||||
databases, pickled objects, VSS framework, in-memory key-value stores, etc). The following
|
||||
top-level classes capture the bulk of the operational model:
|
||||
`co3` attempts to provide a general interface for interacting with storage
|
||||
media (e.g., databases, pickled objects, VSS framework, in-memory key-value
|
||||
stores, etc). The following top-level classes capture the bulk of the
|
||||
operational model:
|
||||
|
||||
- **Database**: reference to a storage medium, with an `Accessor` for accessing data,
|
||||
`Manager` for managing database state, and an `Engine` for managing connections and
|
||||
external operations.
|
||||
- **Accessor**: provides access to stored items in a `Database`, typically via a supported
|
||||
`select` operation over known `Component` types
|
||||
- **Manager**: manages database storage state (e.g., supported inserts or database sync
|
||||
operations)
|
||||
- **Mapper**: associates `CO3` types with `Schema` components, and provides automatic
|
||||
collection and composition operations for supported items
|
||||
- **Collector**: collects data from defined `CO3` type transformations and prepares for
|
||||
`Database` insert operations
|
||||
- **Component**: atomic storage groups for databases (i.e., generalized notion of a
|
||||
"relation" in relational algebra).
|
||||
- **Database**: reference to a storage medium, with an `Accessor` for accessing
|
||||
data, `Manager` for managing database state, and an `Engine` for managing
|
||||
connections and external operations.
|
||||
- **Accessor**: provides access to stored items in a `Database`, typically via
|
||||
a supported `select` operation over known `Component` types
|
||||
- **Manager**: manages database storage state (e.g., supported inserts or
|
||||
database sync operations)
|
||||
- **Mapper**: associates `CO3` types with `Schema` components, and provides
|
||||
automatic collection and composition operations for supported items
|
||||
- **Collector**: collects data from defined `CO3` type transformations and
|
||||
prepares for `Database` insert operations
|
||||
- **Component**: atomic storage groups for databases (i.e., generalized notion
|
||||
of a "relation" in relational algebra).
|
||||
- **Indexer**: automatic caching of supported access queries to a `Database`
|
||||
- **Schema**: general schema analog for grouping related `Component` sets
|
||||
- **Differ**: facilitates set operations on results from selectable resources (e.g.,
|
||||
automatic comparison between file data on disk and file rows in a SQL database)
|
||||
- **Syncer**: generalized syncing procedure for items between data resources (e.g.,
|
||||
syncing new, modified, and deleted files from disk to a SQL database that stores file
|
||||
metadata).
|
||||
|
||||
The **CO3** an abstract base class then makes it easy to integrate this model with regular
|
||||
Python object hierarchies that can be mapped to a storage schema.
|
||||
- **Differ**: facilitates set operations on results from selectable resources
|
||||
(e.g., automatic comparison between file data on disk and file rows in a SQL
|
||||
database)
|
||||
- **Syncer**: generalized syncing procedure for items between data resources
|
||||
(e.g., syncing new, modified, and deleted files from disk to a SQL database
|
||||
that stores file metadata).
|
||||
|
||||
The **CO3** abstract base class then makes it easy to integrate this model
|
||||
with regular Python object hierarchies that can be mapped to a storage schema.
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
'''
|
||||
Provides access to an underlying schema through a supported set of operations. Class
|
||||
methods could be general, high-level SQL wrappers, or convenience functions for common
|
||||
schema-specific queries.
|
||||
Provides access to an underlying schema through a supported set of operations.
|
||||
Class methods could be general, high-level SQL wrappers, or convenience
|
||||
functions for common schema-specific queries.
|
||||
'''
|
||||
import time
|
||||
import inspect
|
||||
@@ -16,13 +16,14 @@ from co3.component import Component
|
||||
|
||||
class Accessor[C: Component](metaclass=ABCMeta):
|
||||
'''
|
||||
Access wrapper class for complex queries and easy integration with Composer tables.
|
||||
Implements high-level access to things like common constrained SELECT queries.
|
||||
Access wrapper class for complex queries and easy integration with Composer
|
||||
tables. Implements high-level access to things like common constrained
|
||||
SELECT queries.
|
||||
|
||||
Instance variables:
|
||||
access_log: time-indexed log of access queries performed
|
||||
'''
|
||||
def __init__(self):
|
||||
def __init__(self) -> None:
|
||||
self.access_log = {}
|
||||
|
||||
def log_access(self, stmt):
|
||||
|
||||
@@ -2,19 +2,21 @@ class Component[T]:
|
||||
'''
|
||||
Component
|
||||
|
||||
General wrapper for storage components to be used in various database contexts. Relations
|
||||
can be thought of generally as named data containers/entities serving as a fundamental
|
||||
abstractions within particular storage protocols.
|
||||
General wrapper for storage components to be used in various database
|
||||
contexts. Relations can be thought of generally as named data
|
||||
containers/entities serving as fundamental abstractions within particular
|
||||
storage protocols.
|
||||
'''
|
||||
def __init__(self, name, obj: T):
|
||||
|
||||
def __init__(self, name, obj: T) -> None:
|
||||
self.name = name
|
||||
self.obj = obj
|
||||
|
||||
def __str__(self):
|
||||
def __str__(self) -> str:
|
||||
return f'<Component ({self.__class__.__name__})> {self.name}'
|
||||
|
||||
def __repr__(self):
|
||||
def __repr__(self) -> str:
|
||||
return f'<Component ({self.__class__.__name__})> {self.name}'
|
||||
|
||||
def get_attributes(self):
|
||||
def get_attributes(self) -> dict:
|
||||
raise NotImplementedError
|
||||
|
||||
@@ -1,8 +1,9 @@
|
||||
'''
|
||||
Dev note:
|
||||
Any reason to have ComposeableComponents and Relations as separate types? The thought
|
||||
is that there may be some possible Component types we want to be able to Compose that
|
||||
wouldn't logically be Relations. But the gap here might be quite small
|
||||
Any reason to have ComposableComponents and Relations as separate types?
|
||||
The thought is that there may be some possible Component types we want to
|
||||
be able to Compose that wouldn't logically be Relations. But the gap here
|
||||
might be quite small
|
||||
'''
|
||||
|
||||
from typing import Self
|
||||
@@ -29,9 +30,9 @@ class ComposableComponent[T](Component[T], metaclass=ABCMeta):
|
||||
# relational databases
|
||||
class Relation[T](ComposableComponent[T]):
|
||||
'''
|
||||
Relation base for tabular components to be used in relation DB settings. Attempts to
|
||||
adhere to the set-theoretic base outlined in the relational model [1]. Some
|
||||
terminology:
|
||||
Relation base for tabular components to be used in relation DB settings.
|
||||
Attempts to adhere to the set-theoretic base outlined in the relational
|
||||
model [1]. Some terminology:
|
||||
|
||||
Relation: table-like container
|
||||
| -> Heading: set of attributes
|
||||
@@ -43,15 +44,16 @@ class Relation[T](ComposableComponent[T]):
|
||||
[1]: https://en.wikipedia.org/wiki/Relational_model#Set-theoretic_formulation
|
||||
|
||||
Note: development tasks
|
||||
As it stands, the Relation skeleton is incredibly lax compared to the properties and
|
||||
operations that should be formally available, according its pure relational algebra
|
||||
analog.
|
||||
As it stands, the Relation skeleton is incredibly lax compared to the
|
||||
properties and operations that should be formally available, according
|
||||
to its pure relational algebra analog.
|
||||
|
||||
Relations are also generic up to a type T, which ultimately serves as the base object
|
||||
for Relation instances. We aren't attempting to implement some generally useful
|
||||
table-like class here; instead we're just exposing a lightweight interface that's
|
||||
needed for a few CO3 contexts, and commonly off-loading most of the heavy-lifting to
|
||||
true relation objects like SQLAlchemy tables.
|
||||
Relations are also generic up to a type T, which ultimately serves as
|
||||
the base object for Relation instances. We aren't attempting to
|
||||
implement some generally useful table-like class here; instead we're
|
||||
just exposing a lightweight interface that's needed for a few CO3
|
||||
contexts, and commonly off-loading most of the heavy-lifting to true
|
||||
relation objects like SQLAlchemy tables.
|
||||
'''
|
||||
def compose(
|
||||
self,
|
||||
@@ -65,8 +67,8 @@ class SQLTable(Relation[SQLTableLike]):
|
||||
@classmethod
|
||||
def from_table(cls, table: sa.Table):
|
||||
'''
|
||||
Note that the sa.Table type is intentional here; not all matching types for
|
||||
SQLTableLike have a defined ``name`` property
|
||||
Note that the sa.Table type is intentional here; not all matching types
|
||||
for SQLTableLike have a defined ``name`` property
|
||||
'''
|
||||
return cls(table.name, table)
|
||||
|
||||
@@ -78,7 +80,8 @@ class SQLTable(Relation[SQLTableLike]):
|
||||
Provide column:default pairs for a provided SQLAlchemy table.
|
||||
|
||||
Parameters:
|
||||
include_all: whether to include all columns, even those without explicit defaults
|
||||
include_all: whether to include all columns, even those without
|
||||
explicit defaults
|
||||
'''
|
||||
default_values = {}
|
||||
for column in self.get_attributes():
|
||||
@@ -87,8 +90,8 @@ class SQLTable(Relation[SQLTableLike]):
|
||||
elif column.nullable:
|
||||
default_values[column.name] = None
|
||||
else:
|
||||
# assume empty string if include_all and col has no explicit default
|
||||
# and isn't nullable
|
||||
# assume empty string if include_all and col has no explicit
|
||||
# default and isn't nullable
|
||||
if include_all and column.name != 'id':
|
||||
default_values[column.name] = ''
|
||||
|
||||
|
||||
@@ -1,19 +1,20 @@
|
||||
'''
|
||||
Database
|
||||
|
||||
Central object for defining storage protocol-specific interfaces. The database wraps up
|
||||
central items for interacting with database resources, namely the Accessor and Manager
|
||||
objects.
|
||||
Central object for defining storage protocol-specific interfaces. The database
|
||||
wraps up central items for interacting with database resources, namely the
|
||||
Accessor and Manager objects.
|
||||
|
||||
The Database type hierarchy attempts to be exceedingly general; SQL-derivatives should
|
||||
subclass from the RelationalDatabase subtype, for example, which itself becomes a new
|
||||
generic via a type dependence on Relation.
|
||||
The Database type hierarchy attempts to be exceedingly general; SQL-derivatives
|
||||
should subclass from the RelationalDatabase subtype, for example, which itself
|
||||
becomes a new generic via a type dependence on Relation.
|
||||
|
||||
While relying no many constituent pieces, Databases intend to provide all needed objects
|
||||
under one roof. This includes the Engine (opens up connections to the database), Accessors
|
||||
(running select-like queries on DB data), Managers (updating DB state with sync
|
||||
insert-like actions), and Indexers (systematically caching Accessor queries). Generalized
|
||||
behavior is supported by explicitly leveraging the individual components. For example,
|
||||
While relying on many constituent pieces, Databases intend to provide all
|
||||
needed objects under one roof. This includes the Engine (opens up connections
|
||||
to the database), Accessors (running select-like queries on DB data), Managers
|
||||
(updating DB state with sync insert-like actions), and Indexers (systematically
|
||||
caching Accessor queries). Generalized behavior is supported by explicitly
|
||||
leveraging the individual components. For example,
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
@@ -28,9 +29,9 @@ behavior is supported by explicitly leveraging the individual components. For ex
|
||||
data
|
||||
)
|
||||
|
||||
The Database also supports a few directly callable methods for simplified interaction.
|
||||
These methods manage a connection context internally, passing them through the way they
|
||||
might otherwise be handled explicitly, as seen above.
|
||||
The Database also supports a few directly callable methods for simplified
|
||||
interaction. These methods manage a connection context internally, passing them
|
||||
through the way they might otherwise be handled explicitly, as seen above.
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
@@ -41,24 +42,25 @@ might otherwise be handled explicitly, as seen above.
|
||||
|
||||
.. admonition:: on explicit connection contexts
|
||||
|
||||
Older models supported Accessors/Managers that housed their own Engine instances, and
|
||||
when performing actions like ``insert``, the Engine would be passed all the way through
|
||||
until a Connection could be spawned, and in that context the single action would be
|
||||
made. This model forfeits a lot of connection control, preventing multiple actions
|
||||
under a single connection.
|
||||
Older models supported Accessors/Managers that housed their own Engine
|
||||
instances, and when performing actions like ``insert``, the Engine would be
|
||||
passed all the way through until a Connection could be spawned, and in that
|
||||
context the single action would be made. This model forfeits a lot of
|
||||
connection control, preventing multiple actions under a single connection.
|
||||
|
||||
The newer model now avoids directly allowing Managers/Accessors access to their own
|
||||
engines, and instead they expose methods that explicitly require Connection objects.
|
||||
This means a user can invoke these methods in their own Connection contexts (seen
|
||||
above) and group up operations as they please, reducing overhead. The Database then
|
||||
wraps up a few single-operation contexts where outer connection control is not needed.
|
||||
The newer model now avoids directly allowing Managers/Accessors access to
|
||||
their own engines, and instead they expose methods that explicitly require
|
||||
Connection objects. This means a user can invoke these methods in their own
|
||||
Connection contexts (seen above) and group up operations as they please,
|
||||
reducing overhead. The Database then wraps up a few single-operation
|
||||
contexts where outer connection control is not needed.
|
||||
'''
|
||||
import logging
|
||||
|
||||
from co3.engine import Engine
|
||||
from co3.schema import Schema
|
||||
from co3.manager import Manager
|
||||
from co3.indexer import Indexer
|
||||
from co3.engine import Engine
|
||||
from co3.schema import Schema
|
||||
from co3.manager import Manager
|
||||
from co3.indexer import Indexer
|
||||
from co3.accessor import Accessor
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -68,27 +70,29 @@ class Database[C: Component]:
|
||||
'''
|
||||
Generic Database definition
|
||||
|
||||
Generic to both a Component (C), and an Engine resource type (R). The Engine's
|
||||
generic openness must be propagated here, as it's intended to be fully abstracted away
|
||||
under the Database roof. Note that we cannot explicitly use an Engine type in its
|
||||
place, as it obscures its internal resource type dependence when we need it for
|
||||
hinting here in ``__init__``.
|
||||
Generic to both a Component (C), and an Engine resource type (R). The
|
||||
Engine's generic openness must be propagated here, as it's intended to be
|
||||
fully abstracted away under the Database roof. Note that we cannot
|
||||
explicitly use an Engine type in its place, as it obscures its internal
|
||||
resource type dependence when we need it for hinting here in ``__init__``.
|
||||
|
||||
.. admonition:: Development TODO list
|
||||
|
||||
Decide on official ruling for assigning Schema objects, and verifying any
|
||||
attempted Component-based actions (e.g., inserts, selects) to belong to or be a
|
||||
composition of Components within an attached Schema. Reasons for: helps complete
|
||||
the sense of a "Database" here programmatically, incorporating a more structurally
|
||||
accurate representation of allowed operations, and prevent possible attribute and
|
||||
type collisions. Reasons against: generally not a huge concern to align Schemas as
|
||||
transactions will rollback, broadly increases a bit of bulk, and users often
|
||||
expected know which components belong to a particular DB. Leaning more to **for**,
|
||||
and would only apply to the directly supported method passthroughs (and thus would
|
||||
have no impact on independent methods like ``Accessor.raw_select``). Additionally,
|
||||
Decide on official ruling for assigning Schema objects, and verifying
|
||||
any attempted Component-based actions (e.g., inserts, selects) to
|
||||
belong to or be a composition of Components within an attached Schema.
|
||||
Reasons for: helps complete the sense of a "Database" here
|
||||
programmatically, incorporating a more structurally accurate
|
||||
representation of allowed operations, and prevent possible attribute
|
||||
and type collisions. Reasons against: generally not a huge concern to
|
||||
align Schemas as transactions will rollback, broadly increases a bit of
|
||||
bulk, and users are often expected to know which components belong to a
|
||||
particular DB. Leaning more to **for**, and would only apply to the
|
||||
directly supported method passthroughs (and thus would have no impact
|
||||
on independent methods like ``Accessor.raw_select``). Additionally,
|
||||
even if component clashes don't pose serious risk, it can be helpful to
|
||||
systematically address the cases where a misalignment is occurring (by having
|
||||
helpful ``verify`` methods that can be ran before any actions).
|
||||
systematically address the cases where a misalignment is occurring (by
|
||||
having helpful ``verify`` methods that can be run before any actions).
|
||||
'''
|
||||
_accessor_cls: type[Accessor[C]] = Accessor[C]
|
||||
_manager_cls: type[Manager[C]] = Manager[C]
|
||||
|
||||
@@ -1,20 +1,18 @@
|
||||
import sqlalchemy as sa
|
||||
|
||||
from co3.database import Database, Engine
|
||||
|
||||
from co3.accessors.sql import RelationalAccessor, SQLAccessor
|
||||
from co3.managers.sql import RelationalManager, SQLManager
|
||||
|
||||
from co3.engines import SQLEngine
|
||||
from co3.database import Database
|
||||
from co3.components import Relation, SQLTable
|
||||
from co3.managers.sql import RelationalManager, SQLManager
|
||||
from co3.accessors.sql import RelationalAccessor, SQLAccessor
|
||||
|
||||
|
||||
class RelationalDatabase[C: Relation](Database[C]):
|
||||
'''
|
||||
accessor/manager assignments satisfy supertype's type settings;
|
||||
``TabluarAccessor[Self, C]`` is of type ``type[RelationalAccessor[Self, C]]``
|
||||
(and yes, ``type[]`` specifies that the variable is itself being set to a type or a
|
||||
class, rather than a satisfying _instance_)
|
||||
``RelationalAccessor[C]`` is of type ``type[RelationalAccessor[C]]``
|
||||
(and yes, ``type[]`` specifies that the variable is itself being set
|
||||
to a type or a class, rather than a satisfying _instance_)
|
||||
'''
|
||||
_accessor_cls: type[RelationalAccessor[C]] = RelationalAccessor[C]
|
||||
_manager_cls: type[RelationalManager[C]] = RelationalManager[C]
|
||||
|
||||
@@ -1,27 +1,25 @@
|
||||
[build-system]
|
||||
requires = ["setuptools", "wheel", "setuptools-git-versioning>=2.0,<3"]
|
||||
requires = ["setuptools", "wheel"]
|
||||
build-backend = "setuptools.build_meta"
|
||||
|
||||
[tool.setuptools-git-versioning]
|
||||
enabled = true
|
||||
|
||||
[project]
|
||||
name = "co3"
|
||||
version = "0.6.3"
|
||||
description = "Lightweight Python ORM for hierarchical storage management"
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.12"
|
||||
dynamic = ["version"]
|
||||
#license = {file = "LICENSE"}
|
||||
authors = [
|
||||
{ name="Sam Griesemer", email="samgriesemer+git@gmail.com" },
|
||||
{ name="Sam Griesemer", email="git@olog.io" },
|
||||
]
|
||||
readme = "README.md"
|
||||
license = "MIT"
|
||||
keywords = ["database", "orm"]
|
||||
classifiers = [
|
||||
"Programming Language :: Python :: 3.12",
|
||||
"License :: OSI Approved :: MIT License",
|
||||
"Programming Language :: Python",
|
||||
"Operating System :: OS Independent",
|
||||
"Development Status :: 3 - Alpha",
|
||||
|
||||
"Intended Audience :: Developers",
|
||||
"Intended Audience :: End Users/Desktop",
|
||||
]
|
||||
dependencies = [
|
||||
"tqdm",
|
||||
@@ -31,16 +29,23 @@ dependencies = [
|
||||
"colorama",
|
||||
]
|
||||
|
||||
[project.scripts]
|
||||
symconf = "co3.__main__:main"
|
||||
|
||||
[project.optional-dependencies]
|
||||
tests = ["pytest"]
|
||||
docs = [
|
||||
dev = [
|
||||
"ipykernel",
|
||||
]
|
||||
doc = [
|
||||
"furo",
|
||||
"myst-parser",
|
||||
"sphinx",
|
||||
"sphinx-togglebutton",
|
||||
"sphinx-autodoc-typehints",
|
||||
"furo",
|
||||
"myst-parser",
|
||||
]
|
||||
jupyter = ["ipykernel"]
|
||||
test = [
|
||||
"pytest",
|
||||
]
|
||||
|
||||
[project.urls]
|
||||
Homepage = "https://doc.olog.io/co3"
|
||||
@@ -48,6 +53,26 @@ Documentation = "https://doc.olog.io/co3"
|
||||
Repository = "https://git.olog.io/olog/co3"
|
||||
Issues = "https://git.olog.io/olog/co3/issues"
|
||||
|
||||
[dependency-groups]
|
||||
dev = [
|
||||
"ipykernel",
|
||||
]
|
||||
|
||||
[tool.setuptools.packages.find]
|
||||
include = ["co3*"] # pattern to match package names
|
||||
include = ["co3*"]
|
||||
|
||||
[tool.ruff]
|
||||
line-length = 79
|
||||
|
||||
[tool.ruff.lint]
|
||||
select = ["ANN", "E", "F", "UP", "B", "SIM", "I", "C4", "PERF"]
|
||||
|
||||
[tool.ruff.lint.isort]
|
||||
length-sort = true
|
||||
order-by-type = false
|
||||
force-sort-within-sections = false
|
||||
|
||||
[tool.ruff.format]
|
||||
quote-style = "double"
|
||||
indent-style = "space"
|
||||
docstring-code-format = true
|
||||
|
||||
Reference in New Issue
Block a user