516 lines
22 KiB
Python
516 lines
22 KiB
Python
'''
Used to house useful objects for storage schemas (e.g., SQLAlchemy table definitions).
Provides a general interface for mapping from CO3 class names to storage structures for
auto-collection and composition.

Example:

.. code-block:: python

    mapper = Mapper[sa.Table](schema)

    mapper.attach(
        Type,
        attr_comp=TypeTable,
        coll_comp=CollateTable,
        coll_groups={
            'name': NameConversions
        }
    )

.. admonition:: Development log

    - Overruled design decision: Mappers were previously designed to map from a specific
      CO3 hierarchy to a specific Schema. The intention was to allow only related types to
      be attached to a single schema, at least under a particular Mapper. The type
      restriction has since been removed, however, as it isn't particularly well-founded.
      During ``collect()``, a particular instance collects data from both its attributes and
      its collation actions. It then repeats the same upward for parent types (part of the
      same type hierarchy), and down to components (often not part of the same type
      hierarchy). As such, to fully collect from a type, the Mapper needs to leave
      registration open to various types, not just those part of the same hierarchy.
'''
|
|
import logging
|
|
from inspect import signature
|
|
from typing import Callable, Any
|
|
from collections import defaultdict
|
|
|
|
from co3.co3 import CO3
|
|
from co3.schema import Schema
|
|
from co3.collector import Collector
|
|
from co3.component import Component
|
|
from co3.components import ComposableComponent
|
|
|
|
|
|
# Module-level logger named after this module; the Mapper classes below use it for
# debug/info messages during component checks and collection.
logger = logging.getLogger(__name__)
|
|
|
|
class Mapper[C: Component]:
|
|
'''
|
|
Mapper base class for housing schema components and managing relationships between CO3
|
|
types and storage components (of type C).
|
|
|
|
Mappers are responsible for two primary tasks:
|
|
|
|
1. Attaching CO3 types to database Components from within a single schema
|
|
2. Facilitating collection of Component-related insertion data from instances of
|
|
attached CO3 types
|
|
|
|
Additionally, the Mapper manages its own Collector and Composer instances. The
|
|
Collector receives the inserts from ``.collect()`` calls, and will subsequently be
|
|
"dropped off" at an appropriate Database's Manager to actually perform the requested
|
|
inserts (hence why we tie Mappers to Schemas one-to-one).
|
|
|
|
.. admonition:: Dev note
|
|
|
|
The Composer needs reconsideration, or at least its positioning directly in this
|
|
class. It may be more appropriate to have at the Schema level, or even just
|
|
dissolved altogether if arbitrary named Components can be attached to schemas.
|
|
|
|
- Consider pushing this into a Mapper factory; on init, could check if provided
|
|
Schema wraps up composable Components or not
|
|
'''
|
|
_collector_cls: type[Collector[C]] = Collector[C]
|
|
|
|
def __init__(self, schema: Schema[C]):
|
|
'''
|
|
Parameters:
|
|
schema: Schema object holding the set of components eligible as attachment
|
|
targets for registered CO3 types
|
|
'''
|
|
self.schema = schema
|
|
|
|
self.collector = self._collector_cls(schema)
|
|
|
|
self.attribute_comps: dict[type[CO3], C] = {}
|
|
self.collation_groups: dict[type[CO3], dict[str|None, C]] = defaultdict(dict)
|
|
|
|
def _check_component(self, comp: str | C, strict=True):
|
|
if type(comp) is str:
|
|
comp_key = comp
|
|
comp = self.schema.get_component(comp_key)
|
|
if comp is None:
|
|
err_msg = f'Component key "{comp_key}" not available in attached schema'
|
|
|
|
if strict:
|
|
raise ValueError(err_msg)
|
|
else:
|
|
logger.info(err_msg)
|
|
return None
|
|
else:
|
|
if comp not in self.schema:
|
|
err_msg = f'Component "{comp}" not registered to Mapper schema {self.schema}'
|
|
|
|
if strict:
|
|
raise TypeError(err_msg)
|
|
else:
|
|
logger.info(err_msg)
|
|
return None
|
|
|
|
return comp
|
|
|
|
def attach(
|
|
self,
|
|
type_ref : type[CO3],
|
|
attr_comp : str | C,
|
|
coll_comp : str | C | None = None,
|
|
coll_groups : dict[str | None, str | C] | None = None,
|
|
strict = True,
|
|
) -> None:
|
|
'''
|
|
Parameters:
|
|
type_ref: CO3 subtype to map to provided storage components
|
|
attr_comp: storage component for provided type's canonical attributes
|
|
coll_comp: storage component for provided type's default/unnamed collation
|
|
targets
|
|
coll_groups: storage components for named collation groups; dict mapping group
|
|
names to components
|
|
'''
|
|
# check attribute component in registered schema
|
|
attr_comp = self._check_component(attr_comp, strict=strict)
|
|
self.attribute_comps[type_ref] = attr_comp
|
|
|
|
# check default component in registered schema
|
|
if coll_comp is not None:
|
|
coll_comp = self._check_component(coll_comp, strict=strict)
|
|
self.collation_groups[type_ref][None] = coll_comp
|
|
|
|
# check if any component in group dict not in registered schema
|
|
if coll_groups is not None:
|
|
for coll_key in coll_groups:
|
|
coll_groups[coll_key] = self._check_component(coll_groups[coll_key], strict=strict)
|
|
|
|
self.collation_groups[type_ref].update(coll_groups)
|
|
|
|
def attach_many(
|
|
self,
|
|
type_list: list[type[CO3]],
|
|
attr_name_map: Callable[[type[CO3]], str | C],
|
|
coll_name_map: Callable[[type[CO3], str], str | C] | None = None,
|
|
strict = False,
|
|
) -> None:
|
|
'''
|
|
Auto-register a set of types to the Mapper's attached Schema. Associations are
|
|
made from types to both attribute and collation component names, through
|
|
``attr_name_map`` and ``coll_name_map``, respectively. Collation targets are inferred
|
|
through the registered groups in each type.
|
|
|
|
Parameters:
|
|
type_ref: reference to CO3 type
|
|
attr_name_map: function mapping from types/classes to attribute component names
|
|
in the attached Mapper Schema
|
|
coll_name_map: function mapping from types/classes & action groups to
|
|
collation component names in the attached Mapper Schema. ``None``
|
|
is passed as the action group to retrieve the default
|
|
collection target.
|
|
'''
|
|
for _type in type_list:
|
|
attr_comp = attr_name_map(_type)
|
|
coll_groups = {}
|
|
|
|
if coll_name_map:
|
|
for group in _type.group_registry:
|
|
coll_groups[group] = coll_name_map(_type, group)
|
|
|
|
self.attach(_type, attr_comp, coll_groups=coll_groups, strict=strict)
|
|
|
|
def get_attr_comp(
|
|
self,
|
|
co3_ref: CO3 | type[CO3]
|
|
) -> C | None:
|
|
type_ref = co3_ref
|
|
if isinstance(co3_ref, CO3):
|
|
type_ref = co3_ref.__class__
|
|
|
|
return self.attribute_comps.get(type_ref, None)
|
|
|
|
def get_coll_comp(
|
|
self,
|
|
co3_ref: CO3 | type[CO3],
|
|
group=str | None,
|
|
) -> C | None:
|
|
type_ref = co3_ref
|
|
if isinstance(co3_ref, CO3):
|
|
type_ref = co3_ref.__class__
|
|
|
|
return self.collation_groups.get(type_ref, {}).get(group, None)
|
|
|
|
def collect(
|
|
self,
|
|
obj : CO3,
|
|
keys : list[str] = None,
|
|
groups : list[str] = None,
|
|
) -> list:
|
|
'''
|
|
Stages inserts up the inheritance chain, and down through components.
|
|
|
|
Note:
|
|
Even with ORM, a method like this would be needed to trace up parent tables and
|
|
how inserts should be handled for inheritance. ORM would make component
|
|
inserts a little easier perhaps, since they can be attached as attributes to
|
|
constructed row objects and a sa.Relationship will handle the rest. Granted,
|
|
we don't do a whole lot more here: we just call ``collect`` over those
|
|
components, adding them to the collector session all the same.
|
|
|
|
Parameters:
|
|
obj: CO3 instance to collect from
|
|
keys: keys for actions to collect from
|
|
group: group contexts for the keys to collect from. If None, explicit group
|
|
contexts registered for the keys will be inferred (but implicit groups
|
|
will not be detected).
|
|
|
|
Returns: collector receipts for staged inserts
|
|
'''
|
|
# default is to have no actions
|
|
if keys is None:
|
|
keys = []
|
|
#keys = list(obj.key_registry.keys())
|
|
|
|
collation_data = defaultdict(dict)
|
|
for key in keys:
|
|
# keys must be defined
|
|
if key is None:
|
|
continue
|
|
|
|
logger.debug(f'Collecting for key "{key}"')
|
|
|
|
# if groups not specified, dynamically grab those explicitly attached groups
|
|
# for each key
|
|
group_dict = {}
|
|
if groups is None:
|
|
group_dict = obj.key_registry.get(key, {})
|
|
else:
|
|
for group in groups:
|
|
group_dict[group] = obj.key_registry.get(key, {}).get(group)
|
|
|
|
# method regroup: under key, index by method and run once per
|
|
method_groups = defaultdict(list)
|
|
for group_name, group_method in group_dict.items():
|
|
method_groups[group_method].append(group_name)
|
|
|
|
logger.debug(f'Equivalence classes: "{list(method_groups.values())}"')
|
|
|
|
# collate for method equivalence classes; only need on representative group to
|
|
# pass to CO3.collate to call the method
|
|
key_collation_data = {}
|
|
for collation_method, collation_groups in method_groups.items():
|
|
key_method_collation_data = obj.collate(key, group=collation_groups[0])
|
|
|
|
if key_method_collation_data is None:
|
|
logger.debug(
|
|
f'Equivalence class "{collation_groups}" yielded no data, skipping'
|
|
)
|
|
continue
|
|
|
|
for collation_group in collation_groups:
|
|
# gather connective data for collation components
|
|
# -> we do this here as it's obj dependent
|
|
connective_data = obj.collation_attributes(key, collation_group)
|
|
|
|
if connective_data is None:
|
|
connective_data = {}
|
|
|
|
key_collation_data[collation_group] = {
|
|
**connective_data,
|
|
**key_method_collation_data,
|
|
}
|
|
|
|
collation_data[key] = key_collation_data
|
|
|
|
receipts = []
|
|
attributes = obj.attributes
|
|
|
|
for _cls in reversed(obj.__class__.__mro__[:-2]):
|
|
attribute_component = self.get_attr_comp(_cls)
|
|
|
|
# require an attribute component for type consideration
|
|
if attribute_component is None:
|
|
continue
|
|
|
|
self.collector.add_insert(
|
|
attribute_component,
|
|
attributes,
|
|
receipts=receipts,
|
|
)
|
|
|
|
for key, key_collation_data in collation_data.items():
|
|
# if method either returned no data or isn't registered, ignore
|
|
if not key_collation_data:
|
|
continue
|
|
|
|
for group, group_collation_data in key_collation_data.items():
|
|
collation_component = self.get_coll_comp(_cls, group=group)
|
|
|
|
if collation_component is None:
|
|
continue
|
|
|
|
self.collector.add_insert(
|
|
collation_component,
|
|
group_collation_data,
|
|
receipts=receipts,
|
|
)
|
|
|
|
# handle components
|
|
for comp in [c for c in obj.components if isinstance(c, CO3)]:
|
|
receipts.extend(self.collect(comp, keys=keys, groups=groups))
|
|
|
|
return receipts
|
|
|
|
|
|
class ComposableMapper[C: ComposableComponent](Mapper[C]):
|
|
'''
|
|
.. admonition:: class design
|
|
|
|
Heavily debating between multiple possible design approaches here. The main
|
|
purpose of this subtype is make clear the need for additional compositional
|
|
mapping details, namely functions that can produce pairwise join conditions for
|
|
both the attribute tree (vertical traversal) and the collation components
|
|
(horizontal traversal). Here's a few remarks:
|
|
|
|
- I want the necessary maps to provided/stored *outside* of ``compose`` calls to
|
|
reduce overhead for downstream callers. It's awkward to have think about the
|
|
exact attr-to-attr associations each time you want a type's associated
|
|
composition, especially when they don't change under the same Mapper (i.e.,
|
|
if you have the Mapper reference, the compositional associations should be
|
|
implicit).
|
|
- The barebones spec here appears to be two pairwise "composer" maps: one for
|
|
attribute comps, and one for collation comps. For now I think this makes sense
|
|
as additional init params, but there may later be reason to wrap this up a bit
|
|
more.
|
|
- Considering the full deprecation for the Composer type, or whether this could be
|
|
the place where it serves some purpose. Aesthetically, there's symmetry with the
|
|
``collect`` and Collector method-type pairing, but that isn't a good enough reason
|
|
to justify a separate type here. The difference is that Collector instances
|
|
actually store type references, whereas the considered Composer type would
|
|
effectively just be a convenient collection of utility functions. Still possibly
|
|
useful, but not as clearly justifiable.
|
|
- If a separate Composer type were to be justified here, it would serve as a
|
|
"reusable connective tissue" for possibly many Mappers with the same kinds of
|
|
edge-wise relationships. Can think of it like this:
|
|
|
|
* Schemas collect up "nodes" (Components). These are explicit storage structures
|
|
in a DB, and can include some explicit attribute connections (foreign keys),
|
|
although those are connections made on the DB side.
|
|
* Mappers provide an exoskeleton for a Schema's nodes. It structures Components into
|
|
attributes and collation types, and additionally ties them to external CO3
|
|
types. The handy analogy here has been that attribute comps connect
|
|
*vertically* (in a tree like fashion; point up for parents and down for
|
|
children), and collation comps point *horizontally* (or perhaps more aptly,
|
|
*outward*; at each node in the attribute tree, you have a "circle" of
|
|
collation comps that can point to it, and are not involved as formal tree
|
|
nodes. Can maybe think of these like "ornaments" or bulbs or orbitals).
|
|
* While the Mappers may provide the "bones," there's no way to communicate
|
|
*across* them. While I might know that one attribute is the "parent" of
|
|
another, I don't know *why* that relationship is there. A Composer, or the
|
|
composer details to be provided to this class, serve as the "nerves" to be
|
|
paired with the bone, actually establishing a line of communication. More
|
|
specifically, the nerves here are attribute-based mappings between pairs of
|
|
Components, i.e., (generalized) join conditions.
|
|
|
|
- Note that, by the above logic, we should then want/need a type to manage the
|
|
functions provided to ``attach_many``. These functions help automatically
|
|
characterize the shape of the type skeleton in the same way the proposed
|
|
Composer wrapper would. In fact, the barebones presentation here is really just
|
|
the same two function signatures as are expected by that method. The above
|
|
analogy simply made me ask why the "bones" wouldn't be reusable if the "nerves"
|
|
were going to be. So we should perhaps coordinate a decision on this front; if
|
|
one goes, the other must as well. This may also help me keep it simpler for the
|
|
time being.
|
|
- One other aspect of a dedicated Composer type (and by the above point, a
|
|
hypothetical type to aid in ``attach_many`` specification) could have some sort of
|
|
"auto" feature about it. With a clear enough "discovery system," we could
|
|
encourage certain kinds of Schemas and components are named and structured. Such
|
|
an auto-composer could "scan" all components in a provided Schema and attempt to
|
|
find common attributes across tables that are unlinked (i.e., the reused
|
|
column names implicit across types in the attribute hierarchy; e.g., File.name
|
|
-> Note.name), as well as explicit connections which may suggest collation
|
|
attachment (e.g., ``note_conversions.name --FK-> Note.name``). This, of course,
|
|
could always be overridden with manual specification, but being aware of some
|
|
automatic discovery structures could help constrain schema definitions to be
|
|
more in-line with the CO3 operational model. That all being said, this is a
|
|
large amount of complexity and should likely be avoided until necessary.
|
|
|
|
.. admonition:: Instance variables
|
|
|
|
- ``type_compose_cache``: index for previously computed compositions. This index
|
|
is reset if either ``attach`` or ``attach_many`` is
|
|
called to allow possible new type propagation.
|
|
'''
|
|
def __init__(
|
|
self,
|
|
schema : Schema[C],
|
|
attr_compose_map : Callable[[str | C, str | C], Any] | None = None,
|
|
coll_compose_map : Callable[[str | C, str | C], Any] | None = None,
|
|
):
|
|
super().__init__(schema)
|
|
|
|
self.attr_compose_map = attr_compose_map
|
|
self.coll_compose_map = coll_compose_map
|
|
|
|
self.type_compose_cache = {}
|
|
|
|
def attach(self, *args, **kwargs):
|
|
self.type_compose_cache = {}
|
|
|
|
super().attach(*args, **kwargs)
|
|
|
|
def attach_many(self, *args, **kwargs):
|
|
self.type_compose_cache = {}
|
|
|
|
super().attach_many(*args, **kwargs)
|
|
|
|
def compose(
|
|
self,
|
|
co3_ref: CO3 | type[CO3],
|
|
groups: list[str] | None = None,
|
|
*compose_args,
|
|
**compose_kwargs,
|
|
):
|
|
'''
|
|
Compose tables up the type hierarchy, and across through action groups to
|
|
collation components.
|
|
|
|
Note:
|
|
Comparing to ORM, this method would likely also still be needed, since it may
|
|
not be explicitly clear how some JOINs should be handled up the inheritance
|
|
chain (for components / sa.Relationships, it's a little easier).
|
|
|
|
|
|
.. admonition:: On compose order
|
|
|
|
Parameters:
|
|
obj: either a CO3 instance or a type reference
|
|
'''
|
|
type_ref = co3_ref
|
|
if isinstance(co3_ref, CO3):
|
|
type_ref = co3_ref.__class__
|
|
|
|
if type_ref in self.type_compose_cache:
|
|
return self.type_compose_cache[type_ref]
|
|
|
|
comp_agg = None
|
|
last_attr_comp = None
|
|
last_coll_comps = None
|
|
for _cls in reversed(type_ref.__mro__[:-2]):
|
|
attr_comp = self.get_attr_comp(_cls)
|
|
|
|
# require an attribute component for type consideration
|
|
if attr_comp is None:
|
|
continue
|
|
|
|
if comp_agg is None:
|
|
comp_agg = attr_comp
|
|
else:
|
|
# note the reduced attr_comp (produced this iteration) and the
|
|
# last_attr_comp (last iteration) refs passed to compose map, rather than
|
|
# their aggregated counterparts. This is because compose conditions often
|
|
# need to be specified between *atomic* components within compositional
|
|
# components, as compositions don't also expose the necessary attributes
|
|
# (or if they do, they aren't necessarily unique; e.g., JOIN two
|
|
# SQLAlchemy tables does not allow direct column access).
|
|
compose_condition = self.attr_compose_map(last_attr_comp, attr_comp)
|
|
comp_agg = comp_agg.compose(
|
|
attr_comp,
|
|
compose_condition,
|
|
*compose_args,
|
|
**compose_kwargs,
|
|
)
|
|
|
|
# compose horizontally with components from provided action groups
|
|
coll_list = []
|
|
if groups is not None:
|
|
for group in groups:
|
|
coll_comp = self.get_coll_comp(_cls, group=group)
|
|
|
|
if coll_comp is None:
|
|
continue
|
|
|
|
# valid collation comps added to coll_list, to be passed to the
|
|
# coll_map in the next iteration
|
|
coll_list.append(coll_comp)
|
|
|
|
# note how the join condition is specified using the non-composite
|
|
# `attr_comp` and new `coll_comp`; the composite doesn't typically
|
|
# have the same attribute access and needs a ref to a specific comp
|
|
if len(signature(self.coll_compose_map).parameters) > 2:
|
|
compose_condition = self.coll_compose_map(
|
|
attr_comp,
|
|
coll_comp,
|
|
last_coll_comps
|
|
)
|
|
else:
|
|
compose_condition = self.coll_compose_map(attr_comp, coll_comp)
|
|
|
|
comp_agg = comp_agg.compose(
|
|
coll_comp,
|
|
compose_condition,
|
|
*compose_args,
|
|
**compose_kwargs,
|
|
)
|
|
|
|
last_attr_comp = attr_comp
|
|
last_coll_comps = coll_list
|
|
|
|
self.type_compose_cache[type_ref] = comp_agg
|
|
|
|
return comp_agg
|