From 090c122c603a9d431288f006e345acee049dbdbc Mon Sep 17 00:00:00 2001 From: "Sam G." Date: Sat, 28 Dec 2024 18:34:40 -0800 Subject: [PATCH] intermediate Medium/Resource refactoring --- co3/co3.py | 183 +++++++++++---------- co3/collector.py | 2 +- co3/indexer.py | 19 +-- co3/medium.py | 57 ++++++- co3/mediums/__init__.py | 0 co3/mediums/disk.py | 6 + co3/resource.py | 17 +- co3/resources/inode.py | 5 + co3/uri.py | 21 ++- docs/reference/uri.md | 11 ++ tests/{co4_api_demo.py => co3_api_demo.py} | 0 tests/co3_medium_demo.py | 5 + 12 files changed, 218 insertions(+), 108 deletions(-) create mode 100644 co3/mediums/__init__.py create mode 100644 co3/mediums/disk.py create mode 100644 co3/resources/inode.py create mode 100644 docs/reference/uri.md rename tests/{co4_api_demo.py => co3_api_demo.py} (100%) create mode 100644 tests/co3_medium_demo.py diff --git a/co3/co3.py b/co3/co3.py index 9283a1b..49edb3f 100644 --- a/co3/co3.py +++ b/co3/co3.py @@ -1,8 +1,9 @@ ''' -CO3 is an abstract base class for scaffolding object hierarchies and managing operations -with associated database schemas. It facilitates something like a "lightweight ORM" for -classes/tables/states with fixed transformations of interest. The canonical use case is -managing hierarchical document relations, format conversions, and syntactical components. +CO3 is an abstract base class for scaffolding object hierarchies and managing +operations with associated database schemas. It facilitates something like a +"lightweight ORM" for classes/tables/states with fixed transformations of +interest. The canonical use case is managing hierarchical document relations, +format conversions, and syntactical components. Generic collation syntax: @@ -22,12 +23,13 @@ Generic collation syntax: .. admonition:: On multi-key attachment - One possible quirk of the current collation registry scheme is the rather black and - white nature of key attachment. 
You either specify a single key, possibly to several - groups, or allow any key via passthrough under an implicit group. There's no explicit - "multi-key" pattern to make use of here, be it through "restricted passthrough" - (method still parameterized by the key, but only allows keys from a provided list) or - just simple duplicated attachment. To demonstrate via the above example: + One possible quirk of the current collation registry scheme is the rather + black and white nature of key attachment. You either specify a single key, + possibly to several groups, or allow any key via passthrough under an + implicit group. There's no explicit "multi-key" pattern to make use of + here, be it through "restricted passthrough" (method still parameterized by + the key, but only allows keys from a provided list) or just simple + duplicated attachment. To demonstrate via the above example: .. code-block:: python @@ -54,15 +56,15 @@ Generic collation syntax: ... - or with a central handler and separate collation points (at least when the key list is - small): + or with a central handler and separate collation points (at least when the + key list is small): .. code-block:: python def _handle_supported_keys(self, key): # expects only supported keys, e.g., 'key1' and 'key2' ... - + @collate('key1') def key1(self): self._handle_supported_keys('key1') @@ -71,38 +73,41 @@ Generic collation syntax: def key2(self): self._handle_supported_keys('key2') - The former scales better and allows general key rejection patterns if needed, while - the latter integrates a bit better with the formal collation process, e.g., will - throw ``ValueErrors`` based on key mismatches automatically. + The former scales better and allows general key rejection patterns if + needed, while the latter integrates a bit better with the formal collation + process, e.g., will throw ``ValueErrors`` based on key mismatches + automatically. 
''' import inspect import logging from collections import defaultdict -from functools import wraps, partial logger = logging.getLogger(__name__) + def collate(key, groups=None): ''' Collation decorator for CO3 subtype action registry. - Dynamic decorator; can be used as ``collate`` without any arguments, or with all. In - the former case, ``key`` will be a function, so we check for this. + Dynamic decorator; can be used as ``collate`` without any arguments, or + with all. In the former case, ``key`` will be a function, so we check for + this. .. admonition:: Usage - Collation registration is the process of exposing various actions for use in - **hierarchical collection** (see ``Mapper.collect``). Collation *keys* are unique - identifiers of a particular action that emits data. Keys can belong to an arbitrary - number of *groups*, which serve as semantically meaningful collections of similar - actions. Group assignment also determines the associated *collation component* - to be used as a storage target; the results of actions $K_G$ belonging to group - $G$ will all be stored in the attached $G$-component. Specification of key-group - relations can be done in a few ways: + Collation registration is the process of exposing various actions for + use in **hierarchical collection** (see ``Mapper.collect``). Collation + *keys* are unique identifiers of a particular action that emits data. + Keys can belong to an arbitrary number of *groups*, which serve as + semantically meaningful collections of similar actions. Group + assignment also determines the associated *collation component* to be + used as a storage target; the results of actions $K_G$ belonging to + group $G$ will all be stored in the attached $G$-component. 
+ Specification of key-group relations can be done in a few ways: - - Explicit key-group specification: a specific key and associated groups can be - provided as arguments to the decorator: + - Explicit key-group specification: a specific key and associated + groups can be provided as arguments to the decorator: .. code-block:: python @@ -127,14 +132,14 @@ def collate(key, groups=None): ... } - If ``groups`` is left unspecified, the key will be attached to the default - ``None`` group. + If ``groups`` is left unspecified, the key will be attached to the + default ``None`` group. - - Implicit key-group association: in some cases, you may want to support an entire - "action class," and associate any operations under the class to the same storage - component. Here we still use the notion of connecting groups to components, but - allow the key to be dynamically specified and passed through to the collation - method: + - Implicit key-group association: in some cases, you may want to + support an entire "action class," and associate any operations under + the class to the same storage component. Here we still use the notion + of connecting groups to components, but allow the key to be + dynamically specified and passed through to the collation method: .. code-block:: python @@ -142,7 +147,7 @@ def collate(key, groups=None): def group(self, key): # disambiguate key ... - + and in the registries: .. code-block:: python @@ -160,27 +165,31 @@ def collate(key, groups=None): A few important notes: - - Implicit key-group specifications attach the *group* to a single method, - whereas in the explicit case, groups can be affiliated with many keys. When - explicitly provided, only those exact key values are supported. But in the - implicit case, *any* key is allowed; the group still remains a proxy for the - entire action class, but without needing to map from specifically stored key - values. 
That is, the utility of the group remains consistent across implicit + - Implicit key-group specifications attach the *group* to a single + method, whereas in the explicit case, groups can be affiliated with + many keys. When explicitly provided, only those exact key values + are supported. But in the implicit case, *any* key is allowed; the + group still remains a proxy for the entire action class, but + without needing to map from specifically stored key values. That + is, the utility of the group remains consistent across implicit and explicit cases, but stores the associations differently. - - The ``None`` key, rather than point to a ``(, )`` tuple, - instead points to a dictionary of ``group``-``method`` pairs. When attempting - execute a key under a particular group, the group registry indicates - whether the key is explicitly supported. If ``None`` is present for the group, - then ``key_registry[None][]`` can be used to recover the method - implicitly affiliated with the key (along with any other key under the group). - - When any method has been implicitly registered, *any* key (even when - attempting to specify an explicit key) will match that group. This can - effectively mean keys are not unique when an implicit group has been - registered. There is a protection in place here, however; in methods like - ``CO3.collate`` and ``Mapper.collect``, an implicit group must be directly - named in order for a given key to be considered. That is, when attempting - collation outside specific group context, provided keys will only be - considered against explicitly registered keys. + - The ``None`` key, rather than point to a ``(, + )`` tuple, instead points to a dictionary of + ``group``-``method`` pairs. When attempting to execute a key under a + particular group, the group registry indicates whether the key is + explicitly supported. 
If ``None`` is present for the group, then + ``key_registry[None][]`` can be used to recover the + method implicitly affiliated with the key (along with any other key + under the group). + - When any method has been implicitly registered, *any* key (even + when attempting to specify an explicit key) will match that group. + This can effectively mean keys are not unique when an implicit + group has been registered. There is a protection in place here, + however; in methods like ``CO3.collate`` and ``Mapper.collect``, an + implicit group must be directly named in order for a given key to + be considered. That is, when attempting collation outside specific + group context, provided keys will only be considered against + explicitly registered keys. ''' func = None if inspect.isfunction(key): @@ -200,6 +209,7 @@ def collate(key, groups=None): return decorator + class FormatRegistryMeta(type): ''' Metaclass handling collation registry at the class level. @@ -225,8 +235,8 @@ class FormatRegistryMeta(type): for _, method in methods: register_action(method) - # add final registered formats for the current class, overwriting any found in - # superclass chain + # add final registered formats for the current class, overwriting any + # found in superclass chain for attr_name, attr_value in attrs.items(): register_action(attr_value) @@ -235,41 +245,49 @@ class FormatRegistryMeta(type): return super().__new__(cls, name, bases, attrs) + class CO3(metaclass=FormatRegistryMeta): ''' Base class supporting the central "COllate, COllect, COmpose" paradigm. 
- - Collate: organize and transform conversion outputs, possibly across class components - - Collect: gather core attributes, conversion data, and subcomponents for DB insertion - - Compose: construct object-associated DB table references through the class hierarchy + - Collate: organize and transform conversion outputs, possibly across class + components + - Collect: gather core attributes, conversion data, and subcomponents for + DB insertion + - Compose: construct object-associated DB table references through the + class hierarchy .. admonition:: on action groups - Group keys are simply named collections to make it easy for storage components to - be attached to action subsets. They do _not_ augment the action registration - namespace, meaning the action key should still be unique; the group key is purely - auxiliary. + Group keys are simply named collections to make it easy for storage + components to be attached to action subsets. They do _not_ augment the + action registration namespace, meaning the action key should still be + unique; the group key is purely auxiliary. Action methods can also be attached to several groups, in case there is - overlapping utility within or across schemas or storage media. In this case, it - becomes particularly critical to ensure registered ``collate`` methods really are - just "gathering results" from possibly heavy-duty operations, rather than - performing them when called, so as to reduce wasted computation. + overlapping utility within or across schemas or storage media. In this + case, it becomes particularly critical to ensure registered ``collate`` + methods really are just "gathering results" from possibly heavy-duty + operations, rather than performing them when called, so as to reduce + wasted computation. .. admonition:: New: collation caching To help facilitate the common pattern of storing collation results, a - ``collate_cache`` parameter has been added to store key-group indexed collation - results. 
(Note: now requires explicit superclass instantiation.) + ``collate_cache`` parameter has been added to store key-group indexed + collation results. (Note: now requires explicit superclass + instantiation.) ''' + def __init__(self): self._collate_cache = {} @property def attributes(self): ''' - Method to define how a subtype's inserts should be handled under ``collect`` for - canonical attributes, i.e., inserts to the type's table. + Method to define how a subtype's inserts should be handled under + ``collect`` for canonical attributes, i.e., inserts to the type's + table. ''' return vars(self) @@ -284,14 +302,15 @@ class CO3(metaclass=FormatRegistryMeta): def collation_attributes(self, key, group): ''' Return "connective" collation component data, possibly dependent on - instance-specific attributes and the action arguments. This is typically the - auxiliary structure that may be needed to attach to responses from registered - ``collate`` calls to complete inserts. + instance-specific attributes and the action arguments. This is + typically the auxiliary structure that may be needed to attach to + responses from registered ``collate`` calls to complete inserts. - Note: this method is primarily used by ``Mapper.collect()``, and is called just - prior to collector send-off for collation inserts and injected alongside collation - data. Common structure in collation components can make this function easy to - define, independent of action group for instance. + Note: this method is primarily used by ``Mapper.collect()``, and is + called just prior to collector send-off for collation inserts and + injected alongside collation data. Common structure in collation + components can make this function easy to define, independent of action + group for instance. 
''' return {} @@ -350,5 +369,3 @@ class CO3(metaclass=FormatRegistryMeta): self._collate_cache[(key, group)] = result return result - - diff --git a/co3/collector.py b/co3/collector.py index cc977bf..1bc2f39 100644 --- a/co3/collector.py +++ b/co3/collector.py @@ -51,7 +51,7 @@ class Collector[C: Component]: inserts, and ``pop`` to remove encountered receipts from the internal store. ''' inserts = defaultdict(list) - + if receipts is None: receipts = list(self._inserts.keys()) diff --git a/co3/indexer.py b/co3/indexer.py index becb925..813edf1 100644 --- a/co3/indexer.py +++ b/co3/indexer.py @@ -11,12 +11,10 @@ logger = logging.getLogger(__name__) class Indexer: ''' - Indexer class + Indexer base class Provides restricted access to an underlying Accessor to enable more efficient, superficial - caching. - - Cache clearing is to be handled by a wrapper class, like the Database. + caching. Note that cache clearing is to be handled by a wrapper class, like the Database. Caching occurs at the class level, with indexes prefixed by table's origin Composer. 
This means that cached selects/group-bys will be available regardless of the provided @@ -261,12 +259,12 @@ class Indexer: agg_on = agg_on_names index_on = index_on_names - #print(f'rows_are_mappings: {rows_are_mappings}') - #print(f'group_by: {group_by}') - #print(f'agg_on: {agg_on}') - #print(f'agg_on_names: {agg_on_names}') - #print(f'index_on: {index_on}') - #print(f'index_on_names: {index_on_names}') + # print(f'rows_are_mappings: {rows_are_mappings}') + # print(f'group_by: {group_by}') + # print(f'agg_on: {agg_on}') + # print(f'agg_on_names: {agg_on_names}') + # print(f'index_on: {index_on}') + # print(f'index_on_names: {index_on_names}') # "group by" block ID and wrangle the links into a list group_by_idx = {} @@ -317,6 +315,7 @@ class Indexer: return list(group_by_idx.values()) + class CacheBlock: ''' Wraps up a set of query parameters for a specific entity, and provides cached access diff --git a/co3/medium.py b/co3/medium.py index 5932500..dde1ff7 100644 --- a/co3/medium.py +++ b/co3/medium.py @@ -1,6 +1,8 @@ import logging from contextlib import contextmanager +from co3.engine import Engine, Connection, Resource, Group + logger = logging.getLogger(__name__) @@ -10,11 +12,14 @@ class Medium[R: Resource]: A Resource space ''' - def __init__(self, scope): + _engine_cls: type[Engine] = Engine + + def __init__(self, *scope_args, **scope_kwargs): + self.engine = self._engine_cls(*scope_args, **scope_kwargs) pass @contextmanager - def connect(self, timeout=None): + def connect(self, timeout=None) -> Connection: ''' Open a connection to the database specified by the resource. Exactly what the returned connection looks like remains relatively unconstrained given the wide @@ -22,21 +27,59 @@ class Medium[R: Resource]: with-statement contexts, constituting an "interaction session" with the database (i.e., allowing several actions to be performed using the same connection). 
''' - raise NotImplementedError + return self.engine.connect(timeout=timeout) def execute(self, query: Query[QL]): pass -class BrowsableMedium[R: Resource](Medium[R]): - def browse(self, uri: URI[R]): +class ReadableMedium[R: Resource](Medium[R]): + def _resolve_relative_uri(self, protocol, value): + ''' + Subclass to implement: fetch child object for supported protocol, i.e., single + component subpath + ''' + ... + + def resolve_uri(self, uri: URI) -> ResourceCollection: + assert uri.protocols[0] in self.supported_protocols + + obj = self._resolve_relative_uri(uri.protocols[0], uri.components[0]) + + # core the uri and recurse + cored_uri = uri.core() + + if cored_uri: + rc = obj.resolve_uri(cored_uri) + else: + assert isinstance(obj, Resource) + rc = ResourceCollection([obj]) + + return rc + + def _to_uri_list(self, uri_like): + if not isinstance(uri_like, list): + uri_like = [uri_like] + return uri_like + + def browse( + self, + connection: Connection, + uri: str | URI | list[str] | list[URI] + ) -> ResourceCollection: ''' Analog for Read (CRUD), SELECT (SQL), GET (REST) ''' - pass + uris = self._to_uri_list(uri) + + rc = ResourceCollection() + for uri in uris: + rc.extend(self.resolve_uri(uri)) + + return rc -class ABCDMedium[R: Resource](BrowsableMedium[R]): +class WritableMedium[R: Resource](ReadableMedium[R]): def append(self, uri: URI[R], resource: R): ''' Analog for Create (CRUD), INSERT (SQL), POST/PUT (REST) diff --git a/co3/mediums/__init__.py b/co3/mediums/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/co3/mediums/disk.py b/co3/mediums/disk.py new file mode 100644 index 0000000..da3087f --- /dev/null +++ b/co3/mediums/disk.py @@ -0,0 +1,6 @@ +from co3 import Medium +from co3.resources import INode + + +class Disk(Medium[INode]): + pass diff --git a/co3/resource.py b/co3/resource.py index abb6bb7..f4cd8d2 100644 --- a/co3/resource.py +++ b/co3/resource.py @@ -1,10 +1,15 @@ -from typing import Protocol +from typing import BinaryIO + +from co3 import URI, Medium 
class Resource: - def content(self) -> BinaryIO: - pass + def __init__( + self, + context: Medium | list[Medium], + uri: URI + ): + self.uri = uri -class SelectableResource(Protocol): - def select(self, component, *args, **kwargs): - raise NotImplementedError + def open(self) -> BinaryIO: + pass diff --git a/co3/resources/inode.py b/co3/resources/inode.py new file mode 100644 index 0000000..44de862 --- /dev/null +++ b/co3/resources/inode.py @@ -0,0 +1,5 @@ +from co3 import Resource + + +class INode(Resource): + pass diff --git a/co3/uri.py b/co3/uri.py index e908149..6af4114 100644 --- a/co3/uri.py +++ b/co3/uri.py @@ -1,4 +1,4 @@ -from urllib import parse +from urllib.parse import urlparse class URI: @@ -13,3 +13,22 @@ class URN(URI): def __init__(self, url_str: str): self.url_str = url_str + +class CompositeURI(URI): + def __init__(self, url_str: str): + url_obj = urlparse(url_str) + + self.protocols = url_obj.scheme.split('+')[::-1] + self.components = [part for part in (url_obj.netloc + url_obj.path).split('/') if part] + + def core(self, layers=1) -> 'CompositeURI': + ''' + "Core" the innermost ``layers`` layers of the composite URI. + ''' + pass + + def shed(self, layers=1) -> 'CompositeURI': + ''' + "Shed" the outermost ``layers`` layers of the composite URI. + ''' + pass diff --git a/docs/reference/uri.md b/docs/reference/uri.md new file mode 100644 index 0000000..d737686 --- /dev/null +++ b/docs/reference/uri.md @@ -0,0 +1,11 @@ +Quick thoughts and ideals: + +- Schemes are compositional, "wrapping" super contexts: `c+b+a://a/b/c` +- The scheme communicates the target type (above is `c`) +- URIs can be arbitrarily relative so long as they're resolved in the right contexts. 
+ Above, `c+b://b/c` can be resolved in the context of `a://a` +- URIs are resolved by unwrapping schemes and resolving in to out +- URL params can apply only to the target type (this is the most consistent and probably + not too restrictive) +- Trajectories from one scheme to another can be inferred from the type hierarchy; there + may be many diff --git a/tests/co4_api_demo.py b/tests/co3_api_demo.py similarity index 100% rename from tests/co4_api_demo.py rename to tests/co3_api_demo.py diff --git a/tests/co3_medium_demo.py b/tests/co3_medium_demo.py new file mode 100644 index 0000000..f43a4db --- /dev/null +++ b/tests/co3_medium_demo.py @@ -0,0 +1,5 @@ +from co3.mediums.disk import Disk + + +disk = Disk('disk:///') +disk.browse('dir://home')