initial commit

This commit is contained in:
Sam G. 2024-03-28 23:11:30 -07:00
commit 9218f4a404
199 changed files with 10051 additions and 0 deletions

1
.python-version Normal file

@ -0,0 +1 @@
co4

0
MANIFEST.in Normal file

21
README.md Normal file

@ -0,0 +1,21 @@
# Overview

`co3` is a package for file conversion and associated database operations. The `CO3` base class
provides a standard interface for performing conversions, preparing inserts, and
interacting with database schemas that mirror the class hierarchy.

Simplified description of the operational model:

**Goal**: interact with a storage medium (database, pickled structure, VSS framework) with
a known schema.

- **Accessor** to provide access to stored items
- **Composer** to compose common access points (e.g., JOINed tables)
- **Indexer** to index/cache access queries
- **Manager** to manage storage state (e.g., supported inserts, database syncs)
- **Collector** to collect data for updating storage state
- **Database** to wrap a storage medium's access and management under one interface
- **Mapper** to map CO3 object hierarchies onto their schema components
- **Relation** to model the storage structures themselves (e.g., tables)

**CO3** is an abstract base class that makes it easy to integrate this model with object
hierarchies that mirror a storage schema.
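
A minimal sketch of that last point (class and attribute names below are illustrative, not
part of the package; the `collate` decorator lives in `co3.co3`):

```py
from co3 import CO3
from co3.co3 import collate

class Note(CO3):                      # hypothetical CO3 subtype mirroring a `notes` table
    def __init__(self, name, content):
        self.name = name
        self.content = content

    @collate('html5')                 # register a conversion action under the key 'html5'
    def to_html(self):
        return f'<p>{self.content}</p>'

note = Note('note-a', 'hello')
note.collate('html5')                 # dispatches to the registered 'html5' action
note.attributes                       # insert-ready attribute dict for the type's table
```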

48 new one-line files (file names are not shown in this view), each containing a single absolute path:

/home/smgr/Documents/projects/ontolog/co3/co3/__init__.py
/home/smgr/Documents/projects/ontolog/co3/co3/accessor.py
/home/smgr/Documents/projects/ontolog/co3/co3/accessors/__init__.py
/home/smgr/Documents/projects/ontolog/co3/co3/accessors/fts.py
/home/smgr/Documents/projects/ontolog/co3/co3/accessors/sql.py
/home/smgr/Documents/projects/ontolog/co3/co3/accessors/vss.py
/home/smgr/Documents/projects/ontolog/co3/co3/co3.py
/home/smgr/Documents/projects/ontolog/co3/co3/collector.py
/home/smgr/Documents/projects/ontolog/co3/co3/composer.py
/home/smgr/Documents/projects/ontolog/co3/co3/database.py
/home/smgr/Documents/projects/ontolog/co3/co3/databases/__init__.py
/home/smgr/Documents/projects/ontolog/co3/co3/databases/fts.py
/home/smgr/Documents/projects/ontolog/co3/co3/databases/sql.py
/home/smgr/Documents/projects/ontolog/co3/co3/databases/vss.py
/home/smgr/Documents/projects/ontolog/co3/co3/indexer.py
/home/smgr/Documents/projects/ontolog/co3/co3/manager.py
/home/smgr/Documents/projects/ontolog/co3/co3/managers/__init__.py
/home/smgr/Documents/projects/ontolog/co3/co3/managers/fts.py
/home/smgr/Documents/projects/ontolog/co3/co3/managers/sql.py
/home/smgr/Documents/projects/ontolog/co3/co3/managers/vss.py
/home/smgr/Documents/projects/ontolog/co3/co3/mapper.py
/home/smgr/Documents/projects/ontolog/co3/co3/relation.py
/home/smgr/Documents/projects/ontolog/co3/co3/relations/__init__.py
/home/smgr/Documents/projects/ontolog/co3/co3/util/__init__.py
/home/smgr/Documents/projects/ontolog/co3/co3/util/db.py
/home/smgr/Documents/projects/ontolog/co3/co3/util/regex.py
/home/smgr/Documents/projects/ontolog/co4/co4/__init__.py
/home/smgr/Documents/projects/ontolog/co4/co4/accessor.py
/home/smgr/Documents/projects/ontolog/co4/co4/accessors/__init__.py
/home/smgr/Documents/projects/ontolog/co4/co4/accessors/fts.py
/home/smgr/Documents/projects/ontolog/co4/co4/accessors/table.py
/home/smgr/Documents/projects/ontolog/co4/co4/accessors/vss.py
/home/smgr/Documents/projects/ontolog/co4/co4/co4.py
/home/smgr/Documents/projects/ontolog/co4/co4/collector.py
/home/smgr/Documents/projects/ontolog/co4/co4/composer.py
/home/smgr/Documents/projects/ontolog/co4/co4/databases/__init__.py
/home/smgr/Documents/projects/ontolog/co4/co4/databases/_base.py
/home/smgr/Documents/projects/ontolog/co4/co4/databases/core.py
/home/smgr/Documents/projects/ontolog/co4/co4/databases/fts.py
/home/smgr/Documents/projects/ontolog/co4/co4/databases/vss.py
/home/smgr/Documents/projects/ontolog/co4/co4/indexer.py
/home/smgr/Documents/projects/ontolog/co4/co4/manager.py
/home/smgr/Documents/projects/ontolog/co4/co4/managers/__init__.py
/home/smgr/Documents/projects/ontolog/co4/co4/managers/core.py
/home/smgr/Documents/projects/ontolog/co4/co4/managers/fts.py
/home/smgr/Documents/projects/ontolog/co4/co4/managers/vss.py
/home/smgr/Documents/projects/ontolog/co4/co4/utils/db.py
/home/smgr/Documents/projects/ontolog/co4/co4/utils/paths.py

109
co3/__init__.py Normal file

@ -0,0 +1,109 @@
'''
Database submodule
- `db`: contains SQLAlchemy-based schema definitions
- `accessors`: convenience methods for accessing database entries
- `populate`: convenience methods for populating database tables
The `accessors` and `populate` submodules are each split into `schema` and `fts` method
groups. The former concerns methods relating to the actual database schema, the latter to
their SQLite FTS counterparts.
Note: Subpackage organization
Subpackages are broken up by inheritance. Within a given submodule, you have a
`_base.py` file defining the base class associated with that submodule's title, along
with concrete subclasses of that base in their own files. Deeper inheritance would
recursively extend this structure. The `__init__.py` for a given submodule then
exposes the concrete instances, leaving the base hidden. For example,
    accessors/
        _base.py
        core.py
        fts.py
`core` and `fts` house the `CoreAccessor` and `FTSAccessor` classes, respectively,
and are the direct subclasses of the `Accessor` parent found in the `_base`. This base
class _could_ be placed outside of the submodule in the parent directory (imported
with something like `from db import accessor` instead of `from db.accessors import
_base`). This is entirely valid, but I tend to prefer when the base class is among its
direct children, as
- In this case at least, the base doesn't need to be exposed
- The base class is being stowed away under an appropriately named submodule; having a
separate `accessor.py` and `accessors/` file/directory can feel a little cluttered.
- It makes imports across the accessors feel standardized:
```py
from localsys.db.accessors._base import Accessor
from localsys.db.accessors.core import CoreAccessor
```
Both have the same level of nesting to reach the class.
Frankly, both means of organization are perfectly fine, and as far as I can tell,
semantically sound in their own right. This particular scheme is just a preference in
the moment, and so long as I keep things consistent, choosing one over the other
shouldn't matter.
Additionally, note how `__init__.py`s are typically set up when providing wider access
to internal modules. The `init` typically pulls out classes from sibling modules
(i.e., files), but will import subpackages at the topmost level. For example, for the
structure
```
db/
    __init__.py
    accessors/
        __init__.py
        _base.py
        core.py
        fts.py
```
we have
```db/__init__.py
from localsys.db import accessors
```
which just imports the subpackage `accessors`. However, within the subpackage:
```db/accessors/__init__.py
from localsys.db.accessors.core import CoreAccessor
```
we don't just import the submodule `core`; we dig into the file to grab the relevant
class and pull it into the outer namespace. Overarching point: `__init__.py` files
typically reach into the sibling files (submodules) and pull out classes. Given that
this behavior is recursive, `__init__.py` files then respect subpackages (nested
directories), importing them at the top level and expecting an internal `__init__.py`
to have managed access appropriately.
Note: Organization for inheritance over composition
At a glance, the organization of subpackages here feels like it clashes with those
seen in `localsys.primitives`. `note_components`, for instance, houses the components
for the outer `note` module. Contrast this with how the `core` submodule looks: it's
composing `*/core.py` files across subpackages `accessors` and `managers`, rather than
a single subpackage like `note`. This seems inconsistent, but the subpackages here are
actually still organized in the same way: by inheritance. It just happens that all
of the note components inherit from the same base class, and are thus confined to
a single subpackage. This aside, the subpackages themselves are still created around
inheritance, wrapping up a base and direct subclasses.
'''
from co3.accessor import Accessor
from co3.co3 import CO3
from co3.collector import Collector
from co3.composer import Composer
from co3.database import Database
from co3.indexer import Indexer
from co3.manager import Manager
from co3.mapper import Mapper
from co3.relation import Relation
from co3 import accessors
from co3 import databases
from co3 import managers
from co3 import relations
from co3 import util
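
As a quick illustration of the convention described in the docstring above (a sketch;
it only assumes the package is importable as `co3`):

```py
from co3 import CO3, Database          # classes pulled up from sibling modules
from co3 import accessors              # subpackages imported at the topmost level
from co3.accessors import SQLAccessor  # concrete classes exposed by the subpackage __init__
```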

17 binary files not shown

28
co3/accessor.py Normal file

@ -0,0 +1,28 @@
'''
Accessor

Provides access to an underlying schema through a supported set of operations. Class
methods could be general, high-level SQL wrappers, or convenience functions for common
schema-specific queries.
'''
import inspect
from pathlib import Path
from collections import defaultdict

import sqlalchemy as sa

#from co3.database import Database


class Accessor[D: 'Database']:
    '''
    Access wrapper class for complex queries and easy integration with Composer tables.
    Implements high-level access to things like common constrained SELECT queries.

    Parameters:
        database: Database instance to wrap; provides the resource handles (e.g., the
                  engine) used when executing queries
    '''
    def __init__(self, database: D):
        self.database = database

24
co3/accessors/__init__.py Normal file

@ -0,0 +1,24 @@
'''
Note that subclasses in this subpackage are split differently from other subpackages in the
DB. Instead of being split by table group, corresponding to a Composer (which defines that
table group), Accessors are split by a separate dimension: table "type". This is why we
have a "TableAccessor" and an "FTSAccessor": the former exposes access operations
available to generic tables, the latter to FTS tables (instead of being designed
specifically around "core" and "fts" groups, for instance).
Seeing as FTS tables are "generic" tables, it seems inconsistent not to have FTSAccessor
inherit from TableAccessor. While this would work fine, the model we're working with
doesn't really need it; you can instead think of the FTSAccessor as defining _only_
FTS-specific operations. Given that you have a Composer for your desired table group, you
can then wrap it with your desired set of "access actions," available in separate Accessor
subclasses.
For instance, you could wrap an FTSComposer in either a TableAccessor or FTSAccessor. The
former will treat the tables in the composer like regular tables, exposing methods like
`.select` and `.select_one`, whereas the latter defines FTS-specific actions like
`.search`.
'''
from co3.accessors.sql import SQLAccessor
from co3.accessors.fts import FTSAccessor
from co3.accessors.vss import VSSAccessor
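
A sketch of the idea above: the same database can be wrapped by different accessor
"types" depending on the desired access actions (here `database` stands in for any
existing co3 `Database` instance):

```py
from co3.accessors import SQLAccessor, FTSAccessor

sql_access = SQLAccessor(database)   # generic table operations: select, select_one, ...
fts_access = FTSAccessor(database)   # FTS-specific operations: search
```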

9 binary files not shown

147
co3/accessors/fts.py Normal file

@ -0,0 +1,147 @@
import sqlalchemy as sa
from co3 import util
from co3.accessor import Accessor
class FTSAccessor(Accessor):
def search(
self,
table_name : str,
select_cols : str | list | None = '*',
search_cols : str | None = None,
q : str | None = None,
colq : str | None = None,
snip_col : int | None = 0,
hl_col : int | None = 0,
limit : int | None = 100,
snip : int | None = 64,
tokenizer : str | None = 'unicode61',
group_by : str | None = None,
agg_cols : list | None = None,
wherein_dict: dict | None = None,
unique_on : dict | None = None,
):
'''
Execute a search query against an indexed FTS table for specific primitives. This
method is mostly a generic FTS handler, capable of handling queries to any available
FTS table with a matching naming scheme (`<table_name>_fts_<tokenizer>`). The current
intention is to support all tokenizers, for file, note, block, and link primitives.
Search results include all FTS table columns, as well as SQLite-supported `snippet`s
and `highlight`s for matches. Matches are filtered and ordered by SQLite's
`MATCH`-based score for the text & column queries. Results are (a list of) fully
expanded dictionaries housing column-value pairs.
Note:
GROUP BY cannot be paired with SQLITE FTS extensions; thus, we perform manual
group checks on the result set in Python before response
Analysis:
The returned JSON structure has been (loosely) optimized for speed on the client
side. Fully forming individual dictionary based responses saves time in
Javascript, as the JSON parser is expected to be able to create the objects
faster than post-hoc construction in JS. This return structure was compared
against returning an array of arrays (all ordered in the same fashion), along with
a column list to be associated with each of the result values. While this saves
some size on the payload (the same column names don't have to be transmitted for
each result), the size of the returned content massively outweighs the
predominantly short column names. The only way this structure would be viable is
if a significant amount was saved on transfer compared to the slow down in JS
object construction; this is (almost) never the case.
Parameters:
table_name : name of FTS table to search
search_cols : space separated string of columns to use for primary queries
q : search query
colq : column constraint string; must conform to SQLite standards (e.g.,
`<col>:<text>`)
snip_col : table column to use for snippets (default: 1; source content column)
hl_col : table column to use for highlights (default: 2; format column, applied
to HTML targets)
limit : maximum number of results to return in the SQL query
snip : snippet length (max: 64)
tokenizer : tokenizer to use (assumes relevant FTS table has been built)
...
wherein_dict: (col-name, value-list) pairs to match result set against, via
WHERE ... IN clauses
Returns:
Dictionary with search results (list of column indexed dictionaries) and relevant
metadata.
'''
search_q = ''
if type(select_cols) is list:
select_cols = ', '.join(select_cols)
# construct main search query
if search_cols and q:
search_q = f'{{{search_cols}}} : {q}'
# add auxiliary search constraints
if colq:
search_q += f' {colq}'
search_q = search_q.strip()
hl_start = '<b><mark>'
hl_end = '</mark></b>'
fts_table_name = f'{table_name}_fts_{tokenizer}'
sql = f'''
SELECT
{select_cols},
snippet({fts_table_name}, {snip_col}, '{hl_start}', '{hl_end}', '...', {snip}) AS snippet,
highlight({fts_table_name}, {hl_col}, '{hl_start}', '{hl_end}') AS highlight
FROM {fts_table_name}
'''
where_clauses = []
if search_q:
where_clauses.append(f"{fts_table_name} MATCH '{search_q}'\n")
if wherein_dict:
for col, vals in wherein_dict.items():
where_clauses.append(f'{col} IN {tuple(vals)}\n')
if where_clauses:
where_str = " AND ".join(where_clauses)
sql += f'WHERE {where_str}'
sql += f'ORDER BY rank LIMIT {limit};'
row_dicts, cols = self.raw_select(sql, include_cols=True)
if group_by is None:
return row_dicts, cols
if agg_cols is None:
agg_cols = []
# "group by" block ID and wrangle the links into a list
# note we can't perform native GROUP BYs with FTS results
group_by_idx = {}
for row in row_dicts:
group_by_attr = row.get(group_by)
# add new entries
for agg_col in agg_cols:
row[f'{agg_col}_agg'] = set()
if group_by_attr is None:
continue
if group_by_attr not in group_by_idx:
group_by_idx[group_by_attr] = row
for agg_col in agg_cols:
if agg_col in row:
group_by_idx[group_by_attr][f'{agg_col}_agg'].add(row[agg_col])
return {
'results' : group_by_idx,
'columns' : cols,
'num_results' : len(row_dicts),
}
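
A hedged usage sketch for `FTSAccessor.search`; it assumes a `Database` instance
`database` whose SQLite schema already contains an FTS5 table named
`notes_fts_unicode61` (the `<table_name>_fts_<tokenizer>` scheme above) with `name`,
`content`, and `format` columns:

```py
from co3.accessors.fts import FTSAccessor

fts = FTSAccessor(database)
results = fts.search(
    table_name  = 'notes',
    search_cols = 'name content',   # columns targeted by the MATCH query
    q           = 'graph theory',
    group_by    = 'name',           # manual post-query grouping (no SQL GROUP BY for FTS)
    agg_cols    = ['format'],
    limit       = 25,
)
# with `group_by` set, `results` is a dict holding 'results', 'columns', 'num_results'
```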

96
co3/accessors/sql.py Normal file

@ -0,0 +1,96 @@
from pathlib import Path
from collections.abc import Iterable
import inspect
from functools import cache
import sqlalchemy as sa
from co3 import util
from co3.accessor import Accessor
from co3.relation import Relation
#from co3.databases.sql import RelationalDatabase, TabularDatabase, SQLDatabase
from co3.relations import TabularRelation, SQLTable
class RelationalAccessor[D: 'RelationalDatabase', R: Relation](Accessor[D]):
pass
class TabularAccessor[D: 'TabularDatabase', R: TabularRelation](RelationalAccessor[D, R]):
pass
class SQLAccessor(TabularAccessor['SQLDatabase', SQLTable]):
def raw_select(
self,
sql,
bind_params=None,
mappings=False,
include_cols=False,
):
res_method = util.db.sa_exec_dicts
if mappings:
res_method = util.db.sa_exec_mappings
return res_method(self.database.engine, sa.text(sql), bind_params=bind_params, include_cols=include_cols)
def select(
self,
table: sa.Table | sa.Subquery | sa.Join,
cols = None,
where = None,
distinct_on = None,
order_by = None,
limit = 0,
mappings = False,
include_cols = False,
):
'''
Perform a SELECT query against the provided table-like object (see
`check_table()`).
Deprecated: String aliases
String aliases for tables are no longer supported. This method no longer checks
against any specific schema table-maps or Composers. Instead, this should be
done outside the Accessor.
Parameters:
distinct_on: column(s) to collapse results on; for now serves as a proxy for DISTINCT
(implemented via GROUP BY; no aggregation methods accepted)
order_by: column to order results by (can use <col>.desc() to order
by descending)
'''
if where is None:
where = sa.true()
res_method = util.db.sa_exec_dicts
if mappings:
res_method = util.db.sa_exec_mappings
stmt = sa.select(table).where(where)
if cols is not None:
stmt = sa.select(*cols).select_from(table).where(where)
if distinct_on is not None:
stmt = stmt.group_by(distinct_on)
if order_by is not None:
stmt = stmt.order_by(order_by)
if limit > 0:
stmt = stmt.limit(limit)
return res_method(self.database.engine, stmt, include_cols=include_cols)
def select_one(self, table, cols=None, where=None, mappings=False, include_cols=False):
res = self.select(table, cols=cols, where=where, limit=1, mappings=mappings, include_cols=include_cols)
if include_cols and len(res[0]) > 0:
return res[0][0], res[1]
if len(res) > 0:
return res[0]
return None
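
A usage sketch for the accessor above; the table definition is illustrative, and
`database` stands in for an `SQLDatabase`-like instance that provides an engine:

```py
import sqlalchemy as sa
from co3.accessors.sql import SQLAccessor

metadata = sa.MetaData()
notes = sa.Table(
    'notes', metadata,
    sa.Column('id', sa.Integer, primary_key=True),
    sa.Column('name', sa.String),
    sa.Column('format', sa.String),
)

accessor = SQLAccessor(database)
rows = accessor.select(
    notes,
    cols     = [notes.c.name, notes.c.format],
    where    = notes.c.format == 'html5',
    order_by = notes.c.name,
    limit    = 10,
)
one = accessor.select_one(notes, where=notes.c.name == 'index')
```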

100
co3/accessors/vss.py Normal file

@ -0,0 +1,100 @@
import pickle
import logging
from pathlib import Path
import time
import sqlalchemy as sa
from sentence_transformers import SentenceTransformer, util
from co3.accessor import Accessor
logger = logging.getLogger(__name__)
class VSSAccessor(Accessor):
def __init__(self, cache_path):
super().__init__()
self._model = None
self._embeddings = None
self._embedding_size = 384
self.embedding_path = Path(cache_path, 'embeddings.pkl')
def write_embeddings(self, embedding_dict):
self.embedding_path.write_bytes(pickle.dumps(embedding_dict))
def read_embeddings(self):
if not self.embedding_path.exists():
logger.warning(
f'Attempting to access non-existent embeddings at {self.embedding_path}'
)
return None
return pickle.loads(self.embedding_path.read_bytes())
@property
def model(self):
if self._model is None:
# model trained with 128 token seqs
self._model = SentenceTransformer('sentence-transformers/all-MiniLM-L12-v2')
return self._model
@property
def embeddings(self):
if self._embeddings is None:
self._embeddings = self.read_embeddings()
return self._embeddings
def embed_chunks(self, chunks, batch_size=64, show_prog=True):
return self.model.encode(
chunks,
batch_size = batch_size,
show_progress_bar = show_prog,
convert_to_numpy = True,
normalize_embeddings = True
)
def search(
self,
query : str,
index_name : str,
limit : int = 10,
score_threshold = 0.5,
):
'''
Parameters:
index_name: one of ['chunks','blocks','notes']
'''
if not query:
return None
if index_name not in self.embeddings:
logger.warning(
f'Index "{index_name}" does not exist'
)
return None
start = time.time()
query_embedding = self.embed_chunks(query, show_prog=False)
index_ids, index_embeddings, index_items = self.embeddings[index_name]
hits = util.semantic_search(
query_embedding,
index_embeddings,
top_k=limit,
score_function=util.dot_score
)[0]
hits = [hit for hit in hits if hit['score'] >= score_threshold]
for hit in hits:
idx = hit['corpus_id']
hit['group_name'] = index_ids[idx]
hit['item'] = index_items[idx]
logger.info(f'{len(hits)} hits in {time.time()-start:.2f}s')
return hits
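
A hedged usage sketch: `vss` is assumed to be a `VSSAccessor` whose cache directory
already contains an `embeddings.pkl` written via `write_embeddings`, keyed by index
name ('chunks', 'blocks', or 'notes'):

```py
# embed a few text chunks (normalized vectors, numpy output)
vectors = vss.embed_chunks(
    ['first paragraph of a note', 'second paragraph of a note'],
    show_prog=False,
)

# query an existing index; each hit carries 'score', 'group_name', and 'item'
hits = vss.search('semantic similarity in notes', index_name='chunks', limit=5)
for hit in hits:
    print(hit['score'], hit['group_name'])
```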

106
co3/co3.py Normal file

@ -0,0 +1,106 @@
'''
CO3
CO3 is an abstract base class for scaffolding object hierarchies and managing operations
with associated database schemas. It facilitates something like a "lightweight ORM" for
classes/tables/states with fixed transformations of interest. The canonical use case is
managing hierarchical document relations, format conversions, and syntactical components.
'''
import inspect
import logging
from functools import wraps, partial
#from localsys.db.schema import tables
logger = logging.getLogger(__name__)
#def register_format(_format):
# def decorator(func):
# self.collate.format_map[_format] = func
#
# @wraps(func)
# def register(*args, **kwargs):
# return func(*args, **kwargs)
#
# return register
# return decorator
def collate(action_key, action_groups=None):
    if action_groups is None:
        action_groups = [None]

    def decorator(func):
        func._action_data = (action_key, action_groups)
        return func
    return decorator
class FormatRegistryMeta(type):
def __new__(cls, name, bases, attrs):
action_registry = {}
# add registered superclass methods; iterate over bases (usually just one), then
# that base's chain down (reversed), then methods from each subclass
for base in bases:
for _class in reversed(base.mro()):
methods = inspect.getmembers(_class, predicate=inspect.isfunction)
for _, method in methods:
if hasattr(method, '_action_data'):
action_key, action_groups = method._action_data
action_registry[action_key] = (method, action_groups)
# add final registered formats for the current class, overwriting any found in
# superclass chain
for attr_name, attr_value in attrs.items():
if hasattr(attr_value, '_action_data'):
action_key, action_groups = attr_value._action_data
action_registry[action_key] = (attr_value, action_groups)
attrs['action_map'] = action_registry
return super().__new__(cls, name, bases, attrs)
class CO3(metaclass=FormatRegistryMeta):
'''
CO3: COllate, COllect, COmpose - conversion & DB insertion base
- Collate: organize and transform conversion outputs, possibly across class components
- Collect: gather core attributes, conversion data, and subcomponents for DB insertion
- Compose: construct object-associated DB table references through the class hierarchy
Note: on action groups
Group keys are simply named collections to make it easy for storage components to
be attached to action subsets. They do _not_ augment the action registration
namespace, meaning the action key should still be unique; the group key is purely
auxiliary.
Action methods can also be attached to several groups, in case there is
overlapping utility within or across schemas or storage media. In this case, it
becomes particularly critical to ensure registered `collate` methods really are
just "gathering results" from possibly heavy-duty operations, rather than
performing them when called, so as to reduce wasted computation.
'''
@property
def attributes(self):
'''
Method to define how a subtype's inserts should be handled under `collect` for
canonical attributes, i.e., inserts to the type's table.
'''
return vars(self)
@property
def components(self):
'''
Method to define how a subtype's inserts should be handled under `collect` for
constituent components that need handling.
'''
return []
def collate(self, action_key, *action_args, **action_kwargs):
if action_key not in self.action_map:
logger.debug(f'Collation for {action_key} not supported')
return None
else:
return self.action_map[action_key][0](self, *action_args, **action_kwargs)
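
A sketch of how registered actions propagate down a CO3 hierarchy via
`FormatRegistryMeta` (class names here are illustrative only):

```py
from co3.co3 import CO3, collate

class Note(CO3):
    def __init__(self, name, content):
        self.name = name
        self.content = content

    @collate('src')
    def to_src(self):
        return self.content

class HTMLNote(Note):
    @collate('html5', action_groups=['conversions'])
    def to_html(self):
        return f'<p>{self.content}</p>'

# FormatRegistryMeta aggregates actions from the full superclass chain, so
# HTMLNote.action_map contains both 'src' (inherited) and 'html5' (its own)
note = HTMLNote('note-a', 'hello')
note.collate('html5')   # -> '<p>hello</p>'
```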

109
co3/collector.py Normal file

@ -0,0 +1,109 @@
'''
Defines the Collector base class.
This module is the critical "middleware" connecting the primitive object definitions and
their representations in the database. It operates with full knowledge of how both are
defined, and abstracts away both the prep work for DB insertions as well as updates
trickling down the primitive hierarchy.
The `src` format target is re-used for both canonical tables/primitives, as well as
<prim>_conversion_matter tables in tables/conversions under the `src` format. The latter
is meant to extend those attributes that are format-specific (i.e., would change when, say,
converting to `html5`), and thus need to be broken across the format dimension.
Note:
Despite the structure of the database module, this class does not currently inherit
from a super class in localsys.db (like the accessors and managers, for instance).
This will likely ultimately be the model that's embraced, but until FTS (or other
groups) need a collector, this will remain an independent class. It is, however,
named like a concrete subclass, taking on the "Core" prefix.
'''
from pathlib import Path
from collections import defaultdict
import logging
import importlib
import subprocess
from uuid import uuid4
import sqlalchemy as sa
from co3 import util
#from localsys.db.schema import tables
logger = logging.getLogger(__name__)
class Collector:
def __init__(self):
self._inserts = defaultdict(lambda: defaultdict(list))
@property
def inserts(self):
return self._inserts_from_receipts()
def _inserts_from_receipts(self, receipts=None, pop=False):
inserts = defaultdict(list)
if receipts is None:
receipts = list(self._inserts.keys())
for receipt in receipts:
if pop: insert_dict = self._inserts.pop(receipt, {})
else: insert_dict = self._inserts[receipt]
for table, insert_list in insert_dict.items():
inserts[table].extend(insert_list)
return dict(inserts)
def _reset_session(self):
self._inserts = defaultdict(lambda: defaultdict(list))
def _generate_unique_receipt(self):
return str(uuid4())
def add_insert(self, table_name, insert_dict, receipts=None):
'''
TODO: formalize table_name mapping; at class level provide a `table_map`, or provide
the table object itself to this method
'''
if table_name not in tables.table_map:
#logger.debug(f'Inserts provided for non-existent table {table_name}')
return None
receipt = self._generate_unique_receipt()
self._inserts[receipt][table_name].append(
util.db.prepare_insert(
tables.table_map[table_name],
insert_dict
)
)
if receipts is not None:
receipts.append(receipt)
return receipt
def collect_inserts(self, receipts=None):
'''
Collect insert-ready dictionaries for the core primitive schema. This method is
effectively a light wrapper around the File and Note-based collection logic
elsewhere in the class.
The overall collection scheme embraces a session-like sequential update model to
an internal insert tracker. The sequence of insert methods is ordered according to
the schema hierarchy, and higher level inserts dictate the scope for lower level
inserts (all methods check and populate the same `inserts` dictionary). Calling
this method flushes any existing inserts, ensuring a re-scan takes place across
calls (or "sessions").
Parameters:
receipts: optional list of receipts to restrict collection to; when omitted, all
pending inserts are collected (inserts are popped from the session either way)
Returns:
Table name-indexed dictionary of insert lists (of column name-indexed dicts)
'''
return self._inserts_from_receipts(receipts, pop=True)
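
A sketch of the intended session flow (note that `add_insert` still references a
module-level `tables` map, per the TODO above, so the table names here are purely
illustrative):

```py
from co3 import Collector

collector = Collector()
receipts  = []

# queue inserts; the accumulated receipts identify these contributions
collector.add_insert('notes', {'name': 'note-a', 'format': 'src'}, receipts=receipts)
collector.add_insert('files', {'name': 'note-a.md'}, receipts=receipts)

# flush just those receipts into table-indexed insert lists, ready for a Manager
inserts = collector.collect_inserts(receipts)
```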

89
co3/composer.py Normal file

@ -0,0 +1,89 @@
'''
Composer
Base for manually defining table compositions outside those natural to the schema
hierarchy (i.e., constructable by a `CO3.compose()` call).
Example: suppose we have a simple object hierarchy A(CO3) -> B -> C. C's in-built
`compose()` method may not always be desirable when constructing composite tables and
running related queries. In this case, a custom Composer can be used to make needed
composite tables easier to reference; in the case below, we define the "BC" composite
table.
```py
class ExampleComposer(Composer):
    @register_table()
    def BC(self):
        full_B = B.compose(full=True)
        full_C = C.compose(full=True)

        return full_B.join(
            full_C,
            full_B.c.name == full_C.c.name, # TODO: is this fine? or do we need base table refs
            outer=True
        )
```
'''
from pathlib import Path
from co3.mapper import Mapper
def register_table(table_name=None):
'''
Registry decorator for defined composer classes. Decorating a class method simply
attaches a `table_name` attribute to it, setting it to either a provided value or the
name of the method itself. Methods with a `table_name` attribute are later swept up at
the class level and placed in the `table_map`.
'''
    def decorator(func):
        func.table_name = table_name if table_name is not None else func.__name__
        return func
    return decorator
class Composer[M: Mapper]:
'''
Base composer wrapper for table groupings.
The schema is centered around a connected group of tables (via foreign keys). Thus,
most operations need to be coordinated across tables. The `accessors` submodules
are mostly intended to provide a "secondary layer" over the base set of tables in the
schema, exposing common higher level table compositions (i.e., chained JOINs). See
concrete instances (e.g., CoreAccessor, FTSAccessor) for actual implementations of these
tables; the base class does not expose any tables itself.
Tables in subclasses are registered with the `register_table` decorator, automatically
indexing them under the provided name and making them available via the `table_map`.
'''
def __init__(self):
self._set_tables()
def _set_tables(self):
'''
Skip properties (so appropriate delays can be used), and set the table registry at
the class level. This only takes place during the first
instantiation of the class, and makes it possible to definitively tie methods to
composed tables during lookup with `get_table()`.
'''
cls = self.__class__
# in case the class has already been instantiated
if hasattr(cls, 'table_map'): return
table_map = {}
for key, value in cls.__dict__.items():
if isinstance(value, property):
continue # Skip properties
if callable(value) and hasattr(value, 'table_name'):
table_map[value.table_name] = value(self)
cls.table_map = table_map
def get_table(self, table_name):
'''
Retrieve the named table composition, if defined.
'''
return self.table_map.get(table_name)
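
A small usage sketch (the tables here are illustrative):

```py
import sqlalchemy as sa
from co3.composer import Composer, register_table

metadata = sa.MetaData()
files = sa.Table('files', metadata, sa.Column('name', sa.String, primary_key=True))
notes = sa.Table(
    'notes', metadata,
    sa.Column('name', sa.String, primary_key=True),
    sa.Column('format', sa.String),
)

class NoteComposer(Composer):
    @register_table()
    def note_files(self):
        # composed access point: notes joined onto their backing files
        return files.join(notes, files.c.name == notes.c.name)

composer   = NoteComposer()                     # sweeps decorated methods into `table_map`
note_files = composer.get_table('note_files')   # -> the composed Join object
```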

87
co3/database.py Normal file

@ -0,0 +1,87 @@
'''
Database
Central object for defining storage protocol-specific interfaces. The database wraps up
central items for interacting with database resources, namely the Accessor and Manager
objects.
The Database type hierarchy attempts to be exceedingly general; SQL-derivatives should
subclass from the RelationalDatabase subtype, for example, which itself becomes a new
generic via type dependence on Relation.
'''
import logging
from typing import Self
from co3.accessor import Accessor
from co3.composer import Composer
from co3.manager import Manager
from co3.indexer import Indexer
logger = logging.getLogger(__name__)
class Database:
accessor: type[Accessor[Self]] = Accessor
manager: type[Manager[Self]] = Manager
def __init__(self, resource):
'''
Variables:
_local_cache: a database-local property store for ad-hoc CacheBlock-esque
methods, that are nevertheless _not_ query/group-by responses to
pass on to the Indexer. Dependent properties should write to the
this cache and check for existence of stored results; the cache
state must be managed globally.
'''
self.resource = resource
self._access = self.accessor(self)
self._manage = self.manager(self)
self._index = Indexer(self._access)
self._local_cache = {}
self.reset_cache = False
@property
def engine(self):
'''
Database property to provide a singleton engine for DB interaction, initializing
the database if it doesn't already exist.
TODO: figure out thread safety across engines and/or connection. Any issue with
hanging on to the same engine instance for the Database instance?
'''
raise NotImplementedError
def connect(self):
self.engine.connect()
@property
def access(self):
return self._access
@property
def compose(self):
return self._compose
@property
def index(self):
if self.reset_cache:
self._index.cache_clear()
self.reset_cache = False
return self._index
@property
def manage(self):
'''
Accessing `.manage` queues a cache clear on the external index, as well as wipes the
local index.
'''
self.reset_cache = True
self._local_cache = {}
return self._manage
def populate_indexes(self): pass
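
A minimal sketch of a concrete subclass, assuming a SQLite file path as the resource
(the subclass name and engine handling are illustrative, not part of the package):

```py
import sqlalchemy as sa

from co3.database import Database
from co3.accessors.sql import SQLAccessor
from co3.managers.sql import SQLManager

class SQLiteFileDatabase(Database):
    accessor = SQLAccessor
    manager  = SQLManager

    def __init__(self, resource):
        super().__init__(resource)
        self._engine = None

    @property
    def engine(self):
        # lazily create and reuse a single engine for this Database instance
        if self._engine is None:
            self._engine = sa.create_engine(f'sqlite:///{self.resource}')
        return self._engine

db = SQLiteFileDatabase('co3.sqlite')
db.access   # SQLAccessor bound to this database
db.manage   # SQLManager bound to this database (also queues an index cache clear)
```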

3
co3/databases/__init__.py Normal file

@ -0,0 +1,3 @@
from co3.databases.sql import *
from co3.databases.fts import FTSDatabase
from co3.databases.vss import VSSDatabase

8 binary files not shown

8
co3/databases/fts.py Normal file

@ -0,0 +1,8 @@
from co3.database import Database
from co3.accessors.fts import FTSAccessor
from co3.managers.fts import FTSManager
class FTSDatabase(Database):
    accessor = FTSAccessor
    manager = FTSManager

34
co3/databases/sql.py Normal file

@ -0,0 +1,34 @@
from typing import Self
from co3.database import Database
from co3.accessors.sql import RelationalAccessor, TabularAccessor, SQLAccessor
from co3.managers.sql import RelationalManager, TabularManager, SQLManager
from co3.relation import Relation
from co3.relations import TabularRelation, SQLTable
class RelationalDatabase[R: Relation](Database):
    accessor: type[RelationalAccessor[Self, R]] = RelationalAccessor[Self, R]
    manager: type[RelationalManager[Self, R]] = RelationalManager[Self, R]

class TabularDatabase[R: TabularRelation](RelationalDatabase[R]):
    '''
    accessor/manager assignments satisfy the supertype's type settings;
    `TabularAccessor[Self, R]` is of type `type[RelationalAccessor[Self, R]]`
    (and yes, `type[]` specifies that the variable is itself being set to a type or a
    class, rather than a satisfying _instance_)
    '''
    accessor: type[TabularAccessor[Self, R]] = TabularAccessor[Self, R]
    manager: type[TabularManager[Self, R]] = TabularManager[Self, R]

class SQLDatabase[R: SQLTable](TabularDatabase[R]):
    accessor = SQLAccessor
    manager = SQLManager

class SQLiteDatabase(SQLDatabase[SQLTable]):
    pass

9
co3/databases/vss.py Normal file

@ -0,0 +1,9 @@
from co3.database import Database
from co3.accessors.vss import VSSAccessor
from co3.managers.vss import VSSManager
class VSSDatabase(Database):
    accessor = VSSAccessor
    manager = VSSManager

433
co3/indexer.py Normal file

@ -0,0 +1,433 @@
import time
import logging
import threading
from collections import defaultdict
from collections.abc import Iterable
import sqlalchemy as sa
logger = logging.getLogger(__name__)
class Indexer:
'''
Indexer class
Provides restricted access to an underlying Accessor to enable more efficient, superficial
caching.
Cache clearing is to be handled by a wrapper class, like the Database.
Caching occurs at the class level, with indexes prefixed by table's origin Composer.
This means that cached selects/group-bys will be available regardless of the provided
Accessors so long as the same Composer is used under the hood.
'''
_cls_select_cache = {}
_cls_groupby_cache = defaultdict(dict)
def __init__(self, accessor, cache_select=True, cache_groupby=True):
self.accessor = accessor
# set instance caches; if remains None, methods can't index
self._select_cache = None
self._groupby_cache = None
if cache_groupby and not cache_select:
raise ValueError('cannot cache groupbys without select caching enabled')
if cache_select:
self._select_cache = self._cls_select_cache
if cache_groupby:
self._groupby_cache = self._cls_groupby_cache
self._access_lock = threading.Lock()
def cache_clear(self, group_by_only=False):
self._groupby_cache.clear()
if not group_by_only:
self._select_cache.clear()
def cache_block(
self,
table,
**kwargs,
):
'''
Provide a user-friendly, dynamically re-indexable CacheBlock around this Indexer and the given table.
'''
return CacheBlock(
indexer = self,
table = table,
**kwargs,
)
def cached_query(
self,
table,
cols = None,
where = None,
distinct_on = None,
order_by = None,
limit = 0,
group_by = None,
agg_on = None,
index_on = None,
):
'''
Like `group_by`, but makes a full query against the Accessor's table `table` and
caches the results. The processing performed by the GROUP BY is also cached.
Update: `cached_select` and `cached_group_by` now unified by a single
`cached_query` method. This allows better defined GROUP BY caches, that are
reactive to the full set of parameters returning the result set (and not just the
table, requiring a full query).
Note: on cache keys
Cache keys are now fully stringified, as many objects are now allowed to be
native SQLAlchemy objects. Indexing these objects works, but doing so will
condition the cache on their memory addresses, which isn't what we want.
SQLAlchemy converts most join/column/table-like objects to reasonable strings,
which will look the same regardless of instance.
Context: this became a clear issue when passing in
`order_by=<col>.desc()`. The `desc()` causes the index to store the column in
an instance-specific way, rather than an easily re-usable, canonical column
reference. Each time the CoreDatabase.files() was being called, for instance,
that @property would be re-evaluated, causing `desc()` to be re-initialized,
and thus look different to the cache. Stringifying everything prevents this
(although this could well be an indication that only a single `cache_block`
should ever be returned by database properties).
Note: on access locks
A double-checked locking scheme is employed before both of the stages (select
and manual group by), using the same lock. This resolves the common scenario
where many threads need to look up a query in the cache, experience a cache
miss, and try to do the work. This non-linearly explodes the total time to
wait in my experience, so doing this only when needed saves tons of time,
especially in high-congestion moments.
'''
start = time.time()
cache_key = tuple(map(str, (table, cols, where, distinct_on, order_by, limit)))
# apparently this is the double-check locking scheme (didn't realize when implementing)
if self._select_cache is None or cache_key not in self._select_cache:
# cache re-compute possible, acquire lock to continue. A later thread may
# acquire this after work has been done by an earlier thread, so re-eval the
# condition below before actually performing a DB read. If access results in a
# cache hit, locking isn't needed.
with self._access_lock:
if self._select_cache is None or cache_key not in self._select_cache:
results = self.accessor.select(
table,
cols=cols,
where=where,
distinct_on=distinct_on,
order_by=order_by,
limit=limit,
mappings=True
)
# cache results if select_cache is defined
if self._select_cache is not None:
self._select_cache[cache_key] = results
logger.debug(
f'Indexer "select" cache miss for table "{table}": access in {time.time()-start:.4f}s'
)
else:
results = self._select_cache[cache_key]
logger.debug(
f'Indexer "select" cache hit for table "{table}": access in {time.time()-start:.4f}s'
)
else:
results = self._select_cache[cache_key]
logger.debug(
f'Indexer "select" cache hit for table "{table}": access in {time.time()-start:.4f}s'
)
start = time.time()
cache_key = (*cache_key, group_by, agg_on, index_on)
if group_by is not None:
if self._groupby_cache is None or cache_key not in self._groupby_cache:
with self._access_lock:
if self._groupby_cache is None or cache_key not in self._groupby_cache:
results = self.group_by(
results,
group_by = group_by,
agg_on = agg_on,
index_on = index_on,
return_index = True,
)
if self._groupby_cache is not None:
self._groupby_cache[cache_key] = results
logger.debug(
f'Indexer "group_by" cache miss for table "{table}": access in {time.time()-start:.4f}s'
)
else:
results = self._groupby_cache[cache_key]
logger.debug(
f'Indexer "group_by" cache hit for table "{table}": access in {time.time()-start:.4f}s'
)
else:
results = self._groupby_cache[cache_key]
logger.debug(
f'Indexer "group_by" cache hit for table "{table}": access in {time.time()-start:.4f}s'
)
return results
@classmethod
def group_by(
cls,
rows,
group_by,
agg_on=None,
index_on=None,
return_index=False,
):
'''
Post-query "group by"-like aggregation. Creates an index over a set of columns
(`group_by_cols`), and aggregates values from `agg_cols` under the groups.
Rows can be dicts or mappings, and columns can be strings or SQLAlchemy columns.
To ensure the right columns are being used for the operation, it's best to pass in
mappings and use SQA columns if you aren't sure exactly how the keys look in your
results (dicts can have ambiguous keys across tables with the same columns and/or
different labeling schemes altogether).
TODO: add a flag that handles None's as distinct. That is, for the group_by
column(s) of interest, if rows in the provided query set have NULL values for
these columns, treat all such rows as their "own group" and return them alongside
the grouped/aggregated ones. This is behavior desired by something like
FTSManager.recreate(), which wants to bundle up conversions for blocks
(effectively grouping by blocks.name and link.id, aggregating on
block_conversions.format, then flattening). You could either do this, or as the
caller just make sure to first filter the result set before grouping (e.g.,
splitting the NULL-valued rows from those that are well-defined), and then
stitching the two sets back together afterward.
Multi-dim update:
- group_by: can be a tuple of tuples of columns. Each inner tuple is a nested
"group by index" in the group by index
-
'''
if not rows:
return {} if return_index else []
rows_are_mappings = not isinstance(rows[0], dict)
if not rows_are_mappings:
if isinstance(group_by, sa.Column):
group_by = group_by.name
else:
group_by = str(group_by)
#if group_by is None: group_by = []
#elif not isinstance(group_by, Iterable): group_by = [group_by]
if agg_on is None: agg_on = []
elif not isinstance(agg_on, Iterable): agg_on = [agg_on]
if index_on is None: index_on = []
elif not isinstance(index_on, Iterable): index_on = [index_on]
agg_on_names = []
for agg in agg_on:
# if a SQA column, can either use `.name` or `str(c)`. The latter includes the
# table name, the former doesn't; ambiguity can be introduced here.
if isinstance(agg, sa.Column):
agg_on_names.append(agg.name)
else:
agg_on_names.append(str(agg))
index_on_names = []
for index in index_on:
# if a SQA column, can either use `.name` or `str(c)`. The latter includes the
# table name, the former doesn't; ambiguity can be introduced here.
if isinstance(index, sa.Column):
index_on_names.append(index.name)
else:
index_on_names.append(str(index))
# when rows are dicts, use columns' string names
if not rows_are_mappings:
agg_on = agg_on_names
index_on = index_on_names
#print(f'rows_are_mappings: {rows_are_mappings}')
#print(f'group_by: {group_by}')
#print(f'agg_on: {agg_on}')
#print(f'agg_on_names: {agg_on_names}')
#print(f'index_on: {index_on}')
#print(f'index_on_names: {index_on_names}')
# "group by" block ID and wrangle the links into a list
group_by_idx = {}
for row in rows:
# generic get
group_by_attr = row.get(group_by)
# wrap possible mapping dict
row_dict = dict(row)
# add new entries; standardize
#aggregates = {}
#for agg_name in agg_on_names:
# aggregates[agg_name] = []
#row_dict['aggregates'] = aggregates
row_dict['aggregates'] = []
indexes = {}
for index_name in index_on_names:
indexes[index_name] = {}
row_dict['indexes'] = indexes
if group_by_attr is None:
continue
if group_by_attr not in group_by_idx:
group_by_idx[group_by_attr] = row_dict
# actually include all agg cols, even if None, so agg array indexes align
agg_dict = {
agg_on_names[i] : row.get(agg_col)
for i, agg_col in enumerate(agg_on)
}
#aggregates = group_by_idx[group_by_attr]['aggregates']
#for agg_key, agg_val in agg_dict.items():
# aggregates[agg_key].append(agg_val)
aggregates = group_by_idx[group_by_attr]['aggregates']
aggregates.append(agg_dict)
indexes = group_by_idx[group_by_attr]['indexes']
for i, index_col in enumerate(index_on):
index_name = index_on_names[i]
indexes[index_name][row[index_col]] = agg_dict
if return_index:
return group_by_idx
return list(group_by_idx.values())
class CacheBlock:
'''
CacheBlock class
Wraps up a set of query parameters for a specific entity, and provides cached access
to different types of "re-queries" via an associated Indexer.
The goal here is to help build/define entities as the possibly complex transformations
on the base schema that they are. For example, the Note primitive (entity)
incorporates details across `files`, `notes`, `note_conversions`, and
`note_conversion_matter` tables (defined in a single endpoint by a Composer), often
needs to be selected in particular ways (via an Accessor), and results stored for fast
access later on (handled by an Indexer). This pipeline can be daunting and requires
too many moving parts to be handled explicitly everywhere. CacheBlocks wrap up a set
of query "preferences," exposing a simpler interface for downstream access to
entities. It still allows for low-level control over re-grouping/indexing, raw hits to
the actual DB, etc, but keeps things tighter and well-behaved for the Indexer.
You can think of these as the Indexer's "fingers"; they're deployable mini-Indexes
that "send back" results to the class cache, which is "broadcast" to all other
instances for use when necessary.
Note: Example usage
```py
cb = CacheBlock()
# Set up cached queries with chained params or via call:
cb.where(t.notes.c.name=="name").group_by(t.note_conversions.c.format)
cb() # get results
# - OR - # (use strings when known)
cb.where(t.notes.c.name=="name").group_by('format')
cb() # get results
# - OR - # (use kwargs in the call; results returned right away)
cb(
where=(t.notes.c.name=="name"),
group_by='format'
)
```
'''
def __init__(
self,
indexer,
table,
cols = None,
where = None,
distinct_on = None,
order_by = None,
limit = 0,
group_by = None,
agg_on = None,
index_on = None,
):
self.indexer = indexer
self.query_args = {
'table' : table,
'cols' : cols,
'where' : where,
'distinct_on' : distinct_on,
'order_by' : order_by,
'limit' : limit,
'group_by' : group_by,
'agg_on' : agg_on,
'index_on' : index_on,
}
def _query(self, **kwargs):
'''Make cached query with defaults, override with those provided'''
return self.indexer.cached_query(**{
k : (v if k not in kwargs else kwargs[k])
for k,v in self.query_args.items()
})
def __call__(self, **kwargs):
'''
TODO: overload this for the queries, i.e. getting keys or returning aggregates
'''
return self._query(**kwargs)
def where(self, where):
self.query_args['where'] = where
return self
#return self._query(where=where)
def distinct_on(self, distinct_on):
self.query_args['distinct_on'] = distinct_on
return self
#return self._query(distinct_on=distinct_on)
def order_by(self, order_by):
self.query_args['order_by'] = order_by
return self
#return self._query(order_by=order_by)
def limit(self, limit):
self.query_args['limit'] = limit
return self
def group_by(self, group_by):
self.query_args['group_by'] = group_by
return self
#return self._query(group_by=group_by)
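
A small, self-contained example of the post-query grouping performed by
`Indexer.group_by` (plain dict rows; `agg_on`/`index_on` given as lists):

```py
from co3.indexer import Indexer

rows = [
    {'name': 'note-a', 'format': 'html5', 'link': 1},
    {'name': 'note-a', 'format': 'src',   'link': 2},
    {'name': 'note-b', 'format': 'html5', 'link': 3},
]

grouped = Indexer.group_by(rows, group_by='name', agg_on=['format'], index_on=['link'])

# two grouped rows ('note-a', 'note-b'); the 'note-a' row carries
#   'aggregates': [{'format': 'html5'}, {'format': 'src'}]
#   'indexes':    {'link': {1: {'format': 'html5'}, 2: {'format': 'src'}}}
```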

Some files were not shown because too many files have changed in this diff.