initial commit
This commit is contained in:
commit
9218f4a404
1
.python-version
Normal file
1
.python-version
Normal file
@ -0,0 +1 @@
|
||||
co4
|
0
MANIFEST.in
Normal file
0
MANIFEST.in
Normal file
21
README.md
Normal file
21
README.md
Normal file
@ -0,0 +1,21 @@
|
||||
# Overview
|
||||
`co3` is a package for file conversion and associated database operations. The `CO3` base class
|
||||
provides a standard interface for performing conversions, preparing inserts, and
|
||||
interacting with database schemas that mirror the class hierarchy.
|
||||
|
||||
Simplified description of the operational model:
|
||||
|
||||
**Goal**: interact with a storage medium (database, pickled structure, VSS framework) with
|
||||
a known schema.
|
||||
|
||||
- **Accessor** to provide access to stored items
|
||||
- **Composer** to compose common access points (e.g., JOINed tables)
|
||||
- **Indexer** to index/cache access queries
|
||||
- **Manager** to manage storage state (e.g., supported inserts, database syncs)
|
||||
- **Collector** to collect data for updating storage state
|
||||
- **Database** to wrap a storage medium and provide unified access/management entry points
|
||||
- **Mapper** to map CO3 class hierarchies to storage schema components
|
||||
- **Relation** to represent named schema components (e.g., tables) within the storage medium
|
||||
|
||||
**CO3** is an abstract base class that makes it easy to integrate this model with object
|
||||
hierarchies that mirror a storage schema.
|
1
build/__editable__.co3-0.1.1-py3-none-any/co3/__init__.py
Symbolic link
1
build/__editable__.co3-0.1.1-py3-none-any/co3/__init__.py
Symbolic link
@ -0,0 +1 @@
|
||||
/home/smgr/Documents/projects/ontolog/co3/co3/__init__.py
|
1
build/__editable__.co3-0.1.1-py3-none-any/co3/accessor.py
Symbolic link
1
build/__editable__.co3-0.1.1-py3-none-any/co3/accessor.py
Symbolic link
@ -0,0 +1 @@
|
||||
/home/smgr/Documents/projects/ontolog/co3/co3/accessor.py
|
@ -0,0 +1 @@
|
||||
/home/smgr/Documents/projects/ontolog/co3/co3/accessors/__init__.py
|
1
build/__editable__.co3-0.1.1-py3-none-any/co3/accessors/fts.py
Symbolic link
1
build/__editable__.co3-0.1.1-py3-none-any/co3/accessors/fts.py
Symbolic link
@ -0,0 +1 @@
|
||||
/home/smgr/Documents/projects/ontolog/co3/co3/accessors/fts.py
|
1
build/__editable__.co3-0.1.1-py3-none-any/co3/accessors/sql.py
Symbolic link
1
build/__editable__.co3-0.1.1-py3-none-any/co3/accessors/sql.py
Symbolic link
@ -0,0 +1 @@
|
||||
/home/smgr/Documents/projects/ontolog/co3/co3/accessors/sql.py
|
1
build/__editable__.co3-0.1.1-py3-none-any/co3/accessors/vss.py
Symbolic link
1
build/__editable__.co3-0.1.1-py3-none-any/co3/accessors/vss.py
Symbolic link
@ -0,0 +1 @@
|
||||
/home/smgr/Documents/projects/ontolog/co3/co3/accessors/vss.py
|
1
build/__editable__.co3-0.1.1-py3-none-any/co3/co3.py
Symbolic link
1
build/__editable__.co3-0.1.1-py3-none-any/co3/co3.py
Symbolic link
@ -0,0 +1 @@
|
||||
/home/smgr/Documents/projects/ontolog/co3/co3/co3.py
|
1
build/__editable__.co3-0.1.1-py3-none-any/co3/collector.py
Symbolic link
1
build/__editable__.co3-0.1.1-py3-none-any/co3/collector.py
Symbolic link
@ -0,0 +1 @@
|
||||
/home/smgr/Documents/projects/ontolog/co3/co3/collector.py
|
1
build/__editable__.co3-0.1.1-py3-none-any/co3/composer.py
Symbolic link
1
build/__editable__.co3-0.1.1-py3-none-any/co3/composer.py
Symbolic link
@ -0,0 +1 @@
|
||||
/home/smgr/Documents/projects/ontolog/co3/co3/composer.py
|
1
build/__editable__.co3-0.1.1-py3-none-any/co3/database.py
Symbolic link
1
build/__editable__.co3-0.1.1-py3-none-any/co3/database.py
Symbolic link
@ -0,0 +1 @@
|
||||
/home/smgr/Documents/projects/ontolog/co3/co3/database.py
|
@ -0,0 +1 @@
|
||||
/home/smgr/Documents/projects/ontolog/co3/co3/databases/__init__.py
|
1
build/__editable__.co3-0.1.1-py3-none-any/co3/databases/fts.py
Symbolic link
1
build/__editable__.co3-0.1.1-py3-none-any/co3/databases/fts.py
Symbolic link
@ -0,0 +1 @@
|
||||
/home/smgr/Documents/projects/ontolog/co3/co3/databases/fts.py
|
1
build/__editable__.co3-0.1.1-py3-none-any/co3/databases/sql.py
Symbolic link
1
build/__editable__.co3-0.1.1-py3-none-any/co3/databases/sql.py
Symbolic link
@ -0,0 +1 @@
|
||||
/home/smgr/Documents/projects/ontolog/co3/co3/databases/sql.py
|
1
build/__editable__.co3-0.1.1-py3-none-any/co3/databases/vss.py
Symbolic link
1
build/__editable__.co3-0.1.1-py3-none-any/co3/databases/vss.py
Symbolic link
@ -0,0 +1 @@
|
||||
/home/smgr/Documents/projects/ontolog/co3/co3/databases/vss.py
|
1
build/__editable__.co3-0.1.1-py3-none-any/co3/indexer.py
Symbolic link
1
build/__editable__.co3-0.1.1-py3-none-any/co3/indexer.py
Symbolic link
@ -0,0 +1 @@
|
||||
/home/smgr/Documents/projects/ontolog/co3/co3/indexer.py
|
1
build/__editable__.co3-0.1.1-py3-none-any/co3/manager.py
Symbolic link
1
build/__editable__.co3-0.1.1-py3-none-any/co3/manager.py
Symbolic link
@ -0,0 +1 @@
|
||||
/home/smgr/Documents/projects/ontolog/co3/co3/manager.py
|
@ -0,0 +1 @@
|
||||
/home/smgr/Documents/projects/ontolog/co3/co3/managers/__init__.py
|
1
build/__editable__.co3-0.1.1-py3-none-any/co3/managers/fts.py
Symbolic link
1
build/__editable__.co3-0.1.1-py3-none-any/co3/managers/fts.py
Symbolic link
@ -0,0 +1 @@
|
||||
/home/smgr/Documents/projects/ontolog/co3/co3/managers/fts.py
|
1
build/__editable__.co3-0.1.1-py3-none-any/co3/managers/sql.py
Symbolic link
1
build/__editable__.co3-0.1.1-py3-none-any/co3/managers/sql.py
Symbolic link
@ -0,0 +1 @@
|
||||
/home/smgr/Documents/projects/ontolog/co3/co3/managers/sql.py
|
1
build/__editable__.co3-0.1.1-py3-none-any/co3/managers/vss.py
Symbolic link
1
build/__editable__.co3-0.1.1-py3-none-any/co3/managers/vss.py
Symbolic link
@ -0,0 +1 @@
|
||||
/home/smgr/Documents/projects/ontolog/co3/co3/managers/vss.py
|
1
build/__editable__.co3-0.1.1-py3-none-any/co3/mapper.py
Symbolic link
1
build/__editable__.co3-0.1.1-py3-none-any/co3/mapper.py
Symbolic link
@ -0,0 +1 @@
|
||||
/home/smgr/Documents/projects/ontolog/co3/co3/mapper.py
|
1
build/__editable__.co3-0.1.1-py3-none-any/co3/relation.py
Symbolic link
1
build/__editable__.co3-0.1.1-py3-none-any/co3/relation.py
Symbolic link
@ -0,0 +1 @@
|
||||
/home/smgr/Documents/projects/ontolog/co3/co3/relation.py
|
@ -0,0 +1 @@
|
||||
/home/smgr/Documents/projects/ontolog/co3/co3/relations/__init__.py
|
1
build/__editable__.co3-0.1.1-py3-none-any/co3/util/__init__.py
Symbolic link
1
build/__editable__.co3-0.1.1-py3-none-any/co3/util/__init__.py
Symbolic link
@ -0,0 +1 @@
|
||||
/home/smgr/Documents/projects/ontolog/co3/co3/util/__init__.py
|
1
build/__editable__.co3-0.1.1-py3-none-any/co3/util/db.py
Symbolic link
1
build/__editable__.co3-0.1.1-py3-none-any/co3/util/db.py
Symbolic link
@ -0,0 +1 @@
|
||||
/home/smgr/Documents/projects/ontolog/co3/co3/util/db.py
|
1
build/__editable__.co3-0.1.1-py3-none-any/co3/util/regex.py
Symbolic link
1
build/__editable__.co3-0.1.1-py3-none-any/co3/util/regex.py
Symbolic link
@ -0,0 +1 @@
|
||||
/home/smgr/Documents/projects/ontolog/co3/co3/util/regex.py
|
1
build/__editable__.co4-0.1.1-py3-none-any/co4/__init__.py
Symbolic link
1
build/__editable__.co4-0.1.1-py3-none-any/co4/__init__.py
Symbolic link
@ -0,0 +1 @@
|
||||
/home/smgr/Documents/projects/ontolog/co4/co4/__init__.py
|
1
build/__editable__.co4-0.1.1-py3-none-any/co4/accessor.py
Symbolic link
1
build/__editable__.co4-0.1.1-py3-none-any/co4/accessor.py
Symbolic link
@ -0,0 +1 @@
|
||||
/home/smgr/Documents/projects/ontolog/co4/co4/accessor.py
|
@ -0,0 +1 @@
|
||||
/home/smgr/Documents/projects/ontolog/co4/co4/accessors/__init__.py
|
1
build/__editable__.co4-0.1.1-py3-none-any/co4/accessors/fts.py
Symbolic link
1
build/__editable__.co4-0.1.1-py3-none-any/co4/accessors/fts.py
Symbolic link
@ -0,0 +1 @@
|
||||
/home/smgr/Documents/projects/ontolog/co4/co4/accessors/fts.py
|
1
build/__editable__.co4-0.1.1-py3-none-any/co4/accessors/table.py
Symbolic link
1
build/__editable__.co4-0.1.1-py3-none-any/co4/accessors/table.py
Symbolic link
@ -0,0 +1 @@
|
||||
/home/smgr/Documents/projects/ontolog/co4/co4/accessors/table.py
|
1
build/__editable__.co4-0.1.1-py3-none-any/co4/accessors/vss.py
Symbolic link
1
build/__editable__.co4-0.1.1-py3-none-any/co4/accessors/vss.py
Symbolic link
@ -0,0 +1 @@
|
||||
/home/smgr/Documents/projects/ontolog/co4/co4/accessors/vss.py
|
1
build/__editable__.co4-0.1.1-py3-none-any/co4/co4.py
Symbolic link
1
build/__editable__.co4-0.1.1-py3-none-any/co4/co4.py
Symbolic link
@ -0,0 +1 @@
|
||||
/home/smgr/Documents/projects/ontolog/co4/co4/co4.py
|
1
build/__editable__.co4-0.1.1-py3-none-any/co4/collector.py
Symbolic link
1
build/__editable__.co4-0.1.1-py3-none-any/co4/collector.py
Symbolic link
@ -0,0 +1 @@
|
||||
/home/smgr/Documents/projects/ontolog/co4/co4/collector.py
|
1
build/__editable__.co4-0.1.1-py3-none-any/co4/composer.py
Symbolic link
1
build/__editable__.co4-0.1.1-py3-none-any/co4/composer.py
Symbolic link
@ -0,0 +1 @@
|
||||
/home/smgr/Documents/projects/ontolog/co4/co4/composer.py
|
@ -0,0 +1 @@
|
||||
/home/smgr/Documents/projects/ontolog/co4/co4/databases/__init__.py
|
1
build/__editable__.co4-0.1.1-py3-none-any/co4/databases/_base.py
Symbolic link
1
build/__editable__.co4-0.1.1-py3-none-any/co4/databases/_base.py
Symbolic link
@ -0,0 +1 @@
|
||||
/home/smgr/Documents/projects/ontolog/co4/co4/databases/_base.py
|
1
build/__editable__.co4-0.1.1-py3-none-any/co4/databases/core.py
Symbolic link
1
build/__editable__.co4-0.1.1-py3-none-any/co4/databases/core.py
Symbolic link
@ -0,0 +1 @@
|
||||
/home/smgr/Documents/projects/ontolog/co4/co4/databases/core.py
|
1
build/__editable__.co4-0.1.1-py3-none-any/co4/databases/fts.py
Symbolic link
1
build/__editable__.co4-0.1.1-py3-none-any/co4/databases/fts.py
Symbolic link
@ -0,0 +1 @@
|
||||
/home/smgr/Documents/projects/ontolog/co4/co4/databases/fts.py
|
1
build/__editable__.co4-0.1.1-py3-none-any/co4/databases/vss.py
Symbolic link
1
build/__editable__.co4-0.1.1-py3-none-any/co4/databases/vss.py
Symbolic link
@ -0,0 +1 @@
|
||||
/home/smgr/Documents/projects/ontolog/co4/co4/databases/vss.py
|
1
build/__editable__.co4-0.1.1-py3-none-any/co4/indexer.py
Symbolic link
1
build/__editable__.co4-0.1.1-py3-none-any/co4/indexer.py
Symbolic link
@ -0,0 +1 @@
|
||||
/home/smgr/Documents/projects/ontolog/co4/co4/indexer.py
|
1
build/__editable__.co4-0.1.1-py3-none-any/co4/manager.py
Symbolic link
1
build/__editable__.co4-0.1.1-py3-none-any/co4/manager.py
Symbolic link
@ -0,0 +1 @@
|
||||
/home/smgr/Documents/projects/ontolog/co4/co4/manager.py
|
@ -0,0 +1 @@
|
||||
/home/smgr/Documents/projects/ontolog/co4/co4/managers/__init__.py
|
1
build/__editable__.co4-0.1.1-py3-none-any/co4/managers/core.py
Symbolic link
1
build/__editable__.co4-0.1.1-py3-none-any/co4/managers/core.py
Symbolic link
@ -0,0 +1 @@
|
||||
/home/smgr/Documents/projects/ontolog/co4/co4/managers/core.py
|
1
build/__editable__.co4-0.1.1-py3-none-any/co4/managers/fts.py
Symbolic link
1
build/__editable__.co4-0.1.1-py3-none-any/co4/managers/fts.py
Symbolic link
@ -0,0 +1 @@
|
||||
/home/smgr/Documents/projects/ontolog/co4/co4/managers/fts.py
|
1
build/__editable__.co4-0.1.1-py3-none-any/co4/managers/vss.py
Symbolic link
1
build/__editable__.co4-0.1.1-py3-none-any/co4/managers/vss.py
Symbolic link
@ -0,0 +1 @@
|
||||
/home/smgr/Documents/projects/ontolog/co4/co4/managers/vss.py
|
1
build/__editable__.co4-0.1.1-py3-none-any/co4/utils/db.py
Symbolic link
1
build/__editable__.co4-0.1.1-py3-none-any/co4/utils/db.py
Symbolic link
@ -0,0 +1 @@
|
||||
/home/smgr/Documents/projects/ontolog/co4/co4/utils/db.py
|
1
build/__editable__.co4-0.1.1-py3-none-any/co4/utils/paths.py
Symbolic link
1
build/__editable__.co4-0.1.1-py3-none-any/co4/utils/paths.py
Symbolic link
@ -0,0 +1 @@
|
||||
/home/smgr/Documents/projects/ontolog/co4/co4/utils/paths.py
|
109
co3/__init__.py
Normal file
109
co3/__init__.py
Normal file
@ -0,0 +1,109 @@
|
||||
'''
|
||||
Database submodule
|
||||
|
||||
- `db`: contains SQLAlchemy-based schema definitions
|
||||
- `accessors`: convenience methods for accessing database entries
|
||||
- `populate`: convenience methods for populating database tables
|
||||
|
||||
The `accessors` and `populate` submodules are each split into `schema` and `fts` method
|
||||
groups. The former concerns methods relating to the actual database schema, the latter to
|
||||
their SQLite FTS counterparts.
|
||||
|
||||
Note: Subpackages organization
|
||||
Subpackages are broken up by inheritance. Within a given submodule, you have a
|
||||
`_base.py` file defining the base class associated with that submodule's title, along
|
||||
with concrete subclasses of that base in their own files. Deeper inheritance would
|
||||
recursively extend this structure. The `__init__.py` for a given submodule then
|
||||
exposes the concrete instances, leaving the base hidden. For example,
|
||||
|
||||
accessors/
|
||||
_base.py
|
||||
core.py
|
||||
fts.py
|
||||
|
||||
`core` and `fts` house the `CoreAccessor` and `FTSAccessor` classes, respectively,
|
||||
and are the direct subclasses of the `Accessor` parent found in the `_base`. This base
|
||||
class _could_ be placed outside of the submodule in the parent directory (imported
|
||||
with something like `from db import accessor` instead of `from db.accessor import
|
||||
_base`). This is entirely valid, but I tend to prefer when the base class is among its
|
||||
direct children, as
|
||||
|
||||
- In this case at least, the base doesn't need to be exposed
|
||||
- The base class is being stowed away under an appropriately named submodule; having a
|
||||
separate `accessor.py` and `accessors/` file/directory can feel a little cluttered.
|
||||
- It makes imports across the accessors feel standardized:
|
||||
|
||||
```py
|
||||
from localsys.db.accessors._base import Accessor
|
||||
|
||||
from localsys.db.accessors.core import CoreAccessor
|
||||
```
|
||||
|
||||
Both have the same level of nesting to reach the class.
|
||||
|
||||
Frankly, both means of organization are perfectly fine, and as far as I can tell,
|
||||
semantically sound in their own right. This particular scheme is just a preference in
|
||||
the moment, and so long as I keep things consistent, choosing one over the other
|
||||
shouldn't matter.
|
||||
|
||||
Additionally, note how `__init__.py`s are typically set up when providing wider access
|
||||
to internal modules. The `init` typically pulls out classes from sibling modules
|
||||
(i.e., files), but will import subpackages at the topmost level. For example, for the
|
||||
structure
|
||||
|
||||
```
|
||||
db/
|
||||
__init__.py
|
||||
accessors/
|
||||
__init__.py
|
||||
_base.py
|
||||
core.py
|
||||
fts.py
|
||||
```
|
||||
|
||||
we have
|
||||
|
||||
```db/__init__.py
|
||||
from localsys.db import accessors
|
||||
```
|
||||
|
||||
which just imports the subpackage `accessors`. However, within subpackage:
|
||||
|
||||
```db/accessors/__init__.py
|
||||
from localsys.db.accessors.core import CoreAccessor
|
||||
```
|
||||
|
||||
we don't just import the submodule `core`; we dig into the file to grab the relevant
|
||||
class and pull it into the outer namespace. Overarching point: `__init__.py` files
|
||||
typically reach into the sibling files (submodules) and pull out classes. Given that
|
||||
this behavior is recursive, `__init__.py` files then respect subpackages (nested
|
||||
directories), importing them at the top-level and expecting an internal `__init__.py`
|
||||
will have managed access appropriately.
|
||||
|
||||
Note: Organization for inheritance over composition
|
||||
At a glance, the organization of subpackages here feels like it clashes with those
|
||||
seen in `localsys.primitives`. `note_components`, for instance, houses the components
|
||||
for the outer `note` module. Contrast this with how the `core` submodule looks: it's
|
||||
composing `*/core.py` files across subpackages `accessors` and `managers`, rather than
|
||||
a single subpackage like `note`. This seems inconsistent, but the subpackages here are
|
||||
actually still organized in the same way: by inheritance. It just happens that
|
||||
all of the note components inherit from the same base class, and are thus confined to
|
||||
a single subpackage. This aside, the subpackages themselves are still created around
|
||||
inheritance, wrapping up a base and direct subclasses.
|
||||
'''
|
||||
|
||||
from co3.accessor import Accessor
|
||||
from co3.co3 import CO3
|
||||
from co3.collector import Collector
|
||||
from co3.composer import Composer
|
||||
from co3.database import Database
|
||||
from co3.indexer import Indexer
|
||||
from co3.manager import Manager
|
||||
from co3.mapper import Mapper
|
||||
from co3.relation import Relation
|
||||
|
||||
from co3 import accessors
|
||||
from co3 import databases
|
||||
from co3 import managers
|
||||
from co3 import relations
|
||||
from co3 import util
|
BIN
co3/__pycache__/__init__.cpython-311.pyc
Normal file
BIN
co3/__pycache__/__init__.cpython-311.pyc
Normal file
Binary file not shown.
BIN
co3/__pycache__/__init__.cpython-312.pyc
Normal file
BIN
co3/__pycache__/__init__.cpython-312.pyc
Normal file
Binary file not shown.
BIN
co3/__pycache__/accessor.cpython-311.pyc
Normal file
BIN
co3/__pycache__/accessor.cpython-311.pyc
Normal file
Binary file not shown.
BIN
co3/__pycache__/accessor.cpython-312.pyc
Normal file
BIN
co3/__pycache__/accessor.cpython-312.pyc
Normal file
Binary file not shown.
BIN
co3/__pycache__/co3.cpython-312.pyc
Normal file
BIN
co3/__pycache__/co3.cpython-312.pyc
Normal file
Binary file not shown.
BIN
co3/__pycache__/co4.cpython-311.pyc
Normal file
BIN
co3/__pycache__/co4.cpython-311.pyc
Normal file
Binary file not shown.
BIN
co3/__pycache__/collector.cpython-311.pyc
Normal file
BIN
co3/__pycache__/collector.cpython-311.pyc
Normal file
Binary file not shown.
BIN
co3/__pycache__/collector.cpython-312.pyc
Normal file
BIN
co3/__pycache__/collector.cpython-312.pyc
Normal file
Binary file not shown.
BIN
co3/__pycache__/composer.cpython-311.pyc
Normal file
BIN
co3/__pycache__/composer.cpython-311.pyc
Normal file
Binary file not shown.
BIN
co3/__pycache__/composer.cpython-312.pyc
Normal file
BIN
co3/__pycache__/composer.cpython-312.pyc
Normal file
Binary file not shown.
BIN
co3/__pycache__/database.cpython-312.pyc
Normal file
BIN
co3/__pycache__/database.cpython-312.pyc
Normal file
Binary file not shown.
BIN
co3/__pycache__/indexer.cpython-311.pyc
Normal file
BIN
co3/__pycache__/indexer.cpython-311.pyc
Normal file
Binary file not shown.
BIN
co3/__pycache__/indexer.cpython-312.pyc
Normal file
BIN
co3/__pycache__/indexer.cpython-312.pyc
Normal file
Binary file not shown.
BIN
co3/__pycache__/manager.cpython-311.pyc
Normal file
BIN
co3/__pycache__/manager.cpython-311.pyc
Normal file
Binary file not shown.
BIN
co3/__pycache__/manager.cpython-312.pyc
Normal file
BIN
co3/__pycache__/manager.cpython-312.pyc
Normal file
Binary file not shown.
BIN
co3/__pycache__/mapper.cpython-312.pyc
Normal file
BIN
co3/__pycache__/mapper.cpython-312.pyc
Normal file
Binary file not shown.
BIN
co3/__pycache__/relation.cpython-312.pyc
Normal file
BIN
co3/__pycache__/relation.cpython-312.pyc
Normal file
Binary file not shown.
28
co3/accessor.py
Normal file
28
co3/accessor.py
Normal file
@ -0,0 +1,28 @@
|
||||
'''
|
||||
Accessor
|
||||
|
||||
Provides access to an underlying schema through a supported set of operations. Class
|
||||
methods could be general, high-level SQL wrappers, or convenience functions for common
|
||||
schema-specific queries.
|
||||
'''
|
||||
import inspect
|
||||
from pathlib import Path
|
||||
from collections import defaultdict
|
||||
|
||||
import sqlalchemy as sa
|
||||
|
||||
#from co3.database import Database
|
||||
|
||||
|
||||
class Accessor[D: 'Database']:
|
||||
'''
|
||||
Access wrapper class for complex queries and easy integration with Composer tables.
|
||||
Implements high-level access to things like common constrained SELECT queries.
|
||||
|
||||
Parameters:
|
||||
engine: SQLAlchemy engine to use for queries. Engine is initialized dynamically as
|
||||
a property (based on the config) if not provided
|
||||
'''
|
||||
def __init__(self, database: D):
|
||||
self.database = database
|
||||
|
24
co3/accessors/__init__.py
Normal file
24
co3/accessors/__init__.py
Normal file
@ -0,0 +1,24 @@
|
||||
'''
|
||||
Note that subclasses in this subpackage are split differently to other subpackages in the
|
||||
DB. Instead of being split by table group, corresponding to a Composer (which defines that
|
||||
table group), Accessors are split by a separate dimension: table "type". This is why we
|
||||
have a "TableAccessor" and an "FTSAccessor": the former exposes access operations
|
||||
available to generic tables, the latter to FTS tables (instead of being designed
|
||||
specifically around "core" and "fts" groups, for instance).
|
||||
|
||||
Seeing as FTS tables are "generic" tables, it seems inconsistent not to have FTSAccessor
|
||||
inherit from TableAccessor. While this would work fine, the model we're working with
|
||||
doesn't really need it; you can instead think of the FTSAccessor as defining _only_
|
||||
FTS-specific operations. Given that you have a Composer for your desired table group, you
|
||||
can then wrap it with your desired set of "access actions," available in separate Accessor
|
||||
subclasses.
|
||||
|
||||
For instance, you could wrap an FTSComposer in either a TableAccessor or FTSAccessor. The
|
||||
former will treat the tables in the composer like regular tables, exposing methods like
|
||||
`.select` and `.select_one`, whereas the latter defines FTS-specific actions like
|
||||
`.search`.
|
||||
'''
|
||||
|
||||
from co3.accessors.sql import SQLAccessor
|
||||
from co3.accessors.fts import FTSAccessor
|
||||
from co3.accessors.vss import VSSAccessor
|
BIN
co3/accessors/__pycache__/__init__.cpython-311.pyc
Normal file
BIN
co3/accessors/__pycache__/__init__.cpython-311.pyc
Normal file
Binary file not shown.
BIN
co3/accessors/__pycache__/__init__.cpython-312.pyc
Normal file
BIN
co3/accessors/__pycache__/__init__.cpython-312.pyc
Normal file
Binary file not shown.
BIN
co3/accessors/__pycache__/_base.cpython-311.pyc
Normal file
BIN
co3/accessors/__pycache__/_base.cpython-311.pyc
Normal file
Binary file not shown.
BIN
co3/accessors/__pycache__/fts.cpython-311.pyc
Normal file
BIN
co3/accessors/__pycache__/fts.cpython-311.pyc
Normal file
Binary file not shown.
BIN
co3/accessors/__pycache__/fts.cpython-312.pyc
Normal file
BIN
co3/accessors/__pycache__/fts.cpython-312.pyc
Normal file
Binary file not shown.
BIN
co3/accessors/__pycache__/sql.cpython-312.pyc
Normal file
BIN
co3/accessors/__pycache__/sql.cpython-312.pyc
Normal file
Binary file not shown.
BIN
co3/accessors/__pycache__/table.cpython-311.pyc
Normal file
BIN
co3/accessors/__pycache__/table.cpython-311.pyc
Normal file
Binary file not shown.
BIN
co3/accessors/__pycache__/vss.cpython-311.pyc
Normal file
BIN
co3/accessors/__pycache__/vss.cpython-311.pyc
Normal file
Binary file not shown.
BIN
co3/accessors/__pycache__/vss.cpython-312.pyc
Normal file
BIN
co3/accessors/__pycache__/vss.cpython-312.pyc
Normal file
Binary file not shown.
147
co3/accessors/fts.py
Normal file
147
co3/accessors/fts.py
Normal file
@ -0,0 +1,147 @@
|
||||
import sqlalchemy as sa
|
||||
|
||||
from co3 import util
|
||||
from co3.accessor import Accessor
|
||||
|
||||
|
||||
class FTSAccessor(Accessor):
    '''
    Accessor for SQLite FTS virtual tables.

    NOTE(review): `search` delegates raw SQL execution to `self.raw_select`,
    which is not defined on this class or on `Accessor` (it appears on
    SQLAccessor) — confirm the intended composition before standalone use.
    '''
    def search(
        self,
        table_name  : str,
        select_cols : str | list | None = '*',
        search_cols : str | None = None,
        q           : str | None = None,
        colq        : str | None = None,
        snip_col    : int | None = 0,
        hl_col      : int | None = 0,
        limit       : int | None = 100,
        snip        : int | None = 64,
        tokenizer   : str | None = 'unicode61',
        group_by    : str | None = None,
        agg_cols    : list | None = None,
        wherein_dict: dict | None = None,
        unique_on   : dict | None = None,
    ):
        '''
        Execute a search query against an indexed FTS table for specific primitives. This
        method is mostly a generic FTS handler, capable of handling queries to any available
        FTS table with a matching naming scheme (`<table>_fts_<tokenizer>`). The current
        intention is to support all tokenizers, for file, note, block, and link primitives.

        Search results include all FTS table columns, as well as SQLite-supported `snippet`s
        and `highlight`s for matches. Matches are filtered and ordered by SQLite's
        `MATCH`-based score for the text & column queries. Results are (a list of) fully
        expanded dictionaries housing column-value pairs.

        Note:
            GROUP BY cannot be paired with SQLite FTS extensions; thus, we perform manual
            group checks on the result set in Python before response

        Parameters:
            table_name  : name of the base table (FTS table resolved as
                          `<table_name>_fts_<tokenizer>`)
            select_cols : columns to select ('*', space/comma string, or list of names)
            search_cols : space separated string of columns to use for primary queries
            q           : search query
            colq        : column constraint string; must conform to SQLite standards
                          (e.g., `<col>:<text>`)
            snip_col    : table column to use for snippets (default: 0)
            hl_col      : table column to use for highlights (default: 0)
            limit       : maximum number of results to return in the SQL query
            snip        : snippet length (max: 64)
            tokenizer   : tokenizer to use (assumes relevant FTS table has been built)
            group_by    : column to manually group results by (see Note)
            agg_cols    : columns whose values are aggregated into sets per group
            wherein_dict: (col-name, value-list) pairs to match result set against, via
                          WHERE ... IN clauses
            unique_on   : currently unused

        Returns:
            When `group_by` is None: `(row_dicts, cols)` tuple. Otherwise a dictionary
            with grouped results, the column list, and the raw result count.
        '''
        search_q = ''

        if type(select_cols) is list:
            select_cols = ', '.join(select_cols)

        # construct main search query
        if search_cols and q:
            search_q = f'{{{search_cols}}} : {q}'

        # add auxiliary search constraints
        if colq:
            search_q += f' {colq}'

        search_q = search_q.strip()

        hl_start = '<b><mark>'
        hl_end   = '</mark></b>'

        fts_table_name = f'{table_name}_fts_{tokenizer}'

        # SECURITY(review): all values below are interpolated directly into the
        # SQL string; do not expose this method to untrusted input without
        # switching to bound parameters
        sql = f'''
        SELECT
            {select_cols},
            snippet({fts_table_name}, {snip_col}, '{hl_start}', '{hl_end}', '...', {snip}) AS snippet,
            highlight({fts_table_name}, {hl_col}, '{hl_start}', '{hl_end}') AS highlight
        FROM {fts_table_name}
        '''

        where_clauses = []
        if search_q:
            where_clauses.append(f"{fts_table_name} MATCH '{search_q}'\n")

        if wherein_dict:
            for col, vals in wherein_dict.items():
                # build the IN list manually; the previous `tuple(vals)`
                # rendering produced "('x',)" for single-element lists, whose
                # trailing comma is invalid SQL
                in_list = ', '.join(repr(v) for v in vals)
                where_clauses.append(f'{col} IN ({in_list})\n')

        if where_clauses:
            where_str = " AND ".join(where_clauses)
            sql += f'WHERE {where_str}'

        sql += f'ORDER BY rank LIMIT {limit};'

        row_dicts, cols = self.raw_select(sql, include_cols=True)

        if group_by is None:
            return row_dicts, cols

        if agg_cols is None:
            agg_cols = []

        # "group by" block ID and wrangle the links into a list
        # note we can't perform native GROUP BYs with FTS results
        group_by_idx = {}
        for row in row_dicts:
            group_by_attr = row.get(group_by)

            # add new entries
            for agg_col in agg_cols:
                row[f'{agg_col}_agg'] = set()

            if group_by_attr is None:
                continue

            if group_by_attr not in group_by_idx:
                group_by_idx[group_by_attr] = row

            for agg_col in agg_cols:
                if agg_col in row:
                    group_by_idx[group_by_attr][f'{agg_col}_agg'].add(row[agg_col])

        return {
            'results'     : group_by_idx,
            'columns'     : cols,
            'num_results' : len(row_dicts),
        }
|
96
co3/accessors/sql.py
Normal file
96
co3/accessors/sql.py
Normal file
@ -0,0 +1,96 @@
|
||||
from pathlib import Path
|
||||
from collections.abc import Iterable
|
||||
import inspect
|
||||
from functools import cache
|
||||
|
||||
import sqlalchemy as sa
|
||||
|
||||
from co3 import util
|
||||
from co3.accessor import Accessor
|
||||
from co3.relation import Relation
|
||||
|
||||
#from co3.databases.sql import RelationalDatabase, TabularDatabase, SQLDatabase
|
||||
from co3.relations import TabularRelation, SQLTable
|
||||
|
||||
|
||||
class RelationalAccessor[D: 'RelationalDatabase', R: Relation](Accessor[D]):
|
||||
pass
|
||||
|
||||
|
||||
class TabularAccessor[D: 'TabularDatabase', R: TabularRelation](RelationalAccessor[D, R]):
|
||||
pass
|
||||
|
||||
|
||||
class SQLAccessor(TabularAccessor['SQLDatabase', SQLTable]):
    '''
    Accessor for SQL databases, providing raw-string and statement-based
    SELECT helpers over the wrapped Database's engine.
    '''
    def raw_select(
        self,
        sql,
        bind_params=None,
        mappings=False,
        include_cols=False,
    ):
        '''
        Execute a raw SQL string against the database engine.

        Parameters:
            sql          : raw SQL query string
            bind_params  : optional parameters bound to the query
            mappings     : if True, return SQLAlchemy mappings instead of dicts
            include_cols : if True, also return result column names
        '''
        # fixed: was `utils.db` (NameError); this module imports `util`
        res_method = util.db.sa_exec_dicts
        if mappings:
            res_method = util.db.sa_exec_mappings

        return res_method(self.database.engine, sa.text(sql), bind_params=bind_params, include_cols=include_cols)

    def select(
        self,
        table: sa.Table | sa.Subquery | sa.Join,
        cols         = None,
        where        = None,
        distinct_on  = None,
        order_by     = None,
        limit        = 0,
        mappings     = False,
        include_cols = False,
    ):
        '''
        Perform a SELECT query against the provided table-like object (see
        `check_table()`).

        Deprecated: String aliases
            String aliases for tables are no longer supported. This method no longer checks
            against any specific schema table-maps or Composers. Instead, this should be
            done outside the Accessor.

        Parameters:
            cols        : optional explicit column list to select
            where       : WHERE condition (defaults to a tautology)
            distinct_on : column to group by; for now serves as a proxy for DISTINCT
                          (no aggregation methods accepted)
            order_by    : column to order results by (can use <col>.desc() to order
                          by descending)
            limit       : maximum row count; 0 means unlimited
        '''
        if where is None:
            where = sa.true()

        # fixed: was `utils.db` (NameError); this module imports `util`
        res_method = util.db.sa_exec_dicts
        if mappings:
            res_method = util.db.sa_exec_mappings

        stmt = sa.select(table).where(where)
        if cols is not None:
            stmt = sa.select(*cols).select_from(table).where(where)

        if distinct_on is not None:
            stmt = stmt.group_by(distinct_on)

        if order_by is not None:
            stmt = stmt.order_by(order_by)

        if limit > 0:
            stmt = stmt.limit(limit)

        # fixed: the engine lives on the wrapped Database, not on the Accessor
        return res_method(self.database.engine, stmt, include_cols=include_cols)

    def select_one(self, table, cols=None, where=None, mappings=False, include_cols=False):
        '''
        SELECT a single row; returns None when the query yields no rows.
        '''
        # fixed: pass flags by keyword; previously `mappings` and
        # `include_cols` were bound positionally to `distinct_on` and
        # `order_by` in select()'s signature
        res = self.select(
            table,
            cols=cols,
            where=where,
            limit=1,
            mappings=mappings,
            include_cols=include_cols,
        )

        if include_cols and len(res[0]) > 0:
            return res[0][0], res[1]

        if len(res) > 0:
            return res[0]

        return None
|
||||
|
100
co3/accessors/vss.py
Normal file
100
co3/accessors/vss.py
Normal file
@ -0,0 +1,100 @@
|
||||
import pickle
|
||||
import logging
|
||||
from pathlib import Path
|
||||
import time
|
||||
|
||||
import sqlalchemy as sa
|
||||
#from sentence_transformers import SentenceTransformer, util
|
||||
|
||||
from co3.accessor import Accessor
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class VSSAccessor(Accessor):
    '''
    Accessor for a vector similarity search (VSS) index backed by a pickled
    embedding cache on disk.

    NOTE(review): the `sentence_transformers` import (`SentenceTransformer`,
    `util`) is commented out at module top, so `model` and `search` will raise
    NameError until it is restored — confirm intended packaging.
    '''
    def __init__(self, cache_path):
        super().__init__()

        self._model = None
        self._embeddings = None

        # all-MiniLM-L12-v2 produces 384-dim sentence embeddings
        self._embedding_size = 384
        self.embedding_path = Path(cache_path, 'embeddings.pkl')

    def write_embeddings(self, embedding_dict):
        '''Serialize the full embedding dict to the on-disk cache.'''
        self.embedding_path.write_bytes(pickle.dumps(embedding_dict))

    def read_embeddings(self):
        '''
        Load the embedding dict from disk; returns None (with a warning) when
        no cache file exists.

        NOTE: pickle deserialization can execute arbitrary code; only load
        caches written by this process.
        '''
        if not self.embedding_path.exists():
            logger.warning(
                f'Attempting to access non-existent embeddings at {self.embedding_path}'
            )
            return None

        return pickle.loads(self.embedding_path.read_bytes())

    @property
    def model(self):
        '''Lazily construct and memoize the sentence-transformer model.'''
        if self._model is None:
            # model trained with 128 token seqs
            self._model = SentenceTransformer('sentence-transformers/all-MiniLM-L12-v2')
        return self._model

    @property
    def embeddings(self):
        '''Lazily load and memoize the on-disk embedding cache (may be None).'''
        if self._embeddings is None:
            self._embeddings = self.read_embeddings()
        return self._embeddings

    def embed_chunks(self, chunks, batch_size=64, show_prog=True):
        '''Encode `chunks` into unit-normalized numpy embeddings.'''
        return self.model.encode(
            chunks,
            batch_size           = batch_size,
            show_progress_bar    = show_prog,
            convert_to_numpy     = True,
            normalize_embeddings = True
        )

    def search(
        self,
        query           : str,
        index_name      : str,
        limit           : int = 10,
        score_threshold = 0.5,
    ):
        '''
        Run a semantic search of `query` against the named embedding index.

        Parameters:
            index_name: one of ['chunks','blocks','notes']
            limit: max number of hits to return
            score_threshold: minimum dot-product score for a hit to be kept

        Returns:
            List of hit dicts (with 'score', 'group_name', 'item'), or None for
            an empty query or a missing/unavailable index.
        '''
        if not query:
            return None

        # fixed: `embeddings` is None when no cache file exists on disk; the
        # original membership test then raised TypeError
        embeddings = self.embeddings
        if embeddings is None or index_name not in embeddings:
            logger.warning(
                f'Index "{index_name}" does not exist'
            )
            return None

        start = time.time()

        query_embedding = self.embed_chunks(query, show_prog=False)
        index_ids, index_embeddings, index_items = embeddings[index_name]

        hits = util.semantic_search(
            query_embedding,
            index_embeddings,
            top_k=limit,
            score_function=util.dot_score
        )[0]

        hits = [hit for hit in hits if hit['score'] >= score_threshold]

        # attach the originating group/item to each surviving hit
        for hit in hits:
            idx = hit['corpus_id']
            hit['group_name'] = index_ids[idx]
            hit['item'] = index_items[idx]

        logger.info(f'{len(hits)} hits in {time.time()-start:.2f}s')

        return hits
|
||||
|
106
co3/co3.py
Normal file
106
co3/co3.py
Normal file
@ -0,0 +1,106 @@
|
||||
'''
|
||||
CO4
|
||||
|
||||
CO4 is an abstract base class for scaffolding object hierarchies and managing operations
|
||||
with associated database schemas. It facilitates something like a "lightweight ORM" for
|
||||
classes/tables/states with fixed transformations of interest. The canonical use case is
|
||||
managing hierarchical document relations, format conversions, and syntactical components.
|
||||
'''
|
||||
|
||||
import inspect
|
||||
import logging
|
||||
from functools import wraps, partial
|
||||
|
||||
#from localsys.db.schema import tables
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
#def register_format(_format):
|
||||
# def decorator(func):
|
||||
# self.collate.format_map[_format] = func
|
||||
#
|
||||
# @wraps(func)
|
||||
# def register(*args, **kwargs):
|
||||
# return func(*args, **kwargs)
|
||||
#
|
||||
# return register
|
||||
# return decorator
|
||||
|
||||
def collate(action_key, action_groups=None):
    '''
    Decorator registering a method as the collation action for `action_key`.

    Parameters:
        action_key: unique key under which the method is registered
        action_groups: optional list of group keys; defaults to [None]

    The registration data is stashed on the function as `_action_data` and
    swept up by FormatRegistryMeta at class-creation time.
    '''
    # resolve the default here; assigning inside `decorator` made
    # `action_groups` a local there and raised UnboundLocalError on the
    # preceding `is None` check
    if action_groups is None:
        action_groups = [None]

    def decorator(func):
        func._action_data = (action_key, action_groups)
        return func
    return decorator
|
||||
|
||||
class FormatRegistryMeta(type):
    '''
    Metaclass that sweeps `collate`-registered methods (those carrying an
    `_action_data` attribute) into a class-level `action_map` of the form
    {action_key: (method, action_groups)}.
    '''
    def __new__(cls, name, bases, attrs):
        action_registry = {}

        # add registered superclass methods; iterate over bases (usually just one), then
        # that base's chain down (reversed), then methods from each subclass
        for base in bases:
            for _class in reversed(base.mro()):
                methods = inspect.getmembers(_class, predicate=inspect.isfunction)
                for _, method in methods:
                    if hasattr(method, '_action_data'):
                        action_key, action_groups = method._action_data
                        action_registry[action_key] = (method, action_groups)

        # add final registered formats for the current class, overwriting any found in
        # superclass chain
        for attr_name, attr_value in attrs.items():
            if hasattr(attr_value, '_action_data'):
                action_key, action_groups = attr_value._action_data
                # fixed: the original registered the stale loop variable
                # `method` here instead of this class's own `attr_value`
                action_registry[action_key] = (attr_value, action_groups)

        attrs['action_map'] = action_registry

        return super().__new__(cls, name, bases, attrs)
|
||||
|
||||
class CO3(metaclass=FormatRegistryMeta):
    '''
    CO3: COllate, COllect, COmpose - conversion & DB insertion base

    - Collate: organize and transform conversion outputs, possibly across class components
    - Collect: gather core attributes, conversion data, and subcomponents for DB insertion
    - Compose: construct object-associated DB table references through the class hierarchy

    Note: on action groups
        Group keys are simply named collections to make it easy for storage components to
        be attached to action subsets. They do _not_ augment the action registration
        namespace, meaning the action key should still be unique; the group key is purely
        auxiliary.

        Action methods can also be attached to several groups, in case there is
        overlapping utility within or across schemas or storage media. In this case, it
        becomes particularly critical to ensure registered `collate` methods really are
        just "gathering results" from possibly heavy-duty operations, rather than
        performing them when called, so as to reduce wasted computation.
    '''
    @property
    def attributes(self):
        '''
        Method to define how a subtype's inserts should be handled under `collect` for
        canonical attributes, i.e., inserts to the type's table.
        '''
        return vars(self)

    @property
    def components(self):
        '''
        Method to define how a subtype's inserts should be handled under `collect` for
        constituent components that need handling.
        '''
        return []

    def collate(self, action_key, *action_args, **action_kwargs):
        '''
        Invoke the action method registered under `action_key` (see the
        `collate` decorator and FormatRegistryMeta), forwarding any provided
        arguments. Returns None (with a debug log) if no action is registered.
        '''
        if action_key not in self.action_map:
            logger.debug(f'Collation for {action_key} not supported')
            return None

        # registry values are (method, action_groups) tuples; the original
        # called the tuple itself (a TypeError) and ignored the forwarded args
        method, _action_groups = self.action_map[action_key]
        return method(self, *action_args, **action_kwargs)
|
||||
|
||||
|
109
co3/collector.py
Normal file
109
co3/collector.py
Normal file
@ -0,0 +1,109 @@
|
||||
'''
|
||||
Defines the Collector base class.
|
||||
|
||||
This module is the critical "middleware" connecting the primitive object definitions and
|
||||
their representations in the database. It operates with full knowledge of how both are
|
||||
defined, and abstracts away both the prep work for DB insertions as well as updates
|
||||
trickling down the primitive hierarchy.
|
||||
|
||||
The `src` format target is re-used for both canonical tables/primitives, as well as
|
||||
<prim>_conversion_matter tables in tables/conversions under the `src` format. The latter
|
||||
is meant to extend those attributes that are format-specific (i.e., would change when, say,
|
||||
converting to `html5`), and thus need to be broken across the format dimension.
|
||||
|
||||
Note:
|
||||
Despite the structure of the database module, this class does not currently inherit
|
||||
from a super class in localsys.db (like the accessors and managers, for instance).
|
||||
This will likely ultimately be the model that's embraced, but until FTS (or other
|
||||
groups) need a collector, this will remain an independent class. It is, however,
|
||||
named like a concrete subclass, taking on the "Core" prefix.
|
||||
'''
|
||||
|
||||
from pathlib import Path
|
||||
from collections import defaultdict
|
||||
import logging
|
||||
import importlib
|
||||
import subprocess
|
||||
from uuid import uuid4
|
||||
|
||||
import sqlalchemy as sa
|
||||
|
||||
from co3 import util
|
||||
#from localsys.db.schema import tables
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class Collector:
    '''
    Stages insert-ready dictionaries keyed by a unique "receipt" per
    `add_insert` call, so callers can later flush either everything or just
    the inserts tied to specific receipts.
    '''
    def __init__(self):
        # receipt -> table name -> list of prepared insert dicts
        self._inserts = defaultdict(lambda: defaultdict(list))

    @property
    def inserts(self):
        '''Non-destructive, table-indexed view of all staged inserts.'''
        return self._inserts_from_receipts()

    def _inserts_from_receipts(self, receipts=None, pop=False):
        '''
        Flatten staged inserts across `receipts` into a table-indexed dict.

        Parameters:
            receipts: receipts to include; defaults to all outstanding receipts
            pop: when True, remove visited receipts from the internal tracker

        Returns:
            Table name-indexed dict of insert lists.
        '''
        inserts = defaultdict(list)

        if receipts is None:
            receipts = list(self._inserts.keys())

        for receipt in receipts:
            if pop: insert_dict = self._inserts.pop(receipt, {})
            else:   insert_dict = self._inserts[receipt]

            for table, insert_list in insert_dict.items():
                inserts[table].extend(insert_list)

        return dict(inserts)

    def _reset_session(self):
        # drop every staged insert, regardless of receipt
        self._inserts = defaultdict(lambda: defaultdict(list))

    def _generate_unique_receipt(self):
        return str(uuid4())

    def add_insert(self, table_name, insert_dict, receipts=None):
        '''
        Stage a single insert dict for `table_name` under a fresh receipt.

        Parameters:
            table_name: name of the target table (must be in `tables.table_map`)
            insert_dict: column name-indexed values to insert
            receipts: optional list to which the new receipt is appended

        Returns:
            The new receipt string, or None for an unknown table.

        TODO: formalize table_name mapping; at class level provide a `table_map`, or provide
        the table object itself to this method

        NOTE(review): `tables` is commented out in the module imports, so this
        method currently raises NameError when called — confirm the intended
        schema source.
        '''
        if table_name not in tables.table_map:
            #logger.debug(f'Inserts provided for non-existent table {table_name}')
            return None

        receipt = self._generate_unique_receipt()

        self._inserts[receipt][table_name].append(
            # fixed: was `utils.db.prepare_insert` — `utils` is undefined; the
            # module imports `from co3 import util`
            util.db.prepare_insert(
                tables.table_map[table_name],
                insert_dict
            )
        )

        if receipts is not None:
            receipts.append(receipt)

        return receipt

    def collect_inserts(self, receipts=None):
        '''
        Collect insert-ready dictionaries for the core primitive schema, popping the
        visited receipts so a re-scan takes place across calls (or "sessions").

        The overall collection scheme embraces a session-like sequential update model to
        an internal insert tracker. The sequence of insert methods is ordered according to
        the schema hierarchy, and higher level inserts dictate the scope for lower level
        inserts (all methods check and populate the same `inserts` dictionary).

        Parameters:
            receipts: receipts to flush; defaults to all outstanding receipts

        Returns:
            Table name-indexed dictionary of insert lists (of column name-indexed dicts)
        '''
        return self._inserts_from_receipts(receipts, pop=True)
|
89
co3/composer.py
Normal file
89
co3/composer.py
Normal file
@ -0,0 +1,89 @@
|
||||
'''
|
||||
Composer
|
||||
|
||||
Base for manually defining table compositions outside those natural to the schema
|
||||
hierarchy (i.e., constructable by a `CO4.compose()` call).
|
||||
|
||||
Example: suppose we have a simple object hierarchy A(CO4) -> B -> C. C's in-built
|
||||
`compose()` method may not always be desirable when constructing composite tables and
|
||||
running related queries. In this case, a custom Composer can be used to make needed
|
||||
composite tables easier to reference; in the case below, we define the "BC" composite
|
||||
table.
|
||||
|
||||
```
|
||||
class ExampleComposer(Composer):
|
||||
|
||||
@register_table
|
||||
def BC(self):
|
||||
full_B = B.compose(full=True)
|
||||
full_C = C.compose(full=True)
|
||||
|
||||
return full_B.join(
|
||||
full_C,
|
||||
full_B.c.name == full_C.c.name, # TODO: is this fine? or do we need base table refs
|
||||
outer=True
|
||||
)
|
||||
'''
|
||||
from pathlib import Path
|
||||
|
||||
from co3.mapper import Mapper
|
||||
|
||||
|
||||
def register_table(table_name=None):
    '''
    Registry decorator for defined composer classes. Decorating a class method simply
    attaches a `table_name` attribute to it, setting it to either a provided value or the
    name of the method itself. Methods with a `table_name` attribute are later swept up at
    the class level and placed in the `table_map`.
    '''
    def decorator(func):
        # resolve the default into a new local; re-assigning `table_name`
        # inside this closure made it local and raised UnboundLocalError on
        # the `is None` check
        name = table_name if table_name is not None else func.__name__
        func.table_name = name
        return func
    return decorator
|
||||
|
||||
class Composer[M: Mapper]:
|
||||
'''
|
||||
Base composer wrapper for table groupings.
|
||||
|
||||
The schema is centered around a connected group of tables (via foreign keys). Thus,
|
||||
most operations need to be coordinated across tables. The `accessors` submodules
|
||||
are mostly intended to provide a "secondary layer" over the base set of tables in the
|
||||
schema, exposing common higher level table compositions (i.e., chained JOINs). See
|
||||
concrete instances (e.g., CoreAccess, FTSAccessor) for actual implementations these
|
||||
tables; the base class does not expose
|
||||
|
||||
Tables in subclasses are registered with the `register_table` decorator, automatically
|
||||
indexing them under the provided name and making them available via the `table_map`.
|
||||
'''
|
||||
def __init__(self):
|
||||
self._set_tables()
|
||||
|
||||
def _set_tables(self):
|
||||
'''
|
||||
Skip properties (so appropriate delays can be used), and
|
||||
|
||||
Set the table registry at the class level. This only takes place during the first
|
||||
instantiation of the class, and makes it possible to definitively tie methods to
|
||||
composed tables during lookup with `get_table()`.
|
||||
'''
|
||||
cls = self.__class__
|
||||
|
||||
# in case the class has already be instantiated
|
||||
if hasattr(cls, 'table_map'): return
|
||||
|
||||
table_map = {}
|
||||
for key, value in cls.__dict__.items():
|
||||
if isinstance(value, property):
|
||||
continue # Skip properties
|
||||
if callable(value) and hasattr(value, 'table_name'):
|
||||
table_map[value.table_name] = value(self)
|
||||
|
||||
cls.table_map = table_map
|
||||
|
||||
def get_table(self, table_name):
|
||||
'''
|
||||
Retrieve the named table composition, if defined.
|
||||
'''
|
||||
return self.table_map.get(table_name)
|
87
co3/database.py
Normal file
87
co3/database.py
Normal file
@ -0,0 +1,87 @@
|
||||
'''
|
||||
Database
|
||||
|
||||
Central object for defining storage protocol-specific interfaces. The database wraps up
|
||||
central items for interacting with database resources, namely the Accessor and Manager
|
||||
objects.
|
||||
|
||||
The Database type hierarchy attempts to be exceedingly general; SQL-derivatives should
|
||||
subclass from the RelationalDatabase subtype, for example, which itself becomes a new
|
||||
generic via type dependence on Relation.
|
||||
'''
|
||||
|
||||
import logging
|
||||
from typing import Self
|
||||
|
||||
from co3.accessor import Accessor
|
||||
from co3.composer import Composer
|
||||
from co3.manager import Manager
|
||||
from co3.indexer import Indexer
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class Database:
    '''
    Generic storage interface bundling an Accessor (reads), a Manager
    (writes/state), and an Indexer (cached query access) around a resource.

    Subclasses override the `accessor`/`manager` class attributes and the
    `engine` property for their storage protocol.
    '''
    accessor: type[Accessor[Self]] = Accessor
    manager: type[Manager[Self]] = Manager

    def __init__(self, resource):
        '''
        Parameters:
            resource: protocol-specific handle/locator for the underlying store

        Variables:
            _local_cache: a database-local property store for ad-hoc CacheBlock-esque
                          methods, that are nevertheless _not_ query/group-by responses to
                          pass on to the Indexer. Dependent properties should write to
                          this cache and check for existence of stored results; the cache
                          state must be managed globally.
        '''
        self.resource = resource

        self._access = self.accessor(self)
        self._manage = self.manager(self)

        self._index = Indexer(self._access)
        self._local_cache = {}

        # when True, the next `.index` access clears the Indexer cache first
        self.reset_cache = False

    @property
    def engine(self):
        '''
        Database property to provide a singleton engine for DB interaction, initializing
        the database if it doesn't already exist.

        TODO: figure out thread safety across engines and/or connection. Any issue with
        hanging on to the same engine instance for the Database instance?
        '''
        raise NotImplementedError

    def connect(self):
        # delegates to the subclass-provided engine
        self.engine.connect()

    @property
    def access(self):
        # read-side entry point (Accessor instance)
        return self._access

    @property
    def compose(self):
        # NOTE(review): `_compose` is never assigned in __init__, so accessing
        # `.compose` on a bare Database raises AttributeError; presumably a
        # subclass or later refactor sets it — confirm before relying on it
        return self._compose

    @property
    def index(self):
        # lazily honor a cache reset queued by `.manage`
        if self.reset_cache:
            self._index.cache_clear()
            self.reset_cache = False
        return self._index

    @property
    def manage(self):
        '''
        Accessing `.manage` queues a cache clear on the external index, as well as
        wiping the local cache (writes may invalidate previously cached reads).
        '''
        self.reset_cache = True
        self._local_cache = {}
        return self._manage

    def populate_indexes(self): pass
|
||||
|
3
co3/databases/__init__.py
Normal file
3
co3/databases/__init__.py
Normal file
@ -0,0 +1,3 @@
|
||||
from co3.databases.sql import *
|
||||
from co3.databases.fts import FTSDatabase
|
||||
from co3.databases.vss import VSSDatabase
|
BIN
co3/databases/__pycache__/__init__.cpython-311.pyc
Normal file
BIN
co3/databases/__pycache__/__init__.cpython-311.pyc
Normal file
Binary file not shown.
BIN
co3/databases/__pycache__/__init__.cpython-312.pyc
Normal file
BIN
co3/databases/__pycache__/__init__.cpython-312.pyc
Normal file
Binary file not shown.
BIN
co3/databases/__pycache__/_base.cpython-311.pyc
Normal file
BIN
co3/databases/__pycache__/_base.cpython-311.pyc
Normal file
Binary file not shown.
BIN
co3/databases/__pycache__/core.cpython-311.pyc
Normal file
BIN
co3/databases/__pycache__/core.cpython-311.pyc
Normal file
Binary file not shown.
BIN
co3/databases/__pycache__/fts.cpython-311.pyc
Normal file
BIN
co3/databases/__pycache__/fts.cpython-311.pyc
Normal file
Binary file not shown.
BIN
co3/databases/__pycache__/fts.cpython-312.pyc
Normal file
BIN
co3/databases/__pycache__/fts.cpython-312.pyc
Normal file
Binary file not shown.
BIN
co3/databases/__pycache__/sql.cpython-312.pyc
Normal file
BIN
co3/databases/__pycache__/sql.cpython-312.pyc
Normal file
Binary file not shown.
BIN
co3/databases/__pycache__/vss.cpython-312.pyc
Normal file
BIN
co3/databases/__pycache__/vss.cpython-312.pyc
Normal file
Binary file not shown.
8
co3/databases/fts.py
Normal file
8
co3/databases/fts.py
Normal file
@ -0,0 +1,8 @@
|
||||
from co3.database import Database
|
||||
|
||||
from co3.accessors.fts import FTSAccessor
|
||||
from co3.managers.fts import FTSManager
|
||||
|
||||
class FTSDatabase(Database):
    '''Database wired to the full-text-search accessor/manager pair.'''
    accessor = FTSAccessor
    manager = FTSManager
|
34
co3/databases/sql.py
Normal file
34
co3/databases/sql.py
Normal file
@ -0,0 +1,34 @@
|
||||
from typing import Self
|
||||
|
||||
from co3.database import Database
|
||||
|
||||
from co3.accessors.sql import RelationalAccessor, TabularAccessor, SQLAccessor
|
||||
from co3.managers.sql import RelationalManager, TabularManager, SQLManager
|
||||
|
||||
from co3.relation import Relation
|
||||
from co3.relations import TabularRelation, SQLTable
|
||||
|
||||
|
||||
class RelationalDatabase[R: Relation](Database):
    '''
    Relational specialization of Database, generic over the Relation type R.

    NOTE(review): the defaults are subscripted generic aliases
    (`RelationalAccessor[Self, R]`), not plain classes — confirm they are
    instantiable where `Database.__init__` calls `self.accessor(self)`.
    '''
    accessor: type[RelationalAccessor[Self, R]] = RelationalAccessor[Self, R]
    manager: type[RelationalManager[Self, R]] = RelationalManager[Self, R]
|
||||
|
||||
|
||||
class TabularDatabase[R: TabularRelation](RelationalDatabase[R]):
    '''
    accessor/manager assignments satisfy supertype's type settings;
    `TabularAccessor[Self, R]` is of type `type[RelationalAccessor[Self, R]]`
    (and yes, `type[]` specifies that the variable is itself being set to a type or a
    class, rather than a satisfying _instance_)
    '''
    accessor: type[TabularAccessor[Self, R]] = TabularAccessor[Self, R]
    manager: type[TabularManager[Self, R]] = TabularManager[Self, R]
|
||||
|
||||
|
||||
class SQLDatabase[R: SQLTable](TabularDatabase[R]):
    # concrete SQL accessor/manager pair; see TabularDatabase for the typing rationale
    accessor = SQLAccessor
    manager = SQLManager
|
||||
|
||||
|
||||
class SQLiteDatabase(SQLDatabase[SQLTable]):
    # SQLite currently needs no behavior beyond the generic SQL implementation
    pass
|
9
co3/databases/vss.py
Normal file
9
co3/databases/vss.py
Normal file
@ -0,0 +1,9 @@
|
||||
from co3.database import Database
|
||||
|
||||
from co3.accessors.vss import VSSAccessor
|
||||
from co3.managers.vss import VSSManager
|
||||
|
||||
|
||||
class VSSDatabase(Database):
    '''Database wired to the vector-similarity-search accessor/manager pair.'''
    accessor = VSSAccessor
    manager = VSSManager
|
433
co3/indexer.py
Normal file
433
co3/indexer.py
Normal file
@ -0,0 +1,433 @@
|
||||
import time
|
||||
import logging
|
||||
import threading
|
||||
from collections import defaultdict
|
||||
from collections.abc import Iterable
|
||||
|
||||
import sqlalchemy as sa
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class Indexer:
    '''
    Indexer class

    Provides restricted access to an underlying Accessor to enable more efficient, superficial
    caching.

    Cache clearing is to be handled by a wrapper class, like the Database.

    Caching occurs at the class level, with indexes prefixed by table's origin Composer.
    This means that cached selects/group-bys will be available regardless of the provided
    Accessors so long as the same Composer is used under the hood.
    '''
    # class-level caches shared by every Indexer instance (see class docstring)
    _cls_select_cache = {}
    _cls_groupby_cache = defaultdict(dict)

    def __init__(self, accessor, cache_select=True, cache_groupby=True):
        '''
        Parameters:
            accessor:      Accessor used to run real SELECTs on cache misses
            cache_select:  enable caching of raw select results
            cache_groupby: enable caching of post-select group-by results;
                           requires `cache_select`

        Raises:
            ValueError: if group-by caching is requested without select caching
        '''
        self.accessor = accessor

        # set instance caches; if remains None, methods can't index
        self._select_cache = None
        self._groupby_cache = None

        if cache_groupby and not cache_select:
            raise ValueError('cannot cache groupbys without select caching enabled')

        if cache_select:
            self._select_cache = self._cls_select_cache

        if cache_groupby:
            self._groupby_cache = self._cls_groupby_cache

        # guards both cache-miss recompute stages in `cached_query`
        self._access_lock = threading.Lock()

    def cache_clear(self, group_by_only=False):
        '''
        Clear the group-by cache, and (unless `group_by_only`) the select cache.

        NOTE(review): assumes both caches are enabled; with `cache_select=False`
        or `cache_groupby=False` the corresponding cache is None and `.clear()`
        raises AttributeError — confirm callers only clear fully-cached Indexers.
        '''
        self._groupby_cache.clear()
        if not group_by_only:
            self._select_cache.clear()

    def cache_block(
        self,
        table,
        **kwargs,
    ):
        '''
        Provide a user-friendly, dynamically re-indexable CacheBlock wrapping
        this Indexer and `table` (extra kwargs forwarded to CacheBlock).
        '''
        return CacheBlock(
            indexer = self,
            table   = table,
            **kwargs,
        )

    def cached_query(
        self,
        table,

        cols        = None,
        where       = None,
        distinct_on = None,
        order_by    = None,
        limit       = 0,

        group_by = None,
        agg_on   = None,
        index_on = None,
    ):
        '''
        Like `group_by`, but makes a full query to the Accessors table `table_name` and
        caches the results. The processing performed by the GROUP BY is also cached.

        Update: `cached_select` and `cached_group_by` now unified by a single
        `cached_query` method. This allows better defined GROUP BY caches, that are
        reactive to the full set of parameters returning the result set (and not just the
        table, requiring a full query).

        Note: on cache keys
            Cache keys are now fully stringified, as many objects are now allowed to be
            native SQLAlchemy objects. Indexing these objects works, but doing so will
            condition the cache on their memory addresses, which isn't what we want.
            SQLAlchemy converts most join/column/table-like objects to reasonable strings,
            which will look the same regardless of instance.

            Context: this became a clear issue when passing in more
            `order_by=<col>.desc()`. The `desc()` causes the index to store the column in
            an instance-specific way, rather than an easily re-usable, canonical column
            reference. Each time the CoreDatabase.files() was being called, for instance,
            that @property would be re-evaluated, causing `desc()` to be re-initialized,
            and thus look different to the cache. Stringifying everything prevents this
            (although this could well be an indication that only a single `cache_block`
            should ever be returned be database properties).

        Note: on access locks
            A double-checked locking scheme is employed before both of the stages (select
            and manual group by), using the same lock. This resolves the common scenario
            where many threads need to look up a query in the cache, experience a cache
            miss, and try to do the work. This non-linearly explodes the total time to
            wait in my experience, so doing this only when needed saves tons of time,
            especially in high-congestion moments.
        '''
        start = time.time()
        # stringified select-stage key (see "Note: on cache keys" above)
        cache_key = tuple(map(str, (table, cols, where, distinct_on, order_by, limit)))

        # apparently this is the double-check locking scheme (didn't realize when implementing)
        if self._select_cache is None or cache_key not in self._select_cache:
            # cache re-compute possible, acquire lock to continue. A later thread may
            # acquire this after work has been done by an earlier thread, so re-eval the
            # condition below before actually performing a DB read. If access results in a
            # cache hit, locking isn't needed.
            with self._access_lock:
                if self._select_cache is None or cache_key not in self._select_cache:
                    results = self.accessor.select(
                        table,
                        cols=cols,
                        where=where,
                        distinct_on=distinct_on,
                        order_by=order_by,
                        limit=limit,
                        mappings=True
                    )

                    # cache results if select_cache is defined
                    if self._select_cache is not None:
                        self._select_cache[cache_key] = results

                    logger.debug(
                        f'Indexer "select" cache miss for table "{table}": access in {time.time()-start:.4f}s'
                    )
                else:
                    # another thread populated the cache while we waited for the lock
                    results = self._select_cache[cache_key]
                    logger.debug(
                        f'Indexer "select" cache hit for table "{table}": access in {time.time()-start:.4f}s'
                    )
        else:
            results = self._select_cache[cache_key]
            logger.debug(
                f'Indexer "select" cache hit for table "{table}": access in {time.time()-start:.4f}s'
            )

        start = time.time()
        # group-by key extends the select key with the grouping parameters
        cache_key = (*cache_key, group_by, agg_on, index_on)

        if group_by is not None:
            if self._groupby_cache is None or cache_key not in self._groupby_cache:
                # same double-checked locking pattern as the select stage
                with self._access_lock:
                    if self._groupby_cache is None or cache_key not in self._groupby_cache:
                        results = self.group_by(
                            results,
                            group_by = group_by,
                            agg_on   = agg_on,
                            index_on = index_on,
                            return_index = True,
                        )

                        if self._groupby_cache is not None:
                            self._groupby_cache[cache_key] = results

                        logger.debug(
                            f'Indexer "group_by" cache miss for table "{table}": access in {time.time()-start:.4f}s'
                        )
                    else:
                        results = self._groupby_cache[cache_key]
                        logger.debug(
                            f'Indexer "group_by" cache hit for table "{table}": access in {time.time()-start:.4f}s'
                        )
            else:
                results = self._groupby_cache[cache_key]
                logger.debug(
                    f'Indexer "group_by" cache hit for table "{table}": access in {time.time()-start:.4f}s'
                )

        return results

    @classmethod
    def group_by(
        cls,
        rows,
        group_by,
        agg_on=None,
        index_on=None,
        return_index=False,
    ):
        '''
        Post-query "group by"-like aggregation. Creates an index over a set of columns
        (`group_by_cols`), and aggregates values from `agg_cols` under the groups.

        Rows can be dicts or mappings, and columns can be strings or SQLAlchemy columns.
        To ensure the right columns are being used for the operation, it's best to pass in
        mappings and use SQA columns if you aren't sure exactly how the keys look in your
        results (dicts can have ambiguous keys across tables with the same columns and/or
        different labeling schemes altogether).

        NOTE(review): a bare string for `agg_on`/`index_on` is itself Iterable,
        so it is NOT wrapped in a list and would be iterated per character —
        confirm callers always pass lists or SQA columns.

        TODO: add a flag that handles None's as distinct. That is, for the group_by
        column(s) of interest, if rows in the provided query set have NULL values for
        these columns, treat all such rows as their "own group" and return them alongside
        the grouped/aggregated ones. This is behavior desired by something like
        FTSManager.recreate(), which wants to bundle up conversions for blocks
        (effectively grouping by blocks.name and link.id, aggregating on
        block_conversions.format, then flattening). You could either do this, or as the
        caller just make sure to first filter the result set before grouping (e.g.,
        splitting the NULL-valued rows from those that are well-defined), and then
        stitching the two sets back together afterward.

        Multi-dim update:

        - group_by: can be a tuple of tuples of columns. Each inner tuple is a nested
          "group by index" in the group by index
        -
        '''
        if not rows:
            return {} if return_index else []

        # mappings support attribute-like access; plain dicts are keyed by string
        rows_are_mappings = not isinstance(rows[0], dict)

        if not rows_are_mappings:
            # dict rows: normalize the group-by column to its string key
            if isinstance(group_by, sa.Column):
                group_by = group_by.name
            else:
                group_by = str(group_by)

        #if group_by is None: group_by = []
        #elif not isinstance(group_by, Iterable): group_by = [group_by]

        if agg_on is None: agg_on = []
        elif not isinstance(agg_on, Iterable): agg_on = [agg_on]

        if index_on is None: index_on = []
        elif not isinstance(index_on, Iterable): index_on = [index_on]

        agg_on_names = []
        for agg in agg_on:
            # if a SQA column, can either use `.name` or `str(c)`. The latter includes the
            # table name, the former doesn't; ambiguity can be introduced here.
            if isinstance(agg, sa.Column):
                agg_on_names.append(agg.name)
            else:
                agg_on_names.append(str(agg))

        index_on_names = []
        for index in index_on:
            # if a SQA column, can either use `.name` or `str(c)`. The latter includes the
            # table name, the former doesn't; ambiguity can be introduced here.
            if isinstance(index, sa.Column):
                index_on_names.append(index.name)
            else:
                index_on_names.append(str(index))

        # when rows are dicts, use columns' string names
        if not rows_are_mappings:
            agg_on = agg_on_names
            index_on = index_on_names

        # "group by" block ID and wrangle the links into a list
        group_by_idx = {}
        for row in rows:
            # generic get
            group_by_attr = row.get(group_by)

            # wrap possible mapping dict
            row_dict = dict(row)

            # add new entries; standardize
            row_dict['aggregates'] = []

            indexes = {}
            for index_name in index_on_names:
                indexes[index_name] = {}
            row_dict['indexes'] = indexes

            # rows with a NULL group key are dropped (see TODO above)
            if group_by_attr is None:
                continue

            # first row seen for a group becomes its representative dict
            if group_by_attr not in group_by_idx:
                group_by_idx[group_by_attr] = row_dict

            # actually include all agg cols, even if None, so agg array indexes align
            agg_dict = {
                agg_on_names[i] : row.get(agg_col)
                for i, agg_col in enumerate(agg_on)
            }

            aggregates = group_by_idx[group_by_attr]['aggregates']
            aggregates.append(agg_dict)

            indexes = group_by_idx[group_by_attr]['indexes']
            for i, index_col in enumerate(index_on):
                index_name = index_on_names[i]
                indexes[index_name][row[index_col]] = agg_dict

        if return_index:
            return group_by_idx

        return list(group_by_idx.values())
|
||||
|
||||
class CacheBlock:
    '''
    CacheBlock: a cached, chainable bundle of query parameters for one entity.

    Wraps up a set of query parameters for a specific entity, and provides cached access
    to different types of "re-queries" via an associated Indexer.

    The goal here is to help build/define entities as the possibly complex transformations
    on the base schema that they are. For example, the Note primitive (entity)
    incorporates details across `files`, `notes`, `note_conversions`, and
    `note_conversion_matter` tables (defined in a single endpoint by a Composer), often
    needs to be selected in particular ways (via an Accessor), and results stored for fast
    access later on (handled by an Indexer). This pipeline can be daunting and requires
    too many moving parts to be handled explicitly everywhere. CacheBlocks wrap up a set
    of query "preferences," exposing a simpler interface for downstream access to
    entities. It still allows for low-level control over re-grouping/indexing, raw hits to
    the actual DB, etc, but keeps things tighter and well-behaved for the Indexer.

    You can think of these as the Indexer's "fingers"; they're deployable mini-Indexes
    that "send back" results to the class cache, which is "broadcast" to all other
    instances for use when necessary.

    Note: Example usage

    ```py
    cb = CacheBlock()

    # Set up cached queries with chained params or via call:

    cb.where(t.notes.c.name=="name").group_by(t.note_conversions.c.format)
    cb() # get results

    # - OR - # (use strings when known)

    cb.where(t.notes.c.name=="name").group_by('format')
    cb() # get results

    # - OR - # (use kwargs in the call; results returned right away)

    cb(
        where=(t.notes.c.name=="name"),
        group_by='format'
    )
    ```

    '''
|
||||
def __init__(
|
||||
self,
|
||||
indexer,
|
||||
table,
|
||||
|
||||
cols = None,
|
||||
where = None,
|
||||
distinct_on = None,
|
||||
order_by = None,
|
||||
limit = 0,
|
||||
|
||||
group_by = None,
|
||||
agg_on = None,
|
||||
index_on = None,
|
||||
):
|
||||
self.indexer = indexer
|
||||
|
||||
self.query_args = {
|
||||
'table' : table,
|
||||
|
||||
'cols' : cols,
|
||||
'where' : where,
|
||||
'distinct_on' : distinct_on,
|
||||
'order_by' : order_by,
|
||||
'limit' : limit,
|
||||
|
||||
'group_by' : group_by,
|
||||
'agg_on' : agg_on,
|
||||
'index_on' : index_on,
|
||||
}
|
||||
|
||||
def _query(self, **kwargs):
|
||||
'''Make cached query with defaults, override with those provided'''
|
||||
return self.indexer.cached_query(**{
|
||||
k : (v if k not in kwargs else kwargs[k])
|
||||
for k,v in self.query_args.items()
|
||||
})
|
||||
|
||||
def __call__(self, **kwargs):
|
||||
'''
|
||||
TODO: overload this for the queries, i.e. getting keys or returning aggregates
|
||||
'''
|
||||
return self._query(**kwargs)
|
||||
|
||||
def where(self, where):
|
||||
self.query_args['where'] = where
|
||||
return self
|
||||
#return self._query(where=where)
|
||||
|
||||
def distinct_on(self, distinct_on):
|
||||
self.query_args['distinct_on'] = distinct_on
|
||||
return self
|
||||
#return self._query(distinct_on=distinct_on)
|
||||
|
||||
def order_by(self, order_by):
|
||||
self.query_args['order_by'] = order_by
|
||||
return self
|
||||
#return self._query(order_by=order_by)
|
||||
|
||||
def limit(self, limit):
|
||||
self.query_args['limit'] = limit
|
||||
return self
|
||||
|
||||
def group_by(self, group_by):
|
||||
self.query_args['group_by'] = group_by
|
||||
return self
|
||||
#return self._query(group_by=group_by)
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user