initial commit
This commit is contained in:
commit
057e20163d
1
.python-version
Normal file
1
.python-version
Normal file
@ -0,0 +1 @@
|
||||
co4
|
0
MANIFEST.in
Normal file
0
MANIFEST.in
Normal file
16
Makefile
Normal file
16
Makefile
Normal file
@ -0,0 +1,16 @@
|
||||
PYTHON=/home/smgr/.pyenv/versions/co4/bin/python
|
||||
BASH=/usr/bin/bash
|
||||
|
||||
|
||||
## ------------------ docs ------------------ ##
|
||||
# These targets name actions, not files; declare them phony so a stray file or
# directory with the same name never suppresses them.
.PHONY: docs-build docs-serve docs-clean

# Regenerate the API reference stubs, then build the HTML docs.
# NOTE(review): the apidoc source path is `co4` while the package in this
# commit is `co3` -- confirm which is intended.
docs-build:
	sphinx-apidoc --module-first -o docs/_autoref/ co4
	$(MAKE) -C docs/ html

# Serve the built HTML docs over HTTP on port 9090.
docs-serve:
	cd docs/_build/html && python -m http.server 9090

# Run the `clean` target of the docs Makefile.
docs-clean:
	$(MAKE) -C docs/ clean
|
||||
## ------------------------------------------ ##
|
||||
|
21
README.md
Normal file
21
README.md
Normal file
@ -0,0 +1,21 @@
|
||||
# Overview
|
||||
`co3` is a package for file conversion and associated database operations. The `CO3` base class
|
||||
provides a standard interface for performing conversions, preparing inserts, and
|
||||
interacting with database schemas that mirror the class hierarchy.
|
||||
|
||||
Simplified description of the operational model:
|
||||
|
||||
**Goal**: interact with a storage medium (database, pickled structure, VSS framework) with
|
||||
a known schema.
|
||||
|
||||
- **Accessor** to provide access to stored items
|
||||
- **Composer** to compose common access points (e.g., JOINed tables)
|
||||
- **Indexer** to index/cache access queries
|
||||
- **Manager** to manage storage state (e.g., supported inserts, database syncs)
|
||||
- **Collector** to collect data for updating storage state
|
||||
- **Database** to collect data for updating storage state
|
||||
- **Mapper** to collect data for updating storage state
|
||||
- **Relation** to collect data for updating storage state
|
||||
|
||||
**CO3** is an abstract base class that makes it easy to integrate this model with object
|
||||
hierarchies that mirror a storage schema.
|
1
build/__editable__.co3-0.1.1-py3-none-any/co3/__init__.py
Symbolic link
1
build/__editable__.co3-0.1.1-py3-none-any/co3/__init__.py
Symbolic link
@ -0,0 +1 @@
|
||||
/home/smgr/Documents/projects/ontolog/co3/co3/__init__.py
|
1
build/__editable__.co3-0.1.1-py3-none-any/co3/accessor.py
Symbolic link
1
build/__editable__.co3-0.1.1-py3-none-any/co3/accessor.py
Symbolic link
@ -0,0 +1 @@
|
||||
/home/smgr/Documents/projects/ontolog/co3/co3/accessor.py
|
@ -0,0 +1 @@
|
||||
/home/smgr/Documents/projects/ontolog/co3/co3/accessors/__init__.py
|
1
build/__editable__.co3-0.1.1-py3-none-any/co3/accessors/fts.py
Symbolic link
1
build/__editable__.co3-0.1.1-py3-none-any/co3/accessors/fts.py
Symbolic link
@ -0,0 +1 @@
|
||||
/home/smgr/Documents/projects/ontolog/co3/co3/accessors/fts.py
|
1
build/__editable__.co3-0.1.1-py3-none-any/co3/accessors/sql.py
Symbolic link
1
build/__editable__.co3-0.1.1-py3-none-any/co3/accessors/sql.py
Symbolic link
@ -0,0 +1 @@
|
||||
/home/smgr/Documents/projects/ontolog/co3/co3/accessors/sql.py
|
1
build/__editable__.co3-0.1.1-py3-none-any/co3/accessors/vss.py
Symbolic link
1
build/__editable__.co3-0.1.1-py3-none-any/co3/accessors/vss.py
Symbolic link
@ -0,0 +1 @@
|
||||
/home/smgr/Documents/projects/ontolog/co3/co3/accessors/vss.py
|
1
build/__editable__.co3-0.1.1-py3-none-any/co3/co3.py
Symbolic link
1
build/__editable__.co3-0.1.1-py3-none-any/co3/co3.py
Symbolic link
@ -0,0 +1 @@
|
||||
/home/smgr/Documents/projects/ontolog/co3/co3/co3.py
|
1
build/__editable__.co3-0.1.1-py3-none-any/co3/collector.py
Symbolic link
1
build/__editable__.co3-0.1.1-py3-none-any/co3/collector.py
Symbolic link
@ -0,0 +1 @@
|
||||
/home/smgr/Documents/projects/ontolog/co3/co3/collector.py
|
1
build/__editable__.co3-0.1.1-py3-none-any/co3/composer.py
Symbolic link
1
build/__editable__.co3-0.1.1-py3-none-any/co3/composer.py
Symbolic link
@ -0,0 +1 @@
|
||||
/home/smgr/Documents/projects/ontolog/co3/co3/composer.py
|
1
build/__editable__.co3-0.1.1-py3-none-any/co3/database.py
Symbolic link
1
build/__editable__.co3-0.1.1-py3-none-any/co3/database.py
Symbolic link
@ -0,0 +1 @@
|
||||
/home/smgr/Documents/projects/ontolog/co3/co3/database.py
|
@ -0,0 +1 @@
|
||||
/home/smgr/Documents/projects/ontolog/co3/co3/databases/__init__.py
|
1
build/__editable__.co3-0.1.1-py3-none-any/co3/databases/fts.py
Symbolic link
1
build/__editable__.co3-0.1.1-py3-none-any/co3/databases/fts.py
Symbolic link
@ -0,0 +1 @@
|
||||
/home/smgr/Documents/projects/ontolog/co3/co3/databases/fts.py
|
1
build/__editable__.co3-0.1.1-py3-none-any/co3/databases/sql.py
Symbolic link
1
build/__editable__.co3-0.1.1-py3-none-any/co3/databases/sql.py
Symbolic link
@ -0,0 +1 @@
|
||||
/home/smgr/Documents/projects/ontolog/co3/co3/databases/sql.py
|
1
build/__editable__.co3-0.1.1-py3-none-any/co3/databases/vss.py
Symbolic link
1
build/__editable__.co3-0.1.1-py3-none-any/co3/databases/vss.py
Symbolic link
@ -0,0 +1 @@
|
||||
/home/smgr/Documents/projects/ontolog/co3/co3/databases/vss.py
|
1
build/__editable__.co3-0.1.1-py3-none-any/co3/indexer.py
Symbolic link
1
build/__editable__.co3-0.1.1-py3-none-any/co3/indexer.py
Symbolic link
@ -0,0 +1 @@
|
||||
/home/smgr/Documents/projects/ontolog/co3/co3/indexer.py
|
1
build/__editable__.co3-0.1.1-py3-none-any/co3/manager.py
Symbolic link
1
build/__editable__.co3-0.1.1-py3-none-any/co3/manager.py
Symbolic link
@ -0,0 +1 @@
|
||||
/home/smgr/Documents/projects/ontolog/co3/co3/manager.py
|
@ -0,0 +1 @@
|
||||
/home/smgr/Documents/projects/ontolog/co3/co3/managers/__init__.py
|
1
build/__editable__.co3-0.1.1-py3-none-any/co3/managers/fts.py
Symbolic link
1
build/__editable__.co3-0.1.1-py3-none-any/co3/managers/fts.py
Symbolic link
@ -0,0 +1 @@
|
||||
/home/smgr/Documents/projects/ontolog/co3/co3/managers/fts.py
|
1
build/__editable__.co3-0.1.1-py3-none-any/co3/managers/sql.py
Symbolic link
1
build/__editable__.co3-0.1.1-py3-none-any/co3/managers/sql.py
Symbolic link
@ -0,0 +1 @@
|
||||
/home/smgr/Documents/projects/ontolog/co3/co3/managers/sql.py
|
1
build/__editable__.co3-0.1.1-py3-none-any/co3/managers/vss.py
Symbolic link
1
build/__editable__.co3-0.1.1-py3-none-any/co3/managers/vss.py
Symbolic link
@ -0,0 +1 @@
|
||||
/home/smgr/Documents/projects/ontolog/co3/co3/managers/vss.py
|
1
build/__editable__.co3-0.1.1-py3-none-any/co3/mapper.py
Symbolic link
1
build/__editable__.co3-0.1.1-py3-none-any/co3/mapper.py
Symbolic link
@ -0,0 +1 @@
|
||||
/home/smgr/Documents/projects/ontolog/co3/co3/mapper.py
|
1
build/__editable__.co3-0.1.1-py3-none-any/co3/relation.py
Symbolic link
1
build/__editable__.co3-0.1.1-py3-none-any/co3/relation.py
Symbolic link
@ -0,0 +1 @@
|
||||
/home/smgr/Documents/projects/ontolog/co3/co3/relation.py
|
@ -0,0 +1 @@
|
||||
/home/smgr/Documents/projects/ontolog/co3/co3/relations/__init__.py
|
1
build/__editable__.co3-0.1.1-py3-none-any/co3/util/__init__.py
Symbolic link
1
build/__editable__.co3-0.1.1-py3-none-any/co3/util/__init__.py
Symbolic link
@ -0,0 +1 @@
|
||||
/home/smgr/Documents/projects/ontolog/co3/co3/util/__init__.py
|
1
build/__editable__.co3-0.1.1-py3-none-any/co3/util/db.py
Symbolic link
1
build/__editable__.co3-0.1.1-py3-none-any/co3/util/db.py
Symbolic link
@ -0,0 +1 @@
|
||||
/home/smgr/Documents/projects/ontolog/co3/co3/util/db.py
|
1
build/__editable__.co3-0.1.1-py3-none-any/co3/util/regex.py
Symbolic link
1
build/__editable__.co3-0.1.1-py3-none-any/co3/util/regex.py
Symbolic link
@ -0,0 +1 @@
|
||||
/home/smgr/Documents/projects/ontolog/co3/co3/util/regex.py
|
1
build/__editable__.co4-0.1.1-py3-none-any/co4/__init__.py
Symbolic link
1
build/__editable__.co4-0.1.1-py3-none-any/co4/__init__.py
Symbolic link
@ -0,0 +1 @@
|
||||
/home/smgr/Documents/projects/ontolog/co4/co4/__init__.py
|
1
build/__editable__.co4-0.1.1-py3-none-any/co4/accessor.py
Symbolic link
1
build/__editable__.co4-0.1.1-py3-none-any/co4/accessor.py
Symbolic link
@ -0,0 +1 @@
|
||||
/home/smgr/Documents/projects/ontolog/co4/co4/accessor.py
|
@ -0,0 +1 @@
|
||||
/home/smgr/Documents/projects/ontolog/co4/co4/accessors/__init__.py
|
1
build/__editable__.co4-0.1.1-py3-none-any/co4/accessors/fts.py
Symbolic link
1
build/__editable__.co4-0.1.1-py3-none-any/co4/accessors/fts.py
Symbolic link
@ -0,0 +1 @@
|
||||
/home/smgr/Documents/projects/ontolog/co4/co4/accessors/fts.py
|
1
build/__editable__.co4-0.1.1-py3-none-any/co4/accessors/table.py
Symbolic link
1
build/__editable__.co4-0.1.1-py3-none-any/co4/accessors/table.py
Symbolic link
@ -0,0 +1 @@
|
||||
/home/smgr/Documents/projects/ontolog/co4/co4/accessors/table.py
|
1
build/__editable__.co4-0.1.1-py3-none-any/co4/accessors/vss.py
Symbolic link
1
build/__editable__.co4-0.1.1-py3-none-any/co4/accessors/vss.py
Symbolic link
@ -0,0 +1 @@
|
||||
/home/smgr/Documents/projects/ontolog/co4/co4/accessors/vss.py
|
1
build/__editable__.co4-0.1.1-py3-none-any/co4/co4.py
Symbolic link
1
build/__editable__.co4-0.1.1-py3-none-any/co4/co4.py
Symbolic link
@ -0,0 +1 @@
|
||||
/home/smgr/Documents/projects/ontolog/co4/co4/co4.py
|
1
build/__editable__.co4-0.1.1-py3-none-any/co4/collector.py
Symbolic link
1
build/__editable__.co4-0.1.1-py3-none-any/co4/collector.py
Symbolic link
@ -0,0 +1 @@
|
||||
/home/smgr/Documents/projects/ontolog/co4/co4/collector.py
|
1
build/__editable__.co4-0.1.1-py3-none-any/co4/composer.py
Symbolic link
1
build/__editable__.co4-0.1.1-py3-none-any/co4/composer.py
Symbolic link
@ -0,0 +1 @@
|
||||
/home/smgr/Documents/projects/ontolog/co4/co4/composer.py
|
@ -0,0 +1 @@
|
||||
/home/smgr/Documents/projects/ontolog/co4/co4/databases/__init__.py
|
1
build/__editable__.co4-0.1.1-py3-none-any/co4/databases/_base.py
Symbolic link
1
build/__editable__.co4-0.1.1-py3-none-any/co4/databases/_base.py
Symbolic link
@ -0,0 +1 @@
|
||||
/home/smgr/Documents/projects/ontolog/co4/co4/databases/_base.py
|
1
build/__editable__.co4-0.1.1-py3-none-any/co4/databases/core.py
Symbolic link
1
build/__editable__.co4-0.1.1-py3-none-any/co4/databases/core.py
Symbolic link
@ -0,0 +1 @@
|
||||
/home/smgr/Documents/projects/ontolog/co4/co4/databases/core.py
|
1
build/__editable__.co4-0.1.1-py3-none-any/co4/databases/fts.py
Symbolic link
1
build/__editable__.co4-0.1.1-py3-none-any/co4/databases/fts.py
Symbolic link
@ -0,0 +1 @@
|
||||
/home/smgr/Documents/projects/ontolog/co4/co4/databases/fts.py
|
1
build/__editable__.co4-0.1.1-py3-none-any/co4/databases/vss.py
Symbolic link
1
build/__editable__.co4-0.1.1-py3-none-any/co4/databases/vss.py
Symbolic link
@ -0,0 +1 @@
|
||||
/home/smgr/Documents/projects/ontolog/co4/co4/databases/vss.py
|
1
build/__editable__.co4-0.1.1-py3-none-any/co4/indexer.py
Symbolic link
1
build/__editable__.co4-0.1.1-py3-none-any/co4/indexer.py
Symbolic link
@ -0,0 +1 @@
|
||||
/home/smgr/Documents/projects/ontolog/co4/co4/indexer.py
|
1
build/__editable__.co4-0.1.1-py3-none-any/co4/manager.py
Symbolic link
1
build/__editable__.co4-0.1.1-py3-none-any/co4/manager.py
Symbolic link
@ -0,0 +1 @@
|
||||
/home/smgr/Documents/projects/ontolog/co4/co4/manager.py
|
@ -0,0 +1 @@
|
||||
/home/smgr/Documents/projects/ontolog/co4/co4/managers/__init__.py
|
1
build/__editable__.co4-0.1.1-py3-none-any/co4/managers/core.py
Symbolic link
1
build/__editable__.co4-0.1.1-py3-none-any/co4/managers/core.py
Symbolic link
@ -0,0 +1 @@
|
||||
/home/smgr/Documents/projects/ontolog/co4/co4/managers/core.py
|
1
build/__editable__.co4-0.1.1-py3-none-any/co4/managers/fts.py
Symbolic link
1
build/__editable__.co4-0.1.1-py3-none-any/co4/managers/fts.py
Symbolic link
@ -0,0 +1 @@
|
||||
/home/smgr/Documents/projects/ontolog/co4/co4/managers/fts.py
|
1
build/__editable__.co4-0.1.1-py3-none-any/co4/managers/vss.py
Symbolic link
1
build/__editable__.co4-0.1.1-py3-none-any/co4/managers/vss.py
Symbolic link
@ -0,0 +1 @@
|
||||
/home/smgr/Documents/projects/ontolog/co4/co4/managers/vss.py
|
1
build/__editable__.co4-0.1.1-py3-none-any/co4/utils/db.py
Symbolic link
1
build/__editable__.co4-0.1.1-py3-none-any/co4/utils/db.py
Symbolic link
@ -0,0 +1 @@
|
||||
/home/smgr/Documents/projects/ontolog/co4/co4/utils/db.py
|
1
build/__editable__.co4-0.1.1-py3-none-any/co4/utils/paths.py
Symbolic link
1
build/__editable__.co4-0.1.1-py3-none-any/co4/utils/paths.py
Symbolic link
@ -0,0 +1 @@
|
||||
/home/smgr/Documents/projects/ontolog/co4/co4/utils/paths.py
|
30
co3.egg-info/PKG-INFO
Normal file
30
co3.egg-info/PKG-INFO
Normal file
@ -0,0 +1,30 @@
|
||||
Metadata-Version: 2.1
|
||||
Name: co3
|
||||
Version: 0.1.1
|
||||
Summary: Lightweight ORM
|
||||
Author-email: Sam Griesemer <samgriesemer@gmail.com>
|
||||
Classifier: Programming Language :: Python :: 3
|
||||
Classifier: License :: OSI Approved :: MIT License
|
||||
Classifier: Operating System :: OS Independent
|
||||
Requires-Python: >=3.11
|
||||
Description-Content-Type: text/markdown
|
||||
Requires-Dist: tqdm
|
||||
|
||||
# Overview
|
||||
`co4` is a package for file conversion and associated database operations. The `CO4` base class
|
||||
provides a standard interface for performing conversions, preparing inserts, and
|
||||
interacting with database schemas that mirror the class hierarchy.
|
||||
|
||||
Simplified description of the operational model:
|
||||
|
||||
**Goal**: interact with a storage medium (database, pickled structure, VSS framework) with
|
||||
a known schema.
|
||||
|
||||
- **Accessor** to provide access to stored items
|
||||
- **Composer** to compose common access points (e.g., JOINed tables)
|
||||
- **Indexer** to index/cache access queries
|
||||
- **Manager** to manage storage state (e.g., supported inserts, database syncs)
|
||||
- **Collector** to collect data for updating storage state
|
||||
|
||||
**CO4** is an abstract base class that makes it easy to integrate this model with object
|
||||
hierarchies that mirror a storage schema.
|
34
co3.egg-info/SOURCES.txt
Normal file
34
co3.egg-info/SOURCES.txt
Normal file
@ -0,0 +1,34 @@
|
||||
MANIFEST.in
|
||||
README.md
|
||||
pyproject.toml
|
||||
co3/__init__.py
|
||||
co3/accessor.py
|
||||
co3/co3.py
|
||||
co3/collector.py
|
||||
co3/composer.py
|
||||
co3/database.py
|
||||
co3/indexer.py
|
||||
co3/manager.py
|
||||
co3/mapper.py
|
||||
co3/relation.py
|
||||
co3.egg-info/PKG-INFO
|
||||
co3.egg-info/SOURCES.txt
|
||||
co3.egg-info/dependency_links.txt
|
||||
co3.egg-info/requires.txt
|
||||
co3.egg-info/top_level.txt
|
||||
co3/accessors/__init__.py
|
||||
co3/accessors/fts.py
|
||||
co3/accessors/sql.py
|
||||
co3/accessors/vss.py
|
||||
co3/databases/__init__.py
|
||||
co3/databases/fts.py
|
||||
co3/databases/sql.py
|
||||
co3/databases/vss.py
|
||||
co3/managers/__init__.py
|
||||
co3/managers/fts.py
|
||||
co3/managers/sql.py
|
||||
co3/managers/vss.py
|
||||
co3/relations/__init__.py
|
||||
co3/util/__init__.py
|
||||
co3/util/db.py
|
||||
co3/util/regex.py
|
1
co3.egg-info/dependency_links.txt
Normal file
1
co3.egg-info/dependency_links.txt
Normal file
@ -0,0 +1 @@
|
||||
|
1
co3.egg-info/requires.txt
Normal file
1
co3.egg-info/requires.txt
Normal file
@ -0,0 +1 @@
|
||||
tqdm
|
1
co3.egg-info/top_level.txt
Normal file
1
co3.egg-info/top_level.txt
Normal file
@ -0,0 +1 @@
|
||||
co3
|
109
co3/__init__.py
Normal file
109
co3/__init__.py
Normal file
@ -0,0 +1,109 @@
|
||||
'''
|
||||
Database submodule
|
||||
|
||||
- `db`: contains SQLAlchemy-based schema definitions
|
||||
- `accessors`: convenience methods for accessing database entries
|
||||
- `populate`: convenience methods for populating database tables
|
||||
|
||||
The `accessors` and `populate` submodules are each split into `schema` and `fts` method
|
||||
groups. The former concerns methods relating to the actual database schema, the latter to
|
||||
their SQLite FTS counterparts.
|
||||
|
||||
Note: Subpackages organization
|
||||
Subpackages are broken up by inheritance. Within a given submodule, you have a
|
||||
`_base.py` file defining the base class associated with that submodule's title, along
|
||||
with concrete subclasses of that base in their own files. Deeper inheritance would
|
||||
recursively extend this structure. The `__init__.py` for a given submodule then
|
||||
exposes the concrete instances, leaving the base hidden. For example,
|
||||
|
||||
accessors/
|
||||
_base.py
|
||||
core.py
|
||||
fts.py
|
||||
|
||||
`core` and `fts` house the `CoreAccessor` and `FTSAccessor` classes, respectively,
|
||||
and are the direct subclasses of the `Accessor` parent found in the `_base`. This base
|
||||
class _could_ be placed outside of the submodule in the parent directory (imported
|
||||
with something like `from db import accessor` instead of `from db.accessor import
|
||||
_base`). This is entirely valid, but I tend to prefer when the base class is among its
|
||||
direct children, as
|
||||
|
||||
- In this case at least, the base doesn't need to be exposed
|
||||
- The base class is being stowed away under an appropriately named submodule; having a
|
||||
separate `accessor.py` and `accessors/` file/directory can feel a little cluttered.
|
||||
- It makes imports across the accessors feel standardized:
|
||||
|
||||
```py
|
||||
from localsys.db.accessors._base import Accessor
|
||||
|
||||
from localsys.db.accessors.core import CoreAccessor
|
||||
```
|
||||
|
||||
Both have the same level of nesting to reach the class.
|
||||
|
||||
Frankly, both means of organization are perfectly fine, and as far as I can tell,
|
||||
semantically sound in their own right. This particular scheme is just a preference in
|
||||
the moment, and so long as I keep things consistent, choosing one over the other
|
||||
shouldn't matter.
|
||||
|
||||
Additionally, note how `__init__.py`s are typically set up when providing wider access
|
||||
to internal modules. The `init` typically pulls out classes from sibling modules
|
||||
(i.e., files), but will import subpackages at the topmost level. For example, for the
|
||||
structure
|
||||
|
||||
```
|
||||
db/
|
||||
__init__.py
|
||||
accessors/
|
||||
__init__.py
|
||||
_base.py
|
||||
core.py
|
||||
fts.py
|
||||
```
|
||||
|
||||
we have
|
||||
|
||||
```db/__init__.py
|
||||
from localsys.db import accessors
|
||||
```
|
||||
|
||||
which just imports the subpackage `accessors`. However, within subpackage:
|
||||
|
||||
```db/accessors/__init__.py
|
||||
from localsys.db.accessors.core import CoreAccessor
|
||||
```
|
||||
|
||||
we don't just import the submodule `core`; we dig into the file to grab the relevant
|
||||
class and pull it into the outer namespace. Overarching point: `__init__.py` files
|
||||
typically reach into the sibling files (submodules) and pull out classes. Given that
|
||||
this behavior is recursive, `__init__.py` files then respect subpackages (nested
|
||||
directories), importing them at the top-level and expecting an internal `__init__.py`
|
||||
will have managed access appropriately.
|
||||
|
||||
Note: Organization for inheritance over composition
|
||||
At a glance, the organization of subpackages here feels like it clashes with those
|
||||
seen in `localsys.primitives`. `note_components`, for instance, houses the components
|
||||
for the outer `note` module. Contrast this with how the `core` submodule looks: it's
|
||||
composing `*/core.py` files across subpackages `accessors` and `managers`, rather than
|
||||
a single subpackage like `note`. This seems inconsistent, but the subpackages here are
|
||||
actually still organized in the same way: by inheritance. It just happens that
|
||||
all of the note components inherit from the same base class, and are thus confined to
|
||||
a single subpackage. This aside, the subpackages themselves are still created around
|
||||
inheritance, wrapping up a base and direct subclasses.
|
||||
'''
|
||||
|
||||
from co3.accessor import Accessor
|
||||
from co3.co3 import CO3
|
||||
from co3.collector import Collector
|
||||
from co3.composer import Composer
|
||||
from co3.database import Database
|
||||
from co3.indexer import Indexer
|
||||
from co3.manager import Manager
|
||||
from co3.mapper import Mapper
|
||||
from co3.relation import Relation
|
||||
|
||||
from co3 import accessors
|
||||
from co3 import databases
|
||||
from co3 import managers
|
||||
from co3 import relations
|
||||
from co3 import util
|
BIN
co3/__pycache__/__init__.cpython-311.pyc
Normal file
BIN
co3/__pycache__/__init__.cpython-311.pyc
Normal file
Binary file not shown.
BIN
co3/__pycache__/__init__.cpython-312.pyc
Normal file
BIN
co3/__pycache__/__init__.cpython-312.pyc
Normal file
Binary file not shown.
BIN
co3/__pycache__/accessor.cpython-311.pyc
Normal file
BIN
co3/__pycache__/accessor.cpython-311.pyc
Normal file
Binary file not shown.
BIN
co3/__pycache__/accessor.cpython-312.pyc
Normal file
BIN
co3/__pycache__/accessor.cpython-312.pyc
Normal file
Binary file not shown.
BIN
co3/__pycache__/co3.cpython-312.pyc
Normal file
BIN
co3/__pycache__/co3.cpython-312.pyc
Normal file
Binary file not shown.
BIN
co3/__pycache__/co4.cpython-311.pyc
Normal file
BIN
co3/__pycache__/co4.cpython-311.pyc
Normal file
Binary file not shown.
BIN
co3/__pycache__/collector.cpython-311.pyc
Normal file
BIN
co3/__pycache__/collector.cpython-311.pyc
Normal file
Binary file not shown.
BIN
co3/__pycache__/collector.cpython-312.pyc
Normal file
BIN
co3/__pycache__/collector.cpython-312.pyc
Normal file
Binary file not shown.
BIN
co3/__pycache__/composer.cpython-311.pyc
Normal file
BIN
co3/__pycache__/composer.cpython-311.pyc
Normal file
Binary file not shown.
BIN
co3/__pycache__/composer.cpython-312.pyc
Normal file
BIN
co3/__pycache__/composer.cpython-312.pyc
Normal file
Binary file not shown.
BIN
co3/__pycache__/database.cpython-312.pyc
Normal file
BIN
co3/__pycache__/database.cpython-312.pyc
Normal file
Binary file not shown.
BIN
co3/__pycache__/indexer.cpython-311.pyc
Normal file
BIN
co3/__pycache__/indexer.cpython-311.pyc
Normal file
Binary file not shown.
BIN
co3/__pycache__/indexer.cpython-312.pyc
Normal file
BIN
co3/__pycache__/indexer.cpython-312.pyc
Normal file
Binary file not shown.
BIN
co3/__pycache__/manager.cpython-311.pyc
Normal file
BIN
co3/__pycache__/manager.cpython-311.pyc
Normal file
Binary file not shown.
BIN
co3/__pycache__/manager.cpython-312.pyc
Normal file
BIN
co3/__pycache__/manager.cpython-312.pyc
Normal file
Binary file not shown.
BIN
co3/__pycache__/mapper.cpython-312.pyc
Normal file
BIN
co3/__pycache__/mapper.cpython-312.pyc
Normal file
Binary file not shown.
BIN
co3/__pycache__/relation.cpython-312.pyc
Normal file
BIN
co3/__pycache__/relation.cpython-312.pyc
Normal file
Binary file not shown.
28
co3/accessor.py
Normal file
28
co3/accessor.py
Normal file
@ -0,0 +1,28 @@
|
||||
'''
|
||||
Accessor
|
||||
|
||||
Provides access to an underlying schema through a supported set of operations. Class
|
||||
methods could be general, high-level SQL wrappers, or convenience functions for common
|
||||
schema-specific queries.
|
||||
'''
|
||||
import inspect
|
||||
from pathlib import Path
|
||||
from collections import defaultdict
|
||||
|
||||
import sqlalchemy as sa
|
||||
|
||||
#from co3.database import Database
|
||||
|
||||
|
||||
class Accessor[D: 'Database']:
|
||||
'''
|
||||
Access wrapper class for complex queries and easy integration with Composer tables.
|
||||
Implements high-level access to things like common constrained SELECT queries.
|
||||
|
||||
Parameters:
|
||||
engine: SQLAlchemy engine to use for queries. Engine is initialized dynamically as
|
||||
a property (based on the config) if not provided
|
||||
'''
|
||||
def __init__(self, database: D):
|
||||
self.database = database
|
||||
|
24
co3/accessors/__init__.py
Normal file
24
co3/accessors/__init__.py
Normal file
@ -0,0 +1,24 @@
|
||||
'''
|
||||
Note that subclasses in this subpackage are split differently to other subpackages in the
|
||||
DB. Instead of being split by table group, corresponding to a Composer (which defines that
|
||||
table group), Accessors are split by a separate dimension: table "type". This is why we
|
||||
have a "TableAccessor" and an "FTSAccessor": the former exposes access operations
|
||||
available to generic tables, the latter to FTS tables (instead of being designed
|
||||
specifically around "core" and "fts" groups, for instance).
|
||||
|
||||
Seeing as FTS tables are "generic" tables, it seems inconsistent not to have FTSAccessor
|
||||
inherit from TableAccessor. While this would work fine, the model we're working with
|
||||
doesn't really need it; you can instead think of the FTSAccessor as defining _only_
|
||||
FTS-specific operations. Given that you have a Composer for your desired table group, you
|
||||
can then wrap it with your desired set of "access actions," available in separate Accessor
|
||||
subclasses.
|
||||
|
||||
For instance, you could wrap an FTSComposer in either a TableAccessor or FTSAccessor. The
|
||||
former will treat the tables in the composer like regular tables, exposing methods like
|
||||
`.select` and `.select_one`, whereas the latter defines FTS-specific actions like
|
||||
`.search`.
|
||||
'''
|
||||
|
||||
from co3.accessors.sql import SQLAccessor
|
||||
from co3.accessors.fts import FTSAccessor
|
||||
from co3.accessors.vss import VSSAccessor
|
BIN
co3/accessors/__pycache__/__init__.cpython-311.pyc
Normal file
BIN
co3/accessors/__pycache__/__init__.cpython-311.pyc
Normal file
Binary file not shown.
BIN
co3/accessors/__pycache__/__init__.cpython-312.pyc
Normal file
BIN
co3/accessors/__pycache__/__init__.cpython-312.pyc
Normal file
Binary file not shown.
BIN
co3/accessors/__pycache__/_base.cpython-311.pyc
Normal file
BIN
co3/accessors/__pycache__/_base.cpython-311.pyc
Normal file
Binary file not shown.
BIN
co3/accessors/__pycache__/fts.cpython-311.pyc
Normal file
BIN
co3/accessors/__pycache__/fts.cpython-311.pyc
Normal file
Binary file not shown.
BIN
co3/accessors/__pycache__/fts.cpython-312.pyc
Normal file
BIN
co3/accessors/__pycache__/fts.cpython-312.pyc
Normal file
Binary file not shown.
BIN
co3/accessors/__pycache__/sql.cpython-312.pyc
Normal file
BIN
co3/accessors/__pycache__/sql.cpython-312.pyc
Normal file
Binary file not shown.
BIN
co3/accessors/__pycache__/table.cpython-311.pyc
Normal file
BIN
co3/accessors/__pycache__/table.cpython-311.pyc
Normal file
Binary file not shown.
BIN
co3/accessors/__pycache__/vss.cpython-311.pyc
Normal file
BIN
co3/accessors/__pycache__/vss.cpython-311.pyc
Normal file
Binary file not shown.
BIN
co3/accessors/__pycache__/vss.cpython-312.pyc
Normal file
BIN
co3/accessors/__pycache__/vss.cpython-312.pyc
Normal file
Binary file not shown.
147
co3/accessors/fts.py
Normal file
147
co3/accessors/fts.py
Normal file
@ -0,0 +1,147 @@
|
||||
import sqlalchemy as sa
|
||||
|
||||
from co3 import util
|
||||
from co3.accessor import Accessor
|
||||
|
||||
|
||||
class FTSAccessor(Accessor):
    '''
    Accessor exposing search operations over SQLite FTS tables.

    NOTE(review): `search` calls `self.raw_select`, which the `Accessor` base class does
    not define in the code visible here (a `raw_select` is defined on `SQLAccessor`) --
    confirm where this method is expected to come from.
    '''
    @staticmethod
    def _sql_literal(value):
        '''
        Render a Python value as a literal for inline use in a SQL string.

        Strings are single-quoted with embedded single quotes doubled (the SQL escape);
        every other value falls back to its `repr`.
        '''
        if isinstance(value, str):
            escaped = value.replace("'", "''")
            return f"'{escaped}'"

        return repr(value)

    def search(
        self,
        table_name  : str,
        select_cols : str | list | None = '*',
        search_cols : str | None = None,
        q           : str | None = None,
        colq        : str | None = None,
        snip_col    : int | None = 0,
        hl_col      : int | None = 0,
        limit       : int | None = 100,
        snip        : int | None = 64,
        tokenizer   : str | None = 'unicode61',
        group_by    : str | None = None,
        agg_cols    : list | None = None,
        wherein_dict: dict | None = None,
        unique_on   : dict | None = None,
    ):
        '''
        Execute a search query against an indexed FTS table for specific primitives. This
        method is mostly a generic FTS handler, capable of handling queries to any available
        FTS table with a matching naming scheme (`<table_name>_fts_<tokenizer>`). The current
        intention is to support all tokenizers, for file, note, block, and link primitives.

        Search results include the selected FTS table columns, as well as SQLite-supported
        `snippet`s and `highlight`s for matches. Matches are filtered and ordered by SQLite's
        `MATCH`-based score for the text & column queries. Results are (a list of) fully
        expanded dictionaries housing column-value pairs.

        Note:
            GROUP BY cannot be paired with SQLITE FTS extensions; thus, we perform manual
            group checks on the result set in Python before response

        Analysis:
            The returned JSON structure has been (loosely) optimized for speed on the client
            side. Fully forming individual dictionary based responses saves time in
            Javascript, as the JSON parser is expected to be able to create the objects
            faster than post-hoc construction in JS. This return structure was compared
            against returning an array of arrays (all ordered in the same fashion), along with
            a column list to be associated with each of the result values. While this saves
            some size on the payload (the same column names don't have to be transmitted for
            each result), the size of the returned content massively outweighs the
            predominantly short column names. The only way this structure would be viable is
            if a significant amount was saved on transfer compared to the slow down in JS
            object construction; this is (almost) never the case.

        Parameters:
            table_name  : base name of the FTS table to search; the queried table is
                          `<table_name>_fts_<tokenizer>`
            select_cols : columns to select, as a SQL column string or a list of column
                          names (joined with commas); `None` selects `*`
            search_cols : space separated string of columns to use for primary queries
            q           : search query
            colq        : column constraint string; must conform to SQLite standards (e.g.,
                          `<col>:<text>`)
            snip_col    : index of the table column to use for snippets (default: 0)
            hl_col      : index of the table column to use for highlights (default: 0)
            limit       : maximum number of results to return in the SQL query
            snip        : snippet length (max: 64)
            tokenizer   : tokenizer to use (assumes relevant FTS table has been built)
            group_by    : name of the result column to group rows on; grouping is done in
                          Python after the query
            agg_cols    : columns whose values are gathered, per group, into a set stored
                          under `<col>_agg`
            wherein_dict: (col-name, value-list) pairs to match result set against, via
                          WHERE ... IN clauses
            unique_on   : currently unused by this method

        Returns:
            If `group_by` is None, the `(rows, columns)` pair exactly as returned by
            `raw_select`. Otherwise, a dictionary with keys `results` (mapping each group
            value to the first row seen with that value, augmented with the `<col>_agg`
            sets), `columns`, and `num_results` (the number of rows before grouping).
        '''
        search_q = ''

        if select_cols is None:
            # a literal `None` would otherwise be rendered into the SELECT list
            select_cols = '*'
        elif isinstance(select_cols, list):
            select_cols = ', '.join(select_cols)

        # construct main search query
        if search_cols and q:
            search_q = f'{{{search_cols}}} : {q}'

        # add auxiliary search constraints
        if colq:
            search_q += f' {colq}'

        search_q = search_q.strip()

        hl_start = '<b><mark>'
        hl_end   = '</mark></b>'

        fts_table_name = f'{table_name}_fts_{tokenizer}'

        sql = f'''
        SELECT
            {select_cols},
            snippet({fts_table_name}, {snip_col}, '{hl_start}', '{hl_end}', '...', {snip}) AS snippet,
            highlight({fts_table_name}, {hl_col}, '{hl_start}', '{hl_end}') AS highlight
        FROM {fts_table_name}
        '''

        where_clauses = []
        if search_q:
            # quote via the SQL escape so an apostrophe in the query text cannot
            # terminate the string literal early
            match_literal = self._sql_literal(search_q)
            where_clauses.append(f'{fts_table_name} MATCH {match_literal}\n')

        if wherein_dict:
            for col, vals in wherein_dict.items():
                # build the IN list explicitly: `tuple(vals)` renders a single value as
                # `('x',)`, whose trailing comma is not valid SQL
                in_list = ', '.join(self._sql_literal(val) for val in vals)
                where_clauses.append(f'{col} IN ({in_list})\n')

        if where_clauses:
            where_str = " AND ".join(where_clauses)
            sql += f'WHERE {where_str}'

        sql += f'ORDER BY rank LIMIT {limit};'

        row_dicts, cols = self.raw_select(sql, include_cols=True)

        if group_by is None:
            return row_dicts, cols

        if agg_cols is None:
            agg_cols = []

        # "group by" block ID and wrangle the links into a list
        # note we can't perform native GROUP BYs with FTS results
        group_by_idx = {}
        for row in row_dicts:
            group_by_attr = row.get(group_by)

            # add new entries
            for agg_col in agg_cols:
                row[f'{agg_col}_agg'] = set()

            # rows without a value for the grouping column are left out of the groups
            if group_by_attr is None:
                continue

            # the first row seen for a group value becomes that group's representative
            if group_by_attr not in group_by_idx:
                group_by_idx[group_by_attr] = row

            for agg_col in agg_cols:
                if agg_col in row:
                    group_by_idx[group_by_attr][f'{agg_col}_agg'].add(row[agg_col])

        return {
            'results'     : group_by_idx,
            'columns'     : cols,
            'num_results' : len(row_dicts),
        }
|
96
co3/accessors/sql.py
Normal file
96
co3/accessors/sql.py
Normal file
@ -0,0 +1,96 @@
|
||||
from pathlib import Path
|
||||
from collections.abc import Iterable
|
||||
import inspect
|
||||
from functools import cache
|
||||
|
||||
import sqlalchemy as sa
|
||||
|
||||
from co3 import util
|
||||
from co3.accessor import Accessor
|
||||
from co3.relation import Relation
|
||||
|
||||
#from co3.databases.sql import RelationalDatabase, TabularDatabase, SQLDatabase
|
||||
from co3.relations import TabularRelation, SQLTable
|
||||
|
||||
|
||||
class RelationalAccessor[D: 'RelationalDatabase', R: Relation](Accessor[D]):
|
||||
pass
|
||||
|
||||
|
||||
class TabularAccessor[D: 'TabularDatabase', R: TabularRelation](RelationalAccessor[D, R]):
|
||||
pass
|
||||
|
||||
|
||||
class SQLAccessor(TabularAccessor['SQLDatabase', SQLTable]):
    '''
    Accessor for SQL databases: runs raw SQL strings and SQLAlchemy selectables and
    hands the results back through the `util.db` execution helpers.
    '''
    def raw_select(
        self,
        sql,
        bind_params=None,
        mappings=False,
        include_cols=False,
    ):
        '''
        Execute a raw SQL string.

        Parameters:
            sql:          SQL text; wrapped in `sa.text()` before execution
            bind_params:  parameters forwarded to the execution helper for binding
            mappings:     if True, use `util.db.sa_exec_mappings` instead of
                          `util.db.sa_exec_dicts` to produce the result
            include_cols: forwarded to the execution helper; when set, the result is
                          a `(rows, columns)` pair

        Returns:
            Whatever the selected `util.db` helper returns.
        '''
        # the file imports `util` (not `utils`); the old name raised NameError
        res_method = util.db.sa_exec_mappings if mappings else util.db.sa_exec_dicts

        # NOTE(review): this method reaches the engine via `self.database.engine`
        # while `select()` uses `self.engine` -- confirm which one `Accessor` exposes.
        return res_method(
            self.database.engine,
            sa.text(sql),
            bind_params=bind_params,
            include_cols=include_cols,
        )

    def select(
        self,
        table: sa.Table | sa.Subquery | sa.Join,
        cols         = None,
        where        = None,
        distinct_on  = None,
        order_by     = None,
        limit        = 0,
        mappings     = False,
        include_cols = False,
    ):
        '''
        Perform a SELECT query against the provided table-like object.

        Deprecated: String aliases
            String aliases for tables are no longer supported. This method no longer
            checks against any specific schema table-maps or Composers. Instead, this
            should be done outside the Accessor.

        Parameters:
            table:        table, subquery, or join to select from
            cols:         optional iterable of columns to select; defaults to
                          everything `sa.select(table)` yields
            where:        optional filter clause; defaults to `sa.true()`
            distinct_on:  column(s) applied as a GROUP BY; for now serves as a proxy
                          for DISTINCT (no aggregation methods accepted)
            order_by:     column to order results by (can use <col>.desc() to order
                          by descending)
            limit:        maximum number of rows; values <= 0 mean no limit
            mappings:     if True, use `util.db.sa_exec_mappings` instead of
                          `util.db.sa_exec_dicts` to produce the result
            include_cols: forwarded to the execution helper; when set, the result is
                          a `(rows, columns)` pair
        '''
        if where is None:
            where = sa.true()

        # the file imports `util` (not `utils`); the old name raised NameError
        res_method = util.db.sa_exec_mappings if mappings else util.db.sa_exec_dicts

        if cols is None:
            stmt = sa.select(table)
        else:
            stmt = sa.select(*cols).select_from(table)
        stmt = stmt.where(where)

        if distinct_on is not None:
            stmt = stmt.group_by(distinct_on)

        if order_by is not None:
            stmt = stmt.order_by(order_by)

        if limit > 0:
            stmt = stmt.limit(limit)

        # NOTE(review): `raw_select()` uses `self.database.engine` instead -- confirm
        # which attribute `Accessor` exposes.
        return res_method(self.engine, stmt, include_cols=include_cols)

    def select_one(self, table, cols=None, where=None, mappings=False, include_cols=False):
        '''
        Run `select()` with `limit=1` and unwrap the single result.

        Returns:
            The first row, or `(first_row, columns)` when `include_cols` is set;
            `None` when the query matches nothing.
        '''
        # keyword arguments are required here: passed positionally, `mappings` and
        # `include_cols` would land in `select()`'s `distinct_on` / `order_by` slots
        res = self.select(
            table,
            cols=cols,
            where=where,
            limit=1,
            mappings=mappings,
            include_cols=include_cols,
        )

        if include_cols:
            rows, columns = res
            if len(rows) > 0:
                return rows[0], columns
            return None

        if len(res) > 0:
            return res[0]

        return None
|
||||
|
100
co3/accessors/vss.py
Normal file
100
co3/accessors/vss.py
Normal file
@ -0,0 +1,100 @@
|
||||
import pickle
|
||||
import logging
|
||||
from pathlib import Path
|
||||
import time
|
||||
|
||||
import sqlalchemy as sa
|
||||
#from sentence_transformers import SentenceTransformer, util
|
||||
|
||||
from co3.accessor import Accessor
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class VSSAccessor(Accessor):
    '''
    Accessor for vector similarity search over pickled embedding indexes.

    NOTE(review): `SentenceTransformer` and `util` are referenced below, but the
    `sentence_transformers` import at the top of this file is commented out; as the
    file stands, `model` and `search()` raise NameError. Restore the import (or
    import lazily) before use.
    '''
    def __init__(self, cache_path):
        '''
        Parameters:
            cache_path: directory holding the `embeddings.pkl` cache file
        '''
        super().__init__()

        # both are loaded lazily through the `model` / `embeddings` properties
        self._model      = None
        self._embeddings = None

        self._embedding_size = 384
        self.embedding_path  = Path(cache_path, 'embeddings.pkl')

    def write_embeddings(self, embedding_dict):
        '''
        Pickle `embedding_dict` to `self.embedding_path`, replacing existing content.
        '''
        self.embedding_path.write_bytes(pickle.dumps(embedding_dict))

    def read_embeddings(self):
        '''
        Load the pickled embedding dictionary from `self.embedding_path`.

        Returns:
            The unpickled object, or `None` (with a logged warning) if the file does
            not exist.
        '''
        if not self.embedding_path.exists():
            logger.warning(
                f'Attempting to access non-existent embeddings at {self.embedding_path}'
            )
            return None

        # SECURITY: unpickling can execute arbitrary code; only point `cache_path`
        # at files this application wrote itself via `write_embeddings()`.
        return pickle.loads(self.embedding_path.read_bytes())

    @property
    def model(self):
        '''
        Sentence embedding model, instantiated on first access.
        '''
        if self._model is None:
            # model trained with 128 token seqs
            self._model = SentenceTransformer('sentence-transformers/all-MiniLM-L12-v2')
        return self._model

    @property
    def embeddings(self):
        '''
        Cached embedding dictionary, read from disk on first access. Remains `None`
        (and is re-read on the next access) while the cache file is missing.
        '''
        if self._embeddings is None:
            self._embeddings = self.read_embeddings()
        return self._embeddings

    def embed_chunks(self, chunks, batch_size=64, show_prog=True):
        '''
        Encode `chunks` with the model, requesting normalized numpy embeddings.

        Parameters:
            chunks:     text input(s) passed to `model.encode`
            batch_size: encode batch size
            show_prog:  whether to show the encoder's progress bar
        '''
        return self.model.encode(
            chunks,
            batch_size           = batch_size,
            show_progress_bar    = show_prog,
            convert_to_numpy     = True,
            normalize_embeddings = True
        )

    def search(
        self,
        query           : str,
        index_name      : str,
        limit           : int = 10,
        score_threshold = 0.5,
    ):
        '''
        Embed `query` and return the best matching items of the named index.

        Parameters:
            query:           search text; an empty query returns `None`
            index_name:      one of ['chunks','blocks','notes']
            limit:           maximum number of hits requested from the search
            score_threshold: hits scoring below this value are dropped

        Returns:
            List of hit dicts, each extended with `group_name` and `item` entries
            looked up from the index; `None` if the query is empty, the embedding
            cache is unavailable, or the index does not exist.
        '''
        if not query:
            return None

        # `self.embeddings` is None when the cache file is missing; a bare
        # membership test against None would raise TypeError
        embeddings = self.embeddings
        if embeddings is None or index_name not in embeddings:
            logger.warning(
                f'Index "{index_name}" does not exist'
            )
            return None

        start = time.time()

        query_embedding = self.embed_chunks(query, show_prog=False)
        index_ids, index_embeddings, index_items = embeddings[index_name]

        hits = util.semantic_search(
            query_embedding,
            index_embeddings,
            top_k=limit,
            score_function=util.dot_score
        )[0]

        hits = [hit for hit in hits if hit['score'] >= score_threshold]

        for hit in hits:
            idx = hit['corpus_id']
            hit['group_name'] = index_ids[idx]
            hit['item']       = index_items[idx]

        logger.info(f'{len(hits)} hits in {time.time()-start:.2f}s')

        return hits
|
||||
|
106
co3/co3.py
Normal file
106
co3/co3.py
Normal file
@ -0,0 +1,106 @@
|
||||
'''
|
||||
CO4
|
||||
|
||||
CO4 is an abstract base class for scaffolding object hierarchies and managing operations
|
||||
with associated database schemas. It facilitates something like a "lightweight ORM" for
|
||||
classes/tables/states with fixed transformations of interest. The canonical use case is
|
||||
managing hierarchical document relations, format conversions, and syntactical components.
|
||||
'''
|
||||
|
||||
import inspect
|
||||
import logging
|
||||
from functools import wraps, partial
|
||||
|
||||
#from localsys.db.schema import tables
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
#def register_format(_format):
|
||||
# def decorator(func):
|
||||
# self.collate.format_map[_format] = func
|
||||
#
|
||||
# @wraps(func)
|
||||
# def register(*args, **kwargs):
|
||||
# return func(*args, **kwargs)
|
||||
#
|
||||
# return register
|
||||
# return decorator
|
||||
|
||||
def collate(action_key, action_groups=None):
    '''
    Decorator factory marking a method as the handler for `action_key`.

    The decorated function is returned unchanged apart from a new `_action_data`
    attribute holding `(action_key, action_groups)`, which `FormatRegistryMeta`
    looks for when building a class's `action_map`.

    Parameters:
        action_key:    key the method is registered under
        action_groups: optional list of group names; defaults to `[None]`
    '''
    def decorator(func):
        # Resolve the default into a new local name: assigning to `action_groups`
        # in here would make it local to `decorator` and raise UnboundLocalError
        # on the `is None` check.
        groups = [None] if action_groups is None else action_groups
        func._action_data = (action_key, groups)
        return func
    return decorator
|
||||
|
||||
class FormatRegistryMeta(type):
    '''
    Metaclass that gathers methods carrying an `_action_data` attribute (as set by
    the `collate` decorator) into an `action_map` class attribute.

    `action_map` maps each action key to a `(function, action_groups)` tuple.
    '''
    def __new__(cls, name, bases, attrs):
        action_registry = {}

        # add registered superclass methods; iterate over bases (usually just one),
        # then walk that base's MRO from the root down so more derived classes
        # overwrite entries from their ancestors
        for base in bases:
            for _class in reversed(base.mro()):
                methods = inspect.getmembers(_class, predicate=inspect.isfunction)
                for _, method in methods:
                    if hasattr(method, '_action_data'):
                        action_key, action_groups = method._action_data
                        action_registry[action_key] = (method, action_groups)

        # add final registered formats for the current class, overwriting any found
        # in the superclass chain; register the attribute itself rather than the
        # leftover `method` variable from the loop above
        for attr_value in attrs.values():
            if hasattr(attr_value, '_action_data'):
                action_key, action_groups = attr_value._action_data
                action_registry[action_key] = (attr_value, action_groups)

        attrs['action_map'] = action_registry

        return super().__new__(cls, name, bases, attrs)
|
||||
|
||||
class CO3(metaclass=FormatRegistryMeta):
    '''
    CO3: COllate, COllect, COmpose - conversion & DB insertion base

    - Collate: organize and transform conversion outputs, possibly across class components
    - Collect: gather core attributes, conversion data, and subcomponents for DB insertion
    - Compose: construct object-associated DB table references through the class hierarchy

    Note: on action groups
        Group keys are simply named collections to make it easy for storage components to
        be attached to action subsets. They do _not_ augment the action registration
        namespace, meaning the action key should still be unique; the group key is purely
        auxiliary.

        Action methods can also be attached to several groups, in case there is
        overlapping utility within or across schemas or storage media. In this case, it
        becomes particularly critical to ensure registered `collate` methods really are
        just "gathering results" from possibly heavy-duty operations, rather than
        performing them when called, so as to reduce wasted computation.
    '''
    @property
    def attributes(self):
        '''
        Method to define how a subtype's inserts should be handled under `collect` for
        canonical attributes, i.e., inserts to the type's table.

        The base implementation returns the instance's attribute dictionary.
        '''
        return vars(self)

    @property
    def components(self):
        '''
        Method to define how a subtype's inserts should be handled under `collect` for
        constituent components that need handling.

        The base implementation reports no components.
        '''
        return []

    def collate(self, action_key, *action_args, **action_kwargs):
        '''
        Invoke the method registered under `action_key`.

        Parameters:
            action_key:    key to look up in the class's `action_map`
            action_args:   positional arguments forwarded to the registered method
            action_kwargs: keyword arguments forwarded to the registered method

        Returns:
            The registered method's return value, or `None` (with a debug log entry)
            if no method is registered under `action_key`.
        '''
        if action_key not in self.action_map:
            logger.debug(f'Collation for {action_key} not supported')
            return None

        # registry values are `(function, action_groups)` tuples; the stored
        # function is unbound, so the instance is passed explicitly
        method, _ = self.action_map[action_key]
        return method(self, *action_args, **action_kwargs)
|
||||
|
||||
|
109
co3/collector.py
Normal file
109
co3/collector.py
Normal file
@ -0,0 +1,109 @@
|
||||
'''
|
||||
Defines the Collector base class.
|
||||
|
||||
This module is the critical "middleware" connecting the primitive object definitions and
|
||||
their representations in the database. It operates with full knowledge of how both are
|
||||
defined, and abstracts away both the prep work for DB insertions as well as updates
|
||||
trickling down the primitive hierarchy.
|
||||
|
||||
The `src` format target is re-used for both canonical tables/primitives, as well as
|
||||
<prim>_conversion_matter tables in tables/conversions under the `src` format. The latter
|
||||
is meant to extend those attributes that are format-specific (i.e., would change when, say,
|
||||
converting to `html5`), and thus need to be broken across the format dimension.
|
||||
|
||||
Note:
|
||||
Despite the structure of the database module, this class does not currently inherit
|
||||
from a super class in localsys.db (like the accessors and managers, for instance).
|
||||
This will likely ultimately be the model that's embraced, but until FTS (or other
|
||||
groups) need a collector, this will remain an independent class. It is, however,
|
||||
named like a concrete subclass, taking on the "Core" prefix.
|
||||
'''
|
||||
|
||||
from pathlib import Path
|
||||
from collections import defaultdict
|
||||
import logging
|
||||
import importlib
|
||||
import subprocess
|
||||
from uuid import uuid4
|
||||
|
||||
import sqlalchemy as sa
|
||||
|
||||
from co3 import util
|
||||
#from localsys.db.schema import tables
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class Collector:
    '''
    Session-like tracker of prepared database inserts.

    Inserts are stored per receipt (a unique string issued by `add_insert`) and,
    within each receipt, per table name.
    '''
    def __init__(self):
        # receipt -> table name -> list of prepared inserts
        self._inserts = defaultdict(lambda: defaultdict(list))

    @property
    def inserts(self):
        '''
        All tracked inserts, merged across receipts and indexed by table name. Reading
        this property does not remove anything from the tracker.
        '''
        return self._inserts_from_receipts()

    def _inserts_from_receipts(self, receipts=None, pop=False):
        '''
        Merge the inserts tracked under `receipts` into one table-indexed dict.

        Parameters:
            receipts: receipts to gather; defaults to every tracked receipt
            pop:      if True, remove the gathered receipts from the tracker

        Returns:
            Plain dict mapping table name to the combined list of inserts.
        '''
        inserts = defaultdict(list)

        if receipts is None:
            receipts = list(self._inserts.keys())

        for receipt in receipts:
            if pop:
                insert_dict = self._inserts.pop(receipt, {})
            else:
                # `.get` rather than indexing: indexing the defaultdict would
                # silently create an empty entry for an unknown receipt
                insert_dict = self._inserts.get(receipt, {})

            for table, insert_list in insert_dict.items():
                inserts[table].extend(insert_list)

        return dict(inserts)

    def _reset_session(self):
        '''
        Drop every tracked insert.
        '''
        self._inserts = defaultdict(lambda: defaultdict(list))

    def _generate_unique_receipt(self):
        '''
        Return a fresh random receipt string.
        '''
        return str(uuid4())

    def add_insert(self, table_name, insert_dict, receipts=None):
        '''
        Prepare `insert_dict` for `table_name` and track it under a new receipt.

        TODO: formalize table_name mapping; at class level provide a `table_map`, or provide
        the table object itself to this method

        NOTE(review): `tables` is not bound in this module (its import is commented
        out at the top of the file), so this method raises NameError until the
        mapping above is settled.

        Parameters:
            table_name:  key into `tables.table_map`
            insert_dict: values passed to `util.db.prepare_insert`
            receipts:    optional list; the new receipt is appended to it

        Returns:
            The new receipt, or `None` if `table_name` is not a known table.
        '''
        if table_name not in tables.table_map:
            #logger.debug(f'Inserts provided for non-existent table {table_name}')
            return None

        receipt = self._generate_unique_receipt()

        # the file imports `util` (not `utils`)
        self._inserts[receipt][table_name].append(
            util.db.prepare_insert(
                tables.table_map[table_name],
                insert_dict
            )
        )

        if receipts is not None:
            receipts.append(receipt)

        return receipt

    def collect_inserts(self, receipts=None):
        '''
        Collect insert-ready dictionaries and remove them from the tracker.

        The overall collection scheme embraces a session-like sequential update model
        to an internal insert tracker: inserts accumulate through `add_insert`, and
        calling this method flushes the collected receipts so they are not returned
        again by later calls (or "sessions").

        Parameters:
            receipts: receipts to collect; defaults to every tracked receipt

        Returns:
            Table name-indexed dictionary of insert lists (of column name-indexed dicts)
        '''
        return self._inserts_from_receipts(receipts, pop=True)
|
89
co3/composer.py
Normal file
89
co3/composer.py
Normal file
@ -0,0 +1,89 @@
|
||||
'''
|
||||
Composer
|
||||
|
||||
Base for manually defining table compositions outside those natural to the schema
|
||||
hierarchy (i.e., constructable by a `CO4.compose()` call).
|
||||
|
||||
Example: suppose we have a simple object hierarchy A(CO4) -> B -> C. C's in-built
|
||||
`compose()` method may not always be desirable when constructing composite tables and
|
||||
running related queries. In this case, a custom Composer can be used to make needed
|
||||
composite tables easier to reference; in the case below, we define the "BC" composite
|
||||
table.
|
||||
|
||||
```
|
||||
class ExampleComposer(Composer):
|
||||
|
||||
@register_table
|
||||
def BC(self):
|
||||
full_B = B.compose(full=True)
|
||||
full_C = C.compose(full=True)
|
||||
|
||||
return full_B.join(
|
||||
full_C,
|
||||
full_B.c.name == full_C.c.name, # TODO: is this fine? or do we need base table refs
|
||||
outer=True
|
||||
)
|
||||
```
'''
|
||||
from pathlib import Path
|
||||
|
||||
from co3.mapper import Mapper
|
||||
|
||||
|
||||
def register_table(table_name=None):
    '''
    Registry decorator for defined composer classes. Decorating a class method simply
    attaches a `table_name` attribute to it, setting it to either a provided value or the
    name of the method itself. Methods with a `table_name` attribute are later swept up at
    the class level and placed in the `table_map`.

    Both `@register_table('name')` / `@register_table()` and the bare
    `@register_table` form are accepted.

    Parameters:
        table_name: name to register under; defaults to the method's `__name__`
    '''
    def decorator(func):
        # Do not assign to `table_name` in here: that would make it local to
        # `decorator` and raise UnboundLocalError on the `is None` check.
        func.table_name = func.__name__ if table_name is None else table_name
        return func

    # bare usage: the decorated function itself arrived as `table_name`
    if callable(table_name):
        func, table_name = table_name, None
        return decorator(func)

    return decorator
|
||||
|
||||
class Composer[M: Mapper]:
|
||||
'''
|
||||
Base composer wrapper for table groupings.
|
||||
|
||||
The schema is centered around a connected group of tables (via foreign keys). Thus,
|
||||
most operations need to be coordinated across tables. The `accessors` submodules
|
||||
are mostly intended to provide a "secondary layer" over the base set of tables in the
|
||||
schema, exposing common higher level table compositions (i.e., chained JOINs). See
|
||||
concrete instances (e.g., CoreAccess, FTSAccessor) for actual implementations these
|
||||
tables; the base class does not expose
|
||||
|
||||
Tables in subclasses are registered with the `register_table` decorator, automatically
|
||||
indexing them under the provided name and making them available via the `table_map`.
|
||||
'''
|
||||
def __init__(self):
|
||||
self._set_tables()
|
||||
|
||||
def _set_tables(self):
|
||||
'''
|
||||
Skip properties (so appropriate delays can be used), and
|
||||
|
||||
Set the table registry at the class level. This only takes place during the first
|
||||
instantiation of the class, and makes it possible to definitively tie methods to
|
||||
composed tables during lookup with `get_table()`.
|
||||
'''
|
||||
cls = self.__class__
|
||||
|
||||
# in case the class has already be instantiated
|
||||
if hasattr(cls, 'table_map'): return
|
||||
|
||||
table_map = {}
|
||||
for key, value in cls.__dict__.items():
|
||||
if isinstance(value, property):
|
||||
continue # Skip properties
|
||||
if callable(value) and hasattr(value, 'table_name'):
|
||||
table_map[value.table_name] = value(self)
|
||||
|
||||
cls.table_map = table_map
|
||||
|
||||
def get_table(self, table_name):
|
||||
'''
|
||||
Retrieve the named table composition, if defined.
|
||||
'''
|
||||
return self.table_map.get(table_name)
|
87
co3/database.py
Normal file
87
co3/database.py
Normal file
@ -0,0 +1,87 @@
|
||||
'''
|
||||
Database
|
||||
|
||||
Central object for defining storage protocol-specific interfaces. The database wraps up
|
||||
central items for interacting with database resources, namely the Accessor and Manager
|
||||
objects.
|
||||
|
||||
The Database type hierarchy attempts to be exceedingly general; SQL-derivatives should
|
||||
subclass from the RelationalDatabase subtype, for example, which itself becomes a new
|
||||
generic via type dependence on Relation.
|
||||
'''
|
||||
|
||||
import logging
|
||||
from typing import Self
|
||||
|
||||
from co3.accessor import Accessor
|
||||
from co3.composer import Composer
|
||||
from co3.manager import Manager
|
||||
from co3.indexer import Indexer
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class Database:
    '''
    Wraps a storage resource together with the Accessor, Manager, and Indexer
    instances used to interact with it.

    Subclasses select their component types through the `accessor` and `manager`
    class attributes and must provide the `engine` property.
    '''
    # component types instantiated (with this database as argument) in `__init__`
    accessor: type[Accessor[Self]] = Accessor
    manager:  type[Manager[Self]]  = Manager

    def __init__(self, resource):
        '''
        Parameters:
            resource: handle to the underlying storage; stored as `self.resource`

        Variables:
            _local_cache: a database-local property store for ad-hoc CacheBlock-esque
                          methods, that are nevertheless _not_ query/group-by responses to
                          pass on to the Indexer. Dependent properties should write to
                          this cache and check for existence of stored results; the cache
                          state must be managed globally.
        '''
        self.resource = resource

        self._access = self.accessor(self)
        self._manage = self.manager(self)

        self._index = Indexer(self._access)
        self._local_cache = {}

        # set by `.manage`, consumed by `.index`
        self.reset_cache = False

    @property
    def engine(self):
        '''
        Database property to provide a singleton engine for DB interaction, initializing
        the database if it doesn't already exist.

        Not implemented in the base class; subclasses must override it.

        TODO: figure out thread safety across engines and/or connection. Any issue with
        hanging on to the same engine instance for the Database instance?
        '''
        raise NotImplementedError

    def connect(self):
        '''
        Open a connection on the engine and return it, so the caller can use and
        close it (previously the connection object was discarded).
        '''
        return self.engine.connect()

    @property
    def access(self):
        '''
        The Accessor instance bound to this database.
        '''
        return self._access

    @property
    def compose(self):
        '''
        The composer attached to this database, or `None` if none has been set.

        `_compose` is not assigned anywhere in this class; presumably subclasses
        attach it -- TODO confirm.
        '''
        return getattr(self, '_compose', None)

    @property
    def index(self):
        '''
        The Indexer instance, with its cache cleared first if a reset was queued by
        an earlier access to `.manage`.
        '''
        if self.reset_cache:
            self._index.cache_clear()
            self.reset_cache = False
        return self._index

    @property
    def manage(self):
        '''
        Accessing `.manage` queues a cache clear on the external index, as well as
        wipes the local cache.
        '''
        self.reset_cache = True
        self._local_cache = {}
        return self._manage

    def populate_indexes(self):
        '''
        Hook with no behavior in the base class.
        '''
        pass
|
||||
|
3
co3/databases/__init__.py
Normal file
3
co3/databases/__init__.py
Normal file
@ -0,0 +1,3 @@
|
||||
from co3.databases.sql import *
|
||||
from co3.databases.fts import FTSDatabase
|
||||
from co3.databases.vss import VSSDatabase
|
BIN
co3/databases/__pycache__/__init__.cpython-311.pyc
Normal file
BIN
co3/databases/__pycache__/__init__.cpython-311.pyc
Normal file
Binary file not shown.
BIN
co3/databases/__pycache__/__init__.cpython-312.pyc
Normal file
BIN
co3/databases/__pycache__/__init__.cpython-312.pyc
Normal file
Binary file not shown.
BIN
co3/databases/__pycache__/_base.cpython-311.pyc
Normal file
BIN
co3/databases/__pycache__/_base.cpython-311.pyc
Normal file
Binary file not shown.
BIN
co3/databases/__pycache__/core.cpython-311.pyc
Normal file
BIN
co3/databases/__pycache__/core.cpython-311.pyc
Normal file
Binary file not shown.
BIN
co3/databases/__pycache__/fts.cpython-311.pyc
Normal file
BIN
co3/databases/__pycache__/fts.cpython-311.pyc
Normal file
Binary file not shown.
BIN
co3/databases/__pycache__/fts.cpython-312.pyc
Normal file
BIN
co3/databases/__pycache__/fts.cpython-312.pyc
Normal file
Binary file not shown.
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user