initial commit
This commit is contained in:
commit
057e20163d
1
.python-version
Normal file
1
.python-version
Normal file
@ -0,0 +1 @@
|
|||||||
|
co4
|
0
MANIFEST.in
Normal file
0
MANIFEST.in
Normal file
16
Makefile
Normal file
16
Makefile
Normal file
@ -0,0 +1,16 @@
|
|||||||
|
PYTHON=/home/smgr/.pyenv/versions/co4/bin/python
BASH=/usr/bin/bash

# These targets are commands, not files; declaring them phony keeps a stray
# file or directory with the same name from silently disabling the target.
.PHONY: docs-build docs-serve docs-clean

## ------------------ docs ------------------ ##
# Regenerate the API reference stubs into docs/_autoref/, then run the `html`
# target of the Makefile under docs/.
# NOTE(review): the module path passed to sphinx-apidoc is `co4` while the
# README describes the package as `co3` -- confirm which one is intended.
docs-build:
	sphinx-apidoc --module-first -o docs/_autoref/ co4
	$(MAKE) -C docs/ html

# Serve docs/_build/html over HTTP on port 9090.
docs-serve:
	cd docs/_build/html && python -m http.server 9090

# Delegate to the `clean` target of the Makefile under docs/.
docs-clean:
	$(MAKE) -C docs/ clean
## ------------------------------------------ ##
|
||||||
|
|
21
README.md
Normal file
21
README.md
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
# Overview
|
||||||
|
`co3` is a package for file conversion and associated database operations. The `CO3` base class
|
||||||
|
provides a standard interface for performing conversions, preparing inserts, and
|
||||||
|
interacting with database schemas that mirror the class hierarchy.
|
||||||
|
|
||||||
|
Simplified description of the operational model:
|
||||||
|
|
||||||
|
**Goal**: interact with a storage medium (database, pickled structure, VSS framework) with
|
||||||
|
a known schema.
|
||||||
|
|
||||||
|
- **Accessor** to provide access to stored items
|
||||||
|
- **Composer** to compose common access points (e.g., JOINed tables)
|
||||||
|
- **Indexer** to index/cache access queries
|
||||||
|
- **Manager** to manage storage state (e.g., supported inserts, database syncs)
|
||||||
|
- **Collector** to collect data for updating storage state
|
||||||
|
- **Database** to collect data for updating storage state
|
||||||
|
- **Mapper** to collect data for updating storage state
|
||||||
|
- **Relation** to collect data for updating storage state
|
||||||
|
|
||||||
|
**CO3** is an abstract base class that makes it easy to integrate this model with object
|
||||||
|
hierarchies that mirror a storage schema.
|
1
build/__editable__.co3-0.1.1-py3-none-any/co3/__init__.py
Symbolic link
1
build/__editable__.co3-0.1.1-py3-none-any/co3/__init__.py
Symbolic link
@ -0,0 +1 @@
|
|||||||
|
/home/smgr/Documents/projects/ontolog/co3/co3/__init__.py
|
1
build/__editable__.co3-0.1.1-py3-none-any/co3/accessor.py
Symbolic link
1
build/__editable__.co3-0.1.1-py3-none-any/co3/accessor.py
Symbolic link
@ -0,0 +1 @@
|
|||||||
|
/home/smgr/Documents/projects/ontolog/co3/co3/accessor.py
|
@ -0,0 +1 @@
|
|||||||
|
/home/smgr/Documents/projects/ontolog/co3/co3/accessors/__init__.py
|
1
build/__editable__.co3-0.1.1-py3-none-any/co3/accessors/fts.py
Symbolic link
1
build/__editable__.co3-0.1.1-py3-none-any/co3/accessors/fts.py
Symbolic link
@ -0,0 +1 @@
|
|||||||
|
/home/smgr/Documents/projects/ontolog/co3/co3/accessors/fts.py
|
1
build/__editable__.co3-0.1.1-py3-none-any/co3/accessors/sql.py
Symbolic link
1
build/__editable__.co3-0.1.1-py3-none-any/co3/accessors/sql.py
Symbolic link
@ -0,0 +1 @@
|
|||||||
|
/home/smgr/Documents/projects/ontolog/co3/co3/accessors/sql.py
|
1
build/__editable__.co3-0.1.1-py3-none-any/co3/accessors/vss.py
Symbolic link
1
build/__editable__.co3-0.1.1-py3-none-any/co3/accessors/vss.py
Symbolic link
@ -0,0 +1 @@
|
|||||||
|
/home/smgr/Documents/projects/ontolog/co3/co3/accessors/vss.py
|
1
build/__editable__.co3-0.1.1-py3-none-any/co3/co3.py
Symbolic link
1
build/__editable__.co3-0.1.1-py3-none-any/co3/co3.py
Symbolic link
@ -0,0 +1 @@
|
|||||||
|
/home/smgr/Documents/projects/ontolog/co3/co3/co3.py
|
1
build/__editable__.co3-0.1.1-py3-none-any/co3/collector.py
Symbolic link
1
build/__editable__.co3-0.1.1-py3-none-any/co3/collector.py
Symbolic link
@ -0,0 +1 @@
|
|||||||
|
/home/smgr/Documents/projects/ontolog/co3/co3/collector.py
|
1
build/__editable__.co3-0.1.1-py3-none-any/co3/composer.py
Symbolic link
1
build/__editable__.co3-0.1.1-py3-none-any/co3/composer.py
Symbolic link
@ -0,0 +1 @@
|
|||||||
|
/home/smgr/Documents/projects/ontolog/co3/co3/composer.py
|
1
build/__editable__.co3-0.1.1-py3-none-any/co3/database.py
Symbolic link
1
build/__editable__.co3-0.1.1-py3-none-any/co3/database.py
Symbolic link
@ -0,0 +1 @@
|
|||||||
|
/home/smgr/Documents/projects/ontolog/co3/co3/database.py
|
@ -0,0 +1 @@
|
|||||||
|
/home/smgr/Documents/projects/ontolog/co3/co3/databases/__init__.py
|
1
build/__editable__.co3-0.1.1-py3-none-any/co3/databases/fts.py
Symbolic link
1
build/__editable__.co3-0.1.1-py3-none-any/co3/databases/fts.py
Symbolic link
@ -0,0 +1 @@
|
|||||||
|
/home/smgr/Documents/projects/ontolog/co3/co3/databases/fts.py
|
1
build/__editable__.co3-0.1.1-py3-none-any/co3/databases/sql.py
Symbolic link
1
build/__editable__.co3-0.1.1-py3-none-any/co3/databases/sql.py
Symbolic link
@ -0,0 +1 @@
|
|||||||
|
/home/smgr/Documents/projects/ontolog/co3/co3/databases/sql.py
|
1
build/__editable__.co3-0.1.1-py3-none-any/co3/databases/vss.py
Symbolic link
1
build/__editable__.co3-0.1.1-py3-none-any/co3/databases/vss.py
Symbolic link
@ -0,0 +1 @@
|
|||||||
|
/home/smgr/Documents/projects/ontolog/co3/co3/databases/vss.py
|
1
build/__editable__.co3-0.1.1-py3-none-any/co3/indexer.py
Symbolic link
1
build/__editable__.co3-0.1.1-py3-none-any/co3/indexer.py
Symbolic link
@ -0,0 +1 @@
|
|||||||
|
/home/smgr/Documents/projects/ontolog/co3/co3/indexer.py
|
1
build/__editable__.co3-0.1.1-py3-none-any/co3/manager.py
Symbolic link
1
build/__editable__.co3-0.1.1-py3-none-any/co3/manager.py
Symbolic link
@ -0,0 +1 @@
|
|||||||
|
/home/smgr/Documents/projects/ontolog/co3/co3/manager.py
|
@ -0,0 +1 @@
|
|||||||
|
/home/smgr/Documents/projects/ontolog/co3/co3/managers/__init__.py
|
1
build/__editable__.co3-0.1.1-py3-none-any/co3/managers/fts.py
Symbolic link
1
build/__editable__.co3-0.1.1-py3-none-any/co3/managers/fts.py
Symbolic link
@ -0,0 +1 @@
|
|||||||
|
/home/smgr/Documents/projects/ontolog/co3/co3/managers/fts.py
|
1
build/__editable__.co3-0.1.1-py3-none-any/co3/managers/sql.py
Symbolic link
1
build/__editable__.co3-0.1.1-py3-none-any/co3/managers/sql.py
Symbolic link
@ -0,0 +1 @@
|
|||||||
|
/home/smgr/Documents/projects/ontolog/co3/co3/managers/sql.py
|
1
build/__editable__.co3-0.1.1-py3-none-any/co3/managers/vss.py
Symbolic link
1
build/__editable__.co3-0.1.1-py3-none-any/co3/managers/vss.py
Symbolic link
@ -0,0 +1 @@
|
|||||||
|
/home/smgr/Documents/projects/ontolog/co3/co3/managers/vss.py
|
1
build/__editable__.co3-0.1.1-py3-none-any/co3/mapper.py
Symbolic link
1
build/__editable__.co3-0.1.1-py3-none-any/co3/mapper.py
Symbolic link
@ -0,0 +1 @@
|
|||||||
|
/home/smgr/Documents/projects/ontolog/co3/co3/mapper.py
|
1
build/__editable__.co3-0.1.1-py3-none-any/co3/relation.py
Symbolic link
1
build/__editable__.co3-0.1.1-py3-none-any/co3/relation.py
Symbolic link
@ -0,0 +1 @@
|
|||||||
|
/home/smgr/Documents/projects/ontolog/co3/co3/relation.py
|
@ -0,0 +1 @@
|
|||||||
|
/home/smgr/Documents/projects/ontolog/co3/co3/relations/__init__.py
|
1
build/__editable__.co3-0.1.1-py3-none-any/co3/util/__init__.py
Symbolic link
1
build/__editable__.co3-0.1.1-py3-none-any/co3/util/__init__.py
Symbolic link
@ -0,0 +1 @@
|
|||||||
|
/home/smgr/Documents/projects/ontolog/co3/co3/util/__init__.py
|
1
build/__editable__.co3-0.1.1-py3-none-any/co3/util/db.py
Symbolic link
1
build/__editable__.co3-0.1.1-py3-none-any/co3/util/db.py
Symbolic link
@ -0,0 +1 @@
|
|||||||
|
/home/smgr/Documents/projects/ontolog/co3/co3/util/db.py
|
1
build/__editable__.co3-0.1.1-py3-none-any/co3/util/regex.py
Symbolic link
1
build/__editable__.co3-0.1.1-py3-none-any/co3/util/regex.py
Symbolic link
@ -0,0 +1 @@
|
|||||||
|
/home/smgr/Documents/projects/ontolog/co3/co3/util/regex.py
|
1
build/__editable__.co4-0.1.1-py3-none-any/co4/__init__.py
Symbolic link
1
build/__editable__.co4-0.1.1-py3-none-any/co4/__init__.py
Symbolic link
@ -0,0 +1 @@
|
|||||||
|
/home/smgr/Documents/projects/ontolog/co4/co4/__init__.py
|
1
build/__editable__.co4-0.1.1-py3-none-any/co4/accessor.py
Symbolic link
1
build/__editable__.co4-0.1.1-py3-none-any/co4/accessor.py
Symbolic link
@ -0,0 +1 @@
|
|||||||
|
/home/smgr/Documents/projects/ontolog/co4/co4/accessor.py
|
@ -0,0 +1 @@
|
|||||||
|
/home/smgr/Documents/projects/ontolog/co4/co4/accessors/__init__.py
|
1
build/__editable__.co4-0.1.1-py3-none-any/co4/accessors/fts.py
Symbolic link
1
build/__editable__.co4-0.1.1-py3-none-any/co4/accessors/fts.py
Symbolic link
@ -0,0 +1 @@
|
|||||||
|
/home/smgr/Documents/projects/ontolog/co4/co4/accessors/fts.py
|
1
build/__editable__.co4-0.1.1-py3-none-any/co4/accessors/table.py
Symbolic link
1
build/__editable__.co4-0.1.1-py3-none-any/co4/accessors/table.py
Symbolic link
@ -0,0 +1 @@
|
|||||||
|
/home/smgr/Documents/projects/ontolog/co4/co4/accessors/table.py
|
1
build/__editable__.co4-0.1.1-py3-none-any/co4/accessors/vss.py
Symbolic link
1
build/__editable__.co4-0.1.1-py3-none-any/co4/accessors/vss.py
Symbolic link
@ -0,0 +1 @@
|
|||||||
|
/home/smgr/Documents/projects/ontolog/co4/co4/accessors/vss.py
|
1
build/__editable__.co4-0.1.1-py3-none-any/co4/co4.py
Symbolic link
1
build/__editable__.co4-0.1.1-py3-none-any/co4/co4.py
Symbolic link
@ -0,0 +1 @@
|
|||||||
|
/home/smgr/Documents/projects/ontolog/co4/co4/co4.py
|
1
build/__editable__.co4-0.1.1-py3-none-any/co4/collector.py
Symbolic link
1
build/__editable__.co4-0.1.1-py3-none-any/co4/collector.py
Symbolic link
@ -0,0 +1 @@
|
|||||||
|
/home/smgr/Documents/projects/ontolog/co4/co4/collector.py
|
1
build/__editable__.co4-0.1.1-py3-none-any/co4/composer.py
Symbolic link
1
build/__editable__.co4-0.1.1-py3-none-any/co4/composer.py
Symbolic link
@ -0,0 +1 @@
|
|||||||
|
/home/smgr/Documents/projects/ontolog/co4/co4/composer.py
|
@ -0,0 +1 @@
|
|||||||
|
/home/smgr/Documents/projects/ontolog/co4/co4/databases/__init__.py
|
1
build/__editable__.co4-0.1.1-py3-none-any/co4/databases/_base.py
Symbolic link
1
build/__editable__.co4-0.1.1-py3-none-any/co4/databases/_base.py
Symbolic link
@ -0,0 +1 @@
|
|||||||
|
/home/smgr/Documents/projects/ontolog/co4/co4/databases/_base.py
|
1
build/__editable__.co4-0.1.1-py3-none-any/co4/databases/core.py
Symbolic link
1
build/__editable__.co4-0.1.1-py3-none-any/co4/databases/core.py
Symbolic link
@ -0,0 +1 @@
|
|||||||
|
/home/smgr/Documents/projects/ontolog/co4/co4/databases/core.py
|
1
build/__editable__.co4-0.1.1-py3-none-any/co4/databases/fts.py
Symbolic link
1
build/__editable__.co4-0.1.1-py3-none-any/co4/databases/fts.py
Symbolic link
@ -0,0 +1 @@
|
|||||||
|
/home/smgr/Documents/projects/ontolog/co4/co4/databases/fts.py
|
1
build/__editable__.co4-0.1.1-py3-none-any/co4/databases/vss.py
Symbolic link
1
build/__editable__.co4-0.1.1-py3-none-any/co4/databases/vss.py
Symbolic link
@ -0,0 +1 @@
|
|||||||
|
/home/smgr/Documents/projects/ontolog/co4/co4/databases/vss.py
|
1
build/__editable__.co4-0.1.1-py3-none-any/co4/indexer.py
Symbolic link
1
build/__editable__.co4-0.1.1-py3-none-any/co4/indexer.py
Symbolic link
@ -0,0 +1 @@
|
|||||||
|
/home/smgr/Documents/projects/ontolog/co4/co4/indexer.py
|
1
build/__editable__.co4-0.1.1-py3-none-any/co4/manager.py
Symbolic link
1
build/__editable__.co4-0.1.1-py3-none-any/co4/manager.py
Symbolic link
@ -0,0 +1 @@
|
|||||||
|
/home/smgr/Documents/projects/ontolog/co4/co4/manager.py
|
@ -0,0 +1 @@
|
|||||||
|
/home/smgr/Documents/projects/ontolog/co4/co4/managers/__init__.py
|
1
build/__editable__.co4-0.1.1-py3-none-any/co4/managers/core.py
Symbolic link
1
build/__editable__.co4-0.1.1-py3-none-any/co4/managers/core.py
Symbolic link
@ -0,0 +1 @@
|
|||||||
|
/home/smgr/Documents/projects/ontolog/co4/co4/managers/core.py
|
1
build/__editable__.co4-0.1.1-py3-none-any/co4/managers/fts.py
Symbolic link
1
build/__editable__.co4-0.1.1-py3-none-any/co4/managers/fts.py
Symbolic link
@ -0,0 +1 @@
|
|||||||
|
/home/smgr/Documents/projects/ontolog/co4/co4/managers/fts.py
|
1
build/__editable__.co4-0.1.1-py3-none-any/co4/managers/vss.py
Symbolic link
1
build/__editable__.co4-0.1.1-py3-none-any/co4/managers/vss.py
Symbolic link
@ -0,0 +1 @@
|
|||||||
|
/home/smgr/Documents/projects/ontolog/co4/co4/managers/vss.py
|
1
build/__editable__.co4-0.1.1-py3-none-any/co4/utils/db.py
Symbolic link
1
build/__editable__.co4-0.1.1-py3-none-any/co4/utils/db.py
Symbolic link
@ -0,0 +1 @@
|
|||||||
|
/home/smgr/Documents/projects/ontolog/co4/co4/utils/db.py
|
1
build/__editable__.co4-0.1.1-py3-none-any/co4/utils/paths.py
Symbolic link
1
build/__editable__.co4-0.1.1-py3-none-any/co4/utils/paths.py
Symbolic link
@ -0,0 +1 @@
|
|||||||
|
/home/smgr/Documents/projects/ontolog/co4/co4/utils/paths.py
|
30
co3.egg-info/PKG-INFO
Normal file
30
co3.egg-info/PKG-INFO
Normal file
@ -0,0 +1,30 @@
|
|||||||
|
Metadata-Version: 2.1
|
||||||
|
Name: co3
|
||||||
|
Version: 0.1.1
|
||||||
|
Summary: Lightweight ORM
|
||||||
|
Author-email: Sam Griesemer <samgriesemer@gmail.com>
|
||||||
|
Classifier: Programming Language :: Python :: 3
|
||||||
|
Classifier: License :: OSI Approved :: MIT License
|
||||||
|
Classifier: Operating System :: OS Independent
|
||||||
|
Requires-Python: >=3.11
|
||||||
|
Description-Content-Type: text/markdown
|
||||||
|
Requires-Dist: tqdm
|
||||||
|
|
||||||
|
# Overview
|
||||||
|
`co4` is a package for file conversion and associated database operations. The `CO4` base class
|
||||||
|
provides a standard interface for performing conversions, preparing inserts, and
|
||||||
|
interacting with database schemas that mirror the class hierarchy.
|
||||||
|
|
||||||
|
Simplified description of the operational model:
|
||||||
|
|
||||||
|
**Goal**: interact with a storage medium (database, pickled structure, VSS framework) with
|
||||||
|
a known schema.
|
||||||
|
|
||||||
|
- **Accessor** to provide access to stored items
|
||||||
|
- **Composer** to compose common access points (e.g., JOINed tables)
|
||||||
|
- **Indexer** to index/cache access queries
|
||||||
|
- **Manager** to manage storage state (e.g., supported inserts, database syncs)
|
||||||
|
- **Collector** to collect data for updating storage state
|
||||||
|
|
||||||
|
**CO4** is an abstract base class that makes it easy to integrate this model with object
|
||||||
|
hierarchies that mirror a storage schema.
|
34
co3.egg-info/SOURCES.txt
Normal file
34
co3.egg-info/SOURCES.txt
Normal file
@ -0,0 +1,34 @@
|
|||||||
|
MANIFEST.in
|
||||||
|
README.md
|
||||||
|
pyproject.toml
|
||||||
|
co3/__init__.py
|
||||||
|
co3/accessor.py
|
||||||
|
co3/co3.py
|
||||||
|
co3/collector.py
|
||||||
|
co3/composer.py
|
||||||
|
co3/database.py
|
||||||
|
co3/indexer.py
|
||||||
|
co3/manager.py
|
||||||
|
co3/mapper.py
|
||||||
|
co3/relation.py
|
||||||
|
co3.egg-info/PKG-INFO
|
||||||
|
co3.egg-info/SOURCES.txt
|
||||||
|
co3.egg-info/dependency_links.txt
|
||||||
|
co3.egg-info/requires.txt
|
||||||
|
co3.egg-info/top_level.txt
|
||||||
|
co3/accessors/__init__.py
|
||||||
|
co3/accessors/fts.py
|
||||||
|
co3/accessors/sql.py
|
||||||
|
co3/accessors/vss.py
|
||||||
|
co3/databases/__init__.py
|
||||||
|
co3/databases/fts.py
|
||||||
|
co3/databases/sql.py
|
||||||
|
co3/databases/vss.py
|
||||||
|
co3/managers/__init__.py
|
||||||
|
co3/managers/fts.py
|
||||||
|
co3/managers/sql.py
|
||||||
|
co3/managers/vss.py
|
||||||
|
co3/relations/__init__.py
|
||||||
|
co3/util/__init__.py
|
||||||
|
co3/util/db.py
|
||||||
|
co3/util/regex.py
|
1
co3.egg-info/dependency_links.txt
Normal file
1
co3.egg-info/dependency_links.txt
Normal file
@ -0,0 +1 @@
|
|||||||
|
|
1
co3.egg-info/requires.txt
Normal file
1
co3.egg-info/requires.txt
Normal file
@ -0,0 +1 @@
|
|||||||
|
tqdm
|
1
co3.egg-info/top_level.txt
Normal file
1
co3.egg-info/top_level.txt
Normal file
@ -0,0 +1 @@
|
|||||||
|
co3
|
109
co3/__init__.py
Normal file
109
co3/__init__.py
Normal file
@ -0,0 +1,109 @@
|
|||||||
|
'''
|
||||||
|
Database submodule
|
||||||
|
|
||||||
|
- `db`: contains SQLAlchemy-based schema definitions
|
||||||
|
- `accessors`: convenience methods for accessing database entries
|
||||||
|
- `populate`: convenience methods for populating database tables
|
||||||
|
|
||||||
|
The `accessors` and `populate` submodules are each split into `schema` and `fts` method
|
||||||
|
groups. The former concerns methods relating to the actual database schema, the latter to
|
||||||
|
their SQLite FTS counterparts.
|
||||||
|
|
||||||
|
Note: Subpackages organization
|
||||||
|
Subpackages are broken up by inheritance. Within a given submodule, you have a
|
||||||
|
`_base.py` file defining the base class associated with that submodule's title, along
|
||||||
|
with concrete subclasses of that base in their own files. Deeper inheritance would
|
||||||
|
recursively extend this structure. The `__init__.py` for a given submodule then
|
||||||
|
exposes the concrete instances, leaving the base hidden. For example,
|
||||||
|
|
||||||
|
accessors/
|
||||||
|
_base.py
|
||||||
|
core.py
|
||||||
|
fts.py
|
||||||
|
|
||||||
|
`core` and `fts` house the `CoreAccessor` and `FTSAccessor` classes, respectively,
|
||||||
|
and are the direct subclasses of the `Accessor` parent found in the `_base`. This base
|
||||||
|
class _could_ be placed outside of the submodule in the parent directory (imported
|
||||||
|
with something like `from db import accessor` instead of `from db.accessor import
|
||||||
|
_base`). This is entirely valid, but I tend to prefer when the base class is among its
|
||||||
|
direct children, as
|
||||||
|
|
||||||
|
- In this case at least, the base doesn't need to be exposed
|
||||||
|
- The base class is being stowed away under an appropriately named submodule; having a
|
||||||
|
separate `accessor.py` and `accessors/` file/directory can feel a little cluttered.
|
||||||
|
- It makes imports across the accessors feel standardized:
|
||||||
|
|
||||||
|
```py
|
||||||
|
from localsys.db.accessors._base import Accessor
|
||||||
|
|
||||||
|
from localsys.db.accessors.core import CoreAccessor
|
||||||
|
```
|
||||||
|
|
||||||
|
Both have the same level of nesting to reach the class.
|
||||||
|
|
||||||
|
Frankly, both means of organization are perfectly fine, and as far as I can tell,
|
||||||
|
semantically sound in their own right. This particular scheme is just a preference in
|
||||||
|
the moment, and so long as I keep things consistent, choosing one over the other
|
||||||
|
shouldn't matter.
|
||||||
|
|
||||||
|
Additionally, note how `__init__.py`s are typically set up when providing wider access
|
||||||
|
to internal modules. The `init` typically pulls out classes from sibling modules
|
||||||
|
(i.e., files), but will import subpackages at the topmost level. For example, for the
|
||||||
|
structure
|
||||||
|
|
||||||
|
```
|
||||||
|
db/
|
||||||
|
__init__.py
|
||||||
|
accessors/
|
||||||
|
__init__.py
|
||||||
|
_base.py
|
||||||
|
core.py
|
||||||
|
fts.py
|
||||||
|
```
|
||||||
|
|
||||||
|
we have
|
||||||
|
|
||||||
|
```db/__init__.py
|
||||||
|
from localsys.db import accessors
|
||||||
|
```
|
||||||
|
|
||||||
|
which just imports the subpackage `accessors`. However, within subpackage:
|
||||||
|
|
||||||
|
```db/accessors/__init__.py
|
||||||
|
from localsys.db.accessors.core import CoreAccessor
|
||||||
|
```
|
||||||
|
|
||||||
|
we don't just import the submodule `core`; we dig into the file to grab the relevant
|
||||||
|
class and pull it into the outer namespace. Overarching point: `__init__.py` files
|
||||||
|
typically reach into the sibling files (submodules) and pull out classes. Given that
|
||||||
|
this behavior is recursive, `__init__.py` files then respect subpackages (nested
|
||||||
|
directories), importing them at the top-level and expecting an internal `__init__.py`
|
||||||
|
will have managed access appropriately.
|
||||||
|
|
||||||
|
Note: Organization for inheritance over composition
|
||||||
|
At a glance, the organization of subpackages here feels like it clashes with those
|
||||||
|
seen in `localsys.primitives`. `note_components`, for instance, houses the components
|
||||||
|
for the outer `note` module. Contrast this with how the `core` submodule looks: it's
|
||||||
|
composing `*/core.py` files across subpackages `accessors` and `managers`, rather than
|
||||||
|
a single subpackage like `note`. This seems inconsistent, but the subpackages here are
|
||||||
|
actually still organized in the same way: by inheritance. It just happens that
|
||||||
|
all of the note components inherit from the same base class, and are thus confined to
|
||||||
|
a single subpackage. This aside, the subpackages themselves are still created around
|
||||||
|
inheritance, wrapping up a base and direct subclasses.
|
||||||
|
'''
|
||||||
|
|
||||||
|
from co3.accessor import Accessor
|
||||||
|
from co3.co3 import CO3
|
||||||
|
from co3.collector import Collector
|
||||||
|
from co3.composer import Composer
|
||||||
|
from co3.database import Database
|
||||||
|
from co3.indexer import Indexer
|
||||||
|
from co3.manager import Manager
|
||||||
|
from co3.mapper import Mapper
|
||||||
|
from co3.relation import Relation
|
||||||
|
|
||||||
|
from co3 import accessors
|
||||||
|
from co3 import databases
|
||||||
|
from co3 import managers
|
||||||
|
from co3 import relations
|
||||||
|
from co3 import util
|
BIN
co3/__pycache__/__init__.cpython-311.pyc
Normal file
BIN
co3/__pycache__/__init__.cpython-311.pyc
Normal file
Binary file not shown.
BIN
co3/__pycache__/__init__.cpython-312.pyc
Normal file
BIN
co3/__pycache__/__init__.cpython-312.pyc
Normal file
Binary file not shown.
BIN
co3/__pycache__/accessor.cpython-311.pyc
Normal file
BIN
co3/__pycache__/accessor.cpython-311.pyc
Normal file
Binary file not shown.
BIN
co3/__pycache__/accessor.cpython-312.pyc
Normal file
BIN
co3/__pycache__/accessor.cpython-312.pyc
Normal file
Binary file not shown.
BIN
co3/__pycache__/co3.cpython-312.pyc
Normal file
BIN
co3/__pycache__/co3.cpython-312.pyc
Normal file
Binary file not shown.
BIN
co3/__pycache__/co4.cpython-311.pyc
Normal file
BIN
co3/__pycache__/co4.cpython-311.pyc
Normal file
Binary file not shown.
BIN
co3/__pycache__/collector.cpython-311.pyc
Normal file
BIN
co3/__pycache__/collector.cpython-311.pyc
Normal file
Binary file not shown.
BIN
co3/__pycache__/collector.cpython-312.pyc
Normal file
BIN
co3/__pycache__/collector.cpython-312.pyc
Normal file
Binary file not shown.
BIN
co3/__pycache__/composer.cpython-311.pyc
Normal file
BIN
co3/__pycache__/composer.cpython-311.pyc
Normal file
Binary file not shown.
BIN
co3/__pycache__/composer.cpython-312.pyc
Normal file
BIN
co3/__pycache__/composer.cpython-312.pyc
Normal file
Binary file not shown.
BIN
co3/__pycache__/database.cpython-312.pyc
Normal file
BIN
co3/__pycache__/database.cpython-312.pyc
Normal file
Binary file not shown.
BIN
co3/__pycache__/indexer.cpython-311.pyc
Normal file
BIN
co3/__pycache__/indexer.cpython-311.pyc
Normal file
Binary file not shown.
BIN
co3/__pycache__/indexer.cpython-312.pyc
Normal file
BIN
co3/__pycache__/indexer.cpython-312.pyc
Normal file
Binary file not shown.
BIN
co3/__pycache__/manager.cpython-311.pyc
Normal file
BIN
co3/__pycache__/manager.cpython-311.pyc
Normal file
Binary file not shown.
BIN
co3/__pycache__/manager.cpython-312.pyc
Normal file
BIN
co3/__pycache__/manager.cpython-312.pyc
Normal file
Binary file not shown.
BIN
co3/__pycache__/mapper.cpython-312.pyc
Normal file
BIN
co3/__pycache__/mapper.cpython-312.pyc
Normal file
Binary file not shown.
BIN
co3/__pycache__/relation.cpython-312.pyc
Normal file
BIN
co3/__pycache__/relation.cpython-312.pyc
Normal file
Binary file not shown.
28
co3/accessor.py
Normal file
28
co3/accessor.py
Normal file
@ -0,0 +1,28 @@
|
|||||||
|
'''
|
||||||
|
Accessor
|
||||||
|
|
||||||
|
Provides access to an underlying schema through a supported set of operations. Class
|
||||||
|
methods could be general, high-level SQL wrappers, or convenience functions for common
|
||||||
|
schema-specific queries.
|
||||||
|
'''
|
||||||
|
import inspect
|
||||||
|
from pathlib import Path
|
||||||
|
from collections import defaultdict
|
||||||
|
|
||||||
|
import sqlalchemy as sa
|
||||||
|
|
||||||
|
#from co3.database import Database
|
||||||
|
|
||||||
|
|
||||||
|
class Accessor[D: 'Database']:
|
||||||
|
'''
|
||||||
|
Access wrapper class for complex queries and easy integration with Composer tables.
|
||||||
|
Implements high-level access to things like common constrained SELECT queries.
|
||||||
|
|
||||||
|
Parameters:
|
||||||
|
engine: SQLAlchemy engine to use for queries. Engine is initialized dynamically as
|
||||||
|
a property (based on the config) if not provided
|
||||||
|
'''
|
||||||
|
def __init__(self, database: D):
|
||||||
|
self.database = database
|
||||||
|
|
24
co3/accessors/__init__.py
Normal file
24
co3/accessors/__init__.py
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
'''
|
||||||
|
Note that subclasses in this subpackage are split differently to other subpackages in the
|
||||||
|
DB. Instead of being split by table group, corresponding to a Composer (which defines that
|
||||||
|
table group), Accessors are split by a separate dimension: table "type". This is why we
|
||||||
|
have a "TableAccessor" and an "FTSAccessor": the former exposes access operations
|
||||||
|
available to generic tables, the latter to FTS tables (instead of being designed
|
||||||
|
specifically around "core" and "fts" groups, for instance).
|
||||||
|
|
||||||
|
Seeing as FTS tables are "generic" tables, it seems inconsistent not to have FTSAccessor
|
||||||
|
inherit from TableAccessor. While this would work fine, the model we're working with
|
||||||
|
doesn't really need it; you can instead think of the FTSAccessor as defining _only_
|
||||||
|
FTS-specific operations. Given that you have a Composer for your desired table group, you
|
||||||
|
can then wrap it with your desired set of "access actions," available in separate Accessor
|
||||||
|
subclasses.
|
||||||
|
|
||||||
|
For instance, you could wrap an FTSComposer in either a TableAccessor or FTSAccessor. The
|
||||||
|
former will treat the tables in the composer like regular tables, exposing methods like
|
||||||
|
`.select` and `.select_one`, whereas the latter defines FTS-specific actions like
|
||||||
|
`.search`.
|
||||||
|
'''
|
||||||
|
|
||||||
|
from co3.accessors.sql import SQLAccessor
|
||||||
|
from co3.accessors.fts import FTSAccessor
|
||||||
|
from co3.accessors.vss import VSSAccessor
|
BIN
co3/accessors/__pycache__/__init__.cpython-311.pyc
Normal file
BIN
co3/accessors/__pycache__/__init__.cpython-311.pyc
Normal file
Binary file not shown.
BIN
co3/accessors/__pycache__/__init__.cpython-312.pyc
Normal file
BIN
co3/accessors/__pycache__/__init__.cpython-312.pyc
Normal file
Binary file not shown.
BIN
co3/accessors/__pycache__/_base.cpython-311.pyc
Normal file
BIN
co3/accessors/__pycache__/_base.cpython-311.pyc
Normal file
Binary file not shown.
BIN
co3/accessors/__pycache__/fts.cpython-311.pyc
Normal file
BIN
co3/accessors/__pycache__/fts.cpython-311.pyc
Normal file
Binary file not shown.
BIN
co3/accessors/__pycache__/fts.cpython-312.pyc
Normal file
BIN
co3/accessors/__pycache__/fts.cpython-312.pyc
Normal file
Binary file not shown.
BIN
co3/accessors/__pycache__/sql.cpython-312.pyc
Normal file
BIN
co3/accessors/__pycache__/sql.cpython-312.pyc
Normal file
Binary file not shown.
BIN
co3/accessors/__pycache__/table.cpython-311.pyc
Normal file
BIN
co3/accessors/__pycache__/table.cpython-311.pyc
Normal file
Binary file not shown.
BIN
co3/accessors/__pycache__/vss.cpython-311.pyc
Normal file
BIN
co3/accessors/__pycache__/vss.cpython-311.pyc
Normal file
Binary file not shown.
BIN
co3/accessors/__pycache__/vss.cpython-312.pyc
Normal file
BIN
co3/accessors/__pycache__/vss.cpython-312.pyc
Normal file
Binary file not shown.
147
co3/accessors/fts.py
Normal file
147
co3/accessors/fts.py
Normal file
@ -0,0 +1,147 @@
|
|||||||
|
import sqlalchemy as sa
|
||||||
|
|
||||||
|
from co3 import util
|
||||||
|
from co3.accessor import Accessor
|
||||||
|
|
||||||
|
|
||||||
|
class FTSAccessor(Accessor):
    '''
    Accessor exposing full-text-search queries against SQLite FTS tables.
    '''
    @staticmethod
    def _sql_literal(value):
        '''
        Render a Python value as an SQL literal for inlining into a query string.

        Strings are single-quoted with embedded single quotes doubled, so a value can
        never terminate the literal early.
        '''
        if value is None:
            return 'NULL'
        # bool must be tested before int (bool is an int subclass)
        if isinstance(value, bool):
            return '1' if value else '0'
        if isinstance(value, (int, float)):
            return repr(value)
        return "'" + str(value).replace("'", "''") + "'"

    def search(
        self,
        table_name   : str,
        select_cols  : str | list | None = '*',
        search_cols  : str | None = None,
        q            : str | None = None,
        colq         : str | None = None,
        snip_col     : int | None = 0,
        hl_col       : int | None = 0,
        limit        : int | None = 100,
        snip         : int | None = 64,
        tokenizer    : str | None = 'unicode61',
        group_by     : str | None = None,
        agg_cols     : list | None = None,
        wherein_dict : dict | None = None,
        unique_on    : dict | None = None,
    ):
        '''
        Execute a search query against an indexed FTS table for specific primitives. This
        method is mostly a generic FTS handler, capable of handling queries to any
        available FTS table with a matching naming scheme (`<table_name>_fts_<tokenizer>`).
        The current intention is to support all tokenizers, for file, note, block, and
        link primitives.

        Search results include the selected FTS table columns, as well as SQLite-supported
        `snippet`s and `highlight`s for matches. Matches are filtered by SQLite's `MATCH`
        for the text & column queries and ordered by `rank`.

        Note:
            GROUP BY cannot be paired with SQLITE FTS extensions; thus, we perform manual
            group checks on the result set in Python before response

        Note:
            `table_name`, `tokenizer`, `select_cols`, and the keys of `wherein_dict` are
            interpolated into the SQL text as identifiers and must come from trusted
            code. Only values (`q`, `colq`, `wherein_dict` values) are quote-escaped.

        Analysis:
            The returned JSON structure has been (loosely) optimized for speed on the
            client side. Fully forming individual dictionary based responses saves time in
            Javascript, as the JSON parser is expected to be able to create the objects
            faster than post-hoc construction in JS. This return structure was compared
            against returning an array of arrays (all ordered in the same fashion), along
            with a column list to be associated with each of the result values. While this
            saves some size on the payload (the same column names don't have to be
            transmitted for each result), the size of the returned content massively
            outweighs the predominantly short column names. The only way this structure
            would be viable is if a significant amount was saved on transfer compared to
            the slow down in JS object construction; this is (almost) never the case.

        Parameters:
            table_name  : base name of the FTS table to search; the queried table is
                          `<table_name>_fts_<tokenizer>`
            select_cols : columns to select, as a comma-separated string or a list of
                          names (default: `*`; `None` is treated as `*`)
            search_cols : space separated string of columns to use for primary queries
            q           : search query
            colq        : column constraint string; must conform to SQLite standards
                          (e.g., `<col>:<text>`)
            snip_col    : index of the table column to use for snippets (default: 0)
            hl_col      : index of the table column to use for highlights (default: 0)
            limit       : maximum number of results to return in the SQL query; `None`
                          omits the LIMIT clause
            snip        : snippet length (max: 64)
            tokenizer   : tokenizer to use (assumes relevant FTS table has been built)
            group_by    : result column to group rows by in Python; rows where this
                          column is missing or `None` are left out of the grouped result
            agg_cols    : columns whose values are gathered, per group, into a set
                          stored under `<col>_agg` on the group's first row
            wherein_dict: (col-name, value-list) pairs to match result set against, via
                          WHERE ... IN clauses
            unique_on   : currently unused

        Returns:
            If `group_by` is None, the `(rows, columns)` pair produced by `raw_select`.
            Otherwise a dictionary with keys `results` (group value -> first row of that
            group, including the `<col>_agg` sets), `columns`, and `num_results` (number
            of rows before grouping).
        '''
        if select_cols is None:
            select_cols = '*'
        elif isinstance(select_cols, (list, tuple)):
            select_cols = ', '.join(select_cols)

        # main search query first, auxiliary column constraints after it
        match_terms = []
        if search_cols and q:
            match_terms.append(f'{{{search_cols}}} : {q}')
        if colq:
            match_terms.append(colq)
        search_q = ' '.join(match_terms).strip()

        hl_start = '<b><mark>'
        hl_end   = '</mark></b>'

        fts_table_name = f'{table_name}_fts_{tokenizer}'

        sql_lines = [
            'SELECT',
            f'    {select_cols},',
            f"    snippet({fts_table_name}, {snip_col}, '{hl_start}', '{hl_end}', '...', {snip}) AS snippet,",
            f"    highlight({fts_table_name}, {hl_col}, '{hl_start}', '{hl_end}') AS highlight",
            f'FROM {fts_table_name}',
        ]

        where_clauses = []
        if search_q:
            # double embedded quotes so the query text cannot close the SQL literal
            escaped_q = search_q.replace("'", "''")
            where_clauses.append(f"{fts_table_name} MATCH '{escaped_q}'")

        if wherein_dict:
            for col, vals in wherein_dict.items():
                # explicit list rendering: `tuple(vals)` would emit `('a',)` for a
                # single value, which is not valid SQL
                in_list = ', '.join(self._sql_literal(val) for val in vals)
                where_clauses.append(f'{col} IN ({in_list})')

        if where_clauses:
            sql_lines.append('WHERE ' + ' AND '.join(where_clauses))

        sql_lines.append('ORDER BY rank')
        if limit is not None:
            sql_lines.append(f'LIMIT {int(limit)}')

        sql = '\n'.join(sql_lines) + ';'

        # NOTE(review): `raw_select` is not defined on the `Accessor` base imported by
        # this module (it appears on `SQLAccessor` in `co3.accessors.sql`) -- confirm how
        # instances of this class are expected to obtain it.
        row_dicts, cols = self.raw_select(sql, include_cols=True)

        if group_by is None:
            return row_dicts, cols

        if agg_cols is None:
            agg_cols = []

        # "group by" the requested column and wrangle the aggregate columns into sets;
        # note we can't perform native GROUP BYs with FTS results
        group_by_idx = {}
        for row in row_dicts:
            group_by_attr = row.get(group_by)

            # every row gets (empty) aggregate entries, grouped or not
            for agg_col in agg_cols:
                row[f'{agg_col}_agg'] = set()

            if group_by_attr is None:
                continue

            # the first row seen for a group becomes that group's representative
            group_row = group_by_idx.setdefault(group_by_attr, row)

            for agg_col in agg_cols:
                if agg_col in row:
                    group_row[f'{agg_col}_agg'].add(row[agg_col])

        return {
            'results'     : group_by_idx,
            'columns'     : cols,
            'num_results' : len(row_dicts),
        }
|
96
co3/accessors/sql.py
Normal file
96
co3/accessors/sql.py
Normal file
@ -0,0 +1,96 @@
|
|||||||
|
from pathlib import Path
|
||||||
|
from collections.abc import Iterable
|
||||||
|
import inspect
|
||||||
|
from functools import cache
|
||||||
|
|
||||||
|
import sqlalchemy as sa
|
||||||
|
|
||||||
|
from co3 import util
|
||||||
|
from co3.accessor import Accessor
|
||||||
|
from co3.relation import Relation
|
||||||
|
|
||||||
|
#from co3.databases.sql import RelationalDatabase, TabularDatabase, SQLDatabase
|
||||||
|
from co3.relations import TabularRelation, SQLTable
|
||||||
|
|
||||||
|
|
||||||
|
class RelationalAccessor[D: 'RelationalDatabase', R: Relation](Accessor[D]):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
class TabularAccessor[D: 'TabularDatabase', R: TabularRelation](RelationalAccessor[D, R]):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
class SQLAccessor(TabularAccessor['SQLDatabase', SQLTable]):
    '''
    Accessor that runs SELECT statements through the owning database's engine.

    NOTE(review): the result helpers are looked up on `util.db` (the module imported at
    the top of this file as `from co3 import util`); the previous spelling `utils` was
    not defined anywhere in this module. Confirm `sa_exec_dicts` / `sa_exec_mappings`
    live there.
    '''
    def raw_select(
        self,
        sql,
        bind_params=None,
        mappings=False,
        include_cols=False,
    ):
        '''
        Execute a raw SQL string against the database engine.

        Parameters:
            sql:          SQL text; wrapped with `sa.text` before execution
            bind_params:  bind parameters forwarded to the execution helper
            mappings:     use `sa_exec_mappings` instead of `sa_exec_dicts`
            include_cols: forwarded to the execution helper; callers in this package
                          unpack the result as `(rows, cols)` when it is set
        '''
        res_method = util.db.sa_exec_mappings if mappings else util.db.sa_exec_dicts

        return res_method(
            self.database.engine,
            sa.text(sql),
            bind_params=bind_params,
            include_cols=include_cols,
        )

    def select(
        self,
        table: sa.Table | sa.Subquery | sa.Join,
        cols         = None,
        where        = None,
        distinct_on  = None,
        order_by     = None,
        limit        = 0,
        mappings     = False,
        include_cols = False,
    ):
        '''
        Perform a SELECT query against the provided table-like object (see
        `check_table()`).

        Deprecated: String aliases
            String aliases for tables are no longer supported. This method no longer
            checks against any specific schema table-maps or Composers. Instead, this
            should be done outside the Accessor.

        Parameters:
            table:        table-like object to select from
            cols:         optional iterable of columns; all of `table` is selected when
                          omitted
            where:        optional WHERE expression; defaults to `sa.true()`
            distinct_on:  column(s) handed to GROUP BY; for now serves as a proxy for
                          DISTINCT (no aggregation methods accepted)
            order_by:     column to order results by (can use <col>.desc() to order
                          by descending)
            limit:        maximum number of rows; values that are `None` or not
                          positive disable the limit
            mappings:     use `sa_exec_mappings` instead of `sa_exec_dicts`
            include_cols: forwarded to the execution helper
        '''
        if where is None:
            where = sa.true()

        res_method = util.db.sa_exec_mappings if mappings else util.db.sa_exec_dicts

        if cols is None:
            stmt = sa.select(table)
        else:
            stmt = sa.select(*cols).select_from(table)
        stmt = stmt.where(where)

        if distinct_on is not None:
            stmt = stmt.group_by(distinct_on)

        if order_by is not None:
            stmt = stmt.order_by(order_by)

        if limit is not None and limit > 0:
            stmt = stmt.limit(limit)

        # same engine handle as `raw_select`, so both query paths agree
        return res_method(self.database.engine, stmt, include_cols=include_cols)

    def select_one(self, table, cols=None, where=None, mappings=False, include_cols=False):
        '''
        Run `select` with `limit=1` and return the first row, if any.

        Returns:
            The first row, or `(first_row, cols)` when `include_cols` is set; `None`
            when the query yields no rows.
        '''
        # keyword arguments are required here: positionally, `mappings` and
        # `include_cols` would land in `select`'s `distinct_on` / `order_by` slots
        res = self.select(
            table,
            cols=cols,
            where=where,
            limit=1,
            mappings=mappings,
            include_cols=include_cols,
        )

        if include_cols:
            rows, columns = res
            if len(rows) > 0:
                return rows[0], columns
            return None

        if len(res) > 0:
            return res[0]

        return None
|
||||||
|
|
100
co3/accessors/vss.py
Normal file
100
co3/accessors/vss.py
Normal file
@ -0,0 +1,100 @@
|
|||||||
|
import pickle
|
||||||
|
import logging
|
||||||
|
from pathlib import Path
|
||||||
|
import time
|
||||||
|
|
||||||
|
import sqlalchemy as sa
|
||||||
|
#from sentence_transformers import SentenceTransformer, util
|
||||||
|
|
||||||
|
from co3.accessor import Accessor
|
||||||
|
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
class VSSAccessor(Accessor):
    '''
    Accessor for vector similarity search over pickled embedding indexes.

    NOTE(review): `SentenceTransformer` and `util` (used by `model` and `search`) come
    from the `sentence_transformers` import that is currently commented out at the top
    of this module; those two members raise NameError until it is restored.
    '''
    def __init__(self, cache_path):
        super().__init__()

        # both are populated lazily by the `model` / `embeddings` properties
        self._model      = None
        self._embeddings = None

        self._embedding_size = 384
        self.embedding_path  = Path(cache_path, 'embeddings.pkl')

    def write_embeddings(self, embedding_dict):
        '''
        Pickle `embedding_dict` to `self.embedding_path`, replacing existing content.
        '''
        self.embedding_path.write_bytes(pickle.dumps(embedding_dict))

    def read_embeddings(self):
        '''
        Load the pickled embeddings from `self.embedding_path`.

        Returns:
            The unpickled object, or `None` (after logging a warning) when the file
            does not exist.
        '''
        if not self.embedding_path.exists():
            logger.warning(
                f'Attempting to access non-existent embeddings at {self.embedding_path}'
            )
            return None

        # NOTE(review): unpickling runs arbitrary code from the file; only safe while
        # the cache path is trusted
        return pickle.loads(self.embedding_path.read_bytes())

    @property
    def model(self):
        '''
        Sentence embedding model, constructed on first access.
        '''
        if self._model is None:
            # model trained with 128 token seqs
            self._model = SentenceTransformer('sentence-transformers/all-MiniLM-L12-v2')
        return self._model

    @property
    def embeddings(self):
        '''
        Embedding indexes read from disk on first access. Stays `None` (and is
        re-attempted on the next access) while the embeddings file is missing.
        '''
        if self._embeddings is None:
            self._embeddings = self.read_embeddings()
        return self._embeddings

    def embed_chunks(self, chunks, batch_size=64, show_prog=True):
        '''
        Encode `chunks` with the model, requesting normalized numpy embeddings.

        Parameters:
            chunks:     input forwarded to `model.encode`
            batch_size: encode batch size
            show_prog:  whether to show the encoder's progress bar
        '''
        return self.model.encode(
            chunks,
            batch_size           = batch_size,
            show_progress_bar    = show_prog,
            convert_to_numpy     = True,
            normalize_embeddings = True
        )

    def search(
        self,
        query           : str,
        index_name      : str,
        limit           : int = 10,
        score_threshold = 0.5,
    ):
        '''
        Embed `query` and return the best matching items of the named index.

        Parameters:
            query:           text to search for; an empty query returns `None`
            index_name:      one of ['chunks','blocks','notes']
            limit:           maximum number of hits requested from the search
            score_threshold: hits scoring below this value are dropped

        Returns:
            List of hit dicts, each extended with `group_name` and `item` entries, or
            `None` when the query is empty or the index is unavailable.
        '''
        if not query:
            return None

        # `self.embeddings` is None when no embeddings file exists; a bare `in` test
        # on it would raise TypeError
        embeddings = self.embeddings
        if embeddings is None or index_name not in embeddings:
            logger.warning(
                f'Index "{index_name}" does not exist'
            )
            return None

        start = time.time()

        query_embedding = self.embed_chunks(query, show_prog=False)
        index_ids, index_embeddings, index_items = embeddings[index_name]

        # a single query is searched, so only the first result list is relevant
        hits = util.semantic_search(
            query_embedding,
            index_embeddings,
            top_k=limit,
            score_function=util.dot_score
        )[0]

        hits = [hit for hit in hits if hit['score'] >= score_threshold]

        for hit in hits:
            idx = hit['corpus_id']
            hit['group_name'] = index_ids[idx]
            hit['item']       = index_items[idx]

        logger.info(f'{len(hits)} hits in {time.time()-start:.2f}s')

        return hits
|
||||||
|
|
106
co3/co3.py
Normal file
106
co3/co3.py
Normal file
@ -0,0 +1,106 @@
|
|||||||
|
'''
|
||||||
|
CO3
|
||||||
|
|
||||||
|
CO3 is an abstract base class for scaffolding object hierarchies and managing operations
|
||||||
|
with associated database schemas. It facilitates something like a "lightweight ORM" for
|
||||||
|
classes/tables/states with fixed transformations of interest. The canonical use case is
|
||||||
|
managing hierarchical document relations, format conversions, and syntactical components.
|
||||||
|
'''
|
||||||
|
|
||||||
|
import inspect
|
||||||
|
import logging
|
||||||
|
from functools import wraps, partial
|
||||||
|
|
||||||
|
#from localsys.db.schema import tables
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
#def register_format(_format):
|
||||||
|
# def decorator(func):
|
||||||
|
# self.collate.format_map[_format] = func
|
||||||
|
#
|
||||||
|
# @wraps(func)
|
||||||
|
# def register(*args, **kwargs):
|
||||||
|
# return func(*args, **kwargs)
|
||||||
|
#
|
||||||
|
# return register
|
||||||
|
# return decorator
|
||||||
|
|
||||||
|
def collate(action_key, action_groups=None):
    '''
    Decorator factory marking a method as the handler for `action_key`.

    The decorated function is returned unchanged apart from a new `_action_data`
    attribute holding `(action_key, action_groups)`.

    Parameters:
        action_key:    key the method is registered under
        action_groups: list of group names the action belongs to; defaults to `[None]`
    '''
    # resolve the default out here under a new name: rebinding `action_groups` inside
    # `decorator` would make it local to that function and raise UnboundLocalError
    groups = [None] if action_groups is None else action_groups

    def decorator(func):
        func._action_data = (action_key, groups)
        return func

    return decorator
|
||||||
|
|
||||||
|
class FormatRegistryMeta(type):
    '''
    Metaclass that builds an `action_map` class attribute from methods carrying an
    `_action_data` attribute (a `(action_key, action_groups)` pair).

    `action_map` maps each action key to `(method, action_groups)`.
    '''
    def __new__(cls, name, bases, attrs):
        action_registry = {}

        # add registered superclass methods; iterate over bases (usually just one), then
        # that base's chain down (reversed), then methods from each subclass
        for base in bases:
            for _class in reversed(base.mro()):
                methods = inspect.getmembers(_class, predicate=inspect.isfunction)
                for _, method in methods:
                    if hasattr(method, '_action_data'):
                        action_key, action_groups = method._action_data
                        action_registry[action_key] = (method, action_groups)

        # add final registered formats for the current class, overwriting any found in
        # superclass chain
        for attr_value in attrs.values():
            if hasattr(attr_value, '_action_data'):
                action_key, action_groups = attr_value._action_data
                # register the class's own attribute, not the loop variable left over
                # from the superclass scan above
                action_registry[action_key] = (attr_value, action_groups)

        attrs['action_map'] = action_registry

        return super().__new__(cls, name, bases, attrs)
|
||||||
|
|
||||||
|
class CO3(metaclass=FormatRegistryMeta):
    '''
    CO3: COllate, COllect, COmpose - conversion & DB insertion base

    - Collate: organize and transform conversion outputs, possibly across class components
    - Collect: gather core attributes, conversion data, and subcomponents for DB insertion
    - Compose: construct object-associated DB table references through the class hierarchy

    Note: on action groups
        Group keys are simply named collections to make it easy for storage components to
        be attached to action subsets. They do _not_ augment the action registration
        namespace, meaning the action key should still be unique; the group key is purely
        auxiliary.

        Action methods can also be attached to several groups, in case there is
        overlapping utility within or across schemas or storage media. In this case, it
        becomes particularly critical to ensure registered `collate` methods really are
        just "gathering results" from possibly heavy-duty operations, rather than
        performing them when called, so as to reduce wasted computation.
    '''
    @property
    def attributes(self):
        '''
        Method to define how a subtype's inserts should be handled under `collect` for
        canonical attributes, i.e., inserts to the type's table.
        '''
        return vars(self)

    @property
    def components(self):
        '''
        Method to define how a subtype's inserts should be handled under `collect` for
        constituent components that need handling.
        '''
        return []

    def collate(self, action_key, *action_args, **action_kwargs):
        '''
        Invoke the method registered under `action_key`.

        Parameters:
            action_key:    key the target method was registered with
            action_args:   positional arguments forwarded to the registered method
            action_kwargs: keyword arguments forwarded to the registered method

        Returns:
            The registered method's return value, or `None` when no method is
            registered under `action_key`.
        '''
        if action_key not in self.action_map:
            logger.debug(f'Collation for {action_key} not supported')
            return None

        # registry entries are `(method, action_groups)` pairs; only the method is
        # callable
        method, _ = self.action_map[action_key]
        return method(self, *action_args, **action_kwargs)
|
||||||
|
|
||||||
|
|
109
co3/collector.py
Normal file
109
co3/collector.py
Normal file
@ -0,0 +1,109 @@
|
|||||||
|
'''
|
||||||
|
Defines the Collector base class.
|
||||||
|
|
||||||
|
This module is the critical "middleware" connecting the primitive object definitions and
|
||||||
|
their representations in the database. It operates with full knowledge of how both are
|
||||||
|
defined, and abstracts away both the prep work for DB insertions as well as updates
|
||||||
|
trickling down the primitive hierarchy.
|
||||||
|
|
||||||
|
The `src` format target is re-used for both canonical tables/primitives, as well as
|
||||||
|
<prim>_conversion_matter tables in tables/conversions under the `src` format. The latter
|
||||||
|
is meant to extend those attributes that are format-specific (i.e., would change when, say,
|
||||||
|
converting to `html5`), and thus need to be broken across the format dimension.
|
||||||
|
|
||||||
|
Note:
|
||||||
|
Despite the structure of the database module, this class does not currently inherit
|
||||||
|
from a super class in localsys.db (like the accessors and managers, for instance).
|
||||||
|
This will likely ultimately be the model that's embraced, but until FTS (or other
|
||||||
|
groups) need a collector, this will remain an independent class. It is, however,
|
||||||
|
named like a concrete subclass, taking on the "Core" prefix.
|
||||||
|
'''
|
||||||
|
|
||||||
|
from pathlib import Path
|
||||||
|
from collections import defaultdict
|
||||||
|
import logging
|
||||||
|
import importlib
|
||||||
|
import subprocess
|
||||||
|
from uuid import uuid4
|
||||||
|
|
||||||
|
import sqlalchemy as sa
|
||||||
|
|
||||||
|
from co3 import util
|
||||||
|
#from localsys.db.schema import tables
|
||||||
|
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
class Collector:
    '''
    Accumulates prepared insert dictionaries, grouped first by a per-insert receipt and
    then by table name.
    '''
    def __init__(self):
        # receipt -> table name -> list of prepared inserts
        self._inserts = defaultdict(lambda: defaultdict(list))

    @property
    def inserts(self):
        '''
        Table name-indexed view of every pending insert; nothing is removed.
        '''
        return self._inserts_from_receipts()

    def _inserts_from_receipts(self, receipts=None, pop=False):
        '''
        Merge the inserts stored under `receipts` into a single table-indexed dict.

        Parameters:
            receipts: receipts to gather; all currently stored receipts when `None`
            pop:      remove the gathered receipts from the internal store

        Returns:
            Plain dict mapping table name to a list of inserts. Unknown receipts
            contribute nothing.
        '''
        inserts = defaultdict(list)

        if receipts is None:
            receipts = list(self._inserts.keys())

        for receipt in receipts:
            if pop:
                insert_dict = self._inserts.pop(receipt, {})
            else:
                # `.get` avoids the defaultdict creating an empty entry for a receipt
                # that was never issued
                insert_dict = self._inserts.get(receipt, {})

            for table, insert_list in insert_dict.items():
                inserts[table].extend(insert_list)

        return dict(inserts)

    def _reset_session(self):
        '''
        Drop every pending insert.
        '''
        self._inserts = defaultdict(lambda: defaultdict(list))

    def _generate_unique_receipt(self):
        '''
        Return a fresh receipt string (a random UUID4).
        '''
        return str(uuid4())

    def add_insert(self, table_name, insert_dict, receipts=None):
        '''
        Prepare `insert_dict` for `table_name` and store it under a new receipt.

        TODO: formalize table_name mapping; at class level provide a `table_map`, or
        provide the table object itself to this method

        NOTE(review): `tables` is not defined in this module (its import is commented
        out above), so this method raises NameError until a table map is supplied.

        Parameters:
            table_name:  key into `tables.table_map`
            insert_dict: raw values handed to `util.db.prepare_insert`
            receipts:    optional list that the new receipt is appended to

        Returns:
            The new receipt, or `None` when `table_name` is not a known table.
        '''
        if table_name not in tables.table_map:
            #logger.debug(f'Inserts provided for non-existent table {table_name}')
            return None

        receipt = self._generate_unique_receipt()

        # `util` is the module this file imports (`from co3 import util`)
        self._inserts[receipt][table_name].append(
            util.db.prepare_insert(
                tables.table_map[table_name],
                insert_dict
            )
        )

        if receipts is not None:
            receipts.append(receipt)

        return receipt

    def collect_inserts(self, receipts=None):
        '''
        Gather and remove the pending inserts for the given receipts.

        Parameters:
            receipts: receipts whose inserts should be collected; all pending receipts
                      when `None`

        Returns:
            Table name-indexed dictionary of insert lists (of column name-indexed dicts)
        '''
        return self._inserts_from_receipts(receipts, pop=True)
|
89
co3/composer.py
Normal file
89
co3/composer.py
Normal file
@ -0,0 +1,89 @@
|
|||||||
|
'''
|
||||||
|
Composer
|
||||||
|
|
||||||
|
Base for manually defining table compositions outside those natural to the schema
|
||||||
|
hierarchy (i.e., constructable by a `CO3.compose()` call).
|
||||||
|
|
||||||
|
Example: suppose we have a simple object hierarchy A(CO3) -> B -> C. C's in-built
|
||||||
|
`compose()` method may not always be desirable when constructing composite tables and
|
||||||
|
running related queries. In this case, a custom Composer can be used to make needed
|
||||||
|
composite tables easier to reference; in the case below, we define the "BC" composite
|
||||||
|
table.
|
||||||
|
|
||||||
|
```
|
||||||
|
class ExampleComposer(Composer):
|
||||||
|
|
||||||
|
@register_table
|
||||||
|
def BC(self):
|
||||||
|
full_B = B.compose(full=True)
|
||||||
|
full_C = C.compose(full=True)
|
||||||
|
|
||||||
|
return full_B.join(
|
||||||
|
full_C,
|
||||||
|
full_B.c.name == full_C.c.name, # TODO: is this fine? or do we need base table refs
|
||||||
|
outer=True
|
||||||
|
)
|
||||||
|
'''
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from co3.mapper import Mapper
|
||||||
|
|
||||||
|
|
||||||
|
def register_table(table_name=None):
    '''
    Registry decorator for defined composer classes. Decorating a class method simply
    attaches a `table_name` attribute to it, setting it to either a provided value or the
    name of the method itself. Methods with a `table_name` attribute are later swept up at
    the class level and placed in the `table_map`.

    Usable both as `@register_table('name')` / `@register_table()` and bare as
    `@register_table`.
    '''
    def decorator(func):
        # no rebinding of `table_name` in here: an assignment would make the name local
        # to `decorator` and the `is None` check would raise UnboundLocalError
        func.table_name = func.__name__ if table_name is None else table_name
        return func

    # bare usage hands the decorated function in as `table_name`
    if callable(table_name):
        func, table_name = table_name, None
        return decorator(func)

    return decorator
|
||||||
|
|
||||||
|
class Composer[M: Mapper]:
|
||||||
|
'''
|
||||||
|
Base composer wrapper for table groupings.
|
||||||
|
|
||||||
|
The schema is centered around a connected group of tables (via foreign keys). Thus,
|
||||||
|
most operations need to be coordinated across tables. The `accessors` submodules
|
||||||
|
are mostly intended to provide a "secondary layer" over the base set of tables in the
|
||||||
|
schema, exposing common higher level table compositions (i.e., chained JOINs). See
|
||||||
|
concrete instances (e.g., CoreAccess, FTSAccessor) for actual implementations these
|
||||||
|
tables; the base class does not expose
|
||||||
|
|
||||||
|
Tables in subclasses are registered with the `register_table` decorator, automatically
|
||||||
|
indexing them under the provided name and making them available via the `table_map`.
|
||||||
|
'''
|
||||||
|
def __init__(self):
|
||||||
|
self._set_tables()
|
||||||
|
|
||||||
|
def _set_tables(self):
|
||||||
|
'''
|
||||||
|
Skip properties (so appropriate delays can be used), and
|
||||||
|
|
||||||
|
Set the table registry at the class level. This only takes place during the first
|
||||||
|
instantiation of the class, and makes it possible to definitively tie methods to
|
||||||
|
composed tables during lookup with `get_table()`.
|
||||||
|
'''
|
||||||
|
cls = self.__class__
|
||||||
|
|
||||||
|
# in case the class has already be instantiated
|
||||||
|
if hasattr(cls, 'table_map'): return
|
||||||
|
|
||||||
|
table_map = {}
|
||||||
|
for key, value in cls.__dict__.items():
|
||||||
|
if isinstance(value, property):
|
||||||
|
continue # Skip properties
|
||||||
|
if callable(value) and hasattr(value, 'table_name'):
|
||||||
|
table_map[value.table_name] = value(self)
|
||||||
|
|
||||||
|
cls.table_map = table_map
|
||||||
|
|
||||||
|
def get_table(self, table_name):
|
||||||
|
'''
|
||||||
|
Retrieve the named table composition, if defined.
|
||||||
|
'''
|
||||||
|
return self.table_map.get(table_name)
|
87
co3/database.py
Normal file
87
co3/database.py
Normal file
@ -0,0 +1,87 @@
|
|||||||
|
'''
|
||||||
|
Database
|
||||||
|
|
||||||
|
Central object for defining storage protocol-specific interfaces. The database wraps up
|
||||||
|
central items for interacting with database resources, namely the Accessor and Manager
|
||||||
|
objects.
|
||||||
|
|
||||||
|
The Database type hierarchy attempts to be exceedingly general; SQL-derivatives should
|
||||||
|
subclass from the RelationalDatabase subtype, for example, which itself becomes a new
|
||||||
|
generic via type dependence on Relation.
|
||||||
|
'''
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from typing import Self
|
||||||
|
|
||||||
|
from co3.accessor import Accessor
|
||||||
|
from co3.composer import Composer
|
||||||
|
from co3.manager import Manager
|
||||||
|
from co3.indexer import Indexer
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class Database:
    '''
    Wraps a storage resource together with the Accessor, Manager and Indexer built for
    it. Subclasses choose the component classes through the `accessor` and `manager`
    class attributes and provide the `engine` property.
    '''
    # classes instantiated with this database in `__init__`
    accessor: type[Accessor[Self]] = Accessor
    manager:  type[Manager[Self]]  = Manager

    def __init__(self, resource):
        '''
        Variables:
            _local_cache: a database-local property store for ad-hoc CacheBlock-esque
                          methods, that are nevertheless _not_ query/group-by responses to
                          pass on to the Indexer. Dependent properties should write to
                          this cache and check for existence of stored results; the cache
                          state must be managed globally.
        '''
        self.resource = resource

        self._access = self.accessor(self)
        self._manage = self.manager(self)

        self._index       = Indexer(self._access)
        self._local_cache = {}

        # set by `manage`, consumed by `index`
        self.reset_cache = False

    @property
    def engine(self):
        '''
        Database property to provide a singleton engine for DB interaction, initializing
        the database if it doesn't already exist.

        TODO: figure out thread safety across engines and/or connection. Any issue with
        hanging on to the same engine instance for the Database instance?
        '''
        raise NotImplementedError

    def connect(self):
        '''
        Open a connection on the engine and return it to the caller.
        '''
        return self.engine.connect()

    @property
    def access(self):
        return self._access

    @property
    def compose(self):
        '''
        Composer attached to this database, or `None` when none has been assigned.
        '''
        # `_compose` is not set in `__init__`; fall back to None when nothing has
        # assigned it, rather than raising AttributeError
        return getattr(self, '_compose', None)

    @property
    def index(self):
        '''
        Indexer for this database; its cache is cleared first if `manage` was accessed
        since the last lookup.
        '''
        if self.reset_cache:
            self._index.cache_clear()
            self.reset_cache = False
        return self._index

    @property
    def manage(self):
        '''
        Accessing `.manage` queues a cache clear on the external index, as well as wipes
        the local cache.
        '''
        self.reset_cache = True
        self._local_cache = {}
        return self._manage

    def populate_indexes(self): pass
|
||||||
|
|
3
co3/databases/__init__.py
Normal file
3
co3/databases/__init__.py
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
from co3.databases.sql import *
|
||||||
|
from co3.databases.fts import FTSDatabase
|
||||||
|
from co3.databases.vss import VSSDatabase
|
BIN
co3/databases/__pycache__/__init__.cpython-311.pyc
Normal file
BIN
co3/databases/__pycache__/__init__.cpython-311.pyc
Normal file
Binary file not shown.
BIN
co3/databases/__pycache__/__init__.cpython-312.pyc
Normal file
BIN
co3/databases/__pycache__/__init__.cpython-312.pyc
Normal file
Binary file not shown.
BIN
co3/databases/__pycache__/_base.cpython-311.pyc
Normal file
BIN
co3/databases/__pycache__/_base.cpython-311.pyc
Normal file
Binary file not shown.
BIN
co3/databases/__pycache__/core.cpython-311.pyc
Normal file
BIN
co3/databases/__pycache__/core.cpython-311.pyc
Normal file
Binary file not shown.
BIN
co3/databases/__pycache__/fts.cpython-311.pyc
Normal file
BIN
co3/databases/__pycache__/fts.cpython-311.pyc
Normal file
Binary file not shown.
BIN
co3/databases/__pycache__/fts.cpython-312.pyc
Normal file
BIN
co3/databases/__pycache__/fts.cpython-312.pyc
Normal file
Binary file not shown.
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user