clean up repo files, README, auxiliary files (pre-BFG)
This commit is contained in:
parent
1bc7752b06
commit
c2e09b2c10
6
.gitignore
vendored
6
.gitignore
vendored
@ -1,7 +1,7 @@
|
|||||||
# generic py
|
# generic py
|
||||||
__pycache__/
|
__pycache__/
|
||||||
.pytest_cache/
|
.pytest_cache/
|
||||||
localsys.egg-info/
|
*.egg-info/
|
||||||
.ipynb_checkpoints/
|
.ipynb_checkpoints/
|
||||||
.pytest_cache/
|
.pytest_cache/
|
||||||
.python-version
|
.python-version
|
||||||
@ -12,3 +12,7 @@ build/
|
|||||||
docs/_autoref/
|
docs/_autoref/
|
||||||
docs/_autosummary/
|
docs/_autosummary/
|
||||||
docs/_build/
|
docs/_build/
|
||||||
|
|
||||||
|
# local
|
||||||
|
notebooks/
|
||||||
|
/Makefile
|
||||||
|
22
LICENSE
Normal file
22
LICENSE
Normal file
@ -0,0 +1,22 @@
|
|||||||
|
MIT License
|
||||||
|
|
||||||
|
Copyright (c) 2024 Sam Griesemer
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
|
in the Software without restriction, including without limitation the rights
|
||||||
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
|
furnished to do so, subject to the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be included in all
|
||||||
|
copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
SOFTWARE.
|
||||||
|
|
60
README.md
60
README.md
@ -1,35 +1,35 @@
|
|||||||
# Overview
|
# Overview
|
||||||
`co3` is a package for file conversion and associated database operations. The `CO3` base class
|
`co3` is a lightweight Python ORM for hierarchical storage management. It implements a
|
||||||
provides a standard interface for performing conversions, preparing inserts, and
|
general type system for defining database components like relations, schemas, engines,
|
||||||
interacting with database schemas that mirror the class hierarchy.
|
etc. Objects inheriting from the `CO3` base class can then define data transformations
|
||||||
|
that connect to database components, and can be automatically collected for coordinated
|
||||||
|
database insertion.
|
||||||
|
|
||||||
Simplified description of the operational model:
|
`co3` attempts to provide a general interface for interacting with a storage medium (e.g.,
|
||||||
|
database, pickled objects, VSS framework, in-memory key-value stores, etc). The following
|
||||||
|
top-level classes capture the bulk of the operational model:
|
||||||
|
|
||||||
**Goal**: interact with a storage medium (database, pickled structure, VSS framework) with
|
- **Database**: reference to a storage medium, with an `Accessor` for accessing data,
|
||||||
a known schema.
|
a `Manager` for managing database state, and an `Engine` for managing connections and
|
||||||
|
external operations.
|
||||||
|
- **Accessor**: provides access to stored items in a `Database`, typically via a supported
|
||||||
|
`select` operation over known `Component` types
|
||||||
|
- **Manager**: manages database storage state (e.g., supported inserts or database sync
|
||||||
|
operations)
|
||||||
|
- **Mapper**: associates `CO3` types with `Schema` components, and provides automatic
|
||||||
|
collection and composition operations for supported items
|
||||||
|
- **Collector**: collects data from defined `CO3` type transformations and prepares for
|
||||||
|
`Database` insert operations
|
||||||
|
- **Component**: atomic storage groups for databases (i.e., generalized notion of a
|
||||||
|
"relation" in relational algebra).
|
||||||
|
- **Indexer**: automatic caching of supported access queries to a `Database`
|
||||||
|
- **Schema**: general schema analog for grouping related `Component` sets
|
||||||
|
- **Differ**: facilitates set operations on results from selectable resources (e.g.,
|
||||||
|
automatic comparison between file data on disk and file rows in a SQL database)
|
||||||
|
- **Syncer**: generalized syncing procedure for items between data resources (e.g.,
|
||||||
|
syncing new, modified, and deleted files from disk to a SQL database that stores file
|
||||||
|
metadata).
|
||||||
|
|
||||||
- **Accessor** to provide access to stored items
|
The **CO3** abstract base class then makes it easy to integrate this model with regular
|
||||||
- **Composer** to compose common access points (e.g., JOINed tables)
|
Python object hierarchies that can be mapped to a storage schema.
|
||||||
- **Indexer** to index/cache access queries
|
|
||||||
- **Manager** to manage storage state (e.g., supported inserts, database syncs)
|
|
||||||
- **Collector** to collect data for updating storage state
|
|
||||||
- **Database** to collect data for updating storage state
|
|
||||||
- **Mapper** to collect data for updating storage state
|
|
||||||
- **Component** to collect data for updating storage state
|
|
||||||
|
|
||||||
**CO3** is an abstract base class that makes it easy to integrate this model with object
|
|
||||||
hierarchies that mirror a storage schema.
|
|
||||||
|
|
||||||
# Detailed structural breakdown
|
|
||||||
There are a few pillars of the CO3 model that meaningfully group up functionality:
|
|
||||||
|
|
||||||
- Database: generic to a Component type, provides basic connection to a database at a
|
|
||||||
specific address/location. The explicit Component type makes it easy to hook into
|
|
||||||
appropriately typed functional objects:
|
|
||||||
* Manager: generic to a Component and Database type, provides a supported set of
|
|
||||||
state-modifying operations to a constituent database
|
|
||||||
* Accessor: generic to a Component and Database type, provides a supported set of
|
|
||||||
state inspection operations on a constituent database
|
|
||||||
* Indexer:
|
|
||||||
- Mapper: generic to a Component, serves as the fundamental connective component between
|
|
||||||
types in the data representation hierarchy (CO3 subclasses) and database Components.
|
|
||||||
|
@ -11,6 +11,8 @@ from co3.accessor import Accessor
|
|||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
class VSSAccessor(Accessor):
|
class VSSAccessor(Accessor):
|
||||||
|
_model_cls = None
|
||||||
|
|
||||||
def __init__(self, cache_path):
|
def __init__(self, cache_path):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
|
|
||||||
@ -35,8 +37,7 @@ class VSSAccessor(Accessor):
|
|||||||
@property
|
@property
|
||||||
def model(self):
|
def model(self):
|
||||||
if self._model is None:
|
if self._model is None:
|
||||||
# model trained with 128 token seqs
|
self._model = self._model_cls()
|
||||||
self._model = SentenceTransformer('sentence-transformers/all-MiniLM-L12-v2')
|
|
||||||
return self._model
|
return self._model
|
||||||
|
|
||||||
@property
|
@property
|
||||||
@ -60,12 +61,8 @@ class VSSAccessor(Accessor):
|
|||||||
index_name : str,
|
index_name : str,
|
||||||
query : str,
|
query : str,
|
||||||
limit : int = 10,
|
limit : int = 10,
|
||||||
score_threshold = 0.5,
|
score_threshold = 0.5,
|
||||||
):
|
):
|
||||||
'''
|
|
||||||
Parameters:
|
|
||||||
index_name: one of ['chunks','blocks','notes']
|
|
||||||
'''
|
|
||||||
if not query:
|
if not query:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
20
docs/Makefile
Normal file
20
docs/Makefile
Normal file
@ -0,0 +1,20 @@
|
|||||||
|
# Minimal makefile for Sphinx documentation
|
||||||
|
#
|
||||||
|
|
||||||
|
# You can set these variables from the command line, and also
|
||||||
|
# from the environment for the first two.
|
||||||
|
SPHINXOPTS ?=
|
||||||
|
SPHINXBUILD ?= sphinx-build
|
||||||
|
SOURCEDIR = .
|
||||||
|
BUILDDIR = _build
|
||||||
|
|
||||||
|
# Put it first so that "make" without argument is like "make help".
|
||||||
|
help:
|
||||||
|
@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
|
||||||
|
|
||||||
|
.PHONY: help Makefile
|
||||||
|
|
||||||
|
# Catch-all target: route all unknown targets to Sphinx using the new
|
||||||
|
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
|
||||||
|
%: Makefile
|
||||||
|
@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
|
@ -1,27 +1,49 @@
|
|||||||
[build-system]
|
[build-system]
|
||||||
requires = ["setuptools"]
|
requires = ["setuptools", "wheel", "setuptools-git-versioning>=2.0,<3"]
|
||||||
build-backend = "setuptools.build_meta"
|
build-backend = "setuptools.build_meta"
|
||||||
|
|
||||||
[project]
|
[project]
|
||||||
name = "co3"
|
name = "co3"
|
||||||
version = "0.1.1"
|
description = "Lightweight Python ORM for hierarchical storage management"
|
||||||
authors = [
|
|
||||||
{ name="Sam Griesemer", email="samgriesemer@gmail.com" },
|
|
||||||
]
|
|
||||||
description = "Lightweight ORM"
|
|
||||||
readme = "README.md"
|
readme = "README.md"
|
||||||
requires-python = ">=3.11"
|
requires-python = ">=3.12"
|
||||||
|
dynamic = ["version"]
|
||||||
|
#license = {file = "LICENSE"}
|
||||||
|
authors = [
|
||||||
|
{ name="Sam Griesemer", email="samgriesemer+git@gmail.com" },
|
||||||
|
]
|
||||||
|
keywords = ["database", "orm"]
|
||||||
classifiers = [
|
classifiers = [
|
||||||
"Programming Language :: Python :: 3",
|
"Programming Language :: Python :: 3.12",
|
||||||
"License :: OSI Approved :: MIT License",
|
"License :: OSI Approved :: MIT License",
|
||||||
"Operating System :: OS Independent",
|
"Operating System :: OS Independent",
|
||||||
|
"Development Status :: 3 - Alpha",
|
||||||
|
"Intended Audience :: Developers",
|
||||||
]
|
]
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"tqdm"
|
"tqdm",
|
||||||
|
"wcmatch",
|
||||||
|
"numpy",
|
||||||
|
"sqlalchemy",
|
||||||
|
"colorama",
|
||||||
]
|
]
|
||||||
|
|
||||||
[tool.setuptools.packages.find]
|
[project.optional-dependencies]
|
||||||
#where = ["localsys"] # this is too deeply nested, need to remain at root level & use
|
tests = ["pytest"]
|
||||||
#include; by default, `where` is `.`
|
docs = [
|
||||||
include = ["co3*"] # pattern to match package names
|
"sphinx",
|
||||||
|
"sphinx-togglebutton",
|
||||||
|
"sphinx-autodoc-typehints",
|
||||||
|
"furo",
|
||||||
|
"myst-parser",
|
||||||
|
]
|
||||||
|
|
||||||
|
[project.urls]
|
||||||
|
Homepage = "https://doc.olog.io/co3"
|
||||||
|
Documentation = "https://doc.olog.io/co3"
|
||||||
|
Repository = "https://git.olog.io/olog/co3"
|
||||||
|
Issues = "https://git.olog.io/olog/co3/issues"
|
||||||
|
|
||||||
|
|
||||||
|
[tool.setuptools.packages.find]
|
||||||
|
include = ["co3*"] # pattern to match package names
|
||||||
|
Loading…
Reference in New Issue
Block a user