clean up repo files, README, auxiliary files (pre-BFG)
This commit is contained in:
parent
1bc7752b06
commit
c2e09b2c10
6
.gitignore
vendored
6
.gitignore
vendored
@ -1,7 +1,7 @@
|
||||
# generic py
|
||||
__pycache__/
|
||||
.pytest_cache/
|
||||
localsys.egg-info/
|
||||
*.egg-info/
|
||||
.ipynb_checkpoints/
|
||||
.pytest_cache/
|
||||
.python-version
|
||||
@ -12,3 +12,7 @@ build/
|
||||
docs/_autoref/
|
||||
docs/_autosummary/
|
||||
docs/_build/
|
||||
|
||||
# local
|
||||
notebooks/
|
||||
/Makefile
|
||||
|
22
LICENSE
Normal file
22
LICENSE
Normal file
@ -0,0 +1,22 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2024 Sam Griesemer
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
|
60
README.md
60
README.md
@ -1,35 +1,35 @@
|
||||
# Overview
|
||||
`co3` is a package for file conversion and associated database operations. The `CO3` base class
|
||||
provides a standard interface for performing conversions, preparing inserts, and
|
||||
interacting with database schemas that mirror the class hierarchy.
|
||||
`co3` is a lightweight Python ORM for hierarchical storage management. It implements a
|
||||
general type system for defining database components like relations, schemas, engines,
|
||||
etc. Objects inheriting from the `CO3` base class can then define data transformations
|
||||
that connect to database components, and can be automatically collected for coordinated
|
||||
database insertion.
|
||||
|
||||
Simplified description of the operational model:
|
||||
`co3` attempts to provide a general interface for interacting with a storage medium (e.g.,
|
||||
database, pickled objects, VSS framework, in-memory key-value stores, etc). The following
|
||||
top-level classes capture the bulk of the operational model:
|
||||
|
||||
**Goal**: interact with a storage medium (database, pickled structure, VSS framework) with
|
||||
a known schema.
|
||||
- **Database**: reference to a storage medium, with an `Accessor` for accessing data,
|
||||
`Manager` for managing database state, and an `Engine` for managing connections and
|
||||
external operations.
|
||||
- **Accessor**: provides access to stored items in a `Database`, typically via a supported
|
||||
`select` operation over known `Component` types
|
||||
- **Manager**: manages database storage state (e.g., supported inserts or database sync
|
||||
operations)
|
||||
- **Mapper**: associates `CO3` types with `Schema` components, and provides automatic
|
||||
collection and composition operations for supported items
|
||||
- **Collector**: collects data from defined `CO3` type transformations and prepares for
|
||||
`Database` insert operations
|
||||
- **Component**: atomic storage groups for databases (i.e., generalized notion of a
|
||||
"relation" in relational algebra).
|
||||
- **Indexer**: automatic caching of supported access queries to a `Database`
|
||||
- **Schema**: general schema analog for grouping related `Component` sets
|
||||
- **Differ**: facilitates set operations on results from selectable resources (e.g.,
|
||||
automatic comparison between file data on disk and file rows in a SQL database)
|
||||
- **Syncer**: generalized syncing procedure for items between data resources (e.g.,
|
||||
syncing new, modified, and deleted files from disk to a SQL database that stores file
|
||||
metadata).
|
||||
|
||||
- **Accessor** to provide access to stored items
|
||||
- **Composer** to compose common access points (e.g., JOINed tables)
|
||||
- **Indexer** to index/cache access queries
|
||||
- **Manager** to manage storage state (e.g., supported inserts, database syncs)
|
||||
- **Collector** to collect data for updating storage state
|
||||
- **Database** to collect data for updating storage state
|
||||
- **Mapper** to collect data for updating storage state
|
||||
- **Component** to collect data for updating storage state
|
||||
The **CO3** abstract base class then makes it easy to integrate this model with regular
|
||||
Python object hierarchies that can be mapped to a storage schema.
|
||||
|
||||
**CO3** is an abstract base class that makes it easy to integrate this model with object
|
||||
hierarchies that mirror a storage schema.
|
||||
|
||||
# Detailed structural breakdown
|
||||
There are a few pillars of the CO3 model that meaningfully group up functionality:
|
||||
|
||||
- Database: generic to a Component type, provides basic connection to a database at a
|
||||
specific address/location. The explicit Component type makes it easy to hook into
|
||||
appropriately typed functional objects:
|
||||
* Manager: generic to a Component and Database type, provides a supported set of
|
||||
state-modifying operations to a constituent database
|
||||
* Accessor: generic to a Component and Database type, provides a supported set of
|
||||
state inspection operations on a constituent database
|
||||
* Indexer:
|
||||
- Mapper: generic to a Component, serves as the fundamental connective component between
|
||||
types in the data representation hierarchy (CO3 subclasses) and database Components.
|
||||
|
@ -11,6 +11,8 @@ from co3.accessor import Accessor
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class VSSAccessor(Accessor):
|
||||
_model_cls = None
|
||||
|
||||
def __init__(self, cache_path):
|
||||
super().__init__()
|
||||
|
||||
@ -35,8 +37,7 @@ class VSSAccessor(Accessor):
|
||||
@property
|
||||
def model(self):
|
||||
if self._model is None:
|
||||
# model trained with 128 token seqs
|
||||
self._model = SentenceTransformer('sentence-transformers/all-MiniLM-L12-v2')
|
||||
self._model = self._model_cls()
|
||||
return self._model
|
||||
|
||||
@property
|
||||
@ -60,12 +61,8 @@ class VSSAccessor(Accessor):
|
||||
index_name : str,
|
||||
query : str,
|
||||
limit : int = 10,
|
||||
score_threshold = 0.5,
|
||||
score_threshold = 0.5,
|
||||
):
|
||||
'''
|
||||
Parameters:
|
||||
index_name: one of ['chunks','blocks','notes']
|
||||
'''
|
||||
if not query:
|
||||
return None
|
||||
|
||||
|
20
docs/Makefile
Normal file
20
docs/Makefile
Normal file
@ -0,0 +1,20 @@
|
||||
# Minimal makefile for Sphinx documentation
|
||||
#
|
||||
|
||||
# You can set these variables from the command line, and also
|
||||
# from the environment for the first two.
|
||||
SPHINXOPTS ?=
|
||||
SPHINXBUILD ?= sphinx-build
|
||||
SOURCEDIR = .
|
||||
BUILDDIR = _build
|
||||
|
||||
# Put it first so that "make" without argument is like "make help".
|
||||
help:
|
||||
@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
|
||||
|
||||
.PHONY: help Makefile
|
||||
|
||||
# Catch-all target: route all unknown targets to Sphinx using the new
|
||||
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
|
||||
%: Makefile
|
||||
@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
|
@ -1,27 +1,49 @@
|
||||
[build-system]
|
||||
requires = ["setuptools"]
|
||||
requires = ["setuptools", "wheel", "setuptools-git-versioning>=2.0,<3"]
|
||||
build-backend = "setuptools.build_meta"
|
||||
|
||||
[project]
|
||||
name = "co3"
|
||||
version = "0.1.1"
|
||||
authors = [
|
||||
{ name="Sam Griesemer", email="samgriesemer@gmail.com" },
|
||||
]
|
||||
description = "Lightweight ORM"
|
||||
description = "Lightweight Python ORM for hierarchical storage management"
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.11"
|
||||
requires-python = ">=3.12"
|
||||
dynamic = ["version"]
|
||||
#license = {file = "LICENSE"}
|
||||
authors = [
|
||||
{ name="Sam Griesemer", email="samgriesemer+git@gmail.com" },
|
||||
]
|
||||
keywords = ["database", "orm"]
|
||||
classifiers = [
|
||||
"Programming Language :: Python :: 3",
|
||||
"Programming Language :: Python :: 3.12",
|
||||
"License :: OSI Approved :: MIT License",
|
||||
"Operating System :: OS Independent",
|
||||
"Development Status :: 3 - Alpha",
|
||||
"Intended Audience :: Developers",
|
||||
]
|
||||
dependencies = [
|
||||
"tqdm"
|
||||
"tqdm",
|
||||
"wcmatch",
|
||||
"numpy",
|
||||
"sqlalchemy",
|
||||
"colorama",
|
||||
]
|
||||
|
||||
[tool.setuptools.packages.find]
|
||||
#where = ["localsys"] # this is too deeply nested, need to remain at root level & use
|
||||
#include; by default, `where` is `.`
|
||||
include = ["co3*"] # pattern to match package names
|
||||
[project.optional-dependencies]
|
||||
tests = ["pytest"]
|
||||
docs = [
|
||||
"sphinx",
|
||||
"sphinx-togglebutton",
|
||||
"sphinx-autodoc-typehints",
|
||||
"furo",
|
||||
"myst-parser",
|
||||
]
|
||||
|
||||
[project.urls]
|
||||
Homepage = "https://doc.olog.io/co3"
|
||||
Documentation = "https://doc.olog.io/co3"
|
||||
Repository = "https://git.olog.io/olog/co3"
|
||||
Issues = "https://git.olog.io/olog/co3/issues"
|
||||
|
||||
|
||||
[tool.setuptools.packages.find]
|
||||
include = ["co3*"] # pattern to match package names
|
||||
|
Loading…
Reference in New Issue
Block a user