diff --git a/.gitignore b/.gitignore index d8bede4..2511f64 100644 --- a/.gitignore +++ b/.gitignore @@ -1,7 +1,7 @@ # generic py __pycache__/ .pytest_cache/ -localsys.egg-info/ +*.egg-info/ .ipynb_checkpoints/ .pytest_cache/ .python-version @@ -12,3 +12,7 @@ build/ docs/_autoref/ docs/_autosummary/ docs/_build/ + +# local +notebooks/ +/Makefile diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..93f98c5 --- /dev/null +++ b/LICENSE @@ -0,0 +1,22 @@ +MIT License + +Copyright (c) 2024 Sam Griesemer + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
+ diff --git a/Makefile b/Makefile deleted file mode 100644 index 8c104bc..0000000 --- a/Makefile +++ /dev/null @@ -1,21 +0,0 @@ -PYTHON=/home/smgr/.pyenv/versions/co4/bin/python -BASH=/usr/bin/bash - - -## ------------------ docs ------------------ ## -docs-build: - sphinx-apidoc --module-first --separate -o docs/_autoref/ co3 - make -C docs/ html - -docs-serve: - cd docs/_build/html && python -m http.server 9090 - -docs-clean: - make -C docs/ clean - rm -rf docs/_autoref - rm -rf docs/_autosummary -## ------------------------------------------ ## - -## ----------------- tests ------------------ ## -test: - pytest --pyargs tests -v diff --git a/README.md b/README.md index 45c7486..480168f 100644 --- a/README.md +++ b/README.md @@ -1,35 +1,35 @@ # Overview -`co3` is a package for file conversion and associated database operations. The `CO3` base class -provides a standard interface for performing conversions, preparing inserts, and -interacting with database schemas that mirror the class hierarchy. +`co3` is a lightweight Python ORM for hierarchical storage management. It implements a +general type system for defining database components like relations, schemas, engines, +etc. Objects inheriting from the `CO3` base class can then define data transformations +that connect to database components, and can be automatically collected for coordinated +database insertion. -Simplified description of the operational model: +`co3` attempts to provide a general interface for interacting with a storage media (e.g., +database, pickled objects, VSS framework, in-memory key-value stores, etc). The following +top-level classes capture the bulk of the operational model: -**Goal**: interact with a storage medium (database, pickled structure, VSS framework) with -a known schema. +- **Database**: reference to a storage medium, with an `Accessor` for accessing data, + `Manager` for managing database state, and an `Engine` for managing connections and + external operations. 
+- **Accessor**: provides access to stored items in a `Database`, typically via a supported + `select` operation over known `Component` types +- **Manager**: manages database storage state (e.g., supported inserts or database sync + operations) +- **Mapper**: associates `CO3` types with `Schema` components, and provides automatic + collection and composition operations for supported items +- **Collector**: collects data from defined `CO3` type transformations and prepares for + `Database` insert operations +- **Component**: atomic storage groups for databases (i.e., generalized notion of a + "relation" in relational algebra). +- **Indexer**: automatic caching of supported access queries to a `Database` +- **Schema**: general schema analog for grouping related `Component` sets +- **Differ**: facilitates set operations on results from selectable resources (e.g., + automatic comparison between file data on disk and file rows in a SQL database) +- **Syncer**: generalized syncing procedure for items between data resources (e.g., + syncing new, modified, and deleted files from disk to a SQL database that stores file + metadata). -- **Accessor** to provide access to stored items -- **Composer** to compose common access points (e.g., JOINed tables) -- **Indexer** to index/cache access queries -- **Manager** to manage storage state (e.g., supported inserts, database syncs) -- **Collector** to collect data for updating storage state -- **Database** to collect data for updating storage state -- **Mapper** to collect data for updating storage state -- **Component** to collect data for updating storage state +The **CO3** abstract base class then makes it easy to integrate this model with regular +Python object hierarchies that can be mapped to a storage schema. -**CO3** is an abstract base class that makes it easy to integrate this model with object -hierarchies that mirror a storage schema. 
- -# Detailed structural breakdown -There are a few pillars of the CO3 model that meaningfully group up functionality: - -- Database: generic to a Component type, provides basic connection to a database at a - specific address/location. The explicit Component type makes it easy to hook into - appropriately typed functional objects: - * Manager: generic to a Component and Database type, provides a supported set of - state-modifying operations to a constituent database - * Accessor: generic to a Component and Database type, provides a supported set of - state inspection operations on a constituent database - * Indexer: -- Mapper: generic to a Component, serves as the fundamental connective component between - types in the data representation hierarchy (CO3 subclasses) and database Components. diff --git a/co3.egg-info/PKG-INFO b/co3.egg-info/PKG-INFO deleted file mode 100644 index 6d2bd8f..0000000 --- a/co3.egg-info/PKG-INFO +++ /dev/null @@ -1,47 +0,0 @@ -Metadata-Version: 2.1 -Name: co3 -Version: 0.1.1 -Summary: Lightweight ORM -Author-email: Sam Griesemer -Classifier: Programming Language :: Python :: 3 -Classifier: License :: OSI Approved :: MIT License -Classifier: Operating System :: OS Independent -Requires-Python: >=3.11 -Description-Content-Type: text/markdown -Requires-Dist: tqdm - -# Overview -`co3` is a package for file conversion and associated database operations. The `CO3` base class -provides a standard interface for performing conversions, preparing inserts, and -interacting with database schemas that mirror the class hierarchy. - -Simplified description of the operational model: - -**Goal**: interact with a storage medium (database, pickled structure, VSS framework) with -a known schema. 
- -- **Accessor** to provide access to stored items -- **Composer** to compose common access points (e.g., JOINed tables) -- **Indexer** to index/cache access queries -- **Manager** to manage storage state (e.g., supported inserts, database syncs) -- **Collector** to collect data for updating storage state -- **Database** to collect data for updating storage state -- **Mapper** to collect data for updating storage state -- **Component** to collect data for updating storage state - -**CO3** is an abstract base class that makes it easy to integrate this model with object -hierarchies that mirror a storage schema. - -# Detailed structural breakdown -There are a few pillars of the CO3 model that meaningfully group up functionality: - -- Database: generic to a Component type, provides basic connection to a database at a - specific address/location. The explicit Component type makes it easy to hook into - appropriately typed functional objects: - * Manager: generic to a Component and Database type, provides a supported set of - state-modifying operations to a constituent database - * Accessor: generic to a Component and Database type, provides a supported set of - state inspection operations on a constituent database - * Indexer: -- Mapper: generic to a Component, serves as the fundamental connective component between - types in the data representation hierarchy (CO3 subclasses) and database Components. 
diff --git a/co3.egg-info/SOURCES.txt b/co3.egg-info/SOURCES.txt deleted file mode 100644 index 504b8a7..0000000 --- a/co3.egg-info/SOURCES.txt +++ /dev/null @@ -1,52 +0,0 @@ -MANIFEST.in -README.md -pyproject.toml -co3/__init__.py -co3/accessor.py -co3/co3.py -co3/collector.py -co3/component.py -co3/database.py -co3/differ.py -co3/engine.py -co3/indexer.py -co3/manager.py -co3/mapper.py -co3/resource.py -co3/schema.py -co3/syncer.py -co3.egg-info/PKG-INFO -co3.egg-info/SOURCES.txt -co3.egg-info/dependency_links.txt -co3.egg-info/requires.txt -co3.egg-info/top_level.txt -co3/accessors/__init__.py -co3/accessors/fts.py -co3/accessors/sql.py -co3/accessors/vss.py -co3/components/__init__.py -co3/databases/__init__.py -co3/databases/fts.py -co3/databases/sql.py -co3/databases/vss.py -co3/engines/__init__.py -co3/indexers/relational.py -co3/managers/__init__.py -co3/managers/fts.py -co3/managers/sql.py -co3/managers/vss.py -co3/mappers/__init__.py -co3/resources/__init__.py -co3/resources/disk.py -co3/schemas/__init__.py -co3/util/__init__.py -co3/util/db.py -co3/util/generic.py -co3/util/paths.py -co3/util/regex.py -co3/util/types.py -tests/test_co3.py -tests/test_database.py -tests/test_imports.py -tests/test_mapper.py -tests/test_schema.py \ No newline at end of file diff --git a/co3.egg-info/dependency_links.txt b/co3.egg-info/dependency_links.txt deleted file mode 100644 index 8b13789..0000000 --- a/co3.egg-info/dependency_links.txt +++ /dev/null @@ -1 +0,0 @@ - diff --git a/co3.egg-info/requires.txt b/co3.egg-info/requires.txt deleted file mode 100644 index 78620c4..0000000 --- a/co3.egg-info/requires.txt +++ /dev/null @@ -1 +0,0 @@ -tqdm diff --git a/co3.egg-info/top_level.txt b/co3.egg-info/top_level.txt deleted file mode 100644 index 7d71d10..0000000 --- a/co3.egg-info/top_level.txt +++ /dev/null @@ -1 +0,0 @@ -co3 diff --git a/co3/accessors/vss.py b/co3/accessors/vss.py index 6b49882..e80db7b 100644 --- a/co3/accessors/vss.py +++ b/co3/accessors/vss.py @@ 
-11,6 +11,8 @@ from co3.accessor import Accessor logger = logging.getLogger(__name__) class VSSAccessor(Accessor): + _model_cls = None + def __init__(self, cache_path): super().__init__() @@ -35,8 +37,7 @@ class VSSAccessor(Accessor): @property def model(self): if self._model is None: - # model trained with 128 token seqs - self._model = SentenceTransformer('sentence-transformers/all-MiniLM-L12-v2') + self._model = self._model_cls() return self._model @property @@ -60,12 +61,8 @@ class VSSAccessor(Accessor): index_name : str, query : str, limit : int = 10, - score_threshold = 0.5, + score_threshold = 0.5, ): - ''' - Parameters: - index_name: one of ['chunks','blocks','notes'] - ''' if not query: return None diff --git a/notebooks/database.ipynb b/notebooks/database.ipynb deleted file mode 100644 index 8ed8b56..0000000 --- a/notebooks/database.ipynb +++ /dev/null @@ -1,429 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "id": "6f6fbc7e-4fb9-4353-b2ee-9ea819a3c896", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/smgr/.pyenv/versions/co4/lib/python3.12/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. 
See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", - " from .autonotebook import tqdm as notebook_tqdm\n" - ] - } - ], - "source": [ - "import vegetables" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "88fd0ea8-9c94-4569-a51b-823a04f32f55", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'age': 5}" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "tomato = vegetables.Tomato('t1', 5)\n", - "\n", - "# test a register collation action\n", - "tomato.collate('ripe')" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "348926d9-7137-4eff-a919-508788553dd2", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['9ca6772e-6621-4511-a4a6-ad451a1da91f',\n", - " '2a91b423-4e08-491c-b1d2-5ec25259191e',\n", - " '4a9edb2b-4ac5-467e-82ef-b254829ac2a2']" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "vegetables.vegetable_mapper.collect(tomato, ['ripe'])" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "4e5e7319-11bf-4051-951b-08c84e9f3874", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - " vegetable+tomato+tomato_aging_states+tomato_cooking_states" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "vegetables.vegetable_mapper.compose(tomato, action_groups=['aging', 'cooking'])" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "aa290686-8074-4038-a3cc-ce6817844653", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[Column('id', Integer(), table=, primary_key=True, nullable=False),\n", - " Column('name', String(), table=),\n", - " Column('color', String(), table=),\n", - " Column('id', Integer(), table=, primary_key=True, nullable=False),\n", - " Column('name', String(), ForeignKey('vegetable.name'), table=),\n", - " 
Column('radius', Integer(), table=),\n", - " Column('id', Integer(), table=, primary_key=True, nullable=False),\n", - " Column('name', String(), ForeignKey('tomato.name'), table=),\n", - " Column('state', String(), table=),\n", - " Column('age', Integer(), table=)]" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "list(vegetables.vegetable_mapper.compose(tomato, action_groups=['aging']).obj.columns)" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "f3c7e37d-ba9e-4bae-ae44-adc922bf5f4c", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(vegetables.Tomato, vegetables.Vegetable, co3.co3.CO3, object)" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "tomato.__class__.__mro__" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "c21d2c54-39e2-4de3-93bc-763896ed348e", - "metadata": {}, - "outputs": [], - "source": [ - "from co3.databases import SQLDatabase\n", - "\n", - "db = SQLDatabase('sqlite://') #, echo=True)" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "a785d202-99d3-4ae7-859e-ee22b481f8df", - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "db.recreate(vegetables.vegetable_schema)" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "cda01cb0-1666-4cb1-aa64-bcdca871aff5", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{ vegetable: [{'name': 't1', 'color': 'red'}],\n", - " tomato: [{'name': 't1', 'radius': 5}],\n", - " tomato_aging_states: [{'name': 't1',\n", - " 'state': 'ripe',\n", - " 'age': 2}]}" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "vegetables.vegetable_mapper.collector.inserts" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "af7124ed-3031-4f28-89a6-553eb5b3cc7a", - "metadata": {}, - "outputs": [], - 
"source": [ - "with db.engine.connect() as connection:\n", - " db.manager.insert_many(\n", - " connection,\n", - " vegetables.vegetable_mapper.collector.inserts,\n", - " )" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "0149e14e-5d07-42af-847d-af5c190f8946", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[{'id': 1, 'name': 't1', 'radius': 5}]\n" - ] - } - ], - "source": [ - "with db.engine.connect() as connection:\n", - " print(db.accessor.select(\n", - " connection, \n", - " vegetables.vegetable_schema.get_component('tomato')\n", - " ))" - ] - }, - { - "cell_type": "code", - "execution_count": 32, - "id": "668d1b8c-b47f-4a58-914d-e43402443fe6", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[{'id': 1, 'name': 't1', 'color': 'red', 'id_1': 1, 'name_1': 't1', 'radius': 5}]\n" - ] - } - ], - "source": [ - "agg_table = vegetables.vegetable_mapper.compose(tomato)\n", - "\n", - "with db.engine.connect() as connection:\n", - " agg_res = db.accessor.select(\n", - " connection, \n", - " agg_table,\n", - " mappings=True,\n", - " )\n", - "\n", - "print(agg_res)" - ] - }, - { - "cell_type": "code", - "execution_count": 33, - "id": "a051d72d-a867-46dc-bb5e-69341f39a056", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[{'id': 1, 'name': 't1', 'color': 'red', 'id_1': 1, 'name_1': 't1', 'radius': 5, 'id_2': 1, 'name_2': 't1', 'state': 'ripe', 'age': 2}]\n" - ] - } - ], - "source": [ - "agg_table = vegetables.vegetable_mapper.compose(tomato, action_groups=['aging'])#, outer=True)\n", - "\n", - "with db.engine.connect() as connection:\n", - " agg_res = db.accessor.select(\n", - " connection, \n", - " agg_table,\n", - " mappings=True,\n", - " )\n", - "\n", - "print(agg_res)" - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "id": "6a80cfd7-3175-4526-96e0-374765d64a27", - "metadata": {}, - 
"outputs": [ - { - "data": { - "text/plain": [ - "sqlalchemy.engine.row.RowMapping" - ] - }, - "execution_count": 31, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "type(agg_res[0])" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "id": "7cf05ddd-2328-4051-9cf8-4ac01352405e", - "metadata": {}, - "outputs": [], - "source": [ - "import sqlalchemy as sa\n", - "from co3.engines import SQLEngine\n", - "\n", - "a = SQLEngine.execute(db.engine.connect(), sa.select(agg_table.obj))" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "id": "c1edf68e-1fde-4a1f-8ec3-084713a8da45", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[]" - ] - }, - "execution_count": 19, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "a.mappings().all()\n" - ] - }, - { - "cell_type": "code", - "execution_count": 37, - "id": "8b8a9e47-7f5f-4828-a99e-5d9a12697f46", - "metadata": {}, - "outputs": [], - "source": [ - "tomato2 = vegetables.Tomato('t2', 8)" - ] - }, - { - "cell_type": "code", - "execution_count": 38, - "id": "062aa4de-7aea-4fd3-b5db-82af147d023e", - "metadata": {}, - "outputs": [ - { - "ename": "AttributeError", - "evalue": "'Tomato' object has no attribute 'action_map'", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[38], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mvegetables\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvegetable_mapper\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcollect\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtomato2\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/Documents/projects/ontolog/co3/build/__editable__.co3-0.1.1-py3-none-any/co3/mapper.py:198\u001b[0m, in \u001b[0;36mMapper.collect\u001b[0;34m(self, obj, action_keys, 
action_groups)\u001b[0m\n\u001b[1;32m 179\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m'''\u001b[39;00m\n\u001b[1;32m 180\u001b[0m \u001b[38;5;124;03mStages inserts up the inheritance chain, and down through components.\u001b[39;00m\n\u001b[1;32m 181\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 195\u001b[0m \u001b[38;5;124;03mReturns: dict with keys and values relevant for associated SQLite tables\u001b[39;00m\n\u001b[1;32m 196\u001b[0m \u001b[38;5;124;03m'''\u001b[39;00m\n\u001b[1;32m 197\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m action_keys \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m--> 198\u001b[0m action_keys \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mlist\u001b[39m(\u001b[43mobj\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43maction_map\u001b[49m\u001b[38;5;241m.\u001b[39mkeys())\n\u001b[1;32m 200\u001b[0m receipts \u001b[38;5;241m=\u001b[39m []\n\u001b[1;32m 201\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m _cls \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mreversed\u001b[39m(obj\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__class__\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__mro__\u001b[39m[:\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m2\u001b[39m]):\n", - "\u001b[0;31mAttributeError\u001b[0m: 'Tomato' object has no attribute 'action_map'" - ] - } - ], - "source": [ - "vegetables.vegetable_mapper.collect(tomato2)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4673ddc8-3f76-4d8c-8186-bbed4a682e0d", - "metadata": {}, - "outputs": [], - "source": [ - "db.insert(vegetables.vegetable_schema.get_component('tomato'), " - ] - }, - { - "cell_type": "code", - "execution_count": 36, - "id": "9314be4e-c1d5-4af8-ad23-0b208d24b3eb", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[{'id': 1, 'name': 't1', 'radius': 5}]" - ] - }, - "execution_count": 36, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - 
"db.select(vegetables.vegetable_schema.get_component('tomato'))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a2efd060-f298-4ca6-8a58-7ed5acf1dd15", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "co3", - "language": "python", - "name": "co3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.2" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/notebooks/mapper.ipynb b/notebooks/mapper.ipynb deleted file mode 100644 index 3aac3d4..0000000 --- a/notebooks/mapper.ipynb +++ /dev/null @@ -1,403 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "id": "e02ccafe-e04d-4312-acba-e41cf7b1c021", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/smgr/.pyenv/versions/co4/lib/python3.12/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", - " from .autonotebook import tqdm as notebook_tqdm\n" - ] - } - ], - "source": [ - "import vegetables" - ] - }, - { - "cell_type": "markdown", - "id": "c0914069-7f3c-4213-8d34-f7566033e054", - "metadata": {}, - "source": [ - "## Development notes\n", - "- No registry actually needs to take place if there's a default type2component map or one supplied on creation. Can just collect right out of the gate\n", - "- Need connective function (type to collation) and attribute map. Do we need to this with a subclass? 
If a func is passed in on init, I can type it appropriately I guess `Callable[[type[CO3],str,str|None],dict]`" - ] - }, - { - "cell_type": "markdown", - "id": "ef733715-bb75-4263-b216-45e778a06b21", - "metadata": {}, - "source": [ - "## Usage\n", - "The Mapper's primary job is to associate class hierarchies with database components. This can be done in a few ways:\n", - "\n", - "1. Manually attaching a type reference to a Component\n", - "2. Attaching a type reference to a Component's name as registered in a schema\n", - "3. Automatically register the CO3 heirarchy to matching schema component names (through transformation)" - ] - }, - { - "cell_type": "markdown", - "id": "d2672422-3596-4eab-ac44-5da617f74b80", - "metadata": {}, - "source": [ - "## Explicit example steps" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "7d80f7b9-7458-4ad4-8c1a-3ea56e796b4e", - "metadata": {}, - "outputs": [], - "source": [ - "from co3 import Mapper\n", - "\n", - "vegetable_mapper = Mapper(\n", - " vegetables.Vegetable,\n", - " vegetables.vegetable_schema\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "d24d31b4-c4a6-4a1e-8bea-c44378aadfdd", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'\\nvegetable_mapper.attach(\\n vegetables.Vegetable,\\n vegetables.vegetable_table,\\n)\\n'" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# not valid; tables need to be wrapped in CO3 Components\n", - "'''\n", - "vegetable_mapper.attach(\n", - " vegetables.Vegetable,\n", - " vegetables.vegetable_table,\n", - ")\n", - "'''" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "f9408562-bf50-4522-909c-318557f85948", - "metadata": {}, - "outputs": [], - "source": [ - "# manually attach component\n", - "vegetable_mapper.attach(\n", - " vegetables.Tomato,\n", - " vegetables.vegetable_schema.get_component('tomato'),\n", - " coll_groups={\n", - " 
'aging': vegetables.vegetable_schema.get_component('tomato_aging_states'),\n", - " 'cooking': vegetables.vegetable_schema.get_component('tomato_cooking_states'),\n", - " },\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "05fdd404-87ee-4187-832f-2305272758ae", - "metadata": {}, - "outputs": [], - "source": [ - "# attach by name in schema\n", - "vegetable_mapper.attach(\n", - " vegetables.Tomato,\n", - " 'tomato',\n", - " coll_groups={\n", - " 'aging': 'tomato_aging_states',\n", - " 'cooking': 'tomato_cooking_states',\n", - " },\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "e9b6af49-a69d-41cc-beae-1b6f171cd2f5", - "metadata": {}, - "outputs": [], - "source": [ - "# attach entire type hierarchy w/ type->name map\n", - "vegetable_mapper.attach_hierarchy(\n", - " # this might make more sense during init\n", - " vegetables.Vegetable,\n", - " lambda x:x.__name__.lower()\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "0fb45a86-5c9b-41b1-a3ab-5691444f175e", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "vegetable_mapper.get_collation_comp(vegetables.Tomato, 'cooking')" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "2e4336ab-5b5f-484d-815d-164d4b6f40a0", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'co3_root': vegetables.Vegetable,\n", - " 'schema': ,\n", - " 'collector': ,\n", - " 'composer': ,\n", - " 'attribute_comps': {vegetables.Tomato: },\n", - " 'collation_groups': defaultdict(dict,\n", - " {vegetables.Tomato: {'aging': ,\n", - " 'cooking': }})}" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "vars(vegetable_mapper)" - ] - }, - { - "cell_type": "markdown", - "id": "47859e25-b803-4459-a581-f10bbcfac716", - "metadata": {}, - "source": 
[ - "## Holistic attachment" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "70c9baed-b870-4021-8949-9b713d863de6", - "metadata": {}, - "outputs": [], - "source": [ - "def attr_name_map(cls):\n", - " return f'{cls.__name__.lower()}'\n", - "\n", - "def coll_name_map(cls, action_group):\n", - " return f'{cls.__name__.lower()}_{action_group}_states'\n", - "\n", - "vegetables.vegetable_mapper.attach_many(\n", - " vegetables.type_list,\n", - " attr_name_map,\n", - " coll_name_map,\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "c16786d4-0b71-42d9-97f7-7893c542104e", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'age': 4}" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# create new CO3 descendant\n", - "tomato = vegetables.Tomato('t1', 5)\n", - "\n", - "# test a register collation action\n", - "tomato.collate('ripe')" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "d7fa94ca-3ecd-4ee3-b0dc-f3b2b65ee47c", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - " tomato" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "vegetables.vegetable_mapper.get_attribute_comp(vegetables.Tomato)" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "1adc3bc5-957f-4b5a-bc2c-2d172675826d", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'schema': ,\n", - " 'collector': ,\n", - " 'composer': ,\n", - " 'attribute_comps': {vegetables.Vegetable: vegetable,\n", - " vegetables.Tomato: tomato},\n", - " 'collation_groups': defaultdict(dict,\n", - " {vegetables.Vegetable: {},\n", - " vegetables.Tomato: {'aging': tomato_aging_states,\n", - " 'cooking': tomato_cooking_states}})}" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "vars(vegetables.vegetable_mapper)" - ] - 
}, - { - "cell_type": "code", - "execution_count": 6, - "id": "f32d1f65-9b1d-4600-b396-8551fbd1fcf7", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['3bf42abc-8a12-452f-baf6-38a05fc5d420',\n", - " '271b7b84-846e-4d1d-87f6-bcabc90a7b55',\n", - " 'f9fc5d16-c5cb-47a7-9eca-7df8a3ba5d10']" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "vegetables.vegetable_mapper.collect(tomato, ['ripe'])" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "380dfbea-90cc-49fc-aef1-ebb342872632", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "defaultdict(.()>,\n", - " {'3bf42abc-8a12-452f-baf6-38a05fc5d420': ( vegetable,\n", - " {'name': 't1', 'color': 'red'}),\n", - " '271b7b84-846e-4d1d-87f6-bcabc90a7b55': ( tomato,\n", - " {'name': 't1', 'radius': 5}),\n", - " 'f9fc5d16-c5cb-47a7-9eca-7df8a3ba5d10': ( tomato_aging_states,\n", - " {'name': 't1', 'state': 'ripe', 'age': 1})})" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "vegetables.vegetable_mapper.collector._inserts" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "905bb2a9-9c22-4187-be15-3dd32d206e26", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{ vegetable: [{'name': 't1', 'color': 'red'}],\n", - " tomato: [{'name': 't1', 'radius': 5}],\n", - " tomato_aging_states: [{'name': 't1',\n", - " 'state': 'ripe',\n", - " 'age': 1}]}" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "vegetables.vegetable_mapper.collector.inserts" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d166b9af-e3ba-4750-9dcb-d8d4e08fe4d3", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "co3", - "language": "python", - "name": "co3" - }, - "language_info": { - "codemirror_mode": { - 
"name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.2" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/pyproject.toml b/pyproject.toml index c5e7a78..712cef8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,27 +1,49 @@ [build-system] -requires = ["setuptools"] +requires = ["setuptools", "wheel", "setuptools-git-versioning>=2.0,<3"] build-backend = "setuptools.build_meta" [project] name = "co3" -version = "0.1.1" -authors = [ - { name="Sam Griesemer", email="samgriesemer@gmail.com" }, -] -description = "Lightweight ORM" +description = "Lightweight Python ORM for hierarchical storage management" readme = "README.md" -requires-python = ">=3.11" +requires-python = ">=3.12" +dynamic = ["version"] +#license = {file = "LICENSE"} +authors = [ + { name="Sam Griesemer", email="samgriesemer+git@gmail.com" }, +] +keywords = ["database", "orm"] classifiers = [ - "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.12", "License :: OSI Approved :: MIT License", "Operating System :: OS Independent", + "Development Status :: 3 - Alpha", + "Intended Audience :: Developers", ] dependencies = [ - "tqdm" + "tqdm", + "wcmatch", + "numpy", + "sqlalchemy", + "colorama", ] -[tool.setuptools.packages.find] -#where = ["localsys"] # this is too deeply nested, need to remain at root level & use -#include; by default, `where` is `.` -include = ["co3*"] # pattern to match package names +[project.optional-dependencies] +tests = ["pytest"] +docs = [ + "sphinx", + "sphinx-togglebutton", + "sphinx-autodoc-typehints", + "furo", + "myst-parser", +] +[project.urls] +Homepage = "https://doc.olog.io/co3" +Documentation = "https://doc.olog.io/co3" +Repository = "https://git.olog.io/olog/co3" +Issues = "https://git.olog.io/olog/co3/issues" + + +[tool.setuptools.packages.find] +include = ["co3*"] # pattern to 
match package names