initial commit
This commit is contained in:
		
						commit
						9218f4a404
					
				
							
								
								
									
										1
									
								
								.python-version
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										1
									
								
								.python-version
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1 @@
 | 
			
		||||
co4
 | 
			
		||||
							
								
								
									
										0
									
								
								MANIFEST.in
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										0
									
								
								MANIFEST.in
									
									
									
									
									
										Normal file
									
								
							
							
								
								
									
										21
									
								
								README.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										21
									
								
								README.md
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,21 @@
 | 
			
		||||
# Overview
 | 
			
		||||
`co3` is a package for file conversion and associated database operations. The `CO3` base class
 | 
			
		||||
provides a standard interface for performing conversions, preparing inserts, and
 | 
			
		||||
interacting with database schemas that mirror the class hierarchy.
 | 
			
		||||
 | 
			
		||||
Simplified description of the operational model:
 | 
			
		||||
 | 
			
		||||
**Goal**: interact with a storage medium (database, pickled structure, VSS framework) with
 | 
			
		||||
a known schema.
 | 
			
		||||
 | 
			
		||||
- **Accessor** to provide access to stored items
 | 
			
		||||
- **Composer** to compose common access points (e.g., JOINed tables)
 | 
			
		||||
- **Indexer** to index/cache access queries
 | 
			
		||||
- **Manager** to manage storage state (e.g., supported inserts, database syncs)
 | 
			
		||||
- **Collector** to collect data for updating storage state
 | 
			
		||||
- **Database** to collect data for updating storage state
 | 
			
		||||
- **Mapper** to collect data for updating storage state
 | 
			
		||||
- **Relation** to collect data for updating storage state
 | 
			
		||||
 | 
			
		||||
**CO3** is an abstract base class that makes it easy to integrate this model with object
 | 
			
		||||
hierarchies that mirror a storage schema.
 | 
			
		||||
							
								
								
									
										1
									
								
								build/__editable__.co3-0.1.1-py3-none-any/co3/__init__.py
									
									
									
									
									
										Symbolic link
									
								
							
							
						
						
									
										1
									
								
								build/__editable__.co3-0.1.1-py3-none-any/co3/__init__.py
									
									
									
									
									
										Symbolic link
									
								
							@ -0,0 +1 @@
 | 
			
		||||
/home/smgr/Documents/projects/ontolog/co3/co3/__init__.py
 | 
			
		||||
							
								
								
									
										1
									
								
								build/__editable__.co3-0.1.1-py3-none-any/co3/accessor.py
									
									
									
									
									
										Symbolic link
									
								
							
							
						
						
									
										1
									
								
								build/__editable__.co3-0.1.1-py3-none-any/co3/accessor.py
									
									
									
									
									
										Symbolic link
									
								
							@ -0,0 +1 @@
 | 
			
		||||
/home/smgr/Documents/projects/ontolog/co3/co3/accessor.py
 | 
			
		||||
@ -0,0 +1 @@
 | 
			
		||||
/home/smgr/Documents/projects/ontolog/co3/co3/accessors/__init__.py
 | 
			
		||||
							
								
								
									
										1
									
								
								build/__editable__.co3-0.1.1-py3-none-any/co3/accessors/fts.py
									
									
									
									
									
										Symbolic link
									
								
							
							
						
						
									
										1
									
								
								build/__editable__.co3-0.1.1-py3-none-any/co3/accessors/fts.py
									
									
									
									
									
										Symbolic link
									
								
							@ -0,0 +1 @@
 | 
			
		||||
/home/smgr/Documents/projects/ontolog/co3/co3/accessors/fts.py
 | 
			
		||||
							
								
								
									
										1
									
								
								build/__editable__.co3-0.1.1-py3-none-any/co3/accessors/sql.py
									
									
									
									
									
										Symbolic link
									
								
							
							
						
						
									
										1
									
								
								build/__editable__.co3-0.1.1-py3-none-any/co3/accessors/sql.py
									
									
									
									
									
										Symbolic link
									
								
							@ -0,0 +1 @@
 | 
			
		||||
/home/smgr/Documents/projects/ontolog/co3/co3/accessors/sql.py
 | 
			
		||||
							
								
								
									
										1
									
								
								build/__editable__.co3-0.1.1-py3-none-any/co3/accessors/vss.py
									
									
									
									
									
										Symbolic link
									
								
							
							
						
						
									
										1
									
								
								build/__editable__.co3-0.1.1-py3-none-any/co3/accessors/vss.py
									
									
									
									
									
										Symbolic link
									
								
							@ -0,0 +1 @@
 | 
			
		||||
/home/smgr/Documents/projects/ontolog/co3/co3/accessors/vss.py
 | 
			
		||||
							
								
								
									
										1
									
								
								build/__editable__.co3-0.1.1-py3-none-any/co3/co3.py
									
									
									
									
									
										Symbolic link
									
								
							
							
						
						
									
										1
									
								
								build/__editable__.co3-0.1.1-py3-none-any/co3/co3.py
									
									
									
									
									
										Symbolic link
									
								
							@ -0,0 +1 @@
 | 
			
		||||
/home/smgr/Documents/projects/ontolog/co3/co3/co3.py
 | 
			
		||||
							
								
								
									
										1
									
								
								build/__editable__.co3-0.1.1-py3-none-any/co3/collector.py
									
									
									
									
									
										Symbolic link
									
								
							
							
						
						
									
										1
									
								
								build/__editable__.co3-0.1.1-py3-none-any/co3/collector.py
									
									
									
									
									
										Symbolic link
									
								
							@ -0,0 +1 @@
 | 
			
		||||
/home/smgr/Documents/projects/ontolog/co3/co3/collector.py
 | 
			
		||||
							
								
								
									
										1
									
								
								build/__editable__.co3-0.1.1-py3-none-any/co3/composer.py
									
									
									
									
									
										Symbolic link
									
								
							
							
						
						
									
										1
									
								
								build/__editable__.co3-0.1.1-py3-none-any/co3/composer.py
									
									
									
									
									
										Symbolic link
									
								
							@ -0,0 +1 @@
 | 
			
		||||
/home/smgr/Documents/projects/ontolog/co3/co3/composer.py
 | 
			
		||||
							
								
								
									
										1
									
								
								build/__editable__.co3-0.1.1-py3-none-any/co3/database.py
									
									
									
									
									
										Symbolic link
									
								
							
							
						
						
									
										1
									
								
								build/__editable__.co3-0.1.1-py3-none-any/co3/database.py
									
									
									
									
									
										Symbolic link
									
								
							@ -0,0 +1 @@
 | 
			
		||||
/home/smgr/Documents/projects/ontolog/co3/co3/database.py
 | 
			
		||||
@ -0,0 +1 @@
 | 
			
		||||
/home/smgr/Documents/projects/ontolog/co3/co3/databases/__init__.py
 | 
			
		||||
							
								
								
									
										1
									
								
								build/__editable__.co3-0.1.1-py3-none-any/co3/databases/fts.py
									
									
									
									
									
										Symbolic link
									
								
							
							
						
						
									
										1
									
								
								build/__editable__.co3-0.1.1-py3-none-any/co3/databases/fts.py
									
									
									
									
									
										Symbolic link
									
								
							@ -0,0 +1 @@
 | 
			
		||||
/home/smgr/Documents/projects/ontolog/co3/co3/databases/fts.py
 | 
			
		||||
							
								
								
									
										1
									
								
								build/__editable__.co3-0.1.1-py3-none-any/co3/databases/sql.py
									
									
									
									
									
										Symbolic link
									
								
							
							
						
						
									
										1
									
								
								build/__editable__.co3-0.1.1-py3-none-any/co3/databases/sql.py
									
									
									
									
									
										Symbolic link
									
								
							@ -0,0 +1 @@
 | 
			
		||||
/home/smgr/Documents/projects/ontolog/co3/co3/databases/sql.py
 | 
			
		||||
							
								
								
									
										1
									
								
								build/__editable__.co3-0.1.1-py3-none-any/co3/databases/vss.py
									
									
									
									
									
										Symbolic link
									
								
							
							
						
						
									
										1
									
								
								build/__editable__.co3-0.1.1-py3-none-any/co3/databases/vss.py
									
									
									
									
									
										Symbolic link
									
								
							@ -0,0 +1 @@
 | 
			
		||||
/home/smgr/Documents/projects/ontolog/co3/co3/databases/vss.py
 | 
			
		||||
							
								
								
									
										1
									
								
								build/__editable__.co3-0.1.1-py3-none-any/co3/indexer.py
									
									
									
									
									
										Symbolic link
									
								
							
							
						
						
									
										1
									
								
								build/__editable__.co3-0.1.1-py3-none-any/co3/indexer.py
									
									
									
									
									
										Symbolic link
									
								
							@ -0,0 +1 @@
 | 
			
		||||
/home/smgr/Documents/projects/ontolog/co3/co3/indexer.py
 | 
			
		||||
							
								
								
									
										1
									
								
								build/__editable__.co3-0.1.1-py3-none-any/co3/manager.py
									
									
									
									
									
										Symbolic link
									
								
							
							
						
						
									
										1
									
								
								build/__editable__.co3-0.1.1-py3-none-any/co3/manager.py
									
									
									
									
									
										Symbolic link
									
								
							@ -0,0 +1 @@
 | 
			
		||||
/home/smgr/Documents/projects/ontolog/co3/co3/manager.py
 | 
			
		||||
@ -0,0 +1 @@
 | 
			
		||||
/home/smgr/Documents/projects/ontolog/co3/co3/managers/__init__.py
 | 
			
		||||
							
								
								
									
										1
									
								
								build/__editable__.co3-0.1.1-py3-none-any/co3/managers/fts.py
									
									
									
									
									
										Symbolic link
									
								
							
							
						
						
									
										1
									
								
								build/__editable__.co3-0.1.1-py3-none-any/co3/managers/fts.py
									
									
									
									
									
										Symbolic link
									
								
							@ -0,0 +1 @@
 | 
			
		||||
/home/smgr/Documents/projects/ontolog/co3/co3/managers/fts.py
 | 
			
		||||
							
								
								
									
										1
									
								
								build/__editable__.co3-0.1.1-py3-none-any/co3/managers/sql.py
									
									
									
									
									
										Symbolic link
									
								
							
							
						
						
									
										1
									
								
								build/__editable__.co3-0.1.1-py3-none-any/co3/managers/sql.py
									
									
									
									
									
										Symbolic link
									
								
							@ -0,0 +1 @@
 | 
			
		||||
/home/smgr/Documents/projects/ontolog/co3/co3/managers/sql.py
 | 
			
		||||
							
								
								
									
										1
									
								
								build/__editable__.co3-0.1.1-py3-none-any/co3/managers/vss.py
									
									
									
									
									
										Symbolic link
									
								
							
							
						
						
									
										1
									
								
								build/__editable__.co3-0.1.1-py3-none-any/co3/managers/vss.py
									
									
									
									
									
										Symbolic link
									
								
							@ -0,0 +1 @@
 | 
			
		||||
/home/smgr/Documents/projects/ontolog/co3/co3/managers/vss.py
 | 
			
		||||
							
								
								
									
										1
									
								
								build/__editable__.co3-0.1.1-py3-none-any/co3/mapper.py
									
									
									
									
									
										Symbolic link
									
								
							
							
						
						
									
										1
									
								
								build/__editable__.co3-0.1.1-py3-none-any/co3/mapper.py
									
									
									
									
									
										Symbolic link
									
								
							@ -0,0 +1 @@
 | 
			
		||||
/home/smgr/Documents/projects/ontolog/co3/co3/mapper.py
 | 
			
		||||
							
								
								
									
										1
									
								
								build/__editable__.co3-0.1.1-py3-none-any/co3/relation.py
									
									
									
									
									
										Symbolic link
									
								
							
							
						
						
									
										1
									
								
								build/__editable__.co3-0.1.1-py3-none-any/co3/relation.py
									
									
									
									
									
										Symbolic link
									
								
							@ -0,0 +1 @@
 | 
			
		||||
/home/smgr/Documents/projects/ontolog/co3/co3/relation.py
 | 
			
		||||
@ -0,0 +1 @@
 | 
			
		||||
/home/smgr/Documents/projects/ontolog/co3/co3/relations/__init__.py
 | 
			
		||||
							
								
								
									
										1
									
								
								build/__editable__.co3-0.1.1-py3-none-any/co3/util/__init__.py
									
									
									
									
									
										Symbolic link
									
								
							
							
						
						
									
										1
									
								
								build/__editable__.co3-0.1.1-py3-none-any/co3/util/__init__.py
									
									
									
									
									
										Symbolic link
									
								
							@ -0,0 +1 @@
 | 
			
		||||
/home/smgr/Documents/projects/ontolog/co3/co3/util/__init__.py
 | 
			
		||||
							
								
								
									
										1
									
								
								build/__editable__.co3-0.1.1-py3-none-any/co3/util/db.py
									
									
									
									
									
										Symbolic link
									
								
							
							
						
						
									
										1
									
								
								build/__editable__.co3-0.1.1-py3-none-any/co3/util/db.py
									
									
									
									
									
										Symbolic link
									
								
							@ -0,0 +1 @@
 | 
			
		||||
/home/smgr/Documents/projects/ontolog/co3/co3/util/db.py
 | 
			
		||||
							
								
								
									
										1
									
								
								build/__editable__.co3-0.1.1-py3-none-any/co3/util/regex.py
									
									
									
									
									
										Symbolic link
									
								
							
							
						
						
									
										1
									
								
								build/__editable__.co3-0.1.1-py3-none-any/co3/util/regex.py
									
									
									
									
									
										Symbolic link
									
								
							@ -0,0 +1 @@
 | 
			
		||||
/home/smgr/Documents/projects/ontolog/co3/co3/util/regex.py
 | 
			
		||||
							
								
								
									
										1
									
								
								build/__editable__.co4-0.1.1-py3-none-any/co4/__init__.py
									
									
									
									
									
										Symbolic link
									
								
							
							
						
						
									
										1
									
								
								build/__editable__.co4-0.1.1-py3-none-any/co4/__init__.py
									
									
									
									
									
										Symbolic link
									
								
							@ -0,0 +1 @@
 | 
			
		||||
/home/smgr/Documents/projects/ontolog/co4/co4/__init__.py
 | 
			
		||||
							
								
								
									
										1
									
								
								build/__editable__.co4-0.1.1-py3-none-any/co4/accessor.py
									
									
									
									
									
										Symbolic link
									
								
							
							
						
						
									
										1
									
								
								build/__editable__.co4-0.1.1-py3-none-any/co4/accessor.py
									
									
									
									
									
										Symbolic link
									
								
							@ -0,0 +1 @@
 | 
			
		||||
/home/smgr/Documents/projects/ontolog/co4/co4/accessor.py
 | 
			
		||||
@ -0,0 +1 @@
 | 
			
		||||
/home/smgr/Documents/projects/ontolog/co4/co4/accessors/__init__.py
 | 
			
		||||
							
								
								
									
										1
									
								
								build/__editable__.co4-0.1.1-py3-none-any/co4/accessors/fts.py
									
									
									
									
									
										Symbolic link
									
								
							
							
						
						
									
										1
									
								
								build/__editable__.co4-0.1.1-py3-none-any/co4/accessors/fts.py
									
									
									
									
									
										Symbolic link
									
								
							@ -0,0 +1 @@
 | 
			
		||||
/home/smgr/Documents/projects/ontolog/co4/co4/accessors/fts.py
 | 
			
		||||
							
								
								
									
										1
									
								
								build/__editable__.co4-0.1.1-py3-none-any/co4/accessors/table.py
									
									
									
									
									
										Symbolic link
									
								
							
							
						
						
									
										1
									
								
								build/__editable__.co4-0.1.1-py3-none-any/co4/accessors/table.py
									
									
									
									
									
										Symbolic link
									
								
							@ -0,0 +1 @@
 | 
			
		||||
/home/smgr/Documents/projects/ontolog/co4/co4/accessors/table.py
 | 
			
		||||
							
								
								
									
										1
									
								
								build/__editable__.co4-0.1.1-py3-none-any/co4/accessors/vss.py
									
									
									
									
									
										Symbolic link
									
								
							
							
						
						
									
										1
									
								
								build/__editable__.co4-0.1.1-py3-none-any/co4/accessors/vss.py
									
									
									
									
									
										Symbolic link
									
								
							@ -0,0 +1 @@
 | 
			
		||||
/home/smgr/Documents/projects/ontolog/co4/co4/accessors/vss.py
 | 
			
		||||
							
								
								
									
										1
									
								
								build/__editable__.co4-0.1.1-py3-none-any/co4/co4.py
									
									
									
									
									
										Symbolic link
									
								
							
							
						
						
									
										1
									
								
								build/__editable__.co4-0.1.1-py3-none-any/co4/co4.py
									
									
									
									
									
										Symbolic link
									
								
							@ -0,0 +1 @@
 | 
			
		||||
/home/smgr/Documents/projects/ontolog/co4/co4/co4.py
 | 
			
		||||
							
								
								
									
										1
									
								
								build/__editable__.co4-0.1.1-py3-none-any/co4/collector.py
									
									
									
									
									
										Symbolic link
									
								
							
							
						
						
									
										1
									
								
								build/__editable__.co4-0.1.1-py3-none-any/co4/collector.py
									
									
									
									
									
										Symbolic link
									
								
							@ -0,0 +1 @@
 | 
			
		||||
/home/smgr/Documents/projects/ontolog/co4/co4/collector.py
 | 
			
		||||
							
								
								
									
										1
									
								
								build/__editable__.co4-0.1.1-py3-none-any/co4/composer.py
									
									
									
									
									
										Symbolic link
									
								
							
							
						
						
									
										1
									
								
								build/__editable__.co4-0.1.1-py3-none-any/co4/composer.py
									
									
									
									
									
										Symbolic link
									
								
							@ -0,0 +1 @@
 | 
			
		||||
/home/smgr/Documents/projects/ontolog/co4/co4/composer.py
 | 
			
		||||
@ -0,0 +1 @@
 | 
			
		||||
/home/smgr/Documents/projects/ontolog/co4/co4/databases/__init__.py
 | 
			
		||||
							
								
								
									
										1
									
								
								build/__editable__.co4-0.1.1-py3-none-any/co4/databases/_base.py
									
									
									
									
									
										Symbolic link
									
								
							
							
						
						
									
										1
									
								
								build/__editable__.co4-0.1.1-py3-none-any/co4/databases/_base.py
									
									
									
									
									
										Symbolic link
									
								
							@ -0,0 +1 @@
 | 
			
		||||
/home/smgr/Documents/projects/ontolog/co4/co4/databases/_base.py
 | 
			
		||||
							
								
								
									
										1
									
								
								build/__editable__.co4-0.1.1-py3-none-any/co4/databases/core.py
									
									
									
									
									
										Symbolic link
									
								
							
							
						
						
									
										1
									
								
								build/__editable__.co4-0.1.1-py3-none-any/co4/databases/core.py
									
									
									
									
									
										Symbolic link
									
								
							@ -0,0 +1 @@
 | 
			
		||||
/home/smgr/Documents/projects/ontolog/co4/co4/databases/core.py
 | 
			
		||||
							
								
								
									
										1
									
								
								build/__editable__.co4-0.1.1-py3-none-any/co4/databases/fts.py
									
									
									
									
									
										Symbolic link
									
								
							
							
						
						
									
										1
									
								
								build/__editable__.co4-0.1.1-py3-none-any/co4/databases/fts.py
									
									
									
									
									
										Symbolic link
									
								
							@ -0,0 +1 @@
 | 
			
		||||
/home/smgr/Documents/projects/ontolog/co4/co4/databases/fts.py
 | 
			
		||||
							
								
								
									
										1
									
								
								build/__editable__.co4-0.1.1-py3-none-any/co4/databases/vss.py
									
									
									
									
									
										Symbolic link
									
								
							
							
						
						
									
										1
									
								
								build/__editable__.co4-0.1.1-py3-none-any/co4/databases/vss.py
									
									
									
									
									
										Symbolic link
									
								
							@ -0,0 +1 @@
 | 
			
		||||
/home/smgr/Documents/projects/ontolog/co4/co4/databases/vss.py
 | 
			
		||||
							
								
								
									
										1
									
								
								build/__editable__.co4-0.1.1-py3-none-any/co4/indexer.py
									
									
									
									
									
										Symbolic link
									
								
							
							
						
						
									
										1
									
								
								build/__editable__.co4-0.1.1-py3-none-any/co4/indexer.py
									
									
									
									
									
										Symbolic link
									
								
							@ -0,0 +1 @@
 | 
			
		||||
/home/smgr/Documents/projects/ontolog/co4/co4/indexer.py
 | 
			
		||||
							
								
								
									
										1
									
								
								build/__editable__.co4-0.1.1-py3-none-any/co4/manager.py
									
									
									
									
									
										Symbolic link
									
								
							
							
						
						
									
										1
									
								
								build/__editable__.co4-0.1.1-py3-none-any/co4/manager.py
									
									
									
									
									
										Symbolic link
									
								
							@ -0,0 +1 @@
 | 
			
		||||
/home/smgr/Documents/projects/ontolog/co4/co4/manager.py
 | 
			
		||||
@ -0,0 +1 @@
 | 
			
		||||
/home/smgr/Documents/projects/ontolog/co4/co4/managers/__init__.py
 | 
			
		||||
							
								
								
									
										1
									
								
								build/__editable__.co4-0.1.1-py3-none-any/co4/managers/core.py
									
									
									
									
									
										Symbolic link
									
								
							
							
						
						
									
										1
									
								
								build/__editable__.co4-0.1.1-py3-none-any/co4/managers/core.py
									
									
									
									
									
										Symbolic link
									
								
							@ -0,0 +1 @@
 | 
			
		||||
/home/smgr/Documents/projects/ontolog/co4/co4/managers/core.py
 | 
			
		||||
							
								
								
									
										1
									
								
								build/__editable__.co4-0.1.1-py3-none-any/co4/managers/fts.py
									
									
									
									
									
										Symbolic link
									
								
							
							
						
						
									
										1
									
								
								build/__editable__.co4-0.1.1-py3-none-any/co4/managers/fts.py
									
									
									
									
									
										Symbolic link
									
								
							@ -0,0 +1 @@
 | 
			
		||||
/home/smgr/Documents/projects/ontolog/co4/co4/managers/fts.py
 | 
			
		||||
							
								
								
									
										1
									
								
								build/__editable__.co4-0.1.1-py3-none-any/co4/managers/vss.py
									
									
									
									
									
										Symbolic link
									
								
							
							
						
						
									
										1
									
								
								build/__editable__.co4-0.1.1-py3-none-any/co4/managers/vss.py
									
									
									
									
									
										Symbolic link
									
								
							@ -0,0 +1 @@
 | 
			
		||||
/home/smgr/Documents/projects/ontolog/co4/co4/managers/vss.py
 | 
			
		||||
							
								
								
									
										1
									
								
								build/__editable__.co4-0.1.1-py3-none-any/co4/utils/db.py
									
									
									
									
									
										Symbolic link
									
								
							
							
						
						
									
										1
									
								
								build/__editable__.co4-0.1.1-py3-none-any/co4/utils/db.py
									
									
									
									
									
										Symbolic link
									
								
							@ -0,0 +1 @@
 | 
			
		||||
/home/smgr/Documents/projects/ontolog/co4/co4/utils/db.py
 | 
			
		||||
							
								
								
									
										1
									
								
								build/__editable__.co4-0.1.1-py3-none-any/co4/utils/paths.py
									
									
									
									
									
										Symbolic link
									
								
							
							
						
						
									
										1
									
								
								build/__editable__.co4-0.1.1-py3-none-any/co4/utils/paths.py
									
									
									
									
									
										Symbolic link
									
								
							@ -0,0 +1 @@
 | 
			
		||||
/home/smgr/Documents/projects/ontolog/co4/co4/utils/paths.py
 | 
			
		||||
							
								
								
									
										109
									
								
								co3/__init__.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										109
									
								
								co3/__init__.py
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,109 @@
 | 
			
		||||
'''
 | 
			
		||||
Database submodule
 | 
			
		||||
 | 
			
		||||
- `db`: contains SQLAlchemy-based schema definitions
 | 
			
		||||
- `accessors`: convenience methods for accessing database entries
 | 
			
		||||
- `populate`: convenience methods for populating database tables
 | 
			
		||||
 | 
			
		||||
The `accessors` and `populate` submodules are each split into `schema` and `fts` method
 | 
			
		||||
groups. The former concerns methods relating to the actual database schema, the latter to
 | 
			
		||||
their SQLite FTS counterparts.
 | 
			
		||||
 | 
			
		||||
Note: Subpackages organization
 | 
			
		||||
    Subpackages are broken up by inheritance. Within a given submodule, you have a
 | 
			
		||||
    `_base.py` file defining the base class associated with that submodule's title, along
 | 
			
		||||
    with concrete subclasses of that base in their own files. Deeper inheritance would
 | 
			
		||||
    recursively extend this structure. The `__init__.py` for a given submodule then
 | 
			
		||||
    exposes the concrete instances, leaving the base hidden. For example,
 | 
			
		||||
 | 
			
		||||
    accessors/
 | 
			
		||||
        _base.py
 | 
			
		||||
        core.py
 | 
			
		||||
        fts.py
 | 
			
		||||
 | 
			
		||||
    `core` and `fts` house the `CoreAccessor` and `FTSAccessor` classes, respectively,
 | 
			
		||||
    and are the direct subclasses of the `Accessor` parent found in the `_base`. This base
 | 
			
		||||
    class _could_ be placed outside of the submodule in the parent directory (imported
 | 
			
		||||
    with something like `from db import accessor` instead of `from db.accessor import
 | 
			
		||||
    _base`). This is entirely valid, but I tend to prefer when the base class is among its
 | 
			
		||||
    direct children, as
 | 
			
		||||
 | 
			
		||||
    - In this case at least, the base doesn't need to be exposed
 | 
			
		||||
    - The base class is being stowed away under an appropriately named submodule; having a
 | 
			
		||||
      separate `accessor.py` and `accessors/` file/directory can feel a little cluttered.
 | 
			
		||||
    - It makes imports across the accessors feel standardized:
 | 
			
		||||
 | 
			
		||||
      ```py
 | 
			
		||||
      from localsys.db.accessors._base import Accessor
 | 
			
		||||
 | 
			
		||||
      from localsys.db.accessors.core import CoreAccessor
 | 
			
		||||
      ```
 | 
			
		||||
 | 
			
		||||
      Both have the same level of nesting to reach the class.
 | 
			
		||||
 | 
			
		||||
    Frankly, both means of organization are perfectly fine, and as far as I can tell,
 | 
			
		||||
    semantically sound in their own right. This particular scheme is just a preference in
 | 
			
		||||
    the moment, and so long as I keep things consistent, choosing one over the other
 | 
			
		||||
    shouldn't matter.
 | 
			
		||||
 | 
			
		||||
    Additionally, note how `__init__.py`s are typically set up when providing wider access
 | 
			
		||||
    to internal modules. The `init` typically pulls out classes from sibling modules
 | 
			
		||||
    (i.e., files), but will import subpackages are the topmost level. For example, for the
 | 
			
		||||
    structure
 | 
			
		||||
 | 
			
		||||
    ```
 | 
			
		||||
    db/
 | 
			
		||||
        __init__.py
 | 
			
		||||
        accessors/
 | 
			
		||||
            __init__.py
 | 
			
		||||
            _base.py
 | 
			
		||||
            core.py
 | 
			
		||||
            fts.py
 | 
			
		||||
    ```
 | 
			
		||||
 | 
			
		||||
    we have
 | 
			
		||||
 | 
			
		||||
    ```db/__init__.py
 | 
			
		||||
    from localsys.db import accessors
 | 
			
		||||
    ```
 | 
			
		||||
 | 
			
		||||
    which just imports the subpackage `accessors`. However, within subpackage:
 | 
			
		||||
 | 
			
		||||
    ```db/accessors/__init__.py
 | 
			
		||||
    from localsys.db.accessors.core import CoreAccessor
 | 
			
		||||
    ```
 | 
			
		||||
 | 
			
		||||
    we don't just import the submodule `core`; we did into the file to grab the relevant
 | 
			
		||||
    class and pull it into the outer namespace. Overarching point: `__init__.py` files
 | 
			
		||||
    typically reach into the sibling files (submodules) and pull out classes. Given that
 | 
			
		||||
    this behavior is recursive, `__init__.py` then respect subpackages (nested
 | 
			
		||||
    directories), importing them at the top-level and expecting an internal `__init__.py`
 | 
			
		||||
    will have managed access appropriately.
 | 
			
		||||
 | 
			
		||||
Note: Organization for inheritance over composition
 | 
			
		||||
    At a glance, the organization of subpackages here feels like it clashes with those
 | 
			
		||||
    seen in `localsys.primitives`. `note_components`, for instance, houses the components
 | 
			
		||||
    for the outer `note` module. Contrast this with how the `core` submodule looks: it's
 | 
			
		||||
    composing `*/core.py` files across subpackages `accessors` and `managers`, rather than
 | 
			
		||||
    a single subpackage like `note`. This seems inconsistent, but the subpackages here are
 | 
			
		||||
    actually still organized in the same way: by inheritance. It just happens that the
 | 
			
		||||
    all of the note components inherit from the same base class, and are thus confined to
 | 
			
		||||
    a single subpackage. This aside, the subpackages themselves are still created around
 | 
			
		||||
    inheritance, wrapping up a base and direct subclasses.
 | 
			
		||||
'''
 | 
			
		||||
 | 
			
		||||
from co3.accessor  import Accessor
 | 
			
		||||
from co3.co3       import CO3
 | 
			
		||||
from co3.collector import Collector
 | 
			
		||||
from co3.composer  import Composer
 | 
			
		||||
from co3.database  import Database
 | 
			
		||||
from co3.indexer   import Indexer
 | 
			
		||||
from co3.manager   import Manager
 | 
			
		||||
from co3.mapper    import Mapper
 | 
			
		||||
from co3.relation  import Relation
 | 
			
		||||
 | 
			
		||||
from co3 import accessors
 | 
			
		||||
from co3 import databases
 | 
			
		||||
from co3 import managers
 | 
			
		||||
from co3 import relations
 | 
			
		||||
from co3 import util
 | 
			
		||||
							
								
								
									
										
											BIN
										
									
								
								co3/__pycache__/__init__.cpython-311.pyc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								co3/__pycache__/__init__.cpython-311.pyc
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							
							
								
								
									
										
											BIN
										
									
								
								co3/__pycache__/__init__.cpython-312.pyc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								co3/__pycache__/__init__.cpython-312.pyc
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							
							
								
								
									
										
											BIN
										
									
								
								co3/__pycache__/accessor.cpython-311.pyc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								co3/__pycache__/accessor.cpython-311.pyc
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							
							
								
								
									
										
											BIN
										
									
								
								co3/__pycache__/accessor.cpython-312.pyc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								co3/__pycache__/accessor.cpython-312.pyc
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							
							
								
								
									
										
											BIN
										
									
								
								co3/__pycache__/co3.cpython-312.pyc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								co3/__pycache__/co3.cpython-312.pyc
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							
							
								
								
									
										
											BIN
										
									
								
								co3/__pycache__/co4.cpython-311.pyc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								co3/__pycache__/co4.cpython-311.pyc
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							
							
								
								
									
										
											BIN
										
									
								
								co3/__pycache__/collector.cpython-311.pyc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								co3/__pycache__/collector.cpython-311.pyc
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							
							
								
								
									
										
											BIN
										
									
								
								co3/__pycache__/collector.cpython-312.pyc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								co3/__pycache__/collector.cpython-312.pyc
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							
							
								
								
									
										
											BIN
										
									
								
								co3/__pycache__/composer.cpython-311.pyc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								co3/__pycache__/composer.cpython-311.pyc
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							
							
								
								
									
										
											BIN
										
									
								
								co3/__pycache__/composer.cpython-312.pyc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								co3/__pycache__/composer.cpython-312.pyc
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							
							
								
								
									
										
											BIN
										
									
								
								co3/__pycache__/database.cpython-312.pyc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								co3/__pycache__/database.cpython-312.pyc
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							
							
								
								
									
										
											BIN
										
									
								
								co3/__pycache__/indexer.cpython-311.pyc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								co3/__pycache__/indexer.cpython-311.pyc
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							
							
								
								
									
										
											BIN
										
									
								
								co3/__pycache__/indexer.cpython-312.pyc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								co3/__pycache__/indexer.cpython-312.pyc
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							
							
								
								
									
										
											BIN
										
									
								
								co3/__pycache__/manager.cpython-311.pyc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								co3/__pycache__/manager.cpython-311.pyc
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							
							
								
								
									
										
											BIN
										
									
								
								co3/__pycache__/manager.cpython-312.pyc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								co3/__pycache__/manager.cpython-312.pyc
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							
							
								
								
									
										
											BIN
										
									
								
								co3/__pycache__/mapper.cpython-312.pyc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								co3/__pycache__/mapper.cpython-312.pyc
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							
							
								
								
									
										
											BIN
										
									
								
								co3/__pycache__/relation.cpython-312.pyc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								co3/__pycache__/relation.cpython-312.pyc
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							
							
								
								
									
										28
									
								
								co3/accessor.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										28
									
								
								co3/accessor.py
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,28 @@
 | 
			
		||||
'''
 | 
			
		||||
Accessor
 | 
			
		||||
 | 
			
		||||
Provides access to an underlying schema through a supported set of operations. Class
 | 
			
		||||
methods could be general, high-level SQL wrappers, or convenience functions for common
 | 
			
		||||
schema-specific queries.
 | 
			
		||||
'''
 | 
			
		||||
import inspect
 | 
			
		||||
from pathlib import Path
 | 
			
		||||
from collections import defaultdict
 | 
			
		||||
 | 
			
		||||
import sqlalchemy as sa
 | 
			
		||||
 | 
			
		||||
#from co3.database import Database
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class Accessor[D: 'Database']:
 | 
			
		||||
    '''
 | 
			
		||||
    Access wrapper class for complex queries and easy integration with Composer tables.
 | 
			
		||||
    Implements high-level access to things like common constrained SELECT queries.
 | 
			
		||||
 | 
			
		||||
    Parameters:
 | 
			
		||||
        engine: SQLAlchemy engine to use for queries. Engine is initialized dynamically as
 | 
			
		||||
                a property (based on the config) if not provided
 | 
			
		||||
    '''
 | 
			
		||||
    def __init__(self, database: D):
 | 
			
		||||
        self.database = database
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										24
									
								
								co3/accessors/__init__.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										24
									
								
								co3/accessors/__init__.py
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,24 @@
 | 
			
		||||
'''
 | 
			
		||||
Note that subclasses in this subpackage are split differently to other subpackages in the
 | 
			
		||||
DB. Instead of being split by table group, corresponding to a Composer (which defines that
 | 
			
		||||
table group), Accessors are split by a separate dimension: table "type". This is why we
 | 
			
		||||
have a "TableAccessor" and an "FTSAccessor": the former exposes access operations
 | 
			
		||||
available to generic tables, the latter to FTS tables (instead of being designed
 | 
			
		||||
specifically around "core" and "fts" groups, for instance).
 | 
			
		||||
 | 
			
		||||
Seeing as FTS tables are "generic" tables, it seems inconsistent not to have FTSAccessor
 | 
			
		||||
inherit from TableAccessor. While this would work fine, the model we're working with
 | 
			
		||||
doesn't really need it; you can instead think of the FTSAccessor as defining _only_
 | 
			
		||||
FTS-specific operations. Given that you have a Composer for your desired table group, you
 | 
			
		||||
can then wrap it with your desired set of "access actions," available in separate Accessor
 | 
			
		||||
subclasses.
 | 
			
		||||
 | 
			
		||||
For instance, you could wrap an FTSComposer in either a TableAccessor or FTSAccessor. The
 | 
			
		||||
former will treat the tables in the composer like regular tables, exposing methods like
 | 
			
		||||
`.select` and `.select_one`, whereas the latter defines FTS-specific actions like
 | 
			
		||||
`.search`.
 | 
			
		||||
'''
 | 
			
		||||
 | 
			
		||||
from co3.accessors.sql import SQLAccessor
 | 
			
		||||
from co3.accessors.fts import FTSAccessor
 | 
			
		||||
from co3.accessors.vss import VSSAccessor
 | 
			
		||||
							
								
								
									
										
											BIN
										
									
								
								co3/accessors/__pycache__/__init__.cpython-311.pyc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								co3/accessors/__pycache__/__init__.cpython-311.pyc
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							
							
								
								
									
										
											BIN
										
									
								
								co3/accessors/__pycache__/__init__.cpython-312.pyc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								co3/accessors/__pycache__/__init__.cpython-312.pyc
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							
							
								
								
									
										
											BIN
										
									
								
								co3/accessors/__pycache__/_base.cpython-311.pyc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								co3/accessors/__pycache__/_base.cpython-311.pyc
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							
							
								
								
									
										
											BIN
										
									
								
								co3/accessors/__pycache__/fts.cpython-311.pyc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								co3/accessors/__pycache__/fts.cpython-311.pyc
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							
							
								
								
									
										
											BIN
										
									
								
								co3/accessors/__pycache__/fts.cpython-312.pyc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								co3/accessors/__pycache__/fts.cpython-312.pyc
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							
							
								
								
									
										
											BIN
										
									
								
								co3/accessors/__pycache__/sql.cpython-312.pyc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								co3/accessors/__pycache__/sql.cpython-312.pyc
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							
							
								
								
									
										
											BIN
										
									
								
								co3/accessors/__pycache__/table.cpython-311.pyc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								co3/accessors/__pycache__/table.cpython-311.pyc
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							
							
								
								
									
										
											BIN
										
									
								
								co3/accessors/__pycache__/vss.cpython-311.pyc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								co3/accessors/__pycache__/vss.cpython-311.pyc
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							
							
								
								
									
										
											BIN
										
									
								
								co3/accessors/__pycache__/vss.cpython-312.pyc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								co3/accessors/__pycache__/vss.cpython-312.pyc
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							
							
								
								
									
										147
									
								
								co3/accessors/fts.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										147
									
								
								co3/accessors/fts.py
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,147 @@
 | 
			
		||||
import sqlalchemy as sa
 | 
			
		||||
 | 
			
		||||
from co3 import util
 | 
			
		||||
from co3.accessor import Accessor
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class FTSAccessor(Accessor):
 | 
			
		||||
    def search(
 | 
			
		||||
        self,
 | 
			
		||||
        table_name  : str,
 | 
			
		||||
        select_cols : str  | list | None = '*',
 | 
			
		||||
        search_cols : str  | None = None,
 | 
			
		||||
        q           : str  | None = None,
 | 
			
		||||
        colq        : str  | None = None,
 | 
			
		||||
        snip_col    : int  | None = 0,
 | 
			
		||||
        hl_col      : int  | None = 0,
 | 
			
		||||
        limit       : int  | None = 100,
 | 
			
		||||
        snip        : int  | None = 64,
 | 
			
		||||
        tokenizer   : str  | None = 'unicode61',
 | 
			
		||||
        group_by    : str  | None = None,
 | 
			
		||||
        agg_cols    : list | None = None,
 | 
			
		||||
        wherein_dict: dict | None = None,
 | 
			
		||||
        unique_on   : dict | None = None,
 | 
			
		||||
    ):
 | 
			
		||||
        '''
 | 
			
		||||
        Execute a search query against an indexed FTS table for specific primitives. This
 | 
			
		||||
        method is mostly a generic FTS handler, capable of handling queries to any available
 | 
			
		||||
        FTS table with a matching naming scheme (`fts_<type>_<tokenizer>`). The current
 | 
			
		||||
        intention is support all tokenizers, for file, note, block, and link primitives.
 | 
			
		||||
 | 
			
		||||
        Search results include all FTS table columns, as well as SQLite-supported `snippet`s
 | 
			
		||||
        and `highlight`s for matches. Matches are filtered and ordered by SQLite's
 | 
			
		||||
        `MATCH`-based score for the text & column queries. Results are (a list of) fully
 | 
			
		||||
        expanded dictionaries housing column-value pairs.
 | 
			
		||||
 | 
			
		||||
        Note:
 | 
			
		||||
            GROUP BY cannot be paired with SQLITE FTS extensions; thus, we perform manual
 | 
			
		||||
            group checks on the result set in Python before response
 | 
			
		||||
 | 
			
		||||
        Analysis:
 | 
			
		||||
            The returned JSON structure has been (loosely) optimized for speed on the client
 | 
			
		||||
            side. Fully forming individual dictionary based responses saves time in
 | 
			
		||||
            Javascript, as the JSON parser is expected to be able to create the objects
 | 
			
		||||
            faster than post-hoc construction in JS. This return structure was compared
 | 
			
		||||
            against returning an array of arrays (all ordered in the same fashion), along with
 | 
			
		||||
            a column list to be associated with each of the result values. While this saves
 | 
			
		||||
            some size on the payload (the same column names don't have to be transmitted for
 | 
			
		||||
            each result), the size of the returned content massively outweighs the
 | 
			
		||||
            predominantly short column names. The only way this structure would be viable is
 | 
			
		||||
            if a significant amount was saved on transfer compared to the slow down in JS
 | 
			
		||||
            object construction; this is (almost) never the case.
 | 
			
		||||
 | 
			
		||||
        Parameters:
 | 
			
		||||
            table_name  : name of FTS table to search
 | 
			
		||||
            search_cols : space separated string of columns to use for primary queries
 | 
			
		||||
            q           : search query
 | 
			
		||||
            colq        : column constraint string; must conform to SQLite standards (e.g.,
 | 
			
		||||
                          `<col>:<text>`
 | 
			
		||||
            snip_col    : table column to use for snippets (default: 1; source content column)
 | 
			
		||||
            hl_col      : table column to use for highlights (default: 2; format column, applied
 | 
			
		||||
                          to HTML targets)
 | 
			
		||||
            limit       : maximum number of results to return in the SQL query
 | 
			
		||||
            snip        : snippet length (max: 64)
 | 
			
		||||
            tokenizer   : tokenizer to use (assumes relevant FTS table has been built)
 | 
			
		||||
            ...
 | 
			
		||||
            wherein_dict: (col-name, value-list) pairs to match result set against, via
 | 
			
		||||
                          WHERE ... IN clauses
 | 
			
		||||
 | 
			
		||||
        Returns:
 | 
			
		||||
            Dictionary with search results (list of column indexed dictionaries) and relevant
 | 
			
		||||
            metadata.
 | 
			
		||||
        '''
 | 
			
		||||
        search_q = ''
 | 
			
		||||
 | 
			
		||||
        if type(select_cols) is list:
 | 
			
		||||
            select_cols = ', '.join(select_cols)
 | 
			
		||||
 | 
			
		||||
        # construct main search query
 | 
			
		||||
        if search_cols and q:
 | 
			
		||||
            search_q = f'{{{search_cols}}} : {q}'
 | 
			
		||||
 | 
			
		||||
        # add auxiliary search constraints
 | 
			
		||||
        if colq:
 | 
			
		||||
            search_q += f' {colq}'
 | 
			
		||||
 | 
			
		||||
        search_q = search_q.strip()
 | 
			
		||||
 | 
			
		||||
        hl_start = '<b><mark>'
 | 
			
		||||
        hl_end   = '</mark></b>'
 | 
			
		||||
 | 
			
		||||
        fts_table_name = f'{table_name}_fts_{tokenizer}'
 | 
			
		||||
        
 | 
			
		||||
        sql = f'''
 | 
			
		||||
        SELECT
 | 
			
		||||
            {select_cols},
 | 
			
		||||
            snippet({fts_table_name}, {snip_col}, '{hl_start}', '{hl_end}', '...', {snip}) AS snippet,
 | 
			
		||||
            highlight({fts_table_name}, {hl_col}, '{hl_start}', '{hl_end}') AS highlight 
 | 
			
		||||
        FROM {fts_table_name}
 | 
			
		||||
        '''
 | 
			
		||||
        
 | 
			
		||||
        where_clauses = []
 | 
			
		||||
        if search_q:
 | 
			
		||||
            where_clauses.append(f"{fts_table_name} MATCH '{search_q}'\n")
 | 
			
		||||
 | 
			
		||||
        if wherein_dict:
 | 
			
		||||
            for col, vals in wherein_dict.items():
 | 
			
		||||
                where_clauses.append(f'{col} IN {tuple(vals)}\n')
 | 
			
		||||
 | 
			
		||||
        if where_clauses:
 | 
			
		||||
            where_str = " AND ".join(where_clauses)
 | 
			
		||||
            sql += f'WHERE {where_str}'
 | 
			
		||||
 | 
			
		||||
        sql += f'ORDER BY rank LIMIT {limit};'
 | 
			
		||||
 | 
			
		||||
        row_dicts, cols = self.raw_select(sql, include_cols=True)
 | 
			
		||||
 | 
			
		||||
        if group_by is None:
 | 
			
		||||
            return row_dicts, cols
 | 
			
		||||
 | 
			
		||||
        if agg_cols is None:
 | 
			
		||||
            agg_cols = []
 | 
			
		||||
 | 
			
		||||
        # "group by" block ID and wrangle the links into a list
 | 
			
		||||
        # note we can't perform native GROUP BYs with FTS results
 | 
			
		||||
        group_by_idx = {}
 | 
			
		||||
        for row in row_dicts:
 | 
			
		||||
            group_by_attr = row.get(group_by)
 | 
			
		||||
 | 
			
		||||
            # add new entries
 | 
			
		||||
            for agg_col in agg_cols:
 | 
			
		||||
                row[f'{agg_col}_agg'] = set()
 | 
			
		||||
 | 
			
		||||
            if group_by_attr is None:
 | 
			
		||||
                continue
 | 
			
		||||
 | 
			
		||||
            if group_by_attr not in group_by_idx:
 | 
			
		||||
                group_by_idx[group_by_attr] = row
 | 
			
		||||
 | 
			
		||||
            for agg_col in agg_cols:
 | 
			
		||||
                if agg_col in row:
 | 
			
		||||
                    group_by_idx[group_by_attr][f'{agg_col}_agg'].add(row[agg_col])
 | 
			
		||||
 | 
			
		||||
        return {
 | 
			
		||||
            'results'     : group_by_idx,
 | 
			
		||||
            'columns'     : cols,
 | 
			
		||||
            'num_results' : len(row_dicts),
 | 
			
		||||
        }
 | 
			
		||||
							
								
								
									
										96
									
								
								co3/accessors/sql.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										96
									
								
								co3/accessors/sql.py
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,96 @@
 | 
			
		||||
from pathlib import Path
 | 
			
		||||
from collections.abc import Iterable
 | 
			
		||||
import inspect
 | 
			
		||||
from functools import cache
 | 
			
		||||
 | 
			
		||||
import sqlalchemy as sa
 | 
			
		||||
 | 
			
		||||
from co3 import util
 | 
			
		||||
from co3.accessor import Accessor
 | 
			
		||||
from co3.relation import Relation
 | 
			
		||||
 | 
			
		||||
#from co3.databases.sql import RelationalDatabase, TabularDatabase, SQLDatabase
 | 
			
		||||
from co3.relations import TabularRelation, SQLTable
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class RelationalAccessor[D: 'RelationalDatabase', R: Relation](Accessor[D]):
 | 
			
		||||
    pass
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class TabularAccessor[D: 'TabularDatabase', R: TabularRelation](RelationalAccessor[D, R]):
 | 
			
		||||
    pass
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class SQLAccessor(TabularAccessor['SQLDatabase', SQLTable]):
 | 
			
		||||
    def raw_select(
 | 
			
		||||
        self,
 | 
			
		||||
        sql,
 | 
			
		||||
        bind_params=None,
 | 
			
		||||
        mappings=False,
 | 
			
		||||
        include_cols=False,
 | 
			
		||||
    ):
 | 
			
		||||
        res_method = utils.db.sa_exec_dicts
 | 
			
		||||
        if mappings:
 | 
			
		||||
            res_method = utils.db.sa_exec_mappings
 | 
			
		||||
 | 
			
		||||
        return res_method(self.database.engine, sa.text(sql), bind_params=bind_params, include_cols=include_cols)
 | 
			
		||||
 | 
			
		||||
    def select(
 | 
			
		||||
        self,
 | 
			
		||||
        table: sa.Table | sa.Subquery | sa.Join,
 | 
			
		||||
        cols         = None,
 | 
			
		||||
        where        = None,
 | 
			
		||||
        distinct_on  = None,
 | 
			
		||||
        order_by     = None,
 | 
			
		||||
        limit        = 0,
 | 
			
		||||
        mappings     = False,
 | 
			
		||||
        include_cols = False,
 | 
			
		||||
    ):
 | 
			
		||||
        '''
 | 
			
		||||
        Perform a SELECT query against the provided table-like object (see
 | 
			
		||||
        `check_table()`).
 | 
			
		||||
 | 
			
		||||
        Deprecated: String aliases
 | 
			
		||||
            String aliases for tables are no longer supported. This method no longer checks
 | 
			
		||||
            against any specific schema table-maps or Composers. Instead, this should be
 | 
			
		||||
            done outside the Accessor.
 | 
			
		||||
 | 
			
		||||
        Parameters:
 | 
			
		||||
            group_by: list of columns to group by; for now serves as a proxy for DISTINCT
 | 
			
		||||
                      (no aggregation methods accepted)
 | 
			
		||||
            order_by: column to order results by (can use <col>.desc() to order
 | 
			
		||||
                      by descending)
 | 
			
		||||
        '''
 | 
			
		||||
        if where is None:
 | 
			
		||||
            where = sa.true()
 | 
			
		||||
 | 
			
		||||
        res_method = utils.db.sa_exec_dicts
 | 
			
		||||
        if mappings:
 | 
			
		||||
            res_method = utils.db.sa_exec_mappings
 | 
			
		||||
 | 
			
		||||
        stmt = sa.select(table).where(where)
 | 
			
		||||
        if cols is not None:
 | 
			
		||||
            stmt = sa.select(*cols).select_from(table).where(where)
 | 
			
		||||
 | 
			
		||||
        if distinct_on is not None:
 | 
			
		||||
            stmt = stmt.group_by(distinct_on)
 | 
			
		||||
 | 
			
		||||
        if order_by is not None:
 | 
			
		||||
            stmt = stmt.order_by(order_by)
 | 
			
		||||
 | 
			
		||||
        if limit > 0:
 | 
			
		||||
            stmt = stmt.limit(limit)
 | 
			
		||||
 | 
			
		||||
        return res_method(self.engine, stmt, include_cols=include_cols)
 | 
			
		||||
 | 
			
		||||
    def select_one(self, table, cols=None, where=None, mappings=False, include_cols=False):
 | 
			
		||||
        res = self.select(table, cols, where, mappings, include_cols, limit=1)
 | 
			
		||||
 | 
			
		||||
        if include_cols and len(res[0]) > 0:
 | 
			
		||||
            return res[0][0], res[1]
 | 
			
		||||
 | 
			
		||||
        if len(res) > 0:
 | 
			
		||||
            return res[0]
 | 
			
		||||
 | 
			
		||||
        return None
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										100
									
								
								co3/accessors/vss.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										100
									
								
								co3/accessors/vss.py
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,100 @@
 | 
			
		||||
import pickle
 | 
			
		||||
import logging
 | 
			
		||||
from pathlib import Path
 | 
			
		||||
import time
 | 
			
		||||
 | 
			
		||||
import sqlalchemy as sa
 | 
			
		||||
#from sentence_transformers import SentenceTransformer, util
 | 
			
		||||
 | 
			
		||||
from co3.accessor import Accessor
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
logger = logging.getLogger(__name__)
 | 
			
		||||
 | 
			
		||||
class VSSAccessor(Accessor):
 | 
			
		||||
    def __init__(self, cache_path):
 | 
			
		||||
        super().__init__()
 | 
			
		||||
 | 
			
		||||
        self._model      = None
 | 
			
		||||
        self._embeddings = None
 | 
			
		||||
 | 
			
		||||
        self._embedding_size = 384
 | 
			
		||||
        self.embedding_path = Path(cache_path, 'embeddings.pkl')
 | 
			
		||||
 | 
			
		||||
    def write_embeddings(self, embedding_dict):
 | 
			
		||||
        self.embedding_path.write_bytes(pickle.dumps(embedding_dict))
 | 
			
		||||
 | 
			
		||||
    def read_embeddings(self):
 | 
			
		||||
        if not self.embedding_path.exists():
 | 
			
		||||
            logger.warning(
 | 
			
		||||
                f'Attempting to access non-existent embeddings at {self.embedding_path}'
 | 
			
		||||
            )
 | 
			
		||||
            return None
 | 
			
		||||
 | 
			
		||||
        return pickle.loads(self.embedding_path.read_bytes())
 | 
			
		||||
 | 
			
		||||
    @property
 | 
			
		||||
    def model(self):
 | 
			
		||||
        if self._model is None:
 | 
			
		||||
            # model trained with 128 token seqs
 | 
			
		||||
            self._model = SentenceTransformer('sentence-transformers/all-MiniLM-L12-v2')
 | 
			
		||||
        return self._model
 | 
			
		||||
 | 
			
		||||
    @property
 | 
			
		||||
    def embeddings(self):
 | 
			
		||||
        if self._embeddings is None:
 | 
			
		||||
            self._embeddings = self.read_embeddings()
 | 
			
		||||
        return self._embeddings
 | 
			
		||||
 | 
			
		||||
    def embed_chunks(self, chunks, batch_size=64, show_prog=True):
 | 
			
		||||
        return self.model.encode(
 | 
			
		||||
            chunks,
 | 
			
		||||
            batch_size           = batch_size,
 | 
			
		||||
            show_progress_bar    = show_prog,
 | 
			
		||||
            convert_to_numpy     = True,
 | 
			
		||||
            normalize_embeddings = True
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
    def search(
 | 
			
		||||
        self,
 | 
			
		||||
        query      : str,
 | 
			
		||||
        index_name : str,
 | 
			
		||||
        limit      : int = 10,
 | 
			
		||||
        score_threshold = 0.5,
 | 
			
		||||
    ):
 | 
			
		||||
        '''
 | 
			
		||||
        Parameters:
 | 
			
		||||
            index_name: one of ['chunks','blocks','notes']
 | 
			
		||||
        '''
 | 
			
		||||
        if not query:
 | 
			
		||||
            return None
 | 
			
		||||
 | 
			
		||||
        if index_name not in self.embeddings:
 | 
			
		||||
            logger.warning(
 | 
			
		||||
                f'Index "{index_name}" does not exist'
 | 
			
		||||
            )
 | 
			
		||||
            return None
 | 
			
		||||
 | 
			
		||||
        start = time.time()
 | 
			
		||||
 | 
			
		||||
        query_embedding = self.embed_chunks(query, show_prog=False)
 | 
			
		||||
        index_ids, index_embeddings, index_items = self.embeddings[index_name]
 | 
			
		||||
 | 
			
		||||
        hits = util.semantic_search(
 | 
			
		||||
            query_embedding,
 | 
			
		||||
            index_embeddings,
 | 
			
		||||
            top_k=limit,
 | 
			
		||||
            score_function=util.dot_score
 | 
			
		||||
        )[0]
 | 
			
		||||
 | 
			
		||||
        hits = [hit for hit in hits if hit['score'] >= score_threshold]
 | 
			
		||||
 | 
			
		||||
        for hit in hits:
 | 
			
		||||
            idx               = hit['corpus_id']
 | 
			
		||||
            hit['group_name'] = index_ids[idx]
 | 
			
		||||
            hit['item']       = index_items[idx]
 | 
			
		||||
 | 
			
		||||
        logger.info(f'{len(hits)} hits in {time.time()-start:.2f}s')
 | 
			
		||||
 | 
			
		||||
        return hits
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										106
									
								
								co3/co3.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										106
									
								
								co3/co3.py
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,106 @@
 | 
			
		||||
'''
 | 
			
		||||
CO4
 | 
			
		||||
 | 
			
		||||
CO4 is an abstract base class for scaffolding object hierarchies and managing operations
 | 
			
		||||
with associated database schemas. It facilitates something like a "lightweight ORM" for
 | 
			
		||||
classes/tables/states with fixed transformations of interest. The canonical use case is
 | 
			
		||||
managing hierarchical document relations, format conversions, and syntactical components.
 | 
			
		||||
'''
 | 
			
		||||
 | 
			
		||||
import inspect
 | 
			
		||||
import logging
 | 
			
		||||
from functools import wraps, partial
 | 
			
		||||
 | 
			
		||||
#from localsys.db.schema import tables
 | 
			
		||||
 | 
			
		||||
logger = logging.getLogger(__name__)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#def register_format(_format):
 | 
			
		||||
#    def decorator(func):
 | 
			
		||||
#        self.collate.format_map[_format] = func
 | 
			
		||||
#
 | 
			
		||||
#        @wraps(func)
 | 
			
		||||
#        def register(*args, **kwargs):
 | 
			
		||||
#            return func(*args, **kwargs)
 | 
			
		||||
#
 | 
			
		||||
#        return register
 | 
			
		||||
#    return decorator
 | 
			
		||||
 | 
			
		||||
def collate(action_key, action_groups=None):
 | 
			
		||||
    def decorator(func):
 | 
			
		||||
        if action_groups is None:
 | 
			
		||||
            action_groups = [None]
 | 
			
		||||
        func._action_data = (action_key, action_groups)
 | 
			
		||||
        return func
 | 
			
		||||
    return decorator
 | 
			
		||||
 | 
			
		||||
class FormatRegistryMeta(type):
 | 
			
		||||
    def __new__(cls, name, bases, attrs):
 | 
			
		||||
        action_registry = {}
 | 
			
		||||
 | 
			
		||||
        # add registered superclass methods; iterate over bases (usually just one), then
 | 
			
		||||
        # that base's chain down (reversed), then methods from each subclass
 | 
			
		||||
        for base in bases:
 | 
			
		||||
            for _class in reversed(base.mro()):
 | 
			
		||||
                methods = inspect.getmembers(_class, predicate=inspect.isfunction)
 | 
			
		||||
                for _, method in methods:
 | 
			
		||||
                    if hasattr(method, '_action_data'):
 | 
			
		||||
                        action_key, action_groups = method._action_data
 | 
			
		||||
                        action_registry[action_key] = (method, action_groups)
 | 
			
		||||
 | 
			
		||||
        # add final registered formats for the current class, overwriting any found in
 | 
			
		||||
        # superclass chain
 | 
			
		||||
        for attr_name, attr_value in attrs.items():
 | 
			
		||||
            if hasattr(attr_value, '_action_data'):
 | 
			
		||||
                action_key, action_groups = attr_value._action_data
 | 
			
		||||
                action_registry[action_key] = (method, action_groups)
 | 
			
		||||
 | 
			
		||||
        attrs['action_map'] = action_registry
 | 
			
		||||
 | 
			
		||||
        return super().__new__(cls, name, bases, attrs)
 | 
			
		||||
 | 
			
		||||
class CO3(metaclass=FormatRegistryMeta):
 | 
			
		||||
    '''
 | 
			
		||||
    CO3: COllate, COllect, COmpose - conversion & DB insertion base
 | 
			
		||||
 | 
			
		||||
    - Collate: organize and transform conversion outputs, possibly across class components
 | 
			
		||||
    - Collect: gather core attributes, conversion data, and subcomponents for DB insertion
 | 
			
		||||
    - Compose: construct object-associated DB table references through the class hierarchy
 | 
			
		||||
 | 
			
		||||
    Note: on action groups
 | 
			
		||||
        Group keys are simply named collections to make it easy for storage components to
 | 
			
		||||
        be attached to action subsets. They do _not_ augment the action registration
 | 
			
		||||
        namespace, meaning the action key should still be unique; the group key is purely
 | 
			
		||||
        auxiliary.
 | 
			
		||||
 | 
			
		||||
        Action methods can also be attached to several groups, in case there is
 | 
			
		||||
        overlapping utility within or across schemas or storage media. In this case, it
 | 
			
		||||
        becomes particularly critical to ensure registered `collate` methods really are
 | 
			
		||||
        just "gathering results" from possibly heavy-duty operations, rather than
 | 
			
		||||
        performing them when called, so as to reduce wasted computation.
 | 
			
		||||
    '''
 | 
			
		||||
    @property
 | 
			
		||||
    def attributes(self):
 | 
			
		||||
        '''
 | 
			
		||||
        Method to define how a subtype's inserts should be handled under `collect` for
 | 
			
		||||
        canonical attributes, i.e., inserts to the type's table.
 | 
			
		||||
        '''
 | 
			
		||||
        return vars(self)
 | 
			
		||||
 | 
			
		||||
    @property
 | 
			
		||||
    def components(self):
 | 
			
		||||
        '''
 | 
			
		||||
        Method to define how a subtype's inserts should be handled under `collect` for
 | 
			
		||||
        constituent components that need handling.
 | 
			
		||||
        '''
 | 
			
		||||
        return []
 | 
			
		||||
 | 
			
		||||
    def collate(self, action_key, *action_args, **action_kwargs):
 | 
			
		||||
        if action_key not in self.action_map:
 | 
			
		||||
            logger.debug(f'Collation for {action_key} not supported')
 | 
			
		||||
            return None
 | 
			
		||||
        else:
 | 
			
		||||
            return self.action_map[action_key](self)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										109
									
								
								co3/collector.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										109
									
								
								co3/collector.py
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,109 @@
 | 
			
		||||
'''
 | 
			
		||||
Defines the Collector base class.
 | 
			
		||||
 | 
			
		||||
This module is the critical "middleware" connecting the primitive object definitions and
 | 
			
		||||
their representations in the database. It operates with full knowledge of how both are
 | 
			
		||||
defined, and abstracts away both the prep work for DB insertions as well as updates
 | 
			
		||||
trickling down the primitive hierarchy.
 | 
			
		||||
 | 
			
		||||
The `src` format target is re-used for both canonical tables/primitives, as well as
 | 
			
		||||
<prim>_conversion_matter tables in tables/conversions under the `src` format. The latter
 | 
			
		||||
is meant to extend those attributes that are format-specific (i.e., would change when, say,
 | 
			
		||||
converting to `html5`), and thus need to be broken across the format dimension.
 | 
			
		||||
 | 
			
		||||
Note:
 | 
			
		||||
    Despite the structure of the database module, this class does not currently inherit
 | 
			
		||||
    from a super class in localsys.db (like the accessors and managers, for instance).
 | 
			
		||||
    This will likely ultimately be the model that's embraced, but until FTS (or other
 | 
			
		||||
    groups) need a collector, this will be remain an independent class. It is, however,
 | 
			
		||||
    named like a concrete subclass, taking on the "Core" prefix.
 | 
			
		||||
'''
 | 
			
		||||
 | 
			
		||||
from pathlib import Path
 | 
			
		||||
from collections import defaultdict
 | 
			
		||||
import logging
 | 
			
		||||
import importlib
 | 
			
		||||
import subprocess
 | 
			
		||||
from uuid import uuid4
 | 
			
		||||
 | 
			
		||||
import sqlalchemy as sa
 | 
			
		||||
 | 
			
		||||
from co3 import util
 | 
			
		||||
#from localsys.db.schema import tables
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
logger = logging.getLogger(__name__)
 | 
			
		||||
 | 
			
		||||
class Collector:
 | 
			
		||||
    def __init__(self):
 | 
			
		||||
        self._inserts = defaultdict(lambda: defaultdict(list))
 | 
			
		||||
 | 
			
		||||
    @property
 | 
			
		||||
    def inserts(self):
 | 
			
		||||
        return self._inserts_from_receipts()
 | 
			
		||||
 | 
			
		||||
    def _inserts_from_receipts(self, receipts=None, pop=False):
 | 
			
		||||
        inserts = defaultdict(list)
 | 
			
		||||
        
 | 
			
		||||
        if receipts is None:
 | 
			
		||||
            receipts = list(self._inserts.keys())
 | 
			
		||||
 | 
			
		||||
        for receipt in receipts:
 | 
			
		||||
            if pop: insert_dict = self._inserts.pop(receipt, {})
 | 
			
		||||
            else:   insert_dict = self._inserts[receipt]
 | 
			
		||||
 | 
			
		||||
            for table, insert_list in insert_dict.items():
 | 
			
		||||
                inserts[table].extend(insert_list)
 | 
			
		||||
 | 
			
		||||
        return dict(inserts)
 | 
			
		||||
 | 
			
		||||
    def _reset_session(self):
 | 
			
		||||
        self._inserts = defaultdict(lambda: defaultdict(list))
 | 
			
		||||
 | 
			
		||||
    def _generate_unique_receipt(self):
 | 
			
		||||
        return str(uuid4())
 | 
			
		||||
 | 
			
		||||
    def add_insert(self, table_name, insert_dict, receipts=None):
 | 
			
		||||
        '''
 | 
			
		||||
        TODO: formalize table_name mapping; at class level provide a `table_map`, or provide
 | 
			
		||||
        the table object itself to this method
 | 
			
		||||
        '''
 | 
			
		||||
        if table_name not in tables.table_map:
 | 
			
		||||
            #logger.debug(f'Inserts provided for non-existent table {table_name}')
 | 
			
		||||
            return None
 | 
			
		||||
 | 
			
		||||
        receipt = self._generate_unique_receipt()
 | 
			
		||||
 | 
			
		||||
        self._inserts[receipt][table_name].append(
 | 
			
		||||
            utils.db.prepare_insert(
 | 
			
		||||
                tables.table_map[table_name], 
 | 
			
		||||
                insert_dict
 | 
			
		||||
            )
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
        if receipts is not None:
 | 
			
		||||
            receipts.append(receipt)
 | 
			
		||||
 | 
			
		||||
        return receipt
 | 
			
		||||
 | 
			
		||||
    def collect_inserts(self, receipts=None):
 | 
			
		||||
        '''
 | 
			
		||||
        Collect insert-ready dictionaries for the core primitive schema. This method is
 | 
			
		||||
        effectively a light wrapper around the File and Note-based collection logic
 | 
			
		||||
        elsewhere in the class. 
 | 
			
		||||
 | 
			
		||||
        The overall collection scheme embraces a session-like sequential update model to
 | 
			
		||||
        an internal insert tracker. The sequence of insert methods is ordered according to
 | 
			
		||||
        the schema hierarchy, and higher level inserts dictate the scope for lower level
 | 
			
		||||
        inserts (all methods check and populate the same `inserts` dictionary). Calling
 | 
			
		||||
        this method flushes any existing inserts, ensuring a re-scan takes place across
 | 
			
		||||
        calls (or "sessions").
 | 
			
		||||
 | 
			
		||||
        Parameters:
 | 
			
		||||
            skip_updated: whether to ignore primitives with existing up-to-date
 | 
			
		||||
                          database entries
 | 
			
		||||
 | 
			
		||||
        Returns:
 | 
			
		||||
            Table name-indexed dictionary of insert lists (of column name-indexed dicts)
 | 
			
		||||
        '''
 | 
			
		||||
        return self._inserts_from_receipts(receipts, pop=True)
 | 
			
		||||
							
								
								
									
										89
									
								
								co3/composer.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										89
									
								
								co3/composer.py
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,89 @@
 | 
			
		||||
'''
 | 
			
		||||
Composer
 | 
			
		||||
 | 
			
		||||
Base for manually defining table compositions outside those natural to the schema
 | 
			
		||||
hierarchy (i.e., constructable by a `CO4.compose()` call).
 | 
			
		||||
 | 
			
		||||
Example: suppose we have a simple object hierarchy A(CO4) -> B -> C. C's in-built
 | 
			
		||||
`compose()` method may not always be desirable when constructing composite tables and
 | 
			
		||||
running related queries. In this case, a custom Composer can be used to make needed
 | 
			
		||||
composite tables easier to reference; in the case below, we define the "BC" composite
 | 
			
		||||
table.
 | 
			
		||||
 | 
			
		||||
```
 | 
			
		||||
class ExampleComposer(Composer):
 | 
			
		||||
 | 
			
		||||
    @register_table
 | 
			
		||||
    def BC(self):
 | 
			
		||||
        full_B = B.compose(full=True)
 | 
			
		||||
        full_C = C.compose(full=True)
 | 
			
		||||
        
 | 
			
		||||
        return full_B.join(
 | 
			
		||||
            full_C,
 | 
			
		||||
            full_B.c.name == full_C.c.name, # TODO: is this fine? or do we need base table refs
 | 
			
		||||
            outer=True
 | 
			
		||||
        )
 | 
			
		||||
'''
 | 
			
		||||
from pathlib import Path
 | 
			
		||||
 | 
			
		||||
from co3.mapper import Mapper
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def register_table(table_name=None):
 | 
			
		||||
    '''
 | 
			
		||||
    Registry decorator for defined composer classes. Decorating a class method simply
 | 
			
		||||
    attaches a `table_name` attribute to it, setting it to either a provided value or the
 | 
			
		||||
    name of the method itself. Methods with a `table_name` attribute are later swept up at
 | 
			
		||||
    the class level and placed in the `table_map`.
 | 
			
		||||
    '''
 | 
			
		||||
    def decorator(func):
 | 
			
		||||
        if table_name is None:
 | 
			
		||||
            table_name = func.__name__
 | 
			
		||||
        func.table_name = table_name
 | 
			
		||||
        return func
 | 
			
		||||
    return decorator
 | 
			
		||||
 | 
			
		||||
class Composer[M: Mapper]:
 | 
			
		||||
    '''
 | 
			
		||||
    Base composer wrapper for table groupings.
 | 
			
		||||
 | 
			
		||||
    The schema is centered around a connected group of tables (via foreign keys). Thus,
 | 
			
		||||
    most operations need to be coordinated across tables. The `accessors` submodules
 | 
			
		||||
    are mostly intended to provide a "secondary layer" over the base set of tables in the
 | 
			
		||||
    schema, exposing common higher level table compositions (i.e., chained JOINs). See
 | 
			
		||||
    concrete instances (e.g., CoreAccess, FTSAccessor) for actual implementations these
 | 
			
		||||
    tables; the base class does not expose
 | 
			
		||||
 | 
			
		||||
    Tables in subclasses are registered with the `register_table` decorator, automatically
 | 
			
		||||
    indexing them under the provided name and making them available via the `table_map`.
 | 
			
		||||
    '''
 | 
			
		||||
    def __init__(self):
 | 
			
		||||
        self._set_tables()
 | 
			
		||||
 | 
			
		||||
    def _set_tables(self):
 | 
			
		||||
        '''
 | 
			
		||||
        Skip properties (so appropriate delays can be used), and 
 | 
			
		||||
 | 
			
		||||
        Set the table registry at the class level. This only takes place during the first
 | 
			
		||||
        instantiation of the class, and makes it possible to definitively tie methods to
 | 
			
		||||
        composed tables during lookup with `get_table()`.
 | 
			
		||||
        '''
 | 
			
		||||
        cls = self.__class__
 | 
			
		||||
 | 
			
		||||
        # in case the class has already be instantiated
 | 
			
		||||
        if hasattr(cls, 'table_map'): return
 | 
			
		||||
 | 
			
		||||
        table_map = {}
 | 
			
		||||
        for key, value in cls.__dict__.items():
 | 
			
		||||
            if isinstance(value, property):
 | 
			
		||||
                continue  # Skip properties
 | 
			
		||||
            if callable(value) and hasattr(value, 'table_name'):
 | 
			
		||||
                table_map[value.table_name] = value(self)
 | 
			
		||||
 | 
			
		||||
        cls.table_map = table_map
 | 
			
		||||
 | 
			
		||||
    def get_table(self, table_name):
 | 
			
		||||
        '''
 | 
			
		||||
        Retrieve the named table composition, if defined.
 | 
			
		||||
        '''
 | 
			
		||||
        return self.table_map.get(table_name)
 | 
			
		||||
							
								
								
									
										87
									
								
								co3/database.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										87
									
								
								co3/database.py
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,87 @@
 | 
			
		||||
'''
 | 
			
		||||
Database
 | 
			
		||||
 | 
			
		||||
Central object for defining storage protocol-specific interfaces. The database wraps up
 | 
			
		||||
central items for interacting with database resources, namely the Accessor and Manager
 | 
			
		||||
objects.
 | 
			
		||||
 | 
			
		||||
The Database type hierarchy attempts to be exceedingly general; SQL-derivatives should
 | 
			
		||||
subclass from the RelationalDatabase subtype, for example, which itself becomes a new
 | 
			
		||||
generic via type dependence on Relation.
 | 
			
		||||
'''
 | 
			
		||||
 | 
			
		||||
import logging
 | 
			
		||||
from typing import Self
 | 
			
		||||
 | 
			
		||||
from co3.accessor import Accessor
 | 
			
		||||
from co3.composer import Composer
 | 
			
		||||
from co3.manager  import Manager
 | 
			
		||||
from co3.indexer  import Indexer
 | 
			
		||||
 | 
			
		||||
logger = logging.getLogger(__name__)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class Database:
 | 
			
		||||
    accessor: type[Accessor[Self]] = Accessor
 | 
			
		||||
    manager:  type[Manager[Self]]  = Manager
 | 
			
		||||
 | 
			
		||||
    def __init__(self, resource):
 | 
			
		||||
        '''
 | 
			
		||||
        Variables:
 | 
			
		||||
            _local_cache: a database-local property store for ad-hoc CacheBlock-esque
 | 
			
		||||
                          methods, that are nevertheless _not_ query/group-by responses to
 | 
			
		||||
                          pass on to the Indexer. Dependent properties should write to the
 | 
			
		||||
                          this cache and check for existence of stored results; the cache
 | 
			
		||||
                          state must be managed globally.
 | 
			
		||||
        ''' 
 | 
			
		||||
        self.resource = resource
 | 
			
		||||
 | 
			
		||||
        self._access = self.accessor(self)
 | 
			
		||||
        self._manage = self.manager(self)
 | 
			
		||||
 | 
			
		||||
        self._index  = Indexer(self._access)
 | 
			
		||||
        self._local_cache = {}
 | 
			
		||||
 | 
			
		||||
        self.reset_cache = False
 | 
			
		||||
 | 
			
		||||
    @property
 | 
			
		||||
    def engine(self):
 | 
			
		||||
        '''
 | 
			
		||||
        Database property to provide a singleton engine for DB interaction, initializing
 | 
			
		||||
        the database if it doesn't already exist.
 | 
			
		||||
 | 
			
		||||
        TODO: figure out thread safety across engines and/or connection. Any issue with
 | 
			
		||||
        hanging on to the same engine instance for the Database instance?
 | 
			
		||||
        '''
 | 
			
		||||
        raise NotImplementedError
 | 
			
		||||
 | 
			
		||||
    def connect(self):
 | 
			
		||||
        self.engine.connect()
 | 
			
		||||
 | 
			
		||||
    @property
 | 
			
		||||
    def access(self):
 | 
			
		||||
        return self._access
 | 
			
		||||
 | 
			
		||||
    @property
 | 
			
		||||
    def compose(self):
 | 
			
		||||
        return self._compose
 | 
			
		||||
 | 
			
		||||
    @property
 | 
			
		||||
    def index(self):
 | 
			
		||||
        if self.reset_cache:
 | 
			
		||||
            self._index.cache_clear()
 | 
			
		||||
            self.reset_cache = False
 | 
			
		||||
        return self._index
 | 
			
		||||
 | 
			
		||||
    @property
 | 
			
		||||
    def manage(self):
 | 
			
		||||
        '''
 | 
			
		||||
        Accessing `.manage` queues a cache clear on the external index, as well wipes the
 | 
			
		||||
        local index.
 | 
			
		||||
        '''
 | 
			
		||||
        self.reset_cache = True
 | 
			
		||||
        self._local_cache = {}
 | 
			
		||||
        return self._manage
 | 
			
		||||
 | 
			
		||||
    def populate_indexes(self): pass
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										3
									
								
								co3/databases/__init__.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										3
									
								
								co3/databases/__init__.py
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,3 @@
 | 
			
		||||
from co3.databases.sql import *
 | 
			
		||||
from co3.databases.fts import FTSDatabase
 | 
			
		||||
from co3.databases.vss import VSSDatabase
 | 
			
		||||
							
								
								
									
										
											BIN
										
									
								
								co3/databases/__pycache__/__init__.cpython-311.pyc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								co3/databases/__pycache__/__init__.cpython-311.pyc
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							
							
								
								
									
										
											BIN
										
									
								
								co3/databases/__pycache__/__init__.cpython-312.pyc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								co3/databases/__pycache__/__init__.cpython-312.pyc
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							
							
								
								
									
										
											BIN
										
									
								
								co3/databases/__pycache__/_base.cpython-311.pyc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								co3/databases/__pycache__/_base.cpython-311.pyc
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							
							
								
								
									
										
											BIN
										
									
								
								co3/databases/__pycache__/core.cpython-311.pyc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								co3/databases/__pycache__/core.cpython-311.pyc
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							
							
								
								
									
										
											BIN
										
									
								
								co3/databases/__pycache__/fts.cpython-311.pyc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								co3/databases/__pycache__/fts.cpython-311.pyc
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							
							
								
								
									
										
											BIN
										
									
								
								co3/databases/__pycache__/fts.cpython-312.pyc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								co3/databases/__pycache__/fts.cpython-312.pyc
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							
							
								
								
									
										
											BIN
										
									
								
								co3/databases/__pycache__/sql.cpython-312.pyc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								co3/databases/__pycache__/sql.cpython-312.pyc
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							
							
								
								
									
										
											BIN
										
									
								
								co3/databases/__pycache__/vss.cpython-312.pyc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								co3/databases/__pycache__/vss.cpython-312.pyc
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							
							
								
								
									
										8
									
								
								co3/databases/fts.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										8
									
								
								co3/databases/fts.py
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,8 @@
 | 
			
		||||
from co3.database import Database
 | 
			
		||||
 | 
			
		||||
from co3.accessors.fts import FTSAccessor
 | 
			
		||||
from co3.managers.fts  import FTSManager
 | 
			
		||||
 | 
			
		||||
class FTSDatabase(Database):
 | 
			
		||||
    accessor = FTSAccessor
 | 
			
		||||
    manager  = FTSManager
 | 
			
		||||
							
								
								
									
										34
									
								
								co3/databases/sql.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										34
									
								
								co3/databases/sql.py
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,34 @@
 | 
			
		||||
from typing import Self
 | 
			
		||||
 | 
			
		||||
from co3.database import Database
 | 
			
		||||
 | 
			
		||||
from co3.accessors.sql import RelationalAccessor, TabularAccessor, SQLAccessor
 | 
			
		||||
from co3.managers.sql  import RelationalManager,  TabularManager,  SQLManager
 | 
			
		||||
 | 
			
		||||
from co3.relation import Relation
 | 
			
		||||
from co3.relations import TabularRelation, SQLTable
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class RelationalDatabase[R: Relation](Database):
 | 
			
		||||
    accessor: type[RelationalAccessor[Self, R]] = RelationalAccessor[Self, R]
 | 
			
		||||
    manager:  type[RelationalManager[Self, R]]  = RelationalManager[Self, R]
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class TabularDatabase[R: TabularRelation](RelationalDatabase[R]):
 | 
			
		||||
    '''
 | 
			
		||||
    accessor/manager assignments satisfy supertype's type settings;
 | 
			
		||||
    `TabluarAccessor[Self, R]` is of type `type[RelationalAccessor[Self, R]]`
 | 
			
		||||
    (and yes, `type[]` specifies that the variable is itself being set to a type or a
 | 
			
		||||
    class, rather than a satisfying _instance_)
 | 
			
		||||
    '''
 | 
			
		||||
    accessor: type[TabularAccessor[Self, R]] = TabularAccessor[Self, R]
 | 
			
		||||
    manager:  type[TabularManager[Self, R]]  = TabularManager[Self, R]
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class SQLDatabase[R: SQLTable](TabularDatabase[R]):
 | 
			
		||||
    accessor = SQLAccessor
 | 
			
		||||
    manager  = SQLManager
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class SQLiteDatabase(SQLDatabase[SQLTable]):
 | 
			
		||||
    pass
 | 
			
		||||
							
								
								
									
										9
									
								
								co3/databases/vss.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										9
									
								
								co3/databases/vss.py
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,9 @@
 | 
			
		||||
from co3.database import Database
 | 
			
		||||
 | 
			
		||||
from co3.accessors.vss import VSSAccessor
 | 
			
		||||
from co3.managers.vss  import VSSManager
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class VSSDatabase(Database):
 | 
			
		||||
    accessor = VSSAccessor
 | 
			
		||||
    manager  = VSSManager
 | 
			
		||||
							
								
								
									
										433
									
								
								co3/indexer.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										433
									
								
								co3/indexer.py
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,433 @@
 | 
			
		||||
import time
 | 
			
		||||
import logging
 | 
			
		||||
import threading
 | 
			
		||||
from collections import defaultdict
 | 
			
		||||
from collections.abc import Iterable
 | 
			
		||||
 | 
			
		||||
import sqlalchemy as sa
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
logger = logging.getLogger(__name__)
 | 
			
		||||
 | 
			
		||||
class Indexer:
 | 
			
		||||
    '''
 | 
			
		||||
    Indexer class
 | 
			
		||||
 | 
			
		||||
    Provides restricted access to an underlying Accessor to enable more efficient, superficial
 | 
			
		||||
    caching.
 | 
			
		||||
 | 
			
		||||
    Cache clearing is to be handled by a wrapper class, like the Database.
 | 
			
		||||
 | 
			
		||||
    Caching occurs at the class level, with indexes prefixed by table's origin Composer.
 | 
			
		||||
    This means that cached selects/group-bys will be available regardless of the provided
 | 
			
		||||
    Accessors so long as the same Composer is used under the hood. 
 | 
			
		||||
    '''
 | 
			
		||||
    _cls_select_cache  = {}
 | 
			
		||||
    _cls_groupby_cache = defaultdict(dict)
 | 
			
		||||
    
 | 
			
		||||
    def __init__(self, accessor, cache_select=True, cache_groupby=True):
 | 
			
		||||
        self.accessor = accessor
 | 
			
		||||
 | 
			
		||||
        # set instance caches; if remains None, methods can't index
 | 
			
		||||
        self._select_cache = None
 | 
			
		||||
        self._groupby_cache = None
 | 
			
		||||
 | 
			
		||||
        if cache_groupby and not cache_select:
 | 
			
		||||
            raise ValueError('cannot cache groupbys without select caching enabled')
 | 
			
		||||
 | 
			
		||||
        if cache_select:
 | 
			
		||||
            self._select_cache = self._cls_select_cache
 | 
			
		||||
 | 
			
		||||
        if cache_groupby:
 | 
			
		||||
            self._groupby_cache = self._cls_groupby_cache
 | 
			
		||||
 | 
			
		||||
        self._access_lock = threading.Lock()
 | 
			
		||||
 | 
			
		||||
    def cache_clear(self, group_by_only=False): 
 | 
			
		||||
        self._groupby_cache.clear()
 | 
			
		||||
        if not group_by_only:
 | 
			
		||||
            self._select_cache.clear()
 | 
			
		||||
 | 
			
		||||
    def cache_block(
 | 
			
		||||
        self,
 | 
			
		||||
        table,
 | 
			
		||||
        **kwargs,
 | 
			
		||||
    ):
 | 
			
		||||
        '''
 | 
			
		||||
        Provide a user-friendly, dynamically re-indexable
 | 
			
		||||
        '''
 | 
			
		||||
        return CacheBlock(
 | 
			
		||||
            indexer = self,
 | 
			
		||||
            table   = table,
 | 
			
		||||
            **kwargs,
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
    def cached_query(
 | 
			
		||||
        self,
 | 
			
		||||
        table, 
 | 
			
		||||
 | 
			
		||||
        cols        = None,
 | 
			
		||||
        where       = None,
 | 
			
		||||
        distinct_on = None,
 | 
			
		||||
        order_by    = None,
 | 
			
		||||
        limit       = 0,
 | 
			
		||||
 | 
			
		||||
        group_by    = None,
 | 
			
		||||
        agg_on      = None,
 | 
			
		||||
        index_on    = None,
 | 
			
		||||
    ):
 | 
			
		||||
        '''
 | 
			
		||||
        Like `group_by`, but makes a full query to the Accessors table `table_name` and
 | 
			
		||||
        caches the results. The processing performed by the GROUP BY is also cached.
 | 
			
		||||
 | 
			
		||||
        Update: `cached_select` and `cached_group_by` now unified by a single
 | 
			
		||||
        `cached_query` method. This allows better defined GROUP BY caches, that are
 | 
			
		||||
        reactive to the full set of parameters returning the result set (and not just the
 | 
			
		||||
        table, requiring a full query).
 | 
			
		||||
 | 
			
		||||
        Note: on cache keys
 | 
			
		||||
            Cache keys are now fully stringified, as many objects are now allowed to be
 | 
			
		||||
            native SQLAlchemy objects. Indexing these objects works, but doing so will
 | 
			
		||||
            condition the cache on their memory addresses, which isn't what we want.
 | 
			
		||||
            SQLAlchemy converts most join/column/table-like objects to reasonable strings,
 | 
			
		||||
            which will look the same regardless of instance.
 | 
			
		||||
 | 
			
		||||
            Context: this became a clear issue when passing in more
 | 
			
		||||
            `order_by=<col>.desc()`. The `desc()` causes the index to store the column in
 | 
			
		||||
            an instance-specific way, rather than an easily re-usable, canonical column
 | 
			
		||||
            reference. Each time the CoreDatabase.files() was being called, for instance,
 | 
			
		||||
            that @property would be re-evaluated, causing `desc()` to be re-initialized,
 | 
			
		||||
            and thus look different to the cache. Stringifying everything prevents this
 | 
			
		||||
            (although this could well be an indication that only a single `cache_block`
 | 
			
		||||
            should ever be returned be database properties).
 | 
			
		||||
 | 
			
		||||
        Note: on access locks
 | 
			
		||||
            A double-checked locking scheme is employed before both of the stages (select
 | 
			
		||||
            and manual group by), using the same lock. This resolves the common scenario
 | 
			
		||||
            where many threads need to look up a query in the cache, experience a cache
 | 
			
		||||
            miss, and try to do the work. This non-linearly explodes the total time to
 | 
			
		||||
            wait in my experience, so doing this only when needed saves tons of time,
 | 
			
		||||
            especially in high-congestion moments.
 | 
			
		||||
        '''
 | 
			
		||||
        start     = time.time()
 | 
			
		||||
        cache_key = tuple(map(str, (table, cols, where, distinct_on, order_by, limit)))
 | 
			
		||||
 | 
			
		||||
        # apparently this is the double-check locking scheme (didn't realize when implementing)
 | 
			
		||||
        if self._select_cache is None or cache_key not in self._select_cache:
 | 
			
		||||
            # cache re-compute possible, acquire lock to continue. A later thread may
 | 
			
		||||
            # acquire this after work has been done by an earlier thread, so re-eval the
 | 
			
		||||
            # condition below before actually performing a DB read. If access results in a
 | 
			
		||||
            # cache hit, locking isn't needed.
 | 
			
		||||
            with self._access_lock:
 | 
			
		||||
                if self._select_cache is None or cache_key not in self._select_cache:
 | 
			
		||||
                    results = self.accessor.select(
 | 
			
		||||
                        table,
 | 
			
		||||
                        cols=cols,
 | 
			
		||||
                        where=where,
 | 
			
		||||
                        distinct_on=distinct_on,
 | 
			
		||||
                        order_by=order_by,
 | 
			
		||||
                        limit=limit,
 | 
			
		||||
                        mappings=True
 | 
			
		||||
                    )
 | 
			
		||||
 | 
			
		||||
                    # cache results if select_cache is defined
 | 
			
		||||
                    if self._select_cache is not None: 
 | 
			
		||||
                        self._select_cache[cache_key] = results
 | 
			
		||||
 | 
			
		||||
                    logger.debug(
 | 
			
		||||
                        f'Indexer "select" cache miss for table "{table}": access in {time.time()-start:.4f}s'
 | 
			
		||||
                    )
 | 
			
		||||
                else:
 | 
			
		||||
                    results = self._select_cache[cache_key]
 | 
			
		||||
                    logger.debug(
 | 
			
		||||
                        f'Indexer "select" cache hit for table "{table}": access in {time.time()-start:.4f}s'
 | 
			
		||||
                    )
 | 
			
		||||
        else:
 | 
			
		||||
            results = self._select_cache[cache_key]
 | 
			
		||||
            logger.debug(
 | 
			
		||||
                f'Indexer "select" cache hit for table "{table}": access in {time.time()-start:.4f}s'
 | 
			
		||||
            )
 | 
			
		||||
 | 
			
		||||
        start     = time.time()
 | 
			
		||||
        cache_key = (*cache_key, group_by, agg_on, index_on)
 | 
			
		||||
 | 
			
		||||
        if group_by is not None:
 | 
			
		||||
            if self._groupby_cache is None or cache_key not in self._groupby_cache:
 | 
			
		||||
                with self._access_lock:
 | 
			
		||||
                    if self._groupby_cache is None or cache_key not in self._groupby_cache:
 | 
			
		||||
                        results = self.group_by(
 | 
			
		||||
                            results,
 | 
			
		||||
                            group_by     = group_by,
 | 
			
		||||
                            agg_on       = agg_on,
 | 
			
		||||
                            index_on     = index_on,
 | 
			
		||||
                            return_index = True,
 | 
			
		||||
                        )
 | 
			
		||||
 | 
			
		||||
                        if self._groupby_cache is not None:
 | 
			
		||||
                            self._groupby_cache[cache_key] = results
 | 
			
		||||
 | 
			
		||||
                        logger.debug(
 | 
			
		||||
                            f'Indexer "group_by" cache miss for table "{table}": access in {time.time()-start:.4f}s'
 | 
			
		||||
                        )
 | 
			
		||||
                    else:
 | 
			
		||||
                        results = self._groupby_cache[cache_key]
 | 
			
		||||
                        logger.debug(
 | 
			
		||||
                            f'Indexer "group_by" cache hit for table "{table}": access in {time.time()-start:.4f}s'
 | 
			
		||||
                        )
 | 
			
		||||
            else:
 | 
			
		||||
                results = self._groupby_cache[cache_key]
 | 
			
		||||
                logger.debug(
 | 
			
		||||
                    f'Indexer "group_by" cache hit for table "{table}": access in {time.time()-start:.4f}s'
 | 
			
		||||
                )
 | 
			
		||||
 | 
			
		||||
        return results
 | 
			
		||||
        
 | 
			
		||||
    @classmethod
 | 
			
		||||
    def group_by(
 | 
			
		||||
        cls,
 | 
			
		||||
        rows,
 | 
			
		||||
        group_by,
 | 
			
		||||
        agg_on=None,
 | 
			
		||||
        index_on=None,
 | 
			
		||||
        return_index=False,
 | 
			
		||||
    ):
 | 
			
		||||
        '''
 | 
			
		||||
        Post-query "group by"-like aggregation. Creates an index over a set of columns
 | 
			
		||||
        (`group_by_cols`), and aggregates values from `agg_cols` under the groups.
 | 
			
		||||
 | 
			
		||||
        Rows can be dicts or mappings, and columns can be strings or SQLAlchemy columns.
 | 
			
		||||
        To ensure the right columns are being used for the operation, it's best to pass in
 | 
			
		||||
        mappings and use SQA columns if you aren't sure exactly how the keys look in your
 | 
			
		||||
        results (dicts can have ambiguous keys across tables with the same columns and/or
 | 
			
		||||
        different labeling schemes altogether).
 | 
			
		||||
 | 
			
		||||
        TODO: add a flag that handles None's as distinct. That is, for the group_by
 | 
			
		||||
        column(s) of interest, if rows in the provided query set have NULL values for
 | 
			
		||||
        these columns, treat all such rows as their "own group" and return them alongside
 | 
			
		||||
        the grouped/aggregated ones. This is behavior desired by something like
 | 
			
		||||
        FTSManager.recreate(), which wants to bundle up conversions for blocks
 | 
			
		||||
        (effectively grouping by blocks.name and link.id, aggregating on
 | 
			
		||||
        block_conversions.format, then flattening). You could either do this, or as the
 | 
			
		||||
        caller just make sure to first filter the result set before grouping (e.g.,
 | 
			
		||||
        splitting the NULL-valued rows from those that are well-defined), and then
 | 
			
		||||
        stitching the two sets back together afterward.
 | 
			
		||||
 | 
			
		||||
        Multi-dim update:
 | 
			
		||||
        
 | 
			
		||||
        - group_by: can be a tuple of tuples of columns. Each inner tuple is a nested
 | 
			
		||||
          "group by index" in the group by index
 | 
			
		||||
        - 
 | 
			
		||||
        '''
 | 
			
		||||
        if not rows:
 | 
			
		||||
            return {} if return_index else []
 | 
			
		||||
 | 
			
		||||
        rows_are_mappings = not isinstance(rows[0], dict)
 | 
			
		||||
 | 
			
		||||
        if not rows_are_mappings:
 | 
			
		||||
            if isinstance(group_by, sa.Column):
 | 
			
		||||
                group_by = group_by.name
 | 
			
		||||
            else:
 | 
			
		||||
                group_by = str(group_by)
 | 
			
		||||
 | 
			
		||||
        #if group_by is None:                     group_by = []
 | 
			
		||||
        #elif not isinstance(group_by, Iterable): group_by = [group_by]
 | 
			
		||||
 | 
			
		||||
        if agg_on is None:                     agg_on = []
 | 
			
		||||
        elif not isinstance(agg_on, Iterable): agg_on = [agg_on]
 | 
			
		||||
 | 
			
		||||
        if index_on is None:                     index_on = []
 | 
			
		||||
        elif not isinstance(index_on, Iterable): index_on = [index_on]
 | 
			
		||||
 | 
			
		||||
        agg_on_names = []
 | 
			
		||||
        for agg in agg_on:
 | 
			
		||||
            # if a SQA column, can either use `.name` or `str(c)`. The latter includes the
 | 
			
		||||
            # table name, the former doesn't; ambiguity can be introduced here.
 | 
			
		||||
            if isinstance(agg, sa.Column):
 | 
			
		||||
                agg_on_names.append(agg.name)
 | 
			
		||||
            else:
 | 
			
		||||
                agg_on_names.append(str(agg))
 | 
			
		||||
 | 
			
		||||
        index_on_names = []
 | 
			
		||||
        for index in index_on:
 | 
			
		||||
            # if a SQA column, can either use `.name` or `str(c)`. The latter includes the
 | 
			
		||||
            # table name, the former doesn't; ambiguity can be introduced here.
 | 
			
		||||
            if isinstance(index, sa.Column):
 | 
			
		||||
                index_on_names.append(index.name)
 | 
			
		||||
            else:
 | 
			
		||||
                index_on_names.append(str(index))
 | 
			
		||||
 | 
			
		||||
        # when rows are dicts, use columns' string names
 | 
			
		||||
        if not rows_are_mappings:
 | 
			
		||||
            agg_on = agg_on_names
 | 
			
		||||
            index_on = index_on_names
 | 
			
		||||
 | 
			
		||||
        #print(f'rows_are_mappings: {rows_are_mappings}')
 | 
			
		||||
        #print(f'group_by: {group_by}')
 | 
			
		||||
        #print(f'agg_on: {agg_on}')
 | 
			
		||||
        #print(f'agg_on_names: {agg_on_names}')
 | 
			
		||||
        #print(f'index_on: {index_on}')
 | 
			
		||||
        #print(f'index_on_names: {index_on_names}')
 | 
			
		||||
 | 
			
		||||
        # "group by" block ID and wrangle the links into a list
 | 
			
		||||
        group_by_idx = {}
 | 
			
		||||
        for row in rows:
 | 
			
		||||
            # generic get
 | 
			
		||||
            group_by_attr = row.get(group_by)
 | 
			
		||||
 | 
			
		||||
            # wrap possible mapping dict
 | 
			
		||||
            row_dict = dict(row)
 | 
			
		||||
 | 
			
		||||
            # add new entries; standardize 
 | 
			
		||||
            #aggregates = {}
 | 
			
		||||
            #for agg_name in agg_on_names:
 | 
			
		||||
            #    aggregates[agg_name] = []
 | 
			
		||||
            #row_dict['aggregates'] = aggregates
 | 
			
		||||
            row_dict['aggregates'] = []
 | 
			
		||||
 | 
			
		||||
            indexes = {}
 | 
			
		||||
            for index_name in index_on_names:
 | 
			
		||||
                indexes[index_name] = {}
 | 
			
		||||
            row_dict['indexes'] = indexes
 | 
			
		||||
 | 
			
		||||
            if group_by_attr is None:
 | 
			
		||||
                continue
 | 
			
		||||
 | 
			
		||||
            if group_by_attr not in group_by_idx:
 | 
			
		||||
                group_by_idx[group_by_attr] = row_dict
 | 
			
		||||
 | 
			
		||||
            # actually include all agg cols, even if None, so agg array indexes align
 | 
			
		||||
            agg_dict = {
 | 
			
		||||
                agg_on_names[i] : row.get(agg_col)
 | 
			
		||||
                for i, agg_col in enumerate(agg_on)
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
            #aggregates = group_by_idx[group_by_attr]['aggregates']
 | 
			
		||||
            #for agg_key, agg_val in agg_dict.items():
 | 
			
		||||
            #    aggregates[agg_key].append(agg_val)
 | 
			
		||||
            aggregates = group_by_idx[group_by_attr]['aggregates']
 | 
			
		||||
            aggregates.append(agg_dict)
 | 
			
		||||
 | 
			
		||||
            indexes = group_by_idx[group_by_attr]['indexes']
 | 
			
		||||
            for i, index_col in enumerate(index_on):
 | 
			
		||||
                index_name = index_on_names[i]
 | 
			
		||||
                indexes[index_name][row[index_col]] = agg_dict
 | 
			
		||||
 | 
			
		||||
        if return_index:
 | 
			
		||||
            return group_by_idx
 | 
			
		||||
 | 
			
		||||
        return list(group_by_idx.values())
 | 
			
		||||
 | 
			
		||||
class CacheBlock:
 | 
			
		||||
    '''
 | 
			
		||||
    CacheBlock class
 | 
			
		||||
 | 
			
		||||
    Wraps up a set of query parameters for a specific entity, and provides cached access
 | 
			
		||||
    to different types of "re-queries" via an associated Indexer.
 | 
			
		||||
 | 
			
		||||
    The goal here is to help build/define entities as the possibly complex transformations
 | 
			
		||||
    on the base schema that they are. For example, the Note primitive (entity)
 | 
			
		||||
    incorporates details across `files`, `notes`, `note_conversions`, and
 | 
			
		||||
    `note_conversion_matter` tables (defined in a single endpoint by a Composer), often
 | 
			
		||||
    needs to be selected in particular ways (via an Accessor), and results stored for fast
 | 
			
		||||
    access later on (handled by an Indexer). This pipeline can be daunting and requires
 | 
			
		||||
    too many moving parts to be handled explicitly everywhere. CacheBlocks wrap up a set
 | 
			
		||||
    of query "preferences," exposing a simpler interface for downstream access to
 | 
			
		||||
    entities. It still allows for low-level control over re-grouping/indexing, raw hits to
 | 
			
		||||
    the actual DB, etc, but keeps things tighter and well-behaved for the Indexer.
 | 
			
		||||
    
 | 
			
		||||
    You can think of these as the Indexer's "fingers"; they're deployable mini-Indexes
 | 
			
		||||
    that "send back" results to the class cache, which is "broadcast" to all other
 | 
			
		||||
    instances for use when necessary.
 | 
			
		||||
 | 
			
		||||
    Note: Example usage
 | 
			
		||||
        
 | 
			
		||||
        ```py
 | 
			
		||||
        cb = CacheBlock()
 | 
			
		||||
 | 
			
		||||
        # Set up cached queries with chained params or via call:
 | 
			
		||||
        
 | 
			
		||||
        cb.where(t.notes.c.name=="name").group_by(t.note_conversions.c.format)
 | 
			
		||||
        cb() # get results
 | 
			
		||||
 | 
			
		||||
        # - OR - # (use strings when known)
 | 
			
		||||
 | 
			
		||||
        cb.where(t.notes.c.name=="name").group_by('format')
 | 
			
		||||
        cb() # get results
 | 
			
		||||
 | 
			
		||||
        # - OR - # (use kwargs in the call; results returned right away)
 | 
			
		||||
 | 
			
		||||
        cb(
 | 
			
		||||
            where=(t.notes.c.name=="name"),
 | 
			
		||||
            group_by='format'
 | 
			
		||||
        )
 | 
			
		||||
        ```
 | 
			
		||||
 | 
			
		||||
    '''
 | 
			
		||||
    def __init__(
 | 
			
		||||
        self,
 | 
			
		||||
        indexer,
 | 
			
		||||
        table,
 | 
			
		||||
 | 
			
		||||
        cols        = None,
 | 
			
		||||
        where       = None,
 | 
			
		||||
        distinct_on = None,
 | 
			
		||||
        order_by    = None,
 | 
			
		||||
        limit       = 0,
 | 
			
		||||
 | 
			
		||||
        group_by    = None,
 | 
			
		||||
        agg_on      = None,
 | 
			
		||||
        index_on    = None,
 | 
			
		||||
    ):
 | 
			
		||||
        self.indexer = indexer
 | 
			
		||||
 | 
			
		||||
        self.query_args = {
 | 
			
		||||
            'table'       : table,
 | 
			
		||||
    
 | 
			
		||||
            'cols'        : cols,
 | 
			
		||||
            'where'       : where,
 | 
			
		||||
            'distinct_on' : distinct_on,
 | 
			
		||||
            'order_by'    : order_by,
 | 
			
		||||
            'limit'       : limit,
 | 
			
		||||
    
 | 
			
		||||
            'group_by'    : group_by,
 | 
			
		||||
            'agg_on'      : agg_on,
 | 
			
		||||
            'index_on'    : index_on,
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
    def _query(self, **kwargs):
 | 
			
		||||
        '''Make cached query with defaults, override with those provided'''
 | 
			
		||||
        return self.indexer.cached_query(**{
 | 
			
		||||
            k : (v if k not in kwargs else kwargs[k])
 | 
			
		||||
            for k,v in self.query_args.items()
 | 
			
		||||
        })
 | 
			
		||||
 | 
			
		||||
    def __call__(self, **kwargs):
 | 
			
		||||
        '''
 | 
			
		||||
        TODO: overload this for the queries, i.e. getting keys or returning aggregates
 | 
			
		||||
        '''
 | 
			
		||||
        return self._query(**kwargs)
 | 
			
		||||
 | 
			
		||||
    def where(self, where):
 | 
			
		||||
        self.query_args['where'] = where
 | 
			
		||||
        return self
 | 
			
		||||
        #return self._query(where=where)
 | 
			
		||||
 | 
			
		||||
    def distinct_on(self, distinct_on):
 | 
			
		||||
        self.query_args['distinct_on'] = distinct_on
 | 
			
		||||
        return self
 | 
			
		||||
        #return self._query(distinct_on=distinct_on)
 | 
			
		||||
 | 
			
		||||
    def order_by(self, order_by):
 | 
			
		||||
        self.query_args['order_by'] = order_by
 | 
			
		||||
        return self
 | 
			
		||||
        #return self._query(order_by=order_by)
 | 
			
		||||
 | 
			
		||||
    def limit(self, limit):
 | 
			
		||||
        self.query_args['limit'] = limit
 | 
			
		||||
        return self
 | 
			
		||||
 | 
			
		||||
    def group_by(self, group_by):
 | 
			
		||||
        self.query_args['group_by'] = group_by
 | 
			
		||||
        return self
 | 
			
		||||
        #return self._query(group_by=group_by)
 | 
			
		||||
    
 | 
			
		||||
Some files were not shown because too many files have changed in this diff Show More
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user