co3/co3/collector.py

111 lines
4.0 KiB
Python

'''
Defines the Collector base class.
This module is the critical "middleware" connecting the primitive object definitions and
their representations in the database. It operates with full knowledge of how both are
defined, and abstracts away both the prep work for DB insertions as well as updates
trickling down the primitive hierarchy.
The `src` format target is re-used for both canonical tables/primitives, as well as
<prim>_conversion_matter tables in tables/conversions under the `src` format. The latter
is meant to extend those attributes that are format-specific (i.e., would change when, say,
converting to `html5`), and thus need to be broken across the format dimension.
Note:
Despite the structure of the database module, this class does not currently inherit
from a super class in localsys.db (like the accessors and managers, for instance).
This will likely ultimately be the model that's embraced, but until FTS (or other
groups) need a collector, this will remain an independent class. It is, however,
named like a concrete subclass, taking on the "Core" prefix.
'''
from pathlib import Path
from collections import defaultdict
import logging
import importlib
import subprocess
from uuid import uuid4
import sqlalchemy as sa
from co3 import util
from co3.component import Component
#from localsys.db.schema import tables
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
class Collector[C: Component, M: 'Mapper[C]']:
def __init__(self):
self._inserts = defaultdict(lambda: defaultdict(list))
@property
def inserts(self):
return self._inserts_from_receipts()
def _inserts_from_receipts(self, receipts=None, pop=False):
inserts = defaultdict(list)
if receipts is None:
receipts = list(self._inserts.keys())
for receipt in receipts:
if pop: insert_dict = self._inserts.pop(receipt, {})
else: insert_dict = self._inserts[receipt]
for table, insert_list in insert_dict.items():
inserts[table].extend(insert_list)
return dict(inserts)
def _reset_session(self):
self._inserts = defaultdict(lambda: defaultdict(list))
def _generate_unique_receipt(self):
return str(uuid4())
def add_insert(self, table_name, insert_dict, receipts=None):
'''
TODO: formalize table_name mapping; at class level provide a `table_map`, or provide
the table object itself to this method
'''
if table_name not in tables.table_map:
#logger.debug(f'Inserts provided for non-existent table {table_name}')
return None
receipt = self._generate_unique_receipt()
self._inserts[receipt][table_name].append(
utils.db.prepare_insert(
tables.table_map[table_name],
insert_dict
)
)
if receipts is not None:
receipts.append(receipt)
return receipt
def collect_inserts(self, receipts=None):
'''
Collect insert-ready dictionaries for the core primitive schema. This method is
effectively a light wrapper around the File and Note-based collection logic
elsewhere in the class.
The overall collection scheme embraces a session-like sequential update model to
an internal insert tracker. The sequence of insert methods is ordered according to
the schema hierarchy, and higher level inserts dictate the scope for lower level
inserts (all methods check and populate the same `inserts` dictionary). Calling
this method flushes any existing inserts, ensuring a re-scan takes place across
calls (or "sessions").
Parameters:
skip_updated: whether to ignore primitives with existing up-to-date
database entries
Returns:
Table name-indexed dictionary of insert lists (of column name-indexed dicts)
'''
return self._inserts_from_receipts(receipts, pop=True)