2025-08-05 15:15:36 +02:00
parent 4bd960ed05
commit 7fabb4163a
192 changed files with 14901 additions and 0 deletions

_archive/osis/base.py Normal file

@@ -0,0 +1,552 @@
import datetime
import os
import yaml
import uuid
import json
import hashlib
from typing import TypeVar, Generic, List, Optional
from pydantic import BaseModel, StrictStr, Field
from sqlalchemy.ext.declarative import declarative_base
from osis.datatools import normalize_email, normalize_phone
from sqlalchemy import (
create_engine,
Column,
Integer,
String,
DateTime,
TIMESTAMP,
func,
Boolean,
Date,
inspect,
text,
bindparam,
)
from sqlalchemy.orm import sessionmaker
from sqlalchemy.dialects.postgresql import JSONB, JSON
import logging
from termcolor import colored
from osis.db import DB, DBType # type: ignore
# Set up logging
logging.basicConfig(level=logging.INFO, format="%(message)s")
logger = logging.getLogger(__name__)
def calculate_months(
investment_date: datetime.date, conversion_date: datetime.date
) -> float:
delta = conversion_date - investment_date
days_in_month = 30.44
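# 30.44 is the average month length in days (365.25 / 12); illustrative check (not original):
# calculate_months(date(2024, 1, 1), date(2024, 7, 1)) == 182 / 30.44 ≈ 5.98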
months = delta.days / days_in_month
return months
def indexed_field(cls):
cls.__index_fields__ = dict()
for name, field in cls.__fields__.items():
if field.json_schema_extra is not None:
for cat in ["index", "indexft", "indexphone", "indexemail", "human"]:
if field.json_schema_extra.get(cat, False):
if name not in cls.__index_fields__:
cls.__index_fields__[name] = dict()
# print(f"{cls.__name__} found index name:{name} cat:{cat}")
cls.__index_fields__[name][cat] = field.annotation
if cat in ["indexphone", "indexemail"]:
cls.__index_fields__[name]["indexft"] = field.annotation
return cls
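# Illustrative outcome (not part of the original code): for MyBaseModel below,
# __index_fields__ becomes {"name": {"index": StrictStr, "human": StrictStr}};
# a hypothetical subclass field `email: StrictStr = Field(default="", indexemail=True)`
# would add {"email": {"indexemail": StrictStr, "indexft": StrictStr}}.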
@indexed_field
class MyBaseModel(BaseModel):
id: str = Field(default_factory=lambda: str(uuid.uuid4()))
name: StrictStr = Field(default="", index=True, human=True)
description: StrictStr = Field(default="")
lasthash: StrictStr = Field(default="")
creation_date: int = Field(
default_factory=lambda: int(datetime.datetime.now().timestamp())
)
mod_date: int = Field(
default_factory=lambda: int(datetime.datetime.now().timestamp())
)
def pre_save(self):
self.mod_date = int(datetime.datetime.now().timestamp())
print("pre-save")
# for fieldname, typedict in self.__class__.__index_fields__.items():
# v= self.__dict__[fieldname]
# if 'indexphone' in typedict:
# self.__dict__[fieldname]=[normalize_phone(i) for i in v.split(",")].uniq()
# if 'indexemail' in typedict:
# self.__dict__[fieldname]=[normalize_email(i) for i in v.split(",")].uniq()
# return ",".join(emails)
# print(field)
# #if field not in ["id", "name","creation_date", "mod_date"]:
# from IPython import embed; embed()
def yaml_get(self) -> str:
data = self.dict()
return yaml.dump(data, sort_keys=True, default_flow_style=False)
def json_get(self) -> str:
data = self.dict()
# return self.model_dump_json()
return json.dumps(data, sort_keys=True, indent=2)
def hash(self) -> str:
data = self.dict()
data.pop("lasthash")
data.pop("mod_date")
data.pop("creation_date")
data.pop("id")
yaml_string = yaml.dump(data, sort_keys=True, default_flow_style=False)
# Encode the YAML string to bytes using UTF-8 encoding
yaml_bytes = yaml_string.encode("utf-8")
self.lasthash = hashlib.md5(yaml_bytes).hexdigest()
return self.lasthash
def doc_id(self, partition: str) -> str:
return f"{partition}:{self.id}"
def __str__(self):
return self.json_get()
T = TypeVar("T", bound=MyBaseModel)
class MyBaseFactory(Generic[T]):
def __init__(
self,
model_cls: type[T],
db: DB,
use_fs: bool = True,
keep_history: bool = False,
reset: bool = False,
load: bool = False,
human_readable: bool = True,
):
self.mycat = model_cls.__name__.lower()
self.description = ""
self.model_cls = model_cls
self.engine = create_engine(db.cfg.url())
self.Session = sessionmaker(bind=self.engine)
self.use_fs = use_fs
self.human_readable = human_readable
self.keep_history = keep_history
self.db = db
dbcat = db.dbcat_new(cat=self.mycat, reset=reset)
self.db_cat = dbcat
self.ft_table_name = f"{self.mycat}_ft"
self._init_db_schema(reset=reset)
if self.use_fs:
self._check_db_schema()
else:
if not self._check_db_schema_ok():
raise RuntimeError(
"DB schema no longer matches the model in use; a migration path is needed"
)
if reset:
self.db_cat.reset()
self._reset_db()
if load:
self.load()
def _reset_db(self):
logger.info(colored("Resetting database...", "red"))
with self.engine.connect() as connection:
cascade = ""
if self.db.cfg.db_type == DBType.POSTGRESQL:
cascade = " CASCADE"
connection.execute(text(f'DROP TABLE IF EXISTS "{self.mycat}"{cascade}'))
if self.keep_history:
connection.execute(
text(f'DROP TABLE IF EXISTS "{self.mycat}_history" {cascade}')
)
connection.commit()
self._init_db_schema()
def _init_db_schema(self, reset: bool = False):
# first make sure table is created if needed
inspector = inspect(self.engine)
if inspector.has_table(self.mycat):
if reset:
self._reset_db()
return
print(f"Table {self.mycat} does exist.")
Base = declarative_base()
def create_model(tablename):
class MyModel(Base):
__tablename__ = tablename
id = Column(String, primary_key=True)
name = Column(String, index=True)
creation_date = Column(Integer, index=True)
mod_date = Column(Integer, index=True)
hash = Column(String, index=True)
data = Column(JSON)
version = Column(Integer)
index_fields = self.model_cls.__index_fields__
for field, index_types in index_fields.items():
if "index" in index_types:
field_type = index_types["index"]
if field not in ["id", "name", "creation_date", "mod_date"]:
if field_type == int:
locals()[field] = Column(Integer, index=True)
elif field_type == datetime.date:
locals()[field] = Column(Date, index=True)
elif field_type == bool:
locals()[field] = Column(Boolean, index=True)
else:
locals()[field] = Column(String, index=True)
create_model_ft()
return MyModel
def create_model_ft():
index_fields = self.model_cls.__index_fields__
toindex: List[str] = []
for fieldnam, index_types in index_fields.items():
print(f"field name: {fieldnam}")
print(f"toindex: {toindex}")
if "indexft" in index_types:
toindex.append(fieldnam)
if len(toindex) > 0:
with self.engine.connect() as connection:
result = connection.execute(
text(
"SELECT name FROM sqlite_master WHERE type='table' AND name=:table_name"
),
{"table_name": self.ft_table_name},
)
if result.fetchone() is None:
# means table does not exist
# identifiers (table and column names) cannot be bound parameters,
# so the DDL has to be assembled as a plain string
fields_sql = ", ".join(["id"] + toindex)
connection.execute(
text(f"CREATE VIRTUAL TABLE {self.ft_table_name} USING fts5({fields_sql})")
)
connection.commit()
self.table_model = create_model(self.mycat)
if self.keep_history:
self.history_table_model = create_model(f"{self.mycat}_history")
Base.metadata.create_all(self.engine)
def _check_db_schema_ok(self) -> bool:
inspector = inspect(self.engine)
table_name = self.table_model.__tablename__
# Get columns from the database
db_columns = {col["name"]: col for col in inspector.get_columns(table_name)}
# Get columns from the model
model_columns = {c.name: c for c in self.table_model.__table__.columns}
# print("model col")
# print(model_columns)
# Check for columns in model but not in db
for col_name, col in model_columns.items():
if col_name not in db_columns:
logger.info(
colored(
f"Column '{col_name}' exists in model but not in database",
"red",
)
)
return False
else:
# Check column type
db_col = db_columns[col_name]
if str(col.type) != str(db_col["type"]):
logger.info(
colored(
f"Column '{col_name}' type mismatch: Model {col.type}, DB {db_col['type']}",
"red",
)
)
return False
# Check for columns in db but not in model
for col_name in db_columns:
if col_name not in model_columns:
logger.info(
colored(
f"Column '{col_name}' exists in database but not in model",
"red",
)
)
return False
return True
def _check_db_schema(self):
# check if schema is ok, if not lets reload
if self._check_db_schema_ok():
return
self.load()
def new(self, name: str = "", **kwargs) -> T:
o = self.model_cls(name=name, **kwargs)
return o
def _encode(self, item: T) -> dict:
return item.model_dump()
def _decode(self, data) -> T:
if isinstance(data, dict):
# already parsed, e.g. loaded from a JSON column or via yaml.safe_load
return self.model_cls(**data)
if self.use_fs:
return self.model_cls(**yaml.load(data, Loader=yaml.Loader))
return self.model_cls(**json.loads(data))
def get(self, id: str = "") -> T:
if not isinstance(id, str):
raise ValueError(f"id needs to be str. Now: {id}")
session = self.Session()
result = session.query(self.table_model).filter_by(id=id).first()
session.close()
if result:
if self.use_fs:
data = self.db_cat.get(id=id)
else:
data = result.data
return self._decode(data)
raise ValueError(f"can't find {self.mycat}:{id}")
def exists(self, id: str = "") -> bool:
if not isinstance(id, str):
raise ValueError(f"id needs to be str. Now: {id}")
session = self.Session()
result = session.query(self.table_model).filter_by(id=id).first()
session.close()
return result is not None
def get_by_name(self, name: str) -> Optional[T]:
r = self.list(name=name)
if len(r) > 1:
raise ValueError(f"found more than 1 object with name {name}")
if len(r) < 1:
raise ValueError(f"object not found with name {name}")
return r[0]
def set(self, item: T, ignorefs: bool = False):
item.pre_save()
new_hash = item.hash()
session = self.Session()
db_item = session.query(self.table_model).filter_by(id=item.id).first()
data = item.model_dump()
index_fields = self.model_cls.__index_fields__
to_ft_index: List[str] = []
ft_field_values = [f"'{item.id}'"]
for field_name, index_types in index_fields.items():
if "indexft" in index_types:
to_ft_index.append(field_name)
ft_field_values.append(f"'{getattr(item, field_name)}'")
if db_item:
if db_item.hash != new_hash:
db_item.name = item.name
db_item.mod_date = item.mod_date
db_item.creation_date = item.creation_date
db_item.hash = new_hash
if not self.use_fs:
db_item.data = data
# Update indexed fields
for field, val in self.model_cls.__index_fields__.items(): # type: ignore
if field not in ["id", "name", "creation_date", "mod_date"]:
if "indexft" in val:
session.execute(
text(f"UPDATE {self.ft_table_name} SET {field} = '{getattr(item, field)}' WHERE id = '{item.id}'")
)
setattr(db_item, field, getattr(item, field))
if self.keep_history and not self.use_fs:
version = (
session.query(func.max(self.history_table_model.version))
.filter_by(id=item.id)
.scalar()
or 0
)
history_item = self.history_table_model(
id=f"{item.id}_{version + 1}",
name=item.name,
creation_date=item.creation_date,
mod_date=item.mod_date,
hash=new_hash,
data=data,
version=version + 1,
)
session.add(history_item)
if not ignorefs and self.use_fs:
self.db_cat.set(data=item.yaml_get(), id=item.id)
else:
db_item = self.table_model(
id=item.id,
name=item.name,
creation_date=item.creation_date,
mod_date=item.mod_date,
hash=new_hash,
)
if not self.use_fs:
db_item.data = data
session.add(db_item)
if to_ft_index:
session.execute(
text(
f'INSERT INTO {self.ft_table_name} (id, {", ".join(to_ft_index)}) VALUES ({", ".join(ft_field_values)})'
)
)
if not ignorefs and self.use_fs:
self.db_cat.set(
data=item.yaml_get(), id=item.id, humanid=self._human_name_get(item)
)
# Set indexed fields
for field, _ in self.model_cls.__index_fields__.items(): # type: ignore
if field not in ["id", "name", "creation_date", "mod_date"]:
setattr(db_item, field, getattr(item, field))
session.add(db_item)
session.commit()
session.close()
# used for a symlink so it's easy for a human to edit
def _human_name_get(self, item: T) -> str:
humanname = ""
if self.human_readable:
for fieldhuman, index_types in self.model_cls.__index_fields__.items(): # type: ignore
if "human" in index_types and fieldhuman not in ["id", "creation_date", "mod_date"]:
humanname += f"{getattr(item, fieldhuman)}_"
humanname = humanname.rstrip("_")
if humanname == "":
raise Exception(f"humanname should not be empty for {item}")
return humanname
def delete(self, id: str):
if not isinstance(id, str):
raise ValueError(f"id needs to be str. Now: {id}")
humanid = ""
if self.use_fs and self.exists(id):
# resolve the human-readable symlink name before the record disappears
humanid = self._human_name_get(self.get(id))
session = self.Session()
result = session.query(self.table_model).filter_by(id=id).delete()
session.execute(text(f"DELETE FROM {self.ft_table_name} WHERE id = :id"), {"id": id})
session.commit()
session.close()
if result > 1:
raise ValueError(f"multiple values deleted with id {id}")
elif result == 0:
raise ValueError(f"no record found with id {id}")
if self.use_fs:
self.db_cat.delete(id=id, humanid=humanid)
def list(
self, id: Optional[str] = None, name: Optional[str] = None, **kwargs
) -> List[T]:
session = self.Session()
query = session.query(self.table_model)
if id:
query = query.filter(self.table_model.id == id)
if name:
query = query.filter(self.table_model.name.ilike(f"%{name}%"))
index_fields = self.model_cls.__index_fields__
for key, value in kwargs.items():
if value is None:
continue
if self.use_fs:
query = query.filter(getattr(self.table_model, key) == value)
else:
if key in index_fields and "indexft" in index_fields[key]:
result = session.execute(
text(f"SELECT id FROM {self.ft_table_name} WHERE {key} MATCH :value"),
{"value": value},
)
ids = [row[0] for row in result]
query = query.filter(self.table_model.id.in_(ids))
else:
query = query.filter(
self.table_model.data[key].astext.ilike(f"%{value}%")
)
results = query.all()
session.close()
items = []
for result in results:
items.append(self.get(id=result.id))
return items
def load(self, reset: bool = False):
if self.use_fs:
logger.info(colored(f"Reload DB.", "green"))
if reset:
self._reset_db()
# Get all IDs and hashes from the database
session = self.Session()
db_items = {
item.id: item.hash
for item in session.query(
self.table_model.id, self.table_model.hash
).all()
}
session.close()
done = []
for root, _, files in os.walk(self.db.path):
for file in files:
if file.endswith(".yaml"):
file_path = os.path.join(root, file)
with open(file_path, "r") as f:
data = yaml.safe_load(f)
obj = self._decode(data)
myhash = obj.hash()
if reset:
self.set(obj, ignorefs=True)
else:
if obj.id in db_items:
if db_items[obj.id] != myhash:
# Hash mismatch, update the database record
self.set(obj, ignorefs=True)
else:
# New item, add to database
self.set(obj, ignorefs=True)
done.append(obj.id)

_archive/osis/conf.py Normal file

@@ -0,0 +1,27 @@
# Configuration file for the Sphinx documentation builder.
#
# For the full list of built-in configuration values, see the documentation:
# https://www.sphinx-doc.org/en/master/usage/configuration.html
# -- Project information -----------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information
project = 'osis'
copyright = '2024, kristof'
author = 'kristof'
# -- General configuration ---------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
extensions = []
templates_path = ['_templates']
exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
# -- Options for HTML output -------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output
html_theme = 'alabaster'
html_static_path = ['_static']


@@ -0,0 +1,13 @@
import re
def normalize_email(email: str) -> str:
# Normalize email by stripping spaces and converting to lower case
#EmailStr.validate(email) #TODO
return email.strip().lower()
def normalize_phone(phone: str) -> str:
# Normalize phone number by removing dots and spaces, and ensure it matches the pattern +<digits>
phone = phone.replace(".", "").replace(" ", "")
if not re.match(r"^\+\d+$", phone):
raise ValueError(f"Invalid phone number: {phone}")
return phone
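# Illustrative examples (not part of the original module):
# normalize_email("  Alice@Example.COM ") -> "alice@example.com"
# normalize_phone("+32 2 123.45.67") -> "+3221234567"
# normalize_phone("02 123 45 67") raises ValueError (missing leading '+')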

_archive/osis/db.py Normal file

@@ -0,0 +1,361 @@
import os
import shutil
import logging
from termcolor import colored
from herotools.pathtools import expand_path
import psycopg2
from psycopg2.extensions import ISOLATION_LEVEL_AUTOCOMMIT
from osis.id import int_to_id
from psycopg2.extras import DictCursor
import sqlite3
from enum import Enum
# Set up logging
logging.basicConfig(level=logging.INFO, format="%(message)s")
logger = logging.getLogger(__name__)
class DBCat:
def __init__(self, path: str, cat: str):
path = expand_path(path)
self.path_id = os.path.join(path, "id", cat)
self.path_human = os.path.join(path, "human", cat)
self.path = path
self._init()
def _init(self):
os.makedirs(self.path_id, exist_ok=True)
os.makedirs(self.path_human, exist_ok=True)
def reset(self):
if os.path.exists(self.path_id):
shutil.rmtree(self.path_id, ignore_errors=True)
if os.path.exists(self.path_human):
shutil.rmtree(self.path_human, ignore_errors=True)
self._init()
def _get_path_id(self, id: str) -> str:
id1 = id[:2]
dir_path = os.path.join(self.path_id, id1)
file_path = os.path.join(dir_path, f"{id}.yaml")
os.makedirs(dir_path, exist_ok=True)
return file_path
def set(self, id: str, data: str, humanid: str = ""):
fs_path = self._get_path_id(id=id)
with open(fs_path, "w") as f:
f.write(data)
if humanid != "":
human_file_path = os.path.join(self.path_human, humanid)
# Create a symbolic link
try:
os.symlink(fs_path, human_file_path)
except FileExistsError:
# If the symlink already exists, we can either ignore it or update it
if not os.path.islink(human_file_path):
raise # If it's not a symlink, re-raise the exception
os.remove(human_file_path) # Remove the existing symlink
os.symlink(fs_path, human_file_path) # Create a new symlink
return fs_path
def get(self, id: str) -> str:
fs_path = self._get_path_id(id=id)
with open(fs_path, "r") as f:
return f.read()
def delete(self, id: str, humanid: str = ""):
fs_path = self._get_path_id(id=id)
os.remove(fs_path)
if humanid != "":
human_file_path = os.path.join(self.path_human, humanid)
os.remove(human_file_path)
class DBType(Enum):
SQLITE = "sqlite"
POSTGRESQL = "postgresql"
class DBConfig:
def __init__(
self,
db_type: DBType = DBType.POSTGRESQL,
db_name: str = "main",
db_login: str = "admin",
db_passwd: str = "admin",
db_addr: str = "localhost",
db_port: int = 5432,
db_path: str = "/tmp/db"
):
self.db_type = db_type
self.db_name = db_name
self.db_login = db_login
self.db_passwd = db_passwd
self.db_addr = db_addr
self.db_port = db_port
self.db_path = expand_path(db_path)
def __str__(self):
return (f"DBConfig(db_name='{self.db_name}', db_login='{self.db_login}', "
f"db_addr='{self.db_addr}', db_port={self.db_port}, db_path='{self.db_path}')")
def __repr__(self):
return self.__str__()
def url(self) -> str:
if self.db_type == DBType.POSTGRESQL:
return f"postgresql://{self.db_login}:{self.db_passwd}@{self.db_addr}:{self.db_port}/{self.db_name}"
elif self.db_type == DBType.SQLITE:
return f"sqlite:///{self.db_path}/{self.db_name}.db"
else:
raise ValueError(f"Unsupported database type: {self.db_type}")
class DB:
def __init__(self,cfg:DBConfig , path: str, reset: bool = False):
self.cfg = cfg
self.path = expand_path(path)
self.path_id = os.path.join(self.path, "id")
self.path_human = os.path.join(self.path, "human")
self.dbcats = dict[str, DBCat]()
if reset:
self.reset()
else:
self._init()
def reset(self):
if os.path.exists(self.path_id):
shutil.rmtree(self.path_id, ignore_errors=True)
logger.info(colored(f"Removed db dir: {self.path_id}", "red"))
if os.path.exists(self.path_human):
shutil.rmtree(self.path_human, ignore_errors=True)
logger.info(colored(f"Removed db dir: {self.path_human}", "red"))
if self.cfg.db_type == DBType.POSTGRESQL:
conn=self.db_connection()
cur = conn.cursor()
cur.execute("SELECT 1 FROM pg_database WHERE datname = %s", (self.cfg.db_name,))
exists = cur.fetchone()
cur.close()
conn.close()
if exists:
# Disconnect from the current database
# Reconnect to the postgres database to drop the target database
conn = psycopg2.connect(dbname='postgres', user=self.cfg.db_login, password=self.cfg.db_passwd, host=self.cfg.db_addr)
conn.autocommit = True
cur = conn.cursor()
#need to remove the open connections to be able to remove it
cur.execute(f"""
SELECT pg_terminate_backend(pg_stat_activity.pid)
FROM pg_stat_activity
WHERE pg_stat_activity.datname = %s
AND pid <> pg_backend_pid();
""", (self.cfg.db_name,))
print(f"Terminated all connections to database '{self.cfg.db_name}'")
cur.execute(f"DROP DATABASE {self.cfg.db_name}")
print(f"Database '{self.cfg.db_name}' dropped successfully.")
cur.close()
conn.close()
self._init()
def _init(self):
os.makedirs(self.path_human, exist_ok=True)
os.makedirs(self.path_id, exist_ok=True)
for dbcat in self.dbcats.values():
dbcat._init()
def dbcat_new(self, cat: str, reset: bool = False) -> DBCat:
dbc = DBCat(cat=cat, path=self.path)
self.dbcats[cat] = dbc
return dbc
def dbcat_get(self, cat: str) -> DBCat:
if cat in self.dbcats:
return self.dbcats[cat]
raise Exception(f"can't find dbcat with cat:{cat}")
def db_connection(self):
if self.cfg.db_type == DBType.POSTGRESQL:
try:
conn = psycopg2.connect(
dbname=self.cfg.db_name,
user=self.cfg.db_login,
password=self.cfg.db_passwd,
host=self.cfg.db_addr,
port=self.cfg.db_port
)
conn.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT)
conn.autocommit = True # Set autocommit mode
except psycopg2.OperationalError as e:
if f"database \"{self.cfg.db_name}\" does not exist" in str(e):
# Connect to 'postgres' database to create the new database
conn = psycopg2.connect(
dbname='postgres',
user=self.cfg.db_login,
password=self.cfg.db_passwd,
host=self.cfg.db_addr,
port=self.cfg.db_port
)
conn.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT)
cur = conn.cursor()
cur.execute(f"CREATE DATABASE {self.cfg.db_name}")
cur.close()
conn.close()
# Now connect to the newly created database
conn = psycopg2.connect(
dbname=self.cfg.db_name,
user=self.cfg.db_login,
password=self.cfg.db_passwd,
host=self.cfg.db_addr,
port=self.cfg.db_port
)
conn.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT)
print(f"Database '{self.cfg.db_name}' created successfully.")
else:
raise e
elif self.cfg.db_type == DBType.SQLITE:
db_file = os.path.join(self.cfg.db_path, f"{self.cfg.db_name}.db")
conn = sqlite3.connect(db_file)
else:
raise ValueError(f"Unsupported database type: {self.cfg.db_type}")
return conn
def db_create(self, db_name: str = "", user_name: str = "", user_password: str = ""):
if db_name == "":
db_name = self.cfg.db_name
if self.cfg.db_type == DBType.POSTGRESQL:
self.db_create_id()
# Connect to PostgreSQL server
conn = self.db_connection()
cur = conn.cursor()
try:
# Check if the database already exists
cur.execute("SELECT 1 FROM pg_database WHERE datname = %s", (db_name,))
exists = cur.fetchone()
if not exists:
# Create the database
cur.execute(f"CREATE DATABASE {db_name}")
print(f"Database '{db_name}' created successfully.")
if user_name and user_password:
# Check if user exists
cur.execute("SELECT 1 FROM pg_roles WHERE rolname = %s", (user_name,))
user_exists = cur.fetchone()
if not user_exists:
# Create the user
cur.execute(f"CREATE USER {user_name} WITH PASSWORD %s", (user_password,))
print(f"User '{user_name}' created successfully.")
# Grant privileges on the database to the user
cur.execute(f"GRANT ALL PRIVILEGES ON DATABASE {db_name} TO {user_name}")
print(f"Privileges granted to '{user_name}' on '{db_name}'.")
except psycopg2.Error as e:
raise Exception(f"Postgresql error: {e}")
finally:
# Close the cursor and connection
cur.close()
conn.close()
elif self.cfg.db_type == DBType.SQLITE:
# For SQLite, we just need to create the database file if it doesn't exist
db_file = os.path.join(self.cfg.db_path, f"{db_name}.db")
if not os.path.exists(db_file):
conn = sqlite3.connect(db_file)
conn.close()
print(f"SQLite database '{db_name}' created successfully at {db_file}.")
else:
print(f"SQLite database '{db_name}' already exists at {db_file}.")
if user_name:
print("Note: SQLite doesn't support user management like PostgreSQL.")
else:
raise ValueError(f"Unsupported database type: {self.cfg.db_type}")
def db_create_id(self):
with self.db_connection() as conn:
with conn.cursor() as cur:
cur.execute("""
CREATE TABLE IF NOT EXISTS user_id_counters (
user_id INTEGER PRIMARY KEY,
last_id_given INTEGER NOT NULL DEFAULT 0
)
""")
conn.commit()
def new_id(self,user_id: int) -> str:
if not 0 <= user_id <= 50:
raise ValueError("User ID must be between 0 and 50")
max_ids = 60466175
ids_per_user = max_ids // 51 # We use 51 to ensure we don't exceed the max even for user_id 50
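# Illustrative check of the partitioning (not part of the original code):
# max_ids = 60466175 = 36**5 - 1 is the largest 5-character base-36 value,
# ids_per_user = 60466175 // 51 = 1185611, so user 0 draws ids 1..1185611,
# user 1 draws 1185612..2371222, and so on; int_to_id() then renders each
# integer as a 3-5 character base-36 string.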
with self.db_connection() as conn:
with conn.cursor(cursor_factory=DictCursor) as cur:
# Try to get the last_id_given for this user
cur.execute("SELECT last_id_given FROM user_id_counters WHERE user_id = %s", (user_id,))
result = cur.fetchone()
if result is None:
# If no record exists for this user, insert a new one
cur.execute(
"INSERT INTO user_id_counters (user_id, last_id_given) VALUES (%s, 0) RETURNING last_id_given",
(user_id,)
)
last_id_given = 0
else:
last_id_given = result['last_id_given']
# Calculate the new ID
new_id_int = (user_id * ids_per_user) + last_id_given + 1
if new_id_int > (user_id + 1) * ids_per_user:
raise ValueError(f"No more IDs available for user {user_id}")
# Update the last_id_given in the database
cur.execute(
"UPDATE user_id_counters SET last_id_given = last_id_given + 1 WHERE user_id = %s",
(user_id,)
)
conn.commit()
return int_to_id(new_id_int)
def db_new(
db_type: DBType = DBType.POSTGRESQL,
db_name: str = "main",
db_login: str = "admin",
db_passwd: str = "admin",
db_addr: str = "localhost",
db_port: int = 5432,
db_path: str = "/tmp/db",
reset: bool = False,
):
# Create a DBConfig object
config = DBConfig(
db_type=db_type,
db_name=db_name,
db_login=db_login,
db_passwd=db_passwd,
db_addr=db_addr,
db_port=db_port,
db_path=db_path
)
# Create and return a DB object
mydb = DB(cfg=config, path=db_path, reset=reset)
mydb.db_create()
return mydb

_archive/osis/doc.py Normal file

@@ -0,0 +1,77 @@
import os
import subprocess
import sys
def should_document(file_name):
"""
Determine if a file should be documented based on its name.
Args:
file_name (str): The name of the file.
Returns:
bool: True if the file should be documented, False otherwise.
"""
lower_name = file_name.lower()
return (
file_name.endswith('.py') and
'example' not in lower_name and
'_generate' not in lower_name
)
def generate_pydoc(start_dir):
"""
Generate pydoc documentation for Python modules in the given directory.
Args:
start_dir (str): The directory to start searching for Python modules.
Returns:
None
"""
# Create the docs directory
docs_dir = os.path.join(start_dir, 'docs')
os.makedirs(docs_dir, exist_ok=True)
# Walk through the directory
for root, dirs, files in os.walk(start_dir):
for file in files:
if should_document(file):
module_name = os.path.splitext(file)[0]
module_path = os.path.relpath(os.path.join(root, file), start_dir)
module_path = os.path.splitext(module_path)[0].replace(os.path.sep, '.')
# Skip the script itself
if module_name == os.path.splitext(os.path.basename(__file__))[0]:
continue
output_file = os.path.join(docs_dir, f'{module_name}.txt')
try:
# Run pydoc and capture the output
result = subprocess.run(
[sys.executable, '-m', 'pydoc', module_path],
capture_output=True,
text=True,
check=True
)
# Write the output to a file
with open(output_file, 'w') as f:
f.write(result.stdout)
print(f"Generated documentation for {module_path} in {output_file}")
except subprocess.CalledProcessError as e:
print(f"Error generating documentation for {module_path}: {e}")
except Exception as e:
print(f"Unexpected error for {module_path}: {e}")
if __name__ == "__main__":
# Get the directory of the script
script_dir = os.path.dirname(os.path.abspath(__file__))
# Generate documentation
generate_pydoc(script_dir)
print(f"Documentation generation complete. Output is in {os.path.join(script_dir, 'docs')}")

_archive/osis/id.py Normal file

@@ -0,0 +1,43 @@
from typing import Tuple
from typing import Optional
def int_to_id(number: int) -> str:
chars: str = '0123456789abcdefghijklmnopqrstuvwxyz'
base: int = len(chars)
if number < 0:
raise ValueError("Input must be a non-negative integer")
# Convert to base-36
result: list[str] = []
while number > 0:
number, remainder = divmod(number, base)
result.append(chars[remainder])
# Pad with '0' if necessary to reach minimum length of 3
while len(result) < 3:
result.append('0')
# Raise error if result is longer than 5 characters
if len(result) > 5:
raise ValueError("Input number is too large (results in more than 5 characters)")
# Reverse the list and join into a string
return ''.join(reversed(result))
def id_to_int(id: str) -> int:
chars: str = '0123456789abcdefghijklmnopqrstuvwxyz'
base: int = len(chars)
if not 3 <= len(id) <= 5:
raise ValueError("ID must be between 3 and 5 characters long")
if not all(c in chars for c in id):
raise ValueError("ID contains invalid characters")
result: int = 0
for char in id:
result = result * base + chars.index(char)
return result
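# Illustrative round-trips (not part of the original code):
# int_to_id(0) -> "000", int_to_id(35) -> "00z", int_to_id(36) -> "010"
# id_to_int("00z") -> 35; inputs above 60466175 ("zzzzz") make int_to_id raise ValueError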

_archive/osis/orm.py Normal file

@@ -0,0 +1,159 @@
from typing import Dict, Type, List
import datetime
from dataclasses import dataclass, field
import psycopg2
from psycopg2.extras import Json
@dataclass
class ObjIndexDef:
table_name: str
table_fields: Dict[str, Type]
fts_fields: List[str] = field(default_factory=list) # full text fields
def sql_col_type(field_type: Type) -> str:
if field_type == int:
return "INTEGER"
elif field_type == float:
return "REAL"
elif field_type == str:
return "TEXT"
elif field_type == bool:
return "BOOLEAN"
elif field_type == datetime.date:
return "DATE"
elif field_type == datetime.datetime:
return "TIMESTAMP"
else:
return "TEXT" # default type if none match
def obj_index_def_new(table_name: str, table_fields: Dict[str, Type], fts_fields: List[str]) -> ObjIndexDef:
# Keep the Python types here; sql_table_create converts them via sql_col_type when building the DDL
return ObjIndexDef(
table_name=table_name,
table_fields=table_fields,
fts_fields=fts_fields
)
def sql_table_create(db, definition: ObjIndexDef, reset: bool = False) -> str:
columns = []
for field_name, field_type in definition.table_fields.items():
if field_name not in ["id", "name", "creation_date", "mod_date", "data"]:
sql_type = sql_col_type(field_type)
columns.append(f"{field_name} {sql_type}")
columns.append("id TEXT PRIMARY KEY")
columns.append("name TEXT NOT NULL")
columns.append("creation_date TIMESTAMP DEFAULT CURRENT_TIMESTAMP")
columns.append("mod_date TIMESTAMP DEFAULT CURRENT_TIMESTAMP")
columns.append("data JSON")
columns_str = ", ".join(columns)
create_table_sql = f"CREATE TABLE IF NOT EXISTS {definition.table_name} ({columns_str});"
if reset:
drop_table_sql = f"DROP TABLE IF EXISTS {definition.table_name};"
create_table_sql = drop_table_sql + "\n" + create_table_sql
if definition.fts_fields:
fts_table_sql = f"""
CREATE TABLE IF NOT EXISTS {definition.table_name}_fts (
id TEXT PRIMARY KEY,
{definition.table_name}_id TEXT REFERENCES {definition.table_name}(id),
document tsvector
);
CREATE INDEX IF NOT EXISTS idx_{definition.table_name}_fts_document
ON {definition.table_name}_fts USING GIN(document);
"""
create_table_sql += "\n" + fts_table_sql
c=db.db_connection()
try:
with c.cursor() as cursor:
cursor.execute(create_table_sql)
c.commit() # Commit the transaction
print("SQL script executed successfully.")
except psycopg2.Error as e:
c.rollback() # Rollback on error
print(f"An error occurred: {e}")
return create_table_sql
def insert_update(db, definition: ObjIndexDef, **args):
table_name = definition.table_name
fields = definition.table_fields.keys()
c=db.db_connection()
# Prepare the data
data = {}
for field in fields:
if field in args:
if isinstance(args[field], dict):
data[field] = Json(args[field])
else:
data[field] = args[field]
elif field not in ["id", "creation_date", "mod_date"]:
data[field] = None
# Ensure required fields are present
if "id" not in data:
raise ValueError("'id' field is required for insert/update operation")
if "name" not in data:
raise ValueError("'name' field is required for insert/update operation")
# Set modification date
data["mod_date"] = datetime.datetime.now()
# Prepare SQL
fields_str = ", ".join(data.keys())
placeholders = ", ".join(["%s"] * len(data))
update_str = ", ".join([f"{k} = EXCLUDED.{k}" for k in data.keys() if k != "id"])
sql = f"""
INSERT INTO {table_name} ({fields_str})
VALUES ({placeholders})
ON CONFLICT (id) DO UPDATE
SET {update_str};
"""
# Execute SQL
try:
with c.cursor() as cursor:
cursor.execute(sql, list(data.values()))
c.commit()
# Update FTS table if necessary
if definition.fts_fields:
update_fts(db, definition, data)
print(f"Successfully inserted/updated record with id {data['id']}")
except psycopg2.Error as e:
c.rollback()
print(f"An error occurred: {e}")
def update_fts(db, definition: ObjIndexDef, data: dict):
fts_table = f"{definition.table_name}_fts"
fts_fields = definition.fts_fields
c=db.db_connection()
# Prepare FTS document
fts_data = " ".join(str(data[field]) for field in fts_fields if field in data)
sql = f"""
INSERT INTO {fts_table} (id, {definition.table_name}_id, document)
VALUES (%s, %s, to_tsvector(%s))
ON CONFLICT (id) DO UPDATE
SET document = EXCLUDED.document;
"""
try:
with c.cursor() as cursor:
cursor.execute(sql, (data['id'], data['id'], fts_data))
c.commit()
print(f"Successfully updated FTS for record with id {data['id']}")
except psycopg2.Error as e:
c.rollback()
print(f"An error occurred while updating FTS: {e}")

_archive/osis/readme.md Normal file

@@ -0,0 +1,6 @@
# OSIS
Object Storage and Index Service.
See `osis_examples` under `/examples` for usage examples.
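A minimal usage sketch (illustrative only: the `Contact` model and the connection values are made up; `MyBaseModel`, `MyBaseFactory`, `indexed_field` and `db_new` come from `osis/base.py` and `osis/db.py`):

```python
from pydantic import Field, StrictStr
from osis.base import MyBaseModel, MyBaseFactory, indexed_field
from osis.db import db_new, DBType

@indexed_field
class Contact(MyBaseModel):  # hypothetical example model
    email: StrictStr = Field(default="", indexemail=True)
    phone: StrictStr = Field(default="", indexphone=True)

# SQLite keeps the sketch self-contained; PostgreSQL works the same way via DBType.POSTGRESQL
db = db_new(db_type=DBType.SQLITE, db_name="example", db_path="/tmp/osis_example")
contacts = MyBaseFactory(Contact, db=db, reset=True)

c = contacts.new(name="alice", email="alice@example.com", phone="+3221234567")
contacts.set(c)                     # writes the YAML file and the SQL index row
print(contacts.get(c.id))           # loads it back through the index
print(contacts.list(name="alice"))  # filtered listing on the indexed name column
```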


@@ -0,0 +1,49 @@
CREATE OR REPLACE FUNCTION create_table_from_json(definition JSONB, reset BOOLEAN DEFAULT FALSE)
RETURNS VOID AS $$
local json = require("cjson")
local def = json.decode(definition)
local table_name = def.table_name
local table_fields = def.table_fields
local fts_fields = def.fts_fields or {}
local columns = ""
for key, value in pairs(table_fields) do
columns = columns .. key .. " " .. value .. ", "
end
-- Add the necessary columns
columns = columns .. "id TEXT PRIMARY KEY, "
columns = columns .. "name TEXT NOT NULL, "
columns = columns .. "creation_date TIMESTAMP DEFAULT CURRENT_TIMESTAMP, "
columns = columns .. "mod_date TIMESTAMP DEFAULT CURRENT_TIMESTAMP, "
columns = columns .. "data JSON"
-- Construct the CREATE TABLE statement
local create_table_sql = string.format("CREATE TABLE IF NOT EXISTS %s (%s);", table_name, columns)
print("Create table " .. tostring(create_table_sql))
-- Conditionally construct the DROP TABLE statement
if reset then
local drop_table_sql = string.format("DROP TABLE IF EXISTS %s;", table_name)
create_table_sql = drop_table_sql .. create_table_sql
end
-- Add the FTS table and index if full-text search fields are provided
if #fts_fields > 0 then
local fts_table_sql = string.format([[
CREATE TABLE IF NOT EXISTS %s_fts (
id TEXT PRIMARY KEY,
%s_id TEXT REFERENCES %s(id),
document tsvector
);
CREATE INDEX IF NOT EXISTS idx_%s_fts_document ON %s_fts USING GIN(document);
]], table_name, table_name, table_name, table_name, table_name)
create_table_sql = create_table_sql .. fts_table_sql
end
print("Create table fts" .. tostring(create_table_sql))
-- Execute the dynamic SQL
SPI.execute(create_table_sql)
$$ LANGUAGE pllua;


@@ -0,0 +1,100 @@
CREATE OR REPLACE FUNCTION create_table_from_json(definition JSONB, reset BOOLEAN DEFAULT FALSE)
RETURNS VOID AS $$
local spi = require("pllua.spi")
local function execute_sql(sql)
local status, result = pcall(function()
return spi.execute(sql)
end)
if not status then
error("Failed to execute SQL: " .. tostring(result))
end
return result
end
local nullval = {} -- use some unique object to mark nulls
local def = definition{ null = nullval, pg_numeric = true }
local table_name = def.table_name
local table_fields = def.table_fields
local fts_fields = def.fts_fields or {}
local columns = {}
local existing_columns = {}
local has_id_primary_key = false
local index_columns = {}
if reset then
local drop_table_sql = string.format("DROP TABLE IF EXISTS %s CASCADE;", table_name)
execute_sql(drop_table_sql)
end
for key, value in pairs(table_fields) do
if key:lower() == "id" then
-- Ensure 'id' is always PRIMARY KEY
table.insert(columns, key .. " " .. value .. " PRIMARY KEY")
has_id_primary_key = true
else
table.insert(columns, key .. " " .. value)
if key:lower() ~= "data" then
table.insert(index_columns, key)
end
end
existing_columns[key:lower()] = true
end
print("INdex columns " .. tostring(index_columns))
-- Add necessary columns only if they don't exist
local required_columns = {
{name = "name", type = "TEXT NOT NULL"},
{name = "creation_date", type = "TIMESTAMP DEFAULT CURRENT_TIMESTAMP"},
{name = "mod_date", type = "TIMESTAMP DEFAULT CURRENT_TIMESTAMP"},
{name = "data", type = "JSONB"}
}
for _, col in ipairs(required_columns) do
if not existing_columns[col.name:lower()] then
table.insert(columns, col.name .. " " .. col.type)
table.insert(index_columns, col.name)
end
end
-- If 'id' wasn't provided, add it as PRIMARY KEY
if not has_id_primary_key then
table.insert(columns, 1, "id TEXT PRIMARY KEY")
end
-- Join columns with commas
local columns_string = table.concat(columns, ", ")
-- Construct the CREATE TABLE statement
local create_table_sql = string.format("CREATE TABLE IF NOT EXISTS %s (%s);", table_name, columns_string)
-- Conditionally construct the DROP TABLE statement
-- Execute the CREATE TABLE statement
execute_sql(create_table_sql)
-- Create an index for each column
for _, column in ipairs(index_columns) do
local index_sql = string.format("CREATE INDEX IF NOT EXISTS idx_%s_%s ON %s (%s);",
table_name, column, table_name, column)
execute_sql(index_sql)
end
-- Add the FTS table and index if full-text search fields are provided
if #fts_fields > 0 then
local fts_table_sql = string.format([[
CREATE TABLE IF NOT EXISTS %s_fts (
id TEXT PRIMARY KEY,
%s_id TEXT REFERENCES %s(id),
document tsvector
);
CREATE INDEX IF NOT EXISTS idx_%s_fts_document ON %s_fts USING GIN(document);
]], table_name, table_name, table_name, table_name, table_name)
execute_sql(fts_table_sql)
end
return
$$ LANGUAGE pllua;


@@ -0,0 +1,75 @@
CREATE OR REPLACE FUNCTION create_table_from_json(definition_json JSONB, reset BOOLEAN DEFAULT FALSE)
RETURNS VOID AS $$
import plpy
import json
def execute_sql(sql):
try:
plpy.execute(sql)
except Exception as e:
plpy.error(f"Failed to execute SQL: {str(e)}")
# Parse the JSONB input into a Python dictionary
definition = json.loads(definition_json)
table_name = definition['table_name']
table_fields = definition['table_fields']
fts_fields = definition.get('fts_fields', [])
columns = []
existing_columns = set()
has_id_primary_key = False
index_columns = []
if reset:
drop_table_sql = f"DROP TABLE IF EXISTS {table_name} CASCADE;"
execute_sql(drop_table_sql)
for key, value in table_fields.items():
if key.lower() == "id":
columns.append(f"{key} {value} PRIMARY KEY")
has_id_primary_key = True
else:
columns.append(f"{key} {value}")
if key.lower() != "data":
index_columns.append(key)
existing_columns.add(key.lower())
plpy.notice(f"Index columns {index_columns}")
required_columns = [
{"name": "name", "type": "TEXT NOT NULL"},
{"name": "creation_date", "type": "TIMESTAMP DEFAULT CURRENT_TIMESTAMP"},
{"name": "mod_date", "type": "TIMESTAMP DEFAULT CURRENT_TIMESTAMP"},
{"name": "data", "type": "JSONB"}
]
for col in required_columns:
if col['name'].lower() not in existing_columns:
columns.append(f"{col['name']} {col['type']}")
index_columns.append(col['name'])
if not has_id_primary_key:
columns.insert(0, "id TEXT PRIMARY KEY")
columns_string = ", ".join(columns)
create_table_sql = f"CREATE TABLE IF NOT EXISTS {table_name} ({columns_string});"
execute_sql(create_table_sql)
for column in index_columns:
index_sql = f"CREATE INDEX IF NOT EXISTS idx_{table_name}_{column} ON {table_name} ({column});"
execute_sql(index_sql)
if fts_fields:
fts_table_sql = f"""
CREATE TABLE IF NOT EXISTS {table_name}_fts (
id TEXT PRIMARY KEY,
{table_name}_id TEXT REFERENCES {table_name}(id),
document tsvector
);
CREATE INDEX IF NOT EXISTS idx_{table_name}_fts_document ON {table_name}_fts USING GIN(document);
"""
execute_sql(fts_table_sql)
$$ LANGUAGE plpython3u;


@@ -0,0 +1,56 @@
CREATE OR REPLACE FUNCTION create_table_from_json(
definition JSONB,
reset BOOLEAN DEFAULT FALSE
) RETURNS VOID AS $$
DECLARE
table_name TEXT;
table_fields JSONB;
fts_fields TEXT[];
columns TEXT := '';
create_table_sql TEXT;
drop_table_sql TEXT;
fts_table_sql TEXT := '';
field RECORD;
BEGIN
-- Extract the values from the JSON object
table_name := definition->>'table_name';
table_fields := definition->'table_fields';
fts_fields := ARRAY(SELECT jsonb_array_elements_text(definition->'fts_fields'));
-- Iterate over the JSONB object to build the columns definition
FOR field IN SELECT * FROM jsonb_each_text(table_fields)
LOOP
columns := columns || field.key || ' ' || field.value || ', ';
END LOOP;
-- Add the necessary columns
columns := columns || 'id TEXT PRIMARY KEY, ';
columns := columns || 'name TEXT NOT NULL, ';
columns := columns || 'creation_date TIMESTAMP DEFAULT CURRENT_TIMESTAMP, ';
columns := columns || 'mod_date TIMESTAMP DEFAULT CURRENT_TIMESTAMP, ';
columns := columns || 'data JSON';
-- Construct the CREATE TABLE statement
create_table_sql := 'CREATE TABLE IF NOT EXISTS ' || table_name || ' (' || columns || ');';
-- Conditionally construct the DROP TABLE statement
IF reset THEN
drop_table_sql := 'DROP TABLE IF EXISTS ' || table_name || ';';
create_table_sql := drop_table_sql || create_table_sql;
END IF;
-- Add the FTS table and index if full-text search fields are provided
IF array_length(fts_fields, 1) > 0 THEN
fts_table_sql := 'CREATE TABLE IF NOT EXISTS ' || table_name || '_fts (' ||
'id TEXT PRIMARY KEY, ' ||
table_name || '_id TEXT REFERENCES ' || table_name || '(id), ' ||
'document tsvector);' ||
'CREATE INDEX IF NOT EXISTS idx_' || table_name || '_fts_document ' ||
'ON ' || table_name || '_fts USING GIN(document);';
create_table_sql := create_table_sql || fts_table_sql;
END IF;
-- Execute the dynamic SQL
EXECUTE create_table_sql;
END;
$$ LANGUAGE plpgsql;