diff --git a/_archive/aiprompts/ask.py b/_archive/aiprompts/ask.py
new file mode 100644
index 0000000..9b916c0
--- /dev/null
+++ b/_archive/aiprompts/ask.py
@@ -0,0 +1,195 @@
+import os
+import json
+import enum
+import textwrap
+from typing import List, Optional
+import logging
+from termcolor import colored
+
+import ollama
+import openai
+from openai import OpenAI
+from ai.instruction import instructions_load, instructions_get, instructions_reset
+
+# Set up logging
+logging.basicConfig(level=logging.INFO, format='%(message)s')
+logger = logging.getLogger(__name__)
+
+class Model(enum.Enum):
+    QWEN72I = "Qwen/Qwen2-72B-Instruct"
+    MIXTRAL7I = "mistralai/Mixtral-8x7B-Instruct-v0.1"
+    PHI3_MEDIUM = "phi3:medium-128k"
+    PHI3_MINI = "phi3:mini"
+    GPT35 = "gpt-3.5-turbo"
+    GPT4 = "gpt-4"
+    GPT4O = "gpt-4o"
+    QWEN1L = "qwen2:1.5b"  # local
+    QWEN0L = "qwen2:0.5b"  # local
+    PHI3L = "phi3:3.8b"  # local
+    QWEN7L = "qwen2:7b"  # local
+
+class AIAssistant:
+    def __init__(self):
+        self.model = Model.QWEN72I
+        self.openai_client = None
+        self.deepinfra_client = None
+        self._setup_clients()
+
+    def _setup_clients(self):
+        openaikey = os.getenv("OPENAIKEY")
+        if openaikey:
+            logger.info(colored("OpenAI key set", "green"))
+            openai.api_key = openaikey
+            self.openai_client = openai
+
+        deepinfrakey = os.getenv("DEEPINFRAKEY")
+        if deepinfrakey:
+            logger.info(colored("DEEPINFRAKEY key set", "green"))
+            self.deepinfra_client = OpenAI(
+                api_key=deepinfrakey,
+                base_url="https://api.deepinfra.com/v1/openai",
+            )
+
+    def set_model(self, model: Model):
+        self.model = model
+        logger.info(colored(f"Model set to: {model.value}", "cyan"))
+
+    def ask(self, question: str, category: str = "", name: str = "", log: bool = True) -> str:
+        logger.info(colored(f"Asking question in category: {category}, name: {name}", "yellow"))
+        mm = instructions_get(category=category, name=name)
+        mm.add_message(role="user", content=question)
+        #mm.print_messages()
+
+        if self.model in [Model.GPT4O, Model.GPT4, Model.GPT35]:
+            response = self._ask_openai(mm.messages, log)
+        elif self.model in [Model.QWEN72I, Model.MIXTRAL7I]:
+            response = self._ask_deepinfra(mm.messages, log)
+        else:
+            response = self._ask_ollama(mm.messages, log)
+
+        logger.info(colored("Ask completed", "green"))
+        return response
+
+    def _ask_openai(self, messages, log: bool) -> str:
+        response = self.openai_client.chat.completions.create(
+            model=self.model.value,
+            messages=messages,
+            max_tokens=300
+        )
+        r = response.choices[0].message.content
+        if log:
+            logger.info(colored(f"OpenAI Response: {self.model.value}", "magenta"))
+            logger.info(colored(r, "white"))
+        return r
+
+    def _ask_ollama(self, messages, log: bool) -> str:
+        response = ollama.chat(model=self.model.value, messages=messages)
+        if log:
+            logger.info(colored(response['message']['content'], "white"))
+        return response['message']['content']
+
+    def _ask_deepinfra(self, messages, log: bool) -> str:
+        chat_completion = self.deepinfra_client.chat.completions.create(
+            model=self.model.value,
+            messages=messages,
+            max_tokens=None,
+            stream=False
+        )
+
+        if log:
+            logger.info(colored(f"\nDeepInfra Response: {self.model.value}", "magenta"))
+            logger.info(colored("-" * 20, "white"))
+            logger.info(colored(chat_completion.choices[0].message.content, "white"))
+            logger.info(colored("\nToken Usage:", "cyan"))
+            logger.info(colored(f"Prompt tokens: {chat_completion.usage.prompt_tokens}", "white"))
+            logger.info(colored(f"Completion tokens: {chat_completion.usage.completion_tokens}", "white"))
+
+        return chat_completion.choices[0].message.content
+
+
+def ai_assistant(reset: bool = True) -> AIAssistant:
+    mypath = "~/code/git.threefold.info/projectmycelium/hero_server/lib/ai/instructions"
+    if reset:
+        instructions_reset()
+        instructions_load(mypath)
+    return AIAssistant()
+
+# Usage example:
+if __name__ == "__main__":
+
+    mypath = "~/code/git.threefold.info/projectmycelium/hero_server/lib/ai/instructions"
+    instructions_reset()
+    instructions_load(mypath)
+
+    assistant = AIAssistant()
+
+    #assistant.set_model(Model.MIXTRAL7I)  # Or any other model you prefer
+    assistant.set_model(Model.QWEN72I)
+    #assistant.set_model(Model.PHI3L)
+
+    # response = assistant.ask(
+    #     category='timemgmt',
+    #     name='schedule',
+    #     question='''
+    #     lets create a story
+
+    #     we need to paint our church
+
+    #     its long over due, the major complained,
+    #     and his mother isn't happy
+
+    #     oh yes I forgot its election time
+
+    #     tom and ben will own this story
+    #     its for our church in zanzibar
+
+    #     we need to do it in 4 month from now
+
+    #     our requirements are:
+
+    #     we need to make sure it can withstand sun
+    #     color is white
+    #     cost below 1000 USD
+    #     '''
+    # )
+    #logger.info(colored("Final Response:", "green"))
+
+
+    response = assistant.ask(
+        category='',
+        name='',
+        question='''
+
+        based on following names [Isabelle, Kristof, Jan, Rob, Florine, Florian, Sabrina, Tom, Ben]
+
+        - find the owners of the story out of the text below, these owners are the ones who will do the task
+        - see if these names are in the list above
+        - if names match, return them, if not give error
+        - return the names as a json list, don't give any other output
+
+        ------
+
+
+        we need to paint our church
+
+        its long over due, the major complained,
+        and his mother isn't happy
+
+        oh yes I forgot its election time
+
+        tom and ben will own this story
+        its for our church in zanzibar
+
+        we need to do it in 4 month from now
+
+        our requirements are:
+
+        we need to make sure it can withstand sun
+        color is white
+        cost below 1000 USD
+
+        '''
+    )
+
+
+    logger.info(colored(response, "white"))
\ No newline at end of file
diff --git a/_archive/aiprompts/instruction.py b/_archive/aiprompts/instruction.py
new file mode 100644
index 0000000..c4aadad
--- /dev/null
+++ b/_archive/aiprompts/instruction.py
@@ -0,0 +1,158 @@
+import os
+import json
+import redis
+from typing import List, Dict, Optional
+
+redis_client = redis.Redis(host='localhost', port=6379, db=0)
+
+# Loads instructions from the filesystem and stores them in redis for further usage.
+class MessageManager:
+    def __init__(self, name='', category='', path: str = "", load: bool = True):
+        self.name = name
+        self.category = category
+        self.messages: List[Dict[str, str]] = []
+        if self.category == "":
+            return
+        if path:
+            self.add(path)
+        else:
+            if load:
+                self.load()
+
+    def add(self, dir_path: str, filter: Optional[List[str]] = None, save: bool = True):
+        dir_path = os.path.expanduser(dir_path)
+
+        def process_files(current_dir: str):
+            files_to_process = []
+            for root, _, files in os.walk(current_dir):
+                for file in files:
+                    if file.startswith(('sys_', 'user_')):
+                        try:
+                            priority = int(file.split('_')[1])
+                            descr = '_'.join(file.split('_')[2:])
+                            if not filter or any(f in descr for f in filter):
+                                files_to_process.append((os.path.join(root, file), priority))
+                        except (IndexError, ValueError):
+                            print(f"Skipping file with invalid format: {file}")
+
+            for file_path, _ in sorted(files_to_process, key=lambda x: x[1]):
+                file_name = os.path.basename(file_path)
+                role = "system" if file_name.startswith('sys_') else "user"
+                self.add_file(file_path, role)
+
+        process_files(dir_path)
+
+        if save:
+            self.save()
+
+    def add_file(self, file_path, role):
+        file_path = os.path.expanduser(file_path)
+        with open(file_path, 'r') as file:
+            content = file.read().strip()
+        if role == "system":
+            self.add_message(role, content)
+        elif role == "user":
+            content_parts = content.split('--------', 1)
+            if len(content_parts) == 2:
+                content1, content2 = content_parts[0].strip(), content_parts[1].strip()
+                self.add_message("user", content1)
+                self.add_message("assistant", content2)
+            else:
+                raise Exception(f"File {file_path} does not contain the expected separator '--------'")
+        else:
+            raise Exception("Wrong role")
+
+    def add_message(self, role, content):
+        if not self.__validate_message(role, content):
+            raise ValueError(f"Invalid message format. Role: {role}, Content: {content}")
+        self.messages.append({"role": role, "content": content})
+
+    def __validate_message(self, role, content):
+        valid_roles = ["system", "user", "assistant"]
+        return (
+            isinstance(role, str) and
+            role in valid_roles and
+            isinstance(content, str) and
+            len(content.strip()) > 0
+        )
+
+    def print_messages(self):
+        for message in self.messages:
+            role = message["role"].capitalize()
+            content = message["content"]
+            print(f"\n{role}:\n{'-' * len(role)}")
+            print(content)
+            print("-" * 40)
+
+    def get_messages(self):
+        return self.messages
+
+    def save(self):
+        key = f"llm:instructions:{self.category}:{self.name}"
+        value = json.dumps(self.messages)
+        redis_client.set(key, value)
+
+    # Returns True if there were stored instructions.
+    def load(self):
+        key = f"llm:instructions:{self.category}:{self.name}"
+        value = redis_client.get(key)
+        if value:
+            self.messages = json.loads(value)
+            return True
+        return False
+
+    def delete(self):
+        key = f"llm:instructions:{self.category}:{self.name}"
+        return redis_client.delete(key)
+
+def instructions_reset():
+    pattern = "llm:instructions*"
+    keys_to_delete = redis_client.scan_iter(match=pattern)
+    for key in keys_to_delete:
+        redis_client.delete(key)
+
+# Get a message manager, loading its messages from redis.
+def instructions_get(name: str, category: str) -> MessageManager:
+    m = MessageManager(name, category)
+    return m
+
+def instructions_load(path: str) -> List[MessageManager]:
+    path = os.path.expanduser(path)
+    message_managers = []
+    #print(f"load {path}")
+    for item in os.listdir(path):
+        cat_path = os.path.join(path, item)
+        if os.path.isdir(cat_path):
+            category = os.path.basename(cat_path)
+            #print(f"  load category: {cat_path}")
+            # Process files in the category directory; these are re-used in each MessageManager.
+            category_manager = MessageManager(name="", category=category)
+            for item in os.listdir(cat_path):
+                item_path = os.path.join(cat_path, item)
+                if os.path.isfile(item_path):
+                    if item.startswith('sys_') or item.startswith('user_'):
+                        #print(f"  load cat base: {item_path}")
+                        role = "system" if item.startswith('sys_') else "user"
+                        category_manager.add_file(item_path, role)
+                elif os.path.isdir(item_path):
+                    #print(f"  load cat: {item_path}")
+                    manager = MessageManager(name=item, category=category)
+                    # Copy the category-level messages; sharing the list reference would let
+                    # every manager append into the same object.
+                    manager.messages = category_manager.messages.copy()
+                    manager.add(item_path)
+                    message_managers.append(manager)
+
+    return message_managers
+
+
+# Usage example:
+if __name__ == "__main__":
+
+    # mypath="/Users/despiegk1/code/git.threefold.info/projectmycelium/hero_server/lib/ai/instructions/timemgmt"
+    # #mypath=""
+    # manager = MessageManager(name="schedule", category="timemgmt", path=mypath)
+    # manager.print_messages()
+
+    mypath = "/Users/despiegk1/code/git.threefold.info/projectmycelium/hero_server/lib/ai/instructions"
+    instructions_reset()
+    instructions_load(mypath)
+
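For orientation, the layout that `instructions_load` expects mirrors the files added below: one directory per category, optional `sys_*`/`user_*` files at the category level that are re-used by every manager, and one subdirectory per named instruction set. A minimal sketch (paths taken from this commit; the bare `from instruction import ...` form assumes you run it next to instruction.py, while ask.py imports it as `ai.instruction`):

```python
# Layout consumed by instructions_load():
#
#   instructions/
#     timemgmt/                          # category
#       sys_2_heroscript.md              # category-level system message (priority 2)
#       schedule/                        # -> MessageManager(name="schedule", category="timemgmt")
#         sys_5_translate.md             # system message, priority 5
#         user_3_heroscript_agenda.md    # user/assistant pair, split on a '--------' line
#
# Each manager is persisted under the redis key llm:instructions:<category>:<name>.
from instruction import instructions_load, instructions_reset

instructions_reset()
for manager in instructions_load("~/code/git.threefold.info/projectmycelium/hero_server/lib/ai/instructions"):
    manager.print_messages()
```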
diff --git a/_archive/aiprompts/instructions/timemgmt/schedule/sys_5_translate.md b/_archive/aiprompts/instructions/timemgmt/schedule/sys_5_translate.md
new file mode 100644
index 0000000..228a46d
--- /dev/null
+++ b/_archive/aiprompts/instructions/timemgmt/schedule/sys_5_translate.md
@@ -0,0 +1,10 @@
+
+if the chat coming from the user asks for a translation, e.g. "translate to french",
+then translate the incoming text to that language
+
+don't do anything for heroscript
+
+this overrules all previous instructions
+
+just output the translated text
+
diff --git a/_archive/aiprompts/instructions/timemgmt/schedule/user_3_heroscript_agenda.md b/_archive/aiprompts/instructions/timemgmt/schedule/user_3_heroscript_agenda.md
new file mode 100644
index 0000000..8b3d64e
--- /dev/null
+++ b/_archive/aiprompts/instructions/timemgmt/schedule/user_3_heroscript_agenda.md
@@ -0,0 +1,42 @@
+
+how do I use heroscript for agenda management
+
+-------------------------
+
+heroscript has basic notations to deal with calendars and events
+
+when the user asks to translate a calendar action to heroscript, use the following rules and see the example below
+
+- all dates are in europe style: Format: DD/MM/YYYY e.g. 06/07/2023
+- if the year is not specified by the user then always use the current year, which is 2024
+- if the month is not specified use the current month, which is september or month 9
+- date and title are always required; if attendees or people are mentioned they should be on the `attendies` list
+- don't use comments in the heroscript (means no // at end of line for heroscript)
+- default duration is 1h; 15m (15 min) or 1 day are also ok
+
+```heroscript
+
+//to add an item to the agenda
+!!calendar.add
+    date:'30/10/2024'
+    time:'10pm'
+    duration:'1h'
+    title:'meeting with tech team'
+    attendies:'user1, kristof, ...'
+    description:''
+
+//to delete (can use words cancel, delete)
+!!calendar.delete
+    id:100
+
+//to reschedule e.g. delay, 1d stands for 1 day, 1w for 1 week, 1h for 1 hour
+!!calendar.delay
+    id:100
+    delay:'2d'
+
+//when we e.g. reschedule or delete, we can inform participants
+!!calendar.inform
+    id:100
+
+
+```
diff --git a/_archive/aiprompts/instructions/timemgmt/schedule/user_4_heroscript_story copy.md b/_archive/aiprompts/instructions/timemgmt/schedule/user_4_heroscript_story copy.md
new file mode 100644
index 0000000..03e1b03
--- /dev/null
+++ b/_archive/aiprompts/instructions/timemgmt/schedule/user_4_heroscript_story copy.md
@@ -0,0 +1,60 @@
+
+how do I use heroscript for story and task management
+
+-------------------------
+
+heroscript has basic notations to deal with stories and tasks
+
+when the user asks to translate a story or task action to heroscript, use the following rules and see the example below
+
+- all dates are in europe style: Format: DD/MM/YYYY e.g. 06/07/2023
+- if the year is not specified by the user then always use the current year, which is 2024
+- if the month is not specified use the current month, which is september or month 9
+- title is always required; if attendees or people are mentioned they should be on the assignment list
+- date & time & duration are optional
+- don't use comments in the heroscript (means no // at end of line for heroscript)
+- duration is expressed as 1m, 1h, 1d (minute, hour, day)
+- deadline is either a date or +1h, +1d, ...; the + means time from now, listed the same way, e.g. +1h
+  - 1 month is expressed as 30 days or +30 days, 2 months as 60 days, ... (which means +30d for 1 month)
+- stories cannot have a date; if a date is given, give an error
+- owners, assignees, contributors, executors are all the same
+- the description is always in markdown format
+- the description always has the title repeated
+- the description has title, purpose, deliverables
+- try to figure out what the purpose and deliverables are
+- purpose is put as a list in markdown
+
+```heroscript
+
+//to add a new story
+!!story.add
+    title:'need to improve UI for version 1.0'
+    owners:'karoline, kristof'
+    description:'
+    # need to improve UI for version 1.0
+
+    We got some complaints from our userbase and its overdue.
+
+    ## deliverables
+
+    - [ ] specs and check with kristof
+    - [ ] implement mockup
+    - [ ] implement prototype
+
+    '
+
+
+//to add a new task, which might (optionally) be linked to a story
+!!task.add
+    title:'let our userbase know'
+    story:10
+    owners:'kristof'
+    deadline:'+10d'
+    description:'
+    write email to userbase
+    ask tom to check
+    '
+
+
+
+```
diff --git a/_archive/aiprompts/instructions/timemgmt/sys_2_heroscript.md b/_archive/aiprompts/instructions/timemgmt/sys_2_heroscript.md
new file mode 100644
index 0000000..e3bc5bd
--- /dev/null
+++ b/_archive/aiprompts/instructions/timemgmt/sys_2_heroscript.md
@@ -0,0 +1,60 @@
+
+'heroscript' is a simple declarative language in the following form
+
+```heroscript
+!!mother.define
+    myname:'mymama'
+    mylist:'20,200'
+    myint:2
+
+//this is how we define a child (is in list)
+!!child.define
+    mother:'mymama'
+    name:'florine'
+    length:100
+    description:'
+    multiline is supported
+    '
+
+!!child.define
+    mother:'mymama'
+    name:'aurelie'
+    length:60
+    description:'
+    multiline is supported
+    now for aurelie
+    '
+```
+
+some rules
+
+
+- '0,70' is a list of 2 (when there is a comma, it's a list)
+- never use [] in lists, just use comma separation between quotes ''
+- in lists always put lowercase names
+- node_name:'silver' is the same as node_name:silver; when there are spaces, always put '' around the value
+- // means comment
+- all dates are in europe style: Format: DD/MM/YYYY e.g. 06/07/2023, always specify the year
+
+the corresponding model in vlang would be
+
+```vlang
+pub struct Mother {
+pub mut:
+    myname   string
+    mylist   []int
+    myint    int
+    children []Child
+}
+
+pub struct Child {
+pub mut:
+    name        string
+    length      int
+    description string
+}
+```
+
+
+
+
diff --git a/_archive/aiprompts/instructions/vlang/heroscript/sys_2_heroscript.md b/_archive/aiprompts/instructions/vlang/heroscript/sys_2_heroscript.md
new file mode 100644
index 0000000..3af7b2c
--- /dev/null
+++ b/_archive/aiprompts/instructions/vlang/heroscript/sys_2_heroscript.md
@@ -0,0 +1,61 @@
+
+'heroscript' is a simple declarative language in the following form
+
+```heroscript
+!!mother.define
+    myname:'mymama'
+    mylist:'20,200'
+    myint:2
+
+//this is how we define a child (is in list)
+!!child.define
+    mother:'mymama'
+    name:'florine'
+    length:100
+    description:'
+    multiline is supported
+    '
+
+!!child.define
+    mother:'mymama'
+    name:'aurelie'
+    length:60
+    description:'
+    multiline is supported
+    now for aurelie
+    '
+```
+
+some rules
+
+
+- '0,70' is a list of 2 (when there is a comma, it's a list)
+- never use [] in lists, just use comma separation between quotes ''
+- in lists always put lowercase names
+- node_name:'silver' is the same as node_name:silver; when there are spaces, always put '' around the value
+- // means comment
+- all dates are in europe style: Format: DD/MM/YYYY e.g. 06/07/2023, always specify the year
+
+the corresponding model in vlang would be
+
+```vlang
+pub struct Mother {
+pub mut:
+    myname   string
+    mylist   []int
+    myint    int
+    children []Child
+}
+
+pub struct Child {
+pub mut:
+    name        string
+    length      int
+    description string
+}
+```
+
+
+In a heroscript file, the lines after a `!!<name>.define` statement define the properties or fields of the struct being defined. The properties are specified as `key:value`, with each property on a new line.
+
diff --git a/_archive/aiprompts/instructions/vlang/http/user_1_httpclient_post.md b/_archive/aiprompts/instructions/vlang/http/user_1_httpclient_post.md
new file mode 100644
index 0000000..f101034
--- /dev/null
+++ b/_archive/aiprompts/instructions/vlang/http/user_1_httpclient_post.md
@@ -0,0 +1,35 @@
+
+how can I query a webservice over http using vlang for a simple post request
+
+
+-------------------
+
+
+```vlang
+
+import freeflowuniverse.crystallib.clients.httpconnection
+import json
+
+
+mut conn := httpconnection.new(name: 'test', url: 'https://jsonplaceholder.typicode.com/')!
+
+
+// adding a header field to be used in all requests.
+// the default header has the field Content-Type set to 'application/json',
+// but we should reconsider this and leave it out, setting it manually when needed
+conn.default_header.add(.content_language, 'Content-Language: en-US')
+
+// Getting the blog post with id 1 (as an example); this should be a fresh response from the server
+mut res := conn.send(prefix: 'posts', id: '1')!
+
+// The Result object has at minimum the fields (code, data) and the method is_ok()
+println('Status code: ${res.code}')
+
+// you can check if you got a success status code or not
+println('Success: ${res.is_ok()}')
+
+// access the result data
+println('Data: ${res.data}')
+
+
+```
\ No newline at end of file
diff --git a/_archive/aiprompts/instructions/vlang/sys_1_vlang.md b/_archive/aiprompts/instructions/vlang/sys_1_vlang.md
new file mode 100644
index 0000000..5b1f840
--- /dev/null
+++ b/_archive/aiprompts/instructions/vlang/sys_1_vlang.md
@@ -0,0 +1,80 @@
+you are a chatbot; you try to help everyone with knowledge about v (vlang), which is in the attached knowledge base
+
+ALWAYS FOLLOW THE FOLLOWING INSTRUCTIONS FIRST
+
+## struct examples
+
+```v
+@[heap]
+pub struct GitAddr {
+pub mut:
+    gsconfig &GitStructureConfig
+    accounts []&Account
+    provider string
+    account  string
+    name     string // is the name of the repository
+    branch   string
+    nr       int
+}
+
+pub struct Account {
+pub mut:
+    name string // my comment
+
+}
+
+```
+
+note the usage of pub & pub mut
+
+all names are lowercase (snake_case with _)
+
+& is used for references
+
+## normalize a string
+
+We call this name fix: any time we use a name as an id, or as a key in a map, we want to normalize the string
+
+```v
+import freeflowuniverse.crystallib.core.texttools
+
+mut myname:="a__Name_to_fix"
+myname = texttools.name_fix(myname)
+```
+
+## dealing with paths
+
+always use this library when dealing with paths; info on how to use it can be found in your knowledge base in core.pathlib.md
+
+```v
+import freeflowuniverse.crystallib.core.pathlib
+
+// to get a path from a file or dir; pathlib will figure out if it's a dir or file and whether it exists
+mut p:=pathlib.get('/tmp/mysourcefiles')!
+
+// to get a dir and create it
+
+
+// to get a list of paths and copy them to another destination
+mut pathlist:=p.list(regex:[r'.*.md$'])! // this gets all files ending in .md
+pathlist.copy('/tmp/mydest')!
+
+```
+
+## executing commands
+
+```v
+
+// simple commands (< 1 line) can be executed using os.execute
+// fn execute(cmd string) Result -- see the os.md module
+res := os.execute(cmd)
+if res.exit_code > 0 {
+    return error('cannot upload over ssh: ${cmd}')
+}
+// ALWAYS check the return code
+```
+
+if the command is more complicated, use the osal.exec method as can be found in the osal.md file
+
+```v
+res := osal.exec(cmd: args.cmd, stdout: args.stdout, debug: executor.debug)!
+```
\ No newline at end of file
+ +``` + +## executing commands + +```v + +#simple commands, means < 1 line and can be executed using os.execute +# fn execute(cmd string) Result see os.md module +res := os.execute(cmd) +if res.exit_code > 0 { + return error('cannot upload over ssh: ${cmd}') +} +#ALWAYS check the return code +``` + +#if the command is more complicated use the osal.exec method as can be found in osal.md file + +res := osal.exec(cmd: args.cmd, stdout: args.stdout, debug: executor.debug)! +``` \ No newline at end of file diff --git a/_archive/aiprompts/intent.py b/_archive/aiprompts/intent.py new file mode 100644 index 0000000..081ac58 --- /dev/null +++ b/_archive/aiprompts/intent.py @@ -0,0 +1,23 @@ +from transformers import pipeline + +# Load the pipeline for text classification +classifier = pipeline("zero-shot-classification", model="typeform/distilbert-base-uncased-mnli") + +# Define the possible intents +candidate_labels = ["complaint", "feedback", "appointment","travel","agenda","taskmanagement","religion","fire test"] + +def determine_intent(user_input): + result = classifier(user_input, candidate_labels) + print(result) + return result["labels"][0] # The intent with the highest score + +# Example user input +user_input = ''' + Playing with matches is dangerous. + Can you book me a meeting, its about flying to paris + ''' + +# Determine the intent +for i in range(10): + intent = determine_intent(user_input) + print(f"User intent: {intent}") \ No newline at end of file diff --git a/_archive/aiprompts/tools/chinook.py b/_archive/aiprompts/tools/chinook.py new file mode 100644 index 0000000..98c22f0 --- /dev/null +++ b/_archive/aiprompts/tools/chinook.py @@ -0,0 +1,133 @@ +import sqlite3 + +import json +from openai import OpenAI +from tenacity import retry, wait_random_exponential, stop_after_attempt +from termcolor import colored + +GPT_MODEL = "gpt-4o" +client = OpenAI() +dbpath="/Users/despiegk1/Downloads/chinook.db" + +conn = sqlite3.connect(dbpath) +print("Opened database successfully") + +def get_table_names(conn): + """Return a list of table names.""" + table_names = [] + tables = conn.execute("SELECT name FROM sqlite_master WHERE type='table';") + for table in tables.fetchall(): + table_names.append(table[0]) + return table_names + + +def get_column_names(conn, table_name): + """Return a list of column names.""" + column_names = [] + columns = conn.execute(f"PRAGMA table_info('{table_name}');").fetchall() + for col in columns: + column_names.append(col[1]) + return column_names + + +def get_database_info(conn): + """Return a list of dicts containing the table name and columns for each table in the database.""" + table_dicts = [] + for table_name in get_table_names(conn): + columns_names = get_column_names(conn, table_name) + table_dicts.append({"table_name": table_name, "column_names": columns_names}) + return table_dicts + + +database_schema_dict = get_database_info(conn) +database_schema_string = "\n".join( + [ + f"Table: {table['table_name']}\nColumns: {', '.join(table['column_names'])}" + for table in database_schema_dict + ] +) + +tools = [ + { + "type": "function", + "function": { + "name": "ask_database", + "description": "Use this function to answer user questions about music. Input should be a fully formed SQL query.", + "parameters": { + "type": "object", + "properties": { + "query": { + "type": "string", + "description": f""" + SQL query extracting info to answer the user's question. 
+ SQL should be written using this database schema: + {database_schema_string} + The query should be returned in plain text, not in JSON. + """, + } + }, + "required": ["query"], + }, + } + } +] + +def ask_database(conn, query): + """Function to query SQLite database with a provided SQL query.""" + try: + results = str(conn.execute(query).fetchall()) + except Exception as e: + results = f"query failed with error: {e}" + return results + + +# Step #1: Prompt with content that may result in function call. In this case the model can identify the information requested by the user is potentially available in the database schema passed to the model in Tools description. +messages = [{ + "role":"user", + "content": "What is the name of the album with the most tracks?" +}] + +response = client.chat.completions.create( + model='gpt-4o', + messages=messages, + tools= tools, + tool_choice="auto" +) + +# Append the message to messages list +response_message = response.choices[0].message +messages.append(response_message) + +print(response_message) + +# Step 2: determine if the response from the model includes a tool call. +tool_calls = response_message.tool_calls +if tool_calls: + # If true the model will return the name of the tool / function to call and the argument(s) + tool_call_id = tool_calls[0].id + tool_function_name = tool_calls[0].function.name + tool_query_string = eval(tool_calls[0].function.arguments)['query'] + + # Step 3: Call the function and retrieve results. Append the results to the messages list. + if tool_function_name == 'ask_database': + results = ask_database(conn, tool_query_string) + + messages.append({ + "role":"tool", + "tool_call_id":tool_call_id, + "name": tool_function_name, + "content":results + }) + + # Step 4: Invoke the chat completions API with the function response appended to the messages list + # Note that messages with role 'tool' must be a response to a preceding message with 'tool_calls' + model_response_with_function_call = client.chat.completions.create( + model="gpt-4o", + messages=messages, + ) # get a new response from the model where it can see the function response + print(model_response_with_function_call.choices[0].message.content) + else: + print(f"Error: function {tool_function_name} does not exist") +else: + # Model did not identify a function to call, result can be returned to the user + print(response_message.content) \ No newline at end of file diff --git a/_archive/bart/bart.py b/_archive/bart/bart.py new file mode 100644 index 0000000..767ece6 --- /dev/null +++ b/_archive/bart/bart.py @@ -0,0 +1,15 @@ +import requests + +API_URL = "https://api-inference.huggingface.co/models/facebook/bart-large-mnli" +headers = {"Authorization": "Bearer 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"} + +def query(payload): + response = requests.post(API_URL, headers=headers, json=payload) + return response.json() + +output = query({ + "inputs": "Hi, I recently bought a device from your company but it is not working as advertised and I would like to get reimbursed!", + "parameters": {"candidate_labels": ["refund", "legal", "faq"]}, +}) + +print(output) \ No newline at end of file diff --git a/_archive/fluently_image/fluently.py b/_archive/fluently_image/fluently.py new file mode 100644 index 0000000..cdff936 --- /dev/null +++ b/_archive/fluently_image/fluently.py @@ -0,0 +1,15 @@ +import requests + +API_URL = "https://api-inference.huggingface.co/models/fluently/Fluently-XL-v4" +headers = {"Authorization": "Bearer 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"} + +def 
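Since `function.arguments` arrives as a JSON-encoded string, a representative payload for the `ask_database` schema above would look like the following (the SQL text itself is illustrative):

```python
import json

# Hypothetical tool-call arguments as the model would return them for ask_database:
raw_arguments = (
    '{"query": "SELECT Album.Title FROM Album JOIN Track ON Album.AlbumId = Track.AlbumId '
    'GROUP BY Album.AlbumId ORDER BY COUNT(Track.TrackId) DESC LIMIT 1"}'
)
query = json.loads(raw_arguments)["query"]  # json.loads, never eval, on model output
```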
diff --git a/_archive/bart/bart.py b/_archive/bart/bart.py
new file mode 100644
index 0000000..767ece6
--- /dev/null
+++ b/_archive/bart/bart.py
@@ -0,0 +1,15 @@
+import os
+import requests
+
+API_URL = "https://api-inference.huggingface.co/models/facebook/bart-large-mnli"
+# Read the API token from the environment instead of hardcoding it (env var name is a suggestion).
+headers = {"Authorization": f"Bearer {os.environ.get('HF_API_TOKEN', '')}"}
+
+def query(payload):
+    response = requests.post(API_URL, headers=headers, json=payload)
+    return response.json()
+
+output = query({
+    "inputs": "Hi, I recently bought a device from your company but it is not working as advertised and I would like to get reimbursed!",
+    "parameters": {"candidate_labels": ["refund", "legal", "faq"]},
+})
+
+print(output)
\ No newline at end of file
diff --git a/_archive/fluently_image/fluently.py b/_archive/fluently_image/fluently.py
new file mode 100644
index 0000000..cdff936
--- /dev/null
+++ b/_archive/fluently_image/fluently.py
@@ -0,0 +1,15 @@
+import io
+import os
+
+import requests
+from PIL import Image
+
+API_URL = "https://api-inference.huggingface.co/models/fluently/Fluently-XL-v4"
+# Read the API token from the environment instead of hardcoding it (env var name is a suggestion).
+headers = {"Authorization": f"Bearer {os.environ.get('HF_API_TOKEN', '')}"}
+
+def query(payload):
+    response = requests.post(API_URL, headers=headers, json=payload)
+    return response.content
+
+image_bytes = query({
+    "inputs": "Astronaut riding a horse",
+})
+
+# You can access the image with PIL.Image, for example:
+image = Image.open(io.BytesIO(image_bytes))
\ No newline at end of file
diff --git a/_archive/git_poller.py b/_archive/git_poller.py
new file mode 100644
index 0000000..71aeec3
--- /dev/null
+++ b/_archive/git_poller.py
@@ -0,0 +1,123 @@
+import sys
+import os
+import redis
+import subprocess
+import time
+
+def find_git_root(path):
+    while path != '/':
+        if os.path.exists(os.path.join(path, '.git')):
+            return path
+        path = os.path.dirname(path)
+    return None
+
+def get_git_hash(path):
+    return subprocess.check_output(['git', 'rev-parse', 'HEAD'], cwd=path).decode('utf-8').strip()
+
+def get_changes(path, old_hash, new_hash):
+    return subprocess.check_output(['git', 'log', f'{old_hash}..{new_hash}', '--name-only', '--pretty=format:'], cwd=path).decode('utf-8').split('\n')
+
+def find_heroscript_callers(path, changes, start_path):
+    callers = set()
+    for change in changes:
+        if not change:
+            continue
+        change_path = os.path.join(path, change)
+        current_path = os.path.dirname(change_path)
+        while current_path.startswith(start_path):
+            if os.path.exists(os.path.join(current_path, '.heroscript_caller')):
+                callers.add(os.path.join(current_path, '.heroscript_caller'))
+                break
+            current_path = os.path.dirname(current_path)
+    return callers
+
+def find_all_heroscript_callers(path):
+    callers = set()
+    for root, dirs, files in os.walk(path):
+        if '.heroscript_caller' in files:
+            callers.add(os.path.join(root, '.heroscript_caller'))
+    return callers
+
+def read_heroscript_caller(file_path):
+    with open(file_path, 'r') as file:
+        lines = [line.strip() for line in file if line.strip()]
+    return list(dict.fromkeys(lines))  # Remove duplicates while preserving order
+
+def main(start_path, reset=False):
+    if not start_path:
+        start_path = os.getcwd()
+
+    git_root = find_git_root(start_path)
+    if not git_root:
+        print(f"Error: No git repository found in {start_path} or its parent directories.")
+        return
+
+    r = redis.Redis(host='localhost', port=6379, db=0)
+
+    if reset:
+        r.hdel('git.lastcommit', git_root)
+        print(f"Reset Redis hash for {git_root}")
+
+    # Perform git pull
+    subprocess.run(['git', 'pull'], cwd=git_root, check=True)
+
+    new_hash = get_git_hash(git_root)
+    old_hash = r.hget('git.lastcommit', git_root)
+
+    if old_hash:
+        old_hash = old_hash.decode('utf-8')
+        if old_hash != new_hash:
+            changes = get_changes(git_root, old_hash, new_hash)
+            callers = find_heroscript_callers(git_root, changes, start_path)
+        else:
+            print("No changes detected.")
+            return
+    else:
+        callers = find_all_heroscript_callers(start_path)
+
+    myerror = False
+    for caller in callers:
+        unique_lines = read_heroscript_caller(caller)
+        for heroscripturl in unique_lines:
+            print(f"{heroscripturl}:{new_hash}")
+            res0 = run_hero_command(heroscripturl)
+            if not res0:
+                myerror = True
+
+    if not myerror:
+        r.hset('git.lastcommit', git_root, new_hash)
+
+def run_hero_command(url: str) -> bool:
+    try:
+        # Construct the command
+        command = f"hero run -u {url}"
+
+        # Run the command and capture output
+        result = subprocess.run(command, shell=True, check=True,
+                                stdout=subprocess.PIPE, stderr=subprocess.PIPE,
+                                text=True)
+
+    except subprocess.CalledProcessError as e:
+        print(f"Error running command: {e}")
+        print(f"Command output: {e.output}")
+        print(f"Command stderr: {e.stderr}")
+        return False
+    except Exception as e:
+        # A generic Exception has no .output/.stderr attributes, so only report the error itself.
+        print(f"An unexpected error occurred: {e}")
+        return False
+
+    print("Command Output (stdout):")
+    print(result.stdout)
+
+    return True
+
+if __name__ == "__main__":
+    if len(sys.argv) == 3 and sys.argv[2] == '--reset':
+        main(sys.argv[1], reset=True)
+    elif len(sys.argv) == 2:
+        main(sys.argv[1])
+    else:
+        print("Usage: python script.py <start_path> [--reset]")
+        sys.exit(1)
\ No newline at end of file
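For reference, a `.heroscript_caller` file as consumed by `read_heroscript_caller` is plain text with one heroscript URL per line; blank lines are skipped and duplicates dropped. A sketch with hypothetical URLs:

```python
# Hypothetical .heroscript_caller content -- each URL is handed to `hero run -u <url>`:
sample = """\
https://git.example.com/myorg/myrepo/src/branch/main/deploy.md
https://git.example.com/myorg/myrepo/src/branch/main/rebuild.md
"""
with open(".heroscript_caller", "w") as f:
    f.write(sample)
```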
diff --git a/_archive/lib/context/context.py b/_archive/lib/context/context.py
new file mode 100644
index 0000000..ad088e5
--- /dev/null
+++ b/_archive/lib/context/context.py
@@ -0,0 +1,199 @@
+"""Context management module for handling file operations and tracking changes."""
+
+import hashlib
+import logging
+import os
+import shutil
+
+from herotools.pathtools import remove_file_if_exists
+from herotools.texttools import name_fix
+
+
+class MyFile:
+    """A class representing a file in the context system with tracking capabilities."""
+
+    def __init__(self, path: str):
+        """Initialize a MyFile instance.
+
+        Args:
+            path: The path to the file
+
+        """
+        self.path = path
+        self.exists = os.path.exists(self.path)
+        self.changed_in_context = False  # Indicates if the file is new or was changed
+        self._md5 = ""
+
+    def md5(self) -> str:
+        """Calculate and return the MD5 hash of the file.
+
+        Returns:
+            str: The MD5 hash of the file's contents
+
+        Raises:
+            FileNotFoundError: If the file does not exist
+
+        """
+        if not self.exists:
+            raise FileNotFoundError(f"File does not exist: {self.path}")
+        if not self._md5:
+            with open(self.path, "rb") as file:
+                self._md5 = hashlib.md5(file.read()).hexdigest()
+        return self._md5
+
+    def name(self) -> str:
+        """Return the base name of the file.
+
+        Returns:
+            str: The file's base name
+
+        """
+        return os.path.basename(self.path)
+
+    def ext(self) -> str:
+        """Return the file extension in lower case.
+
+        Returns:
+            str: The file's extension in lowercase
+
+        """
+        return os.path.splitext(self.path)[1].lower()
+
+
+class Context:
+    """A class for managing file contexts and tracking file changes."""
+
+    def __init__(self, name: str = "default", reset: bool = False):
+        """Initialize a Context instance.
+
+        Args:
+            name: The name of the context
+            reset: Whether to reset (remove) the existing context
+
+        """
+        logging.basicConfig(level=logging.DEBUG, format="%(message)s")
+        self.logger = logging.getLogger(__name__)
+        contextroot = os.getenv("CONTEXTROOT", "~/context")
+        self.name = name_fix(name)
+        self.path = os.path.join(os.path.expanduser(contextroot), self.name)
+        if reset:
+            self._remove_context()
+
+    def _remove_context(self):
+        """Remove the context directory if it exists."""
+        if os.path.exists(self.path):
+            try:
+                shutil.rmtree(self.path)
+                self.logger.info(f"Context directory removed: {self.path}")
+            except Exception as e:
+                self.logger.error(f"Error removing context directory: {e!s}")
+
+    def file_set(self, path: str, cat: str, name: str = "", content: str = "") -> MyFile:
+        """Set a file in the context with the given category.
+
+        Args:
+            path: Source file path
+            cat: Category for organizing files
+            name: Optional custom name for the file
+            content: Optional content to write to file
+
+        Returns:
+            MyFile: A MyFile instance representing the file in context
+
+        Raises:
+            ValueError: If both path and content are provided
+            FileNotFoundError: If the source file does not exist
+
+        """
+        cat = name_fix(cat)
+        name = name_fix(name)
+
+        if content:
+            if path:
+                raise ValueError("path and content cannot be both set")
+            path = os.path.join(self.path, "files", cat, name)
+            # Make sure the target directory exists before writing the content.
+            os.makedirs(os.path.dirname(path), exist_ok=True)
+            with open(path, "w") as file:
+                file.write(content)
+
+        mf = MyFile(path=path)
+        if not mf.exists:
+            raise FileNotFoundError(f"Source file does not exist: {path}")
+
+        if not content:
+            if not name:
+                name = name_fix(mf.name())
+            else:
+                if os.path.splitext(name)[1].lower() != mf.ext():
+                    name_ext = os.path.splitext(name)[1]
+                    raise ValueError(f"Extension {name_ext} must match file extension {mf.ext()}")
+
+        file_path = os.path.join(self.path, "files", cat, name)
+        file_path_md5 = os.path.join(self.path, "files", cat, name + ".md5")
+        os.makedirs(os.path.dirname(file_path), exist_ok=True)
+
+        # Check the MD5 hash of the file already on disk (if any)
+        md5_on_disk = ""
+        if os.path.exists(file_path_md5):
+            with open(file_path_md5) as file:
+                md5_on_disk = file.read().strip()
+                # Validate that it's a valid MD5 hash
+                if len(md5_on_disk) != 32 or not all(c in "0123456789abcdef" for c in md5_on_disk.lower()):
+                    raise RuntimeError("Bug: hash is not in the right format")
+
+        new_md5 = mf.md5()
+
+        changed_in_context = False
+        if not md5_on_disk or new_md5 != md5_on_disk:
+            changed_in_context = True
+        mf.changed_in_context = changed_in_context  # record the result on the file object
+
+        md5_dir = os.path.join(self.path, "files", "md5")
+
+        if changed_in_context:
+            # File did change
+            old_name = os.path.basename(path)
+            new_name = os.path.basename(file_path)
+            self.logger.debug(f"File changed in context {self.name}: {old_name} -> {new_name}")
+            if mf.path != file_path:
+                shutil.copy2(mf.path, file_path)
+            with open(file_path_md5, "w") as file:
+                file.write(new_md5)
+            # Remove the old MD5 link if it exists
+            if md5_on_disk:
+                old_md5_link = os.path.join(md5_dir, md5_on_disk)
+                remove_file_if_exists(old_md5_link)
+
+        mf.path = file_path
+
+        os.makedirs(md5_dir, exist_ok=True)
+        md5_link = os.path.join(md5_dir, mf.md5())
+        if not os.path.exists(md5_link):
+            os.symlink(os.path.relpath(file_path, md5_dir), md5_link)
+
+        return mf
+
+    def file_get(self, name: str, cat: str, needtoexist: bool = True) -> MyFile:
+        """Get a file from the context with the given category.
+
+        Args:
+            name: Name of the file to retrieve
+            cat: Category the file is stored under
+            needtoexist: Whether to raise an error if the file doesn't exist
+
+        Returns:
+            MyFile: A MyFile instance representing the requested file
+
+        Raises:
+            FileNotFoundError: If needtoexist is True and the file doesn't exist
+
+        """
+        name = name_fix(name)
+        cat = name_fix(cat)
+        file_path = os.path.join(self.path, "files", cat, name)
+        if needtoexist:
+            if os.path.exists(file_path):
+                return MyFile(file_path)
+            else:
+                self.logger.warning(f"File not found: {file_path}")
+                raise FileNotFoundError(f"Context file does not exist: {file_path}")
+        else:
+            return MyFile(file_path)
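A minimal usage sketch for the context module (the context name and file names are illustrative; `CONTEXTROOT` falls back to `~/context` as in the code above):

```python
from context import Context

ctx = Context(name="demo", reset=True)

# Store content under <contextroot>/demo/files/notes/todo.md; an .md5 sidecar and a
# symlink under files/md5/ track whether the file changed since the last call.
mf = ctx.file_set(path="", cat="notes", name="todo.md", content="# TODO\n- paint the church\n")
print(mf.path, mf.changed_in_context)

# Retrieve it again by category and name.
same = ctx.file_get(name="todo.md", cat="notes")
print(same.md5())
```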
diff --git a/_archive/lib/dagu/client.py b/_archive/lib/dagu/client.py
new file mode 100644
index 0000000..ab67482
--- /dev/null
+++ b/_archive/lib/dagu/client.py
@@ -0,0 +1,155 @@
+import os
+import requests
+from requests.auth import HTTPBasicAuth
+from dataclasses import dataclass
+from typing import List, Optional
+from datetime import datetime
+import time
+
+@dataclass
+class DAGStatus:
+    name: str
+    status: str
+    group: Optional[str] = None
+    schedule: Optional[str] = None
+    lastRun: Optional[str] = None
+    nextRun: Optional[str] = None
+    pid: Optional[int] = None
+    log: Optional[str] = None
+    requestId: Optional[str] = None
+    params: Optional[str] = None
+    startedAt: Optional[str] = None
+    finishedAt: Optional[str] = None
+    suspended: Optional[bool] = None
+
+    def get_last_run_epoch(self) -> Optional[int]:
+        """Convert lastRun to epoch time."""
+        return self._convert_to_epoch(self.lastRun)
+
+    def get_next_run_epoch(self) -> Optional[int]:
+        """Convert nextRun to epoch time."""
+        return self._convert_to_epoch(self.nextRun)
+
+    @staticmethod
+    def _convert_to_epoch(timestamp: Optional[str]) -> Optional[int]:
+        """Helper method to convert an ISO 8601 timestamp to epoch time."""
+        if timestamp:
+            dt = datetime.fromisoformat(timestamp.replace('Z', '+00:00'))
+            return int(time.mktime(dt.timetuple()))
+        return None
+
+class DAGuClient:
+    def __init__(self, base_url: str = "http://localhost:8888"):
+        self.base_url = base_url
+        self.auth = self._get_basic_auth()
+
+    def _get_basic_auth(self) -> HTTPBasicAuth:
+        """Retrieve the Basic Auth credentials from environment variables."""
+        username = os.getenv('DAGU_BASICAUTH_USERNAME')
+        password = os.getenv('DAGU_BASICAUTH_PASSWORD')
+
+        if not username or not password:
+            raise EnvironmentError("Please set the DAGU_BASICAUTH_USERNAME and DAGU_BASICAUTH_PASSWORD environment variables.")
+
+        return HTTPBasicAuth(username, password)
+
+    def list_dags(self) -> List[DAGStatus]:
+        """Fetches the list of DAGs with their statuses from the DAGu REST API."""
+        try:
+            response = requests.get(f"{self.base_url}/api/v1/dags", auth=self.auth)
+            response.raise_for_status()  # Raises an HTTPError for bad responses (4xx or 5xx)
+            dags_data = response.json().get('DAGs', [])
+
+            if isinstance(dags_data, list):
+                return [self._parse_dag(dag) for dag in dags_data]
+            else:
+                print(f"Unexpected response format: {dags_data}")
+                return []
+        except requests.exceptions.RequestException as e:
+            print(f"Error during request: {e}")
+            return []
+
+    def _parse_dag(self, dag_entry: dict) -> Optional[DAGStatus]:
+        """Helper function to parse a DAG's JSON data into a DAGStatus object."""
+        try:
+            dag_data = dag_entry.get("DAG", {})
+            status_data = dag_entry.get("Status", {})
+
+            return DAGStatus(
+                name=dag_data.get("Name"),
+                status=status_data.get("StatusText"),
+                group=dag_data.get("Group"),
+                schedule=(dag_data.get("Schedule", [{}])[0].get("Expression")
+                          if dag_data.get("Schedule") else None),
+                lastRun=status_data.get("FinishedAt"),
+                nextRun=None,  # Adjust as needed based on your API's response format
+                pid=status_data.get("Pid"),
+                log=status_data.get("Log"),
+                requestId=status_data.get("RequestId"),
+                params=status_data.get("Params"),
+                startedAt=status_data.get("StartedAt"),
+                finishedAt=status_data.get("FinishedAt"),
+                suspended=dag_entry.get("Suspended")
+            )
+        except AttributeError as e:
+            print(f"Error parsing DAG data: {dag_entry}, Error: {e}")
+            return None
+
+    def submit_dag_action(self, name: str, action: str, request_id: Optional[str] = None, params: Optional[str] = None) -> dict:
+        """Submit an action to a specified DAG.
+
+        Args:
+            name (str): Name of the DAG.
+            action (str): Action to be performed ('start', 'stop', or 'retry').
+            request_id (Optional[str]): Required if action is 'retry'.
+            params (Optional[str]): Parameters for the DAG execution.
+
+        Returns:
+            dict: Response from the API.
+        """
+        url = f"{self.base_url}/api/v1/dags/{name}"
+        payload = {
+            "action": action,
+            **({"request-id": request_id} if request_id else {}),
+            **({"params": params} if params else {}),
+        }
+
+        try:
+            response = requests.post(url, json=payload, auth=self.auth)
+            response.raise_for_status()  # Raises an HTTPError for bad responses (4xx or 5xx)
+            return response.json()
+        except requests.exceptions.RequestException as e:
+            print(f"Error during request: {e}")
+            # `response` may never have been assigned if the request itself failed,
+            # so use the copy the exception carries (it can be None).
+            if e.response is not None:
+                print(f"Response content: {e.response.content}")
+            return {}
+
+# Example usage
+if __name__ == "__main__":
+    client = DAGuClient()
+
+    # List DAGs
+    try:
+        dags = client.list_dags()
+        for dag in dags:
+            if dag:
+                print(f"DAG Name: {dag.name}, Status: {dag.status}, Group: {dag.group}, "
+                      f"Schedule: {dag.schedule}, Last Run: {dag.lastRun}, "
+                      f"Next Run: {dag.nextRun}, PID: {dag.pid}, Log: {dag.log}, "
+                      f"Request ID: {dag.requestId}, Params: {dag.params}, "
+                      f"Started At: {dag.startedAt}, Finished At: {dag.finishedAt}, "
+                      f"Suspended: {dag.suspended}")
+                # Example of using the helper methods to get epoch times
+                if dag.get_last_run_epoch():
+                    print(f"Last Run Epoch: {dag.get_last_run_epoch()}")
+                if dag.get_next_run_epoch():
+                    print(f"Next Run Epoch: {dag.get_next_run_epoch()}")
+    except Exception as e:
+        print(f"Error: {e}")
+
+    # Submit an action to a DAG (example: start a DAG)
+    try:
+        dag_name = "test11"  # Replace with your actual DAG name
+        action_response = client.submit_dag_action(name=dag_name, action="start")
+        print(f"Action Response: {action_response}")
+    except Exception as e:
+        print(f"Error: {e}")
diff --git a/_archive/lib/dagu/dag.py b/_archive/lib/dagu/dag.py
new file mode 100644
index 0000000..18f90bb
--- /dev/null
+++ b/_archive/lib/dagu/dag.py
@@ -0,0 +1,184 @@
+import yaml
+from dataclasses import dataclass, field
+from typing import List, Dict, Optional
+
+@dataclass
+class EnvVariable:
+    key: str
+    value: str
+
+@dataclass
+class HandlerCommand:
+    command: str
+
+@dataclass
+class Handlers:
+    success: Optional[HandlerCommand] = None
+    failure: Optional[HandlerCommand] = None
+    cancel: Optional[HandlerCommand] = None
+    exit: Optional[HandlerCommand] = None
+
+@dataclass
+class RepeatPolicy:
+    repeat: bool
+    intervalSec: int
+
+@dataclass
+class Precondition:
+    condition: str
+    expected: str
+
+@dataclass
+class Step:
+    name: str
+    command: str
+    script: Optional[str] = None
+    depends: List[str] = field(default_factory=list)
+    description: Optional[str] = None
+    repeatPolicy: Optional[RepeatPolicy] = None
+
+@dataclass
+class DAG:
+    name: str
+    description: Optional[str] = None
+    schedule: Optional[str] = None
+    group: Optional[str] = None
+    tags: Optional[str] = None  # This should be a single string
+    env: Dict[str, str] = field(default_factory=dict)
+    logDir: Optional[str] = None
+    restartWaitSec: Optional[int] = None
+    histRetentionDays: Optional[int] = None
+    delaySec: Optional[int] = None
+    maxActiveRuns: Optional[int] = None
+    params: Optional[List[str]] = field(default_factory=list)
+    preconditions: List[Precondition] = field(default_factory=list)
+    mailOn: Dict[str, bool] = field(default_factory=dict)
+    handlerOn: Handlers = field(default_factory=Handlers)
+    MaxCleanUpTimeSec: Optional[int] = None
+    steps: List[Step] = field(default_factory=list)
+
+    def add_step(self, step: Step):
+        """Add a step to the DAG."""
+        self.steps.append(step)
+
+    def to_dict(self) -> Dict:
+        return {
+            "name": self.name,
+            **({"description": self.description} if self.description else {}),
+            **({"schedule": self.schedule} if self.schedule else {}),
+            **({"group": self.group} if self.group else {}),
+            **({"tags": self.tags} if self.tags else {}),
+            **({"env": [{"key": k, "value": v} for k, v in self.env.items()]} if self.env else {}),
+            **({"logDir": self.logDir} if self.logDir else {}),
+            **({"restartWaitSec": self.restartWaitSec} if self.restartWaitSec else {}),
+            **({"histRetentionDays": self.histRetentionDays} if self.histRetentionDays else {}),
+            **({"delaySec": self.delaySec} if self.delaySec else {}),
+            **({"maxActiveRuns": self.maxActiveRuns} if self.maxActiveRuns else {}),
+            **({"params": " ".join(self.params)} if self.params else {}),
+            **({"preconditions": [{"condition": pc.condition, "expected": pc.expected} for pc in self.preconditions]} if self.preconditions else {}),
+            **({"mailOn": self.mailOn} if self.mailOn else {}),
+            **({"MaxCleanUpTimeSec": self.MaxCleanUpTimeSec} if self.MaxCleanUpTimeSec else {}),
+            **({"handlerOn": {
+                "success": {"command": self.handlerOn.success.command} if self.handlerOn.success else None,
+                "failure": {"command": self.handlerOn.failure.command} if self.handlerOn.failure else None,
+                "cancel": {"command": self.handlerOn.cancel.command} if self.handlerOn.cancel else None,
+                "exit": {"command": self.handlerOn.exit.command} if self.handlerOn.exit else None,
+            }} if any(vars(self.handlerOn).values()) else {}),
+            "steps": [
+                {
+                    "name": step.name,
+                    "command": step.command,
+                    **({"script": step.script} if step.script else {}),
+                    **({"depends": step.depends} if step.depends else {}),  # Change this back to depends_on if needed
+                    **({"description": step.description} if step.description else {}),
+                    **({"repeatPolicy": {
+                        "repeat": step.repeatPolicy.repeat,
+                        "intervalSec": step.repeatPolicy.intervalSec
+                    }} if step.repeatPolicy else {}),
+                } for step in self.steps
+            ],
+        }
+
+    def to_yaml(self) -> str:
+        return yaml.dump(self.to_dict(), sort_keys=False)
+
+def new(**kwargs) -> DAG:
+    return DAG(**kwargs)
+
+# Example usage to create a new DAG
+if __name__ == "__main__":
+    # Imported here to avoid a circular import (server.py imports this module).
+    from server import Server
+
+    # Initialize the server with the default DAG directory
+    server = Server()
+
+    # List existing DAGs
+    print("Listing existing DAGs:")
+    dags = server.list_dags()
+    for dag_name in dags:
+        print(f" - {dag_name}")
+
+    # Create a new DAG
+    dag = new(
+        name="example_dag",
+        description="Example DAG to demonstrate functionality",
+        schedule="0 * * * *",
+        group="ExampleGroup",
+        tags="example",  # tags is a single comma-separated string
+        env={
+            "LOG_DIR": "${HOME}/logs",
+            "PATH": "/usr/local/bin:${PATH}"
+        },
+        logDir="${LOG_DIR}",
+        restartWaitSec=60,
+        histRetentionDays=3,
+        delaySec=1,
+        maxActiveRuns=1,
+        params=["param1", "param2"],
+        preconditions=[
+            Precondition(condition="`echo $2`", expected="param2")
+        ],
+        mailOn={"failure": True, "success": True},
+        MaxCleanUpTimeSec=300,
+        handlerOn=Handlers(
+            success=HandlerCommand(command="echo succeed"),
+            failure=HandlerCommand(command="echo failed"),
+            cancel=HandlerCommand(command="echo canceled"),
+            exit=HandlerCommand(command="echo finished")
+        )
+    )
+
+    # Add steps to the DAG
+    dag.add_step(Step(
+        name="pull_data",
+        command="sh",
+        script="echo `date '+%Y-%m-%d'`",
+    ))
+
+    dag.add_step(Step(
+        name="cleanse_data",
+        command="echo cleansing ${DATA_DIR}/${DATE}.csv",
+        depends=["pull_data"]
+    ))
+
+    dag.add_step(Step(
+        name="transform_data",
+        command="echo transforming ${DATA_DIR}/${DATE}_clean.csv",
+        depends=["cleanse_data"]
+    ))
+
+    dag.add_step(Step(
+        name="A task",
+        command="main.sh",
+        repeatPolicy=RepeatPolicy(repeat=True, intervalSec=60)
+    ))
+
+    # Save the new DAG as a YAML file
+    server.create_dag(dag)
+    print(f"DAG '{dag.name}' created, saved, and started.")
+
+    # List DAGs again to see the newly created one
+    print("\nListing updated DAGs:")
+    dags = server.list_dags()
+    for dag_name in dags:
+        print(f" - {dag_name}")
diff --git a/_archive/lib/dagu/server.py b/_archive/lib/dagu/server.py
new file mode 100644
index 0000000..a15a7f7
--- /dev/null
+++ b/_archive/lib/dagu/server.py
@@ -0,0 +1,51 @@
+import os
+import yaml
+import glob
+from typing import List
+from dag import DAG
+from client import DAGuClient
+
+# Assuming the following classes have already been defined:
+# - DAG (for creating and managing DAG structures)
+# - Step
+# - Handlers
+# - RepeatPolicy
+# - Precondition
+
+class Server:
+    def __init__(self, dag_dir: str = "~/hero/var/dagu/dags/"):
+        self.dag_dir = os.path.expanduser(dag_dir)
+        os.makedirs(self.dag_dir, exist_ok=True)  # Ensure the directory exists
+
+    def list_dags(self) -> List[str]:
+        """Lists the DAGs in the directory."""
+        dag_files = glob.glob(os.path.join(self.dag_dir, "*.yaml"))
+        return [os.path.splitext(os.path.basename(dag_file))[0] for dag_file in dag_files]
+
+    def delete_dag(self, name: str) -> bool:
+        """Deletes a DAG file based on its name."""
+        dag_file = os.path.join(self.dag_dir, f"{name}.yaml")
+        if os.path.exists(dag_file):
+            os.remove(dag_file)
+            return True
+        else:
+            print(f"DAG '{name}' does not exist.")
+            return False
+
+    def create_dag(self, dag: DAG, start: bool = True) -> bool:
+        """Creates a new DAG and saves it as a YAML file."""
+        dag_file = os.path.join(self.dag_dir, f"{dag.name}.yaml")
+        with open(dag_file, 'w') as file:
+            yaml.dump(dag.to_dict(), file, sort_keys=False)
+        if start:
+            self.start_dag(dag.name)
+        return True
+
+    def start_dag(self, dag_name: str) -> bool:
+        client = DAGuClient()
+        return bool(client.submit_dag_action(name=dag_name, action="start"))
+
+    def stop_dag(self, dag_name: str) -> bool:
+        client = DAGuClient()
+        return bool(client.submit_dag_action(name=dag_name, action="stop"))
+
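A minimal end-to-end sketch tying the three dagu modules together (assumes a DAGu server on localhost:8888 and the DAGU_BASICAUTH_* variables required by client.py; the DAG name and commands are illustrative):

```python
from dag import Step, new
from server import Server

# Build a two-step DAG; create_dag() writes ~/hero/var/dagu/dags/hello.yaml
# and then submits a "start" action through DAGuClient.
dag = new(name="hello", schedule="*/5 * * * *", tags="example")
dag.add_step(Step(name="fetch", command="echo fetch"))
dag.add_step(Step(name="report", command="echo report", depends=["fetch"]))

Server().create_dag(dag, start=True)
```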
.mdcollections import MDCollections +from .scanner import scan_directory + +# Re-export all public types and functions +__all__ = [ + 'MDItem', + 'MDPage', + 'MDImage', + 'MDCollection', + 'MDCollections', + 'scan_directory' +] diff --git a/_archive/lib/web/mdcollections/base_types.py b/_archive/lib/web/mdcollections/base_types.py new file mode 100644 index 0000000..2a4b3b5 --- /dev/null +++ b/_archive/lib/web/mdcollections/base_types.py @@ -0,0 +1,177 @@ +from pathlib import Path +from typing import List, Dict +from dataclasses import dataclass +from .tools import name_fix + +import os + +class MDItem: + """Base class for items in a collection.""" + def __init__(self, collection: "MDCollection", rel_path: Path): + if not isinstance(rel_path, Path): + raise TypeError("rel_path must be a Path instance") + self.collection = collection + self.rel_path = rel_path + self.content_ = "" + self.processed = bool + + def __str__(self) -> str: + return f"{self.__class__.__name__}: {self.rel_path}" + + @property + def full_path(self) -> Path: + """Returns the full path to the item.""" + return self.collection.path / self.rel_path + + @property + def path(self) -> str: + """Returns the fixed name of the item without extension.""" + return str(self.full_path.resolve()) + + @property + def name(self) -> str: + """Returns the fixed name of the item.""" + return name_fix(os.path.basename(self.rel_path)) + +class MDPage(MDItem): + """Represents a markdown file in the collection.""" + pass + + @property + def content(self) -> str: + if not self.content_: + if os.path.exists(self.path): + try: + with open(self.path, 'r', encoding='utf-8') as f: + self.content_ = f.read() + except OSError as e: + raise Exception(f"Error reading file {self.path}: {e}") + else: + raise FileNotFoundError(f"Cannot find markdown file: {self.path}") + return self.content_ + + + +class MDImage(MDItem): + """Represents an image file in the collection.""" + pass + + + +@dataclass +class MDCollection: + """Represents a collection of markdown files and images.""" + path: Path + name: str + items: List[MDItem] + + def page_get(self, name: str) -> MDPage: + """ + Get a markdown page by name. + + Args: + name: Name of the page to find (will be normalized) + + Returns: + MDPage object + + Raises: + ValueError: If page not found + """ + # Remove .md extension if present + if "__" in name: + raise ValueError("there should be no __ in name of page_get") + + if name.endswith('.md'): + name=name[:-3] + normalized_name = name_fix(name) + for item in self.items: + if isinstance(item, MDPage): + item_name = name_fix(item.rel_path.stem) + if item_name == normalized_name: + return item + raise ValueError(f"Page not found: {name}") + + def image_get(self, name: str) -> MDImage: + """ + Get an image by name. 
+ + Args: + name: Name of the image to find (will be normalized) + + Returns: + MDImage object + + Raises: + ValueError: If image not found + """ + normalized_name = name_fix(name) + for item in self.items: + if isinstance(item, MDImage): + # For images, compare with extension + item_name = name_fix(os.path.basename(item.rel_path)) + if item_name == normalized_name: + return item + raise ValueError(f"Image not found: {name}") + + def __str__(self) -> str: + """Returns a tree-like string representation of the collection.""" + result = [f"Collection: {self.name} ({self.path})"] + + # Group items by type + pages = [item for item in self.items if isinstance(item, MDPage)] + images = [item for item in self.items if isinstance(item, MDImage)] + + # Add pages + if pages: + result.append(" Pages:") + for page in sorted(pages, key=lambda x: str(x.rel_path)): + result.append(f" └─ {page.name}") + + # Add images + if images: + result.append(" Images:") + for image in sorted(images, key=lambda x: str(x.rel_path)): + result.append(f" └─ {image.name}") + + return "\n".join(result) + + def index_page(self) -> MDPage: + """Generate a dynamic index of all markdown files in the collection.""" + # Get all markdown pages and sort them by relative path + pages = sorted( + [item for item in self.items if isinstance(item, MDPage)], + key=lambda x: str(x.rel_path) + ) + + # Group pages by directory + page_groups: Dict[str, List[MDPage]] = {} + for page in pages: + dir_path = str(page.rel_path.parent) + if dir_path == '.': + dir_path = 'Root' + if dir_path not in page_groups: + page_groups[dir_path] = [] + page_groups[dir_path].append(page) + + # Generate markdown content + content = ["# Collection Index\n"] + + for dir_path in sorted(page_groups.keys()): + # Add directory header + if dir_path != 'Root': + content.append(f"\n## {dir_path}\n") + elif len(page_groups) > 1: # Only show Root header if there are other directories + content.append("\n## Root Directory\n") + + # Add pages in current directory + for page in sorted(page_groups[dir_path], key=lambda x: x.name): + # Create display name by removing extension and formatting + display_name = page.rel_path.stem.replace('_', ' ').replace('-', ' ').title() + # Create link using relative path + link_path = str(page.rel_path) + content.append(f'- [{display_name}]({self.name}__{link_path})') + + mdp=MDPage(self,Path("index.md")) + mdp.content_ = "\n".join(content) + return mdp diff --git a/_archive/lib/web/mdcollections/factory.py b/_archive/lib/web/mdcollections/factory.py new file mode 100644 index 0000000..ecf4046 --- /dev/null +++ b/_archive/lib/web/mdcollections/factory.py @@ -0,0 +1,25 @@ +import os +from pathlib import Path +from typing import Optional +from .mdcollections import MDCollections + +def create_collections(path: Optional[str] = None) -> MDCollections: + """ + Factory function to create and initialize an MDCollections instance. + + Args: + path: Optional path to scan for collections. 
Must be provided; a ValueError is raised when the path is None. + + Returns: + Initialized MDCollections instance + + Raises: + ValueError: If path is None + """ + if path is None: + raise ValueError("Path cannot be None") + + # Expand ~ to home directory if present in path + expanded_path = os.path.expanduser(path) + return MDCollections(root_path=Path(expanded_path)) + diff --git a/_archive/lib/web/mdcollections/mdcollections.py b/_archive/lib/web/mdcollections/mdcollections.py new file mode 100644 index 0000000..4fad542 --- /dev/null +++ b/_archive/lib/web/mdcollections/mdcollections.py @@ -0,0 +1,112 @@ +from pathlib import Path +from typing import List, Optional +from .base_types import MDCollection, MDPage, MDImage, MDItem +from .scanner import scan_directory +from .tools import name_fix + +class MDCollections: + """Manages multiple markdown collections.""" + def __init__(self, root_path: Path): + """ + Initialize collections manager. + + Args: + root_path: Root directory containing collections + """ + self.root_path = root_path + self.collections: List[MDCollection] = [] + self._scan_collections() + + def _scan_collections(self): + """Scan root directory for collections.""" + if not self.root_path.exists(): + raise ValueError(f"Root path does not exist: {self.root_path}") + + # Scan immediate subdirectories only + for path in sorted(self.root_path.iterdir()): + if path.is_dir(): + # Skip directories starting with _ or containing 'archive' in their lowercased name + if path.name.startswith('_') or 'archive' in path.name.lower(): + continue + + items = scan_directory(path) + if items: # Only create collection if directory contains markdown files + collection = MDCollection( + path=path, + name=path.name, + items=sorted(items, key=lambda x: x.name) + ) + self.collections.append(collection) + + # Sort collections by name + self.collections.sort(key=lambda x: x.name) + + def collection_get(self, name: str) -> MDCollection: + """ + Get a collection by name. + + Args: + name: Name of the collection to find + + Returns: + MDCollection object + + Raises: + ValueError: If collection not found + """ + for collection in self.collections: + if collection.name == name: + return collection + raise ValueError(f"Collection not found: {name}") + + def page_get(self, collection_name: str, page_name: str) -> MDPage: + """ + Get a page from a specific collection. + + Args: + collection_name: Name of the collection + page_name: Name of the page + + Returns: + MDPage object + + Raises: + ValueError: If collection or page not found + """ + page_name = name_fix(page_name) + collection_name = name_fix(collection_name) + + collection = self.collection_get(collection_name) + return collection.page_get(page_name) + + def image_get(self, collection_name: str, image_name: str) -> MDImage: + """ + Get an image from a specific collection.
+ + Args: + collection_name: Name of the collection + image_name: Name of the image + + Returns: + MDImage object + + Raises: + ValueError: If collection or image not found + """ + # Handle an image name that still carries a collection prefix ("collection__image") + if "__" in image_name: + collection_name, image_name = image_name.split("__", 1) + + image_name = name_fix(image_name) + collection_name = name_fix(collection_name) + + collection = self.collection_get(collection_name) + print(f" -- image get: '{collection_name}' '{image_name}'") + return collection.image_get(image_name) + + def __str__(self) -> str: + """Returns a string representation of all collections.""" + if not self.collections: + return "No collections found" + + return "\n\n".join(str(collection) for collection in self.collections) diff --git a/_archive/lib/web/mdcollections/scanner.py b/_archive/lib/web/mdcollections/scanner.py new file mode 100644 index 0000000..7874de2 --- /dev/null +++ b/_archive/lib/web/mdcollections/scanner.py @@ -0,0 +1,61 @@ +from pathlib import Path +from typing import List, Sequence +from .base_types import MDItem, MDPage, MDImage, MDCollection + +def scan_directory(path: Path) -> Sequence[MDItem]: + """ + Scan a directory for markdown files and images. + + Args: + path: Directory to scan + + Returns: + List of MDItem objects (MDPage or MDImage) + """ + if not path.exists(): + raise ValueError(f"Path does not exist: {path}") + if not path.is_dir(): + raise ValueError(f"Path is not a directory: {path}") + + items: List[MDItem] = [] + + # Create a temporary collection for the items + temp_collection = MDCollection( + path=path, + name=path.name, + items=[] # Will be populated later + ) + + # First scan for markdown files + for md_path in path.rglob("*.md"): + # Skip files in hidden directories (starting with .) + if any(part.startswith('.') for part in md_path.parts): + continue + + # Get path relative to collection root + rel_path = md_path.relative_to(path) + + # Create MDPage + page = MDPage(temp_collection, rel_path) + items.append(page) + + # Then scan for images + image_extensions = {'.png', '.jpg', '.jpeg', '.gif', '.svg'} + for img_path in path.rglob("*"): + # Skip files in hidden directories (starting with .) + if any(part.startswith('.') for part in img_path.parts): + continue + + # Check if file has image extension + if img_path.suffix.lower() in image_extensions: + # Get path relative to collection root + rel_path = img_path.relative_to(path) + + # Create MDImage + image = MDImage(temp_collection, rel_path) + items.append(image) + + # Update the temporary collection's items + temp_collection.items = items + + return items diff --git a/_archive/lib/web/mdcollections/tools.py b/_archive/lib/web/mdcollections/tools.py new file mode 100644 index 0000000..3de0bdd --- /dev/null +++ b/_archive/lib/web/mdcollections/tools.py @@ -0,0 +1,99 @@ +from pathlib import Path +from typing import Union +import os +import re + +def should_skip_path(path: Union[str, Path]) -> bool: + """ + Check if a path should be skipped based on its basename. + Skips paths that start with .
or _ + + Args: + path: Path to check (can be file or directory) + + Returns: + True if path should be skipped, False otherwise + """ + path = Path(path) + return path.name.startswith(('.', '_')) + + +def strip_ansi_codes(text): + """Remove ANSI escape codes from text.""" + ansi_escape = re.compile(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])') + return ansi_escape.sub('', text) + + + +def name_fix(path: str) -> str: + """ + Normalize only the final part (stem) of a path by: + - Converting spaces to underscores + - Making lowercase + Preserves the directory structure and only modifies the final name. + + Args: + path: Path to normalize + + Returns: + Path with normalized stem but unchanged structure + """ + if not isinstance(path, str): + raise TypeError("Input must be a string") + + if '/' in path: + raise ValueError("Path should not contain forward slashes - use for filenames only") + + path = strip_ansi_codes(path).strip() + name, ext = os.path.splitext(path) + + if not is_image(path) and ext.lower() == '.md': + ext = "" + + # Convert to lowercase and replace spaces and other characters + name = name.lower().replace(' ', '_').replace('-', '_').replace(',', '') + name = name.replace('__', '_').rstrip(' ') + + # Only strip trailing underscores for image files + if is_image(name): + name = name.rstrip('_') + + return f"{name}{ext}" + + +def path_fix(path: Union[str, Path]) -> Path: + """ + Normalize only the final part (stem) of a path by: + - Converting spaces to underscores + - Making lowercase + Preserves the directory structure and only modifies the final name. + + Args: + path: Path to normalize + + Returns: + Path with normalized stem but unchanged structure + """ + if not isinstance(path, (str, Path)): + path = str(path) + path = Path(path) + # Keep directory structure unchanged, only normalize the filename + parent = path.parent + filename = name_fix(path.name) + # Recombine with original parent path + return parent / filename + + +def is_image(basename): + # Define a set of valid image extensions + image_extensions = ['.jpg', '.jpeg', '.png', '.gif', '.svg'] + + # Get the file extension from the basename + _, extension = os.path.splitext(basename) + extension = extension.strip() + + #print(f" ----- {basename} '{extension.lower()}' {extension.lower() in image_extensions}") + + # Check if the extension is in the set of image extensions + return extension.lower() in image_extensions + diff --git a/_archive/lib/web/mdserver/__init__.py b/_archive/lib/web/mdserver/__init__.py new file mode 100644 index 0000000..819aa28 --- /dev/null +++ b/_archive/lib/web/mdserver/__init__.py @@ -0,0 +1,9 @@ +""" +MDServer package initialization. +This helps Python properly resolve the package imports. +""" +from .markdown_server import MDServer +from .factory import serve_markdown +from .process_markdown import process_markdown + +__all__ = ['MDServer', 'serve_markdown', 'process_markdown'] diff --git a/_archive/lib/web/mdserver/factory.py b/_archive/lib/web/mdserver/factory.py new file mode 100644 index 0000000..8f63508 --- /dev/null +++ b/_archive/lib/web/mdserver/factory.py @@ -0,0 +1,19 @@ +from typing import Optional, Union +from pathlib import Path + +import sys +sys.path.append(str(Path(__file__).parent.parent)) + +from .markdown_server import MDServer # Import directly from the module file + + +def serve_markdown(collections_path: str) -> None: + """ + Legacy function to maintain backward compatibility. + Creates an MDServer instance and serves the markdown content. 
+ + Args: + collections_path: Path to the collections directory. Can be a string or Path object. + """ + server = MDServer(collections_path=collections_path) + server.serve_markdown() diff --git a/_archive/lib/web/mdserver/macro_chart.py b/_archive/lib/web/mdserver/macro_chart.py new file mode 100644 index 0000000..f43bff3 --- /dev/null +++ b/_archive/lib/web/mdserver/macro_chart.py @@ -0,0 +1,55 @@ +import re +from typing import TYPE_CHECKING +from mdcollections.base_types import MDPage + +def js_to_python(js_str): + """Convert JavaScript object notation to Python dictionary syntax.""" + # Remove any 'option =' prefix and trailing semicolon + js_str = re.sub(r'^option\s*=\s*', '', js_str) + js_str = re.sub(r';(\s*)$', '', js_str) + + # Convert JavaScript property names to Python dictionary keys + js_str = re.sub(r'(\b\w+):', r'"\1":', js_str) + + # Convert single quotes to double quotes for string values + # First, replace escaped single quotes with a placeholder + js_str = js_str.replace("\\'", "___ESCAPED_QUOTE___") + # Then replace regular single quotes with double quotes + js_str = js_str.replace("'", '"') + # Finally, restore escaped single quotes + js_str = js_str.replace("___ESCAPED_QUOTE___", "\\'") + + # Handle trailing commas + js_str = re.sub(r',(\s*[}\]])', r'\1', js_str) + + # Handle special JavaScript values + js_str = js_str.replace('true', 'True').replace('false', 'False').replace('null', 'None') + + # Remove any comments + js_str = re.sub(r'//.*?\n|/\*.*?\*/', '', js_str, flags=re.DOTALL) + + return js_str.strip() + +def process_markdown_echarts(page: MDPage) -> MDPage: + """Convert ```echarts blocks to ```py sl blocks that use st_echarts.""" + if not isinstance(page, MDPage): + raise TypeError("page must be a MDPage") + + def replace_echarts_block(match): + echarts_code = match.group(1).strip() + python_code = js_to_python(echarts_code) + + # Create the streamlit code block + streamlit_code = f"""```py sl +from streamlit_echarts import st_echarts +option = {python_code} +st_echarts(options=option, height="400px") +```""" + return streamlit_code + + # Process all echarts code blocks + processed_content = re.sub(r"```echarts\n(.*?)\n```", replace_echarts_block, page.content, flags=re.DOTALL) + + page.content_ = processed_content + + return page diff --git a/_archive/lib/web/mdserver/macro_links.py b/_archive/lib/web/mdserver/macro_links.py new file mode 100644 index 0000000..54eaf6f --- /dev/null +++ b/_archive/lib/web/mdserver/macro_links.py @@ -0,0 +1,119 @@ +import os +import re +import streamlit as st +from PIL import Image # Pillow package provides PIL +from typing import Optional, List, Tuple, TYPE_CHECKING +from mdcollections.tools import name_fix, is_image +from mdcollections.base_types import MDPage +from mdcollections.mdcollections import MDCollections +from .process_images import process_image +from .tools import debug + +def link_process(link: str, page: MDPage, collections: MDCollections, is_image_link: bool, debug_enabled: bool = False) -> str: + """Process link path and verify existence in collection.""" + + if not isinstance(link, str): + raise TypeError("link must be strings") + + if not isinstance(collections, MDCollections): + raise TypeError("collection must be MDCollection") + + if not isinstance(page, MDPage): + raise TypeError("page must be MDPage") + + debug(f"\nProcessing link: {link}") + debug(f"Is image link: {is_image_link}") + + # Remove './' if present + if link.startswith("./"): + link = link[2:] + debug("Removed './' prefix from link") + + # 
Get just the filename without directories + link = os.path.basename(link) + debug(f"Extracted basename: {link}") + + + # Process link format + if '__' not in link: + if ":" in link: + link = link.replace(':', '__') + + # Create full link if needed + if '__' not in link: + link = f"{page.collection.name}__{link}" + debug(f"Created full link: {link}") + + if link.count("__") > 1: + raise RuntimeError(f"cannot have 2x __ in {link}") + + collection_name, item_name = link.split('__', 1) + + # Convert to lowercase and replace spaces with underscores + item_name = name_fix(item_name) + collection_name = name_fix(collection_name) + debug(f"Normalized: '{collection_name}__{item_name}'") + + # Ensure .md extension for pages + if is_image_link: + try: + md_i = collections.image_get(collection_name=collection_name, image_name=item_name) + debug("Successfully verified image exists") + # process_image(md_i) + # return "" + return f"{collection_name}__{item_name}" + except ValueError: + debug(f"Error - image not found: {link}") + return f'ERROR: Image not found: {link}' + else: + if not item_name.endswith('.md'): + item_name = f"{item_name}.md" + debug(f"Added .md extension: {item_name}") + try: + collections.page_get(collection_name, item_name) + debug("Successfully verified page exists") + except ValueError: + debug(f"Error - page not found: {link}") + return f'ERROR: Page not found: {link}' + + return f"?page={collection_name}__{item_name}" + +def process_links(page: MDPage, collections: MDCollections) -> MDPage: + """Process links in the markdown content.""" + if not isinstance(page, MDPage): + raise TypeError("page must be a MDPage") + if not isinstance(collections, MDCollections): + raise TypeError("collections must be a MDCollections") + + debug(f"Processing links for page: {page.name}") + debug(f"Content length before processing: {len(page.content)} characters") + + link_pattern = r'(!?)\[(.*?)\]\((.*?)\)' + + def replace_link(match): + is_image_link = match.group(1) == '!'
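+ # group(1) is the optional leading '!', group(2) the link text, group(3) the target path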
+ link_text = match.group(2) + link_path = match.group(3) + + debug(f"Found link - Text: {link_text}, Path: {link_path}") + debug(f"Is image link: {is_image_link}") + + processed_link = link_process(link_path, page, collections, is_image_link) + + if "ERROR:" in processed_link: + debug(f"Link processing error: {processed_link}") + return processed_link #this forwards the error, is html in red + + if is_image_link: + debug(f"Returning processed image link: ![{link_text}]({processed_link})") + return f'![{link_text}]({processed_link})' + else: + debug(f"Returning processed text link: [{link_text}]({processed_link})") + return f'[{link_text}]({processed_link})' + + page.content_ = re.sub(link_pattern, replace_link, page.content) + + debug(f"Content length after processing: {len(page.content)} characters") + debug("Link processing complete") + + return page diff --git a/_archive/lib/web/mdserver/macro_mermaid.py b/_archive/lib/web/mdserver/macro_mermaid.py new file mode 100644 index 0000000..56dfc42 --- /dev/null +++ b/_archive/lib/web/mdserver/macro_mermaid.py @@ -0,0 +1,29 @@ +import re +from typing import TYPE_CHECKING +from mdcollections.base_types import MDPage + + +def process_markdown_mermaid(page: MDPage) -> MDPage: + """Convert ```mermaid blocks to ```py sl blocks that use st_mermaid.""" + if not isinstance(page, MDPage): + raise TypeError("page must be a MDPage") + + def replace_mermaid_block(match): + mermaid_code = match.group(1).strip() + + # Create the streamlit code block + # Note: The mermaid code needs to be properly escaped as a string + mermaid_code = mermaid_code.replace('"', '\\"') # Escape double quotes + streamlit_code = f'''```py sl +from streamlit_mermaid import st_mermaid +st_mermaid(""" +{mermaid_code} +""") +```''' + return streamlit_code + + # Process all mermaid code blocks + processed_content = re.sub(r"```mermaid\n(.*?)\n```", replace_mermaid_block, page.content, flags=re.DOTALL) + page.content_ = processed_content + + return page diff --git a/_archive/lib/web/mdserver/macro_sl.py b/_archive/lib/web/mdserver/macro_sl.py new file mode 100644 index 0000000..df9febb --- /dev/null +++ b/_archive/lib/web/mdserver/macro_sl.py @@ -0,0 +1,69 @@ +import re +import streamlit as st +import pandas as pd +import numpy as np +from io import StringIO +import sys +from typing import TYPE_CHECKING +from mdcollections.base_types import MDPage + +# if TYPE_CHECKING: +# from .markdown_server import MDServer + +def execute_streamlit_code(code_block): + """ + Execute a streamlit code block and capture its output. + The code block should be valid Python code that uses streamlit. + """ + # Create string buffer to capture any print outputs + old_stdout = sys.stdout + redirected_output = StringIO() + sys.stdout = redirected_output + + try: + # Execute the code block + # The code block can use st, pd, np which are already imported + exec(code_block, { + 'st': st, + 'pd': pd, + 'np': np + }) + + # Get any printed output + printed_output = redirected_output.getvalue() + return True, printed_output + except Exception as e: + return False, f"Error: {str(e)}\n\nFailed code:\n{code_block}" + finally: + # Restore stdout + sys.stdout = old_stdout + + +def process_streamlit_blocks(page: MDPage) -> MDPage: + """ + Find and process ```py sl code blocks in markdown content. + Returns the modified content with executed streamlit code blocks replaced by their output. 
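+ Blocks are executed via exec() with st, pd and np in scope; a successful block is removed from the content and a failing block is replaced by its error message.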
+ """ + if not isinstance(page, MDPage): + raise TypeError("page must be a MDPage") + # if not hasattr(md_server, 'collections_manager'): + # raise TypeError("md_server must be an instance of MDServer") + + def replace_code_block(match): + code = match.group(1).strip() + success, result = execute_streamlit_code(code) + + if not success: + # If execution failed, return the error message + return f"```\n{result}\n```" + + # If successful, return empty string - the streamlit components + # will be rendered but the code block itself won't be shown + return "" + + # Process the code block + processed_content = re.sub(r"```py\s+sl\n(.*?)\n```", replace_code_block, page.content, flags=re.DOTALL) + + page.content_ = processed_content + + return page diff --git a/_archive/lib/web/mdserver/macro_slides.py b/_archive/lib/web/mdserver/macro_slides.py new file mode 100644 index 0000000..ff451db --- /dev/null +++ b/_archive/lib/web/mdserver/macro_slides.py @@ -0,0 +1,76 @@ +import re +import streamlit as st +from PIL import Image +from typing import TYPE_CHECKING, List +from mdcollections.base_types import MDPage, MDImage + +# if TYPE_CHECKING: +# from .markdown_server import MDServer + +def create_slider_component(images: List[str]) -> None: + """Create a Streamlit component for image slides.""" + st.markdown(""" + + """, unsafe_allow_html=True) + + # Initialize session state + if 'current_slide' not in st.session_state: + st.session_state.current_slide = 0 + + # Navigation buttons + col1, col2, col3 = st.columns([1, 4, 1]) + + with col1: + if st.button("⬅️ Previous"): + st.session_state.current_slide = (st.session_state.current_slide - 1) % len(images) + + with col3: + if st.button("Next ➡️"): + st.session_state.current_slide = (st.session_state.current_slide + 1) % len(images) + + # Display current image + current_image_spec = images[st.session_state.current_slide] + if not hasattr(st.session_state, 'md_server') or not st.session_state.md_server.collections_manager: + st.error("Collections manager not initialized") + return + + try: + image_item = st.session_state.md_server.collections_manager.image_get(current_image_spec) + image = Image.open(image_item.path) + st.image(image, use_column_width=True) + except Exception as e: + st.error(f"Could not load image: {current_image_spec}. 
Error: {str(e)}") + + # Display slide counter + st.caption(f"Slide {st.session_state.current_slide + 1} of {len(images)}") + +def process_markdown_slides(page: MDPage) -> MDPage: + """Convert ```slides blocks to ```py sl blocks that use the slider component.""" + if not isinstance(page, MDPage): + raise TypeError("page must be a MDPage") + # if not hasattr(md_server, 'collections_manager'): + # raise TypeError("md_server must be an instance of MDServer") + + # # Store md_server in session state for use by create_slider_component + # st.session_state.md_server = md_server + + def replace_slides_block(match): + slides_content = match.group(1).strip() + image_paths = [line.strip() for line in slides_content.split('\n') if line.strip()] + + # Create the streamlit code block + image_paths_str = repr(image_paths) + streamlit_code = f'''```py sl +from .macro_slides import create_slider_component +create_slider_component({image_paths_str}) +```''' + return streamlit_code + + # Process all slides code blocks + page.content_ = re.sub(r"```slides\n(.*?)\n```", replace_slides_block, page.content, flags=re.DOTALL) + + return page diff --git a/_archive/lib/web/mdserver/markdown_server.py b/_archive/lib/web/mdserver/markdown_server.py new file mode 100644 index 0000000..aa309d7 --- /dev/null +++ b/_archive/lib/web/mdserver/markdown_server.py @@ -0,0 +1,237 @@ +from typing import Optional, Union +import os +from pathlib import Path +import traceback +import sys +import re +import pudb +try: + import streamlit as st +except ImportError: + raise ImportError("streamlit is required. Install with: pip install streamlit") + +from mdcollections.base_types import MDPage, MDImage, MDCollection +from mdcollections.mdcollections import MDCollections +from .process_markdown import process_markdown, summary_load +from .tools import debug + +def setup_static_dir(collections_path: str) -> None: + """ + Set up static directory for serving images. + Creates symbolic links from collections to static directory. + """ + pass + # static_dir = os.path.join(collections_path, "static") + # if not os.path.exists(static_dir): + # os.makedirs(static_dir) + + # Create symlinks for each collection + # collections = os.listdir(collections_path) + # for collection in collections: + # collection_path = os.path.join(collections_path, collection) + # if os.path.isdir(collection_path) and not collection.startswith('.') and collection != 'static': + # # Create symlink from collection to static/collection + # static_link = os.path.join(static_dir, collection) + # if not os.path.exists(static_link): + # try: + # os.symlink(collection_path, static_link) + # except OSError as e: + # debug(f"Failed to create symlink from {collection_path} to {static_link}: {e}") + +def process_markdown_content(content: str, base_path: str, collection_name: str) -> None: + """ + Process and display markdown content. 
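+ Currently a thin wrapper around st.markdown; base_path and collection_name are accepted but not yet used.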
+ + Args: + content: The markdown content to process + base_path: Base path for resolving relative paths + collection_name: Name of the collection + """ + st.markdown(content) + +class MDServer: + def __init__(self, collections_path: str): + """Initialize the MDServer instance.""" + # Convert path to string if it's a Path object + if not isinstance(collections_path, str): + collections_path = str(collections_path) + + st.session_state.setdefault('current_collection', None) + st.session_state.setdefault('current_page', None) + st.session_state.setdefault('show_collections_view', False) + st.session_state.setdefault('collections_manager', None) + st.session_state.setdefault('debug_mode', True) + + # Get the collections manager + collections_path = os.path.expanduser(collections_path) + + print(f"Initializing collections manager for: {collections_path}") + + collections_manager = MDCollections(root_path=Path(collections_path)) + + # Set up static directory for serving images + setup_static_dir(collections_path) + + # Set up page config + st.set_page_config( + page_title="Markdown Server", + page_icon="📚", + layout="wide", + initial_sidebar_state="expanded", + ) + + st.session_state.collections_manager = collections_manager + + @property + def collections_manager(self) -> MDCollections: + """ + Property to safely access the collections manager. + Ensures collections_manager is initialized before access. + + Returns: + MDCollections: The initialized collections manager + + Raises: + RuntimeError: If collections_manager is not initialized + """ + if not st.session_state.get('collections_manager'): + raise RuntimeError("Collections manager not initialized. Please ensure MDServer is properly initialized.") + return st.session_state.collections_manager + + @property + def collections(self) -> list: + """ + Property to safely access collections from the collections manager. + + Returns: + list: List of available collections + + Raises: + RuntimeError: If collections_manager is not initialized + """ + return self.collections_manager.collections + + def handle_url_parameters(self) -> None: + """ + Handle URL parameters to load specific pages. + Expected format: ?page=collection_name__page_name.md + Example: ?page=banking_whitepaper__web_3_vision.md + """ + query_params = st.query_params + requested_page = query_params.get('page', None) + + if not requested_page: + return + + try: + # Split the page parameter using '__' as delimiter + if '__' not in requested_page: + raise ValueError(f"Invalid page format. Expected format: collection_name__page_name.md, got: {requested_page}") + + collection_name, page_name = requested_page.split('__', 1) + + # Get the page using collections_manager's page_get method + page = self.collections_manager.page_get( + collection_name=collection_name, + page_name=page_name + ) + + page = process_markdown(page, collections=self.collections_manager) + + st.session_state.current_collection = page.collection + st.session_state.current_page = page + + except ValueError as e: + # Handle invalid format or page not found errors + st.warning(f"Could not load page: {requested_page}. Error: {str(e)}") + + def setup_sidebar(self, collections: MDCollections) -> None: + """ + Set up the sidebar with collection selection.
+ + Args: + collections: List of available collections + """ + with st.sidebar: + # Add Debug Mode toggle that persists across reloads + debug_mode = st.toggle("Debug Mode", st.session_state.debug_mode) + if debug_mode != st.session_state.debug_mode: + st.session_state.debug_mode = debug_mode + # Store in local storage to persist across reloads + st.session_state['debug_mode'] = debug_mode + + # Add Collections View action + if st.button("View All Collections"): + st.session_state.show_collections_view = True + st.session_state.current_page = None + return + + collection_names = [c.name for c in self.collections] + current_idx = collection_names.index(st.session_state.current_collection.name) if st.session_state.current_collection else 0 + + selected_collection_name = st.selectbox( + "Choose a collection:", + collection_names, + index=current_idx, + key="collection_selector" + ) + + # Add sidebar content + with st.sidebar: + # Check for summary.md + collection = self.collections_manager.collection_get(selected_collection_name) + summary_page = summary_load(collection) + st.markdown(summary_page.content, unsafe_allow_html=True) + + # Get the selected collection by name + st.session_state.current_collection = self.collections_manager.collection_get(selected_collection_name) + + def display_content(self) -> None: + """Display the markdown content in the main area.""" + main_content = st.container() + + with main_content: + try: + if st.session_state.show_collections_view: + # Read and process collections view template + collections_view_path = Path(__file__).parent / "pages" / "collections_view.md" + with open(collections_view_path, 'r') as f: + template = f.read() + + # Replace placeholder with actual collections string representation + content = template.replace("{collections_str}", str(self.collections_manager)) + st.markdown(content) + + elif st.session_state.current_page: + st.markdown(st.session_state.current_page.content, unsafe_allow_html=True) + + elif st.session_state.current_collection: + # Display collection summary and index when no specific page is selected + st.markdown("### Collection Index") + myindex_page = st.session_state.current_collection.index_page() + myindex_page = process_markdown(myindex_page, collections=self.collections_manager) + st.markdown(myindex_page.content) + else: + st.warning("Please select a collection.") + except Exception as e: + st.error(f"An error occurred: {str(e)}") + + def serve_markdown(self) -> None: + """ + Serve markdown content using Streamlit. + """ + try: + if not self.collections: + st.error("No collections found.") + return + + # Handle URL parameters + self.handle_url_parameters() + + # Setup sidebar + self.setup_sidebar(self.collections_manager) + + # Display content + self.display_content() + except Exception as e: + st.error(f"An error occurred: {str(e)}") diff --git a/_archive/lib/web/mdserver/pages/collections_view.md b/_archive/lib/web/mdserver/pages/collections_view.md new file mode 100644 index 0000000..b1ef978 --- /dev/null +++ b/_archive/lib/web/mdserver/pages/collections_view.md @@ -0,0 +1,7 @@ +# Collections Overview + +```python +{collections_str} +``` + +The tree structure above is automatically generated from the current state of the collections manager. 
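For reference on how the mdserver pieces compose: serve_markdown() builds an MDServer, which scans the collections directory and resolves ?page=collection__page.md URLs through the macro pipeline above (echarts, mermaid, slides, streamlit blocks, links, images). A minimal launcher sketch follows; the file name, import path and collections directory are illustrative assumptions, not part of this change:

    # main.py -- hypothetical entry point; run with: streamlit run main.py
    # Assumes _archive/lib/web is on PYTHONPATH so mdserver/mdcollections resolve.
    from mdserver import serve_markdown

    # Illustrative path; any directory whose subdirectories are collections works.
    serve_markdown("~/code/markdown_collections")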
diff --git a/_archive/lib/web/mdserver/process_images.py b/_archive/lib/web/mdserver/process_images.py new file mode 100644 index 0000000..b1c66e2 --- /dev/null +++ b/_archive/lib/web/mdserver/process_images.py @@ -0,0 +1,89 @@ +import os +import re +import streamlit as st +from PIL import Image # Pillow package provides PIL +from typing import Optional, List, Tuple, TYPE_CHECKING +from mdcollections.base_types import MDImage, MDPage +from mdcollections.mdcollections import MDCollections +from .tools import debug + + +def process_image(myimage: MDImage, alt_text: Optional[str] = None) -> str: + """ + Process an image and return an HTML img tag for proper rendering in markdown. + + Args: + myimage: The MDImage object to process + alt_text: Optional alternative text for the image + + Returns: + str: HTML img tag with proper styling + """ + if not isinstance(myimage, MDImage): + raise TypeError("myimage must be a MDImage") + try: + # Verify image can be opened + Image.open(myimage.path) + + # Construct static URL using collection name and relative path + static_url = f"/app/static/{myimage.collection.name}/{myimage.rel_path}" + + # Create HTML img tag with proper styling + return f'<img src="{static_url}" alt="{alt_text or ""}" style="max-width: 100%;">' + except Exception as e: + debug(f"Error processing image {myimage.path}: {str(e)}") + return f"Error loading image: {myimage.path}" + + +def process_images(page: MDPage, collections: MDCollections) -> MDPage: + """ + Process images in the markdown content while preserving text structure. + + Args: + page: The MDPage object containing markdown content + collections: The MDCollections object containing image references + + Returns: + MDPage: The processed page with images displayed + """ + if not isinstance(page, MDPage): + raise TypeError("page must be a MDPage") + if not isinstance(collections, MDCollections): + raise TypeError("collections must be a MDCollections") + + debug(f"Processing images for page: {page.name}") + debug(f"Content length before processing: {len(page.content)} characters") + + # Match markdown image syntax: ![alt text](path) + link_pattern = r'!\[(.*?)\]\((.*?)\)' + + def replace_link(match): + alt_text = match.group(1) + image_path = match.group(2) + + # Split path into collection and image name + try: + parts = image_path.split("__", 1) + if len(parts) != 2: + debug(f"Invalid image path format (missing __): {image_path}") + return f"Invalid image path format: {image_path}" + + collection_name, image_name = parts + debug(f"Found image link, will now check - Alt text: {alt_text}, Collection: '{collection_name}', Image: '{image_name}'") + + # Get the image from collections using the path + myimage = collections.image_get(collection_name, image_name) + return process_image(myimage, alt_text if alt_text else None) + except ValueError as e: + debug(f"Image not found in collection: {image_path}.\n{e}") + return f"Image not found: {image_path}" + except Exception as e: + debug(f"Error processing image {image_path}: {str(e)}") + return f"Error processing image: {image_path}" + + # Process all image links while preserving surrounding text + page.content_ = re.sub(link_pattern, replace_link, page.content) + + debug("Image processing complete") + + return page diff --git a/_archive/lib/web/mdserver/process_markdown.py b/_archive/lib/web/mdserver/process_markdown.py new file mode 100644 index 0000000..52bc732 --- /dev/null +++ b/_archive/lib/web/mdserver/process_markdown.py @@ -0,0 +1,80 @@ +import os +import re +import streamlit as st +from PIL import Image # Pillow package provides PIL
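+# The macro processors imported below are applied in sequence by process_markdown().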
+from typing import Optional, List, Tuple, TYPE_CHECKING +from .macro_sl import process_streamlit_blocks +from .macro_chart import process_markdown_echarts +from .macro_mermaid import process_markdown_mermaid +from .macro_slides import process_markdown_slides +from .macro_links import process_links +from .process_images import process_images +from mdcollections.tools import name_fix, is_image +from mdcollections.base_types import MDPage, MDCollection +from mdcollections.mdcollections import MDCollections +from .tools import debug, rewrite_summary_links + + +def summary_load(collection: MDCollection) -> MDPage: + """Load the summary.md file if it exists, otherwise create an index.""" + if not isinstance(collection, MDCollection): + raise TypeError("collection must be a MDCollection") + try: + mypage = collection.page_get("summary.md") + mypage.content_ = rewrite_summary_links(mypage.content_) # need to rewrite the first part of the path as the collection, might change in future + return mypage + except ValueError: + return collection.index_page() + +def process_markdown(page: MDPage, collections: MDCollections) -> MDPage: + """Process markdown content and handle images, links, and streamlit code blocks. + + Args: + page: The MDPage object to process + collections: The MDCollections object containing all collections + """ + if not isinstance(page, MDPage): + raise TypeError("page must be a MDPage") + if not isinstance(collections, MDCollections): + raise TypeError("collections must be a MDCollections") + + debug(f"Processing markdown for page: {page.name} in collection: {page.collection.name}\nInitial content length: {len(page.content)} characters") + + if page.processed: + raise RuntimeError(f"double processing of page {page.name}") + + # Process special macro blocks in sequence + #debug("Processing echarts blocks...") + page = process_markdown_echarts(page) + + #debug("Processing mermaid blocks...") + page = process_markdown_mermaid(page) + + #debug("Processing slides blocks...") + page = process_markdown_slides(page) + + #debug("Processing streamlit blocks...") + page = process_streamlit_blocks(page) + + #debug("Processing links...") + page = process_links(page=page, collections=collections) + + page = process_images(page=page, collections=collections) + + # Process remaining content + if page.content.strip(): + debug(f"Rendering final markdown content (length: {len(page.content)} characters)") + st.markdown(page.content, unsafe_allow_html=True) + else: + debug("No content to render after processing") + + return page + +def parse_page_parameter(page_param: str) -> Tuple[Optional[str], str]: + """Parse the page parameter to extract collection and file name.""" + if '__' in page_param: + collection, filename = page_param.split('__', 1) + return collection, filename + return None, page_param diff --git a/_archive/lib/web/mdserver/requirements.txt b/_archive/lib/web/mdserver/requirements.txt new file mode 100644 index 0000000..1f575ef --- /dev/null +++ b/_archive/lib/web/mdserver/requirements.txt @@ -0,0 +1,5 @@ +streamlit>=1.24.0 +pandas>=1.5.0 +numpy>=1.24.0 +ipython>=8.0.0 +Pillow>=10.0.0 diff --git a/_archive/lib/web/mdserver/tools.py b/_archive/lib/web/mdserver/tools.py new file mode 100644 index 0000000..0c26d0f --- /dev/null +++ b/_archive/lib/web/mdserver/tools.py @@ -0,0 +1,43 @@ +import re +import streamlit as st + +def strip_ansi_codes(text): + """Remove ANSI escape codes from text.""" +
ansi_escape = re.compile(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])') + return ansi_escape.sub('', text) + + + +def debug(message: str): + """Display debug messages in a compact code block format. + + Args: + message: The debug message to display + """ + debug_enabled=st.session_state.debug_mode + if debug_enabled: + #st.code(message, language="text") + print(strip_ansi_codes(message)) + + +def rewrite_summary_links(text:str) -> str: + import re + + def replace_first_slash(match): + # Get the matched text + link = match.group(1) + # Replace the first slash with double underscore + new_link = link.replace('/', '__', 1) + return f'({new_link})' + + # Use a regular expression to find links in the format (path/to/resource) + pattern = r'\(([^)]+)\)' + + # Process each line and apply the substitution + rewritten_lines = [] + for line in text.splitlines(): + rewritten_line = re.sub(pattern, replace_first_slash, line) + rewritten_lines.append(rewritten_line) + + # Join the rewritten lines back together + return '\n'.join(rewritten_lines) diff --git a/_archive/openapi/generator/model_generator.py b/_archive/openapi/generator/model_generator.py new file mode 100644 index 0000000..698a796 --- /dev/null +++ b/_archive/openapi/generator/model_generator.py @@ -0,0 +1,188 @@ +from typing import Dict, List, Set, Any + +class VlangCodeGenerator: + pass + +class ModelGenerator: + def __init__( + self, spec: Dict[str, Any], lang_code_generator: str + ) -> None: + self.spec = spec + self.lang_code_generator = lang_code_generator + # self.processed_objects: Dict[str, Dict[str, str]] = {} + # self.ordered_objects: List[str] = [] + # self.used_names: Set[str] = set() + + def generate_models(self): + if self.lang_code_generator != "vlang": + raise ValueError('Unsupported language.') + + + if not self.spec.get('components'): + raise ValueError("No components found in spec") + + components = self.spec['components'] + + if not components.get('schemas'): + raise ValueError("No schemas found in components") + + schemas = components['schemas'] + schemas_path = ["components", "schemas"] + for name, schema in schemas.items(): + self.jsonschema_to_type( + path=schemas_path + [name], + jsonschema=schema, + ) + + objects_code = "" + for val in self.ordered_objects: + if val == "": + continue + objects_code = f"{objects_code}{val}\n\n" + + print(f'debugzo4 {objects_code}') + return objects_code + + # def jsonschema_to_type( + # self, path: List[str], jsonschema: SchemaObject | ReferenceObject + # ) -> str: + # if isinstance(jsonschema, ReferenceObject): + # ref: str = jsonschema.ref + + # ref_schema = self.spec.ref_to_schema(ref) + # ref_path = ref.split("/")[1:] + + # if isinstance(ref_schema, ContentDescriptorObject): + # # TODO: implement + # raise Exception("unimplemented") + # # return self.content_descriptor_to_type(ref_path, ref_schema) + + # return self.jsonschema_to_type(ref_path, ref_schema) + + # path_str = "/".join([item.lower() for item in path]) + # if path_str in self.processed_objects: + # return self.processed_objects[path_str]["name"] + + # type_name = self.type_name_from_path(path) + + # description = getattr(jsonschema, 'description', None) + # if jsonschema.enum: + # enum = jsonschema.enum + # type_code = self.lang_code_generator.generate_enum(enum, type_name) + # if self.lang_code_generator.is_primitive(type_code): + # return type_code + + # self.add_object(path_str, type_code, type_name) + # return type_name + + # if jsonschema.type: + # match jsonschema.type: + # case "string": + # return 
self.lang_code_generator.string_primitive() + + # case "integer": + # return self.lang_code_generator.integer_primitive() + + # case "number": + # return self.lang_code_generator.number_primitive() + + # case "array": + # if isinstance(jsonschema.items, List): + # raise Exception( + # "array of different item types is not supported" + # ) + + # item_type_name = self.jsonschema_to_type( + # path + ["item"], jsonschema.items + # ) + # return self.lang_code_generator.array_of_type( + # item_type_name + # ) + + # case "boolean": + # return self.lang_code_generator.bool_primitive() + + # case "object": + # # to prevent cyclic dependencies + # self.add_object(path_str, "", type_name) + + # properties: Dict[str, PropertyInfo] = {} + # for ( + # property_name, + # property_schema, + # ) in jsonschema.properties.items(): + # schema = property_schema + # new_path = path + ["properties", property_name] + # if isinstance(property_schema, ReferenceObject): + # schema = self.spec.ref_to_schema( + # property_schema.ref + # ) + # new_path = property_schema.ref.split("/")[1:] + + # property_info = PropertyInfo( + # name=property_name, + # type_name=self.jsonschema_to_type(new_path, schema), + # description=schema.description, + # example=schema.example, + # ) + + # properties[property_name] = property_info + + # type_code = self.lang_code_generator.generate_object( + # type_name, properties + # ) + # self.add_object(path_str, type_code, type_name) + # return type_name + + # case "null": + # return self.lang_code_generator.null_primitive() + + # case _: + # raise Exception(f"type {jsonschema.type} is not supported") + + # if jsonschema.anyOf: + # type_names = [] + # for i, item in enumerate(jsonschema.anyOf): + # type_names.append( + # self.jsonschema_to_type(path + [f"anyOf{i}"], item) + # ) + + # return self.lang_code_generator.generate_multitype(type_names) + # # self.add_object(path_str, type_code, type_code) + # # return type_code + + # elif jsonschema.oneOf: + # type_names = [] + # for i, item in enumerate(jsonschema.oneOf): + # type_names.append( + # self.jsonschema_to_type(path + [f"oneOf{i}"], item) + # ) + + # return self.lang_code_generator.generate_multitype(type_names) + # # self.add_object(path_str, type_code, type_code) + # # return type_code + + # elif jsonschema.allOf: + # return self.lang_code_generator.encapsulate_types(jsonschema.allOf) + # # self.add_object(path_str, type_code, type_code) + # # return type_name + + # raise Exception(f"type {jsonschema.type} is not supported") + + # def add_object(self, path_str: str, type_code: str, type_name: str): + # self.used_names.add(type_name) + # self.processed_objects[path_str] = { + # "code": type_code, + # "name": type_name, + # } + # print(f'debugzo21 {self.processed_objects[path_str]}') + # self.ordered_objects.append(type_code) + + # def type_name_from_path(self, path: List[str]) -> str: + # type_name = "" + # for item in reversed(path): + # type_name += item.title() if item.islower() else item + # if type_name not in self.used_names: + # return type_name + + # raise Exception(f"failed to generate unique name from path: {path}") diff --git a/_archive/openapi/generator/server/vlang/templates/enum.jinja b/_archive/openapi/generator/server/vlang/templates/enum.jinja new file mode 100644 index 0000000..f96878e --- /dev/null +++ b/_archive/openapi/generator/server/vlang/templates/enum.jinja @@ -0,0 +1,9 @@ +pub enum {{ type_name }}{ + {% for elem in enum -%} + {% if is_integer -%} + {{ number_to_words(elem) }} = {{ elem }} + {% else -%} + {{ 
elem }} + {% endif -%} + {% endfor %} +} \ No newline at end of file diff --git a/_archive/openapi/generator/server/vlang/templates/executor.jinja b/_archive/openapi/generator/server/vlang/templates/executor.jinja new file mode 100644 index 0000000..ad0b915 --- /dev/null +++ b/_archive/openapi/generator/server/vlang/templates/executor.jinja @@ -0,0 +1,77 @@ +pub struct {{ actor_executor_name }}{ +pub mut: + db &backend.Backend + redis &redisclient.Redis +} + +pub fn (mut executor {{ actor_executor_name }}) execute(rpc_msg_id string, rpc_msg_method string, rpc_msg_params_str string) { + raw_params := json2.raw_decode(rpc_msg_params_str) or { + executor.return_error(rpc_msg_id, jsonrpc.invalid_params) + return + } + + params_arr := raw_params.arr() + + match rpc_msg_method { + {%- for method in methods %} + '{{method.name}}' { + {%- for param in method.params %} + {%- if generator.is_primitive(generator.get_param_type(method.name, param))%} + {{param.name}} := params_arr[{{loop.index0}}] as {{generator.get_param_type(method.name, param)}} + {%- else %} + {{param.name}} := json.decode({{generator.get_param_type(method.name, param)}}, params_arr[{{loop.index0}}].json_str()) or { + executor.return_error(rpc_msg_id, jsonrpc.invalid_request) + return + } + {%- endif %} + {%- endfor %} + + {%- if generator.get_method_return_type(method) == 'none' %} + executor.{{method.name}}_internal({{generator.get_method_params_as_args(method)}}) or { + executor.return_error(rpc_msg_id, jsonrpc.InnerJsonRpcError{ + code: -32000 + message: '${err}' + }) + return + } + + response := jsonrpc.JsonRpcResponse[string]{ + jsonrpc: '2.0' + id: rpc_msg_id + result: '' + } + {%- else %} + result := executor.{{method.name}}_internal({{generator.get_method_params_as_args(method)}}) or { + executor.return_error(rpc_msg_id, jsonrpc.InnerJsonRpcError{ + code: -32000 + message: '${err}' + }) + return + } + + response := jsonrpc.JsonRpcResponse[{{generator.get_method_return_type(method)}}]{ + jsonrpc: '2.0' + id: rpc_msg_id + result: result + } + {%- endif %} + + // put response in response queue + executor.redis.lpush(rpc_msg_id, response.to_json()) or { + println('failed to push response for ${rpc_msg_id} to redis queue: ${err}') + } + } + {%- endfor %} + else { + executor.return_error(rpc_msg_id, jsonrpc.method_not_found) + return + } + } +} + +pub fn (mut executor {{actor_executor_name}}) return_error(rpc_msg_id string, error jsonrpc.InnerJsonRpcError){ + response := jsonrpc.new_jsonrpcerror(rpc_msg_id, error) + executor.redis.lpush(rpc_msg_id, response.to_json()) or { + println('failed to push response for ${rpc_msg_id} to redis queue: ${err}') + } +} \ No newline at end of file diff --git a/_archive/openapi/generator/server/vlang/templates/handler.jinja b/_archive/openapi/generator/server/vlang/templates/handler.jinja new file mode 100644 index 0000000..cfc57ae --- /dev/null +++ b/_archive/openapi/generator/server/vlang/templates/handler.jinja @@ -0,0 +1,50 @@ +struct Handler { +pub mut: + db &backend.Backend + redis &redisclient.Redis + {% for actor in actors %} + {{actor}}_executor {{get_actor_executor_name(actor)}} + {%- endfor %} +} + +pub fn new(db_config backend.BackendConfig, redis_addr string) !Handler{ + db := backend.new(db_config)! + mut redis_client := redisclient.new([redis_addr])! + redis_client.selectdb(0)!
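+ // every actor executor constructed below shares this backend and redis connection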
+ + return Handler{ + db: &db + redis: &redis_client + {%- for actor in actors %} + {{actor}}_executor: {{get_actor_executor_name(actor)}}{ + db: &db + redis: &redis_client + } + {%- endfor %} + } +} + +// handle handles an incoming JSON-RPC encoded message and returns an encoded response +pub fn (mut handler Handler) handle(id string, method string, params_str string) { + actor := method.all_before('.') + method_name := method.all_after('.') + + match actor { + {%- for actor in actors %} + '{{ actor }}' { + spawn (&handler.{{actor}}_executor).execute(id, method_name, params_str) + } + {%- endfor %} + else { + handler.return_error(id, jsonrpc.method_not_found) + return + } + } +} + +pub fn (mut handler Handler) return_error(rpc_msg_id string, error jsonrpc.InnerJsonRpcError){ + response := jsonrpc.new_jsonrpcerror(rpc_msg_id, error) + handler.redis.lpush(rpc_msg_id, response.to_json()) or { + println('failed to push response for ${rpc_msg_id} to redis queue: ${err}') + } +} diff --git a/_archive/openapi/generator/server/vlang/templates/handler_test.jinja b/_archive/openapi/generator/server/vlang/templates/handler_test.jinja new file mode 100644 index 0000000..2f4b524 --- /dev/null +++ b/_archive/openapi/generator/server/vlang/templates/handler_test.jinja @@ -0,0 +1,31 @@ +module myhandler + +import x.json2 +import rand +import freeflowuniverse.crystallib.baobab.backend + +fn test_handler(){ + db_config := backend.BackendConfig{ + name: 'myhandler' + secret: 'secret' + reset: true + db_type: .postgres + } + + mut handler := new(db_config, '127.0.0.1:6379')! + {% for method_name in method_names %} + do_request(mut handler, '{{method_name}}')! + {%- endfor %} +} + +fn do_request(mut handler Handler, method_name string) ! { + // TODO: edit input parameters + mut params := []json2.Any{} + params << "objid" + params << "blabla_name" + params_str := json2.Any(params).json_str() + + id := rand.string(6) + handler.handle(id, method_name, params_str) + println('request id: ${id}') +} \ No newline at end of file diff --git a/_archive/openapi/generator/server/vlang/templates/internal_actor_method.jinja b/_archive/openapi/generator/server/vlang/templates/internal_actor_method.jinja new file mode 100644 index 0000000..339eeb5 --- /dev/null +++ b/_archive/openapi/generator/server/vlang/templates/internal_actor_method.jinja @@ -0,0 +1,7 @@ +pub fn (mut executor {{ actor_executor_name }}) {{function_name}}({{method_params}}) !{{return_type}}{ + // context allows us to see who the user is and which groups the user is in + // context also gives a logging feature + // context is linked to 1 circle + // context is linked to a DB (OSIS) + panic('implement') +} \ No newline at end of file diff --git a/_archive/openapi/generator/server/vlang/templates/internal_crud_methods.jinja b/_archive/openapi/generator/server/vlang/templates/internal_crud_methods.jinja new file mode 100644 index 0000000..588678e --- /dev/null +++ b/_archive/openapi/generator/server/vlang/templates/internal_crud_methods.jinja @@ -0,0 +1,28 @@ +pub fn (mut executor {{ actor_executor_name }}) {{variable_name}}_get_internal(id string) !{{type_name}}{ + json_str := executor.db.indexer.get_json(id, backend.RootObject{ + name: '{{type_name}}' + })! + + return json.decode({{type_name}}, json_str)!
+} + +pub fn (mut executor {{ actor_executor_name }}) {{variable_name}}_set_internal({{variable_name}} {{type_name}}) !{ + if {{variable_name}}.oid != ''{ + executor.db.indexer.set(backend.RootObject{ + id: {{variable_name}}.oid + name: '{{type_name}}' + })! + } + + executor.db.indexer.new(backend.RootObject{ + name: '{{type_name}}' + })! +} + +pub fn (mut executor {{ actor_executor_name }}) {{variable_name}}_delete_internal(id string) !{ + executor.db.indexer.delete(id, backend.RootObject{ + name: '{{type_name}}' + })! +} + + diff --git a/_archive/openapi/generator/server/vlang/templates/method_param_struct.jinja b/_archive/openapi/generator/server/vlang/templates/method_param_struct.jinja new file mode 100644 index 0000000..8ee539d --- /dev/null +++ b/_archive/openapi/generator/server/vlang/templates/method_param_struct.jinja @@ -0,0 +1,5 @@ +pub struct {{method_param_struct_name}}{ + {% for param_name, param_type in params.items()%} + {{param_name}} {{param_type}} + {%- endfor %} +} \ No newline at end of file diff --git a/_archive/openapi/generator/server/vlang/templates/methods.jinja b/_archive/openapi/generator/server/vlang/templates/methods.jinja new file mode 100644 index 0000000..f0ff6fc --- /dev/null +++ b/_archive/openapi/generator/server/vlang/templates/methods.jinja @@ -0,0 +1,75 @@ +{% if method_example -%} +/* +Example: +{{ method_example }} +*/ +{% endif -%} + +{% if method_description -%} +/* +{{ method_description }} +*/ +{% endif -%} +pub fn {{ function_name }}({{ vlang_code_generator.get_method_params(method_params) }}) {{ method_result }}{ + mut conn := httpconnection.new( + name: 'openrpc_client' + url: '{{ base_url }}' + )! + + mut params := map[string]json2.Any{} + {% for param_name, param_type in method_params.items() -%} + {% if vlang_code_generator.is_primitive(param_type) %} + params["{{ param_name }}"] = {{ param_name }} + {% elif vlang_code_generator.is_vlang_array(param_type) %} + mut any_arr := []json2.Any{} + for item in {{ param_name }}{ + {% if vlang_code_generator.is_primitive(param_type[2:]) %} + any_arr << item + {% else %} + any_arr << json2.raw_decode(json2.encode(item))! + {% endif %} + } + params["{{ param_name }}"] = json2.Any(any_arr) + {%else %} + params["{{ param_name }}"] = json2.raw_decode(json2.encode({{ param_name }}))! + {% endif %} + {% endfor -%} + + mut payload := map[string]json2.Any{} + payload['jsonrpc'] = "2.0" + payload['id'] = 0 + payload['method'] = '{{ method_name }}' + payload['params'] = params + + response := conn.send(method: .post, data: json2.encode(payload){% if url_path -%}, prefix: '{{ url_path }}' {% endif -%})! + if !response.is_ok() { + return error('failed to make rpc request: (${response.code}) ${response.data}') + } + + {% if return_type != 'none' %} + mp := json2.raw_decode(response.data)!.as_map() + res := mp['result'] or { + return error('invalid jsonrpc result: ${response.data}') + } + + if res is json2.Null{ + return error('not found') + } + + {% if vlang_code_generator.is_primitive(return_type) %} + return res as {{return_type}} + {% elif vlang_code_generator.is_vlang_array(return_type) %} + mut res_arr := {{return_type}} + for item in res.arr() { + {% if vlang_code_generator.is_primitive(return_type[2:]) %} + res_arr << item as {{return_type}} + {% else %} + res_arr << json2.decode[{{return_type[2:]}}](item.json_str())! + {% endif %} + } + return res_arr + {%else %} + return json2.decode[{{return_type}}](res.json_str())! 
+ {% endif -%} + {% endif %} +} \ No newline at end of file diff --git a/_archive/openapi/generator/server/vlang/templates/pre.jinja b/_archive/openapi/generator/server/vlang/templates/pre.jinja new file mode 100644 index 0000000..34f6cf2 --- /dev/null +++ b/_archive/openapi/generator/server/vlang/templates/pre.jinja @@ -0,0 +1,5 @@ +module {{module_name}} +{% for item in imports %} +import {{item}} +{%- endfor %} + diff --git a/_archive/openapi/generator/server/vlang/templates/struct.jinja b/_archive/openapi/generator/server/vlang/templates/struct.jinja new file mode 100644 index 0000000..a564525 --- /dev/null +++ b/_archive/openapi/generator/server/vlang/templates/struct.jinja @@ -0,0 +1,10 @@ +@[params] +pub struct {{ struct_name }}{ +pub mut: + {%- for property_name, property_info in properties.items() %} + {%- if property_info.description %} + // {{ property_info.description }} + {%- endif %} + {{ property_name }} {{ property_info.type_name }} + {%- endfor %} +} \ No newline at end of file diff --git a/_archive/openapi/generator/server/vlang/vlang.py b/_archive/openapi/generator/server/vlang/vlang.py new file mode 100644 index 0000000..e717c10 --- /dev/null +++ b/_archive/openapi/generator/server/vlang/vlang.py @@ -0,0 +1,231 @@ +from openapi_python_client.schema import OpenAPI, Schema, Reference +from jinja2 import Environment, FileSystemLoader +from typing import Dict, Any + +import os + + +script_dir = os.path.dirname(os.path.abspath(__file__)) +env = Environment(loader=FileSystemLoader(script_dir)) + +class VlangCodeGenerator: + def __init__(self, python_code: OpenAPI, output_dir: str) -> None: + self.python_code = python_code + self.output_dir = output_dir + self.struct_template = env.get_template("templates/struct.jinja") + + def generate(self): + """ + Main generation method to create V code. + """ + # Ensure the output directory exists + os.makedirs(self.output_dir, exist_ok=True) + + structs = self._generate_structs() + print('structs: ', structs) + # methods = self._generate_methods() + + # # Combine structs and methods into one file + vlang_code = structs + output_file = f"{self.output_dir}/generated.v" + + # Write to file + with open(output_file, "w") as file: + file.write(vlang_code) + print(f"Vlang code generated at {output_file}") + + def _generate_struct(self, struct_name: str, scheme: Schema | Reference) -> str: + properties = {} + code = "" + + for field_name, field in scheme.properties.items(): # type: ignore + v_type = self._convert_type(field.type) # type: ignore + + if field.type == 'object': # type: ignore + # Capitalize each part of the field name and create a nested struct name + nested_struct_name = ''.join(part.capitalize() for part in field_name.split("_")) + + # Generate the struct for the nested object + code += self._generate_struct(struct_name=nested_struct_name, scheme=field) + + # Update v_type to the newly generated nested struct name + v_type = nested_struct_name + + # Update the properties dictionary with type name and description + properties[field_name] = { + 'type_name': v_type, + 'description': field.description # type: ignore + } + + code += "\n" + code += self.struct_template.render( + struct_name=struct_name, + properties= properties # type: ignore + ) + code += "\n" + + return code + + def _generate_structs(self) -> str: + """ + Generate V structs from OpenAPI components with support for nested objects and arrays. 
+ """ + if not self.python_code.components: + raise ValueError("No components found in spec") + + if not self.python_code.components.schemas: + raise ValueError("No schemas found in components") + + code = "" + + for struct_name, schema in self.python_code.components.schemas.items(): + code += self._generate_struct(struct_name=struct_name, scheme=schema) + + return code + + # structs_code = [] + # for schema_name, schema in self.python_code.components.schemas.items(): + # fields = [] + # for field_name, field in schema.properties.items(): # type: ignore + # if field.type == "object": # type: ignore + # # Generate a nested struct + # parts = field_name.split("_") + # nested_struct_name = "" + # for part in parts: + # nested_struct_name += part.capitalize() + # nested_struct = self._generate_struct_from_object(nested_struct_name, field) # type: ignore + # structs_code.append(nested_struct) + # fields.append(f"\t{field_name} {nested_struct_name}") + # print(f"Generated struct for {nested_struct_name}") + # elif field.type == "array": # type: ignore + # # Handle arrays with proper type conversion for items + # item_type = self._convert_type(field.items.type) # type: ignore + # fields.append(f"\t{field_name} []{item_type}") + # else: + # # Convert JSON schema type to V type + # v_type = self._convert_type(field.type) # type: ignore + # fields.append(f"\t{field_name} {v_type}") + + # # Construct struct + # struct_code = f"pub struct {schema_name} {{\n" + "\n".join(fields) + "\n}" + # structs_code.append(struct_code) + # print(f"Generated struct for {schema_name}") + + # return "\n\n".join(structs_code) + + # def _generate_struct_from_object(self, struct_name: str, schema: dict) -> str: + # """ + # Generate a nested struct from an object schema. + # """ + # fields = [] + # for field_name, field in schema.properties.items(): # type: ignore + # v_type = self._convert_type(field.type) # type: ignore + # fields.append(f"\t{field_name} {v_type}") + + # return f"struct {struct_name} {{\n" + "\n".join(fields) + "\n}" + + # def _generate_methods(self) -> str: + # """ + # Generate V methods based on OpenAPI paths and operations. 
+ # """ + # if not self.python_code.paths: + # raise ValueError("No paths found in spec") + + # methods_code = [] + # for path, path_item in self.python_code.paths.items(): + # # Explicitly check for HTTP method attributes in PathItem + # for http_method in ["get", "post", "put", "delete", "patch", "options", "head"]: + # operation = getattr(path_item, http_method, None) + # if operation: + # # Generate method name and parameters + # method_name = self._generate_method_name(http_method, path) + # parameters = self._generate_method_parameters(operation.parameters) + # request_body = self._generate_request_body(operation.request_body) + # response_type = self._generate_response_type(operation.responses) + + # # Combine method arguments + # method_arguments = parameters + # if request_body: + # method_arguments += f", {request_body}" if parameters else request_body + + # # Generate the method code + # method_code = f"fn {method_name}({method_arguments}) {response_type} {{\n" + # method_code += f"\t// TODO: Implement the {http_method.upper()} request to {path}\n" + # method_code += "\t// Use the generated structs for request/response bodies\n" + # method_code += "}\n" + # methods_code.append(method_code) + + # print(f"Generated method for {http_method.upper()} {path}") + + # return "\n\n".join(methods_code) + + # def _generate_method_name(self, http_method: str, path: str) -> str: + # """ + # Generate a method name from the HTTP method and path. + # """ + # # Remove leading/trailing slashes and replace `/` with `_` + # sanitized_path = path.strip("/").replace("/", "_").replace("{", "").replace("}", "") + # return f"{http_method.lower()}_{sanitized_path}" + + # def _generate_method_parameters(self, parameters) -> str: + # if not parameters: + # return "" + + # param_list = [] + # for param in parameters: + # param_name = param.name + # param_schema = getattr(param, "schema", None) + # print('param_name: ', param_name) + # print('param_schema: ', param_schema) + # # if param_schema and param_schema.type: + # # param_type = self._convert_type(param_schema.type) + # # param_list.append(f"{param_name} {param_type}") + + # return ", ".join(param_list) + + + # def _generate_request_body(self, request_body) -> str: + # """ + # Generate a function parameter for the request body if present. + # """ + # if not request_body or not request_body.content: + # return "" + + # # Assume application/json content type + # json_schema = request_body.content.get("application/json") + # if not json_schema or not json_schema.schema: + # return "" + + # print('body_type: ', json_schema) + # # body_type = json_schema.schema.ref.split("/")[-1] # Extract the schema name + # return f"body {json_schema}" + + # def _generate_response_type(self, responses) -> str: + # """ + # Determine the return type of the method based on responses. + # """ + # if not responses: + # return "void" + + # for status_code, response in responses.items(): + # if response.content and "application/json" in response.content: + # json_schema = response.content["application/json"].schema + # print('json_schema: ', json_schema) + # # if json_schema and json_schema.ref: + # # return json_schema.ref.split("/")[-1] # Extract schema name + + # return "void" + + def _convert_type(self, json_type: str) -> str: + """ + Map JSON schema types to Vlang types. 
+ """ + type_mapping = { + "string": "string", + "integer": "int", + "number": "f64", + "boolean": "bool", + "array": "[]", + } + return type_mapping.get(json_type, "string") # Default to `string` + diff --git a/_archive/openapi/openapi.py b/_archive/openapi/openapi.py new file mode 100644 index 0000000..4159335 --- /dev/null +++ b/_archive/openapi/openapi.py @@ -0,0 +1,50 @@ +from openapi_python_client.schema import OpenAPI + +import json +import yaml + +from generator.server.vlang.vlang import VlangCodeGenerator + +class OpenApiCodeGenerator: + def __init__(self, lang: str, spec_file: str, output_dir: str): + self.lang = lang + self.spec_file = spec_file + self.output_dir = output_dir + + def _read_file(self): + """ + Read the OpenAPI spec file. + """ + if self.spec_file.endswith(".json"): + with open(self.spec_file, "r") as file: + return file.read() # Return raw JSON string + elif self.spec_file.endswith(".yaml"): + with open(self.spec_file, "r") as file: + # Convert YAML to JSON string for compatibility + return json.dumps(yaml.safe_load(file)) + else: + raise ValueError("Unsupported file format") + + + def generate(self): + """ + Main generation logic for code based on the OpenAPI spec. + """ + file_content = self._read_file() + openapi = OpenAPI.model_validate_json(file_content) + if self.lang == "vlang": + vlang_code_generator = VlangCodeGenerator( + python_code=openapi, output_dir=self.output_dir + ) + vlang_code_generator.generate() + elif self.lang == "python": + print("Python code generation not implemented yet.") + + +if __name__ == "__main__": + s = OpenApiCodeGenerator( + lang="vlang", + spec_file="/home/thunder/work/codescalers/github/hero_server_python/lib/openapi/schema.json", + output_dir="./output" + ) + s.generate() diff --git a/_archive/openapi/schema.json b/_archive/openapi/schema.json new file mode 100644 index 0000000..8134607 --- /dev/null +++ b/_archive/openapi/schema.json @@ -0,0 +1,136 @@ +{ + "openapi": "3.0.3", + "info": { + "title": "User Management API", + "version": "1.0.0", + "description": "A simple API to manage users" + }, + "servers": [ + { + "url": "https://api.example.com/v1", + "description": "Production server" + } + ], + "paths": { + "/users": { + "get": { + "summary": "List all users", + "responses": { + "200": { + "description": "A list of users", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "$ref": "#/components/schemas/User" + } + } + } + } + } + } + }, + "post": { + "summary": "Create a new user", + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/User" + } + } + } + }, + "responses": { + "201": { + "description": "User created successfully" + } + } + } + }, + "/users/{userId}": { + "get": { + "summary": "Get a user by ID", + "parameters": [ + { + "name": "userId", + "in": "path", + "required": true, + "schema": { + "type": "string" + } + } + ], + "responses": { + "200": { + "description": "User details", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/User" + } + } + } + }, + "404": { + "description": "User not found" + } + } + } + } + }, + "components": { + "schemas": { + "User": { + "type": "object", + "properties": { + "id": { + "type": "string", + "example": "123", + "description": "The unique identifier for the user" + }, + "name": { + "type": "string", + "example": "John Doe" + }, + "email": { + "type": "string", + "example": "john.doe@example.com" + }, + "user_profile": { + "type": 
"object", + "properties": { + "age": { + "type": "integer" + }, + "address": { + "type": "string" + } + } + } + }, + "required": [ + "id", + "name", + "email" + ] + }, + "UserBalance": { + "type": "object", + "properties": { + "id": { + "type": "string", + "example": "123", + "description": "The unique identifier for the user" + }, + "name": { + "type": "string", + "example": "John Doe" + } + } + } + } + } +} \ No newline at end of file diff --git a/_archive/openapi/schema.yml b/_archive/openapi/schema.yml new file mode 100644 index 0000000..d592a03 --- /dev/null +++ b/_archive/openapi/schema.yml @@ -0,0 +1,68 @@ +openapi: 3.0.3 +info: + title: User Management API + version: 1.0.0 + description: A simple API to manage users +servers: + - url: https://api.example.com/v1 + description: Production server +paths: + /users: + get: + summary: List all users + responses: + '200': + description: A list of users + content: + application/json: + schema: + type: array + items: + $ref: '#/components/schemas/User' + post: + summary: Create a new user + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/User' + responses: + '201': + description: User created successfully + /users/{userId}: + get: + summary: Get a user by ID + parameters: + - name: userId + in: path + required: true + schema: + type: string + responses: + '200': + description: User details + content: + application/json: + schema: + $ref: '#/components/schemas/User' + '404': + description: User not found +components: + schemas: + User: + type: object + properties: + id: + type: string + example: '123' + name: + type: string + example: John Doe + email: + type: string + example: john.doe@example.com + required: + - id + - name + - email diff --git a/_archive/openrpc/__init__.py b/_archive/openrpc/__init__.py new file mode 100644 index 0000000..3b8226d --- /dev/null +++ b/_archive/openrpc/__init__.py @@ -0,0 +1,23 @@ +from heroserver.openrpc.factory import openrpc_dict, openrpc_spec, openrpc_spec_write +from heroserver.openrpc.model.openrpc_spec import OpenRPCSpec + + +def init_openrpc_dict(path: str = "") -> dict: + """ + return openrpc dict + """ + return openrpc_dict(path=path) + + +def init_openrpc_spec_write(path: str = "", dest: str = "") -> str: + """ + parse & write the specs to the destination, the path will be ${destination}/openrpc_spec.json" and .../openrpc_spec.yaml" + """ + return openrpc_spec_write(path=path, dest=dest) + + +def init_openrpc_spec(path: str = "") -> OpenRPCSpec: + """ + return openrpc object + """ + return openrpc_spec(path=path) diff --git a/_archive/openrpc/factory.py b/_archive/openrpc/factory.py new file mode 100644 index 0000000..76544d6 --- /dev/null +++ b/_archive/openrpc/factory.py @@ -0,0 +1,58 @@ +import json +import os + +import yaml # type: ignore + +from heroserver.openrpc.model.openrpc_spec import ( + OpenRPCSpec, +) +from heroserver.openrpc.parser.parser import parser + + +def openrpc_spec_write(path: str = "", dest: str = "") -> str: + """ + parse & write the specs + dest is the path where we write the openrpc specs + returns filename = f"{dest}/openrpc_spec.json" + """ + data = openrpc_dict(path=path) + + out = json.dumps(data, indent=2) + # print(out) + + dest = os.path.expanduser(dest) + os.makedirs(dest, exist_ok=True) + + filename = f"{dest}/openrpc_spec.json" + # Write the spec to the file + with open(filename, "w") as f: + f.write(out) + print(f"OpenRPC specification (JSON) has been written to: {filename}") + + yaml_filename = 
f"{dest}/openrpc_spec.yaml" + with open(yaml_filename, "w") as f: + yaml.dump(data, f, sort_keys=False) + print(f"OpenRPC specification (YAML) has been written to: {yaml_filename}") + + return filename + + +def openrpc_spec(path: str = "") -> OpenRPCSpec: + """ + return openrpc object starting from spec path + this is our python representation of OpenRPCSpec + """ + data = openrpc_dict(path=path) + + spec_object = OpenRPCSpec.load(data) + + return spec_object + + +def openrpc_dict(path: str = "") -> dict: + """ + return openrpc dict starting from spec path + """ + data = parser(path=path) + + return data diff --git a/_archive/openrpc/factory_model.py b/_archive/openrpc/factory_model.py new file mode 100644 index 0000000..33cea32 --- /dev/null +++ b/_archive/openrpc/factory_model.py @@ -0,0 +1,91 @@ +import os +from pathlib import Path +from typing import Dict, Optional + +from heroserver.openrpc.factory import openrpc_dict, openrpc_spec, openrpc_spec_write +from heroserver.openrpc.model.openrpc_spec import OpenRPCSpec + + +class OpenRPCFactory: + def __init__(self, generation_path: str, spec_path: str): + """ + Initialize the OpenRPCFactory with a generation path and a spec path. + + :param generation_path: The path where the generation will occur. + :param spec_path: The path to the OpenRPC specification (in vlang format). + """ + import os.path + + self.actors: Dict[str, OpenRPCActor] = {} + self.generation_path: str = os.path.expanduser(generation_path) + self.spec_path: str = os.path.expanduser(spec_path) + + def add_actor(self, actor: "OpenRPCActor"): + self.actors[actor.name] = actor + + def get_actor(self, name: str) -> Optional["OpenRPCActor"]: + return self.actors.get(name) + + def remove_actor(self, name: str) -> None: + self.actors.pop(name, None) + + def scan(self): + for subdir in os.listdir(self.spec_path): + subdir_path = os.path.join(self.spec_path, subdir) + if os.path.isdir(subdir_path): + actor = OpenRPCActor(name=subdir, path_ourspec=subdir_path, parent=self) + self.add_actor(actor) + + +class OpenRPCActor: + def __init__(self, name: str, path_ourspec: str, parent: OpenRPCFactory): + self.name: str = name + self.path_ourspec: str = path_ourspec # the directory where we parse & generate + self.path_openrpc: str = os.path.join(parent.generation_path, self.name) # the file which represents openrpc spec + self.parent = parent + + self.openrpc_spec: OpenRPCSpec = openrpc_spec(path=path_ourspec) + + def openrpc_dict(self) -> dict: + return openrpc_dict(path=self.path_ourspec) + + def openrpc_spec_write(self) -> dict: + return openrpc_spec_write(path=self.path_ourspec, dest=self.path_openrpc) + + def openrpc_spec_yaml_path(self) -> str: + yaml_path = os.path.join(self.path_openrpc, "openrpc_spec.yaml") + if not os.path.exists(yaml_path): + self.openrpc_spec_write() + return yaml_path + + def openrpc_spec_json_path(self) -> str: + json_path = os.path.join(self.path_openrpc, "openrpc_spec.json") + if not os.path.exists(json_path): + self.openrpc_spec_write() + return json_path + + def generate_rest_server(self): + from heroserver.openrpc.generator.rest_server.python.rest_server_generator import RestServerGenerator + + rest_server_generator = RestServerGenerator(self.openrpc_spec, Path(self.path_openrpc)) + rest_server_generator.generate() + + +def new(generation_path: str, spec_path: str) -> OpenRPCFactory: + """ + Create a new OpenRPCFactory and return OpenRPCActors, starting from a path. + + :param generation_path: The path where the generation will occur. 
+ :param spec_path: The path to the OpenRPC specification. + :return: An instance of OpenRPCFactory with actors initialized. + """ + factory = OpenRPCFactory(generation_path=generation_path, spec_path=spec_path) + factory.scan() + return factory + + +# Usage example: +# spec = OpenRPCSpec(...) # Create an OpenRPCSpec instance +# actor = OpenRPCActor("MyActor", "/path/to/actor", spec, "/path/to/openrpc.json") +# actors = OpenRPCActors() +# actors.add_actor(actor) diff --git a/_archive/openrpc/generator/__init__.py b/_archive/openrpc/generator/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/_archive/openrpc/generator/client/generator.py b/_archive/openrpc/generator/client/generator.py new file mode 100644 index 0000000..3363899 --- /dev/null +++ b/_archive/openrpc/generator/client/generator.py @@ -0,0 +1,77 @@ +from typing import Dict, List, Optional, Union +from urllib.parse import urlparse + +from heroserver.openrpc.generator.code.lang_code_generator import LangCodeGenerator +from heroserver.openrpc.generator.model.model_generator import ModelGenerator + +from heroserver.openrpc.model.common import ( + ContentDescriptorObject, + ReferenceObject, +) +from heroserver.openrpc.model.openrpc_spec import OpenRPCSpec + + +class ClientGenerator: + def __init__( + self, + spec: OpenRPCSpec, + lang_code_generator: LangCodeGenerator, + output_file: str, + ) -> None: + self.spec = spec + self.model_generator = ModelGenerator(spec, lang_code_generator) + self.lang_code_generator = lang_code_generator + self.output_file = output_file + + def generate_client(self): + code_pre = self.lang_code_generator.generate_imports() + code_models = self.model_generator.generate_models() + code_methods = self.generate_methods() + + # Write the generated code to a file + with open(self.output_file, "w") as file: + file.write(code_pre) + file.write("\n") + file.write(code_models) + file.write("\n") + file.write(code_methods) + + print(f"Generated API code has been written to {self.output_file}") + + def generate_methods(self): + servers = self.spec.servers + base_url = "http://localhost:8000" + if servers: + base_url = servers[0].url + + url = urlparse(base_url) + methods = [] + for method_spec in self.spec.methods: + params: Dict[str, str] = {} + for param in method_spec.params: + params[param.name] = self.model_generator.jsonschema_to_type( + ["methods", method_spec.name, "params", param.name], + param.schema, + ) + + return_type = self.method_result_return_type(["methods", method_spec.name, "result"], method_spec.result) + methods.append(self.lang_code_generator.generate_method(method_spec, url, params, return_type)) + + return "\n\n".join(methods) + + def method_result_return_type( + self, + path: List[str], + method_result: Optional[Union[ContentDescriptorObject, ReferenceObject]], + ) -> str: + if not method_result: + type_name = "" + + if isinstance(method_result, ContentDescriptorObject): + schema = method_result.schema + type_name = self.model_generator.jsonschema_to_type(path, schema) + + elif isinstance(method_result, ReferenceObject): + type_name = self.model_generator.jsonschema_to_type(path, method_result) + + return type_name diff --git a/_archive/openrpc/generator/code/golang/golang_code_generator.py b/_archive/openrpc/generator/code/golang/golang_code_generator.py new file mode 100644 index 0000000..a95180c --- /dev/null +++ b/_archive/openrpc/generator/code/golang/golang_code_generator.py @@ -0,0 +1,177 @@ +import json +import os +from typing import Any, Dict, List +from 
urllib.parse import ParseResult + +import inflect +from jinja2 import Environment, FileSystemLoader +from heroserver.openrpc.generator.lang_code_generator import LangCodeGenerator, PropertyInfo + +from heroserver.openrpc.model.common import ( + ReferenceObject, + SchemaObject, +) +from heroserver.openrpc.model.methods import MethodObject +from heroserver.openrpc.model.openrpc_spec import ( + OpenRPCSpec, +) + +script_dir = os.path.dirname(os.path.abspath(__file__)) +env = Environment(loader=FileSystemLoader(script_dir)) +inflector = inflect.engine() + + +class GolangCodeGenerator(LangCodeGenerator): + def __init__(self) -> None: + self.struct_template = env.get_template("templates/struct.jinja") + self.methods_template = env.get_template("templates/methods.jinja") + self.pre_template = env.get_template("templates/pre.jinja") + + def generate_imports(self) -> str: + return self.pre_template.render( + package_name="rpcclient", + imports=[ + "net/http", + "github.com/mitchellh/mapstructure", + "encoding/json", + "bytes", + "fmt", + "io", + ], + ) + + def generate_object( + self, + type_name: str, + properties: Dict[str, PropertyInfo], + ): + return self.struct_template.render(generator=self, type_name=type_name, properties=properties) + + def generate_method( + self, + method_spec: MethodObject, + url: ParseResult, + params: Dict[str, str], + return_type: str, + ) -> str: + function_name = self.get_camel_case_name(method_spec.name) + method_name = method_spec.name + method_result = self.type_to_method_result(return_type) + method_description = "" + if method_spec.description: + method_description = method_spec.description.replace("'", " ") + + method_example = "" + if method_spec.examples and len(method_spec.examples) > 0: + method_example = json.dumps(method_spec.examples[0], indent=4) + + method_code = self.methods_template.render( + generator=self, + url=url.geturl(), + function_name=function_name, + method_name=method_name, + method_params=params, + method_result=method_result, + return_type=return_type, + method_description=method_description, + method_example=method_example, + ) + + return method_code + + def string_primitive(self) -> str: + return "string" + + def integer_primitive(self) -> str: + return "int64" + + def number_primitive(self) -> str: + return "float64" + + def null_primitive(self) -> str: + return "nil" + + def bool_primitive(self) -> str: + return "bool" + + def array_of_type(self, type_name: str) -> str: + return f"[]{type_name}" + + def generate_multitype(self, types: List[str]) -> str: + if len(types) > 2: + raise Exception("only a type and null are supported with anyOf/allOf keyword") + + if len(types) == 1: + return types[0] + + if types[0] == "nil": + return f"*{types[1]}" + if types[1] == "nil": + return f"*{types[0]}" + + raise Exception("only a type and null are supported with anyOf/allOf keyword") + + def encapsulate_types(self, path: List[str], types: List[SchemaObject | ReferenceObject]) -> str: + raise Exception("no support for allOf keyword") + + def generate_enum(self, enum: List[Any], type_name: str) -> str: + if all(isinstance(elem, str) for elem in enum): + return self.string_primitive() + + elif all(isinstance(elem, int) for elem in enum): + return self.integer_primitive() + + else: + raise Exception(f"failed to generate enum code for: {enum}") + + def type_to_method_result(self, type_name: str) -> str: + method_result = "error" + if len(type_name) > 0 and type_name != "nil": + method_result = f"({type_name}, error)" + + return method_result + + def 
is_primitive(self, type: str) -> bool: + return type in ["int64", "float64", "int", "bool", "string"] + + def is_array(self, type: str) -> bool: + return type.startswith("[]") + + def get_method_params(self, method_params: Dict[str, str]) -> str: + return ", ".join([f"{param_name} {param_type}" for param_name, param_type in method_params.items()]) + + def get_camel_case_name(self, method_name: str) -> str: + return "".join([item.title() for item in method_name.split("_")]) + + def get_default_return_with_error(self, return_type: str, error_statement: str) -> str: + if return_type == "nil": + return error_statement + + if return_type == "string": + return f'"", {error_statement}' + + if return_type == "bool": + return f"false, {error_statement}" + + if return_type == "float64" or return_type == "int64": + return f"0, {error_statement}" + + return f"{return_type}{{}}, {error_statement}" + + +# main() +if __name__ == "__main__": + from heroserver.openrpc.generator.generator import ClientGenerator + from heroserver.openrpc.parser.parser import parser + + data = parser(path="/root/code/git.threefold.info/projectmycelium/hero_server/generatorexamples/example1/specs/storymanager") + + spec_object = OpenRPCSpec.load(data) + golang_code_generator = GolangCodeGenerator() + generator = ClientGenerator( + spec_object, + golang_code_generator, + "/tmp/go_client_new.go", + ) + + generator.generate_client() diff --git a/_archive/openrpc/generator/code/golang/templates/methods.jinja b/_archive/openrpc/generator/code/golang/templates/methods.jinja new file mode 100644 index 0000000..5fc0a86 --- /dev/null +++ b/_archive/openrpc/generator/code/golang/templates/methods.jinja @@ -0,0 +1,92 @@ +{% if method_example -%} +/* +Example: +{{ method_example }} +*/ +{% endif -%} + +{% if method_description -%} +/* +{{ method_description }} +*/ +{% endif -%} +func {{ function_name }}({{ generator.get_method_params(method_params) }}) {{ method_result }} { + params := map[string]interface{}{} + {%- for param_name, param_type in method_params.items() %} + params["{{param_name}}"] = {{param_name}} + {%- endfor %} + + payload := map[string]interface{}{} + payload["jsonrpc"] = "2.0" + payload["id"] = 0 + payload["method"] = "{{ method_name }}" + payload["params"] = params + + payloadBytes, err := json.Marshal(payload) + if err != nil{ + return {{generator.get_default_return_with_error(return_type, 'err')}} + } + + resp, err := http.Post("{{url}}", "application/json", bytes.NewBuffer(payloadBytes)) + if err != nil{ + return {{generator.get_default_return_with_error(return_type, 'fmt.Errorf("failed to make post request: %w", err)')}} + } + + if resp.StatusCode >= 400{ + return {{generator.get_default_return_with_error(return_type, 'fmt.Errorf("request failed with status %d: %s", resp.StatusCode, resp.Status)')}} + } + + {%- if return_type != 'nil' %} + defer resp.Body.Close() + body, err := io.ReadAll(resp.Body) + if err != nil{ + return {{generator.get_default_return_with_error(return_type, 'fmt.Errorf("failed to read response body: %w", err)')}} + } + + mp := map[string]interface{}{} + if err := json.Unmarshal(body, &mp); err != nil{ + return {{generator.get_default_return_with_error(return_type, 'fmt.Errorf("failed to decode response body: %w", err)')}} + } + + result, ok := mp["result"] + if !ok { + return {{generator.get_default_return_with_error(return_type, 'fmt.Errorf("invalid jsonrpc result: %v", mp)')}} + } + + if result == nil { + {%- if return_type == 'nil '%} + return 
{{generator.get_default_return_with_error(return_type, 'nil')}} + {%- else %} + return {{generator.get_default_return_with_error(return_type, 'fmt.Errorf("invalid jsonrpc result: {{return_type}} was expected but found nil")')}} + {%- endif %} + } + + {%- if generator.is_primitive(return_type) %} + return result.({{return_type}}), nil + {%- elif generator.is_array(return_type) %} + resSlice := {{return_type}}{} + for item := range result.([]intreface{}) { + {%- if generator.is_primitive(return_type[2:]) %} + resSlice = append(resSlice, item.({{return_type[2:]}})) + {%- else %} + tmp := {{return_type[2:]}}{} + if err := mapstructure.Decode(item, &tmp); err != nil{ + return {{generator.get_default_return_with_error(return_type, 'fmt.Errorf("failed to decode result: %w", err)')}} + } + + resSlice = append(resSlice, tmp) + {%- endif %} + } + return resSlice, nil + {%- else %} + ret := {{return_type}}{} + if err := mapstructure.Decode(result, &ret); err != nil{ + return {{generator.get_default_return_with_error(return_type, 'fmt.Errorf("failed to decode result: %w", err)')}} + } + + return ret, nil + {%- endif %} + {%- else %} + return nil + {%- endif %} +} \ No newline at end of file diff --git a/_archive/openrpc/generator/code/golang/templates/pre.jinja b/_archive/openrpc/generator/code/golang/templates/pre.jinja new file mode 100644 index 0000000..74a8810 --- /dev/null +++ b/_archive/openrpc/generator/code/golang/templates/pre.jinja @@ -0,0 +1,5 @@ +package {{package_name}} +{% for item in imports %} +import "{{item}}" +{%- endfor %} + diff --git a/_archive/openrpc/generator/code/golang/templates/struct.jinja b/_archive/openrpc/generator/code/golang/templates/struct.jinja new file mode 100644 index 0000000..05701cd --- /dev/null +++ b/_archive/openrpc/generator/code/golang/templates/struct.jinja @@ -0,0 +1,8 @@ +type {{type_name}} struct{ + {%- for property_name, property_info in properties.items() %} + {%- if property_info.description %} + // {{ property_info.description }} + {%- endif %} + {{ generator.get_camel_case_name(property_name) }} {{ property_info.type_name }} `json:"{{property_name}}"` + {%- endfor%} +} \ No newline at end of file diff --git a/_archive/openrpc/generator/code/lang_code_generator.py b/_archive/openrpc/generator/code/lang_code_generator.py new file mode 100644 index 0000000..ae0fe09 --- /dev/null +++ b/_archive/openrpc/generator/code/lang_code_generator.py @@ -0,0 +1,97 @@ +from abc import ABC, abstractmethod +from typing import Any, Dict, List, Optional, Union +from urllib.parse import ParseResult + +from heroserver.openrpc.model.common import ( + ReferenceObject, + SchemaObject, +) +from heroserver.openrpc.model.methods import MethodObject + + +class PropertyInfo: + def __init__( + self, + name: str, + type_name: str, + description: Optional[str] = None, + example: Optional[str] = None, + ) -> None: + self.name = name + self.type_name = type_name + self.description = description + self.example = example + + +class LangCodeGenerator(ABC): + @abstractmethod + def generate_imports(self) -> str: + pass + + @abstractmethod + def generate_object( + self, + type_name: str, + properties: Dict[str, PropertyInfo], + ): + pass + + @abstractmethod + def generate_method( + self, + method_spec: MethodObject, + url: ParseResult, + params: Dict[str, str], + return_type: str, + ) -> str: + pass + + @abstractmethod + def string_primitive(self) -> str: + pass + + @abstractmethod + def integer_primitive(self) -> str: + pass + + @abstractmethod + def number_primitive(self) -> str: + 
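+        # Concrete generators return e.g. "float64" (Go), "f64" (V), "float" (Python).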
pass + + @abstractmethod + def null_primitive(self) -> str: + pass + + @abstractmethod + def bool_primitive(self) -> str: + pass + + @abstractmethod + def is_primitive(self, type_name: str) -> bool: + pass + + @abstractmethod + def generate_multitype(self, path: List[str], types: List[Union[SchemaObject, ReferenceObject]]) -> str: + """handles `anyOf` and `oneOf` in a json schema""" + pass + + @abstractmethod + def array_of_type(self, type_name: str) -> str: + pass + + @abstractmethod + def encapsulate_types(self, path: List[str], types: List[Union[SchemaObject, ReferenceObject]]) -> str: + """handles `allOf` in a json schema""" + pass + + @abstractmethod + def generate_enum(self, enum: List[Any], type_name: str) -> str: + pass + + @abstractmethod + def type_to_method_result(self, type_name: str) -> str: + """ + convert type to method result + - type_name can be empty + """ + pass diff --git a/_archive/openrpc/generator/code/python/python_code_generator.py b/_archive/openrpc/generator/code/python/python_code_generator.py new file mode 100644 index 0000000..07a799c --- /dev/null +++ b/_archive/openrpc/generator/code/python/python_code_generator.py @@ -0,0 +1,205 @@ +import json +import os +from typing import Any, Dict, List +from urllib.parse import ParseResult + +import inflect +from jinja2 import Environment, FileSystemLoader + +from heroserver.openrpc.generator.code.lang_code_generator import LangCodeGenerator, PropertyInfo +from heroserver.openrpc.model.common import ( + ReferenceObject, + SchemaObject, +) +from heroserver.openrpc.model.methods import MethodObject +from heroserver.openrpc.model.openrpc_spec import ( + OpenRPCSpec, +) + +script_dir = os.path.dirname(os.path.abspath(__file__)) +env = Environment(loader=FileSystemLoader(script_dir)) +inflector = inflect.engine() + +STRING_PRIMITIVE = "str" +INT_PRIMITIVE = "int" +FLOAT_PRIMITIVE = "float" +BOOL_PRMITIVE = "bool" +NONE_PRIMITIVE = "None" + + +class PythonCodeGenerator(LangCodeGenerator): + def __init__(self) -> None: + self.class_template = env.get_template("templates/class.jinja") + self.enum_template = env.get_template("templates/enum.jinja") + self.method_template = env.get_template("templates/method.jinja") + self.pre_template = env.get_template("templates/pre.jinja") + + def generate_imports(self) -> str: + return self.pre_template.render() + + def generate_object( + self, + type_name: str, + properties: Dict[str, PropertyInfo], + ): + # for name, info in properties.items(): + # info["load_code"] = self.generate_load_code(name, info['type'], 'data', f'data["{name}"]') + + return self.class_template.render(python_code_generator=self, class_name=type_name, properties=properties) + + def generate_load_code(self, name: str, type_name: str, data_source: str, load_param: str) -> str: + if type_name.startswith("Optional"): + type_name = type_name.removeprefix("Optional[").removesuffix("]") + return f'({self.generate_load_code(name, type_name, data_source)} if "{name}" in {data_source} else None)' + + if type_name.startswith("List"): + type_name = type_name.removeprefix("List[").removesuffix("]") + if self.is_primitive(type_name): + return f'{data_source}.get("{name}")' + return f'[{self.generate_load_code(name, type_name, data_source, 'item')} for item in {data_source}.get("{name}", [])]' + + if self.is_primitive(type_name): + return f'{data_source}.get("{name}")' + + return f"{type_name}.load({load_param})" + + def generate_method( + self, + method_spec: MethodObject, + url: ParseResult, + params: Dict[str, str], + 
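+        # params maps parameter name -> rendered type, e.g. {"user_id": "str"}
+        # (key shown is illustrative).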
return_type: str, + ) -> str: + function_name = method_spec.name.lower().replace(".", "_") + method_name = method_spec.name + method_result = self.type_to_method_result(return_type) + method_description = "" + if method_spec.description: + method_description = method_spec.description.replace("'", " ") + method_description = method_description.replace("\n", "\n# ") + + method_example = "" + if method_spec.examples and len(method_spec.examples) > 0: + method_example = json.dumps(method_spec.examples[0], indent=4) + method_example.replace("\n", "\n#") + + method_code = self.method_template.render( + python_code_generator=self, + base_url=f"{url.scheme}://{url.netloc}", + url_path=url.path, + function_name=function_name, + method_name=method_name, + method_params=params, + method_result=method_result, + return_type=return_type, + method_description=method_description, + method_example=method_example, + ) + + return method_code + + def string_primitive(self) -> str: + return STRING_PRIMITIVE + + def integer_primitive(self) -> str: + return INT_PRIMITIVE + + def number_primitive(self) -> str: + return FLOAT_PRIMITIVE + + def null_primitive(self) -> str: + return NONE_PRIMITIVE + + def bool_primitive(self) -> str: + return BOOL_PRMITIVE + + def array_of_type(self, type_name: str) -> str: + return f"List[{type_name}]" + + def generate_multitype(self, types: List[str]) -> str: + if len(types) > 2: + raise Exception("only a type and null are supported with anyOf/allOf keyword") + + if len(types) == 1: + return types[0] + + if types[0] == NONE_PRIMITIVE: + return f"Optional[{types[1]}]" + if types[1] == NONE_PRIMITIVE: + return f"Optional[{types[0]}]" + + raise Exception("only a type and null are supported with anyOf/allOf keyword") + + def encapsulate_types(self, path: List[str], types: List[SchemaObject | ReferenceObject]) -> str: + raise Exception("no support for allOf keyword") + + def generate_enum(self, enum: List[Any], type_name: str) -> str: + if all(isinstance(elem, str) for elem in enum): + # enum of strings + return self.enum_template.render( + enum=enum, + type_name=type_name, + number_to_words=inflector.number_to_words, + ) + + elif all(isinstance(elem, int) for elem in enum): + # enum of integers + return self.enum_template.render( + is_integer=True, + enum=enum, + type_name=type_name, + number_to_words=inflector.number_to_words, + ) + + else: + raise Exception(f"failed to generate enum code for: {enum}") + + def type_to_method_result(self, type_name: str) -> str: + return type_name + + def get_method_params(self, method_params: Dict[str, str]) -> str: + return ", ".join([f"{param_name}: {param_type}" for param_name, param_type in method_params.items()]) + + def is_primitive(self, type_name: str) -> bool: + return type_name in [STRING_PRIMITIVE, INT_PRIMITIVE, FLOAT_PRIMITIVE, BOOL_PRMITIVE] or any( + type_name.startswith(end) for end in ["List", "Optional", "Union"] + ) + + def get_pydantic_field_params(self, prop_info: PropertyInfo) -> str: + field_str = "" + if prop_info.type_name.startswith("Optional"): + field_str = "None" + else: + field_str = "..." 
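+        # Pydantic convention: Field(...) (Ellipsis) marks the field required;
+        # Field(None) makes it optional with a null default.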
+ + if prop_info.description: + field_str += f', description="{prop_info.description}"' + + if prop_info.example: + if isinstance(prop_info.example, str): + example_formatted = f'"{prop_info.example}"' + else: + example_formatted = prop_info.example + field_str += f", examples=[{example_formatted}]" + + return f"Field({field_str})" + + +# main() +if __name__ == "__main__": + import yaml + + from heroserver.openrpc.generator.generator import ClientGenerator + + with open("/root/code/git.threefold.info/projectmycelium/hero_server/generatorexamples/mycelium_openrpc.yaml", "r") as file: + data = yaml.safe_load(file) + # print(data) + spec_object = OpenRPCSpec.load(data) + python_code_generator = PythonCodeGenerator() + generator = ClientGenerator( + spec_object, + python_code_generator, + "/tmp/python_client.py", + ) + + generator.generate_client() diff --git a/_archive/openrpc/generator/code/python/templates/class.jinja b/_archive/openrpc/generator/code/python/templates/class.jinja new file mode 100644 index 0000000..5d2fa0a --- /dev/null +++ b/_archive/openrpc/generator/code/python/templates/class.jinja @@ -0,0 +1,4 @@ +class {{ class_name }}(BaseModel): + {% for prop_name, prop_info in properties.items() -%} + {{ prop_name }}: {{prop_info.type_name}} = {{python_code_generator.get_pydantic_field_params(prop_info)}} + {% endfor %} \ No newline at end of file diff --git a/_archive/openrpc/generator/code/python/templates/enum.jinja b/_archive/openrpc/generator/code/python/templates/enum.jinja new file mode 100644 index 0000000..547bfc0 --- /dev/null +++ b/_archive/openrpc/generator/code/python/templates/enum.jinja @@ -0,0 +1,18 @@ +{% if is_integer %} +class {{ type_name }}(Enum): + {% for elem in enum -%} + {{ number_to_words(elem) }} = {{ elem }} + {% endfor %} +{% else -%} +class {{ type_name }}(str, Enum): + {% for elem in enum -%} + {{ elem.upper() }} = '{{ elem }}' + {% endfor %} +{% endif %} + {# @classmethod + def load(cls, data: Dict[str, Any]) -> "{{type_name}}": + return cls( + {% for elem in enum -%} + {{elem}} = data.get('{{elem}}'), + {% endfor %} + ) #} \ No newline at end of file diff --git a/_archive/openrpc/generator/code/python/templates/method.jinja b/_archive/openrpc/generator/code/python/templates/method.jinja new file mode 100644 index 0000000..c129995 --- /dev/null +++ b/_archive/openrpc/generator/code/python/templates/method.jinja @@ -0,0 +1,30 @@ +{% if method_example != "" -%} +# Example: +# {{ method_example }} +{% endif -%} +def {{ function_name }}({{ python_code_generator.get_method_params(method_params) }}){% if method_result %} -> {{ method_result }}{% endif %}: + {% if method_description != "" -%} + """ + {{ method_description }} + """ + {% endif -%} + url = "{{base_url}}" + headers = {"content-type": "application/json"} + + params = { + {% for param_name, param_type in method_params.items() -%} + '{{ param_name }}': {{ param_name }}, + {% endfor -%} +} + + response = requests.post(url, json={"jsonrpc": "2.0", "id": 0, 'method': '{{ method_name }}', 'params': params}, headers=headers).json() + + {% if return_type -%} + {% if python_code_generator.is_primitive(return_type) -%} + return response['result'] + {% else -%} + return {{return_type}}(response['result']) + {% endif -%} + {% else -%} + response.raise_for_status() + {% endif -%} \ No newline at end of file diff --git a/_archive/openrpc/generator/code/python/templates/pre.jinja b/_archive/openrpc/generator/code/python/templates/pre.jinja new file mode 100644 index 0000000..9377c2a --- /dev/null +++ 
b/_archive/openrpc/generator/code/python/templates/pre.jinja @@ -0,0 +1,5 @@ +from typing import List, Optional, Union, Any, Dict +from pydantic import BaseModel, Field +from enum import Enum +import requests + diff --git a/_archive/openrpc/generator/code/vlang/handler_generator.py b/_archive/openrpc/generator/code/vlang/handler_generator.py new file mode 100644 index 0000000..c852c86 --- /dev/null +++ b/_archive/openrpc/generator/code/vlang/handler_generator.py @@ -0,0 +1,205 @@ +import os +from pathlib import Path +from typing import Dict, Union + +from jinja2 import Environment, FileSystemLoader + +from heroserver.openrpc.generator.model_generator import ModelGenerator +from heroserver.openrpc.generator.vlang.vlang_code_generator import VlangGenerator +from heroserver.openrpc.model.common import ContentDescriptorObject, ReferenceObject +from heroserver.openrpc.model.methods import MethodObject +from heroserver.openrpc.model.openrpc_spec import OpenRPCSpec + +script_dir = os.path.dirname(os.path.abspath(__file__)) +env = Environment(loader=FileSystemLoader(script_dir)) + + +def get_actor_executor_name(actor: str) -> str: + return f"{''.join([part.title() for part in actor.split('_')])}Executor" + + +class ActorGenerator: + def __init__(self, actor: str, spec: OpenRPCSpec, dir: Path) -> None: + self.spec = spec + self.actor = actor + self.dir = dir + self.model_generator = ModelGenerator(spec, VlangGenerator()) + self.executor_template = env.get_template("templates/executor.jinja") + self.pre_template = env.get_template("templates/pre.jinja") + self.internal_crud_methods_template = env.get_template("templates/internal_crud_methods.jinja") + self.internal_actor_method_template = env.get_template("templates/internal_actor_method.jinja") + + def generate(self): + self.generate_models() + self.generate_crud() + self.generate_internal_actor_methods() + self.generate_executor() + + def generate_models(self): + pre = self.pre_template.render(module_name="myhandler", imports=[]) + code = self.model_generator.generate_models() + path = self.dir.joinpath(f"{self.actor}_models.v") + + with open(path, "w") as file: + file.write(f"{pre}\n\n{code}\n") + + def generate_crud(self): + imports = self.pre_template.render( + module_name="myhandler", + imports=["json", "freeflowuniverse.crystallib.baobab.backend"], + ) + methods = "" + for path_str in self.model_generator.spec.get_root_objects().keys(): + object = self.model_generator.processed_objects[path_str] + if object["code"] == "": + continue + + type_name = object["name"] + variable_name = type_name.lower() + methods += ( + self.internal_crud_methods_template.render( + variable_name=variable_name, + type_name=type_name, + actor_executor_name=get_actor_executor_name(self.actor), + ) + + "\n\n" + ) + + path = self.dir.joinpath(f"{self.actor}_crud.v") + with open(path, "w") as file: + file.write(f"{imports}\n\n{methods}") + + def generate_internal_actor_methods(self): + pre = self.pre_template.render(module_name="myhandler", imports=[]) + for method in self.spec.methods: + function_name = method.name.lower().replace(".", "_") + "_internal" + file_path = self.dir.joinpath(f"{self.actor}_{function_name}.v") + if file_path.exists(): + continue + + if any(method.name.endswith(end) for end in ["get", "set", "delete"]): + continue + + params: Dict[str, str] = {} + for param in method.params: + params[param.name] = self.model_generator.jsonschema_to_type(["methods", method.name, "params", param.name], param.schema) + + return_type = 
self.get_method_return_type(method) + method_params = ", ".join([f"{param.name} {self.get_param_type(method.name, param)}" for param in method.params]) + + code = self.internal_actor_method_template.render( + function_name=function_name, + method_params=method_params, + return_type=return_type, + actor_executor_name=get_actor_executor_name(self.actor), + ) + + with open(file_path, "w") as file: + file.write(f"{pre}\n\n{code}") + + def generate_executor(self): + pre = self.pre_template.render( + module_name="myhandler", + imports=[ + "x.json2", + "json", + "freeflowuniverse.crystallib.clients.redisclient", + "freeflowuniverse.crystallib.baobab.backend", + "freeflowuniverse.crystallib.rpc.jsonrpc", + ], + ) + + code = self.executor_template.render( + generator=self, + actor_executor_name=get_actor_executor_name(self.actor), + methods=self.spec.methods, + ) + + path = self.dir.joinpath(f"{self.actor}_executor.v") + with open(path, "w") as file: + file.write(f"{pre}\n\n{code}") + + def get_param_type( + self, + method_name: str, + param: Union[ContentDescriptorObject, ReferenceObject], + ) -> str: + type_name = self.model_generator.jsonschema_to_type(["methods", method_name, "params", param.name], param.schema) + return type_name + + def get_method_return_type(self, method: MethodObject) -> str: + if not method.result: + return "" + + path = ["methods", method.name, "result"] + schema = method.result + if isinstance(method.result, ContentDescriptorObject): + schema = method.result.schema + + return self.model_generator.jsonschema_to_type(path, schema) + + def is_primitive(self, type_name: str) -> bool: + return self.model_generator.lang_code_generator.is_primitive(type_name) + + def get_method_params_as_args(self, method: MethodObject) -> str: + return ", ".join([param.name for param in method.params]) + + +class Generator: + def generate_handler(self, specs_dir: Path, output_dir: Path): + output_dir.mkdir(parents=True, exist_ok=True) + + handler_template = env.get_template("templates/handler.jinja") + handler_test_template = env.get_template("templates/handler_test.jinja") + pre_template = env.get_template("templates/pre.jinja") + actors = [] + method_names = [] + + pre = pre_template.render( + module_name="myhandler", + imports=[ + "freeflowuniverse.crystallib.clients.redisclient", + "freeflowuniverse.crystallib.baobab.backend", + "freeflowuniverse.crystallib.rpc.jsonrpc", + ], + ) + code = "" + for item in specs_dir.iterdir(): + if not item.is_dir(): + continue + + actors.append(item.name) + + data = parser(path=item.as_posix()) + openrpc_spec = OpenRPCSpec.load(data) + actor_generator = ActorGenerator(item.name, openrpc_spec, output_dir) + actor_generator.generate() + + for method in openrpc_spec.methods: + method_names.append(f"{item.name}.{method.name}") + + code = handler_template.render(actors=actors, get_actor_executor_name=get_actor_executor_name) + + handler_path = output_dir.joinpath("handler.v") + with open(handler_path, "w") as file: + file.write(f"{pre}\n\n{code}") + + handler_test_path = output_dir.joinpath("handler_test.v") + with open(handler_test_path, "w") as file: + file.write(handler_test_template.render(method_names=method_names)) + + +if __name__ == "__main__": + from heroserver.openrpc.parser.parser import parser + + generator = Generator() + path = "~/code/git.threefold.info/projectmycelium/hero_server/generatorexamples/example1/specs" + generator.generate_handler(Path(path), Path("/tmp/myhandler")) + # vlang_code_generator = VlangGenerator() + # generator = 
ClientGenerator( + # spec_object, + # vlang_code_generator, + # "/tmp/v_client_new.v", + # ) + + # generator.generate_client() diff --git a/_archive/openrpc/generator/code/vlang/templates/enum.jinja b/_archive/openrpc/generator/code/vlang/templates/enum.jinja new file mode 100644 index 0000000..f96878e --- /dev/null +++ b/_archive/openrpc/generator/code/vlang/templates/enum.jinja @@ -0,0 +1,9 @@ +pub enum {{ type_name }}{ + {% for elem in enum -%} + {% if is_integer -%} + {{ number_to_words(elem) }} = {{ elem }} + {% else -%} + {{ elem }} + {% endif -%} + {% endfor %} +} \ No newline at end of file diff --git a/_archive/openrpc/generator/code/vlang/templates/executor.jinja b/_archive/openrpc/generator/code/vlang/templates/executor.jinja new file mode 100644 index 0000000..ad0b915 --- /dev/null +++ b/_archive/openrpc/generator/code/vlang/templates/executor.jinja @@ -0,0 +1,77 @@ +pub struct {{ actor_executor_name }}{ +pub mut: + db &backend.Backend + redis &redisclient.Redis +} + +pub fn (mut executor {{ actor_executor_name }}) execute(rpc_msg_id string, rpc_msg_method string, rpc_msg_params_str string) { + raw_params := json2.raw_decode(rpc_msg_params_str) or{ + executor.return_error(rpc_msg_id, jsonrpc.invalid_params) + return + } + + params_arr := raw_params.arr() + + match rpc_msg_method { + {%- for method in methods %} + '{{method.name}}' { + {%- for param in method.params %} + {%- if generator.is_primitive(generator.get_param_type(method.name, param))%} + {{param.name}} := params_arr[{{loop.index0}}] as {{generator.get_param_type(method.name, param)}} + {%- else %} + {{param.name}} := json.decode({{generator.get_param_type(method.name, param)}}, params_arr[{{loop.index0}}].json_str()) or { + executor.return_error(rpc_msg_id, jsonrpc.invalid_request) + return + } + {%- endif %} + {%- endfor %} + + {%- if generator.get_method_return_type(method) == 'none' %} + executor.{{method.name}}_internal({{generator.get_method_params_as_args(method)}}) or { + executor.return_error(rpc_msg_id, jsonrpc.InnerJsonRpcError{ + code: 32000 + message: '${err}' + }) + return + } + + response := jsonrpc.JsonRpcResponse[string]{ + jsonrpc: '2.0.0' + id: rpc_msg_id + result: '' + } + {%- else %} + result := executor.{{method.name}}_internal({{generator.get_method_params_as_args(method)}}) or { + executor.return_error(rpc_msg_id, jsonrpc.InnerJsonRpcError{ + code: 32000 + message: '${err}' + }) + return + } + + response := jsonrpc.JsonRpcResponse[{{generator.get_method_return_type(method)}}]{ + jsonrpc: '2.0.0' + id: rpc_msg_id + result: result + } + {%- endif %} + + // put response in response queue + executor.redis.lpush(rpc_msg_id, response.to_json()) or { + println('failed to push response for ${rpc_msg_id} to redis queue: ${err}') + } + } + {%- endfor %} + else { + executor.return_error(rpc_msg_id, jsonrpc.method_not_found) + return + } + } +} + +pub fn (mut executor {{actor_executor_name}}) return_error(rpc_msg_id string, error jsonrpc.InnerJsonRpcError){ + response := jsonrpc.new_jsonrpcerror(rpc_msg_id, error) + executor.redis.lpush(rpc_msg_id, response.to_json()) or { + println('failed to push response for ${rpc_msg_id} to redis queue: ${err}') + } +} \ No newline at end of file diff --git a/_archive/openrpc/generator/code/vlang/templates/handler.jinja b/_archive/openrpc/generator/code/vlang/templates/handler.jinja new file mode 100644 index 0000000..cfc57ae --- /dev/null +++ b/_archive/openrpc/generator/code/vlang/templates/handler.jinja @@ -0,0 +1,50 @@ +struct Handler { +pub mut: + db 
&backend.Backend + redis &redisclient.Redis + {% for actor in actors %} + {{actor}}_executor {{get_actor_executor_name(actor)}} + {%- endfor %} +} + +pub fn new(db_config backend.BackendConfig, redis_addr string) !Handler{ + db := backend.new(db_config)! + mut redis_client := redisclient.new([redis_addr])! + redis_client.selectdb(0)! + + return Handler{ + db: &db + redis: &redis_client + {%- for actor in actors %} + {{actor}}_executor: {{get_actor_executor_name(actor)}}{ + db: &db + redis: &redis_client + } + {%- endfor %} + } +} + +// handle handles an incoming JSON-RPC encoded message and returns an encoded response +pub fn (mut handler Handler) handle(id string, method string, params_str string) { + actor := method.all_before('.') + method_name := method.all_after('.') + + match actor { + {%- for actor in actors %} + '{{ actor }}' { + spawn (&handler.{{actor}}_executor).execute(id, method_name, params_str) + } + {%- endfor %} + else { + handler.return_error(id, jsonrpc.method_not_found) + return + } + } +} + +pub fn (mut handler Handler) return_error(rpc_msg_id string, error jsonrpc.InnerJsonRpcError){ + response := jsonrpc.new_jsonrpcerror(rpc_msg_id, error) + handler.redis.lpush(rpc_msg_id, response.to_json()) or { + println('failed to push response for ${rpc_msg_id} to redis queue: ${err}') + } +} diff --git a/_archive/openrpc/generator/code/vlang/templates/handler_test.jinja b/_archive/openrpc/generator/code/vlang/templates/handler_test.jinja new file mode 100644 index 0000000..2f4b524 --- /dev/null +++ b/_archive/openrpc/generator/code/vlang/templates/handler_test.jinja @@ -0,0 +1,31 @@ +module myhandler + +import x.json2 +import rand +import freeflowuniverse.crystallib.baobab.backend + +fn test_handler(){ + db_config := backend.BackendConfig{ + name: 'myhandler' + secret: 'secret' + reset: true + db_type: .postgres + } + + mut handler := new(db_config, '127.0.0.1:6379')! + {% for method_name in method_names %} + do_request(mut handler, '{{method_name}}')! + {%- endfor %} +} + +fn do_request(mut handler Handler, method_name string) ! 
{ + // TODO: edit input parameters + mut params := []json2.Any{} + params << "objid" + params << "blabla_name" + params_str := json2.Any(params).json_str() + + id := rand.string(6) + handler.handle(rand.string(6), method_name, json2.Any(params).json_str()) + println('request id: ${id}') +} \ No newline at end of file diff --git a/_archive/openrpc/generator/code/vlang/templates/internal_actor_method.jinja b/_archive/openrpc/generator/code/vlang/templates/internal_actor_method.jinja new file mode 100644 index 0000000..339eeb5 --- /dev/null +++ b/_archive/openrpc/generator/code/vlang/templates/internal_actor_method.jinja @@ -0,0 +1,7 @@ +pub fn (mut executor {{ actor_executor_name }}) {{function_name}}({{method_params}}) !{{return_type}}{ + // context allows us to see who the user is and which groups the user is + // context also gives a logging feature + // context is linked to 1 circle + // context is linked to a DB (OSIS) + panic('implement') +} \ No newline at end of file diff --git a/_archive/openrpc/generator/code/vlang/templates/internal_crud_methods.jinja b/_archive/openrpc/generator/code/vlang/templates/internal_crud_methods.jinja new file mode 100644 index 0000000..588678e --- /dev/null +++ b/_archive/openrpc/generator/code/vlang/templates/internal_crud_methods.jinja @@ -0,0 +1,28 @@ +pub fn (mut executor {{ actor_executor_name }}) {{variable_name}}_get_internal(id string) !{{type_name}}{ + json_str := executor.db.indexer.get_json(id, backend.RootObject{ + name: '{{type_name}}' + })! + + return json.decode({{type_name}}, json_str)! +} + +pub fn (mut executor {{ actor_executor_name }}) {{variable_name}}_set_internal({{variable_name}} {{type_name}}) !{ + if {{variable_name}}.oid != ''{ + executor.db.indexer.set(backend.RootObject{ + id: {{variable_name}}.oid + name: '{{type_name}}' + })! + } + + executor.db.indexer.new(backend.RootObject{ + name: '{{type_name}}' + })! +} + +pub fn (mut executor {{ actor_executor_name }}) {{variable_name}}_delete_internal(id string) !{ + executor.db.indexer.delete(id, backend.RootObject{ + name: '{{type_name}}' + })! +} + + diff --git a/_archive/openrpc/generator/code/vlang/templates/method_param_struct.jinja b/_archive/openrpc/generator/code/vlang/templates/method_param_struct.jinja new file mode 100644 index 0000000..8ee539d --- /dev/null +++ b/_archive/openrpc/generator/code/vlang/templates/method_param_struct.jinja @@ -0,0 +1,5 @@ +pub struct {{method_param_struct_name}}{ + {% for param_name, param_type in params.items()%} + {{param_name}} {{param_type}} + {%- endfor %} +} \ No newline at end of file diff --git a/_archive/openrpc/generator/code/vlang/templates/methods.jinja b/_archive/openrpc/generator/code/vlang/templates/methods.jinja new file mode 100644 index 0000000..f0ff6fc --- /dev/null +++ b/_archive/openrpc/generator/code/vlang/templates/methods.jinja @@ -0,0 +1,75 @@ +{% if method_example -%} +/* +Example: +{{ method_example }} +*/ +{% endif -%} + +{% if method_description -%} +/* +{{ method_description }} +*/ +{% endif -%} +pub fn {{ function_name }}({{ vlang_code_generator.get_method_params(method_params) }}) {{ method_result }}{ + mut conn := httpconnection.new( + name: 'openrpc_client' + url: '{{ base_url }}' + )! 
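+{#
+  The body assembled below is a plain JSON-RPC 2.0 envelope, e.g. (illustrative
+  method and params):
+
+      {"jsonrpc": "2.0", "id": 0, "method": "story.get", "params": {"id": "abc"}}
+
+  Note also: the id is hard-coded to 0, so responses cannot be correlated across
+  concurrent calls; and the server-side templates above emit `jsonrpc: '2.0.0'`
+  and error code 32000, whereas JSON-RPC 2.0 specifies the version string "2.0"
+  and reserves -32000..-32099 for server errors.
+#}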
+ + mut params := map[string]json2.Any{} + {% for param_name, param_type in method_params.items() -%} + {% if vlang_code_generator.is_primitive(param_type) %} + params["{{ param_name }}"] = {{ param_name }} + {% elif vlang_code_generator.is_vlang_array(param_type) %} + mut any_arr := []json2.Any{} + for item in {{ param_name }}{ + {% if vlang_code_generator.is_primitive(param_type[2:]) %} + any_arr << item + {% else %} + any_arr << json2.raw_decode(json2.encode(item))! + {% endif %} + } + params["{{ param_name }}"] = json2.Any(any_arr) + {%else %} + params["{{ param_name }}"] = json2.raw_decode(json2.encode({{ param_name }}))! + {% endif %} + {% endfor -%} + + mut payload := map[string]json2.Any{} + payload['jsonrpc'] = "2.0" + payload['id'] = 0 + payload['method'] = '{{ method_name }}' + payload['params'] = params + + response := conn.send(method: .post, data: json2.encode(payload){% if url_path -%}, prefix: '{{ url_path }}' {% endif -%})! + if !response.is_ok() { + return error('failed to make rpc request: (${response.code}) ${response.data}') + } + + {% if return_type != 'none' %} + mp := json2.raw_decode(response.data)!.as_map() + res := mp['result'] or { + return error('invalid jsonrpc result: ${response.data}') + } + + if res is json2.Null{ + return error('not found') + } + + {% if vlang_code_generator.is_primitive(return_type) %} + return res as {{return_type}} + {% elif vlang_code_generator.is_vlang_array(return_type) %} + mut res_arr := {{return_type}} + for item in res.arr() { + {% if vlang_code_generator.is_primitive(return_type[2:]) %} + res_arr << item as {{return_type}} + {% else %} + res_arr << json2.decode[{{return_type[2:]}}](item.json_str())! + {% endif %} + } + return res_arr + {%else %} + return json2.decode[{{return_type}}](res.json_str())! 
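+{#
+  Same caveat as the OpenAPI copy of this template: `mut res_arr := {{return_type}}`
+  in the array branch above likely needs a `{}` initializer to be valid V.
+#}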
+    {% endif -%}
+    {% endif %}
+}
\ No newline at end of file
diff --git a/_archive/openrpc/generator/code/vlang/templates/pre.jinja b/_archive/openrpc/generator/code/vlang/templates/pre.jinja
new file mode 100644
index 0000000..34f6cf2
--- /dev/null
+++ b/_archive/openrpc/generator/code/vlang/templates/pre.jinja
@@ -0,0 +1,5 @@
+module {{module_name}}
+{% for item in imports %}
+import {{item}}
+{%- endfor %}
+
diff --git a/_archive/openrpc/generator/code/vlang/templates/struct.jinja b/_archive/openrpc/generator/code/vlang/templates/struct.jinja
new file mode 100644
index 0000000..e9e4a2e
--- /dev/null
+++ b/_archive/openrpc/generator/code/vlang/templates/struct.jinja
@@ -0,0 +1,10 @@
+@[params]
+pub struct {{ type_name }}{
+pub mut:
+    {%- for property_name, property_info in properties.items() %}
+    {%- if property_info.description %}
+    // {{ property_info.description }}
+    {%- endif %}
+    {{ property_name }} {{ property_info.type_name }}
+    {%- endfor %}
+}
\ No newline at end of file
diff --git a/_archive/openrpc/generator/code/vlang/vlang_code_generator.py b/_archive/openrpc/generator/code/vlang/vlang_code_generator.py
new file mode 100644
index 0000000..547d365
--- /dev/null
+++ b/_archive/openrpc/generator/code/vlang/vlang_code_generator.py
@@ -0,0 +1,164 @@
+import json
+import os
+from typing import Any, Dict, List
+from urllib.parse import ParseResult
+
+import inflect
+from jinja2 import Environment, FileSystemLoader
+
+from heroserver.openrpc.generator.lang_code_generator import LangCodeGenerator, PropertyInfo
+from heroserver.openrpc.model.common import (
+    ReferenceObject,
+    SchemaObject,
+)
+from heroserver.openrpc.model.methods import MethodObject
+from heroserver.openrpc.model.openrpc_spec import (
+    OpenRPCSpec,
+)
+
+script_dir = os.path.dirname(os.path.abspath(__file__))
+env = Environment(loader=FileSystemLoader(script_dir))
+inflector = inflect.engine()
+
+
+class VlangGenerator(LangCodeGenerator):
+    def __init__(self) -> None:
+        self.struct_template = env.get_template("templates/struct.jinja")
+        self.enum_template = env.get_template("templates/enum.jinja")
+        self.methods_template = env.get_template("templates/methods.jinja")
+        self.pre_template = env.get_template("templates/pre.jinja")
+
+    def generate_imports(self) -> str:
+        return self.pre_template.render()
+
+    def generate_object(
+        self,
+        type_name: str,
+        properties: Dict[str, PropertyInfo],
+    ):
+        return self.struct_template.render(type_name=type_name, properties=properties)
+
+    def generate_method(
+        self,
+        method_spec: MethodObject,
+        url: ParseResult,
+        params: Dict[str, str],
+        return_type: str,
+    ) -> str:
+        function_name = method_spec.name.lower().replace(".", "_")
+        method_name = method_spec.name
+        method_result = self.type_to_method_result(return_type)
+        method_description = ""
+        if method_spec.description:
+            method_description = method_spec.description.replace("'", " ")
+
+        method_example = ""
+        if method_spec.examples and len(method_spec.examples) > 0:
+            method_example = json.dumps(method_spec.examples[0], indent=4)
+
+        method_code = self.methods_template.render(
+            vlang_code_generator=self,
+            base_url=f"{url.scheme}://{url.netloc}",
+            url_path=url.path,
+            function_name=function_name,
+            method_name=method_name,
+            method_params=params,
+            method_result=method_result,
+            return_type=return_type,
+            method_description=method_description,
+            method_example=method_example,
+        )
+
+        return method_code
+
+    def string_primitive(self) -> str:
+        return "string"
+
+    def integer_primitive(self) -> str:
+        return "i64"
+
+    def number_primitive(self) -> str:
+        return "f64"
+
+    def null_primitive(self) -> str:
+        return "none"
+
+    def bool_primitive(self) -> str:
+        return "bool"
+
+    def array_of_type(self, type_name: str) -> str:
+        return f"[]{type_name}"
+
+    def generate_multitype(self, types: List[str]) -> str:
+        if len(types) > 2:
+            raise Exception("only a type and null are supported with anyOf/allOf keyword")
+
+        if len(types) == 1:
+            return types[0]
+
+        if types[0] == "none":
+            return f"?{types[1]}"
+        if types[1] == "none":
+            return f"?{types[0]}"
+
+        raise Exception("only a type and null are supported with anyOf/allOf keyword")
+
+    def encapsulate_types(self, path: List[str], types: List[SchemaObject | ReferenceObject]) -> str:
+        raise Exception("no support for allOf keyword")
+
+    def generate_enum(self, enum: List[Any], type_name: str) -> str:
+        if all(isinstance(elem, str) for elem in enum):
+            # enum of strings
+            return self.enum_template.render(
+                enum=enum,
+                type_name=type_name,
+                number_to_words=inflector.number_to_words,
+            )
+
+        elif all(isinstance(elem, int) for elem in enum):
+            # enum of integers
+            return self.enum_template.render(
+                is_integer=True,
+                enum=enum,
+                type_name=type_name,
+                number_to_words=inflector.number_to_words,
+            )
+
+        else:
+            raise Exception(f"failed to generate enum code for: {enum}")
+
+    def type_to_method_result(self, type_name: str) -> str:
+        if type_name == "none":
+            type_name = ""
+
+        if type_name.startswith("?"):
+            type_name = type_name[1:]
+
+        return "!" + type_name
+
+    def is_primitive(self, type: str) -> bool:
+        return type in ["u64", "f64", "i64", "int", "bool", "string"]
+
+    def is_vlang_array(self, type: str) -> bool:
+        return type.startswith("[]")
+
+    def get_method_params(self, method_params: Dict[str, str]) -> str:
+        return ", ".join([f"{param_name} {param_type}" for param_name, param_type in method_params.items()])
+
+
+# main()
+if __name__ == "__main__":
+    from heroserver.openrpc.generator.generator import ClientGenerator
+    from heroserver.openrpc.parser.parser import parser
+
+    data = parser(path="~/code/git.threefold.info/projectmycelium/hero_server/lib/openrpclib/parser/examples")
+
+    spec_object = OpenRPCSpec.load(data)
+    vlang_code_generator = VlangGenerator()
+    generator = ClientGenerator(
+        spec_object,
+        vlang_code_generator,
+        "/tmp/v_client_new.v",
+    )
+
+    generator.generate_client()
diff --git a/_archive/openrpc/generator/hero_generator.py b/_archive/openrpc/generator/hero_generator.py
new file mode 100644
index 0000000..8590753
--- /dev/null
+++ b/_archive/openrpc/generator/hero_generator.py
@@ -0,0 +1,46 @@
+import argparse
+from pathlib import Path
+
+from heroserver.openrpc.generator.rest_server.python.rest_server_generator import (
+    RestServerGenerator,
+)
+from heroserver.openrpc.model.openrpc_spec import OpenRPCSpec
+from heroserver.openrpc.parser.parser import parser
+
+
+def do(specs_dir: Path, output: Path):
+    for item in specs_dir.iterdir():
+        if not item.is_dir():
+            continue
+
+        actor_name = item.name
+        actor_output_path = output.joinpath(actor_name)
+        actor_output_path.mkdir(parents=True, exist_ok=True)
+
+        print(f"item: {item.as_posix()}")
+        # if item.as_posix() == "generatorexamples/example1/specs/storymanager":
+        #     continue
+        data = parser(path=item.as_posix())
+        # print(f"data: {data}")
+        spec_object = OpenRPCSpec.load(data)
+        server_generator = RestServerGenerator(spec_object, actor_output_path)
+        server_generator.generate()
+
+
+if __name__ == "__main__":
+    arg_parser = argparse.ArgumentParser(description="Hero server and client generator tool.")
+    arg_parser.add_argument(
+        "--specs",
+        type=str,
+        required=True,
+        help="specs directory",
+    )
+    arg_parser.add_argument(
+        "--output",
+        type=str,
+        required=True,
+        help="output directory",
+    )
+
+    args = arg_parser.parse_args()
+    do(Path(args.specs), Path(args.output))
diff --git a/_archive/openrpc/generator/mdbook/generate_mdbook.py b/_archive/openrpc/generator/mdbook/generate_mdbook.py
new file mode 100644
index 0000000..d1fb2ce
--- /dev/null
+++ b/_archive/openrpc/generator/mdbook/generate_mdbook.py
@@ -0,0 +1,90 @@
+import argparse
+import json
+import os
+
+from jinja2 import Environment, FileSystemLoader
+
+from ....openrpc.tools import get_pydantic_type, get_return_type, topological_sort
+
+script_dir = os.path.dirname(os.path.abspath(__file__))
+
+
+def generate_models(openrpc_spec: dict) -> str:
+    schema_dict = openrpc_spec["components"]["schemas"]
+    sorted_classes = topological_sort(schema_dict)
+
+    env = Environment(loader=FileSystemLoader(script_dir), trim_blocks=True, lstrip_blocks=True)
+    template = env.get_template("templates/mdbook/structs.jinja")
+    model_code = template.render(
+        sorted_classes=sorted_classes,
+        schema_dict=schema_dict,
+        get_pydantic_type=get_pydantic_type,
+    )
+
+    return model_code
+
+
+def generate_model(model_name: str, schema: dict) -> str:
+    env = Environment(loader=FileSystemLoader(script_dir))
+    template = env.get_template("templates/vlang/struct.jinja")
+    model_code = template.render(model_name=model_name, schema=schema, get_pydantic_type=get_pydantic_type)
+
+    return model_code
+
+
+def generate_api_methods(openrpc_spec: dict) -> str:
+    env = Environment(loader=FileSystemLoader(script_dir), trim_blocks=True, lstrip_blocks=True)
+    template = env.get_template("templates/mdbook/methods.jinja")
+
+    code = template.render(
+        spec=openrpc_spec,
+        methods=openrpc_spec.get("methods", []),
+        get_return_type=get_return_type,
+        get_pydantic_type=get_pydantic_type,
+    )
+
+    return code
+
+
+def main() -> None:
+    parser = argparse.ArgumentParser(description="Generate API code from OpenRPC specification")
+    parser.add_argument(
+        "-s",
+        "--spec",
+        help="Path to the OpenRPC specification file (JSON)",
+        default="~/code/git.threefold.info/projectmycelium/hero_server/generatorexamples/example1/specs",
+    )
+    parser.add_argument(
+        "-o",
+        "--output",
+        default="/tmp/generator/mdbook",
+        help="Output directory path (default: /tmp/generator/mdbook)",
+    )
+    args = parser.parse_args()
+
+    spec_file = os.path.expanduser(args.spec)
+    output_dir = os.path.expanduser(args.output)
+
+    if not os.path.isfile(spec_file):
+        print(f"Error: OpenRPC specification file '{spec_file}' does not exist.")
+        return
+
+    with open(spec_file) as file:
+        openrpc_spec = json.load(file)
+
+    code_models = generate_models(openrpc_spec)
+    code_methods = generate_api_methods(openrpc_spec)
+
+    # create the output directory itself; files are written inside it
+    os.makedirs(output_dir, exist_ok=True)
+
+    # Write the generated code to a file
+    with open(f"{output_dir}/models.md", "w") as file:
+        file.write(code_models)
+    with open(f"{output_dir}/methods.md", "w") as file:
+        file.write(code_methods)
+
+    print(f"Generated API code has been written to {output_dir}")
+
+
+if __name__ == "__main__":
+    main()
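`topological_sort`, `get_pydantic_type` and `get_return_type` come from `openrpc.tools`, which is not part of this diff. As a rough sketch of the ordering contract `generate_models` relies on (referenced classes emitted before the classes that use them), a stand-in could look like this; the `$ref` traversal is an assumption, not the actual implementation:

```python
# Hedged sketch of what openrpc.tools.topological_sort presumably does;
# the real module is not included in this diff.
from graphlib import TopologicalSorter


def topological_sort(schema_dict: dict) -> list:
    def refs(node) -> set:
        # collect class names referenced via nested "$ref" pointers
        found = set()
        if isinstance(node, dict):
            for key, value in node.items():
                if key == "$ref" and isinstance(value, str):
                    found.add(value.split("/")[-1])
                else:
                    found |= refs(value)
        elif isinstance(node, list):
            for item in node:
                found |= refs(item)
        return found

    # map each class to the classes it depends on, then order dependencies first
    graph = {name: refs(schema) & set(schema_dict) for name, schema in schema_dict.items()}
    return list(TopologicalSorter(graph).static_order())
```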
diff --git a/_archive/openrpc/generator/mdbook/templates/methods.jinja b/_archive/openrpc/generator/mdbook/templates/methods.jinja
new file mode 100644
index 0000000..aad7bfa
--- /dev/null
+++ b/_archive/openrpc/generator/mdbook/templates/methods.jinja
@@ -0,0 +1,16 @@
+## Methods
+
+{% for method in methods %}
+- {{ method['name'] }}: {{ method.get('description', '') }}
+    - Parameters:
+    {% for param in method.get('params', []) %}
+        {{ param['name'] }}: {{ get_pydantic_type(param['schema']) }}
+    {% endfor %}
+
+    - Return Type:
+        {{ get_return_type(method['result']) }}
+
+    - Example:
+        {{ method.get('examples', [{}])[0] }}
+
+{% endfor %}
\ No newline at end of file
diff --git a/_archive/openrpc/generator/mdbook/templates/structs.jinja b/_archive/openrpc/generator/mdbook/templates/structs.jinja
new file mode 100644
index 0000000..311271e
--- /dev/null
+++ b/_archive/openrpc/generator/mdbook/templates/structs.jinja
@@ -0,0 +1,9 @@
+# Classes
+
+{% for class_name in sorted_classes %}
+- {{ schema_dict[class_name]['title'] }}
+{% for prop_name, prop in schema_dict[class_name]['properties'].items() %}
+    - {{ prop_name }} ({{ get_pydantic_type(prop) }}): {{ prop['description'] }}
+{% endfor %}
+
+{% endfor %}
\ No newline at end of file
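A quick way to exercise the two templates above is to feed `generate_models` a tiny hand-written spec dict; the `Story` schema here is invented purely for illustration, and the import assumes the module is reachable on the path:

```python
# Hypothetical smoke test for the mdbook rendering; "Story" is an
# illustrative schema, not part of the generator.
from generate_mdbook import generate_models  # assumes the module is importable

spec = {
    "components": {
        "schemas": {
            "Story": {
                "title": "Story",
                "type": "object",
                "properties": {
                    "name": {"type": "string", "description": "story name"},
                },
            }
        }
    }
}

print(generate_models(spec))  # prints the rendered "# Classes" markdown
```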
diff --git a/_archive/openrpc/generator/model/model_generator.py b/_archive/openrpc/generator/model/model_generator.py
new file mode 100644
index 0000000..ff28994
--- /dev/null
+++ b/_archive/openrpc/generator/model/model_generator.py
@@ -0,0 +1,170 @@
+from typing import Dict, List, Set
+
+from heroserver.openrpc.generator.code.lang_code_generator import (
+    LangCodeGenerator,
+    PropertyInfo,
+)
+from heroserver.openrpc.model.common import (
+    ContentDescriptorObject,
+    ReferenceObject,
+    SchemaObject,
+)
+from heroserver.openrpc.model.openrpc_spec import OpenRPCSpec
+
+
+class ModelGenerator:
+    def __init__(self, spec: OpenRPCSpec, lang_code_generator: LangCodeGenerator) -> None:
+        self.spec = spec
+        self.lang_code_generator = lang_code_generator
+        self.processed_objects: Dict[str, Dict[str, str]] = {}
+        self.ordered_objects: List[str] = []
+        self.used_names: Set[str] = set()
+
+    def generate_models(self):
+        if not self.spec.components:
+            return ""
+
+        schemas = self.spec.components.schemas
+        schemas_path = ["components", "schemas"]
+        for name, schema in schemas.items():
+            self.jsonschema_to_type(
+                path=schemas_path + [name],
+                jsonschema=schema,
+            )
+
+        objects_code = ""
+        for val in self.ordered_objects:
+            if val == "":
+                continue
+            objects_code = f"{objects_code}{val}\n\n"
+
+        return objects_code
+
+    def jsonschema_to_type(self, path: List[str], jsonschema: SchemaObject | ReferenceObject) -> str:
+        if isinstance(jsonschema, ReferenceObject):
+            ref: str = jsonschema.ref
+
+            ref_schema = self.spec.ref_to_schema(ref)
+            ref_path = ref.split("/")[1:]
+
+            if isinstance(ref_schema, ContentDescriptorObject):
+                # TODO: implement
+                raise Exception("unimplemented")
+                # return self.content_descriptor_to_type(ref_path, ref_schema)
+
+            return self.jsonschema_to_type(ref_path, ref_schema)
+
+        path_str = "/".join([item.lower() for item in path])
+        if path_str in self.processed_objects:
+            return self.processed_objects[path_str]["name"]
+
+        type_name = self.type_name_from_path(path)
+
+        description = getattr(jsonschema, "description", None)
+        if jsonschema.enum:
+            enum = jsonschema.enum
+            type_code = self.lang_code_generator.generate_enum(enum, type_name)
+            if self.lang_code_generator.is_primitive(type_code):
+                return type_code
+
+            self.add_object(path_str, type_code, type_name)
+            return type_name
+
+        if jsonschema.type:
+            match jsonschema.type:
+                case "string":
+                    return self.lang_code_generator.string_primitive()
+
+                case "integer":
+                    return self.lang_code_generator.integer_primitive()
+
+                case "number":
+                    return self.lang_code_generator.number_primitive()
+
+                case "array":
+                    if isinstance(jsonschema.items, List):
+                        raise Exception("array of different item types is not supported")
+
+                    item_type_name = self.jsonschema_to_type(path + ["item"], jsonschema.items)
+                    return self.lang_code_generator.array_of_type(item_type_name)
+
+                case "boolean":
+                    return self.lang_code_generator.bool_primitive()
+
+                case "object":
+                    # to prevent cyclic dependencies
+                    self.add_object(path_str, "", type_name)
+
+                    properties: Dict[str, PropertyInfo] = {}
+                    for (
+                        property_name,
+                        property_schema,
+                    ) in jsonschema.properties.items():
+                        schema = property_schema
+                        new_path = path + ["properties", property_name]
+                        if isinstance(property_schema, ReferenceObject):
+                            schema = self.spec.ref_to_schema(property_schema.ref)
+                            new_path = property_schema.ref.split("/")[1:]
+
+                        property_info = PropertyInfo(
+                            name=property_name,
+                            type_name=self.jsonschema_to_type(new_path, schema),
+                            description=schema.description,
+                            example=schema.example,
+                        )
+
+                        properties[property_name] = property_info
+
+                    type_code = self.lang_code_generator.generate_object(type_name, properties)
+                    self.add_object(path_str, type_code, type_name)
+                    return type_name
+
+                case "null":
+                    return self.lang_code_generator.null_primitive()
+
+                case _:
+                    raise Exception(f"type {jsonschema.type} is not supported")
+
+        if jsonschema.anyOf:
+            type_names = []
+            for i, item in enumerate(jsonschema.anyOf):
+                type_names.append(self.jsonschema_to_type(path + [f"anyOf{i}"], item))
+
+            return self.lang_code_generator.generate_multitype(type_names)
+            # self.add_object(path_str, type_code, type_code)
+            # return type_code
+
+        elif jsonschema.oneOf:
+            type_names = []
+            for i, item in enumerate(jsonschema.oneOf):
+                type_names.append(self.jsonschema_to_type(path + [f"oneOf{i}"], item))
+
+            return self.lang_code_generator.generate_multitype(type_names)
+            # self.add_object(path_str, type_code, type_code)
+            # return type_code
+
+        elif jsonschema.allOf:
+            # encapsulate_types expects the path as well as the subschemas
+            return self.lang_code_generator.encapsulate_types(path, jsonschema.allOf)
+            # self.add_object(path_str, type_code, type_code)
+            # return type_name
+
+        raise Exception(f"schema at {path_str} is not supported")
+
+    def add_object(self, path_str: str, type_code: str, type_name: str):
+        self.used_names.add(type_name)
+        self.processed_objects[path_str] = {
+            "code": type_code,
+            "name": type_name,
+        }
+        self.ordered_objects.append(type_code)
+
+    def type_name_from_path(self, path: List[str]) -> str:
+        type_name = ""
+        for item in reversed(path):
+            type_name += item.title() if item.islower() else item
+            if type_name not in self.used_names:
+                return type_name
+
+        raise Exception(f"failed to generate unique name from path: {path}")
diff --git a/_archive/openrpc/generator/readme.md b/_archive/openrpc/generator/readme.md
new file mode 100644
index 0000000..0ee3a27
--- /dev/null
+++ b/_archive/openrpc/generator/readme.md
@@ -0,0 +1,14 @@
+
+## Example: how to use
+
+```python
+
+import heroserver.openrpc.generator as generator
+
+openrpc_spec = generator.openrpc_spec(
+    path="~/code/git.threefold.info/projectmycelium/hero_server/generatorexamples/example1/specs"
+)
+
+print(openrpc_spec)
+
+```
\ No newline at end of file
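The readme shows the spec-loading entry point; driving the model generator by hand follows the same pattern as the `__main__` block in vlang_code_generator.py. A minimal sketch (import locations for this archived layout may need adjusting):

```python
# Minimal sketch: generate V structs for every schema in a spec.
# Paths and class names are taken from the surrounding files; import
# locations are assumptions for this archived layout.
from heroserver.openrpc.generator.code.vlang.vlang_code_generator import VlangGenerator
from heroserver.openrpc.generator.model.model_generator import ModelGenerator
from heroserver.openrpc.model.openrpc_spec import OpenRPCSpec
from heroserver.openrpc.parser.parser import parser

data = parser(path="~/code/git.threefold.info/projectmycelium/hero_server/lib/openrpclib/parser/examples")
spec = OpenRPCSpec.load(data)

model_generator = ModelGenerator(spec, VlangGenerator())
print(model_generator.generate_models())  # V structs/enums, dependency-ordered
```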
diff --git a/_archive/openrpc/generator/rest_server/example_rest_server/vm_manager.py b/_archive/openrpc/generator/rest_server/example_rest_server/vm_manager.py
new file mode 100644
index 0000000..2dadff9
--- /dev/null
+++ b/_archive/openrpc/generator/rest_server/example_rest_server/vm_manager.py
@@ -0,0 +1,28 @@
+from fastapi import FastAPI
+from pydantic import BaseModel
+from vm_manager__vm_start import vm_start
+
+app = FastAPI()
+
+
+# VM would be an object describing e.g. a virtual machine
+class VM(BaseModel):
+    name: str = ""
+
+
+context = None  # placeholder: the real context carries user, groups, logging, circle and DB (OSIS)
+
+
+@app.get("/$circleguid/vm_manager/vm")
+def vm_get() -> VM:
+    return VM()
+
+
+@app.post("/$circleguid/vm_manager/vm")
+def vm_set() -> bool:
+    return True
+
+
+@app.delete("/$circleguid/vm_manager/vm")
+def vm_delete() -> bool:
+    # would use OSIS to delete this object
+    return True
+
+
+@app.get("/$circleguid/vm_manager/vm_start/{vm_guid}")
+def vm_start_handler(vm_guid: str) -> bool:
+    # delegate to the internal implementation; renamed to avoid shadowing the import
+    return vm_start(context=context, vm_guid=vm_guid)
diff --git a/_archive/openrpc/generator/rest_server/example_rest_server/vm_manager__vm_start.py b/_archive/openrpc/generator/rest_server/example_rest_server/vm_manager__vm_start.py
new file mode 100644
index 0000000..60a274b
--- /dev/null
+++ b/_archive/openrpc/generator/rest_server/example_rest_server/vm_manager__vm_start.py
@@ -0,0 +1,8 @@
+
+def vm_start(context, vm_guid: str) -> bool:
+    # context allows us to see who the user is and which groups the user is in
+    # context also gives a logging feature
+    # context is linked to 1 circle
+    # context is linked to a DB (OSIS)
+
+    # code to be implemented, e.g. using DAGU to start a vm
+    return True
\ No newline at end of file
diff --git a/_archive/openrpc/generator/rest_server/python/rest_server_generator.py b/_archive/openrpc/generator/rest_server/python/rest_server_generator.py
new file mode 100644
index 0000000..fbbed61
--- /dev/null
+++ b/_archive/openrpc/generator/rest_server/python/rest_server_generator.py
@@ -0,0 +1,256 @@
+import os
+from pathlib import Path
+from typing import Dict, List, Optional, Union
+
+from jinja2 import Environment, FileSystemLoader
+
+from heroserver.openrpc.generator.code.python.python_code_generator import PythonCodeGenerator
+from heroserver.openrpc.generator.model.model_generator import ModelGenerator
+from heroserver.openrpc.model.common import ContentDescriptorObject, ReferenceObject, SchemaObject
+from heroserver.openrpc.model.openrpc_spec import OpenRPCSpec
+from heroserver.openrpc.parser.parser import parser
+
+script_dir = os.path.dirname(os.path.abspath(__file__))
+env = Environment(loader=FileSystemLoader(script_dir))
+
+
+class RestServerGenerator:
+    def __init__(
+        self,
+        spec: OpenRPCSpec,
+        dir: Path,
+    ) -> None:
+        if not isinstance(spec, OpenRPCSpec):
+            raise TypeError(f"Expected spec to be of type OpenRPCSpec, got {type(spec)}")
+        if not isinstance(dir, Path):
+            raise TypeError(f"Expected dir to be of type Path, got {type(dir)}")
+
+        self.model_generator = ModelGenerator(spec, PythonCodeGenerator())
+        self.spec = spec
+        self.dir = dir
+        self.crud_methods_template = env.get_template("templates/crud_methods.jinja")
+        self.internal_crud_methods_template = env.get_template("templates/internal_crud_methods.jinja")
+        self.internal_crud_mock_methods_template = env.get_template("templates/internal_crud_mock_methods.jinja")
+        self.imports_template = env.get_template("templates/imports.jinja")
+        self.actor_method_template = env.get_template("templates/actor_method.jinja")
+        self.internal_actor_method_template = env.get_template("templates/internal_actor_method.jinja")
+        self.server_template = env.get_template("templates/server.jinja")
+
+    def generate(self):
+        self.dir.mkdir(parents=True, exist_ok=True)
+
+        self.generate_models()
+        self.generate_crud()
+        self.generate_mock_crud()
+        self.generate_internal_actor_methods()
+        self.generate_openapi()
+        self.generate_openapi_mock()
+        self.generate_server()
+
+        print(f"Generated API code has been written to {self.dir}")
+
+    def generate_server(self):
+        code = self.server_template.render()
+
+        path = self.dir.joinpath("server.py")
+        with open(path, "w") as file:
+            file.write(code)
+
+    def generate_openapi(self):
+        imports = self.imports_template.render(import_crud=True, import_models=True)
+        app_init = "app = FastAPI()\n\n"
+        methods = ""
+        for path_str in self.model_generator.spec.get_root_objects().keys():
+            object = self.model_generator.processed_objects[path_str]
+            if object["code"] == "":
+                continue
+
+            type_name = object["name"]
+            variable_name = type_name.lower()
+            methods += self.crud_methods_template.render(variable_name=variable_name, type_name=type_name) + "\n\n"
+
+        for method in self.spec.methods:
+            if any(method.name.endswith(end) for end in ["get", "set", "delete"]):
+                continue
+
+            params: Dict[str, str] = {}
+            for param in method.params:
+                params[param.name] = self.model_generator.jsonschema_to_type(["methods", method.name, "params", param.name], param.schema)
+
+            return_type = self.method_result_return_type(["methods", method.name, "result"], method.result)
+
+            function_name = method.name.lower().replace(".", "_")
+            imports += f"from {function_name}_internal import {function_name}_internal\n"
+            methods += (
+                self.actor_method_template.render(
+                    rest_server_generator=self,
+                    function_name=function_name,
+                    method_params=params,
+                    method_result=return_type,
+                )
+                + "\n\n"
+            )
+
+        path = self.dir.joinpath("open_api.py")
+        with open(path, "w") as file:
+            file.write(f"{imports}\n\n{app_init}\n\n{methods}")
+
+    def generate_openapi_mock(self):
+        imports = self.imports_template.render(mock=True, import_crud=True, import_models=True)
+        app_init = "app = FastAPI()\n\n"
+        methods = ""
+        for path_str in self.model_generator.spec.get_root_objects().keys():
+            object = self.model_generator.processed_objects[path_str]
object["code"] == "": + continue + + type_name = object["name"] + variable_name = type_name.lower() + methods += self.crud_methods_template.render(mock=True, variable_name=variable_name, type_name=type_name) + "\n\n" + + for method in self.spec.methods: + if any(method.name.endswith(end) for end in ["get", "set", "delete"]): + continue + + params: Dict[str, str] = {} + for param in method.params: + params[param.name] = self.model_generator.jsonschema_to_type(["methods", method.name, "params", param.name], param.schema) + + return_type = self.method_result_return_type(["methods", method.name, "result"], method.result) + + function_name = method.name.lower().replace(".", "_") + imports += f"from {function_name}_internal import {function_name}_internal\n" + methods += ( + self.actor_method_template.render( + mock=True, + rest_server_generator=self, + function_name=function_name, + method_params=params, + method_result=return_type, + ) + + "\n\n" + ) + + path = self.dir.joinpath("open_api_mock.py") + with open(path, "w") as file: + file.write(f"{imports}\n\n{app_init}\n\n{methods}") + + def generate_models(self): + imports = self.imports_template.render() + code = self.model_generator.generate_models() + path = self.dir.joinpath("models.py") + + with open(path, "w") as file: + file.write(f"{imports}\n\n{code}\n") + + def generate_crud(self): + imports = self.imports_template.render(import_models=True) + methods = "" + for path_str in self.model_generator.spec.get_root_objects().keys(): + object = self.model_generator.processed_objects[path_str] + if object["code"] == "": + continue + + type_name = object["name"] + variable_name = type_name.lower() + methods += self.internal_crud_methods_template.render(variable_name=variable_name, type_name=type_name) + "\n\n" + + path = self.dir.joinpath("crud.py") + with open(path, "w") as file: + file.write(f"{imports}\n\n{methods}") + + def generate_mock_crud(self): + imports = self.imports_template.render(import_models=True) + imports += "from heroserver.openrpc.tools import create_example_object" + methods = "" + for path_str in self.model_generator.spec.get_root_objects().keys(): + object = self.model_generator.spec.get_root_objects()[path_str] + + if isinstance(object, SchemaObject): + print_items(object) + + object = self.model_generator.processed_objects[path_str] + if object["code"] == "": + continue + + type_name = object["name"] + variable_name = type_name.lower() + + methods += self.internal_crud_mock_methods_template.render(variable_name=variable_name, type_name=type_name) + "\n\n" + + path = self.dir.joinpath("crud_mock.py") + with open(path, "w") as file: + file.write(f"{imports}\n\n{methods}") + + def generate_internal_actor_methods(self): + imports = self.imports_template.render(import_models=True) + for method in self.spec.methods: + function_name = method.name.lower().replace(".", "_") + "_internal" + file_path = self.dir.joinpath(f"{function_name}.py") + if file_path.exists(): + continue + + if any(method.name.endswith(end) for end in ["get", "set", "delete"]): + continue + + params: Dict[str, str] = {} + for param in method.params: + params[param.name] = self.model_generator.jsonschema_to_type(["methods", method.name, "params", param.name], param.schema) + + return_type = self.method_result_return_type(["methods", method.name, "result"], method.result) + + code = self.internal_actor_method_template.render( + rest_server_generator=self, + function_name=function_name, + method_params=params, + method_result=return_type, + ) + + with 
+                file.write(f"{imports}\n\n{code}")
+
+    def get_method_params(self, method_params: Dict[str, str]) -> str:
+        return ", ".join([f"{param_name}: {param_type}" for param_name, param_type in method_params.items()])
+
+    def method_result_return_type(
+        self,
+        path: List[str],
+        method_result: Optional[Union[ContentDescriptorObject, ReferenceObject]],
+    ) -> str:
+        # return early so type_name is never left unbound
+        if not method_result:
+            return ""
+
+        type_name = ""
+        if isinstance(method_result, ContentDescriptorObject):
+            schema = method_result.schema
+            type_name = self.model_generator.jsonschema_to_type(path, schema)
+
+        elif isinstance(method_result, ReferenceObject):
+            type_name = self.model_generator.jsonschema_to_type(path, method_result)
+
+        return type_name
+
+
+def print_items(schema_object, depth=0):
+    indent = "  " * depth
+    if isinstance(schema_object.items, list):
+        for item in schema_object.items:
+            print(f"{indent}Item: {item}")
+            if isinstance(item, SchemaObject):
+                print_items(item, depth + 1)
+                print(f"{indent}Example: {item.example}")
+    elif isinstance(schema_object.items, SchemaObject):
+        print(f"{indent}Item: {schema_object.items}")
+        print_items(schema_object.items, depth + 1)
+        print(f"{indent}Example: {schema_object.items.example}")
+
+
+if __name__ == "__main__":
+    data = parser(path="~/code/git.threefold.info/hero/hero_server_python/baobabspecs")
+
+    spec_object = OpenRPCSpec.load(data)
+    server_generator = RestServerGenerator(spec_object, Path("/tmp/rest2"))
+    server_generator.generate()
diff --git a/_archive/openrpc/generator/rest_server/python/templates/actor_method.jinja b/_archive/openrpc/generator/rest_server/python/templates/actor_method.jinja
new file mode 100644
index 0000000..8eaef30
--- /dev/null
+++ b/_archive/openrpc/generator/rest_server/python/templates/actor_method.jinja
@@ -0,0 +1,7 @@
+@app.post("/$circleguid/{{function_name}}")
+def {{ function_name }}(circleguid: int, {{ rest_server_generator.get_method_params(method_params) }}){% if method_result %} -> {{ method_result }}{% endif %}:
+    {% if mock %}
+    return {{function_name}}_internal_mock(context, circleguid, {{', '.join(method_params.keys())}})
+    {% else %}
+    return {{function_name}}_internal(context, circleguid, {{', '.join(method_params.keys())}})
+    {% endif %}
diff --git a/_archive/openrpc/generator/rest_server/python/templates/crud_methods.jinja b/_archive/openrpc/generator/rest_server/python/templates/crud_methods.jinja
new file mode 100644
index 0000000..de62c29
--- /dev/null
+++ b/_archive/openrpc/generator/rest_server/python/templates/crud_methods.jinja
@@ -0,0 +1,16 @@
+{% if mock %}
+    {% set suffix = '_mock' %}
+{% else %}
+    {% set suffix = '' %}
+{% endif %}
+@app.get("/{circleguid}/{{variable_name}}_manager{{suffix}}/{{variable_name}}/{id}")
+def {{variable_name}}_get(circleguid: int, id: str) -> {{type_name}}:
+    return {{variable_name}}_get_internal{{suffix}}(circleguid, id)
+
+@app.post("/{circleguid}/{{variable_name}}_manager{{suffix}}/{{variable_name}}")
+def {{variable_name}}_set(circleguid: int, {{variable_name}}: {{type_name}}) -> bool:
+    return {{variable_name}}_set_internal{{suffix}}(circleguid, {{variable_name}})
+
+@app.delete("/{circleguid}/{{variable_name}}_manager{{suffix}}/{{variable_name}}/{id}")
+def {{variable_name}}_delete(circleguid: int, id: str) -> bool:
+    return {{variable_name}}_delete_internal{{suffix}}(circleguid, id)
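For a root object named `Story` (an illustrative name; the repo's examples use a storymanager spec), the non-mock branch of the CRUD template above expands to roughly the following FastAPI routes:

```python
# Approximate non-mock expansion of crud_methods.jinja with
# variable_name="story", type_name="Story"; `app`, `Story` and the
# *_internal helpers come from the generated imports.jinja/crud.py.
@app.get("/{circleguid}/story_manager/story/{id}")
def story_get(circleguid: int, id: str) -> Story:
    return story_get_internal(circleguid, id)


@app.post("/{circleguid}/story_manager/story")
def story_set(circleguid: int, story: Story) -> bool:
    return story_set_internal(circleguid, story)


@app.delete("/{circleguid}/story_manager/story/{id}")
def story_delete(circleguid: int, id: str) -> bool:
    return story_delete_internal(circleguid, id)
```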
diff --git a/_archive/openrpc/generator/rest_server/python/templates/imports.jinja b/_archive/openrpc/generator/rest_server/python/templates/imports.jinja
new file mode 100644
index 0000000..57b75db
--- /dev/null
+++ b/_archive/openrpc/generator/rest_server/python/templates/imports.jinja
@@ -0,0 +1,12 @@
+{% if mock %}
+    {% set suffix = '_mock' %}
+{% else %}
+    {% set suffix = '' %}
+{% endif %}
+from fastapi import FastAPI
+from pydantic import BaseModel, Field
+from typing import List
+from enum import Enum
+{% if import_models %}from models import *{% endif %}
+{% if import_crud %}from crud{{suffix}} import *{% endif %}
+{% if import_openapi %}from open_api import *{% endif %}
diff --git a/_archive/openrpc/generator/rest_server/python/templates/internal_actor_method.jinja b/_archive/openrpc/generator/rest_server/python/templates/internal_actor_method.jinja
new file mode 100644
index 0000000..53df596
--- /dev/null
+++ b/_archive/openrpc/generator/rest_server/python/templates/internal_actor_method.jinja
@@ -0,0 +1,9 @@
+from typing import List, Optional, Dict, Union
+from enum import Enum
+
+def {{function_name}}(context, circleguid: int, {{rest_server_generator.get_method_params(method_params)}}){% if method_result %} -> {{method_result}}{% endif %}:
+    # context allows us to see who the user is and which groups the user is in
+    # context also gives a logging feature
+    # context is linked to 1 circle
+    # context is linked to a DB (OSIS)
+    pass
\ No newline at end of file
diff --git a/_archive/openrpc/generator/rest_server/python/templates/internal_crud_methods.jinja b/_archive/openrpc/generator/rest_server/python/templates/internal_crud_methods.jinja
new file mode 100644
index 0000000..b4eca74
--- /dev/null
+++ b/_archive/openrpc/generator/rest_server/python/templates/internal_crud_methods.jinja
@@ -0,0 +1,11 @@
+def {{variable_name}}_get_internal(circleguid: int, id: str) -> {{type_name}}:
+    return {{type_name}}()
+
+def {{variable_name}}_set_internal(circleguid: int, {{variable_name}}: {{type_name}}) -> bool:
+    return True
+
+def {{variable_name}}_delete_internal(circleguid: int, id: str) -> bool:
+    # would use OSIS to delete this object
+    return True
+
+
diff --git a/_archive/openrpc/generator/rest_server/python/templates/internal_crud_mock_methods.jinja b/_archive/openrpc/generator/rest_server/python/templates/internal_crud_mock_methods.jinja
new file mode 100644
index 0000000..6c7bbd1
--- /dev/null
+++ b/_archive/openrpc/generator/rest_server/python/templates/internal_crud_mock_methods.jinja
@@ -0,0 +1,8 @@
+def {{variable_name}}_get_internal_mock(circleguid: int, id: str) -> {{type_name}}:
+    return create_example_object({{type_name}})
+
+def {{variable_name}}_set_internal_mock(circleguid: int, {{variable_name}}: {{type_name}}) -> bool:
+    return True
+
+def {{variable_name}}_delete_internal_mock(circleguid: int, id: str) -> bool:
+    return True
diff --git a/_archive/openrpc/generator/rest_server/python/templates/server.jinja b/_archive/openrpc/generator/rest_server/python/templates/server.jinja
new file mode 100644
index 0000000..2a94a9d
--- /dev/null
+++ b/_archive/openrpc/generator/rest_server/python/templates/server.jinja
@@ -0,0 +1,5 @@
+import uvicorn
+from open_api import app
+
+if __name__ == "__main__":
+    uvicorn.run(app, host="0.0.0.0", port=8000)
\ No newline at end of file
diff --git a/_archive/openrpc/generator/rest_server/vlang/rest_server_generator.py b/_archive/openrpc/generator/rest_server/vlang/rest_server_generator.py
new file mode 100644
index 0000000..914796f
--- /dev/null
+++ b/_archive/openrpc/generator/rest_server/vlang/rest_server_generator.py
@@ -0,0 +1,169 @@
+import os
+from pathlib 
import Path
+from typing import Dict, List, Optional, Union
+
+from jinja2 import Environment, FileSystemLoader
+
+from heroserver.openrpc.generator.actor.vlang.vlang_code_generator import VlangGenerator
+from heroserver.openrpc.generator.model_generator import ModelGenerator
+from heroserver.openrpc.model.common import ContentDescriptorObject, ReferenceObject
+from heroserver.openrpc.model.openrpc_spec import OpenRPCSpec
+
+script_dir = os.path.dirname(os.path.abspath(__file__))
+env = Environment(loader=FileSystemLoader(script_dir))
+
+
+class RestServerGenerator:
+    def __init__(
+        self,
+        spec: OpenRPCSpec,
+        dir: Path,
+    ) -> None:
+        self.lang_code_generator = VlangGenerator()
+        self.model_generator = ModelGenerator(spec, VlangGenerator())
+        self.spec = spec
+        self.dir = dir
+        self.crud_methods_template = env.get_template("templates/crud_methods.jinja")
+        self.internal_crud_methods_template = env.get_template("templates/internal_crud_methods.jinja")
+        self.imports_template = env.get_template("templates/imports.jinja")
+        self.actor_method_template = env.get_template("templates/actor_method.jinja")
+        self.internal_actor_method_template = env.get_template("templates/internal_actor_method.jinja")
+        self.server_template = env.get_template("templates/server.jinja")
+
+    def generate(self):
+        self.dir.mkdir(parents=True, exist_ok=True)
+
+        self.generate_models()
+        self.generate_crud()
+        self.generate_internal_actor_methods()
+        self.generate_openapi()
+        self.generate_server()
+
+        print(f"Generated API code has been written to {self.dir}")
+
+    def generate_server(self):
+        imports = self.imports_template.render(import_vweb=True)
+        code = self.server_template.render()
+
+        path = self.dir.joinpath("server.v")
+        with open(path, "w") as file:
+            file.write(f"{imports}\n\n{code}")
+
+    def generate_openapi(self):
+        imports = self.imports_template.render(import_vweb=True)
+        methods = ""
+        for path_str in self.model_generator.spec.get_root_objects().keys():
+            object = self.model_generator.processed_objects[path_str]
+            if object["code"] == "":
+                continue
+
+            type_name = object["name"]
+            variable_name = type_name.lower()
+            methods += self.crud_methods_template.render(variable_name=variable_name, type_name=type_name) + "\n\n"
+
+        for method in self.spec.methods:
+            if any(method.name.endswith(end) for end in ["get", "set", "delete"]):
+                continue
+
+            params: Dict[str, str] = {}
+            for param in method.params:
+                params[param.name] = self.model_generator.jsonschema_to_type(["methods", method.name, "params", param.name], param.schema)
+
+            return_type = self.method_result_return_type(["methods", method.name, "result"], method.result)
+
+            function_name = method.name.lower().replace(".", "_")
+            methods += (
+                self.actor_method_template.render(
+                    rest_server_generator=self,
+                    function_name=function_name,
+                    method_params=params,
+                    method_result=return_type,
+                )
+                + "\n\n"
+            )
+
+        path = self.dir.joinpath("open_api.v")
+        with open(path, "w") as file:
+            file.write(f"{imports}\n\n{methods}")
+
+    def generate_models(self):
+        imports = self.imports_template.render()
+        code = self.model_generator.generate_models()
+        path = self.dir.joinpath("models.v")
+
+        with open(path, "w") as file:
+            file.write(f"{imports}\n\n{code}\n")
+
+    def generate_crud(self):
+        imports = self.imports_template.render(import_models=True)
+        methods = ""
+        for path_str in self.model_generator.spec.get_root_objects().keys():
+            object = self.model_generator.processed_objects[path_str]
+            if object["code"] == "":
+                continue
+
+            type_name = object["name"]
variable_name = type_name.lower()
+            methods += self.internal_crud_methods_template.render(variable_name=variable_name, type_name=type_name) + "\n\n"
+
+        path = self.dir.joinpath("crud.v")
+        with open(path, "w") as file:
+            file.write(f"{imports}\n\n{methods}")
+
+    def generate_internal_actor_methods(self):
+        imports = self.imports_template.render(import_models=True)
+        for method in self.spec.methods:
+            function_name = method.name.lower().replace(".", "_") + "_internal"
+            file_path = self.dir.joinpath(f"{function_name}.v")
+            if file_path.exists():
+                continue
+
+            if any(method.name.endswith(end) for end in ["get", "set", "delete"]):
+                continue
+
+            params: Dict[str, str] = {}
+            for param in method.params:
+                params[param.name] = self.model_generator.jsonschema_to_type(["methods", method.name, "params", param.name], param.schema)
+
+            return_type = self.method_result_return_type(["methods", method.name, "result"], method.result)
+
+            code = self.internal_actor_method_template.render(
+                rest_server_generator=self,
+                function_name=function_name,
+                method_params=params,
+                method_result=return_type,
+            )
+
+            with open(file_path, "w") as file:
+                file.write(f"{imports}\n\n{code}")
+
+    def get_method_params(self, method_params: Dict[str, str]) -> str:
+        return ", ".join([f"{param_name} {param_type}" for param_name, param_type in method_params.items()])
+
+    def method_result_return_type(
+        self,
+        path: List[str],
+        method_result: Optional[Union[ContentDescriptorObject, ReferenceObject]],
+    ) -> str:
+        # return early so type_name is never left unbound
+        if not method_result:
+            return ""
+
+        type_name = ""
+        if isinstance(method_result, ContentDescriptorObject):
+            schema = method_result.schema
+            type_name = self.model_generator.jsonschema_to_type(path, schema)
+
+        elif isinstance(method_result, ReferenceObject):
+            type_name = self.model_generator.jsonschema_to_type(path, method_result)
+
+        return type_name
+
+
+if __name__ == "__main__":
+    from heroserver.openrpc.generator.model_generator import ModelGenerator
+    from heroserver.openrpc.parser.parser import parser
+
+    data = parser(path="/root/code/git.threefold.info/projectmycelium/hero_server/generatorexamples/example1/specs/storymanager")
+
+    spec_object = OpenRPCSpec.load(data)
+    server_generator = RestServerGenerator(spec_object, Path("/tmp/rest3"))
+    server_generator.generate()
diff --git a/_archive/openrpc/generator/rest_server/vlang/templates/actor_method.jinja b/_archive/openrpc/generator/rest_server/vlang/templates/actor_method.jinja
new file mode 100644
index 0000000..40557a3
--- /dev/null
+++ b/_archive/openrpc/generator/rest_server/vlang/templates/actor_method.jinja
@@ -0,0 +1,20 @@
+@['/:circleguid/{{function_name}}'; post]
+pub fn (mut v_server_app VServerApp) {{ function_name }}(circleguid int) vweb.Result{
+    // decode the request body; a vweb.Result method cannot propagate errors with `!`
+    raw := json2.raw_decode(v_server_app.req.data) or {
+        v_server_app.set_status(400, '')
+        return v_server_app.text('HTTP 400: Bad Request')
+    }
+    body := raw.as_map()
+    {% for param_name, param_type in method_params.items() %}
+    {% if rest_server_generator.lang_code_generator.is_primitive(param_type) %}
+    {{param_name}} := body['{{param_name}}'].{{param_type}}()
+    {% else %}
+    {{param_name}} := json2.decode[{{param_type}}](body['{{param_name}}'].json_str()) or {
+        v_server_app.set_status(400, '')
+        return v_server_app.text('HTTP 400: Bad Request')
+    }
+    {% endif %}
+    {% endfor %}
+    res := {{function_name}}_internal(context, circleguid, {{', '.join(method_params.keys())}}) or {
+        v_server_app.set_status(500, '')
+        return v_server_app.text('HTTP 500: Internal Server Error')
+    }
+
+    return v_server_app.json(res)
+}
\ No newline at end of file
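To eyeball the V output of a single template without running the whole generator, the same Jinja environment the generator builds can be used directly; "story"/"Story" are illustrative values:

```python
# Render one template standalone; mirrors the Environment setup in
# rest_server_generator.py above. "story"/"Story" are example values.
import os

from jinja2 import Environment, FileSystemLoader

script_dir = os.path.dirname(os.path.abspath(__file__))
env = Environment(loader=FileSystemLoader(script_dir))

template = env.get_template("templates/crud_methods.jinja")
print(template.render(variable_name="story", type_name="Story"))
```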
diff --git a/_archive/openrpc/generator/rest_server/vlang/templates/crud_methods.jinja b/_archive/openrpc/generator/rest_server/vlang/templates/crud_methods.jinja
new file mode 100644
index 0000000..5754956
--- /dev/null
+++ b/_archive/openrpc/generator/rest_server/vlang/templates/crud_methods.jinja
@@ -0,0 +1,32 @@
+@['/:circleguid/{{variable_name}}_manager/{{variable_name}}/:id'; get]
+pub fn (mut v_server_app VServerApp) {{variable_name}}_get(circleguid int, id string) vweb.Result{
+    res := {{variable_name}}_get_internal(circleguid, id) or {
+        v_server_app.set_status(500, '')
+        return v_server_app.text('HTTP 500: Internal Server Error')
+    }
+    return v_server_app.json(res)
+}
+
+@['/:circleguid/{{variable_name}}_manager/{{variable_name}}'; post]
+pub fn (mut v_server_app VServerApp) {{variable_name}}_set(circleguid int) vweb.Result{
+    {{variable_name}} := json2.decode[{{type_name}}](v_server_app.req.data) or {
+        v_server_app.set_status(400, '')
+        return v_server_app.text('HTTP 400: Bad Request')
+    }
+    res := {{variable_name}}_set_internal(circleguid, {{variable_name}}) or {
+        v_server_app.set_status(500, '')
+        return v_server_app.text('HTTP 500: Internal Server Error')
+    }
+
+    return v_server_app.json(res)
+}
+
+@['/:circleguid/{{variable_name}}_manager/{{variable_name}}/:id'; delete]
+pub fn (mut v_server_app VServerApp) {{variable_name}}_delete(circleguid int, id string) vweb.Result{
+    // would use OSIS to delete this object
+    res := {{variable_name}}_delete_internal(circleguid, id) or {
+        v_server_app.set_status(500, '')
+        return v_server_app.text('HTTP 500: Internal Server Error')
+    }
+    return v_server_app.json(res)
+}
\ No newline at end of file
diff --git a/_archive/openrpc/generator/rest_server/vlang/templates/imports.jinja b/_archive/openrpc/generator/rest_server/vlang/templates/imports.jinja
new file mode 100644
index 0000000..e9e94de
--- /dev/null
+++ b/_archive/openrpc/generator/rest_server/vlang/templates/imports.jinja
@@ -0,0 +1,4 @@
+module main
+
+import x.json2
+{% if import_vweb %}import vweb{% endif %}
diff --git a/_archive/openrpc/generator/rest_server/vlang/templates/internal_actor_method.jinja b/_archive/openrpc/generator/rest_server/vlang/templates/internal_actor_method.jinja
new file mode 100644
index 0000000..397f4af
--- /dev/null
+++ b/_archive/openrpc/generator/rest_server/vlang/templates/internal_actor_method.jinja
@@ -0,0 +1,11 @@
+module main
+
+import freeflowuniverse.crystallib.context
+
+pub fn {{function_name}}(ctx context.Context, circleguid int, {{rest_server_generator.get_method_params(method_params)}}) !{{method_result}}{
+    // context allows us to see who the user is and which groups the user is in
+    // context also gives a logging feature
+    // context is linked to 1 circle
+    // context is linked to a DB (OSIS)
+    panic('to be implemented')
+}
\ No newline at end of file
diff --git a/_archive/openrpc/generator/rest_server/vlang/templates/internal_crud_methods.jinja b/_archive/openrpc/generator/rest_server/vlang/templates/internal_crud_methods.jinja
new file mode 100644
index 0000000..2bb7a26
--- /dev/null
+++ b/_archive/openrpc/generator/rest_server/vlang/templates/internal_crud_methods.jinja
@@ -0,0 +1,28 @@
+pub fn {{variable_name}}_get_internal(db &backend.Indexer, circleguid int, id string) !{{type_name}}{
+    json_str := db.get(id, RootObject{
+        name: '{{type_name}}'
+    })!
+
+    return json.decode({{type_name}}, json_str)!
+}
+
+pub fn {{variable_name}}_set_internal(db &backend.Indexer, circleguid int, id string, {{variable_name}} {{type_name}}) !{
+    // update the object when an id is given, otherwise create it
+    if id != '' {
+        db.set(RootObject{
+            id: id
+            name: '{{type_name}}'
+        })!
+    } else {
+        db.new(RootObject{
+            name: '{{type_name}}'
+        })!
+    }
+}
+
+pub fn {{variable_name}}_delete_internal(db &backend.Indexer, circleguid int, id string) !{
+    db.delete(id, RootObject{
+        name: '{{type_name}}'
+    })!
+}
+
+
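The RPC server template that follows accepts the same JSON-RPC 2.0 envelope that the generated client (methods.jinja earlier in this diff) produces. A hand-rolled request against it would look roughly like this; the method name and params are illustrative:

```python
# Hypothetical manual JSON-RPC 2.0 call against the generated server below
# (localhost:8000 matches the template's default port; "story_get" and the
# params are made-up examples).
import json
import urllib.request

payload = {
    "jsonrpc": "2.0",
    "id": 0,
    "method": "story_get",
    "params": {"id": "abc123"},
}

req = urllib.request.Request(
    "http://localhost:8000/",
    data=json.dumps(payload).encode(),
    headers={"Content-Type": "application/json"},
    method="POST",
)
with urllib.request.urlopen(req) as resp:
    print(resp.read().decode())
```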
diff --git a/_archive/openrpc/generator/rest_server/vlang/templates/server.jinja b/_archive/openrpc/generator/rest_server/vlang/templates/server.jinja
new file mode 100644
index 0000000..20096ec
--- /dev/null
+++ b/_archive/openrpc/generator/rest_server/vlang/templates/server.jinja
@@ -0,0 +1,73 @@
+module main
+
+import freeflowuniverse.crystallib.baobab.backend
+import freeflowuniverse.crystallib.clients.redisclient
+import net.http
+
+struct RPCServer {
+pub mut:
+    backend &backend.Backend
+    redis &redisclient.Redis
+}
+
+fn main() {
+    // mutable: listen_and_serve takes the receiver as `mut`
+    mut rpc_server := new_server() or {
+        eprintln('failed to create server: ${err}')
+        exit(1)
+    }
+
+    rpc_server.listen_and_serve(8000) or {
+        eprintln('server error: ${err}')
+        exit(1)
+    }
+}
+
+fn new_server() !RPCServer{
+    db := start_new_backend_conn()!
+    redis_client := redisclient.new(['localhost:6379'])!
+    return RPCServer{
+        backend: &db
+        redis: &redis_client
+    }
+}
+
+fn (mut s RPCServer) listen_and_serve(port int) !{
+    mut server := &http.Server{
+        addr: 'localhost:${port}'
+        handler: s.handler
+    }
+
+    server.listen_and_serve()!
+}
+
+fn (mut s RPCServer) handler(req http.Request) http.Response{
+    if req.method != .post || req.url != '/' {
+        return http.Response{
+            status_code: 400
+            status_msg: 'Bad Request. invalid method or path'
+        }
+    }
+
+    body := req.data
+    // note: assumes a jsonrpc helper module is in scope
+    id := jsonrpc.decode_request_id(body) or {
+        return http.Response{
+            status_code: 400
+            status_msg: 'Bad Request. Cannot decode request.id ${err}'
+        }
+    }
+    method := jsonrpc.jsonrpcrequest_decode_method(body) or {
+        return http.Response{
+            status_code: 400
+            status_msg: 'Bad Request. Cannot decode request.method ${err}'
+        }
+    }
+    params_str := jsonrpc.request_params(body) or {
+        return http.Response{
+            status_code: 400
+            status_msg: 'Bad Request. Cannot decode request.params ${err}'
+        }
+    }
+
+    s.handle_rpc(id, method, params_str)
+
+    response_body := s.redis.brpop([id]) or {
+        return http.Response{
+            status_code: 500
+            status_msg: 'Internal Server Error: Server timed out while waiting for a response'
+        }
+    }
+
+    return http.Response{
+        status_code: 200
+        body: response_body
+    }
+}
\ No newline at end of file
diff --git a/_archive/openrpc/generator/server/templates/method.jinja b/_archive/openrpc/generator/server/templates/method.jinja
new file mode 100644
index 0000000..8354cbb
--- /dev/null
+++ b/_archive/openrpc/generator/server/templates/method.jinja
@@ -0,0 +1,20 @@
+@['/{{function_name}}'; post]
+pub fn (mut app App) {{ function_name }}() vweb.Result{
+    // decode the request body; a vweb.Result method cannot propagate errors with `!`
+    raw := json2.raw_decode(app.req.data) or {
+        app.set_status(400, 'Bad Request: ${err}')
+        return app.text('HTTP 400: Bad Request')
+    }
+    body := raw.as_map()
+    {% for param_name, param_type in method_params.items() %}
+    {% if rest_server_generator.lang_code_generator.is_primitive(param_type) %}
+    {{param_name}} := body['{{param_name}}'].{{param_type}}()
+    {% else %}
+    {{param_name}} := json2.decode[{{param_type}}](body['{{param_name}}'].json_str()) or {
+        app.set_status(400, 'Bad Request: ${err}')
+        return app.text('HTTP 400: Bad Request')
+    }
+    {% endif %}
+    {% endfor %}
+    res := {{function_name}}_internal({{', '.join(method_params.keys())}}) or {
+        app.set_status(500, '')
+        return app.text('HTTP 500: Internal Server Error')
+    }
+
+    return app.json(res)
+}
\ No newline at end of file
diff --git a/_archive/openrpc/generator/server/templates/method_internal.jinja b/_archive/openrpc/generator/server/templates/method_internal.jinja
new file mode 100644
index 0000000..94bad5e
--- /dev/null
+++ b/_archive/openrpc/generator/server/templates/method_internal.jinja
@@ -0,0 +1,6 @@
+module main
+
+
+pub fn {{function_name}}({{server_generator.get_method_params(method_params)}}) !{{method_result}}{
+    panic('to be implemented')
+}
\ No newline at end of file
diff --git a/_archive/openrpc/generator/server/templates/server.jinja b/_archive/openrpc/generator/server/templates/server.jinja
new file mode 100644
index 0000000..9f4cfbc
--- /dev/null
+++ b/_archive/openrpc/generator/server/templates/server.jinja
@@ -0,0 +1,11 @@
+module main
+
+import vweb
+
+struct App {}
+
+fn main() {
+    app := &App{}
+    port := 8080
+    vweb.run(app, port)
+}
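Unlike the Redis-backed RPC server, the vweb variant above exposes one POST route per generated method (see method.jinja), so a caller posts the parameters as a JSON body; the names below are illustrative:

```python
# Illustrative call to a route generated by method.jinja; the vweb server
# above listens on port 8080, and "vm_start"/"vm_guid" are example names.
import json
import urllib.request

req = urllib.request.Request(
    "http://localhost:8080/vm_start",
    data=json.dumps({"vm_guid": "1234"}).encode(),
    headers={"Content-Type": "application/json"},
    method="POST",
)
with urllib.request.urlopen(req) as resp:
    print(resp.read().decode())
```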
diff --git a/_archive/openrpc/generator/server/vlang.py b/_archive/openrpc/generator/server/vlang.py
new file mode 100644
index 0000000..3f3048e
--- /dev/null
+++ b/_archive/openrpc/generator/server/vlang.py
@@ -0,0 +1,49 @@
+import os
+from pathlib import Path
+
+from jinja2 import Environment, FileSystemLoader
+
+from heroserver.openrpc.model.openrpc_spec import OpenRPCSpec
+
+script_dir = os.path.dirname(os.path.abspath(__file__))
+env = Environment(loader=FileSystemLoader(script_dir))
+
+
+class ServerGenerator:
+    def __init__(self, spec: OpenRPCSpec, dir: Path):
+        self.spec = spec
+        self.dir = dir
+
+        self.server_template = env.get_template("templates/server.jinja")
+
+    def generate(self):
+        self.dir.mkdir(parents=True, exist_ok=True)
+
+        self.generate_server()
+        self.generate_models()
+        self.generate_methods()
+
+    def generate_server(self):
+        code = self.server_template.render()
+        server_file_path = self.dir.joinpath("server.v")
+
+        with open(server_file_path, "w") as file:
+            file.write(f"{code}")
+
+    def generate_models(self):
+        pass
+
+    def generate_methods(self):
+        pass
+
+
+if __name__ == "__main__":
+    from heroserver.openrpc.parser.parser import parser
+
+    # from heroserver.openrpc.generator.model_generator import ModelGenerator
+
+    data = parser(path="/root/code/git.threefold.info/hero_server/generatorexamples/mycelium_openrpc.yaml")
+
+    spec_object = OpenRPCSpec.load(data)
+    server_generator = ServerGenerator(spec_object, Path("/tmp/server3"))
+    server_generator.generate()
diff --git a/_archive/openrpc/model/__init__.py b/_archive/openrpc/model/__init__.py
new file mode 100644
index 0000000..071e90f
--- /dev/null
+++ b/_archive/openrpc/model/__init__.py
@@ -0,0 +1,55 @@
+import json
+
+import yaml  # type: ignore
+
+from heroserver.openrpc.model.openrpc_spec import OpenRPCSpec
+
+# def decode_openrpc(yaml_string: str) -> dict:
+#     # TODO:
+#     pass
+
+
+# def encode_openrpc(openrpc_object: dict) -> str:
+#     # TODO:
+#     pass
+
+
+def decode_openrpc_yaml(yaml_string: str) -> OpenRPCSpec:
+    # Parse YAML string into a Python dict and then convert it into an OpenRPCObject using Pydantic
+    data = yaml.safe_load(yaml_string)
+    return OpenRPCSpec.load(data)
+
+
+def encode_openrpc_yaml(openrpc_object: OpenRPCSpec) -> str:
+    # Convert the OpenRPCObject instance to a dictionary and then dump it to a YAML string
+    return yaml.dump(openrpc_object.__dict__, sort_keys=False, allow_unicode=True)
+
+
+def decode_openrpc_json(json_string: str) -> OpenRPCSpec:
+    d = json.loads(json_string)
+    return OpenRPCSpec.load(d)
+
+
+def encode_openrpc_json(openrpc_object: OpenRPCSpec) -> str:
+    # Convert the OpenRPCObject instance to a dictionary and then dump it to a JSON string
+    return json.dumps(openrpc_object.__dict__, indent=4)
+
+
+# check that the dict is well formatted
+def check(openrpc_spec: dict) -> bool:
+    # try to load the dict into an OpenRPC object
+    json_spec = json.dumps(openrpc_spec)
+    try:
+        decode_openrpc_json(json_spec)
+    except Exception:
+        return False
+    return True
+
+
+if __name__ == "__main__":
+    from heroserver.openrpc.parser.cleaner import load
+    from heroserver.openrpc.parser.parser import parser
+
+    openrpc_spec = parser(load("/root/code/git.threefold.info/projectmycelium/hero_server/lib/openrpclib/parser/examples"))
+
+    print(check(openrpc_spec))
diff --git a/_archive/openrpc/model/common.py b/_archive/openrpc/model/common.py
new file mode 100644
index 0000000..af3d5ea
--- /dev/null
+++ b/_archive/openrpc/model/common.py
@@ -0,0 +1,329 @@
+from typing import Any, Dict, List, Optional, Union
+
+
+class ReferenceObject:
+    def __init__(self, ref: str = ""):
+        self.ref = ref
+
+    @classmethod
+    def load(cls, data: Dict[str, Any]) -> "ReferenceObject":
+        return cls(ref=data.get("$ref", ""))
+
+
+class SchemaObject:
+    def __init__(
+        self,
+        title: Optional[str] = None,
+        multipleOf: Optional[float] = None,
+        maximum: Optional[float] = None,
+        exclusiveMaximum: Optional[bool] = None,
+        minimum: Optional[float] = None,
+        exclusiveMinimum: Optional[bool] = None,
+        maxLength: Optional[int] = None,
+        minLength: Optional[int] = None,
+        pattern: Optional[str] = None,
+        maxItems: Optional[int] = None,
+        minItems: Optional[int] = None,
+        uniqueItems: Optional[bool] = None,
+        maxProperties: Optional[int] = None,
+        minProperties: Optional[int] = None,
+        required: Optional[List[str]] = None,
+        enum: Optional[List[Any]] = None,
+        type: Optional[str] = None,
+        allOf: Optional[List[Union["SchemaObject", ReferenceObject]]] = None,
+        oneOf: Optional[List[Union["SchemaObject", ReferenceObject]]] = None,
+        anyOf: Optional[List[Union["SchemaObject", ReferenceObject]]] = None,
+        not_: Optional[Union["SchemaObject", ReferenceObject]] = None,
+        items: Optional[
+            Union[
+                "SchemaObject",
+                ReferenceObject,
+                List[Union["SchemaObject", ReferenceObject]],
+            ]
+        ] = None,
+        properties: Optional[Dict[str, Union["SchemaObject", ReferenceObject]]] = None,
+        additionalProperties: Optional[Union[bool, "SchemaObject"]] = None,
+        description: Optional[str] = None,
+        format: Optional[str] = None,
+        default: Optional[Any] = None,
+        xtags: Optional[List[str]] = None,
+        example: Optional[str] = None,
+    ):
+        self.title = title
+        self.multipleOf = multipleOf
+        self.maximum = maximum
+        self.exclusiveMaximum = exclusiveMaximum
+        self.minimum = minimum
+        self.exclusiveMinimum = exclusiveMinimum
+        self.maxLength = maxLength
+        self.minLength = minLength
+        self.pattern = pattern
+        self.maxItems = maxItems
+        self.minItems = minItems
+        self.uniqueItems = uniqueItems
+        self.maxProperties = maxProperties
+        self.minProperties = minProperties
+        self.required = required
+        self.enum = enum
+        self.type = type
+        self.allOf = allOf
+        self.oneOf = oneOf
+        self.anyOf = anyOf
+        self.not_ = not_
+        self.items = items
+        self.properties = properties
+        self.additionalProperties = additionalProperties
+        self.description = description
+        self.format = format
+        self.default = default
+        self.xtags = xtags
+        self.example = example
+
+    @classmethod
+    def load(cls, data: Dict[str, Any]) -> "SchemaObject":
+        return cls(
+            title=data.get("title"),
+            multipleOf=data.get("multipleOf"),
+            maximum=data.get("maximum"),
+            exclusiveMaximum=data.get("exclusiveMaximum"),
+            minimum=data.get("minimum"),
+            exclusiveMinimum=data.get("exclusiveMinimum"),
+            maxLength=data.get("maxLength"),
+            minLength=data.get("minLength"),
+            pattern=data.get("pattern"),
+            maxItems=data.get("maxItems"),
+            minItems=data.get("minItems"),
+            uniqueItems=data.get("uniqueItems"),
+            maxProperties=data.get("maxProperties"),
+            minProperties=data.get("minProperties"),
+            required=data.get("required"),
+            enum=data.get("enum"),
+            type=data.get("type"),
+            allOf=(
+                [
+                    (
+                        ReferenceObject.load(item)
+                        if "$ref" in item
+                        else SchemaObject.load(item)
+                    )
+                    for item in data.get("allOf", [])
+                ]
+                if "allOf" in data
+                else None
+            ),
+            oneOf=(
+                [
+                    (
+                        ReferenceObject.load(item)
+                        if "$ref" in item
+                        else SchemaObject.load(item)
+                    )
+                    for item in data.get("oneOf", [])
+                ]
+                if "oneOf" in data
+                else None
+            ),
+            anyOf=(
+                [
+                    (
+                        ReferenceObject.load(item)
+                        if "$ref" in item
+                        else SchemaObject.load(item)
+                    )
+                    for item in data.get("anyOf", [])
+                ]
+                if "anyOf" in data
+                else None
+            ),
+            not_=(
+                # load the "not" subschema itself, not the enclosing schema
+                (
+                    ReferenceObject.load(data["not"])
+                    if "$ref" in data["not"]
+                    else SchemaObject.load(data["not"])
+                )
+                if "not" in data
+                else None
+            ),
+            items=(
+                (
+                    ReferenceObject.load(data["items"])
+                    if "$ref" in data["items"]
+                    else SchemaObject.load(data["items"])
+                )
+                if isinstance(data.get("items"), dict)
+                else (
+                    [
+                        (
+                            ReferenceObject.load(item)
+                            if "$ref" in item
+                            else SchemaObject.load(item)
+                        )
+                        for item in data.get("items", [])
+                    ]
+                    if "items" in data
+                    else None
+                )
+            ),
+            properties=(
+                {
+                    k: (
+                        ReferenceObject.load(v) if "$ref" in v else SchemaObject.load(v)
+                    )
+                    for k, v in data.get("properties", {}).items()
+                }
+                if "properties" in data
+                else None
+            ),
+            additionalProperties=(
+                SchemaObject.load(data["additionalProperties"])
+                if isinstance(data.get("additionalProperties"), dict)
+                else data.get("additionalProperties")
+            ),
+            description=data.get("description"),
+            format=data.get("format"),
+            default=data.get("default"),
+            xtags=data.get("x-tags"),
+            example=data.get("example"),
+        )
+
+
+class ContentDescriptorObject:
+    def __init__(
+        self,
+        name: str,
+        schema: Union[SchemaObject, ReferenceObject],
+        summary: Optional[str] = None,
+        description: Optional[str] = None,
Optional[str] = None,
+        required: Optional[bool] = None,
+        deprecated: Optional[bool] = None,
+    ):
+        self.name = name
+        self.summary = summary
+        self.description = description
+        self.required = required
+        self.schema = schema
+        self.deprecated = deprecated
+
+    @classmethod
+    def load(cls, data: Dict[str, Any]) -> "ContentDescriptorObject":
+        return cls(
+            name=data["name"],
+            summary=data.get("summary"),
+            description=data.get("description"),
+            required=data.get("required"),
+            schema=(
+                ReferenceObject.load(data["schema"])
+                if "$ref" in data["schema"]
+                else SchemaObject.load(data["schema"])
+            ),
+            deprecated=data.get("deprecated"),
+        )
+
+
+class ExternalDocumentationObject:
+    def __init__(self, url: str, description: Optional[str] = None):
+        self.description = description
+        self.url = url
+
+    @classmethod
+    def load(cls, data: Dict[str, Any]) -> "ExternalDocumentationObject":
+        return cls(description=data.get("description"), url=data["url"])
+
+
+class ExampleObject:
+    def __init__(
+        self,
+        name: str,
+        summary: Optional[str] = None,
+        description: Optional[str] = None,
+        value: Optional[Any] = None,
+        externalValue: Optional[str] = None,
+    ):
+        self.name = name
+        self.summary = summary
+        self.description = description
+        self.value = value
+        self.externalValue = externalValue
+
+    @classmethod
+    def load(cls, data: Dict[str, Any]) -> "ExampleObject":
+        return cls(
+            name=data["name"],
+            summary=data.get("summary"),
+            description=data.get("description"),
+            value=data.get("value"),
+            externalValue=data.get("externalValue"),
+        )
+
+
+class ErrorObject:
+    def __init__(self, code: int, message: str, data: Optional[Any] = None):
+        self.code = code
+        self.message = message
+        self.data = data
+
+    @classmethod
+    def load(cls, data: Dict[str, Any]) -> "ErrorObject":
+        return cls(code=data["code"], message=data["message"], data=data.get("data"))
+
+
+class ExamplePairingObject:
+    def __init__(
+        self,
+        name: str,
+        result: Union[ExampleObject, ReferenceObject],
+        params: List[ExampleObject],
+        description: Optional[str] = None,
+        summary: Optional[str] = None,
+    ):
+        self.name = name
+        self.description = description
+        self.summary = summary
+        self.params = params
+        self.result = result
+
+    @classmethod
+    def load(cls, data: Dict[str, Any]) -> "ExamplePairingObject":
+        return cls(
+            name=data["name"],
+            description=data.get("description"),
+            summary=data.get("summary"),
+            params=[ExampleObject.load(item) for item in data["params"]],
+            result=(
+                ExampleObject.load(data["result"])
+                if isinstance(data["result"], dict) and "value" in data["result"]
+                else ReferenceObject.load(data["result"])
+            ),
+        )
+
+
+class TagObject:
+    def __init__(
+        self,
+        name: str,
+        summary: Optional[str] = None,
+        description: Optional[str] = None,
+        externalDocs: Optional[ExternalDocumentationObject] = None,
+    ):
+        self.name = name
+        self.summary = summary
+        self.description = description
+        self.externalDocs = externalDocs
+
+    @classmethod
+    def load(cls, data: Dict[str, Any]) -> "TagObject":
+        return cls(
+            name=data["name"],
+            summary=data.get("summary"),
+            description=data.get("description"),
+            externalDocs=(
+                ExternalDocumentationObject.load(data["externalDocs"])
+                if "externalDocs" in data
+                else None
+            ),
+        )
diff --git a/_archive/openrpc/model/components.py b/_archive/openrpc/model/components.py
new file mode 100644
index 0000000..6d4c7b6
--- /dev/null
+++ b/_archive/openrpc/model/components.py
@@ -0,0 +1,44 @@
+from typing
import Any, Dict, Union + +from heroserver.openrpc.model.common import ( + ContentDescriptorObject, + ErrorObject, + ExampleObject, + ExamplePairingObject, + ReferenceObject, + SchemaObject, + TagObject, +) +from heroserver.openrpc.model.server import LinkObject + + +class ComponentsObject: + def __init__( + self, + contentDescriptors: Dict[str, ContentDescriptorObject], + schemas: Dict[str, Union[SchemaObject, ReferenceObject]], + examples: Dict[str, ExampleObject], + links: Dict[str, LinkObject], + errors: Dict[str, ErrorObject], + examplePairingObjects: Dict[str, ExamplePairingObject], + tags: Dict[str, TagObject], + ): + self.contentDescriptors = contentDescriptors + self.schemas = schemas + self.examples = examples + self.links = links + self.errors = errors + self.examplePairingObjects = examplePairingObjects + self.tags = tags + + @classmethod + def load(cls, data: Dict[str, Any]) -> "ComponentsObject": + return cls( + contentDescriptors={k: ContentDescriptorObject.load(v) for k, v in data.get("contentDescriptors", {}).items()}, + schemas={k: ReferenceObject.load(v) if "$ref" in v else SchemaObject.load(v) for k, v in data.get("schemas", {}).items()}, + examples={k: ExampleObject.load(v) for k, v in data.get("examples", {}).items()}, + links={k: LinkObject.load(v) for k, v in data.get("links", {}).items()}, + errors={k: ErrorObject.load(v) for k, v in data.get("errors", {}).items()}, + examplePairingObjects={k: ExamplePairingObject.load(v) for k, v in data.get("examplePairingObjects", {}).items()}, + tags={k: TagObject.load(v) for k, v in data.get("tags", {}).items()}, + ) diff --git a/_archive/openrpc/model/info.py b/_archive/openrpc/model/info.py new file mode 100644 index 0000000..e3b48e6 --- /dev/null +++ b/_archive/openrpc/model/info.py @@ -0,0 +1,56 @@ +from typing import Any, Dict, Optional + + +class ContactObject: + def __init__( + self, + name: Optional[str] = None, + url: Optional[str] = None, + email: Optional[str] = None, + ): + self.name = name + self.url = url + self.email = email + + @classmethod + def load(cls, data: Dict[str, Any]) -> "ContactObject": + return cls(name=data.get("name"), url=data.get("url"), email=data.get("email")) + + +class LicenseObject: + def __init__(self, name: str, url: Optional[str] = None): + self.name = name + self.url = url + + @classmethod + def load(cls, data: Dict[str, Any]) -> "LicenseObject": + return cls(name=data["name"], url=data.get("url")) + + +class InfoObject: + def __init__( + self, + title: str, + version: str, + description: Optional[str] = None, + termsOfService: Optional[str] = None, + contact: Optional[ContactObject] = None, + license: Optional[LicenseObject] = None, + ): + self.title = title + self.description = description + self.termsOfService = termsOfService + self.contact = contact + self.license = license + self.version = version + + @classmethod + def load(cls, data: Dict[str, Any]) -> "InfoObject": + return cls( + title=data["title"], + description=data.get("description"), + termsOfService=data.get("termsOfService"), + contact=ContactObject.load(data["contact"]) if "contact" in data else None, + license=LicenseObject.load(data["license"]) if "license" in data else None, + version=data["version"], + ) diff --git a/_archive/openrpc/model/methods.py b/_archive/openrpc/model/methods.py new file mode 100644 index 0000000..ce7e7fb --- /dev/null +++ b/_archive/openrpc/model/methods.py @@ -0,0 +1,91 @@ +from typing import Any, Dict, List, Optional, Union + +from heroserver.openrpc.model.common import ( + 
ContentDescriptorObject, + ErrorObject, + ExamplePairingObject, + ExternalDocumentationObject, + ReferenceObject, + TagObject, +) +from heroserver.openrpc.model.server import LinkObject, ServerObject + + +class MethodObject: + def __init__( + self, + name: str, + params: List[Union[ContentDescriptorObject, ReferenceObject]], + result: Union[ContentDescriptorObject, ReferenceObject, None], + tags: Optional[List[Union[TagObject, ReferenceObject]]] = None, + summary: Optional[str] = None, + description: Optional[str] = None, + externalDocs: Optional[ExternalDocumentationObject] = None, + deprecated: Optional[bool] = None, + servers: Optional[List[ServerObject]] = None, + errors: Optional[List[Union[ErrorObject, ReferenceObject]]] = None, + links: Optional[List[Union[LinkObject, ReferenceObject]]] = None, + paramStructure: Optional[str] = None, + examples: Optional[List[ExamplePairingObject]] = None, + ): + self.name = name + self.tags = tags + self.summary = summary + self.description = description + self.externalDocs = externalDocs + self.params = params + self.result = result + self.deprecated = deprecated + self.servers = servers + self.errors = errors + self.links = links + self.paramStructure = paramStructure + self.examples = examples + + @classmethod + def load(cls, data: Dict[str, Any]) -> "MethodObject": + return cls( + name=data["name"], + tags=( + [ + (TagObject.load(item) if isinstance(item, dict) and "name" in item else ReferenceObject.load(item)) + for item in data.get("tags", []) + ] + if "tags" in data + else None + ), + summary=data.get("summary"), + description=data.get("description"), + externalDocs=(ExternalDocumentationObject.load(data["externalDocs"]) if "externalDocs" in data else None), + params=[ + (ContentDescriptorObject.load(item) if isinstance(item, dict) and "name" in item else ReferenceObject.load(item)) + for item in data["params"] + ], + result=( + ContentDescriptorObject.load(data["result"]) + if isinstance(data["result"], dict) and "name" in data["result"] + else ReferenceObject.load(data["result"]) + if "result" in data + else None + ), + deprecated=data.get("deprecated"), + servers=([ServerObject.load(item) for item in data.get("servers", [])] if "servers" in data else None), + errors=( + [ + (ErrorObject.load(item) if isinstance(item, dict) and "code" in item else ReferenceObject.load(item)) + for item in data.get("errors", []) + ] + if "errors" in data + else None + ), + links=( + [ + (LinkObject.load(item) if isinstance(item, dict) and "name" in item else ReferenceObject.load(item)) + for item in data.get("links", []) + ] + if "links" in data + else None + ), + paramStructure=data.get("paramStructure"), + examples=([ExamplePairingObject.load(item) for item in data.get("examples", [])] if "examples" in data else None), + ) diff --git a/_archive/openrpc/model/openrpc_spec.py b/_archive/openrpc/model/openrpc_spec.py new file mode 100644 index 0000000..3893a45 --- /dev/null +++ b/_archive/openrpc/model/openrpc_spec.py @@ -0,0 +1,98 @@ +from typing import Any, Dict, List, Optional, Union + +from heroserver.openrpc.model.common import ( + ContentDescriptorObject, + ExternalDocumentationObject, + ReferenceObject, + SchemaObject, +) +from heroserver.openrpc.model.components import ComponentsObject +from heroserver.openrpc.model.info import InfoObject +from heroserver.openrpc.model.methods import MethodObject +from heroserver.openrpc.model.server import ServerObject + +ROOT_OBJ_DEF = "!!define.root_object" + + +class OpenRPCSpec: + def __init__( + self, + 
openrpc: str, + info: InfoObject, + methods: List[MethodObject], + servers: Optional[List[ServerObject]] = None, + components: Optional[ComponentsObject] = None, + externalDocs: Optional[ExternalDocumentationObject] = None, + spec_extensions: Optional[Dict[str, Any]] = None, + ): + self.openrpc = openrpc + self.info = info + self.servers = servers + self.methods = methods + self.components = components + self.externalDocs = externalDocs + self.spec_extensions = spec_extensions + + @classmethod + def load(cls, data: Dict[str, Any]) -> "OpenRPCSpec": + return cls( + openrpc=data["openrpc"], + info=InfoObject.load(data["info"]), + servers=([ServerObject.load(item) for item in data.get("servers", [])] if "servers" in data else None), + methods=[MethodObject.load(item) for item in data["methods"]], + components=(ComponentsObject.load(data["components"]) if "components" in data else None), + externalDocs=(ExternalDocumentationObject.load(data["externalDocs"]) if "externalDocs" in data else None), + spec_extensions=data.get("spec_extensions"), + ) + + def ref_to_schema(self, ref: str) -> Union[SchemaObject, ContentDescriptorObject]: + if not ref.startswith("#/"): + raise Exception(f"invalid ref: {ref}") + + l = ref.split("/")[1:] + obj = self + for item in l: + # TODO: find root cause of RO_ + if item.startswith("RO_"): + item = item[3:] + + if isinstance(obj, Dict): + print("obj contents: ", obj) + print("Trying to access key: ", item) + obj = obj[item] + else: + obj = obj.__dict__[item] + + if not isinstance(obj, SchemaObject) and not isinstance(obj, ContentDescriptorObject): + raise Exception(f"ref to unsupported type: {ref}") + + return obj + + def get_root_objects(self) -> Dict[str, SchemaObject]: + if not self.components: + return {} + + objs: Dict[str, SchemaObject] = {} + base_ref = ["components", "schemas"] + for name, scheme in self.components.schemas.items(): + if scheme.xtags and "rootobject" in scheme.xtags: + objs["/".join(base_ref + [name.lower()])] = scheme + + return objs + + def set_root_objects(self, refs: List[str]): + for ref in refs: + obj = self.ref_to_schema(ref) + if isinstance(obj, ContentDescriptorObject): + obj = obj.schema + if isinstance(obj, ReferenceObject): + self.set_root_objects([obj.ref]) + continue + + if not obj.description: + obj.description = ROOT_OBJ_DEF + else: + obj.description += f";{ROOT_OBJ_DEF}" + + +# Note that classes that refer to themselves or each other are handled using string literals in annotations to avoid forward reference issues. Python 3.7+ supports this feature with the use of 'from __future__ import annotations'. 
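To make the entry points above concrete, here is a minimal sketch of loading a spec and resolving a reference. The spec dict is a made-up example and only uses fields handled by the loaders in this module; it is not a fixture from this repo.

```python
from heroserver.openrpc.model.openrpc_spec import OpenRPCSpec

# hypothetical spec dict, just enough structure for the loaders
spec = OpenRPCSpec.load({
    "openrpc": "1.2.6",
    "info": {"title": "demo", "version": "0.0.1"},
    "methods": [],
    "components": {
        "schemas": {"Story": {"type": "object", "x-tags": ["rootobject"]}},
    },
})

# ref_to_schema walks the '#/...' path through the loaded object tree
story = spec.ref_to_schema("#/components/schemas/Story")
print(spec.get_root_objects())  # roughly {'components/schemas/story': <SchemaObject>}
```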
diff --git a/_archive/openrpc/model/server.py b/_archive/openrpc/model/server.py new file mode 100644 index 0000000..f3fc5b6 --- /dev/null +++ b/_archive/openrpc/model/server.py @@ -0,0 +1,84 @@ +from typing import Any, Dict, List, Optional, Union + + +class ServerVariableObject: + def __init__( + self, + default: str, + enum: Optional[List[str]] = None, + description: Optional[str] = None, + ): + self.enum = enum + self.default = default + self.description = description + + @classmethod + def load(cls, data: Dict[str, Any]) -> "ServerVariableObject": + return cls( + enum=data.get("enum"), + default=data["default"], + description=data.get("description"), + ) + + +class ServerObject: + def __init__( + self, + name: str, + url: str, + summary: Optional[str] = None, + description: Optional[str] = None, + variables: Optional[Dict[str, ServerVariableObject]] = None, + ): + self.name = name + self.url = url + self.summary = summary + self.description = description + self.variables = variables + + @classmethod + def load(cls, data: Dict[str, Any]) -> "ServerObject": + variables = ( + { + k: ServerVariableObject.load(v) + for k, v in data.get("variables", {}).items() + } + if "variables" in data + else None + ) + return cls( + name=data["name"], + url=data["url"], + summary=data.get("summary"), + description=data.get("description"), + variables=variables, + ) + + +class LinkObject: + def __init__( + self, + name: str, + method: str, + params: Dict[str, Any], + description: Optional[str] = None, + summary: Optional[str] = None, + server: Optional[ServerObject] = None, + ): + self.name = name + self.description = description + self.summary = summary + self.method = method + self.params = params + self.server = server + + @classmethod + def load(cls, data: Dict[str, Any]) -> "LinkObject": + return cls( + name=data["name"], + description=data.get("description"), + summary=data.get("summary"), + method=data["method"], + params=data["params"], + server=ServerObject.load(data["server"]) if "server" in data else None, + ) diff --git a/_archive/openrpc/parser/cleaner.py b/_archive/openrpc/parser/cleaner.py new file mode 100644 index 0000000..ca15bd2 --- /dev/null +++ b/_archive/openrpc/parser/cleaner.py @@ -0,0 +1,88 @@ +import re +import os + + +# remoces pub, mut, non needed code, ... 
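Before the implementation below, a short sketch of what the cleaner is expected to do with V source. The snippet and the commented output are illustrative assumptions, not fixtures from this repo.

```python
from heroserver.openrpc.parser.cleaner import cleaner

# hypothetical V input: pub markers and function bodies get stripped,
# struct/enum/fn signatures are kept
v_source = """
pub struct Story {
pub mut:
    name string
}

pub fn story_get(id string) Story {
    return Story{}
}
"""
print(cleaner(v_source))
# roughly:
#   struct Story {
#       name string
#   }
#   fn story_get(id string) Story {
#   }
```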
+def cleaner(code: str):
+    lines = code.split("\n")
+    processed_lines = []
+    in_function = False
+    in_struct_or_enum = False
+
+    for line in lines:
+        line = line.replace("\t", " ")
+        stripped_line = line.strip()
+
+        # Skip lines starting with 'pub mut:'
+        if re.match(r"^\s*pub\s*(\s+mut\s*)?:", stripped_line):
+            continue
+
+        # Remove 'pub ' at the start of struct and function lines
+        if stripped_line.startswith("pub "):
+            line = line.lstrip()[4:]  # Remove leading spaces and 'pub '
+
+        # Check if we're entering or exiting a struct or enum
+        if re.match(r"(struct|enum)\s+\w+\s*{", stripped_line):
+            in_struct_or_enum = True
+            processed_lines.append(line)
+        elif in_struct_or_enum and "}" in stripped_line:
+            in_struct_or_enum = False
+            processed_lines.append(line)
+        elif in_struct_or_enum:
+            # Ensure consistent indentation within structs and enums
+            processed_lines.append(line)
+        else:
+            # Handle function declarations
+            if "fn " in stripped_line:
+                if "{" in stripped_line:
+                    # Function declaration and opening brace on the same line
+                    in_function = True
+                    processed_lines.append(line)
+                else:
+                    # the parser relies on `fn name(...) {` being a single line
+                    raise Exception(f"opening brace needs to be on the fn line.\n{line}")
+            elif in_function:
+                if stripped_line == "}":
+                    # Closing brace of the function
+                    in_function = False
+                    processed_lines.append("}")
+                # Skip all other lines inside the function
+            else:
+                processed_lines.append(line)
+
+    return "\n".join(processed_lines)
+
+
+def load(path: str) -> str:
+    # walk over the directory recursively and collect all .v files
+    # ignore all imports (import at start of line)
+    # ignore all module ... (module at start of line)
+    path = os.path.expanduser(path)
+    if not os.path.exists(path):
+        raise FileNotFoundError(f"The path '{path}' does not exist.")
+    all_code = []
+    # Walk over directory recursively
+    for root, _, files in os.walk(path):
+        for file in files:
+            if file.endswith(".v"):
+                file_path = os.path.join(root, file)
+                with open(file_path, "r") as f:
+                    lines = f.readlines()
+
+                # Filter out import and module lines
+                filtered_lines = [
+                    line
+                    for line in lines
+                    if not line.strip().startswith(("import", "module"))
+                ]
+
+                all_code.append("".join(filtered_lines))
+
+    return "\n\n".join(all_code)
+
+
+if __name__ == "__main__":
+    # from heroserver.openrpc.parser.example import load_example
+    code = load("~/code/git.threefold.info/projectmycelium/hero_server/lib/openrpclib/parser/examples")
+    # Parse the code
+    code = cleaner(code)
+    print(code)
diff --git a/_archive/openrpc/parser/cleaner.v b/_archive/openrpc/parser/cleaner.v
new file mode 100644
index 0000000..2685754
--- /dev/null
+++ b/_archive/openrpc/parser/cleaner.v
@@ -0,0 +1,92 @@
+module main
+
+import os
+import regex as re
+
+// Removes pub, mut, unneeded code, etc.
+fn cleaner(code string) !string {
+	lines := code.split_into_lines()
+	mut processed_lines := []string{}
+	mut in_function := false
+	mut in_struct_or_enum := false
+
+	for l in lines {
+		mut line := l.replace('\t', ' ')
+		stripped_line := line.trim_space()
+
+		// Skip lines starting with 'pub mut:'
+		if stripped_line.starts_with('pub mut:') {
+			continue
+		}
+
+		// Remove 'pub ' at the start of struct and function lines
+		if stripped_line.starts_with('pub ') {
+			line = line.trim_left(' ')[4..]
// Remove leading spaces and 'pub ' + } + + // Check if we're entering or exiting a struct or enum + mut r := re.regex_opt(r'(struct|enum)\s+\w+\s*{') or { panic(err) } + if r.matches_string(stripped_line) { + in_struct_or_enum = true + processed_lines << line + } else if in_struct_or_enum && '}' in stripped_line { + in_struct_or_enum = false + processed_lines << line + } else if in_struct_or_enum { + // Ensure consistent indentation within structs and enums + processed_lines << line + } else { + // Handle function declarations + r = re.regex_opt(r'fn\s+\w+') or { panic(err) } + if r.matches_string(stripped_line) { + if '{' in stripped_line { + // Function declaration and opening brace on the same line + in_function = true + processed_lines << line + } else { + return error('accolade needs to be in fn line.\n${line}') + } + } else if in_function { + if stripped_line == '}' { + // Closing brace of the function + in_function = false + processed_lines << '}' + } + // Skip all other lines inside the function + } else { + processed_lines << line + } + } + } + + return processed_lines.join('\n') +} + +fn load(path string) !string { + // Walk over directory, find all .v files recursively. + // Ignore all imports (import at start of line) + // Ignore all module ... (module at start of line) + path = os.expand_env(path) + if !os.exists(path) { + panic('The path "${path}" does not exist.') + } + // Walk over directory recursively + os.walk_ext(path, '.v', fn (path string, _ []os.FileInfo) { + t+=process_file(path)! +} + +fn process_file(file_path string) !string { + lines := os.read_lines(file_path) or { return err } + // Filter out import and module lines + filtered_lines := lines.filter(it !in ['import', 'module'].map(it.trim_space())) + + return filtered_lines.join('\n') +} + +fn main() { + // from heroserver.openrpc.parser.example import load_example + code := load('~/code/git.threefold.info/hero/hero_server/lib/openrpclib/parser/examples') + // Parse the code + code = cleaner(code)! 
+	println(code)
+}
diff --git a/_archive/openrpc/parser/example.py b/_archive/openrpc/parser/example.py
new file mode 100644
index 0000000..05c9d31
--- /dev/null
+++ b/_archive/openrpc/parser/example.py
@@ -0,0 +1,27 @@
+import os
+import sys
+
+
+def load_example() -> str:
+    # Start from the current working directory
+    current_dir = os.path.dirname(os.path.abspath(__file__))
+
+    examples_dir = os.path.join(current_dir, "examples")
+
+    examples = ""
+    if os.path.isdir(examples_dir):
+        examples = load_v_files(examples_dir)
+
+    return examples
+
+
+def load_v_files(path: str) -> str:
+    examples = ""
+    for entry in os.listdir(path):
+        # os.listdir returns bare names, so join with the parent path
+        entry_path = os.path.join(path, entry)
+        if os.path.isdir(entry_path):
+            examples += load_v_files(entry_path) + "\n\n"
+        elif entry.endswith(".v"):
+            with open(entry_path, "r") as file:
+                examples += file.read() + "\n"
+
+    return examples
diff --git a/_archive/openrpc/parser/includes.py b/_archive/openrpc/parser/includes.py
new file mode 100644
index 0000000..24311d7
--- /dev/null
+++ b/_archive/openrpc/parser/includes.py
@@ -0,0 +1,107 @@
+import os
+
+def includes_process_text(text):
+    lines = text.split('\n')
+    result = {}
+    current_block = None
+    current_content = []
+
+    for line in lines:
+        stripped_line = line.strip()
+        if stripped_line.startswith('<') and stripped_line.endswith('>') and not stripped_line.startswith('<end'):
+            if current_block:
+                raise Exception(f"should not come here, there needs to be <end> after a block.\n{line}")
+            # result[current_block.upper()] = '\n'.join(current_content).rstrip()
+            current_block = stripped_line[1:-1]  # Remove '<' and '>'
+            current_content = []
+        elif stripped_line == '<end>':
+            if current_block:
+                result[current_block] = '\n'.join(current_content).rstrip()
+            current_block = None
+            current_content = []
+        elif current_block is not None:
+            current_content.append(line)
+
+    if current_block:
+        raise Exception(f"should not come here, there needs to be <end> after a block.\n{line}")
+        result[current_block] = '\n'.join(current_content).rstrip()
+
+    return result
+
+def include_process_directory(path):
+    path = os.path.expanduser(path)
+    if not os.path.exists(path):
+        raise FileNotFoundError(f"The path '{path}' does not exist.")
+    all_blocks = {}
+    for root, dirs, files in os.walk(path):
+        for file in files:
+            if file.startswith('include_'):
+                file_path = os.path.join(root, file)
+                print(f" -- include {file_path}")
+                with open(file_path, 'r') as f:
+                    content = f.read()
+                blocks = includes_process_text(content)
+                all_blocks.update(blocks)
+    return all_blocks
+
+def include_process_text(input_text, block_dict):
+    lines = input_text.split('\n')
+    result_lines = []
+
+    for line in lines:
+        stripped_line = line.strip()
+        if stripped_line.startswith('//include<') and stripped_line.endswith('>'):
+            key = stripped_line[10:-1].upper()  # Extract and uppercase the key
+            if key in block_dict:
+                # Include the block exactly as it is in the dictionary
+                result_lines.append(block_dict[key])
+            else:
+                result_lines.append(f"// ERROR: Block '{key}' not found in dictionary")
+        else:
+            result_lines.append(line)
+
+    return '\n'.join(result_lines)
+
+if __name__ == "__main__":
+    # Example usage
+    input_text = """
+
+    oid string //is unique id for user in a circle, example=a7c *
+    name string //short name for swimlane'
+    time_creation int //time when signature was created, in epoch example=1711442827 *
+    comments []string //list of oid's of comments linked to this story
+
+
+
+this is my name, one line only
+
+"""
+
+    #parsed_blocks = include_parse_blocks(input_text)
+
+    includes_dict = include_process_directory("~/code/git.threefold.info/projectmycelium/hero_server/lib/openrpclib/parser/examples")
+
+
for key, value in includes_dict.items(): + print(f"{key}:") + print(value) + print() # Add a blank line between blocks for readability + + input_text = ''' +//we didn't do anything for comments yet +// +//this needs to go to description in openrpc spec +// +@[rootobject] +struct Story { + //include + content string //description of the milestone example="this is example content which gives more color" * + owners []string //list of users (oid) who are the owners of this project example="10a,g6,aa1" * + notifications []string //list of users (oid) who want to be informed of changes of this milestone example="ad3" + deadline int //epoch deadline for the milestone example="1711442827" * + projects []string //link to a projects this story belongs too + milestones []string //link to the mulestones this story belongs too +} +''' + + result = include_process_text(input_text, includes_dict) + print(result) \ No newline at end of file diff --git a/_archive/openrpc/parser/parser.py b/_archive/openrpc/parser/parser.py new file mode 100644 index 0000000..4caaed2 --- /dev/null +++ b/_archive/openrpc/parser/parser.py @@ -0,0 +1,245 @@ +import json +import re +from typing import List, Tuple + +import yaml # type: ignore + +from heroserver.openrpc.parser.cleaner import cleaner, load +from heroserver.openrpc.parser.includes import include_process_directory, include_process_text, includes_process_text +from heroserver.openrpc.parser.splitter import CodeType, splitter + +# use https://regex101.com/ + + +def parse_field_description(field_description): + # Initialize the result dictionary + result = {"description": "", "index": False, "example": None} + + # Check if the field is indexed + if field_description.strip().endswith("*"): + result["index"] = True + field_description = field_description.strip()[:-1].strip() + + # Split the description and example + parts = field_description.split("example=", 1) + + # Set the description + result["description"] = parts[0].strip() + + # Extract the example if it exists + if len(parts) > 1: + example_value = parts[1].strip() + if example_value.startswith("[") and example_value.endswith("]"): + result["example"] = json.loads(example_value) + elif example_value.isdigit(): + result["example"] = int(example_value) + else: + example_match = re.search(r'["\'](.+?)["\']', example_value) + if example_match: + result["example"] = example_match.group(1) + + return result + + +def parse_struct(struct_def): + struct_name = re.search(r"struct (\w+)", struct_def).group(1) + fields = re.findall(r"\s+(\w+)\s+([\w\[\]]+)(?:\s*\/\/(.+))?", struct_def) + return struct_name, fields + + +def parse_enum(enum_def): + enum_name = re.search(r"enum (\w+)", enum_def).group(1) + values = re.findall(r"\n\s+(\w+)", enum_def) + return enum_name, values + + +def parse_function(func_def): + # Match the function signature + match = re.search(r"fn (\w+)\((.*?)\)\s*(!?\w*)", func_def) + if match: + func_name = match.group(1) + params_str = match.group(2).strip() + return_type = match.group(3).strip() + + if return_type.startswith("RO_"): + return_type = return_type[3:] + if return_type.startswith("!RO_"): + return_type = return_type[4:] + if return_type.startswith("?RO_"): + return_type = return_type[4:] + + # print(f" -- return type: {return_type}") + + # Parse parameters + params = [] + if params_str: + # This regex handles parameters with or without type annotations + param_pattern = re.compile(r"(\w+)(?:\s+(\w+))?") + for param_match in param_pattern.finditer(params_str): + param_name, param_type = 
param_match.groups() + if param_type.startswith("RO_"): + param_type = param_type[3:] + params.append((param_name, param_type if param_type else None)) + + return func_name, params, return_type + return None, None, None + + +def get_type_schema(type_name): + if type_name.startswith("[]"): + item_type = type_name[2:] + return {"type": "array", "items": get_type_schema(item_type)} + elif type_name in ["string"]: + return {"type": "string"} + elif type_name in ["f64", "float", "f32", "f16"]: + return {"type": "number"} + elif type_name in ["int"]: + return {"type": "integer"} + elif type_name == "bool": + return {"type": "boolean"} + elif type_name == "": + return {"type": "null"} + else: + return {"$ref": f"#/components/schemas/{type_name}"} + + +def parser(code: str = "", path: str = "") -> dict: + if len(code) > 0 and len(path) > 0: + raise Exception("cannot have code and path filled in at same time") + if len(path) > 0: + code = load(path) + includes_dict = include_process_directory(path) + else: + includes_dict = includes_process_text(path) + + openrpc_spec = { + "openrpc": "1.2.6", + "info": {"title": "V Code API", "version": "1.0.0"}, + "methods": [], + "components": {"schemas": {}}, + } + + # this function just cleans the code so we have a proper input for the parser + code = cleaner(code) + + # this function is a pre-processor, it finds include blocks and adds them in + code = include_process_text(code, includes_dict) + + codeblocks = splitter(code) + + structs: List[Tuple[dict, List[str]]] = list() + enums = list() + functions = list() + + for item in codeblocks: + if item["type"] == CodeType.STRUCT: + structs.append((item["block"], item["comments"])) + if item["type"] == CodeType.ENUM: + enums.append((item["block"], item["comments"])) + if item["type"] == CodeType.FUNCTION: + functions.append((item["block"], item["comments"])) + + # Process structs and enums + for item in structs: + struct_name, fields = parse_struct(item[0]) + rootobject = False + if struct_name.startswith("RO_"): + rootobject = True + struct_name = struct_name[3:] + + openrpc_spec["components"]["schemas"][struct_name] = { + "type": "object", + "properties": {}, + } + + for field in fields: + field_name, field_type, field_description = field + parsed_description = parse_field_description(field_description) + + field_schema = { + **get_type_schema(field_type), + "description": parsed_description["description"], + } + + if parsed_description["example"]: + field_schema["example"] = parsed_description["example"] + + if parsed_description["index"]: + field_schema["x-tags"] = field_schema.get("x-tags", []) + ["indexed"] + + openrpc_spec["components"]["schemas"][struct_name]["properties"][field_name] = field_schema + + if rootobject: + openrpc_spec["components"]["schemas"][struct_name]["x-tags"] = ["rootobject"] + + functions.append((f"fn {struct_name.lower()}_get(id string) {struct_name}", [])) + functions.append((f"fn {struct_name.lower()}_set(obj {struct_name})", [])) + functions.append((f"fn {struct_name.lower()}_delete(id string)", [])) + + for item in enums: + enum_name, values = parse_enum(item[0]) + openrpc_spec["components"]["schemas"][enum_name] = { + "type": "string", + "enum": values, + } + + # print(functions) + # from IPython import embed; embed() + # Process functions + for item in functions: + func_name, params, return_type = parse_function(item[0]) + print(f"debugzooo {func_name} {params}") + if return_type: + return_type = return_type.lstrip("!") + else: + return_type = "" + + if func_name: + 
descr_return = f"Result of the {func_name} function is {return_type}" + descr_function = f"Executes the {func_name} function" + if len(item[1]) > 0: + if isinstance(item[1], list): + descr_function = "\n".join(item[1]) + else: + descr_function = "\n".join(str(element) for element in item[1:]) + method = { + "name": func_name, + "description": descr_function, + "params": [], + "result": { + "name": "result", + "description": descr_return, + "schema": get_type_schema(return_type), + }, + } + for param in params: + # from IPython import embed; embed() + if len(param) == 2: + param_name, param_type = param + method["params"].append( + { + "name": param_name, + "description": f"Parameter {param_name} of type {param_type}", + "schema": get_type_schema(param_type), + } + ) + openrpc_spec["methods"].append(method) # do it in the openrpc model + + return openrpc_spec + + +if __name__ == "__main__": + openrpc_spec = parser(path="~/code/git.threefold.info/projectmycelium/hero_server/generatorexamples/example1/specs") + out = json.dumps(openrpc_spec, indent=2) + # print(out) + + filename = "/tmp/openrpc_spec.json" + # Write the spec to the file + with open(filename, "w") as f: + f.write(out) + print(f"OpenRPC specification (JSON) has been written to: {filename}") + + yaml_filename = "/tmp/openrpc_spec.yaml" + with open(yaml_filename, "w") as f: + yaml.dump(openrpc_spec, f, sort_keys=False) + print(f"OpenRPC specification (YAML) has been written to: {yaml_filename}") diff --git a/_archive/openrpc/parser/splitter.py b/_archive/openrpc/parser/splitter.py new file mode 100644 index 0000000..230ec71 --- /dev/null +++ b/_archive/openrpc/parser/splitter.py @@ -0,0 +1,80 @@ +from enum import Enum + +from heroserver.openrpc.parser.cleaner import cleaner + + +class CodeType(Enum): + STRUCT = "struct" + ENUM = "enum" + FUNCTION = "function" + + +def splitter(code: str): + lines = code.split("\n") + result = [] + current_block = None + current_comments = [] + + for line in lines: + line = line.replace("\t", " ") + stripped_line = line.strip() + + if stripped_line.startswith("//"): + current_comments.append(stripped_line[2:].strip()) + elif stripped_line.startswith("struct "): + if current_block: + result.append(current_block) + current_block = { + "type": CodeType.STRUCT, + "comments": current_comments, + "block": line, + } + current_comments = [] + elif stripped_line.startswith("enum "): + if current_block: + result.append(current_block) + current_block = { + "type": CodeType.ENUM, + "comments": current_comments, + "block": line, + } + current_comments = [] + elif stripped_line.startswith("fn "): + if current_block: + result.append(current_block) + current_block = { + "type": CodeType.FUNCTION, + "comments": current_comments, + "block": line.split("{")[0].strip(), + } + current_comments = [] + elif current_block: + if current_block["type"] == CodeType.STRUCT and stripped_line == "}": + current_block["block"] += "\n" + line + result.append(current_block) + current_block = None + elif current_block["type"] == CodeType.ENUM and stripped_line == "}": + current_block["block"] += "\n" + line + result.append(current_block) + current_block = None + elif current_block["type"] in [CodeType.STRUCT, CodeType.ENUM]: + current_block["block"] += "\n" + line + + if current_block: + result.append(current_block) + + return result + + +if __name__ == "__main__": + from heroserver.openrpc.parser.cleaner import load + + code = load("/root/code/git.threefold.info/projectmycelium/hero_server/lib/openrpclib/parser/examples") + code = 
cleaner(code) + # Test the function + parsed_code = splitter(code) + for item in parsed_code: + print(f"Type: {item['type']}") + print(f"Comments: {item['comments']}") + print(f"Block:\n{item['block']}") + print("-" * 50) diff --git a/_archive/openrpc/readme.md b/_archive/openrpc/readme.md new file mode 100644 index 0000000..1e58ea1 --- /dev/null +++ b/_archive/openrpc/readme.md @@ -0,0 +1,19 @@ +## example how to use + +```python + +from openrpc import openrpc_spec_write + +#load all the specs and write the result in a dir +openrpc_spec = openrpc_spec_write( + path="~/code/git.threefold.info/projectmycelium/hero_server/generatorexamples/example1/specs" + dest="/tmp/openrpc/example1" +) + +``` + +## internal process + +- first we clean the code to only have relevant parts +- then we find the blocks, can be function, enum or struct +- then we parse the blocks diff --git a/_archive/openrpc/tools/__init__.py b/_archive/openrpc/tools/__init__.py new file mode 100644 index 0000000..76ebb17 --- /dev/null +++ b/_archive/openrpc/tools/__init__.py @@ -0,0 +1,3 @@ +from .tools_py import create_example_object, get_pydantic_type, get_return_type, topological_sort + +__all__ = ["get_pydantic_type", "get_return_type", "topological_sort", "create_example_object"] diff --git a/_archive/openrpc/tools/py.typed b/_archive/openrpc/tools/py.typed new file mode 100644 index 0000000..e69de29 diff --git a/_archive/openrpc/tools/test_imports.py b/_archive/openrpc/tools/test_imports.py new file mode 100644 index 0000000..d5d666e --- /dev/null +++ b/_archive/openrpc/tools/test_imports.py @@ -0,0 +1,6 @@ +from heroserver.openrpc.tools import get_pydantic_type + +# Simple test schema +test_schema = {"type": "string", "format": "email"} +result = get_pydantic_type(test_schema) +print(f"Test passed: {result == 'Email'}") diff --git a/_archive/openrpc/tools/tools_py.py b/_archive/openrpc/tools/tools_py.py new file mode 100644 index 0000000..a7d601c --- /dev/null +++ b/_archive/openrpc/tools/tools_py.py @@ -0,0 +1,128 @@ +from inspect import isclass +from typing import Any, Dict, List + +from pydantic import BaseModel + + +def get_pydantic_type(schema: Dict[str, Any]) -> str: + """ + Convert OpenRPC schema types to Pydantic types. + + Args: + schema: OpenRPC schema dictionary + + Returns: + String representation of the Pydantic type + """ + if "type" in schema: + if schema["type"] == "string": + if "format" in schema and schema["format"] == "email": + return "Email" + return "str" + elif schema["type"] == "integer": + return "int" + elif schema["type"] == "array": + items_type = get_pydantic_type(schema["items"]) + return f"List[{items_type}]" + elif schema["type"] == "object": + return "dict" + elif schema["type"] == "boolean": + return "bool" + elif schema["type"] == "null": + return "None" + elif "$ref" in schema: + ref_name = schema["$ref"].split("/")[-1] + return ref_name + elif "anyOf" in schema: + types = [get_pydantic_type(sub_schema) for sub_schema in schema["anyOf"]] + if "None" in types: + # Remove 'None' from the types list + types = [t for t in types if t != "None"] + if len(types) == 1: + return f"Optional[{types[0]}]" + else: + return f"Optional[Union[{', '.join(types)}]]" + else: + return f"Union[{', '.join(types)}]" + + return "Any" + + +def get_return_type(method_result: Dict[str, Any]) -> str: + """ + Get the return type from a method result schema. 
+ + Args: + method_result: Method result dictionary containing schema or $ref + + Returns: + String representation of the return type + """ + if "schema" in method_result: + schema = method_result["schema"] + if "type" in schema: + return get_pydantic_type(schema) + elif "$ref" in schema: + ref_name = schema["$ref"].split("/")[-1] + return ref_name + elif "anyOf" in schema: + schema_list = schema["anyOf"] + if isinstance(schema_list, list): + return " | ".join(get_pydantic_type(sub_schema) for sub_schema in schema_list) + return "Any" + elif "$ref" in method_result: # Handle $ref at the top level + ref_path = method_result["$ref"] + if isinstance(ref_path, str): + return ref_path.split("/")[-1] + return "" + + +def topological_sort(schema_dict: Dict[str, Any]) -> List[str]: + visited = set() + stack = [] + sorted_classes = [] + + def dfs(class_name: str) -> None: + visited.add(class_name) + if class_name in schema_dict: + for prop in schema_dict[class_name].get("properties", {}).values(): + if "$ref" in prop: + ref_name = prop["$ref"].split("/")[-1] + if ref_name not in visited: + dfs(ref_name) + stack.append(class_name) + + for class_name in schema_dict: + if class_name not in visited: + dfs(class_name) + + while stack: + sorted_classes.append(stack.pop()) + + return sorted_classes + + +def create_example_object(cls: type[BaseModel]) -> BaseModel: + """ + Create an example object from a Pydantic model class using field examples. + + Args: + cls: A Pydantic BaseModel class + + Returns: + An instance of the provided model class with example data + + Raises: + ValueError: If cls is not a valid Pydantic BaseModel class + """ + if not isclass(cls) or not issubclass(cls, BaseModel): + raise ValueError(f"{cls} is not a valid pydantic BaseModel class.") + + example_data = {} + + for field_name, field_info in cls.model_fields.items(): + examples = field_info.examples + if examples: + example_data[field_name] = examples[0] + + return cls(**example_data) diff --git a/_archive/osis/__init__.py b/_archive/osis/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/_archive/osis/base.py b/_archive/osis/base.py new file mode 100644 index 0000000..3626543 --- /dev/null +++ b/_archive/osis/base.py @@ -0,0 +1,552 @@ +import datetime +import os +import yaml +import uuid +import json +import hashlib +from typing import TypeVar, Generic, List, Optional +from pydantic import BaseModel, StrictStr, Field +from sqlalchemy.ext.declarative import declarative_base +from osis.datatools import normalize_email, normalize_phone +from sqlalchemy import ( + create_engine, + Column, + Integer, + String, + DateTime, + TIMESTAMP, + func, + Boolean, + Date, + inspect, + text, + bindparam, +) +from sqlalchemy.orm import sessionmaker +from sqlalchemy.dialects.postgresql import JSONB, JSON +import logging +from termcolor import colored +from osis.db import DB, DBType # type: ignore + +# Set up logging +logging.basicConfig(level=logging.INFO, format="%(message)s") +logger = logging.getLogger(__name__) + + +def calculate_months( + investment_date: datetime.date, conversion_date: datetime.date +) -> float: + delta = conversion_date - investment_date + days_in_month = 30.44 + months = delta.days / days_in_month + return months + + +def indexed_field(cls): + cls.__index_fields__ = dict() + for name, field in cls.__fields__.items(): + if field.json_schema_extra is not None: + for cat in ["index", "indexft", "indexphone", "indexemail", "human"]: + if field.json_schema_extra.get(cat, False): + if name not in 
cls.__index_fields__:
+                        cls.__index_fields__[name] = dict()
+                    # print(f"{cls.__name__} found index name:{name} cat:{cat}")
+                    cls.__index_fields__[name][cat] = field.annotation
+                    if cat in ["indexphone", "indexemail"]:
+                        cls.__index_fields__[name]["indexft"] = field.annotation
+
+    return cls
+
+
+@indexed_field
+class MyBaseModel(BaseModel):
+    id: str = Field(default_factory=lambda: str(uuid.uuid4()))
+    name: StrictStr = Field(default="", index=True, human=True)
+    description: StrictStr = Field(default="")
+    lasthash: StrictStr = Field(default="")
+    creation_date: int = Field(
+        default_factory=lambda: int(datetime.datetime.now().timestamp())
+    )
+    mod_date: int = Field(
+        default_factory=lambda: int(datetime.datetime.now().timestamp())
+    )
+
+    def pre_save(self):
+        self.mod_date = int(datetime.datetime.now().timestamp())
+        print("pre-save")
+        # for fieldname, typedict in self.__class__.__index_fields__.items():
+        #     v = self.__dict__[fieldname]
+        #     if 'indexphone' in typedict:
+        #         self.__dict__[fieldname] = [normalize_phone(i) for i in v.split(",")].uniq()
+        #     if 'indexemail' in typedict:
+        #         self.__dict__[fieldname] = [normalize_email(i) for i in v.split(",")].uniq()
+
+        #     return ",".join(emails)
+
+        #     print(field)
+        #     #if field not in ["id", "name","creation_date", "mod_date"]:
+        #     from IPython import embed; embed()
+
+    def yaml_get(self) -> str:
+        data = self.dict()
+        return yaml.dump(data, sort_keys=True, default_flow_style=False)
+
+    def json_get(self) -> str:
+        data = self.dict()
+        # return self.model_dump_json()
+        return json.dumps(data, sort_keys=True, indent=2)
+
+    def hash(self) -> str:
+        data = self.dict()
+        data.pop("lasthash")
+        data.pop("mod_date")
+        data.pop("creation_date")
+        data.pop("id")
+        yaml_string = yaml.dump(data, sort_keys=True, default_flow_style=False)
+        # Encode the YAML string to bytes using UTF-8 encoding
+        yaml_bytes = yaml_string.encode("utf-8")
+        self.lasthash = hashlib.md5(yaml_bytes).hexdigest()
+        return self.lasthash
+
+    def doc_id(self, partition: str) -> str:
+        return f"{partition}:{self.id}"
+
+    def __str__(self):
+        return self.json_get()
+
+
+T = TypeVar("T", bound=MyBaseModel)
+
+
+class MyBaseFactory(Generic[T]):
+    def __init__(
+        self,
+        model_cls: type[T],
+        db: DB,
+        use_fs: bool = True,
+        keep_history: bool = False,
+        reset: bool = False,
+        load: bool = False,
+        human_readable: bool = True,
+    ):
+        self.mycat = model_cls.__name__.lower()
+        self.description = ""
+        self.model_cls = model_cls
+        self.engine = create_engine(db.cfg.url())
+        self.Session = sessionmaker(bind=self.engine)
+        self.use_fs = use_fs
+        self.human_readable = human_readable
+        self.keep_history = keep_history
+        self.db = db
+        dbcat = db.dbcat_new(cat=self.mycat, reset=reset)
+        self.db_cat = dbcat
+        self.ft_table_name = f"{self.mycat}_ft"
+
+        self._init_db_schema(reset=reset)
+
+        if self.use_fs:
+            self._check_db_schema()
+        else:
+            if not self._check_db_schema_ok():
+                raise RuntimeError(
+                    "DB schema no longer matches the model; a migration path is still needed"
+                )
+
+        if reset:
+            self.db_cat.reset()
+            self._reset_db()
+
+        if load:
+            self.load()
+
+    def _reset_db(self):
+        logger.info(colored("Resetting database...", "red"))
+        with self.engine.connect() as connection:
+            cascade = ""
+            if self.db.cfg.db_type == DBType.POSTGRESQL:
+                cascade = " CASCADE"
+            connection.execute(text(f'DROP TABLE IF EXISTS "{self.mycat}"{cascade}'))
+            if self.keep_history:
+                connection.execute(
+                    text(f'DROP TABLE IF EXISTS "{self.mycat}_history" {cascade}')
+                )
+            connection.commit()
+        self._init_db_schema()
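A sketch of how the `@indexed_field` decorator and `MyBaseModel` above are meant to be combined. `Contact` is a hypothetical model; only the field flags matter, and the printed mapping is indicative.

```python
from pydantic import Field, StrictStr

# flags passed to Field() land in json_schema_extra, which the decorator scans
@indexed_field
class Contact(MyBaseModel):
    email: StrictStr = Field(default="", indexemail=True)
    phone: StrictStr = Field(default="", indexphone=True)
    nick: StrictStr = Field(default="", index=True, human=True)

print(Contact.__index_fields__)
# roughly: {'name':  {'index': ..., 'human': ...},
#           'email': {'indexemail': ..., 'indexft': ...},
#           'phone': {'indexphone': ..., 'indexft': ...},
#           'nick':  {'index': ..., 'human': ...}}
```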
+
+    def _init_db_schema(self, reset: bool = False):
+        # first make sure table is created if needed
+        inspector = inspect(self.engine)
+        if inspector.has_table(self.mycat):
+            if reset:
+                self._reset_db()
+            else:
+                print(f"Table {self.mycat} already exists.")
+            return
+
+        Base = declarative_base()
+
+        def create_model(tablename):
+            class MyModel(Base):
+                __tablename__ = tablename
+                id = Column(String, primary_key=True)
+                name = Column(String, index=True)
+                creation_date = Column(Integer, index=True)
+                mod_date = Column(Integer, index=True)
+                hash = Column(String, index=True)
+                data = Column(JSON)
+                version = Column(Integer)
+                index_fields = self.model_cls.__index_fields__
+                for field, index_types in index_fields.items():
+                    if "index" in index_types:
+                        field_type = index_types["index"]
+                        if field not in ["id", "name", "creation_date", "mod_date"]:
+                            if field_type == int:
+                                locals()[field] = Column(Integer, index=True)
+                            elif field_type == datetime.date:
+                                locals()[field] = Column(Date, index=True)
+                            elif field_type == bool:
+                                locals()[field] = Column(Boolean, index=True)
+                            else:
+                                locals()[field] = Column(String, index=True)
+
+            create_model_ft()
+            return MyModel
+
+        def create_model_ft():
+            index_fields = self.model_cls.__index_fields__
+            toindex: List[str] = []
+            for fieldnam, index_types in index_fields.items():
+                if "indexft" in index_types:
+                    toindex.append(fieldnam)
+            if len(toindex) > 0:
+                with self.engine.connect() as connection:
+                    result = connection.execute(
+                        text(
+                            "SELECT name FROM sqlite_master WHERE type='table' AND name=:table_name"
+                        ),
+                        {"table_name": self.ft_table_name},
+                    )
+                    if result.fetchone() is None:
+                        # table does not exist yet; SQLite cannot bind table or
+                        # column names, so the statement is built as a string.
+                        # id is included so the insert/delete in set()/delete()
+                        # line up with the virtual table
+                        connection.execute(
+                            text(
+                                f"CREATE VIRTUAL TABLE {self.ft_table_name} "
+                                f"USING fts5(id, {', '.join(toindex)})"
+                            )
+                        )
+
+        self.table_model = create_model(self.mycat)
+
+        if self.keep_history:
+            self.history_table_model = create_model(f"{self.mycat}_history")
+
+        Base.metadata.create_all(self.engine)
+
+    def _check_db_schema_ok(self) -> bool:
+        inspector = inspect(self.engine)
+        table_name = self.table_model.__tablename__
+
+        # Get columns from the database
+        db_columns = {col["name"]: col for col in inspector.get_columns(table_name)}
+
+        # Get columns from the model
+        model_columns = {c.name: c for c in self.table_model.__table__.columns}
+
+        # Check for columns in model but not in db
+        for col_name, col in model_columns.items():
+            if col_name not in db_columns:
+                logger.info(
+                    colored(
+                        f"Column '{col_name}' exists in model but not in database",
+                        "red",
+                    )
+                )
+                return False
+            else:
+                # Check column type
+                db_col = db_columns[col_name]
+                if str(col.type) != str(db_col["type"]):
+                    logger.info(
+                        colored(
+                            f"Column '{col_name}' type mismatch: Model {col.type}, DB {db_col['type']}",
+                            "red",
+                        )
+                    )
+                    return False
+
+        # Check for columns in db but not in model
+        for col_name in db_columns:
+            if col_name not in model_columns:
+                logger.info(
+                    colored(
+                        f"Column '{col_name}' exists in database but not in model",
+                        "red",
+                    )
+                )
+                return False
+        return True
+
+    def _check_db_schema(self):
+        # check if the schema is ok; if not, reload from the filesystem
+        if self._check_db_schema_ok():
+            return
+        self.load()
+
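A sketch of the factory lifecycle implemented by the CRUD methods that follow. The wiring is assumed and builds on the hypothetical `Contact` model sketched above; `DB`, `DBConfig`, and `DBType` come from `osis.db` further down in this archive.

```python
from osis.db import DB, DBConfig, DBType

db = DB(cfg=DBConfig(db_type=DBType.SQLITE, db_path="/tmp/db"), path="/tmp/osis")
factory = MyBaseFactory(model_cls=Contact, db=db, use_fs=True, reset=True)

c = factory.new(name="kristof", email="kristof@example.com")
factory.set(c)                    # writes the yaml file plus the index row
again = factory.get(id=c.id)      # reads back through the index
hits = factory.list(name="kris")  # ilike match on the indexed name column
```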
+    def new(self, name: str = "", **kwargs) -> T:
+        o = self.model_cls(name=name, **kwargs)
+        return o
+
+    def _encode(self, item: T) -> dict:
+        return item.model_dump()
+
+    def _decode(self, data: str) -> T:
+        if self.use_fs:
+            return self.model_cls(**yaml.load(data, Loader=yaml.Loader))
+        else:
+            return self.model_cls(**json.loads(data))
+
+    def get(self, id: str = "") -> T:
+        if not isinstance(id, str):
+            raise ValueError(f"id needs to be str. Now: {id}")
+        session = self.Session()
+        result = session.query(self.table_model).filter_by(id=id).first()
+        session.close()
+        if result:
+            if self.use_fs:
+                data = self.db_cat.get(id=id)
+            else:
+                data = result.data
+            return self._decode(data)
+        raise ValueError(f"can't find {self.mycat}:{id}")
+
+    def exists(self, id: str = "") -> bool:
+        if not isinstance(id, str):
+            raise ValueError(f"id needs to be str. Now: {id}")
+        session = self.Session()
+        result = session.query(self.table_model).filter_by(id=id).first()
+        session.close()
+        return result is not None
+
+    def get_by_name(self, name: str) -> Optional[T]:
+        r = self.list(name=name)
+        if len(r) > 1:
+            raise ValueError(f"found more than 1 object with name {name}")
+        if len(r) < 1:
+            raise ValueError(f"object not found with name {name}")
+        return r[0]
+
+    def set(self, item: T, ignorefs: bool = False):
+        item.pre_save()
+        new_hash = item.hash()
+
+        session = self.Session()
+        db_item = session.query(self.table_model).filter_by(id=item.id).first()
+        data = item.model_dump()
+
+        # full-text values always come from the incoming item; db_item can be None here
+        index_fields = self.model_cls.__index_fields__
+        to_ft_index: List[str] = []
+        ft_field_values = [f"'{item.id}'"]
+        for field_name, index_types in index_fields.items():
+            if "indexft" in index_types:
+                to_ft_index.append(field_name)
+                ft_field_values.append(f"'{getattr(item, field_name)}'")
+
+        if db_item:
+            if db_item.hash != new_hash:
+                db_item.name = item.name
+                db_item.mod_date = item.mod_date
+                db_item.creation_date = item.creation_date
+                db_item.hash = new_hash
+                if not self.use_fs:
+                    db_item.data = data
+
+                # Update indexed fields
+                for field, cats in self.model_cls.__index_fields__.items():
+                    if field not in ["id", "name", "creation_date", "mod_date"]:
+                        if "indexft" in cats:
+                            # scope the full-text update to this record
+                            session.execute(
+                                text(
+                                    f"UPDATE {self.ft_table_name} SET {field} = '{getattr(item, field)}' "
+                                    f"WHERE id = '{item.id}'"
+                                )
+                            )
+                        setattr(db_item, field, getattr(item, field))
+
+                if self.keep_history and not self.use_fs:
+                    version = (
+                        session.query(func.max(self.history_table_model.version))
+                        .filter_by(id=item.id)
+                        .scalar()
+                        or 0
+                    )
+                    history_item = self.history_table_model(
+                        id=f"{item.id}_{version + 1}",
+                        name=item.name,
+                        creation_date=item.creation_date,
+                        mod_date=item.mod_date,
+                        hash=new_hash,
+                        data=data,
+                        version=version + 1,
+                    )
+                    session.add(history_item)
+
+            if not ignorefs and self.use_fs:
+                self.db_cat.set(data=item.yaml_get(), id=item.id)
+        else:
+            db_item = self.table_model(
+                id=item.id,
+                name=item.name,
+                creation_date=item.creation_date,
+                mod_date=item.mod_date,
+                hash=new_hash,
+            )
+            if not self.use_fs:
+                db_item.data = item.json_get()
+            session.add(db_item)
+
+            if to_ft_index:
+                session.execute(
+                    text(
+                        f'INSERT INTO {self.ft_table_name} (id, {", ".join(to_ft_index)}) '
+                        f'VALUES ({", ".join(ft_field_values)})'
+                    )
+                )
+
+            if not ignorefs and self.use_fs:
+                self.db_cat.set(
+                    data=item.yaml_get(), id=item.id, humanid=self._human_name_get(item)
+                )
+
+            # Set indexed fields
+            for field, cats in self.model_cls.__index_fields__.items():
+                if field not in ["id", "name", "creation_date", "mod_date"]:
+                    setattr(db_item, field, getattr(item, field))
+            session.add(db_item)
+
+        session.commit()
+        session.close()
+
+    # used for a symlink so it's easy for a human to edit
+    def _human_name_get(self, item: T) -> str:
+        humanname = ""
+        if self.human_readable:
+            # human-flagged fields are recorded by @indexed_field in __index_fields__
+            for fieldhuman, cats in self.model_cls.__index_fields__.items():
+                if "human" not in cats:
+                    continue
+                if fieldhuman not in ["id", "creation_date", "mod_date"]:
+                    humanname += f"{item.__getattribute__(fieldhuman)}_"
+            humanname = humanname.rstrip("_")
+            if humanname == "":
+                raise Exception(f"humanname should not be empty for {item}")
+        return humanname
+
+    def delete(self, id: str):
+        if not isinstance(id, str):
+            raise ValueError(f"id needs to be str. Now: {id}")
+
+        # resolve the human readable name before the record disappears,
+        # so the filesystem symlink can still be removed
+        humanid = ""
+        if self.use_fs and self.exists(id):
+            item = self.get(id)
+            humanid = self._human_name_get(item)
+
+        session = self.Session()
+        result = session.query(self.table_model).filter_by(id=id).delete()
+        session.execute(text(f"DELETE FROM {self.ft_table_name} WHERE id = '{id}'"))
+        session.commit()
+        session.close()
+        if result > 1:
+            raise ValueError(f"multiple values deleted with id {id}")
+        elif result == 0:
+            raise ValueError(f"no record found with id {id}")
+
+        if self.use_fs:
+            self.db_cat.delete(id=id, humanid=humanid)
+
+    def list(
+        self, id: Optional[str] = None, name: Optional[str] = None, **kwargs
+    ) -> List[T]:
+        session = self.Session()
+        query = session.query(self.table_model)
+        if id:
+            query = query.filter(self.table_model.id == id)
+        if name:
+            query = query.filter(self.table_model.name.ilike(f"%{name}%"))
+
+        index_fields = self.model_cls.__index_fields__
+        for key, value in kwargs.items():
+            if value is None:
+                continue
+            if self.use_fs:
+                query = query.filter(getattr(self.table_model, key) == value)
+            else:
+                if key in index_fields and "indexft" in index_fields[key]:
+                    result = session.execute(
+                        text(f'SELECT id FROM {self.ft_table_name} WHERE {key} MATCH "{value}"')
+                    )
+                    ids = [row[0] for row in result]
+                    query = query.filter(self.table_model.id.in_(ids))
+                else:
+                    query = query.filter(
+                        self.table_model.data[key].astext.ilike(f"%{value}%")
+                    )
+        results = query.all()
+        session.close()
+
+        items = []
+        for result in results:
+            items.append(self.get(id=result.id))
+
+        return items
+
+    def load(self, reset: bool = False):
+        if self.use_fs:
+            logger.info(colored("Reload DB.", "green"))
+            if reset:
+                self._reset_db()
+
+            # Get all IDs and hashes from the database
+            session = self.Session()
+            db_items = {
+                item.id: item.hash
+                for item in session.query(
+                    self.table_model.id, self.table_model.hash
+                ).all()
+            }
+            session.close()
+
+            done = []
+
+            for root, _, files in os.walk(self.db.path):
+                for file in files:
+                    if file.endswith(".yaml"):
+                        file_path = os.path.join(root, file)
+                        with open(file_path, "r") as f:
+                            data = f.read()  # _decode expects the raw YAML string
+                        obj = self._decode(data)
+                        myhash = obj.hash()
+
+                        if reset:
+                            self.set(obj, ignorefs=True)
+                        else:
+                            if obj.id in db_items:
+                                if db_items[obj.id] != myhash:
+                                    # Hash mismatch, update the database record
+                                    self.set(obj, ignorefs=True)
+                            else:
+                                # New item, add to database
+                                self.set(obj, ignorefs=True)
+
+                        done.append(obj.id)
diff --git a/_archive/osis/conf.py b/_archive/osis/conf.py
new file mode 100644
index 0000000..7011cea
--- /dev/null
+++ b/_archive/osis/conf.py
@@ -0,0 +1,27 @@
+# Configuration file for the Sphinx documentation builder.
+# +# For the full list of built-in configuration values, see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +# -- Project information ----------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information + +project = 'osis' +copyright = '2024, kristof' +author = 'kristof' + +# -- General configuration --------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration + +extensions = [] + +templates_path = ['_templates'] +exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] + + + +# -- Options for HTML output ------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output + +html_theme = 'alabaster' +html_static_path = ['_static'] diff --git a/_archive/osis/datatools.py b/_archive/osis/datatools.py new file mode 100644 index 0000000..c4be2e0 --- /dev/null +++ b/_archive/osis/datatools.py @@ -0,0 +1,13 @@ +import re + +def normalize_email(email: str) -> str: + # Normalize email by stripping spaces and converting to lower case + #EmailStr.validate(email) #TODO + return email.strip().lower() + +def normalize_phone(phone: str) -> str: + # Normalize phone number by removing dots and spaces, and ensure it matches the pattern + + phone = phone.replace(".", "").replace(" ", "") + if not re.match(r"^\+\d+$", phone): + raise ValueError(f"Invalid phone number: {phone}") + return phone diff --git a/_archive/osis/db.py b/_archive/osis/db.py new file mode 100644 index 0000000..35c5f29 --- /dev/null +++ b/_archive/osis/db.py @@ -0,0 +1,361 @@ +import os +import shutil +import logging +from termcolor import colored +from herotools.pathtools import expand_path +import psycopg2 +from psycopg2.extensions import ISOLATION_LEVEL_AUTOCOMMIT +from osis.id import int_to_id +from psycopg2.extras import DictCursor + +import sqlite3 +from enum import Enum + +# Set up logging +logging.basicConfig(level=logging.INFO, format="%(message)s") +logger = logging.getLogger(__name__) + + +class DBCat: + def __init__(self, path: str, cat: str): + path = expand_path(path) + self.path_id = os.path.join(path, "id", cat) + self.path_human = os.path.join(path, "human", cat) + self.path = path + self._init() + + def _init(self): + os.makedirs(self.path_id, exist_ok=True) + os.makedirs(self.path_human, exist_ok=True) + + def reset(self): + if os.path.exists(self.path_id): + shutil.rmtree(self.path_id, ignore_errors=True) + if os.path.exists(self.path_human): + shutil.rmtree(self.path_human, ignore_errors=True) + self._init() + + def _get_path_id(self, id: str) -> str: + id1 = id[:2] + dir_path = os.path.join(self.path_id, id1) + file_path = os.path.join(dir_path, f"{id}.yaml") + os.makedirs(dir_path, exist_ok=True) + return file_path + + def set(self, id: str, data: str, humanid: str = ""): + fs_path = self._get_path_id(id=id) + with open(fs_path, "w") as f: + f.write(data) + if humanid != "": + human_file_path = os.path.join(self.path_human, humanid) + # Create a symbolic link + try: + os.symlink(fs_path, human_file_path) + except FileExistsError: + # If the symlink already exists, we can either ignore it or update it + if not os.path.islink(human_file_path): + raise # If it's not a symlink, re-raise the exception + os.remove(human_file_path) # Remove the existing symlink + os.symlink(fs_path, human_file_path) # Create a new symlink + return fs_path + + def get(self, id: str) -> 
str: + fs_path = self._get_path_id(id=id) + with open(fs_path, "r") as f: + return f.read() + + def delete(self, id: str, humanid: str = ""): + fs_path = self._get_path_id(id=id) + os.remove(fs_path) + if humanid != "": + human_file_path = os.path.join(self.path_human, humanid) + os.remove(human_file_path) + +class DBType(Enum): + SQLITE = "sqlite" + POSTGRESQL = "postgresql" + +class DBConfig: + def __init__( + self, + db_type: DBType = DBType.POSTGRESQL, + db_name: str = "main", + db_login: str = "admin", + db_passwd: str = "admin", + db_addr: str = "localhost", + db_port: int = 5432, + db_path: str = "/tmp/db" + ): + self.db_type = db_type + self.db_name = db_name + self.db_login = db_login + self.db_passwd = db_passwd + self.db_addr = db_addr + self.db_port = db_port + self.db_path = expand_path(db_path) + + def __str__(self): + return (f"DBConfig(db_name='{self.db_name}', db_login='{self.db_login}', " + f"db_addr='{self.db_addr}', db_port={self.db_port}, db_path='{self.db_path}')") + + def __repr__(self): + return self.__str__() + + + def url(self) -> str: + if self.db_type == DBType.POSTGRESQL: + return f"postgresql://{self.db_login}:{self.db_passwd}@{self.db_addr}:{self.db_port}/{self.db_name}" + elif self.db_type == DBType.SQLITE: + return f"sqlite:///{self.db_path}/{self.db_name}.db" + else: + raise ValueError(f"Unsupported database type: {self.db_type}") + +class DB: + def __init__(self,cfg:DBConfig , path: str, reset: bool = False): + self.cfg = cfg + self.path = expand_path(path) + self.path_id = os.path.join(self.path, "id") + self.path_human = os.path.join(self.path, "human") + self.dbcats = dict[str, DBCat]() + + if reset: + self.reset() + else: + self._init() + + def reset(self): + if os.path.exists(self.path_id): + shutil.rmtree(self.path_id, ignore_errors=True) + logger.info(colored(f"Removed db dir: {self.path_id}", "red")) + if os.path.exists(self.path_human): + shutil.rmtree(self.path_human, ignore_errors=True) + logger.info(colored(f"Removed db dir: {self.path_human}", "red")) + if self.cfg.db_type == DBType.POSTGRESQL: + conn=self.db_connection() + cur = conn.cursor() + cur.execute("SELECT 1 FROM pg_database WHERE datname = %s", (self.cfg.db_name,)) + exists = cur.fetchone() + cur.close() + conn.close() + if exists: + # Disconnect from the current database + # Reconnect to the postgres database to drop the target database + conn = psycopg2.connect(dbname='postgres', user=self.cfg.db_login, password=self.cfg.db_passwd, host=self.cfg.db_addr) + conn.autocommit = True + cur = conn.cursor() + #need to remove the open connections to be able to remove it + cur.execute(f""" + SELECT pg_terminate_backend(pg_stat_activity.pid) + FROM pg_stat_activity + WHERE pg_stat_activity.datname = %s + AND pid <> pg_backend_pid(); + """, (self.cfg.db_name,)) + print(f"Terminated all connections to database '{self.cfg.db_name}'") + + cur.execute(f"DROP DATABASE {self.cfg.db_name}") + print(f"Database '{self.cfg.db_name}' dropped successfully.") + cur.close() + conn.close() + + self._init() + + def _init(self): + os.makedirs(self.path_human, exist_ok=True) + os.makedirs(self.path_id, exist_ok=True) + for key, dbcat in self.dbcats: + dbcat._init() + + def dbcat_new(self, cat: str, reset: bool = False) -> DBCat: + dbc = DBCat(cat=cat, path=self.path) + self.dbcats[cat] = dbc + return dbc + + def dbcat_get(self, cat: str) -> DBCat: + if cat in self.dbcats: + return self.dbcats[cat] + raise Exception(f"can't find dbcat with cat:{cat}") + + def db_connection(self): + if self.cfg.db_type == 
DBType.POSTGRESQL: + try: + conn = psycopg2.connect( + dbname=self.cfg.db_name, + user=self.cfg.db_login, + password=self.cfg.db_passwd, + host=self.cfg.db_addr, + port=self.cfg.db_port + ) + conn.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT) + conn.autocommit = True # Set autocommit mode + except psycopg2.OperationalError as e: + if f"database \"{self.cfg.db_name}\" does not exist" in str(e): + # Connect to 'postgres' database to create the new database + conn = psycopg2.connect( + dbname='postgres', + user=self.cfg.db_login, + password=self.cfg.db_passwd, + host=self.cfg.db_addr, + port=self.cfg.db_port + ) + conn.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT) + cur = conn.cursor() + cur.execute(f"CREATE DATABASE {self.cfg.db_name}") + cur.close() + conn.close() + + # Now connect to the newly created database + conn = psycopg2.connect( + dbname=self.cfg.db_name, + user=self.cfg.db_login, + password=self.cfg.db_passwd, + host=self.cfg.db_addr, + port=self.cfg.db_port + ) + conn.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT) + print(f"Database '{self.cfg.db_name}' created successfully.") + else: + raise e + elif self.cfg.db_type == DBType.SQLITE: + db_file = os.path.join(self.cfg.db_path, f"{self.cfg.db_name}.db") + conn = sqlite3.connect(db_file) + else: + raise ValueError(f"Unsupported database type: {self.cfg.db_type}") + return conn + + def db_create(self, db_name: str = "", user_name: str = "", user_password: str = ""): + if self.cfg.db_type == DBType.POSTGRESQL: + self.db_create_id() + # Connect to PostgreSQL server + conn = self.db_connection() + cur = conn.cursor() + + if db_name=="": + db_name=self.cfg.db_name + + try: + # Check if the database already exists + cur.execute("SELECT 1 FROM pg_database WHERE datname = %s", (db_name,)) + exists = cur.fetchone() + + if not exists: + # Create the database + cur.execute(f"CREATE DATABASE {db_name}") + print(f"Database '{db_name}' created successfully.") + + if user_name and user_password: + # Check if user exists + cur.execute("SELECT 1 FROM pg_roles WHERE rolname = %s", (user_name,)) + user_exists = cur.fetchone() + + if not user_exists: + # Create the user + cur.execute(f"CREATE USER {user_name} WITH PASSWORD %s", (user_password,)) + print(f"User '{user_name}' created successfully.") + + # Grant privileges on the database to the user + cur.execute(f"GRANT ALL PRIVILEGES ON DATABASE {db_name} TO {user_name}") + print(f"Privileges granted to '{user_name}' on '{db_name}'.") + + except psycopg2.Error as e: + raise Exception(f"Postgresql error: {e}") + finally: + # Close the cursor and connection + cur.close() + conn.close() + + elif self.cfg.db_type == DBType.SQLITE: + # For SQLite, we just need to create the database file if it doesn't exist + db_file = os.path.join(self.cfg.db_path, f"{db_name}.db") + if not os.path.exists(db_file): + conn = sqlite3.connect(db_file) + conn.close() + print(f"SQLite database '{db_name}' created successfully at {db_file}.") + else: + print(f"SQLite database '{db_name}' already exists at {db_file}.") + + if user_name: + print("Note: SQLite doesn't support user management like PostgreSQL.") + + else: + raise ValueError(f"Unsupported database type: {self.cfg.db_type}") + + + def db_create_id(self): + with self.db_connection() as conn: + with conn.cursor() as cur: + cur.execute(""" + CREATE TABLE IF NOT EXISTS user_id_counters ( + user_id INTEGER PRIMARY KEY, + last_id_given INTEGER NOT NULL DEFAULT 0 + ) + """) + conn.commit() + + + def new_id(self,user_id: int) -> str: + if not 0 <= user_id <= 50: + raise 
ValueError("User ID must be between 0 and 50") + + max_ids = 60466175 + ids_per_user = max_ids // 51 # We use 51 to ensure we don't exceed the max even for user_id 50 + + with self.db_connection() as conn: + with conn.cursor(cursor_factory=DictCursor) as cur: + # Try to get the last_id_given for this user + cur.execute("SELECT last_id_given FROM user_id_counters WHERE user_id = %s", (user_id,)) + result = cur.fetchone() + + if result is None: + # If no record exists for this user, insert a new one + cur.execute( + "INSERT INTO user_id_counters (user_id, last_id_given) VALUES (%s, 0) RETURNING last_id_given", + (user_id,) + ) + last_id_given = 0 + else: + last_id_given = result['last_id_given'] + + # Calculate the new ID + new_id_int = (user_id * ids_per_user) + last_id_given + 1 + + if new_id_int > (user_id + 1) * ids_per_user: + raise ValueError(f"No more IDs available for user {user_id}") + + # Update the last_id_given in the database + cur.execute( + "UPDATE user_id_counters SET last_id_given = last_id_given + 1 WHERE user_id = %s", + (user_id,) + ) + conn.commit() + + return int_to_id(new_id_int) + + + +def db_new( + db_type: DBType = DBType.POSTGRESQL, + db_name: str = "main", + db_login: str = "admin", + db_passwd: str = "admin", + db_addr: str = "localhost", + db_port: int = 5432, + db_path: str = "/tmp/db", + reset: bool = False, +): + # Create a DBConfig object + config = DBConfig( + db_type=db_type, + db_name=db_name, + db_login=db_login, + db_passwd=db_passwd, + db_addr=db_addr, + db_port=db_port, + db_path=db_path + ) + + # Create and return a DB object + mydb = DB(cfg=config, path=db_path, reset=reset) + mydb.db_create() + return mydb + + diff --git a/_archive/osis/doc.py b/_archive/osis/doc.py new file mode 100644 index 0000000..309214c --- /dev/null +++ b/_archive/osis/doc.py @@ -0,0 +1,77 @@ +import os +import subprocess +import sys + +def should_document(file_name): + """ + Determine if a file should be documented based on its name. + + Args: + file_name (str): The name of the file. + + Returns: + bool: True if the file should be documented, False otherwise. + """ + lower_name = file_name.lower() + return ( + file_name.endswith('.py') and + 'example' not in lower_name and + '_generate' not in lower_name + ) + +def generate_pydoc(start_dir): + """ + Generate pydoc documentation for Python modules in the given directory. + + Args: + start_dir (str): The directory to start searching for Python modules. 
+ + Returns: + None + """ + # Create the docs directory + docs_dir = os.path.join(start_dir, 'docs') + os.makedirs(docs_dir, exist_ok=True) + + # Walk through the directory + for root, dirs, files in os.walk(start_dir): + for file in files: + if should_document(file): + module_name = os.path.splitext(file)[0] + module_path = os.path.relpath(os.path.join(root, file), start_dir) + module_path = os.path.splitext(module_path)[0].replace(os.path.sep, '.') + + # Skip the script itself + if module_name == os.path.splitext(os.path.basename(__file__))[0]: + continue + + output_file = os.path.join(docs_dir, f'{module_name}.txt') + + try: + # Run pydoc and capture the output + result = subprocess.run( + [sys.executable, '-m', 'pydoc', module_path], + capture_output=True, + text=True, + check=True + ) + + # Write the output to a file + with open(output_file, 'w') as f: + f.write(result.stdout) + + print(f"Generated documentation for {module_path} in {output_file}") + + except subprocess.CalledProcessError as e: + print(f"Error generating documentation for {module_path}: {e}") + except Exception as e: + print(f"Unexpected error for {module_path}: {e}") + +if __name__ == "__main__": + # Get the directory of the script + script_dir = os.path.dirname(os.path.abspath(__file__)) + + # Generate documentation + generate_pydoc(script_dir) + + print(f"Documentation generation complete. Output is in {os.path.join(script_dir, 'docs')}") \ No newline at end of file diff --git a/_archive/osis/id.py b/_archive/osis/id.py new file mode 100644 index 0000000..62c9765 --- /dev/null +++ b/_archive/osis/id.py @@ -0,0 +1,43 @@ +from typing import Tuple +from typing import Optional + + +def int_to_id(number: int) -> str: + chars: str = '0123456789abcdefghijklmnopqrstuvwxyz' + base: int = len(chars) + + if number < 0: + raise ValueError("Input must be a non-negative integer") + + # Convert to base-36 + result: list[str] = [] + while number > 0: + number, remainder = divmod(number, base) + result.append(chars[remainder]) + + # Pad with '0' if necessary to reach minimum length of 3 + while len(result) < 3: + result.append('0') + + # Raise error if result is longer than 5 characters + if len(result) > 5: + raise ValueError("Input number is too large (results in more than 5 characters)") + + # Reverse the list and join into a string + return ''.join(reversed(result)) + +def id_to_int(id: str) -> int: + chars: str = '0123456789abcdefghijklmnopqrstuvwxyz' + base: int = len(chars) + + if not 3 <= len(id) <= 5: + raise ValueError("ID must be between 3 and 5 characters long") + + if not all(c in chars for c in id): + raise ValueError("ID contains invalid characters") + + result: int = 0 + for char in id: + result = result * base + chars.index(char) + + return result diff --git a/_archive/osis/orm.py b/_archive/osis/orm.py new file mode 100644 index 0000000..d42089f --- /dev/null +++ b/_archive/osis/orm.py @@ -0,0 +1,159 @@ +from typing import Dict, Type, List +import datetime +from dataclasses import dataclass, field +import psycopg2 +from psycopg2.extras import Json + +@dataclass +class ObjIndexDef: + table_name: str + table_fields: Dict[str, Type] + fts_fields: List[str] = field(default_factory=list) # full text fields + +def sql_col_type(field_type: Type) -> str: + if field_type == int: + return "INTEGER" + elif field_type == float: + return "REAL" + elif field_type == str: + return "TEXT" + elif field_type == bool: + return "BOOLEAN" + elif field_type == datetime.date: + return "DATE" + elif field_type == datetime.datetime: + 
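+ # datetime.datetime maps to SQL TIMESTAMP; unrecognized types fall through to TEXT below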
return "TIMESTAMP" + else: + return "TEXT" # default type if none match + +def obj_index_def_new(table_name: str, table_fields: Dict[str, Type], fts_fields: List[str]) -> ObjIndexDef: + # Convert Python types to SQL types + sql_table_fields = {field_name: sql_col_type(field_type) for field_name, field_type in table_fields.items()} + + # Create and return the ObjIndexDef instance + return ObjIndexDef( + table_name=table_name, + table_fields=table_fields, + fts_fields=fts_fields + ) + +def sql_table_create(db, definition: ObjIndexDef, reset: bool = False) -> str: + columns = [] + for field_name, field_type in definition.table_fields.items(): + if field_name not in ["id", "name", "creation_date", "mod_date", "data"]: + sql_type = sql_col_type(field_type) + columns.append(f"{field_name} {sql_type}") + + columns.append("id TEXT PRIMARY KEY") + columns.append("name TEXT NOT NULL") + columns.append("creation_date TIMESTAMP DEFAULT CURRENT_TIMESTAMP") + columns.append("mod_date TIMESTAMP DEFAULT CURRENT_TIMESTAMP") + columns.append("data JSON") + + columns_str = ", ".join(columns) + + create_table_sql = f"CREATE TABLE IF NOT EXISTS {definition.table_name} ({columns_str});" + + if reset: + drop_table_sql = f"DROP TABLE IF EXISTS {definition.table_name};" + create_table_sql = drop_table_sql + "\n" + create_table_sql + + if definition.fts_fields: + fts_table_sql = f""" + CREATE TABLE IF NOT EXISTS {definition.table_name}_fts ( + id TEXT PRIMARY KEY, + {definition.table_name}_id TEXT REFERENCES {definition.table_name}(id), + document tsvector + ); + CREATE INDEX IF NOT EXISTS idx_{definition.table_name}_fts_document + ON {definition.table_name}_fts USING GIN(document); + """ + create_table_sql += "\n" + fts_table_sql + + c=db.db_connection() + + try: + with c.cursor() as cursor: + cursor.execute(create_table_sql) + c.commit() # Commit the transaction + print("SQL script executed successfully.") + except psycopg2.Error as e: + c.rollback() # Rollback on error + print(f"An error occurred: {e}") + + return create_table_sql + +def insert_update(db, definition: ObjIndexDef, **args): + table_name = definition.table_name + fields = definition.table_fields.keys() + c=db.db_connection() + # Prepare the data + data = {} + for field in fields: + if field in args: + if isinstance(args[field], dict): + data[field] = Json(args[field]) + else: + data[field] = args[field] + elif field not in ["id", "creation_date", "mod_date"]: + data[field] = None + + # Ensure required fields are present + if "id" not in data: + raise ValueError("'id' field is required for insert/update operation") + if "name" not in data: + raise ValueError("'name' field is required for insert/update operation") + + # Set modification date + data["mod_date"] = datetime.datetime.now() + + # Prepare SQL + fields_str = ", ".join(data.keys()) + placeholders = ", ".join(["%s"] * len(data)) + update_str = ", ".join([f"{k} = EXCLUDED.{k}" for k in data.keys() if k != "id"]) + + sql = f""" + INSERT INTO {table_name} ({fields_str}) + VALUES ({placeholders}) + ON CONFLICT (id) DO UPDATE + SET {update_str}; + """ + + # Execute SQL + try: + with c.cursor() as cursor: + cursor.execute(sql, list(data.values())) + + c.commit() + + # Update FTS table if necessary + if definition.fts_fields: + c.update_fts(definition, data) + + print(f"Successfully inserted/updated record with id {data['id']}") + except psycopg2.Error as e: + c.rollback() + print(f"An error occurred: {e}") + +def update_fts(db, definition: ObjIndexDef, data: dict): + fts_table = 
f"{definition.table_name}_fts" + fts_fields = definition.fts_fields + c=db.db_connection() + # Prepare FTS document + fts_data = " ".join(str(data[field]) for field in fts_fields if field in data) + + sql = f""" + INSERT INTO {fts_table} (id, {definition.table_name}_id, document) + VALUES (%s, %s, to_tsvector(%s)) + ON CONFLICT (id) DO UPDATE + SET document = to_tsvector(EXCLUDED.document); + """ + + try: + with c.cursor() as cursor: + cursor.execute(sql, (data['id'], data['id'], fts_data)) + c.commit() + print(f"Successfully updated FTS for record with id {data['id']}") + except psycopg2.Error as e: + c.rollback() + print(f"An error occurred while updating FTS: {e}") \ No newline at end of file diff --git a/_archive/osis/readme.md b/_archive/osis/readme.md new file mode 100644 index 0000000..9d88afe --- /dev/null +++ b/_archive/osis/readme.md @@ -0,0 +1,6 @@ +# OSIS + +Object storage and Index Service + +see osis_examples under /examples how to use + diff --git a/_archive/osis/templates/create_table copy.lua b/_archive/osis/templates/create_table copy.lua new file mode 100644 index 0000000..8332672 --- /dev/null +++ b/_archive/osis/templates/create_table copy.lua @@ -0,0 +1,49 @@ +CREATE OR REPLACE FUNCTION create_table_from_json(definition JSONB, reset BOOLEAN DEFAULT FALSE) +RETURNS VOID AS $$ + local json = require("cjson") + local def = json.decode(definition) + local table_name = def.table_name + local table_fields = def.table_fields + local fts_fields = def.fts_fields or {} + + local columns = "" + for key, value in pairs(table_fields) do + columns = columns .. key .. " " .. value .. ", " + end + + -- Add the necessary columns + columns = columns .. "id TEXT PRIMARY KEY, " + columns = columns .. "name TEXT NOT NULL, " + columns = columns .. "creation_date TIMESTAMP DEFAULT CURRENT_TIMESTAMP, " + columns = columns .. "mod_date TIMESTAMP DEFAULT CURRENT_TIMESTAMP, " + columns = columns .. "data JSON" + + -- Construct the CREATE TABLE statement + local create_table_sql = string.format("CREATE TABLE IF NOT EXISTS %s (%s);", table_name, columns) + + print("Create table " .. tostring(create_table_sql)) + + -- Conditionally construct the DROP TABLE statement + if reset then + local drop_table_sql = string.format("DROP TABLE IF EXISTS %s;", table_name) + create_table_sql = drop_table_sql .. create_table_sql + end + + -- Add the FTS table and index if full-text search fields are provided + if #fts_fields > 0 then + local fts_table_sql = string.format([[ + CREATE TABLE IF NOT EXISTS %s_fts ( + id TEXT PRIMARY KEY, + %s_id TEXT REFERENCES %s(id), + document tsvector + ); + CREATE INDEX IF NOT EXISTS idx_%s_fts_document ON %s_fts USING GIN(document); + ]], table_name, table_name, table_name, table_name, table_name) + create_table_sql = create_table_sql .. fts_table_sql + end + + print("Create table fts" .. tostring(create_table_sql)) + + -- Execute the dynamic SQL + SPI.execute(create_table_sql) +$$ LANGUAGE pllua; \ No newline at end of file diff --git a/_archive/osis/templates/create_table.lua b/_archive/osis/templates/create_table.lua new file mode 100644 index 0000000..6aa2206 --- /dev/null +++ b/_archive/osis/templates/create_table.lua @@ -0,0 +1,100 @@ +CREATE OR REPLACE FUNCTION create_table_from_json(definition JSONB, reset BOOLEAN DEFAULT FALSE) +RETURNS VOID AS $$ +local spi = require("pllua.spi") + +local function execute_sql(sql) + local status, result = pcall(function() + return spi.execute(sql) + end) + if not status then + error("Failed to execute SQL: " .. 
tostring(result)) + end + return result +end + +local nullval = {} -- use some unique object to mark nulls +local def = definition{ null = nullval, pg_numeric = true } +local table_name = def.table_name +local table_fields = def.table_fields +local fts_fields = def.fts_fields or {} + +local columns = {} +local existing_columns = {} +local has_id_primary_key = false +local index_columns = {} + +if reset then + local drop_table_sql = string.format("DROP TABLE IF EXISTS %s CASCADE;", table_name) + execute_sql(drop_table_sql) +end + + +for key, value in pairs(table_fields) do + if key:lower() == "id" then + -- Ensure 'id' is always PRIMARY KEY + table.insert(columns, key .. " " .. value .. " PRIMARY KEY") + has_id_primary_key = true + else + table.insert(columns, key .. " " .. value) + if key:lower() ~= "data" then + table.insert(index_columns, key) + end + end + existing_columns[key:lower()] = true +end + +print("INdex columns " .. tostring(index_columns)) + +-- Add necessary columns only if they don't exist +local required_columns = { + {name = "name", type = "TEXT NOT NULL"}, + {name = "creation_date", type = "TIMESTAMP DEFAULT CURRENT_TIMESTAMP"}, + {name = "mod_date", type = "TIMESTAMP DEFAULT CURRENT_TIMESTAMP"}, + {name = "data", type = "JSONB"} +} + +for _, col in ipairs(required_columns) do + if not existing_columns[col.name:lower()] then + table.insert(columns, col.name .. " " .. col.type) + table.insert(index_columns, col.name) + end +end + +-- If 'id' wasn't provided, add it as PRIMARY KEY +if not has_id_primary_key then + table.insert(columns, 1, "id TEXT PRIMARY KEY") +end + +-- Join columns with commas +local columns_string = table.concat(columns, ", ") + +-- Construct the CREATE TABLE statement +local create_table_sql = string.format("CREATE TABLE IF NOT EXISTS %s (%s);", table_name, columns_string) + +-- Conditionally construct the DROP TABLE statement + +-- Execute the CREATE TABLE statement +execute_sql(create_table_sql) + +-- Create an index for each column +for _, column in ipairs(index_columns) do + local index_sql = string.format("CREATE INDEX IF NOT EXISTS idx_%s_%s ON %s (%s);", + table_name, column, table_name, column) + execute_sql(index_sql) +end + +-- Add the FTS table and index if full-text search fields are provided +if #fts_fields > 0 then + local fts_table_sql = string.format([[ + CREATE TABLE IF NOT EXISTS %s_fts ( + id TEXT PRIMARY KEY, + %s_id TEXT REFERENCES %s(id), + document tsvector + ); + CREATE INDEX IF NOT EXISTS idx_%s_fts_document ON %s_fts USING GIN(document); + ]], table_name, table_name, table_name, table_name, table_name) + execute_sql(fts_table_sql) +end + +return +$$ LANGUAGE pllua; \ No newline at end of file diff --git a/_archive/osis/templates/create_table.py b/_archive/osis/templates/create_table.py new file mode 100644 index 0000000..dee9810 --- /dev/null +++ b/_archive/osis/templates/create_table.py @@ -0,0 +1,75 @@ +CREATE OR REPLACE FUNCTION create_table_from_json(definition_json JSONB, reset BOOLEAN DEFAULT FALSE) +RETURNS VOID AS $$ +import plpy +import json + +def execute_sql(sql): + try: + plpy.execute(sql) + except Exception as e: + plpy.error(f"Failed to execute SQL: {str(e)}") + +# Parse the JSONB input into a Python dictionary +definition = json.loads(definition_json) + +table_name = definition['table_name'] +table_fields = definition['table_fields'] +fts_fields = definition.get('fts_fields', []) + +columns = [] +existing_columns = set() +has_id_primary_key = False +index_columns = [] + +if reset: + drop_table_sql = f"DROP 
TABLE IF EXISTS {table_name} CASCADE;" + execute_sql(drop_table_sql) + +for key, value in table_fields.items(): + if key.lower() == "id": + columns.append(f"{key} {value} PRIMARY KEY") + has_id_primary_key = True + else: + columns.append(f"{key} {value}") + if key.lower() != "data": + index_columns.append(key) + existing_columns.add(key.lower()) + +plpy.notice(f"Index columns {index_columns}") + +required_columns = [ + {"name": "name", "type": "TEXT NOT NULL"}, + {"name": "creation_date", "type": "TIMESTAMP DEFAULT CURRENT_TIMESTAMP"}, + {"name": "mod_date", "type": "TIMESTAMP DEFAULT CURRENT_TIMESTAMP"}, + {"name": "data", "type": "JSONB"} +] + +for col in required_columns: + if col['name'].lower() not in existing_columns: + columns.append(f"{col['name']} {col['type']}") + index_columns.append(col['name']) + +if not has_id_primary_key: + columns.insert(0, "id TEXT PRIMARY KEY") + +columns_string = ", ".join(columns) + +create_table_sql = f"CREATE TABLE IF NOT EXISTS {table_name} ({columns_string});" +execute_sql(create_table_sql) + +for column in index_columns: + index_sql = f"CREATE INDEX IF NOT EXISTS idx_{table_name}_{column} ON {table_name} ({column});" + execute_sql(index_sql) + +if fts_fields: + fts_table_sql = f""" + CREATE TABLE IF NOT EXISTS {table_name}_fts ( + id TEXT PRIMARY KEY, + {table_name}_id TEXT REFERENCES {table_name}(id), + document tsvector + ); + CREATE INDEX IF NOT EXISTS idx_{table_name}_fts_document ON {table_name}_fts USING GIN(document); + """ + execute_sql(fts_table_sql) + +$$ LANGUAGE plpython3u; \ No newline at end of file diff --git a/_archive/osis/templates/create_table.sql b/_archive/osis/templates/create_table.sql new file mode 100644 index 0000000..1fce5b6 --- /dev/null +++ b/_archive/osis/templates/create_table.sql @@ -0,0 +1,56 @@ +CREATE OR REPLACE FUNCTION create_table_from_json( + definition JSONB, + reset BOOLEAN DEFAULT FALSE +) RETURNS VOID AS $$ +DECLARE + table_name TEXT; + table_fields JSONB; + fts_fields TEXT[]; + columns TEXT := ''; + create_table_sql TEXT; + drop_table_sql TEXT; + fts_table_sql TEXT := ''; + field RECORD; +BEGIN + -- Extract the values from the JSON object + table_name := definition->>'table_name'; + table_fields := definition->'table_fields'; + fts_fields := ARRAY(SELECT jsonb_array_elements_text(definition->'fts_fields')); + + -- Iterate over the JSONB object to build the columns definition + FOR field IN SELECT * FROM jsonb_each_text(table_fields) + LOOP + columns := columns || field.key || ' ' || field.value || ', '; + END LOOP; + + -- Add the necessary columns + columns := columns || 'id TEXT PRIMARY KEY, '; + columns := columns || 'name TEXT NOT NULL, '; + columns := columns || 'creation_date TIMESTAMP DEFAULT CURRENT_TIMESTAMP, '; + columns := columns || 'mod_date TIMESTAMP DEFAULT CURRENT_TIMESTAMP, '; + columns := columns || 'data JSON'; + + -- Construct the CREATE TABLE statement + create_table_sql := 'CREATE TABLE IF NOT EXISTS ' || table_name || ' (' || columns || ');'; + + -- Conditionally construct the DROP TABLE statement + IF reset THEN + drop_table_sql := 'DROP TABLE IF EXISTS ' || table_name || ';'; + create_table_sql := drop_table_sql || create_table_sql; + END IF; + + -- Add the FTS table and index if full-text search fields are provided + IF array_length(fts_fields, 1) > 0 THEN + fts_table_sql := 'CREATE TABLE IF NOT EXISTS ' || table_name || '_fts (' || + 'id TEXT PRIMARY KEY, ' || + table_name || '_id TEXT REFERENCES ' || table_name || '(id), ' || + 'document tsvector);' || + 'CREATE INDEX IF NOT 
EXISTS idx_' || table_name || '_fts_document ' || + 'ON ' || table_name || '_fts USING GIN(document);'; + create_table_sql := create_table_sql || fts_table_sql; + END IF; + + -- Execute the dynamic SQL + EXECUTE create_table_sql; +END; +$$ LANGUAGE plpgsql; \ No newline at end of file diff --git a/_archive/requirements.txt b/_archive/requirements.txt new file mode 100644 index 0000000..f0cd63e --- /dev/null +++ b/_archive/requirements.txt @@ -0,0 +1,50 @@ + + +#for the ai +#vyper +aider-chat +bcrypt>=4.1.3 +beautifulsoup4>=4.12.0 +email-validator +fastapi>=0.104.0 +inflect +IPython +langchain-community>=0.0.16 +langchain-openai>=0.0.5 +langchain>=0.1.0 +meilisearch>=0.21.0 +msgpack +mypy>=1.9.0 +numpy>=1.26.0 +openai>=1.0.0 +pandas>=2.2.0 +peewee +Pillow>=10.0.0 +pipx +playwright +psutil +psycopg2 +pudb +pydantic>=2.7.1 +pylance>=0.10.12 +pymupdf +PyPDF2>=3.0.0 +pytest +python-Levenshtein +pywalletconnect +pyyaml +pyyaml +qrcode +redis>=5.0.3 +requests>=2.31.0 +ruff +sqlalchemy +stellar_sdk +streamlit-echarts>=0.4.0 +streamlit-mermaid>=0.1.0 +streamlit>=1.32.0 +toml +uvicorn>=0.24.0 +watchdog +bcrypt +radicale \ No newline at end of file diff --git a/_archive/start_termux_server.sh b/_archive/start_termux_server.sh new file mode 100755 index 0000000..b35517e --- /dev/null +++ b/_archive/start_termux_server.sh @@ -0,0 +1,19 @@ +#!/bin/bash + +# Get IP address +ip_addr=$(ifconfig | grep -Eo 'inet (addr:)?([0-9]*\.){3}[0-9]*' | grep -Eo '([0-9]*\.){3}[0-9]*' | grep -v '127.0.0.1' | head -n 1) + +if [ -z "$ip_addr" ]; then + echo "Could not determine IP address" + exit 1 +fi + +echo "Starting HTTP server..." +echo "Your IP address is: $ip_addr" +echo "Server will be available at: http://$ip_addr:8080/termuxinstall.sh" +echo "" +echo "To download and run the script, use:" +echo "curl -s http://$ip_addr:8080/termuxinstall.sh > /install.sh && bash /install.sh" + +# Change to the tools directory and start the server (dlx lets you start without downloading) +cd "$(dirname "$0")" && pnpm dlx http-server . -p 8080 diff --git a/_archive/termuxinstall.sh b/_archive/termuxinstall.sh new file mode 100644 index 0000000..cdd1e8b --- /dev/null +++ b/_archive/termuxinstall.sh @@ -0,0 +1,86 @@ +#!/bin/bash +set -ex + +# Ensure script is executed in Termux +if [ -z "$TERMUX_VERSION" ]; then + echo "This script must be run in Termux. Exiting." + exit 1 +fi + +# Function to handle errors +handle_error() { + echo "Error: $1" + exit 1 +} + +# Update and upgrade packages using Termux package management +echo "Updating package list and upgrading installed packages..." +pkg update -y || handle_error "Failed to update package list" +pkg upgrade -y || handle_error "Failed to upgrade packages" + +# Install essential utilities +echo "Installing utilities: mc, htop, wget, curl..." +pkg install -y mc htop wget curl termux-gui-bash || handle_error "Failed to install utilities" + +# Install OpenSSH SSH server +echo "Installing OpenSSH SSH server..." +pkg install -y openssh || handle_error "Failed to install OpenSSH" + +# Define SSH port +SSH_PORT=8022 # Termux default port + +# Check and handle existing dropbear +if command -v dropbear >/dev/null 2>&1; then + echo "Found existing dropbear installation, removing..." 
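+ # pgrep exits non-zero when no dropbear process is running; guard the pkill so a clean system doesn't trip handle_error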
+ if pgrep dropbear >/dev/null; then + pkill dropbear || handle_error "Failed to stop dropbear" + fi + pkg uninstall -y dropbear || handle_error "Failed to uninstall dropbear" +fi + +# Setup SSH directory and authorized_keys +SSHDIR="$HOME/.ssh" +mkdir -p "$SSHDIR" +chmod 700 "$SSHDIR" + +# Add authorized key +echo "Adding authorized SSH key..." +AUTHKEYS="$SSHDIR/authorized_keys" +echo "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIIahWiRRm9cWAKktH9dndn3R45grKqzPC3mKX8IjGgH6 kristof@incubaid.com" > "$AUTHKEYS" +chmod 600 "$AUTHKEYS" + +# Set password non-interactively (Termux specific) +echo "Setting default password..." +export PASSWORD="planet007" +(echo "${PASSWORD}"; echo "${PASSWORD}") | passwd || handle_error "Failed to set password" + +# Start SSH server +echo "Starting SSH server on port $SSH_PORT..." +sshd -p $SSH_PORT || handle_error "Failed to start SSH server" + +termux-setup-storage + + +pkg install libgc rust + +# Display the current IP address +echo "Fetching IP address..." +IP_ADDR=$(ifconfig 2>/dev/null | grep -Eo 'inet (addr:)?([0-9]*\.){3}[0-9]*' | grep -Eo '([0-9]*\.){3}[0-9]*' | grep -v '127.0.0.1' | head -n 1) +if [ -z "$IP_ADDR" ]; then + echo "Could not determine IP address automatically" + echo "Please check your IP address manually using 'ip addr'" +else + echo "Your IP address is: $IP_ADDR" + echo "" + echo "SSH connection details:" + echo " - Host: $IP_ADDR" + echo " - Port: $SSH_PORT" + echo " - Password: planet007" + echo " - Key authentication is enabled" + echo "" + echo "Example SSH commands:" + echo " With password: ssh user@$IP_ADDR -p $SSH_PORT" + echo " With key: ssh -i /path/to/private/key user@$IP_ADDR -p $SSH_PORT" +fi + +echo "Setup completed successfully!" diff --git a/_archive/web/__init__.py b/_archive/web/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/_archive/web/auth.py b/_archive/web/auth.py new file mode 100644 index 0000000..138d5bc --- /dev/null +++ b/_archive/web/auth.py @@ -0,0 +1,46 @@ +from datetime import datetime, timedelta + +import jwt +from jwt.exceptions import ExpiredSignatureError, InvalidTokenError + + +class JWTHandler: + import os + + SECRET_KEY = os.getenv('JWT_SECRET_KEY') + if not SECRET_KEY: + raise EnvironmentError('JWT_SECRET_KEY environment variable is not set') + ALGORITHM = 'HS256' + ACCESS_TOKEN_EXPIRE_MINUTES = 30 + + def __init__(self, secret_key=None, algorithm=None, expire_minutes=None): + if secret_key: + self.SECRET_KEY = secret_key + if algorithm: + self.ALGORITHM = algorithm + if expire_minutes: + self.ACCESS_TOKEN_EXPIRE_MINUTES = expire_minutes + + def create_access_token(self, data: dict): + to_encode = data.copy() + expire = datetime.utcnow() + timedelta( + minutes=self.ACCESS_TOKEN_EXPIRE_MINUTES + ) + to_encode.update({'exp': expire}) + return jwt.encode(to_encode, self.SECRET_KEY, algorithm=self.ALGORITHM) + + def verify_access_token(self, token: str): + try: + payload = jwt.decode( + token, self.SECRET_KEY, algorithms=[self.ALGORITHM] + ) + email: str = payload.get('sub') + if email is None: + raise InvalidTokenError + return email + except (ExpiredSignatureError, InvalidTokenError): + raise InvalidTokenError + + +def new(secret_key=None, algorithm=None, expire_minutes=None) -> JWTHandler: + return JWTHandler(secret_key, algorithm, expire_minutes) diff --git a/_archive/web/deduper.py b/_archive/web/deduper.py new file mode 100644 index 0000000..98f39ef --- /dev/null +++ b/_archive/web/deduper.py @@ -0,0 +1,204 @@ +import hashlib +from typing import Dict +import os +from pathlib import 
Path +import json +import shutil + +class Deduper: + """ + Tools to start from an existing directory to make sure we don't have duplicates in template + """ + def __init__(self, path: str): + self.path = Path(path).expanduser().resolve() + self.path.mkdir(parents=True, exist_ok=True) + self.hash_dict: Dict[str, str] = {} #key is the hash, #value is the relative path of the object in relation to the deduper + self.meta_file = self.path / ".meta.json" + + #from IPython import embed;embed() + + if self.meta_file.exists(): + self.import_dict() + else: + self.load_assets() + + def load_assets(self): + """Load all the existing files and calculate their hashes""" + self.hash_dict = {} + for root, _, files in os.walk(self.path): + for file in files: + file_path = os.path.join(root, file) + relative_path = os.path.relpath(file_path, self.path) + print(f" - load deduped file {file_path}") + if not file.startswith('.'): + file_hash = self._calculate_md5(file_path) + if file_hash in self.hash_dict: + raise Exception(f"duplicate in dedupe pool: {file_path}") + self.hash_dict[file_hash] = relative_path + self.export_dict() + + def _calculate_md5(self, file_path: str) -> str: + hash_md5 = hashlib.md5() + with open(file_path, "rb") as f: + for chunk in iter(lambda: f.read(4096), b""): + hash_md5.update(chunk) + return hash_md5.hexdigest() + + def export_dict(self): + """Export the hash dictionary to a JSON file""" + with open(self.meta_file, 'w') as f: + json.dump(self.hash_dict, f, indent=2) + + def import_dict(self): + """Import the hash dictionary from a JSON file""" + with open(self.meta_file, 'r') as f: + self.hash_dict = json.load(f) + + + def path_check(self, path: str = "") -> str: + """ + Calculate MD5 from the path to look up the file in the deduper. + Will return empty string if not found + """ + file_path = Path(path) + if not file_path.exists(): + print(f" **ERROR: File '{path}' does not exist.") + return "" + # raise FileNotFoundError(f"File '{path}' does not exist.") + file_hash = self._calculate_md5(str(file_path)) + return self.path_find(file_hash=file_hash) + + def path_find(self, path: str = "",file_hash: str = "") -> str: + """ + return the relative path of the found object in relation to the dedupe stor + will return empty string if not found + """ + res = [] + if path!="": + input_path = Path(path) + input_filename = input_path.name + for _, stored_path in self.hash_dict.items(): + stored_path_path = Path(stored_path) + if stored_path_path.name.lower() == input_filename.lower(): + if len(input_path.parts) == 1: + res.append(stored_path) + elif input_path.as_posix().lower() == stored_path_path.as_posix().lower(): + res.append(stored_path) + if len(res)==1: + return res[0] + elif len(res)==0: + return "" + else: + raise Exception(f"found more than 1: {path}") + elif file_hash!="": + if file_hash in self.hash_dict: + return self.hash_dict[file_hash] + return "" + else: + raise Exception("need to input path or file_hash") + + def add(self, source_path: str, dest_path_rel: str = "",dest_dir_rel = "") -> str: + """ + Add a file to the specified path in the dedupe pool if it doesn't exist. + + Args: + source_path (str): Path to the source file to be copied. + dest_path_rel (str): Path where the file should be copied to, relative to the dedupe pool. + + Returns: + str: The path of the file in the dedupe pool if successful, empty string if failed. 
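+ If an identical file (same MD5) already exists in the pool, its existing
+ relative path is returned and nothing is copied; name clashes get a
+ numeric suffix via _relpath_find_new.
+ Example (hypothetical paths): add("/tmp/logo.png", dest_dir_rel="img")
+ copies the file to <pool>/img/logo.png and returns "img/logo.png".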
+ """ + source_path0 = Path(source_path) + if not dest_path_rel: + if dest_dir_rel: + dest_dir_rel=dest_dir_rel.strip("/") + dest_path_rel = f"{dest_dir_rel}/{source_path0.name}" + else: + dest_path_rel = source_path0.name + # dest_path is the relative path + + # Check if the file already exists in the dedupe pool + existing_path = self.path_check(source_path) + if existing_path: + print(f"File already exists in dedupe pool: {existing_path}") + return existing_path + dest_path_rel = self._relpath_find_new(dest_path_rel) + dest_path = self.path / dest_path_rel + dest_path.parent.mkdir(parents=True, exist_ok=True) + + try: + shutil.copy2(source_path, dest_path) + print(f"File copied successfully to: {dest_path}") + except IOError as e: + raise Exception(f"Unable to add file {source_path} to {dest_path}.\n{e}") + + # Add the new file to the hash dictionary + relative_path = dest_path.relative_to(self.path).as_posix() + file_hash = self._calculate_md5(dest_path.as_posix()) + self.hash_dict[file_hash] = relative_path + self.export_dict() + return relative_path + + def path_find_full(self, path: str = "",file_hash: str = "" ) -> str: + mypath = self.path_find(path=path,file_hash=file_hash) + return str(self.path / mypath) + + + def _relpath_find_new(self, rel_path: str) -> str: + """ + find full path which doesn't exist yet + """ + if not rel_path: + raise ValueError("rel_path cannot be empty") + + # Construct the full path + full_path = self.path / rel_path + + # Check if the file exists + if not full_path.exists(): + return rel_path + + rel_path_obj = Path(rel_path) + rel_path_no_extension = str(rel_path_obj.with_suffix('')) + rel_path_extension = rel_path_obj.suffix + + new_rel_path = f"{rel_path_no_extension}{rel_path_extension}" + + # Check if filename exists in the dedupe pool + counter = 2 + new_full_path = self.path / new_rel_path + while new_full_path.exists(): + # Update path for the next iteration + new_rel_path = f"{rel_path_no_extension}_{counter}{rel_path_extension}" + new_full_path = self.path / new_rel_path + counter += 1 + + return new_rel_path + + + def svg_get(self, name: str) -> str: + """ + Get the SVG content based on the name (in lowercase) and match on the SVG name only. + + Args: + name (str): The name of the SVG file to retrieve. + + Returns: + str: The content of the SVG file if found, empty string otherwise. 
+ """ + name = Path(name).name.lower() + name = name.lower() + if not name.endswith('.svg'): + name += '.svg' + + for _, stored_path in self.hash_dict.items(): + stored_path_obj = Path(stored_path) + if stored_path_obj.name.lower() == name: + full_path = self.path / stored_path + try: + with open(full_path, 'r') as svg_file: + return svg_file.read() + except IOError as e: + raise Exception(f"Error reading SVG file {full_path}: {e}") + + raise Exception(f"SVG file '{name}' not found in the dedupe pool.") diff --git a/_archive/web/templatefixer.py b/_archive/web/templatefixer.py new file mode 100644 index 0000000..287b649 --- /dev/null +++ b/_archive/web/templatefixer.py @@ -0,0 +1,308 @@ +import hashlib +import os +import shutil +from urllib.parse import urlparse + +import redis +import requests +from bs4 import BeautifulSoup + +# from typing import Dict +from colorama import Fore + +# from herotools.extensions import check_and_add_extension +from web.deduper import Deduper + +image_movie_extensions = ( + '.jpg', + '.jpeg', + '.png', + '.gif', + '.bmp', + '.webp', + '.mp3', + '.mp4', + '.avi', + '.mov', + '.wmv', + '.flv', + '.webm', +) + + +# import pudb; pudb.set_trace() + + +class HTMLTemplateConverter: + def __init__( + self, + src_dir: str, + dest_dir: str, + static_dir: str = '', + reset: bool = False, + ): + self.src_dir = src_dir + self.dest_dir = dest_dir + + if reset and os.path.exists(self.dest_dir): + print(' - reset') + shutil.rmtree(self.dest_dir) + + if static_dir == '': + static_dir = f'{dest_dir}/static' + + self.static_dir = static_dir + + os.makedirs(self.dest_dir, exist_ok=True) + os.makedirs(self.static_dir, exist_ok=True) + + # Create a relative symlink called 'static' in dest_dir pointing to self.static_dir + static_link_path = os.path.join(self.dest_dir, 'static') + if not os.path.exists(static_link_path): + os.symlink(self.static_dir, static_link_path) + + self.deduper_static = Deduper(static_dir) + + if reset: + self.deduper_static.load_assets() + + self.redis_client = redis.StrictRedis(host='localhost', port=6379, db=0) + self.cache_expiration = 3600 # 1 hour + + def download_file(self, myurl: str, remove: bool = False) -> str: + # Check if the file is already in Redis cache + key = f'web.download.{myurl}' + cached_path = self.redis_client.get(key) + if cached_path: + print(f' - download cached {myurl}') + temp_path = cached_path.decode('utf-8') + else: + print(f' - download {myurl}') + response = requests.get(myurl) + if response.status_code == 200: + if '?' 
in myurl: + local_filename = hashlib.md5( + myurl.encode('utf-8') + ).hexdigest() + else: + url_path = urlparse(myurl).path + base_name, extension = os.path.splitext( + os.path.basename(url_path) + ) + local_filename = base_name + extension + + # Download to temporary directory + temp_dir = os.path.join('/tmp/files') + os.makedirs(temp_dir, exist_ok=True) + temp_path = os.path.join(temp_dir, local_filename) + + with open(temp_path, 'wb') as f: + f.write(response.content) + + # Update Redis cache + self.redis_client.setex(key, self.cache_expiration, temp_path) + else: + raise Exception(f'ERROR: failed to download {myurl}') + if remove: + os.remove(temp_path) + self.redis_client.delete(key) + return temp_path + + def add_to_static(self, file_path: str, dest_dir_rel: str = '') -> str: + """ + add path to the static directory + returns the path as need to be used in the template for the file link + """ + # Check if the file path exists + if not os.path.exists(file_path): + file_path2 = f'{self.src_dir}/{file_path}' + if not os.path.exists(file_path2): + print( + f"{Fore.RED}ERROR: File '{file_path}' or '{file_path2}' does not exist.{Fore.RESET}" + ) + # raise FileNotFoundError(f"File '{file_path}' and {file_path2} does not exist.") + return f'error/{file_path2}' + else: + file_path = file_path2 + + # Calculate hash for the file to be added + file_dedupe_location = self.deduper_static.path_check(file_path) + if file_dedupe_location: + return file_dedupe_location + return self.deduper_static.add( + source_path=file_path, dest_dir_rel=dest_dir_rel + ) + + def add_file( + self, + src_file_path: str, + file_path: str, + remove: bool = False, + dest_dir_rel: str = '', + ) -> str: + print( + f' - addfile {file_path} for dest_dir_rel:{dest_dir_rel}\n from out of file: {src_file_path}' + ) + + if 'fonts.googleapis.com' in file_path: + return file_path + + if file_path.startswith('http://') or file_path.startswith('https://'): + try: + temp_path = self.download_file(file_path) + except Exception: + print( + f"{Fore.RED}ERROR DOWNLOAD: File '{file_path}'.{Fore.RESET}" + ) + return f'/error/download/{file_path}' + + # import pudb; pudb.set_trace() + # from IPython import embed;embed() + # s + + src_file_path = '' + r = self.add_file( + src_file_path, temp_path, remove=True, dest_dir_rel=dest_dir_rel + ) + return f'{r}' + + else: + if not os.path.exists(file_path): + # now we need to go relative in relation to the src_file_path + file_path2 = os.path.abspath( + os.path.join(os.path.dirname(src_file_path), file_path) + ) + if os.path.exists(file_path2): + file_path = file_path2 + else: + print( + f"{Fore.RED}ERROR: File '{file_path}' or `{file_path2}` does not exist.{Fore.RESET}" + ) + return f'/error/{file_path}' + # raise FileNotFoundError(f"File '{file_path}' or `{file_path2}` does not exist.") + + # Check if file exists inself.deduper + existing_path = self.deduper_static.path_check(file_path) + if existing_path: + return f'/static/{existing_path}' + + return self.add_to_static(file_path, dest_dir_rel=dest_dir_rel) + + def convert(self) -> None: + os.makedirs(self.dest_dir, exist_ok=True) + + for root, _, files in os.walk(self.src_dir): + for file in files: + if file.endswith('.html'): + src_file_path = os.path.abspath(os.path.join(root, file)) + rel_path = os.path.relpath(src_file_path, self.src_dir) + + dest_file_path = os.path.join(self.dest_dir, rel_path) + os.makedirs(os.path.dirname(dest_file_path), exist_ok=True) + with open( + src_file_path, 'r', encoding='utf-8' + ) as html_file: + html_content 
= html_file.read() + + soup = BeautifulSoup(html_content, 'html.parser') + + svg_elements = soup.find_all('svg') + + for i, svg in enumerate(svg_elements, start=1): + svg_file_path = '/tmp/my.svg' + with open( + svg_file_path, 'w', encoding='utf-8' + ) as svg_file: + svg_file.write(str(svg)) + + svg_path = self.add_file( + src_file_path, + file_path=svg_file_path, + dest_dir_rel='svg', + ) + + svg_file_path_in_out = os.path.join( + 'static', 'svg', os.path.basename(svg_path) + ) + + svg.replace_with( + f"{{% include '{svg_file_path_in_out}' %}}" + ) + + os.remove(svg_file_path) + + for link in soup.find_all('link', href=True): + href = link['href'] + base_href = href.split('?')[0] if '?' in href else href + if base_href.endswith('.css'): + new_href = self.add_file( + src_file_path, base_href, dest_dir_rel='css' + ) + link['href'] = new_href + else: + # Check if base_href is an image or movie file + if base_href.lower().endswith( + image_movie_extensions + ): + new_src = self.add_file( + src_file_path, base_href, dest_dir_rel='img' + ) + # Assuming the original attribute was 'src' for images/movies + else: + # Handle other types of files or links here if needed + if href.startswith( + 'http://' + ) or href.startswith('https://'): + new_src = self.add_file(src_file_path, href) + else: + new_src = self.add_file( + src_file_path, base_href + ) + # from IPython import embed;embed() + # s + if link.has_key('src'): + link['src'] = new_src + elif link.has_key('href'): + link['href'] = new_src + # if "pro-tailwind.min" in href: + # from IPython import embed;embed() + # w + + for script in soup.find_all('script', src=True): + src = script['src'] + src_href = src.split('?')[0] if '?' in src else src + if src_href.endswith('.js'): + new_src = self.add_file( + src_file_path, src_href, dest_dir_rel='js' + ) + script['src'] = new_src + + for img in soup.find_all('img', src=True): + src = img['src'] + new_src = self.add_file( + src_file_path, src, dest_dir_rel='img' + ) + img['src'] = new_src + + jinja_template = str(soup.prettify()) + + with open( + dest_file_path, 'w', encoding='utf-8' + ) as dest_file: + dest_file.write(jinja_template) + + +# Example usage +# +# converter = HTMLTemplateConverter("source_directory", "destination_directory") +# converter.convert_html_to_jinja() + + +def new( + src_dir: str, dest_dir: str, static_dir: str = '', reset: bool = False +) -> HTMLTemplateConverter: + f = HTMLTemplateConverter( + src_dir, dest_dir, static_dir=static_dir, reset=reset + ) + f.convert() + return f diff --git a/env.sh b/env.sh new file mode 100755 index 0000000..f98f415 --- /dev/null +++ b/env.sh @@ -0,0 +1,31 @@ +#!/bin/bash + +# Get the directory where this script is located +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +cd "$SCRIPT_DIR" + +echo "Setting up Herolib environment in: $SCRIPT_DIR" + +# Check if uv is installed +if ! command -v uv &> /dev/null; then + echo "❌ uv is not installed. Please install uv first:" + echo " curl -LsSf https://astral.sh/uv/install.sh | sh" + echo " or visit: https://docs.astral.sh/uv/getting-started/installation/" + exit 1 +fi + +echo "✅ uv found: $(uv --version)" + +# Create virtual environment if it doesn't exist +if [ ! -d ".venv" ]; then + echo "📦 Creating Python virtual environment..." + uv venv + echo "✅ Virtual environment created" +else + echo "✅ Virtual environment already exists" +fi + +# Activate virtual environment +echo "🔄 Activating virtual environment..." 
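+# NOTE: activation only persists if this script is run with "source env.sh";
+# executing it as "./env.sh" activates the venv in a subshell only.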
+source .venv/bin/activate + diff --git a/examples/downloader/downloader_example.py b/examples/downloader/downloader_example.py new file mode 100644 index 0000000..97ce6d9 --- /dev/null +++ b/examples/downloader/downloader_example.py @@ -0,0 +1,467 @@ +import http.server +import json +import logging +import multiprocessing +import os +import queue # For queue.Empty exception +import shutil # For removing temp dir if TemporaryDirectory context manager not used for whole scope +import socketserver +import sys +import tempfile +import time + +import requests # For checking server readiness + +# Adjust the Python path to include the parent directory (project root) +# so that 'lib.downloader' can be imported. +sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) +from lib.downloader import STATE_FILE_NAME, download_site + +# Configure logging for the example script +logger = logging.getLogger(__name__) +logging.basicConfig( + level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s" +) + + +# This function needs to be at the top level for multiprocessing to find it. +def run_download_test_process(test_name, downloader_kwargs, queue): + """ + Wrapper to run download_site in a separate process and put summary in a queue. + """ + logger.info(f"--- Running Test in subprocess: {test_name} ---") + summary = None + try: + summary = download_site(**downloader_kwargs) + logger.info(f"Test {test_name} completed in subprocess.") + except Exception as e: + logger.error(f"Error in test {test_name} (subprocess): {e}", exc_info=True) + # summary will remain None or be an incomplete one if error is after its creation + finally: + queue.put({"test_name": test_name, "summary": summary}) + + +def create_temp_site_files(base_dir): + """Creates the dummy HTML files in a 'test_site' subdirectory of base_dir.""" + site_dir = os.path.join(base_dir, "test_site") + os.makedirs(os.path.join(site_dir, "sub"), exist_ok=True) + + with open(os.path.join(site_dir, "index.html"), "w") as f: + f.write( + '

<html><head><title>Index</title></head><body><h1>Index</h1>'
+ '<a href="page1.html">Page 1</a> <a href="sub/page2.html">Page 2</a> <a href="ignored_page.html">Ignored</a> <a href="nonexistent.html">Non Existent</a></body></html>'
+ )
+ with open(os.path.join(site_dir, "page1.html"), "w") as f:
+ f.write('<html><head><title>Page 1</title></head><body><h1>Page 1</h1> <a href="index.html">Index</a></body></html>')
+ with open(os.path.join(site_dir, "sub", "page2.html"), "w") as f:
+ f.write(
+ '<html><head><title>Page 2</title></head><body><h1>Page 2</h1> <a href="../index.html">Index</a> <a href="../page1.html">Back</a> <a href="http://example.com/">External</a></body></html>'
+ )
+ with open(os.path.join(site_dir, "ignored_page.html"), "w") as f:
+ f.write("<html><head><title>Ignored Page</title></head><body><h1>Ignored Page</h1></body></html>
") + logger.info(f"Created dummy site files in {site_dir}") + return site_dir + + +# Top-level target function for the HTTP server process +def _http_server_target_function(directory, host, port): + import functools + + # Use functools.partial to set the 'directory' argument for SimpleHTTPRequestHandler + # This ensures the server serves files from the specified 'directory'. + Handler = functools.partial( + http.server.SimpleHTTPRequestHandler, directory=directory + ) + + try: + with socketserver.TCPServer((host, port), Handler) as httpd: + logger.info( + f"HTTP server process (PID {os.getpid()}) started on {host}:{port}, serving {directory}" + ) + httpd.serve_forever() + except Exception as e: + logger.error( + f"HTTP server process (PID {os.getpid()}) failed: {e}", exc_info=True + ) + raise + + +def start_http_server_process(directory, host, port): + """Starts a simple HTTP server in a separate process.""" + # Removed server_ready_event = multiprocessing.Event() + + server_process = multiprocessing.Process( + target=_http_server_target_function, + args=(directory, host, port), # Removed server_ready_event from args + daemon=True, + ) + server_process.start() + logger.info( + f"HTTP server process (PID: {server_process.pid}) initiated for {directory} on {host}:{port}" + ) + # Removed event waiting logic + + return server_process + + +def find_free_port(): + """Finds an available port on the local machine.""" + with socketserver.TCPServer( + ("localhost", 0), http.server.BaseHTTPRequestHandler + ) as s: + return s.server_address[1] + + +def check_server_ready(url, retries=10, delay=0.5): + """Checks if the server is responding to requests.""" + for i in range(retries): + try: + response = requests.get(url, timeout=1) + if response.status_code == 200: + logger.info(f"Server is ready at {url}") + return True + except requests.ConnectionError: + logger.debug( + f"Server not ready yet at {url}, attempt {i + 1}/{retries}. Retrying in {delay}s..." + ) + except requests.Timeout: + logger.debug( + f"Server timed out at {url}, attempt {i + 1}/{retries}. Retrying in {delay}s..." + ) + time.sleep(delay) + logger.error(f"Server failed to start at {url} after {retries} retries.") + return False + + +def main(): + # Using TemporaryDirectory for automatic cleanup + with tempfile.TemporaryDirectory(prefix="downloader_test_") as temp_base_dir: + logger.info(f"Created temporary base directory: {temp_base_dir}") + + # 1. Create the dummy website files + site_root_path = create_temp_site_files( + temp_base_dir + ) # This is /tmp/xxxx/test_site + + # 2. Start the HTTP server + host = "localhost" + port = find_free_port() + server_process = start_http_server_process(site_root_path, host, port) + + test_url_base = f"http://{host}:{port}/" # Server serves from site_root_path, so URLs are relative to that + + # 3. Check if server is ready + # We check the index.html which is at the root of what's being served + if not check_server_ready(test_url_base + "index.html"): + logger.error("Test server failed to become ready. Aborting tests.") + if server_process.is_alive(): + server_process.terminate() + server_process.join(timeout=5) + return + + # 4. 
Define test parameters + # Destination for downloaded content will also be inside the temp_base_dir + download_destination_root = os.path.join(temp_base_dir, "downloaded_content") + os.makedirs(download_destination_root, exist_ok=True) + + tests_params_config = [ + ( + "1: Basic recursive download (depth 2)", + { + "start_url": test_url_base + "index.html", + "dest_dir": os.path.join(download_destination_root, "test1"), + "recursive": True, + "follow_links": True, + "depth_limit": 2, + "max_age_hours": 0, + }, + ), + ( + "2: With ignore_paths and max_age (reuse test1 dir)", + { + "start_url": test_url_base + "index.html", + "dest_dir": os.path.join( + download_destination_root, "test1" + ), # Use same dest + "recursive": True, + "follow_links": True, + "depth_limit": 2, + "ignore_paths": ["ignored_page.html"], + "max_age_hours": 1, # Should skip files from test1 if downloaded recently + }, + ), + ( + "3: Non-recursive (single page)", + { + "start_url": test_url_base + "page1.html", + "dest_dir": os.path.join(download_destination_root, "test3"), + "recursive": False, # Effectively depth_limit 0 for the spider + "max_age_hours": 0, + }, + ), + ( + "4: Depth limit 0 (only start_url)", + { + "start_url": test_url_base + "index.html", + "dest_dir": os.path.join(download_destination_root, "test4_depth0"), + "recursive": True, # 'recursive' flag enables depth control + "follow_links": True, + "depth_limit": 0, # Spider should only download index.html + "max_age_hours": 0, + }, + ), + ( + "5: Depth limit 1", + { + "start_url": test_url_base + "index.html", + "dest_dir": os.path.join(download_destination_root, "test5_depth1"), + "recursive": True, + "follow_links": True, + "depth_limit": 1, # index.html and its direct links + "max_age_hours": 0, + }, + ), + ] + + # 5. Run tests using multiprocessing + # A queue to get results back from subprocesses + results_queue = multiprocessing.Queue() + processes = [] + + for test_name, downloader_kwargs in tests_params_config: + # Ensure dest_dir exists for each test before starting + os.makedirs(downloader_kwargs["dest_dir"], exist_ok=True) + + p = multiprocessing.Process( + target=run_download_test_process, + args=(test_name, downloader_kwargs, results_queue), + ) + processes.append(p) + p.start() + + # Wait for all processes to complete + for p in processes: + p.join() + + # Collect and print results + logger.info("\n--- All Test Processes Completed. 
Results: ---") + all_tests_passed = True + results_collected = 0 + failed_tests_details = [] # Store details of failed tests + + # ANSI escape codes for colors + RED = "\033[91m" + GREEN = "\033[92m" + RESET = "\033[0m" + + while results_collected < len(tests_params_config): + current_test_passed = True + failure_reason = "" + try: + result = results_queue.get(timeout=10) # Timeout to avoid hanging + results_collected += 1 + test_name = result["test_name"] + summary = result["summary"] + + print(f"\nResult for Test: {test_name}") + if summary: + print(f" Summary: {json.dumps(summary, indent=2)}") + # Basic check: if errors array in summary is empty, consider it a pass for now + if summary.get("errors") and len(summary.get("errors")) > 0: + failure_reason = ( + f"Reported errors in summary: {summary.get('errors')}" + ) + logger.error(f" Test '{test_name}' {failure_reason}") + current_test_passed = False + elif ( + summary.get("successful_downloads", 0) == 0 + and not ( + test_name.startswith( + "4:" + ) # Test 4 might have 0 successful if only start_url is processed + and summary.get("total_processed_urls", 0) + > 0 # and it was processed + ) + and not test_name.startswith( + "2:" + ) # Test 2 might have 0 successful if all skipped + ): + # This condition is a bit loose. Specific checks below are more important. + # For now, we don't mark as failed here unless other checks also fail. + pass + + # Specific checks for state and re-download + if test_name.startswith("1:"): # After Test 1 + state_file = summary.get("state_file_path") + if state_file and os.path.exists(state_file): + with open(state_file, "r") as f: + state = json.load(f) + expected_success_files = [ + test_url_base + "index.html", + test_url_base + "page1.html", + test_url_base + "sub/page2.html", + ] + actual_success_count = 0 + for url, data in state.items(): + if ( + url in expected_success_files + and data.get("status") == "success" + ): + actual_success_count += 1 + if actual_success_count >= 3: + logger.info( + f" Test 1: State file check PASSED for key successful files." + ) + else: + failure_reason = f"State file check FAILED. Expected ~3 successes, got {actual_success_count}. State: {state}" + logger.error(f" Test 1: {failure_reason}") + current_test_passed = False + else: + failure_reason = ( + "State file not found or summary incomplete." + ) + logger.error(f" Test 1: {failure_reason}") + current_test_passed = False + + elif test_name.startswith( + "2:" + ): # After Test 2 (re-run on test1 dir) + state_file = summary.get("state_file_path") + if state_file and os.path.exists(state_file): + with open(state_file, "r") as f: + state = json.load(f) + skipped_count = 0 + main_files_to_check_skip = [ + test_url_base + "index.html", + test_url_base + "page1.html", + test_url_base + "sub/page2.html", + ] + for url_to_check in main_files_to_check_skip: + if ( + url_to_check in state + and state[url_to_check].get("status") + == "skipped_max_age" + ): + skipped_count += 1 + + if skipped_count >= 3: + logger.info( + f" Test 2: Re-download check (skipped_max_age) PASSED for key files." + ) + else: + failure_reason = f"Re-download check FAILED. Expected ~3 skips, got {skipped_count}. State: {state}" + logger.error(f" Test 2: {failure_reason}") + current_test_passed = False + + if ( + test_url_base + "ignored_page.html" in state + and state[test_url_base + "ignored_page.html"].get( + "status" + ) + == "success" + ): + ignore_fail_reason = "ignored_page.html was downloaded, but should have been ignored." 
                                logger.error(f"  Test 2: {ignore_fail_reason}")
                                if not failure_reason:
                                    failure_reason = ignore_fail_reason
                                else:
                                    failure_reason += f"; {ignore_fail_reason}"
                                current_test_passed = False
                        else:
                            failure_reason = "State file not found or summary incomplete."
                            logger.error(f"  Test 2: {failure_reason}")
                            current_test_passed = False

                    elif test_name.startswith("4:"):  # Depth 0
                        state_file = summary.get("state_file_path")
                        if state_file and os.path.exists(state_file):
                            with open(state_file, "r") as f:
                                state = json.load(f)
                            if (
                                len(state) == 1
                                and (test_url_base + "index.html") in state
                                and (
                                    state[test_url_base + "index.html"].get("status") == "success"
                                    # Allow "failed" for depth 0 if the single URL itself failed,
                                    # as the test is about *not* crawling further.
                                    or state[test_url_base + "index.html"].get("status") == "failed"
                                )
                            ):
                                logger.info("  Test 4: Depth 0 check PASSED (1 item in state).")
                            else:
                                failure_reason = f"Depth 0 check FAILED. Expected 1 item processed, got {len(state)}. State: {state}"
                                logger.error(f"  Test 4: {failure_reason}")
                                current_test_passed = False
                        else:
                            failure_reason = "State file not found or summary incomplete."
                            logger.error(f"  Test 4: {failure_reason}")
                            current_test_passed = False
                else:
                    failure_reason = "Did not return a summary (likely failed hard in subprocess)."
                    logger.error(f"  Test '{test_name}' {failure_reason}")
                    current_test_passed = False

            except queue.Empty:  # Changed from multiprocessing.queues.Empty
                test_name = f"Unknown Test (result {results_collected + 1} of {len(tests_params_config)})"
                failure_reason = "Queue was empty after waiting; a subprocess might have died without putting a result."
                logger.error(failure_reason)
                current_test_passed = False
                # Do not break here; keep trying to collect the remaining results,
                # but count this attempt as processed so the loop cannot wait
                # forever for a result that will never arrive.
                results_collected += 1

            if not current_test_passed:
                all_tests_passed = False
                failed_tests_details.append({"name": test_name, "reason": failure_reason})

        # 6. Terminate the server
        logger.info("Terminating HTTP server process...")
        if server_process.is_alive():
            server_process.terminate()
            server_process.join(timeout=5)  # Wait for it to terminate
            if server_process.is_alive():
                logger.warning("Server process did not terminate gracefully, attempting to kill.")
                server_process.kill()  # Force kill if terminate didn't work
                server_process.join(timeout=5)

        if server_process.is_alive():
            logger.error("SERVER PROCESS COULD NOT BE STOPPED.")
        else:
            logger.info("HTTP server process stopped.")

        if failed_tests_details:
            logger.error(f"\n--- {RED}Summary of Failed Tests{RESET} ---")
            for failed_test in failed_tests_details:
                logger.error(f"{RED}  Test: {failed_test['name']}{RESET}")
                logger.error(f"{RED}    Reason: {failed_test['reason']}{RESET}")
            logger.error(f"\n{RED}Some downloader tests FAILED.{RESET}")
            sys.exit(1)  # Exit with error code if tests failed
        else:
            logger.info(
                f"\n{GREEN}All downloader tests PASSED (based on implemented checks).{RESET}"
            )

        # Note: TemporaryDirectory temp_base_dir is automatically cleaned up here
        logger.info(
            f"Temporary base directory {temp_base_dir} and its contents (should be) removed."
+ ) + + +if __name__ == "__main__": + # It's good practice to protect the main call for multiprocessing, + # especially on Windows, though 'spawn' (default on macOS for 3.8+) is generally safer. + multiprocessing.freeze_support() # For PyInstaller compatibility, good habit + main() diff --git a/examples/downloader/threefold_scraper.py b/examples/downloader/threefold_scraper.py new file mode 100755 index 0000000..37b1526 --- /dev/null +++ b/examples/downloader/threefold_scraper.py @@ -0,0 +1,134 @@ +#!/usr/bin/env python3 + +import argparse +import json +import os +import tempfile + +import html2text +import lmstudio as lms +import requests +import scrapy +from IPython import embed +from openai import OpenAI +from scrapy.crawler import CrawlerProcess +from scrapy.http import Request + +client = OpenAI(base_url="http://localhost:1234/v1", api_key="YOUR_ANY_API_KEY") +# api_key is usually required, but for LM Studio it might not be strictly necessary. +# client = OpenAI(base_url="http://localhost:1234/v1", api_key="YOUR_ANY_API_KEY") +# openai.api_key = "YOUR_ANY_API_KEY" # Replace with your actual key if needed + + +class ThreeFoldDocsSpider(scrapy.Spider): + name = "threefold_docs" + start_urls = ["https://threefold.info/tech/docs/"] + + def parse(self, response): + # Extract the main content + content = response.css("main").get() + + # Convert HTML to markdown using LMStudio + markdown_content = convert_html_to_markdown_with_lmstudio(content) + + # Save the content + if markdown_content: + # Remove leading whitespace and markdown code block fence if present + markdown_content = markdown_content.lstrip() + if markdown_content.startswith("```markdown"): + markdown_content = markdown_content[len("```markdown\n") :] + elif markdown_content.startswith("```"): + markdown_content = markdown_content[len("```\n") :] + + with open("threefold_docs.md", "w", encoding="utf-8") as f: + f.write(markdown_content) + + self.log(f"Saved content to threefold_docs.md") + else: + self.log(f"Could not convert HTML to Markdown for {response.url}") + + +def convert_html_to_markdown_with_lmstudio(html_content): + """Convert HTML to Markdown using LMStudio with jinaai.readerlm-v2""" + try: + # Use the OpenAI-compatible API provided by LMStudio + response = client.chat.completions.create( + model="jinaai/ReaderLM-v2", # Assuming this is the correct model ID + messages=[ + { + "role": "system", + "content": "You are a helpful assistant that converts HTML to Markdown.", + }, + { + "role": "user", + "content": f"Please convert the following HTML to Markdown:\n\n{html_content}", + }, + ], + stream=False, # Set to True if streaming is desired + ) + return response.choices[0].message.content + except Exception as e: + print(f"Error converting HTML to Markdown with LMStudio: {e}") + return None + + +def scrape_threefold_docs(): + """Run the Scrapy spider to scrape ThreeFold docs""" + process = CrawlerProcess( + { + "USER_AGENT": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36", + "LOG_LEVEL": "INFO", + } + ) + + process.crawl(ThreeFoldDocsSpider) + process.start() + + return "threefold_docs.md" + + +# Note: The lmstudio Python library primarily provides an OpenAI-compatible client. +# Automated model downloading and loading are typically handled manually in the +# LM Studio application or through its local server API, not directly via this client. 
+# Ensure the "jinaai/jina-embeddings-v2-base-en" model (or the desired Jina embeddings v3 model if available) +# is downloaded and loaded in your LM Studio application before running this script. + + +def get_embedding_with_lmstudio(text, model_name="jinaai/jina-embeddings-v2-base-en"): + """Get embedding for text using LM Studio with the specified model.""" + # Ensure the "jinaai/jina-embeddings-v3" model is downloaded and loaded in your LM Studio application before running this script. + try: + # Use the OpenAI-compatible API for embeddings + response = client.embeddings.create(model=model_name, input=[text]) + return response.data[0].embedding + except Exception as e: + print(f"Error getting embedding with LMStudio: {e}") + print("Please ensure LM Studio is running and the specified model is loaded.") + return None + + +def main(): + model_to_use = "jinaai/jina-embeddings-v2-base-en" + + markdown_file = scrape_threefold_docs() + + embed() + + if os.path.exists(markdown_file): + with open(markdown_file, "r", encoding="utf-8") as f: + content = f.read() + + # Example usage of the embedding function + embedding = get_embedding_with_lmstudio(content, model_to_use) + if embedding: + print( + f"Successfully generated embedding (first 10 dimensions): {embedding[:10]}..." + ) + else: + print("Failed to generate embedding.") + + # Model unloading should be done manually in LM Studio. + + +if __name__ == "__main__": + main() diff --git a/install.sh b/install.sh new file mode 100755 index 0000000..8c07661 --- /dev/null +++ b/install.sh @@ -0,0 +1,43 @@ +#!/bin/bash + +# Herolib Web Server Installation Script +# This script sets up the necessary environment for the Flask web server. + +set -e # Exit on any error + +# Colors for output +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +NC='\033[0m' # No Color + +# Script directory +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +cd "$SCRIPT_DIR" + +echo -e "${BLUE}🔧 Setting up Herolib Web Server Environment${NC}" +echo "==================================================" + +# Check if uv is installed +if ! command -v uv &> /dev/null; then + echo -e "${YELLOW}⚠️ uv is not installed. Installing uv...${NC}" + curl -LsSf https://astral.sh/uv/install.sh | sh + source $HOME/.cargo/env + echo -e "${GREEN}✅ uv installed${NC}" +fi + +echo -e "${GREEN}✅ uv found${NC}" + +# Initialize uv project if not already done +if [ ! -f "pyproject.toml" ]; then + echo -e "${YELLOW}⚠️ No pyproject.toml found. Initializing uv project...${NC}" + uv init --no-readme --python 3.13 + echo -e "${GREEN}✅ uv project initialized${NC}" +fi + +# Sync dependencies +echo -e "${YELLOW}📦 Installing dependencies with uv...${NC}" +uv sync +echo -e "${GREEN}✅ Dependencies installed${NC}" + diff --git a/lib/clients/assemblyai/client.py b/lib/clients/assemblyai/client.py new file mode 100644 index 0000000..d5c9553 --- /dev/null +++ b/lib/clients/assemblyai/client.py @@ -0,0 +1,69 @@ +import os + +from pydub import AudioSegment +import assemblyai as aai + + +class Client: + def __init__(self): + api_key = os.getenv("ASSEMBLYAI") + + if not api_key: + raise EnvironmentError( + "Please set the ASSEMBLYAI environment variable with your AssemblyAI API key." 
            )

        self.api_key = api_key
        aai.settings.api_key = self.api_key
        self.transcriber = aai.Transcriber()

    def convert_to_ogg_mono(self, input_path: str, output_path: str):
        """Converts an audio file from .mp4 to .ogg (mono)."""
        audio = AudioSegment.from_file(input_path, format="mp4")
        # Convert to mono if needed by uncommenting the line below
        # audio = audio.set_channels(1)
        audio.export(output_path, format="ogg")
        print(f"Converted to .ogg in {output_path}")

    def transcribe_audio(self, audio_path: str, output_path: str):
        """Transcribes the audio file and saves the transcription to a Markdown file."""
        config = aai.TranscriptionConfig(
            speaker_labels=True,
        )

        transcript = self.transcriber.transcribe(audio_path, config)

        with open(output_path, "w", encoding="utf-8") as f:
            for utterance in transcript.utterances:
                f.write(
                    f"** Speaker {utterance.speaker}:\n{utterance.text}\n-------------\n"
                )

        print(f"Transcription saved to {output_path}")

    def transcribe_audio_file(self, input_path: str, output_transcription_path: str):
        """Handles the entire process from conversion to transcription and cleanup."""
        converted_audio_path = input_path.replace(".mp4", ".ogg")

        # Convert .mp4 to .ogg
        self.convert_to_ogg_mono(input_path, converted_audio_path)

        # Perform the transcription
        self.transcribe_audio(converted_audio_path, output_transcription_path)

        # Optionally, clean up the converted file
        os.remove(converted_audio_path)
        print(f"Removed temporary file {converted_audio_path}")


# Example usage:
if __name__ == "__main__":
    # The client reads the ASSEMBLYAI API key from the environment

    # Define the paths for the input audio and output transcription
    input_audio_path = "/tmp/475353425.mp4"
    output_transcription_path = "/tmp/transcribe_475353425.md"

    # Perform the transcription process
    client = Client()
    client.transcribe_audio_file(input_audio_path, output_transcription_path)
diff --git a/lib/clients/readme.md b/lib/clients/readme.md
new file mode 100644
index 0000000..767080f
--- /dev/null
+++ b/lib/clients/readme.md
@@ -0,0 +1,19 @@
# Vimeo Client

We need the following functionality:

- upload a video
- download a video
- list videos

## Some info

- https://developer.vimeo.com/api/reference

## Remarks

To use the client, make sure you have the secrets:

```bash
hero git clone -u git@git.threefold.info:despiegk/hero_secrets.git
source git.threefold.info/projectmycelium/hero_server/myenv.sh
```
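
## Usage sketch

A minimal, non-authoritative sketch against the `VimeoClient` defined in `lib/clients/vimeo/client.py` (the import path is an assumption; adjust to your layout). It reads `VIMEO_CLIENT_ID`, `VIMEO_SECRET`, `VIMEO_ACCESSTOKEN_ID` and `VIMEO_USER_ID` from the environment:

```python
from client import new  # lib/clients/vimeo/client.py

cl = new()
# folder id taken from the example at the bottom of client.py
for video in cl.get_videos(folder=10700101):
    video_id = video.uri.split("/")[-1]
    print(f"{video.name} : {video_id}")
cl.download("475353425", "/tmp/475353425.mp4")
```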
diff --git a/lib/clients/stellar/__init__.py b/lib/clients/stellar/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/lib/clients/stellar/horizon.py b/lib/clients/stellar/horizon.py
new file mode 100644
index 0000000..7e7b391
--- /dev/null
+++ b/lib/clients/stellar/horizon.py
@@ -0,0 +1,241 @@
from dataclasses import dataclass, field, asdict
from typing import List, Optional
from stellar_sdk import Keypair, Server, StrKey
import json
import redis
from stellar.model import StellarAsset, StellarAccount
import os
import csv
import toml
from herotools.texttools import description_fix


class HorizonServer:
    def __init__(self, instance: str = "default", network: str = "main", tomlfile: str = "", owner: str = ""):
        """
        Load Stellar account information through a Horizon server.
        Horizon is the API layer of the Stellar network: it provides endpoints to submit transactions, check account balances, and perform other operations on the ledger.
        All results are cached in Redis.
        """
        self.redis_client = redis.Redis(host='localhost', port=6379, db=0)  # Adjust as needed
        self.instance = instance
        if network not in ['main', 'testnet']:
            raise ValueError("Invalid network value. Must be 'main' or 'testnet'.")
        self.network = network
        testnet = self.network == 'testnet'
        self.server = Server("https://horizon-testnet.stellar.org" if testnet else "https://horizon.stellar.org")
        self.tomlfile = os.path.expanduser(tomlfile)
        self.owner = owner
        if self.tomlfile:
            self.toml_load()

    def account_exists(self, pubkey: str) -> bool:
        """
        Check if an account exists in the Redis cache based on the public key.
        """
        redis_key = f"stellar:{self.instance}:accounts:{pubkey}"
        # redis `exists` returns the number of matching keys, so compare to 0
        return self.redis_client.exists(redis_key) > 0

    def account_get(self, key: str, reload: bool = False, name: str = "", description: str = "", cat: str = "") -> StellarAccount:
        """
        Load a Stellar account's information.

        Args:
            key (str): The private or public key of the Stellar account.
            reload (bool, optional): Whether to force a refresh of the cached data. Defaults to False.
            name (str, optional): Name for the account. Defaults to "".
            description (str, optional): Description for the account. Defaults to "".
            cat (str, optional): Category of the account. Defaults to "".

        Returns:
            StellarAccount: A struct containing the account's information.
        """

        if key == "" and name:
            for redis_key in self.redis_client.scan_iter(f"stellar:{self.instance}:accounts:*"):
                data = self.redis_client.get(redis_key)
                if data:
                    data = json.loads(data)
                    if data.get('name') == name and data.get('priv_key', data.get('public_key')):
                        key = data.get('priv_key', data.get('public_key'))
                        break

        if key == "":
            raise ValueError("No key provided")

        # Determine if the key is a public or private key
        if StrKey.is_valid_ed25519_public_key(key):
            public_key = key
            priv_key = ""
        elif StrKey.is_valid_ed25519_secret_seed(key):
            priv_key = key
            keypair = Keypair.from_secret(priv_key)
            public_key = keypair.public_key
        else:
            raise ValueError("Invalid Stellar key provided")

        redis_key = f"stellar:{self.instance}:accounts:{public_key}"

        data = self.redis_client.get(redis_key)
        changed = False
        if data:
            try:
                data = json.loads(data)
            except Exception as e:
                print(data)
                raise e
            data['assets'] = [StellarAsset(**asset) for asset in data['assets']]
            account = StellarAccount(**data)
            if description != "" and description != account.description:
                account.description = description
                changed = True
            if name != "" and name != account.name:
                account.name = name
                changed = True
            if self.owner != "" and self.owner != account.owner:
                account.owner = self.owner
                changed = True
            if cat != "" and cat != account.cat:
                account.cat = cat
                changed = True
        else:
            account = StellarAccount(public_key=public_key, description=description, name=name, priv_key=priv_key, owner=self.owner, cat=cat)
            changed = True

        if reload or account.assets == []:
            changed = True
            account_data = self.server.accounts().account_id(public_key).call()
            account.assets.clear()  # Clear existing assets to avoid duplication
            for balance in account_data['balances']:
                asset_type = balance['asset_type']
                if asset_type == 'native':
                    account.assets.append(StellarAsset(type="XLM", balance=balance['balance']))
                elif 'asset_code' in balance:
                    account.assets.append(StellarAsset(
                        type=balance['asset_code'],
                        issuer=balance['asset_issuer'],
                        balance=balance['balance']
                    ))

        # Persist to the Redis cache if anything changed
        if changed:
            self.account_save(account)

        return account
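    # Usage sketch (a non-authoritative example, assuming a local Redis and
    # the `new()` factory defined at the bottom of this file):
    #   hs = new(instance="default", network="main")
    #   acc = hs.account_get(key="G...", name="mywallet")
    #   print(acc.balance_str())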
    def comment_add(self, pubkey: str, comment: str, ignore_non_exist: bool = False):
        """
        Add a comment to a Stellar account based on the public key.

        Args:
            pubkey (str): The public key of the Stellar account.
            comment (str): The comment to add to the account.
        """
        comment = description_fix(comment)
        if not self.account_exists(pubkey):
            if ignore_non_exist:
                return
            raise ValueError("Account does not exist in the cache")
        account = self.account_get(pubkey)
        account.comments.append(comment)
        self.account_save(account)

    def account_save(self, account: StellarAccount):
        """
        Save a Stellar account's information to the Redis cache.

        Args:
            account (StellarAccount): The account to save.
        """
        redis_key = f"stellar:{self.instance}:accounts:{account.public_key}"
        # Cache for 10 minutes (600 seconds)
        self.redis_client.setex(redis_key, 600, json.dumps(asdict(account)))

    def reload_cache(self):
        """
        Walk over all known accounts and reload their information.
        """
        for redis_key in self.redis_client.scan_iter(f"stellar:{self.instance}:accounts:*"):
            data = self.redis_client.get(redis_key)
            if data:
                data = json.loads(data)
                public_key = data.get('public_key')
                if public_key:
                    self.account_get(public_key, reload=True)

    # format is PUBKEY,DESCRIPTION in text format
    def load_accounts_csv(self, file_path: str):
        file_path = os.path.expanduser(file_path)
        if not os.path.exists(file_path):
            raise FileNotFoundError(f"File '{file_path}' does not exist.")
        try:
            with open(file_path, 'r', newline='') as file:
                reader = csv.reader(file, delimiter=',')
                for row in reader:
                    if row and len(row) >= 2:  # Row must have a pubkey and at least one comment field
                        pubkey = row[0].strip()
                        comment = ','.join(row[1:]).strip()
                        if self.account_exists(pubkey):
                            self.comment_add(pubkey, comment)
        except IOError as e:
            raise Exception(f"Error reading file: {e}")
        except csv.Error as e:
            raise Exception(f"Error parsing CSV: {e}")

    def accounts_get(self) -> List[StellarAccount]:
        """
        Retrieve a list of all known Stellar accounts from the Redis cache.

        Returns:
            List[StellarAccount]: A list of StellarAccount objects.
        """
        accounts = []
        for redis_key in self.redis_client.scan_iter(f"stellar:{self.instance}:accounts:*"):
            pubkey = redis_key.decode().split(':')[-1]  # redis returns bytes keys
            accounts.append(self.account_get(key=pubkey))
        return accounts

    def toml_save(self):
        """
        Save the list of all known Stellar accounts to the TOML file (self.tomlfile).
        """
        if self.tomlfile == "":
            raise ValueError("No TOML file path provided")
        accounts = self.accounts_get()
        accounts_dict = {account.public_key: asdict(account) for account in accounts}
        with open(self.tomlfile, 'w') as file:
            toml.dump(accounts_dict, file)

    def toml_load(self):
        """
        Load the list of Stellar accounts from the TOML file (self.tomlfile) and save them to the Redis cache.
+ """ + if not os.path.exists(self.tomlfile): + return + #raise FileNotFoundError(f"Error: File '{self.tomlfile}' does not exist.") + with open(self.tomlfile, 'r') as file: + accounts_dict = toml.load(file) + for pubkey, account_data in accounts_dict.items(): + account_data['assets'] = [StellarAsset(**asset) for asset in account_data['assets']] + account = StellarAccount(**account_data) + self.account_save(account) + + + +def new(instance: str = "default",owner: str = "", network: str = "main", tomlfile: str = "") -> HorizonServer: + return HorizonServer(instance=instance, network=network, tomlfile=tomlfile,owner=owner) diff --git a/lib/clients/stellar/model.py b/lib/clients/stellar/model.py new file mode 100644 index 0000000..205ba6a --- /dev/null +++ b/lib/clients/stellar/model.py @@ -0,0 +1,70 @@ +from dataclasses import dataclass, field, asdict +from typing import List, Optional +from stellar_sdk import Keypair, Server, StrKey +import json +import redis + +@dataclass +class StellarAsset: + type: str + balance: float + issuer: str = "" + + def format_balance(self): + balance_float = float(self.balance) + formatted_balance = f"{balance_float:,.2f}" + if '.' in formatted_balance: + formatted_balance = formatted_balance.rstrip('0').rstrip('.') + return formatted_balance + + def md(self): + formatted_balance = self.format_balance() + return f"- **{self.type}**: {formatted_balance}" + +@dataclass +class StellarAccount: + owner: str + priv_key: str = "" + public_key: str = "" + assets: List[StellarAsset] = field(default_factory=list) + name: str = "" + description: str = "" + comments: List[str] = field(default_factory=list) + cat: str = "" + question: str = "" + + def md(self): + result = [ + f"# Stellar Account: {self.name or 'Unnamed'}","", + f"**Public Key**: {self.public_key}", + f"**Cat**: {self.cat}", + f"**Description**: {self.description[:60]}..." 
if self.description else "**Description**: None",
            f"**Question**: {self.question}" if self.question else "**Question**: None",
            "",
            "## Assets:", ""
        ]

        for asset in self.assets:
            result.append(asset.md())

        if len(self.assets) == 0:
            result.append("- No assets")

        result.append("")

        if self.comments:
            result.append("## Comments:")
            for comment in self.comments:
                if '\n' in comment:
                    multiline_comment = "\n  ".join(comment.split('\n'))
                    result.append(f"- {multiline_comment}")
                else:
                    result.append(f"- {comment}")

        return "\n".join(result)

    def balance_str(self) -> str:
        out = []
        for asset in self.assets:
            out.append(f"{asset.type}:{float(asset.balance):,.0f}")
        return " ".join(out)
diff --git a/lib/clients/stellar/model_accounts.v b/lib/clients/stellar/model_accounts.v
new file mode 100644
index 0000000..b76033a
--- /dev/null
+++ b/lib/clients/stellar/model_accounts.v
@@ -0,0 +1,78 @@
module stellar

import freeflowuniverse.crystallib.core.texttools

pub struct DigitalAssets {
pub mut:
	// accounts known to this wallet, used by account_get below
	accounts []Account
}

pub struct Owner {
pub mut:
	name     string
	accounts []Account
}

@[params]
pub struct AccountGetArgs {
pub mut:
	name   string
	bctype BlockChainType
}

pub fn (self DigitalAssets) account_get(args_ AccountGetArgs) !&Account {
	mut accounts := []&Account{}
	mut args := args_

	args.name = texttools.name_fix(args.name)

	for account in self.accounts {
		if account.name == args.name && account.bctype == args.bctype {
			accounts << &account
		}
	}

	if accounts.len == 0 {
		return error('No account found with the given name:${args.name} and blockchain type: ${args.bctype}')
	} else if accounts.len > 1 {
		return error('Multiple accounts found with the given name:${args.name} and blockchain type: ${args.bctype}')
	}

	return accounts[0]
}

pub struct Account {
pub mut:
	name        string
	secret      string
	pubkey      string
	description string
	cat         string
	owner       string
	assets      []Asset
	bctype      BlockChainType
}

pub struct Asset {
pub mut:
	amount    int
	assettype AssetType
}

pub fn (self Asset) name() string {
	return self.assettype.name
}

pub struct AssetType {
pub mut:
	name   string
	issuer string
	bctype BlockChainType
}

pub enum BlockChainType {
	stellar_pub
	stellar_test
}
diff --git a/lib/clients/stellar/testnet.py b/lib/clients/stellar/testnet.py
new file mode 100644
index 0000000..2d28744
--- /dev/null
+++ b/lib/clients/stellar/testnet.py
@@ -0,0 +1,46 @@
from typing import Tuple
from stellar_sdk import Server, Keypair, TransactionBuilder, Network, Asset, Signer, TransactionEnvelope
import redis
import requests
import json
import time

def create_account_on_testnet() -> Tuple[str, str]:

    # Redis client shared by the nested helpers below
    redis_client = redis.StrictRedis(host='localhost', port=6379, db=0)

    def fund(public_key: str) -> float:
        # Request funds from the Stellar testnet friendbot
        response = requests.get(f"https://friendbot.stellar.org?addr={public_key}")
        if response.status_code != 200:
            raise Exception("Failed to fund new account with friendbot")
        time.sleep(1)
        return balance(public_key)

    def create_account() -> Tuple[str, str]:
        # Generate keypair
        keypair = Keypair.random()
        public_key = keypair.public_key
        secret_key = keypair.secret
        account_data = {
            "public_key": public_key,
            "secret_key": secret_key
        }
        redis_client.set("stellartest:testaccount", json.dumps(account_data))
        time.sleep(1)
        return public_key, secret_key
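    # Minimal sketch of a balance helper (an assumption: the intent is the
    # account's native XLM balance, fetched via the testnet Horizon API)
    def balance(public_key: str) -> float:
        server = Server("https://horizon-testnet.stellar.org")
        account_data = server.accounts().account_id(public_key).call()
        total = 0.0
        for b in account_data["balances"]:
            if b["asset_type"] == "native":
                total += float(b["balance"])
        return total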
    # Check if the account already exists in Redis
    if redis_client.exists("stellartest:testaccount"):
        account_data = json.loads(redis_client.get("stellartest:testaccount"))
        public_key = account_data["public_key"]
        secret_key = account_data["secret_key"]
        if balance(public_key) < 100:
            fund(public_key)
        return public_key, secret_key
    else:
        public_key, secret_key = create_account()
        fund(public_key)
        return public_key, secret_key
diff --git a/lib/clients/telegram/__init__.py b/lib/clients/telegram/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/lib/clients/telegram/bot.py b/lib/clients/telegram/bot.py
new file mode 100644
index 0000000..a57f5dc
--- /dev/null
+++ b/lib/clients/telegram/bot.py
@@ -0,0 +1,102 @@
import json
import redis
import telebot
import os
import logging
from termcolor import colored
from telebot.types import InlineKeyboardMarkup, InlineKeyboardButton
from telebot.formatting import escape_markdown

from bot_audio import audio_add
from bot_text import text_add
from ai.ask import ai_assistent, AIAssistant

class MyBot:
    def __init__(self, ai_reset: bool = False):
        # Initialize logging
        logging.basicConfig(level=logging.INFO, format='%(message)s')
        self.logger = logging.getLogger(__name__)

        # Initialize Redis connection
        self.redis_client = redis.Redis(host='localhost', port=6379, db=0)

        # Initialize Telegram bot
        self.telebotkey = os.getenv("TELEBOT")
        if self.telebotkey:
            self.logger.info(colored("TELEBOT key set", "green"))
            self.bot = telebot.TeleBot(self.telebotkey)
        else:
            raise Exception("can't find TELEBOT in ENV")

        # Set up message handlers
        self.setup_handlers()
        audio_add(self)
        text_add(self, reset=ai_reset)

    def setup_handlers(self):
        @self.bot.message_handler(commands=['help'])
        def send_welcome(message):
            self.bot.reply_to(message, """\
Hi there, I am your hero.
+Just speak to me or do /start or /help +""") + + @self.bot.message_handler(commands=['start']) + def start_command(message): + chat_id = message.chat.id + + keyboard = InlineKeyboardMarkup() + subscribe_button = InlineKeyboardButton("Subscribe to Updates", callback_data='subscribe') + unsubscribe_button = InlineKeyboardButton("Unsubscribe from Updates", callback_data='unsubscribe') + keyboard.row(subscribe_button, unsubscribe_button) + + self.bot.reply_to(message, "Please choose an option:", reply_markup=keyboard) + + @self.bot.callback_query_handler(func=lambda call: True) + def callback_query(call): + chat_id = call.message.chat.id + + if call.data == 'subscribe': + self.redis_client.hset('subscribed_chats', chat_id, '1') + self.bot.answer_callback_query(call.id, "You have subscribed to updates.") + print(f"User subscribed to updates: {chat_id}") + elif call.data == 'unsubscribe': + self.redis_client.hdel('subscribed_chats', chat_id) + self.bot.answer_callback_query(call.id, "You have unsubscribed from updates.") + print(f"User unsubscribed from updates: {chat_id}") + + def send_message_to_subscribers(self, message): + subscribed_chats = self.redis_client.hgetall('subscribed_chats') + for chat_id in subscribed_chats: + try: + self.bot.send_message(chat_id.decode('utf-8'), message) + except Exception as e: + print(f"Failed to send message to chat {chat_id}: {str(e)}") + + def send_error_to_telegram(self,chat_id, error_message): + # Format the error message for Telegram + telegram_message = f"🚨 Error Occurred 🚨\n\n" + telegram_message += f"app: {escape_markdown(error_message['app'])}\n" + telegram_message += f"Function: {escape_markdown(error_message['function'])}\n" + telegram_message += f"msg: {escape_markdown(error_message['msg'])}\n" + telegram_message += f"Exception Type: {escape_markdown(error_message['exception_type'])}\n" + telegram_message += f"Exception Message: ```\n{escape_markdown(error_message['exception_message'])}\n```\n" + if 'traceback' in error_message: + telegram_message += f"Traceback:\n```\n{escape_markdown(error_message['traceback'])}\n```" + # Send the error message to the subscribed chat + self.bot.send_message(chat_id, telegram_message, parse_mode='Markdown') + + + def start(self): + print("Bot started") + # Start the bot + self.bot.polling() + + +def bot_new() -> MyBot: + return MyBot() + +# Usage +if __name__ == "__main__": + my_bot = bot_new() + my_bot.start() \ No newline at end of file diff --git a/lib/clients/telegram/bot_audio.py b/lib/clients/telegram/bot_audio.py new file mode 100644 index 0000000..d60fe03 --- /dev/null +++ b/lib/clients/telegram/bot_audio.py @@ -0,0 +1,72 @@ +import os +from pydub import AudioSegment +import whisper + +def audio_add(self): + + self.model = whisper.load_model("base") + + @self.bot.message_handler(content_types=['audio', 'voice']) #, 'document' + def handle_audio(message): + try: + chat_id = message.chat.id + file_info = None + audio_path = None + + if message.content_type == 'audio': + file_info = self.bot.get_file(message.audio.file_id) + audio_path = f"/tmp/audio/{message.audio.file_id}.mp3" + elif message.content_type == 'voice': + file_info = self.bot.get_file(message.voice.file_id) + audio_path = f"/tmp/audio/{message.voice.file_id}.ogg" + + if file_info: + downloaded_file = self.bot.download_file(file_info.file_path) + + # Ensure the directory exists + os.makedirs(os.path.dirname(audio_path), exist_ok=True) + + # Save the audio file + with open(audio_path, 'wb') as new_file: + new_file.write(downloaded_file) + + 
#bot.send_message(chat_id, f"Audio received and saved successfully to {audio_path}.") + print(f"Audio received and saved to {audio_path}") + + + # Convert to WAV format if necessary + wav_path = audio_path.replace('.mp3', '.wav').replace('.ogg', '.wav') + if audio_path.endswith('.mp3') or audio_path.endswith('.ogg'): + audio = AudioSegment.from_file(audio_path) + audio.export(wav_path, format='wav') + else: + wav_path = audio_path + + # Transcribe audio using Whisper + result = self.model.transcribe(wav_path) + transcription = result["text"] + + self.bot.send_message(chat_id, transcription, parse_mode='Markdown') + print(f"Audio received and saved to {audio_path}") + print(f"Transcription: {transcription}") + + text2 = self.text_process(self,transcription) + + print(f"Processed text {chat_id}: {text2}") + + if len(text2)>0: + self.bot.send_message(chat_id, text2) + + + + except Exception as e: + error_message = { + 'app': 'Telegram Bot', + 'function': 'handle_audio', + 'msg': 'Failed to process audio file', + 'exception_type': type(e).__name__, + 'exception_message': str(e) + } + self.send_error_to_telegram(chat_id, error_message) + print(f"Error processing audio file: {e}") + diff --git a/lib/clients/telegram/bot_text.py b/lib/clients/telegram/bot_text.py new file mode 100644 index 0000000..78b3a9e --- /dev/null +++ b/lib/clients/telegram/bot_text.py @@ -0,0 +1,51 @@ +import os +from ai.ask import ai_assistent + +def text_add(self,reset:bool=False): + + self.ai_assistent = ai_assistent(reset=reset) + self.text_process = text_process + + @self.bot.message_handler(content_types=['text']) + def handle_text(message): + try: + chat_id = message.chat.id + + text = message.text + + # Here you can add your logic to process the text + # For now, let's just echo the message back + # response = f"You said: {text}" + + print(f"Received text from {chat_id}: {text}") + + text2 = self.text_process(self,text) + + print(f"Processed text {chat_id}: {text2}") + + if len(text2)>0: + self.bot.send_message(chat_id, text2) + + + except Exception as e: + error_message = { + 'app': 'Telegram Bot', + 'function': 'handle_text', + 'msg': 'Failed to process text', + 'exception_type': type(e).__name__, + 'exception_message': str(e) + } + self.send_error_to_telegram(chat_id, error_message) + print(f"Error processing text file: {e}") + + +def text_process(self, txt) -> str: + if "translate" not in txt.lower(): + txt+='''\n\n + only output the heroscript, no comments + ''' + response = self.ai_assistent.ask( + category='timemgmt', + name='schedule', + question=txt) + return response \ No newline at end of file diff --git a/lib/clients/telegram/errorqueue.py b/lib/clients/telegram/errorqueue.py new file mode 100644 index 0000000..fbf545a --- /dev/null +++ b/lib/clients/telegram/errorqueue.py @@ -0,0 +1,36 @@ +import json +import redis +import telebot +import threading +from telebot.types import InlineKeyboardMarkup, InlineKeyboardButton +import time +from telebot.formatting import escape_markdown +import os +from telegram.bot import send_error_to_telegram + +# Initialize Redis connection +redis_client = redis.Redis(host='localhost', port=6379, db=0) + +#get errors from redis and send them to bot if subscription done +def process_error_queue(): + while True: + # Pop an error message from the Redis queue + error_json = redis_client.lpop('error_queue') + + if error_json: + # Deserialize the error message from JSON + error_message = json.loads(error_json) + + # Get all subscribed chat IDs from Redis + subscribed_chats = 
redis_client.hgetall('subscribed_chats') + + # Send the error message to all subscribed chats + for chat_id in subscribed_chats.keys(): + send_error_to_telegram(int(chat_id), error_message) + else: + # If the queue is empty, wait for a short interval before checking again + time.sleep(1) + +# Start processing the error queue +process_error_queue_thread = threading.Thread(target=process_error_queue) +process_error_queue_thread.start() \ No newline at end of file diff --git a/lib/clients/vimeo/client.py b/lib/clients/vimeo/client.py new file mode 100644 index 0000000..92fe664 --- /dev/null +++ b/lib/clients/vimeo/client.py @@ -0,0 +1,142 @@ +import os +from typing import List, Optional + +import requests +import vimeo +from model_video import VideoInfo, video_model_load, videos_model_load + + +class VimeoClient: + def __init__(self): + # Retrieve necessary credentials from environment variables + self.client_id = os.getenv("VIMEO_CLIENT_ID") + self.client_secret = os.getenv("VIMEO_SECRET") + self.access_token = os.getenv("VIMEO_ACCESSTOKEN_ID") + self.user_id = os.getenv("VIMEO_USER_ID") + + # Check if all environment variables are present + if not all([self.client_id, self.client_secret, self.access_token, self.user_id]): + raise EnvironmentError( + "Please set the VIMEO_CLIENT_ID, VIMEO_SECRET,VIMEO_USER_ID and VIMEO_ACCESSTOKEN_ID environment variables." + ) + + # Initialize the Vimeo client + self.client = vimeo.VimeoClient(token=self.access_token, key=self.client_id, secret=self.client_secret) + + def upload(self, file: str, video_title: str, description: str) -> str: + video_uri = self.client.upload(file, data={"name": video_title, "description": description}) + return video_uri + + def download(self, video_id: str, output_file: str = "myvid.mp4"): + info = self.get_video_info(video_id) + + size, link = 0, "" + for item in info.download: + if item["size"] > size: + size = item["size"] + link = item["link"] + + if link == "": + raise Exception("download link not provided for video") + + video_response = requests.get(link, stream=True) + downloaded_mb = 0 + with open(output_file, "wb") as video_file: + for chunk in video_response.iter_content(chunk_size=1024): + if chunk: + downloaded_mb += len(chunk) / 1024 + print(f"{downloaded_mb}MB Downloaded...") + video_file.write(chunk) + + print(f"Video downloaded successfully to {output_file}!") + + def get_video_info(self, video_id: str) -> VideoInfo: + """ + Get information about a video by URI. + :param uri: URI of the Vimeo video. + :return: Video information as a dictionary, or None if an error occurs. + """ + # , fields: List[str] + response = self.client.get(f"/videos/{video_id}") + if response.status_code == 200: + myvideo = video_model_load(response.content) + else: + raise Exception(f"Failed to get video details. Status code: {response.status_code}, Error: {response.text}") + return myvideo + + def get_videos(self, folder: Optional[int] = None, folders: Optional[List[int]] = None) -> List[VideoInfo]: + """ + Get information about videos from specified folder(s) or all videos if no folder is specified. + :param folder: ID of a single folder to fetch videos from. + :param folders: List of folder IDs to fetch videos from. + :return: List of VideoInfo objects. 
+ """ + if self.user_id == 0: + raise Exception("Can't find user ID, it's not set in env variables") + + all_videos = [] + + if folder is not None: + folders = [folder] + elif folders is None: + # If no folder or folders specified, get all videos + response = self.client.get("/me/videos") + if response.status_code == 200: + return videos_model_load(response.content) + else: + raise Exception(f"Failed to get videos. Status code: {response.status_code}, Error: {response.text}") + for folder_id in folders: + response = self.client.get(f"/users/{self.user_id}/projects/{folder_id}/videos") + if response.status_code == 200: + videos = videos_model_load(response.content) + all_videos.extend(videos) + else: + print(f"Failed to get videos for folder {folder_id}. Status code: {response.status_code}, Error: {response.text}") + + return all_videos + + # def get_videos(self,folder:int,folders:List[int]) -> List[VideoInfo]: + # """ + # Get information about a video by URI. + # :param uri: URI of the Vimeo video. + # :return: Video information as a dictionary, or None if an error occurs. + # """ + # if folder>0: + # if self.user_id == 0: + # return Exception("can't find userid, its not set in env variables") + # # print(f"folderid:{folder}") + # response = self.client.get(f"/users/{self.user_id}/projects/{folder}/videos") + # # api_url = f"https://api.vimeo.com/users/{self.user_id}/projects/13139570/videos" + # # print(api_url) + # # access_token = "e65daca3b0dbc18c2fadc5cafcf81004" + # # headers = { + # # "Authorization": f"Bearer {access_token}" + # # } + # # Make the GET request to the Vimeo API + # #response = requests.get(api_url, headers=headers) + # else: + # response = self.client.get(f"/me/videos/") + + # if response.status_code == 200: + # myvideos = videos_model_load(response.content) + # else: + # raise Exception(f"Failed to get video details. 
def new() -> VimeoClient:
    return VimeoClient()


# Example usage:
if __name__ == "__main__":
    cl = new()
    v = cl.get_videos(folders=[10700101, 13139570, 12926235, 10752310, 10702046])
    for item in v:
        video_id = item.uri.split("/")[-1]
        print(f" - {item.name} : {video_id} ")
    # vi = cl.get_video_info("475353425")
    # print(json_to_yaml(vi))
    # cl.download("475353425", "/tmp/475353425.mp4")
diff --git a/lib/clients/vimeo/model_video.py b/lib/clients/vimeo/model_video.py
new file mode 100644
index 0000000..debd3e0
--- /dev/null
+++ b/lib/clients/vimeo/model_video.py
@@ -0,0 +1,177 @@
from dataclasses import dataclass, field
from typing import List, Optional, Dict, Any
from dataclasses_json import dataclass_json
import json
import yaml

def json_to_yaml(json_data):
    # If the input is a JSON string, parse it into a Python dictionary
    if isinstance(json_data, str):
        json_data = json.loads(json_data)

    # Convert the dictionary to a YAML formatted string
    yaml_data = yaml.dump(json_data, sort_keys=False, default_flow_style=False)

    return yaml_data


@dataclass_json
@dataclass
class Size:
    width: int
    height: int
    link: str
    link_with_play_button: Optional[str] = None

@dataclass_json
@dataclass
class Pictures:
    uri: str
    active: bool
    type: str
    base_link: str
    sizes: List[Size]
    resource_key: str
    default_picture: bool

@dataclass_json
@dataclass
class Embed:
    html: str
    badges: Dict[str, Any]
    interactive: bool
    buttons: Dict[str, bool]
    logos: Dict[str, Any]
    play_button: Dict[str, Any]
    title: Dict[str, Any]
    end_screen: List[Any]
    playbar: bool
    quality_selector: Optional[str]
    pip: bool
    autopip: bool
    volume: bool
    color: str
    colors: Dict[str, str]
    event_schedule: bool
    has_cards: bool
    outro_type: str
    show_timezone: bool
    cards: List[Any]
    airplay: bool
    audio_tracks: bool
    chapters: bool
    chromecast: bool
    closed_captions: bool
    transcript: bool
    ask_ai: bool
    uri: Optional[str]
    email_capture_form: Optional[str]
    speed: bool

@dataclass_json
@dataclass
class Uploader:
    pictures: Pictures

@dataclass_json
@dataclass
class User:
    uri: str
    name: str
    link: str
    capabilities: Dict[str, bool]
    location: str
    gender: str
    bio: str
    short_bio: str
    created_time: str
    pictures: Pictures
    websites: List[Dict[str, Optional[str]]]
    # metadata: Dict[str, Any]
    location_details: Dict[str, Optional[Any]]
    skills: List[Any]
    available_for_hire: bool
    can_work_remotely: bool
    preferences: Dict[str, Any]
    content_filter: List[str]
    upload_quota: Dict[str, Any]
    resource_key: str
    account: str

@dataclass_json
@dataclass
class VideoInfo:
    uri: str
    name: str
    description: Optional[str]
    type: str
    link: str
    player_embed_url: str
    duration: int
    width: int
    height: int
    # embed: Embed
    created_time: str
    modified_time: str
    release_time: str
    content_rating: List[str]
    content_rating_class: str
    rating_mod_locked: bool
    license: Optional[str]
    privacy: Dict[str, Any]
    pictures: Pictures
    tags: List[Any]
    stats: Dict[str, int]
    categories: List[Any]
    uploader: Uploader
    # metadata: Dict[str, Any]
    manage_link: str
    # user: Optional[User]
    last_user_action_event_date: Optional[str]
    parent_folder: Optional[Dict[str, Any]]
    review_page: Optional[Dict[str, Any]]
    files: Optional[List[Dict[str, Any]]]
    download: Optional[List[Dict[str, Any]]]
    app: Optional[Dict[str, str]]
    play: Optional[Dict[str, Any]]
    status: str
    resource_key: str
    upload: Optional[Dict[str, Optional[str]]]
    transcode: Dict[str, str]
    is_playable: bool
    has_audio: bool


def video_model_load(json_data: str, dojsonload: bool = True) -> VideoInfo:
    if dojsonload:
        json_dict = json.loads(json_data)
    else:
        json_dict = json_data

    # Drop fields the VideoInfo dataclass does not model
    json_dict.pop('metadata', {})
    json_dict.pop('embed', {})
    json_dict.pop('user', {})
    json_dict.pop('websites', {})

    json_data_cleaned = json.dumps(json_dict)
    video_object = VideoInfo.from_json(json_data_cleaned)
    return video_object


def videos_model_load(json_data: str) -> List[VideoInfo]:
    json_list = json.loads(json_data)
    videos = []

    for item in json_list["data"]:
        videos.append(video_model_load(item, dojsonload=False))

    return videos
diff --git a/lib/clients/whisper/__init__.py b/lib/clients/whisper/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/lib/clients/whisper/convert.py b/lib/clients/whisper/convert.py
new file mode 100644
index 0000000..d7c797d
--- /dev/null
+++ b/lib/clients/whisper/convert.py
@@ -0,0 +1,107 @@
import os
from pydub import AudioSegment
import moviepy.editor as mp
import nltk
from nltk.tokenize import sent_tokenize, word_tokenize

# Download necessary NLTK data
nltk.download('punkt', quiet=True)

class Convertor:
    def __init__(self, max_chars_per_part=4000, context: str = "main"):
        self.max_chars_per_part = max_chars_per_part
        self.context = context

    @classmethod
    def new(cls, max_chars_per_part=4000, context: str = "main"):
        return cls(max_chars_per_part, context)

    def process(self, path: str):
        if path.lower().endswith(('.mp4', '.avi', '.mov')):  # Video files
            return self.process_video(path)
        elif path.lower().endswith(('.mp3', '.wav', '.ogg')):  # Audio files
            return self.process_audio(path)
        else:
            raise ValueError("Unsupported file format")

    def process_video(self, video_path: str):
        # Extract audio from video as WAV
        video = mp.VideoFileClip(video_path)
        audio_path = video_path.rsplit('.', 1)[0] + '.wav'
        video.audio.write_audiofile(audio_path)
        video.close()
        return audio_path

    def process_audio(self, audio_path: str):
        # Convert to WAV format if necessary
        wav_path = audio_path.rsplit('.', 1)[0] + '.wav'
        if not audio_path.lower().endswith('.wav'):
            audio = AudioSegment.from_file(audio_path)
            audio.export(wav_path, format='wav')
        else:
            wav_path = audio_path
        return wav_path
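    # split_text packs whole sentences (detected with NLTK's sent_tokenize)
    # greedily into parts of at most max_chars_per_part characters, keeping
    # paragraph breaks whenever they still fit within the limit.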
    def split_text(self, text):
        parts = []
        current_part = ""
        paragraphs = text.split('\n\n')

        for paragraph in paragraphs:
            sentences = sent_tokenize(paragraph)
            for sentence in sentences:
                if len(current_part) + len(sentence) < self.max_chars_per_part:
                    current_part += sentence + ' '
                else:
                    if current_part:
                        parts.append(current_part.strip())
                    current_part = sentence + ' '

            # Add a paragraph break if it doesn't exceed the limit
            if len(current_part) + 2 < self.max_chars_per_part:
                current_part += '\n\n'
            else:
                parts.append(current_part.strip())
                current_part = '\n\n'

        if current_part:
            parts.append(current_part.strip())

        return parts

    def find_natural_pause(self, text):
        words = word_tokenize(text)
        total_words = len(words)
        mid_point = total_words // 2

        # Look for punctuation near the middle
        for i in range(mid_point, total_words):
            if words[i] in '.!?':
                return ' '.join(words[:i + 1]), ' '.join(words[i + 1:])

        # If no punctuation found, split at the nearest space to the middle
        return ' '.join(words[:mid_point]), ' '.join(words[mid_point:])

    def write_to_file(self, parts, output_path):
        with open(output_path, 'w', encoding='utf-8') as f:
            for i, part in enumerate(parts, 1):
                f.write(f"Part {i}:\n\n")
                f.write(part)
                f.write("\n\n")
                if i < len(parts):
                    f.write("-" * 50 + "\n\n")


# Usage example:
if __name__ == "__main__":
    processor = Convertor.new()
    item = "/Users/despiegk1/Documents/Zoom/2024-07-16 16.42.50 Kristof De Spiegeleer's Personal Meeting Room/video1720369800.mp4"
    # Convertor only extracts/normalizes the audio to WAV; the actual
    # transcription pipeline lives in whisper.py (MediaProcessor)
    wav_path = processor.process(item)
    print(f"Audio prepared at: {wav_path}")
diff --git a/lib/clients/whisper/whisper.py b/lib/clients/whisper/whisper.py
new file mode 100644
index 0000000..38ffeff
--- /dev/null
+++ b/lib/clients/whisper/whisper.py
@@ -0,0 +1,118 @@
import os
from pydub import AudioSegment
import whisper
import moviepy.editor as mp
import nltk
from nltk.tokenize import sent_tokenize, word_tokenize

# Download necessary NLTK data
nltk.download('punkt', quiet=True)

class MediaProcessor:
    def __init__(self, max_chars_per_part=4000):
        self.model = whisper.load_model("base.en")
        # self.model = whisper.load_model("medium.en")
        self.max_chars_per_part = max_chars_per_part

    @classmethod
    def new(cls, max_chars_per_part=4000):
        return cls(max_chars_per_part)

    def process(self, path: str):
        if path.lower().endswith(('.mp4', '.avi', '.mov')):  # Video files
            return self.process_video(path)
        elif path.lower().endswith(('.mp3', '.wav', '.ogg')):  # Audio files
            return self.process_audio(path)
        else:
            raise ValueError("Unsupported file format")

    def process_video(self, video_path: str):
        # Extract audio from video
        video = mp.VideoFileClip(video_path)
        audio_path = video_path.rsplit('.', 1)[0] + '.wav'
        video.audio.write_audiofile(audio_path)
        video.close()

        # Now process the extracted audio
        return self.process_audio(audio_path)

    def process_audio(self, audio_path: str):
        # Convert to WAV format if necessary
        wav_path = audio_path.rsplit('.', 1)[0] + '.wav'
        if not audio_path.lower().endswith('.wav'):
            audio = AudioSegment.from_file(audio_path)
            audio.export(wav_path, format='wav')
        else:
            wav_path = audio_path

        # Transcribe audio using Whisper
        result = self.model.transcribe(wav_path)
        transcription = result["text"]

        # Split the transcription into parts
        return self.split_text(transcription)

    def split_text(self, text):
        parts = []
        current_part = ""
        paragraphs = text.split('\n\n')

        for paragraph in paragraphs:
            sentences = sent_tokenize(paragraph)
            for sentence in sentences:
                if len(current_part) + len(sentence) < self.max_chars_per_part:
                    current_part += sentence + ' '
                else:
                    if current_part:
                        parts.append(current_part.strip())
                    current_part = sentence + ' '

            # Add a paragraph break if it doesn't exceed the limit
            if len(current_part) + 2 < self.max_chars_per_part:
                current_part += '\n\n'
            else:
                parts.append(current_part.strip())
                current_part = '\n\n'

        if current_part:
            parts.append(current_part.strip())

        return parts

    def find_natural_pause(self, text):
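        # Split the text roughly in half at the sentence-ending punctuation
        # nearest the middle; fall back to the middle word boundary.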
words = word_tokenize(text) + total_words = len(words) + mid_point = total_words // 2 + + # Look for punctuation near the middle + for i in range(mid_point, total_words): + if words[i] in '.!?': + return ' '.join(words[:i+1]), ' '.join(words[i+1:]) + + # If no punctuation found, split at the nearest space to the middle + return ' '.join(words[:mid_point]), ' '.join(words[mid_point:]) + + def write_to_file(self, parts, output_path): + with open(output_path, 'w', encoding='utf-8') as f: + for i, part in enumerate(parts, 1): + f.write(f"Part {i}:\n\n") + f.write(part) + f.write("\n\n") + if i < len(parts): + f.write("-" * 50 + "\n\n") + + +# Usage example: +if __name__ == "__main__": + processor = MediaProcessor.new(max_chars_per_part=10000) + output_file = "/Users/despiegk1/Documents/transcription3.md" + item = "/Users/despiegk1/Documents/Zoom/2024-07-16 16.42.50 Kristof De Spiegeleer's Personal Meeting Room/video1720369800.mp4" + transcription_parts = processor.process(item) + + processor.write_to_file(transcription_parts, output_file) + + print(f"Transcription split into {len(transcription_parts)} parts:") + for i, part in enumerate(transcription_parts, 1): + print(f"Part {i}:") + print(part) + print("-" * 50) \ No newline at end of file diff --git a/lib/clients/wireless/wigle_net.py b/lib/clients/wireless/wigle_net.py new file mode 100644 index 0000000..3d21376 --- /dev/null +++ b/lib/clients/wireless/wigle_net.py @@ -0,0 +1,313 @@ +import json +import os +import time +from dataclasses import dataclass +from datetime import datetime, timedelta +from enum import Enum +from typing import Any, Dict, List, Optional + +import redis +import requests + +API_URL = "https://api.wigle.net/api/v2/network/search" +REDIS_CACHE_EXPIRY = timedelta(hours=1) +API_RATE_LIMIT = 30 # seconds between requests + +# Initialize Redis connection +redis_client = redis.Redis(host="localhost", port=6379, db=0, decode_responses=True) + +# Track last API request time (initialized to allow immediate first request) +_last_request_time = time.time() - API_RATE_LIMIT + + +class WigleError(Exception): + """Custom exception for Wigle-related errors""" + + pass + + +class NetworkType(str, Enum): + """Network types supported by Wigle API""" + + WIFI = "WIFI" + BT = "BT" + CELL = "CELL" + + +class Encryption(str, Enum): + """WiFi encryption types""" + + NONE = "None" + WEP = "WEP" + WPA = "WPA" + WPA2 = "WPA2" + WPA3 = "WPA3" + UNKNOWN = "unknown" + + +@dataclass +class Location: + """Represents a wireless network location with all available Wigle API fields""" + + ssid: str + latitude: float + longitude: float + last_update: Optional[datetime] + encryption: Optional[str] = None + network_type: Optional[str] = None + channel: Optional[int] = None + frequency: Optional[float] = None + qos: Optional[int] = None + transid: Optional[str] = None + firsttime: Optional[datetime] = None + lasttime: Optional[datetime] = None + country_code: Optional[str] = None + city: Optional[str] = None + region: Optional[str] = None + house_number: Optional[str] = None + road: Optional[str] = None + address: Optional[str] = None + + +def get_wigle_auth() -> str: + """Get Wigle authentication token from environment variable""" + wigle_auth = os.getenv("WIGLE") + if not wigle_auth: + raise WigleError("WIGLE environment variable not set. 
Format should be: 'AIDxxx:yyy'")
+    return wigle_auth
+
+
+def enforce_rate_limit():
+    """Enforce API rate limit by sleeping if needed, showing countdown"""
+    global _last_request_time
+    current_time = time.time()
+    time_since_last_request = current_time - _last_request_time
+
+    if time_since_last_request < API_RATE_LIMIT:
+        sleep_time = API_RATE_LIMIT - time_since_last_request
+        print(f"\nRate limit: waiting {sleep_time:.0f} seconds", end="", flush=True)
+
+        # Show countdown
+        for remaining in range(int(sleep_time), 0, -1):
+            time.sleep(1)
+            print(f"\rRate limit: waiting {remaining:2d} seconds", end="", flush=True)
+
+        print("\rRate limit: continuing...          ")  # Clear the line
+
+    _last_request_time = time.time()
+
+
+def search_networks(
+    *,
+    # Location filters
+    latitude_north: Optional[float] = None,
+    latitude_south: Optional[float] = None,
+    longitude_east: Optional[float] = None,
+    longitude_west: Optional[float] = None,
+    # Network filters
+    ssid: Optional[str] = None,
+    ssidlike: Optional[str] = None,
+    network_type: Optional[NetworkType] = None,
+    encryption: Optional[Encryption] = None,
+    # Time filters
+    on_since: Optional[datetime] = None,
+    last_update: Optional[datetime] = None,
+    # Result control
+    results_per_page: int = 100,
+    search_after: Optional[str] = None,
+    # Other filters
+    freenet: Optional[bool] = None,
+    paynet: Optional[bool] = None,
+    show_query: bool = False,
+) -> Dict[str, Any]:
+    """
+    Search for networks using the Wigle API with full parameter support and Redis caching.
+    Rate limited to one request every API_RATE_LIMIT (currently 30) seconds.
+
+    Args:
+        latitude_north: Northern boundary of search box
+        latitude_south: Southern boundary of search box
+        longitude_east: Eastern boundary of search box
+        longitude_west: Western boundary of search box
+        ssid: Exact SSID match
+        ssidlike: SSID wildcard match
+        network_type: Filter by network type (WIFI/BT/CELL)
+        encryption: Filter by encryption type
+        on_since: Only show networks seen on or after date
+        last_update: Only show networks updated since date
+        results_per_page: Number of results per page (max 100)
+        search_after: Token for getting next batch of results
+        freenet: Show only free networks
+        paynet: Show only pay networks
+        show_query: Return query bounds without results
+
+    Returns:
+        Dictionary containing search results and metadata including searchAfter token
+
+    Raises:
+        WigleError: If the WIGLE environment variable is not set or API request fails
+    """
+    # https://api.wigle.net/api/v2/network/search?onlymine=false&encryption=None&freenet=false&paynet=false
+    try:
+        # Build cache key from all parameters
+        params = locals()
+        cache_key = f"wigle:search:{json.dumps(params, default=str, sort_keys=True)}"
+
+        cached_result = redis_client.get(cache_key)
+        if cached_result:
+            return json.loads(cached_result)
+
+        # Enforce rate limit before making request
+        enforce_rate_limit()
+
+        # Build API parameters
+        api_params = {
+            "onlymine": "false",
+            "resultsPerPage": results_per_page,
+        }
+
+        # Add optional parameters if provided
+        # (assumes all four bounding-box values are supplied together)
+        if latitude_north is not None:
+            api_params["latrange1"] = latitude_south
+            api_params["latrange2"] = latitude_north
+            api_params["longrange1"] = longitude_west
+            api_params["longrange2"] = longitude_east
+
+        if ssid:
+            api_params["ssid"] = ssid
+        if ssidlike:
+            api_params["ssidlike"] = ssidlike
+        if network_type:
+            api_params["netid"] = network_type.value
+        if encryption:
+            api_params["encryption"] = encryption.value
+        else:
+            api_params["encryption"] = "None"
+        if on_since:
+            api_params["onSince"] = on_since.strftime("%Y%m%d")
+        if last_update:
+            api_params["lastupdt"] = last_update.strftime("%Y%m%d")
+        if freenet is not None:
+            api_params["freenet"] = str(freenet).lower()
+        if paynet is not None:
+            api_params["paynet"] = str(paynet).lower()
+        if search_after:
+            api_params["searchAfter"] = search_after
+        if show_query:
+            api_params["showQuery"] = str(show_query).lower()
+
+        # Make API request
+        wigle_auth = get_wigle_auth()
+        headers = {"Authorization": f"Basic {wigle_auth}"}
+        response = requests.get(API_URL, params=api_params, headers=headers)
+        response.raise_for_status()
+        result = response.json()
+
+        print(result)
+
+        # Cache the result
+        redis_client.setex(cache_key, int(REDIS_CACHE_EXPIRY.total_seconds()), json.dumps(result))
+
+        return result
+
+    except requests.exceptions.RequestException as e:
+        raise WigleError(f"API request failed: {str(e)}")
+
+
+def parse_network_to_location(network: Dict[str, Any]) -> Location:
+    """Convert a network result from Wigle API to a Location object"""
+    # Parse dates if present
+    last_update = None
+    firsttime = None
+    lasttime = None
+
+    if network.get("lastupdt"):
+        try:
+            last_update = datetime.strptime(network["lastupdt"], "%Y-%m-%d %H:%M:%S")
+        except ValueError:
+            pass
+
+    if network.get("firsttime"):
+        try:
+            firsttime = datetime.strptime(network["firsttime"], "%Y-%m-%d %H:%M:%S")
+        except ValueError:
+            pass
+
+    if network.get("lasttime"):
+        try:
+            lasttime = datetime.strptime(network["lasttime"], "%Y-%m-%d %H:%M:%S")
+        except ValueError:
+            pass
+
+    return Location(
+        ssid=network["ssid"],
+        latitude=float(network["trilat"]),
+        longitude=float(network["trilong"]),
+        last_update=last_update,
+        encryption=network.get("encryption"),
+        network_type=network.get("type"),
+        channel=network.get("channel"),
+        frequency=network.get("frequency"),
+        qos=network.get("qos"),
+        transid=network.get("transid"),
+        firsttime=firsttime,
+        lasttime=lasttime,
+        country_code=network.get("country"),
+        city=network.get("city"),
+        region=network.get("region"),
+        house_number=network.get("housenumber"),
+        road=network.get("road"),
+        address=network.get("address"),
+    )
+
+
+def get_all() -> List[Location]:
+    """Search for OpenRoaming networks and return list of locations.
+    Rate limited to one request every 30 seconds, including pagination requests.
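+    Pages through results 100 at a time using the searchAfter token.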
+
+    Returns:
+        List[Location]: List of found network locations
+
+    Raises:
+        WigleError: If the WIGLE environment variable is not set or API request fails
+    """
+    ssid_names = ["Adentro OpenRoaming", "OpenRoaming", "Passpoint", "PasspointAruba", "Cellular Wi-Fi Passthrough", "WBA_OpenRoaming"]
+    locations: List[Location] = []
+
+    for name in ssid_names:
+        try:
+            search_after = None
+            while True:
+                results = search_networks(
+                    ssid=name, encryption=Encryption.NONE, network_type=NetworkType.WIFI, results_per_page=100, search_after=search_after
+                )
+
+                if not results or not results.get("results"):
+                    break
+
+                for network in results["results"]:
+                    locations.append(parse_network_to_location(network))
+
+                # Get searchAfter token for next batch
+                search_after = results.get("searchAfter")
+                if not search_after:
+                    break
+
+        except WigleError as e:
+            raise WigleError(f"Error searching for {name}: {str(e)}")
+
+    print(f"Found {len(locations)} OpenRoaming network locations")
+    return locations
+
+
+if __name__ == "__main__":
+    locations = get_all()
+    for loc in locations:
+        print(f"SSID: {loc.ssid}")
+        print(f"Location: ({loc.latitude}, {loc.longitude})")
+        print(f"Network Type: {loc.network_type or 'N/A'}")
+        print(f"Encryption: {loc.encryption or 'N/A'}")
+        print(f"Last Update: {loc.last_update or 'N/A'}")
+        if loc.address:
+            print(f"Address: {loc.address}")
+        print("-" * 50)
diff --git a/lib/core/heroscript/examples/heroscript_example.py b/lib/core/heroscript/examples/heroscript_example.py
new file mode 100644
index 0000000..03d05ef
--- /dev/null
+++ b/lib/core/heroscript/examples/heroscript_example.py
@@ -0,0 +1,38 @@
+from pydantic import BaseModel, Field
+from typing import Dict, Any, Type, TypeVar
+from heroscript.heroscript import *
+
+
+class User(BaseModel, HeroScriptMixin):
+    oid: str = Field()
+    name: str = Field(min_length=2, description="Chosen name by user", example="myname")
+    city: str = Field()
+    age: int = Field()
+    description: str = Field()
+
+
+
+# Example usage
+u1 = User(oid="abc123", name="John", age=30, city="New York",
+          description="""
+    this is a multiline
+
+    we need to remove the
+        this will stay 4 chars in
+
+    end
+    """)
+
+myheroscript = u1.heroscript()
+print(myheroscript)
+
+u2 = User.from_heroscript(heroscript=myheroscript)
+myprint(u2)
+
+# p1 = Product(id=1, name="Phone", price=999.99, description="A smart phone")
+
+# product_heroscript = p1.heroscript()
+# print(product_heroscript)
+
+# p2 = Product.from_heroscript(product_heroscript)
+# print(p2)
\ No newline at end of file
diff --git a/lib/core/heroscript/examples/heroscript_example2.py b/lib/core/heroscript/examples/heroscript_example2.py
new file mode 100644
index 0000000..d1a2f24
--- /dev/null
+++ b/lib/core/heroscript/examples/heroscript_example2.py
@@ -0,0 +1,78 @@
+from pydantic import BaseModel, Field
+from typing import Dict, Any, Type, TypeVar, List
+from heroscript.heroscript import *
+
+class Comment(BaseModel):
+    description: str = Field(default="")
+
+class HeroBase(BaseModel, HeroScriptMixin):
+    oid: str = Field(default="", metadata={"unique": True})
+    name: str = Field(min_length=2, description="Chosen name by user", example="myname", metadata={"unique": True})
+    comments: List[Comment] = Field(..., description="Comment which can be attached to obj")
+
+class User(HeroBase):
+    city: str = Field(metadata={"index": True})
+    age: int = Field(metadata={"index": True})
+    description: str = Field(default="")
+
+class Product(BaseModel, HeroScriptMixin):
+    # default was "" in the original, which is not a valid default for an int field
+    id: int = Field(default=0, metadata={"unique":
True})
+    name: str = Field(metadata={"unique": True})
+    price: float = Field()
+    description: str = Field()
+
+
+myheroscript = """
+
+```hero
+!!user.define
+    oid:abc123
+    name:John
+    description:'
+        this is a multiline
+
+        we need to remove the
+            this will stay 4 chars in
+
+        end
+        '
+    age:30
+    city:'New York'
+
+!!product.define
+    id:33
+    name:aproduct
+    description:'
+        this is a multiline
+
+        we need to remove the
+            this will stay 4 chars in
+
+        end
+        '
+    price:10.0
+
+```
+
+"""
+
+# hs=HeroScripts(class_types={"user":User,"product":Product},content=myheroscript)
+mypath = "~/code/git.threefold.info/tfgrid/hero_research/hero/osis/heroscript/example"
+hs = HeroScripts(class_types={"user": User, "product": Product}, path=mypath)
+
+objs = hs.get_objects()
+
+for o in objs:
+    myprint(o)
+
+for item in hs.heroscripts:
+    print(item)
+
+query = "john*"
+results = hs.search(User, query)
+
+# Print the search results
+for r in results:
+    # print(f"User: {r['path']}")
+    print(r)
\ No newline at end of file
diff --git a/lib/core/heroscript/examples/wiki/done.json b/lib/core/heroscript/examples/wiki/done.json
new file mode 100644
index 0000000..4d3ff34
--- /dev/null
+++ b/lib/core/heroscript/examples/wiki/done.json
@@ -0,0 +1 @@
+{"/Users/despiegk/code/git.threefold.info/tfgrid/hero_research/hero/osis/heroscript/example/testFile.md": "f6e8b6a32349c262cb9afbea771c5add", "/Users/despiegk/code/git.threefold.info/tfgrid/hero_research/hero/osis/heroscript/example/sub/test file 2.md": "0ecc29046b6ef743481358e4c5630a6d"}
\ No newline at end of file
diff --git a/lib/core/heroscript/examples/wiki/sub/test file 2.md b/lib/core/heroscript/examples/wiki/sub/test file 2.md
new file mode 100644
index 0000000..4018a56
--- /dev/null
+++ b/lib/core/heroscript/examples/wiki/sub/test file 2.md
@@ -0,0 +1,15 @@
+# header
+
+!!product.define
+    id:33
+    name:aproduct
+    description:'
+        this is a multiline
+
+        we need to remove the
+            this will stay 4 chars in
+
+        end
+        '
+    price:10.0
+something else
\ No newline at end of file
diff --git a/lib/core/heroscript/examples/wiki/testFile.md b/lib/core/heroscript/examples/wiki/testFile.md
new file mode 100644
index 0000000..aa9a3c4
--- /dev/null
+++ b/lib/core/heroscript/examples/wiki/testFile.md
@@ -0,0 +1,22 @@
+
+!!user.define
+    oid:abc123
+    name:John
+    description:'
+        this is a multiline
+
+        we need to remove the
+            this will stay 4 chars in
+
+        end
+        '
+    age:30
+    city:'New York'
+
+```heroscript
+!!user.define
+    oid:4nd
+    name:John2
+    age:40
+    city:bxl
+```
\ No newline at end of file
diff --git a/lib/core/heroscript/heroaction.py b/lib/core/heroscript/heroaction.py
new file mode 100644
index 0000000..ec98439
--- /dev/null
+++ b/lib/core/heroscript/heroaction.py
@@ -0,0 +1,207 @@
+
+from herotools.texttools import dedent
+from typing import List, Dict, Tuple
+import re
+from heroscript.tools import action_blocks, format_multiline_text, heroscript_repr
+import textwrap
+
+class HeroActions:
+    def __init__(self, path: str = "", content: str = ""):
+        blocks = action_blocks(path=path, content=content)
+        self.actions: List[HeroAction] = []
+        for block in blocks:
+            self.actions.append(HeroAction(block))
+
+    def __repr__(self):
+        out = ""
+        for item in self.actions:
+            out += item.__repr__() + "\n"
+        return out
+
+
+class HeroAction:
+    def __init__(self, content: str):
+        blocks = action_blocks(content=content)
+        if len(blocks) == 0:
+            raise ValueError(f"no action found in {content}")
+        elif len(blocks) > 1:
+            raise ValueError(f"found more than one action in {content}")
+        content = blocks[0]
+        self.name,
content = _name_paramstr(content) + self.params = Params(content) + + def __str__(self): + param_str=textwrap.indent(self.params.__str__()," ") + return f"!!{self.name}\n{param_str}" + + def __repr__(self): + #return self.__str__() + return heroscript_repr(self.__str__()) + + +class Params: + def __init__(self, content: str): + self.__params = params_parse(content) + + def __str__(self): + sorted_params = sorted(self.__params.items()) + param_str="" + for key,value in sorted_params: + if "'" in value: + param_str+=f"{key}: {value}\n" + elif "\n" in value: + v=format_multiline_text(value) + param_str+=f"{key}: {v}\n" + elif " " in value: + param_str+=f"{key}: '{value}'\n" + else: + param_str+=f"{key}: {value}\n" + return param_str + + + def get_int(self, key: str, defval: int = 99999999) -> int: + if key not in self.__params: + if defval == 99999999: + raise KeyError(f"Key '{key}' must exist in parameters") + return defval + return int(self.__params[key]) + + def get_float(self, key: str, defval: float = 99999999.0) -> float: + if key not in self.__params: + if defval == 99999999.0: + raise KeyError(f"Key '{key}' must exist in parameters") + return defval + return float(self.__params[key]) + + def get(self, key: str, defval: str = "99999999") -> str: + if key not in self.__params: + if defval == "99999999": + raise KeyError(f"Key '{key}' must exist in parameters") + return defval + return self.__params[key] + + def get_list(self, key: str, defval: List[str] = [], needtoexist: bool = True) -> List[str]: + if defval is None: + defval = [] + if key not in self.__params: + if needtoexist: + raise KeyError(f"Key '{key}' must exist in parameters") + return defval + return [item.strip().strip("'").strip() for item in self.__params[key].split(",")] + + def get_list_int(self, key: str, defval: List[int] = [], needtoexist: bool = True) -> List[int]: + if defval is None: + defval = [] + if key not in self.__params: + if needtoexist: + raise KeyError(f"Key '{key}' must exist in parameters") + return defval + return [int(item.strip()) for item in self.__params[key].split(",")] + + def get_list_float(self, key: str, defval: List[float] = [], needtoexist: bool = True) -> List[float]: + if defval is None: + defval = [] + if key not in self.__params: + if needtoexist: + raise KeyError(f"Key '{key}' must exist in parameters") + return defval + return [float(item.strip()) for item in self.__params[key].split(",")] + + def get_all(self) -> Dict[str, str]: + return self.__params + + +def _name_paramstr(heroscript: str) -> Tuple[str, str]: + if not isinstance(heroscript, str): + raise ValueError("Input must be a string") + + heroscript = dedent(heroscript) + lines = heroscript.strip().split("\n") + if not lines or "!!" not in lines[0]: + raise ValueError("The first line must contain '!!' 
to indicate the class name")
+
+    try:
+        class_name = lines[0].split("!!")[1].lower().strip()
+    except IndexError:
+        raise ValueError("Invalid format for class name extraction")
+
+    rest_of_text = dedent("\n".join(lines[1:]))
+    return class_name, rest_of_text
+
+
+def params_parse(content: str) -> Dict[str, str]:
+    lines = dedent(content).strip().split("\n")
+    props = {}
+    multiline_prop = None
+    multiline_value: List[str] = list()
+
+    for line in lines:
+        if multiline_prop:
+            if line.strip() == "'":
+                # use multiline_prop explicitly (the original relied on the loop-carried `prop`)
+                props[multiline_prop] = dedent("\n".join(multiline_value))
+                multiline_prop = None
+                multiline_value = []
+            else:
+                multiline_value.append(line)
+        else:
+            if ":" in line:
+                prop, value = line.split(":", 1)
+                prop = prop.strip()
+                value = value.strip()
+                if value == "'":
+                    multiline_prop = prop
+                else:
+                    if value.startswith("'") and value.endswith("'"):
+                        value1 = value[1:-1]
+                        if not "'" in value1:
+                            value = value1
+                    props[prop] = value
+    return props
+
+
+
+if __name__ == "__main__":
+
+    # Example usage
+    text = """
+
+    !!obj1.define
+        myname: 'mymama'
+        mylist: '20,200'
+        mylist2: 20,'a bbb'
+        mylist3: 20,200
+        myint:2
+
+    !!obj2.color
+        mother: 'mymama'
+        name:'aurelie'
+        length:60
+        description:'
+            multiline is supported
+            now for aurelie
+            '
+        color:green
+    """
+
+
+    hero_actions = HeroActions(content=text)
+    print(hero_actions)
+
+    a2 = hero_actions.actions[1]
+
+
+    assert a2.params.get_list(key="color") == ["green"]
+    assert a2.params.get_list(key="mother") == ["mymama"]
+    assert a2.params.get(key="color") == "green"
+    assert a2.params.get_int(key="length") == 60
+    assert a2.params.get_list_int(key="length") == [60]
+
+    # now some non existing ones
+    assert a2.params.get_int(key="lengtha", defval=3) == 3
+    assert a2.params.get(key="lengtha", defval="3") == "3"
+
+    a1 = hero_actions.actions[0]
+    # print(a1.params.get_list(key="mylist2"))
+    assert a1.params.get_list(key="mylist") == ["20", "200"]
+    assert a1.params.get_list_int(key="mylist") == [20, 200]
+    assert a1.params.get_list(key="mylist2") == ["20", "a bbb"]
\ No newline at end of file
diff --git a/lib/core/heroscript/heroscripts.py b/lib/core/heroscript/heroscripts.py
new file mode 100644
index 0000000..86f8c16
--- /dev/null
+++ b/lib/core/heroscript/heroscripts.py
@@ -0,0 +1,129 @@
+from pydantic import BaseModel, Field
+from typing import Any, Dict, List, Type, TypeVar
+import re
+import hashlib
+import json
+import os
+# The original imported List/Dict from `types`, which does not provide them; they come from `typing`.
+# The Whoosh imports below were missing but are required by create_indexes/index_objects/search:
+from whoosh import index
+from whoosh.fields import Schema, STORED, ID, NUMERIC, TEXT
+from whoosh.qparser import QueryParser
+# Assumption: reuse the block extractor from heroscript.tools (it was called here but never imported)
+from heroscript.tools import _extract_heroscript_blocks as extract_heroscript_blocks
+
+
+T = TypeVar("T", bound=BaseModel)
+
+class HeroScripts:
+    def __init__(self, class_types: dict, path: str = "", content: str = "", indexpath: str = ""):
+        self.class_types = class_types
+        # NOTE: HeroScript itself is assumed to be defined elsewhere; it is not part of this diff
+        self.heroscripts: List["HeroScript"] = []
+        self.path = os.path.expanduser(path)
+        self.indexpath = os.path.expanduser(indexpath)
+        self.done: Dict[str, str] = {}
+
+        # self.done_load()
+
+        if self.path:
+            try:
+                # self.done_load()
+                self.load(self.path)
+                self.done_save()
+            except FileNotFoundError as e:
+                print(f"Directory not found: {self.path}")
+                print(f"Error: {str(e)}")
+
+        self.create_indexes()
+        self.index_objects()
+
+        if content:
+            blocks = extract_heroscript_blocks(content)
+            self.heroscripts.extend(HeroScript(block) for block in blocks)
+
+    def done_load(self):
+        if self.path:
+            done_file = os.path.join(self.path, "done.json")
+            if os.path.exists(done_file):
+                with open(done_file, "r") as f:
+                    self.done = json.load(f)
+
+    def done_save(self):
+        if self.path:
+            done_file = os.path.join(self.path, "done.json")
+            with open(done_file, "w") as f:
+                json.dump(self.done, f)
+
+    def load(self, path):
+        for root, _, files in os.walk(path):
+            for filename in files:
+                # the original rebound `path` inside this loop, corrupting later iterations
+                if filename.endswith(".md"):
+                    filepath = os.path.join(root, filename)
+                    print(f" - load {filepath}")
+                    with open(filepath, "r") as file:
+                        content = file.read()
+                    md5hash = hashlib.md5(content.encode()).hexdigest()
+                    if filepath not in self.done or self.done[filepath] != md5hash:
+                        blocks = extract_heroscript_blocks(content)
+                        self.heroscripts.extend(HeroScript(block, filepath) for block in blocks)
+                        self.done[filepath] = md5hash
+
+    def get_objects(self):
+        objects = []
+        for heroscript in self.heroscripts:
+            if heroscript.content:
+                try:
+                    class_name = heroscript.content.split("\n")[0].split("!!")[1].split(".")[0].lower()
+                    if class_name in self.class_types:
+                        class_type = self.class_types[class_name]
+                        try:
+                            obj = class_type.from_heroscript(heroscript.content)
+                            objects.append(obj)
+                        except Exception as e:
+                            print(f"Error parsing HeroScript: {e}")
+                except (IndexError, ValueError):
+                    print(f"Invalid HeroScript format: {heroscript.content}")
+        return objects
+
+
+    def create_indexes(self):
+        for class_type in self.class_types.values():
+            schema = self.create_schema(class_type)
+            index_dir = os.path.join(self.indexpath, class_type.__name__.lower())
+            if not os.path.exists(index_dir):
+                os.makedirs(index_dir)
+                index.create_in(index_dir, schema)
+
+    def create_schema(self, class_type):
+        schema_fields = {"path": STORED()}
+        for field_name, field in class_type.__fields__.items():
+            json_schema_extra = getattr(field, "json_schema_extra", None)
+            if json_schema_extra is not None:
+                metadata = json_schema_extra.get("metadata", {})
+                if isinstance(metadata, list):
+                    metadata = {item: True for item in metadata}
+                if metadata.get("unique") or metadata.get("indexed"):
+                    if field.annotation == str:
+                        schema_fields[field_name] = ID(stored=True, unique=metadata.get("unique", False))
+                    elif field.annotation == int or field.annotation == float:
+                        schema_fields[field_name] = NUMERIC(stored=True, unique=metadata.get("unique", False))
+                    else:
+                        # Whoosh TEXT has no `lowercase` kwarg; values are lowercased in index_objects
+                        schema_fields[field_name] = TEXT(stored=True)
+        return Schema(**schema_fields)
+
+    def index_objects(self):
+        for heroscript in self.heroscripts:
+            for obj in self.get_objects():
+                index_dir = os.path.join(self.indexpath, type(obj).__name__.lower())
+                ix = index.open_dir(index_dir)
+                writer = ix.writer()
+                writer.add_document(path=heroscript.path, **{k: str(v).lower() for k, v in obj.dict().items() if k in ix.schema.names()})
+                writer.commit()
+
+    def search(self, class_type, query):
+        index_dir = os.path.join(self.indexpath, class_type.__name__.lower())
+        ix = index.open_dir(index_dir)
+        qp = QueryParser("name", schema=ix.schema)
+        q = qp.parse(query)
+        with ix.searcher() as searcher:
+            results = searcher.search(q)
+            # return results
+            return [result["path"] for result in results]
diff --git a/lib/core/heroscript/mixin.py b/lib/core/heroscript/mixin.py
new file mode 100644
index 0000000..9866f51
--- /dev/null
+++ b/lib/core/heroscript/mixin.py
@@ -0,0 +1,82 @@
+from pydantic import BaseModel, Field
+from typing import Dict, Any, List, Type, TypeVar
+import re
+from colorama import Fore, Style
+import hashlib
+import json
+import os
+from heroscript.heroaction import HeroAction
+from heroscript.tools import format_multiline_text
+
+class HeroScriptMixin:
+
+    def heroscript(self) -> HeroAction:
+        class_name = self.__class__.__name__.lower()
+        prop_order = ["id", "oid", "name", "title", "description", "content"]
+
+        # Get all the properties of the object
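+        # (pydantic v1-style __fields__ access; prop_order wins, the rest is sorted alphabetically below)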
props = list(self.__fields__.keys())
+
+        # Separate properties into those in prop_order and the rest
+        ordered_props = [prop for prop in prop_order if prop in props]
+        remaining_props = [prop for prop in props if prop not in prop_order]
+
+        # Sort the remaining properties
+        sorted_remaining_props = sorted(remaining_props)
+
+        # Combine the ordered properties and sorted remaining properties
+        sorted_props = ordered_props + sorted_remaining_props
+
+        lines = [f"!!{class_name}.define"]
+        for prop in sorted_props:
+            if prop in self.__fields__:
+                val = getattr(self, prop)
+                if isinstance(val, str):
+                    if "\n" in val:
+                        val = format_multiline_text(text=val)
+                    elif any(c.isspace() for c in val):
+                        val = f"'{val}'"
+                lines.append(f"    {prop}:{val}")
+
+        result = "\n".join(lines)
+
+        return HeroAction(content=result)
+
+    @classmethod
+    def from_heroscript(cls, heroscript: str):
+        lines = heroscript.strip().split("\n")
+        class_name = lines[0].split("!!")[1].split(".")[0]
+
+        props = {}
+        multiline_prop = None
+        # the original `List(str)` is not valid; a plain empty list is intended
+        multiline_value: List[str] = []
+
+        for line in lines[1:]:
+            if multiline_prop:
+                if line.strip() == "'":
+                    # End of multiline text
+                    min_indent = min(len(ml) - len(ml.lstrip()) for ml in multiline_value if ml.strip())
+                    unindented_lines = [ml[min_indent:] for ml in multiline_value]
+                    props[multiline_prop] = "\n".join(unindented_lines)
+                    multiline_prop = None
+                    multiline_value = []
+                else:
+                    multiline_value.append(line)
+            else:
+                if ":" in line:
+                    prop, value = line.split(":", 1)
+                    prop = prop.strip()
+                    value = value.strip()
+
+                    if value == "'":
+                        # Start of multiline text
+                        multiline_prop = prop
+                    else:
+                        if value.startswith("'") and value.endswith("'"):
+                            value = value[1:-1]
+                        props[prop] = value
+
+        return cls(**props)
+
+
diff --git a/lib/core/heroscript/readme.md b/lib/core/heroscript/readme.md
new file mode 100644
index 0000000..847088b
--- /dev/null
+++ b/lib/core/heroscript/readme.md
@@ -0,0 +1,4 @@
+## heroscript
+
+
+> not to be used yet
\ No newline at end of file
diff --git a/lib/core/heroscript/tools.py b/lib/core/heroscript/tools.py
new file mode 100644
index 0000000..42a26e0
--- /dev/null
+++ b/lib/core/heroscript/tools.py
@@ -0,0 +1,145 @@
+
+from typing import List
+import os
+from colorama import Fore, Style
+from herotools.texttools import dedent
+import textwrap
+
+# load the heroscripts from filesystem
+def heroscript_blocks(path: str) -> List[str]:
+
+    heroscript_blocks = list()
+
+    for root, dirs, files in os.walk(path):
+        for file in files:
+            if file.endswith(".md"):
+                file_path = os.path.join(root, file)
+                with open(file_path, "r") as f:
+                    content = f.read()
+                    blocks = _extract_heroscript_blocks(content)
+                    heroscript_blocks.extend(blocks)
+
+    return heroscript_blocks
+
+
+def _extract_heroscript_blocks(content: str):
+    content = dedent(content)
+    blocks = []
+    lines = content.split("\n")
+
+    in_block = False
+    block_lines: List[str] = list()
+
+    for line in lines:
+        if line.startswith("```hero"):
+            in_block = True
+            block_lines = []
+        elif line.startswith("```") and in_block:
+            in_block = False
+            block = "\n".join(block_lines)
+            blocks.append(block)
+        elif in_block:
+            block_lines.append(line)
+    return blocks
+
+
+def action_blocks(path: str = "", content: str = "") -> List[str]:
+    if content != "":
+        return __action_blocks_get(content)
+    res: List[str] = list()
+    for hscript in heroscript_blocks(path):
+        for actionscript in __action_blocks_get(hscript):
+            res.append(actionscript)
+    return res
+
+def __action_blocks_get(content: str) -> List[str]:
+    content = dedent(content)
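+    # Scan line by line: a line starting with "!!" opens a new action block,
+    # indented lines belong to the current block, and a non-indented line ends it.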
blocks = list()
+    lines = content.split("\n")
+
+    block_lines: List[str] = list()
+    herofound = False
+
+    for line in lines:
+        # print(line)
+        if line.startswith("!!"):
+            herofound = True
+            if block_lines:  # means we found one before
+                block = "\n".join(block_lines)
+                blocks.append(block)
+                block_lines = []
+                # print("f1")
+            block_lines.append(line)
+        elif line.strip() and not line.startswith(" ") and not line.startswith("\t") and block_lines:
+            block = "\n".join(block_lines)
+            blocks.append(block)
+            block_lines = []
+            herofound = False
+        elif herofound:
+            block_lines.append(line)
+            # print("append")
+
+    if block_lines:
+        block = "\n".join(block_lines)
+        blocks.append(block)
+
+    return blocks
+
+def myprint(obj):
+    class_name = f"{Fore.YELLOW}{obj.__class__.__name__}{Style.RESET_ALL}"
+    fields = [field for field in obj.__fields__ if field in obj.__dict__]
+    attributes = ', '.join(f"{Fore.LIGHTBLACK_EX}{field}{Style.RESET_ALL}={Fore.GREEN}'{getattr(obj, field)}'{Style.RESET_ALL}" for field in fields)
+    print(f"{class_name}({attributes})")
+
+
+# format text so it is ready to be set in heroscript
+def format_multiline_text(text: str) -> str:
+
+    text = dedent(text)
+    text = textwrap.indent(text, "        ")
+
+    # Join the formatted lines with newline characters and add the required indentation
+    formatted_text = "'\n" + text + "\n        '"
+
+    return formatted_text
+
+
+
+# representation with colors of heroscript
+def heroscript_repr(content: str) -> str:
+    lines = content.split("\n")
+    formatted_lines = []
+
+    for line in lines:
+        if line.startswith("!!"):
+            formatted_line = f"{Fore.RED}{line}{Style.RESET_ALL}"
+        elif ":" in line:
+            prop, value = line.split(":", 1)
+            prop = prop.strip()
+            value = value.strip()
+
+            if value.startswith("'") and value.endswith("'"):
+                value = f" {Fore.GREEN}{value}{Style.RESET_ALL}"
+            else:
+                value = f" {Fore.YELLOW}{value}{Style.RESET_ALL}"
+
+            formatted_line = f"    {Fore.CYAN}{prop}{Style.RESET_ALL}:{value}"
+        else:
+            formatted_line = line
+
+        formatted_lines.append(formatted_line)
+
+    return "\n".join(formatted_lines)
+
+def heroscript_print(content: str):
+    o = heroscript_repr(content)
+    print(o)
+
+
+if __name__ == "__main__":
+
+    t = " something\n a\n\n bbbb"
+
+    print(dedent(t))
+
+    print(format_multiline_text(t))
\ No newline at end of file
diff --git a/lib/crypt/box/box.py b/lib/crypt/box/box.py
new file mode 100644
index 0000000..277d8b3
--- /dev/null
+++ b/lib/crypt/box/box.py
@@ -0,0 +1,41 @@
+from fastapi import HTTPException
+from cryptography.fernet import Fernet
+import redis
+import base64
+import hashlib
+
+# TODO: KRISTOF FIX
+
+def box_get():
+    r = redis.Redis(host='localhost', port=6379, db=0)
+
+    key = r.get('my.secret')
+
+    if key is None:
+        # (the original message referenced an undefined `name` variable)
+        raise HTTPException(status_code=404, detail="can't find my.secret in redis, needs to be set; use secret-set to register your secret.")
+
+    hash_digest = hashlib.sha256(key).digest()
+
+    # Encode the hash digest to make it url-safe base64-encoded
+    key2 = base64.urlsafe_b64encode(hash_digest)
+
+    try:
+        f = Fernet(key2)
+    except Exception as e:
+        # if str(e).find("Resource Missing")>0:
+        #     raise HTTPException(status_code=400, detail="Could not find account with pubkey: "+account_keypair.public_key)
+        raise HTTPException(status_code=400, detail=str(e))
+
+    return f
+
+
+def box_secret_set(secret: str):
+    r = redis.Redis(host='localhost', port=6379, db=0)
+
+    # key = r.set('my.secret',secret)
+    r.setex('my.secret', 43200, secret)  # Set the key with an expiration time of 12 hours
+
+    box_get()
+
+    return "OK"
diff --git
a/lib/crypt/box/box_api.py b/lib/crypt/box/box_api.py
new file mode 100644
index 0000000..7001b41
--- /dev/null
+++ b/lib/crypt/box/box_api.py
@@ -0,0 +1,26 @@
+from fastapi import APIRouter, HTTPException, Response
+from pydantic import BaseModel, constr, Field
+from secret.box import box_secret_set, box_get
+
+# TODO: KRISTOF FIX
+
+router = APIRouter()
+
+##############POSITION
+
+class BoxSecretSetRequest(BaseModel):
+    secret: str = Field(..., description="a well chosen secret key; never forget this key or you will lose your assets")
+
+
+
+@router.post("/secret", description="Set your secret for your hero, will be kept for 12 hours")
+async def set_secret(request: BoxSecretSetRequest):
+    box_secret_set(secret=request.secret)
+    return Response(content="OK", media_type="text/plain")
+
+
+@router.get("/secret", description="Check if it exists.")
+async def secret_check():
+    b = box_get()
+    return Response(content="OK", media_type="text/plain")
+
diff --git a/lib/downloader/downloader.py b/lib/downloader/downloader.py
new file mode 100644
index 0000000..15ce01b
--- /dev/null
+++ b/lib/downloader/downloader.py
@@ -0,0 +1,412 @@
+import json
+import logging
+import mimetypes  # Added
+import os
+from datetime import datetime, timedelta
+from urllib.parse import urljoin, urlparse
+
+import scrapy
+from scrapy.crawler import CrawlerProcess
+from scrapy.linkextractors import LinkExtractor
+from scrapy.utils.project import get_project_settings
+
+# Configure logging
+logging.basicConfig(
+    level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
+)
+logger = logging.getLogger(__name__)
+
+STATE_FILE_NAME = ".download_state.json"
+
+
+class GenericDownloaderSpider(scrapy.Spider):
+    name = "generic_downloader"
+
+    def __init__(
+        self,
+        start_url,
+        dest_dir,
+        allowed_domains,
+        ignore_paths=None,
+        depth_limit=0,
+        follow_links=True,
+        max_age_hours=0,
+        state_data=None,
+        *args,
+        **kwargs,
+    ):
+        super(GenericDownloaderSpider, self).__init__(*args, **kwargs)
+        self.start_urls = [start_url]
+        self.dest_dir = dest_dir
+        self.allowed_domains = allowed_domains
+        self.ignore_paths = ignore_paths if ignore_paths else []
+        self.depth_limit = int(depth_limit)
+        self.follow_links = bool(follow_links)
+        self.max_age_hours = int(max_age_hours)
+        self.state_data = state_data if state_data else {}
+        self.link_extractor = LinkExtractor(allow_domains=self.allowed_domains)
+
+        os.makedirs(self.dest_dir, exist_ok=True)
+        logger.info(f"Downloader initialized for {start_url}")
+        logger.info(f"Destination directory: {self.dest_dir}")
+        logger.info(f"Allowed domains: {self.allowed_domains}")
+        logger.info(f"Ignore paths: {self.ignore_paths}")
+        logger.info(f"Depth limit: {self.depth_limit}")
+        logger.info(f"Follow links: {self.follow_links}")
+        logger.info(f"Max age (hours): {self.max_age_hours}")
+
+    def _should_ignore(self, url_path):
+        for pattern in self.ignore_paths:
+            if pattern in url_path:  # Simple substring match for now, can be regex
+                return True
+        return False
+
+    def _get_file_path(self, response):  # Changed signature to take response
+        url = response.url
+        parsed_url = urlparse(url)
+        original_path = parsed_url.path  # e.g.
/foo/bar.html or /foo/ or / + + # Determine base_name and current_ext from original_path + if original_path.endswith("/"): + base_name = "index" + current_ext = "" + # path_for_dirs is the path part that forms the directory structure + path_for_dirs = original_path.lstrip("/") + else: + path_basename = os.path.basename(original_path) + if ( + not path_basename and original_path == "/" + ): # Root path e.g. http://example.com + base_name = "index" + current_ext = "" + else: # e.g. /file.txt or /file_no_ext or /.config + base_name, current_ext = os.path.splitext(path_basename) + if not base_name and current_ext: # Hidden file like /.bashrc + base_name = current_ext # Treat .bashrc as base_name + current_ext = "" # No further extension part + path_for_dirs = os.path.dirname(original_path.lstrip("/")) + + # Try to get extension from Content-Type + content_type = ( + response.headers.get("Content-Type", b"") + .decode("utf-8") + .split(";")[0] + .strip() + ) + mime_ext = mimetypes.guess_extension(content_type) if content_type else None + + final_ext = current_ext + if mime_ext and not current_ext: # No path extension, use MIME type's + final_ext = mime_ext + elif ( + mime_ext + and current_ext.lower() in [".htm", ".html"] + and mime_ext + and mime_ext.lower() not in [".htm", ".html"] + ): + # Path had .html/.htm, but MIME type suggests something more specific + final_ext = mime_ext + logger.debug( + f"URL {url}: Path ext {current_ext} overridden by Content-Type ext {mime_ext}." + ) + elif not final_ext and ( + content_type.startswith("text/") + or content_type + in ["application/javascript", "application/json", "application/xml"] + ): + # Fallback for common text types if no extension determined yet and no path ext + if not base_name.endswith( + (".js", ".css", ".json", ".xml", ".txt") + ): # Avoid double .html.html + final_ext = ".html" + + filename = base_name + final_ext + + # Create path components for the directory structure + components = [] + if path_for_dirs: + components.extend(comp for comp in path_for_dirs.split("/") if comp) + components.append(filename) + + # Sanitize components + sane_components = [] + for comp_idx, comp_val in enumerate(components): + # Basic sanitization: replace invalid chars, limit length, avoid '..' + # Allow '.' 
for filenames but not as a full component name if it's not the only char + if comp_val == "..": + continue # Skip parent dir references in path construction + + sane_comp = "".join( + c if c.isalnum() or c in ["-", "_", "."] else "_" for c in comp_val + ) + sane_comp = sane_comp[:150] # Limit component length + + if ( + not sane_comp and comp_idx == len(components) - 1 + ): # last component (filename) became empty + sane_comp = "downloaded_file" + final_ext # fallback filename + elif not sane_comp: + sane_comp = "_" # placeholder for empty dir name + + if sane_comp: # Ensure component is not empty after sanitization + sane_components.append(sane_comp) + + if not sane_components: # If all components were sanitized away or skipped + sane_components = [filename if filename else "unknown_file" + final_ext] + + file_path = os.path.join(self.dest_dir, *sane_components) + return file_path + + def parse(self, response, depth=0): + url = response.url + logger.info(f"Processing URL (depth {depth}): {url}") + + parsed_url = urlparse(url) + if self._should_ignore(parsed_url.path): + logger.info(f"Ignoring URL (matches ignore_paths): {url}") + return + + file_path = self._get_file_path(response) # Pass response object + + # Check download state and max_age + if url in self.state_data: + url_state = self.state_data[url] + last_download_time_str = url_state.get("timestamp") + # Consider previous status; only skip if it was a success or another skip + can_skip_based_on_history = url_state.get("status", "").startswith( + "success" + ) or url_state.get("status", "").startswith("skipped") + + if last_download_time_str and can_skip_based_on_history: + last_download_time = datetime.fromisoformat(last_download_time_str) + if self.max_age_hours > 0 and ( + datetime.utcnow() - last_download_time + ) < timedelta(hours=self.max_age_hours): + logger.info( + f"Skipping download for {url}, recently processed at {last_download_time_str} with status '{url_state.get('status')}'." + ) + # Update state to reflect this skip check + self.state_data[url]["status"] = "skipped_max_age" + self.state_data[url]["skipped_timestamp"] = ( + datetime.utcnow().isoformat() + ) + # Still need to check for links if recursive + # Corrected depth condition: + # Follow if self.depth_limit is 0 (infinite) OR current depth is less than a positive limit. + if self.follow_links and ( + self.depth_limit == 0 or depth < self.depth_limit + ): + for link in self.link_extractor.extract_links(response): + parsed_link_url = urlparse(link.url) + if not self._should_ignore(parsed_link_url.path): + yield response.follow(link, callback=self.parse) + else: + logger.info( + f"Ignoring extracted link (matches ignore_paths): {link.url}" + ) + return + + logger.info(f"Processing and saving {url} to {file_path}") + os.makedirs(os.path.dirname(file_path), exist_ok=True) + + try: + with open(file_path, "wb") as f: + f.write(response.body) + logger.info(f"Successfully saved {url} to {file_path}") + self.state_data[url] = { + "timestamp": datetime.utcnow().isoformat(), + "status": "success", + "path": file_path, + "size": len(response.body), + } + except Exception as e: + logger.error(f"Failed to save {url} to {file_path}: {e}") + self.state_data[url] = { + "timestamp": datetime.utcnow().isoformat(), + "status": "failed", + "error": str(e), + } + return # Do not proceed further if save failed + + # Corrected depth condition for following links: + # Follow if self.depth_limit is 0 (infinite) OR current depth is less than a positive limit. 
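+                # (the spider tracks this depth itself, on top of the Scrapy DEPTH_LIMIT setting configured in download_site)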
+ if self.follow_links and (self.depth_limit == 0 or depth < self.depth_limit): + logger.info( + f"Following links from {url} at custom depth {depth} (for next level {depth + 1})" + ) + extracted_links = list(self.link_extractor.extract_links(response)) + if not extracted_links: + logger.info(f" No links extracted from {url} by LinkExtractor.") + else: + logger.info( + f" LinkExtractor found {len(extracted_links)} links from {url}: {[l.url for l in extracted_links]}" + ) + + for link_idx, link in enumerate(extracted_links): + logger.debug( + f" Considering link {link_idx + 1}/{len(extracted_links)}: Text='{link.text}', URL='{link.url}'" + ) + parsed_link_url = urlparse(link.url) + if self._should_ignore(parsed_link_url.path): + logger.info( + f" Ignoring extracted link (matches ignore_paths): {link.url}" + ) + else: + logger.info( + f" Yielding request for: {link.url} (to be processed at custom depth {depth + 1})" + ) + yield response.follow(link, callback=self.parse) + + def closed(self, reason): + logger.info(f"Spider closed: {reason}. Finalizing and saving state.") + state_file_path = os.path.join(self.dest_dir, STATE_FILE_NAME) + try: + # Ensure the directory for the state file exists, though dest_dir should already. + os.makedirs(os.path.dirname(state_file_path), exist_ok=True) + with open(state_file_path, "w") as f: + json.dump(self.state_data, f, indent=4) + logger.info( + f"Spider successfully saved state ({len(self.state_data)} items) to {state_file_path}" + ) + except Exception as e: + logger.error( + f"Spider failed to save state to {state_file_path}: {e}", exc_info=True + ) + + +def download_site( + start_url, + dest_dir, + recursive=True, + ignore_paths=None, + depth_limit=0, # 0 means no limit if recursive is True + follow_links=True, # This is somewhat redundant if recursive is True, but good for clarity + max_age_hours=24, # Re-download if older than 24 hours +): + """ + Downloads a website or a single page. + + :param start_url: The URL to start downloading from. + :param dest_dir: The directory to save downloaded files. + :param recursive: Whether to download recursively. + :param ignore_paths: A list of path substrings or regex patterns to ignore. + :param depth_limit: Maximum depth for recursive downloads (0 for no limit). + :param follow_links: Whether to follow links on pages (primarily for recursive). + :param max_age_hours: Max age of a file in hours. If a file was downloaded + more recently than this, it won't be re-downloaded. + 0 means always re-download. + :return: A dictionary summarizing the download process. + """ + parsed_url = urlparse(start_url) + if not parsed_url.scheme or not parsed_url.netloc: + logger.error( + f"Invalid start_url: {start_url}. Must be a full URL (e.g., http://example.com)" + ) + return None + + allowed_domains = [parsed_url.hostname] # Changed from netloc to hostname + + state_file_path = os.path.join(dest_dir, STATE_FILE_NAME) + state_data = {} + if os.path.exists(state_file_path): + try: + with open(state_file_path, "r") as f: + state_data = json.load(f) + logger.info(f"Loaded download state from {state_file_path}") + except json.JSONDecodeError: + logger.warning( + f"Could not decode JSON from state file {state_file_path}. Starting fresh." + ) + except Exception as e: + logger.error( + f"Error loading state file {state_file_path}: {e}. Starting fresh." 
+ ) + + settings = get_project_settings() + settings.set("ROBOTSTXT_OBEY", False) # Explicitly disable robots.txt + # settings.set('LOG_LEVEL', 'DEBUG') # Optionally enable for more Scrapy internal logs + + effective_scrapy_depth = 0 # Default for non-recursive or depth_limit=0 with recursion (0 means infinite for Scrapy) + if recursive and int(depth_limit) > 0: + effective_scrapy_depth = int(depth_limit) + # If not recursive, effective_scrapy_depth remains 0. + # If recursive and depth_limit is 0, effective_scrapy_depth remains 0 (infinite). + settings.set("DEPTH_LIMIT", effective_scrapy_depth) + + logger.info(f"Scrapy DEPTH_LIMIT set to: {effective_scrapy_depth}") + # Scrapy's DEPTH_PRIORITY and SCHEDULER_DISK_QUEUE might be useful for large crawls + # For now, keeping it simple. + + process = CrawlerProcess(settings) + + # The spider needs to be instantiated with all its custom args + # Scrapy's process.crawl can take kwargs which are passed to the spider's __init__ + process.crawl( + GenericDownloaderSpider, + start_url=start_url, + dest_dir=dest_dir, + allowed_domains=allowed_domains, + ignore_paths=ignore_paths, + depth_limit=int(depth_limit) + if recursive + else 0, # Spider handles its own depth based on this + follow_links=follow_links and recursive, + max_age_hours=int(max_age_hours), + state_data=state_data, + ) + + logger.info(f"Starting download process for {start_url}...") + process.start() # This will block until the crawl is finished + + # The spider's closed() method is now responsible for writing the final state. + # Load this definitive state to build the summary. + final_state_data_for_summary = {} + if os.path.exists(state_file_path): + try: + with open(state_file_path, "r") as f: + final_state_data_for_summary = json.load(f) + logger.info( + f"Loaded final state ({len(final_state_data_for_summary)} items) from {state_file_path} for summary construction." + ) + except json.JSONDecodeError as e: + logger.error( + f"Error decoding JSON from final state file {state_file_path} for summary: {e}. Summary will be based on empty or incomplete state." + ) + except Exception as e: + logger.error( + f"Error loading final state from {state_file_path} for summary: {e}. Summary will be based on empty or incomplete state." + ) + else: + logger.warning( + f"State file {state_file_path} not found after spider closed. Summary will be based on empty state." + ) + + summary = { + "start_url": start_url, + "dest_dir": dest_dir, + "total_processed_urls": len(final_state_data_for_summary), + "successful_downloads": 0, + "failed_downloads": 0, + "skipped_max_age": 0, + "total_bytes_downloaded": 0, + "state_file_path": state_file_path, + "errors": [], + } + + # Populate summary from the final_state_data_for_summary loaded from the file + for url_key, data_val in final_state_data_for_summary.items(): + status = data_val.get("status") + if status == "success": + summary["successful_downloads"] += 1 + summary["total_bytes_downloaded"] += data_val.get("size", 0) + elif status == "failed": + summary["failed_downloads"] += 1 + if "error" in data_val: + summary["errors"].append(f"URL: {url_key}, Error: {data_val['error']}") + elif status == "skipped_max_age": + summary["skipped_max_age"] += 1 + # Any errors during state file loading for summary should also be noted if critical + # For now, the logs capture it. If final_state_data_for_summary is empty due to load error, summary will reflect that. + + logger.info(f"Download process finished. 
Summary: {json.dumps(summary, indent=2)}") + return summary diff --git a/lib/tools/__init__.py b/lib/tools/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/lib/tools/extensions.py b/lib/tools/extensions.py new file mode 100644 index 0000000..bb8f882 --- /dev/null +++ b/lib/tools/extensions.py @@ -0,0 +1,31 @@ +import mimetypes +import os + +def check_and_add_extension(file_path: str) -> str: + # Only check if there's no extension + if not os.path.splitext(file_path)[1]: + # Read the file content + with open(file_path, 'rb') as f: + content = f.read(2048) # Read the first 2048 bytes for detection + + # Detect content type + content_type = detect_content_type(content) + extension = mimetypes.guess_extension(content_type) + + if extension: + new_file_path = file_path + extension + os.rename(file_path, new_file_path) + return new_file_path + + return file_path + +def detect_content_type(content: bytes) -> str: + # Simple content-based detection + if content.startswith(b'\xff\xd8'): + return 'image/jpeg' + if content.startswith(b'\x89PNG'): + return 'image/png' + if content.startswith(b'GIF'): + return 'image/gif' + # Add more checks as needed + return 'application/octet-stream' \ No newline at end of file diff --git a/lib/tools/gitscanner.py b/lib/tools/gitscanner.py new file mode 100644 index 0000000..5d6cbe9 --- /dev/null +++ b/lib/tools/gitscanner.py @@ -0,0 +1,270 @@ +import sys +import os + +# Add the parent directory of herotools to the Python module search path +sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) + +import time +import json +import subprocess +from typing import Optional,List +import redis +from herotools.logger import logger +from herotools.texttools import name_fix +from enum import Enum, auto +from dataclasses import dataclass +import git + + +# Initialize Redis client +redis_client = redis.StrictRedis(host='localhost', port=6379, db=0) + +# Define the ChangeType Enum +class ChangeType(Enum): + DEL = 'del' + MOD = 'mod' + NEW = 'new' + +@dataclass +class FileChange: + commit_hash: str + commit_time: str + path: str #relative path in the repo + change_type: ChangeType + + +class Repo: + def __init__(self, cat: str, account: str, name: str, path: str): + self.cat = cat + self.account = account + self.name = name + self.path = path + self.hash_last_found: Optional[float] = None + self.hash_last_processed: Optional[str] = None + self.lastcheck: Optional[float] = None + + def __str__(self): + return json.dumps({ + "cat": self.cat, + "account": self.account, + "name": self.name, + "path": self.path, + "hash_last_found": self.hash_last_found, + "hash_last_processed": self.hash_last_processed, + "lastcheck": self.lastcheck + }, indent=2) + + def _redis_key(self) -> str: + return f"gitcheck:{self.cat}:{self.account}:{self.name}" + + def save_to_redis(self): + redis_client.set(self._redis_key(), json.dumps(self.__dict__)) + + @staticmethod + def load_from_redis(cat: str, account: str, name: str) -> Optional['Repo']: + redis_key = f"gitcheck:{cat}:{account}:{name}" + data = redis_client.get(redis_key) + if data: + data = json.loads(data) + repo = Repo(data["cat"], data["account"], data["name"], data["path"]) + repo.hash_last_found = data.get("hash_last_found") + repo.hash_last_processed = data.get("hash_last_processed") + repo.lastcheck = data.get("lastcheck") + return repo + return None + + def get_remote_commit_hash(self, branch: str) -> str: + """Get the latest commit hash from the remote repository.""" + result = 
subprocess.run(
+            ['git', 'ls-remote', 'origin', f'refs/heads/{branch}'],
+            cwd=self.path,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+            text=True
+        )
+        if result.returncode != 0:
+            raise Exception(f"Error fetching remote commit hash: {result.stderr}")
+
+        return result.stdout.split()[0]
+
+    def get_local_commit_hash(self) -> str:
+        """Get the latest commit hash from the local repository."""
+        result = subprocess.run(
+            ['git', 'rev-parse', 'HEAD'],
+            cwd=self.path,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+            text=True
+        )
+        if result.returncode != 0:
+            raise Exception(f"Error fetching local commit hash: {result.stderr}")
+
+        return result.stdout.strip()
+
+    def get_current_branch(self) -> str:
+        result = subprocess.run(
+            ['git', 'branch', '--show-current'],
+            cwd=self.path,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+            text=True
+        )
+        if result.returncode != 0:
+            raise Exception(f"Error fetching local branch name: {result.stderr}")
+
+        # strip() instead of split()[0]: the latter raises IndexError on a detached HEAD (empty output)
+        return result.stdout.strip()
+
+    def get_remote_default_branch(self) -> str:
+        result = subprocess.run(
+            ['git', 'ls-remote', '--symref', 'origin', 'HEAD'],
+            cwd=self.path,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+            text=True
+        )
+        if result.returncode != 0:
+            raise Exception(f"Error fetching remote default branch: {result.stderr}")
+
+        return result.stdout.split()[1].split('/')[-1]
+
+    def should_check_again(self) -> bool:
+        """Determine if we should check the repository again based on the last check time."""
+        if self.lastcheck is None:
+            return True
+        return (time.time() - self.lastcheck) > 60
+
+    def update_last_check_time(self) -> None:
+        """Update the last check time."""
+        self.lastcheck = time.time()
+        self.save_to_redis()
+
+    def log_change(self, epoch_time: float) -> None:
+        """Log a detected change in Redis."""
+        self.hash_last_found = epoch_time
+        self.save_to_redis()
+
+    def check_for_changes(self, branch: str = 'main') -> None:
+        """Check the repository for updates and log changes if found."""
+        if not self.should_check_again():
+            print("WAIT TO CHECK FOR CHANGES")
+            return
+
+        try:
+            diff_commits = self.get_local_remote_diff_commits(branch)
+
+            if diff_commits != []:
+                print("FOUND SOME CHANGES")
+                self.log_change(time.time())
+                file_changes = self.get_file_changes_from_commits(diff_commits)
+                self.print_file_changes(file_changes)
+            else:
+                print("NO CHANGES FOUND")
+
+            self.update_last_check_time()
+        except Exception as e:
+            print(f"An error occurred while checking repo {self.path}: {e}")
+
+    def get_local_remote_diff_commits(self, branch: str) -> List[git.Commit]:
+        # Open the repository
+        repo = git.Repo(self.path)
+
+        # Get the local branch
+        local_branch = repo.heads[branch]
+        # Get the remote reference for the branch
+        remote_ref = repo.remotes.origin.refs[branch]
+
+        # Fetch the latest changes from the remote
+        repo.remotes.origin.fetch()
+
+        # Get the commit hashes of the local and remote branches
+        local_commit = local_branch.commit
+        remote_commit = remote_ref.commit
+
+        if local_commit == remote_commit:
+            return []
+
+        # Get the common ancestor commit
+        base_commit = repo.merge_base(local_commit, remote_commit)[0]
+
+        # Get the ahead and behind commits
+        ahead_commits = list(repo.iter_commits(f"{base_commit}..{local_commit}"))
+        behind_commits = list(repo.iter_commits(f"{base_commit}..{remote_commit}"))
+
+        # Combine the ahead and behind commits
+        diff_commits = ahead_commits + behind_commits
+        return diff_commits
+
+    def get_file_changes_from_commits(self, commit_list: List[git.Commit]) ->
List[FileChange]: + file_changes = [] + for commit in commit_list: + # print(commit) + diffs = commit.diff(self.hash_last_processed, create_patch=True) + # print(diffs) + for diff in diffs: + if diff.deleted_file: + change_type = ChangeType.DEL + elif diff.new_file: + change_type = ChangeType.NEW + else: + change_type = ChangeType.MOD + + file_change = FileChange( + commit_hash=commit.hexsha, + commit_time=str(commit.committed_datetime), + path=diff.b_path if diff.new_file else diff.a_path, + change_type=change_type + ) + file_changes.append(file_change) + return file_changes + + def print_file_changes(self, file_changes: List[FileChange]): + for file_change in file_changes: + print("------------------------------------") + print(f"Commit Hash: {file_change.commit_hash}") + print(f"Commit Time: {file_change.commit_time}") + print(f"File Path: {file_change.path}") + print(f"Change Type: {file_change.change_type.value}") + print("------------------------------------") + +def gitscan(path: str, cat: str) -> None: + """Walk over directories to find Git repositories and check them.""" + path = os.path.abspath(os.path.expanduser(path)) + for root, dirs, files in os.walk(path): + if '.git' in dirs: + accountname = os.path.basename(os.path.dirname(root)) + reponame = os.path.basename(root) + repo = Repo.load_from_redis(cat, accountname, reponame) + if repo is None: + repo = Repo(cat, accountname, reponame, root) + branch = repo.get_current_branch() + + logger.debug(f"root: {root}") + logger.debug(f"accountname: {accountname}") + logger.debug(f"reponame: {reponame}") + logger.debug(f"branch: {branch}") + logger.debug(f"repo: {repo}") + + repo.check_for_changes(branch) + dirs[:] = [] # Don't go deeper into subdirectories + else: + # Filter out any .git directories from further traversal + dirs[:] = [d for d in dirs if d != '.git'] + +def print_redis_client(): + cursor = 0 + while True: + cursor, keys = redis_client.scan(cursor) + for key in keys: + value = redis_client.get(key) + print(key) + print(value) + print() + if cursor == 0: + break + +if __name__ == "__main__": + # print_redis_client() + mypath = "~/code/git.threefold.info/projectmycelium" + category = 'mycat' + gitscan(path=mypath, cat=category) + # print_redis_client() \ No newline at end of file diff --git a/lib/tools/logger.py b/lib/tools/logger.py new file mode 100644 index 0000000..20a4dfc --- /dev/null +++ b/lib/tools/logger.py @@ -0,0 +1,39 @@ +import logging +import colorlog + +log_colors_config = { + 'DEBUG': 'cyan', + 'INFO': 'green', + 'WARNING': 'yellow', + 'ERROR': 'red', + 'CRITICAL': 'bold_red', +} + +secondary_log_colors_config = { + 'name': { + 'DEBUG': 'blue', + 'INFO': 'blue', + 'WARNING': 'blue', + 'ERROR': 'blue', + 'CRITICAL': 'blue' + }, + 'levelname': log_colors_config +} + + +formatter = colorlog.ColoredFormatter( + '%(log_color)s%(asctime)s - %(name_log_color)s%(name)s - %(levelname_log_color)s%(levelname)s - %(message)s', + datefmt='%Y-%m-%d %H:%M:%S', + log_colors=log_colors_config, + secondary_log_colors=secondary_log_colors_config +) + + +# Create a handler +handler = logging.StreamHandler() +handler.setFormatter(formatter) + +# Get the root logger +logger = logging.getLogger() +logger.setLevel(logging.DEBUG) +logger.addHandler(handler) diff --git a/lib/tools/md5.py b/lib/tools/md5.py new file mode 100644 index 0000000..23d2974 --- /dev/null +++ b/lib/tools/md5.py @@ -0,0 +1,13 @@ +import hashlib +from typing import List + + +def file_md5(file_path: str) -> str: + """ + Compute the MD5 hash of the file 
content. + """ + hash_md5 = hashlib.md5() + with open(file_path, "rb") as f: + for chunk in iter(lambda: f.read(4096), b""): + hash_md5.update(chunk) + return hash_md5.hexdigest() \ No newline at end of file diff --git a/lib/tools/ourtime.py b/lib/tools/ourtime.py new file mode 100644 index 0000000..2856598 --- /dev/null +++ b/lib/tools/ourtime.py @@ -0,0 +1,55 @@ +from datetime import datetime, timezone, timedelta +import re + + +def epoch_get(deadline: str) -> int: + """ + Set the deadline based on various input formats. + + Supports: + - Relative: +1h (hours), +2d (days), +1w (week), +1m (month) + - Absolute: 20/10/2024, 20/10, 20/10/24 (all same day) + + If hour not specified, defaults to midday (noon). + + Returns the deadline as a Unix timestamp (epoch). + """ + now = datetime.now(timezone.utc) + + # Check for relative time format + relative_match = re.match(r'\+(\d+)([hdwm])', deadline) + if relative_match: + amount, unit = relative_match.groups() + amount = int(amount) + if unit == 'h': + delta = timedelta(hours=amount) + elif unit == 'd': + delta = timedelta(days=amount) + elif unit == 'w': + delta = timedelta(weeks=amount) + elif unit == 'm': + delta = timedelta(days=amount * 30) # Approximate + + new_deadline = now + delta + return int(new_deadline.timestamp()) + + # Check for absolute date format + date_formats = ['%d/%m/%Y', '%d/%m/%y', '%d/%m'] + for fmt in date_formats: + try: + date_obj = datetime.strptime(deadline, fmt) + if fmt == '%d/%m': + # If year is not provided, use the current year + date_obj = date_obj.replace(year=now.year) + + # If the resulting date is in the past, assume next year + if date_obj.replace(tzinfo=timezone.utc) < now: + date_obj = date_obj.replace(year=date_obj.year + 1) + + # Set time to noon (12:00) + date_obj = date_obj.replace(hour=12, minute=0, second=0, microsecond=0, tzinfo=timezone.utc) + return int(date_obj.timestamp()) + except ValueError: + continue + + raise ValueError("Invalid deadline format. Use +Xh/d/w/m for relative or DD/MM/YYYY for absolute dates.") diff --git a/lib/tools/pathtools.py b/lib/tools/pathtools.py new file mode 100644 index 0000000..4a418f1 --- /dev/null +++ b/lib/tools/pathtools.py @@ -0,0 +1,26 @@ + + +import os + +def expand_path(path: str) -> str: + """ + Expand ~ to the user's home directory and return the absolute path. 
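+    e.g. expand_path("~/docs") -> "/home/<user>/docs" (exact result depends on $HOME)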
+ """ + return os.path.abspath(os.path.expanduser(path)) + + +def remove_file_if_exists(file_path): + try: + # This will remove the file or symlink, regardless of whether + # it's a regular file, a directory, or a broken symlink + os.remove(file_path) + except FileNotFoundError: + # File doesn't exist, so we don't need to do anything + pass + except IsADirectoryError: + # It's a directory, so we use rmdir instead + os.rmdir(file_path) + except PermissionError: + print(f"Permission denied: Unable to remove {file_path}") + except Exception as e: + print(f"An error occurred while trying to remove {file_path}: {str(e)}") \ No newline at end of file diff --git a/lib/tools/texttools.py b/lib/tools/texttools.py new file mode 100644 index 0000000..a9cf26b --- /dev/null +++ b/lib/tools/texttools.py @@ -0,0 +1,110 @@ + +import re +import unicodedata +import random + + +def description_fix(description): + description = description.lower() + description = unicodedata.normalize('NFKD', description).encode('ASCII', 'ignore').decode('ASCII') + description = re.sub(r'[^a-z0-9\s]', '', description) + return description.strip() + + +# def name_fix(name: str) -> str: +# """ +# Normalize the string to lowercase ASCII, replace spaces and specific punctuations with underscores, +# and remove non-ASCII characters. +# """ +# name = name.lower() +# name = unicodedata.normalize('NFKD', name).encode('ascii', 'ignore').decode('ascii') +# name = re.sub(r'[ :;!]', '_', name) # Replace spaces and specific punctuations with underscores +# name = re.sub(r'\W+', '', name) # Remove any other non-word characters +# return name + + +def name_fix(name: str) -> str: + """ + Normalize the string to lowercase ASCII, replace spaces and specific punctuations with underscores, + maintain dots, and remove non-ASCII characters. 
+ """ + name = name.lower() + name = unicodedata.normalize('NFKD', name).encode('ascii', 'ignore').decode('ascii') + name = re.sub(r'[ :;!]', '_', name) # Replace spaces and specific punctuations with underscores + name = re.sub(r'[^\w._]+', '', name) # Remove any non-word characters except dots and underscores + return name + +def name_obfuscate(name): + # Define a mapping of consonants to their obfuscated counterparts + consonant_map = { + 'b': 'p', 'c': 'k', 'd': 't', 'f': 'v', 'g': 'j', 'h': 'x', + 'j': 'q', 'k': 'c', 'l': 'r', 'm': 'n', 'n': 'm', 'p': 'b', + 'q': 'g', 'r': 'l', 's': 'z', 't': 'd', 'v': 'f', 'w': 'y', + 'x': 'h', 'y': 'w', 'z': 's' + } + + # Define a mapping of vowels to their obfuscated counterparts + vowel_map = { + 'a': 'e', 'e': 'i', 'i': 'o', 'o': 'u', 'u': 'a' + } + + # Convert the name to lowercase + name = name.lower() + + # Split the name into words + words = name.split() + + obfuscated_words = [] + for word in words: + obfuscated_word = '' + for char in word: + if char in vowel_map: + # Obfuscate vowels + obfuscated_word += vowel_map[char] + elif char in consonant_map: + # Obfuscate consonants + obfuscated_word += consonant_map[char] + else: + # Keep non-alphabetic characters unchanged + obfuscated_word += char + obfuscated_words.append(obfuscated_word) + + # Join the obfuscated words back into a single string + obfuscated_name = ' '.join(obfuscated_words) + + # Capitalize the first letter of each word + obfuscated_name = obfuscated_name.title() + + return obfuscated_name + +def dedent(content: str) -> str: + # Split the input content into lines + lines = content.splitlines() + + # Remove leading and trailing empty lines + while lines and not lines[0].strip(): + lines.pop(0) + while lines and not lines[-1].strip(): + lines.pop() + + if not lines: + return "" + + # Find the minimum indentation (leading spaces) in all non-empty lines + min_indent = None + for line in lines: + stripped_line = line.lstrip() + if stripped_line: # Only consider non-empty lines + leading_spaces = len(line) - len(stripped_line) + if min_indent is None or leading_spaces < min_indent: + min_indent = leading_spaces + + # Dedent each line by the minimum indentation found + dedented_lines = [line[min_indent:] if len(line) > min_indent else line for line in lines] + + # Join the dedented lines back into a single string + return "\n".join(dedented_lines) + +if __name__ == "__main__": + print("fixed name:", name_fix("John Doe")) + print("obfuscated name:", name_obfuscate("John Doe")) \ No newline at end of file diff --git a/lib/web/doctools/html_replacer.py b/lib/web/doctools/html_replacer.py new file mode 100644 index 0000000..1ee6957 --- /dev/null +++ b/lib/web/doctools/html_replacer.py @@ -0,0 +1,94 @@ +from herotools.logger import logger +from bs4 import BeautifulSoup +import re +from typing import Callable +from herotools.texttools import name_fix + +# Define the type for the content and link fetching functions +LinkFetcher = Callable[[str, str, str, str, str], str] +ContentFetcher = Callable[[str, str, str, str], str] + +# Private functions to be used internally + +def _get_link(language: str, prefix: str, site_name: str, pagename: str, name: str) -> str: + # Replace this with your logic to get the actual link + logger.debug(f"_get_link: {language[:10]:<10} {site_name}:{pagename}:{name}") + return f"{prefix}{language}/{site_name}/{pagename}/{name}.jpg" + +def _get_content(language: str, site_name: str, pagename: str, name: str) -> str: + # Replace this with your logic to get the actual 
+    logger.debug(f"_get_content: {language[:10]:<10} {site_name}:{pagename}:{name}")
+    return f"Replaced text for {name} on page {pagename} in {language} language on {site_name} site"
+
+def _process_html(language: str, prefix: str, site_name: str, pagename: str, html_content: str) -> str:
+    """
+    Process HTML and replace content based on tags.
+    This allows us to work with templates and fetch language-specific content to substitute into the HTML.
+    """
+    language = name_fix(language)
+    site_name = name_fix(site_name)
+    pagename = name_fix(pagename)
+    prefix = prefix.strip()
+    if not prefix.endswith('/'):
+        prefix += '/'
+
+    soup = BeautifulSoup(html_content, 'html.parser')
+
+    # Find all elements with class names starting with !!img: or !!txt:
+    for element in soup.find_all(class_=re.compile(r'!!(img|txt):(.+)')):
+        for cls in element['class']:
+            if cls.startswith('!!img:'):
+                name = cls.split(':')[1]
+                name = name_fix(name)
+                # Get the link to replace the src attribute in !!img: elements
+                link = _get_link(language=language, prefix=prefix, site_name=site_name, pagename=pagename, name=name)
+                if element.name == 'img':
+                    element['src'] = link
+                elif 'src' in element.attrs:
+                    element['src'] = link  # In case the element is not an img but has a src attribute
+            elif cls.startswith('!!txt:'):
+                name = cls.split(':')[1]
+                name = name_fix(name)
+                # Get the content to replace the text in !!txt: elements
+                content = _get_content(language=language, site_name=site_name, pagename=pagename, name=name)
+                element.string = content
+
+    # Output the modified HTML
+    return str(soup)
+
+# Public function to process the HTML content
+def process(language: str, prefix: str, site_name: str, pagename: str, html_content: str) -> str:
+    """
+    Public function to process HTML and replace content based on tags.
+    This function wraps the internal _process_html function.
+    """
+    return _process_html(language=language, prefix=prefix, site_name=site_name, pagename=pagename, html_content=html_content)
+
+# Sample usage with a given language, site name, page name, and HTML content
+if __name__ == "__main__":
+    # Example HTML content (sample markup; the !!img:/!!txt: element names are illustrative placeholders)
+    html_content = '''
+    <!DOCTYPE html>
+    <html lang="en">
+    <head>
+        <title>Sample Page</title>
+    </head>
+    <body>
+        <img class="!!img:header" src="placeholder.jpg" alt="header image">
+        <div class="!!txt:slogan">
+            Take care of your performance every day.
+        </div>
+        <div class="!!txt:description">
+            This is a sample description text.
+        </div>
+    </body>
+    </html>
+    '''
+
+    # Process the HTML content for a specific language, site name, and page
+    language: str = "en"
+    site_name: str = "ExampleSite"
+    pagename: str = "HomePage"
+    prefix: str = "http://localhost/images/"
+    processed_html: str = process(language=language, prefix=prefix, site_name=site_name, pagename=pagename, html_content=html_content)
+
+    # Print the modified HTML
+    print(processed_html)
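+
+    # Illustrative expectation (a sketch, given the placeholder fetchers above): the
+    # !!img: element's src becomes http://localhost/images/en/examplesite/homepage/header.jpg
+    # and each !!txt: element's text is replaced by the _get_content() placeholder string.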
+ """ + # Initialize the Markdown parser + md = MarkdownIt() + tokens = md.parse(markdown) + ast = SyntaxTreeNode(tokens) + + print(ast.pretty(indent=2, show_text=True)) + + def process_node(node: SyntaxTreeNode): + # from IPython import embed; embed() + + def get_new_url(url: str): + logger.debug(f"url: {url}") + + parsed_url = urlparse(url) + # site_name = parsed_url.netloc + image_path = parsed_url.path + logger.debug(f"parsed_url: {parsed_url}") + + # prefix = prefix.rstrip('/') + # image_path = image_path.strip('/') + + new_url = f"{prefix.rstrip('/')}/{image_path.strip('/')}" + logger.debug(f"new_url: {new_url}") + + return new_url + + if node.type == 'image': + # Process image link + url = node.attrs.get('src', '') + new_url = get_new_url(url) + node.attrs['src'] = new_url + + elif node.type == 'link': + # Process markdown page link + url = node.attrs.get('href', '') + new_url = get_new_url(url) + node.attrs['href'] = new_url + + # Recursively process child nodes + for child in node.children or []: + process_node(child) + + def replace_include_directives(match: re.Match) -> str: + """ + Replaces custom include directives with appropriate links. + + Args: + match (re.Match): The match object containing the found include directive. + + Returns: + str: The generated link for the include directive. + """ + url = match.group(1) + if ':' in url: + site_name, page = url.split(':', 1) + page_name = page.split('/')[-1] + else: + site_name = "" + page_name = url + if not page.endswith('.md'): + page += '.md' + return get_include(prefix, site_name, page_name) + + + # Process the root node + process_node(ast) + + # Convert the AST back to markdown + renderer = MDRenderer() + options = {} + env = {} + rendered_markdown = renderer.render(tokens, options, env) + + # include_pattern = re.compile(r"!!include page:'(.*?)'") + # rendered_markdown = include_pattern.sub(replace_include_directives, rendered_markdown) + + return rendered_markdown + + + +if __name__ == "__main__": + + text = """ +![Image description](https://example.com/image.png) +[Page link](sitename:some/path/to/page.md) +!!include page:'mypage' +!!include page:'mypage.md' +!!include page:'mysite:mypage +!!include page:'mysite:mypage' +!!include page:'mysite:mypage.md' + """ + + print(text) + text2=replace("http://localhost:8080/pre/", text) + print(text2) + + \ No newline at end of file diff --git a/lib/web/doctools/processor.py b/lib/web/doctools/processor.py new file mode 100644 index 0000000..9e6f1b4 --- /dev/null +++ b/lib/web/doctools/processor.py @@ -0,0 +1,94 @@ +import os +import re +from typing import Callable + +from herotools.logger import logger +from herotools.md5 import file_md5 +from herotools.texttools import name_fix + + +def _example_set_file(site_name: str, path: str, md5: str) -> None: + # Placeholder for actual implementation + logger.debug(f"set_file : site_name={site_name[:20]:<20} {path}") + + +def _example_set_img(site_name: str, path: str, md5: str) -> None: + # Placeholder for actual implementation + logger.debug(f"set_img : site_name={site_name[:20]:<20} {path}") + + +def _example_set_markdown( + site_name: str, path: str, md5: str, content: str +) -> None: + # Placeholder for actual implementation + logger.debug(f"set_markdown : site_name={site_name[:20]:<20} {path}") + + +def _example_set_site(site_name: str, path: str) -> None: + # Placeholder for actual implementation + logger.info(f"set_site : site_name={site_name[:20]:<20} {path}") + + +def _site_process_action( + site_name: str, + site_path: str, + 
set_file: Callable[[str, str, str], None], + set_img: Callable[[str, str, str], None], + set_markdown: Callable[[str, str, str, str], None], +) -> None: + logger.debug(f"site process: {site_path[:60]:<60} -> {site_name}") + for root, _, files in os.walk(site_path): + for file in files: + file_path = os.path.join(root, file) + file_path_rel = os.path.relpath(file_path, site_path) + file_name = os.path.basename(file) + # print(file_name) + mymd5 = file_md5(file_path) + if file.lower().endswith(".md"): + with open(file_path, "r", encoding="utf-8") as f: + content = f.read() + set_markdown(site_name, file_path_rel, mymd5, content) + elif file_name in [".collection", ".site", ".done"]: + continue + elif re.search( + r"\.(jpg|jpeg|png|gif|bmp|tiff|webp)$", file, re.IGNORECASE + ): + set_img(site_name, file_path_rel, mymd5) + else: + set_file(site_name, file_path_rel, mymd5) + + +def process( + path: str, + set_site: Callable[[str, str], None], + set_file: Callable[[str, str, str], None], + set_img: Callable[[str, str, str], None], + set_markdown: Callable[[str, str, str, str], None], +) -> None: + """ + walk over directory and apply set_file(), set_img() and set_markdown() + """ + path = os.path.abspath(os.path.expanduser(path)) + logger.info(f"sites process: {path}") + for root, dirs, files in os.walk(path): + if ".site" in files or ".collection" in files: + site_name = name_fix(os.path.basename(root)) + set_site(site_name, root) + _site_process_action( + site_name, root, set_file, set_img, set_markdown + ) + # Prevent the os.walk from going deeper into subdirectories + dirs[:] = [] + + +if __name__ == "__main__": + mypath = "~/code/git.threefold.info/projectmycelium/info_projectmycelium/collections" + + # logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s') + process( + mypath, + _example_set_site, + _example_set_file, + _example_set_img, + _example_set_markdown, + ) diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..8e8e22c --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,11 @@ +[project] +name = "Herolib" +version = "0.9.0" +description = "Lib from Hero's project for Actors" +requires-python = ">=3.13" +dependencies = [ + "peewee>=3.17.0", + "pygments>=2.16.1", + "toml", + "requests>=2.31.0" +] diff --git a/uv.lock b/uv.lock new file mode 100644 index 0000000..6d962ee --- /dev/null +++ b/uv.lock @@ -0,0 +1,110 @@ +version = 1 +revision = 1 +requires-python = ">=3.13" + +[[package]] +name = "certifi" +version = "2025.8.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/dc/67/960ebe6bf230a96cda2e0abcf73af550ec4f090005363542f0765df162e0/certifi-2025.8.3.tar.gz", hash = "sha256:e564105f78ded564e3ae7c923924435e1daa7463faeab5bb932bc53ffae63407", size = 162386 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e5/48/1549795ba7742c948d2ad169c1c8cdbae65bc450d6cd753d124b17c8cd32/certifi-2025.8.3-py3-none-any.whl", hash = "sha256:f6c12493cfb1b06ba2ff328595af9350c65d6644968e5d3a2ffd78699af217a5", size = 161216 }, +] + +[[package]] +name = "charset-normalizer" +version = "3.4.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e4/33/89c2ced2b67d1c2a61c19c6751aa8902d46ce3dacb23600a283619f5a12d/charset_normalizer-3.4.2.tar.gz", hash = "sha256:5baececa9ecba31eff645232d59845c07aa030f0c81ee70184a90d35099a0e63", size = 126367 } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/ea/12/a93df3366ed32db1d907d7593a94f1fe6293903e3e92967bebd6950ed12c/charset_normalizer-3.4.2-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:926ca93accd5d36ccdabd803392ddc3e03e6d4cd1cf17deff3b989ab8e9dbcf0", size = 199622 }, + { url = "https://files.pythonhosted.org/packages/04/93/bf204e6f344c39d9937d3c13c8cd5bbfc266472e51fc8c07cb7f64fcd2de/charset_normalizer-3.4.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:eba9904b0f38a143592d9fc0e19e2df0fa2e41c3c3745554761c5f6447eedabf", size = 143435 }, + { url = "https://files.pythonhosted.org/packages/22/2a/ea8a2095b0bafa6c5b5a55ffdc2f924455233ee7b91c69b7edfcc9e02284/charset_normalizer-3.4.2-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3fddb7e2c84ac87ac3a947cb4e66d143ca5863ef48e4a5ecb83bd48619e4634e", size = 153653 }, + { url = "https://files.pythonhosted.org/packages/b6/57/1b090ff183d13cef485dfbe272e2fe57622a76694061353c59da52c9a659/charset_normalizer-3.4.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:98f862da73774290f251b9df8d11161b6cf25b599a66baf087c1ffe340e9bfd1", size = 146231 }, + { url = "https://files.pythonhosted.org/packages/e2/28/ffc026b26f441fc67bd21ab7f03b313ab3fe46714a14b516f931abe1a2d8/charset_normalizer-3.4.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c9379d65defcab82d07b2a9dfbfc2e95bc8fe0ebb1b176a3190230a3ef0e07c", size = 148243 }, + { url = "https://files.pythonhosted.org/packages/c0/0f/9abe9bd191629c33e69e47c6ef45ef99773320e9ad8e9cb08b8ab4a8d4cb/charset_normalizer-3.4.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e635b87f01ebc977342e2697d05b56632f5f879a4f15955dfe8cef2448b51691", size = 150442 }, + { url = "https://files.pythonhosted.org/packages/67/7c/a123bbcedca91d5916c056407f89a7f5e8fdfce12ba825d7d6b9954a1a3c/charset_normalizer-3.4.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:1c95a1e2902a8b722868587c0e1184ad5c55631de5afc0eb96bc4b0d738092c0", size = 145147 }, + { url = "https://files.pythonhosted.org/packages/ec/fe/1ac556fa4899d967b83e9893788e86b6af4d83e4726511eaaad035e36595/charset_normalizer-3.4.2-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:ef8de666d6179b009dce7bcb2ad4c4a779f113f12caf8dc77f0162c29d20490b", size = 153057 }, + { url = "https://files.pythonhosted.org/packages/2b/ff/acfc0b0a70b19e3e54febdd5301a98b72fa07635e56f24f60502e954c461/charset_normalizer-3.4.2-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:32fc0341d72e0f73f80acb0a2c94216bd704f4f0bce10aedea38f30502b271ff", size = 156454 }, + { url = "https://files.pythonhosted.org/packages/92/08/95b458ce9c740d0645feb0e96cea1f5ec946ea9c580a94adfe0b617f3573/charset_normalizer-3.4.2-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:289200a18fa698949d2b39c671c2cc7a24d44096784e76614899a7ccf2574b7b", size = 154174 }, + { url = "https://files.pythonhosted.org/packages/78/be/8392efc43487ac051eee6c36d5fbd63032d78f7728cb37aebcc98191f1ff/charset_normalizer-3.4.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4a476b06fbcf359ad25d34a057b7219281286ae2477cc5ff5e3f70a246971148", size = 149166 }, + { url = "https://files.pythonhosted.org/packages/44/96/392abd49b094d30b91d9fbda6a69519e95802250b777841cf3bda8fe136c/charset_normalizer-3.4.2-cp313-cp313-win32.whl", hash = "sha256:aaeeb6a479c7667fbe1099af9617c83aaca22182d6cf8c53966491a0f1b7ffb7", size = 98064 }, + { url = 
"https://files.pythonhosted.org/packages/e9/b0/0200da600134e001d91851ddc797809e2fe0ea72de90e09bec5a2fbdaccb/charset_normalizer-3.4.2-cp313-cp313-win_amd64.whl", hash = "sha256:aa6af9e7d59f9c12b33ae4e9450619cf2488e2bbe9b44030905877f0b2324980", size = 105641 }, + { url = "https://files.pythonhosted.org/packages/20/94/c5790835a017658cbfabd07f3bfb549140c3ac458cfc196323996b10095a/charset_normalizer-3.4.2-py3-none-any.whl", hash = "sha256:7f56930ab0abd1c45cd15be65cc741c28b1c9a34876ce8c17a2fa107810c0af0", size = 52626 }, +] + +[[package]] +name = "herolib" +version = "0.9.0" +source = { virtual = "." } +dependencies = [ + { name = "peewee" }, + { name = "pygments" }, + { name = "requests" }, + { name = "toml" }, +] + +[package.metadata] +requires-dist = [ + { name = "peewee", specifier = ">=3.17.0" }, + { name = "pygments", specifier = ">=2.16.1" }, + { name = "requests", specifier = ">=2.31.0" }, + { name = "toml" }, +] + +[[package]] +name = "idna" +version = "3.10" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f1/70/7703c29685631f5a7590aa73f1f1d3fa9a380e654b86af429e0934a32f7d/idna-3.10.tar.gz", hash = "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9", size = 190490 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/76/c6/c88e154df9c4e1a2a66ccf0005a88dfb2650c1dffb6f5ce603dfbd452ce3/idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3", size = 70442 }, +] + +[[package]] +name = "peewee" +version = "3.18.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/04/89/76f6f1b744c8608e0d416b588b9d63c2a500ff800065ae610f7c80f532d6/peewee-3.18.2.tar.gz", hash = "sha256:77a54263eb61aff2ea72f63d2eeb91b140c25c1884148e28e4c0f7c4f64996a0", size = 949220 } + +[[package]] +name = "pygments" +version = "2.19.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/b0/77/a5b8c569bf593b0140bde72ea885a803b82086995367bf2037de0159d924/pygments-2.19.2.tar.gz", hash = "sha256:636cb2477cec7f8952536970bc533bc43743542f70392ae026374600add5b887", size = 4968631 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c7/21/705964c7812476f378728bdf590ca4b771ec72385c533964653c68e86bdc/pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b", size = 1225217 }, +] + +[[package]] +name = "requests" +version = "2.32.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "certifi" }, + { name = "charset-normalizer" }, + { name = "idna" }, + { name = "urllib3" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/e1/0a/929373653770d8a0d7ea76c37de6e41f11eb07559b103b1c02cafb3f7cf8/requests-2.32.4.tar.gz", hash = "sha256:27d0316682c8a29834d3264820024b62a36942083d52caf2f14c0591336d3422", size = 135258 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7c/e4/56027c4a6b4ae70ca9de302488c5ca95ad4a39e190093d6c1a8ace08341b/requests-2.32.4-py3-none-any.whl", hash = "sha256:27babd3cda2a6d50b30443204ee89830707d396671944c998b5975b031ac2b2c", size = 64847 }, +] + +[[package]] +name = "toml" +version = "0.10.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/be/ba/1f744cdc819428fc6b5084ec34d9b30660f6f9daaf70eead706e3203ec3c/toml-0.10.2.tar.gz", hash = 
"sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f", size = 22253 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/44/6f/7120676b6d73228c96e17f1f794d8ab046fc910d781c8d151120c3f1569e/toml-0.10.2-py2.py3-none-any.whl", hash = "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b", size = 16588 }, +] + +[[package]] +name = "urllib3" +version = "2.5.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/15/22/9ee70a2574a4f4599c47dd506532914ce044817c7752a79b6a51286319bc/urllib3-2.5.0.tar.gz", hash = "sha256:3fc47733c7e419d4bc3f6b3dc2b4f890bb743906a30d56ba4a5bfa4bbff92760", size = 393185 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a7/c2/fe1e52489ae3122415c51f387e221dd0773709bad6c6cdaa599e8a2c5185/urllib3-2.5.0-py3-none-any.whl", hash = "sha256:e6b01673c0fa6a13e374b50871808eb3bf7046c4b125b216f6bf1cc604cff0dc", size = 129795 }, +]