lib/clients/assemblyai/client.py (new file, 69 lines)
@@ -0,0 +1,69 @@

import os

from pydub import AudioSegment
import assemblyai as aai


class Client:
    def __init__(self):
        api_key = os.getenv("ASSEMBLYAI")

        if not api_key:
            raise EnvironmentError(
                "Please set the ASSEMBLYAI environment variable with your AssemblyAI API key."
            )

        self.api_key = api_key
        aai.settings.api_key = self.api_key
        self.transcriber = aai.Transcriber()

    def convert_to_ogg_mono(self, input_path: str, output_path: str):
        """Converts an audio file from .mp4 to .ogg (mono)."""
        audio = AudioSegment.from_file(input_path, format="mp4")
        # Convert to mono if needed by uncommenting the line below
        # audio = audio.set_channels(1)
        audio.export(output_path, format="ogg")
        print(f"Converted to .ogg in {output_path}")

    def transcribe_audio(self, audio_path: str, output_path: str):
        """Transcribes the audio file and saves the transcription to a Markdown file."""
        config = aai.TranscriptionConfig(
            speaker_labels=True,
        )

        transcript = self.transcriber.transcribe(audio_path, config)

        with open(output_path, "w", encoding="utf-8") as f:
            for utterance in transcript.utterances:
                f.write(
                    f"** Speaker {utterance.speaker}:\n{utterance.text}\n-------------\n"
                )

        print(f"Transcription saved to {output_path}")

    def transcribe_audio_file(self, input_path: str, output_transcription_path: str):
        """Handles the entire process from conversion to transcription and cleanup."""
        converted_audio_path = input_path.replace(".mp4", ".ogg")

        # Convert .mp4 to .ogg
        self.convert_to_ogg_mono(input_path, converted_audio_path)

        # Perform the transcription
        self.transcribe_audio(converted_audio_path, output_transcription_path)

        # Optionally, clean up the converted file
        os.remove(converted_audio_path)
        print(f"Removed temporary file {converted_audio_path}")


# Example usage:
if __name__ == "__main__":
    # Define the paths for the input audio and output transcription
    input_audio_path = "/tmp/475353425.mp4"
    output_transcription_path = "/tmp/transcribe_475353425.md"

    # Perform the transcription process
    client = Client()
    client.transcribe_audio_file(input_audio_path, output_transcription_path)
lib/clients/readme.md (new file, 19 lines)
@@ -0,0 +1,19 @@

# Vimeo Client

Needs the following functionality:

- upload video
- download video
- list videos

## Some info

- https://developer.vimeo.com/api/reference

## Remarks

To use, make sure you have the secrets:

```bash
hero git clone -u git@git.threefold.info:despiegk/hero_secrets.git
source git.threefold.info/projectmycelium/hero_server/myenv.sh
```
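A minimal usage sketch of the Vimeo client added later in this commit (lib/clients/vimeo/client.py). It assumes the VIMEO_* environment variables from the secrets repo above are set; the folder ID is a placeholder.

```python
# Sketch only: assumes the VIMEO_* env vars are set (see secrets above);
# the folder ID below is a placeholder.
from client import VimeoClient  # lib/clients/vimeo/client.py

cl = VimeoClient()

# upload a local file and print its URI
print(cl.upload("/tmp/demo.mp4", "Demo video", "short description"))

# list videos in one folder
for video in cl.get_videos(folder=12345678):
    print(video.name, video.uri)

# download a video by id to a local file
cl.download("475353425", "/tmp/475353425.mp4")
```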
lib/clients/stellar/__init__.py (new empty file)
lib/clients/stellar/horizon.py (new file, 241 lines)
@@ -0,0 +1,241 @@

from dataclasses import dataclass, field, asdict
from typing import List, Optional
from stellar_sdk import Keypair, Server, StrKey
import json
import redis
from stellar.model import StellarAsset, StellarAccount
import os
import csv
import toml
from herotools.texttools import description_fix


class HorizonServer:
    def __init__(self, instance: str = "default", network: str = "main", tomlfile: str = "", owner: str = ""):
        """
        Load a Stellar account's information using the Horizon server.
        The Horizon server is an API that allows interaction with the Stellar network. It provides
        endpoints to submit transactions, check account balances, and perform other operations on
        the Stellar ledger.
        All results are cached in Redis.
        """
        self.redis_client = redis.Redis(host='localhost', port=6379, db=0)  # Adjust as needed
        self.instance = instance
        if network not in ['main', 'testnet']:
            raise ValueError("Invalid network value. Must be 'main' or 'testnet'.")
        self.network = network
        testnet = self.network == 'testnet'
        self.server = Server("https://horizon-testnet.stellar.org" if testnet else "https://horizon.stellar.org")
        self.tomlfile = os.path.expanduser(tomlfile)
        self.owner = owner
        if self.tomlfile:
            self.toml_load()

    def account_exists(self, pubkey: str) -> bool:
        """
        Check if an account exists in the Redis cache based on the public key.
        """
        redis_key = f"stellar:{self.instance}:accounts:{pubkey}"
        return self.redis_client.exists(redis_key) > 0

    def account_get(self, key: str, reload: bool = False, name: str = "", description: str = "", cat: str = "") -> StellarAccount:
        """
        Load a Stellar account's information.

        Args:
            key (str): The private or public key of the Stellar account.
            reload (bool, optional): Whether to force a refresh of the cached data. Defaults to False.
            name (str, optional): Name for the account. Defaults to "".
            description (str, optional): Description for the account. Defaults to "".
            cat (str, optional): Category of the account. Defaults to "".

        Returns:
            StellarAccount: A dataclass containing the account's information.
        """

        if key == "" and name:
            # Look the key up by name in the cache
            for redis_key in self.redis_client.scan_iter(f"stellar:{self.instance}:accounts:*"):
                data = self.redis_client.get(redis_key)
                if data:
                    data = json.loads(data)
                    if data.get('name') == name and data.get('priv_key', data.get('public_key')):
                        key = data.get('priv_key', data.get('public_key'))
                        break

        if key == "":
            raise ValueError("No key provided")

        # Determine if the key is a public or private key
        if StrKey.is_valid_ed25519_public_key(key):
            public_key = key
            priv_key = ""
        elif StrKey.is_valid_ed25519_secret_seed(key):
            priv_key = key
            keypair = Keypair.from_secret(priv_key)
            public_key = keypair.public_key
        else:
            raise ValueError("Invalid Stellar key provided")

        redis_key = f"stellar:{self.instance}:accounts:{public_key}"

        data = self.redis_client.get(redis_key)
        changed = False
        if data:
            try:
                data = json.loads(data)
            except Exception as e:
                print(data)
                raise e
            data['assets'] = [StellarAsset(**asset) for asset in data['assets']]
            account = StellarAccount(**data)
            if description != "" and description != account.description:
                account.description = description
                changed = True
            if name != "" and name != account.name:
                account.name = name
                changed = True
            if self.owner != "" and self.owner != account.owner:
                account.owner = self.owner
                changed = True
            if cat != "" and cat != account.cat:
                account.cat = cat
                changed = True
        else:
            account = StellarAccount(public_key=public_key, description=description, name=name, priv_key=priv_key, owner=self.owner, cat=cat)
            changed = True

        if reload or account.assets == []:
            changed = True
            if reload:
                account.assets = []
            account_data = self.server.accounts().account_id(public_key).call()
            account.assets.clear()  # Clear existing assets to avoid duplication
            for balance in account_data['balances']:
                asset_type = balance['asset_type']
                if asset_type == 'native':
                    account.assets.append(StellarAsset(type="XLM", balance=balance['balance']))
                else:
                    if 'asset_code' in balance:
                        account.assets.append(StellarAsset(
                            type=balance['asset_code'],
                            issuer=balance['asset_issuer'],
                            balance=balance['balance']
                        ))
            changed = True

        # Cache the result in Redis if there were changes
        if changed:
            self.account_save(account)

        return account

    def comment_add(self, pubkey: str, comment: str, ignore_non_exist: bool = False):
        """
        Add a comment to a Stellar account based on the public key.

        Args:
            pubkey (str): The public key of the Stellar account.
            comment (str): The comment to add to the account.
        """
        comment = description_fix(comment)
        if not self.account_exists(pubkey):
            if ignore_non_exist:
                return
            raise ValueError("Account does not exist in the cache")
        account = self.account_get(pubkey)
        account.comments.append(comment)
        self.account_save(account)

    def account_save(self, account: StellarAccount):
        """
        Save a Stellar account's information to the Redis cache (expires after 10 minutes).

        Args:
            account (StellarAccount): The account to save.
        """
        redis_key = f"stellar:{self.instance}:accounts:{account.public_key}"
        self.redis_client.setex(redis_key, 600, json.dumps(asdict(account)))

    def reload_cache(self):
        """
        Walk over all known accounts and reload their information.
        """
        for redis_key in self.redis_client.scan_iter(f"stellar:{self.instance}:accounts:*"):
            data = self.redis_client.get(redis_key) or ""
            if data:
                data = json.loads(data)
                public_key = data.get('public_key')
                if public_key:
                    self.account_get(public_key, reload=True)

    # format is PUBKEY,DESCRIPTION in text format
    def load_accounts_csv(self, file_path: str):
        file_path = os.path.expanduser(file_path)
        if not os.path.exists(file_path):
            raise FileNotFoundError(f"Error: File '{file_path}' does not exist.")
        try:
            with open(file_path, 'r', newline='') as file:
                reader = csv.reader(file, delimiter=',')
                for row in reader:
                    if row and len(row) >= 2:  # Check if row is not empty and has at least 2 elements
                        pubkey = row[0].strip()
                        comment = ','.join(row[1:]).strip()
                        if self.account_exists(pubkey):
                            self.comment_add(pubkey, comment)
        except IOError as e:
            raise Exception(f"Error reading file: {e}")
        except csv.Error as e:
            raise Exception(f"Error parsing CSV: {e}")

    def accounts_get(self) -> List[StellarAccount]:
        """
        Retrieve a list of all known Stellar accounts from the Redis cache.

        Returns:
            List[StellarAccount]: A list of StellarAccount objects.
        """
        accounts = []
        for redis_key in self.redis_client.scan_iter(f"stellar:{self.instance}:accounts:*"):
            pubkey = redis_key.decode().split(':')[-1]
            accounts.append(self.account_get(key=pubkey))
        return accounts

    def toml_save(self):
        """
        Save the list of all known Stellar accounts to the TOML file configured on this instance.
        """
        if self.tomlfile == "":
            raise ValueError("No TOML file path provided")
        accounts = self.accounts_get()
        accounts_dict = {account.public_key: asdict(account) for account in accounts}
        with open(self.tomlfile, 'w') as file:
            toml.dump(accounts_dict, file)

    def toml_load(self):
        """
        Load the list of Stellar accounts from the TOML file and save them to the Redis cache.
        """
        if not os.path.exists(self.tomlfile):
            return
            # raise FileNotFoundError(f"Error: File '{self.tomlfile}' does not exist.")
        with open(self.tomlfile, 'r') as file:
            accounts_dict = toml.load(file)
        for pubkey, account_data in accounts_dict.items():
            account_data['assets'] = [StellarAsset(**asset) for asset in account_data['assets']]
            account = StellarAccount(**account_data)
            self.account_save(account)


def new(instance: str = "default", owner: str = "", network: str = "main", tomlfile: str = "") -> HorizonServer:
    return HorizonServer(instance=instance, network=network, tomlfile=tomlfile, owner=owner)
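A minimal usage sketch of the Horizon client above. It assumes a local Redis on port 6379 and the stellar.model dataclasses from this commit; the public key and file path are placeholders.

```python
# Sketch only: local Redis on :6379 is assumed; key and path are placeholders.
from stellar.horizon import new

hs = new(instance="default", network="main", owner="kristof")

# load (and cache) an account by public key, tagging it with a name and category
account = hs.account_get(key="G...PLACEHOLDER", name="treasury", cat="ops")
print(account.balance_str())

# dump everything known to a TOML file so it can be reloaded later via toml_load()
hs.tomlfile = "/tmp/stellar_accounts.toml"
hs.toml_save()
```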
lib/clients/stellar/model.py (new file, 70 lines)
@@ -0,0 +1,70 @@

from dataclasses import dataclass, field, asdict
from typing import List, Optional
from stellar_sdk import Keypair, Server, StrKey
import json
import redis


@dataclass
class StellarAsset:
    type: str
    balance: float
    issuer: str = ""

    def format_balance(self):
        balance_float = float(self.balance)
        formatted_balance = f"{balance_float:,.2f}"
        if '.' in formatted_balance:
            formatted_balance = formatted_balance.rstrip('0').rstrip('.')
        return formatted_balance

    def md(self):
        formatted_balance = self.format_balance()
        return f"- **{self.type}**: {formatted_balance}"


@dataclass
class StellarAccount:
    owner: str
    priv_key: str = ""
    public_key: str = ""
    assets: List[StellarAsset] = field(default_factory=list)
    name: str = ""
    description: str = ""
    comments: List[str] = field(default_factory=list)
    cat: str = ""
    question: str = ""

    def md(self):
        result = [
            f"# Stellar Account: {self.name or 'Unnamed'}", "",
            f"**Public Key**: {self.public_key}",
            f"**Cat**: {self.cat}",
            f"**Description**: {self.description[:60]}..." if self.description else "**Description**: None",
            f"**Question**: {self.question}" if self.question else "**Question**: None",
            "",
            "## Assets:", ""
        ]

        for asset in self.assets:
            result.append(asset.md())

        if len(self.assets) == 0:
            result.append("- No assets")

        result.append("")

        if self.comments:
            result.append("## Comments:")
            for comment in self.comments:
                if '\n' in comment:
                    multiline_comment = "\n  ".join(comment.split('\n'))
                    result.append(f"- {multiline_comment}")
                else:
                    result.append(f"- {comment}")

        return "\n".join(result)

    def balance_str(self) -> str:
        out = []
        for asset in self.assets:
            out.append(f"{asset.type}:{float(asset.balance):,.0f}")
        return " ".join(out)
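For illustration, a small sketch of how these dataclasses render; all values below are made up.

```python
# Illustration only; the account values are made up.
from stellar.model import StellarAccount, StellarAsset

acc = StellarAccount(
    owner="kristof",
    public_key="G...PLACEHOLDER",
    name="treasury",
    assets=[
        StellarAsset(type="XLM", balance=1234.5),
        StellarAsset(type="TFT", balance=10000, issuer="G...ISSUER"),
    ],
    comments=["funded from main wallet"],
)
print(acc.md())           # Markdown report with one bullet per asset
print(acc.balance_str())  # e.g. "XLM:1,234 TFT:10,000"
```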
lib/clients/stellar/model_accounts.v (new file, 78 lines)
@@ -0,0 +1,78 @@

module stellar

import freeflowuniverse.crystallib.core.texttools

pub struct DigitalAssets {
pub mut:
    accounts []Account // referenced by account_get below
}

pub struct Owner {
pub mut:
    name     string
    accounts []Account
}

@[params]
pub struct AccountGetArgs {
pub mut:
    name   string
    bctype BlockChainType
}

pub fn (self DigitalAssets) account_get(args_ AccountGetArgs) !&Account {
    mut accounts := []&Account{}
    mut args := args_

    args.name = texttools.name_fix(args.name)

    for account in self.accounts {
        if account.name == args.name && account.bctype == args.bctype {
            accounts << &account
        }
    }

    if accounts.len == 0 {
        return error('No account found with the given name:${args.name} and blockchain type: ${args.bctype}')
    } else if accounts.len > 1 {
        return error('Multiple accounts found with the given name:${args.name} and blockchain type: ${args.bctype}')
    }

    return accounts[0]
}

pub struct Account {
pub mut:
    name        string
    secret      string
    pubkey      string
    description string
    cat         string
    owner       string
    assets      []Asset
    bctype      BlockChainType
}

pub struct Asset {
pub mut:
    amount    int
    assettype AssetType
}

pub fn (self Asset) name() string {
    return self.assettype.name
}

pub struct AssetType {
pub mut:
    name   string
    issuer string
    bctype BlockChainType
}

pub enum BlockChainType {
    stellar_pub
    stellar_test
}
lib/clients/stellar/testnet.py (new file, 46 lines)
@@ -0,0 +1,46 @@

from typing import Tuple
from stellar_sdk import Server, Keypair, TransactionBuilder, Network, Asset, Signer, TransactionEnvelope
import redis
import requests
import json
import time


def create_account_on_testnet() -> Tuple[str, str]:

    def fund(public_key: str) -> float:
        # Request funds from the Stellar testnet friendbot
        response = requests.get(f"https://friendbot.stellar.org?addr={public_key}")
        if response.status_code != 200:
            raise Exception("Failed to fund new account with friendbot")
        time.sleep(1)
        # relies on a balance() helper that is not defined in this file
        return balance(public_key)

    def create_account() -> Tuple[str, str]:
        # Initialize Redis client
        redis_client = redis.StrictRedis(host='localhost', port=6379, db=0)

        # Generate keypair
        keypair = Keypair.random()
        public_key = keypair.public_key
        secret_key = keypair.secret
        account_data = {
            "public_key": public_key,
            "secret_key": secret_key
        }
        redis_client.set("stellartest:testaccount", json.dumps(account_data))
        time.sleep(1)
        return public_key, secret_key

    # Redis client used by the existence check below
    redis_client = redis.StrictRedis(host='localhost', port=6379, db=0)

    # Check if the account already exists in Redis
    if redis_client.exists("stellartest:testaccount"):
        account_data = json.loads(redis_client.get("stellartest:testaccount"))
        public_key = account_data["public_key"]
        secret_key = account_data["secret_key"]
        r = balance(public_key)  # relies on the same external balance() helper
        if r < 100:
            fund(public_key)
            r = balance(public_key)
        return public_key, secret_key
    else:
        create_account()
        return create_account_on_testnet()
lib/clients/telegram/__init__.py (new empty file)
lib/clients/telegram/bot.py (new file, 102 lines)
@@ -0,0 +1,102 @@

import json
import redis
import telebot
import os
import logging
from termcolor import colored
from telebot.types import InlineKeyboardMarkup, InlineKeyboardButton
from telebot.formatting import escape_markdown

from bot_audio import audio_add
from bot_text import text_add
from ai.ask import ai_assistent, AIAssistant


class MyBot:
    def __init__(self, ai_reset: bool = False):
        # Initialize logging
        logging.basicConfig(level=logging.INFO, format='%(message)s')
        self.logger = logging.getLogger(__name__)

        # Initialize Redis connection
        self.redis_client = redis.Redis(host='localhost', port=6379, db=0)

        # Initialize Telegram bot
        self.telebotkey = os.getenv("TELEBOT")
        if self.telebotkey:
            self.logger.info(colored("TELEBOT key set", "green"))
            self.bot = telebot.TeleBot(self.telebotkey)
        else:
            raise Exception("can't find TELEBOT in ENV")

        # Set up message handlers
        self.setup_handlers()
        audio_add(self)
        text_add(self, reset=ai_reset)

    def setup_handlers(self):
        @self.bot.message_handler(commands=['help'])
        def send_welcome(message):
            self.bot.reply_to(message, """\
Hi there, I am your hero.
Just speak to me or do /start or /help
""")

        @self.bot.message_handler(commands=['start'])
        def start_command(message):
            chat_id = message.chat.id

            keyboard = InlineKeyboardMarkup()
            subscribe_button = InlineKeyboardButton("Subscribe to Updates", callback_data='subscribe')
            unsubscribe_button = InlineKeyboardButton("Unsubscribe from Updates", callback_data='unsubscribe')
            keyboard.row(subscribe_button, unsubscribe_button)

            self.bot.reply_to(message, "Please choose an option:", reply_markup=keyboard)

        @self.bot.callback_query_handler(func=lambda call: True)
        def callback_query(call):
            chat_id = call.message.chat.id

            if call.data == 'subscribe':
                self.redis_client.hset('subscribed_chats', chat_id, '1')
                self.bot.answer_callback_query(call.id, "You have subscribed to updates.")
                print(f"User subscribed to updates: {chat_id}")
            elif call.data == 'unsubscribe':
                self.redis_client.hdel('subscribed_chats', chat_id)
                self.bot.answer_callback_query(call.id, "You have unsubscribed from updates.")
                print(f"User unsubscribed from updates: {chat_id}")

    def send_message_to_subscribers(self, message):
        subscribed_chats = self.redis_client.hgetall('subscribed_chats')
        for chat_id in subscribed_chats:
            try:
                self.bot.send_message(chat_id.decode('utf-8'), message)
            except Exception as e:
                print(f"Failed to send message to chat {chat_id}: {str(e)}")

    def send_error_to_telegram(self, chat_id, error_message):
        # Format the error message for Telegram
        telegram_message = f"🚨 Error Occurred 🚨\n\n"
        telegram_message += f"app: {escape_markdown(error_message['app'])}\n"
        telegram_message += f"Function: {escape_markdown(error_message['function'])}\n"
        telegram_message += f"msg: {escape_markdown(error_message['msg'])}\n"
        telegram_message += f"Exception Type: {escape_markdown(error_message['exception_type'])}\n"
        telegram_message += f"Exception Message: ```\n{escape_markdown(error_message['exception_message'])}\n```\n"
        if 'traceback' in error_message:
            telegram_message += f"Traceback:\n```\n{escape_markdown(error_message['traceback'])}\n```"
        # Send the error message to the subscribed chat
        self.bot.send_message(chat_id, telegram_message, parse_mode='Markdown')

    def start(self):
        print("Bot started")
        # Start the bot
        self.bot.polling()


def bot_new() -> MyBot:
    return MyBot()


# Usage
if __name__ == "__main__":
    my_bot = bot_new()
    my_bot.start()
lib/clients/telegram/bot_audio.py (new file, 72 lines)
@@ -0,0 +1,72 @@

import os
from pydub import AudioSegment
import whisper


def audio_add(self):

    self.model = whisper.load_model("base")

    @self.bot.message_handler(content_types=['audio', 'voice'])  # , 'document'
    def handle_audio(message):
        try:
            chat_id = message.chat.id
            file_info = None
            audio_path = None

            if message.content_type == 'audio':
                file_info = self.bot.get_file(message.audio.file_id)
                audio_path = f"/tmp/audio/{message.audio.file_id}.mp3"
            elif message.content_type == 'voice':
                file_info = self.bot.get_file(message.voice.file_id)
                audio_path = f"/tmp/audio/{message.voice.file_id}.ogg"

            if file_info:
                downloaded_file = self.bot.download_file(file_info.file_path)

                # Ensure the directory exists
                os.makedirs(os.path.dirname(audio_path), exist_ok=True)

                # Save the audio file
                with open(audio_path, 'wb') as new_file:
                    new_file.write(downloaded_file)

                # bot.send_message(chat_id, f"Audio received and saved successfully to {audio_path}.")
                print(f"Audio received and saved to {audio_path}")

                # Convert to WAV format if necessary
                wav_path = audio_path.replace('.mp3', '.wav').replace('.ogg', '.wav')
                if audio_path.endswith('.mp3') or audio_path.endswith('.ogg'):
                    audio = AudioSegment.from_file(audio_path)
                    audio.export(wav_path, format='wav')
                else:
                    wav_path = audio_path

                # Transcribe audio using Whisper
                result = self.model.transcribe(wav_path)
                transcription = result["text"]

                self.bot.send_message(chat_id, transcription, parse_mode='Markdown')
                print(f"Audio received and saved to {audio_path}")
                print(f"Transcription: {transcription}")

                text2 = self.text_process(self, transcription)

                print(f"Processed text {chat_id}: {text2}")

                if len(text2) > 0:
                    self.bot.send_message(chat_id, text2)

        except Exception as e:
            error_message = {
                'app': 'Telegram Bot',
                'function': 'handle_audio',
                'msg': 'Failed to process audio file',
                'exception_type': type(e).__name__,
                'exception_message': str(e)
            }
            self.send_error_to_telegram(chat_id, error_message)
            print(f"Error processing audio file: {e}")
lib/clients/telegram/bot_text.py (new file, 51 lines)
@@ -0,0 +1,51 @@

import os
from ai.ask import ai_assistent


def text_add(self, reset: bool = False):

    self.ai_assistent = ai_assistent(reset=reset)
    self.text_process = text_process

    @self.bot.message_handler(content_types=['text'])
    def handle_text(message):
        try:
            chat_id = message.chat.id

            text = message.text

            # Here you can add your logic to process the text
            # For now, let's just echo the message back
            # response = f"You said: {text}"

            print(f"Received text from {chat_id}: {text}")

            text2 = self.text_process(self, text)

            print(f"Processed text {chat_id}: {text2}")

            if len(text2) > 0:
                self.bot.send_message(chat_id, text2)

        except Exception as e:
            error_message = {
                'app': 'Telegram Bot',
                'function': 'handle_text',
                'msg': 'Failed to process text',
                'exception_type': type(e).__name__,
                'exception_message': str(e)
            }
            self.send_error_to_telegram(chat_id, error_message)
            print(f"Error processing text: {e}")


def text_process(self, txt) -> str:
    if "translate" not in txt.lower():
        txt += '''\n\n
only output the heroscript, no comments
'''
    response = self.ai_assistent.ask(
        category='timemgmt',
        name='schedule',
        question=txt)
    return response
lib/clients/telegram/errorqueue.py (new file, 36 lines)
@@ -0,0 +1,36 @@

import json
import redis
import telebot
import threading
from telebot.types import InlineKeyboardMarkup, InlineKeyboardButton
import time
from telebot.formatting import escape_markdown
import os
# NOTE: bot.py defines send_error_to_telegram as a MyBot method;
# this import assumes a module-level variant is exposed there.
from telegram.bot import send_error_to_telegram

# Initialize Redis connection
redis_client = redis.Redis(host='localhost', port=6379, db=0)


# Get errors from Redis and send them to the bot if a subscription was done
def process_error_queue():
    while True:
        # Pop an error message from the Redis queue
        error_json = redis_client.lpop('error_queue')

        if error_json:
            # Deserialize the error message from JSON
            error_message = json.loads(error_json)

            # Get all subscribed chat IDs from Redis
            subscribed_chats = redis_client.hgetall('subscribed_chats')

            # Send the error message to all subscribed chats
            for chat_id in subscribed_chats.keys():
                send_error_to_telegram(int(chat_id), error_message)
        else:
            # If the queue is empty, wait for a short interval before checking again
            time.sleep(1)


# Start processing the error queue
process_error_queue_thread = threading.Thread(target=process_error_queue)
process_error_queue_thread.start()
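For context, a minimal sketch of a producer for this queue. Only the Redis key 'error_queue' and the message fields come from this commit; the application names are illustrative.

```python
# Sketch of pushing an error onto the queue consumed above.
import json
import traceback

import redis

redis_client = redis.Redis(host='localhost', port=6379, db=0)

try:
    1 / 0  # any failing operation
except Exception as e:
    redis_client.rpush('error_queue', json.dumps({
        'app': 'my_app',                      # illustrative
        'function': 'do_work',                # illustrative
        'msg': 'Failed to do work',
        'exception_type': type(e).__name__,
        'exception_message': str(e),
        'traceback': traceback.format_exc(),  # optional, rendered if present
    }))
```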
lib/clients/vimeo/client.py (new file, 142 lines)
@@ -0,0 +1,142 @@

import os
from typing import List, Optional

import requests
import vimeo
from model_video import VideoInfo, video_model_load, videos_model_load


class VimeoClient:
    def __init__(self):
        # Retrieve necessary credentials from environment variables
        self.client_id = os.getenv("VIMEO_CLIENT_ID")
        self.client_secret = os.getenv("VIMEO_SECRET")
        self.access_token = os.getenv("VIMEO_ACCESSTOKEN_ID")
        self.user_id = os.getenv("VIMEO_USER_ID")

        # Check if all environment variables are present
        if not all([self.client_id, self.client_secret, self.access_token, self.user_id]):
            raise EnvironmentError(
                "Please set the VIMEO_CLIENT_ID, VIMEO_SECRET, VIMEO_USER_ID and VIMEO_ACCESSTOKEN_ID environment variables."
            )

        # Initialize the Vimeo client
        self.client = vimeo.VimeoClient(token=self.access_token, key=self.client_id, secret=self.client_secret)

    def upload(self, file: str, video_title: str, description: str) -> str:
        video_uri = self.client.upload(file, data={"name": video_title, "description": description})
        return video_uri

    def download(self, video_id: str, output_file: str = "myvid.mp4"):
        info = self.get_video_info(video_id)

        # Pick the largest available download rendition
        size, link = 0, ""
        for item in info.download:
            if item["size"] > size:
                size = item["size"]
                link = item["link"]

        if link == "":
            raise Exception("download link not provided for video")

        video_response = requests.get(link, stream=True)
        downloaded_mb = 0
        with open(output_file, "wb") as video_file:
            for chunk in video_response.iter_content(chunk_size=1024):
                if chunk:
                    downloaded_mb += len(chunk) / (1024 * 1024)
                    print(f"{downloaded_mb}MB Downloaded...")
                    video_file.write(chunk)

        print(f"Video downloaded successfully to {output_file}!")

    def get_video_info(self, video_id: str) -> VideoInfo:
        """
        Get information about a video by ID.
        :param video_id: ID of the Vimeo video.
        :return: Video information as a VideoInfo object.
        """
        # , fields: List[str]
        response = self.client.get(f"/videos/{video_id}")
        if response.status_code == 200:
            myvideo = video_model_load(response.content)
        else:
            raise Exception(f"Failed to get video details. Status code: {response.status_code}, Error: {response.text}")
        return myvideo

    def get_videos(self, folder: Optional[int] = None, folders: Optional[List[int]] = None) -> List[VideoInfo]:
        """
        Get information about videos from the specified folder(s), or all videos if no folder is specified.
        :param folder: ID of a single folder to fetch videos from.
        :param folders: List of folder IDs to fetch videos from.
        :return: List of VideoInfo objects.
        """
        if self.user_id == 0:
            raise Exception("Can't find user ID, it's not set in env variables")

        all_videos = []

        if folder is not None:
            folders = [folder]
        elif folders is None:
            # If no folder or folders specified, get all videos
            response = self.client.get("/me/videos")
            if response.status_code == 200:
                return videos_model_load(response.content)
            else:
                raise Exception(f"Failed to get videos. Status code: {response.status_code}, Error: {response.text}")

        for folder_id in folders:
            response = self.client.get(f"/users/{self.user_id}/projects/{folder_id}/videos")
            if response.status_code == 200:
                videos = videos_model_load(response.content)
                all_videos.extend(videos)
            else:
                print(f"Failed to get videos for folder {folder_id}. Status code: {response.status_code}, Error: {response.text}")

        return all_videos


def new() -> VimeoClient:
    return VimeoClient()


# Example usage:
if __name__ == "__main__":
    cl = new()
    v = cl.get_videos(folders=[10700101, 13139570, 12926235, 10752310, 10702046])
    for item in v:
        video_id = item.uri.split("/")[-1]
        print(f" - {item.name} : {video_id} ")
    # from IPython import embed; embed()
    # vi = cl.get_video_info("475353425")
    # print(json_to_yaml(vi))
    # cl.download("475353425", "/tmp/475353425.mp4")
lib/clients/vimeo/model_video.py (new file, 177 lines)
@@ -0,0 +1,177 @@

from dataclasses import dataclass, field
from typing import List, Optional, Dict, Any
from dataclasses_json import dataclass_json
import json
import yaml


def json_to_yaml(json_data):
    # If the input is a JSON string, parse it into a Python dictionary
    if isinstance(json_data, str):
        json_data = json.loads(json_data)

    # Convert the dictionary to a YAML formatted string
    yaml_data = yaml.dump(json_data, sort_keys=False, default_flow_style=False)

    return yaml_data


@dataclass_json
@dataclass
class Size:
    width: int
    height: int
    link: str
    link_with_play_button: Optional[str] = None


@dataclass_json
@dataclass
class Pictures:
    uri: str
    active: bool
    type: str
    base_link: str
    sizes: List[Size]
    resource_key: str
    default_picture: bool


@dataclass_json
@dataclass
class Embed:
    html: str
    badges: Dict[str, Any]
    interactive: bool
    buttons: Dict[str, bool]
    logos: Dict[str, Any]
    play_button: Dict[str, Any]
    title: Dict[str, Any]
    end_screen: List[Any]
    playbar: bool
    quality_selector: Optional[str]
    pip: bool
    autopip: bool
    volume: bool
    color: str
    colors: Dict[str, str]
    event_schedule: bool
    has_cards: bool
    outro_type: str
    show_timezone: bool
    cards: List[Any]
    airplay: bool
    audio_tracks: bool
    chapters: bool
    chromecast: bool
    closed_captions: bool
    transcript: bool
    ask_ai: bool
    uri: Optional[str]
    email_capture_form: Optional[str]
    speed: bool


@dataclass_json
@dataclass
class Uploader:
    pictures: Pictures


@dataclass_json
@dataclass
class User:
    uri: str
    name: str
    link: str
    capabilities: Dict[str, bool]
    location: str
    gender: str
    bio: str
    short_bio: str
    created_time: str
    pictures: Pictures
    websites: List[Dict[str, Optional[str]]]
    # metadata: Dict[str, Any]
    location_details: Dict[str, Optional[Any]]
    skills: List[Any]
    available_for_hire: bool
    can_work_remotely: bool
    preferences: Dict[str, Any]
    content_filter: List[str]
    upload_quota: Dict[str, Any]
    resource_key: str
    account: str


@dataclass_json
@dataclass
class VideoInfo:
    uri: str
    name: str
    description: Optional[str]
    type: str
    link: str
    player_embed_url: str
    duration: int
    width: int
    height: int
    # embed: Embed
    created_time: str
    modified_time: str
    release_time: str
    content_rating: List[str]
    content_rating_class: str
    rating_mod_locked: bool
    license: Optional[str]
    privacy: Dict[str, Any]
    pictures: Pictures
    tags: List[Any]
    stats: Dict[str, int]
    categories: List[Any]
    uploader: Uploader
    # metadata: Dict[str, Any]
    manage_link: str
    # user: Optional[User]
    last_user_action_event_date: Optional[str]
    parent_folder: Optional[Dict[str, Any]]
    review_page: Optional[Dict[str, Any]]
    files: Optional[List[Dict[str, Any]]]
    download: Optional[List[Dict[str, Any]]]
    app: Optional[Dict[str, str]]
    play: Optional[Dict[str, Any]]
    status: str
    resource_key: str
    upload: Optional[Dict[str, Optional[str]]]
    transcode: Dict[str, str]
    is_playable: bool
    has_audio: bool


def video_model_load(json_data: str, dojsonload: bool = True) -> VideoInfo:

    if dojsonload:
        json_dict = json.loads(json_data)
    else:
        json_dict = json_data

    json_dict.pop('metadata', {})
    json_dict.pop('embed', {})
    json_dict.pop('user', {})
    json_dict.pop('websites', {})
    # if 'user' in json_dict:
    #     json_dict['user'].pop('metadata', None)
    # if 'websites' in json_dict:
    #     json_dict['websites'].pop('metadata', None)

    json_data_cleaned = json.dumps(json_dict)

    video_object = VideoInfo.from_json(json_data_cleaned)

    return video_object


def videos_model_load(json_data: str) -> List[VideoInfo]:
    json_list = json.loads(json_data)
    json_list2 = list()

    for item in json_list["data"]:
        d = video_model_load(item, dojsonload=False)
        json_list2.append(d)

    return json_list2
lib/clients/whisper/__init__.py (new empty file)
lib/clients/whisper/convert.py (new file, 107 lines)
@@ -0,0 +1,107 @@

import os
from pydub import AudioSegment
import whisper
import moviepy.editor as mp
import nltk
from nltk.tokenize import sent_tokenize, word_tokenize

# Download necessary NLTK data
nltk.download('punkt', quiet=True)


class Convertor:
    def __init__(self, max_chars_per_part=4000, context: str = "main"):
        self.max_chars_per_part = max_chars_per_part
        self.context = context

    @classmethod
    def new(cls, max_chars_per_part=4000):
        return cls(max_chars_per_part)

    def process(self, path: str):
        if path.lower().endswith(('.mp4', '.avi', '.mov')):  # Video files
            return self.process_video(path)
        elif path.lower().endswith(('.mp3', '.wav', '.ogg')):  # Audio files
            return self.process_audio(path)
        else:
            raise ValueError("Unsupported file format")

    def process_video(self, video_path: str):
        # Extract audio from video
        video = mp.VideoFileClip(video_path)
        audio_path = video_path.rsplit('.', 1)[0] + '.wav'
        video.audio.write_audiofile(audio_path)
        video.close()
        return audio_path

    def process_audio(self, audio_path: str):
        # Convert to WAV format if necessary
        wav_path = audio_path.rsplit('.', 1)[0] + '.wav'
        if not audio_path.lower().endswith('.wav'):
            audio = AudioSegment.from_file(audio_path)
            audio.export(wav_path, format='wav')
        else:
            wav_path = audio_path
        return wav_path

    def split_text(self, text):
        parts = []
        current_part = ""
        paragraphs = text.split('\n\n')

        for paragraph in paragraphs:
            sentences = sent_tokenize(paragraph)
            for sentence in sentences:
                if len(current_part) + len(sentence) < self.max_chars_per_part:
                    current_part += sentence + ' '
                else:
                    if current_part:
                        parts.append(current_part.strip())
                    current_part = sentence + ' '

            # Add a paragraph break if it doesn't exceed the limit
            if len(current_part) + 2 < self.max_chars_per_part:
                current_part += '\n\n'
            else:
                parts.append(current_part.strip())
                current_part = '\n\n'

        if current_part:
            parts.append(current_part.strip())

        return parts

    def find_natural_pause(self, text):
        words = word_tokenize(text)
        total_words = len(words)
        mid_point = total_words // 2

        # Look for punctuation near the middle
        for i in range(mid_point, total_words):
            if words[i] in '.!?':
                return ' '.join(words[:i+1]), ' '.join(words[i+1:])

        # If no punctuation found, split at the nearest space to the middle
        return ' '.join(words[:mid_point]), ' '.join(words[mid_point:])

    def write_to_file(self, parts, output_path):
        with open(output_path, 'w', encoding='utf-8') as f:
            for i, part in enumerate(parts, 1):
                f.write(f"Part {i}:\n\n")
                f.write(part)
                f.write("\n\n")
                if i < len(parts):
                    f.write("-" * 50 + "\n\n")


# Usage example:
if __name__ == "__main__":
    processor = Convertor.new()
    output_file = "/tmp/transcription_parts.md"  # placeholder output path
    item = "/Users/despiegk1/Documents/Zoom/2024-07-16 16.42.50 Kristof De Spiegeleer's Personal Meeting Room/video1720369800.mp4"
    transcription_parts = processor.process(item)

    processor.write_to_file(transcription_parts, output_file)

    print(f"Transcription split into {len(transcription_parts)} parts:")
    for i, part in enumerate(transcription_parts, 1):
        print(f"Part {i}:")
        print(part)
        print("-" * 50)
lib/clients/whisper/whisper.py (new file, 118 lines)
@@ -0,0 +1,118 @@

import os
from pydub import AudioSegment
import whisper
import moviepy.editor as mp
import nltk
from nltk.tokenize import sent_tokenize, word_tokenize

# Download necessary NLTK data
nltk.download('punkt', quiet=True)


class MediaProcessor:
    def __init__(self, max_chars_per_part=4000):
        self.model = whisper.load_model("base.en")
        # self.model = whisper.load_model("medium.en")
        self.max_chars_per_part = max_chars_per_part

    @classmethod
    def new(cls, max_chars_per_part=4000):
        return cls(max_chars_per_part)

    def process(self, path: str):
        if path.lower().endswith(('.mp4', '.avi', '.mov')):  # Video files
            return self.process_video(path)
        elif path.lower().endswith(('.mp3', '.wav', '.ogg')):  # Audio files
            return self.process_audio(path)
        else:
            raise ValueError("Unsupported file format")

    def process_video(self, video_path: str):
        # Extract audio from video
        video = mp.VideoFileClip(video_path)
        audio_path = video_path.rsplit('.', 1)[0] + '.wav'
        video.audio.write_audiofile(audio_path)
        video.close()

        # Now process the extracted audio
        return self.process_audio(audio_path)

    def process_audio(self, audio_path: str):
        # Convert to WAV format if necessary
        wav_path = audio_path.rsplit('.', 1)[0] + '.wav'
        if not audio_path.lower().endswith('.wav'):
            audio = AudioSegment.from_file(audio_path)
            audio.export(wav_path, format='wav')
        else:
            wav_path = audio_path

        # Transcribe audio using Whisper
        result = self.model.transcribe(wav_path)
        transcription = result["text"]

        # Split the transcription into parts
        return self.split_text(transcription)

    def split_text(self, text):
        parts = []
        current_part = ""
        paragraphs = text.split('\n\n')

        for paragraph in paragraphs:
            sentences = sent_tokenize(paragraph)
            for sentence in sentences:
                if len(current_part) + len(sentence) < self.max_chars_per_part:
                    current_part += sentence + ' '
                else:
                    if current_part:
                        parts.append(current_part.strip())
                    current_part = sentence + ' '

            # Add a paragraph break if it doesn't exceed the limit
            if len(current_part) + 2 < self.max_chars_per_part:
                current_part += '\n\n'
            else:
                parts.append(current_part.strip())
                current_part = '\n\n'

        if current_part:
            parts.append(current_part.strip())

        return parts

    def find_natural_pause(self, text):
        words = word_tokenize(text)
        total_words = len(words)
        mid_point = total_words // 2

        # Look for punctuation near the middle
        for i in range(mid_point, total_words):
            if words[i] in '.!?':
                return ' '.join(words[:i+1]), ' '.join(words[i+1:])

        # If no punctuation found, split at the nearest space to the middle
        return ' '.join(words[:mid_point]), ' '.join(words[mid_point:])

    def write_to_file(self, parts, output_path):
        with open(output_path, 'w', encoding='utf-8') as f:
            for i, part in enumerate(parts, 1):
                f.write(f"Part {i}:\n\n")
                f.write(part)
                f.write("\n\n")
                if i < len(parts):
                    f.write("-" * 50 + "\n\n")


# Usage example:
if __name__ == "__main__":
    processor = MediaProcessor.new(max_chars_per_part=10000)
    output_file = "/Users/despiegk1/Documents/transcription3.md"
    item = "/Users/despiegk1/Documents/Zoom/2024-07-16 16.42.50 Kristof De Spiegeleer's Personal Meeting Room/video1720369800.mp4"
    transcription_parts = processor.process(item)

    processor.write_to_file(transcription_parts, output_file)

    print(f"Transcription split into {len(transcription_parts)} parts:")
    for i, part in enumerate(transcription_parts, 1):
        print(f"Part {i}:")
        print(part)
        print("-" * 50)
313
lib/clients/wireless/wigle_net.py
Normal file
313
lib/clients/wireless/wigle_net.py
Normal file
@@ -0,0 +1,313 @@
|
||||
import json
|
||||
import os
|
||||
import time
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime, timedelta
|
||||
from enum import Enum
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
import redis
|
||||
import requests
|
||||
|
||||
API_URL = "https://api.wigle.net/api/v2/network/search"
|
||||
REDIS_CACHE_EXPIRY = timedelta(hours=1)
|
||||
API_RATE_LIMIT = 30 # seconds between requests
|
||||
|
||||
# Initialize Redis connection
|
||||
redis_client = redis.Redis(host="localhost", port=6379, db=0, decode_responses=True)
|
||||
|
||||
# Track last API request time (initialized to allow immediate first request)
|
||||
_last_request_time = time.time() - API_RATE_LIMIT
|
||||
|
||||
|
||||
class WigleError(Exception):
|
||||
"""Custom exception for Wigle-related errors"""
|
||||
|
||||
pass
|
||||
|
||||
|
||||
class NetworkType(str, Enum):
|
||||
"""Network types supported by Wigle API"""
|
||||
|
||||
WIFI = "WIFI"
|
||||
BT = "BT"
|
||||
CELL = "CELL"
|
||||
|
||||
|
||||
class Encryption(str, Enum):
|
||||
"""WiFi encryption types"""
|
||||
|
||||
NONE = "None"
|
||||
WEP = "WEP"
|
||||
WPA = "WPA"
|
||||
WPA2 = "WPA2"
|
||||
WPA3 = "WPA3"
|
||||
UNKNOWN = "unknown"
|
||||
|
||||
|
||||
@dataclass
|
||||
class Location:
|
||||
"""Represents a wireless network location with all available Wigle API fields"""
|
||||
|
||||
ssid: str
|
||||
latitude: float
|
||||
longitude: float
|
||||
last_update: Optional[datetime]
|
||||
encryption: Optional[str] = None
|
||||
network_type: Optional[str] = None
|
||||
channel: Optional[int] = None
|
||||
frequency: Optional[float] = None
|
||||
qos: Optional[int] = None
|
||||
transid: Optional[str] = None
|
||||
firsttime: Optional[datetime] = None
|
||||
lasttime: Optional[datetime] = None
|
||||
country_code: Optional[str] = None
|
||||
city: Optional[str] = None
|
||||
region: Optional[str] = None
|
||||
house_number: Optional[str] = None
|
||||
road: Optional[str] = None
|
||||
address: Optional[str] = None
|
||||
|
||||
|
||||
def get_wigle_auth() -> str:
|
||||
"""Get Wigle authentication token from environment variable"""
|
||||
wigle_auth = os.getenv("WIGLE")
|
||||
if not wigle_auth:
|
||||
raise WigleError("WIGLE environment variable not set. Format should be: 'AIDxxx:yyy'")
|
||||
return wigle_auth
|
||||
|
||||
|
||||
def enforce_rate_limit():
|
||||
"""Enforce API rate limit by sleeping if needed, showing countdown"""
|
||||
global _last_request_time
|
||||
current_time = time.time()
|
||||
time_since_last_request = current_time - _last_request_time
|
||||
|
||||
if time_since_last_request < API_RATE_LIMIT:
|
||||
sleep_time = API_RATE_LIMIT - time_since_last_request
|
||||
print(f"\nRate limit: waiting {sleep_time:.0f} seconds", end="", flush=True)
|
||||
|
||||
# Show countdown
|
||||
for remaining in range(int(sleep_time), 0, -1):
|
||||
time.sleep(1)
|
||||
print(f"\rRate limit: waiting {remaining:2d} seconds", end="", flush=True)
|
||||
|
||||
print("\rRate limit: continuing... ") # Clear the line
|
||||
|
||||
_last_request_time = time.time()
|
||||
|
||||
|
||||
def search_networks(
|
||||
*,
|
||||
# Location filters
|
||||
latitude_north: Optional[float] = None,
|
||||
latitude_south: Optional[float] = None,
|
||||
longitude_east: Optional[float] = None,
|
||||
longitude_west: Optional[float] = None,
|
||||
# Network filters
|
||||
ssid: Optional[str] = None,
|
||||
ssidlike: Optional[str] = None,
|
||||
network_type: Optional[NetworkType] = None,
|
||||
encryption: Optional[Encryption] = None,
|
||||
# Time filters
|
||||
on_since: Optional[datetime] = None,
|
||||
last_update: Optional[datetime] = None,
|
||||
# Result control
|
||||
results_per_page: int = 100,
|
||||
search_after: Optional[str] = None,
|
||||
# Other filters
|
||||
freenet: Optional[bool] = None,
|
||||
paynet: Optional[bool] = None,
|
||||
show_query: bool = False,
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Search for networks using the Wigle API with full parameter support and Redis caching.
|
||||
Rate limited to one request per minute.
|
||||
|
||||
Args:
|
||||
latitude_north: Northern boundary of search box
|
||||
latitude_south: Southern boundary of search box
|
||||
longitude_east: Eastern boundary of search box
|
||||
longitude_west: Western boundary of search box
|
||||
ssid: Exact SSID match
|
||||
ssidlike: SSID wildcard match
|
||||
network_type: Filter by network type (WIFI/BT/CELL)
|
||||
encryption: Filter by encryption type
|
||||
on_since: Only show networks seen on or after date
|
||||
last_update: Only show networks updated since date
|
||||
results_per_page: Number of results per page (max 100)
|
||||
search_after: Token for getting next batch of results
|
||||
freenet: Show only free networks
|
||||
paynet: Show only pay networks
|
||||
show_query: Return query bounds without results
|
||||
|
||||
Returns:
|
||||
Dictionary containing search results and metadata including searchAfter token
|
||||
|
||||
Raises:
|
||||
WigleError: If the WIGLE environment variable is not set or API request fails
|
||||
"""
|
||||
# https://api.wigle.net/api/v2/network/search?onlymine=false&encryption=None&freenet=false&paynet=false
|
||||
try:
|
||||
# Build cache key from all parameters
|
||||
params = locals()
|
||||
cache_key = f"wigle:search:{json.dumps(params, default=str, sort_keys=True)}"
|
||||
|
||||
cached_result = redis_client.get(cache_key)
|
||||
if cached_result:
|
||||
return json.loads(cached_result)
|
||||
|
||||
        # Enforce rate limit before making request
        enforce_rate_limit()

        # Build API parameters
        api_params = {
            "onlymine": "false",
            "resultsPerPage": results_per_page,
        }

        # Add optional parameters if provided
        if latitude_north is not None:
            api_params["latrange1"] = latitude_south
            api_params["latrange2"] = latitude_north
            api_params["longrange1"] = longitude_west
            api_params["longrange2"] = longitude_east

        if ssid:
            api_params["ssid"] = ssid
        if ssidlike:
            api_params["ssidlike"] = ssidlike
        if network_type:
            api_params["netid"] = network_type.value
        if encryption:
            api_params["encryption"] = encryption.value
        else:
            api_params["encryption"] = "None"
        if on_since:
            api_params["onSince"] = on_since.strftime("%Y%m%d")
        if last_update:
            api_params["lastupdt"] = last_update.strftime("%Y%m%d")
        if freenet is not None:
            api_params["freenet"] = str(freenet).lower()
        if paynet is not None:
            api_params["paynet"] = str(paynet).lower()
        if search_after:
            api_params["searchAfter"] = search_after
        if show_query:
            api_params["showQuery"] = str(show_query).lower()

        # Make API request
        wigle_auth = get_wigle_auth()
        headers = {"Authorization": f"Basic {wigle_auth}"}
        response = requests.get(API_URL, params=api_params, headers=headers)
        response.raise_for_status()
        result = response.json()

        print(result)

        # Cache the result
        redis_client.setex(cache_key, int(REDIS_CACHE_EXPIRY.total_seconds()), json.dumps(result))

        return result

    except requests.exceptions.RequestException as e:
        raise WigleError(f"API request failed: {str(e)}")


def parse_network_to_location(network: Dict[str, Any]) -> Location:
    """Convert a network result from Wigle API to a Location object"""
    # Parse dates if present
    last_update = None
    firsttime = None
    lasttime = None

    if network.get("lastupdt"):
        try:
            last_update = datetime.strptime(network["lastupdt"], "%Y-%m-%d %H:%M:%S")
        except ValueError:
            pass

    if network.get("firsttime"):
        try:
            firsttime = datetime.strptime(network["firsttime"], "%Y-%m-%d %H:%M:%S")
        except ValueError:
            pass

    if network.get("lasttime"):
        try:
            lasttime = datetime.strptime(network["lasttime"], "%Y-%m-%d %H:%M:%S")
        except ValueError:
            pass

    return Location(
        ssid=network["ssid"],
        latitude=float(network["trilat"]),
        longitude=float(network["trilong"]),
        last_update=last_update,
        encryption=network.get("encryption"),
        network_type=network.get("type"),
        channel=network.get("channel"),
        frequency=network.get("frequency"),
        qos=network.get("qos"),
        transid=network.get("transid"),
        firsttime=firsttime,
        lasttime=lasttime,
        country_code=network.get("country"),
        city=network.get("city"),
        region=network.get("region"),
        house_number=network.get("housenumber"),
        road=network.get("road"),
        address=network.get("address"),
    )


def get_all() -> List[Location]:
    """Search for OpenRoaming networks and return list of locations.

    Rate limited to one request per minute, including pagination requests.

    Returns:
        List[Location]: List of found network locations

    Raises:
        WigleError: If the WIGLE environment variable is not set or API request fails
    """
    ssid_names = [
        "Adentro OpenRoaming",
        "OpenRoaming",
        "Passpoint",
        "PasspointAruba",
        "Cellular Wi-Fi Passthrough",
        "WBA_OpenRoaming",
    ]
    locations: List[Location] = []

    for name in ssid_names:
        try:
            search_after = None
            while True:
                results = search_networks(
                    ssid=name,
                    encryption=Encryption.NONE,
                    network_type=NetworkType.WIFI,
                    results_per_page=100,
                    search_after=search_after,
                )

                if not results or not results.get("results"):
                    break

                for network in results["results"]:
                    locations.append(parse_network_to_location(network))

                # Get searchAfter token for next batch
                search_after = results.get("searchAfter")
                if not search_after:
                    break

        except WigleError as e:
            raise WigleError(f"Error searching for {name}: {str(e)}")

    print(f"Found {len(locations)} OpenRoaming network locations")
    return locations


if __name__ == "__main__":
    locations = get_all()
    for loc in locations:
        print(f"SSID: {loc.ssid}")
        print(f"Location: ({loc.latitude}, {loc.longitude})")
        print(f"Network Type: {loc.network_type or 'N/A'}")
        print(f"Encryption: {loc.encryption or 'N/A'}")
        print(f"Last Update: {loc.last_update or 'N/A'}")
        if loc.address:
            print(f"Address: {loc.address}")
        print("-" * 50)
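A minimal usage sketch for the client above, assuming the WIGLE environment variable holds a valid API token; the import path "wigle" is a placeholder, since the module name is not visible in this diff:

# hypothetical usage of get_all(); group the found networks by country
from wigle import get_all

locations = get_all()
by_country = {}
for loc in locations:
    by_country.setdefault(loc.country_code or "unknown", []).append(loc.ssid)
for country, ssids in sorted(by_country.items()):
    print(f"{country}: {len(ssids)} networks")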
38
lib/core/heroscript/examples/heroscript_example.py
Normal file
38
lib/core/heroscript/examples/heroscript_example.py
Normal file
@@ -0,0 +1,38 @@
from pydantic import BaseModel, Field
from typing import Dict, Any, Type, TypeVar

from heroscript.heroscript import *


class User(BaseModel, HeroScriptMixin):
    oid: str = Field()
    name: str = Field(min_length=2, description="Chosen name by user", example="myname")
    city: str = Field()
    age: int = Field()
    description: str = Field()


# Example usage
u1 = User(oid="abc123", name="John", age=30, city="New York",
          description="""
    this is a multiline

    we need to remove the
        this will stay 4 chars in

    end
    """)

myheroscript = u1.heroscript()
print(myheroscript)

u2 = User.from_heroscript(heroscript=myheroscript)
myprint(u2)

# p1 = Product(id=1, name="Phone", price=999.99, description="A smart phone")

# product_heroscript = p1.heroscript()
# print(product_heroscript)

# p2 = Product.from_heroscript(product_heroscript)
# print(p2)
78
lib/core/heroscript/examples/heroscript_example2.py
Normal file
78
lib/core/heroscript/examples/heroscript_example2.py
Normal file
@@ -0,0 +1,78 @@
from pydantic import BaseModel, Field
from typing import Dict, Any, Type, TypeVar, List

from heroscript.heroscript import *


class Comment(BaseModel):
    description: str = Field(default="")


class HeroBase(BaseModel, HeroScriptMixin):
    oid: str = Field(default="", metadata={"unique": True})
    name: str = Field(min_length=2, description="Chosen name by user", example="myname", metadata={"unique": True})
    comments: List[Comment] = Field(..., description="Comment which can be attached to obj")


class User(HeroBase):
    city: str = Field(metadata={"index": True})
    age: int = Field(metadata={"index": True})
    description: str = Field(default="")


class Product(BaseModel, HeroScriptMixin):
    id: int = Field(default="", metadata={"unique": True})
    name: str = Field(metadata={"unique": True})
    price: float = Field()
    description: str = Field()


myheroscript = """

```hero
!!user.define
    oid:abc123
    name:John
    description:'
      this is a multiline

      we need to remove the
          this will stay 4 chars in

      end
    '
    age:30
    city:'New York'

!!product.define
    id:33
    name:aproduct
    description:'
      this is a multiline

      we need to remove the
          this will stay 4 chars in

      end
    '
    price:10.0

```

"""

# hs = HeroScripts(class_types={"user": User, "product": Product}, content=myheroscript)
mypath = "~/code/git.threefold.info/tfgrid/hero_research/hero/osis/heroscript/example"
hs = HeroScripts(class_types={"user": User, "product": Product}, path=mypath)

objs = hs.get_objects()

for o in objs:
    myprint(o)

for item in hs.heroscripts:
    print(item)

query = "john*"
results = hs.search(User, query)

# Print the search results
for r in results:
    # print(f"User: {r['path']}")
    print(r)
1
lib/core/heroscript/examples/wiki/done.json
Normal file
1
lib/core/heroscript/examples/wiki/done.json
Normal file
@@ -0,0 +1 @@
{"/Users/despiegk/code/git.threefold.info/tfgrid/hero_research/hero/osis/heroscript/example/testFile.md": "f6e8b6a32349c262cb9afbea771c5add", "/Users/despiegk/code/git.threefold.info/tfgrid/hero_research/hero/osis/heroscript/example/sub/test file 2.md": "0ecc29046b6ef743481358e4c5630a6d"}
15
lib/core/heroscript/examples/wiki/sub/test file 2.md
Normal file
15
lib/core/heroscript/examples/wiki/sub/test file 2.md
Normal file
@@ -0,0 +1,15 @@
# header

!!product.define
    id:33
    name:aproduct
    description:'
      this is a multiline

      we need to remove the
          this will stay 4 chars in

      end
    '
    price:10.0
something else
22
lib/core/heroscript/examples/wiki/testFile.md
Normal file
22
lib/core/heroscript/examples/wiki/testFile.md
Normal file
@@ -0,0 +1,22 @@

!!user.define
    oid:abc123
    name:John
    description:'
      this is a multiline

      we need to remove the
          this will stay 4 chars in

      end
    '
    age:30
    city:'New York'

```heroscript
!!user.define
    oid:4nd
    name:John2
    age:40
    city:bxl
```
207
lib/core/heroscript/heroaction.py
Normal file
207
lib/core/heroscript/heroaction.py
Normal file
@@ -0,0 +1,207 @@
from herotools.texttools import dedent
from typing import List, Dict, Tuple
import re
from heroscript.tools import action_blocks, format_multiline_text, heroscript_repr
import textwrap


class HeroActions:
    def __init__(self, path: str = "", content: str = ""):
        blocks = action_blocks(path=path, content=content)
        self.actions: List[HeroAction] = []
        for block in blocks:
            self.actions.append(HeroAction(block))

    def __repr__(self):
        out = ""
        for item in self.actions:
            out += item.__repr__() + "\n"
        return out


class HeroAction:
    def __init__(self, content: str):
        blocks = action_blocks(content=content)
        if len(blocks) == 0:
            raise ValueError(f"didn't find any action in {content}")
        elif len(blocks) > 1:
            raise ValueError(f"Found more than one action in {content}")
        content = blocks[0]
        self.name, content = _name_paramstr(content)
        self.params = Params(content)

    def __str__(self):
        param_str = textwrap.indent(self.params.__str__(), " ")
        return f"!!{self.name}\n{param_str}"

    def __repr__(self):
        # return self.__str__()
        return heroscript_repr(self.__str__())


class Params:
    def __init__(self, content: str):
        self.__params = params_parse(content)

    def __str__(self):
        sorted_params = sorted(self.__params.items())
        param_str = ""
        for key, value in sorted_params:
            if "'" in value:
                param_str += f"{key}: {value}\n"
            elif "\n" in value:
                v = format_multiline_text(value)
                param_str += f"{key}: {v}\n"
            elif " " in value:
                param_str += f"{key}: '{value}'\n"
            else:
                param_str += f"{key}: {value}\n"
        return param_str

    def get_int(self, key: str, defval: int = 99999999) -> int:
        if key not in self.__params:
            if defval == 99999999:
                raise KeyError(f"Key '{key}' must exist in parameters")
            return defval
        return int(self.__params[key])

    def get_float(self, key: str, defval: float = 99999999.0) -> float:
        if key not in self.__params:
            if defval == 99999999.0:
                raise KeyError(f"Key '{key}' must exist in parameters")
            return defval
        return float(self.__params[key])

    def get(self, key: str, defval: str = "99999999") -> str:
        if key not in self.__params:
            if defval == "99999999":
                raise KeyError(f"Key '{key}' must exist in parameters")
            return defval
        return self.__params[key]

    def get_list(self, key: str, defval: List[str] = [], needtoexist: bool = True) -> List[str]:
        if defval is None:
            defval = []
        if key not in self.__params:
            if needtoexist:
                raise KeyError(f"Key '{key}' must exist in parameters")
            return defval
        return [item.strip().strip("'").strip() for item in self.__params[key].split(",")]

    def get_list_int(self, key: str, defval: List[int] = [], needtoexist: bool = True) -> List[int]:
        if defval is None:
            defval = []
        if key not in self.__params:
            if needtoexist:
                raise KeyError(f"Key '{key}' must exist in parameters")
            return defval
        return [int(item.strip()) for item in self.__params[key].split(",")]

    def get_list_float(self, key: str, defval: List[float] = [], needtoexist: bool = True) -> List[float]:
        if defval is None:
            defval = []
        if key not in self.__params:
            if needtoexist:
                raise KeyError(f"Key '{key}' must exist in parameters")
            return defval
        return [float(item.strip()) for item in self.__params[key].split(",")]

    def get_all(self) -> Dict[str, str]:
        return self.__params


def _name_paramstr(heroscript: str) -> Tuple[str, str]:
    if not isinstance(heroscript, str):
        raise ValueError("Input must be a string")

    heroscript = dedent(heroscript)
    lines = heroscript.strip().split("\n")
    if not lines or "!!" not in lines[0]:
        raise ValueError("The first line must contain '!!' to indicate the class name")

    try:
        class_name = lines[0].split("!!")[1].lower().strip()
    except IndexError:
        raise ValueError("Invalid format for class name extraction")

    rest_of_text = dedent("\n".join(lines[1:]))
    return class_name, rest_of_text


def params_parse(content: str) -> Dict[str, str]:
    lines = dedent(content).strip().split("\n")
    props = {}
    multiline_prop = None
    multiline_value: List[str] = list()

    for line in lines:
        if multiline_prop:
            if line.strip() == "'":
                props[multiline_prop] = dedent("\n".join(multiline_value))
                multiline_prop = None
                multiline_value = []
            else:
                multiline_value.append(line)
        else:
            if ":" in line:
                prop, value = line.split(":", 1)
                prop = prop.strip()
                value = value.strip()
                if value == "'":
                    multiline_prop = prop
                else:
                    if value.startswith("'") and value.endswith("'"):
                        value1 = value[1:-1]
                        if "'" not in value1:
                            value = value1
                    props[prop] = value
    return props


if __name__ == "__main__":
    # Example usage
    text = """

    !!obj1.define
        myname: 'mymama'
        mylist: '20,200'
        mylist2: 20,'a bbb'
        mylist3: 20,200
        myint:2

    !!obj2.color
        mother: 'mymama'
        name:'aurelie'
        length:60
        description:'
            multiline is supported
            now for aurelie
            '
        color:green
    """

    hero_actions = HeroActions(content=text)
    print(hero_actions)

    a2 = hero_actions.actions[1]

    assert a2.params.get_list(key="color") == ["green"]
    assert a2.params.get_list(key="mother") == ["mymama"]
    assert a2.params.get(key="color") == "green"
    assert a2.params.get_int(key="length") == 60
    assert a2.params.get_list_int(key="length") == [60]

    # now some non existing ones
    assert a2.params.get_int(key="lengtha", defval=3) == 3
    assert a2.params.get(key="lengtha", defval="3") == "3"

    a1 = hero_actions.actions[0]
    # print(a1.params.get_list(key="mylist2"))
    assert a1.params.get_list(key="mylist") == ["20", "200"]
    assert a1.params.get_list_int(key="mylist") == [20, 200]
    assert a1.params.get_list(key="mylist2") == ["20", "a bbb"]
129
lib/core/heroscript/heroscripts.py
Normal file
129
lib/core/heroscript/heroscripts.py
Normal file
@@ -0,0 +1,129 @@
from pydantic import BaseModel, Field
from typing import Any, Dict, List, Type, TypeVar
import re
import hashlib
import json
import os

# Full-text indexing via Whoosh (assumed from the index/Schema/QueryParser usage below)
from whoosh import index
from whoosh.fields import ID, NUMERIC, STORED, TEXT, Schema
from whoosh.qparser import QueryParser

# NOTE: HeroScript and extract_heroscript_blocks are expected to be provided by the
# heroscript package; they are not defined in this file.


T = TypeVar("T", bound=BaseModel)


class HeroScripts:
    def __init__(self, class_types: dict, path: str = "", content: str = "", indexpath: str = ""):
        self.class_types = class_types
        self.heroscripts: List[HeroScript] = []
        self.path = os.path.expanduser(path)
        self.indexpath = os.path.expanduser(indexpath)
        self.done: Dict[str, str] = {}

        # self.done_load()

        if self.path:
            try:
                # self.done_load()
                self.load(self.path)
                self.done_save()
            except FileNotFoundError as e:
                print(f"Directory not found: {self.path}")
                print(f"Error: {str(e)}")

        self.create_indexes()
        self.index_objects()

        if content:
            blocks = extract_heroscript_blocks(content)
            self.heroscripts.extend(HeroScript(block) for block in blocks)

    def done_load(self):
        if self.path:
            done_file = os.path.join(self.path, "done.json")
            if os.path.exists(done_file):
                with open(done_file, "r") as f:
                    self.done = json.load(f)

    def done_save(self):
        if self.path:
            done_file = os.path.join(self.path, "done.json")
            with open(done_file, "w") as f:
                json.dump(self.done, f)

    def load(self, path):
        for root, _, files in os.walk(path):
            for filename in files:
                filepath = os.path.join(root, filename)
                print(f" - load {filepath}")
                if filename.endswith(".md"):
                    with open(filepath, "r") as file:
                        content = file.read()
                    md5hash = hashlib.md5(content.encode()).hexdigest()
                    if filepath not in self.done or self.done[filepath] != md5hash:
                        blocks = extract_heroscript_blocks(content)
                        self.heroscripts.extend(HeroScript(block, filepath) for block in blocks)
                        self.done[filepath] = md5hash

    def get_objects(self):
        objects = []
        for heroscript in self.heroscripts:
            if heroscript.content:
                try:
                    class_name = heroscript.content.split("\n")[0].split("!!")[1].split(".")[0].lower()
                    if class_name in self.class_types:
                        class_type = self.class_types[class_name]
                        try:
                            obj = class_type.from_heroscript(heroscript.content)
                            objects.append(obj)
                        except Exception as e:
                            print(f"Error parsing HeroScript: {e}")
                except (IndexError, ValueError):
                    print(f"Invalid HeroScript format: {heroscript.content}")
        return objects

    def create_indexes(self):
        for class_type in self.class_types.values():
            schema = self.create_schema(class_type)
            index_dir = os.path.join(self.indexpath, class_type.__name__.lower())
            if not os.path.exists(index_dir):
                os.makedirs(index_dir)
            index.create_in(index_dir, schema)

    def create_schema(self, class_type):
        schema_fields = {"path": STORED()}
        for field_name, field in class_type.__fields__.items():
            json_schema_extra = getattr(field, "json_schema_extra", None)
            if json_schema_extra is not None:
                metadata = json_schema_extra.get("metadata", {})
                if isinstance(metadata, list):
                    metadata = {item: True for item in metadata}
                if metadata.get("unique") or metadata.get("indexed"):
                    if field.annotation == str:
                        schema_fields[field_name] = ID(stored=True, unique=metadata.get("unique", False))
                    elif field.annotation == int or field.annotation == float:
                        schema_fields[field_name] = NUMERIC(stored=True, unique=metadata.get("unique", False))
                    else:
                        schema_fields[field_name] = TEXT(stored=True, lowercase=True)
        return Schema(**schema_fields)

    def index_objects(self):
        for heroscript in self.heroscripts:
            for obj in self.get_objects():
                index_dir = os.path.join(self.indexpath, type(obj).__name__.lower())
                ix = index.open_dir(index_dir)
                writer = ix.writer()
                writer.add_document(
                    path=heroscript.path,
                    **{k: str(v).lower() for k, v in obj.dict().items() if k in ix.schema.names()},
                )
                writer.commit()

    def search(self, class_type, query):
        index_dir = os.path.join(self.indexpath, class_type.__name__.lower())
        ix = index.open_dir(index_dir)
        qp = QueryParser("name", schema=ix.schema)
        q = qp.parse(query)
        with ix.searcher() as searcher:
            results = searcher.search(q)
            # return results
            return [result["path"] for result in results]
82
lib/core/heroscript/mixin.py
Normal file
82
lib/core/heroscript/mixin.py
Normal file
@@ -0,0 +1,82 @@
from pydantic import BaseModel, Field
from typing import Dict, Any, List, Type, TypeVar
import re
from colorama import Fore, Style
import hashlib
import json
import os

from heroscript.heroaction import HeroAction
from heroscript.tools import format_multiline_text


class HeroScriptMixin:

    def heroscript(self) -> HeroAction:
        class_name = self.__class__.__name__.lower()
        prop_order = ["id", "oid", "name", "title", "description", "content"]

        # Get all the properties of the object
        props = list(self.__fields__.keys())

        # Separate properties into those in prop_order and the rest
        ordered_props = [prop for prop in prop_order if prop in props]
        remaining_props = [prop for prop in props if prop not in prop_order]

        # Sort the remaining properties
        sorted_remaining_props = sorted(remaining_props)

        # Combine the ordered properties and sorted remaining properties
        sorted_props = ordered_props + sorted_remaining_props

        lines = [f"!!{class_name}.define"]
        for prop in sorted_props:
            if prop in self.__fields__:
                val = getattr(self, prop)
                if isinstance(val, str):
                    if "\n" in val:
                        val = format_multiline_text(text=val)
                    elif any(c.isspace() for c in val):
                        val = f"'{val}'"
                lines.append(f" {prop}:{val}")

        result = "\n".join(lines)

        return HeroAction(content=result)

    @classmethod
    def from_heroscript(cls, heroscript: str):
        lines = heroscript.strip().split("\n")
        class_name = lines[0].split("!!")[1].split(".")[0]

        props = {}
        multiline_prop = None
        multiline_value: List[str] = []

        for line in lines[1:]:
            if multiline_prop:
                if line.strip() == "'":
                    # End of multiline text
                    min_indent = min(len(ml) - len(ml.lstrip()) for ml in multiline_value if ml.strip())
                    unindented_lines = [ml[min_indent:] for ml in multiline_value]
                    props[multiline_prop] = "\n".join(unindented_lines)
                    multiline_prop = None
                    multiline_value = []
                else:
                    multiline_value.append(line)
            else:
                if ":" in line:
                    prop, value = line.split(":", 1)
                    prop = prop.strip()
                    value = value.strip()

                    if value == "'":
                        # Start of multiline text
                        multiline_prop = prop
                    else:
                        if value.startswith("'") and value.endswith("'"):
                            value = value[1:-1]
                        props[prop] = value

        return cls(**props)
4
lib/core/heroscript/readme.md
Normal file
4
lib/core/heroscript/readme.md
Normal file
@@ -0,0 +1,4 @@
## heroscript


> not to be used yet
145
lib/core/heroscript/tools.py
Normal file
145
lib/core/heroscript/tools.py
Normal file
@@ -0,0 +1,145 @@

from typing import List
import os
from colorama import Fore, Style
from herotools.texttools import dedent
import textwrap


# load the heroscripts from the filesystem
def heroscript_blocks(path: str) -> List[str]:

    heroscript_blocks = list()

    for root, dirs, files in os.walk(path):
        for file in files:
            if file.endswith(".md"):
                file_path = os.path.join(root, file)
                with open(file_path, "r") as f:
                    content = f.read()
                blocks = _extract_heroscript_blocks(content)
                heroscript_blocks.extend(blocks)

    return heroscript_blocks


def _extract_heroscript_blocks(content: str):
    content = dedent(content)
    blocks = []
    lines = content.split("\n")

    in_block = False
    block_lines: List[str] = list()

    for line in lines:
        if line.startswith("```hero"):
            in_block = True
            block_lines = []
        elif line.startswith("```") and in_block:
            in_block = False
            block = "\n".join(block_lines)
            blocks.append(block)
        elif in_block:
            block_lines.append(line)
    return blocks


def action_blocks(path: str = "", content: str = "") -> List[str]:
    if content != "":
        return __action_blocks_get(content)
    res: List[str] = list()
    for hscript in heroscript_blocks(path):
        for actionscript in __action_blocks_get(hscript):
            res.append(actionscript)
    return res


def __action_blocks_get(content: str) -> List[str]:
    content = dedent(content)
    blocks = list()
    lines = content.split("\n")

    block_lines: List[str] = list()
    herofound = False

    for line in lines:
        # print(line)
        if line.startswith("!!"):
            herofound = True
            if block_lines:  # means we found one before
                block = "\n".join(block_lines)
                blocks.append(block)
                block_lines = []
                # print("f1")
            block_lines.append(line)
        elif line.strip() and not line.startswith(" ") and not line.startswith("\t") and block_lines:
            block = "\n".join(block_lines)
            blocks.append(block)
            block_lines = []
            herofound = False
        elif herofound:
            block_lines.append(line)
            # print("append")

    if block_lines:
        block = "\n".join(block_lines)
        blocks.append(block)

    return blocks


def myprint(obj):
    class_name = f"{Fore.YELLOW}{obj.__class__.__name__}{Style.RESET_ALL}"
    fields = [field for field in obj.__fields__ if field in obj.__dict__]
    attributes = ', '.join(f"{Fore.LIGHTBLACK_EX}{field}{Style.RESET_ALL}={Fore.GREEN}'{getattr(obj, field)}'{Style.RESET_ALL}" for field in fields)
    print(f"{class_name}({attributes})")


# format text to be ready to be set in heroscript
def format_multiline_text(text: str) -> str:

    text = dedent(text)
    text = textwrap.indent(text, " ")

    # Join the formatted lines with newline characters and add the required indentation
    formatted_text = "'\n" + text + "\n '"

    return formatted_text


# representation with colors of heroscript
def heroscript_repr(content: str) -> str:
    lines = content.split("\n")
    formatted_lines = []

    for line in lines:
        if line.startswith("!!"):
            formatted_line = f"{Fore.RED}{line}{Style.RESET_ALL}"
        elif ":" in line:
            prop, value = line.split(":", 1)
            prop = prop.strip()
            value = value.strip()

            if value.startswith("'") and value.endswith("'"):
                value = f" {Fore.GREEN}{value}{Style.RESET_ALL}"
            else:
                value = f" {Fore.YELLOW}{value}{Style.RESET_ALL}"

            formatted_line = f" {Fore.CYAN}{prop}{Style.RESET_ALL}:{value}"
        else:
            formatted_line = line

        formatted_lines.append(formatted_line)

    return "\n".join(formatted_lines)


def heroscript_print(content: str):
    o = heroscript_repr(content)
    print(o)


if __name__ == "__main__":

    t = " something\n a\n\n bbbb"

    print(dedent(t))

    print(format_multiline_text(t))
41
lib/crypt/box/box.py
Normal file
41
lib/crypt/box/box.py
Normal file
@@ -0,0 +1,41 @@
from fastapi import HTTPException
from cryptography.fernet import Fernet
import redis
import base64
import hashlib

# TODO: KRISTOF FIX


def box_get():
    r = redis.Redis(host='localhost', port=6379, db=0)

    key = r.get('my.secret')

    if key is None:
        raise HTTPException(
            status_code=404,
            detail="can't find my.secret in redis, it needs to be set first: use secret-set to register your secret.",
        )

    hash_digest = hashlib.sha256(key).digest()

    # Encode the hash digest to make it url-safe base64-encoded
    key2 = base64.urlsafe_b64encode(hash_digest)

    try:
        f = Fernet(key2)
    except Exception as e:
        # if str(e).find("Resource Missing")>0:
        #     raise HTTPException(status_code=400, detail="Could not find account with pubkey: "+account_keypair.public_key)
        raise HTTPException(status_code=400, detail=str(e))

    return f


def box_secret_set(secret: str):
    r = redis.Redis(host='localhost', port=6379, db=0)

    # key = r.set('my.secret', secret)
    r.setex('my.secret', 43200, secret)  # Set the key with an expiration time of 12 hours

    box_get()

    return "OK"
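A short sketch of how the returned Fernet box is meant to be used, assuming Redis runs locally and the secret was set first:

# hypothetical usage of box_secret_set / box_get
box_secret_set("my-well-chosen-secret")
f = box_get()
token = f.encrypt(b"some private data")  # encrypt with the key derived from the secret
print(f.decrypt(token))  # b'some private data'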
26
lib/crypt/box/box_api.py
Normal file
26
lib/crypt/box/box_api.py
Normal file
@@ -0,0 +1,26 @@
from fastapi import APIRouter, HTTPException, Response
from pydantic import BaseModel, constr, Field
from secret.box import box_secret_set, box_get

# TODO: KRISTOF FIX

router = APIRouter()


############## SECRET


class BoxSecretSetRequest(BaseModel):
    secret: str = Field(..., description="a well chosen secret key; never forget this key or you will lose your assets")


@router.post("/secret", description="Set the secret for your hero; it will be kept for 12 hours")
async def set_secret(request: BoxSecretSetRequest):
    box_secret_set(secret=request.secret)
    return Response(content="OK", media_type="text/plain")


@router.get("/secret", description="Check whether the secret exists.")
async def secret_check():
    b = box_get()
    return Response(content="OK", media_type="text/plain")
412
lib/downloader/downloader.py
Normal file
412
lib/downloader/downloader.py
Normal file
@@ -0,0 +1,412 @@
import json
import logging
import mimetypes  # Added
import os
from datetime import datetime, timedelta
from urllib.parse import urljoin, urlparse

import scrapy
from scrapy.crawler import CrawlerProcess
from scrapy.linkextractors import LinkExtractor
from scrapy.utils.project import get_project_settings

# Configure logging
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
logger = logging.getLogger(__name__)

STATE_FILE_NAME = ".download_state.json"


class GenericDownloaderSpider(scrapy.Spider):
    name = "generic_downloader"

    def __init__(
        self,
        start_url,
        dest_dir,
        allowed_domains,
        ignore_paths=None,
        depth_limit=0,
        follow_links=True,
        max_age_hours=0,
        state_data=None,
        *args,
        **kwargs,
    ):
        super(GenericDownloaderSpider, self).__init__(*args, **kwargs)
        self.start_urls = [start_url]
        self.dest_dir = dest_dir
        self.allowed_domains = allowed_domains
        self.ignore_paths = ignore_paths if ignore_paths else []
        self.depth_limit = int(depth_limit)
        self.follow_links = bool(follow_links)
        self.max_age_hours = int(max_age_hours)
        self.state_data = state_data if state_data else {}
        self.link_extractor = LinkExtractor(allow_domains=self.allowed_domains)

        os.makedirs(self.dest_dir, exist_ok=True)
        logger.info(f"Downloader initialized for {start_url}")
        logger.info(f"Destination directory: {self.dest_dir}")
        logger.info(f"Allowed domains: {self.allowed_domains}")
        logger.info(f"Ignore paths: {self.ignore_paths}")
        logger.info(f"Depth limit: {self.depth_limit}")
        logger.info(f"Follow links: {self.follow_links}")
        logger.info(f"Max age (hours): {self.max_age_hours}")

    def _should_ignore(self, url_path):
        for pattern in self.ignore_paths:
            if pattern in url_path:  # Simple substring match for now, can be regex
                return True
        return False

    def _get_file_path(self, response):  # Takes the response so Content-Type can be used
        url = response.url
        parsed_url = urlparse(url)
        original_path = parsed_url.path  # e.g. /foo/bar.html or /foo/ or /

        # Determine base_name and current_ext from original_path
        if original_path.endswith("/"):
            base_name = "index"
            current_ext = ""
            # path_for_dirs is the path part that forms the directory structure
            path_for_dirs = original_path.lstrip("/")
        else:
            path_basename = os.path.basename(original_path)
            if not path_basename and original_path == "/":  # Root path e.g. http://example.com
                base_name = "index"
                current_ext = ""
            else:  # e.g. /file.txt or /file_no_ext or /.config
                base_name, current_ext = os.path.splitext(path_basename)
                if not base_name and current_ext:  # Hidden file like /.bashrc
                    base_name = current_ext  # Treat .bashrc as base_name
                    current_ext = ""  # No further extension part
            path_for_dirs = os.path.dirname(original_path.lstrip("/"))

        # Try to get extension from Content-Type
        content_type = response.headers.get("Content-Type", b"").decode("utf-8").split(";")[0].strip()
        mime_ext = mimetypes.guess_extension(content_type) if content_type else None

        final_ext = current_ext
        if mime_ext and not current_ext:  # No path extension, use MIME type's
            final_ext = mime_ext
        elif mime_ext and current_ext.lower() in [".htm", ".html"] and mime_ext.lower() not in [".htm", ".html"]:
            # Path had .html/.htm, but MIME type suggests something more specific
            final_ext = mime_ext
            logger.debug(f"URL {url}: Path ext {current_ext} overridden by Content-Type ext {mime_ext}.")
        elif not final_ext and (
            content_type.startswith("text/")
            or content_type in ["application/javascript", "application/json", "application/xml"]
        ):
            # Fallback for common text types if no extension determined yet and no path ext
            if not base_name.endswith((".js", ".css", ".json", ".xml", ".txt")):  # Avoid double .html.html
                final_ext = ".html"

        filename = base_name + final_ext

        # Create path components for the directory structure
        components = []
        if path_for_dirs:
            components.extend(comp for comp in path_for_dirs.split("/") if comp)
        components.append(filename)

        # Sanitize components
        sane_components = []
        for comp_idx, comp_val in enumerate(components):
            # Basic sanitization: replace invalid chars, limit length, avoid '..'
            # Allow '.' for filenames but not as a full component name if it's not the only char
            if comp_val == "..":
                continue  # Skip parent dir references in path construction

            sane_comp = "".join(c if c.isalnum() or c in ["-", "_", "."] else "_" for c in comp_val)
            sane_comp = sane_comp[:150]  # Limit component length

            if not sane_comp and comp_idx == len(components) - 1:  # last component (filename) became empty
                sane_comp = "downloaded_file" + final_ext  # fallback filename
            elif not sane_comp:
                sane_comp = "_"  # placeholder for empty dir name

            if sane_comp:  # Ensure component is not empty after sanitization
                sane_components.append(sane_comp)

        if not sane_components:  # If all components were sanitized away or skipped
            sane_components = [filename if filename else "unknown_file" + final_ext]

        file_path = os.path.join(self.dest_dir, *sane_components)
        return file_path

    def parse(self, response, depth=0):
        url = response.url
        logger.info(f"Processing URL (depth {depth}): {url}")

        parsed_url = urlparse(url)
        if self._should_ignore(parsed_url.path):
            logger.info(f"Ignoring URL (matches ignore_paths): {url}")
            return

        file_path = self._get_file_path(response)  # Pass response object

        # Check download state and max_age
        if url in self.state_data:
            url_state = self.state_data[url]
            last_download_time_str = url_state.get("timestamp")
            # Consider previous status; only skip if it was a success or another skip
            can_skip_based_on_history = url_state.get("status", "").startswith("success") or url_state.get(
                "status", ""
            ).startswith("skipped")

            if last_download_time_str and can_skip_based_on_history:
                last_download_time = datetime.fromisoformat(last_download_time_str)
                if self.max_age_hours > 0 and (datetime.utcnow() - last_download_time) < timedelta(
                    hours=self.max_age_hours
                ):
                    logger.info(
                        f"Skipping download for {url}, recently processed at {last_download_time_str} with status '{url_state.get('status')}'."
                    )
                    # Update state to reflect this skip check
                    self.state_data[url]["status"] = "skipped_max_age"
                    self.state_data[url]["skipped_timestamp"] = datetime.utcnow().isoformat()
                    # Still need to check for links if recursive.
                    # Follow if self.depth_limit is 0 (infinite) OR current depth is less than a positive limit.
                    if self.follow_links and (self.depth_limit == 0 or depth < self.depth_limit):
                        for link in self.link_extractor.extract_links(response):
                            parsed_link_url = urlparse(link.url)
                            if not self._should_ignore(parsed_link_url.path):
                                yield response.follow(link, callback=self.parse)
                            else:
                                logger.info(f"Ignoring extracted link (matches ignore_paths): {link.url}")
                    return

        logger.info(f"Processing and saving {url} to {file_path}")
        os.makedirs(os.path.dirname(file_path), exist_ok=True)

        try:
            with open(file_path, "wb") as f:
                f.write(response.body)
            logger.info(f"Successfully saved {url} to {file_path}")
            self.state_data[url] = {
                "timestamp": datetime.utcnow().isoformat(),
                "status": "success",
                "path": file_path,
                "size": len(response.body),
            }
        except Exception as e:
            logger.error(f"Failed to save {url} to {file_path}: {e}")
            self.state_data[url] = {
                "timestamp": datetime.utcnow().isoformat(),
                "status": "failed",
                "error": str(e),
            }
            return  # Do not proceed further if save failed

        # Follow links if self.depth_limit is 0 (infinite) OR current depth is less than a positive limit.
        if self.follow_links and (self.depth_limit == 0 or depth < self.depth_limit):
            logger.info(f"Following links from {url} at custom depth {depth} (for next level {depth + 1})")
            extracted_links = list(self.link_extractor.extract_links(response))
            if not extracted_links:
                logger.info(f"  No links extracted from {url} by LinkExtractor.")
            else:
                logger.info(
                    f"  LinkExtractor found {len(extracted_links)} links from {url}: {[l.url for l in extracted_links]}"
                )

            for link_idx, link in enumerate(extracted_links):
                logger.debug(
                    f"  Considering link {link_idx + 1}/{len(extracted_links)}: Text='{link.text}', URL='{link.url}'"
                )
                parsed_link_url = urlparse(link.url)
                if self._should_ignore(parsed_link_url.path):
                    logger.info(f"  Ignoring extracted link (matches ignore_paths): {link.url}")
                else:
                    logger.info(f"  Yielding request for: {link.url} (to be processed at custom depth {depth + 1})")
                    yield response.follow(link, callback=self.parse)

    def closed(self, reason):
        logger.info(f"Spider closed: {reason}. Finalizing and saving state.")
        state_file_path = os.path.join(self.dest_dir, STATE_FILE_NAME)
        try:
            # Ensure the directory for the state file exists, though dest_dir should already.
            os.makedirs(os.path.dirname(state_file_path), exist_ok=True)
            with open(state_file_path, "w") as f:
                json.dump(self.state_data, f, indent=4)
            logger.info(f"Spider successfully saved state ({len(self.state_data)} items) to {state_file_path}")
        except Exception as e:
            logger.error(f"Spider failed to save state to {state_file_path}: {e}", exc_info=True)


def download_site(
    start_url,
    dest_dir,
    recursive=True,
    ignore_paths=None,
    depth_limit=0,  # 0 means no limit if recursive is True
    follow_links=True,  # Somewhat redundant if recursive is True, but good for clarity
    max_age_hours=24,  # Re-download if older than 24 hours
):
    """
    Downloads a website or a single page.

    :param start_url: The URL to start downloading from.
    :param dest_dir: The directory to save downloaded files.
    :param recursive: Whether to download recursively.
    :param ignore_paths: A list of path substrings or regex patterns to ignore.
    :param depth_limit: Maximum depth for recursive downloads (0 for no limit).
    :param follow_links: Whether to follow links on pages (primarily for recursive).
    :param max_age_hours: Max age of a file in hours. If a file was downloaded
                          more recently than this, it won't be re-downloaded.
                          0 means always re-download.
    :return: A dictionary summarizing the download process.
    """
    parsed_url = urlparse(start_url)
    if not parsed_url.scheme or not parsed_url.netloc:
        logger.error(f"Invalid start_url: {start_url}. Must be a full URL (e.g., http://example.com)")
        return None

    allowed_domains = [parsed_url.hostname]  # Changed from netloc to hostname

    state_file_path = os.path.join(dest_dir, STATE_FILE_NAME)
    state_data = {}
    if os.path.exists(state_file_path):
        try:
            with open(state_file_path, "r") as f:
                state_data = json.load(f)
            logger.info(f"Loaded download state from {state_file_path}")
        except json.JSONDecodeError:
            logger.warning(f"Could not decode JSON from state file {state_file_path}. Starting fresh.")
        except Exception as e:
            logger.error(f"Error loading state file {state_file_path}: {e}. Starting fresh.")

    settings = get_project_settings()
    settings.set("ROBOTSTXT_OBEY", False)  # Explicitly disable robots.txt
    # settings.set('LOG_LEVEL', 'DEBUG')  # Optionally enable for more Scrapy internal logs

    effective_scrapy_depth = 0  # 0 means infinite for Scrapy
    if recursive and int(depth_limit) > 0:
        effective_scrapy_depth = int(depth_limit)
    # If not recursive, or recursive with depth_limit 0, effective_scrapy_depth stays 0 (infinite).
    settings.set("DEPTH_LIMIT", effective_scrapy_depth)

    logger.info(f"Scrapy DEPTH_LIMIT set to: {effective_scrapy_depth}")
    # Scrapy's DEPTH_PRIORITY and SCHEDULER_DISK_QUEUE might be useful for large crawls.
    # For now, keeping it simple.

    process = CrawlerProcess(settings)

    # The spider needs to be instantiated with all its custom args.
    # Scrapy's process.crawl can take kwargs which are passed to the spider's __init__.
    process.crawl(
        GenericDownloaderSpider,
        start_url=start_url,
        dest_dir=dest_dir,
        allowed_domains=allowed_domains,
        ignore_paths=ignore_paths,
        depth_limit=int(depth_limit) if recursive else 0,  # Spider handles its own depth based on this
        follow_links=follow_links and recursive,
        max_age_hours=int(max_age_hours),
        state_data=state_data,
    )

    logger.info(f"Starting download process for {start_url}...")
    process.start()  # This will block until the crawl is finished

    # The spider's closed() method is responsible for writing the final state.
    # Load this definitive state to build the summary.
    final_state_data_for_summary = {}
    if os.path.exists(state_file_path):
        try:
            with open(state_file_path, "r") as f:
                final_state_data_for_summary = json.load(f)
            logger.info(
                f"Loaded final state ({len(final_state_data_for_summary)} items) from {state_file_path} for summary construction."
            )
        except json.JSONDecodeError as e:
            logger.error(
                f"Error decoding JSON from final state file {state_file_path} for summary: {e}. Summary will be based on empty or incomplete state."
            )
        except Exception as e:
            logger.error(
                f"Error loading final state from {state_file_path} for summary: {e}. Summary will be based on empty or incomplete state."
            )
    else:
        logger.warning(f"State file {state_file_path} not found after spider closed. Summary will be based on empty state.")

    summary = {
        "start_url": start_url,
        "dest_dir": dest_dir,
        "total_processed_urls": len(final_state_data_for_summary),
        "successful_downloads": 0,
        "failed_downloads": 0,
        "skipped_max_age": 0,
        "total_bytes_downloaded": 0,
        "state_file_path": state_file_path,
        "errors": [],
    }

    # Populate summary from the final state loaded from the file
    for url_key, data_val in final_state_data_for_summary.items():
        status = data_val.get("status")
        if status == "success":
            summary["successful_downloads"] += 1
            summary["total_bytes_downloaded"] += data_val.get("size", 0)
        elif status == "failed":
            summary["failed_downloads"] += 1
            if "error" in data_val:
                summary["errors"].append(f"URL: {url_key}, Error: {data_val['error']}")
        elif status == "skipped_max_age":
            summary["skipped_max_age"] += 1
    # Any errors during state file loading for summary are captured in the logs;
    # if the final state could not be loaded, the summary simply reflects an empty state.

    logger.info(f"Download process finished. Summary: {json.dumps(summary, indent=2)}")
    return summary
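A minimal invocation sketch for download_site, assuming Scrapy is installed; the URL, destination directory and ignore pattern are placeholders:

# hypothetical call; blocks until the crawl finishes and returns the summary dict
summary = download_site(
    start_url="https://example.com",
    dest_dir="/tmp/example_mirror",
    recursive=True,
    depth_limit=2,            # follow links two levels deep
    ignore_paths=["/login"],  # skip URLs whose path contains /login
    max_age_hours=24,         # reuse files downloaded within the last day
)
print(summary["successful_downloads"], "pages saved")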
0
lib/tools/__init__.py
Normal file
0
lib/tools/__init__.py
Normal file
31
lib/tools/extensions.py
Normal file
31
lib/tools/extensions.py
Normal file
@@ -0,0 +1,31 @@
import mimetypes
import os


def check_and_add_extension(file_path: str) -> str:
    # Only check if there's no extension
    if not os.path.splitext(file_path)[1]:
        # Read the file content
        with open(file_path, 'rb') as f:
            content = f.read(2048)  # Read the first 2048 bytes for detection

        # Detect content type
        content_type = detect_content_type(content)
        extension = mimetypes.guess_extension(content_type)

        if extension:
            new_file_path = file_path + extension
            os.rename(file_path, new_file_path)
            return new_file_path

    return file_path


def detect_content_type(content: bytes) -> str:
    # Simple content-based detection
    if content.startswith(b'\xff\xd8'):
        return 'image/jpeg'
    if content.startswith(b'\x89PNG'):
        return 'image/png'
    if content.startswith(b'GIF'):
        return 'image/gif'
    # Add more checks as needed
    return 'application/octet-stream'
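A small usage sketch, assuming a downloaded file without an extension exists at the placeholder path:

# hypothetical usage: rename a bare download based on its magic bytes
new_path = check_and_add_extension("/tmp/downloads/avatar")
print(new_path)  # e.g. /tmp/downloads/avatar.png if the content starts with the PNG signature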
270
lib/tools/gitscanner.py
Normal file
270
lib/tools/gitscanner.py
Normal file
@@ -0,0 +1,270 @@
import sys
import os

# Add the parent directory of herotools to the Python module search path
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))

import time
import json
import subprocess
from typing import Optional, List
import redis
from herotools.logger import logger
from herotools.texttools import name_fix
from enum import Enum, auto
from dataclasses import dataclass
import git


# Initialize Redis client
redis_client = redis.StrictRedis(host='localhost', port=6379, db=0)


# Define the ChangeType Enum
class ChangeType(Enum):
    DEL = 'del'
    MOD = 'mod'
    NEW = 'new'


@dataclass
class FileChange:
    commit_hash: str
    commit_time: str
    path: str  # relative path in the repo
    change_type: ChangeType


class Repo:
    def __init__(self, cat: str, account: str, name: str, path: str):
        self.cat = cat
        self.account = account
        self.name = name
        self.path = path
        self.hash_last_found: Optional[float] = None
        self.hash_last_processed: Optional[str] = None
        self.lastcheck: Optional[float] = None

    def __str__(self):
        return json.dumps({
            "cat": self.cat,
            "account": self.account,
            "name": self.name,
            "path": self.path,
            "hash_last_found": self.hash_last_found,
            "hash_last_processed": self.hash_last_processed,
            "lastcheck": self.lastcheck
        }, indent=2)

    def _redis_key(self) -> str:
        return f"gitcheck:{self.cat}:{self.account}:{self.name}"

    def save_to_redis(self):
        redis_client.set(self._redis_key(), json.dumps(self.__dict__))

    @staticmethod
    def load_from_redis(cat: str, account: str, name: str) -> Optional['Repo']:
        redis_key = f"gitcheck:{cat}:{account}:{name}"
        data = redis_client.get(redis_key)
        if data:
            data = json.loads(data)
            repo = Repo(data["cat"], data["account"], data["name"], data["path"])
            repo.hash_last_found = data.get("hash_last_found")
            repo.hash_last_processed = data.get("hash_last_processed")
            repo.lastcheck = data.get("lastcheck")
            return repo
        return None

    def get_remote_commit_hash(self, branch: str) -> str:
        """Get the latest commit hash from the remote repository."""
        result = subprocess.run(
            ['git', 'ls-remote', 'origin', f'refs/heads/{branch}'],
            cwd=self.path,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True
        )
        if result.returncode != 0:
            raise Exception(f"Error fetching remote commit hash: {result.stderr}")

        return result.stdout.split()[0]

    def get_local_commit_hash(self) -> str:
        """Get the latest commit hash from the local repository."""
        result = subprocess.run(
            ['git', 'rev-parse', 'HEAD'],
            cwd=self.path,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True
        )
        if result.returncode != 0:
            raise Exception(f"Error fetching local commit hash: {result.stderr}")

        return result.stdout.strip()

    def get_current_branch(self) -> str:
        result = subprocess.run(
            ['git', 'branch', '--show-current'],
            cwd=self.path,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True
        )
        if result.returncode != 0:
            raise Exception(f"Error fetching local branch name: {result.stderr}")

        return result.stdout.split()[0]

    def get_remote_default_branch(self) -> str:
        result = subprocess.run(
            ['git', 'ls-remote', '--symref', 'origin', 'HEAD'],
            cwd=self.path,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True
        )
        if result.returncode != 0:
            raise Exception(f"Error fetching remote default branch: {result.stderr}")

        return result.stdout.split()[1].split('/')[-1]

    def should_check_again(self) -> bool:
        """Determine if we should check the repository again based on the last check time."""
        if self.lastcheck is None:
            return True
        return (time.time() - self.lastcheck) > 60

    def update_last_check_time(self) -> None:
        """Update the last check time."""
        self.lastcheck = time.time()
        self.save_to_redis()

    def log_change(self, epoch_time: float) -> None:
        """Log a detected change in Redis."""
        self.hash_last_found = epoch_time
        self.save_to_redis()

    def check_for_changes(self, branch: str = 'main') -> None:
        """Check the repository for updates and log changes if found."""
        if not self.should_check_again():
            print("WAIT TO CHECK FOR CHANGES")
            return

        try:
            diff_commits = self.get_local_remote_diff_commits(branch)

            if diff_commits != []:
                print("FOUND SOME CHANGES")
                self.log_change(time.time())
                file_changes = self.get_file_changes_from_commits(diff_commits)
                self.print_file_changes(file_changes)
            else:
                print("NO CHANGES FOUND")

            self.update_last_check_time()
        except Exception as e:
            print(f"An error occurred while checking repo {self.path}: {e}")

    def get_local_remote_diff_commits(self, branch: str) -> List[git.Commit]:
        # Open the repository
        repo = git.Repo(self.path)

        # Get the local branch
        local_branch = repo.heads[branch]
        # Get the remote reference for the branch
        remote_ref = repo.remotes.origin.refs[branch]

        # Fetch the latest changes from the remote
        repo.remotes.origin.fetch()

        # Get the commit hashes of the local and remote branches
        local_commit = local_branch.commit
        remote_commit = remote_ref.commit

        if local_commit == remote_commit:
            return []

        # Get the common ancestor commit
        base_commit = repo.merge_base(local_commit, remote_commit)[0]

        # Get the ahead and behind commits
        ahead_commits = list(repo.iter_commits(f"{base_commit}..{local_commit}"))
        behind_commits = list(repo.iter_commits(f"{base_commit}..{remote_commit}"))

        # Combine the ahead and behind commits
        diff_commits = ahead_commits + behind_commits
        return diff_commits

    def get_file_changes_from_commits(self, commit_list: List[git.Commit]) -> List[FileChange]:
        file_changes = []
        for commit in commit_list:
            # print(commit)
            diffs = commit.diff(self.hash_last_processed, create_patch=True)
            # print(diffs)
            for diff in diffs:
                if diff.deleted_file:
                    change_type = ChangeType.DEL
                elif diff.new_file:
                    change_type = ChangeType.NEW
                else:
                    change_type = ChangeType.MOD

                file_change = FileChange(
                    commit_hash=commit.hexsha,
                    commit_time=str(commit.committed_datetime),
                    path=diff.b_path if diff.new_file else diff.a_path,
                    change_type=change_type
                )
                file_changes.append(file_change)
        return file_changes

    def print_file_changes(self, file_changes: List[FileChange]):
        for file_change in file_changes:
            print("------------------------------------")
            print(f"Commit Hash: {file_change.commit_hash}")
            print(f"Commit Time: {file_change.commit_time}")
            print(f"File Path: {file_change.path}")
            print(f"Change Type: {file_change.change_type.value}")
            print("------------------------------------")


def gitscan(path: str, cat: str) -> None:
    """Walk over directories to find Git repositories and check them."""
    path = os.path.abspath(os.path.expanduser(path))
    for root, dirs, files in os.walk(path):
        if '.git' in dirs:
            accountname = os.path.basename(os.path.dirname(root))
            reponame = os.path.basename(root)
            repo = Repo.load_from_redis(cat, accountname, reponame)
            if repo is None:
                repo = Repo(cat, accountname, reponame, root)
            branch = repo.get_current_branch()

            logger.debug(f"root: {root}")
            logger.debug(f"accountname: {accountname}")
            logger.debug(f"reponame: {reponame}")
            logger.debug(f"branch: {branch}")
            logger.debug(f"repo: {repo}")

            repo.check_for_changes(branch)
            dirs[:] = []  # Don't go deeper into subdirectories
        else:
            # Filter out any .git directories from further traversal
            dirs[:] = [d for d in dirs if d != '.git']


def print_redis_client():
    cursor = 0
    while True:
        cursor, keys = redis_client.scan(cursor)
        for key in keys:
            value = redis_client.get(key)
            print(key)
            print(value)
            print()
        if cursor == 0:
            break


if __name__ == "__main__":
    # print_redis_client()
    mypath = "~/code/git.threefold.info/projectmycelium"
    category = 'mycat'
    gitscan(path=mypath, cat=category)
    # print_redis_client()
39
lib/tools/logger.py
Normal file
39
lib/tools/logger.py
Normal file
@@ -0,0 +1,39 @@
import logging
import colorlog

log_colors_config = {
    'DEBUG': 'cyan',
    'INFO': 'green',
    'WARNING': 'yellow',
    'ERROR': 'red',
    'CRITICAL': 'bold_red',
}

secondary_log_colors_config = {
    'name': {
        'DEBUG': 'blue',
        'INFO': 'blue',
        'WARNING': 'blue',
        'ERROR': 'blue',
        'CRITICAL': 'blue'
    },
    'levelname': log_colors_config
}


formatter = colorlog.ColoredFormatter(
    '%(log_color)s%(asctime)s - %(name_log_color)s%(name)s - %(levelname_log_color)s%(levelname)s - %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S',
    log_colors=log_colors_config,
    secondary_log_colors=secondary_log_colors_config
)


# Create a handler
handler = logging.StreamHandler()
handler.setFormatter(formatter)

# Get the root logger
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)
logger.addHandler(handler)
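A minimal sketch of importing this shared logger from another module, using the same herotools import path the other files in this commit use:

from herotools.logger import logger

logger.debug("low level detail, shown in cyan")
logger.info("normal progress message")
logger.error("something went wrong")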
13
lib/tools/md5.py
Normal file
13
lib/tools/md5.py
Normal file
@@ -0,0 +1,13 @@
|
||||
import hashlib
from typing import List


def file_md5(file_path: str) -> str:
    """
    Compute the MD5 hash of the file content.
    """
    hash_md5 = hashlib.md5()
    with open(file_path, "rb") as f:
        for chunk in iter(lambda: f.read(4096), b""):
            hash_md5.update(chunk)
    return hash_md5.hexdigest()
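
# Illustrative usage sketch (the path below is an assumption for demonstration only):
# print(file_md5("/tmp/example.bin"))  # prints the 32-character hex digest of the file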
55
lib/tools/ourtime.py
Normal file
@@ -0,0 +1,55 @@
from datetime import datetime, timezone, timedelta
import re


def epoch_get(deadline: str) -> int:
    """
    Set the deadline based on various input formats.

    Supports:
    - Relative: +1h (hours), +2d (days), +1w (week), +1m (month)
    - Absolute: 20/10/2024, 20/10, 20/10/24 (all the same day)

    If the hour is not specified, defaults to midday (noon).

    Returns the deadline as a Unix timestamp (epoch).
    """
    now = datetime.now(timezone.utc)

    # Check for relative time format
    relative_match = re.match(r'\+(\d+)([hdwm])', deadline)
    if relative_match:
        amount, unit = relative_match.groups()
        amount = int(amount)
        if unit == 'h':
            delta = timedelta(hours=amount)
        elif unit == 'd':
            delta = timedelta(days=amount)
        elif unit == 'w':
            delta = timedelta(weeks=amount)
        elif unit == 'm':
            delta = timedelta(days=amount * 30)  # Approximate

        new_deadline = now + delta
        return int(new_deadline.timestamp())

    # Check for absolute date format
    date_formats = ['%d/%m/%Y', '%d/%m/%y', '%d/%m']
    for fmt in date_formats:
        try:
            date_obj = datetime.strptime(deadline, fmt)
            if fmt == '%d/%m':
                # If year is not provided, use the current year
                date_obj = date_obj.replace(year=now.year)

                # If the resulting date is in the past, assume next year
                if date_obj.replace(tzinfo=timezone.utc) < now:
                    date_obj = date_obj.replace(year=date_obj.year + 1)

            # Set time to noon (12:00)
            date_obj = date_obj.replace(hour=12, minute=0, second=0, microsecond=0, tzinfo=timezone.utc)
            return int(date_obj.timestamp())
        except ValueError:
            continue

    raise ValueError("Invalid deadline format. Use +Xh/d/w/m for relative or DD/MM/YYYY for absolute dates.")
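
# Illustrative usage sketch (input values are assumptions for demonstration only):
# epoch_get("+2d")         # epoch two days from now
# epoch_get("+1w")         # epoch one week from now
# epoch_get("20/10/2024")  # epoch for 20 Oct 2024 at 12:00 UTC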
26
lib/tools/pathtools.py
Normal file
@@ -0,0 +1,26 @@
import os


def expand_path(path: str) -> str:
    """
    Expand ~ to the user's home directory and return the absolute path.
    """
    return os.path.abspath(os.path.expanduser(path))


def remove_file_if_exists(file_path):
    try:
        # Remove the file or symlink (including a broken symlink);
        # directories are handled by the IsADirectoryError branch below
        os.remove(file_path)
    except FileNotFoundError:
        # File doesn't exist, so we don't need to do anything
        pass
    except IsADirectoryError:
        # It's a directory, so we use rmdir instead
        os.rmdir(file_path)
    except PermissionError:
        print(f"Permission denied: Unable to remove {file_path}")
    except Exception as e:
        print(f"An error occurred while trying to remove {file_path}: {str(e)}")
110
lib/tools/texttools.py
Normal file
@@ -0,0 +1,110 @@
import re
import unicodedata
import random


def description_fix(description):
    description = description.lower()
    description = unicodedata.normalize('NFKD', description).encode('ASCII', 'ignore').decode('ASCII')
    description = re.sub(r'[^a-z0-9\s]', '', description)
    return description.strip()


# def name_fix(name: str) -> str:
#     """
#     Normalize the string to lowercase ASCII, replace spaces and specific punctuations with underscores,
#     and remove non-ASCII characters.
#     """
#     name = name.lower()
#     name = unicodedata.normalize('NFKD', name).encode('ascii', 'ignore').decode('ascii')
#     name = re.sub(r'[ :;!]', '_', name)  # Replace spaces and specific punctuations with underscores
#     name = re.sub(r'\W+', '', name)  # Remove any other non-word characters
#     return name


def name_fix(name: str) -> str:
    """
    Normalize the string to lowercase ASCII, replace spaces and specific punctuations with underscores,
    maintain dots, and remove non-ASCII characters.
    """
    name = name.lower()
    name = unicodedata.normalize('NFKD', name).encode('ascii', 'ignore').decode('ascii')
    name = re.sub(r'[ :;!]', '_', name)  # Replace spaces and specific punctuations with underscores
    name = re.sub(r'[^\w._]+', '', name)  # Remove any non-word characters except dots and underscores
    return name


def name_obfuscate(name):
    # Define a mapping of consonants to their obfuscated counterparts
    consonant_map = {
        'b': 'p', 'c': 'k', 'd': 't', 'f': 'v', 'g': 'j', 'h': 'x',
        'j': 'q', 'k': 'c', 'l': 'r', 'm': 'n', 'n': 'm', 'p': 'b',
        'q': 'g', 'r': 'l', 's': 'z', 't': 'd', 'v': 'f', 'w': 'y',
        'x': 'h', 'y': 'w', 'z': 's'
    }

    # Define a mapping of vowels to their obfuscated counterparts
    vowel_map = {
        'a': 'e', 'e': 'i', 'i': 'o', 'o': 'u', 'u': 'a'
    }

    # Convert the name to lowercase
    name = name.lower()

    # Split the name into words
    words = name.split()

    obfuscated_words = []
    for word in words:
        obfuscated_word = ''
        for char in word:
            if char in vowel_map:
                # Obfuscate vowels
                obfuscated_word += vowel_map[char]
            elif char in consonant_map:
                # Obfuscate consonants
                obfuscated_word += consonant_map[char]
            else:
                # Keep non-alphabetic characters unchanged
                obfuscated_word += char
        obfuscated_words.append(obfuscated_word)

    # Join the obfuscated words back into a single string
    obfuscated_name = ' '.join(obfuscated_words)

    # Capitalize the first letter of each word
    obfuscated_name = obfuscated_name.title()

    return obfuscated_name


def dedent(content: str) -> str:
    # Split the input content into lines
    lines = content.splitlines()

    # Remove leading and trailing empty lines
    while lines and not lines[0].strip():
        lines.pop(0)
    while lines and not lines[-1].strip():
        lines.pop()

    if not lines:
        return ""

    # Find the minimum indentation (leading spaces) in all non-empty lines
    min_indent = None
    for line in lines:
        stripped_line = line.lstrip()
        if stripped_line:  # Only consider non-empty lines
            leading_spaces = len(line) - len(stripped_line)
            if min_indent is None or leading_spaces < min_indent:
                min_indent = leading_spaces

    # Dedent each line by the minimum indentation found
    dedented_lines = [line[min_indent:] if len(line) > min_indent else line for line in lines]

    # Join the dedented lines back into a single string
    return "\n".join(dedented_lines)


if __name__ == "__main__":
    print("fixed name:", name_fix("John Doe"))
    print("obfuscated name:", name_obfuscate("John Doe"))
94
lib/web/doctools/html_replacer.py
Normal file
@@ -0,0 +1,94 @@
from herotools.logger import logger
from bs4 import BeautifulSoup
import re
from typing import Callable
from herotools.texttools import name_fix

# Define the type for the content and link fetching functions
LinkFetcher = Callable[[str, str, str, str, str], str]
ContentFetcher = Callable[[str, str, str, str], str]

# Private functions to be used internally

def _get_link(language: str, prefix: str, site_name: str, pagename: str, name: str) -> str:
    # Replace this with your logic to get the actual link
    logger.debug(f"_get_link: {language[:10]:<10} {site_name}:{pagename}:{name}")
    return f"{prefix}{language}/{site_name}/{pagename}/{name}.jpg"


def _get_content(language: str, site_name: str, pagename: str, name: str) -> str:
    # Replace this with your logic to get the actual content
    logger.debug(f"_get_content: {language[:10]:<10} {site_name}:{pagename}:{name}")
    return f"Replaced text for {name} on page {pagename} in {language} language on {site_name} site"


def _process_html(language: str, prefix: str, site_name: str, pagename: str, html_content: str) -> str:
    """
    Function to process HTML and replace content based on tags.
    This allows us to work with templates and get content based on language to replace in HTML.
    """
    language = name_fix(language)
    site_name = name_fix(site_name)
    pagename = name_fix(pagename)
    prefix = prefix.strip()
    if not prefix.endswith('/'):
        prefix += '/'

    soup = BeautifulSoup(html_content, 'html.parser')

    # Find all elements with class names starting with !!img: or !!txt:
    for element in soup.find_all(class_=re.compile(r'!!(img|txt):(.+)')):
        for cls in element['class']:
            if cls.startswith('!!img:'):
                name = cls.split(':')[1]
                name = name_fix(name)
                # Get the link to replace the src attribute in !!img: elements
                link = _get_link(language=language, prefix=prefix, site_name=site_name, pagename=pagename, name=name)
                if element.name == 'img':
                    element['src'] = link
                elif 'src' in element.attrs:
                    element['src'] = link  # In case the element is not an img but has a src attribute
            elif cls.startswith('!!txt:'):
                name = cls.split(':')[1]
                name = name_fix(name)
                # Get the content to replace the text in !!txt: elements
                content = _get_content(language=language, site_name=site_name, pagename=pagename, name=name)
                element.string = content

    # Output the modified HTML
    return str(soup)


# Public function to process the HTML content
def process(language: str, prefix: str, site_name: str, pagename: str, html_content: str) -> str:
    """
    Public function to process HTML and replace content based on tags.
    This function wraps the internal _process_html function.
    """
    return _process_html(language=language, prefix=prefix, site_name=site_name, pagename=pagename, html_content=html_content)


# Sample usage with a given language, site name, page name, and HTML content
if __name__ == "__main__":
    # Example HTML content
    html_content = '''
    <!DOCTYPE html>
    <html lang="en">
    <head>
        <meta charset="UTF-8">
        <meta name="viewport" content="width=device-width, initial-scale=1.0">
        <title>Sample Page</title>
    </head>
    <body>
        <h2 class="mb-6 is-size-1 is-size-3-mobile has-text-weight-bold !!txt:title1">Take care of your performance every day.</h2>
        <img class="responsive !!img:logo" src="old-link.jpg" alt="Company Logo">
        <p class="content !!txt:description">This is a sample description text.</p>
    </body>
    </html>
    '''

    # Process the HTML content for a specific language, site name, and page
    language: str = "en"
    site_name: str = "ExampleSite"
    pagename: str = "HomePage"
    prefix: str = "http://localhost/images/"
    processed_html: str = process(language=language, prefix=prefix, site_name=site_name, pagename=pagename, html_content=html_content)

    # Print the modified HTML
    print(processed_html)
172
lib/web/doctools/md_replacer.py
Normal file
@@ -0,0 +1,172 @@
import sys
import os

# Add the parent directory of herotools to the Python module search path
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))

from herotools.logger import logger
from markdown_it import MarkdownIt
from markdown_it.tree import SyntaxTreeNode
import re
from enum import Enum
from herotools.texttools import name_fix
from mdformat.renderer import MDRenderer
from urllib.parse import urlparse


class ImageType(Enum):
    JPEG = 'jpeg'
    PNG = 'png'
    GIF = 'gif'
    OTHER = 'other'


def get_link_page(prefix: str, linkname: str, sitename: str, name: str) -> str:
    """
    Generates a markdown page link based on prefix, link name, sitename and page name.

    Args:
        prefix (str): The URL prefix to prepend.
        linkname (str): The text shown for the link.
        sitename (str): The name of the site.
        name (str): The name of the page.

    Returns:
        str: The generated link.
    """
    logger.debug(f"get_link_page: {prefix[:60]:<60} {linkname} {sitename}:{name}")
    return f"[{linkname}]({prefix}/{sitename}/{name})"


def get_link_image(prefix: str, sitename: str, name: str, image_type: ImageType) -> str:
    """
    Generates an image link based on prefix, sitename, image name and image type.

    Args:
        prefix (str): The URL prefix to prepend.
        sitename (str): The name of the site.
        name (str): The name of the image.
        image_type (ImageType): The type of the image.

    Returns:
        str: The generated link.
    """
    logger.debug(f"get_link_image: {prefix[:60]:<60} {sitename}:{name}")
    return f""


def get_include(sitename: str, name: str) -> str:
    """
    Generates an include directive link based on sitename and name.

    Args:
        sitename (str): The name of the site.
        name (str): The name of the page to include.

    Returns:
        str: The generated include directive.
    """
    logger.debug(f"get_include: {sitename}:{name}")
    return f"include: {sitename}/{name}"


def replace(prefix: str, markdown: str) -> str:
    """
    Finds all image links, markdown page links, and custom include directives in the provided markdown text
    and replaces them using the appropriate functions.

    Args:
        prefix (str): The URL prefix used to rewrite links.
        markdown (str): The markdown content.

    Returns:
        str: The modified markdown content with updated links.
    """
    # Initialize the Markdown parser
    md = MarkdownIt()
    tokens = md.parse(markdown)
    ast = SyntaxTreeNode(tokens)

    print(ast.pretty(indent=2, show_text=True))

    def process_node(node: SyntaxTreeNode):
        # from IPython import embed; embed()

        def get_new_url(url: str):
            logger.debug(f"url: {url}")

            parsed_url = urlparse(url)
            # site_name = parsed_url.netloc
            image_path = parsed_url.path
            logger.debug(f"parsed_url: {parsed_url}")

            new_url = f"{prefix.rstrip('/')}/{image_path.strip('/')}"
            logger.debug(f"new_url: {new_url}")

            return new_url

        if node.type == 'image':
            # Process image link
            url = node.attrs.get('src', '')
            new_url = get_new_url(url)
            node.attrs['src'] = new_url

        elif node.type == 'link':
            # Process markdown page link
            url = node.attrs.get('href', '')
            new_url = get_new_url(url)
            node.attrs['href'] = new_url

        # Recursively process child nodes
        for child in node.children or []:
            process_node(child)

    def replace_include_directives(match: re.Match) -> str:
        """
        Replaces custom include directives with appropriate links.

        Args:
            match (re.Match): The match object containing the found include directive.

        Returns:
            str: The generated link for the include directive.
        """
        url = match.group(1)
        if ':' in url:
            site_name, page = url.split(':', 1)
        else:
            site_name = ""
            page = url
        page_name = page.split('/')[-1]
        if not page_name.endswith('.md'):
            page_name += '.md'
        return get_include(site_name, page_name)

    # Process the root node
    process_node(ast)

    # Convert the AST back to markdown
    renderer = MDRenderer()
    options = {}
    env = {}
    rendered_markdown = renderer.render(tokens, options, env)

    # include_pattern = re.compile(r"!!include page:'(.*?)'")
    # rendered_markdown = include_pattern.sub(replace_include_directives, rendered_markdown)

    return rendered_markdown


if __name__ == "__main__":

    text = """

[Page link](sitename:some/path/to/page.md)
!!include page:'mypage'
!!include page:'mypage.md'
!!include page:'mysite:mypage
!!include page:'mysite:mypage'
!!include page:'mysite:mypage.md'
"""

    print(text)
    text2 = replace("http://localhost:8080/pre/", text)
    print(text2)
94
lib/web/doctools/processor.py
Normal file
@@ -0,0 +1,94 @@
import os
import re
from typing import Callable

from herotools.logger import logger
from herotools.md5 import file_md5
from herotools.texttools import name_fix


def _example_set_file(site_name: str, path: str, md5: str) -> None:
    # Placeholder for actual implementation
    logger.debug(f"set_file : site_name={site_name[:20]:<20} {path}")


def _example_set_img(site_name: str, path: str, md5: str) -> None:
    # Placeholder for actual implementation
    logger.debug(f"set_img : site_name={site_name[:20]:<20} {path}")


def _example_set_markdown(
    site_name: str, path: str, md5: str, content: str
) -> None:
    # Placeholder for actual implementation
    logger.debug(f"set_markdown : site_name={site_name[:20]:<20} {path}")


def _example_set_site(site_name: str, path: str) -> None:
    # Placeholder for actual implementation
    logger.info(f"set_site : site_name={site_name[:20]:<20} {path}")


def _site_process_action(
    site_name: str,
    site_path: str,
    set_file: Callable[[str, str, str], None],
    set_img: Callable[[str, str, str], None],
    set_markdown: Callable[[str, str, str, str], None],
) -> None:
    logger.debug(f"site process: {site_path[:60]:<60} -> {site_name}")
    for root, _, files in os.walk(site_path):
        for file in files:
            file_path = os.path.join(root, file)
            file_path_rel = os.path.relpath(file_path, site_path)
            file_name = os.path.basename(file)
            # print(file_name)
            mymd5 = file_md5(file_path)
            if file.lower().endswith(".md"):
                with open(file_path, "r", encoding="utf-8") as f:
                    content = f.read()
                set_markdown(site_name, file_path_rel, mymd5, content)
            elif file_name in [".collection", ".site", ".done"]:
                continue
            elif re.search(
                r"\.(jpg|jpeg|png|gif|bmp|tiff|webp)$", file, re.IGNORECASE
            ):
                set_img(site_name, file_path_rel, mymd5)
            else:
                set_file(site_name, file_path_rel, mymd5)


def process(
    path: str,
    set_site: Callable[[str, str], None],
    set_file: Callable[[str, str, str], None],
    set_img: Callable[[str, str, str], None],
    set_markdown: Callable[[str, str, str, str], None],
) -> None:
    """
    Walk over a directory and apply set_file(), set_img() and set_markdown() to every file found.
    """
    path = os.path.abspath(os.path.expanduser(path))
    logger.info(f"sites process: {path}")
    for root, dirs, files in os.walk(path):
        if ".site" in files or ".collection" in files:
            site_name = name_fix(os.path.basename(root))
            set_site(site_name, root)
            _site_process_action(
                site_name, root, set_file, set_img, set_markdown
            )
            # Prevent the os.walk from going deeper into subdirectories
            dirs[:] = []


if __name__ == "__main__":
    mypath = "~/code/git.threefold.info/projectmycelium/info_projectmycelium/collections"

    # logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
    process(
        mypath,
        _example_set_site,
        _example_set_file,
        _example_set_img,
        _example_set_markdown,
    )