import hashlib
import os
import shutil
import tempfile
from typing import Dict
from urllib.parse import urlparse

import requests
from bs4 import BeautifulSoup

from tools.deduper import Deduper
from tools.extensions import check_and_add_extension

# Socket timeout (seconds) for asset downloads, so a stalled server cannot
# hang the whole conversion.
_DOWNLOAD_TIMEOUT_SECONDS = 30


class HTMLTemplateConverter:
    """Convert the HTML files under ``src_dir`` into Jinja templates in ``dest_dir``.

    Inline ``<svg>`` elements are extracted into the static directory and
    replaced by ``{% include %}`` statements; CSS, JavaScript and image
    references are registered with the static-asset deduper and rewritten to
    point at ``static/<path>``.
    """

    def __init__(self, src_dir: str, dest_dir: str, static_dir: str = "", reset: bool = False) -> None:
        """Set up the converter.

        Args:
            src_dir: Directory containing the source HTML files and assets.
            dest_dir: Directory the converted templates are written to.
            static_dir: Directory handed to the deduper; defaults to
                ``<dest_dir>/static`` when empty.
            reset: When true, an existing ``dest_dir`` is deleted first.
        """
        self.src_dir = src_dir
        self.dest_dir = dest_dir
        if not static_dir:
            static_dir = f"{dest_dir}/static"
        if reset and os.path.exists(self.dest_dir):
            print(" - reset")
            shutil.rmtree(self.dest_dir)
        self.deduper_static = Deduper(static_dir)
        if reset:
            # NOTE(review): assets are loaded only when a reset was requested,
            # i.e. right after dest_dir was wiped -- confirm this is not meant
            # to be `if not reset`.
            self.deduper_static.load_assets()

    def _find_local(self, file_path: str) -> str:
        """Return ``file_path`` as given or relative to ``src_dir``; "" if neither exists."""
        if os.path.exists(file_path):
            return file_path
        candidate = f"{self.src_dir}/{file_path}"
        if os.path.exists(candidate):
            return candidate
        return ""

    def add_to_static(self, file_path: str, dest_dir_rel: str = "") -> str:
        """Add a local file to the static directory.

        Args:
            file_path: Path to the file, either usable as-is or relative to
                ``src_dir``.
            dest_dir_rel: Sub-directory passed through to the deduper.

        Returns:
            The path to use in the template for the file link: the location
            the deduper already knows for this file, otherwise whatever
            ``Deduper.add`` returns.

        Raises:
            FileNotFoundError: If the file exists neither as given nor under
                ``src_dir``.
        """
        resolved = self._find_local(file_path)
        if not resolved:
            file_path2 = f"{self.src_dir}/{file_path}"
            raise FileNotFoundError(f"File '{file_path}' and {file_path2} does not exist.")

        # Reuse the existing location when the deduper already has this file.
        file_dedupe_location = self.deduper_static.path_check(resolved)
        if file_dedupe_location:
            return file_dedupe_location
        return self.deduper_static.add(source_path=resolved, dest_dir_rel=dest_dir_rel)

    def _download_and_add(self, url: str, dest_dir_rel: str) -> str:
        """Download ``url`` into a private temp directory and add it to static.

        Returns "" (after printing an error) when the server does not answer
        with HTTP 200.
        """
        response = requests.get(url, timeout=_DOWNLOAD_TIMEOUT_SECONDS)
        if response.status_code != 200:
            print(f"ERROR: failed to download {url}")
            return ""

        local_filename = os.path.basename(urlparse(url).path)
        if not local_filename:
            # URLs such as "https://host/" have no basename; derive a stable
            # name instead of trying to open a directory for writing.
            local_filename = hashlib.sha256(url.encode("utf-8")).hexdigest()

        # A per-download temp directory keeps the original basename while
        # avoiding collisions between same-named files, and is always removed.
        with tempfile.TemporaryDirectory() as temp_dir:
            temp_path = os.path.join(temp_dir, local_filename)
            with open(temp_path, 'wb') as f:
                f.write(response.content)
            print(f" - download {url}")
            return self.add_file(temp_path, dest_dir_rel=dest_dir_rel)

    def add_file(self, file_path: str, remove: bool = False, dest_dir_rel: str = "") -> str:
        """Add a local file or a remote URL to the static directory.

        Args:
            file_path: ``http(s)://`` URL, or a local path (as given or
                relative to ``src_dir``).
            remove: Kept for backward compatibility. Temporary downloads are
                now always cleaned up regardless of this flag; local source
                files are never removed.
            dest_dir_rel: Sub-directory passed through to the deduper.

        Returns:
            The path to use in the template, or "" when a download failed.

        Raises:
            FileNotFoundError: If a local file cannot be found.
        """
        print(f" - addfile {file_path} for dest_dir_rel:{dest_dir_rel}")
        if file_path.startswith(('http://', 'https://')):
            return self._download_and_add(file_path, dest_dir_rel)

        resolved = self._find_local(file_path)
        if not resolved:
            file_path2 = f"{self.src_dir}/{file_path}"
            raise FileNotFoundError(f"File '{file_path}' or {file_path2} does not exist.")
        # add_to_static performs the deduper lookup, so it is not repeated here.
        return self.add_to_static(resolved, dest_dir_rel=dest_dir_rel)

    def _extract_svgs(self, soup: BeautifulSoup) -> None:
        """Move every inline ``<svg>`` into static and replace it by a Jinja include."""
        for svg in soup.find_all('svg'):
            # The basename stays "my.svg"; only the directory is unique, so
            # parallel runs cannot overwrite each other's file and nothing is
            # left behind if adding the file fails.
            with tempfile.TemporaryDirectory() as temp_dir:
                svg_file_path = os.path.join(temp_dir, "my.svg")
                with open(svg_file_path, 'w', encoding='utf-8') as svg_file:
                    svg_file.write(str(svg))
                svg_path = self.add_file(file_path=svg_file_path, dest_dir_rel="svg")

            svg_path_full = f"{self.deduper_static.path}/{svg_path}"
            if not os.path.exists(svg_path_full):
                raise Exception(f"file svg does not exist {svg_path_full}")

            # Symlink the stored SVG into dest_dir so the include path resolves
            # relative to the template directory.
            svg_file_path2 = os.path.join(self.dest_dir, svg_path)
            if not (os.path.exists(svg_file_path2) or os.path.islink(svg_file_path2)):
                os.makedirs(os.path.dirname(svg_file_path2), exist_ok=True)
                svg_path_full = os.path.abspath(svg_path_full)
                svg_file_path2 = os.path.abspath(svg_file_path2)
                print(f" - symlink: {svg_path_full} {svg_file_path2}")
                os.symlink(svg_path_full, svg_file_path2)

            svg.replace_with(f"{{% include '{svg_path}' %}}")

    def _rewrite_asset_refs(self, soup: BeautifulSoup, tag_name: str, attr: str,
                            suffix: str, dest_dir_rel: str) -> None:
        """Register the assets referenced by ``<tag_name attr=...>`` and rewrite the links.

        Only references ending in ``suffix`` are handled (an empty suffix
        matches everything). ``data:`` URIs are skipped because they are not
        files. A reference whose download failed is left untouched rather
        than being rewritten to a dangling ``static/`` link.
        """
        for tag in soup.find_all(tag_name, **{attr: True}):
            ref = tag[attr]
            if ref.startswith('data:'):
                continue
            if suffix and not ref.endswith(suffix):
                continue
            new_ref = self.add_file(ref, dest_dir_rel=dest_dir_rel)
            if new_ref:
                tag[attr] = f"static/{new_ref}"

    def convert(self) -> None:
        """Convert every ``.html`` file found under ``src_dir``.

        NOTE: output files are written directly into ``dest_dir`` using only
        the file name, so same-named files from different sub-directories
        overwrite each other.
        """
        os.makedirs(self.dest_dir, exist_ok=True)
        for root, _, files in os.walk(self.src_dir):
            for file in files:
                if not file.endswith('.html'):
                    continue
                src_file_path = os.path.join(root, file)
                dest_file_path = os.path.join(self.dest_dir, file)

                with open(src_file_path, 'r', encoding='utf-8') as html_file:
                    soup = BeautifulSoup(html_file.read(), 'html.parser')

                self._extract_svgs(soup)
                self._rewrite_asset_refs(soup, 'link', 'href', '.css', 'css')
                self._rewrite_asset_refs(soup, 'script', 'src', '.js', 'js')
                self._rewrite_asset_refs(soup, 'img', 'src', '', '')

                jinja_template = str(soup.prettify())
                with open(dest_file_path, 'w', encoding='utf-8') as dest_file:
                    dest_file.write(jinja_template)


# Example usage
#
# converter = HTMLTemplateConverter("source_directory", "destination_directory")
# converter.convert()


def new(src_dir: str, dest_dir: str, static_dir: str = "", reset: bool = False) -> HTMLTemplateConverter:
    """Create a converter, run the conversion immediately and return the converter."""
    converter = HTMLTemplateConverter(src_dir, dest_dir, static_dir=static_dir, reset=reset)
    converter.convert()
    return converter