# heroweb/lib/tools/templatefixer.py
import os
import hashlib
from bs4 import BeautifulSoup
import shutil
from urllib.parse import urlparse
import requests
from tools.extensions import check_and_add_extension
from tools.deduper import Deduper
from typing import Dict
class HTMLTemplateConverter:
    """Convert plain HTML files into Jinja templates.

    Walks ``src_dir`` for ``*.html`` files, extracts inline ``<svg>``
    elements into separate include files under ``dest_dir/svgs``,
    rewrites css/js/img references to deduplicated copies (either the
    shared dedupe pool or ``dest_dir/static``), and writes the resulting
    templates into ``dest_dir``.
    """

    def __init__(self, src_dir: str, dest_dir: str, dedupe_dir: str, reset: bool = False):
        """
        Args:
            src_dir: directory holding the source HTML files.
            dest_dir: directory where templates and assets are written.
            dedupe_dir: directory used by the Deduper as a content pool.
            reset: when True, wipe ``dest_dir`` before converting.
        """
        self.src_dir = src_dir
        self.dest_dir = dest_dir
        self.static_dir = f"{dest_dir}/static"
        self.dedupe_dir = dedupe_dir
        if reset and os.path.exists(self.dest_dir):
            print(" - reset")
            shutil.rmtree(self.dest_dir)
        self.deduper = Deduper(self.dedupe_dir)

    def _resolve_src_path(self, file_path: str) -> str:
        """Return an existing path for ``file_path``, retrying relative to ``src_dir``.

        Raises:
            FileNotFoundError: if the file exists neither as given nor
                under ``src_dir``.
        """
        if not os.path.exists(file_path):
            file_path = f"{self.src_dir}/{file_path}"
            if not os.path.exists(file_path):
                raise FileNotFoundError(f"File '{file_path}' does not exist.")
        return file_path

    def add_to_static(self, file_path: str) -> str:
        """Copy ``file_path`` into ``static_dir`` under a unique lowercase name.

        If a file with the same name and identical content (md5) already
        exists, it is reused. On a name clash with different content the
        name is suffixed ``_2``, ``_3``, ... until a free (or identical)
        slot is found.

        Returns:
            The basename of the file inside ``static_dir``.

        Raises:
            FileNotFoundError: if the source file cannot be located.
        """
        file_path = self._resolve_src_path(file_path)
        base_name, extension = os.path.splitext(os.path.basename(file_path))
        base_name = base_name.lower()
        extension = extension.lower()
        base_path = os.path.join(self.static_dir, base_name)
        new_path = f"{base_path}{extension}"
        file_hash = self.deduper._calculate_md5(file_path)
        counter = 2
        while os.path.exists(new_path):
            # Identical content already present under this name: reuse it.
            if file_hash == self.deduper._calculate_md5(new_path):
                return os.path.basename(new_path)
            new_path = f"{base_path}_{counter}{extension}"
            counter += 1
        shutil.copy2(file_path, new_path)
        return os.path.basename(new_path)

    def _add_remote_file(self, url: str) -> str:
        """Download ``url`` into /tmp/files and register it via ``add_file``.

        Returns:
            The URL path for the registered asset, or "" on download failure.
        """
        # Timeout added: the original call could hang forever on a dead host.
        response = requests.get(url, timeout=60)
        if response.status_code != 200:
            print(f"ERROR: failed to download {url}")
            return ""
        url_path = urlparse(url).path
        local_filename = os.path.basename(url_path)
        temp_dir = os.path.join("/tmp/files")
        os.makedirs(temp_dir, exist_ok=True)
        temp_path = os.path.join(temp_dir, local_filename)
        with open(temp_path, 'wb') as f:
            f.write(response.content)
        print(f" - download {url}")
        # remove=True: the temp copy is deleted once deduplicated/copied.
        return self.add_file(temp_path, remove=True)

    def add_file(self, file_path: str, remove: bool = False) -> str:
        """Register an asset file and return the URL path to reference it by.

        Remote http(s) URLs are downloaded first. Files whose content is
        already in the dedupe pool resolve to ``/files/<name>``; otherwise
        the file is copied into the static dir and resolves to
        ``/static/<name>``.

        Args:
            file_path: local path (absolute or relative to ``src_dir``)
                or an http(s) URL.
            remove: delete the local source file after registering it
                (used for downloaded temp files).

        Returns:
            The URL path, or "" when a download fails.
        """
        #print (f" - addfile {file_path}")
        if file_path.startswith('http://') or file_path.startswith('https://'):
            return self._add_remote_file(file_path)
        file_path = self._resolve_src_path(file_path)
        existing_path = self.deduper.check_from_path(file_path)
        if existing_path:
            print(" - exists in dedupe pool")
            if remove:
                # BUGFIX: the original removed the undefined name
                # ``temp_path`` here, raising NameError.
                os.remove(file_path)
            return f"/files/{existing_path}"
        # Not in dedupe pool: copy to the static dir.
        static_path = self.add_to_static(file_path)
        if remove:
            # BUGFIX: the original leaked the temp file on this path.
            os.remove(file_path)
        return f"/static/{static_path}"

    def _extract_svg(self, svg, file: str, index: int) -> str:
        """Persist one inline ``<svg>`` element (deduplicated) to disk.

        Args:
            svg: the BeautifulSoup ``<svg>`` tag.
            file: name of the HTML file it came from (used for naming).
            index: 1-based position of the svg within that file.

        Returns:
            The template-relative include path (``svgs/...``).
        """
        svg_file_path = "/tmp/my.svg"
        with open(svg_file_path, 'w', encoding='utf-8') as svg_file:
            svg_file.write(str(svg))
        try:
            existing_path = self.deduper.check_from_path(svg_file_path)
            if existing_path:
                svg_filename = f'svgs/{existing_path}'
                print(f" - svg exists in dedupe pool: , '{svg_filename}'")
                svg_file_path2 = os.path.join(self.dest_dir, svg_filename)
                if not (os.path.exists(svg_file_path2) or os.path.islink(svg_file_path2)):
                    # BUGFIX: the original called makedirs on the
                    # template-relative path, creating 'svgs/' under the
                    # CWD instead of under dest_dir.
                    os.makedirs(os.path.dirname(svg_file_path2), exist_ok=True)
                    src_abs = os.path.abspath(os.path.join(self.dedupe_dir, existing_path))
                    dst_abs = os.path.abspath(svg_file_path2)
                    print(f" - symlink: {src_abs} {dst_abs}")
                    os.symlink(src_abs, dst_abs)
            else:
                svg_filename = f'svgs/{file}_svg_{index}.svg'
                shutil.copy(svg_file_path, os.path.join(self.dest_dir, svg_filename))
                print(f" - svg new {svg_filename}")
        finally:
            # Always clean up the temp file (the original leaked it on the
            # dedupe-hit path).
            if os.path.exists(svg_file_path):
                os.remove(svg_file_path)
        return svg_filename

    def _convert_html_file(self, src_file_path: str, file: str) -> None:
        """Convert one HTML file into a Jinja template in ``dest_dir``."""
        dest_file_path = os.path.join(self.dest_dir, file)
        with open(src_file_path, 'r', encoding='utf-8') as html_file:
            soup = BeautifulSoup(html_file.read(), 'html.parser')
        # Replace every inline svg with a Jinja include of its extracted file.
        for i, svg in enumerate(soup.find_all('svg'), start=1):
            svg_filename = self._extract_svg(svg, file, i)
            svg.replace_with(f"{{% include '{svg_filename}' %}}")
        # Rewrite asset references to their deduplicated locations.
        for link in soup.find_all('link', href=True):
            if link['href'].endswith('.css'):
                link['href'] = self.add_file(link['href'])
        for script in soup.find_all('script', src=True):
            if script['src'].endswith('.js'):
                script['src'] = self.add_file(script['src'])
        for img in soup.find_all('img', src=True):
            img['src'] = self.add_file(img['src'])
        with open(dest_file_path, 'w', encoding='utf-8') as dest_file:
            dest_file.write(str(soup.prettify()))

    def convert(self) -> None:
        """Convert every ``*.html`` under ``src_dir`` into a Jinja template."""
        os.makedirs(self.dest_dir, exist_ok=True)
        os.makedirs(os.path.join(self.dest_dir, 'svgs'), exist_ok=True)
        os.makedirs(os.path.join(self.dest_dir, 'static'), exist_ok=True)
        for root, _, files in os.walk(self.src_dir):
            for file in files:
                if file.endswith('.html'):
                    self._convert_html_file(os.path.join(root, file), file)
# Example usage
#
# converter = HTMLTemplateConverter("source_directory", "destination_directory", "dedupe_directory")
# converter.convert()
def new(src_dir: str, dest_dir: str, dedupe_dir: str, reset: bool = False) -> HTMLTemplateConverter:
    """Build an HTMLTemplateConverter, run the conversion, and return it."""
    converter = HTMLTemplateConverter(src_dir, dest_dir, dedupe_dir, reset=reset)
    converter.convert()
    return converter