191 lines
8.2 KiB
Python
191 lines
8.2 KiB
Python
|
import os
|
||
|
import hashlib
|
||
|
from bs4 import BeautifulSoup
|
||
|
import shutil
|
||
|
from urllib.parse import urlparse
|
||
|
import requests
|
||
|
from tools.extensions import check_and_add_extension
|
||
|
from tools.deduper import Deduper
|
||
|
from typing import Dict
|
||
|
|
||
|
class HTMLTemplateConverter:
    """Convert static HTML files from *src_dir* into Jinja templates in *dest_dir*.

    During conversion:
      * inline ``<svg>`` elements are extracted to files and replaced with
        Jinja ``{% include %}`` tags (de-duplicated by content via the
        Deduper pool in *dedupe_dir*),
      * CSS/JS/image assets referenced by the pages are copied into
        ``<dest_dir>/static`` (or resolved against the dedupe pool) and
        their references rewritten.
    """

    def __init__(self, src_dir: str, dest_dir: str, dedupe_dir: str, reset: bool = False):
        """Set up directories and the dedupe pool.

        Args:
            src_dir: directory holding the source HTML files.
            dest_dir: directory the converted templates are written to.
            dedupe_dir: directory backing the Deduper content pool.
            reset: when True, delete any existing *dest_dir* first.
        """
        self.src_dir = src_dir
        self.dest_dir = dest_dir
        self.static_dir = f"{dest_dir}/static"
        self.dedupe_dir = dedupe_dir

        # Start from a clean slate when requested.
        if reset and os.path.exists(self.dest_dir):
            print(" - reset")
            shutil.rmtree(self.dest_dir)

        self.deduper = Deduper(self.dedupe_dir)

    def add_to_static(self, file_path: str) -> str:
        """Copy *file_path* into the static dir, de-duplicating by MD5.

        Relative paths are resolved against ``self.src_dir``. If a file
        with the same (lowercased) name already exists, hashes are
        compared: identical content reuses the existing file, otherwise a
        ``_2``, ``_3``, ... suffix is tried until a free slot is found.

        Returns:
            The basename of the file inside the static directory.

        Raises:
            FileNotFoundError: if the file cannot be located.
        """
        # Resolve the path: try as given, then relative to the source dir.
        if not os.path.exists(file_path):
            file_path = f"{self.src_dir}/{file_path}"
            if not os.path.exists(file_path):
                raise FileNotFoundError(f"File '{file_path}' does not exist.")

        # Robustness: make sure the destination exists even when this is
        # called before convert() has created it.
        os.makedirs(self.static_dir, exist_ok=True)

        # Normalised (lowercase) name and extension for the destination.
        base_name, extension = os.path.splitext(os.path.basename(file_path))
        base_name = base_name.lower()
        extension = extension.lower()

        base_path = os.path.join(self.static_dir, base_name)
        new_path = f"{base_path}{extension}"

        # Hash of the incoming file, used to detect duplicates by content.
        file_hash = self.deduper._calculate_md5(file_path)

        # Probe name collisions: reuse identical content, otherwise pick
        # the next numbered variant.
        counter = 2
        while os.path.exists(new_path):
            existing_file_hash = self.deduper._calculate_md5(new_path)
            if file_hash == existing_file_hash:
                # Same content already present -- reuse the existing file.
                return os.path.basename(new_path)
            new_path = f"{base_path}_{counter}{extension}"
            counter += 1

        # Copy the file to the new unique path.
        shutil.copy2(file_path, new_path)
        return os.path.basename(new_path)

    def add_file(self, file_path: str, remove: bool = False) -> str:
        """Make *file_path* (local path or http(s) URL) available to templates.

        URLs are downloaded to a scratch directory and then re-processed as
        local files. Local files are first checked against the dedupe pool
        (returned as ``/files/...``); otherwise they are copied into the
        static dir (returned as ``/static/...``).

        Args:
            file_path: local path (absolute or relative to *src_dir*) or URL.
            remove: delete the source file after it has been handled
                (set internally for downloaded temp files).

        Returns:
            The web path to reference from the template, or ``""`` when a
            download failed.

        Raises:
            FileNotFoundError: if a local file cannot be located.
        """
        if file_path.startswith(('http://', 'https://')):
            # Bounded timeout so a dead server cannot hang the conversion.
            response = requests.get(file_path, timeout=60)
            if response.status_code != 200:
                print(f"ERROR: failed to download {file_path}")
                return ""

            url_path = urlparse(file_path).path
            local_filename = os.path.basename(url_path)

            # Download into a scratch directory, then recurse as a local
            # file; remove=True cleans the temp copy up afterwards.
            temp_dir = os.path.join("/tmp/files")
            os.makedirs(temp_dir, exist_ok=True)
            temp_path = os.path.join(temp_dir, local_filename)
            with open(temp_path, 'wb') as f:
                f.write(response.content)

            print(f" - download {file_path}")
            return self.add_file(temp_path, remove=True)

        # Local file: resolve against src_dir if needed.
        if not os.path.exists(file_path):
            file_path = f"{self.src_dir}/{file_path}"
            if not os.path.exists(file_path):
                raise FileNotFoundError(f"File '{file_path}' does not exist.")

        # Already in the dedupe pool? Serve the pooled copy.
        existing_path = self.deduper.check_from_path(file_path)
        if existing_path:
            print(" - exists in dedupe pool")
            if remove:
                # BUG FIX: the original removed the undefined name
                # ``temp_path`` here (NameError on this path); the file
                # to clean up is ``file_path``.
                os.remove(file_path)
            return f"/files/{existing_path}"

        # Not in the dedupe pool: copy into the static dir.
        static_path = self.add_to_static(file_path)
        if remove:
            # BUG FIX: the original leaked downloaded temp files on this
            # path -- honour the remove flag after the copy.
            os.remove(file_path)
        return f"/static/{static_path}"

    def convert(self) -> None:
        """Walk *src_dir* and convert every ``.html`` file into a Jinja template.

        Output is flattened into *dest_dir*; note that files from different
        subdirectories with the same name overwrite each other.
        """
        os.makedirs(self.dest_dir, exist_ok=True)
        svgs_dir = os.path.join(self.dest_dir, 'svgs')
        os.makedirs(svgs_dir, exist_ok=True)

        static_dest_dir = os.path.join(self.dest_dir, 'static')
        os.makedirs(static_dest_dir, exist_ok=True)

        for root, _, files in os.walk(self.src_dir):
            for file in files:
                if not file.endswith('.html'):
                    continue
                src_file_path = os.path.join(root, file)
                dest_file_path = os.path.join(self.dest_dir, file)

                with open(src_file_path, 'r', encoding='utf-8') as html_file:
                    html_content = html_file.read()

                soup = BeautifulSoup(html_content, 'html.parser')

                # Extract inline SVGs into include files, deduped by content.
                for i, svg in enumerate(soup.find_all('svg'), start=1):
                    # NOTE(review): fixed scratch path -- concurrent runs
                    # would clobber each other; confirm single-process use.
                    svg_file_path = "/tmp/my.svg"
                    with open(svg_file_path, 'w', encoding='utf-8') as svg_file:
                        svg_file.write(str(svg))

                    existing_path = self.deduper.check_from_path(svg_file_path)

                    if existing_path:
                        # Pooled SVG: symlink the pooled copy into dest.
                        svg_filename = f'svgs/{existing_path}'
                        print(f" - svg exists in dedupe pool: , '{svg_filename}'")
                        pooled_path = os.path.join(self.dedupe_dir, existing_path)
                        svg_dest_path = os.path.join(self.dest_dir, svg_filename)
                        print(svg_dest_path)
                        if not (os.path.exists(svg_dest_path) or os.path.islink(svg_dest_path)):
                            # BUG FIX: create the directory of the real
                            # destination; the original passed the relative
                            # ``svg_filename`` and created ./svgs in the CWD.
                            os.makedirs(os.path.dirname(svg_dest_path), exist_ok=True)
                            pooled_path = os.path.abspath(pooled_path)
                            svg_dest_path = os.path.abspath(svg_dest_path)
                            print(f" - symlink: {pooled_path} {svg_dest_path}")
                            os.symlink(pooled_path, svg_dest_path)
                    else:
                        # New SVG: copy under a per-page, per-index name.
                        svg_filename = f'svgs/{file}_svg_{i}.svg'
                        svg_dest_path = os.path.join(self.dest_dir, svg_filename)
                        shutil.copy(svg_file_path, svg_dest_path)
                        print(f" - svg new {svg_filename}")

                    os.remove(svg_file_path)
                    # Replace the inline SVG with a Jinja include tag.
                    svg.replace_with(f"{{% include '{svg_filename}' %}}")

                # Rewrite stylesheet, script and image references to the
                # local copies produced by add_file().
                for link in soup.find_all('link', href=True):
                    if link['href'].endswith('.css'):
                        link['href'] = self.add_file(link['href'])

                for script in soup.find_all('script', src=True):
                    if script['src'].endswith('.js'):
                        script['src'] = self.add_file(script['src'])

                for img in soup.find_all('img', src=True):
                    img['src'] = self.add_file(img['src'])

                # prettify() already returns str; no extra str() needed.
                jinja_template = soup.prettify()

                with open(dest_file_path, 'w', encoding='utf-8') as dest_file:
                    dest_file.write(jinja_template)
|
||
|
|
||
|
|
||
|
# Example usage
#
# converter = HTMLTemplateConverter("source_directory", "destination_directory", "dedupe_directory")
# converter.convert()
|
||
|
|
||
|
def new(src_dir: str, dest_dir: str, dedupe_dir: str, reset: bool = False) -> HTMLTemplateConverter:
    """Build an HTMLTemplateConverter and immediately run the conversion.

    Args:
        src_dir: directory holding the source HTML files.
        dest_dir: directory the converted templates are written to.
        dedupe_dir: directory backing the Deduper content pool.
        reset: when True, wipe any existing *dest_dir* first.

    Returns:
        The converter instance, after convert() has completed.
    """
    converter = HTMLTemplateConverter(src_dir, dest_dir, dedupe_dir, reset=reset)
    converter.convert()
    return converter
|