95 lines
4.1 KiB
Python
95 lines
4.1 KiB
Python
from herotools.logger import logger
|
|
from bs4 import BeautifulSoup
|
|
import re
|
|
from typing import Callable
|
|
from herotools.texttools import name_fix
|
|
|
|
# Callable signatures for pluggable fetchers.
#
# LinkFetcher has the same shape as _get_link:
#   (language, prefix, site_name, pagename, name) -> link
# ContentFetcher has the same shape as _get_content:
#   (language, site_name, pagename, name) -> text
LinkFetcher = Callable[
    [str, str, str, str, str],
    str,
]
ContentFetcher = Callable[
    [str, str, str, str],
    str,
]
|
|
|
|
# Private functions to be used internally
|
|
|
|
def _get_link(language: str, prefix: str, site_name: str, pagename: str, name: str) -> str:
    """
    Build the image link for a ``!!img:`` marker (placeholder implementation).

    The link is ``prefix`` followed by ``language/site_name/pagename/name.jpg``.
    ``prefix`` is used verbatim, so the caller is responsible for any trailing
    slash it needs.
    """
    # Trace the lookup; the language column is cut/padded to 10 characters
    # so consecutive debug lines stay aligned.
    logger.debug(f"_get_link: {language[:10]:<10} {site_name}:{pagename}:{name}")

    # TODO: replace with the real link lookup.
    link = f"{prefix}{language}/{site_name}/{pagename}/{name}.jpg"
    return link
|
|
|
|
def _get_content(language: str, site_name: str, pagename: str, name: str) -> str:
    """
    Produce the replacement text for a ``!!txt:`` marker (placeholder implementation).

    Currently returns a fixed sentence that echoes the four arguments rather
    than looking up real content.
    """
    # Trace the lookup; the language column is cut/padded to 10 characters
    # so consecutive debug lines stay aligned.
    logger.debug(f"_get_content: {language[:10]:<10} {site_name}:{pagename}:{name}")

    # TODO: replace with the real content lookup.
    text = f"Replaced text for {name} on page {pagename} in {language} language on {site_name} site"
    return text
|
|
|
|
def _process_html(language: str, prefix: str, site_name: str, pagename: str, html_content: str) -> str:
    """
    Fill an HTML template by resolving ``!!img:<name>`` and ``!!txt:<name>`` marker classes.

    Every element carrying a class of the form ``!!img:<name>`` gets its ``src``
    attribute replaced by the link returned from ``_get_link`` (only when the
    element is an ``<img>`` or already has a ``src`` attribute). Every element
    carrying ``!!txt:<name>`` gets its content replaced by the text returned from
    ``_get_content``. The marker classes themselves are left in the output.

    Args:
        language: Language identifier; passed through ``name_fix`` before use.
        prefix: Base for generated links; surrounding whitespace is stripped and
            a trailing ``/`` is appended when missing.
        site_name: Site identifier; passed through ``name_fix`` before use.
        pagename: Page identifier; passed through ``name_fix`` before use.
        html_content: The HTML template to process.

    Returns:
        The modified document serialised back to a string.
    """
    language = name_fix(language)
    site_name = name_fix(site_name)
    pagename = name_fix(pagename)

    # Guarantee the prefix ends with a slash so the link path can be appended directly.
    prefix = prefix.strip()
    if not prefix.endswith('/'):
        prefix += '/'

    soup = BeautifulSoup(html_content, 'html.parser')

    # Find all elements with class names containing !!img:<name> or !!txt:<name>
    for element in soup.find_all(class_=re.compile(r'!!(img|txt):(.+)')):
        for cls in element['class']:
            # Split on the FIRST colon only: everything after it is the name,
            # matching the selecting regex, so names containing ':' stay intact.
            marker, separator, raw_name = cls.partition(':')
            if not separator or marker not in ('!!img', '!!txt'):
                continue  # an ordinary CSS class, not a marker
            if not raw_name:
                continue  # bare "!!img:" / "!!txt:" has nothing to look up

            name = name_fix(raw_name)

            if marker == '!!img':
                # Get the link to replace the src attribute in !!img: elements
                link = _get_link(language=language, prefix=prefix, site_name=site_name, pagename=pagename, name=name)
                # Applies to <img> and to any other element that already has a src attribute.
                if element.name == 'img' or 'src' in element.attrs:
                    element['src'] = link
            else:
                # Get the content to replace the text in !!txt: elements
                content = _get_content(language=language, site_name=site_name, pagename=pagename, name=name)
                # Assigning .string replaces whatever the element contained before.
                element.string = content

    # Output the modified HTML
    return str(soup)
|
|
|
|
# Public function to process the HTML content
|
|
def process(language: str, prefix: str, site_name: str, pagename: str, html_content: str) -> str:
    """
    Public entry point: resolve the ``!!img:`` / ``!!txt:`` markers in an HTML template.

    Forwards all arguments unchanged to the internal ``_process_html`` and
    returns its result.
    """
    rendered = _process_html(language, prefix, site_name, pagename, html_content)
    return rendered
|
|
|
|
# Sample usage with a given language, site name, page name, and HTML content
|
|
if __name__ == "__main__":
    # Demo template: two !!txt: markers (title1, description) and one !!img: marker (logo).
    html_content = '''
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Sample Page</title>
</head>
<body>
<h2 class="mb-6 is-size-1 is-size-3-mobile has-text-weight-bold !!txt:title1">Take care of your performance every day.</h2>
<img class="responsive !!img:logo" src="old-link.jpg" alt="Company Logo">
<p class="content !!txt:description">This is a sample description text.</p>
</body>
</html>
'''

    # Render the template for one language/site/page combination and print the result.
    print(
        process(
            language="en",
            prefix="http://localhost/images/",
            site_name="ExampleSite",
            pagename="HomePage",
            html_content=html_content,
        )
    )
|