from herotools.logger import logger
from bs4 import BeautifulSoup
import re
from typing import Callable
from herotools.texttools import name_fix

# Define the type for the content and link fetching functions
LinkFetcher = Callable[[str, str, str, str, str], str]
ContentFetcher = Callable[[str, str, str, str], str]


# Private functions to be used internally
def _get_link(language: str, prefix: str, site_name: str, pagename: str, name: str) -> str:
    """Build the image link for *name* on the given page.

    Args:
        language: Language segment placed right after the prefix.
        prefix: Base URL; concatenated as-is, so it should already end in '/'.
        site_name: Site segment of the link.
        pagename: Page segment of the link.
        name: Image name; '.jpg' is appended to it.

    Returns:
        The string "<prefix><language>/<site_name>/<pagename>/<name>.jpg".
    """
    # Replace this with your logic to get the actual link
    logger.debug(f"_get_link: {language[:10]:<10} {site_name}:{pagename}:{name}")
    return f"{prefix}{language}/{site_name}/{pagename}/{name}.jpg"


def _get_content(language: str, site_name: str, pagename: str, name: str) -> str:
    """Return the replacement text for *name* on the given page.

    Args:
        language: Language the text is requested for.
        site_name: Site the text belongs to.
        pagename: Page the text belongs to.
        name: Name of the text entry.

    Returns:
        A placeholder sentence that mentions name, page, language and site.
    """
    # Replace this with your logic to get the actual content
    logger.debug(f"_get_content: {language[:10]:<10} {site_name}:{pagename}:{name}")
    return f"Replaced text for {name} on page {pagename} in {language} language on {site_name} site"


def _process_html(language: str, prefix: str, site_name: str, pagename: str, html_content: str) -> str:
    """Replace tagged content in an HTML template.

    Elements are tagged through their CSS classes:

    * ``!!img:<name>`` -- the element's ``src`` attribute is set to the link
      returned by ``_get_link`` (for ``<img>`` elements, or for any other
      element that already has a ``src`` attribute).
    * ``!!txt:<name>`` -- the element's content is replaced by the text
      returned by ``_get_content``.

    ``<name>`` is everything after the first colon of the class.

    Args:
        language: Language to fetch links/content for; passed through name_fix.
        prefix: Base URL for image links; stripped, and a trailing '/' is
            appended when missing.
        site_name: Site name; passed through name_fix.
        pagename: Page name; passed through name_fix.
        html_content: The HTML template to process.

    Returns:
        The modified HTML serialized back to a string.
    """
    language = name_fix(language)
    site_name = name_fix(site_name)
    pagename = name_fix(pagename)

    prefix = prefix.strip()
    if not prefix.endswith('/'):
        prefix += '/'

    soup = BeautifulSoup(html_content, 'html.parser')

    # Find all elements with class names starting with !!img: or !!txt:
    for element in soup.find_all(class_=re.compile(r'!!(img|txt):(.+)')):
        for cls in element['class']:
            is_img = cls.startswith('!!img:')
            if not is_img and not cls.startswith('!!txt:'):
                continue

            # Split only once so that a name which itself contains ':' is
            # kept whole, matching the (.+) group of the selector above.
            raw_name = cls.split(':', 1)[1]
            if not raw_name:
                # A bare "!!img:" / "!!txt:" marker names nothing to fetch.
                continue
            name = name_fix(raw_name)

            if is_img:
                # Get the link to replace the src attribute in !!img: elements
                link = _get_link(language=language, prefix=prefix, site_name=site_name,
                                 pagename=pagename, name=name)
                # Non-img elements are only touched when they already carry
                # a src attribute.
                if element.name == 'img' or 'src' in element.attrs:
                    element['src'] = link
            else:
                # Get the content to replace the text in !!txt: elements
                content = _get_content(language=language, site_name=site_name,
                                       pagename=pagename, name=name)
                element.string = content

    # Output the modified HTML
    return str(soup)


# Public function to process the HTML content
def process(language: str, prefix: str, site_name: str, pagename: str, html_content: str) -> str:
    """Process HTML and replace content based on class tags.

    Thin public wrapper around ``_process_html``; all arguments are
    forwarded unchanged and its result is returned.
    """
    return _process_html(language=language, prefix=prefix, site_name=site_name,
                         pagename=pagename, html_content=html_content)


# Sample usage with a given language, site name, page name, and HTML content
if __name__ == "__main__":
    # Example HTML content
    # NOTE(review): this sample carries no !!img:/!!txt: class markers, so
    # process() has nothing to replace in it -- presumably the original
    # markup was lost; confirm and restore the tagged elements.
    html_content = '''
This is a sample description text.
'''

    # Process the HTML content for a specific language, site name, and page
    language: str = "en"
    site_name: str = "ExampleSite"
    pagename: str = "HomePage"
    prefix: str = "http://localhost/images/"
    processed_html: str = process(language=language, prefix=prefix, site_name=site_name,
                                  pagename=pagename, html_content=html_content)

    # Print the modified HTML
    print(processed_html)