herolib_python/lib/web/doctools/md_replacer.py
2025-08-05 15:15:36 +02:00

172 lines
4.9 KiB
Python

import sys
import os
# Add the parent directory of herotools to the Python module search path
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
from herotools.logger import logger
from markdown_it import MarkdownIt
from markdown_it.tree import SyntaxTreeNode
import re
from enum import Enum
from herotools.texttools import name_fix
from mdformat.renderer import MDRenderer
from urllib.parse import urlparse
class ImageType(Enum):
    """Image formats distinguished when generating image links."""

    JPEG = "jpeg"
    PNG = "png"
    GIF = "gif"
    # Catch-all member for any format not listed above.
    OTHER = "other"
def get_link_page(prefix: str, linkname: str, sitename: str, name: str) -> str:
    """
    Builds a markdown link that points at a page of a site.

    Args:
        prefix (str): Text placed in front of the site name in the link target.
        linkname (str): The text shown for the link.
        sitename (str): The name of the site.
        name (str): The name of the page.

    Returns:
        str: A link of the form ``[linkname](prefix/sitename/name)``.
    """
    # The prefix is cut to 60 characters and left-aligned so that log lines
    # from different calls line up in columns.
    logger.debug(f"get_link_page: {prefix[:60]:<60} {linkname} {sitename}:{name}")
    target = f"{prefix}/{sitename}/{name}"
    return f"[{linkname}]({target})"
def get_link_image(prefix: str, sitename: str, name: str, image_type: ImageType) -> str:
    """
    Builds a markdown image link that points at an image of a site.

    Args:
        prefix (str): Text placed in front of the site name in the image target.
        sitename (str): The name of the site.
        name (str): The name of the image.
        image_type (ImageType): The type of the image. It is accepted but does
            not influence the generated link.

    Returns:
        str: An image link with empty alt text, ``![](prefix/sitename/name)``.
    """
    # The prefix is cut to 60 characters and left-aligned so that log lines
    # from different calls line up in columns.
    logger.debug(f"get_link_image: {prefix[:60]:<60} {sitename}:{name}")
    target = f"{prefix}/{sitename}/{name}"
    return f"![]({target})"
def get_include(sitename: str, name: str) -> str:
    """
    Builds the include directive for a page of a site.

    Args:
        sitename (str): The name of the site.
        name (str): The name of the page to include.

    Returns:
        str: The directive text, ``include: sitename/name``.
    """
    logger.debug(f"get_include: {sitename}:{name}")
    location = f"{sitename}/{name}"
    return f"include: {location}"
def replace(prefix: str, markdown: str) -> str:
    """
    Rewrites the target of every image and every link in the provided markdown text.

    The text is parsed with markdown-it, the ``src`` of each image and the
    ``href`` of each link is replaced by ``<prefix>/<path of the original URL>``,
    and the token stream is rendered back to markdown with mdformat's
    ``MDRenderer``.

    Custom ``!!include page:'...'`` directives are currently left as they are:
    the substitution that would rewrite them is disabled (see the note at the
    end of the function body).

    Args:
        prefix (str): Text placed in front of every rewritten target. Trailing
            slashes are ignored.
        markdown (str): The markdown content.

    Returns:
        str: The re-rendered markdown content with updated links.
    """
    md = MarkdownIt()
    tokens = md.parse(markdown)
    ast = SyntaxTreeNode(tokens)
    # Diagnostic dump of the parsed tree. It goes to the logger instead of
    # stdout so that calling this function does not pollute program output.
    logger.debug(ast.pretty(indent=2, show_text=True))

    # Normalised once here instead of on every rewritten URL.
    base = prefix.rstrip('/')
    # Node type -> name of the attribute that holds the URL to rewrite.
    url_attrs = {'image': 'src', 'link': 'href'}

    def get_new_url(url: str) -> str:
        """Returns ``url`` re-rooted under the prefix, keeping only its path."""
        logger.debug(f"url: {url}")
        parsed_url = urlparse(url)
        logger.debug(f"parsed_url: {parsed_url}")
        # Only the path component of the original URL is used; scheme, host,
        # query and fragment are not carried over into the new URL.
        new_url = f"{base}/{parsed_url.path.strip('/')}"
        logger.debug(f"new_url: {new_url}")
        return new_url

    def process_node(node: SyntaxTreeNode) -> None:
        """Rewrites the URL of ``node`` if it is an image or a link, then recurses."""
        attr = url_attrs.get(node.type)
        if attr is not None:
            # The renderer below consumes ``tokens``, not the tree, so this
            # relies on the node's attrs being shared with its underlying token.
            node.attrs[attr] = get_new_url(node.attrs.get(attr, ''))
        for child in node.children or []:
            process_node(child)

    def replace_include_directives(match: re.Match) -> str:
        """
        Builds the include directive for one matched ``!!include`` directive.

        Args:
            match (re.Match): The match object; group 1 holds either
                ``site:page`` or just ``page``.

        Returns:
            str: The directive produced by ``get_include``.
        """
        url = match.group(1)
        if ':' in url:
            site_name, page = url.split(':', 1)
            # Only the last path component of the page is kept.
            page_name = page.split('/')[-1]
        else:
            site_name = ""
            page_name = url
        # The suffix goes on the name that is actually passed on.
        if not page_name.endswith('.md'):
            page_name += '.md'
        return get_include(site_name, page_name)

    process_node(ast)

    # Convert the (mutated) token stream back to markdown, with empty
    # renderer options and an empty environment.
    rendered_markdown = MDRenderer().render(tokens, {}, {})

    # NOTE: include-directive substitution is disabled; enabling the two lines
    # below would route every directive through replace_include_directives.
    # include_pattern = re.compile(r"!!include page:'(.*?)'")
    # rendered_markdown = include_pattern.sub(replace_include_directives, rendered_markdown)
    return rendered_markdown
if __name__ == "__main__":
    # Manual smoke run: print a sample document, then the same document after
    # its links have been rewritten under a local prefix.
    # NOTE(review): the third include line has no closing quote - presumably a
    # deliberately malformed case; confirm before "fixing" it.
    sample = """
![Image description](https://example.com/image.png)
[Page link](sitename:some/path/to/page.md)
!!include page:'mypage'
!!include page:'mypage.md'
!!include page:'mysite:mypage
!!include page:'mysite:mypage'
!!include page:'mysite:mypage.md'
"""
    print(sample)
    print(replace("http://localhost:8080/pre/", sample))