...
This commit is contained in:
172
herolib/web/doctools/md_replacer.py
Normal file
172
herolib/web/doctools/md_replacer.py
Normal file
@@ -0,0 +1,172 @@
|
||||
import sys
|
||||
import os
|
||||
|
||||
# Add the parent directory of herotools to the Python module search path
|
||||
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
|
||||
|
||||
from herotools.logger import logger
|
||||
from markdown_it import MarkdownIt
|
||||
from markdown_it.tree import SyntaxTreeNode
|
||||
import re
|
||||
from enum import Enum
|
||||
from herotools.texttools import name_fix
|
||||
from mdformat.renderer import MDRenderer
|
||||
from urllib.parse import urlparse
|
||||
|
||||
class ImageType(Enum):
|
||||
JPEG = 'jpeg'
|
||||
PNG = 'png'
|
||||
GIF = 'gif'
|
||||
OTHER = 'other'
|
||||
|
||||
|
||||
def get_link_page(prefix:str, linkname:str, sitename: str, name: str) -> str:
|
||||
"""
|
||||
Generates a page link based on sitename and name.
|
||||
|
||||
Args:
|
||||
sitename (str): The name of the site.
|
||||
name (str): The name of the page.
|
||||
|
||||
Returns:
|
||||
str: The generated link.
|
||||
"""
|
||||
logger.debug(f"get_link_page: {prefix[:60]:<60} {linkname} {sitename}:{name}")
|
||||
return f"[{linkname}]({prefix}/{sitename}/{name})"
|
||||
|
||||
def get_link_image(prefix:str, sitename: str, name: str, image_type: ImageType) -> str:
|
||||
"""
|
||||
Generates an image link based on the URL and image type.
|
||||
|
||||
Args:
|
||||
url (str): The original URL of the image.
|
||||
image_type (ImageType): The type of the image.
|
||||
|
||||
Returns:
|
||||
str: The generated link.
|
||||
"""
|
||||
logger.debug(f"get_link_image: {prefix[:60]:<60} {sitename}:{name}")
|
||||
return f""
|
||||
|
||||
def get_include(sitename: str, name: str) -> str:
|
||||
"""
|
||||
Generates an include directive link based on sitename and name.
|
||||
|
||||
Args:
|
||||
sitename (str): The name of the site.
|
||||
name (str): The name of the page to include.
|
||||
|
||||
Returns:
|
||||
str: The generated include directive.
|
||||
"""
|
||||
logger.debug(f"get_include: {sitename}:{name}")
|
||||
return f"include: {sitename}/{name}"
|
||||
|
||||
def replace(prefix:str, markdown: str) -> str:
|
||||
"""
|
||||
Finds all image links, markdown page links, and custom include directives in the provided markdown text
|
||||
and replaces them using the appropriate functions.
|
||||
|
||||
Args:
|
||||
markdown (str): The markdown content.
|
||||
|
||||
Returns:
|
||||
str: The modified markdown content with updated links.
|
||||
"""
|
||||
# Initialize the Markdown parser
|
||||
md = MarkdownIt()
|
||||
tokens = md.parse(markdown)
|
||||
ast = SyntaxTreeNode(tokens)
|
||||
|
||||
print(ast.pretty(indent=2, show_text=True))
|
||||
|
||||
def process_node(node: SyntaxTreeNode):
|
||||
# from IPython import embed; embed()
|
||||
|
||||
def get_new_url(url: str):
|
||||
logger.debug(f"url: {url}")
|
||||
|
||||
parsed_url = urlparse(url)
|
||||
# site_name = parsed_url.netloc
|
||||
image_path = parsed_url.path
|
||||
logger.debug(f"parsed_url: {parsed_url}")
|
||||
|
||||
# prefix = prefix.rstrip('/')
|
||||
# image_path = image_path.strip('/')
|
||||
|
||||
new_url = f"{prefix.rstrip('/')}/{image_path.strip('/')}"
|
||||
logger.debug(f"new_url: {new_url}")
|
||||
|
||||
return new_url
|
||||
|
||||
if node.type == 'image':
|
||||
# Process image link
|
||||
url = node.attrs.get('src', '')
|
||||
new_url = get_new_url(url)
|
||||
node.attrs['src'] = new_url
|
||||
|
||||
elif node.type == 'link':
|
||||
# Process markdown page link
|
||||
url = node.attrs.get('href', '')
|
||||
new_url = get_new_url(url)
|
||||
node.attrs['href'] = new_url
|
||||
|
||||
# Recursively process child nodes
|
||||
for child in node.children or []:
|
||||
process_node(child)
|
||||
|
||||
def replace_include_directives(match: re.Match) -> str:
|
||||
"""
|
||||
Replaces custom include directives with appropriate links.
|
||||
|
||||
Args:
|
||||
match (re.Match): The match object containing the found include directive.
|
||||
|
||||
Returns:
|
||||
str: The generated link for the include directive.
|
||||
"""
|
||||
url = match.group(1)
|
||||
if ':' in url:
|
||||
site_name, page = url.split(':', 1)
|
||||
page_name = page.split('/')[-1]
|
||||
else:
|
||||
site_name = ""
|
||||
page_name = url
|
||||
if not page.endswith('.md'):
|
||||
page += '.md'
|
||||
return get_include(prefix, site_name, page_name)
|
||||
|
||||
|
||||
# Process the root node
|
||||
process_node(ast)
|
||||
|
||||
# Convert the AST back to markdown
|
||||
renderer = MDRenderer()
|
||||
options = {}
|
||||
env = {}
|
||||
rendered_markdown = renderer.render(tokens, options, env)
|
||||
|
||||
# include_pattern = re.compile(r"!!include page:'(.*?)'")
|
||||
# rendered_markdown = include_pattern.sub(replace_include_directives, rendered_markdown)
|
||||
|
||||
return rendered_markdown
|
||||
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
text = """
|
||||

|
||||
[Page link](sitename:some/path/to/page.md)
|
||||
!!include page:'mypage'
|
||||
!!include page:'mypage.md'
|
||||
!!include page:'mysite:mypage
|
||||
!!include page:'mysite:mypage'
|
||||
!!include page:'mysite:mypage.md'
|
||||
"""
|
||||
|
||||
print(text)
|
||||
text2=replace("http://localhost:8080/pre/", text)
|
||||
print(text2)
|
||||
|
||||
|
Reference in New Issue
Block a user