This commit is contained in:
2025-08-05 15:15:36 +02:00
parent 4bd960ed05
commit 7fabb4163a
192 changed files with 14901 additions and 0 deletions

View File

@@ -0,0 +1,9 @@
"""
MDServer package initialization.
This helps Python properly resolve the package imports.
"""
from .markdown_server import MDServer
from .factory import serve_markdown
from .process_markdown import process_markdown
__all__ = ['MDServer', 'serve_markdown', 'process_markdown']

View File

@@ -0,0 +1,19 @@
from typing import Optional, Union
from pathlib import Path
import sys
sys.path.append(str(Path(__file__).parent.parent))
from .markdown_server import MDServer # Import directly from the module file
def serve_markdown(collections_path: str) -> None:
    """
    Backward-compatible entry point for serving a collections directory.

    Builds an ``MDServer`` for ``collections_path`` and immediately asks it
    to serve the markdown content.

    Args:
        collections_path: Path to the collections directory; forwarded
            unchanged to ``MDServer``.
    """
    MDServer(collections_path=collections_path).serve_markdown()

View File

@@ -0,0 +1,55 @@
import re
from typing import TYPE_CHECKING
from mdcollections.base_types import MDPage
def js_to_python(js_str):
    """Convert a JavaScript object literal into Python dict-literal source.

    The conversion is string-literal aware: text inside quoted strings is
    never rewritten, so values such as ``'Sales: 2024'``, ``'http://host/x'``
    or ``'nullable'`` survive intact.

    Steps, in order:
      1. Drop ``//`` and ``/* */`` comments (outside of strings only).
      2. Remove a leading ``option =`` and a trailing ``;``.
      3. Outside strings: map ``true``/``false``/``null`` to
         ``True``/``False``/``None`` (whole words only), quote bare
         property names, and drop trailing commas before ``}`` / ``]``.
      4. Re-quote single-quoted strings with double quotes, escaping any
         bare ``"`` they contain.

    Args:
        js_str: JavaScript source for a single object literal, optionally
            written as ``option = {...};``.

    Returns:
        The converted text, stripped of surrounding whitespace. It is plain
        text; nothing is evaluated here.
    """
    string_re = r'"(?:\\.|[^"\\])*"|\'(?:\\.|[^\'\\])*\''
    comment_re = r'//[^\n]*|/\*.*?\*/'
    constants = {'true': 'True', 'false': 'False', 'null': 'None'}

    def convert_code(code):
        # Constants first, skipping a word used as a property name ("true:").
        code = re.sub(
            r'\b(true|false|null)\b(?!:)',
            lambda m: constants[m.group(1)],
            code,
        )
        # Bare property names -> quoted dictionary keys.
        code = re.sub(r'(\b\w+):', r'"\1":', code)
        # Trailing commas before a closing brace/bracket.
        return re.sub(r',(\s*[}\]])', r'\1', code)

    def convert_string(literal):
        if literal.startswith('"'):
            return literal
        # Keep existing escape sequences, escape only bare double quotes.
        inner = re.sub(
            r'\\.|"',
            lambda m: '\\"' if m.group(0) == '"' else m.group(0),
            literal[1:-1],
            flags=re.DOTALL,
        )
        return f'"{inner}"'

    # Strings are matched first so comment markers inside them are ignored;
    # group 1 is set only for string matches, comments are replaced by ''.
    js_str = re.sub(
        f'({string_re})|{comment_re}',
        lambda m: m.group(1) or '',
        js_str,
        flags=re.DOTALL,
    )

    js_str = re.sub(r'^option\s*=\s*', '', js_str.strip())
    js_str = re.sub(r';\s*$', '', js_str)

    # With one capturing group, re.split alternates code, string, code, ...
    parts = re.split(f'({string_re})', js_str, flags=re.DOTALL)
    converted = [
        convert_string(part) if index % 2 else convert_code(part)
        for index, part in enumerate(parts)
    ]
    return ''.join(converted).strip()
def process_markdown_echarts(page: MDPage) -> MDPage:
    """Rewrite every ```echarts fence of the page into a ```py sl fence.

    The JavaScript option object inside each fence is converted with
    ``js_to_python`` and wrapped in code that calls ``st_echarts``. The
    result is stored on ``page.content_`` and the same page is returned.

    Raises:
        TypeError: If ``page`` is not an ``MDPage``.
    """
    if not isinstance(page, MDPage):
        raise TypeError("page must be a MDPage")

    def _to_streamlit_block(found):
        option_literal = js_to_python(found.group(1).strip())
        # Assembled line by line so the generated code starts at column 0.
        return (
            "```py sl\n"
            "from streamlit_echarts import st_echarts\n"
            f"option = {option_literal}\n"
            'st_echarts(options=option, height="400px")\n'
            "```"
        )

    echarts_fence = re.compile(r"```echarts\n(.*?)\n```", re.DOTALL)
    page.content_ = echarts_fence.sub(_to_streamlit_block, page.content)
    return page

View File

@@ -0,0 +1,119 @@
import os
import re
import streamlit as st
from PIL import Image # Pillow package provides PIL
from typing import Optional, List, Tuple, TYPE_CHECKING
from mdcollections.tools import name_fix, is_image
from mdcollections.base_types import MDPage
from mdcollections.mdcollections import MDCollections
from .process_images import process_image
from .tools import debug
def link_process(link: str, page: MDPage, collections: MDCollections, is_image_link: bool, debug_enabled: bool = False) -> str:
    """Normalize a markdown link target and verify it exists in a collection.

    The target is reduced to its basename and brought into the form
    ``collection__item``; ``collection:item`` is accepted as an alternative
    spelling, and a bare name is prefixed with the collection of ``page``.
    Both halves are normalized with ``name_fix``.

    Args:
        link: The raw link target from the markdown source.
        page: Page containing the link; supplies the default collection.
        collections: Collections manager used to check existence.
        is_image_link: True for ``![alt](target)`` links.
        debug_enabled: Unused; kept for backward compatibility.

    Returns:
        * image link: ``collection__image`` when the image exists;
        * page link: ``?page=collection__page.md`` when the page exists;
        * non-image links with a URL scheme, ``mailto:`` or a leading ``#``:
          the link unchanged;
        * otherwise an HTML ``<span>`` containing ``ERROR:``.

    Raises:
        TypeError: If an argument has the wrong type.
        RuntimeError: If the normalized link contains ``__`` more than once.
    """
    if not isinstance(link, str):
        raise TypeError("link must be strings")
    if not isinstance(collections, MDCollections):
        raise TypeError("collection must be MDCollection")
    if not isinstance(page, MDPage):
        raise TypeError("page must be MDPage")

    debug(f"\nProcessing link: {link}")
    debug(f"Is image link: {is_image_link}")

    # External URLs and in-page anchors are not collection items; taking
    # their basename would turn them into bogus "page not found" errors.
    if not is_image_link and (
        link.startswith("#")
        or re.match(r'[a-zA-Z][a-zA-Z0-9+.\-]*://|mailto:', link)
    ):
        debug("External or anchor link, left unchanged")
        return link

    # Remove './' if present
    if link.startswith("./"):
        link = link[2:]
        debug("Removed './' prefix from link")

    # Get just the filename without directories
    link = os.path.basename(link)
    debug(f"Extracted basename: {link}")

    if '__' not in link:
        # "collection:item" is an alternative spelling of "collection__item"
        if ":" in link:
            link = link.replace(':', '__')
        # Still unqualified: assume the collection of the current page
        if "__" not in link:
            link = f"{page.collection.name}__{link}"
            debug(f"Created full link: {link}")

    if link.count("__") > 1:
        raise RuntimeError(f"cannot have 2x __ in {link}")

    collection_name, item_name = link.split('__', 1)

    # Normalize both halves with the shared name_fix helper
    item_name = name_fix(item_name)
    collection_name = name_fix(collection_name)
    debug(f"Normalized: '{collection_name}__{item_name}'")

    if is_image_link:
        try:
            collections.image_get(collection_name=collection_name, image_name=item_name)
        except ValueError:
            debug(f"Error - image not found: {link}")
            return f'<span style="color: red;">ERROR: Image not found: {link}</span>'
        debug("Successfully verified image exists")
        return f"{collection_name}__{item_name}"

    # Ensure .md extension for pages
    if not item_name.endswith('.md'):
        item_name = f"{item_name}.md"
        debug(f"Added .md extension: {item_name}")

    try:
        collections.page_get(collection_name, item_name)
    except ValueError:
        debug(f"Error - page not found: {link}")
        return f'<span style="color: red;">ERROR: Page not found: {link}</span>'
    debug("Successfully verified page exists")

    # item_name already ends with ".md"; do not append it a second time.
    return f"?page={collection_name}__{item_name}"
def process_links(page: MDPage, collections: MDCollections) -> MDPage:
    """Resolve every markdown link and image link of ``page``.

    Each ``[text](target)`` / ``![text](target)`` occurrence is passed to
    ``link_process``. When the result contains ``ERROR:`` it replaces the
    whole link (it is red HTML); otherwise the link is rebuilt around the
    resolved target. The new text is stored on ``page.content_``.

    Raises:
        TypeError: If ``page`` or ``collections`` has the wrong type.
    """
    if not isinstance(page, MDPage):
        raise TypeError("page must be a MDPage")
    if not isinstance(collections, MDCollections):
        raise TypeError("collections must be a MDCollections")

    debug(f"Processing links for page: {page.name}")
    debug(f"Content length before processing: {len(page.content)} characters")

    markdown_link = re.compile(r'(!?)\[(.*?)\]\((.*?)\)')

    def _rewrite(found):
        bang, label, target = found.groups()
        image = bang == '!'
        debug(f"Found link - Text: {label}, Path: {target}")
        debug(f"Is image link: {image}")

        resolved = link_process(target, page, collections, image)
        if "ERROR:" in resolved:
            debug(f"Link processing error: {resolved}")
            # Forward the error as is; it is already HTML in red
            return resolved

        if not image:
            debug(f"Returning processed text link: [{label}]({resolved})")
            return f'[{label}]({resolved})'
        debug(f"Returning processed image link: ![{label}]({resolved})")
        return f'![{label}]({resolved})'

    page.content_ = markdown_link.sub(_rewrite, page.content)
    debug(f"Content length after processing: {len(page.content)} characters")
    debug("Link processing complete")
    return page

View File

@@ -0,0 +1,29 @@
import re
from typing import TYPE_CHECKING
from mdcollections.base_types import MDPage
def process_markdown_mermaid(page: MDPage) -> MDPage:
    """Convert ```mermaid blocks to ```py sl blocks that use st_mermaid.

    The diagram source is embedded in the generated code as a Python string
    literal produced by ``repr``, so quotes and backslashes in the diagram
    reach ``st_mermaid`` unchanged instead of being read as Python escape
    sequences. As before, the diagram is passed with one leading and one
    trailing newline. The result is stored on ``page.content_``.

    Raises:
        TypeError: If ``page`` is not an ``MDPage``.
    """
    if not isinstance(page, MDPage):
        raise TypeError("page must be a MDPage")

    def replace_mermaid_block(match):
        mermaid_code = match.group(1).strip()
        # repr() always yields a valid single-line literal for any content.
        diagram_literal = repr(f"\n{mermaid_code}\n")
        return (
            "```py sl\n"
            "from streamlit_mermaid import st_mermaid\n"
            f"st_mermaid({diagram_literal})\n"
            "```"
        )

    # Process all mermaid code blocks
    page.content_ = re.sub(
        r"```mermaid\n(.*?)\n```",
        replace_mermaid_block,
        page.content,
        flags=re.DOTALL,
    )
    return page

View File

@@ -0,0 +1,69 @@
import re
import streamlit as st
import pandas as pd
import numpy as np
from io import StringIO
import sys
from typing import TYPE_CHECKING
from mdcollections.base_types import MDPage
# if TYPE_CHECKING:
# from .markdown_server import MDServer
def execute_streamlit_code(code_block):
    """
    Run a block of Python source with ``st``, ``pd`` and ``np`` available.

    ``sys.stdout`` is swapped for an in-memory buffer while the code runs
    and is always restored afterwards.

    Returns:
        ``(True, printed_text)`` when the code ran without raising, or
        ``(False, message)`` where the message holds the exception text
        followed by the failed code.

    NOTE(review): the code is run with ``exec``; it must only ever be fed
    trusted markdown content.
    """
    capture = StringIO()
    # Right-hand side is evaluated first, so the real stdout is remembered
    # before it is replaced.
    saved_stdout, sys.stdout = sys.stdout, capture
    try:
        namespace = {'st': st, 'pd': pd, 'np': np}
        exec(code_block, namespace)
        outcome = (True, capture.getvalue())
    except Exception as e:
        outcome = (False, f"Error: {str(e)}\n\nFailed code:\n{code_block}")
    finally:
        sys.stdout = saved_stdout
    return outcome
def process_streamlit_blocks(page: MDPage) -> MDPage:
    """
    Execute every ```py sl fence of the page and remove it from the text.

    Each fence body is run through ``execute_streamlit_code``. A block that
    runs cleanly is replaced by an empty string (its printed output is
    discarded); a failing block is replaced by a plain code fence holding
    the error message. The result is stored on ``page.content_``.

    Raises:
        TypeError: If ``page`` is not an ``MDPage``.
    """
    if not isinstance(page, MDPage):
        raise TypeError("page must be a MDPage")

    def _run_block(found):
        ok, output = execute_streamlit_code(found.group(1).strip())
        # Only failures leave something behind in the markdown.
        return "" if ok else f"```\n{output}\n```"

    sl_fence = re.compile(r"```py\s+sl\n(.*?)\n```", re.DOTALL)
    page.content_ = sl_fence.sub(_run_block, page.content)
    return page

View File

@@ -0,0 +1,76 @@
import re
import streamlit as st
from PIL import Image
from typing import TYPE_CHECKING, List
from mdcollections.base_types import MDPage, MDImage
# if TYPE_CHECKING:
# from .markdown_server import MDServer
def create_slider_component(images: List[str]) -> None:
    """Create a Streamlit component for image slides.

    Renders Previous/Next buttons, the current image and a slide counter.
    The current position is kept in ``st.session_state.current_slide``.

    Args:
        images: Image specs, one per slide, each handed to the collections
            manager's ``image_get``. An empty list shows a warning and
            nothing else.
    """
    st.markdown(
        "\n<style>\n.stImage {\ncursor: pointer;\n}\n</style>\n",
        unsafe_allow_html=True,
    )

    # Nothing to show; also avoids modulo-by-zero and indexing an empty list.
    if not images:
        st.warning("No slides to display")
        return
    total = len(images)

    # Initialize session state
    if 'current_slide' not in st.session_state:
        st.session_state.current_slide = 0
    # A stored index may be stale if the slide list got shorter.
    st.session_state.current_slide %= total

    # Navigation buttons
    col1, col2, col3 = st.columns([1, 4, 1])
    with col1:
        if st.button("⬅️ Previous"):
            st.session_state.current_slide = (st.session_state.current_slide - 1) % total
    with col3:
        if st.button("Next ➡️"):
            st.session_state.current_slide = (st.session_state.current_slide + 1) % total

    # Display current image
    current_image_spec = images[st.session_state.current_slide]

    # Prefer an explicitly stored server; otherwise use the manager that
    # MDServer.__init__ puts into session state.
    server = st.session_state.get('md_server')
    if server is not None:
        manager = server.collections_manager
    else:
        manager = st.session_state.get('collections_manager')
    if not manager:
        st.error("Collections manager not initialized")
        return

    try:
        # NOTE(review): image_get is called with two arguments elsewhere in
        # this package; confirm it also accepts a single combined spec.
        image_item = manager.image_get(current_image_spec)
        image = Image.open(image_item.path)
        st.image(image, use_column_width=True)
    except Exception as e:
        st.error(f"Could not load image: {current_image_spec}. Error: {str(e)}")

    # Display slide counter
    st.caption(f"Slide {st.session_state.current_slide + 1} of {total}")
def process_markdown_slides(page: MDPage) -> MDPage:
    """Convert ```slides blocks to ```py sl blocks that use the slider component.

    Every non-empty line of a slides block is taken as one image spec. The
    generated code imports ``create_slider_component`` through this module's
    absolute name (``__name__``): the code is later run by ``exec`` with a
    bare globals dict, where a relative import cannot be resolved. The
    result is stored on ``page.content_``.

    Raises:
        TypeError: If ``page`` is not an ``MDPage``.
    """
    if not isinstance(page, MDPage):
        raise TypeError("page must be a MDPage")

    def replace_slides_block(match):
        slides_content = match.group(1).strip()
        image_paths = [line.strip() for line in slides_content.split('\n') if line.strip()]
        # repr() gives a valid Python list literal for the generated code.
        image_paths_str = repr(image_paths)
        return (
            "```py sl\n"
            f"from {__name__} import create_slider_component\n"
            f"create_slider_component({image_paths_str})\n"
            "```"
        )

    # Process all slides code blocks
    page.content_ = re.sub(r"```slides\n(.*?)\n```", replace_slides_block, page.content, flags=re.DOTALL)
    return page

View File

@@ -0,0 +1,237 @@
from typing import Optional, Union
import os
from pathlib import Path
import traceback
import sys
import re
import pudb
try:
import streamlit as st
except ImportError:
raise ImportError("streamlit is required. Install with: pip install streamlit")
from mdcollections.base_types import MDPage, MDImage, MDCollection
from mdcollections.mdcollections import MDCollections
from .process_markdown import process_markdown, summary_load
from .tools import debug
def setup_static_dir(collections_path: str) -> None:
    """
    Placeholder for setting up a static directory for serving images.

    Currently does nothing and returns ``None``. The disabled
    implementation below would create ``<collections_path>/static`` and
    symlink every collection directory into it.
    """
    # static_dir = os.path.join(collections_path, "static")
    # if not os.path.exists(static_dir):
    #     os.makedirs(static_dir)
    # Create symlinks for each collection
    # collections = os.listdir(collections_path)
    # for collection in collections:
    #     collection_path = os.path.join(collections_path, collection)
    #     if os.path.isdir(collection_path) and not collection.startswith('.') and collection != 'static':
    #         # Create symlink from collection to static/collection
    #         static_link = os.path.join(static_dir, collection)
    #         if not os.path.exists(static_link):
    #             try:
    #                 os.symlink(collection_path, static_link)
    #             except OSError as e:
    #                 debug(f"Failed to create symlink from {collection_path} to {static_link}: {e}")
    return None
def process_markdown_content(content: str, base_path: str, collection_name: str) -> None:
    """
    Render markdown text in the Streamlit app.

    Args:
        content: The markdown text handed to ``st.markdown``.
        base_path: Accepted but not used by this function.
        collection_name: Accepted but not used by this function.
    """
    markdown_text = content
    st.markdown(markdown_text)
class MDServer:
    """Streamlit front end that serves pages from markdown collections.

    State lives in ``st.session_state`` under the keys
    ``current_collection``, ``current_page``, ``show_collections_view``,
    ``collections_manager`` and ``debug_mode``.
    """

    def __init__(self, collections_path: str):
        """Initialize the MDServer instance.

        Args:
            collections_path: Directory holding the collections. ``~`` is
                expanded. A ``pathlib.Path`` is converted to ``str``.

        Raises:
            TypeError: If ``collections_path`` is neither ``str`` nor ``Path``.
        """
        # Convert path to string if it's a Path object
        if isinstance(collections_path, Path):
            collections_path = str(collections_path)
        if not isinstance(collections_path, str):
            # The original returned the exception object from __init__,
            # which Python itself rejects with a TypeError.
            raise TypeError("collections_path must be a string.")

        st.session_state.setdefault('current_collection', None)
        st.session_state.setdefault('current_page', None)
        st.session_state.setdefault('show_collections_view', False)
        st.session_state.setdefault('collections_manager', None)
        st.session_state.setdefault('debug_mode', True)

        # Get the collections manager
        collections_path = os.path.expanduser(collections_path)
        print(f"Initializing collections manager for: {collections_path}")
        collections_manager = MDCollections(root_path=Path(collections_path))

        # Set up static directory for serving images
        setup_static_dir(collections_path)

        # Set up page config
        st.set_page_config(
            page_title="Markdown Server",
            page_icon="📚",
            layout="wide",
            initial_sidebar_state="expanded",
        )

        st.session_state.collections_manager = collections_manager

    @property
    def collections_manager(self) -> MDCollections:
        """
        Return the collections manager stored in session state.

        Returns:
            MDCollections: The initialized collections manager

        Raises:
            RuntimeError: If no collections manager has been stored yet
        """
        manager = st.session_state.get('collections_manager')
        # Test for "missing", not for truthiness of the manager object.
        if manager is None:
            raise RuntimeError("Collections manager not initialized. Please ensure MDServer is properly initialized.")
        return manager

    @property
    def collections(self) -> list:
        """
        Return the ``collections`` attribute of the collections manager.

        Raises:
            RuntimeError: If collections_manager is not initialized
        """
        return self.collections_manager.collections

    def handle_url_parameters(self) -> None:
        """
        Load the page named by the ``page`` URL parameter, if present.

        Expected format: ?page=collection_name__page_name.md
        Example: ?page=banking_whitepaper__web_3_vision.md

        On success the processed page and its collection are stored in
        session state. A ``ValueError`` (bad format or unknown page) is
        reported with ``st.warning``.
        """
        requested_page = st.query_params.get('page', None)
        if not requested_page:
            return

        try:
            # Split the page parameter using '__' as delimiter
            if '__' not in requested_page:
                raise ValueError(f"Invalid page format. Expected format: collection_name__page_name.md, got: {requested_page}")
            collection_name, page_name = requested_page.split('__', 1)

            # Get the page using collections_manager's page_get method
            page = self.collections_manager.page_get(
                collection_name=collection_name,
                page_name=page_name
            )
            # NOTE(review): process_markdown renders the page itself and
            # display_content renders current_page again; confirm intended.
            page = process_markdown(page, collections=self.collections_manager)
            st.session_state.current_collection = page.collection
            st.session_state.current_page = page
        except ValueError as e:
            # Handle invalid format or page not found errors
            st.warning(f"Could not load page: {requested_page}. Error: {str(e)}")

    def setup_sidebar(self, collections: MDCollections) -> None:
        """
        Build the sidebar: debug toggle, collections view, collection picker.

        Args:
            collections: Not used; the list is read from ``self.collections``.
        """
        with st.sidebar:
            # Debug Mode toggle, stored in session state
            st.session_state.debug_mode = st.toggle("Debug Mode", st.session_state.debug_mode)

            # Add Collections View action
            if st.button("View All Collections"):
                st.session_state.show_collections_view = True
                st.session_state.current_page = None
                return

            collection_names = [c.name for c in self.collections]
            current = st.session_state.current_collection
            # Fall back to the first entry when the remembered collection is
            # not in the list; list.index would raise ValueError.
            current_idx = 0
            if current and current.name in collection_names:
                current_idx = collection_names.index(current.name)

            selected_collection_name = st.selectbox(
                "Choose a collection:",
                collection_names,
                index=current_idx,
                key="collection_selector"
            )

        selected_collection = self.collections_manager.collection_get(selected_collection_name)

        # Add sidebar content: summary.md, or a generated index
        with st.sidebar:
            summary_page = summary_load(selected_collection)
            st.markdown(summary_page.content, unsafe_allow_html=True)

        # Remember the selected collection
        st.session_state.current_collection = selected_collection

    def display_content(self) -> None:
        """Display the markdown content in the main area.

        Shows, in order of priority: the collections overview, the current
        page, or the index of the current collection. Any exception is
        reported with ``st.error``.
        """
        main_content = st.container()
        with main_content:
            try:
                if st.session_state.show_collections_view:
                    # Read and process collections view template
                    collections_view_path = Path(__file__).parent / "pages" / "collections_view.md"
                    template = collections_view_path.read_text(encoding="utf-8")
                    # Replace placeholder with the manager's string representation
                    content = template.replace("{collections_str}", str(self.collections_manager))
                    st.markdown(content)
                elif st.session_state.current_page:
                    st.markdown(st.session_state.current_page.content, unsafe_allow_html=True)
                elif st.session_state.current_collection:
                    # Display the collection index when no page is selected
                    st.markdown("### Collection Index")
                    myindex_page = st.session_state.current_collection.index_page()
                    myindex_page = process_markdown(myindex_page, collections=self.collections_manager)
                    st.markdown(myindex_page.content)
                else:
                    st.warning("Please select a collection.")
            except Exception as e:
                st.error(f"An error occurred: {str(e)}")

    def serve_markdown(self) -> None:
        """
        Serve markdown content using Streamlit.

        Handles URL parameters, then the sidebar, then the main content.
        Any exception is reported with ``st.error``.
        """
        try:
            if not self.collections:
                st.error("No collections found.")
                return

            # Handle URL parameters
            self.handle_url_parameters()

            # Setup sidebar
            self.setup_sidebar(self.collections_manager)

            # Display content
            self.display_content()
        except Exception as e:
            st.error(f"An error occurred: {str(e)}")

View File

@@ -0,0 +1,7 @@
# Collections Overview
```python
{collections_str}
```
The tree structure above is automatically generated from the current state of the collections manager.

View File

@@ -0,0 +1,89 @@
import os
import re
import streamlit as st
from PIL import Image # Pillow package provides PIL
from typing import Optional, List, Tuple, TYPE_CHECKING
from mdcollections.base_types import MDImage, MDPage
from mdcollections.mdcollections import MDCollections
from .tools import debug
def process_image(myimage: MDImage, alt_text: Optional[str] = None) -> str:
    """
    Process an image and return HTML img tag for proper rendering in markdown.

    Args:
        myimage: The MDImage object to process
        alt_text: Optional alternative text for the image; HTML-escaped
            before it is placed in the ``alt`` attribute

    Returns:
        str: HTML img tag with inline styling, or a plain
        ``Error loading image: ...`` message if the image cannot be opened

    Raises:
        TypeError: If ``myimage`` is not an ``MDImage``
    """
    from html import escape

    if not isinstance(myimage, MDImage):
        raise TypeError("myimage must be a MDImage")

    try:
        # Verify the image can be opened; the context manager closes the
        # file handle again.
        with Image.open(myimage.path):
            pass

        # Construct static URL using collection name and relative path
        static_url = f"/app/static/{myimage.collection.name}/{myimage.rel_path}"

        # Escape the alt text so quotes or markup cannot break the tag
        safe_alt = escape(alt_text or "", quote=True)
        return f'<img src="{static_url}" alt="{safe_alt}" style="max-width: 100%; height: auto; display: inline-block; margin: 0.5em 0;">'
    except Exception as e:
        debug(f"Error processing image {myimage.path}: {str(e)}")
        return f"Error loading image: {myimage.path}"
def process_images(page: MDPage, collections: MDCollections) -> MDPage:
    """
    Replace markdown image links with HTML img tags.

    Image targets are expected in the form ``collection__image``, the form
    ``link_process`` returns for image links.

    Args:
        page: The MDPage object containing markdown content
        collections: The MDCollections object used to look up images

    Returns:
        MDPage: The same page; the new text is stored on ``page.content_``.
        Links that cannot be resolved are replaced by a short error text.

    Raises:
        TypeError: If ``page`` or ``collections`` has the wrong type
    """
    if not isinstance(page, MDPage):
        raise TypeError("page must be a MDPage")
    if not isinstance(collections, MDCollections):
        raise TypeError("collections must be a MDCollections")

    debug(f"Processing images for page: {page.name}")
    debug(f"Content length before processing: {len(page.content)} characters")

    # Match markdown image syntax: ![alt text](path)
    link_pattern = r'!\[(.*?)\]\((.*?)\)'

    def replace_link(match):
        alt_text = match.group(1)
        image_path = match.group(2)

        try:
            parts = image_path.split("__", 1)
            if len(parts) != 2:
                debug(f"Invalid image path format (missing __): {image_path}")
                return f"Invalid image path format: {image_path}"

            # The collection comes first, then the image name.
            collection_name, image_name = parts
            debug(f"Found image link, will now check - Alt text: {alt_text}, Image: '{image_name}', Collection: '{collection_name}'")

            # Keyword arguments, matching the call in link_process.
            myimage = collections.image_get(collection_name=collection_name, image_name=image_name)
            return process_image(myimage, alt_text or None)
        except ValueError as e:
            debug(f"Image not found in collection: {image_path}.\n{e}")
            return f"Image not found: {image_path}"
        except Exception as e:
            debug(f"Error processing image {image_path}: {str(e)}")
            return f"Error processing image: {image_path}"

    # Process all image links while preserving surrounding text
    page.content_ = re.sub(link_pattern, replace_link, page.content)
    debug("Image processing complete")
    return page

View File

@@ -0,0 +1,80 @@
import os
import re
import streamlit as st
from PIL import Image # Pillow package provides PIL
from typing import Optional, List, Tuple, TYPE_CHECKING
from .macro_sl import process_streamlit_blocks
from .macro_chart import process_markdown_echarts
from .macro_mermaid import process_markdown_mermaid
from .macro_slides import process_markdown_slides
from .macro_sl import process_streamlit_blocks
from .macro_links import process_links
from .process_images import process_images
from mdcollections.tools import name_fix, is_image
from mdcollections.base_types import MDPage, MDCollection
from mdcollections.mdcollections import MDCollections
from .tools import debug,rewrite_summary_links
def summary_load(collection: MDCollection) -> MDPage:
    """Return the collection's summary.md page, or its generated index.

    When ``summary.md`` is found, its ``content_`` is passed through
    ``rewrite_summary_links`` before the page is returned. When the lookup
    raises ``ValueError``, ``collection.index_page()`` is returned instead.

    Raises:
        TypeError: If ``collection`` is not an ``MDCollection``.
    """
    if not isinstance(collection, MDCollection):
        raise TypeError("collection must be a MDCollection")

    try:
        summary = collection.page_get("summary.md")
        # The first path segment of each link is treated as the collection
        # name; this may change in future.
        summary.content_ = rewrite_summary_links(summary.content_)
    except ValueError:
        return collection.index_page()
    return summary
def process_markdown(page: MDPage, collections: MDCollections) -> MDPage:
    """Process markdown content and handle images, links, and streamlit code blocks.

    Runs, in order: echarts, mermaid and slides macros (each rewrites its
    fence into a ```py sl fence), execution of ```py sl fences, link
    resolution and image resolution. If any text is left it is rendered
    with ``st.markdown``.

    Args:
        page: The MDPage object to process
        collections: The MDCollections object containing all collections

    Returns:
        The processed page.

    Raises:
        TypeError: If ``page`` or ``collections`` has the wrong type.
        RuntimeError: If ``page.processed`` is already set.
    """
    if not isinstance(page, MDPage):
        raise TypeError("page must be a MDPage")
    if not isinstance(collections, MDCollections):
        raise TypeError("collections must be a MDCollections")

    debug(f"Processing markdown for page: {page.name} in collection: {page.collection.name}\nInitial content length: {len(page.content)} characters")

    if page.processed:
        # The exception was previously built but never raised. A second
        # pass would feed already-rewritten links back into link_process.
        raise RuntimeError(f"double processing of page {page.name}")

    # Macros that turn their own fence into a ```py sl fence
    page = process_markdown_echarts(page)
    page = process_markdown_mermaid(page)
    page = process_markdown_slides(page)

    # Execute the ```py sl fences, including the ones generated above
    page = process_streamlit_blocks(page)

    # Links first: image links become "collection__image" targets that
    # process_images then resolves.
    page = process_links(page=page, collections=collections)
    page = process_images(page=page, collections=collections)

    # Process remaining content
    if page.content.strip():
        debug(f"Rendering final markdown content (length: {len(page.content)} characters)")
        st.markdown(page.content, unsafe_allow_html=True)
    else:
        debug("No content to render after processing")

    return page
def parse_page_parameter(page_param: str) -> Tuple[Optional[str], str]:
    """Split a ``collection__filename`` parameter at the first ``__``.

    Returns:
        ``(collection, filename)`` when ``__`` is present, otherwise
        ``(None, page_param)``.
    """
    collection, separator, filename = page_param.partition('__')
    if not separator:
        return None, page_param
    return collection, filename

View File

@@ -0,0 +1,5 @@
streamlit>=1.24.0
pandas>=1.5.0
numpy>=1.24.0
ipython>=8.0.0
Pillow>=10.0.0

View File

@@ -0,0 +1,43 @@
import re
import streamlit as st
def strip_ansi_codes(text):
    """Return ``text`` with ANSI escape sequences removed."""
    return re.sub(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])', '', text)
def debug(message: str):
    """Print a debug message when debug mode is enabled.

    The message is printed to stdout with ANSI escape codes removed.
    Debug mode is read from ``st.session_state['debug_mode']``; when that
    key has not been set yet the message is silently dropped instead of
    raising ``AttributeError``.

    Args:
        message: The debug message to display
    """
    debug_enabled = st.session_state.get('debug_mode', False)
    if debug_enabled:
        #st.code(message, language="text")
        print(strip_ansi_codes(message))
def rewrite_summary_links(text: str) -> str:
    """Turn ``(collection/path)`` targets into ``(collection__path)``.

    In every parenthesized span the first ``/`` is replaced by ``__``. Two
    kinds of targets are left untouched:

    * targets containing ``://`` (absolute URLs such as ``https://...``);
    * targets that already contain ``__``, which makes the function safe
      to apply more than once to the same text.

    The text is processed line by line, so a span never crosses a line
    break; lines are re-joined with ``\\n``.

    Args:
        text: Markdown text, typically the content of a summary page.

    Returns:
        The rewritten text.
    """
    import re

    def replace_first_slash(match):
        link = match.group(1)
        if '__' in link or '://' in link:
            return match.group(0)
        # Only the first slash separates collection from the rest.
        return f"({link.replace('/', '__', 1)})"

    # Anything between parentheses on one line is treated as a link target
    pattern = re.compile(r'\(([^)]+)\)')
    return '\n'.join(
        pattern.sub(replace_first_slash, line) for line in text.splitlines()
    )