This commit is contained in:
2025-08-05 15:15:36 +02:00
parent 4bd960ed05
commit 7fabb4163a
192 changed files with 14901 additions and 0 deletions

View File

@@ -0,0 +1,13 @@
from .base_types import MDItem, MDPage, MDImage, MDCollection
from .mdcollections import MDCollections
from .scanner import scan_directory
# Public API of the package: the names listed here are what
# `from <package> import *` exposes. Each one is imported above from
# .base_types, .mdcollections or .scanner.
__all__ = [
'MDItem',
'MDPage',
'MDImage',
'MDCollection',
'MDCollections',
'scan_directory'
]

View File

@@ -0,0 +1,177 @@
from pathlib import Path
from typing import List, Dict
from dataclasses import dataclass
from .tools import name_fix
import os
class MDItem:
    """Base class for items that live inside a collection.

    Attributes:
        collection: The owning collection; only its ``path`` attribute is
            read by this class.
        rel_path: Location of the item relative to the collection root.
        content_: Cached text content, empty until something fills it.
        processed: Processing flag, initialised to ``False``.
    """

    def __init__(self, collection: "MDCollection", rel_path: Path):
        """Create an item.

        Args:
            collection: Collection the item belongs to.
            rel_path: Path of the item relative to ``collection.path``.

        Raises:
            TypeError: If ``rel_path`` is not a ``Path``.
        """
        if not isinstance(rel_path, Path):
            raise TypeError("rel_path must be a Path instance")
        self.collection = collection
        self.rel_path = rel_path
        self.content_ = ""
        # Was ``bool`` (the type object, which is always truthy); the flag
        # must start out as a real boolean value.
        self.processed = False

    def __str__(self) -> str:
        return f"{self.__class__.__name__}: {self.rel_path}"

    @property
    def full_path(self) -> Path:
        """Return the collection path joined with the item's relative path."""
        return self.collection.path / self.rel_path

    @property
    def path(self) -> str:
        """Return the resolved ``full_path`` as a string."""
        return str(self.full_path.resolve())

    @property
    def name(self) -> str:
        """Return the basename of ``rel_path`` normalised by ``name_fix``."""
        return name_fix(os.path.basename(self.rel_path))
class MDPage(MDItem):
    """Represents a markdown file in the collection."""

    @property
    def content(self) -> str:
        """Return the page text, reading it from disk when nothing is cached.

        The text is cached in ``content_``. An empty cache (including an
        empty file) triggers a fresh read on the next access.

        Raises:
            FileNotFoundError: If the file does not exist.
            Exception: If the file exists but cannot be read; the original
                ``OSError`` is attached as the cause.
        """
        if self.content_:
            return self.content_

        # ``self.path`` resolves the path on every access; do it once.
        file_path = self.path
        if not os.path.exists(file_path):
            raise FileNotFoundError(f"Cannot find markdown file: {file_path}")
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                self.content_ = f.read()
        except OSError as e:
            # Chain the cause so the underlying OS error stays in the traceback.
            raise Exception(f"Error reading file {file_path}: {e}") from e
        return self.content_
class MDImage(MDItem):
    """An image file that belongs to a collection.

    Adds no behaviour of its own; the subclass exists so that callers can
    tell images apart from pages with ``isinstance``.
    """
@dataclass
class MDCollection:
    """A named directory holding markdown pages and images.

    Fields:
        path: Directory of the collection.
        name: Name of the collection.
        items: Pages and images that belong to the collection.
    """

    path: Path
    name: str
    items: List[MDItem]

    def page_get(self, name: str) -> MDPage:
        """
        Look up a markdown page by name.

        Args:
            name: Page name, with or without a trailing ``.md``; it is
                normalised with ``name_fix`` before comparison.

        Returns:
            The first MDPage whose normalised stem matches.

        Raises:
            ValueError: If ``name`` contains ``__`` or no page matches.
        """
        if "__" in name:
            raise ValueError("there should be no __ in name of page_get")

        # Drop a literal trailing ".md" before normalising
        lookup = name[:-3] if name.endswith('.md') else name
        wanted = name_fix(lookup)

        for candidate in self.items:
            if not isinstance(candidate, MDPage):
                continue
            if name_fix(candidate.rel_path.stem) == wanted:
                return candidate

        raise ValueError(f"Page not found: {lookup}")

    def image_get(self, name: str) -> MDImage:
        """
        Look up an image by file name.

        Args:
            name: Image file name including its extension; it is normalised
                with ``name_fix`` before comparison.

        Returns:
            The first MDImage whose normalised basename matches.

        Raises:
            ValueError: If no image matches.
        """
        wanted = name_fix(name)
        for candidate in self.items:
            # Images are compared on the full basename, extension included
            if isinstance(candidate, MDImage) and name_fix(os.path.basename(candidate.rel_path)) == wanted:
                return candidate
        raise ValueError(f"Image not found: {name}")

    def __str__(self) -> str:
        """Render the collection as a small tree: header, pages, then images."""

        def by_rel_path(entry):
            return str(entry.rel_path)

        lines = [f"Collection: {self.name} ({self.path})"]
        for heading, kind in ((" Pages:", MDPage), (" Images:", MDImage)):
            members = [entry for entry in self.items if isinstance(entry, kind)]
            if not members:
                continue
            lines.append(heading)
            for entry in sorted(members, key=by_rel_path):
                lines.append(f" └─ {entry.name}")
        return "\n".join(lines)

    def index_page(self) -> MDPage:
        """Build an in-memory ``index.md`` page linking every page of the collection.

        Pages are grouped by their parent directory. Pages directly in the
        collection root are grouped under 'Root', which only gets a header
        when other directories exist as well.
        """
        ordered_pages = sorted(
            (entry for entry in self.items if isinstance(entry, MDPage)),
            key=lambda entry: str(entry.rel_path),
        )

        # Bucket the pages per directory
        groups: Dict[str, List[MDPage]] = {}
        for page in ordered_pages:
            folder = str(page.rel_path.parent)
            if folder == '.':
                folder = 'Root'
            groups.setdefault(folder, []).append(page)

        several_groups = len(groups) > 1
        out = ["# Collection Index\n"]
        for folder in sorted(groups):
            if folder != 'Root':
                out.append(f"\n## {folder}\n")
            elif several_groups:
                out.append("\n## Root Directory\n")

            for page in sorted(groups[folder], key=lambda entry: entry.name):
                # Human-readable title derived from the file stem
                title = page.rel_path.stem.replace('_', ' ').replace('-', ' ').title()
                target = str(page.rel_path)
                out.append(f'- [{title}]({self.name}__{target})')

        index = MDPage(self, Path("index.md"))
        index.content_ = "\n".join(out)
        return index

View File

@@ -0,0 +1,25 @@
import os
from pathlib import Path
from typing import Optional
from .mdcollections import MDCollections
def create_collections(path: Optional[str] = None) -> MDCollections:
    """
    Build an MDCollections instance for the given directory.

    Args:
        path: Directory to scan for collections. A leading ``~`` is expanded
            to the user's home directory. There is no fallback location: the
            argument is optional in the signature only.

    Returns:
        Initialized MDCollections instance

    Raises:
        ValueError: If path is None
    """
    if path is None:
        raise ValueError("Path cannot be None")

    root = Path(os.path.expanduser(path))
    return MDCollections(root_path=root)

View File

@@ -0,0 +1,112 @@
from pathlib import Path
from typing import List, Optional
from .base_types import MDCollection, MDPage, MDImage, MDItem
from .scanner import scan_directory
from .tools import name_fix
class MDCollections:
    """Manages the markdown collections found directly under one root directory."""

    def __init__(self, root_path: Path):
        """
        Initialize the collections manager and scan the root immediately.

        Args:
            root_path: Root directory containing collections

        Raises:
            ValueError: If root_path does not exist
        """
        self.root_path = root_path
        self.collections: List[MDCollection] = []
        self._scan_collections()

    def _scan_collections(self):
        """Scan the immediate subdirectories of the root for collections."""
        if not self.root_path.exists():
            raise ValueError(f"Root path does not exist: {self.root_path}")

        for path in sorted(self.root_path.iterdir()):
            if not path.is_dir():
                continue
            # Skip directories starting with _ or containing 'archive' (any case)
            if path.name.startswith('_') or 'archive' in path.name.lower():
                continue

            items = scan_directory(path)
            if not items:
                # Nothing was found in this directory, so it is not a collection
                continue
            self.collections.append(
                MDCollection(
                    path=path,
                    name=path.name,
                    items=sorted(items, key=lambda x: x.name),
                )
            )

        self.collections.sort(key=lambda x: x.name)

    def collection_get(self, name: str) -> MDCollection:
        """
        Get a collection by name.

        An exact match on the collection name wins. If there is none, the
        lookup is retried on ``name_fix``-normalised names, because
        ``page_get`` and ``image_get`` normalise the requested name while
        collections keep their raw directory name.

        Args:
            name: Name of the collection to find

        Returns:
            MDCollection object

        Raises:
            ValueError: If collection not found
        """
        for collection in self.collections:
            if collection.name == name:
                return collection

        try:
            wanted = name_fix(name)
        except (TypeError, ValueError):
            # A name that cannot be normalised cannot match anything
            wanted = None
        if wanted is not None:
            for collection in self.collections:
                if name_fix(collection.name) == wanted:
                    return collection

        raise ValueError(f"Collection not found: {name}")

    def page_get(self, collection_name: str, page_name: str) -> MDPage:
        """
        Get a page from a specific collection.

        Args:
            collection_name: Name of the collection
            page_name: Name of the page

        Returns:
            MDPage object

        Raises:
            ValueError: If collection or page not found
        """
        page_name = name_fix(page_name)
        collection_name = name_fix(collection_name)
        collection = self.collection_get(collection_name)
        return collection.page_get(page_name)

    def image_get(self, collection_name: str, image_name: str) -> MDImage:
        """
        Get an image from a specific collection.

        Args:
            collection_name: Name of the collection
            image_name: Name of the image. May be given as
                ``collection__image``; the prefix then overrides
                ``collection_name``.

        Returns:
            MDImage object

        Raises:
            ValueError: If collection or image not found
        """
        if "__" in image_name:
            # Specs are written collection first, image second. The previous
            # code unpacked them the other way round.
            collection_name, image_name = image_name.split("__", 1)

        image_name = name_fix(image_name)
        collection_name = name_fix(collection_name)

        collection = self.collection_get(collection_name)
        print(f" -- image get: '{collection_name}' '{image_name}'")
        return collection.image_get(image_name)

    def __str__(self) -> str:
        """Returns a string representation of all collections."""
        if not self.collections:
            return "No collections found"
        return "\n\n".join(str(collection) for collection in self.collections)

View File

@@ -0,0 +1,61 @@
from pathlib import Path
from typing import List, Sequence
from .base_types import MDItem, MDPage, MDImage, MDCollection
def scan_directory(path: Path) -> Sequence[MDItem]:
    """
    Scan a directory tree for markdown files and images.

    Every item is attached to one temporary MDCollection built from ``path``.
    Pages come first in the result, followed by images.

    Args:
        path: Directory to scan

    Returns:
        List of MDItem objects (MDPage or MDImage)

    Raises:
        ValueError: If path does not exist or is not a directory
    """
    if not path.exists():
        raise ValueError(f"Path does not exist: {path}")
    if not path.is_dir():
        raise ValueError(f"Path is not a directory: {path}")

    # Temporary collection that owns the items; its list is filled in below
    temp_collection = MDCollection(
        path=path,
        name=path.name,
        items=[],
    )
    image_extensions = {'.png', '.jpg', '.jpeg', '.gif', '.svg'}

    def visible_files(pattern: str):
        """Yield collection-relative paths of non-hidden files matching pattern."""
        for candidate in path.rglob(pattern):
            rel_path = candidate.relative_to(path)
            # Test only the part below the collection root. Checking the full
            # path skipped everything when the root itself sat under a
            # dot-directory or was reached through "..".
            if any(part.startswith('.') for part in rel_path.parts):
                continue
            # A directory named like "x.md" or "x.png" is not an item
            if not candidate.is_file():
                continue
            yield rel_path

    items: List[MDItem] = [
        MDPage(temp_collection, rel_path) for rel_path in visible_files("*.md")
    ]
    items.extend(
        MDImage(temp_collection, rel_path)
        for rel_path in visible_files("*")
        if rel_path.suffix.lower() in image_extensions
    )

    temp_collection.items = items
    return items

View File

@@ -0,0 +1,99 @@
from pathlib import Path
from typing import Union
import os
import re
def should_skip_path(path: Union[str, Path]) -> bool:
    """
    Tell whether a file or directory should be ignored.

    Only the final path component is inspected; parent directories play no
    role in the decision.

    Args:
        path: Path to check (can be file or directory)

    Returns:
        True if the basename begins with '.' or '_', False otherwise
    """
    basename = Path(path).name
    if basename.startswith('.'):
        return True
    return basename.startswith('_')
def strip_ansi_codes(text):
    """Remove ANSI escape codes from text."""
    ansi_escape = re.compile(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])')
    return ansi_escape.sub('', text)


def name_fix(path: str) -> str:
    """
    Normalize a single file name (not a path).

    Steps applied:
    - ANSI escape codes and surrounding whitespace are removed
    - a ``.md`` extension is dropped; any other extension is kept as written
    - the stem is lowercased, spaces and hyphens become underscores,
      commas are removed
    - runs of underscores collapse to one, so the result never contains
      the ``__`` collection separator inside the stem
    - for image files, trailing underscores are stripped from the stem

    Args:
        path: File name to normalize; must not contain '/'

    Returns:
        The normalized file name

    Raises:
        TypeError: If path is not a string
        ValueError: If path contains a forward slash
    """
    if not isinstance(path, str):
        raise TypeError("Input must be a string")

    if '/' in path:
        raise ValueError("Path should not contain forward slashes - use for filenames only")

    path = strip_ansi_codes(path).strip()
    name, ext = os.path.splitext(path)
    image = is_image(path)

    if not image and ext.lower() == '.md':
        ext = ""

    name = name.lower().replace(' ', '_').replace('-', '_').replace(',', '')
    # One pass of replace('__', '_') left "a___b" as "a__b"; collapse whole runs.
    name = re.sub(r'_{2,}', '_', name)

    # Decide on the full file name: the stem alone carries no extension, so
    # testing it (as before) never recognised an image.
    if image:
        name = name.rstrip('_')

    return f"{name}{ext}"


def path_fix(path: Union[str, Path]) -> Path:
    """
    Normalize only the final component of a path with ``name_fix``.

    The parent directories are kept exactly as given.

    Args:
        path: Path to normalize; anything that is not a str or Path is
            converted with ``str`` first

    Returns:
        Path with the normalized final component under the original parent
    """
    if not isinstance(path, (str, Path)):
        path = str(path)
    path = Path(path)

    return path.parent / name_fix(path.name)


def is_image(basename):
    """Return True when basename ends in a known image extension (any case)."""
    image_extensions = ('.jpg', '.jpeg', '.png', '.gif', '.svg')
    _, extension = os.path.splitext(basename)
    return extension.strip().lower() in image_extensions

View File

@@ -0,0 +1,9 @@
"""
MDServer package initialization.
This helps Python properly resolve the package imports.
"""
from .markdown_server import MDServer
from .factory import serve_markdown
from .process_markdown import process_markdown
# Public API of the package: the three names imported above.
__all__ = ['MDServer', 'serve_markdown', 'process_markdown']

View File

@@ -0,0 +1,19 @@
from typing import Optional, Union
from pathlib import Path
import sys
sys.path.append(str(Path(__file__).parent.parent))
from .markdown_server import MDServer # Import directly from the module file
def serve_markdown(collections_path: Union[str, Path]) -> None:
    """
    Legacy function to maintain backward compatibility.
    Creates an MDServer instance and serves the markdown content.

    Args:
        collections_path: Path to the collections directory. Can be a string
            or Path object; a Path is converted to its string form before it
            is handed to MDServer.
    """
    # The docstring has always promised Path support, but the value used to
    # be forwarded untouched to a constructor that expects a string.
    if isinstance(collections_path, Path):
        collections_path = str(collections_path)

    server = MDServer(collections_path=collections_path)
    server.serve_markdown()

View File

@@ -0,0 +1,55 @@
import re
from typing import TYPE_CHECKING
from mdcollections.base_types import MDPage
def js_to_python(js_str):
    """Convert JavaScript object notation to Python dictionary syntax.

    This is a text-level heuristic, not a parser: it rewrites the string
    with regular expressions and does not understand string literals.

    Args:
        js_str: JavaScript object literal, optionally prefixed with
            ``option =`` and terminated by ``;``.

    Returns:
        The rewritten text, stripped of surrounding whitespace.
    """
    # Remove any 'option =' prefix and trailing semicolon
    js_str = re.sub(r'^option\s*=\s*', '', js_str)
    js_str = re.sub(r';(\s*)$', '', js_str)

    # Quote bare property names
    js_str = re.sub(r'(\b\w+):', r'"\1":', js_str)

    # Single quotes become double quotes; escaped single quotes are parked
    # behind a placeholder so they survive the replacement
    js_str = js_str.replace("\\'", "___ESCAPED_QUOTE___")
    js_str = js_str.replace("'", '"')
    js_str = js_str.replace("___ESCAPED_QUOTE___", "\\'")

    # Drop trailing commas before a closing brace or bracket
    js_str = re.sub(r',(\s*[}\]])', r'\1', js_str)

    # Translate JavaScript literals. Whole words only: plain substring
    # replacement turned e.g. "nullable" into "Noneable".
    literals = {'true': 'True', 'false': 'False', 'null': 'None'}
    js_str = re.sub(r'\b(true|false|null)\b', lambda m: literals[m.group(1)], js_str)

    # Remove comments. A line comment runs to the end of its line, which
    # also covers a comment on the last line without a trailing newline.
    js_str = re.sub(r'//[^\n]*|/\*.*?\*/', '', js_str, flags=re.DOTALL)

    return js_str.strip()
def process_markdown_echarts(page: MDPage) -> MDPage:
    """Rewrite every ```echarts fence of the page into a ```py sl fence.

    The fence body is run through ``js_to_python`` and embedded as the
    ``option`` value of a ``st_echarts`` call. The page is modified in
    place (``content_``) and returned.

    Raises:
        TypeError: If page is not an MDPage.
    """
    if not isinstance(page, MDPage):
        raise TypeError("page must be a MDPage")

    fence_pattern = re.compile(r"```echarts\n(.*?)\n```", re.DOTALL)

    def to_streamlit_block(found):
        option_literal = js_to_python(found.group(1).strip())
        return f"""```py sl
from streamlit_echarts import st_echarts
option = {option_literal}
st_echarts(options=option, height="400px")
```"""

    page.content_ = fence_pattern.sub(to_streamlit_block, page.content)
    return page

View File

@@ -0,0 +1,119 @@
import os
import re
import streamlit as st
from PIL import Image # Pillow package provides PIL
from typing import Optional, List, Tuple, TYPE_CHECKING
from mdcollections.tools import name_fix, is_image
from mdcollections.base_types import MDPage
from mdcollections.mdcollections import MDCollections
from .process_images import process_image
from .tools import debug
def link_process(link: str, page: MDPage, collections: MDCollections, is_image_link: bool, debug_enabled: bool = False) -> str:
    """Normalise a markdown link target and verify it exists in the collections.

    Internal targets are reduced to their basename and brought into the
    ``collection__item`` form (``collection:item`` is accepted as well; a
    bare item name is resolved against the collection of ``page``).

    Args:
        link: Link target as written in the markdown source.
        page: Page the link appears on.
        collections: Collections used to verify that the target exists.
        is_image_link: True for ``![..](..)`` links, False for page links.
        debug_enabled: Accepted for compatibility; not used here.

    Returns:
        ``collection__image`` for images, ``?page=collection__page.md`` for
        pages, the unchanged link for external URLs and anchors, or a red
        HTML error span when the target cannot be found.

    Raises:
        TypeError: If an argument has the wrong type.
        RuntimeError: If the link contains more than one ``__``.
    """
    if not isinstance(link, str):
        raise TypeError("link must be strings")
    if not isinstance(collections, MDCollections):
        raise TypeError("collection must be MDCollection")
    if not isinstance(page, MDPage):
        raise TypeError("page must be MDPage")

    debug(f"\nProcessing link: {link}")
    debug(f"Is image link: {is_image_link}")

    # External URLs, mail links and in-page anchors are not collection items.
    # Leave them alone instead of reducing them to a basename and reporting
    # "not found".
    if link.startswith(('#', 'mailto:')) or re.match(r'^[A-Za-z][A-Za-z0-9+.\-]*://', link):
        debug(f"External link left untouched: {link}")
        return link

    # Remove './' if present
    if link.startswith("./"):
        link = link[2:]
        debug("Removed './' prefix from link")

    # Get just the filename without directories
    link = os.path.basename(link)
    debug(f"Extracted basename: {link}")

    # 'collection:item' is an alternative spelling of 'collection__item'
    if '__' not in link and ":" in link:
        link = link.replace(':', '__')

    # No collection given: the link points into the page's own collection
    if "__" not in link:
        link = f"{page.collection.name}__{link}"
        debug(f"Created full link: {link}")

    if link.count("__") > 1:
        raise RuntimeError(f"cannot have 2x __ in {link}")

    collection_name, item_name = link.split('__', 1)

    # Convert to lowercase and replace spaces with underscores
    item_name = name_fix(item_name)
    collection_name = name_fix(collection_name)
    debug(f"Normalized: '{collection_name}__{item_name}'")

    if is_image_link:
        try:
            collections.image_get(collection_name=collection_name, image_name=item_name)
            debug("Successfully verified image exists")
            return f"{collection_name}__{item_name}"
        except ValueError:
            debug(f"Error - image not found: {link}")
            return f'<span style="color: red;">ERROR: Image not found: {link}</span>'

    # Ensure .md extension for pages
    if not item_name.endswith('.md'):
        item_name = f"{item_name}.md"
        debug(f"Added .md extension: {item_name}")

    try:
        collections.page_get(collection_name, item_name)
        debug("Successfully verified page exists")
    except ValueError:
        debug(f"Error - page not found: {link}")
        return f'<span style="color: red;">ERROR: Page not found: {link}</span>'

    # item_name already ends in ".md"; appending it again produced
    # "?page=collection__page.md.md".
    return f"?page={collection_name}__{item_name}"
def process_links(page: MDPage, collections: MDCollections) -> MDPage:
    """Rewrite every markdown link and image reference of a page.

    Each ``[text](target)`` / ``![text](target)`` occurrence is passed to
    ``link_process``. An error result replaces the whole link; any other
    result replaces only the target. The page is modified in place
    (``content_``) and returned.

    Raises:
        TypeError: If page or collections has the wrong type.
    """
    if not isinstance(page, MDPage):
        raise TypeError("page must be a MDPage")
    if not isinstance(collections, MDCollections):
        raise TypeError("collections must be a MDCollections")

    debug(f"Processing links for page: {page.name}")
    debug(f"Content length before processing: {len(page.content)} characters")

    def replace_link(match):
        bang, link_text, link_path = match.groups()
        is_image_link = bang == '!'

        debug(f"Found link - Text: {link_text}, Path: {link_path}")
        debug(f"Is image link: {is_image_link}")

        processed_link = link_process(link_path, page, collections, is_image_link)

        # Errors come back as red HTML and are forwarded unchanged
        if "ERROR:" in processed_link:
            debug(f"Link processing error: {processed_link}")
            return processed_link

        if not is_image_link:
            debug(f"Returning processed text link: [{link_text}]({processed_link})")
            return f'[{link_text}]({processed_link})'

        debug(f"Returning processed image link: ![{link_text}]({processed_link})")
        return f'![{link_text}]({processed_link})'

    link_pattern = r'(!?)\[(.*?)\]\((.*?)\)'
    page.content_ = re.sub(link_pattern, replace_link, page.content)

    debug(f"Content length after processing: {len(page.content)} characters")
    debug("Link processing complete")
    return page

View File

@@ -0,0 +1,29 @@
import re
from typing import TYPE_CHECKING
from mdcollections.base_types import MDPage
def process_markdown_mermaid(page: MDPage) -> MDPage:
    """Convert ```mermaid blocks to ```py sl blocks that use st_mermaid.

    The page is modified in place (``content_``) and returned.

    Args:
        page: Page whose content is rewritten.

    Returns:
        The same page object.

    Raises:
        TypeError: If page is not an MDPage.
    """
    if not isinstance(page, MDPage):
        raise TypeError("page must be a MDPage")

    def replace_mermaid_block(match):
        mermaid_code = match.group(1).strip()

        # Embed the diagram as a Python string literal built with repr().
        # Escaping only double quotes left backslashes in the diagram to be
        # interpreted as escape sequences when the generated code runs.
        # The surrounding newlines match what the old triple-quoted form passed.
        diagram_literal = repr(f"\n{mermaid_code}\n")

        return f'''```py sl
from streamlit_mermaid import st_mermaid
st_mermaid({diagram_literal})
```'''

    # Process all mermaid code blocks
    page.content_ = re.sub(r"```mermaid\n(.*?)\n```", replace_mermaid_block, page.content, flags=re.DOTALL)
    return page

View File

@@ -0,0 +1,69 @@
import re
import streamlit as st
import pandas as pd
import numpy as np
from io import StringIO
import sys
from typing import TYPE_CHECKING
from mdcollections.base_types import MDPage
# if TYPE_CHECKING:
# from .markdown_server import MDServer
def execute_streamlit_code(code_block):
    """
    Execute a streamlit code block and capture what it prints.

    The code runs with ``st``, ``pd`` and ``np`` available as globals.

    Args:
        code_block: Python source to execute.

    Returns:
        ``(True, printed_output)`` on success, or ``(False, message)`` where
        the message holds the error text followed by the failed code.
    """
    from contextlib import redirect_stdout

    captured = StringIO()
    try:
        # redirect_stdout restores sys.stdout on exit, also when the code raises.
        with redirect_stdout(captured):
            # SECURITY NOTE(review): this runs arbitrary Python taken from page
            # content. Only serve collections whose authors are trusted.
            exec(code_block, {
                'st': st,
                'pd': pd,
                'np': np
            })
    except Exception as e:
        return False, f"Error: {str(e)}\n\nFailed code:\n{code_block}"

    return True, captured.getvalue()
def process_streamlit_blocks(page: MDPage) -> MDPage:
    """
    Run every ```py sl fence of the page and remove it from the content.

    A fence that executes cleanly is replaced by an empty string, so only
    what the code rendered through streamlit remains visible. A fence that
    fails is replaced by a plain code fence holding the error report.
    The page is modified in place (``content_``) and returned.

    Raises:
        TypeError: If page is not an MDPage.
    """
    if not isinstance(page, MDPage):
        raise TypeError("page must be a MDPage")

    def run_block(found):
        ok, output = execute_streamlit_code(found.group(1).strip())
        if ok:
            return ""
        return f"```\n{output}\n```"

    page.content_ = re.sub(r"```py\s+sl\n(.*?)\n```", run_block, page.content, flags=re.DOTALL)
    return page

View File

@@ -0,0 +1,76 @@
import re
import streamlit as st
from PIL import Image
from typing import TYPE_CHECKING, List
from mdcollections.base_types import MDPage, MDImage
# if TYPE_CHECKING:
# from .markdown_server import MDServer
def create_slider_component(images: List[str]) -> None:
    """Create a Streamlit component that pages through a list of images.

    Args:
        images: Image specs in ``collection__image`` form, one per slide.

    The current slide index is kept in ``st.session_state.current_slide``.
    """
    st.markdown("""
<style>
.stImage {
cursor: pointer;
}
</style>
""", unsafe_allow_html=True)

    # Without slides, the modulo arithmetic below would divide by zero
    if not images:
        st.warning("No slides to display")
        return

    # Initialize session state
    if 'current_slide' not in st.session_state:
        st.session_state.current_slide = 0

    # Navigation buttons
    col1, col2, col3 = st.columns([1, 4, 1])

    with col1:
        if st.button("⬅️ Previous"):
            st.session_state.current_slide = (st.session_state.current_slide - 1) % len(images)

    with col3:
        if st.button("Next ➡️"):
            st.session_state.current_slide = (st.session_state.current_slide + 1) % len(images)

    # Keep the index inside this list even if it was stored for a longer one
    st.session_state.current_slide %= len(images)
    current_image_spec = images[st.session_state.current_slide]

    # Prefer the manager stored directly in the session state, and fall back
    # to an ``md_server`` entry for callers that provide one.
    manager = st.session_state.get('collections_manager')
    if not manager:
        md_server = st.session_state.get('md_server')
        manager = getattr(md_server, 'collections_manager', None)
    if not manager:
        st.error("Collections manager not initialized")
        return

    try:
        # image_get needs the collection and the image separately
        collection_name, separator, image_name = current_image_spec.partition("__")
        if not separator:
            raise ValueError("expected the form 'collection__image'")
        image_item = manager.image_get(collection_name, image_name)
        # Close the file handle once streamlit has consumed the image
        with Image.open(image_item.path) as image:
            st.image(image, use_column_width=True)
    except Exception as e:
        st.error(f"Could not load image: {current_image_spec}. Error: {str(e)}")

    # Display slide counter
    st.caption(f"Slide {st.session_state.current_slide + 1} of {len(images)}")
def process_markdown_slides(page: MDPage) -> MDPage:
    """Convert ```slides blocks to ```py sl blocks that use the slider component.

    Each non-empty line of a slides block is taken as one image spec. The
    page is modified in place (``content_``) and returned.

    Args:
        page: Page whose content is rewritten.

    Returns:
        The same page object.

    Raises:
        TypeError: If page is not an MDPage.
    """
    if not isinstance(page, MDPage):
        raise TypeError("page must be a MDPage")

    def replace_slides_block(match):
        slides_content = match.group(1).strip()
        image_paths = [line.strip() for line in slides_content.split('\n') if line.strip()]

        # Create the streamlit code block
        image_paths_str = repr(image_paths)
        # The generated code is run later without any package context, where
        # a relative import ("from .macro_slides import ...") cannot resolve.
        # Import this module by its absolute name instead.
        streamlit_code = f'''```py sl
from {__name__} import create_slider_component
create_slider_component({image_paths_str})
```'''
        return streamlit_code

    # Process all slides code blocks
    page.content_ = re.sub(r"```slides\n(.*?)\n```", replace_slides_block, page.content, flags=re.DOTALL)
    return page

View File

@@ -0,0 +1,237 @@
from typing import Optional, Union
import os
from pathlib import Path
import traceback
import sys
import re
import pudb
try:
import streamlit as st
except ImportError:
raise ImportError("streamlit is required. Install with: pip install streamlit")
from mdcollections.base_types import MDPage, MDImage, MDCollection
from mdcollections.mdcollections import MDCollections
from .process_markdown import process_markdown, summary_load
from .tools import debug
def setup_static_dir(collections_path: str) -> None:
    """
    Placeholder for preparing a static directory for serving images.

    Currently does nothing and returns None.

    Args:
        collections_path: Path to the collections directory (unused)
    """
    # NOTE: an earlier implementation, since disabled, created
    # "<collections_path>/static" and symlinked every non-hidden collection
    # directory into it, logging symlink failures with debug().
    return None
def process_markdown_content(content: str, base_path: str, collection_name: str) -> None:
    """
    Display markdown content with ``st.markdown``.

    No processing happens here: the content is rendered as given.

    Args:
        content: The markdown content to display
        base_path: Base path for resolving relative paths (not used by the body)
        collection_name: Name of the collection (not used by the body)
    """
    st.markdown(content)
class MDServer:
    """Streamlit front-end that serves the markdown collections of one directory.

    All state lives in ``st.session_state``: the collections manager, the
    current collection and page, the collections-view flag and the debug flag.
    """

    def __init__(self, collections_path: str):
        """Initialize the MDServer instance.

        Args:
            collections_path: Directory containing the collections, as a
                string or path-like object. A leading ``~`` is expanded.

        Raises:
            TypeError: If collections_path is not a string or path-like.
            ValueError: If the directory does not exist (raised by MDCollections).
        """
        # Convert path to string if it's a Path object. The old code did
        # ``return RuntimeError(...)`` here, which Python itself turned into
        # a TypeError because __init__ must return None.
        try:
            collections_path = os.fspath(collections_path)
        except TypeError:
            raise TypeError("collections_path must be a string.") from None
        if not isinstance(collections_path, str):
            # os.fspath may legitimately return bytes
            raise TypeError("collections_path must be a string.")

        st.session_state.setdefault('current_collection', None)
        st.session_state.setdefault('current_page', None)
        st.session_state.setdefault('show_collections_view', False)
        st.session_state.setdefault('collections_manager', None)
        st.session_state.setdefault('debug_mode', True)

        # Get the collections manager
        collections_path = os.path.expanduser(collections_path)
        print(f"Initializing collections manager for: {collections_path}")
        collections_manager = MDCollections(root_path=Path(collections_path))

        # Set up static directory for serving images
        setup_static_dir(collections_path)

        # Set up page config
        st.set_page_config(
            page_title="Markdown Server",
            page_icon="📚",
            layout="wide",
            initial_sidebar_state="expanded",
        )

        st.session_state.collections_manager = collections_manager

    @property
    def collections_manager(self) -> MDCollections:
        """
        Property to safely access the collections manager.

        Returns:
            MDCollections: The collections manager stored in the session state

        Raises:
            RuntimeError: If collections_manager is not initialized
        """
        if not st.session_state.get('collections_manager'):
            raise RuntimeError("Collections manager not initialized. Please ensure MDServer is properly initialized.")
        return st.session_state.collections_manager

    @property
    def collections(self) -> list:
        """
        Property to safely access collections from the collections manager.

        Returns:
            list: List of available collections

        Raises:
            RuntimeError: If collections_manager is not initialized
        """
        return self.collections_manager.collections

    def handle_url_parameters(self) -> None:
        """
        Handle URL parameters to load specific pages.

        Expected format: ?page=collection_name__page_name.md
        Example: ?page=banking_whitepaper__web_3_vision.md

        On success the processed page and its collection become the current
        ones. A malformed or unknown page is reported with ``st.warning``.
        """
        requested_page = st.query_params.get('page', None)
        if not requested_page:
            return

        try:
            # Split the page parameter using '__' as delimiter
            if '__' not in requested_page:
                raise ValueError(f"Invalid page format. Expected format: collection_name__page_name.md, got: {requested_page}")

            collection_name, page_name = requested_page.split('__', 1)

            page = self.collections_manager.page_get(
                collection_name=collection_name,
                page_name=page_name
            )
            page = process_markdown(page, collections=self.collections_manager)

            st.session_state.current_collection = page.collection
            st.session_state.current_page = page
            # A page was requested explicitly, so leave the collections
            # overview. The flag was previously never cleared once set.
            st.session_state.show_collections_view = False

        except ValueError as e:
            # Handle invalid format or page not found errors
            st.warning(f"Could not load page: {requested_page}. Error: {str(e)}")

    def setup_sidebar(self, collections: MDCollections) -> None:
        """
        Set up the sidebar with the debug toggle, the collections-view
        button, the collection selector and the collection summary.

        Args:
            collections: Accepted for compatibility; the collections are
                read from the manager in the session state.
        """
        with st.sidebar:
            # Debug Mode toggle, kept in the session state across reruns
            st.session_state.debug_mode = st.toggle("Debug Mode", st.session_state.debug_mode)

            # Collections View action
            if st.button("View All Collections"):
                st.session_state.show_collections_view = True
                st.session_state.current_page = None
                return

            collection_names = [c.name for c in self.collections]
            current = st.session_state.current_collection
            current_idx = collection_names.index(current.name) if current else 0

            selected_collection_name = st.selectbox(
                "Choose a collection:",
                collection_names,
                index=current_idx,
                key="collection_selector"
            )

            # Show the summary page of the selected collection
            collection = self.collections_manager.collection_get(selected_collection_name)
            summary_page = summary_load(collection)
            st.markdown(summary_page.content, unsafe_allow_html=True)

        st.session_state.current_collection = collection

    def display_content(self) -> None:
        """Display the markdown content in the main area.

        Shows, in order of priority: the collections overview, the current
        page, the index of the current collection, or a hint to select a
        collection. Any error is reported with ``st.error``.
        """
        main_content = st.container()

        with main_content:
            try:
                if st.session_state.show_collections_view:
                    # Read the collections view template; it is a UTF-8 text
                    # file, so do not depend on the platform default encoding
                    collections_view_path = Path(__file__).parent / "pages" / "collections_view.md"
                    with open(collections_view_path, 'r', encoding='utf-8') as f:
                        template = f.read()

                    # Replace placeholder with actual collections string representation
                    content = template.replace("{collections_str}", str(self.collections_manager))
                    st.markdown(content)

                elif st.session_state.current_page:
                    st.markdown(st.session_state.current_page.content, unsafe_allow_html=True)

                elif st.session_state.current_collection:
                    # Display the collection index when no specific page is selected
                    st.markdown("### Collection Index")
                    myindex_page = st.session_state.current_collection.index_page()
                    myindex_page = process_markdown(myindex_page, collections=self.collections_manager)
                    st.markdown(myindex_page.content)
                else:
                    st.warning("Please select a collection.")
            except Exception as e:
                st.error(f"An error occurred: {str(e)}")

    def serve_markdown(self) -> None:
        """
        Serve markdown content using Streamlit.

        Runs URL handling, sidebar setup and content display in that order.
        Any error is reported with ``st.error``.
        """
        try:
            if not self.collections:
                st.error("No collections found.")
                return

            self.handle_url_parameters()
            self.setup_sidebar(self.collections_manager)
            self.display_content()
        except Exception as e:
            st.error(f"An error occurred: {str(e)}")

View File

@@ -0,0 +1,7 @@
# Collections Overview
```python
{collections_str}
```
The tree structure above is automatically generated from the current state of the collections manager.

View File

@@ -0,0 +1,89 @@
import os
import re
import streamlit as st
from PIL import Image # Pillow package provides PIL
from typing import Optional, List, Tuple, TYPE_CHECKING
from mdcollections.base_types import MDImage, MDPage
from mdcollections.mdcollections import MDCollections
from .tools import debug
def process_image(myimage: MDImage, alt_text: Optional[str] = None) -> str:
    """Return an HTML ``<img>`` tag for an image so it renders inside markdown.

    The image file is opened once to check that Pillow can read it; the tag
    itself points at a static URL built from the collection name and the
    image's relative path.

    Args:
        myimage: The MDImage object to process
        alt_text: Optional alternative text for the image

    Returns:
        str: HTML img tag with inline styling, or the plain text
        ``"Error loading image: <path>"`` when the image cannot be processed.

    Raises:
        TypeError: if ``myimage`` is not an MDImage.
    """
    # Local import keeps this block self-contained; html is standard library.
    import html

    if not isinstance(myimage, MDImage):
        raise TypeError("myimage must be a MDImage")
    try:
        # Verify the image can be opened. The context manager closes the
        # underlying file handle, which a bare Image.open() call leaves open.
        with Image.open(myimage.path):
            pass
        # Construct static URL using collection name and relative path
        static_url = f"/app/static/{myimage.collection.name}/{myimage.rel_path}"
        # Alt text comes from page content: escape it so quotes or angle
        # brackets cannot break out of the attribute.
        safe_alt = html.escape(alt_text or "", quote=True)
        return f'<img src="{static_url}" alt="{safe_alt}" style="max-width: 100%; height: auto; display: inline-block; margin: 0.5em 0;">'
    except Exception as e:
        debug(f"Error processing image {myimage.path}: {str(e)}")
        return f"Error loading image: {myimage.path}"
def process_images(page: MDPage, collections: MDCollections) -> MDPage:
    """Replace markdown image links in a page with rendered image markup.

    Every ``![alt text](path)`` occurrence in the page content is substituted
    in place; the surrounding text is left untouched. The path must contain a
    ``__`` separator. Links that cannot be resolved are replaced by a short
    plain-text error message rather than raising.

    Args:
        page: The MDPage object containing markdown content
        collections: The MDCollections object used to look up images

    Returns:
        MDPage: the same page object, with ``content_`` updated.

    Raises:
        TypeError: if ``page`` is not an MDPage or ``collections`` is not an
            MDCollections.
    """
    if not isinstance(page, MDPage):
        raise TypeError("page must be a MDPage")
    if not isinstance(collections, MDCollections):
        raise TypeError("collections must be a MDCollections")
    debug(f"Processing images for page: {page.name}")
    debug(f"Content length before processing: {len(page.content)} characters")

    def render_image_link(found):
        """Turn one matched image link into its replacement text."""
        alt_text, image_path = found.group(1), found.group(2)
        try:
            before_sep, separator, after_sep = image_path.partition("__")
            if not separator:
                debug(f"Invalid image path format (missing __): {image_path}")
                return f"Invalid image path format: {image_path}"
            # NOTE(review): the part before "__" is treated as the image name
            # and the part after as the collection name; other helpers use
            # "collection__file" - confirm against MDCollections.image_get.
            image_name, collection_name = before_sep, after_sep
            debug(f"Found image link, will now check - Alt text: {alt_text}, Image: '{image_name}', Collection: '{collection_name}'")
            myimage = collections.image_get(image_name, collection_name)
            # An empty alt text is passed on as None.
            return process_image(myimage, alt_text or None)
        except ValueError as e:
            debug(f"Image not found in collection: {image_path}.\n{e}")
            return f"Image not found: {image_path}"
        except Exception as e:
            debug(f"Error processing image {image_path}: {str(e)}")
            return f"Error processing image: {image_path}"

    # Markdown image syntax: ![alt text](path)
    image_link = re.compile(r'!\[(.*?)\]\((.*?)\)')
    page.content_ = image_link.sub(render_image_link, page.content)
    debug("Image processing complete")
    return page

View File

@@ -0,0 +1,80 @@
import os
import re
import streamlit as st
from PIL import Image # Pillow package provides PIL
from typing import Optional, List, Tuple, TYPE_CHECKING
from .macro_sl import process_streamlit_blocks
from .macro_chart import process_markdown_echarts
from .macro_mermaid import process_markdown_mermaid
from .macro_slides import process_markdown_slides
from .macro_sl import process_streamlit_blocks
from .macro_links import process_links
from .process_images import process_images
from mdcollections.tools import name_fix, is_image
from mdcollections.base_types import MDPage, MDCollection
from mdcollections.mdcollections import MDCollections
from .tools import debug,rewrite_summary_links
def summary_load(collection: MDCollection) -> MDPage:
    """Return the collection's ``summary.md`` page, or its index as a fallback.

    When ``collection.page_get("summary.md")`` raises ``ValueError`` the
    result of ``collection.index_page()`` is returned instead. Otherwise the
    summary's link targets are rewritten with ``rewrite_summary_links`` and
    the page is returned.

    Args:
        collection: The MDCollection to load the summary for

    Returns:
        MDPage: the rewritten summary page, or the collection index page.

    Raises:
        TypeError: if ``collection`` is not an MDCollection.
    """
    if not isinstance(collection, MDCollection):
        raise TypeError("collection must be a MDCollection")
    try:
        mypage = collection.page_get("summary.md")
    except ValueError:
        return collection.index_page()
    # Read through the ``content`` property rather than the ``content_``
    # cache: the cache is empty until the page is first read, so rewriting it
    # directly silently skipped the rewrite for a freshly loaded page.
    # NOTE(review): every call rewrites one more slash per link if the same
    # page object is reused across calls - confirm pages are not cached.
    mypage.content_ = rewrite_summary_links(mypage.content)  # need to rewrite the first part of path as collection, might change in future
    return mypage
def process_markdown(page: MDPage, collections: MDCollections) -> MDPage:
    """Run a page through the markdown pipeline and render what remains.

    The page passes, in order, through the echarts, mermaid, slides and
    streamlit-block macros, then through link processing and image
    processing. If the resulting content is not blank it is rendered with
    ``st.markdown`` (raw HTML allowed).

    Args:
        page: The MDPage object to process
        collections: The MDCollections object containing all collections

    Returns:
        MDPage: the page object returned by the last processing step.

    Raises:
        TypeError: if ``page`` is not an MDPage or ``collections`` is not an
            MDCollections.
    """
    if not isinstance(page, MDPage):
        raise TypeError("page must be a MDPage")
    if not isinstance(collections, MDCollections):
        raise TypeError("collections must be a MDCollections")
    debug(f"Processing markdown for page: {page.name} in collection: {page.collection.name}\nInitial content length: {len(page.content)} characters")
    if page.processed:
        # NOTE(review): this exception is constructed but never raised, so the
        # check has no effect. MDItem appears to initialise ``processed`` to
        # the ``bool`` type (always truthy) - fix that before adding ``raise``.
        RuntimeError(f"double processing of page {page.name}")

    # Block macros that take only the page, applied strictly in this order.
    block_macros = (
        process_markdown_echarts,
        process_markdown_mermaid,
        process_markdown_slides,
        process_streamlit_blocks,
    )
    for apply_macro in block_macros:
        page = apply_macro(page)

    # Links and images need the collections to resolve their targets.
    page = process_links(page=page, collections=collections)
    page = process_images(page=page, collections=collections)

    # Nothing left to show once the macros consumed their blocks.
    if not page.content.strip():
        debug("No content to render after processing")
        return page

    debug(f"Rendering final markdown content (length: {len(page.content)} characters)")
    st.markdown(page.content, unsafe_allow_html=True)
    return page
def parse_page_parameter(page_param: str) -> Tuple[Optional[str], str]:
    """Split a page parameter into ``(collection, filename)``.

    The parameter is split at the first ``__``. When there is no ``__`` the
    collection is ``None`` and the whole parameter is the file name.
    """
    if '__' not in page_param:
        return None, page_param
    collection, _, filename = page_param.partition('__')
    return collection, filename

View File

@@ -0,0 +1,5 @@
streamlit>=1.24.0
pandas>=1.5.0
numpy>=1.24.0
ipython>=8.0.0
Pillow>=10.0.0

View File

@@ -0,0 +1,43 @@
import re
import streamlit as st
def strip_ansi_codes(text):
    """Return *text* with every ANSI escape sequence removed."""
    # ESC followed by either a single character in the ranges @-Z or \-_,
    # or "[" plus parameter bytes, intermediate bytes and a final byte.
    ansi_pattern = r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])'
    return re.sub(ansi_pattern, '', text)
def debug(message: str):
    """Print a debug message to stdout when debug mode is enabled.

    Debug mode is read from the ``debug_mode`` key of ``st.session_state``;
    a missing key is treated as disabled. ANSI escape codes are stripped from
    the message before it is printed.

    Args:
        message: The debug message to display
    """
    # Use .get() rather than attribute access: a logging helper must not
    # raise in its caller just because debug_mode was never initialised.
    debug_enabled = st.session_state.get("debug_mode", False)
    if debug_enabled:
        print(strip_ansi_codes(message))
def rewrite_summary_links(text: str) -> str:
    """Rewrite parenthesised link targets from ``a/b/c`` to ``a__b/c``.

    Each line is processed on its own. In every ``(...)`` group the first
    ``/`` is replaced by ``__``. Targets that start with a URL scheme
    (``https://``, ``ftp://`` and so on) are left untouched so external links
    keep working.

    Args:
        text: The summary markdown text

    Returns:
        str: The rewritten text, with lines joined by ``\\n`` (a trailing
        newline in the input is not preserved).
    """
    import re

    # Any parenthesised group, e.g. the target of [title](path/to/resource)
    paren_group = re.compile(r'\(([^)]+)\)')
    # Absolute URLs such as https://host/path must not be rewritten.
    url_scheme = re.compile(r'[A-Za-z][A-Za-z0-9+.\-]*://')

    def replace_first_slash(match):
        link = match.group(1)
        if url_scheme.match(link):
            return match.group(0)
        # Replace only the first slash with a double underscore
        new_link = link.replace('/', '__', 1)
        return f'({new_link})'

    return '\n'.join(
        paren_group.sub(replace_first_slash, line) for line in text.splitlines()
    )