This commit is contained in:
2025-08-05 15:15:36 +02:00
parent 4bd960ed05
commit 7fabb4163a
192 changed files with 14901 additions and 0 deletions

View File

@@ -0,0 +1,13 @@
from .base_types import MDItem, MDPage, MDImage, MDCollection
from .mdcollections import MDCollections
from .scanner import scan_directory
# Public API of the package. These are the names exposed by a star-import of
# the package; each one is imported from its submodule just above.
__all__ = [
'MDItem',
'MDPage',
'MDImage',
'MDCollection',
'MDCollections',
'scan_directory'
]

View File

@@ -0,0 +1,177 @@
from pathlib import Path
from typing import List, Dict
from dataclasses import dataclass
from .tools import name_fix
import os
class MDItem:
    """Base class for items (pages, images) that belong to a collection.

    Attributes:
        collection: Owning collection; its ``path`` is the root directory
            that ``rel_path`` is joined onto.
        rel_path: Location of the item relative to the collection root.
        content_: Cached text content; starts out empty.
        processed: Boolean flag, ``False`` on construction.
    """

    def __init__(self, collection: "MDCollection", rel_path: Path):
        """
        Create an item.
        Args:
            collection: Collection the item belongs to
            rel_path: Path of the item relative to the collection root
        Raises:
            TypeError: If rel_path is not a Path instance
        """
        if not isinstance(rel_path, Path):
            raise TypeError("rel_path must be a Path instance")
        self.collection = collection
        self.rel_path = rel_path
        self.content_ = ""
        # This used to be assigned the ``bool`` *type*, which is always truthy,
        # so every fresh item looked "processed". Start with a real False.
        self.processed: bool = False

    def __str__(self) -> str:
        return f"{self.__class__.__name__}: {self.rel_path}"

    @property
    def full_path(self) -> Path:
        """Returns the collection root joined with the item's relative path."""
        return self.collection.path / self.rel_path

    @property
    def path(self) -> str:
        """Returns the resolved (absolute) full path of the item as a string."""
        return str(self.full_path.resolve())

    @property
    def name(self) -> str:
        """Returns the item's file name (last path component) after name_fix."""
        return name_fix(os.path.basename(self.rel_path))
class MDPage(MDItem):
    """Represents a markdown file in the collection."""

    @property
    def content(self) -> str:
        """
        Text of the markdown file, read lazily and cached in ``content_``.
        If ``content_`` is already non-empty (for example because it was
        assigned directly) it is returned without touching the file system.
        An empty file leaves the cache empty, so it is re-read on each access.
        Returns:
            The file content decoded as UTF-8
        Raises:
            FileNotFoundError: If the file does not exist
            Exception: If the file exists but cannot be read (the original
                OSError is attached as the cause)
        """
        if not self.content_:
            # self.path resolves the full path on every access; do it once.
            file_path = self.path
            if not os.path.exists(file_path):
                raise FileNotFoundError(f"Cannot find markdown file: {file_path}")
            try:
                with open(file_path, 'r', encoding='utf-8') as f:
                    self.content_ = f.read()
            except OSError as e:
                # Chain the cause so the underlying errno/traceback is kept.
                raise Exception(f"Error reading file {file_path}: {e}") from e
        return self.content_
class MDImage(MDItem):
    """An image file that belongs to a collection.

    Adds no behaviour of its own on top of MDItem.
    """
@dataclass
class MDCollection:
    """A named directory holding markdown pages and images."""

    path: Path  # root directory of the collection
    name: str  # collection name
    items: List[MDItem]  # every page and image found in the collection

    def page_get(self, name: str) -> MDPage:
        """
        Look up a markdown page by name.
        Args:
            name: Page name, with or without a trailing ``.md``; it is
                normalized with name_fix before comparison
        Returns:
            The first MDPage whose normalized stem equals the normalized name
        Raises:
            ValueError: If name contains ``__`` or no page matches
        """
        if "__" in name:
            raise ValueError("there should be no __ in name of page_get")
        # Drop a trailing .md so the comparison is stem against stem.
        name = name[:-3] if name.endswith('.md') else name
        wanted = name_fix(name)
        for candidate in self.items:
            if not isinstance(candidate, MDPage):
                continue
            if name_fix(candidate.rel_path.stem) == wanted:
                return candidate
        raise ValueError(f"Page not found: {name}")

    def image_get(self, name: str) -> MDImage:
        """
        Look up an image by file name.
        Args:
            name: Image file name including extension; normalized with
                name_fix before comparison
        Returns:
            The first MDImage whose normalized file name equals the
            normalized name
        Raises:
            ValueError: If no image matches
        """
        wanted = name_fix(name)
        matches = (
            entry
            for entry in self.items
            # Images are compared on the full file name, extension included.
            if isinstance(entry, MDImage)
            and name_fix(os.path.basename(entry.rel_path)) == wanted
        )
        found = next(matches, None)
        if found is None:
            raise ValueError(f"Image not found: {name}")
        return found

    def __str__(self) -> str:
        """Render the collection as a header line plus page and image listings."""
        lines = [f"Collection: {self.name} ({self.path})"]
        # Pages first, then images; a section is omitted when it is empty.
        for heading, kind in ((" Pages:", MDPage), (" Images:", MDImage)):
            members = [entry for entry in self.items if isinstance(entry, kind)]
            if not members:
                continue
            lines.append(heading)
            members.sort(key=lambda entry: str(entry.rel_path))
            for member in members:
                lines.append(f" └─ {member.name}")
        return "\n".join(lines)

    def index_page(self) -> MDPage:
        """
        Build an in-memory ``index.md`` page linking every markdown page.
        Pages are grouped by their parent directory; links have the form
        ``<collection name>__<relative path>``.
        Returns:
            An MDPage at ``index.md`` whose ``content_`` holds the generated
            markdown (nothing is written to disk)
        """
        pages = [entry for entry in self.items if isinstance(entry, MDPage)]
        pages.sort(key=lambda entry: str(entry.rel_path))

        # Bucket pages by parent directory; top-level pages go under 'Root'.
        grouped: Dict[str, List[MDPage]] = {}
        for page in pages:
            folder = str(page.rel_path.parent)
            if folder == '.':
                folder = 'Root'
            grouped.setdefault(folder, []).append(page)

        # The Root heading is only worth showing next to other directories.
        show_root_heading = len(grouped) > 1

        lines = ["# Collection Index\n"]
        for folder in sorted(grouped):
            if folder != 'Root':
                lines.append(f"\n## {folder}\n")
            elif show_root_heading:
                lines.append("\n## Root Directory\n")
            for page in sorted(grouped[folder], key=lambda entry: entry.name):
                # Human-readable title derived from the file stem.
                title = page.rel_path.stem.replace('_', ' ').replace('-', ' ').title()
                target = str(page.rel_path)
                lines.append(f'- [{title}]({self.name}__{target})')

        index = MDPage(self, Path("index.md"))
        index.content_ = "\n".join(lines)
        return index

View File

@@ -0,0 +1,25 @@
import os
from pathlib import Path
from typing import Optional
from .mdcollections import MDCollections
def create_collections(path: Optional[str] = None) -> MDCollections:
    """
    Build an MDCollections manager for the given directory.
    Args:
        path: Directory to scan for collections; a leading ``~`` is expanded
            to the user's home directory. The parameter is optional in the
            signature only - there is no fallback location, so a value must
            be supplied.
    Returns:
        MDCollections instance rooted at the expanded path
    Raises:
        ValueError: If path is None
    """
    if path is None:
        raise ValueError("Path cannot be None")
    root = Path(os.path.expanduser(path))
    return MDCollections(root_path=root)

View File

@@ -0,0 +1,112 @@
from pathlib import Path
from typing import List, Optional
from .base_types import MDCollection, MDPage, MDImage, MDItem
from .scanner import scan_directory
from .tools import name_fix
class MDCollections:
    """Manages the markdown collections found directly under a root directory."""

    def __init__(self, root_path: Path):
        """
        Initialize collections manager and scan the root immediately.
        Args:
            root_path: Root directory containing collections
        Raises:
            ValueError: If root_path does not exist or is not a directory
        """
        self.root_path = root_path
        self.collections: List[MDCollection] = []
        self._scan_collections()

    def _scan_collections(self):
        """
        Scan the root directory and register one collection per subdirectory.
        Only immediate subdirectories are considered. Directories whose name
        starts with ``_`` or contains ``archive`` (case-insensitive) are
        skipped, as are directories in which the scanner finds no items.
        Raises:
            ValueError: If the root path does not exist or is not a directory
        """
        if not self.root_path.exists():
            raise ValueError(f"Root path does not exist: {self.root_path}")
        # Report a file passed as root the same way scan_directory does,
        # instead of letting iterdir() raise NotADirectoryError.
        if not self.root_path.is_dir():
            raise ValueError(f"Root path is not a directory: {self.root_path}")
        for path in sorted(self.root_path.iterdir()):
            if not path.is_dir():
                continue
            if path.name.startswith('_') or 'archive' in path.name.lower():
                continue
            items = scan_directory(path)
            if not items:
                # Nothing found (neither pages nor images): not a collection.
                continue
            collection = MDCollection(
                path=path,
                name=path.name,
                items=sorted(items, key=lambda x: x.name),
            )
            # scan_directory attaches items to a throwaway collection; point
            # them at the one that is actually kept.
            for item in collection.items:
                item.collection = collection
            self.collections.append(collection)
        self.collections.sort(key=lambda x: x.name)

    def collection_get(self, name: str) -> MDCollection:
        """
        Get a collection by name.
        An exact match on the directory name wins. Otherwise the lookup falls
        back to comparing name_fix-normalized names, because page_get and
        image_get normalize the collection name before calling this method
        and would otherwise never find a directory such as ``My-Docs``.
        Args:
            name: Name of the collection to find
        Returns:
            MDCollection object
        Raises:
            ValueError: If collection not found
        """
        for collection in self.collections:
            if collection.name == name:
                return collection
        try:
            wanted = name_fix(name)
        except (TypeError, ValueError):
            # A name that cannot be normalized cannot match anything.
            wanted = None
        if wanted is not None:
            for collection in self.collections:
                if name_fix(collection.name) == wanted:
                    return collection
        raise ValueError(f"Collection not found: {name}")

    def page_get(self, collection_name: str, page_name: str) -> MDPage:
        """
        Get a page from a specific collection.
        Args:
            collection_name: Name of the collection
            page_name: Name of the page
        Returns:
            MDPage object
        Raises:
            ValueError: If collection or page not found
        """
        page_name = name_fix(page_name)
        collection_name = name_fix(collection_name)
        collection = self.collection_get(collection_name)
        return collection.page_get(page_name)

    def image_get(self, collection_name: str, image_name: str) -> MDImage:
        """
        Get an image from a specific collection.
        Args:
            collection_name: Name of the collection
            image_name: Name of the image. May be given as
                ``<collection>__<image>``; the prefix then overrides
                collection_name
        Returns:
            MDImage object
        Raises:
            ValueError: If collection or image not found
        """
        if "__" in image_name:
            # The prefix is the collection and the remainder the image. The
            # two targets used to be unpacked in the opposite order, which
            # looked the image up as a collection.
            collection_name, image_name = image_name.split("__", 1)
        image_name = name_fix(image_name)
        collection_name = name_fix(collection_name)
        collection = self.collection_get(collection_name)
        # NOTE(review): debug output kept as-is; consider moving to logging.
        print(f" -- image get: '{collection_name}' '{image_name}'")
        return collection.image_get(image_name)

    def __str__(self) -> str:
        """Returns a string representation of all collections."""
        if not self.collections:
            return "No collections found"
        return "\n\n".join(str(collection) for collection in self.collections)

View File

@@ -0,0 +1,61 @@
from pathlib import Path
from typing import List, Sequence
from .base_types import MDItem, MDPage, MDImage, MDCollection
def _has_hidden_part(rel_path: Path) -> bool:
    """Return True if any component of rel_path starts with a dot."""
    return any(part.startswith('.') for part in rel_path.parts)


def scan_directory(path: Path) -> Sequence[MDItem]:
    """
    Scan a directory tree for markdown files and images.
    Entries are skipped when any path component *below* ``path`` starts with
    a dot (hidden directories and hidden files). Components of ``path``
    itself are not considered, so a root such as ``~/.local/docs`` or
    ``../docs`` is scanned normally.
    Args:
        path: Directory to scan
    Returns:
        List of MDItem objects: all MDPage items first, then all MDImage
        items. Every item is attached to a temporary MDCollection built
        for ``path``.
    Raises:
        ValueError: If path does not exist or is not a directory
    """
    if not path.exists():
        raise ValueError(f"Path does not exist: {path}")
    if not path.is_dir():
        raise ValueError(f"Path is not a directory: {path}")

    items: List[MDItem] = []
    # Items need a collection to resolve their full path against.
    temp_collection = MDCollection(
        path=path,
        name=path.name,
        items=[],  # populated at the end
    )

    # First pass: markdown files.
    for md_path in path.rglob("*.md"):
        rel_path = md_path.relative_to(path)
        # Test the relative path only; the absolute prefix is not ours to judge.
        if _has_hidden_part(rel_path):
            continue
        # A directory named like "notes.md" is not a page.
        if not md_path.is_file():
            continue
        items.append(MDPage(temp_collection, rel_path))

    # Second pass: images, recognised by extension (case-insensitive).
    image_extensions = {'.png', '.jpg', '.jpeg', '.gif', '.svg'}
    for img_path in path.rglob("*"):
        if img_path.suffix.lower() not in image_extensions:
            continue
        rel_path = img_path.relative_to(path)
        if _has_hidden_part(rel_path):
            continue
        if not img_path.is_file():
            continue
        items.append(MDImage(temp_collection, rel_path))

    temp_collection.items = items
    return items

View File

@@ -0,0 +1,99 @@
from pathlib import Path
from typing import Union
import os
import re
def should_skip_path(path: Union[str, Path]) -> bool:
    """
    Decide whether a path should be skipped, looking only at its last
    component.
    Args:
        path: File or directory path
    Returns:
        True if the last component starts with ``.`` or ``_``, else False
    """
    basename = Path(path).name
    # Slice rather than index so an empty basename simply yields False.
    return basename[:1] in {'.', '_'}
def strip_ansi_codes(text):
    """Return text with ANSI escape sequences removed."""
    # ESC followed by either a single control character or a CSI sequence.
    return re.sub(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])', '', text)
def name_fix(path: str) -> str:
    """
    Normalize a single file name (not a path) for lookups:
    - ANSI escape codes and surrounding whitespace are removed
    - A ``.md`` extension is dropped; any other extension is kept as given
    - The stem is lowercased, spaces and dashes become underscores, commas
      are removed, and runs of underscores collapse to a single one
    - For image file names, trailing underscores of the stem are stripped
    Args:
        path: File name to normalize; must not contain ``/``
    Returns:
        Normalized file name
    Raises:
        TypeError: If path is not a string
        ValueError: If path contains a forward slash
    """
    if not isinstance(path, str):
        raise TypeError("Input must be a string")
    if '/' in path:
        raise ValueError("Path should not contain forward slashes - use for filenames only")
    path = strip_ansi_codes(path).strip()
    name, ext = os.path.splitext(path)
    if ext.lower() == '.md':
        ext = ""
    name = name.lower().replace(' ', '_').replace('-', '_').replace(',', '')
    # Collapse every run of underscores. A single replace('__', '_') pass
    # turned "a - b" into "a__b", leaving the "__" collection separator
    # inside a normalized name.
    name = re.sub(r'_{2,}', '_', name)
    # Decide "is this an image" on the full file name; the stem alone has no
    # extension left, so testing it never matched real image files.
    if is_image(path):
        name = name.rstrip('_')
    return f"{name}{ext}"
def path_fix(path: Union[str, Path]) -> Path:
    """
    Normalize only the last component of a path with name_fix, leaving the
    parent directories exactly as given.
    Args:
        path: Path to normalize; anything that is neither str nor Path is
            converted with str() first
    Returns:
        Path made of the original parent and the normalized file name
    """
    source = Path(path) if isinstance(path, (str, Path)) else Path(str(path))
    return source.parent / name_fix(source.name)
def is_image(basename):
    """Return True if basename ends in a known image extension (case-insensitive)."""
    # Whitespace around the extension is ignored before the comparison.
    suffix = os.path.splitext(basename)[1].strip().lower()
    return suffix in ('.jpg', '.jpeg', '.png', '.gif', '.svg')