...

2025-08-05 15:15:36 +02:00
parent 4bd960ed05
commit 7fabb4163a
192 changed files with 14901 additions and 0 deletions
--- a/_archive/lib/web/mdcollections/init.py
+++ b/_archive/lib/web/mdcollections/init.py
@@ -0,0 +1,13 @@
+from .base_types import MDItem, MDPage, MDImage, MDCollection
+from .mdcollections import MDCollections
+from .scanner import scan_directory
+
+# Public API of the package: the item/collection types imported above plus
+# the ``scan_directory`` helper. Only these names are exported by a star-import.
+__all__ = [
+    'MDItem',
+    'MDPage',
+    'MDImage',
+    'MDCollection',
+    'MDCollections',
+    'scan_directory'
+]
--- a/_archive/lib/web/mdcollections/base_types.py
+++ b/_archive/lib/web/mdcollections/base_types.py
@@ -0,0 +1,177 @@
+from pathlib import Path
+from typing import List, Dict
+from dataclasses import dataclass
+from .tools import name_fix
+
+import os
+
+class MDItem:
+    """Base class for items (pages, images) that live inside a collection.
+
+    Attributes:
+        collection: Owning collection; its ``path`` is the base for ``rel_path``.
+        rel_path: Location of the item relative to the collection root.
+        content_: Cached text content; empty until something fills it.
+        processed: Boolean flag, initially ``False`` (not read in this module).
+    """
+
+    def __init__(self, collection: "MDCollection", rel_path: Path):
+        """
+        Args:
+            collection: Collection the item belongs to.
+            rel_path: Path of the item relative to the collection root.
+
+        Raises:
+            TypeError: If ``rel_path`` is not a ``Path`` instance.
+        """
+        if not isinstance(rel_path, Path):
+            raise TypeError("rel_path must be a Path instance")
+        self.collection = collection
+        self.rel_path = rel_path
+        self.content_ = ""
+        # Previously assigned the ``bool`` type object itself, which is always
+        # truthy; a flag has to start out as a real boolean.
+        self.processed = False
+
+    def __str__(self) -> str:
+        return f"{self.__class__.__name__}: {self.rel_path}"
+
+    @property
+    def full_path(self) -> Path:
+        """Returns the collection path joined with the item's relative path."""
+        return self.collection.path / self.rel_path
+
+    @property
+    def path(self) -> str:
+        """Returns the resolved absolute path of the item as a string."""
+        return str(self.full_path.resolve())
+
+    @property
+    def name(self) -> str:
+        """Returns the item's file name normalized by ``name_fix``."""
+        return name_fix(os.path.basename(self.rel_path))
+
+class MDPage(MDItem):
+    """Represents a markdown file in the collection."""
+
+    @property
+    def content(self) -> str:
+        """Return the page text, reading it from disk on first access.
+
+        The text is cached in ``content_``. An empty cache counts as "not
+        loaded", so an empty file is re-read on every access.
+
+        Raises:
+            FileNotFoundError: If the markdown file does not exist.
+            Exception: If the file exists but cannot be read.
+        """
+        if not self.content_:
+            # Resolve once; ``path`` touches the filesystem on every access.
+            file_path = self.path
+            # Open directly instead of checking existence first, so a file that
+            # disappears in between is still reported as "not found".
+            try:
+                with open(file_path, 'r', encoding='utf-8') as f:
+                    self.content_ = f.read()
+            except FileNotFoundError as e:
+                raise FileNotFoundError(f"Cannot find markdown file: {file_path}") from e
+            except OSError as e:
+                raise Exception(f"Error reading file {file_path}: {e}") from e
+        return self.content_
+    
+        
+
+class MDImage(MDItem):
+    """An image file that belongs to a collection; adds nothing to MDItem."""
+
+
+
+@dataclass
+class MDCollection:
+    """Represents a collection of markdown files and images.
+
+    Attributes:
+        path: Directory that holds the collection.
+        name: Name of the collection.
+        items: Pages and images that belong to the collection.
+    """
+    path: Path
+    name: str
+    items: List[MDItem]
+
+    def _find(self, item_type, normalized_name: str, key):
+        """Return the first item of ``item_type`` whose ``key(item)`` matches, else None."""
+        for item in self.items:
+            if isinstance(item, item_type) and name_fix(key(item)) == normalized_name:
+                return item
+        return None
+
+    def page_get(self, name: str) -> MDPage:
+        """
+        Get a markdown page by name.
+
+        Args:
+            name: Name of the page to find (will be normalized); a trailing
+                ``.md`` is ignored.
+
+        Returns:
+            MDPage object
+
+        Raises:
+            ValueError: If the name contains ``__`` or the page is not found
+        """
+        if "__" in name:
+            raise ValueError("there should be no __ in name of page_get")
+
+        # Remove .md extension if present
+        if name.endswith('.md'):
+            name = name[:-3]
+
+        # Pages are compared by stem, i.e. without their extension.
+        page = self._find(MDPage, name_fix(name), lambda item: item.rel_path.stem)
+        if page is None:
+            raise ValueError(f"Page not found: {name}")
+        return page
+
+    def image_get(self, name: str) -> MDImage:
+        """
+        Get an image by name.
+
+        Args:
+            name: Name of the image to find (will be normalized)
+
+        Returns:
+            MDImage object
+
+        Raises:
+            ValueError: If image not found
+        """
+        # For images, compare with extension
+        image = self._find(
+            MDImage, name_fix(name), lambda item: os.path.basename(item.rel_path)
+        )
+        if image is None:
+            raise ValueError(f"Image not found: {name}")
+        return image
+
+    def __str__(self) -> str:
+        """Returns a tree-like string representation of the collection."""
+        result = [f"Collection: {self.name} ({self.path})"]
+
+        # One section per item type; empty sections are left out.
+        for heading, item_type in (("  Pages:", MDPage), ("  Images:", MDImage)):
+            members = [item for item in self.items if isinstance(item, item_type)]
+            if not members:
+                continue
+            result.append(heading)
+            for member in sorted(members, key=lambda x: str(x.rel_path)):
+                result.append(f"    └─ {member.name}")
+
+        return "\n".join(result)
+
+    def index_page(self) -> MDPage:
+        """Generate a dynamic index of all markdown files in the collection.
+
+        Returns:
+            An in-memory MDPage named ``index.md`` whose content is a markdown
+            list of links of the form ``<collection name>__<relative path>``,
+            grouped by directory.
+        """
+        # Get all markdown pages and sort them by relative path
+        pages = sorted(
+            (item for item in self.items if isinstance(item, MDPage)),
+            key=lambda x: str(x.rel_path)
+        )
+
+        # Group pages by directory. Forward slashes are used on every platform
+        # because the values end up in markdown headings and link targets.
+        page_groups: Dict[str, List[MDPage]] = {}
+        for page in pages:
+            dir_path = page.rel_path.parent.as_posix()
+            if dir_path == '.':
+                dir_path = 'Root'
+            page_groups.setdefault(dir_path, []).append(page)
+
+        # Generate markdown content
+        content = ["# Collection Index\n"]
+
+        for dir_path in sorted(page_groups):
+            # Add directory header
+            if dir_path != 'Root':
+                content.append(f"\n## {dir_path}\n")
+            elif len(page_groups) > 1:  # Only show Root header if there are other directories
+                content.append("\n## Root Directory\n")
+
+            # Add pages in current directory
+            for page in sorted(page_groups[dir_path], key=lambda x: x.name):
+                # Create display name by removing extension and formatting
+                display_name = page.rel_path.stem.replace('_', ' ').replace('-', ' ').title()
+                # Create link using relative path (always with forward slashes)
+                link_path = page.rel_path.as_posix()
+                content.append(f'- [{display_name}]({self.name}__{link_path})')
+
+        mdp = MDPage(self, Path("index.md"))
+        mdp.content_ = "\n".join(content)
+        return mdp
--- a/_archive/lib/web/mdcollections/factory.py
+++ b/_archive/lib/web/mdcollections/factory.py
@@ -0,0 +1,25 @@
+import os
+from pathlib import Path
+from typing import Optional
+from .mdcollections import MDCollections
+
+def create_collections(path: Optional[str] = None) -> MDCollections:
+    """
+    Factory function to create and initialize an MDCollections instance.
+
+    Args:
+        path: Root directory to scan for collections. A leading ``~`` is
+            expanded to the user's home directory. Although the parameter has
+            a ``None`` default, there is no fallback location: ``None`` is
+            rejected.
+
+    Returns:
+        Initialized MDCollections instance
+
+    Raises:
+        ValueError: If path is None
+    """
+    if path is None:
+        raise ValueError("Path cannot be None")
+
+    # Expand ~ to home directory if present in path
+    expanded_path = os.path.expanduser(path)
+    return MDCollections(root_path=Path(expanded_path))
+
--- a/_archive/lib/web/mdcollections/mdcollections.py
+++ b/_archive/lib/web/mdcollections/mdcollections.py
@@ -0,0 +1,112 @@
+from pathlib import Path
+from typing import List, Optional
+from .base_types import MDCollection, MDPage, MDImage, MDItem
+from .scanner import scan_directory
+from .tools import name_fix
+
+class MDCollections:
+    """Manages multiple markdown collections found under one root directory."""
+
+    def __init__(self, root_path: Path):
+        """
+        Initialize collections manager and scan ``root_path`` immediately.
+
+        Args:
+            root_path: Root directory containing collections
+
+        Raises:
+            ValueError: If ``root_path`` does not exist
+        """
+        self.root_path = root_path
+        self.collections: List[MDCollection] = []
+        self._scan_collections()
+
+    def _scan_collections(self):
+        """Scan the immediate subdirectories of the root for collections.
+
+        Raises:
+            ValueError: If the root path does not exist
+        """
+        if not self.root_path.exists():
+            raise ValueError(f"Root path does not exist: {self.root_path}")
+
+        # Scan immediate subdirectories only
+        for path in sorted(self.root_path.iterdir()):
+            if not path.is_dir():
+                continue
+            # Skip directories starting with _ or containing 'archive' in lowercase
+            if path.name.startswith('_') or 'archive' in path.name.lower():
+                continue
+
+            items = scan_directory(path)
+            if not items:  # Directories without pages or images are not collections
+                continue
+
+            collection = MDCollection(
+                path=path,
+                name=path.name,
+                items=sorted(items, key=lambda x: x.name)
+            )
+            # The scanner attaches items to a throw-away collection object;
+            # point them at the collection that is actually kept.
+            for item in collection.items:
+                item.collection = collection
+            self.collections.append(collection)
+
+        # Sort collections by name
+        self.collections.sort(key=lambda x: x.name)
+
+    def collection_get(self, name: str) -> MDCollection:
+        """
+        Get a collection by name.
+
+        An exact match on the directory name wins. Otherwise both sides are
+        compared after ``name_fix`` normalization, because ``page_get`` and
+        ``image_get`` pass in an already-normalized name while collection
+        names are raw directory names.
+
+        Args:
+            name: Name of the collection to find
+
+        Returns:
+            MDCollection object
+
+        Raises:
+            ValueError: If collection not found
+        """
+        for collection in self.collections:
+            if collection.name == name:
+                return collection
+
+        try:
+            wanted = name_fix(name)
+        except (TypeError, ValueError):
+            # Names that cannot be normalized simply cannot match.
+            wanted = None
+
+        if wanted is not None:
+            for collection in self.collections:
+                if name_fix(collection.name) == wanted:
+                    return collection
+
+        raise ValueError(f"Collection not found: {name}")
+
+    def page_get(self, collection_name: str, page_name: str) -> MDPage:
+        """
+        Get a page from a specific collection.
+
+        Args:
+            collection_name: Name of the collection
+            page_name: Name of the page
+
+        Returns:
+            MDPage object
+
+        Raises:
+            ValueError: If collection or page not found
+        """
+        page_name = name_fix(page_name)
+        collection_name = name_fix(collection_name)
+
+        collection = self.collection_get(collection_name)
+        return collection.page_get(page_name)
+
+    def image_get(self, collection_name: str, image_name: str) -> MDImage:
+        """
+        Get an image from a specific collection.
+
+        Args:
+            collection_name: Name of the collection
+            image_name: Name of the image; a ``<collection>__<image>`` value
+                overrides ``collection_name``
+
+        Returns:
+            MDImage object
+
+        Raises:
+            ValueError: If collection or image not found
+        """
+        # Handle image name that might contain collection prefix. The prefix
+        # comes first, matching the ``<collection>__<path>`` links produced by
+        # MDCollection.index_page (the two names used to be unpacked swapped).
+        if "__" in image_name:
+            collection_name, image_name = image_name.split("__", 1)
+
+        image_name = name_fix(image_name)
+        collection_name = name_fix(collection_name)
+
+        collection = self.collection_get(collection_name)
+        return collection.image_get(image_name)
+
+    def __str__(self) -> str:
+        """Returns a string representation of all collections."""
+        if not self.collections:
+            return "No collections found"
+
+        return "\n\n".join(str(collection) for collection in self.collections)
--- a/_archive/lib/web/mdcollections/scanner.py
+++ b/_archive/lib/web/mdcollections/scanner.py
@@ -0,0 +1,61 @@
+from pathlib import Path
+from typing import List, Sequence
+from .base_types import MDItem, MDPage, MDImage, MDCollection
+
+def scan_directory(path: Path) -> Sequence[MDItem]:
+    """
+    Scan a directory recursively for markdown files and images.
+
+    Anything whose path *below* ``path`` has a component starting with ``.``
+    (a hidden directory or a hidden file) is skipped. Components of ``path``
+    itself are not considered, so a collection stored under e.g. ``~/.local``
+    is still scanned.
+
+    Args:
+        path: Directory to scan
+
+    Returns:
+        List of MDItem objects: all MDPage items first, then all MDImage
+        items. They share one temporary MDCollection rooted at ``path``.
+
+    Raises:
+        ValueError: If ``path`` does not exist or is not a directory
+    """
+    if not path.exists():
+        raise ValueError(f"Path does not exist: {path}")
+    if not path.is_dir():
+        raise ValueError(f"Path is not a directory: {path}")
+
+    # Create a temporary collection for the items
+    temp_collection = MDCollection(
+        path=path,
+        name=path.name,
+        items=[]  # Will be populated later
+    )
+
+    def visible_files(pattern: str):
+        """Yield collection-relative paths of non-hidden regular files."""
+        for candidate in path.rglob(pattern):
+            rel_path = candidate.relative_to(path)
+            # Only look at the part below the collection root.
+            if any(part.startswith('.') for part in rel_path.parts):
+                continue
+            # A directory called "notes.md" or "img.png" is not an item.
+            if not candidate.is_file():
+                continue
+            yield rel_path
+
+    # First scan for markdown files
+    items: List[MDItem] = [
+        MDPage(temp_collection, rel_path) for rel_path in visible_files("*.md")
+    ]
+
+    # Then scan for images
+    image_extensions = {'.png', '.jpg', '.jpeg', '.gif', '.svg'}
+    items.extend(
+        MDImage(temp_collection, rel_path)
+        for rel_path in visible_files("*")
+        if rel_path.suffix.lower() in image_extensions
+    )
+
+    # Update the temporary collection's items
+    temp_collection.items = items
+
+    return items
--- a/_archive/lib/web/mdcollections/tools.py
+++ b/_archive/lib/web/mdcollections/tools.py
@@ -0,0 +1,99 @@
+from pathlib import Path
+from typing import Union
+import os
+import re
+
+def should_skip_path(path: Union[str, Path]) -> bool:
+    """
+    Tell whether a file or directory should be ignored.
+
+    Only the last path component is inspected; it is ignored when it begins
+    with a dot or an underscore.
+
+    Args:
+        path: File or directory path to check
+
+    Returns:
+        True when the basename starts with ``.`` or ``_``, otherwise False
+    """
+    basename = Path(path).name
+    return basename.startswith('.') or basename.startswith('_')
+
+
+def strip_ansi_codes(text):
+    """Return ``text`` with every ANSI escape sequence deleted."""
+    return re.sub(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])', '', text)
+
+
+
+def name_fix(path: str) -> str:
+    """
+    Normalize a bare file name (no directory part) so names can be compared.
+
+    - ANSI escape codes and surrounding whitespace are removed
+    - The stem is lowercased; spaces and dashes become underscores, commas
+      are dropped, and runs of underscores collapse to a single one
+    - A ``.md`` extension is dropped; any other extension is kept unchanged
+    - For image files, trailing underscores are stripped from the stem
+
+    Normalizing an already-normalized name returns it unchanged. Callers rely
+    on this because some lookups normalize the same name more than once.
+
+    Args:
+        path: File name to normalize; must not contain ``/``
+
+    Returns:
+        The normalized file name
+
+    Raises:
+        TypeError: If ``path`` is not a string
+        ValueError: If ``path`` contains a forward slash
+    """
+    if not isinstance(path, str):
+        raise TypeError("Input must be a string")
+
+    if '/' in path:
+        raise ValueError("Path should not contain forward slashes - use for filenames only")
+
+    path = strip_ansi_codes(path).strip()
+    name, ext = os.path.splitext(path)
+
+    # Markdown pages are addressed without their extension.
+    if ext.lower() == '.md':
+        ext = ""
+
+    # Convert to lowercase and replace spaces and other characters
+    name = name.lower().replace(' ', '_').replace('-', '_').replace(',', '')
+    # Collapse every run of underscores. A single ``replace('__', '_')`` left
+    # "a___b" as "a__b", so a second normalization gave a different result.
+    name = re.sub(r'_{2,}', '_', name)
+
+    # Only strip trailing underscores for image files. The check has to look
+    # at the full file name: the stem alone carries no extension.
+    if is_image(path):
+        name = name.rstrip('_')
+
+    return f"{name}{ext}"
+
+
+def path_fix(path: Union[str, Path]) -> Path:
+    """
+    Apply ``name_fix`` to the last component of a path.
+
+    The parent directories are kept exactly as given; only the final file
+    name is normalized.
+
+    Args:
+        path: Path to normalize; values that are neither ``str`` nor ``Path``
+            are converted with ``str()`` first
+
+    Returns:
+        Path with the original parent and the normalized file name
+    """
+    candidate = Path(path if isinstance(path, (str, Path)) else str(path))
+    return candidate.parent / name_fix(candidate.name)
+
+
+def is_image(basename):
+    """Return True when ``basename`` ends in a known image extension.
+
+    The extension is compared case-insensitively against
+    .jpg, .jpeg, .png, .gif and .svg.
+    """
+    suffix = os.path.splitext(basename)[1].strip().lower()
+    return suffix in ('.jpg', '.jpeg', '.png', '.gif', '.svg')
+