...
This commit is contained in:
13
_archive/lib/web/mdcollections/__init__.py
Normal file
13
_archive/lib/web/mdcollections/__init__.py
Normal file
@@ -0,0 +1,13 @@
|
||||
from .base_types import MDItem, MDPage, MDImage, MDCollection
|
||||
from .mdcollections import MDCollections
|
||||
from .scanner import scan_directory
|
||||
|
||||
# Public API of the package: every name imported above is re-exported.
__all__ = [
    "MDItem",
    "MDPage",
    "MDImage",
    "MDCollection",
    "MDCollections",
    "scan_directory",
]
|
177
_archive/lib/web/mdcollections/base_types.py
Normal file
177
_archive/lib/web/mdcollections/base_types.py
Normal file
@@ -0,0 +1,177 @@
|
||||
from pathlib import Path
|
||||
from typing import List, Dict
|
||||
from dataclasses import dataclass
|
||||
from .tools import name_fix
|
||||
|
||||
import os
|
||||
|
||||
class MDItem:
    """Base class for a file (page or image) that belongs to a collection.

    Instance attributes:
        collection: The owning collection; this class only reads its ``path``.
        rel_path: Location of the file relative to the collection root.
        content_: Text content cache; starts empty.
        processed: Boolean flag, initialised to False. Nothing in this class
            changes it.
    """

    def __init__(self, collection: "MDCollection", rel_path: Path):
        """
        Create an item.

        Args:
            collection: Collection the item belongs to
            rel_path: Path of the file relative to the collection root

        Raises:
            TypeError: If rel_path is not a Path instance
        """
        if not isinstance(rel_path, Path):
            raise TypeError("rel_path must be a Path instance")
        self.collection = collection
        self.rel_path = rel_path
        self.content_ = ""
        # A flag has to hold a boolean value; the type object ``bool`` would
        # always evaluate as truthy.
        self.processed = False

    def __str__(self) -> str:
        return f"{self.__class__.__name__}: {self.rel_path}"

    @property
    def full_path(self) -> Path:
        """Returns the collection path joined with the item's relative path."""
        return self.collection.path / self.rel_path

    @property
    def path(self) -> str:
        """Returns the resolved (absolute) full path of the item as a string."""
        return str(self.full_path.resolve())

    @property
    def name(self) -> str:
        """Returns the item's file name (last path component) run through name_fix."""
        return name_fix(self.rel_path.name)
|
||||
|
||||
class MDPage(MDItem):
    """Represents a markdown file in the collection."""

    @property
    def content(self) -> str:
        """
        Return the text of the page, reading it from disk when not cached.

        The text is cached in ``content_``; a non-empty cache (including one
        assigned from outside) is returned without touching the file system.

        Returns:
            The file content decoded as UTF-8

        Raises:
            FileNotFoundError: If the file does not exist
            Exception: If the file exists but cannot be read (wraps the OSError)
        """
        if self.content_:
            return self.content_

        # ``path`` resolves the full path on every access, so compute it once.
        file_path = self.path
        try:
            # Open directly instead of checking for existence first: this
            # avoids a second file system hit and the window between the
            # check and the open.
            with open(file_path, 'r', encoding='utf-8') as f:
                self.content_ = f.read()
        except FileNotFoundError as e:
            raise FileNotFoundError(f"Cannot find markdown file: {file_path}") from e
        except OSError as e:
            raise Exception(f"Error reading file {file_path}: {e}") from e
        return self.content_
|
||||
|
||||
|
||||
|
||||
class MDImage(MDItem):
    """An image file that belongs to a collection.

    Adds no behaviour on top of MDItem; the separate type lets callers tell
    images from pages with isinstance checks.
    """
|
||||
|
||||
|
||||
|
||||
@dataclass
class MDCollection:
    """A named directory holding markdown pages and images."""

    # Root directory of the collection.
    path: Path
    # Collection name; shown by __str__ and used as link prefix by index_page().
    name: str
    # Every page and image that belongs to the collection.
    items: List[MDItem]

    def page_get(self, name: str) -> MDPage:
        """
        Look up a markdown page by name.

        Args:
            name: Page name, with or without a trailing ".md"; it is
                normalized with name_fix before comparison

        Returns:
            The first MDPage whose normalized file stem equals the
            normalized name

        Raises:
            ValueError: If name contains "__" or no page matches
        """
        if "__" in name:
            raise ValueError("there should be no __ in name of page_get")

        # Drop a literal ".md" suffix before normalizing.
        if name.endswith('.md'):
            name = name[:-3]
        wanted = name_fix(name)

        candidates = (entry for entry in self.items if isinstance(entry, MDPage))
        found = next(
            (page for page in candidates if name_fix(page.rel_path.stem) == wanted),
            None,
        )
        if found is None:
            raise ValueError(f"Page not found: {name}")
        return found

    def image_get(self, name: str) -> MDImage:
        """
        Look up an image by file name.

        Args:
            name: Image file name; it is normalized with name_fix
                before comparison

        Returns:
            The first MDImage whose normalized file name equals the
            normalized name

        Raises:
            ValueError: If no image matches
        """
        wanted = name_fix(name)

        candidates = (entry for entry in self.items if isinstance(entry, MDImage))
        # Images are matched on the full file name, extension included.
        found = next(
            (
                image
                for image in candidates
                if name_fix(os.path.basename(image.rel_path)) == wanted
            ),
            None,
        )
        if found is None:
            raise ValueError(f"Image not found: {name}")
        return found

    def __str__(self) -> str:
        """Render the collection as a small tree: header, pages, then images."""
        lines = [f"Collection: {self.name} ({self.path})"]

        # Split the items by type up front, pages before images.
        by_kind = [
            (" Pages:", [entry for entry in self.items if isinstance(entry, MDPage)]),
            (" Images:", [entry for entry in self.items if isinstance(entry, MDImage)]),
        ]

        for heading, members in by_kind:
            if not members:
                continue
            lines.append(heading)
            members.sort(key=lambda entry: str(entry.rel_path))
            for member in members:
                lines.append(f" └─ {member.name}")

        return "\n".join(lines)

    def index_page(self) -> MDPage:
        """
        Build an in-memory "index.md" page that links to every markdown page.

        Pages are grouped by their parent directory. Pages directly in the
        collection root are grouped under the key 'Root', which only gets a
        header when other groups exist as well.

        Returns:
            An MDPage for "index.md" whose content_ holds the generated
            markdown; nothing is written to disk
        """
        ordered_pages = [entry for entry in self.items if isinstance(entry, MDPage)]
        ordered_pages.sort(key=lambda entry: str(entry.rel_path))

        # Directory (as string) -> pages located in that directory.
        grouped: Dict[str, List[MDPage]] = {}
        for entry in ordered_pages:
            folder = str(entry.rel_path.parent)
            key = 'Root' if folder == '.' else folder
            grouped.setdefault(key, []).append(entry)

        lines = ["# Collection Index\n"]
        for key in sorted(grouped):
            if key != 'Root':
                lines.append(f"\n## {key}\n")
            elif len(grouped) > 1:
                # The root group is only labelled when it is not the sole group.
                lines.append("\n## Root Directory\n")

            for entry in sorted(grouped[key], key=lambda member: member.name):
                # Human readable title derived from the file stem.
                title = entry.rel_path.stem.replace('_', ' ').replace('-', ' ').title()
                target = str(entry.rel_path)
                lines.append(f'- [{title}]({self.name}__{target})')

        index = MDPage(self, Path("index.md"))
        index.content_ = "\n".join(lines)
        return index
|
25
_archive/lib/web/mdcollections/factory.py
Normal file
25
_archive/lib/web/mdcollections/factory.py
Normal file
@@ -0,0 +1,25 @@
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
from .mdcollections import MDCollections
|
||||
|
||||
def create_collections(path: Optional[str] = None) -> MDCollections:
    """
    Build an MDCollections instance for the given root directory.

    Args:
        path: Root directory handed to MDCollections; a leading "~" is
            expanded to the user's home directory. Although the parameter
            defaults to None, a value is required.

    Returns:
        MDCollections instance created with the expanded path as root_path

    Raises:
        ValueError: If path is None
    """
    if path is None:
        raise ValueError("Path cannot be None")

    # Resolve "~" / "~user" before turning the string into a Path.
    root = Path(os.path.expanduser(path))
    return MDCollections(root_path=root)
|
||||
|
112
_archive/lib/web/mdcollections/mdcollections.py
Normal file
112
_archive/lib/web/mdcollections/mdcollections.py
Normal file
@@ -0,0 +1,112 @@
|
||||
from pathlib import Path
|
||||
from typing import List, Optional
|
||||
from .base_types import MDCollection, MDPage, MDImage, MDItem
|
||||
from .scanner import scan_directory
|
||||
from .tools import name_fix
|
||||
|
||||
class MDCollections:
    """Manages multiple markdown collections found below one root directory."""

    def __init__(self, root_path: Path):
        """
        Initialize collections manager and scan the root directory.

        Args:
            root_path: Root directory containing collections

        Raises:
            ValueError: If root_path does not exist
        """
        self.root_path = root_path
        self.collections: List[MDCollection] = []
        self._scan_collections()

    def _scan_collections(self):
        """
        Scan the immediate subdirectories of the root for collections.

        Directories whose name starts with "_" or contains "archive"
        (case-insensitive) are skipped, as are directories in which
        scan_directory finds no items.

        Raises:
            ValueError: If the root path does not exist
        """
        if not self.root_path.exists():
            raise ValueError(f"Root path does not exist: {self.root_path}")

        # Build into a fresh list so that a repeated scan replaces the
        # previous result instead of appending duplicates to it.
        found: List[MDCollection] = []

        for path in sorted(self.root_path.iterdir()):
            if not path.is_dir():
                continue
            if path.name.startswith('_') or 'archive' in path.name.lower():
                continue

            items = scan_directory(path)
            if not items:
                # Nothing scannable (neither pages nor images) in here.
                continue

            collection = MDCollection(
                path=path,
                name=path.name,
                items=sorted(items, key=lambda x: x.name),
            )
            # scan_directory attaches the items to a throw-away collection
            # object; point them at the collection that actually owns them.
            for item in collection.items:
                item.collection = collection
            found.append(collection)

        found.sort(key=lambda x: x.name)
        self.collections = found

    def collection_get(self, name: str) -> MDCollection:
        """
        Get a collection by name.

        An exact match on the directory name wins. Otherwise the name and
        the directory names are compared after normalization with name_fix,
        so that the normalized names passed in by page_get / image_get also
        find directories containing capitals, spaces or dashes.

        Args:
            name: Name of the collection to find

        Returns:
            MDCollection object

        Raises:
            ValueError: If collection not found
        """
        for collection in self.collections:
            if collection.name == name:
                return collection

        try:
            wanted = name_fix(name)
        except (TypeError, ValueError):
            # A name that cannot be normalized cannot match anything.
            wanted = None

        if wanted is not None:
            for collection in self.collections:
                if name_fix(collection.name) == wanted:
                    return collection

        raise ValueError(f"Collection not found: {name}")

    def page_get(self, collection_name: str, page_name: str) -> MDPage:
        """
        Get a page from a specific collection.

        Args:
            collection_name: Name of the collection
            page_name: Name of the page

        Returns:
            MDPage object

        Raises:
            ValueError: If collection or page not found
        """
        page_name = name_fix(page_name)
        collection_name = name_fix(collection_name)

        collection = self.collection_get(collection_name)
        return collection.page_get(page_name)

    def image_get(self, collection_name: str, image_name: str) -> MDImage:
        """
        Get an image from a specific collection.

        Args:
            collection_name: Name of the collection
            image_name: Name of the image; may be given as
                "<collection>__<image>", in which case the prefix
                replaces collection_name

        Returns:
            MDImage object

        Raises:
            ValueError: If collection or image not found
        """
        # The collection is the part *before* the separator, matching the
        # "<collection>__<path>" links that MDCollection.index_page emits.
        if "__" in image_name:
            collection_name, image_name = image_name.split("__", 1)

        image_name = name_fix(image_name)
        collection_name = name_fix(collection_name)

        collection = self.collection_get(collection_name)
        return collection.image_get(image_name)

    def __str__(self) -> str:
        """Returns a string representation of all collections."""
        if not self.collections:
            return "No collections found"

        return "\n\n".join(str(collection) for collection in self.collections)
|
61
_archive/lib/web/mdcollections/scanner.py
Normal file
61
_archive/lib/web/mdcollections/scanner.py
Normal file
@@ -0,0 +1,61 @@
|
||||
from pathlib import Path
|
||||
from typing import List, Sequence
|
||||
from .base_types import MDItem, MDPage, MDImage, MDCollection
|
||||
|
||||
def scan_directory(path: Path) -> Sequence[MDItem]:
    """
    Scan a directory tree for markdown files and images.

    Files are searched recursively. Anything whose path *below* ``path``
    has a component starting with "." (hidden directory or hidden file) is
    ignored; the location of ``path`` itself does not matter.

    Args:
        path: Directory to scan

    Returns:
        List of MDItem objects: all MDPage objects first, then all MDImage
        objects. Every item references one shared MDCollection built from
        ``path``, whose ``items`` is the returned list.

    Raises:
        ValueError: If path does not exist or is not a directory
    """
    if not path.exists():
        raise ValueError(f"Path does not exist: {path}")
    if not path.is_dir():
        raise ValueError(f"Path is not a directory: {path}")

    items: List[MDItem] = []

    # Items need a collection to compute their full path; it is filled in
    # once the scan is complete.
    temp_collection = MDCollection(
        path=path,
        name=path.name,
        items=[],
    )

    # First scan for markdown files
    for md_path in path.rglob("*.md"):
        rel_path = md_path.relative_to(path)

        # Judge "hidden" on the part below the collection root only, so a
        # root that lives under a dot-directory or is reached through ".."
        # is still scanned.
        if any(part.startswith('.') for part in rel_path.parts):
            continue

        # A directory may be called "something.md"; only files are pages.
        if not md_path.is_file():
            continue

        items.append(MDPage(temp_collection, rel_path))

    # Then scan for images
    image_extensions = {'.png', '.jpg', '.jpeg', '.gif', '.svg'}
    for img_path in path.rglob("*"):
        if img_path.suffix.lower() not in image_extensions:
            continue

        rel_path = img_path.relative_to(path)
        if any(part.startswith('.') for part in rel_path.parts):
            continue
        if not img_path.is_file():
            continue

        items.append(MDImage(temp_collection, rel_path))

    temp_collection.items = items

    return items
|
99
_archive/lib/web/mdcollections/tools.py
Normal file
99
_archive/lib/web/mdcollections/tools.py
Normal file
@@ -0,0 +1,99 @@
|
||||
from pathlib import Path
|
||||
from typing import Union
|
||||
import os
|
||||
import re
|
||||
|
||||
def should_skip_path(path: Union[str, Path]) -> bool:
    """
    Tell whether a path is to be ignored, judging by its last component.

    Args:
        path: File or directory path, as string or Path

    Returns:
        True if the last path component begins with "." or "_",
        False otherwise (including when that component is empty)
    """
    first_char = Path(path).name[:1]
    return first_char in ('.', '_')
|
||||
|
||||
|
||||
def strip_ansi_codes(text):
    """Return text with every ANSI escape sequence deleted."""
    # ESC followed by either a single control character or a "[" sequence
    # with optional parameter/intermediate bytes and a final byte.
    return re.sub(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])', '', text)
|
||||
|
||||
|
||||
|
||||
def name_fix(path: str) -> str:
    """
    Normalize a single file name (no directory part) for name lookups.

    The stem is lowercased, spaces and dashes become underscores, commas
    are dropped and runs of underscores collapse to one. A ".md" extension
    (any case) is removed; every other extension is kept as written. For
    image files, trailing underscores of the stem are stripped as well.
    ANSI escape codes and surrounding whitespace are removed first.

    Args:
        path: File name to normalize; must not contain "/"

    Returns:
        The normalized file name

    Raises:
        TypeError: If path is not a string
        ValueError: If path contains a forward slash
    """
    if not isinstance(path, str):
        raise TypeError("Input must be a string")

    if '/' in path:
        raise ValueError("Path should not contain forward slashes - use for filenames only")

    path = strip_ansi_codes(path).strip()
    name, ext = os.path.splitext(path)

    # Markdown pages are addressed without their extension.
    if ext.lower() == '.md':
        ext = ""

    name = name.lower().replace(' ', '_').replace('-', '_').replace(',', '')
    # Collapse every run of underscores. A single replace('__', '_') would
    # turn "a - b" ("a___b") into "a__b", and "__" is reserved as the
    # separator between collection and item name.
    name = re.sub(r'_{2,}', '_', name)

    # Only strip trailing underscores for image files. The check has to look
    # at the original file name: the stem alone carries no extension.
    if is_image(path):
        name = name.rstrip('_')

    return f"{name}{ext}"
|
||||
|
||||
|
||||
def path_fix(path: Union[str, Path]) -> Path:
    """
    Normalize the last component of a path and leave its directories alone.

    The final component is passed through name_fix; the parent part is
    reused unchanged.

    Args:
        path: Path to normalize; values that are neither str nor Path are
            converted with str() first

    Returns:
        Path made of the original parent and the normalized file name
    """
    source = Path(path if isinstance(path, (str, Path)) else str(path))
    # Only the file name is rewritten; the directory part stays as given.
    return source.parent / name_fix(source.name)
|
||||
|
||||
|
||||
def is_image(basename):
    """
    Tell whether a file name carries one of the known image extensions.

    The extension is compared case-insensitively after stripping
    surrounding whitespace. Recognised: .jpg, .jpeg, .png, .gif, .svg.
    """
    suffix = os.path.splitext(basename)[1].strip().lower()
    return suffix in ('.jpg', '.jpeg', '.png', '.gif', '.svg')
|
||||
|
Reference in New Issue
Block a user