This commit is contained in:
2025-08-22 13:11:04 +02:00
parent d80b956ff7
commit bc0d90d41a
14 changed files with 17332 additions and 13 deletions

View File

@@ -0,0 +1,86 @@
import os
import time
import re
import sys
import toml
import libtmux
from libtmux.pane import Pane
from libtmux.window import Window
from libtmux.session import Session
import psutil
from typing import Dict, List, Optional, Any, Set, Tuple
from dataclasses import dataclass, field, asdict
from datetime import datetime
import uuid
from pathlib import Path
import asyncio
import uvicorn
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
import threading
# Configuration
# Module-level constants; their consumers are outside this part of the file.
# NOTE(review): WAITING_MESSAGE looks like the marker text for an idle
# worker/pane -- confirm against the code that reads it.
WAITING_MESSAGE = "WAITING FOR JOBS"
# NOTE(review): absolute path under /root -- only valid on hosts laid out this way.
HPY_SH_PATH = "/root/heromonkey/functions/hpy.sh" # Path to hpy.sh
@dataclass
class ProcessMetrics:
    """Metrics for a running process and its children.

    Every field has a zero/empty default, so ``ProcessMetrics()`` is a valid
    "nothing sampled yet" record. Field order is part of the positional
    constructor signature and must not be rearranged.
    """

    cpu_percent: float = 0.0
    memory_rss: int = 0  # Resident Set Size in bytes
    memory_vms: int = 0  # Virtual Memory Size in bytes
    memory_percent: float = 0.0
    num_threads: int = 0
    num_children: int = 0
    # Figures for child processes. NOTE(review): presumably aggregated over
    # all children -- confirm against the code that fills these in.
    children_cpu_percent: float = 0.0
    children_memory_rss: int = 0
    # Timestamp text; empty until first written. The format is chosen by the
    # writer, which is not part of this block.
    last_updated: str = ""
@dataclass
class TaskStatus:
    """Status of an individual task (script).

    ``script_path`` and ``script_name`` are required; every other field has a
    default describing a task that has not started yet.
    """

    script_path: str
    script_name: str
    state: str = "PENDING"  # PENDING, WAITING, RUNNING, DONE, ERROR, CRASHED, TIMED_OUT
    # Timestamps are stored as text and stay None until set by the caller.
    start_time: Optional[str] = None
    end_time: Optional[str] = None
    duration_seconds: float = 0.0
    exit_code: Optional[int] = None
    error_message: Optional[str] = None
    # NOTE(review): presumably the tmux pane id running the script -- confirm.
    pane_id: Optional[str] = None
    # default_factory gives each task its own ProcessMetrics instance instead
    # of sharing one mutable default across all tasks.
    process_metrics: ProcessMetrics = field(default_factory=ProcessMetrics)
@dataclass
class DirectoryStatus:
    """Status of a directory containing tasks.

    ``directory_num`` and ``directory_path`` are required; the remaining
    fields default to a directory that has not been started.
    """

    directory_num: int
    directory_path: str
    state: str = "PENDING"  # PENDING, RUNNING, DONE, ERROR, TIMED_OUT
    # NOTE(review): presumably seconds (600 matches the 10-minute default
    # used by MetaData) -- confirm.
    timeout: int = 600
    # Timestamps are stored as text and stay None until set by the caller.
    start_time: Optional[str] = None
    end_time: Optional[str] = None
    duration_seconds: float = 0.0
    # default_factory gives every directory its own task list.
    tasks: List[TaskStatus] = field(default_factory=list)
    # NOTE(review): presumably the tmux window name for this directory -- confirm.
    window_name: Optional[str] = None
@dataclass
class DAGStructure:
    """Complete DAG structure for the task run.

    ``run_name`` and ``run_id`` are required; counters start at zero and the
    directory list starts empty.
    """

    run_name: str
    run_id: str
    state: str = "INITIALIZING"  # INITIALIZING, RUNNING, COMPLETED, FAILED
    # start_time defaults to "" (not None), unlike end_time.
    start_time: str = ""
    end_time: Optional[str] = None
    duration_seconds: float = 0.0
    total_directories: int = 0
    completed_directories: int = 0
    failed_directories: int = 0
    # default_factory gives every run its own directory list.
    directories: List[DirectoryStatus] = field(default_factory=list)
    # Timestamp text; empty until first written.
    last_updated: str = ""
class MetaData:
    """Class to hold metadata for a task directory.

    Attributes:
        timeout: Timeout for the directory in seconds.
    """

    def __init__(self, timeout: int = 600):  # Default timeout to 10 minutes (600 seconds)
        """Store the given timeout (seconds); defaults to 600."""
        self.timeout = timeout
        # Add more attributes here in the future

    def __repr__(self) -> str:
        """Return a constructor-style representation for logs and debugging."""
        return f"{type(self).__name__}(timeout={self.timeout!r})"