import asyncio
import os
import re
import sys
import threading
import time
import uuid
from dataclasses import dataclass, field, asdict
from datetime import datetime
from pathlib import Path
from typing import Dict, List, Optional, Any, Set, Tuple

import libtmux
import psutil
import toml
import uvicorn
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from libtmux.pane import Pane
from libtmux.session import Session
from libtmux.window import Window
from pydantic import BaseModel

# Configuration
WAITING_MESSAGE = "WAITING FOR JOBS"
HPY_SH_PATH = "/root/heromonkey/functions/hpy.sh"  # Path to hpy.sh


@dataclass
class ProcessMetrics:
    """Metrics for a running process and its children.

    Every field has a zero/empty default, so ``ProcessMetrics()`` is a
    valid "nothing measured yet" value.
    """

    cpu_percent: float = 0.0
    memory_rss: int = 0  # Resident Set Size in bytes
    memory_vms: int = 0  # Virtual Memory Size in bytes
    memory_percent: float = 0.0
    num_threads: int = 0
    num_children: int = 0
    children_cpu_percent: float = 0.0
    children_memory_rss: int = 0  # presumably bytes, like memory_rss -- confirm
    last_updated: str = ""  # timestamp string; format is set by the writer


@dataclass
class TaskStatus:
    """Status of an individual task (script).

    ``script_path`` and ``script_name`` are required; every other field
    has a default describing a task that has not started.
    """

    script_path: str
    script_name: str
    state: str = "PENDING"  # PENDING, WAITING, RUNNING, DONE, ERROR, CRASHED, TIMED_OUT
    start_time: Optional[str] = None
    end_time: Optional[str] = None
    duration_seconds: float = 0.0
    exit_code: Optional[int] = None
    error_message: Optional[str] = None
    pane_id: Optional[str] = None
    # default_factory gives each task its own ProcessMetrics instance
    # instead of sharing one mutable default across all tasks.
    process_metrics: ProcessMetrics = field(default_factory=ProcessMetrics)


@dataclass
class DirectoryStatus:
    """Status of a directory containing tasks.

    ``directory_num`` and ``directory_path`` are required; the remaining
    fields default to a directory that has not been processed yet.
    """

    directory_num: int
    directory_path: str
    state: str = "PENDING"  # PENDING, RUNNING, DONE, ERROR, TIMED_OUT
    timeout: int = 600  # presumably seconds (matches MetaData's default) -- confirm
    start_time: Optional[str] = None
    end_time: Optional[str] = None
    duration_seconds: float = 0.0
    # default_factory gives each directory its own task list.
    tasks: List[TaskStatus] = field(default_factory=list)
    window_name: Optional[str] = None


@dataclass
class DAGStructure:
    """Complete DAG structure for the task run.

    ``run_name`` and ``run_id`` are required; counters start at zero and
    ``directories`` starts as an empty, per-instance list.
    """

    run_name: str
    run_id: str
    state: str = "INITIALIZING"  # INITIALIZING, RUNNING, COMPLETED, FAILED
    start_time: str = ""
    end_time: Optional[str] = None
    duration_seconds: float = 0.0
    total_directories: int = 0
    completed_directories: int = 0
    failed_directories: int = 0
    directories: List[DirectoryStatus] = field(default_factory=list)
    last_updated: str = ""


class MetaData:
    """Class to hold metadata for a task directory."""

    def __init__(self, timeout: int = 600):
        """Store the metadata values.

        Args:
            timeout: Timeout in seconds; defaults to 10 minutes (600 seconds).
        """
        self.timeout = timeout
        # Add more attributes here in the future