87 lines
2.7 KiB
Python
87 lines
2.7 KiB
Python
import os
|
|
import time
|
|
import re
|
|
import sys
|
|
import toml
|
|
import libtmux
|
|
from libtmux.pane import Pane
|
|
from libtmux.window import Window
|
|
from libtmux.session import Session
|
|
import psutil
|
|
from typing import Dict, List, Optional, Any, Set, Tuple
|
|
from dataclasses import dataclass, field, asdict
|
|
from datetime import datetime
|
|
import uuid
|
|
from pathlib import Path
|
|
import asyncio
|
|
import uvicorn
|
|
from fastapi import FastAPI, HTTPException
|
|
from fastapi.middleware.cors import CORSMiddleware
|
|
from pydantic import BaseModel
|
|
import threading
|
|
|
|
# Configuration

# Sentinel text for the idle state.
# NOTE(review): presumably matched against / printed into tmux pane output to
# signal that a pane has no work yet -- confirm against the code that uses it.
WAITING_MESSAGE = "WAITING FOR JOBS"

# Absolute path to the hpy.sh helper script.
# NOTE(review): hard-coded under /root; the path is not checked for existence here.
HPY_SH_PATH = "/root/heromonkey/functions/hpy.sh"  # Path to hpy.sh
|
|
|
|
@dataclass
class ProcessMetrics:
    """Metrics for a running process and its children.

    Every field has a zero/empty default, so ``ProcessMetrics()`` can be
    created before any measurement has been taken.
    """

    # Figures for the process itself.
    cpu_percent: float = 0.0
    memory_rss: int = 0  # Resident Set Size in bytes
    memory_vms: int = 0  # Virtual Memory Size in bytes
    memory_percent: float = 0.0
    num_threads: int = 0

    # Figures for its child processes.
    num_children: int = 0
    children_cpu_percent: float = 0.0
    children_memory_rss: int = 0  # presumably bytes, like memory_rss -- confirm

    # Empty string until set.
    # NOTE(review): presumably a timestamp string -- confirm format with the writer.
    last_updated: str = ""
|
|
|
|
@dataclass
class TaskStatus:
    """Status of an individual task (script).

    ``script_path`` and ``script_name`` are required; every other field has a
    default describing a task that has not started yet.
    """

    script_path: str
    script_name: str

    # One of: PENDING, WAITING, RUNNING, DONE, ERROR, CRASHED, TIMED_OUT
    state: str = "PENDING"

    # None until set.
    # NOTE(review): presumably timestamp strings -- confirm format with the writer.
    start_time: Optional[str] = None
    end_time: Optional[str] = None
    duration_seconds: float = 0.0

    # None until set.
    exit_code: Optional[int] = None
    error_message: Optional[str] = None

    # NOTE(review): presumably the tmux pane id the task runs in -- confirm.
    pane_id: Optional[str] = None

    # default_factory gives each task its own ProcessMetrics instance instead
    # of one object shared by all tasks.
    process_metrics: ProcessMetrics = field(default_factory=ProcessMetrics)
|
|
|
|
@dataclass
class DirectoryStatus:
    """Status of a directory containing tasks.

    ``directory_num`` and ``directory_path`` are required; the remaining
    fields default to a directory that has not been started.
    """

    directory_num: int
    directory_path: str

    # One of: PENDING, RUNNING, DONE, ERROR, TIMED_OUT
    state: str = "PENDING"

    # NOTE(review): presumably seconds (MetaData documents the same 600 as
    # 10 minutes) -- confirm.
    timeout: int = 600

    # None until set.
    # NOTE(review): presumably timestamp strings -- confirm format with the writer.
    start_time: Optional[str] = None
    end_time: Optional[str] = None
    duration_seconds: float = 0.0

    # default_factory gives each directory its own list instead of one list
    # shared by every instance.
    tasks: List[TaskStatus] = field(default_factory=list)

    # NOTE(review): presumably the tmux window name for this directory -- confirm.
    window_name: Optional[str] = None
|
|
|
|
@dataclass
class DAGStructure:
    """Complete DAG structure for the task run.

    ``run_name`` and ``run_id`` are required; the remaining fields default to
    a run that is still initializing and has no directories.
    """

    run_name: str
    run_id: str

    # One of: INITIALIZING, RUNNING, COMPLETED, FAILED
    state: str = "INITIALIZING"

    # start_time defaults to an empty string while end_time defaults to None.
    # NOTE(review): presumably timestamp strings -- confirm format with the writer.
    start_time: str = ""
    end_time: Optional[str] = None
    duration_seconds: float = 0.0

    # Directory counters; nothing in this class keeps them in sync with
    # ``directories``, so callers must update them.
    total_directories: int = 0
    completed_directories: int = 0
    failed_directories: int = 0

    # default_factory gives each run its own list instead of one list shared
    # by every instance.
    directories: List[DirectoryStatus] = field(default_factory=list)

    # Empty string until set.
    last_updated: str = ""
|
|
|
|
class MetaData:
    """Class to hold metadata for a task directory.

    Attributes:
        timeout: Timeout in seconds for the directory.
    """

    def __init__(self, timeout: int = 600):  # Default timeout to 10 minutes (600 seconds)
        """Store the metadata values.

        Args:
            timeout: Timeout in seconds; defaults to 600 (10 minutes).
        """
        self.timeout = timeout
        # Add more attributes here in the future

    def __repr__(self) -> str:
        """Return a constructor-style representation for logs and debugging."""
        return f"{type(self).__name__}(timeout={self.timeout!r})"
|