initial commit
This commit is contained in:
		
							
								
								
									
										166
									
								
								src/app.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										166
									
								
								src/app.rs
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,166 @@
 | 
			
		||||
//! # Hero Supervisor Application
 | 
			
		||||
//!
 | 
			
		||||
//! Simplified supervisor application that wraps a built Supervisor instance.
 | 
			
		||||
//! Use SupervisorBuilder to construct the supervisor with all configuration,
 | 
			
		||||
//! then pass it to SupervisorApp for runtime management.
 | 
			
		||||
 | 
			
		||||
use crate::Supervisor;
 | 
			
		||||
use crate::openrpc::start_openrpc_servers;
 | 
			
		||||
use log::{info, error, debug};
 | 
			
		||||
use std::sync::Arc;
 | 
			
		||||
use tokio::sync::Mutex;
 | 
			
		||||
 | 
			
		||||
/// Main supervisor application
 | 
			
		||||
pub struct SupervisorApp {
 | 
			
		||||
    pub supervisor: Supervisor,
 | 
			
		||||
    pub bind_address: String,
 | 
			
		||||
    pub port: u16,
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
impl SupervisorApp {
 | 
			
		||||
    /// Create a new supervisor application with a built supervisor
 | 
			
		||||
    pub fn new(supervisor: Supervisor, bind_address: String, port: u16) -> Self {
 | 
			
		||||
        Self { 
 | 
			
		||||
            supervisor,
 | 
			
		||||
            bind_address,
 | 
			
		||||
            port,
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Start the complete supervisor application
 | 
			
		||||
    /// This method handles the entire application lifecycle:
 | 
			
		||||
    /// - Starts all configured runners
 | 
			
		||||
    /// - Launches the OpenRPC server
 | 
			
		||||
    /// - Sets up graceful shutdown handling
 | 
			
		||||
    /// - Keeps the application running
 | 
			
		||||
    pub async fn start(&mut self) -> Result<(), Box<dyn std::error::Error>> {
 | 
			
		||||
        info!("Starting Hero Supervisor Application");
 | 
			
		||||
 | 
			
		||||
        // Start all configured runners
 | 
			
		||||
        self.start_all().await?;
 | 
			
		||||
 | 
			
		||||
        // Start OpenRPC server
 | 
			
		||||
        self.start_openrpc_server().await?;
 | 
			
		||||
 | 
			
		||||
        // Set up graceful shutdown
 | 
			
		||||
        self.setup_graceful_shutdown().await;
 | 
			
		||||
 | 
			
		||||
        // Keep the application running
 | 
			
		||||
        info!("Supervisor is running. Press Ctrl+C to shutdown.");
 | 
			
		||||
        self.run_main_loop().await;
 | 
			
		||||
 | 
			
		||||
        Ok(())
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Start the OpenRPC server
 | 
			
		||||
    async fn start_openrpc_server(&self) -> Result<(), Box<dyn std::error::Error>> {
 | 
			
		||||
        info!("Starting OpenRPC server...");
 | 
			
		||||
        
 | 
			
		||||
        let supervisor_for_openrpc = Arc::new(Mutex::new(self.supervisor.clone()));
 | 
			
		||||
        let bind_address = self.bind_address.clone();
 | 
			
		||||
        let port = self.port;
 | 
			
		||||
        
 | 
			
		||||
        // Start the OpenRPC server in a background task
 | 
			
		||||
        let server_handle = tokio::spawn(async move {
 | 
			
		||||
            if let Err(e) = start_openrpc_servers(supervisor_for_openrpc, &bind_address, port).await {
 | 
			
		||||
                error!("OpenRPC server error: {}", e);
 | 
			
		||||
            }
 | 
			
		||||
        });
 | 
			
		||||
 | 
			
		||||
        // Give the server a moment to start
 | 
			
		||||
        tokio::time::sleep(tokio::time::Duration::from_millis(100)).await;
 | 
			
		||||
        info!("OpenRPC server started successfully");
 | 
			
		||||
 | 
			
		||||
        // Store the handle for potential cleanup (we could add this to the struct if needed)
 | 
			
		||||
        std::mem::forget(server_handle); // For now, let it run in background
 | 
			
		||||
 | 
			
		||||
        Ok(())
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Set up graceful shutdown handling
 | 
			
		||||
    async fn setup_graceful_shutdown(&self) {
 | 
			
		||||
        tokio::spawn(async move {
 | 
			
		||||
            tokio::signal::ctrl_c().await.expect("Failed to listen for ctrl+c");
 | 
			
		||||
            info!("Received shutdown signal");
 | 
			
		||||
            std::process::exit(0);
 | 
			
		||||
        });
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Main application loop
 | 
			
		||||
    async fn run_main_loop(&self) {
 | 
			
		||||
        // Keep the main thread alive
 | 
			
		||||
        loop {
 | 
			
		||||
            tokio::time::sleep(tokio::time::Duration::from_secs(1)).await;
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Start all configured runners
 | 
			
		||||
    pub async fn start_all(&mut self) -> Result<(), Box<dyn std::error::Error>> {
 | 
			
		||||
        info!("Starting all runners");
 | 
			
		||||
        
 | 
			
		||||
        let results = self.supervisor.start_all().await;
 | 
			
		||||
        let mut failed_count = 0;
 | 
			
		||||
        
 | 
			
		||||
        for (runner_id, result) in results {
 | 
			
		||||
            match result {
 | 
			
		||||
                Ok(_) => info!("Runner {} started successfully", runner_id),
 | 
			
		||||
                Err(e) => {
 | 
			
		||||
                    error!("Failed to start runner {}: {}", runner_id, e);
 | 
			
		||||
                    failed_count += 1;
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
        
 | 
			
		||||
        if failed_count == 0 {
 | 
			
		||||
            info!("All runners started successfully");
 | 
			
		||||
        } else {
 | 
			
		||||
            error!("Failed to start {} runners", failed_count);
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        Ok(())
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Stop all configured runners
 | 
			
		||||
    pub async fn stop_all(&mut self, force: bool) -> Result<(), Box<dyn std::error::Error>> {
 | 
			
		||||
        info!("Stopping all runners (force: {})", force);
 | 
			
		||||
        
 | 
			
		||||
        let results = self.supervisor.stop_all(force).await;
 | 
			
		||||
        let mut failed_count = 0;
 | 
			
		||||
        
 | 
			
		||||
        for (runner_id, result) in results {
 | 
			
		||||
            match result {
 | 
			
		||||
                Ok(_) => info!("Runner {} stopped successfully", runner_id),
 | 
			
		||||
                Err(e) => {
 | 
			
		||||
                    error!("Failed to stop runner {}: {}", runner_id, e);
 | 
			
		||||
                    failed_count += 1;
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
        
 | 
			
		||||
        if failed_count == 0 {
 | 
			
		||||
            info!("All runners stopped successfully");
 | 
			
		||||
        } else {
 | 
			
		||||
            error!("Failed to stop {} runners", failed_count);
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        Ok(())
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Get status of all runners
 | 
			
		||||
    pub async fn get_status(&self) -> Result<Vec<(String, String)>, Box<dyn std::error::Error>> {
 | 
			
		||||
        debug!("Getting status of all runners");
 | 
			
		||||
        
 | 
			
		||||
        let statuses = self.supervisor.get_all_runner_status().await
 | 
			
		||||
            .map_err(|e| Box::new(e) as Box<dyn std::error::Error>)?;
 | 
			
		||||
        
 | 
			
		||||
        let status_strings: Vec<(String, String)> = statuses
 | 
			
		||||
            .into_iter()
 | 
			
		||||
            .map(|(runner_id, status)| {
 | 
			
		||||
                let status_str = format!("{:?}", status);
 | 
			
		||||
                (runner_id, status_str)
 | 
			
		||||
            })
 | 
			
		||||
            .collect();
 | 
			
		||||
        
 | 
			
		||||
        Ok(status_strings)
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
							
								
								
									
										327
									
								
								src/client.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										327
									
								
								src/client.rs
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,327 @@
 | 
			
		||||
//! Main supervisor implementation for managing multiple actor runners.
 | 
			
		||||
 | 
			
		||||
use chrono::Utc;
 | 
			
		||||
use redis::AsyncCommands;
 | 
			
		||||
use sal_service_manager::{ProcessManager, SimpleProcessManager, TmuxProcessManager};
 | 
			
		||||
use std::collections::HashMap;
 | 
			
		||||
use std::path::PathBuf;
 | 
			
		||||
use std::sync::Arc;
 | 
			
		||||
use tokio::sync::Mutex;
 | 
			
		||||
 | 
			
		||||
use crate::{runner::{LogInfo, Runner, RunnerConfig, RunnerError, RunnerResult, RunnerStatus}, JobError, job::JobStatus};
 | 
			
		||||
use crate::{job::Job};
 | 
			
		||||
 | 
			
		||||
#[cfg(feature = "admin")]
 | 
			
		||||
use supervisor_admin_server::{AdminSupervisor, RunnerConfigInfo, JobInfo};
 | 
			
		||||
 | 
			
		||||
/// Process manager type for a runner
 | 
			
		||||
#[derive(Debug, Clone)]
 | 
			
		||||
pub enum ProcessManagerType {
 | 
			
		||||
    /// Simple process manager for direct process spawning
 | 
			
		||||
    Simple,
 | 
			
		||||
    /// Tmux process manager for session-based management
 | 
			
		||||
    Tmux(String), // session name
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// Main supervisor that manages multiple runners
 | 
			
		||||
#[derive(Clone)]
 | 
			
		||||
pub struct Client {
 | 
			
		||||
    redis_client: redis::Client,
 | 
			
		||||
    /// Namespace for queue keys
 | 
			
		||||
    namespace: String,
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
pub struct ClientBuilder {
 | 
			
		||||
    /// Redis URL for connection
 | 
			
		||||
    redis_url: String,
 | 
			
		||||
    /// Namespace for queue keys
 | 
			
		||||
    namespace: String,
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
impl ClientBuilder {
 | 
			
		||||
    /// Create a new supervisor builder
 | 
			
		||||
    pub fn new() -> Self {
 | 
			
		||||
        Self {
 | 
			
		||||
            redis_url: "redis://localhost:6379".to_string(),
 | 
			
		||||
            namespace: "".to_string(),
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Set the Redis URL
 | 
			
		||||
    pub fn redis_url<S: Into<String>>(mut self, url: S) -> Self {
 | 
			
		||||
        self.redis_url = url.into();
 | 
			
		||||
        self
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Set the namespace for queue keys
 | 
			
		||||
    pub fn namespace<S: Into<String>>(mut self, namespace: S) -> Self {
 | 
			
		||||
        self.namespace = namespace.into();
 | 
			
		||||
        self
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Build the supervisor
 | 
			
		||||
    pub async fn build(self) -> RunnerResult<Client> {
 | 
			
		||||
        // Create Redis client
 | 
			
		||||
        let redis_client = redis::Client::open(self.redis_url.as_str())
 | 
			
		||||
            .map_err(|e| RunnerError::ConfigError {
 | 
			
		||||
                reason: format!("Invalid Redis URL: {}", e),
 | 
			
		||||
            })?;
 | 
			
		||||
 | 
			
		||||
        Ok(Client {
 | 
			
		||||
            redis_client,
 | 
			
		||||
            namespace: self.namespace,
 | 
			
		||||
        })
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
impl Default for Client {
 | 
			
		||||
    fn default() -> Self {
 | 
			
		||||
        // Note: Default implementation creates an empty supervisor
 | 
			
		||||
        // Use Supervisor::builder() for proper initialization
 | 
			
		||||
        Self {
 | 
			
		||||
            redis_client: redis::Client::open("redis://localhost:6379").unwrap(),
 | 
			
		||||
            namespace: "".to_string(),
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
impl Client {
 | 
			
		||||
    /// Create a new supervisor builder
 | 
			
		||||
    pub fn builder() -> ClientBuilder {
 | 
			
		||||
        ClientBuilder::new()
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// List all job IDs from Redis
 | 
			
		||||
    pub async fn list_jobs(&self) -> RunnerResult<Vec<String>> {
 | 
			
		||||
        let mut conn = self.redis_client
 | 
			
		||||
            .get_multiplexed_async_connection()
 | 
			
		||||
            .await
 | 
			
		||||
            .map_err(|e| RunnerError::RedisError { source: e })?;
 | 
			
		||||
        
 | 
			
		||||
        let keys: Vec<String> = conn.keys(format!("{}:*", &self.jobs_key())).await?;
 | 
			
		||||
        let job_ids: Vec<String> = keys
 | 
			
		||||
            .into_iter()
 | 
			
		||||
            .filter_map(|key| {
 | 
			
		||||
                if key.starts_with(&format!("{}:", self.jobs_key())) {
 | 
			
		||||
                    key.strip_prefix(&format!("{}:", self.jobs_key()))
 | 
			
		||||
                        .map(|s| s.to_string())
 | 
			
		||||
                } else {
 | 
			
		||||
                    None
 | 
			
		||||
                }
 | 
			
		||||
            })
 | 
			
		||||
            .collect();
 | 
			
		||||
        
 | 
			
		||||
        Ok(job_ids)
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    fn jobs_key(&self) -> String {
 | 
			
		||||
        if self.namespace.is_empty() {
 | 
			
		||||
            format!("job")
 | 
			
		||||
        } else {
 | 
			
		||||
            format!("{}:job", self.namespace)
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    pub fn job_key(&self, job_id: &str) -> String {
 | 
			
		||||
        if self.namespace.is_empty() {
 | 
			
		||||
            format!("job:{}", job_id)
 | 
			
		||||
        } else {
 | 
			
		||||
            format!("{}:job:{}", self.namespace, job_id)
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    pub fn job_reply_key(&self, job_id: &str) -> String {
 | 
			
		||||
        if self.namespace.is_empty() {
 | 
			
		||||
            format!("reply:{}", job_id)
 | 
			
		||||
        } else {
 | 
			
		||||
            format!("{}:reply:{}", self.namespace, job_id)
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Set job error in Redis
 | 
			
		||||
    pub async fn set_error(&self,
 | 
			
		||||
        job_id: &str,
 | 
			
		||||
        error: &str,
 | 
			
		||||
    ) -> Result<(), JobError> {
 | 
			
		||||
        let job_key = self.job_key(job_id);
 | 
			
		||||
        let now = Utc::now();
 | 
			
		||||
        
 | 
			
		||||
        let mut conn = self.redis_client
 | 
			
		||||
            .get_multiplexed_async_connection()
 | 
			
		||||
            .await
 | 
			
		||||
            .map_err(|e| JobError:: Redis(e))?;
 | 
			
		||||
            
 | 
			
		||||
        conn.hset_multiple(&job_key, &[
 | 
			
		||||
                ("error", error),
 | 
			
		||||
                ("status", JobStatus::Error.as_str()),
 | 
			
		||||
                ("updated_at", &now.to_rfc3339()),
 | 
			
		||||
            ]).await
 | 
			
		||||
            .map_err(|e| JobError::Redis(e))?;
 | 
			
		||||
 | 
			
		||||
        Ok(())
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Set job status in Redis
 | 
			
		||||
    pub async fn set_job_status(&self,
 | 
			
		||||
        job_id: &str,
 | 
			
		||||
        status: JobStatus,
 | 
			
		||||
    ) -> Result<(), JobError> {
 | 
			
		||||
        let job_key = self.job_key(job_id);
 | 
			
		||||
        let now = Utc::now();
 | 
			
		||||
        
 | 
			
		||||
        let mut conn = self.redis_client
 | 
			
		||||
            .get_multiplexed_async_connection()
 | 
			
		||||
            .await
 | 
			
		||||
            .map_err(|e| JobError:: Redis(e))?;
 | 
			
		||||
            
 | 
			
		||||
        conn.hset_multiple(&job_key, &[
 | 
			
		||||
                ("status", status.as_str()),
 | 
			
		||||
                ("updated_at", &now.to_rfc3339()),
 | 
			
		||||
            ]).await
 | 
			
		||||
            .map_err(|e| JobError::Redis(e))?;
 | 
			
		||||
        Ok(())
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
        /// Get job status from Redis
 | 
			
		||||
        pub async fn get_status(
 | 
			
		||||
            &self,
 | 
			
		||||
            job_id: &str,
 | 
			
		||||
        ) -> Result<JobStatus, JobError> {
 | 
			
		||||
            let mut conn = self.redis_client
 | 
			
		||||
                .get_multiplexed_async_connection()
 | 
			
		||||
                .await
 | 
			
		||||
                .map_err(|e| JobError:: Redis(e))?;
 | 
			
		||||
            
 | 
			
		||||
            let status_str: Option<String> = conn.hget(&self.job_key(job_id), "status").await?;
 | 
			
		||||
            
 | 
			
		||||
            match status_str {
 | 
			
		||||
                Some(s) => JobStatus::from_str(&s).ok_or_else(|| JobError::InvalidStatus(s)),
 | 
			
		||||
                None => Err(JobError::NotFound(job_id.to_string())),
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
    /// Delete job from Redis
 | 
			
		||||
    pub async fn delete_from_redis(
 | 
			
		||||
        &self,
 | 
			
		||||
        job_id: &str,
 | 
			
		||||
    ) -> Result<(), JobError> {
 | 
			
		||||
        let mut conn = self.redis_client
 | 
			
		||||
                .get_multiplexed_async_connection()
 | 
			
		||||
                .await
 | 
			
		||||
                .map_err(|e| JobError:: Redis(e))?;
 | 
			
		||||
 | 
			
		||||
        let job_key = self.job_key(job_id);
 | 
			
		||||
        let _: () = conn.del(&job_key).await?;
 | 
			
		||||
        Ok(())
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Store this job in Redis
 | 
			
		||||
    pub async fn store_job_in_redis(&self, job: &Job) -> Result<(), JobError> {
 | 
			
		||||
        let mut conn = self.redis_client
 | 
			
		||||
            .get_multiplexed_async_connection()
 | 
			
		||||
            .await
 | 
			
		||||
            .map_err(|e| JobError:: Redis(e))?;
 | 
			
		||||
 | 
			
		||||
        let job_key = self.job_key(&job.id);
 | 
			
		||||
        
 | 
			
		||||
        // Serialize the job data
 | 
			
		||||
        let job_data = serde_json::to_string(job)?;
 | 
			
		||||
        
 | 
			
		||||
        // Store job data in Redis hash
 | 
			
		||||
        let _: () = conn.hset_multiple(&job_key, &[
 | 
			
		||||
            ("data", job_data),
 | 
			
		||||
            ("status", JobStatus::Dispatched.as_str().to_string()),
 | 
			
		||||
            ("created_at", job.created_at.to_rfc3339()),
 | 
			
		||||
            ("updated_at", job.updated_at.to_rfc3339()),
 | 
			
		||||
        ]).await?;
 | 
			
		||||
 | 
			
		||||
        // Set TTL for the job (24 hours)
 | 
			
		||||
        let _: () = conn.expire(&job_key, 86400).await?;
 | 
			
		||||
 | 
			
		||||
        Ok(())
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Load a job from Redis by ID
 | 
			
		||||
    pub async fn load_job_from_redis(
 | 
			
		||||
        &self,
 | 
			
		||||
        job_id: &str,
 | 
			
		||||
    ) -> Result<Job, JobError> {
 | 
			
		||||
        let job_key = self.job_key(job_id);
 | 
			
		||||
        
 | 
			
		||||
        let mut conn = self.redis_client
 | 
			
		||||
            .get_multiplexed_async_connection()
 | 
			
		||||
            .await
 | 
			
		||||
            .map_err(|e| JobError:: Redis(e))?;
 | 
			
		||||
 | 
			
		||||
        // Get job data from Redis
 | 
			
		||||
        let job_data: Option<String> = conn.hget(&job_key, "data").await?;
 | 
			
		||||
        
 | 
			
		||||
        match job_data {
 | 
			
		||||
            Some(data) => {
 | 
			
		||||
                let job: Job = serde_json::from_str(&data)?;
 | 
			
		||||
                Ok(job)
 | 
			
		||||
            }
 | 
			
		||||
            None => Err(JobError::NotFound(job_id.to_string())),
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Delete a job by ID
 | 
			
		||||
    pub async fn delete_job(&mut self, job_id: &str) -> RunnerResult<()> {
 | 
			
		||||
        use redis::AsyncCommands;
 | 
			
		||||
        
 | 
			
		||||
        let mut conn = self.redis_client.get_multiplexed_async_connection().await
 | 
			
		||||
            .map_err(|e| JobError:: Redis(e))?;
 | 
			
		||||
        
 | 
			
		||||
        let job_key = self.job_key(job_id);
 | 
			
		||||
        let deleted_count: i32 = conn.del(&job_key).await
 | 
			
		||||
            .map_err(|e| RunnerError::QueueError {
 | 
			
		||||
                actor_id: job_id.to_string(),
 | 
			
		||||
                reason: format!("Failed to delete job: {}", e),
 | 
			
		||||
            })?;
 | 
			
		||||
        
 | 
			
		||||
        if deleted_count == 0 {
 | 
			
		||||
            return Err(RunnerError::QueueError {
 | 
			
		||||
                actor_id: job_id.to_string(),
 | 
			
		||||
                reason: format!("Job '{}' not found or already deleted", job_id),
 | 
			
		||||
            });
 | 
			
		||||
        }
 | 
			
		||||
        
 | 
			
		||||
        Ok(())
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Set job result in Redis
 | 
			
		||||
    pub async fn set_result(
 | 
			
		||||
        &self,
 | 
			
		||||
        job_id: &str,
 | 
			
		||||
        result: &str,
 | 
			
		||||
    ) -> Result<(), JobError> {
 | 
			
		||||
        let job_key = self.job_key(&job_id);
 | 
			
		||||
        let now = Utc::now();
 | 
			
		||||
        let mut conn = self.redis_client
 | 
			
		||||
            .get_multiplexed_async_connection()
 | 
			
		||||
            .await
 | 
			
		||||
            .map_err(|e| JobError:: Redis(e))?;
 | 
			
		||||
        let _: () = conn.hset_multiple(&job_key, &[
 | 
			
		||||
            ("result", result),
 | 
			
		||||
            ("status", JobStatus::Finished.as_str()),
 | 
			
		||||
            ("updated_at", &now.to_rfc3339()),
 | 
			
		||||
        ]).await?;
 | 
			
		||||
 | 
			
		||||
        Ok(())
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Get job result from Redis
 | 
			
		||||
    pub async fn get_result(
 | 
			
		||||
        &self,
 | 
			
		||||
        job_id: &str,
 | 
			
		||||
    ) -> Result<Option<String>, JobError> {
 | 
			
		||||
        let job_key = self.job_key(job_id);
 | 
			
		||||
        let mut conn = self.redis_client
 | 
			
		||||
            .get_multiplexed_async_connection()
 | 
			
		||||
            .await
 | 
			
		||||
            .map_err(|e| JobError:: Redis(e))?;
 | 
			
		||||
        let result: Option<String> = conn.hget(&job_key, "result").await?;
 | 
			
		||||
        Ok(result)
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
							
								
								
									
										220
									
								
								src/job.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										220
									
								
								src/job.rs
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,220 @@
 | 
			
		||||
use chrono::Utc;
 | 
			
		||||
use serde::{Deserialize, Serialize};
 | 
			
		||||
use std::collections::HashMap;
 | 
			
		||||
use std::time::Duration;
 | 
			
		||||
use uuid::Uuid;
 | 
			
		||||
use redis::AsyncCommands;
 | 
			
		||||
use thiserror::Error;
 | 
			
		||||
 | 
			
		||||
/// Job status enumeration
 | 
			
		||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
 | 
			
		||||
pub enum JobStatus {
 | 
			
		||||
    Dispatched,
 | 
			
		||||
    WaitingForPrerequisites,
 | 
			
		||||
    Started,
 | 
			
		||||
    Error,
 | 
			
		||||
    Stopping,
 | 
			
		||||
    Finished,
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
impl JobStatus {
 | 
			
		||||
    pub fn as_str(&self) -> &'static str {
 | 
			
		||||
        match self {
 | 
			
		||||
            JobStatus::Dispatched => "dispatched",
 | 
			
		||||
            JobStatus::WaitingForPrerequisites => "waiting_for_prerequisites",
 | 
			
		||||
            JobStatus::Started => "started",
 | 
			
		||||
            JobStatus::Error => "error",
 | 
			
		||||
            JobStatus::Stopping => "stopping",
 | 
			
		||||
            JobStatus::Finished => "finished",
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    pub fn from_str(s: &str) -> Option<Self> {
 | 
			
		||||
        match s {
 | 
			
		||||
            "dispatched" => Some(JobStatus::Dispatched),
 | 
			
		||||
            "waiting_for_prerequisites" => Some(JobStatus::WaitingForPrerequisites),
 | 
			
		||||
            "started" => Some(JobStatus::Started),
 | 
			
		||||
            "error" => Some(JobStatus::Error),
 | 
			
		||||
            "stopping" => Some(JobStatus::Stopping),
 | 
			
		||||
            "finished" => Some(JobStatus::Finished),
 | 
			
		||||
            _ => None,
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// Representation of a script execution request.
 | 
			
		||||
///
 | 
			
		||||
/// This structure contains all the information needed to execute a script
 | 
			
		||||
/// on a actor service, including the script content, dependencies, and metadata.
 | 
			
		||||
#[derive(Debug, Clone, Serialize, Deserialize)]
 | 
			
		||||
pub struct Job {
 | 
			
		||||
    pub id: String,
 | 
			
		||||
    pub caller_id: String,
 | 
			
		||||
    pub context_id: String,
 | 
			
		||||
    pub payload: String,
 | 
			
		||||
    pub runner_name: String, // name of the runner to execute this job
 | 
			
		||||
    pub executor: String, // name of the executor the runner will use to execute this job
 | 
			
		||||
    pub timeout: Duration,
 | 
			
		||||
    pub env_vars: HashMap<String, String>, // environment variables for script execution
 | 
			
		||||
    pub created_at: chrono::DateTime<chrono::Utc>,
 | 
			
		||||
    pub updated_at: chrono::DateTime<chrono::Utc>,
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// Error types for job operations
 | 
			
		||||
#[derive(Error, Debug)]
 | 
			
		||||
pub enum JobError {
 | 
			
		||||
    #[error("Redis error: {0}")]
 | 
			
		||||
    Redis(#[from] redis::RedisError),
 | 
			
		||||
    #[error("Serialization error: {0}")]
 | 
			
		||||
    Serialization(#[from] serde_json::Error),
 | 
			
		||||
    #[error("Job not found: {0}")]
 | 
			
		||||
    NotFound(String),
 | 
			
		||||
    #[error("Invalid job status: {0}")]
 | 
			
		||||
    InvalidStatus(String),
 | 
			
		||||
    #[error("Timeout error: {0}")]
 | 
			
		||||
    Timeout(String),
 | 
			
		||||
    #[error("Invalid job data: {0}")]
 | 
			
		||||
    InvalidData(String),
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
impl Job {
 | 
			
		||||
    /// Create a new job with the given parameters
 | 
			
		||||
    pub fn new(
 | 
			
		||||
        caller_id: String,
 | 
			
		||||
        context_id: String,
 | 
			
		||||
        payload: String,
 | 
			
		||||
        runner_name: String,
 | 
			
		||||
        executor: String,
 | 
			
		||||
    ) -> Self {
 | 
			
		||||
        let now = Utc::now();
 | 
			
		||||
        Self {
 | 
			
		||||
            id: Uuid::new_v4().to_string(),
 | 
			
		||||
            caller_id,
 | 
			
		||||
            context_id,
 | 
			
		||||
            payload,
 | 
			
		||||
            runner_name,
 | 
			
		||||
            executor,
 | 
			
		||||
            timeout: Duration::from_secs(300), // 5 minutes default
 | 
			
		||||
            env_vars: HashMap::new(),
 | 
			
		||||
            created_at: now,
 | 
			
		||||
            updated_at: now,
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// Builder for constructing job execution requests.
 | 
			
		||||
pub struct JobBuilder {
 | 
			
		||||
    caller_id: String,
 | 
			
		||||
    context_id: String,
 | 
			
		||||
    payload: String,
 | 
			
		||||
    runner_name: String,
 | 
			
		||||
    executor: String,
 | 
			
		||||
    timeout: Duration,
 | 
			
		||||
    env_vars: HashMap<String, String>,
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
impl JobBuilder {
 | 
			
		||||
    pub fn new() -> Self {
 | 
			
		||||
        Self {
 | 
			
		||||
            caller_id: "".to_string(),
 | 
			
		||||
            context_id: "".to_string(),
 | 
			
		||||
            payload: "".to_string(),
 | 
			
		||||
            runner_name: "".to_string(),
 | 
			
		||||
            executor: "".to_string(),
 | 
			
		||||
            timeout: Duration::from_secs(300), // 5 minutes default
 | 
			
		||||
            env_vars: HashMap::new(),
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Set the caller ID for this job
 | 
			
		||||
    pub fn caller_id(mut self, caller_id: &str) -> Self {
 | 
			
		||||
        self.caller_id = caller_id.to_string();
 | 
			
		||||
        self
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Set the context ID for this job
 | 
			
		||||
    pub fn context_id(mut self, context_id: &str) -> Self {
 | 
			
		||||
        self.context_id = context_id.to_string();
 | 
			
		||||
        self
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Set the payload (script content) for this job
 | 
			
		||||
    pub fn payload(mut self, payload: &str) -> Self {
 | 
			
		||||
        self.payload = payload.to_string();
 | 
			
		||||
        self
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Set the runner name for this job
 | 
			
		||||
    pub fn runner_name(mut self, runner_name: &str) -> Self {
 | 
			
		||||
        self.runner_name = runner_name.to_string();
 | 
			
		||||
        self
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Set the executor for this job
 | 
			
		||||
    pub fn executor(mut self, executor: &str) -> Self {
 | 
			
		||||
        self.executor = executor.to_string();
 | 
			
		||||
        self
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Set the timeout for job execution
 | 
			
		||||
    pub fn timeout(mut self, timeout: Duration) -> Self {
 | 
			
		||||
        self.timeout = timeout;
 | 
			
		||||
        self
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Set a single environment variable
 | 
			
		||||
    pub fn env_var(mut self, key: &str, value: &str) -> Self {
 | 
			
		||||
        self.env_vars.insert(key.to_string(), value.to_string());
 | 
			
		||||
        self
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Set multiple environment variables from a HashMap
 | 
			
		||||
    pub fn env_vars(mut self, env_vars: HashMap<String, String>) -> Self {
 | 
			
		||||
        self.env_vars = env_vars;
 | 
			
		||||
        self
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Clear all environment variables
 | 
			
		||||
    pub fn clear_env_vars(mut self) -> Self {
 | 
			
		||||
        self.env_vars.clear();
 | 
			
		||||
        self
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Build the job
 | 
			
		||||
    pub fn build(self) -> Result<Job, JobError> {
 | 
			
		||||
        if self.caller_id.is_empty() {
 | 
			
		||||
            return Err(JobError::InvalidData("caller_id is required".to_string()));
 | 
			
		||||
        }
 | 
			
		||||
        if self.context_id.is_empty() {
 | 
			
		||||
            return Err(JobError::InvalidData("context_id is required".to_string()));
 | 
			
		||||
        }
 | 
			
		||||
        if self.payload.is_empty() {
 | 
			
		||||
            return Err(JobError::InvalidData("payload is required".to_string()));
 | 
			
		||||
        }
 | 
			
		||||
        if self.runner_name.is_empty() {
 | 
			
		||||
            return Err(JobError::InvalidData("runner_name is required".to_string()));
 | 
			
		||||
        }
 | 
			
		||||
        if self.executor.is_empty() {
 | 
			
		||||
            return Err(JobError::InvalidData("executor is required".to_string()));
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        let mut job = Job::new(
 | 
			
		||||
            self.caller_id,
 | 
			
		||||
            self.context_id,
 | 
			
		||||
            self.payload,
 | 
			
		||||
            self.runner_name,
 | 
			
		||||
            self.executor,
 | 
			
		||||
        );
 | 
			
		||||
 | 
			
		||||
        job.timeout = self.timeout;
 | 
			
		||||
        job.env_vars = self.env_vars;
 | 
			
		||||
 | 
			
		||||
        Ok(job)
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
impl Default for JobBuilder {
 | 
			
		||||
    fn default() -> Self {
 | 
			
		||||
        Self::new()
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
							
								
								
									
										21
									
								
								src/lib.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										21
									
								
								src/lib.rs
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,21 @@
 | 
			
		||||
//! Hero Supervisor - Actor management for the Hero ecosystem.
 | 
			
		||||
//! 
 | 
			
		||||
//! See README.md for detailed documentation and usage examples.
 | 
			
		||||
 | 
			
		||||
pub mod runner;
 | 
			
		||||
pub mod supervisor;
 | 
			
		||||
pub mod job;
 | 
			
		||||
pub mod client;
 | 
			
		||||
pub mod app;
 | 
			
		||||
 | 
			
		||||
// OpenRPC server module
 | 
			
		||||
pub mod openrpc;
 | 
			
		||||
 | 
			
		||||
// Re-export main types for convenience
 | 
			
		||||
pub use runner::{
 | 
			
		||||
    LogInfo, Runner, RunnerConfig, RunnerResult, RunnerStatus,
 | 
			
		||||
};
 | 
			
		||||
pub use sal_service_manager::{ProcessManager, SimpleProcessManager, TmuxProcessManager};
 | 
			
		||||
pub use supervisor::{Supervisor, SupervisorBuilder, ProcessManagerType};
 | 
			
		||||
pub use job::{Job, JobBuilder, JobStatus, JobError};
 | 
			
		||||
pub use app::SupervisorApp;
 | 
			
		||||
							
								
								
									
										829
									
								
								src/openrpc.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										829
									
								
								src/openrpc.rs
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,829 @@
 | 
			
		||||
//! OpenRPC server implementation.
 | 
			
		||||
 | 
			
		||||
use jsonrpsee::{
 | 
			
		||||
    core::{RpcResult, async_trait},
 | 
			
		||||
    proc_macros::rpc,
 | 
			
		||||
    server::{Server, ServerHandle},
 | 
			
		||||
    types::{ErrorObject, ErrorObjectOwned},
 | 
			
		||||
};
 | 
			
		||||
use tower_http::cors::{CorsLayer, Any};
 | 
			
		||||
 | 
			
		||||
use anyhow;
 | 
			
		||||
use log::{debug, info, error};
 | 
			
		||||
 | 
			
		||||
use crate::supervisor::Supervisor;
 | 
			
		||||
use crate::runner::{Runner, RunnerError};
 | 
			
		||||
use crate::job::Job;
 | 
			
		||||
use crate::ProcessManagerType;
 | 
			
		||||
use sal_service_manager::{ProcessStatus, LogInfo};
 | 
			
		||||
use serde::{Deserialize, Serialize};
 | 
			
		||||
 | 
			
		||||
use std::net::SocketAddr;
 | 
			
		||||
use std::path::PathBuf;
 | 
			
		||||
use std::sync::Arc;
 | 
			
		||||
use std::fs;
 | 
			
		||||
use tokio::sync::Mutex;
 | 
			
		||||
 | 
			
		||||
/// Load OpenRPC specification from docs/openrpc.json
 | 
			
		||||
fn load_openrpc_spec() -> Result<serde_json::Value, Box<dyn std::error::Error>> {
 | 
			
		||||
    // Try to find the openrpc.json file relative to the current working directory
 | 
			
		||||
    let possible_paths = [
 | 
			
		||||
        "docs/openrpc.json",
 | 
			
		||||
        "../docs/openrpc.json", 
 | 
			
		||||
        "../../docs/openrpc.json",
 | 
			
		||||
        "./supervisor/docs/openrpc.json",
 | 
			
		||||
    ];
 | 
			
		||||
    
 | 
			
		||||
    for path in &possible_paths {
 | 
			
		||||
        if let Ok(content) = fs::read_to_string(path) {
 | 
			
		||||
            match serde_json::from_str(&content) {
 | 
			
		||||
                Ok(spec) => {
 | 
			
		||||
                    debug!("Loaded OpenRPC specification from: {}", path);
 | 
			
		||||
                    return Ok(spec);
 | 
			
		||||
                }
 | 
			
		||||
                Err(e) => {
 | 
			
		||||
                    error!("Failed to parse OpenRPC JSON from {}: {}", path, e);
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
    
 | 
			
		||||
    Err("Could not find or parse docs/openrpc.json".into())
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// Helper function to convert RunnerError to RPC error
 | 
			
		||||
fn runner_error_to_rpc_error(err: RunnerError) -> ErrorObject<'static> {
 | 
			
		||||
    ErrorObject::owned(
 | 
			
		||||
        -32603, // Internal error code
 | 
			
		||||
        format!("Supervisor error: {}", err),
 | 
			
		||||
        None::<()>,
 | 
			
		||||
    )
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// Helper function to create invalid params error
 | 
			
		||||
fn invalid_params_error(msg: &str) -> ErrorObject<'static> {
 | 
			
		||||
    ErrorObject::owned(
 | 
			
		||||
        -32602, // Invalid params error code
 | 
			
		||||
        format!("Invalid parameters: {}", msg),
 | 
			
		||||
        None::<()>,
 | 
			
		||||
    )
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// Request parameters for registering a new runner
 | 
			
		||||
#[derive(Debug, Deserialize, Serialize)]
 | 
			
		||||
pub struct RegisterRunnerParams {
 | 
			
		||||
    pub secret: String,
 | 
			
		||||
    pub name: String,
 | 
			
		||||
    pub queue: String,
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// Request parameters for running a job
 | 
			
		||||
#[derive(Debug, Deserialize, Serialize)]
 | 
			
		||||
pub struct RunJobParams {
 | 
			
		||||
    pub secret: String,
 | 
			
		||||
    pub job: Job,
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// Request parameters for adding a new runner
 | 
			
		||||
#[derive(Debug, Deserialize, Serialize)]
 | 
			
		||||
pub struct AddRunnerParams {
 | 
			
		||||
    pub actor_id: String,
 | 
			
		||||
    pub binary_path: String,
 | 
			
		||||
    pub db_path: String,
 | 
			
		||||
    pub redis_url: String,
 | 
			
		||||
    pub process_manager_type: String, // "simple" or "tmux"
 | 
			
		||||
    pub tmux_session_name: Option<String>, // required if process_manager_type is "tmux"
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// Request parameters for queuing a job
 | 
			
		||||
#[derive(Debug, Deserialize, Serialize)]
 | 
			
		||||
pub struct QueueJobParams {
 | 
			
		||||
    pub runner_name: String,
 | 
			
		||||
    pub job: Job,
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// Request parameters for queue and wait operation
 | 
			
		||||
#[derive(Debug, Deserialize, Serialize)]
 | 
			
		||||
pub struct QueueAndWaitParams {
 | 
			
		||||
    pub runner_name: String,
 | 
			
		||||
    pub job: Job,
 | 
			
		||||
    pub timeout_secs: u64,
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// Request parameters for getting runner logs
 | 
			
		||||
#[derive(Debug, Deserialize, Serialize)]
 | 
			
		||||
pub struct GetLogsParams {
 | 
			
		||||
    pub actor_id: String,
 | 
			
		||||
    pub lines: Option<usize>,
 | 
			
		||||
    pub follow: bool,
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// Request parameters for adding secrets
 | 
			
		||||
#[derive(Debug, Deserialize, Serialize)]
 | 
			
		||||
pub struct AddSecretParams {
 | 
			
		||||
    pub admin_secret: String,
 | 
			
		||||
    pub secret_type: String, // "admin", "user", or "register"
 | 
			
		||||
    pub secret_value: String,
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// Request parameters for removing secrets
 | 
			
		||||
#[derive(Debug, Deserialize, Serialize)]
 | 
			
		||||
pub struct RemoveSecretParams {
 | 
			
		||||
    pub admin_secret: String,
 | 
			
		||||
    pub secret_type: String, // "admin", "user", or "register"
 | 
			
		||||
    pub secret_value: String,
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// Request parameters for listing secrets
 | 
			
		||||
#[derive(Debug, Deserialize, Serialize)]
 | 
			
		||||
pub struct ListSecretsParams {
 | 
			
		||||
    pub admin_secret: String,
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// Serializable wrapper for ProcessStatus
 | 
			
		||||
#[derive(Debug, Serialize, Clone)]
 | 
			
		||||
pub enum ProcessStatusWrapper {
 | 
			
		||||
    Running,
 | 
			
		||||
    Stopped,
 | 
			
		||||
    Starting,
 | 
			
		||||
    Stopping,
 | 
			
		||||
    Error(String),
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
impl From<ProcessStatus> for ProcessStatusWrapper {
 | 
			
		||||
    fn from(status: ProcessStatus) -> Self {
 | 
			
		||||
        match status {
 | 
			
		||||
            ProcessStatus::Running => ProcessStatusWrapper::Running,
 | 
			
		||||
            ProcessStatus::Stopped => ProcessStatusWrapper::Stopped,
 | 
			
		||||
            ProcessStatus::Starting => ProcessStatusWrapper::Starting,
 | 
			
		||||
            ProcessStatus::Stopping => ProcessStatusWrapper::Stopping,
 | 
			
		||||
            ProcessStatus::Error(msg) => ProcessStatusWrapper::Error(msg),
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Note: RunnerStatus is just an alias for ProcessStatus, so we don't need a separate impl
 | 
			
		||||
 | 
			
		||||
/// Serializable wrapper for Runner
 | 
			
		||||
#[derive(Debug, Serialize, Clone)]
 | 
			
		||||
pub struct RunnerWrapper {
 | 
			
		||||
    pub id: String,
 | 
			
		||||
    pub name: String,
 | 
			
		||||
    pub command: String,
 | 
			
		||||
    pub redis_url: String,
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
impl From<&Runner> for RunnerWrapper {
 | 
			
		||||
    fn from(runner: &Runner) -> Self {
 | 
			
		||||
        RunnerWrapper {
 | 
			
		||||
            id: runner.id.clone(),
 | 
			
		||||
            name: runner.name.clone(),
 | 
			
		||||
            command: runner.command.to_string_lossy().to_string(),
 | 
			
		||||
            redis_url: runner.redis_url.clone(),
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// Serializable wrapper for LogInfo
 | 
			
		||||
#[derive(Debug, Serialize, Clone)]
 | 
			
		||||
pub struct LogInfoWrapper {
 | 
			
		||||
    pub timestamp: String,
 | 
			
		||||
    pub level: String,
 | 
			
		||||
    pub message: String,
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
impl From<LogInfo> for LogInfoWrapper {
 | 
			
		||||
    fn from(log: LogInfo) -> Self {
 | 
			
		||||
        LogInfoWrapper {
 | 
			
		||||
            timestamp: log.timestamp,
 | 
			
		||||
            level: log.level,
 | 
			
		||||
            message: log.message,
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
impl From<crate::runner::LogInfo> for LogInfoWrapper {
 | 
			
		||||
    fn from(log: crate::runner::LogInfo) -> Self {
 | 
			
		||||
        LogInfoWrapper {
 | 
			
		||||
            timestamp: log.timestamp,
 | 
			
		||||
            level: log.level,
 | 
			
		||||
            message: log.message,
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// Response for runner status queries
 | 
			
		||||
#[derive(Debug, Serialize, Clone)]
 | 
			
		||||
pub struct RunnerStatusResponse {
 | 
			
		||||
    pub actor_id: String,
 | 
			
		||||
    pub status: ProcessStatusWrapper,
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// Response for supervisor info
 | 
			
		||||
#[derive(Debug, Serialize, Clone)]
 | 
			
		||||
pub struct SupervisorInfoResponse {
 | 
			
		||||
    pub server_url: String,
 | 
			
		||||
    pub admin_secrets_count: usize,
 | 
			
		||||
    pub user_secrets_count: usize,
 | 
			
		||||
    pub register_secrets_count: usize,
 | 
			
		||||
    pub runners_count: usize,
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// OpenRPC trait defining all supervisor methods
 | 
			
		||||
#[rpc(server)]
 | 
			
		||||
pub trait SupervisorRpc {
 | 
			
		||||
    /// Register a new runner with secret-based authentication
 | 
			
		||||
    #[method(name = "register_runner")]
 | 
			
		||||
    async fn register_runner(&self, params: RegisterRunnerParams) -> RpcResult<String>;
 | 
			
		||||
 | 
			
		||||
    /// Create a job (fire-and-forget, non-blocking)
 | 
			
		||||
    #[method(name = "create_job")]
 | 
			
		||||
    async fn create_job(&self, params: RunJobParams) -> RpcResult<String>;
 | 
			
		||||
 | 
			
		||||
    /// Run a job on the appropriate runner (blocking, returns result)
 | 
			
		||||
    #[method(name = "run_job")]
 | 
			
		||||
    async fn run_job(&self, params: RunJobParams) -> RpcResult<Option<String>>;
 | 
			
		||||
 | 
			
		||||
    /// Remove a runner from the supervisor
 | 
			
		||||
    #[method(name = "remove_runner")]
 | 
			
		||||
    async fn remove_runner(&self, actor_id: String) -> RpcResult<()>;
 | 
			
		||||
 | 
			
		||||
    /// List all runner IDs
 | 
			
		||||
    #[method(name = "list_runners")]
 | 
			
		||||
    async fn list_runners(&self) -> RpcResult<Vec<String>>;
 | 
			
		||||
 | 
			
		||||
    /// Start a specific runner
 | 
			
		||||
    #[method(name = "start_runner")]
 | 
			
		||||
    async fn start_runner(&self, actor_id: String) -> RpcResult<()>;
 | 
			
		||||
 | 
			
		||||
    /// Stop a specific runner
 | 
			
		||||
    #[method(name = "stop_runner")]
 | 
			
		||||
    async fn stop_runner(&self, actor_id: String, force: bool) -> RpcResult<()>;
 | 
			
		||||
 | 
			
		||||
    /// Get a specific runner by ID
 | 
			
		||||
    #[method(name = "get_runner")]
 | 
			
		||||
    async fn get_runner(&self, actor_id: String) -> RpcResult<RunnerWrapper>;
 | 
			
		||||
 | 
			
		||||
    /// Get the status of a specific runner
 | 
			
		||||
    #[method(name = "get_runner_status")]
 | 
			
		||||
    async fn get_runner_status(&self, actor_id: String) -> RpcResult<ProcessStatusWrapper>;
 | 
			
		||||
 | 
			
		||||
    /// Get logs for a specific runner
 | 
			
		||||
    #[method(name = "get_runner_logs")]
 | 
			
		||||
    async fn get_runner_logs(&self, params: GetLogsParams) -> RpcResult<Vec<LogInfoWrapper>>;
 | 
			
		||||
 | 
			
		||||
    /// Queue a job to a specific runner
 | 
			
		||||
    #[method(name = "queue_job_to_runner")]
 | 
			
		||||
    async fn queue_job_to_runner(&self, params: QueueJobParams) -> RpcResult<()>;
 | 
			
		||||
 | 
			
		||||
    /// List all job IDs from Redis
 | 
			
		||||
    #[method(name = "list_jobs")]
 | 
			
		||||
    async fn list_jobs(&self) -> RpcResult<Vec<String>>;
 | 
			
		||||
 | 
			
		||||
    /// Get a job by job ID
 | 
			
		||||
    #[method(name = "get_job")]
 | 
			
		||||
    async fn get_job(&self, job_id: String) -> RpcResult<Job>;
 | 
			
		||||
 | 
			
		||||
    /// Ping a runner (dispatch a ping job)
 | 
			
		||||
    #[method(name = "ping_runner")]
 | 
			
		||||
    async fn ping_runner(&self, runner_id: String) -> RpcResult<String>;
 | 
			
		||||
 | 
			
		||||
    /// Stop a job
 | 
			
		||||
    #[method(name = "stop_job")]
 | 
			
		||||
    async fn stop_job(&self, job_id: String) -> RpcResult<()>;
 | 
			
		||||
 | 
			
		||||
    /// Delete a job
 | 
			
		||||
    #[method(name = "delete_job")]
 | 
			
		||||
    async fn delete_job(&self, job_id: String) -> RpcResult<()>;
 | 
			
		||||
 | 
			
		||||
    /// Queue a job to a specific runner and wait for the result
 | 
			
		||||
    #[method(name = "queue_and_wait")]
 | 
			
		||||
    async fn queue_and_wait(&self, params: QueueAndWaitParams) -> RpcResult<Option<String>>;
 | 
			
		||||
 | 
			
		||||
    /// Get status of all runners
 | 
			
		||||
    #[method(name = "get_all_runner_status")]
 | 
			
		||||
    async fn get_all_runner_status(&self) -> RpcResult<Vec<RunnerStatusResponse>>;
 | 
			
		||||
 | 
			
		||||
    /// Start all runners
 | 
			
		||||
    #[method(name = "start_all")]
 | 
			
		||||
    async fn start_all(&self) -> RpcResult<Vec<(String, String)>>;
 | 
			
		||||
 | 
			
		||||
    /// Stop all runners
 | 
			
		||||
    #[method(name = "stop_all")]
 | 
			
		||||
    async fn stop_all(&self, force: bool) -> RpcResult<Vec<(String, String)>>;
 | 
			
		||||
 | 
			
		||||
    /// Get status of all runners (alternative format)
 | 
			
		||||
    #[method(name = "get_all_status")]
 | 
			
		||||
    async fn get_all_status(&self) -> RpcResult<Vec<(String, String)>>;
 | 
			
		||||
 | 
			
		||||
    /// Add a secret to the supervisor (admin, user, or register)
 | 
			
		||||
    #[method(name = "add_secret")]
 | 
			
		||||
    async fn add_secret(&self, params: AddSecretParams) -> RpcResult<()>;
 | 
			
		||||
 | 
			
		||||
    /// Remove a secret from the supervisor
 | 
			
		||||
    #[method(name = "remove_secret")]
 | 
			
		||||
    async fn remove_secret(&self, params: RemoveSecretParams) -> RpcResult<()>;
 | 
			
		||||
 | 
			
		||||
    /// List all secrets (returns counts only for security)
 | 
			
		||||
    #[method(name = "list_secrets")]
 | 
			
		||||
    async fn list_secrets(&self, params: ListSecretsParams) -> RpcResult<SupervisorInfoResponse>;
 | 
			
		||||
 | 
			
		||||
    /// List admin secrets (returns actual secret values)
 | 
			
		||||
    #[method(name = "list_admin_secrets")]
 | 
			
		||||
    async fn list_admin_secrets(&self, admin_secret: String) -> RpcResult<Vec<String>>;
 | 
			
		||||
 | 
			
		||||
    /// List user secrets (returns actual secret values)
 | 
			
		||||
    #[method(name = "list_user_secrets")]
 | 
			
		||||
    async fn list_user_secrets(&self, admin_secret: String) -> RpcResult<Vec<String>>;
 | 
			
		||||
 | 
			
		||||
    /// List register secrets (returns actual secret values)
 | 
			
		||||
    #[method(name = "list_register_secrets")]
 | 
			
		||||
    async fn list_register_secrets(&self, admin_secret: String) -> RpcResult<Vec<String>>;
 | 
			
		||||
 | 
			
		||||
    /// Get supervisor information and statistics
 | 
			
		||||
    #[method(name = "get_supervisor_info")]
 | 
			
		||||
    async fn get_supervisor_info(&self, admin_secret: String) -> RpcResult<SupervisorInfoResponse>;
 | 
			
		||||
    
 | 
			
		||||
    /// OpenRPC discovery method - returns the OpenRPC document describing this API
 | 
			
		||||
    #[method(name = "rpc.discover")]
 | 
			
		||||
    async fn rpc_discover(&self) -> RpcResult<serde_json::Value>;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// Helper function to parse process manager type from string
 | 
			
		||||
fn parse_process_manager_type(pm_type: &str, session_name: Option<String>) -> Result<ProcessManagerType, ErrorObject<'static>> {
 | 
			
		||||
    match pm_type.to_lowercase().as_str() {
 | 
			
		||||
        "simple" => Ok(ProcessManagerType::Simple),
 | 
			
		||||
        "tmux" => {
 | 
			
		||||
            let session = session_name.unwrap_or_else(|| "default_session".to_string());
 | 
			
		||||
            Ok(ProcessManagerType::Tmux(session))
 | 
			
		||||
        },
 | 
			
		||||
        _ => Err(invalid_params_error(&format!(
 | 
			
		||||
            "Invalid process manager type: {}. Must be 'simple' or 'tmux'",
 | 
			
		||||
            pm_type
 | 
			
		||||
        ))),
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// Direct RPC implementation on Arc<Mutex<Supervisor>>
 | 
			
		||||
/// This eliminates the need for a wrapper struct
 | 
			
		||||
#[async_trait]
 | 
			
		||||
impl SupervisorRpcServer for Arc<Mutex<Supervisor>> {
 | 
			
		||||
    async fn register_runner(&self, params: RegisterRunnerParams) -> RpcResult<String> {
 | 
			
		||||
        debug!("OpenRPC request: register_runner with params: {:?}", params);
 | 
			
		||||
        
 | 
			
		||||
        let mut supervisor = self.lock().await;
 | 
			
		||||
        supervisor
 | 
			
		||||
            .register_runner(¶ms.secret, ¶ms.name, ¶ms.queue)
 | 
			
		||||
            .await
 | 
			
		||||
            .map_err(runner_error_to_rpc_error)?;
 | 
			
		||||
        
 | 
			
		||||
        // Return the runner name that was registered
 | 
			
		||||
        Ok(params.name)
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    async fn create_job(&self, params: RunJobParams) -> RpcResult<String> {
 | 
			
		||||
        debug!("OpenRPC request: create_job with params: {:?}", params);
 | 
			
		||||
        
 | 
			
		||||
        let mut supervisor = self.lock().await;
 | 
			
		||||
        let job_id = supervisor
 | 
			
		||||
            .create_job(¶ms.secret, params.job)
 | 
			
		||||
            .await
 | 
			
		||||
            .map_err(runner_error_to_rpc_error)?;
 | 
			
		||||
        
 | 
			
		||||
        Ok(job_id)
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    async fn run_job(&self, params: RunJobParams) -> RpcResult<Option<String>> {
 | 
			
		||||
        debug!("OpenRPC request: run_job with params: {:?}", params);
 | 
			
		||||
        
 | 
			
		||||
        let mut supervisor = self.lock().await;
 | 
			
		||||
        supervisor
 | 
			
		||||
            .run_job(¶ms.secret, params.job)
 | 
			
		||||
            .await
 | 
			
		||||
            .map_err(runner_error_to_rpc_error)
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    async fn remove_runner(&self, actor_id: String) -> RpcResult<()> {
 | 
			
		||||
        debug!("OpenRPC request: remove_runner with actor_id: {}", actor_id);
 | 
			
		||||
        let mut supervisor = self.lock().await;
 | 
			
		||||
        supervisor
 | 
			
		||||
            .remove_runner(&actor_id)
 | 
			
		||||
            .await
 | 
			
		||||
            .map_err(runner_error_to_rpc_error)
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    async fn list_runners(&self) -> RpcResult<Vec<String>> {
 | 
			
		||||
        debug!("OpenRPC request: list_runners");
 | 
			
		||||
        let supervisor = self.lock().await;
 | 
			
		||||
        Ok(supervisor.list_runners().into_iter().map(|s| s.to_string()).collect())
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    async fn start_runner(&self, actor_id: String) -> RpcResult<()> {
 | 
			
		||||
        debug!("OpenRPC request: start_runner with actor_id: {}", actor_id);
 | 
			
		||||
        let mut supervisor = self.lock().await;
 | 
			
		||||
        supervisor
 | 
			
		||||
            .start_runner(&actor_id)
 | 
			
		||||
            .await
 | 
			
		||||
            .map_err(runner_error_to_rpc_error)
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    async fn stop_runner(&self, actor_id: String, force: bool) -> RpcResult<()> {
 | 
			
		||||
        debug!("OpenRPC request: stop_runner with actor_id: {}, force: {}", actor_id, force);
 | 
			
		||||
        let mut supervisor = self.lock().await;
 | 
			
		||||
        supervisor
 | 
			
		||||
            .stop_runner(&actor_id, force)
 | 
			
		||||
            .await
 | 
			
		||||
            .map_err(runner_error_to_rpc_error)
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    async fn get_runner(&self, actor_id: String) -> RpcResult<RunnerWrapper> {
 | 
			
		||||
        debug!("OpenRPC request: get_runner with actor_id: {}", actor_id);
 | 
			
		||||
        let supervisor = self.lock().await;
 | 
			
		||||
        match supervisor.get_runner(&actor_id) {
 | 
			
		||||
            Some(runner) => Ok(RunnerWrapper::from(runner)),
 | 
			
		||||
            None => Err(ErrorObjectOwned::owned(-32000, format!("Runner not found: {}", actor_id), None::<()>)),
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    async fn get_runner_status(&self, actor_id: String) -> RpcResult<ProcessStatusWrapper> {
 | 
			
		||||
        debug!("OpenRPC request: get_runner_status with actor_id: {}", actor_id);
 | 
			
		||||
        let supervisor = self.lock().await;
 | 
			
		||||
        let status = supervisor
 | 
			
		||||
            .get_runner_status(&actor_id)
 | 
			
		||||
            .await
 | 
			
		||||
            .map_err(runner_error_to_rpc_error)?;
 | 
			
		||||
        Ok(status.into())
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    async fn get_runner_logs(&self, params: GetLogsParams) -> RpcResult<Vec<LogInfoWrapper>> {
 | 
			
		||||
        debug!("OpenRPC request: get_runner_logs with params: {:?}", params);
 | 
			
		||||
        let supervisor = self.lock().await;
 | 
			
		||||
        let logs = supervisor
 | 
			
		||||
            .get_runner_logs(¶ms.actor_id, params.lines, params.follow)
 | 
			
		||||
            .await
 | 
			
		||||
            .map_err(runner_error_to_rpc_error)?;
 | 
			
		||||
        Ok(logs.into_iter().map(LogInfoWrapper::from).collect())
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    async fn queue_job_to_runner(&self, params: QueueJobParams) -> RpcResult<()> {
 | 
			
		||||
        debug!("OpenRPC request: queue_job_to_runner with params: {:?}", params);
 | 
			
		||||
        let mut supervisor = self.lock().await;
 | 
			
		||||
        supervisor
 | 
			
		||||
            .queue_job_to_runner(¶ms.runner_name, params.job)
 | 
			
		||||
            .await
 | 
			
		||||
            .map_err(runner_error_to_rpc_error)
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    async fn list_jobs(&self) -> RpcResult<Vec<String>> {
 | 
			
		||||
        debug!("OpenRPC request: list_jobs");
 | 
			
		||||
        let supervisor = self.lock().await;
 | 
			
		||||
        supervisor
 | 
			
		||||
            .list_jobs()
 | 
			
		||||
            .await
 | 
			
		||||
            .map_err(runner_error_to_rpc_error)
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    async fn get_job(&self, job_id: String) -> RpcResult<Job> {
 | 
			
		||||
        debug!("OpenRPC request: get_job with job_id: {}", job_id);
 | 
			
		||||
        let supervisor = self.lock().await;
 | 
			
		||||
        supervisor
 | 
			
		||||
            .get_job(&job_id)
 | 
			
		||||
            .await
 | 
			
		||||
            .map_err(runner_error_to_rpc_error)
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    async fn ping_runner(&self, runner_id: String) -> RpcResult<String> {
 | 
			
		||||
        debug!("OpenRPC request: ping_runner with runner_id: {}", runner_id);
 | 
			
		||||
        let mut supervisor = self.lock().await;
 | 
			
		||||
        supervisor
 | 
			
		||||
            .ping_runner(&runner_id)
 | 
			
		||||
            .await
 | 
			
		||||
            .map_err(runner_error_to_rpc_error)
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    async fn stop_job(&self, job_id: String) -> RpcResult<()> {
 | 
			
		||||
        debug!("OpenRPC request: stop_job with job_id: {}", job_id);
 | 
			
		||||
        let mut supervisor = self.lock().await;
 | 
			
		||||
        supervisor
 | 
			
		||||
            .stop_job(&job_id)
 | 
			
		||||
            .await
 | 
			
		||||
            .map_err(runner_error_to_rpc_error)
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    async fn delete_job(&self, job_id: String) -> RpcResult<()> {
 | 
			
		||||
        debug!("OpenRPC request: delete_job with job_id: {}", job_id);
 | 
			
		||||
        let mut supervisor = self.lock().await;
 | 
			
		||||
        supervisor
 | 
			
		||||
            .delete_job(&job_id)
 | 
			
		||||
            .await
 | 
			
		||||
            .map_err(runner_error_to_rpc_error)
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    async fn queue_and_wait(&self, params: QueueAndWaitParams) -> RpcResult<Option<String>> {
 | 
			
		||||
        debug!("OpenRPC request: queue_and_wait with params: {:?}", params);
 | 
			
		||||
        let mut supervisor = self.lock().await;
 | 
			
		||||
        supervisor
 | 
			
		||||
            .queue_and_wait(¶ms.runner_name, params.job, params.timeout_secs)
 | 
			
		||||
            .await
 | 
			
		||||
            .map_err(runner_error_to_rpc_error)
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    async fn get_all_runner_status(&self) -> RpcResult<Vec<RunnerStatusResponse>> {
 | 
			
		||||
        debug!("OpenRPC request: get_all_runner_status");
 | 
			
		||||
        let supervisor = self.lock().await;
 | 
			
		||||
        let statuses = supervisor.get_all_runner_status().await
 | 
			
		||||
            .map_err(runner_error_to_rpc_error)?;
 | 
			
		||||
        Ok(statuses
 | 
			
		||||
            .into_iter()
 | 
			
		||||
            .map(|(actor_id, status)| RunnerStatusResponse {
 | 
			
		||||
                actor_id,
 | 
			
		||||
                status: ProcessStatusWrapper::from(status),
 | 
			
		||||
            })
 | 
			
		||||
            .collect())
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    async fn start_all(&self) -> RpcResult<Vec<(String, String)>> {
 | 
			
		||||
        debug!("OpenRPC request: start_all");
 | 
			
		||||
        let mut supervisor = self.lock().await;
 | 
			
		||||
        let results = supervisor.start_all().await;
 | 
			
		||||
        Ok(results
 | 
			
		||||
            .into_iter()
 | 
			
		||||
            .map(|(actor_id, result)| {
 | 
			
		||||
                let status = match result {
 | 
			
		||||
                    Ok(_) => "Success".to_string(),
 | 
			
		||||
                    Err(e) => format!("Error: {}", e),
 | 
			
		||||
                };
 | 
			
		||||
                (actor_id, status)
 | 
			
		||||
            })
 | 
			
		||||
            .collect())
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    async fn stop_all(&self, force: bool) -> RpcResult<Vec<(String, String)>> {
 | 
			
		||||
        debug!("OpenRPC request: stop_all with force: {}", force);
 | 
			
		||||
        let mut supervisor = self.lock().await;
 | 
			
		||||
        let results = supervisor.stop_all(force).await;
 | 
			
		||||
        Ok(results
 | 
			
		||||
            .into_iter()
 | 
			
		||||
            .map(|(actor_id, result)| {
 | 
			
		||||
                let status = match result {
 | 
			
		||||
                    Ok(_) => "Success".to_string(),
 | 
			
		||||
                    Err(e) => format!("Error: {}", e),
 | 
			
		||||
                };
 | 
			
		||||
                (actor_id, status)
 | 
			
		||||
            })
 | 
			
		||||
            .collect())
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    async fn get_all_status(&self) -> RpcResult<Vec<(String, String)>> {
 | 
			
		||||
        debug!("OpenRPC request: get_all_status");
 | 
			
		||||
        let supervisor = self.lock().await;
 | 
			
		||||
        let statuses = supervisor.get_all_runner_status().await
 | 
			
		||||
            .map_err(runner_error_to_rpc_error)?;
 | 
			
		||||
        Ok(statuses
 | 
			
		||||
            .into_iter()
 | 
			
		||||
            .map(|(actor_id, status)| {
 | 
			
		||||
                let status_str = format!("{:?}", status);
 | 
			
		||||
                (actor_id, status_str)
 | 
			
		||||
            })
 | 
			
		||||
            .collect())
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    async fn add_secret(&self, params: AddSecretParams) -> RpcResult<()> {
 | 
			
		||||
        debug!("OpenRPC request: add_secret, type: {}", params.secret_type);
 | 
			
		||||
        let mut supervisor = self.lock().await;
 | 
			
		||||
        
 | 
			
		||||
        // Verify admin secret
 | 
			
		||||
        if !supervisor.has_admin_secret(¶ms.admin_secret) {
 | 
			
		||||
            return Err(ErrorObject::owned(-32602, "Invalid admin secret", None::<()>));
 | 
			
		||||
        }
 | 
			
		||||
        
 | 
			
		||||
        match params.secret_type.as_str() {
 | 
			
		||||
            "admin" => {
 | 
			
		||||
                supervisor.add_admin_secret(params.secret_value);
 | 
			
		||||
            }
 | 
			
		||||
            "user" => {
 | 
			
		||||
                supervisor.add_user_secret(params.secret_value);
 | 
			
		||||
            }
 | 
			
		||||
            "register" => {
 | 
			
		||||
                supervisor.add_register_secret(params.secret_value);
 | 
			
		||||
            }
 | 
			
		||||
            _ => {
 | 
			
		||||
                return Err(ErrorObject::owned(-32602, "Invalid secret type. Must be 'admin', 'user', or 'register'", None::<()>));
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
        
 | 
			
		||||
        Ok(())
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    async fn remove_secret(&self, params: RemoveSecretParams) -> RpcResult<()> {
 | 
			
		||||
        debug!("OpenRPC request: remove_secret, type: {}", params.secret_type);
 | 
			
		||||
        let mut supervisor = self.lock().await;
 | 
			
		||||
        
 | 
			
		||||
        // Verify admin secret
 | 
			
		||||
        if !supervisor.has_admin_secret(¶ms.admin_secret) {
 | 
			
		||||
            return Err(ErrorObject::owned(-32602, "Invalid admin secret", None::<()>));
 | 
			
		||||
        }
 | 
			
		||||
        
 | 
			
		||||
        match params.secret_type.as_str() {
 | 
			
		||||
            "admin" => {
 | 
			
		||||
                supervisor.remove_admin_secret(¶ms.secret_value);
 | 
			
		||||
            }
 | 
			
		||||
            "user" => {
 | 
			
		||||
                supervisor.remove_user_secret(¶ms.secret_value);
 | 
			
		||||
            }
 | 
			
		||||
            "register" => {
 | 
			
		||||
                supervisor.remove_register_secret(¶ms.secret_value);
 | 
			
		||||
            }
 | 
			
		||||
            _ => {
 | 
			
		||||
                return Err(ErrorObject::owned(-32602, "Invalid secret type. Must be 'admin', 'user', or 'register'", None::<()>));
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
        
 | 
			
		||||
        Ok(())
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    async fn list_secrets(&self, params: ListSecretsParams) -> RpcResult<SupervisorInfoResponse> {
 | 
			
		||||
        debug!("OpenRPC request: list_secrets");
 | 
			
		||||
        let supervisor = self.lock().await;
 | 
			
		||||
        
 | 
			
		||||
        // Verify admin secret
 | 
			
		||||
        if !supervisor.has_admin_secret(¶ms.admin_secret) {
 | 
			
		||||
            return Err(ErrorObject::owned(-32602, "Invalid admin secret", None::<()>));
 | 
			
		||||
        }
 | 
			
		||||
        
 | 
			
		||||
        Ok(SupervisorInfoResponse {
 | 
			
		||||
            server_url: "http://127.0.0.1:3030".to_string(),
 | 
			
		||||
            admin_secrets_count: supervisor.admin_secrets_count(),
 | 
			
		||||
            user_secrets_count: supervisor.user_secrets_count(),
 | 
			
		||||
            register_secrets_count: supervisor.register_secrets_count(),
 | 
			
		||||
            runners_count: supervisor.runners_count(),
 | 
			
		||||
        })
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    async fn list_admin_secrets(&self, admin_secret: String) -> RpcResult<Vec<String>> {
 | 
			
		||||
        debug!("OpenRPC request: list_admin_secrets");
 | 
			
		||||
        let supervisor = self.lock().await;
 | 
			
		||||
        
 | 
			
		||||
        // Verify admin secret
 | 
			
		||||
        if !supervisor.has_admin_secret(&admin_secret) {
 | 
			
		||||
            return Err(ErrorObject::owned(-32602, "Invalid admin secret", None::<()>));
 | 
			
		||||
        }
 | 
			
		||||
        
 | 
			
		||||
        Ok(supervisor.get_admin_secrets())
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    async fn list_user_secrets(&self, admin_secret: String) -> RpcResult<Vec<String>> {
 | 
			
		||||
        debug!("OpenRPC request: list_user_secrets");
 | 
			
		||||
        let supervisor = self.lock().await;
 | 
			
		||||
        
 | 
			
		||||
        // Verify admin secret
 | 
			
		||||
        if !supervisor.has_admin_secret(&admin_secret) {
 | 
			
		||||
            return Err(ErrorObject::owned(-32602, "Invalid admin secret", None::<()>));
 | 
			
		||||
        }
 | 
			
		||||
        
 | 
			
		||||
        Ok(supervisor.get_user_secrets())
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    async fn list_register_secrets(&self, admin_secret: String) -> RpcResult<Vec<String>> {
 | 
			
		||||
        debug!("OpenRPC request: list_register_secrets");
 | 
			
		||||
        let supervisor = self.lock().await;
 | 
			
		||||
        
 | 
			
		||||
        // Verify admin secret
 | 
			
		||||
        if !supervisor.has_admin_secret(&admin_secret) {
 | 
			
		||||
            return Err(ErrorObject::owned(-32602, "Invalid admin secret", None::<()>));
 | 
			
		||||
        }
 | 
			
		||||
        
 | 
			
		||||
        Ok(supervisor.get_register_secrets())
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    async fn get_supervisor_info(&self, admin_secret: String) -> RpcResult<SupervisorInfoResponse> {
 | 
			
		||||
        debug!("OpenRPC request: get_supervisor_info");
 | 
			
		||||
        let supervisor = self.lock().await;
 | 
			
		||||
        
 | 
			
		||||
        // Verify admin secret
 | 
			
		||||
        if !supervisor.has_admin_secret(&admin_secret) {
 | 
			
		||||
            return Err(ErrorObject::owned(-32602, "Invalid admin secret", None::<()>));
 | 
			
		||||
        }
 | 
			
		||||
        
 | 
			
		||||
        Ok(SupervisorInfoResponse {
 | 
			
		||||
            server_url: "http://127.0.0.1:3030".to_string(),
 | 
			
		||||
            admin_secrets_count: supervisor.admin_secrets_count(),
 | 
			
		||||
            user_secrets_count: supervisor.user_secrets_count(),
 | 
			
		||||
            register_secrets_count: supervisor.register_secrets_count(),
 | 
			
		||||
            runners_count: supervisor.runners_count(),
 | 
			
		||||
        })
 | 
			
		||||
    }
 | 
			
		||||
    
 | 
			
		||||
    async fn rpc_discover(&self) -> RpcResult<serde_json::Value> {
 | 
			
		||||
        debug!("OpenRPC request: rpc.discover");
 | 
			
		||||
        
 | 
			
		||||
        // Read OpenRPC specification from docs/openrpc.json
 | 
			
		||||
        match load_openrpc_spec() {
 | 
			
		||||
            Ok(spec) => Ok(spec),
 | 
			
		||||
            Err(e) => {
 | 
			
		||||
                error!("Failed to load OpenRPC specification: {}", e);
 | 
			
		||||
                // Fallback to a minimal spec if file loading fails
 | 
			
		||||
                Ok(serde_json::json!({
 | 
			
		||||
                    "openrpc": "1.3.2",
 | 
			
		||||
                    "info": {
 | 
			
		||||
                        "title": "Hero Supervisor OpenRPC API",
 | 
			
		||||
                        "version": "1.0.0",
 | 
			
		||||
                        "description": "OpenRPC API for managing Hero Supervisor runners and jobs"
 | 
			
		||||
                    },
 | 
			
		||||
                    "methods": [],
 | 
			
		||||
                    "error": "Failed to load full specification"
 | 
			
		||||
                }))
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// Start the OpenRPC server with a default supervisor
 | 
			
		||||
pub async fn start_server(addr: SocketAddr) -> anyhow::Result<ServerHandle> {
 | 
			
		||||
    let supervisor = Arc::new(Mutex::new(Supervisor::default()));
 | 
			
		||||
    start_server_with_supervisor(addr, supervisor).await
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// Start the OpenRPC server with an existing supervisor instance
 | 
			
		||||
pub async fn start_server_with_supervisor(
 | 
			
		||||
    addr: SocketAddr,
 | 
			
		||||
    supervisor: Arc<Mutex<Supervisor>>,
 | 
			
		||||
) -> anyhow::Result<ServerHandle> {
 | 
			
		||||
    let server = Server::builder().build(addr).await?;
 | 
			
		||||
    let handle = server.start(supervisor.into_rpc());
 | 
			
		||||
    Ok(handle)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// Start HTTP OpenRPC server (Unix socket support would require additional dependencies)
 | 
			
		||||
pub async fn start_http_openrpc_server(
 | 
			
		||||
    supervisor: Arc<Mutex<Supervisor>>,
 | 
			
		||||
    bind_address: &str,
 | 
			
		||||
    port: u16,
 | 
			
		||||
) -> anyhow::Result<ServerHandle> {
 | 
			
		||||
    let http_addr: SocketAddr = format!("{}:{}", bind_address, port).parse()?;
 | 
			
		||||
    
 | 
			
		||||
    // Configure CORS to allow requests from the admin UI
 | 
			
		||||
    let cors = CorsLayer::new()
 | 
			
		||||
        .allow_origin(Any)
 | 
			
		||||
        .allow_headers(Any)
 | 
			
		||||
        .allow_methods(Any);
 | 
			
		||||
    
 | 
			
		||||
    // Start HTTP server with CORS
 | 
			
		||||
    let http_server = Server::builder()
 | 
			
		||||
        .set_http_middleware(tower::ServiceBuilder::new().layer(cors))
 | 
			
		||||
        .build(http_addr)
 | 
			
		||||
        .await?;
 | 
			
		||||
    let http_handle = http_server.start(supervisor.into_rpc());
 | 
			
		||||
    
 | 
			
		||||
    info!("OpenRPC HTTP server running at http://{} with CORS enabled", http_addr);
 | 
			
		||||
    
 | 
			
		||||
    Ok(http_handle)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// Simplified server startup function for supervisor binary
 | 
			
		||||
pub async fn start_openrpc_servers(
 | 
			
		||||
    supervisor: Arc<Mutex<Supervisor>>,
 | 
			
		||||
    bind_address: &str,
 | 
			
		||||
    port: u16,
 | 
			
		||||
) -> Result<(), Box<dyn std::error::Error>> {
 | 
			
		||||
    let bind_address = bind_address.to_string();
 | 
			
		||||
    tokio::spawn(async move {
 | 
			
		||||
        match start_http_openrpc_server(supervisor, &bind_address, port).await {
 | 
			
		||||
            Ok(http_handle) => {
 | 
			
		||||
                info!("OpenRPC server started successfully");
 | 
			
		||||
                // Keep the server running
 | 
			
		||||
                http_handle.stopped().await;
 | 
			
		||||
                error!("OpenRPC server stopped unexpectedly");
 | 
			
		||||
            }
 | 
			
		||||
            Err(e) => {
 | 
			
		||||
                error!("Failed to start OpenRPC server: {}", e);
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
    });
 | 
			
		||||
    
 | 
			
		||||
    // Give the server a moment to start up
 | 
			
		||||
    tokio::time::sleep(tokio::time::Duration::from_millis(500)).await;
 | 
			
		||||
    
 | 
			
		||||
    Ok(())
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#[cfg(test)]
 | 
			
		||||
mod tests {
 | 
			
		||||
    use super::*;
 | 
			
		||||
 | 
			
		||||
    #[test]
 | 
			
		||||
    fn test_supervisor_rpc_creation() {
 | 
			
		||||
        let _rpc = SupervisorRpcImpl::new();
 | 
			
		||||
        // Just test that we can create the RPC implementation
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    #[cfg(feature = "openrpc")]
 | 
			
		||||
    #[test]
 | 
			
		||||
    fn test_process_manager_type_parsing() {
 | 
			
		||||
        assert!(SupervisorRpcImpl::parse_process_manager_type("simple").is_ok());
 | 
			
		||||
        assert!(SupervisorRpcImpl::parse_process_manager_type("tmux").is_ok());
 | 
			
		||||
        assert!(SupervisorRpcImpl::parse_process_manager_type("Simple").is_ok());
 | 
			
		||||
        assert!(SupervisorRpcImpl::parse_process_manager_type("TMUX").is_ok());
 | 
			
		||||
        assert!(SupervisorRpcImpl::parse_process_manager_type("invalid").is_err());
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
							
								
								
									
										234
									
								
								src/runner.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										234
									
								
								src/runner.rs
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,234 @@
 | 
			
		||||
//! Runner implementation for actor process management.
 | 
			
		||||
 | 
			
		||||
use crate::job::{Job};
 | 
			
		||||
use log::{debug, info};
 | 
			
		||||
use redis::AsyncCommands;
 | 
			
		||||
use sal_service_manager::{ProcessManager, ProcessManagerError as ServiceProcessManagerError, ProcessStatus, ProcessConfig};
 | 
			
		||||
use std::path::PathBuf;
 | 
			
		||||
use std::sync::Arc;
 | 
			
		||||
use tokio::sync::Mutex;
 | 
			
		||||
 | 
			
		||||
/// Represents the current status of an actor/runner (alias for ProcessStatus)
 | 
			
		||||
pub type RunnerStatus = ProcessStatus;
 | 
			
		||||
 | 
			
		||||
/// Log information structure
 | 
			
		||||
#[derive(Debug, Clone)]
 | 
			
		||||
pub struct LogInfo {
 | 
			
		||||
    pub timestamp: String,
 | 
			
		||||
    pub level: String,
 | 
			
		||||
    pub message: String,
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// Runner configuration and state (merged from RunnerConfig)
 | 
			
		||||
#[derive(Debug, Clone)]
 | 
			
		||||
pub struct Runner {
 | 
			
		||||
    /// Unique identifier for the runner
 | 
			
		||||
    pub id: String,
 | 
			
		||||
    pub name: String,
 | 
			
		||||
    pub namespace: String,
 | 
			
		||||
    /// Path to the actor binary
 | 
			
		||||
    pub command: PathBuf, // Command to run runner by, used only if supervisor is used to run runners
 | 
			
		||||
    /// Redis URL for job queue
 | 
			
		||||
    pub redis_url: String,
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
impl Runner {
 | 
			
		||||
    /// Create a new runner from configuration
 | 
			
		||||
    pub fn from_config(config: RunnerConfig) -> Self {
 | 
			
		||||
        Self {
 | 
			
		||||
            id: config.id,
 | 
			
		||||
            name: config.name,
 | 
			
		||||
            namespace: config.namespace,
 | 
			
		||||
            command: config.command,
 | 
			
		||||
            redis_url: config.redis_url,
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Create a new runner with the given parameters
 | 
			
		||||
    pub fn new(
 | 
			
		||||
        id: String,
 | 
			
		||||
        name: String,
 | 
			
		||||
        namespace: String,
 | 
			
		||||
        command: PathBuf,
 | 
			
		||||
        redis_url: String,
 | 
			
		||||
    ) -> Self {
 | 
			
		||||
        Self {
 | 
			
		||||
            id,
 | 
			
		||||
            name,
 | 
			
		||||
            namespace,
 | 
			
		||||
            command,
 | 
			
		||||
            redis_url,
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Get the queue key for this runner with the given namespace
 | 
			
		||||
    pub fn get_queue(&self) -> String {
 | 
			
		||||
        if self.namespace == "" {
 | 
			
		||||
            format!("runner:{}", self.name)
 | 
			
		||||
        } else {
 | 
			
		||||
            format!("{}:runner:{}", self.namespace, self.name)
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// Result type for runner operations
 | 
			
		||||
pub type RunnerResult<T> = Result<T, RunnerError>;
 | 
			
		||||
 | 
			
		||||
/// Errors that can occur during runner operations
 | 
			
		||||
#[derive(Debug, thiserror::Error)]
 | 
			
		||||
pub enum RunnerError {
 | 
			
		||||
    #[error("Actor '{actor_id}' not found")]
 | 
			
		||||
    ActorNotFound { actor_id: String },
 | 
			
		||||
    
 | 
			
		||||
    #[error("Actor '{actor_id}' is already running")]
 | 
			
		||||
    ActorAlreadyRunning { actor_id: String },
 | 
			
		||||
    
 | 
			
		||||
    #[error("Actor '{actor_id}' is not running")]
 | 
			
		||||
    ActorNotRunning { actor_id: String },
 | 
			
		||||
    
 | 
			
		||||
    #[error("Failed to start actor '{actor_id}': {reason}")]
 | 
			
		||||
    StartupFailed { actor_id: String, reason: String },
 | 
			
		||||
    
 | 
			
		||||
    #[error("Failed to stop actor '{actor_id}': {reason}")]
 | 
			
		||||
    StopFailed { actor_id: String, reason: String },
 | 
			
		||||
    
 | 
			
		||||
    #[error("Timeout waiting for actor '{actor_id}' to start")]
 | 
			
		||||
    StartupTimeout { actor_id: String },
 | 
			
		||||
    
 | 
			
		||||
    #[error("Job queue error for actor '{actor_id}': {reason}")]
 | 
			
		||||
    QueueError { actor_id: String, reason: String },
 | 
			
		||||
    
 | 
			
		||||
    #[error("Process manager error: {source}")]
 | 
			
		||||
    ProcessManagerError {
 | 
			
		||||
        #[from]
 | 
			
		||||
        source: ServiceProcessManagerError,
 | 
			
		||||
    },
 | 
			
		||||
    
 | 
			
		||||
    #[error("Configuration error: {reason}")]
 | 
			
		||||
    ConfigError { reason: String },
 | 
			
		||||
    
 | 
			
		||||
    #[error("Invalid secret: {0}")]
 | 
			
		||||
    InvalidSecret(String),
 | 
			
		||||
    
 | 
			
		||||
    #[error("IO error: {source}")]
 | 
			
		||||
    IoError {
 | 
			
		||||
        #[from]
 | 
			
		||||
        source: std::io::Error,
 | 
			
		||||
    },
 | 
			
		||||
    
 | 
			
		||||
    #[error("Redis error: {source}")]
 | 
			
		||||
    RedisError {
 | 
			
		||||
        #[from]
 | 
			
		||||
        source: redis::RedisError,
 | 
			
		||||
    },
 | 
			
		||||
    
 | 
			
		||||
    #[error("Job error: {source}")]
 | 
			
		||||
    JobError {
 | 
			
		||||
        #[from]
 | 
			
		||||
        source: crate::JobError,
 | 
			
		||||
    },
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// Convert Runner to ProcessConfig
 | 
			
		||||
pub fn runner_to_process_config(config: &Runner) -> ProcessConfig {
 | 
			
		||||
    ProcessConfig::new(config.id.clone(), config.command.clone())
 | 
			
		||||
        .with_arg("--id".to_string())
 | 
			
		||||
        .with_arg(config.id.clone())
 | 
			
		||||
        .with_arg("--redis-url".to_string())
 | 
			
		||||
        .with_arg(config.redis_url.clone())
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Type alias for backward compatibility
 | 
			
		||||
pub type RunnerConfig = Runner;
 | 
			
		||||
 | 
			
		||||
#[cfg(test)]
 | 
			
		||||
mod tests {
 | 
			
		||||
    use super::*;
 | 
			
		||||
    use sal_service_manager::{ProcessManagerError, SimpleProcessManager};
 | 
			
		||||
    use std::collections::HashMap;
 | 
			
		||||
 | 
			
		||||
    #[derive(Debug)]
 | 
			
		||||
    struct MockProcessManager {
 | 
			
		||||
        processes: HashMap<String, ProcessStatus>,
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    impl MockProcessManager {
 | 
			
		||||
        fn new() -> Self {
 | 
			
		||||
            Self {
 | 
			
		||||
                processes: HashMap::new(),
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    #[async_trait::async_trait]
 | 
			
		||||
    impl ProcessManager for MockProcessManager {
 | 
			
		||||
        async fn start_process(&mut self, config: &ProcessConfig) -> Result<(), ProcessManagerError> {
 | 
			
		||||
            self.processes.insert(config.id.clone(), ProcessStatus::Running);
 | 
			
		||||
            Ok(())
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        async fn stop_process(&mut self, process_id: &str, _force: bool) -> Result<(), ProcessManagerError> {
 | 
			
		||||
            self.processes.insert(process_id.to_string(), ProcessStatus::Stopped);
 | 
			
		||||
            Ok(())
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        async fn process_status(&self, process_id: &str) -> Result<ProcessStatus, ProcessManagerError> {
 | 
			
		||||
            Ok(self.processes.get(process_id).cloned().unwrap_or(ProcessStatus::Stopped))
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        async fn process_logs(&self, _process_id: &str, _lines: Option<usize>, _follow: bool) -> Result<Vec<LogInfo>, ProcessManagerError> {
 | 
			
		||||
            Ok(vec![])
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        async fn health_check(&self) -> Result<(), ProcessManagerError> {
 | 
			
		||||
            Ok(())
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        async fn list_processes(&self) -> Result<Vec<String>, ProcessManagerError> {
 | 
			
		||||
            Ok(self.processes.keys().cloned().collect())
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    #[test]
 | 
			
		||||
    fn test_runner_creation() {
 | 
			
		||||
        let runner = Runner::new(
 | 
			
		||||
            "test_actor".to_string(),
 | 
			
		||||
            "test_runner".to_string(),
 | 
			
		||||
            "".to_string(),
 | 
			
		||||
            PathBuf::from("/path/to/binary"),
 | 
			
		||||
            "redis://localhost:6379".to_string(),
 | 
			
		||||
        );
 | 
			
		||||
 | 
			
		||||
        assert_eq!(runner.id, "test_actor");
 | 
			
		||||
        assert_eq!(runner.name, "test_runner");
 | 
			
		||||
        assert_eq!(runner.command, PathBuf::from("/path/to/binary"));
 | 
			
		||||
        assert_eq!(runner.redis_url, "redis://localhost:6379");
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    #[test]
 | 
			
		||||
    fn test_runner_get_queue() {
 | 
			
		||||
        let runner = Runner::new(
 | 
			
		||||
            "test_actor".to_string(),
 | 
			
		||||
            "test_runner".to_string(),
 | 
			
		||||
            "".to_string(),
 | 
			
		||||
            PathBuf::from("/path/to/binary"),
 | 
			
		||||
            "redis://localhost:6379".to_string(),
 | 
			
		||||
        );
 | 
			
		||||
 | 
			
		||||
        let queue_key = runner.get_queue();
 | 
			
		||||
        assert_eq!(queue_key, "runner:test_runner");
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    #[test]
 | 
			
		||||
    fn test_runner_error_types() {
 | 
			
		||||
        let error = RunnerError::ActorNotFound {
 | 
			
		||||
            actor_id: "test".to_string(),
 | 
			
		||||
        };
 | 
			
		||||
        assert!(error.to_string().contains("test"));
 | 
			
		||||
 | 
			
		||||
        let error = RunnerError::ActorAlreadyRunning {
 | 
			
		||||
            actor_id: "test".to_string(),
 | 
			
		||||
        };
 | 
			
		||||
        assert!(error.to_string().contains("already running"));
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
							
								
								
									
										777
									
								
								src/supervisor.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										777
									
								
								src/supervisor.rs
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,777 @@
 | 
			
		||||
//! Main supervisor implementation for managing multiple actor runners.
 | 
			
		||||
 | 
			
		||||
use chrono::Utc;
 | 
			
		||||
use sal_service_manager::{ProcessManager, SimpleProcessManager, TmuxProcessManager};
 | 
			
		||||
use std::collections::HashMap;
 | 
			
		||||
use std::path::PathBuf;
 | 
			
		||||
use std::sync::Arc;
 | 
			
		||||
use tokio::sync::Mutex;
 | 
			
		||||
 | 
			
		||||
use crate::{client::{Client, ClientBuilder}, job::JobStatus, runner::{LogInfo, Runner, RunnerConfig, RunnerError, RunnerResult, RunnerStatus}, JobError};
 | 
			
		||||
use crate::{job::Job};
 | 
			
		||||
 | 
			
		||||
#[cfg(feature = "admin")]
 | 
			
		||||
use supervisor_admin_server::{AdminSupervisor, RunnerConfigInfo, JobInfo};
 | 
			
		||||
 | 
			
		||||
/// Process manager type for a runner
 | 
			
		||||
#[derive(Debug, Clone)]
 | 
			
		||||
pub enum ProcessManagerType {
 | 
			
		||||
    /// Simple process manager for direct process spawning
 | 
			
		||||
    Simple,
 | 
			
		||||
    /// Tmux process manager for session-based management
 | 
			
		||||
    Tmux(String), // session name
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// Main supervisor that manages multiple runners
 | 
			
		||||
#[derive(Clone)]
 | 
			
		||||
pub struct Supervisor {
 | 
			
		||||
    /// Map of runner name to runner configuration
 | 
			
		||||
    runners: HashMap<String, Runner>,
 | 
			
		||||
    /// Shared process manager for all runners
 | 
			
		||||
    process_manager: Arc<Mutex<dyn ProcessManager>>,
 | 
			
		||||
    /// Shared Redis client for all runners
 | 
			
		||||
    redis_client: redis::Client,
 | 
			
		||||
    /// Namespace for queue keys
 | 
			
		||||
    namespace: String,
 | 
			
		||||
    /// Admin secrets for full access
 | 
			
		||||
    admin_secrets: Vec<String>,
 | 
			
		||||
    /// User secrets for limited access
 | 
			
		||||
    user_secrets: Vec<String>,
 | 
			
		||||
    /// Register secrets for runner registration
 | 
			
		||||
    register_secrets: Vec<String>,
 | 
			
		||||
    client: Client,
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
pub struct SupervisorBuilder {
 | 
			
		||||
    /// Map of runner name to runner configuration
 | 
			
		||||
    runners: HashMap<String, Runner>,
 | 
			
		||||
    /// Redis URL for connection
 | 
			
		||||
    redis_url: String,
 | 
			
		||||
    /// Process manager type
 | 
			
		||||
    process_manager_type: ProcessManagerType,
 | 
			
		||||
    /// Namespace for queue keys
 | 
			
		||||
    namespace: String,
 | 
			
		||||
    /// Admin secrets for full access
 | 
			
		||||
    admin_secrets: Vec<String>,
 | 
			
		||||
    /// User secrets for limited access
 | 
			
		||||
    user_secrets: Vec<String>,
 | 
			
		||||
    /// Register secrets for runner registration
 | 
			
		||||
    register_secrets: Vec<String>,
 | 
			
		||||
    client_builder: ClientBuilder,
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
impl SupervisorBuilder {
 | 
			
		||||
    /// Create a new supervisor builder
 | 
			
		||||
    pub fn new() -> Self {
 | 
			
		||||
        Self {
 | 
			
		||||
            runners: HashMap::new(),
 | 
			
		||||
            redis_url: "redis://localhost:6379".to_string(),
 | 
			
		||||
            process_manager_type: ProcessManagerType::Simple,
 | 
			
		||||
            namespace: "".to_string(),
 | 
			
		||||
            admin_secrets: Vec::new(),
 | 
			
		||||
            user_secrets: Vec::new(),
 | 
			
		||||
            register_secrets: Vec::new(),
 | 
			
		||||
            client_builder: ClientBuilder::new(),
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Set the Redis URL
 | 
			
		||||
    pub fn redis_url<S: Into<String>>(mut self, url: S) -> Self {
 | 
			
		||||
        let url_string = url.into();
 | 
			
		||||
        self.redis_url = url_string.clone();
 | 
			
		||||
        self.client_builder = self.client_builder.redis_url(url_string);
 | 
			
		||||
        self
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Set the process manager type
 | 
			
		||||
    pub fn process_manager(mut self, pm_type: ProcessManagerType) -> Self {
 | 
			
		||||
        self.process_manager_type = pm_type;
 | 
			
		||||
        self
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Set the namespace for queue keys
 | 
			
		||||
    pub fn namespace<S: Into<String>>(mut self, namespace: S) -> Self {
 | 
			
		||||
        let namespace_string = namespace.into();
 | 
			
		||||
        self.namespace = namespace_string.clone();
 | 
			
		||||
        self.client_builder = self.client_builder.namespace(namespace_string);
 | 
			
		||||
        self
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Add an admin secret
 | 
			
		||||
    pub fn add_admin_secret<S: Into<String>>(mut self, secret: S) -> Self {
 | 
			
		||||
        self.admin_secrets.push(secret.into());
 | 
			
		||||
        self
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Add multiple admin secrets
 | 
			
		||||
    pub fn admin_secrets<I, S>(mut self, secrets: I) -> Self 
 | 
			
		||||
    where
 | 
			
		||||
        I: IntoIterator<Item = S>,
 | 
			
		||||
        S: Into<String>,
 | 
			
		||||
    {
 | 
			
		||||
        self.admin_secrets.extend(secrets.into_iter().map(|s| s.into()));
 | 
			
		||||
        self
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Add a user secret
 | 
			
		||||
    pub fn add_user_secret<S: Into<String>>(mut self, secret: S) -> Self {
 | 
			
		||||
        self.user_secrets.push(secret.into());
 | 
			
		||||
        self
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Add multiple user secrets
 | 
			
		||||
    pub fn user_secrets<I, S>(mut self, secrets: I) -> Self 
 | 
			
		||||
    where
 | 
			
		||||
        I: IntoIterator<Item = S>,
 | 
			
		||||
        S: Into<String>,
 | 
			
		||||
    {
 | 
			
		||||
        self.user_secrets.extend(secrets.into_iter().map(|s| s.into()));
 | 
			
		||||
        self
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Add a register secret
 | 
			
		||||
    pub fn add_register_secret<S: Into<String>>(mut self, secret: S) -> Self {
 | 
			
		||||
        self.register_secrets.push(secret.into());
 | 
			
		||||
        self
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Add multiple register secrets
 | 
			
		||||
    pub fn register_secrets<I, S>(mut self, secrets: I) -> Self 
 | 
			
		||||
    where
 | 
			
		||||
        I: IntoIterator<Item = S>,
 | 
			
		||||
        S: Into<String>,
 | 
			
		||||
    {
 | 
			
		||||
        self.register_secrets.extend(secrets.into_iter().map(|s| s.into()));
 | 
			
		||||
        self
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Add a runner to the supervisor
 | 
			
		||||
    pub fn add_runner(mut self, runner: Runner) -> Self {
 | 
			
		||||
        self.runners.insert(runner.id.clone(), runner);
 | 
			
		||||
        self
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Build the supervisor
 | 
			
		||||
    pub async fn build(self) -> RunnerResult<Supervisor> {
 | 
			
		||||
        // Create process manager based on type
 | 
			
		||||
        let process_manager: Arc<Mutex<dyn ProcessManager>> = match &self.process_manager_type {
 | 
			
		||||
            ProcessManagerType::Simple => {
 | 
			
		||||
                Arc::new(Mutex::new(SimpleProcessManager::new()))
 | 
			
		||||
            }
 | 
			
		||||
            ProcessManagerType::Tmux(session_name) => {
 | 
			
		||||
                Arc::new(Mutex::new(TmuxProcessManager::new(session_name.clone())))
 | 
			
		||||
            }
 | 
			
		||||
        };
 | 
			
		||||
 | 
			
		||||
        // Create Redis client
 | 
			
		||||
        let redis_client = redis::Client::open(self.redis_url.as_str())
 | 
			
		||||
            .map_err(|e| RunnerError::ConfigError {
 | 
			
		||||
                reason: format!("Invalid Redis URL: {}", e),
 | 
			
		||||
            })?;
 | 
			
		||||
 | 
			
		||||
        Ok(Supervisor {
 | 
			
		||||
            client: self.client_builder.build().await.unwrap(),
 | 
			
		||||
            runners: self.runners,
 | 
			
		||||
            process_manager,
 | 
			
		||||
            redis_client,
 | 
			
		||||
            namespace: self.namespace,
 | 
			
		||||
            admin_secrets: self.admin_secrets,
 | 
			
		||||
            user_secrets: self.user_secrets,
 | 
			
		||||
            register_secrets: self.register_secrets,
 | 
			
		||||
        })
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
impl Supervisor {
 | 
			
		||||
    /// Create a new supervisor builder
 | 
			
		||||
    pub fn builder() -> SupervisorBuilder {
 | 
			
		||||
        SupervisorBuilder::new()
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Add a new runner to the supervisor
 | 
			
		||||
    pub async fn add_runner(
 | 
			
		||||
        &mut self,
 | 
			
		||||
        config: RunnerConfig,
 | 
			
		||||
    ) -> RunnerResult<()> {
 | 
			
		||||
        // Runner is now just the config
 | 
			
		||||
        let runner = Runner::from_config(config.clone());
 | 
			
		||||
        
 | 
			
		||||
        self.runners.insert(config.id.clone(), runner);
 | 
			
		||||
        Ok(())
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Register a new runner with secret-based authentication
 | 
			
		||||
    pub async fn register_runner(&mut self, secret: &str, name: &str, queue: &str) -> RunnerResult<()> {
 | 
			
		||||
        // Check if the secret is valid (admin or register secret)
 | 
			
		||||
        if !self.admin_secrets.contains(&secret.to_string()) && 
 | 
			
		||||
           !self.register_secrets.contains(&secret.to_string()) {
 | 
			
		||||
            return Err(RunnerError::InvalidSecret("Invalid secret for runner registration".to_string()));
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        // Create a basic runner config for the named runner
 | 
			
		||||
        let config = RunnerConfig {
 | 
			
		||||
            id: name.to_string(), // Use the provided name as actor_id
 | 
			
		||||
            name: name.to_string(), // Use the provided name as actor_id
 | 
			
		||||
            namespace: self.namespace.clone(),
 | 
			
		||||
            command: PathBuf::from("/tmp/mock_runner"), // Default path
 | 
			
		||||
            redis_url: "redis://localhost:6379".to_string(),
 | 
			
		||||
        };
 | 
			
		||||
 | 
			
		||||
        // Add the runner using existing logic
 | 
			
		||||
        self.add_runner(config).await
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Create a job (fire-and-forget, non-blocking) with secret-based authentication
 | 
			
		||||
    pub async fn create_job(&mut self, secret: &str, job: crate::job::Job) -> RunnerResult<String> {
 | 
			
		||||
        // Check if the secret is valid (admin or user secret)
 | 
			
		||||
        if !self.admin_secrets.contains(&secret.to_string()) && 
 | 
			
		||||
           !self.user_secrets.contains(&secret.to_string()) {
 | 
			
		||||
            return Err(RunnerError::InvalidSecret("Invalid secret for job creation".to_string()));
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        // Find the runner by name
 | 
			
		||||
        let runner_name = job.runner_name.clone();
 | 
			
		||||
        let job_id = job.id.clone(); // Store job ID before moving job
 | 
			
		||||
        if let Some(_runner) = self.runners.get(&runner_name) {
 | 
			
		||||
            // Use the supervisor's queue_job_to_runner method (fire-and-forget)
 | 
			
		||||
            self.queue_job_to_runner(&runner_name, job).await?;
 | 
			
		||||
            Ok(job_id) // Return the job ID immediately
 | 
			
		||||
        } else {
 | 
			
		||||
            Err(RunnerError::ActorNotFound {
 | 
			
		||||
                actor_id: job.runner_name.clone(),
 | 
			
		||||
            })
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Run a job on the appropriate runner with secret-based authentication
 | 
			
		||||
    /// This is a synchronous operation that queues the job, waits for the result, and returns it
 | 
			
		||||
    pub async fn run_job(&mut self, secret: &str, job: crate::job::Job) -> RunnerResult<Option<String>> {
 | 
			
		||||
        // Check if the secret is valid (admin or user secret)
 | 
			
		||||
        if !self.admin_secrets.contains(&secret.to_string()) && 
 | 
			
		||||
           !self.user_secrets.contains(&secret.to_string()) {
 | 
			
		||||
            return Err(RunnerError::InvalidSecret("Invalid secret for job execution".to_string()));
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        // Find the runner by name
 | 
			
		||||
        let runner_name = job.runner_name.clone();
 | 
			
		||||
        if let Some(_runner) = self.runners.get(&runner_name) {
 | 
			
		||||
            // Use the synchronous queue_and_wait method with a reasonable timeout (30 seconds)
 | 
			
		||||
            self.queue_and_wait(&runner_name, job, 30).await
 | 
			
		||||
        } else {
 | 
			
		||||
            Err(RunnerError::ActorNotFound {
 | 
			
		||||
                actor_id: job.runner_name.clone(),
 | 
			
		||||
            })
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Remove a runner from the supervisor
 | 
			
		||||
    pub async fn remove_runner(&mut self, actor_id: &str) -> RunnerResult<()> {
 | 
			
		||||
        if let Some(_instance) = self.runners.remove(actor_id) {
 | 
			
		||||
            // Runner is removed from the map, which will drop the Arc
 | 
			
		||||
            // and eventually clean up the runner when no more references exist
 | 
			
		||||
        }
 | 
			
		||||
        Ok(())
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Get a runner by actor ID
 | 
			
		||||
    pub fn get_runner(&self, actor_id: &str) -> Option<&Runner> {
 | 
			
		||||
        self.runners.get(actor_id)
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Get a job by job ID from Redis
 | 
			
		||||
    pub async fn get_job(&self, job_id: &str) -> RunnerResult<crate::job::Job> {
 | 
			
		||||
        use redis::AsyncCommands;
 | 
			
		||||
        
 | 
			
		||||
        let mut conn = self.redis_client.get_multiplexed_async_connection().await
 | 
			
		||||
            .map_err(|e| RunnerError::RedisError { 
 | 
			
		||||
                source: e 
 | 
			
		||||
            })?;
 | 
			
		||||
        
 | 
			
		||||
        self.client.load_job_from_redis(job_id).await
 | 
			
		||||
            .map_err(|e| RunnerError::QueueError {
 | 
			
		||||
                actor_id: job_id.to_string(),
 | 
			
		||||
                reason: format!("Failed to load job: {}", e),
 | 
			
		||||
            })
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Ping a runner by dispatching a ping job to its queue
 | 
			
		||||
    pub async fn ping_runner(&mut self, runner_id: &str) -> RunnerResult<String> {
 | 
			
		||||
        use crate::job::{Job, JobBuilder};
 | 
			
		||||
        use std::time::Duration;
 | 
			
		||||
        
 | 
			
		||||
        // Check if runner exists
 | 
			
		||||
        if !self.runners.contains_key(runner_id) {
 | 
			
		||||
            return Err(RunnerError::ActorNotFound {
 | 
			
		||||
                actor_id: runner_id.to_string(),
 | 
			
		||||
            });
 | 
			
		||||
        }
 | 
			
		||||
        
 | 
			
		||||
        // Create a ping job
 | 
			
		||||
        let ping_job = JobBuilder::new()
 | 
			
		||||
            .caller_id("supervisor_ping")
 | 
			
		||||
            .context_id("ping_context")
 | 
			
		||||
            .payload("ping")
 | 
			
		||||
            .runner_name(runner_id)
 | 
			
		||||
            .executor("ping")
 | 
			
		||||
            .timeout(Duration::from_secs(10))
 | 
			
		||||
            .build()
 | 
			
		||||
            .map_err(|e| RunnerError::QueueError {
 | 
			
		||||
                actor_id: runner_id.to_string(),
 | 
			
		||||
                reason: format!("Failed to create ping job: {}", e),
 | 
			
		||||
            })?;
 | 
			
		||||
        
 | 
			
		||||
        // Queue the ping job
 | 
			
		||||
        let job_id = ping_job.id.clone();
 | 
			
		||||
        self.queue_job_to_runner(runner_id, ping_job).await?;
 | 
			
		||||
        
 | 
			
		||||
        Ok(job_id)
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Stop a job by ID
 | 
			
		||||
    pub async fn stop_job(&mut self, job_id: &str) -> RunnerResult<()> {
 | 
			
		||||
        use redis::AsyncCommands;
 | 
			
		||||
        
 | 
			
		||||
        // For now, we'll implement a basic stop by removing the job from Redis
 | 
			
		||||
        // In a more sophisticated implementation, you might send a stop signal to the runner
 | 
			
		||||
        let mut conn = self.redis_client.get_multiplexed_async_connection().await
 | 
			
		||||
            .map_err(|e| RunnerError::QueueError {
 | 
			
		||||
                actor_id: job_id.to_string(),
 | 
			
		||||
                reason: format!("Failed to connect to Redis: {}", e),
 | 
			
		||||
            })?;
 | 
			
		||||
        
 | 
			
		||||
        let job_key = self.client.set_job_status(job_id, JobStatus::Stopping).await;
 | 
			
		||||
        
 | 
			
		||||
        Ok(())
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Delete a job by ID
 | 
			
		||||
    pub async fn delete_job(&mut self, job_id: &str) -> RunnerResult<()> {
 | 
			
		||||
        self.client.delete_job(&job_id).await
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// List all managed runners
 | 
			
		||||
    pub fn list_runners(&self) -> Vec<&str> {
 | 
			
		||||
        self.runners.keys().map(|s| s.as_str()).collect()
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Start a specific runner
 | 
			
		||||
    pub async fn start_runner(&mut self, actor_id: &str) -> RunnerResult<()> {
 | 
			
		||||
        use crate::runner::runner_to_process_config;
 | 
			
		||||
        use log::info;
 | 
			
		||||
        
 | 
			
		||||
        if let Some(runner) = self.runners.get(actor_id) {
 | 
			
		||||
            info!("Starting actor {}", runner.id);
 | 
			
		||||
            
 | 
			
		||||
            let process_config = runner_to_process_config(runner);
 | 
			
		||||
            let mut pm = self.process_manager.lock().await;
 | 
			
		||||
            pm.start_process(&process_config).await?;
 | 
			
		||||
            
 | 
			
		||||
            info!("Successfully started actor {}", runner.id);
 | 
			
		||||
            Ok(())
 | 
			
		||||
        } else {
 | 
			
		||||
            Err(RunnerError::ActorNotFound {
 | 
			
		||||
                actor_id: actor_id.to_string(),
 | 
			
		||||
            })
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Stop a specific runner
 | 
			
		||||
    pub async fn stop_runner(&mut self, actor_id: &str, force: bool) -> RunnerResult<()> {
 | 
			
		||||
        use log::info;
 | 
			
		||||
        
 | 
			
		||||
        if let Some(runner) = self.runners.get(actor_id) {
 | 
			
		||||
            info!("Stopping actor {}", runner.id);
 | 
			
		||||
            
 | 
			
		||||
            let mut pm = self.process_manager.lock().await;
 | 
			
		||||
            pm.stop_process(&runner.id, force).await?;
 | 
			
		||||
            
 | 
			
		||||
            info!("Successfully stopped actor {}", runner.id);
 | 
			
		||||
            Ok(())
 | 
			
		||||
        } else {
 | 
			
		||||
            Err(RunnerError::ActorNotFound {
 | 
			
		||||
                actor_id: actor_id.to_string(),
 | 
			
		||||
            })
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Get status of a specific runner
 | 
			
		||||
    pub async fn get_runner_status(&self, actor_id: &str) -> RunnerResult<RunnerStatus> {
 | 
			
		||||
        if let Some(runner) = self.runners.get(actor_id) {
 | 
			
		||||
            let pm = self.process_manager.lock().await;
 | 
			
		||||
            let status = pm.process_status(&runner.id).await?;
 | 
			
		||||
            Ok(status)
 | 
			
		||||
        } else {
 | 
			
		||||
            Err(RunnerError::ActorNotFound {
 | 
			
		||||
                actor_id: actor_id.to_string(),
 | 
			
		||||
            })
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Get logs from a specific runner
 | 
			
		||||
    pub async fn get_runner_logs(
 | 
			
		||||
        &self,
 | 
			
		||||
        actor_id: &str,
 | 
			
		||||
        lines: Option<usize>,
 | 
			
		||||
        follow: bool,
 | 
			
		||||
    ) -> RunnerResult<Vec<LogInfo>> {
 | 
			
		||||
        if let Some(runner) = self.runners.get(actor_id) {
 | 
			
		||||
            let pm = self.process_manager.lock().await;
 | 
			
		||||
            let logs = pm.process_logs(&runner.id, lines, follow).await?;
 | 
			
		||||
            
 | 
			
		||||
            // Convert sal_service_manager::LogInfo to our LogInfo
 | 
			
		||||
            let converted_logs = logs.into_iter().map(|log| LogInfo {
 | 
			
		||||
                timestamp: log.timestamp,
 | 
			
		||||
                level: log.level,
 | 
			
		||||
                message: log.message,
 | 
			
		||||
            }).collect();
 | 
			
		||||
            
 | 
			
		||||
            Ok(converted_logs)
 | 
			
		||||
        } else {
 | 
			
		||||
            Err(RunnerError::ActorNotFound {
 | 
			
		||||
                actor_id: actor_id.to_string(),
 | 
			
		||||
            })
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Queue a job to a specific runner by name
 | 
			
		||||
    pub async fn queue_job_to_runner(&mut self, runner_name: &str, job: crate::job::Job) -> RunnerResult<()> {
 | 
			
		||||
        use redis::AsyncCommands;
 | 
			
		||||
        use log::{debug, info};
 | 
			
		||||
        
 | 
			
		||||
        if let Some(runner) = self.runners.get(runner_name) {
 | 
			
		||||
            debug!("Queuing job {} for actor {}", job.id, runner.id);
 | 
			
		||||
            
 | 
			
		||||
            let mut conn = self.redis_client.get_multiplexed_async_connection().await
 | 
			
		||||
                .map_err(|e| RunnerError::QueueError {
 | 
			
		||||
                    actor_id: runner.id.clone(),
 | 
			
		||||
                    reason: format!("Failed to connect to Redis: {}", e),
 | 
			
		||||
                })?;
 | 
			
		||||
 | 
			
		||||
            // Store the job in Redis first
 | 
			
		||||
            self.client.store_job_in_redis(&job).await
 | 
			
		||||
                .map_err(|e| RunnerError::QueueError {
 | 
			
		||||
                    actor_id: runner.id.clone(),
 | 
			
		||||
                    reason: format!("Failed to store job: {}", e),
 | 
			
		||||
                })?;
 | 
			
		||||
 | 
			
		||||
            // Use the runner's get_queue method with our namespace
 | 
			
		||||
            let queue_key = runner.get_queue();
 | 
			
		||||
            
 | 
			
		||||
            let _: () = conn.lpush(&queue_key, &job.id).await
 | 
			
		||||
                .map_err(|e| RunnerError::QueueError {
 | 
			
		||||
                    actor_id: runner.id.clone(),
 | 
			
		||||
                    reason: format!("Failed to queue job: {}", e),
 | 
			
		||||
                })?;
 | 
			
		||||
 | 
			
		||||
            info!("Job {} queued successfully for actor {} on queue {}", job.id, runner.id, queue_key);
 | 
			
		||||
            Ok(())
 | 
			
		||||
        } else {
 | 
			
		||||
            Err(RunnerError::ActorNotFound {
 | 
			
		||||
                actor_id: runner_name.to_string(),
 | 
			
		||||
            })
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Queue a job to a specific runner and wait for the result
 | 
			
		||||
    /// This implements the proper Hero job protocol:
 | 
			
		||||
    /// 1. Queue the job to the runner
 | 
			
		||||
    /// 2. BLPOP on the reply queue for this job
 | 
			
		||||
    /// 3. Get the job result from the job hash
 | 
			
		||||
    /// 4. Return the complete result
 | 
			
		||||
    pub async fn queue_and_wait(&mut self, runner_name: &str, job: crate::job::Job, timeout_secs: u64) -> RunnerResult<Option<String>> {
 | 
			
		||||
        use redis::AsyncCommands;
 | 
			
		||||
        
 | 
			
		||||
        let job_id = job.id.clone();
 | 
			
		||||
        
 | 
			
		||||
        // First queue the job
 | 
			
		||||
        self.queue_job_to_runner(runner_name, job).await?;
 | 
			
		||||
        
 | 
			
		||||
        // Get Redis connection from the supervisor (shared Redis client)
 | 
			
		||||
        let _runner = self.runners.get(runner_name)
 | 
			
		||||
            .ok_or_else(|| RunnerError::ActorNotFound {
 | 
			
		||||
                actor_id: runner_name.to_string(),
 | 
			
		||||
            })?;
 | 
			
		||||
        
 | 
			
		||||
        let mut conn = self.redis_client.get_multiplexed_async_connection().await
 | 
			
		||||
            .map_err(|e| RunnerError::RedisError { 
 | 
			
		||||
                source: e 
 | 
			
		||||
            })?;
 | 
			
		||||
        
 | 
			
		||||
        // BLPOP on the reply queue for this specific job
 | 
			
		||||
        let reply_key = self.client.job_reply_key(&job_id);
 | 
			
		||||
        let result: Option<Vec<String>> = conn.blpop(&reply_key, timeout_secs as f64).await
 | 
			
		||||
            .map_err(|e| RunnerError::RedisError { 
 | 
			
		||||
                source: e 
 | 
			
		||||
            })?;
 | 
			
		||||
        
 | 
			
		||||
        match result {
 | 
			
		||||
            Some(reply_data) => {
 | 
			
		||||
                // Reply received, now get the job result from the job hash
 | 
			
		||||
                let job_key = self.client.job_key(&job_id);
 | 
			
		||||
                let job_result: Option<String> = conn.hget(&job_key, "result").await
 | 
			
		||||
                    .map_err(|e| RunnerError::RedisError { 
 | 
			
		||||
                        source: e 
 | 
			
		||||
                    })?;
 | 
			
		||||
                
 | 
			
		||||
                Ok(job_result)
 | 
			
		||||
            }
 | 
			
		||||
            None => {
 | 
			
		||||
                // Timeout occurred
 | 
			
		||||
                Ok(None)
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Get status of all runners
 | 
			
		||||
    pub async fn get_all_runner_status(&self) -> RunnerResult<Vec<(String, RunnerStatus)>> {
 | 
			
		||||
        let mut results = Vec::new();
 | 
			
		||||
        
 | 
			
		||||
        for (actor_id, instance) in &self.runners {
 | 
			
		||||
            match self.get_runner_status(actor_id).await {
 | 
			
		||||
                Ok(status) => results.push((actor_id.clone(), status)),
 | 
			
		||||
                Err(_) => {
 | 
			
		||||
                    use sal_service_manager::ProcessStatus;
 | 
			
		||||
                    results.push((actor_id.clone(), ProcessStatus::Stopped));
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
        
 | 
			
		||||
        Ok(results)
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Start all runners
 | 
			
		||||
    pub async fn start_all(&mut self) -> Vec<(String, RunnerResult<()>)> {
 | 
			
		||||
        let mut results = Vec::new();
 | 
			
		||||
        let actor_ids: Vec<String> = self.runners.keys().cloned().collect();
 | 
			
		||||
        
 | 
			
		||||
        for actor_id in actor_ids {
 | 
			
		||||
            let result = self.start_runner(&actor_id).await;
 | 
			
		||||
            results.push((actor_id, result));
 | 
			
		||||
        }
 | 
			
		||||
        
 | 
			
		||||
        results
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Stop all runners
 | 
			
		||||
    pub async fn stop_all(&mut self, force: bool) -> Vec<(String, RunnerResult<()>)> {
 | 
			
		||||
        let mut results = Vec::new();
 | 
			
		||||
        let actor_ids: Vec<String> = self.runners.keys().cloned().collect();
 | 
			
		||||
        
 | 
			
		||||
        for actor_id in actor_ids {
 | 
			
		||||
            let result = self.stop_runner(&actor_id, force).await;
 | 
			
		||||
            results.push((actor_id, result));
 | 
			
		||||
        }
 | 
			
		||||
        
 | 
			
		||||
        results
 | 
			
		||||
    }
 | 
			
		||||
    
 | 
			
		||||
    /// Get status of all runners
 | 
			
		||||
    pub async fn get_all_status(&self) -> Vec<(String, RunnerResult<RunnerStatus>)> {
 | 
			
		||||
        let mut results = Vec::new();
 | 
			
		||||
        
 | 
			
		||||
        for (actor_id, _instance) in &self.runners {
 | 
			
		||||
            let result = self.get_runner_status(actor_id).await;
 | 
			
		||||
            results.push((actor_id.clone(), result));
 | 
			
		||||
        }
 | 
			
		||||
        
 | 
			
		||||
        results
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Add an admin secret
 | 
			
		||||
    pub fn add_admin_secret(&mut self, secret: String) {
 | 
			
		||||
        if !self.admin_secrets.contains(&secret) {
 | 
			
		||||
            self.admin_secrets.push(secret);
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Remove an admin secret
 | 
			
		||||
    pub fn remove_admin_secret(&mut self, secret: &str) -> bool {
 | 
			
		||||
        if let Some(pos) = self.admin_secrets.iter().position(|x| x == secret) {
 | 
			
		||||
            self.admin_secrets.remove(pos);
 | 
			
		||||
            true
 | 
			
		||||
        } else {
 | 
			
		||||
            false
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Check if admin secret exists
 | 
			
		||||
    pub fn has_admin_secret(&self, secret: &str) -> bool {
 | 
			
		||||
        self.admin_secrets.contains(&secret.to_string())
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Get admin secrets count
 | 
			
		||||
    pub fn admin_secrets_count(&self) -> usize {
 | 
			
		||||
        self.admin_secrets.len()
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Add a user secret
 | 
			
		||||
    pub fn add_user_secret(&mut self, secret: String) {
 | 
			
		||||
        if !self.user_secrets.contains(&secret) {
 | 
			
		||||
            self.user_secrets.push(secret);
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Remove a user secret
 | 
			
		||||
    pub fn remove_user_secret(&mut self, secret: &str) -> bool {
 | 
			
		||||
        if let Some(pos) = self.user_secrets.iter().position(|x| x == secret) {
 | 
			
		||||
            self.user_secrets.remove(pos);
 | 
			
		||||
            true
 | 
			
		||||
        } else {
 | 
			
		||||
            false
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Check if user secret exists
 | 
			
		||||
    pub fn has_user_secret(&self, secret: &str) -> bool {
 | 
			
		||||
        self.user_secrets.contains(&secret.to_string())
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Get user secrets count
 | 
			
		||||
    pub fn user_secrets_count(&self) -> usize {
 | 
			
		||||
        self.user_secrets.len()
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Add a register secret
 | 
			
		||||
    pub fn add_register_secret(&mut self, secret: String) {
 | 
			
		||||
        if !self.register_secrets.contains(&secret) {
 | 
			
		||||
            self.register_secrets.push(secret);
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Remove a register secret
 | 
			
		||||
    pub fn remove_register_secret(&mut self, secret: &str) -> bool {
 | 
			
		||||
        if let Some(pos) = self.register_secrets.iter().position(|x| x == secret) {
 | 
			
		||||
            self.register_secrets.remove(pos);
 | 
			
		||||
            true
 | 
			
		||||
        } else {
 | 
			
		||||
            false
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Check if register secret exists
 | 
			
		||||
    pub fn has_register_secret(&self, secret: &str) -> bool {
 | 
			
		||||
        self.register_secrets.contains(&secret.to_string())
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Get register secrets count
 | 
			
		||||
    pub fn register_secrets_count(&self) -> usize {
 | 
			
		||||
        self.register_secrets.len()
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// List all job IDs from Redis
 | 
			
		||||
    pub async fn list_jobs(&self) -> RunnerResult<Vec<String>> {
 | 
			
		||||
        self.client.list_jobs().await
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Get runners count
 | 
			
		||||
    pub fn runners_count(&self) -> usize {
 | 
			
		||||
        self.runners.len()
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Get admin secrets (returns cloned vector for security)
 | 
			
		||||
    pub fn get_admin_secrets(&self) -> Vec<String> {
 | 
			
		||||
        self.admin_secrets.clone()
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Get user secrets (returns cloned vector for security)
 | 
			
		||||
    pub fn get_user_secrets(&self) -> Vec<String> {
 | 
			
		||||
        self.user_secrets.clone()
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Get register secrets (returns cloned vector for security)
 | 
			
		||||
    pub fn get_register_secrets(&self) -> Vec<String> {
 | 
			
		||||
        self.register_secrets.clone()
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
impl Default for Supervisor {
 | 
			
		||||
    fn default() -> Self {
 | 
			
		||||
        // Note: Default implementation creates an empty supervisor
 | 
			
		||||
        // Use Supervisor::builder() for proper initialization
 | 
			
		||||
        Self {
 | 
			
		||||
            runners: HashMap::new(),
 | 
			
		||||
            process_manager: Arc::new(Mutex::new(SimpleProcessManager::new())),
 | 
			
		||||
            redis_client: redis::Client::open("redis://localhost:6379").unwrap(),
 | 
			
		||||
            namespace: "".to_string(),
 | 
			
		||||
            admin_secrets: Vec::new(),
 | 
			
		||||
            user_secrets: Vec::new(),
 | 
			
		||||
            register_secrets: Vec::new(),
 | 
			
		||||
            client: Client::default(),
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
mod tests {
 | 
			
		||||
    use super::*;
 | 
			
		||||
    use std::path::PathBuf;
 | 
			
		||||
    use sal_service_manager::SimpleProcessManager;
 | 
			
		||||
 | 
			
		||||
    #[tokio::test]
 | 
			
		||||
    async fn test_supervisor_creation() {
 | 
			
		||||
        let supervisor = Supervisor::builder()
 | 
			
		||||
            .redis_url("redis://localhost:6379")
 | 
			
		||||
            .build()
 | 
			
		||||
            .await
 | 
			
		||||
            .unwrap();
 | 
			
		||||
        assert_eq!(supervisor.list_runners().len(), 0);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    #[tokio::test]
 | 
			
		||||
    async fn test_add_runner() {
 | 
			
		||||
        use std::path::PathBuf;
 | 
			
		||||
        
 | 
			
		||||
        let config = RunnerConfig::new(
 | 
			
		||||
            "test_actor".to_string(),
 | 
			
		||||
            "test_actor".to_string(),
 | 
			
		||||
            "".to_string(),
 | 
			
		||||
            PathBuf::from("/usr/bin/test_actor"),
 | 
			
		||||
            "redis://localhost:6379".to_string(),
 | 
			
		||||
        );
 | 
			
		||||
        
 | 
			
		||||
        let runner = Runner::from_config(config.clone());
 | 
			
		||||
        let mut supervisor = Supervisor::builder()
 | 
			
		||||
            .redis_url("redis://localhost:6379")
 | 
			
		||||
            .add_runner(runner)
 | 
			
		||||
            .build()
 | 
			
		||||
            .await
 | 
			
		||||
            .unwrap();
 | 
			
		||||
        
 | 
			
		||||
        assert_eq!(supervisor.list_runners().len(), 1);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    #[tokio::test]
 | 
			
		||||
    async fn test_add_multiple_runners() {
 | 
			
		||||
        use std::path::PathBuf;
 | 
			
		||||
        
 | 
			
		||||
        let config1 = RunnerConfig::new(
 | 
			
		||||
            "sal_actor".to_string(),
 | 
			
		||||
            "sal_actor".to_string(),
 | 
			
		||||
            "".to_string(),
 | 
			
		||||
            PathBuf::from("/usr/bin/sal_actor"),
 | 
			
		||||
            "redis://localhost:6379".to_string(),
 | 
			
		||||
        );
 | 
			
		||||
 | 
			
		||||
        let config2 = RunnerConfig::new(
 | 
			
		||||
            "osis_actor".to_string(),
 | 
			
		||||
            "osis_actor".to_string(),
 | 
			
		||||
            "".to_string(),
 | 
			
		||||
            PathBuf::from("/usr/bin/osis_actor"),
 | 
			
		||||
            "redis://localhost:6379".to_string(),
 | 
			
		||||
        );
 | 
			
		||||
 | 
			
		||||
        let runner1 = Runner::from_config(config1);
 | 
			
		||||
        let runner2 = Runner::from_config(config2);
 | 
			
		||||
        
 | 
			
		||||
        let supervisor = Supervisor::builder()
 | 
			
		||||
            .redis_url("redis://localhost:6379")
 | 
			
		||||
            .add_runner(runner1)
 | 
			
		||||
            .add_runner(runner2)
 | 
			
		||||
            .build()
 | 
			
		||||
            .await
 | 
			
		||||
            .unwrap();
 | 
			
		||||
 | 
			
		||||
        assert_eq!(supervisor.list_runners().len(), 2);
 | 
			
		||||
        assert!(supervisor.get_runner("sal_actor").is_some());
 | 
			
		||||
        assert!(supervisor.get_runner("osis_actor").is_some());
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
		Reference in New Issue
	
	Block a user