move repos into monorepo
This commit is contained in:
111
bin/supervisor/src/auth.rs
Normal file
111
bin/supervisor/src/auth.rs
Normal file
@@ -0,0 +1,111 @@
|
||||
//! Authentication and API key management
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::HashMap;
|
||||
use uuid::Uuid;
|
||||
|
||||
/// API key scope/permission level
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "lowercase")]
|
||||
pub enum ApiKeyScope {
|
||||
/// Full access - can manage keys, runners, jobs
|
||||
Admin,
|
||||
/// Can register new runners
|
||||
Registrar,
|
||||
/// Can create and manage jobs
|
||||
User,
|
||||
}
|
||||
|
||||
impl ApiKeyScope {
|
||||
pub fn as_str(&self) -> &'static str {
|
||||
match self {
|
||||
ApiKeyScope::Admin => "admin",
|
||||
ApiKeyScope::Registrar => "registrar",
|
||||
ApiKeyScope::User => "user",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// An API key with metadata
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct ApiKey {
|
||||
/// The actual key value (UUID or custom string)
|
||||
pub key: String,
|
||||
/// Human-readable name for the key
|
||||
pub name: String,
|
||||
/// Permission scope
|
||||
pub scope: ApiKeyScope,
|
||||
/// When the key was created
|
||||
pub created_at: String,
|
||||
/// Optional expiration timestamp
|
||||
pub expires_at: Option<String>,
|
||||
}
|
||||
|
||||
impl ApiKey {
|
||||
/// Create a new API key with a generated UUID
|
||||
pub fn new(name: String, scope: ApiKeyScope) -> Self {
|
||||
Self {
|
||||
key: Uuid::new_v4().to_string(),
|
||||
name,
|
||||
scope,
|
||||
created_at: chrono::Utc::now().to_rfc3339(),
|
||||
expires_at: None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a new API key with a specific key value
|
||||
pub fn with_key(key: String, name: String, scope: ApiKeyScope) -> Self {
|
||||
Self {
|
||||
key,
|
||||
name,
|
||||
scope,
|
||||
created_at: chrono::Utc::now().to_rfc3339(),
|
||||
expires_at: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Response for auth verification
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct AuthVerifyResponse {
|
||||
pub valid: bool,
|
||||
pub name: String,
|
||||
pub scope: String,
|
||||
}
|
||||
|
||||
/// Method authorization requirements
|
||||
/// Maps RPC method names to required scopes
|
||||
pub fn get_method_required_scopes(method: &str) -> Option<Vec<ApiKeyScope>> {
|
||||
use ApiKeyScope::*;
|
||||
|
||||
match method {
|
||||
// Admin-only methods
|
||||
"key.create" | "key.generate" | "key.delete" | "key.list" |
|
||||
"supervisor.info" => {
|
||||
Some(vec![Admin])
|
||||
}
|
||||
|
||||
// Admin or Registrar methods
|
||||
"runner.create" | "runner.remove" => {
|
||||
Some(vec![Admin, Registrar])
|
||||
}
|
||||
|
||||
// Admin or User methods
|
||||
"job.create" | "job.run" | "job.start" | "job.stop" | "job.delete" => {
|
||||
Some(vec![Admin, User])
|
||||
}
|
||||
|
||||
// Public methods (no auth required)
|
||||
"rpc.discover" => None,
|
||||
|
||||
// Any authenticated user (read-only operations)
|
||||
"runner.list" | "runner.ping" |
|
||||
"job.get" | "job.list" | "job.status" | "job.result" | "job.logs" |
|
||||
"auth.verify" => {
|
||||
Some(vec![Admin, Registrar, User])
|
||||
}
|
||||
|
||||
// Default: require authentication
|
||||
_ => Some(vec![Admin, Registrar, User]),
|
||||
}
|
||||
}
|
||||
112
bin/supervisor/src/bin/supervisor.rs
Normal file
112
bin/supervisor/src/bin/supervisor.rs
Normal file
@@ -0,0 +1,112 @@
|
||||
//! Hero Supervisor Binary
|
||||
|
||||
use hero_supervisor::SupervisorBuilder;
|
||||
use clap::Parser;
|
||||
use log::{error, info};
|
||||
use std::sync::Arc;
|
||||
use tokio::sync::Mutex;
|
||||
|
||||
/// Hero Supervisor - manages actors and dispatches jobs
|
||||
#[derive(Parser, Debug)]
|
||||
#[command(name = "supervisor")]
|
||||
#[command(about = "Hero Supervisor - manages actors and dispatches jobs")]
|
||||
struct Args {
|
||||
/// Redis URL for job queue
|
||||
#[arg(long, default_value = "redis://127.0.0.1:6379")]
|
||||
redis_url: String,
|
||||
|
||||
/// Namespace for Redis keys
|
||||
#[arg(long, default_value = "")]
|
||||
namespace: String,
|
||||
|
||||
/// Admin secrets (required, can be specified multiple times)
|
||||
#[arg(long = "admin-secret", value_name = "SECRET", required = true)]
|
||||
admin_secrets: Vec<String>,
|
||||
|
||||
/// User secrets (can be specified multiple times)
|
||||
#[arg(long = "user-secret", value_name = "SECRET")]
|
||||
user_secrets: Vec<String>,
|
||||
|
||||
/// Register secrets (can be specified multiple times)
|
||||
#[arg(long = "register-secret", value_name = "SECRET")]
|
||||
register_secrets: Vec<String>,
|
||||
|
||||
/// Port for OpenRPC HTTP server
|
||||
#[arg(long, default_value = "3030")]
|
||||
port: u16,
|
||||
|
||||
/// Bind address for OpenRPC HTTP server
|
||||
#[arg(long, default_value = "127.0.0.1")]
|
||||
bind_address: String,
|
||||
|
||||
/// Pre-configured runner names (comma-separated)
|
||||
#[arg(long, value_name = "NAMES", value_delimiter = ',')]
|
||||
runners: Vec<String>,
|
||||
}
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
env_logger::init();
|
||||
let args = Args::parse();
|
||||
|
||||
// Build supervisor
|
||||
let mut builder = SupervisorBuilder::new()
|
||||
.admin_secrets(args.admin_secrets);
|
||||
|
||||
if !args.user_secrets.is_empty() {
|
||||
builder = builder.user_secrets(args.user_secrets);
|
||||
}
|
||||
|
||||
if !args.register_secrets.is_empty() {
|
||||
builder = builder.register_secrets(args.register_secrets);
|
||||
}
|
||||
|
||||
let mut supervisor = builder.build().await?;
|
||||
|
||||
// Register pre-configured runners
|
||||
if !args.runners.is_empty() {
|
||||
for runner_name in &args.runners {
|
||||
match supervisor.runner_create(runner_name.clone()).await {
|
||||
Ok(_) => {},
|
||||
Err(e) => error!("Failed to register runner '{}': {}", runner_name, e),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Start OpenRPC server
|
||||
use hero_supervisor::openrpc::start_http_openrpc_server;
|
||||
|
||||
let supervisor_clone = supervisor.clone();
|
||||
let bind_addr = args.bind_address.clone();
|
||||
let port = args.port;
|
||||
|
||||
tokio::spawn(async move {
|
||||
match start_http_openrpc_server(supervisor_clone, &bind_addr, port).await {
|
||||
Ok(handle) => {
|
||||
handle.stopped().await;
|
||||
error!("OpenRPC server stopped unexpectedly");
|
||||
}
|
||||
Err(e) => {
|
||||
error!("OpenRPC server error: {}", e);
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
tokio::time::sleep(tokio::time::Duration::from_millis(500)).await;
|
||||
|
||||
// Print startup info
|
||||
println!("📡 http://{}:{}", args.bind_address, args.port);
|
||||
info!("Hero Supervisor is running. Press Ctrl+C to shutdown.");
|
||||
|
||||
// Set up graceful shutdown
|
||||
tokio::spawn(async move {
|
||||
tokio::signal::ctrl_c().await.expect("Failed to listen for ctrl+c");
|
||||
info!("Received shutdown signal");
|
||||
std::process::exit(0);
|
||||
});
|
||||
|
||||
// Keep the application running
|
||||
loop {
|
||||
tokio::time::sleep(tokio::time::Duration::from_secs(1)).await;
|
||||
}
|
||||
}
|
||||
198
bin/supervisor/src/builder.rs
Normal file
198
bin/supervisor/src/builder.rs
Normal file
@@ -0,0 +1,198 @@
|
||||
//! Supervisor builder for configuration and initialization.
|
||||
|
||||
use crate::error::{SupervisorError, SupervisorResult};
|
||||
use crate::Supervisor;
|
||||
use hero_job_client::ClientBuilder;
|
||||
|
||||
/// Builder for constructing a Supervisor instance
|
||||
pub struct SupervisorBuilder {
|
||||
/// Set of registered runner IDs
|
||||
runners: std::collections::HashSet<String>,
|
||||
/// Redis URL for connection
|
||||
redis_url: String,
|
||||
/// Admin secrets for bootstrapping API keys
|
||||
admin_secrets: Vec<String>,
|
||||
/// User secrets for bootstrapping API keys
|
||||
user_secrets: Vec<String>,
|
||||
/// Register secrets for bootstrapping API keys
|
||||
register_secrets: Vec<String>,
|
||||
client_builder: ClientBuilder,
|
||||
/// Osiris URL for queries (optional)
|
||||
osiris_url: Option<String>,
|
||||
/// Supervisor URL for commands via Osiris (optional)
|
||||
supervisor_url: Option<String>,
|
||||
/// Supervisor secret for Osiris commands (optional)
|
||||
supervisor_secret: Option<String>,
|
||||
/// Runner name for Osiris operations (optional)
|
||||
osiris_runner_name: Option<String>,
|
||||
}
|
||||
|
||||
impl SupervisorBuilder {
|
||||
/// Create a new supervisor builder
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
runners: std::collections::HashSet::new(),
|
||||
redis_url: "redis://localhost:6379".to_string(),
|
||||
admin_secrets: Vec::new(),
|
||||
user_secrets: Vec::new(),
|
||||
register_secrets: Vec::new(),
|
||||
client_builder: ClientBuilder::new(),
|
||||
osiris_url: None,
|
||||
supervisor_url: None,
|
||||
supervisor_secret: None,
|
||||
osiris_runner_name: None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Set the Osiris URL for queries
|
||||
pub fn osiris_url<S: Into<String>>(mut self, url: S) -> Self {
|
||||
self.osiris_url = Some(url.into());
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the Supervisor URL for Osiris commands
|
||||
pub fn supervisor_url_for_osiris<S: Into<String>>(mut self, url: S) -> Self {
|
||||
self.supervisor_url = Some(url.into());
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the Supervisor secret for Osiris commands
|
||||
pub fn supervisor_secret<S: Into<String>>(mut self, secret: S) -> Self {
|
||||
self.supervisor_secret = Some(secret.into());
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the runner name for Osiris operations
|
||||
pub fn osiris_runner_name<S: Into<String>>(mut self, name: S) -> Self {
|
||||
self.osiris_runner_name = Some(name.into());
|
||||
self
|
||||
}
|
||||
|
||||
/// Add an admin secret
|
||||
pub fn add_admin_secret<S: Into<String>>(mut self, secret: S) -> Self {
|
||||
self.admin_secrets.push(secret.into());
|
||||
self
|
||||
}
|
||||
|
||||
/// Add multiple admin secrets
|
||||
pub fn admin_secrets<I, S>(mut self, secrets: I) -> Self
|
||||
where
|
||||
I: IntoIterator<Item = S>,
|
||||
S: Into<String>,
|
||||
{
|
||||
self.admin_secrets.extend(secrets.into_iter().map(|s| s.into()));
|
||||
self
|
||||
}
|
||||
|
||||
/// Add a user secret
|
||||
pub fn add_user_secret<S: Into<String>>(mut self, secret: S) -> Self {
|
||||
self.user_secrets.push(secret.into());
|
||||
self
|
||||
}
|
||||
|
||||
/// Add multiple user secrets
|
||||
pub fn user_secrets<I, S>(mut self, secrets: I) -> Self
|
||||
where
|
||||
I: IntoIterator<Item = S>,
|
||||
S: Into<String>,
|
||||
{
|
||||
self.user_secrets.extend(secrets.into_iter().map(|s| s.into()));
|
||||
self
|
||||
}
|
||||
|
||||
/// Add a register secret
|
||||
pub fn add_register_secret<S: Into<String>>(mut self, secret: S) -> Self {
|
||||
self.register_secrets.push(secret.into());
|
||||
self
|
||||
}
|
||||
|
||||
/// Add multiple register secrets
|
||||
pub fn register_secrets<I, S>(mut self, secrets: I) -> Self
|
||||
where
|
||||
I: IntoIterator<Item = S>,
|
||||
S: Into<String>,
|
||||
{
|
||||
self.register_secrets.extend(secrets.into_iter().map(|s| s.into()));
|
||||
self
|
||||
}
|
||||
|
||||
/// Add a runner to the supervisor
|
||||
pub fn add_runner(mut self, runner_id: String) -> Self {
|
||||
self.runners.insert(runner_id);
|
||||
self
|
||||
}
|
||||
|
||||
/// Build the supervisor
|
||||
pub async fn build(self) -> SupervisorResult<Supervisor> {
|
||||
// Create Redis client
|
||||
let redis_client = redis::Client::open(self.redis_url.as_str())
|
||||
.map_err(|e| SupervisorError::ConfigError {
|
||||
reason: format!("Invalid Redis URL: {}", e),
|
||||
})?;
|
||||
|
||||
// Create the store
|
||||
let mut store = crate::store::Store::new();
|
||||
|
||||
// Add admin secrets as API keys
|
||||
for secret in &self.admin_secrets {
|
||||
store.key_create(
|
||||
crate::auth::ApiKey::new(secret.clone(), crate::auth::ApiKeyScope::Admin),
|
||||
);
|
||||
}
|
||||
|
||||
// Add user secrets as API keys
|
||||
for secret in &self.user_secrets {
|
||||
store.key_create(
|
||||
crate::auth::ApiKey::new(secret.clone(), crate::auth::ApiKeyScope::User),
|
||||
);
|
||||
}
|
||||
|
||||
// Add register secrets as API keys
|
||||
for secret in &self.register_secrets {
|
||||
store.key_create(
|
||||
crate::auth::ApiKey::new(secret.clone(), crate::auth::ApiKeyScope::Registrar),
|
||||
);
|
||||
}
|
||||
|
||||
// Build the client
|
||||
let client = self.client_builder.build().await?;
|
||||
|
||||
// Build Osiris client if configured
|
||||
// Temporarily disabled - needs update
|
||||
// let osiris_client = if let (Some(osiris_url), Some(supervisor_url)) =
|
||||
// (self.osiris_url, self.supervisor_url) {
|
||||
// let mut builder = osiris_client::OsirisClient::builder()
|
||||
// .osiris_url(osiris_url)
|
||||
// .supervisor_url(supervisor_url)
|
||||
// .runner_name(self.osiris_runner_name.unwrap_or_else(|| "osiris-runner".to_string()));
|
||||
//
|
||||
// if let Some(secret) = self.supervisor_secret {
|
||||
// builder = builder.supervisor_secret(secret);
|
||||
// }
|
||||
//
|
||||
// Some(builder.build().map_err(|e| SupervisorError::ConfigError {
|
||||
// reason: format!("Failed to build Osiris client: {}", e),
|
||||
// })?)
|
||||
// } else {
|
||||
// None
|
||||
// };
|
||||
|
||||
// Add pre-configured runners to the store
|
||||
for runner_id in self.runners {
|
||||
let _ = store.runner_add(runner_id);
|
||||
}
|
||||
|
||||
Ok(Supervisor {
|
||||
store: std::sync::Arc::new(tokio::sync::Mutex::new(store)),
|
||||
job_client: client,
|
||||
redis_client,
|
||||
// osiris_client, // Temporarily disabled
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for SupervisorBuilder {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
73
bin/supervisor/src/error.rs
Normal file
73
bin/supervisor/src/error.rs
Normal file
@@ -0,0 +1,73 @@
|
||||
//! Error types for supervisor operations.
|
||||
|
||||
use thiserror::Error;
|
||||
use jsonrpsee::types::{ErrorObject, ErrorObjectOwned};
|
||||
|
||||
/// Result type for supervisor operations
|
||||
pub type SupervisorResult<T> = Result<T, SupervisorError>;
|
||||
|
||||
/// Errors that can occur during supervisor operations
|
||||
#[derive(Debug, Error)]
|
||||
pub enum SupervisorError {
|
||||
#[error("Runner '{runner_id}' not found")]
|
||||
RunnerNotFound { runner_id: String },
|
||||
|
||||
#[error("Runner '{runner_id}' is already registered")]
|
||||
RunnerAlreadyRegistered { runner_id: String },
|
||||
|
||||
#[error("Job '{job_id}' not found")]
|
||||
JobNotFound { job_id: String },
|
||||
|
||||
#[error("Failed to queue job for runner '{runner_id}': {reason}")]
|
||||
QueueError { runner_id: String, reason: String },
|
||||
|
||||
#[error("Configuration error: {reason}")]
|
||||
ConfigError { reason: String },
|
||||
|
||||
#[error("Invalid secret or API key: {0}")]
|
||||
InvalidSecret(String),
|
||||
|
||||
#[error("Authentication error: {message}")]
|
||||
AuthenticationError { message: String },
|
||||
|
||||
#[error("Insufficient permissions: {message}")]
|
||||
PermissionDenied { message: String },
|
||||
|
||||
#[error("Redis error: {source}")]
|
||||
RedisError {
|
||||
#[from]
|
||||
source: redis::RedisError,
|
||||
},
|
||||
|
||||
#[error("Job error: {source}")]
|
||||
JobError {
|
||||
#[from]
|
||||
source: hero_job::JobError,
|
||||
},
|
||||
|
||||
#[error("Job client error: {source}")]
|
||||
JobClientError {
|
||||
#[from]
|
||||
source: hero_job_client::ClientError,
|
||||
},
|
||||
|
||||
#[error("IO error: {source}")]
|
||||
IoError {
|
||||
#[from]
|
||||
source: std::io::Error,
|
||||
},
|
||||
|
||||
#[error("Osiris client error: {0}")]
|
||||
OsirisError(String),
|
||||
}
|
||||
|
||||
/// Implement conversion from SupervisorError → RPC ErrorObject
|
||||
impl From<SupervisorError> for ErrorObject<'static> {
|
||||
fn from(err: SupervisorError) -> Self {
|
||||
ErrorObject::owned(
|
||||
-32603, // Internal error code
|
||||
format!("Supervisor error: {err}"),
|
||||
None::<()>,
|
||||
)
|
||||
}
|
||||
}
|
||||
19
bin/supervisor/src/lib.rs
Normal file
19
bin/supervisor/src/lib.rs
Normal file
@@ -0,0 +1,19 @@
|
||||
//! Hero Supervisor - Actor management for the Hero ecosystem.
|
||||
//!
|
||||
//! See README.md for detailed documentation and usage examples.
|
||||
|
||||
pub mod supervisor;
|
||||
pub mod builder;
|
||||
pub mod error;
|
||||
pub mod openrpc;
|
||||
pub mod auth;
|
||||
pub mod store;
|
||||
|
||||
// Re-export job client for convenience
|
||||
pub use hero_job_client as job_client;
|
||||
|
||||
// Re-export main types for convenience
|
||||
pub use supervisor::Supervisor;
|
||||
pub use builder::SupervisorBuilder;
|
||||
pub use error::{SupervisorError, SupervisorResult};
|
||||
pub use hero_job::{Job, JobBuilder, JobStatus, JobError};
|
||||
474
bin/supervisor/src/openrpc.rs
Normal file
474
bin/supervisor/src/openrpc.rs
Normal file
@@ -0,0 +1,474 @@
|
||||
//! OpenRPC server implementation.
|
||||
|
||||
use jsonrpsee::{
|
||||
core::{RpcResult, async_trait},
|
||||
server::middleware::rpc::{RpcServiceT, RpcServiceBuilder, MethodResponse},
|
||||
proc_macros::rpc,
|
||||
server::{Server, ServerHandle},
|
||||
types::{ErrorObject, ErrorObjectOwned},
|
||||
};
|
||||
use tower_http::cors::{CorsLayer, Any};
|
||||
|
||||
use anyhow;
|
||||
use log::{debug, info, error};
|
||||
|
||||
use crate::{auth::ApiKey, supervisor::Supervisor};
|
||||
use crate::error::SupervisorError;
|
||||
use hero_job::{Job, JobResult, JobStatus};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use std::net::SocketAddr;
|
||||
use std::sync::Arc;
|
||||
use std::fs;
|
||||
use tokio::sync::Mutex;
|
||||
|
||||
/// Load OpenRPC specification from docs/openrpc.json
|
||||
fn load_openrpc_spec() -> Result<serde_json::Value, Box<dyn std::error::Error>> {
|
||||
let path = "../../docs/openrpc.json";
|
||||
let content = fs::read_to_string(path)?;
|
||||
let spec = serde_json::from_str(&content)?;
|
||||
debug!("Loaded OpenRPC specification from: {}", path);
|
||||
Ok(spec)
|
||||
}
|
||||
|
||||
/// Request parameters for generating API keys (auto-generates key value)
|
||||
#[derive(Debug, Deserialize, Serialize)]
|
||||
pub struct GenerateApiKeyParams {
|
||||
pub name: String,
|
||||
pub scope: String, // "admin", "registrar", or "user"
|
||||
}
|
||||
|
||||
/// Job status response with metadata
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct JobStatusResponse {
|
||||
pub job_id: String,
|
||||
pub status: String,
|
||||
pub created_at: String,
|
||||
}
|
||||
|
||||
/// Supervisor information response
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct SupervisorInfo {
|
||||
pub server_url: String,
|
||||
}
|
||||
|
||||
/// OpenRPC trait - maps directly to Supervisor methods
|
||||
/// This trait exists only for jsonrpsee's macro system.
|
||||
/// The implementation below is just error type conversion -
|
||||
/// all actual logic lives in Supervisor methods.
|
||||
#[rpc(server)]
|
||||
pub trait SupervisorRpc {
|
||||
/// Create a job without queuing it to a runner
|
||||
#[method(name = "job.create")]
|
||||
async fn job_create(&self, params: Job) -> RpcResult<String>;
|
||||
|
||||
/// Get a job by job ID
|
||||
#[method(name = "job.get")]
|
||||
async fn job_get(&self, job_id: String) -> RpcResult<Job>;
|
||||
|
||||
/// Start a previously created job by queuing it to its assigned runner
|
||||
#[method(name = "job.start")]
|
||||
async fn job_start(&self, job_id: String) -> RpcResult<()>;
|
||||
|
||||
/// Run a job on the appropriate runner and return the result
|
||||
#[method(name = "job.run")]
|
||||
async fn job_run(&self, params: Job) -> RpcResult<JobResult>;
|
||||
|
||||
/// Get the current status of a job
|
||||
#[method(name = "job.status")]
|
||||
async fn job_status(&self, job_id: String) -> RpcResult<JobStatus>;
|
||||
|
||||
/// Get the result of a completed job (blocks until result is available)
|
||||
#[method(name = "job.result")]
|
||||
async fn job_result(&self, job_id: String) -> RpcResult<JobResult>;
|
||||
|
||||
/// Get logs for a specific job
|
||||
#[method(name = "job.logs")]
|
||||
async fn job_logs(&self, job_id: String) -> RpcResult<Vec<String>>;
|
||||
|
||||
/// Stop a running job
|
||||
#[method(name = "job.stop")]
|
||||
async fn job_stop(&self, job_id: String) -> RpcResult<()>;
|
||||
|
||||
/// Delete a job from the system
|
||||
#[method(name = "job.delete")]
|
||||
async fn job_delete(&self, job_id: String) -> RpcResult<()>;
|
||||
|
||||
/// List all jobs
|
||||
#[method(name = "job.list")]
|
||||
async fn job_list(&self) -> RpcResult<Vec<Job>>;
|
||||
|
||||
/// Add a runner with configuration
|
||||
#[method(name = "runner.create")]
|
||||
async fn runner_create(&self, runner_id: String) -> RpcResult<()>;
|
||||
|
||||
/// Delete a runner from the supervisor
|
||||
#[method(name = "runner.remove")]
|
||||
async fn runner_delete(&self, runner_id: String) -> RpcResult<()>;
|
||||
|
||||
/// List all runner IDs
|
||||
#[method(name = "runner.list")]
|
||||
async fn runner_list(&self) -> RpcResult<Vec<String>>;
|
||||
|
||||
/// Ping a runner (dispatch a ping job)
|
||||
#[method(name = "runner.ping")]
|
||||
async fn ping_runner(&self, runner_id: String) -> RpcResult<String>;
|
||||
|
||||
/// Create an API key with provided key value
|
||||
#[method(name = "key.create")]
|
||||
async fn key_create(&self, key: ApiKey) -> RpcResult<()>;
|
||||
|
||||
/// Generate a new API key with auto-generated key value
|
||||
#[method(name = "key.generate")]
|
||||
async fn key_generate(&self, params: GenerateApiKeyParams) -> RpcResult<ApiKey>;
|
||||
|
||||
/// Delete an API key
|
||||
#[method(name = "key.delete")]
|
||||
async fn key_delete(&self, key_id: String) -> RpcResult<()>;
|
||||
|
||||
/// List all secrets (returns counts only for security)
|
||||
#[method(name = "key.list")]
|
||||
async fn key_list(&self) -> RpcResult<Vec<ApiKey>>;
|
||||
|
||||
/// Verify an API key and return its metadata
|
||||
#[method(name = "auth.verify")]
|
||||
async fn auth_verify(&self) -> RpcResult<crate::auth::AuthVerifyResponse>;
|
||||
|
||||
/// Get supervisor information
|
||||
#[method(name = "supervisor.info")]
|
||||
async fn supervisor_info(&self) -> RpcResult<SupervisorInfo>;
|
||||
|
||||
/// OpenRPC discovery method - returns the OpenRPC document describing this API
|
||||
#[method(name = "rpc.discover")]
|
||||
async fn rpc_discover(&self) -> RpcResult<serde_json::Value>;
|
||||
}
|
||||
|
||||
/// RPC implementation on Supervisor
|
||||
///
|
||||
/// This implementation is ONLY for error type conversion (SupervisorError → ErrorObject).
|
||||
/// All business logic is in Supervisor methods - these are thin wrappers.
|
||||
/// Authorization is handled by middleware before methods are called.
|
||||
#[async_trait]
|
||||
impl SupervisorRpcServer for Supervisor {
|
||||
async fn job_create(&self, job: Job) -> RpcResult<String> {
|
||||
Ok(self.job_create(job).await?)
|
||||
}
|
||||
|
||||
async fn job_get(&self, job_id: String) -> RpcResult<Job> {
|
||||
Ok(self.job_get(&job_id).await?)
|
||||
}
|
||||
|
||||
async fn job_list(&self) -> RpcResult<Vec<Job>> {
|
||||
let job_ids = self.job_list().await;
|
||||
let mut jobs = Vec::new();
|
||||
for job_id in job_ids {
|
||||
if let Ok(job) = self.job_get(&job_id).await {
|
||||
jobs.push(job);
|
||||
}
|
||||
}
|
||||
Ok(jobs)
|
||||
}
|
||||
|
||||
async fn job_run(&self, job: Job) -> RpcResult<JobResult> {
|
||||
let output = self.job_run(job).await?;
|
||||
Ok(JobResult::Success { success: output })
|
||||
}
|
||||
|
||||
async fn job_start(&self, job_id: String) -> RpcResult<()> {
|
||||
self.job_start(&job_id).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn job_status(&self, job_id: String) -> RpcResult<JobStatus> {
|
||||
Ok(self.job_status(&job_id).await?)
|
||||
}
|
||||
|
||||
async fn job_logs(&self, job_id: String) -> RpcResult<Vec<String>> {
|
||||
Ok(self.job_logs(&job_id, None).await?)
|
||||
}
|
||||
|
||||
async fn job_result(&self, job_id: String) -> RpcResult<JobResult> {
|
||||
match self.job_result(&job_id).await? {
|
||||
Some(result) => {
|
||||
if result.starts_with("Error:") {
|
||||
Ok(JobResult::Error { error: result })
|
||||
} else {
|
||||
Ok(JobResult::Success { success: result })
|
||||
}
|
||||
},
|
||||
None => Ok(JobResult::Error { error: "Job result not available".to_string() })
|
||||
}
|
||||
}
|
||||
|
||||
async fn job_stop(&self, job_id: String) -> RpcResult<()> {
|
||||
self.job_stop(&job_id).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn job_delete(&self, job_id: String) -> RpcResult<()> {
|
||||
self.job_delete(&job_id).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn runner_create(&self, runner_id: String) -> RpcResult<()> {
|
||||
self.runner_create(runner_id).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn runner_delete(&self, runner_id: String) -> RpcResult<()> {
|
||||
Ok(self.runner_delete(&runner_id).await?)
|
||||
}
|
||||
|
||||
async fn runner_list(&self) -> RpcResult<Vec<String>> {
|
||||
Ok(self.runner_list().await)
|
||||
}
|
||||
|
||||
|
||||
async fn ping_runner(&self, runner_id: String) -> RpcResult<String> {
|
||||
Ok(self.runner_ping(&runner_id).await?)
|
||||
}
|
||||
|
||||
async fn key_create(&self, key: ApiKey) -> RpcResult<()> {
|
||||
let _ = self.key_create(key).await;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn key_generate(&self, params: GenerateApiKeyParams) -> RpcResult<ApiKey> {
|
||||
// Parse scope
|
||||
let api_scope = match params.scope.to_lowercase().as_str() {
|
||||
"admin" => crate::auth::ApiKeyScope::Admin,
|
||||
"registrar" => crate::auth::ApiKeyScope::Registrar,
|
||||
"user" => crate::auth::ApiKeyScope::User,
|
||||
_ => return Err(ErrorObject::owned(-32602, "Invalid scope. Must be 'admin', 'registrar', or 'user'", None::<()>)),
|
||||
};
|
||||
|
||||
let api_key = self.create_api_key(params.name, api_scope).await;
|
||||
Ok(api_key)
|
||||
}
|
||||
|
||||
async fn key_delete(&self, key_id: String) -> RpcResult<()> {
|
||||
self.key_delete(&key_id).await
|
||||
.ok_or_else(|| ErrorObject::owned(-32603, "API key not found", None::<()>))?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn key_list(&self) -> RpcResult<Vec<ApiKey>> {
|
||||
Ok(self.key_list().await)
|
||||
}
|
||||
|
||||
async fn auth_verify(&self) -> RpcResult<crate::auth::AuthVerifyResponse> {
|
||||
// If this method is called, middleware already verified the key
|
||||
// So we just return success - the middleware wouldn't have let an invalid key through
|
||||
Ok(crate::auth::AuthVerifyResponse {
|
||||
valid: true,
|
||||
name: "verified".to_string(),
|
||||
scope: "authenticated".to_string(),
|
||||
})
|
||||
}
|
||||
|
||||
async fn supervisor_info(&self) -> RpcResult<SupervisorInfo> {
|
||||
Ok(SupervisorInfo {
|
||||
server_url: "http://127.0.0.1:3031".to_string(), // TODO: get from config
|
||||
})
|
||||
}
|
||||
|
||||
async fn rpc_discover(&self) -> RpcResult<serde_json::Value> {
|
||||
debug!("OpenRPC request: rpc.discover");
|
||||
|
||||
// Read OpenRPC specification from docs/openrpc.json
|
||||
match load_openrpc_spec() {
|
||||
Ok(spec) => Ok(spec),
|
||||
Err(e) => {
|
||||
error!("Failed to load OpenRPC specification: {}", e);
|
||||
// Fallback to a minimal spec if file loading fails
|
||||
Ok(serde_json::json!({
|
||||
"openrpc": "1.3.2",
|
||||
"info": {
|
||||
"title": "Hero Supervisor OpenRPC API",
|
||||
"version": "1.0.0",
|
||||
"description": "OpenRPC API for managing Hero Supervisor runners and jobs"
|
||||
},
|
||||
"methods": [],
|
||||
"error": "Failed to load full specification"
|
||||
}))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Authorization middleware using RpcServiceT
|
||||
/// This middleware is created per-connection and checks permissions for each RPC call
|
||||
#[derive(Clone)]
|
||||
struct AuthMiddleware<S> {
|
||||
supervisor: Supervisor,
|
||||
inner: S,
|
||||
}
|
||||
|
||||
impl<S> RpcServiceT for AuthMiddleware<S>
|
||||
where
|
||||
S: RpcServiceT<MethodResponse = MethodResponse> + Send + Sync + Clone + 'static,
|
||||
{
|
||||
type MethodResponse = MethodResponse;
|
||||
type BatchResponse = S::BatchResponse;
|
||||
type NotificationResponse = S::NotificationResponse;
|
||||
|
||||
fn call<'a>(&self, req: jsonrpsee::server::middleware::rpc::Request<'a>) -> impl std::future::Future<Output = Self::MethodResponse> + Send + 'a {
|
||||
let supervisor = self.supervisor.clone();
|
||||
let inner = self.inner.clone();
|
||||
let method = req.method_name().to_string();
|
||||
let id = req.id();
|
||||
|
||||
Box::pin(async move {
|
||||
// Check if method requires auth
|
||||
let required_scopes = match crate::auth::get_method_required_scopes(&method) {
|
||||
None => {
|
||||
// Public method - no auth required
|
||||
debug!("ℹ️ Public method: {}", method);
|
||||
return inner.call(req).await;
|
||||
}
|
||||
Some(scopes) => scopes,
|
||||
};
|
||||
|
||||
// Extract Authorization header from extensions
|
||||
let headers = req.extensions().get::<hyper::HeaderMap>();
|
||||
|
||||
let api_key = headers
|
||||
.and_then(|h| h.get(hyper::header::AUTHORIZATION))
|
||||
.and_then(|value| value.to_str().ok())
|
||||
.and_then(|s| s.strip_prefix("Bearer "))
|
||||
.map(|k| k.to_string());
|
||||
|
||||
let api_key = match api_key {
|
||||
Some(key) => key,
|
||||
None => {
|
||||
error!("❌ Missing Authorization header for method: {}", method);
|
||||
let err = ErrorObjectOwned::owned(
|
||||
-32001,
|
||||
format!("Missing Authorization header for method: {}", method),
|
||||
None::<()>,
|
||||
);
|
||||
return MethodResponse::error(id, err);
|
||||
}
|
||||
};
|
||||
|
||||
// Verify API key and check scope
|
||||
let key_obj = match supervisor.key_get(&api_key).await {
|
||||
Some(k) => k,
|
||||
None => {
|
||||
error!("❌ Invalid API key");
|
||||
let err = ErrorObjectOwned::owned(-32001, "Invalid API key", None::<()>);
|
||||
return MethodResponse::error(id, err);
|
||||
}
|
||||
};
|
||||
|
||||
if !required_scopes.contains(&key_obj.scope) {
|
||||
error!(
|
||||
"❌ Unauthorized: method '{}' requires {:?}, got {:?}",
|
||||
method, required_scopes, key_obj.scope
|
||||
);
|
||||
let err = ErrorObjectOwned::owned(
|
||||
-32001,
|
||||
format!(
|
||||
"Insufficient permissions for '{}'. Required: {:?}, Got: {:?}",
|
||||
method, required_scopes, key_obj.scope
|
||||
),
|
||||
None::<()>,
|
||||
);
|
||||
return MethodResponse::error(id, err);
|
||||
}
|
||||
|
||||
debug!("✅ Authorized: {} with scope {:?}", method, key_obj.scope);
|
||||
|
||||
// Authorized - proceed with the call
|
||||
inner.call(req).await
|
||||
})
|
||||
}
|
||||
|
||||
fn batch<'a>(&self, batch: jsonrpsee::server::middleware::rpc::Batch<'a>) -> impl std::future::Future<Output = Self::BatchResponse> + Send + 'a {
|
||||
// For simplicity, pass through batch requests
|
||||
// In production, you'd want to check each request in the batch
|
||||
self.inner.batch(batch)
|
||||
}
|
||||
|
||||
fn notification<'a>(&self, notif: jsonrpsee::server::middleware::rpc::Notification<'a>) -> impl std::future::Future<Output = Self::NotificationResponse> + Send + 'a {
|
||||
self.inner.notification(notif)
|
||||
}
|
||||
}
|
||||
|
||||
/// HTTP-level middleware that copies a request's headers into the request's
/// extensions before handing the request to the wrapped service.
#[derive(Clone)]
struct HeaderPropagationService<S> {
    /// The wrapped HTTP service.
    inner: S,
}
|
||||
|
||||
impl<S, B> tower::Service<hyper::Request<B>> for HeaderPropagationService<S>
|
||||
where
|
||||
S: tower::Service<hyper::Request<B>> + Clone + Send + 'static,
|
||||
S::Future: Send + 'static,
|
||||
B: Send + 'static,
|
||||
{
|
||||
type Response = S::Response;
|
||||
type Error = S::Error;
|
||||
type Future = std::pin::Pin<Box<dyn std::future::Future<Output = Result<Self::Response, Self::Error>> + Send>>;
|
||||
|
||||
fn poll_ready(&mut self, cx: &mut std::task::Context<'_>) -> std::task::Poll<Result<(), Self::Error>> {
|
||||
self.inner.poll_ready(cx)
|
||||
}
|
||||
|
||||
fn call(&mut self, mut req: hyper::Request<B>) -> Self::Future {
|
||||
let headers = req.headers().clone();
|
||||
req.extensions_mut().insert(headers);
|
||||
let fut = self.inner.call(req);
|
||||
Box::pin(fut)
|
||||
}
|
||||
}
|
||||
|
||||
/// Start HTTP OpenRPC server (Unix socket support would require additional dependencies)
|
||||
pub async fn start_http_openrpc_server(
|
||||
supervisor: Supervisor,
|
||||
bind_address: &str,
|
||||
port: u16,
|
||||
) -> anyhow::Result<ServerHandle> {
|
||||
let http_addr: SocketAddr = format!("{}:{}", bind_address, port).parse()?;
|
||||
|
||||
// Configure CORS to allow requests from the admin UI
|
||||
// Note: Authorization header must be explicitly listed, not covered by Any
|
||||
use tower_http::cors::AllowHeaders;
|
||||
let cors = CorsLayer::new()
|
||||
.allow_origin(Any)
|
||||
.allow_headers(AllowHeaders::list([
|
||||
hyper::header::CONTENT_TYPE,
|
||||
hyper::header::AUTHORIZATION,
|
||||
]))
|
||||
.allow_methods(Any)
|
||||
.expose_headers(Any);
|
||||
|
||||
// Build RPC middleware with authorization (per-connection)
|
||||
let supervisor_for_middleware = supervisor.clone();
|
||||
let rpc_middleware = RpcServiceBuilder::new().layer_fn(move |service| {
|
||||
// This closure runs once per connection
|
||||
AuthMiddleware {
|
||||
supervisor: supervisor_for_middleware.clone(),
|
||||
inner: service,
|
||||
}
|
||||
});
|
||||
|
||||
// Build HTTP middleware stack with CORS and header propagation
|
||||
let http_middleware = tower::ServiceBuilder::new()
|
||||
.layer(cors)
|
||||
.layer(tower::layer::layer_fn(|service| {
|
||||
HeaderPropagationService { inner: service }
|
||||
}));
|
||||
|
||||
let http_server = Server::builder()
|
||||
.set_rpc_middleware(rpc_middleware)
|
||||
.set_http_middleware(http_middleware)
|
||||
.build(http_addr)
|
||||
.await?;
|
||||
|
||||
let http_handle = http_server.start(supervisor.into_rpc());
|
||||
|
||||
info!("OpenRPC HTTP server running at http://{} with CORS enabled", http_addr);
|
||||
|
||||
Ok(http_handle)
|
||||
}
|
||||
286
bin/supervisor/src/store.rs
Normal file
286
bin/supervisor/src/store.rs
Normal file
@@ -0,0 +1,286 @@
|
||||
//! In-memory storage layer for Supervisor
|
||||
//!
|
||||
//! Provides CRUD operations for:
|
||||
//! - API Keys
|
||||
//! - Runners
|
||||
//! - Jobs
|
||||
|
||||
use crate::auth::{ApiKey, ApiKeyScope};
|
||||
use crate::error::{SupervisorError, SupervisorResult};
|
||||
use hero_job::Job;
|
||||
use std::collections::{HashMap, HashSet};
|
||||
|
||||
/// In-memory storage for all supervisor data
|
||||
pub struct Store {
|
||||
/// API keys (key_value -> ApiKey)
|
||||
api_keys: HashMap<String, ApiKey>,
|
||||
/// Registered runner IDs
|
||||
runners: HashSet<String>,
|
||||
/// In-memory job storage (job_id -> Job)
|
||||
jobs: HashMap<String, Job>,
|
||||
}
|
||||
|
||||
impl Store {
|
||||
/// Create a new store
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
api_keys: HashMap::new(),
|
||||
runners: HashSet::new(),
|
||||
jobs: HashMap::new(),
|
||||
}
|
||||
}
|
||||
|
||||
// ==================== API Key Operations ====================
|
||||
|
||||
/// Create an API key with a specific value
|
||||
pub fn key_create(&mut self, key: ApiKey) -> ApiKey {
|
||||
self.api_keys.insert(key.name.clone(), key.clone());
|
||||
key
|
||||
}
|
||||
|
||||
/// Create a new API key with generated UUID
|
||||
pub fn key_create_new(&mut self, name: String, scope: ApiKeyScope) -> ApiKey {
|
||||
let key = ApiKey::new(name, scope);
|
||||
self.api_keys.insert(key.name.clone(), key.clone());
|
||||
key
|
||||
}
|
||||
|
||||
/// Get an API key by its value
|
||||
pub fn key_get(&self, key_name: &str) -> Option<&ApiKey> {
|
||||
self.api_keys.get(key_name)
|
||||
}
|
||||
|
||||
/// Delete an API key
|
||||
pub fn key_delete(&mut self, key_name: &str) -> Option<ApiKey> {
|
||||
self.api_keys.remove(key_name)
|
||||
}
|
||||
|
||||
/// List all API keys
|
||||
pub fn key_list(&self) -> Vec<ApiKey> {
|
||||
self.api_keys.values().cloned().collect()
|
||||
}
|
||||
|
||||
/// List API keys by scope
|
||||
pub fn key_list_by_scope(&self, scope: ApiKeyScope) -> Vec<ApiKey> {
|
||||
self.api_keys
|
||||
.values()
|
||||
.filter(|k| k.scope == scope)
|
||||
.cloned()
|
||||
.collect()
|
||||
}
|
||||
|
||||
// ==================== Runner Operations ====================
|
||||
|
||||
/// Add a runner
|
||||
pub fn runner_add(&mut self, runner_id: String) -> SupervisorResult<()> {
|
||||
self.runners.insert(runner_id);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Remove a runner
|
||||
pub fn runner_remove(&mut self, runner_id: &str) -> SupervisorResult<()> {
|
||||
self.runners.remove(runner_id);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Check if a runner exists
|
||||
pub fn runner_exists(&self, runner_id: &str) -> bool {
|
||||
self.runners.contains(runner_id)
|
||||
}
|
||||
|
||||
/// List all runner IDs
|
||||
pub fn runner_list_all(&self) -> Vec<String> {
|
||||
self.runners.iter().cloned().collect()
|
||||
}
|
||||
|
||||
// ==================== Job Operations ====================
|
||||
|
||||
/// Store a job in memory
|
||||
pub fn job_store(&mut self, job: Job) -> SupervisorResult<()> {
|
||||
self.jobs.insert(job.id.clone(), job);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Get a job from memory
|
||||
pub fn job_get(&self, job_id: &str) -> SupervisorResult<Job> {
|
||||
self.jobs
|
||||
.get(job_id)
|
||||
.cloned()
|
||||
.ok_or_else(|| SupervisorError::JobNotFound {
|
||||
job_id: job_id.to_string(),
|
||||
})
|
||||
}
|
||||
|
||||
/// Delete a job from memory
|
||||
pub fn job_delete(&mut self, job_id: &str) -> SupervisorResult<()> {
|
||||
self.jobs
|
||||
.remove(job_id)
|
||||
.ok_or_else(|| SupervisorError::JobNotFound {
|
||||
job_id: job_id.to_string(),
|
||||
})?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// List all job IDs
|
||||
pub fn job_list(&self) -> Vec<String> {
|
||||
self.jobs.keys().cloned().collect()
|
||||
}
|
||||
|
||||
/// Check if a job exists
|
||||
pub fn job_exists(&self, job_id: &str) -> bool {
|
||||
self.jobs.contains_key(job_id)
|
||||
}
|
||||
}
|
||||
|
||||
impl Clone for Store {
|
||||
fn clone(&self) -> Self {
|
||||
Self {
|
||||
api_keys: self.api_keys.clone(),
|
||||
runners: self.runners.clone(),
|
||||
jobs: self.jobs.clone(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use hero_job::JobBuilder;

    /// Returns a fresh, empty store.
    fn create_test_store() -> Store {
        Store::new()
    }

    /// Builds a job for `runner` and overrides its ID with `id`.
    fn create_test_job(id: &str, runner: &str) -> Job {
        let builder = JobBuilder::new()
            .caller_id("test_caller")
            .context_id("test_context")
            .runner(runner)
            .executor("test")
            .payload("test payload");
        let mut job = builder.build().unwrap();
        // The builder picks its own ID; the tests need a predictable one.
        job.id = id.to_string();
        job
    }

    #[test]
    fn test_api_key_operations() {
        let mut store = create_test_store();

        // Creation returns the key that was stored.
        let key = store.key_create_new("test_key".to_string(), ApiKeyScope::Admin);
        assert_eq!(key.name, "test_key");
        assert_eq!(key.scope, ApiKeyScope::Admin);

        // Lookup goes through the key value.
        let retrieved = store.key_get(&key.key);
        assert!(retrieved.is_some());
        assert_eq!(retrieved.unwrap().name, "test_key");

        // Listing, unfiltered and by scope.
        assert_eq!(store.key_list().len(), 1);
        assert_eq!(store.key_list_by_scope(ApiKeyScope::Admin).len(), 1);

        // Deletion hands the key back and makes it unreachable.
        let removed = store.key_delete(&key.key);
        assert!(removed.is_some());
        assert!(store.key_get(&key.key).is_none());
    }

    #[test]
    fn test_runner_operations() {
        let mut store = create_test_store();

        // Register a runner.
        assert!(store.runner_add("runner1".to_string()).is_ok());
        assert!(store.runner_exists("runner1"));

        // The listing contains exactly that runner.
        let runners = store.runner_list_all();
        assert_eq!(runners.len(), 1);
        assert!(runners.contains(&"runner1".to_string()));

        // A second listing reports the same count.
        assert_eq!(store.runner_list_all().len(), 1);

        // Unregister it again.
        assert!(store.runner_remove("runner1").is_ok());
        assert!(!store.runner_exists("runner1"));
    }

    #[test]
    fn test_job_operations() {
        let mut store = create_test_store();
        let job = create_test_job("job1", "runner1");

        // Store the job.
        assert!(store.job_store(job.clone()).is_ok());
        assert!(store.job_exists("job1"));

        // Fetch it back.
        let retrieved = store.job_get("job1");
        assert!(retrieved.is_ok());
        assert_eq!(retrieved.unwrap().id, "job1");

        // It shows up in the listing.
        let jobs = store.job_list();
        assert_eq!(jobs.len(), 1);
        assert!(jobs.contains(&"job1".to_string()));

        // Delete it; afterwards every lookup fails.
        assert!(store.job_delete("job1").is_ok());
        assert!(!store.job_exists("job1"));
        assert!(store.job_get("job1").is_err());
    }

    #[test]
    fn test_job_not_found() {
        let store = create_test_store();
        assert!(store.job_get("nonexistent").is_err());
    }

    #[test]
    fn test_multiple_jobs() {
        let mut store = create_test_store();

        // Store job1, job2 and job3.
        for i in 1..=3 {
            let job = create_test_job(&format!("job{}", i), "runner1");
            assert!(store.job_store(job).is_ok());
        }

        // All three are present.
        assert_eq!(store.job_list().len(), 3);
        for id in ["job1", "job2", "job3"] {
            assert!(store.job_exists(id));
        }

        // Removing one leaves the other two.
        assert!(store.job_delete("job2").is_ok());
        assert_eq!(store.job_list().len(), 2);
        assert!(!store.job_exists("job2"));
    }

    #[test]
    fn test_store_clone() {
        let mut store = create_test_store();
        store.runner_add("runner1".to_string()).unwrap();
        store.job_store(create_test_job("job1", "runner1")).unwrap();

        // The clone carries the same runner and job.
        let cloned = store.clone();
        assert!(cloned.runner_exists("runner1"));
        assert!(cloned.job_exists("job1"));
    }
}
|
||||
360
bin/supervisor/src/supervisor.rs
Normal file
360
bin/supervisor/src/supervisor.rs
Normal file
@@ -0,0 +1,360 @@
|
||||
//! Main supervisor implementation for managing multiple actor runners.
|
||||
|
||||
use crate::error::{SupervisorError, SupervisorResult};
|
||||
use crate::store::Store;
|
||||
use hero_job_client::Client as JobClient;
|
||||
use hero_job::{Job, JobStatus};
|
||||
use std::sync::Arc;
|
||||
use tokio::sync::Mutex;
|
||||
|
||||
// Re-export RPC types for convenience
|
||||
pub use jsonrpsee::core::RpcResult;
|
||||
pub use jsonrpsee::types::ErrorObject;
|
||||
|
||||
/// Main supervisor that manages multiple runners
|
||||
#[derive(Clone)]
|
||||
pub struct Supervisor {
|
||||
/// Centralized storage layer with interior mutability
|
||||
pub(crate) store: Arc<Mutex<Store>>,
|
||||
/// Job client for Redis operations
|
||||
pub(crate) job_client: JobClient,
|
||||
/// Redis client for direct operations
|
||||
pub(crate) redis_client: redis::Client,
|
||||
// Optional Osiris client for persistent storage - temporarily disabled
|
||||
// pub(crate) osiris_client: Option<osiris_client::OsirisClient>,
|
||||
}
|
||||
|
||||
impl Supervisor {
|
||||
/// Create a new supervisor builder
|
||||
pub fn builder() -> crate::builder::SupervisorBuilder {
|
||||
crate::builder::SupervisorBuilder::new()
|
||||
}
|
||||
|
||||
/// Create a job (store in memory only, does not dispatch)
|
||||
/// Authorization must be checked by the caller (e.g., OpenRPC layer)
|
||||
pub async fn job_create(&self, job: Job) -> SupervisorResult<String> {
|
||||
let runner = job.runner.clone();
|
||||
let job_id = job.id.clone();
|
||||
|
||||
let mut store = self.store.lock().await;
|
||||
if !store.runner_exists(&runner) {
|
||||
return Err(SupervisorError::RunnerNotFound {
|
||||
runner_id: runner,
|
||||
});
|
||||
}
|
||||
|
||||
// Store job in memory only
|
||||
store.job_store(job)?;
|
||||
Ok(job_id)
|
||||
}
|
||||
|
||||
/// Delete a runner from the supervisor
|
||||
pub async fn runner_delete(&self, runner_id: &str) -> SupervisorResult<()> {
|
||||
self.store.lock().await.runner_remove(runner_id)
|
||||
}
|
||||
|
||||
/// Check if a runner is registered
|
||||
pub async fn has_runner(&self, runner_id: &str) -> bool {
|
||||
self.store.lock().await.runner_exists(runner_id)
|
||||
}
|
||||
|
||||
/// Get a job by job ID from memory
|
||||
pub async fn job_get(&self, job_id: &str) -> SupervisorResult<Job> {
|
||||
self.store.lock().await.job_get(job_id)
|
||||
}
|
||||
|
||||
/// Ping a runner by dispatching a ping job to its queue
|
||||
pub async fn runner_ping(&self, runner_id: &str) -> SupervisorResult<String> {
|
||||
use hero_job::JobBuilder;
|
||||
|
||||
// Check if runner exists
|
||||
let store = self.store.lock().await;
|
||||
if !store.runner_exists(runner_id) {
|
||||
return Err(SupervisorError::RunnerNotFound {
|
||||
runner_id: runner_id.to_string(),
|
||||
});
|
||||
}
|
||||
|
||||
// Create a ping job
|
||||
let ping_job = JobBuilder::new()
|
||||
.caller_id("supervisor_ping")
|
||||
.context_id("ping_context")
|
||||
.payload("ping")
|
||||
.runner(runner_id)
|
||||
.executor("ping")
|
||||
.timeout(10)
|
||||
.build()
|
||||
.map_err(|e| SupervisorError::QueueError {
|
||||
runner_id: runner_id.to_string(),
|
||||
reason: format!("Failed to create ping job: {}", e),
|
||||
})?;
|
||||
|
||||
// Store and dispatch the ping job
|
||||
let job_id = ping_job.id.clone();
|
||||
drop(store);
|
||||
self.store.lock().await.job_store(ping_job.clone())?;
|
||||
self.job_client
|
||||
.store_job_in_redis(&ping_job)
|
||||
.await
|
||||
.map_err(SupervisorError::from)?;
|
||||
self.job_client
|
||||
.job_run(&job_id, runner_id)
|
||||
.await
|
||||
.map_err(SupervisorError::from)?;
|
||||
|
||||
Ok(job_id)
|
||||
}
|
||||
|
||||
/// Stop a job by ID
|
||||
pub async fn job_stop(&self, job_id: &str) -> SupervisorResult<()> {
|
||||
// For now, we'll implement a basic stop by setting status to Stopping
|
||||
let _ = self.job_client.set_job_status(job_id, JobStatus::Stopping).await;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Delete a job by ID
|
||||
/// Authorization must be checked by the caller (e.g., OpenRPC layer)
|
||||
pub async fn job_delete(&self, job_id: &str) -> SupervisorResult<()> {
|
||||
self.store.lock().await.job_delete(job_id)
|
||||
}
|
||||
|
||||
/// List all managed runners
|
||||
pub async fn runner_list(&self) -> Vec<String> {
|
||||
self.store.lock().await.runner_list_all()
|
||||
}
|
||||
|
||||
/// Check if a runner is registered
|
||||
pub async fn runner_is_registered(&self, runner_id: &str) -> bool {
|
||||
self.store.lock().await.runner_exists(runner_id)
|
||||
}
|
||||
|
||||
/// Start a job by dispatching it to a runner's queue (fire-and-forget)
|
||||
pub async fn job_start(&self, job_id: &str) -> SupervisorResult<()> {
|
||||
// Get the job from memory
|
||||
let job = self.job_get(job_id).await?;
|
||||
let runner = job.runner.clone();
|
||||
|
||||
let store = self.store.lock().await;
|
||||
if !store.runner_exists(&runner) {
|
||||
return Err(SupervisorError::RunnerNotFound {
|
||||
runner_id: runner,
|
||||
});
|
||||
}
|
||||
|
||||
// Store job in Redis and dispatch to runner queue
|
||||
self.job_client
|
||||
.store_job_in_redis(&job)
|
||||
.await
|
||||
.map_err(SupervisorError::from)?;
|
||||
|
||||
self.job_client
|
||||
.job_run(&job.id, &runner)
|
||||
.await
|
||||
.map_err(SupervisorError::from)
|
||||
}
|
||||
|
||||
/// Run a job: create, dispatch, and wait for result
|
||||
pub async fn job_run(&self, job: Job) -> SupervisorResult<String> {
|
||||
let runner = job.runner.clone();
|
||||
|
||||
let mut store = self.store.lock().await;
|
||||
if !store.runner_exists(&runner) {
|
||||
return Err(SupervisorError::RunnerNotFound {
|
||||
runner_id: runner,
|
||||
});
|
||||
}
|
||||
|
||||
// Store job in memory
|
||||
store.job_store(job.clone())?;
|
||||
drop(store);
|
||||
|
||||
// Use job_client's job_run_wait which handles store in Redis, dispatch, and wait
|
||||
self.job_client
|
||||
.job_run_wait(&job, &runner, 30)
|
||||
.await
|
||||
.map_err(SupervisorError::from)
|
||||
}
|
||||
|
||||
// Secret management methods removed - use API key management instead
|
||||
// See add_api_key, remove_api_key, list_api_keys methods below
|
||||
|
||||
/// List all job IDs from memory
|
||||
pub async fn job_list(&self) -> Vec<String> {
|
||||
self.store.lock().await.job_list()
|
||||
}
|
||||
|
||||
/// Get the status of a job
|
||||
pub async fn job_status(&self, job_id: &str) -> SupervisorResult<JobStatus> {
|
||||
// First check if job exists in memory (created but not started)
|
||||
let store = self.store.lock().await;
|
||||
if let Ok(_job) = store.job_get(job_id) {
|
||||
drop(store);
|
||||
// Try to get status from Redis
|
||||
match self.job_client.get_status(job_id).await {
|
||||
Ok(status) => return Ok(status),
|
||||
Err(hero_job_client::ClientError::Job(hero_job::JobError::NotFound(_))) => {
|
||||
// Job exists in memory but not in Redis - it's created but not dispatched
|
||||
return Ok(JobStatus::Created);
|
||||
}
|
||||
Err(e) => return Err(SupervisorError::from(e)),
|
||||
}
|
||||
}
|
||||
drop(store);
|
||||
|
||||
// Job not in memory, try Redis
|
||||
let status = self.job_client.get_status(job_id).await
|
||||
.map_err(|e| match e {
|
||||
hero_job_client::ClientError::Job(hero_job::JobError::NotFound(_)) => {
|
||||
SupervisorError::JobNotFound { job_id: job_id.to_string() }
|
||||
}
|
||||
_ => SupervisorError::from(e)
|
||||
})?;
|
||||
|
||||
Ok(status)
|
||||
}
|
||||
|
||||
/// Get the result of a job (returns immediately with current result or error)
|
||||
pub async fn job_result(&self, job_id: &str) -> SupervisorResult<Option<String>> {
|
||||
// Use client's get_status to check if job exists and get its status
|
||||
let status = self.job_client.get_status(job_id).await
|
||||
.map_err(|e| match e {
|
||||
hero_job_client::ClientError::Job(hero_job::JobError::NotFound(_)) => {
|
||||
SupervisorError::JobNotFound { job_id: job_id.to_string() }
|
||||
}
|
||||
_ => SupervisorError::from(e)
|
||||
})?;
|
||||
|
||||
// If job has error status, get the error message
|
||||
if status.as_str() == "error" {
|
||||
let error_msg = self.job_client.get_error(job_id).await
|
||||
.map_err(SupervisorError::from)?;
|
||||
|
||||
return Ok(Some(format!("Error: {}", error_msg.unwrap_or_else(|| "Unknown error".to_string()))));
|
||||
}
|
||||
|
||||
// Use client's get_result to get the result
|
||||
let result = self.job_client.get_result(job_id).await
|
||||
.map_err(SupervisorError::from)?;
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
// API Key Management Methods
|
||||
|
||||
/// Get logs for a specific job
|
||||
///
|
||||
/// Reads log files from the logs/actor/<runner_name>/job-<job_id>/ directory
|
||||
pub async fn job_logs(&self, job_id: &str, lines: Option<usize>) -> SupervisorResult<Vec<String>> {
|
||||
// Determine the logs directory path
|
||||
// Default to ~/hero/logs
|
||||
let logs_root = if let Some(home) = std::env::var_os("HOME") {
|
||||
std::path::PathBuf::from(home).join("hero").join("logs")
|
||||
} else {
|
||||
std::path::PathBuf::from("logs")
|
||||
};
|
||||
|
||||
// Check if logs directory exists
|
||||
if !logs_root.exists() {
|
||||
return Ok(vec![format!("Logs directory not found: {}", logs_root.display())]);
|
||||
}
|
||||
|
||||
let actor_dir = logs_root.join("actor");
|
||||
if !actor_dir.exists() {
|
||||
return Ok(vec![format!("Actor logs directory not found: {}", actor_dir.display())]);
|
||||
}
|
||||
|
||||
// Search through all runner directories to find the job
|
||||
if let Ok(entries) = std::fs::read_dir(&actor_dir) {
|
||||
for entry in entries.flatten() {
|
||||
if entry.path().is_dir() {
|
||||
let job_dir = entry.path().join(format!("job-{}", job_id));
|
||||
|
||||
if job_dir.exists() {
|
||||
// Read all log files in the directory
|
||||
let mut all_logs = Vec::new();
|
||||
|
||||
if let Ok(log_entries) = std::fs::read_dir(&job_dir) {
|
||||
// Collect all log files with their paths for sorting
|
||||
let mut log_files: Vec<_> = log_entries
|
||||
.flatten()
|
||||
.filter(|e| {
|
||||
if !e.path().is_file() {
|
||||
return false;
|
||||
}
|
||||
// Accept files that start with "log" (covers log.YYYY-MM-DD-HH format)
|
||||
e.file_name().to_string_lossy().starts_with("log")
|
||||
})
|
||||
.collect();
|
||||
|
||||
// Sort by filename (which includes timestamp for hourly rotation)
|
||||
log_files.sort_by_key(|e| e.path());
|
||||
|
||||
// Read files in order
|
||||
for entry in log_files {
|
||||
if let Ok(content) = std::fs::read_to_string(entry.path()) {
|
||||
all_logs.extend(content.lines().map(|s| s.to_string()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If lines limit is specified, return only the last N lines
|
||||
if let Some(n) = lines {
|
||||
let start = all_logs.len().saturating_sub(n);
|
||||
return Ok(all_logs[start..].to_vec());
|
||||
} else {
|
||||
return Ok(all_logs);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If no logs found, return helpful message
|
||||
Ok(vec![format!("No logs found for job: {}", job_id)])
|
||||
}
|
||||
|
||||
// API Key Management - These methods provide direct access to the key store
|
||||
// Authorization checking should be done at the OpenRPC layer before calling these
|
||||
|
||||
/// Get an API key by its value
|
||||
pub(crate) async fn key_get(&self, key_id: &str) -> Option<crate::auth::ApiKey> {
|
||||
self.store.lock().await.key_get(key_id).cloned()
|
||||
}
|
||||
|
||||
/// Create an API key with a specific value
|
||||
pub(crate) async fn key_create(&self, key: crate::auth::ApiKey) -> crate::auth::ApiKey {
|
||||
self.store.lock().await.key_create(key)
|
||||
}
|
||||
|
||||
/// Delete an API key
|
||||
pub(crate) async fn key_delete(&self, key_id: &str) -> Option<crate::auth::ApiKey> {
|
||||
self.store.lock().await.key_delete(key_id)
|
||||
}
|
||||
|
||||
/// List all API keys
|
||||
pub(crate) async fn key_list(&self) -> Vec<crate::auth::ApiKey> {
|
||||
self.store.lock().await.key_list()
|
||||
}
|
||||
|
||||
/// List API keys by scope
|
||||
pub(crate) async fn key_list_by_scope(&self, scope: crate::auth::ApiKeyScope) -> Vec<crate::auth::ApiKey> {
|
||||
self.store.lock().await.key_list_by_scope(scope)
|
||||
}
|
||||
|
||||
// Runner Management
|
||||
|
||||
/// Create a new runner
|
||||
/// Authorization must be checked by the caller (e.g., OpenRPC layer)
|
||||
pub async fn runner_create(&self, runner_id: String) -> SupervisorResult<String> {
|
||||
self.store.lock().await.runner_add(runner_id.clone())?;
|
||||
Ok(runner_id)
|
||||
}
|
||||
|
||||
/// Create a new API key with generated UUID
|
||||
pub async fn create_api_key(&self, name: String, scope: crate::auth::ApiKeyScope) -> crate::auth::ApiKey {
|
||||
self.store.lock().await.key_create_new(name, scope)
|
||||
}
|
||||
}
|
||||
|
||||
// Note: Default implementation removed because it requires async initialization
|
||||
// Use Supervisor::builder() for proper initialization
|
||||
Reference in New Issue
Block a user