move repos into monorepo

This commit is contained in:
Timur Gordon
2025-11-13 20:44:00 +01:00
commit 4b23e5eb7f
204 changed files with 33737 additions and 0 deletions

111
bin/supervisor/src/auth.rs Normal file
View File

@@ -0,0 +1,111 @@
//! Authentication and API key management
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use uuid::Uuid;
/// API key scope/permission level.
///
/// Serialized by serde in lowercase (`"admin"`, `"registrar"`, `"user"`),
/// which are the same spellings returned by `ApiKeyScope::as_str`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum ApiKeyScope {
/// Full access - can manage keys, runners, jobs
Admin,
/// Can register new runners
Registrar,
/// Can create and manage jobs
User,
}
impl ApiKeyScope {
pub fn as_str(&self) -> &'static str {
match self {
ApiKeyScope::Admin => "admin",
ApiKeyScope::Registrar => "registrar",
ApiKeyScope::User => "user",
}
}
}
/// An API key with metadata.
///
/// Built through `ApiKey::new` (generated UUID value) or `ApiKey::with_key`
/// (caller-supplied value).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ApiKey {
/// The actual key value (UUID or custom string)
pub key: String,
/// Human-readable name for the key
pub name: String,
/// Permission scope
pub scope: ApiKeyScope,
/// When the key was created; the constructors store the current UTC time in RFC 3339 form
pub created_at: String,
/// Optional expiration timestamp; the constructors always leave this as `None`.
// NOTE(review): no expiry check is visible in the store or the RPC middleware - confirm whether this is enforced anywhere.
pub expires_at: Option<String>,
}
impl ApiKey {
    /// Build a key called `name` whose value is a freshly generated v4 UUID.
    pub fn new(name: String, scope: ApiKeyScope) -> Self {
        let generated = Uuid::new_v4().to_string();
        Self::with_key(generated, name, scope)
    }

    /// Build a key around a caller-supplied key value.
    ///
    /// `created_at` is stamped with the current UTC time in RFC 3339 form and
    /// no expiration is set.
    pub fn with_key(key: String, name: String, scope: ApiKeyScope) -> Self {
        let created_at = chrono::Utc::now().to_rfc3339();
        Self {
            key,
            name,
            scope,
            created_at,
            expires_at: None,
        }
    }
}
/// Response for auth verification (returned by the `auth.verify` RPC method).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AuthVerifyResponse {
/// Whether the presented credentials were accepted
pub valid: bool,
/// Name reported for the verified key
pub name: String,
/// Scope reported for the verified key, as free-form text
pub scope: String,
}
/// Method authorization requirements
/// Maps RPC method names to required scopes
pub fn get_method_required_scopes(method: &str) -> Option<Vec<ApiKeyScope>> {
use ApiKeyScope::*;
match method {
// Admin-only methods
"key.create" | "key.generate" | "key.delete" | "key.list" |
"supervisor.info" => {
Some(vec![Admin])
}
// Admin or Registrar methods
"runner.create" | "runner.remove" => {
Some(vec![Admin, Registrar])
}
// Admin or User methods
"job.create" | "job.run" | "job.start" | "job.stop" | "job.delete" => {
Some(vec![Admin, User])
}
// Public methods (no auth required)
"rpc.discover" => None,
// Any authenticated user (read-only operations)
"runner.list" | "runner.ping" |
"job.get" | "job.list" | "job.status" | "job.result" | "job.logs" |
"auth.verify" => {
Some(vec![Admin, Registrar, User])
}
// Default: require authentication
_ => Some(vec![Admin, Registrar, User]),
}
}

View File

@@ -0,0 +1,112 @@
//! Hero Supervisor Binary
use hero_supervisor::SupervisorBuilder;
use clap::Parser;
use log::{error, info};
use std::sync::Arc;
use tokio::sync::Mutex;
// Command-line arguments of the supervisor binary, parsed with clap's derive API.
// The `///` comments below are consumed by clap as help text, so they are
// user-facing strings; reviewer notes are written as plain `//` comments.
/// Hero Supervisor - manages actors and dispatches jobs
#[derive(Parser, Debug)]
#[command(name = "supervisor")]
#[command(about = "Hero Supervisor - manages actors and dispatches jobs")]
struct Args {
// NOTE(review): `redis_url` is parsed but `main` in this file never reads it.
/// Redis URL for job queue
#[arg(long, default_value = "redis://127.0.0.1:6379")]
redis_url: String,
// NOTE(review): `namespace` is parsed but `main` in this file never reads it.
/// Namespace for Redis keys
#[arg(long, default_value = "")]
namespace: String,
/// Admin secrets (required, can be specified multiple times)
#[arg(long = "admin-secret", value_name = "SECRET", required = true)]
admin_secrets: Vec<String>,
/// User secrets (can be specified multiple times)
#[arg(long = "user-secret", value_name = "SECRET")]
user_secrets: Vec<String>,
/// Register secrets (can be specified multiple times)
#[arg(long = "register-secret", value_name = "SECRET")]
register_secrets: Vec<String>,
/// Port for OpenRPC HTTP server
#[arg(long, default_value = "3030")]
port: u16,
/// Bind address for OpenRPC HTTP server
#[arg(long, default_value = "127.0.0.1")]
bind_address: String,
// `value_delimiter = ','` lets one `--runners a,b,c` flag supply several names.
/// Pre-configured runner names (comma-separated)
#[arg(long, value_name = "NAMES", value_delimiter = ',')]
runners: Vec<String>,
}
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
env_logger::init();
let args = Args::parse();
// Build supervisor
let mut builder = SupervisorBuilder::new()
.admin_secrets(args.admin_secrets);
if !args.user_secrets.is_empty() {
builder = builder.user_secrets(args.user_secrets);
}
if !args.register_secrets.is_empty() {
builder = builder.register_secrets(args.register_secrets);
}
let mut supervisor = builder.build().await?;
// Register pre-configured runners
if !args.runners.is_empty() {
for runner_name in &args.runners {
match supervisor.runner_create(runner_name.clone()).await {
Ok(_) => {},
Err(e) => error!("Failed to register runner '{}': {}", runner_name, e),
}
}
}
// Start OpenRPC server
use hero_supervisor::openrpc::start_http_openrpc_server;
let supervisor_clone = supervisor.clone();
let bind_addr = args.bind_address.clone();
let port = args.port;
tokio::spawn(async move {
match start_http_openrpc_server(supervisor_clone, &bind_addr, port).await {
Ok(handle) => {
handle.stopped().await;
error!("OpenRPC server stopped unexpectedly");
}
Err(e) => {
error!("OpenRPC server error: {}", e);
}
}
});
tokio::time::sleep(tokio::time::Duration::from_millis(500)).await;
// Print startup info
println!("📡 http://{}:{}", args.bind_address, args.port);
info!("Hero Supervisor is running. Press Ctrl+C to shutdown.");
// Set up graceful shutdown
tokio::spawn(async move {
tokio::signal::ctrl_c().await.expect("Failed to listen for ctrl+c");
info!("Received shutdown signal");
std::process::exit(0);
});
// Keep the application running
loop {
tokio::time::sleep(tokio::time::Duration::from_secs(1)).await;
}
}

View File

@@ -0,0 +1,198 @@
//! Supervisor builder for configuration and initialization.
use crate::error::{SupervisorError, SupervisorResult};
use crate::Supervisor;
use hero_job_client::ClientBuilder;
/// Builder for constructing a Supervisor instance.
///
/// Collects bootstrap secrets and runner IDs; `build()` turns them into a
/// `Supervisor` backed by a fresh in-memory store.
pub struct SupervisorBuilder {
/// Set of registered runner IDs
runners: std::collections::HashSet<String>,
/// Redis URL for connection.
// NOTE(review): initialised in `new()`; no setter for it is visible in this file.
redis_url: String,
/// Admin secrets for bootstrapping API keys
admin_secrets: Vec<String>,
/// User secrets for bootstrapping API keys
user_secrets: Vec<String>,
/// Register secrets for bootstrapping API keys
register_secrets: Vec<String>,
/// Builder for the job client that `build()` creates
client_builder: ClientBuilder,
// The four Osiris-related fields below are stored by their setters, but the
// code in `build()` that would consume them is currently commented out.
/// Osiris URL for queries (optional)
osiris_url: Option<String>,
/// Supervisor URL for commands via Osiris (optional)
supervisor_url: Option<String>,
/// Supervisor secret for Osiris commands (optional)
supervisor_secret: Option<String>,
/// Runner name for Osiris operations (optional)
osiris_runner_name: Option<String>,
}
impl SupervisorBuilder {
/// Create a new supervisor builder
pub fn new() -> Self {
Self {
runners: std::collections::HashSet::new(),
redis_url: "redis://localhost:6379".to_string(),
admin_secrets: Vec::new(),
user_secrets: Vec::new(),
register_secrets: Vec::new(),
client_builder: ClientBuilder::new(),
osiris_url: None,
supervisor_url: None,
supervisor_secret: None,
osiris_runner_name: None,
}
}
/// Set the Osiris URL for queries
pub fn osiris_url<S: Into<String>>(mut self, url: S) -> Self {
self.osiris_url = Some(url.into());
self
}
/// Set the Supervisor URL for Osiris commands
pub fn supervisor_url_for_osiris<S: Into<String>>(mut self, url: S) -> Self {
self.supervisor_url = Some(url.into());
self
}
/// Set the Supervisor secret for Osiris commands
pub fn supervisor_secret<S: Into<String>>(mut self, secret: S) -> Self {
self.supervisor_secret = Some(secret.into());
self
}
/// Set the runner name for Osiris operations
pub fn osiris_runner_name<S: Into<String>>(mut self, name: S) -> Self {
self.osiris_runner_name = Some(name.into());
self
}
/// Add an admin secret
pub fn add_admin_secret<S: Into<String>>(mut self, secret: S) -> Self {
self.admin_secrets.push(secret.into());
self
}
/// Add multiple admin secrets
pub fn admin_secrets<I, S>(mut self, secrets: I) -> Self
where
I: IntoIterator<Item = S>,
S: Into<String>,
{
self.admin_secrets.extend(secrets.into_iter().map(|s| s.into()));
self
}
/// Add a user secret
pub fn add_user_secret<S: Into<String>>(mut self, secret: S) -> Self {
self.user_secrets.push(secret.into());
self
}
/// Add multiple user secrets
pub fn user_secrets<I, S>(mut self, secrets: I) -> Self
where
I: IntoIterator<Item = S>,
S: Into<String>,
{
self.user_secrets.extend(secrets.into_iter().map(|s| s.into()));
self
}
/// Add a register secret
pub fn add_register_secret<S: Into<String>>(mut self, secret: S) -> Self {
self.register_secrets.push(secret.into());
self
}
/// Add multiple register secrets
pub fn register_secrets<I, S>(mut self, secrets: I) -> Self
where
I: IntoIterator<Item = S>,
S: Into<String>,
{
self.register_secrets.extend(secrets.into_iter().map(|s| s.into()));
self
}
/// Add a runner to the supervisor
pub fn add_runner(mut self, runner_id: String) -> Self {
self.runners.insert(runner_id);
self
}
/// Build the supervisor
pub async fn build(self) -> SupervisorResult<Supervisor> {
// Create Redis client
let redis_client = redis::Client::open(self.redis_url.as_str())
.map_err(|e| SupervisorError::ConfigError {
reason: format!("Invalid Redis URL: {}", e),
})?;
// Create the store
let mut store = crate::store::Store::new();
// Add admin secrets as API keys
for secret in &self.admin_secrets {
store.key_create(
crate::auth::ApiKey::new(secret.clone(), crate::auth::ApiKeyScope::Admin),
);
}
// Add user secrets as API keys
for secret in &self.user_secrets {
store.key_create(
crate::auth::ApiKey::new(secret.clone(), crate::auth::ApiKeyScope::User),
);
}
// Add register secrets as API keys
for secret in &self.register_secrets {
store.key_create(
crate::auth::ApiKey::new(secret.clone(), crate::auth::ApiKeyScope::Registrar),
);
}
// Build the client
let client = self.client_builder.build().await?;
// Build Osiris client if configured
// Temporarily disabled - needs update
// let osiris_client = if let (Some(osiris_url), Some(supervisor_url)) =
// (self.osiris_url, self.supervisor_url) {
// let mut builder = osiris_client::OsirisClient::builder()
// .osiris_url(osiris_url)
// .supervisor_url(supervisor_url)
// .runner_name(self.osiris_runner_name.unwrap_or_else(|| "osiris-runner".to_string()));
//
// if let Some(secret) = self.supervisor_secret {
// builder = builder.supervisor_secret(secret);
// }
//
// Some(builder.build().map_err(|e| SupervisorError::ConfigError {
// reason: format!("Failed to build Osiris client: {}", e),
// })?)
// } else {
// None
// };
// Add pre-configured runners to the store
for runner_id in self.runners {
let _ = store.runner_add(runner_id);
}
Ok(Supervisor {
store: std::sync::Arc::new(tokio::sync::Mutex::new(store)),
job_client: client,
redis_client,
// osiris_client, // Temporarily disabled
})
}
}
impl Default for SupervisorBuilder {
fn default() -> Self {
Self::new()
}
}

View File

@@ -0,0 +1,73 @@
//! Error types for supervisor operations.
use thiserror::Error;
use jsonrpsee::types::{ErrorObject, ErrorObjectOwned};
/// Result type for supervisor operations: `Ok(T)` on success, otherwise a [`SupervisorError`].
pub type SupervisorResult<T> = Result<T, SupervisorError>;
/// Errors that can occur during supervisor operations.
///
/// The `#[error(...)]` attributes define the `Display` text of each variant;
/// variants marked `#[from]` can be produced from their source error with `?`.
#[derive(Debug, Error)]
pub enum SupervisorError {
/// No runner with the given ID is known
#[error("Runner '{runner_id}' not found")]
RunnerNotFound { runner_id: String },
/// A runner with the given ID already exists
#[error("Runner '{runner_id}' is already registered")]
RunnerAlreadyRegistered { runner_id: String },
/// No job with the given ID is known
#[error("Job '{job_id}' not found")]
JobNotFound { job_id: String },
/// A job could not be queued for the named runner
#[error("Failed to queue job for runner '{runner_id}': {reason}")]
QueueError { runner_id: String, reason: String },
/// Invalid configuration (e.g. an unparsable Redis URL in the builder)
#[error("Configuration error: {reason}")]
ConfigError { reason: String },
/// The presented secret or API key was rejected
#[error("Invalid secret or API key: {0}")]
InvalidSecret(String),
/// Authentication failed
#[error("Authentication error: {message}")]
AuthenticationError { message: String },
/// The caller is authenticated but lacks the required permission
#[error("Insufficient permissions: {message}")]
PermissionDenied { message: String },
/// Error reported by the Redis client
#[error("Redis error: {source}")]
RedisError {
#[from]
source: redis::RedisError,
},
/// Error reported by the `hero_job` crate
#[error("Job error: {source}")]
JobError {
#[from]
source: hero_job::JobError,
},
/// Error reported by the job client
#[error("Job client error: {source}")]
JobClientError {
#[from]
source: hero_job_client::ClientError,
},
/// Underlying I/O error
#[error("IO error: {source}")]
IoError {
#[from]
source: std::io::Error,
},
/// Error from the Osiris client, carried as text
#[error("Osiris client error: {0}")]
OsirisError(String),
}
/// Conversion from `SupervisorError` into a JSON-RPC error object.
///
/// Every variant is reported with the JSON-RPC "internal error" code and the
/// error's display text prefixed by `Supervisor error: `; no data payload is
/// attached.
impl From<SupervisorError> for ErrorObject<'static> {
    fn from(err: SupervisorError) -> Self {
        const INTERNAL_ERROR: i32 = -32603;
        let message = format!("Supervisor error: {err}");
        ErrorObject::owned(INTERNAL_ERROR, message, None::<()>)
    }
}

19
bin/supervisor/src/lib.rs Normal file
View File

@@ -0,0 +1,19 @@
//! Hero Supervisor - Actor management for the Hero ecosystem.
//!
//! See README.md for detailed documentation and usage examples.
pub mod supervisor;
pub mod builder;
pub mod error;
pub mod openrpc;
pub mod auth;
pub mod store;
// Re-export job client for convenience
pub use hero_job_client as job_client;
// Re-export main types for convenience
pub use supervisor::Supervisor;
pub use builder::SupervisorBuilder;
pub use error::{SupervisorError, SupervisorResult};
pub use hero_job::{Job, JobBuilder, JobStatus, JobError};

View File

@@ -0,0 +1,474 @@
//! OpenRPC server implementation.
use jsonrpsee::{
core::{RpcResult, async_trait},
server::middleware::rpc::{RpcServiceT, RpcServiceBuilder, MethodResponse},
proc_macros::rpc,
server::{Server, ServerHandle},
types::{ErrorObject, ErrorObjectOwned},
};
use tower_http::cors::{CorsLayer, Any};
use anyhow;
use log::{debug, info, error};
use crate::{auth::ApiKey, supervisor::Supervisor};
use crate::error::SupervisorError;
use hero_job::{Job, JobResult, JobStatus};
use serde::{Deserialize, Serialize};
use std::net::SocketAddr;
use std::sync::Arc;
use std::fs;
use tokio::sync::Mutex;
/// Read and parse the OpenRPC document at `../../docs/openrpc.json`.
///
/// The path is relative, so it is resolved against the process's current
/// working directory at call time.
///
/// # Errors
///
/// Fails when the file cannot be read or does not contain valid JSON.
fn load_openrpc_spec() -> Result<serde_json::Value, Box<dyn std::error::Error>> {
    const SPEC_PATH: &str = "../../docs/openrpc.json";

    let raw = fs::read_to_string(SPEC_PATH)?;
    let document: serde_json::Value = serde_json::from_str(&raw)?;
    debug!("Loaded OpenRPC specification from: {}", SPEC_PATH);
    Ok(document)
}
/// Request parameters for generating API keys (auto-generates key value).
/// Used by the `key.generate` RPC method.
#[derive(Debug, Deserialize, Serialize)]
pub struct GenerateApiKeyParams {
/// Human-readable name for the new key
pub name: String,
/// Requested scope; `key_generate` lowercases it before matching
pub scope: String, // "admin", "registrar", or "user"
}
/// Job status response with metadata.
// NOTE(review): not referenced by any RPC method visible in this file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct JobStatusResponse {
/// ID of the job the status refers to
pub job_id: String,
/// Status as text
pub status: String,
/// Creation timestamp as text
pub created_at: String,
}
/// Supervisor information response (returned by the `supervisor.info` RPC method).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SupervisorInfo {
/// URL the supervisor reports for itself
pub server_url: String,
}
/// OpenRPC trait - maps directly to Supervisor methods
/// This trait exists only for jsonrpsee's macro system.
/// The implementation below is just error type conversion -
/// all actual logic lives in Supervisor methods.
///
/// The `#[method(name = "...")]` strings are the RPC method names on the wire
/// and are the names the authorization table in `auth.rs` is keyed on.
#[rpc(server)]
pub trait SupervisorRpc {
/// Create a job without queuing it to a runner; returns a string (presumably the job ID)
#[method(name = "job.create")]
async fn job_create(&self, params: Job) -> RpcResult<String>;
/// Get a job by job ID
#[method(name = "job.get")]
async fn job_get(&self, job_id: String) -> RpcResult<Job>;
/// Start a previously created job by queuing it to its assigned runner
#[method(name = "job.start")]
async fn job_start(&self, job_id: String) -> RpcResult<()>;
/// Run a job on the appropriate runner and return the result
#[method(name = "job.run")]
async fn job_run(&self, params: Job) -> RpcResult<JobResult>;
/// Get the current status of a job
#[method(name = "job.status")]
async fn job_status(&self, job_id: String) -> RpcResult<JobStatus>;
/// Get the result of a job.
/// When no result is available the implementation in this file answers with
/// `JobResult::Error` rather than an RPC error.
#[method(name = "job.result")]
async fn job_result(&self, job_id: String) -> RpcResult<JobResult>;
/// Get logs for a specific job
#[method(name = "job.logs")]
async fn job_logs(&self, job_id: String) -> RpcResult<Vec<String>>;
/// Stop a running job
#[method(name = "job.stop")]
async fn job_stop(&self, job_id: String) -> RpcResult<()>;
/// Delete a job from the system
#[method(name = "job.delete")]
async fn job_delete(&self, job_id: String) -> RpcResult<()>;
/// List all jobs
#[method(name = "job.list")]
async fn job_list(&self) -> RpcResult<Vec<Job>>;
/// Register a runner by its ID
#[method(name = "runner.create")]
async fn runner_create(&self, runner_id: String) -> RpcResult<()>;
/// Delete a runner from the supervisor (exposed on the wire as `runner.remove`)
#[method(name = "runner.remove")]
async fn runner_delete(&self, runner_id: String) -> RpcResult<()>;
/// List all runner IDs
#[method(name = "runner.list")]
async fn runner_list(&self) -> RpcResult<Vec<String>>;
/// Ping a runner (dispatch a ping job)
#[method(name = "runner.ping")]
async fn ping_runner(&self, runner_id: String) -> RpcResult<String>;
/// Create an API key with provided key value
#[method(name = "key.create")]
async fn key_create(&self, key: ApiKey) -> RpcResult<()>;
/// Generate a new API key with auto-generated key value
#[method(name = "key.generate")]
async fn key_generate(&self, params: GenerateApiKeyParams) -> RpcResult<ApiKey>;
/// Delete an API key
#[method(name = "key.delete")]
async fn key_delete(&self, key_id: String) -> RpcResult<()>;
/// List all API keys as `ApiKey` records.
/// NOTE(review): an earlier comment said "counts only", but the return type
/// carries whole records, which include the `key` field.
#[method(name = "key.list")]
async fn key_list(&self) -> RpcResult<Vec<ApiKey>>;
/// Confirm that the caller's API key was accepted.
/// Takes no parameters; the key itself is checked by the authorization middleware.
#[method(name = "auth.verify")]
async fn auth_verify(&self) -> RpcResult<crate::auth::AuthVerifyResponse>;
/// Get supervisor information
#[method(name = "supervisor.info")]
async fn supervisor_info(&self) -> RpcResult<SupervisorInfo>;
/// OpenRPC discovery method - returns the OpenRPC document describing this API
#[method(name = "rpc.discover")]
async fn rpc_discover(&self) -> RpcResult<serde_json::Value>;
}
/// RPC implementation on Supervisor
///
/// This implementation is ONLY for error type conversion (SupervisorError → ErrorObject).
/// All business logic is in Supervisor methods - these are thin wrappers.
/// Authorization is handled by middleware before methods are called.
///
/// Most wrappers call a same-named method on `self`; per the note above these
/// are presumably the inherent `Supervisor` methods (defined outside this file),
/// and `?` converts their `SupervisorError` into an RPC error object.
#[async_trait]
impl SupervisorRpcServer for Supervisor {
async fn job_create(&self, job: Job) -> RpcResult<String> {
Ok(self.job_create(job).await?)
}
async fn job_get(&self, job_id: String) -> RpcResult<Job> {
Ok(self.job_get(&job_id).await?)
}
async fn job_list(&self) -> RpcResult<Vec<Job>> {
let job_ids = self.job_list().await;
let mut jobs = Vec::new();
for job_id in job_ids {
// Best effort: IDs whose job cannot be fetched are skipped silently.
if let Ok(job) = self.job_get(&job_id).await {
jobs.push(job);
}
}
Ok(jobs)
}
async fn job_run(&self, job: Job) -> RpcResult<JobResult> {
// A successful run is always reported as `JobResult::Success`.
let output = self.job_run(job).await?;
Ok(JobResult::Success { success: output })
}
async fn job_start(&self, job_id: String) -> RpcResult<()> {
self.job_start(&job_id).await?;
Ok(())
}
async fn job_status(&self, job_id: String) -> RpcResult<JobStatus> {
Ok(self.job_status(&job_id).await?)
}
async fn job_logs(&self, job_id: String) -> RpcResult<Vec<String>> {
// The second argument is passed as `None`; its meaning is defined by the
// callee outside this file (presumably an optional line limit - TODO confirm).
Ok(self.job_logs(&job_id, None).await?)
}
async fn job_result(&self, job_id: String) -> RpcResult<JobResult> {
match self.job_result(&job_id).await? {
Some(result) => {
// Results are plain strings; one starting with "Error:" is treated as a failure.
if result.starts_with("Error:") {
Ok(JobResult::Error { error: result })
} else {
Ok(JobResult::Success { success: result })
}
},
// A missing result is reported inside the payload, not as an RPC error.
None => Ok(JobResult::Error { error: "Job result not available".to_string() })
}
}
async fn job_stop(&self, job_id: String) -> RpcResult<()> {
self.job_stop(&job_id).await?;
Ok(())
}
async fn job_delete(&self, job_id: String) -> RpcResult<()> {
self.job_delete(&job_id).await?;
Ok(())
}
async fn runner_create(&self, runner_id: String) -> RpcResult<()> {
self.runner_create(runner_id).await?;
Ok(())
}
async fn runner_delete(&self, runner_id: String) -> RpcResult<()> {
Ok(self.runner_delete(&runner_id).await?)
}
async fn runner_list(&self) -> RpcResult<Vec<String>> {
Ok(self.runner_list().await)
}
async fn ping_runner(&self, runner_id: String) -> RpcResult<String> {
Ok(self.runner_ping(&runner_id).await?)
}
async fn key_create(&self, key: ApiKey) -> RpcResult<()> {
// NOTE(review): the outcome of the call is discarded, so the caller always
// receives success - confirm this is intended.
let _ = self.key_create(key).await;
Ok(())
}
async fn key_generate(&self, params: GenerateApiKeyParams) -> RpcResult<ApiKey> {
// Parse scope (case-insensitive); anything else is rejected with -32602 (invalid params)
let api_scope = match params.scope.to_lowercase().as_str() {
"admin" => crate::auth::ApiKeyScope::Admin,
"registrar" => crate::auth::ApiKeyScope::Registrar,
"user" => crate::auth::ApiKeyScope::User,
_ => return Err(ErrorObject::owned(-32602, "Invalid scope. Must be 'admin', 'registrar', or 'user'", None::<()>)),
};
let api_key = self.create_api_key(params.name, api_scope).await;
Ok(api_key)
}
async fn key_delete(&self, key_id: String) -> RpcResult<()> {
// `None` from the delete call is reported as an RPC error.
self.key_delete(&key_id).await
.ok_or_else(|| ErrorObject::owned(-32603, "API key not found", None::<()>))?;
Ok(())
}
async fn key_list(&self) -> RpcResult<Vec<ApiKey>> {
Ok(self.key_list().await)
}
async fn auth_verify(&self) -> RpcResult<crate::auth::AuthVerifyResponse> {
// If this method is called, middleware already verified the key
// So we just return success - the middleware wouldn't have let an invalid key through
// NOTE(review): `name` and `scope` are fixed placeholder strings, not the
// metadata of the key that was presented.
Ok(crate::auth::AuthVerifyResponse {
valid: true,
name: "verified".to_string(),
scope: "authenticated".to_string(),
})
}
async fn supervisor_info(&self) -> RpcResult<SupervisorInfo> {
// The URL is hard-coded and independent of the address the server is bound to.
Ok(SupervisorInfo {
server_url: "http://127.0.0.1:3031".to_string(), // TODO: get from config
})
}
async fn rpc_discover(&self) -> RpcResult<serde_json::Value> {
debug!("OpenRPC request: rpc.discover");
// Read OpenRPC specification from docs/openrpc.json
match load_openrpc_spec() {
Ok(spec) => Ok(spec),
Err(e) => {
error!("Failed to load OpenRPC specification: {}", e);
// Fallback to a minimal spec if file loading fails
// (the call still succeeds; the failure is only visible in the "error" field)
Ok(serde_json::json!({
"openrpc": "1.3.2",
"info": {
"title": "Hero Supervisor OpenRPC API",
"version": "1.0.0",
"description": "OpenRPC API for managing Hero Supervisor runners and jobs"
},
"methods": [],
"error": "Failed to load full specification"
}))
}
}
}
}
/// Authorization middleware using RpcServiceT
/// This middleware is created per-connection and checks permissions for each RPC call
#[derive(Clone)]
struct AuthMiddleware<S> {
/// Supervisor handle used to look up the presented API key
supervisor: Supervisor,
/// The wrapped RPC service that receives calls once they are authorized
inner: S,
}
// Authorization logic for single calls:
//   1. look up the scopes required for the method (`None` means public),
//   2. read the `Authorization: Bearer <key>` header from the request extensions,
//   3. resolve the key through the supervisor,
//   4. reject with error code -32001 unless the key's scope is in the required list.
//
// SECURITY NOTE(review): `batch` and `notification` forward to the inner service
// without performing any of these checks, so a call wrapped in a JSON-RPC batch
// is not authorized by this middleware. Each batch entry should go through the
// same checks as `call`.
impl<S> RpcServiceT for AuthMiddleware<S>
where
S: RpcServiceT<MethodResponse = MethodResponse> + Send + Sync + Clone + 'static,
{
type MethodResponse = MethodResponse;
type BatchResponse = S::BatchResponse;
type NotificationResponse = S::NotificationResponse;
fn call<'a>(&self, req: jsonrpsee::server::middleware::rpc::Request<'a>) -> impl std::future::Future<Output = Self::MethodResponse> + Send + 'a {
// Clone what the future needs so it does not borrow `self`.
let supervisor = self.supervisor.clone();
let inner = self.inner.clone();
let method = req.method_name().to_string();
let id = req.id();
Box::pin(async move {
// Check if method requires auth
let required_scopes = match crate::auth::get_method_required_scopes(&method) {
None => {
// Public method - no auth required
debug!(" Public method: {}", method);
return inner.call(req).await;
}
Some(scopes) => scopes,
};
// Extract Authorization header from extensions
// (the header map is placed there by `HeaderPropagationService`)
let headers = req.extensions().get::<hyper::HeaderMap>();
let api_key = headers
.and_then(|h| h.get(hyper::header::AUTHORIZATION))
.and_then(|value| value.to_str().ok())
.and_then(|s| s.strip_prefix("Bearer "))
.map(|k| k.to_string());
// A missing header, a non-text value or a value without the "Bearer " prefix
// all end up here as `None`.
let api_key = match api_key {
Some(key) => key,
None => {
error!("❌ Missing Authorization header for method: {}", method);
let err = ErrorObjectOwned::owned(
-32001,
format!("Missing Authorization header for method: {}", method),
None::<()>,
);
return MethodResponse::error(id, err);
}
};
// Verify API key and check scope
let key_obj = match supervisor.key_get(&api_key).await {
Some(k) => k,
None => {
error!("❌ Invalid API key");
let err = ErrorObjectOwned::owned(-32001, "Invalid API key", None::<()>);
return MethodResponse::error(id, err);
}
};
// Holding any one of the required scopes is sufficient.
if !required_scopes.contains(&key_obj.scope) {
error!(
"❌ Unauthorized: method '{}' requires {:?}, got {:?}",
method, required_scopes, key_obj.scope
);
let err = ErrorObjectOwned::owned(
-32001,
format!(
"Insufficient permissions for '{}'. Required: {:?}, Got: {:?}",
method, required_scopes, key_obj.scope
),
None::<()>,
);
return MethodResponse::error(id, err);
}
debug!("✅ Authorized: {} with scope {:?}", method, key_obj.scope);
// Authorized - proceed with the call
inner.call(req).await
})
}
fn batch<'a>(&self, batch: jsonrpsee::server::middleware::rpc::Batch<'a>) -> impl std::future::Future<Output = Self::BatchResponse> + Send + 'a {
// For simplicity, pass through batch requests
// In production, you'd want to check each request in the batch
// (see the SECURITY NOTE above: no authorization happens on this path)
self.inner.batch(batch)
}
fn notification<'a>(&self, notif: jsonrpsee::server::middleware::rpc::Notification<'a>) -> impl std::future::Future<Output = Self::NotificationResponse> + Send + 'a {
// Notifications are forwarded unchanged, without an authorization check.
self.inner.notification(notif)
}
}
/// HTTP middleware to propagate headers into request extensions.
///
/// The RPC-level `AuthMiddleware` reads the `Authorization` header from the
/// header map that this service stores in the extensions.
#[derive(Clone)]
struct HeaderPropagationService<S> {
/// The wrapped HTTP service
inner: S,
}
impl<S, B> tower::Service<hyper::Request<B>> for HeaderPropagationService<S>
where
    S: tower::Service<hyper::Request<B>> + Clone + Send + 'static,
    S::Future: Send + 'static,
    B: Send + 'static,
{
    type Response = S::Response;
    type Error = S::Error;
    type Future = std::pin::Pin<
        Box<dyn std::future::Future<Output = Result<Self::Response, Self::Error>> + Send>,
    >;

    /// Readiness is delegated to the wrapped service.
    fn poll_ready(
        &mut self,
        cx: &mut std::task::Context<'_>,
    ) -> std::task::Poll<Result<(), Self::Error>> {
        self.inner.poll_ready(cx)
    }

    /// Store a copy of the request's header map in its extensions, then hand
    /// the request to the wrapped service.
    fn call(&mut self, mut req: hyper::Request<B>) -> Self::Future {
        let header_snapshot = req.headers().clone();
        req.extensions_mut().insert(header_snapshot);
        Box::pin(self.inner.call(req))
    }
}
/// Combine a bind address and a port into a socket address.
///
/// A bare IP address (IPv4 such as `127.0.0.1` or IPv6 such as `::1`) is paired
/// with `port` directly. Anything else falls back to parsing `"{address}:{port}"`,
/// which keeps accepting the bracketed IPv6 form (`[::1]`).
fn resolve_bind_addr(bind_address: &str, port: u16) -> Result<SocketAddr, std::net::AddrParseError> {
    match bind_address.parse::<std::net::IpAddr>() {
        Ok(ip) => Ok(SocketAddr::new(ip, port)),
        Err(_) => format!("{}:{}", bind_address, port).parse(),
    }
}

/// Start HTTP OpenRPC server (Unix socket support would require additional dependencies)
///
/// Binds to `bind_address:port`, installs CORS handling, header propagation and
/// the authorization middleware, and serves the supervisor's RPC methods.
///
/// # Errors
///
/// Fails when `bind_address` is not a valid IP address or when the server
/// cannot be built on the resulting socket address.
pub async fn start_http_openrpc_server(
    supervisor: Supervisor,
    bind_address: &str,
    port: u16,
) -> anyhow::Result<ServerHandle> {
    // Plain string concatenation cannot express a bare IPv6 address
    // ("::1:3030" is not a socket address), so resolve it structurally.
    let http_addr = resolve_bind_addr(bind_address, port)?;

    // Configure CORS to allow requests from the admin UI
    // Note: Authorization header must be explicitly listed, not covered by Any
    use tower_http::cors::AllowHeaders;
    let cors = CorsLayer::new()
        .allow_origin(Any)
        .allow_headers(AllowHeaders::list([
            hyper::header::CONTENT_TYPE,
            hyper::header::AUTHORIZATION,
        ]))
        .allow_methods(Any)
        .expose_headers(Any);

    // Build RPC middleware with authorization (per-connection)
    let supervisor_for_middleware = supervisor.clone();
    let rpc_middleware = RpcServiceBuilder::new().layer_fn(move |service| {
        // This closure runs once per connection
        AuthMiddleware {
            supervisor: supervisor_for_middleware.clone(),
            inner: service,
        }
    });

    // Build HTTP middleware stack with CORS and header propagation
    let http_middleware = tower::ServiceBuilder::new()
        .layer(cors)
        .layer(tower::layer::layer_fn(|service| {
            HeaderPropagationService { inner: service }
        }));

    let http_server = Server::builder()
        .set_rpc_middleware(rpc_middleware)
        .set_http_middleware(http_middleware)
        .build(http_addr)
        .await?;

    let http_handle = http_server.start(supervisor.into_rpc());
    info!("OpenRPC HTTP server running at http://{} with CORS enabled", http_addr);
    Ok(http_handle)
}

286
bin/supervisor/src/store.rs Normal file
View File

@@ -0,0 +1,286 @@
//! In-memory storage layer for Supervisor
//!
//! Provides CRUD operations for:
//! - API Keys
//! - Runners
//! - Jobs
use crate::auth::{ApiKey, ApiKeyScope};
use crate::error::{SupervisorError, SupervisorResult};
use hero_job::Job;
use std::collections::{HashMap, HashSet};
/// In-memory storage for all supervisor data.
///
/// Nothing here is persisted; all three collections live only in this value.
pub struct Store {
/// API keys, intended to be indexed by key value (key_value -> ApiKey)
api_keys: HashMap<String, ApiKey>,
/// Registered runner IDs
runners: HashSet<String>,
/// In-memory job storage (job_id -> Job)
jobs: HashMap<String, Job>,
}
impl Store {
    /// Create a new, empty store
    pub fn new() -> Self {
        Self {
            api_keys: HashMap::new(),
            runners: HashSet::new(),
            jobs: HashMap::new(),
        }
    }

    // ==================== API Key Operations ====================

    /// Store an API key and return it.
    ///
    /// Keys are indexed by their key value (`ApiKey::key`), which is what
    /// `key_get` and `key_delete` look up. An existing entry with the same
    /// value is replaced.
    pub fn key_create(&mut self, key: ApiKey) -> ApiKey {
        self.api_keys.insert(key.key.clone(), key.clone());
        key
    }

    /// Create a new API key with a generated value, store it and return it.
    pub fn key_create_new(&mut self, name: String, scope: ApiKeyScope) -> ApiKey {
        self.key_create(ApiKey::new(name, scope))
    }

    /// Get an API key by its value; `None` when no such key is stored.
    pub fn key_get(&self, key_name: &str) -> Option<&ApiKey> {
        self.api_keys.get(key_name)
    }

    /// Delete an API key by its value, returning the removed entry if it existed.
    pub fn key_delete(&mut self, key_name: &str) -> Option<ApiKey> {
        self.api_keys.remove(key_name)
    }

    /// List all API keys (in no particular order)
    pub fn key_list(&self) -> Vec<ApiKey> {
        self.api_keys.values().cloned().collect()
    }

    /// List API keys that have exactly the given scope
    pub fn key_list_by_scope(&self, scope: ApiKeyScope) -> Vec<ApiKey> {
        self.api_keys
            .values()
            .filter(|k| k.scope == scope)
            .cloned()
            .collect()
    }

    // ==================== Runner Operations ====================

    /// Add a runner. Adding an ID that is already present is a no-op; this
    /// currently always returns `Ok`.
    pub fn runner_add(&mut self, runner_id: String) -> SupervisorResult<()> {
        self.runners.insert(runner_id);
        Ok(())
    }

    /// Remove a runner. Removing an unknown ID is a no-op; this currently
    /// always returns `Ok`.
    pub fn runner_remove(&mut self, runner_id: &str) -> SupervisorResult<()> {
        self.runners.remove(runner_id);
        Ok(())
    }

    /// Check if a runner exists
    pub fn runner_exists(&self, runner_id: &str) -> bool {
        self.runners.contains(runner_id)
    }

    /// List all runner IDs (in no particular order)
    pub fn runner_list_all(&self) -> Vec<String> {
        self.runners.iter().cloned().collect()
    }

    // ==================== Job Operations ====================

    /// Store a job in memory under its `id`, replacing any job with the same ID
    pub fn job_store(&mut self, job: Job) -> SupervisorResult<()> {
        self.jobs.insert(job.id.clone(), job);
        Ok(())
    }

    /// Get a copy of a job from memory.
    ///
    /// # Errors
    ///
    /// Returns `SupervisorError::JobNotFound` when no job has this ID.
    pub fn job_get(&self, job_id: &str) -> SupervisorResult<Job> {
        self.jobs
            .get(job_id)
            .cloned()
            .ok_or_else(|| SupervisorError::JobNotFound {
                job_id: job_id.to_string(),
            })
    }

    /// Delete a job from memory.
    ///
    /// # Errors
    ///
    /// Returns `SupervisorError::JobNotFound` when no job has this ID.
    pub fn job_delete(&mut self, job_id: &str) -> SupervisorResult<()> {
        self.jobs
            .remove(job_id)
            .map(|_| ())
            .ok_or_else(|| SupervisorError::JobNotFound {
                job_id: job_id.to_string(),
            })
    }

    /// List all job IDs (in no particular order)
    pub fn job_list(&self) -> Vec<String> {
        self.jobs.keys().cloned().collect()
    }

    /// Check if a job exists
    pub fn job_exists(&self, job_id: &str) -> bool {
        self.jobs.contains_key(job_id)
    }
}
impl Clone for Store {
fn clone(&self) -> Self {
Self {
api_keys: self.api_keys.clone(),
runners: self.runners.clone(),
jobs: self.jobs.clone(),
}
}
}
#[cfg(test)]
mod tests {
use super::*;
use hero_job::JobBuilder;
fn create_test_store() -> Store {
Store::new()
}
fn create_test_job(id: &str, runner: &str) -> Job {
let mut job = JobBuilder::new()
.caller_id("test_caller")
.context_id("test_context")
.runner(runner)
.executor("test")
.payload("test payload")
.build()
.unwrap();
job.id = id.to_string(); // Set ID manually
job
}
#[test]
fn test_api_key_operations() {
let mut store = create_test_store();
// Create key
let key = store.key_create_new("test_key".to_string(), ApiKeyScope::Admin);
assert_eq!(key.name, "test_key");
assert_eq!(key.scope, ApiKeyScope::Admin);
// Get key
let retrieved = store.key_get(&key.key);
assert!(retrieved.is_some());
assert_eq!(retrieved.unwrap().name, "test_key");
// List keys
let keys = store.key_list();
assert_eq!(keys.len(), 1);
// List by scope
let admin_keys = store.key_list_by_scope(ApiKeyScope::Admin);
assert_eq!(admin_keys.len(), 1);
// Delete key
let removed = store.key_delete(&key.key);
assert!(removed.is_some());
assert!(store.key_get(&key.key).is_none());
}
#[test]
fn test_runner_operations() {
let mut store = create_test_store();
// Add runner
assert!(store.runner_add("runner1".to_string()).is_ok());
assert!(store.runner_exists("runner1"));
// List runners
let runners = store.runner_list_all();
assert_eq!(runners.len(), 1);
assert!(runners.contains(&"runner1".to_string()));
// List all runners
let all_runners = store.runner_list_all();
assert_eq!(all_runners.len(), 1);
// Remove runner
assert!(store.runner_remove("runner1").is_ok());
assert!(!store.runner_exists("runner1"));
}
#[test]
fn test_job_operations() {
    let mut store = create_test_store();

    // Storing a job makes it visible to the existence check.
    assert!(store.job_store(create_test_job("job1", "runner1")).is_ok());
    assert!(store.job_exists("job1"));

    // Fetching it back yields a job carrying the same ID.
    let fetched_id = store.job_get("job1").ok().map(|j| j.id);
    assert_eq!(fetched_id.as_deref(), Some("job1"));

    // The ID listing has exactly one entry, and it is ours.
    let ids = store.job_list();
    assert_eq!(ids.len(), 1);
    assert!(ids.iter().any(|id| id == "job1"));

    // After deletion the job is gone from every accessor.
    assert!(store.job_delete("job1").is_ok());
    assert!(!store.job_exists("job1"));
    assert!(store.job_get("job1").is_err());
}
#[test]
fn test_job_not_found() {
    // Looking up an ID that was never stored must fail.
    let lookup = create_test_store().job_get("nonexistent");
    assert!(lookup.is_err());
}
#[test]
fn test_multiple_jobs() {
    let mut store = create_test_store();
    let ids = ["job1", "job2", "job3"];

    // Store one job per ID, all targeting the same runner.
    for id in ids.iter().copied() {
        assert!(store.job_store(create_test_job(id, "runner1")).is_ok());
    }

    // Every stored job is listed and individually discoverable.
    assert_eq!(store.job_list().len(), 3);
    for id in ids.iter().copied() {
        assert!(store.job_exists(id));
    }

    // Removing the middle job shrinks the listing and hides only that job.
    assert!(store.job_delete("job2").is_ok());
    assert_eq!(store.job_list().len(), 2);
    assert!(!store.job_exists("job2"));
}
#[test]
fn test_store_clone() {
    let mut store = create_test_store();
    store.runner_add("runner1".to_string()).unwrap();
    store.job_store(create_test_job("job1", "runner1")).unwrap();

    // Clone the store
    let cloned = store.clone();

    // The clone carries the data that was present at clone time.
    assert!(cloned.runner_exists("runner1"));
    assert!(cloned.job_exists("job1"));

    // The clone must be independent: mutating the original afterwards must not
    // leak into it. Without this check a clone that shared state would pass.
    store.job_delete("job1").unwrap();
    assert!(!store.job_exists("job1"));
    assert!(cloned.job_exists("job1"));
}
}

View File

@@ -0,0 +1,360 @@
//! Main supervisor implementation for managing multiple actor runners.
use crate::error::{SupervisorError, SupervisorResult};
use crate::store::Store;
use hero_job_client::Client as JobClient;
use hero_job::{Job, JobStatus};
use std::sync::Arc;
use tokio::sync::Mutex;
// Re-export RPC types for convenience
pub use jsonrpsee::core::RpcResult;
pub use jsonrpsee::types::ErrorObject;
/// Main supervisor that manages multiple runners.
///
/// The type derives `Clone`. Because the store sits behind an `Arc`, every clone
/// of a supervisor refers to the same underlying `Store`.
#[derive(Clone)]
pub struct Supervisor {
/// Centralized storage layer with interior mutability: the in-memory `Store`
/// wrapped in an async (`tokio`) mutex and shared through an `Arc`.
pub(crate) store: Arc<Mutex<Store>>,
/// Job client for Redis operations: persisting jobs, dispatching them to runner
/// queues, and reading back status, results and errors.
pub(crate) job_client: JobClient,
/// Redis client for direct operations
pub(crate) redis_client: redis::Client,
// Optional Osiris client for persistent storage - temporarily disabled
// pub(crate) osiris_client: Option<osiris_client::OsirisClient>,
}
impl Supervisor {
/// Create a new supervisor builder
pub fn builder() -> crate::builder::SupervisorBuilder {
crate::builder::SupervisorBuilder::new()
}
/// Register a job in the in-memory store without dispatching it.
/// Authorization must be checked by the caller (e.g., OpenRPC layer)
///
/// Returns the job's ID on success.
///
/// # Errors
/// `SupervisorError::RunnerNotFound` when the job's runner is not registered;
/// otherwise whatever error the store reports while saving the job.
pub async fn job_create(&self, job: Job) -> SupervisorResult<String> {
    let mut guard = self.store.lock().await;

    // Reject jobs aimed at a runner the store does not know about.
    if !guard.runner_exists(&job.runner) {
        return Err(SupervisorError::RunnerNotFound {
            runner_id: job.runner,
        });
    }

    // Remember the ID before the job is moved into the store (memory only).
    let id = job.id.clone();
    guard.job_store(job)?;
    Ok(id)
}
/// Remove a runner from the store, returning the store's result unchanged.
pub async fn runner_delete(&self, runner_id: &str) -> SupervisorResult<()> {
    let mut guard = self.store.lock().await;
    guard.runner_remove(runner_id)
}
/// Report whether `runner_id` is currently registered in the store.
pub async fn has_runner(&self, runner_id: &str) -> bool {
    let guard = self.store.lock().await;
    guard.runner_exists(runner_id)
}
/// Fetch a job from the in-memory store by its ID.
///
/// # Errors
/// Returns the store's error when no job with that ID is held in memory.
pub async fn job_get(&self, job_id: &str) -> SupervisorResult<Job> {
    let guard = self.store.lock().await;
    guard.job_get(job_id)
}
/// Ping a runner by dispatching a ping job to its queue.
///
/// Builds a job with executor `"ping"` and payload `"ping"`, records it in the
/// in-memory store, writes it to Redis and dispatches it to the runner. Returns
/// the ID of the ping job; this method does not wait for the runner to answer.
///
/// # Errors
/// * `SupervisorError::RunnerNotFound` when `runner_id` is not registered.
/// * `SupervisorError::QueueError` when the ping job cannot be built.
/// * Store or job-client errors from saving or dispatching the job.
pub async fn runner_ping(&self, runner_id: &str) -> SupervisorResult<String> {
    use hero_job::JobBuilder;

    // Check the runner and record the job under ONE lock acquisition, so the
    // runner cannot be removed between the check and the insert. The guard is
    // released at the end of this block, before any Redis round-trip.
    let ping_job = {
        let mut store = self.store.lock().await;

        if !store.runner_exists(runner_id) {
            return Err(SupervisorError::RunnerNotFound {
                runner_id: runner_id.to_string(),
            });
        }

        let ping_job = JobBuilder::new()
            .caller_id("supervisor_ping")
            .context_id("ping_context")
            .payload("ping")
            .runner(runner_id)
            .executor("ping")
            .timeout(10)
            .build()
            .map_err(|e| SupervisorError::QueueError {
                runner_id: runner_id.to_string(),
                reason: format!("Failed to create ping job: {}", e),
            })?;

        store.job_store(ping_job.clone())?;
        ping_job
    };

    // Persist to Redis, then push onto the runner's queue.
    self.job_client
        .store_job_in_redis(&ping_job)
        .await
        .map_err(SupervisorError::from)?;
    self.job_client
        .job_run(&ping_job.id, runner_id)
        .await
        .map_err(SupervisorError::from)?;

    Ok(ping_job.id)
}
/// Stop a job by ID
pub async fn job_stop(&self, job_id: &str) -> SupervisorResult<()> {
// For now, we'll implement a basic stop by setting status to Stopping
let _ = self.job_client.set_job_status(job_id, JobStatus::Stopping).await;
Ok(())
}
/// Remove a job from the in-memory store.
/// Authorization must be checked by the caller (e.g., OpenRPC layer)
pub async fn job_delete(&self, job_id: &str) -> SupervisorResult<()> {
    let mut guard = self.store.lock().await;
    guard.job_delete(job_id)
}
/// Return the IDs of every runner currently held in the store.
pub async fn runner_list(&self) -> Vec<String> {
    let guard = self.store.lock().await;
    guard.runner_list_all()
}
/// Report whether `runner_id` is registered; same answer as `has_runner`.
pub async fn runner_is_registered(&self, runner_id: &str) -> bool {
    self.has_runner(runner_id).await
}
/// Start a job by dispatching it to a runner's queue (fire-and-forget).
///
/// The job must already exist in the in-memory store. It is written to Redis
/// and then pushed onto the queue of the runner named in the job.
///
/// # Errors
/// * The store's error when `job_id` is not held in memory.
/// * `SupervisorError::RunnerNotFound` when the job's runner is not registered.
/// * Job-client errors from the Redis write or the dispatch.
pub async fn job_start(&self, job_id: &str) -> SupervisorResult<()> {
    // Read the job and validate its runner under a single, short-lived lock.
    // The guard is dropped at the end of this block so the store is NOT locked
    // while the Redis calls below are in flight.
    let job = {
        let store = self.store.lock().await;
        let job = store.job_get(job_id)?;
        if !store.runner_exists(&job.runner) {
            return Err(SupervisorError::RunnerNotFound {
                runner_id: job.runner,
            });
        }
        job
    };

    // Store job in Redis and dispatch to runner queue
    self.job_client
        .store_job_in_redis(&job)
        .await
        .map_err(SupervisorError::from)?;
    self.job_client
        .job_run(&job.id, &job.runner)
        .await
        .map_err(SupervisorError::from)
}
/// Run a job end to end: record it in memory, dispatch it, and wait for its result.
///
/// # Errors
/// `SupervisorError::RunnerNotFound` when the job's runner is not registered,
/// otherwise any store or job-client error raised along the way.
pub async fn job_run(&self, job: Job) -> SupervisorResult<String> {
    {
        let mut guard = self.store.lock().await;

        if !guard.runner_exists(&job.runner) {
            return Err(SupervisorError::RunnerNotFound {
                runner_id: job.runner.clone(),
            });
        }

        // Keep a copy in memory; the original is still needed for dispatch below.
        guard.job_store(job.clone())?;
    } // store lock released here, before the wait

    // job_run_wait handles the Redis write, the dispatch and the wait.
    // NOTE(review): the literal 30 is presumably a timeout in seconds - confirm
    // against hero_job_client.
    self.job_client
        .job_run_wait(&job, &job.runner, 30)
        .await
        .map_err(SupervisorError::from)
}
// Secret management methods removed - use API key management instead
// See the key_get, key_create, key_delete, key_list, key_list_by_scope and create_api_key methods below
/// Return the IDs of every job held in the in-memory store.
pub async fn job_list(&self) -> Vec<String> {
    let guard = self.store.lock().await;
    guard.job_list()
}
/// Get the status of a job.
///
/// Redis is the source of truth once a job has been dispatched. When Redis does
/// not know the job:
/// * a job that exists in memory is reported as `JobStatus::Created`
///   (created but not yet dispatched);
/// * otherwise the call fails with `SupervisorError::JobNotFound`.
///
/// # Errors
/// `SupervisorError::JobNotFound` as described above, or the converted
/// job-client error for any other Redis failure.
pub async fn job_status(&self, job_id: &str) -> SupervisorResult<JobStatus> {
    // Only existence matters here, so avoid cloning the whole job. The guard is
    // a temporary and is released before the Redis call.
    let in_memory = self.store.lock().await.job_exists(job_id);

    match self.job_client.get_status(job_id).await {
        Ok(status) => Ok(status),
        Err(hero_job_client::ClientError::Job(hero_job::JobError::NotFound(_))) => {
            if in_memory {
                // Job exists in memory but not in Redis - it's created but not dispatched
                Ok(JobStatus::Created)
            } else {
                Err(SupervisorError::JobNotFound {
                    job_id: job_id.to_string(),
                })
            }
        }
        Err(e) => Err(SupervisorError::from(e)),
    }
}
/// Fetch a job's current result without waiting.
///
/// When the job's status string is `"error"`, the stored error message is
/// returned as `Some("Error: <message>")`, with `"Unknown error"` standing in
/// for a missing message. In every other state the stored result is returned
/// as-is and may be `None`.
///
/// # Errors
/// `SupervisorError::JobNotFound` when the job client reports the job as
/// missing; any other job-client error is converted and returned.
pub async fn job_result(&self, job_id: &str) -> SupervisorResult<Option<String>> {
    // The status lookup doubles as the existence check.
    let status = match self.job_client.get_status(job_id).await {
        Ok(status) => status,
        Err(hero_job_client::ClientError::Job(hero_job::JobError::NotFound(_))) => {
            return Err(SupervisorError::JobNotFound {
                job_id: job_id.to_string(),
            });
        }
        Err(other) => return Err(SupervisorError::from(other)),
    };

    // Anything but the error state: hand back the stored result.
    if status.as_str() != "error" {
        return self
            .job_client
            .get_result(job_id)
            .await
            .map_err(SupervisorError::from);
    }

    // Error state: surface the recorded message in place of a result.
    let detail = self
        .job_client
        .get_error(job_id)
        .await
        .map_err(SupervisorError::from)?
        .unwrap_or_else(|| "Unknown error".to_string());
    Ok(Some(format!("Error: {}", detail)))
}
// Job Log Access
/// Get logs for a specific job
///
/// Reads log files from the logs/actor/<runner_name>/job-<job_id>/ directory
pub async fn job_logs(&self, job_id: &str, lines: Option<usize>) -> SupervisorResult<Vec<String>> {
// Determine the logs directory path
// Default to ~/hero/logs
let logs_root = if let Some(home) = std::env::var_os("HOME") {
std::path::PathBuf::from(home).join("hero").join("logs")
} else {
std::path::PathBuf::from("logs")
};
// Check if logs directory exists
if !logs_root.exists() {
return Ok(vec![format!("Logs directory not found: {}", logs_root.display())]);
}
let actor_dir = logs_root.join("actor");
if !actor_dir.exists() {
return Ok(vec![format!("Actor logs directory not found: {}", actor_dir.display())]);
}
// Search through all runner directories to find the job
if let Ok(entries) = std::fs::read_dir(&actor_dir) {
for entry in entries.flatten() {
if entry.path().is_dir() {
let job_dir = entry.path().join(format!("job-{}", job_id));
if job_dir.exists() {
// Read all log files in the directory
let mut all_logs = Vec::new();
if let Ok(log_entries) = std::fs::read_dir(&job_dir) {
// Collect all log files with their paths for sorting
let mut log_files: Vec<_> = log_entries
.flatten()
.filter(|e| {
if !e.path().is_file() {
return false;
}
// Accept files that start with "log" (covers log.YYYY-MM-DD-HH format)
e.file_name().to_string_lossy().starts_with("log")
})
.collect();
// Sort by filename (which includes timestamp for hourly rotation)
log_files.sort_by_key(|e| e.path());
// Read files in order
for entry in log_files {
if let Ok(content) = std::fs::read_to_string(entry.path()) {
all_logs.extend(content.lines().map(|s| s.to_string()));
}
}
}
// If lines limit is specified, return only the last N lines
if let Some(n) = lines {
let start = all_logs.len().saturating_sub(n);
return Ok(all_logs[start..].to_vec());
} else {
return Ok(all_logs);
}
}
}
}
}
// If no logs found, return helpful message
Ok(vec![format!("No logs found for job: {}", job_id)])
}
// API Key Management - These methods provide direct access to the key store
// Authorization checking should be done at the OpenRPC layer before calling these
/// Look up an API key by its value, returning an owned copy when present.
pub(crate) async fn key_get(&self, key_id: &str) -> Option<crate::auth::ApiKey> {
    let guard = self.store.lock().await;
    guard.key_get(key_id).cloned()
}
/// Hand a caller-built API key to the store and return what the store gives back.
pub(crate) async fn key_create(&self, key: crate::auth::ApiKey) -> crate::auth::ApiKey {
    let mut guard = self.store.lock().await;
    guard.key_create(key)
}
/// Remove an API key by its value; yields the removed key, or `None` if it was absent.
pub(crate) async fn key_delete(&self, key_id: &str) -> Option<crate::auth::ApiKey> {
    let mut guard = self.store.lock().await;
    guard.key_delete(key_id)
}
/// Return every API key held in the store.
pub(crate) async fn key_list(&self) -> Vec<crate::auth::ApiKey> {
    let guard = self.store.lock().await;
    guard.key_list()
}
/// Return the API keys the store reports for the given `scope`.
pub(crate) async fn key_list_by_scope(&self, scope: crate::auth::ApiKeyScope) -> Vec<crate::auth::ApiKey> {
    let guard = self.store.lock().await;
    guard.key_list_by_scope(scope)
}
// Runner Management
/// Register a new runner and echo its ID back on success.
/// Authorization must be checked by the caller (e.g., OpenRPC layer)
///
/// # Errors
/// Returns the store's error when the runner cannot be added.
pub async fn runner_create(&self, runner_id: String) -> SupervisorResult<String> {
    let mut guard = self.store.lock().await;
    guard.runner_add(runner_id.clone()).map(|_| runner_id)
}
/// Have the store mint a new API key with the given `name` and `scope`.
pub async fn create_api_key(&self, name: String, scope: crate::auth::ApiKeyScope) -> crate::auth::ApiKey {
    let mut guard = self.store.lock().await;
    guard.key_create_new(name, scope)
}
}
// Note: Default implementation removed because it requires async initialization
// Use Supervisor::builder() for proper initialization