update api, fix tests and examples

This commit is contained in:
Timur Gordon
2025-08-27 10:07:53 +02:00
parent 767c66fb6a
commit ef17d36300
42 changed files with 2984 additions and 781 deletions

View File

@@ -55,14 +55,6 @@ path = "cmd/supervisor.rs"
name = "openrpc_comprehensive"
path = "examples/basic_openrpc_client.rs"
[[example]]
name = "test_queue_and_wait"
path = "examples/test_queue_and_wait.rs"
[[example]]
name = "test_openrpc_methods"
path = "examples/test_openrpc_methods.rs"
[[example]]
name = "mock_runner"
path = "examples/mock_runner.rs"

View File

@@ -20,9 +20,11 @@ hero-supervisor/
The crate uses Rust's feature system to provide conditional compilation:
- **`default`**: Includes all functionality - supervisor, OpenRPC server, and CLI binary
- **`default`**: Includes CLI functionality
- **`cli`**: Enables the supervisor binary (included in default)
All OpenRPC functionality is now included by default for simplified deployment.
## Architecture
The Hero Supervisor uses a clean, simplified architecture with centralized resource management:
@@ -60,6 +62,9 @@ Simplified runner configuration and management. Contains `Runner` struct with co
#### `src/job.rs`
Job data structures, builder pattern, and Redis key management. Defines `Job` struct with metadata, script content, and status tracking.
#### `src/client.rs`
Client implementation for job management operations. Provides Redis-based job storage, retrieval, status updates, and lifecycle management. Separates job operations from supervisor logic.
#### `src/openrpc.rs`
OpenRPC server implementation that exposes all supervisor functionality over JSON-RPC. Implements RPC trait directly on the supervisor for clean integration.
@@ -148,14 +153,14 @@ curl -X POST -H "Content-Type: application/json" \
### Building
```bash
# Library only
# Build everything (default includes CLI and OpenRPC)
cargo build
# Library only (minimal build)
cargo build --no-default-features
# With CLI
# With CLI (same as default)
cargo build --features cli
# With OpenRPC server
cargo build --features openrpc
```
### Testing

View File

View File

View File

View File

View File

@@ -26,4 +26,4 @@ wasm-logger = "0.2"
uuid = { version = "1.0", features = ["v4", "js"] }
# Use our new WASM OpenRPC client
hero-supervisor-openrpc-client = { path = "../clients/openrpc" }
hero-supervisor-openrpc-client = { path = "../openrpc" }

View File

@@ -1,11 +1,9 @@
use yew::prelude::*;
use wasm_bindgen_futures::spawn_local;
use gloo::console;
use hero_supervisor_openrpc_client::wasm::{WasmSupervisorClient, WasmJob};
use gloo::timers::callback::Interval;
use crate::sidebar::{Sidebar, SupervisorInfo};
use wasm_bindgen_futures::spawn_local;
use hero_supervisor_openrpc_client::wasm::{WasmSupervisorClient, WasmJob};
use crate::sidebar::{Sidebar, SupervisorInfo, SessionSecretType};
use crate::runners::{Runners, RegisterForm};
use crate::jobs::Jobs;
@@ -17,7 +15,7 @@ fn generate_job_id() -> String {
#[derive(Clone, Default)]
pub struct JobForm {
pub payload: String,
pub runner_name: String,
pub runner: String,
pub executor: String,
pub secret: String,
}
@@ -47,7 +45,7 @@ pub struct AppState {
pub job_form: JobForm,
pub supervisor_info: Option<SupervisorInfo>,
pub admin_secret: String,
pub ping_states: std::collections::HashMap<String, PingState>, // runner_name -> ping_state
pub ping_states: std::collections::HashMap<String, PingState>, // runner -> ping_state
}
@@ -68,7 +66,7 @@ pub fn app() -> Html {
},
job_form: JobForm {
payload: String::new(),
runner_name: String::new(),
runner: String::new(),
executor: String::new(),
secret: String::new(),
},
@@ -340,7 +338,7 @@ pub fn app() -> Html {
// Admin secret change callback
let on_admin_secret_change = {
let state = state.clone();
Callback::from(move |admin_secret: String| {
Callback::from(move |(admin_secret, _secret_type): (String, SessionSecretType)| {
let mut new_state = (*state).clone();
new_state.admin_secret = admin_secret;
state.set(new_state);
@@ -354,7 +352,7 @@ pub fn app() -> Html {
let mut new_form = state.job_form.clone();
match field.as_str() {
"payload" => new_form.payload = value,
"runner_name" => new_form.runner_name = value,
"runner" => new_form.runner = value,
"executor" => new_form.executor = value,
"secret" => new_form.secret = value,
_ => {}
@@ -385,7 +383,7 @@ pub fn app() -> Html {
job_id.clone(),
job_form.payload.clone(),
job_form.executor.clone(),
job_form.runner_name.clone(),
job_form.runner.clone(),
);
// Immediately add job to the list with "pending" status
@@ -591,8 +589,9 @@ pub fn app() -> Html {
<Sidebar
server_url={state.server_url.clone()}
supervisor_info={state.supervisor_info.clone()}
admin_secret={state.admin_secret.clone()}
on_admin_secret_change={on_admin_secret_change}
session_secret={state.admin_secret.clone()}
session_secret_type={SessionSecretType::Admin}
on_session_secret_change={on_admin_secret_change}
on_supervisor_info_loaded={on_supervisor_info_loaded}
/>

View File

@@ -207,7 +207,7 @@ pub fn runner_detail(props: &RunnerDetailProps) -> Html {
.context_id("test-job")
.payload(script)
.job_type(JobType::SAL)
.runner_name(&runner_id)
.runner(&runner_id)
.build();
match job {

View File

@@ -23,7 +23,7 @@ impl PartialEq for JobsProps {
self.jobs.len() == other.jobs.len() &&
self.server_url == other.server_url &&
self.job_form.payload == other.job_form.payload &&
self.job_form.runner_name == other.job_form.runner_name &&
self.job_form.runner == other.job_form.runner &&
self.job_form.executor == other.job_form.executor &&
self.job_form.secret == other.job_form.secret &&
self.runners.len() == other.runners.len()
@@ -45,7 +45,7 @@ pub fn jobs(props: &JobsProps) -> Html {
let on_change = props.on_job_form_change.clone();
Callback::from(move |e: Event| {
let input: HtmlInputElement = e.target_unchecked_into();
on_change.emit(("runner_name".to_string(), input.value()));
on_change.emit(("runner".to_string(), input.value()));
})
};
@@ -104,13 +104,13 @@ pub fn jobs(props: &JobsProps) -> Html {
<td>
<select
class="form-control table-input"
value={props.job_form.runner_name.clone()}
value={props.job_form.runner.clone()}
onchange={on_runner_name_change}
>
<option value="" disabled=true>{"-Select Runner-"}</option>
{ for props.runners.iter().map(|(name, _status)| {
html! {
<option value={name.clone()} selected={name == &props.job_form.runner_name}>
<option value={name.clone()} selected={name == &props.job_form.runner}>
{name}
</option>
}
@@ -155,7 +155,7 @@ pub fn jobs(props: &JobsProps) -> Html {
<tr>
<td><small class="text-muted">{job_id}</small></td>
<td><code class="code">{job.payload()}</code></td>
<td>{job.runner_name()}</td>
<td>{job.runner()}</td>
<td>{job.executor()}</td>
<td class="action-cell">
<span class="status-badge">{"Queued"}</span>

View File

@@ -17,12 +17,12 @@ pub struct RunnersProps {
pub server_url: String,
pub runners: Vec<(String, String)>, // (name, status)
pub register_form: RegisterForm,
pub ping_states: HashMap<String, PingState>, // runner_name -> ping_state
pub ping_states: HashMap<String, PingState>, // runner -> ping_state
pub on_register_form_change: Callback<(String, String)>,
pub on_register_runner: Callback<()>,
pub on_load_runners: Callback<()>,
pub on_remove_runner: Callback<String>,
pub on_ping_runner: Callback<(String, String)>, // (runner_name, secret)
pub on_ping_runner: Callback<(String, String)>, // (runner, secret)
}
#[function_component(Runners)]
@@ -55,8 +55,8 @@ pub fn runners(props: &RunnersProps) -> Html {
&register_form.name,
&register_form.name, // queue = name
).await {
Ok(runner_name) => {
console::log!("Runner registered successfully:", runner_name);
Ok(runner) => {
console::log!("Runner registered successfully:", runner);
on_register_runner.emit(());
}
Err(e) => {

View File

@@ -103,13 +103,13 @@ impl SupervisorService {
}
/// Queue a job to a runner
pub async fn queue_job(&self, runner_name: &str, job: Job) -> ClientResult<()> {
self.client.borrow_mut().queue_job_to_runner(runner_name, job).await
pub async fn queue_job(&self, runner: &str, job: Job) -> ClientResult<()> {
self.client.borrow_mut().queue_job_to_runner(runner, job).await
}
/// Queue a job and wait for result
pub async fn queue_and_wait(&self, runner_name: &str, job: Job, timeout_secs: u64) -> ClientResult<Option<String>> {
self.client.borrow_mut().queue_and_wait(runner_name, job, timeout_secs).await
pub async fn queue_and_wait(&self, runner: &str, job: Job, timeout_secs: u64) -> ClientResult<Option<String>> {
self.client.borrow_mut().queue_and_wait(runner, job, timeout_secs).await
}
}

View File

@@ -2,8 +2,7 @@ use yew::prelude::*;
use wasm_bindgen::JsCast;
use wasm_bindgen_futures::spawn_local;
use gloo::console;
use hero_supervisor_openrpc_client::wasm::WasmSupervisorClient;
use hero_supervisor_openrpc_client::wasm::{WasmSupervisorClient, WasmJob};
#[derive(Clone, PartialEq)]
pub struct SupervisorInfo {
@@ -14,104 +13,167 @@ pub struct SupervisorInfo {
pub runners_count: usize,
}
#[derive(Clone, PartialEq, Debug)]
pub enum SessionSecretType {
None,
User,
Admin,
}
#[derive(Properties, PartialEq)]
pub struct SidebarProps {
pub server_url: String,
pub supervisor_info: Option<SupervisorInfo>,
pub admin_secret: String,
pub on_admin_secret_change: Callback<String>,
pub session_secret: String,
pub session_secret_type: SessionSecretType,
pub on_session_secret_change: Callback<(String, SessionSecretType)>,
pub on_supervisor_info_loaded: Callback<SupervisorInfo>,
}
#[function_component(Sidebar)]
pub fn sidebar(props: &SidebarProps) -> Html {
let is_unlocked = use_state(|| false);
let unlock_secret = use_state(|| String::new());
let session_secret_input = use_state(|| String::new());
let payload_input = use_state(|| String::new());
let admin_secrets = use_state(|| Vec::<String>::new());
let user_secrets = use_state(|| Vec::<String>::new());
let register_secrets = use_state(|| Vec::<String>::new());
let is_loading = use_state(|| false);
let on_unlock_secret_change = {
let unlock_secret = unlock_secret.clone();
let on_session_secret_change = {
let session_secret_input = session_secret_input.clone();
Callback::from(move |e: web_sys::Event| {
let input: web_sys::HtmlInputElement = e.target().unwrap().dyn_into().unwrap();
unlock_secret.set(input.value());
session_secret_input.set(input.value());
})
};
let on_unlock_submit = {
let unlock_secret = unlock_secret.clone();
let is_unlocked = is_unlocked.clone();
let on_session_secret_submit = {
let session_secret_input = session_secret_input.clone();
let is_loading = is_loading.clone();
let admin_secrets = admin_secrets.clone();
let user_secrets = user_secrets.clone();
let register_secrets = register_secrets.clone();
let server_url = props.server_url.clone();
let on_session_secret_change = props.on_session_secret_change.clone();
Callback::from(move |_: web_sys::MouseEvent| {
let unlock_secret = unlock_secret.clone();
let is_unlocked = is_unlocked.clone();
let is_loading = is_loading.clone();
let admin_secrets = admin_secrets.clone();
let user_secrets = user_secrets.clone();
let register_secrets = register_secrets.clone();
let server_url = server_url.clone();
let secret_value = (*unlock_secret).clone();
if secret_value.is_empty() {
let secret = (*session_secret_input).clone();
if secret.is_empty() {
return;
}
is_loading.set(true);
let client = WasmSupervisorClient::new(server_url.clone());
let session_secret_input = session_secret_input.clone();
let is_loading = is_loading.clone();
let admin_secrets = admin_secrets.clone();
let user_secrets = user_secrets.clone();
let register_secrets = register_secrets.clone();
let on_session_secret_change = on_session_secret_change.clone();
spawn_local(async move {
let client = WasmSupervisorClient::new(server_url);
// Try to load all secrets
match client.list_admin_secrets(&secret_value).await {
Ok(secrets) => {
admin_secrets.set(secrets);
// Try to get admin secrets first to determine if this is an admin secret
match client.list_admin_secrets(&secret).await {
Ok(admin_secret_list) => {
// This is an admin secret
admin_secrets.set(admin_secret_list);
// Load user secrets
if let Ok(user_secs) = client.list_user_secrets(&secret_value).await {
user_secrets.set(user_secs);
// Also load user and register secrets
if let Ok(user_secret_list) = client.list_user_secrets(&secret).await {
user_secrets.set(user_secret_list);
}
if let Ok(register_secret_list) = client.list_register_secrets(&secret).await {
register_secrets.set(register_secret_list);
}
// Load register secrets
if let Ok(reg_secs) = client.list_register_secrets(&secret_value).await {
register_secrets.set(reg_secs);
}
is_unlocked.set(true);
unlock_secret.set(String::new());
console::log!("Secrets unlocked successfully");
on_session_secret_change.emit((secret, SessionSecretType::Admin));
console::log!("Admin session established");
}
Err(e) => {
console::error!("Failed to unlock secrets:", format!("{:?}", e));
Err(_) => {
// Try as user secret - just test if we can make any call with it
match client.list_runners().await {
Ok(_) => {
// This appears to be a valid user secret
on_session_secret_change.emit((secret, SessionSecretType::User));
console::log!("User session established");
}
Err(e) => {
console::log!("Invalid secret:", format!("{:?}", e));
on_session_secret_change.emit((String::new(), SessionSecretType::None));
}
}
}
}
is_loading.set(false);
session_secret_input.set(String::new());
});
})
};
let on_lock_click = {
let is_unlocked = is_unlocked.clone();
let on_session_clear = {
let on_session_secret_change = props.on_session_secret_change.clone();
let admin_secrets = admin_secrets.clone();
let user_secrets = user_secrets.clone();
let register_secrets = register_secrets.clone();
Callback::from(move |_: web_sys::MouseEvent| {
is_unlocked.set(false);
on_session_secret_change.emit((String::new(), SessionSecretType::None));
admin_secrets.set(Vec::new());
user_secrets.set(Vec::new());
register_secrets.set(Vec::new());
console::log!("Secrets locked");
console::log!("Session cleared");
})
};
let on_payload_change = {
let payload_input = payload_input.clone();
Callback::from(move |e: web_sys::Event| {
let input: web_sys::HtmlInputElement = e.target().unwrap().dyn_into().unwrap();
payload_input.set(input.value());
})
};
let on_run_click = {
let payload_input = payload_input.clone();
let server_url = props.server_url.clone();
let session_secret = props.session_secret.clone();
let is_loading = is_loading.clone();
Callback::from(move |_: web_sys::MouseEvent| {
let payload = (*payload_input).clone();
if payload.is_empty() || session_secret.is_empty() {
return;
}
is_loading.set(true);
let client = WasmSupervisorClient::new(server_url.clone());
let payload_input = payload_input.clone();
let is_loading = is_loading.clone();
let session_secret = session_secret.clone();
spawn_local(async move {
// Create WasmJob object using constructor
let job = WasmJob::new(
uuid::Uuid::new_v4().to_string(),
payload.clone(),
"osis".to_string(),
"default".to_string(),
);
match client.create_job(session_secret.clone(), job).await {
Ok(job_id) => {
console::log!("Job created successfully:", job_id);
payload_input.set(String::new());
}
Err(e) => {
console::log!("Failed to create job:", format!("{:?}", e));
}
}
is_loading.set(false);
});
})
};
@@ -133,46 +195,76 @@ pub fn sidebar(props: &SidebarProps) -> Html {
</div>
</div>
// Secrets Management Section
<div class="secrets-section">
<div class="secrets-header">
<span class="secrets-title">{"Secrets"}</span>
if !*is_unlocked {
<button
class="unlock-btn"
onclick={on_unlock_submit}
disabled={*is_loading || unlock_secret.is_empty()}
>
<i class={if *is_loading { "fas fa-spinner fa-spin" } else { "fas fa-unlock" }}></i>
</button>
} else {
<button
class="lock-btn"
onclick={on_lock_click}
>
<i class="fas fa-lock"></i>
</button>
// Session Secret Management Section
<div class="session-section">
<div class="session-header">
<span class="session-title">{"Session"}</span>
{
match props.session_secret_type {
SessionSecretType::Admin => html! {
<span class="session-badge admin">{"Admin"}</span>
},
SessionSecretType::User => html! {
<span class="session-badge user">{"User"}</span>
},
SessionSecretType::None => html! {
<span class="session-badge none">{"None"}</span>
}
}
}
</div>
if !*is_unlocked {
<div class="unlock-input-row">
if props.session_secret_type == SessionSecretType::None {
<div class="session-input-row">
<input
type="password"
class="unlock-input"
placeholder="Enter admin secret to unlock"
value={(*unlock_secret).clone()}
onchange={on_unlock_secret_change}
class="session-input"
placeholder="Enter secret to establish session"
value={(*session_secret_input).clone()}
onchange={on_session_secret_change}
disabled={*is_loading}
/>
<button
class="session-btn"
onclick={on_session_secret_submit}
disabled={*is_loading || session_secret_input.is_empty()}
>
if *is_loading {
<i class="fas fa-spinner fa-spin"></i>
} else {
<i class="fas fa-sign-in-alt"></i>
}
</button>
</div>
} else {
<div class="session-active">
<div class="session-info">
<span class="session-secret-preview">
{format!("{}...", &props.session_secret[..std::cmp::min(8, props.session_secret.len())])}
</span>
<button
class="session-clear-btn"
onclick={on_session_clear}
>
<i class="fas fa-sign-out-alt"></i>
</button>
</div>
</div>
}
if *is_unlocked {
</div>
// Secrets Management Section (only visible for admin)
if props.session_secret_type == SessionSecretType::Admin {
<div class="secrets-section">
<div class="secrets-header">
<span class="secrets-title">{"Secrets Management"}</span>
</div>
<div class="secrets-content">
<div class="secret-group">
<div class="secret-header">
<span class="secret-title">{"Admin secrets"}</span>
<span class="secret-count">{admin_secrets.len()}</span>
</div>
<div class="secret-list">
{ for admin_secrets.iter().enumerate().map(|(i, secret)| {
@@ -185,22 +277,13 @@ pub fn sidebar(props: &SidebarProps) -> Html {
</div>
}
})}
<div class="secret-add-row">
<input
type="text"
class="secret-add-input"
placeholder="New admin secret"
/>
<button class="btn-icon btn-add">
<i class="fas fa-plus"></i>
</button>
</div>
</div>
</div>
<div class="secret-group">
<div class="secret-header">
<span class="secret-title">{"User secrets"}</span>
<span class="secret-count">{user_secrets.len()}</span>
</div>
<div class="secret-list">
{ for user_secrets.iter().enumerate().map(|(i, secret)| {
@@ -213,22 +296,13 @@ pub fn sidebar(props: &SidebarProps) -> Html {
</div>
}
})}
<div class="secret-add-row">
<input
type="text"
class="secret-add-input"
placeholder="New user secret"
/>
<button class="btn-icon btn-add">
<i class="fas fa-plus"></i>
</button>
</div>
</div>
</div>
<div class="secret-group">
<div class="secret-header">
<span class="secret-title">{"Register secrets"}</span>
<span class="secret-count">{register_secrets.len()}</span>
</div>
<div class="secret-list">
{ for register_secrets.iter().enumerate().map(|(i, secret)| {
@@ -241,27 +315,72 @@ pub fn sidebar(props: &SidebarProps) -> Html {
</div>
}
})}
<div class="secret-add-row">
<input
type="text"
class="secret-add-input"
placeholder="New register secret"
/>
<button class="btn-icon btn-add">
<i class="fas fa-plus"></i>
</button>
</div>
</div>
</div>
</div>
}
</div>
}
// Quick Actions Section
<div class="quick-actions">
<div class="quick-actions-header">
<span class="quick-actions-title">{"Quick Actions"}</span>
</div>
<div class="quick-actions-content">
if props.session_secret_type != SessionSecretType::None {
<div class="action-row">
<input
type="text"
class="action-input"
placeholder="Enter payload for job"
value={(*payload_input).clone()}
onchange={on_payload_change}
/>
<button
class="action-btn run-btn"
onclick={on_run_click}
disabled={payload_input.is_empty() || *is_loading}
>
if *is_loading {
<i class="fas fa-spinner fa-spin"></i>
} else {
<i class="fas fa-play"></i>
}
{"Run"}
</button>
</div>
} else {
<div class="action-disabled">
<span>{"Establish a session to enable quick actions"}</span>
</div>
}
</div>
</div>
if *is_unlocked {
<div class="save-section">
<button class="save-changes-btn">
{"Save Changes"}
</button>
// Supervisor Info Section
if let Some(info) = &props.supervisor_info {
<div class="supervisor-info">
<div class="supervisor-info-header">
<span class="supervisor-info-title">{"Supervisor Info"}</span>
</div>
<div class="supervisor-info-content">
<div class="info-item">
<span class="info-label">{"Admin secrets:"}</span>
<span class="info-value">{info.admin_secrets_count}</span>
</div>
<div class="info-item">
<span class="info-label">{"User secrets:"}</span>
<span class="info-value">{info.user_secrets_count}</span>
</div>
<div class="info-item">
<span class="info-label">{"Register secrets:"}</span>
<span class="info-value">{info.register_secrets_count}</span>
</div>
<div class="info-item">
<span class="info-label">{"Runners:"}</span>
<span class="info-value">{info.runners_count}</span>
</div>
</div>
</div>
}
</div>

View File

@@ -87,7 +87,7 @@ pub struct Job {
pub context_id: String,
pub payload: String,
pub job_type: JobType,
pub runner_name: String,
pub runner: String,
pub timeout: Option<u64>,
pub env_vars: HashMap<String, String>,
}
@@ -239,9 +239,9 @@ impl WasmSupervisorClient {
}
/// Queue a job to a specific runner
pub async fn queue_job_to_runner(&mut self, runner_name: &str, job: Job) -> WasmClientResult<()> {
pub async fn queue_job_to_runner(&mut self, runner: &str, job: Job) -> WasmClientResult<()> {
let params = json!({
"runner_name": runner_name,
"runner": runner,
"job": job
});
self.make_request("queue_job_to_runner", params).await
@@ -250,12 +250,12 @@ impl WasmSupervisorClient {
/// Queue a job to a specific runner and wait for the result
pub async fn queue_and_wait(
&mut self,
runner_name: &str,
runner: &str,
job: Job,
timeout_secs: u64,
) -> WasmClientResult<Option<String>> {
let params = json!({
"runner_name": runner_name,
"runner": runner,
"job": job,
"timeout_secs": timeout_secs
});
@@ -293,7 +293,7 @@ pub struct JobBuilder {
context_id: Option<String>,
payload: Option<String>,
job_type: Option<JobType>,
runner_name: Option<String>,
runner: Option<String>,
timeout: Option<u64>,
env_vars: HashMap<String, String>,
}
@@ -329,8 +329,8 @@ impl JobBuilder {
}
/// Set the runner name for this job
pub fn runner_name(mut self, runner_name: impl Into<String>) -> Self {
self.runner_name = Some(runner_name.into());
pub fn runner(mut self, runner: impl Into<String>) -> Self {
self.runner = Some(runner.into());
self
}
@@ -368,8 +368,8 @@ impl JobBuilder {
job_type: self.job_type.ok_or_else(|| WasmClientError::Server {
message: "job_type is required".to_string(),
})?,
runner_name: self.runner_name.ok_or_else(|| WasmClientError::Server {
message: "runner_name is required".to_string(),
runner: self.runner.ok_or_else(|| WasmClientError::Server {
message: "runner is required".to_string(),
})?,
timeout: self.timeout,
env_vars: self.env_vars,

View File

@@ -54,7 +54,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
.context_id("example_context")
.payload("print('Hello from Hero Supervisor!');")
.job_type(JobType::OSIS)
.runner_name("my_actor")
.runner("my_actor")
.timeout(Duration::from_secs(60))
.build()?;
@@ -112,7 +112,7 @@ let job = JobBuilder::new()
.context_id("context_id")
.payload("script_content")
.job_type(JobType::OSIS)
.runner_name("target_actor")
.runner("target_actor")
.timeout(Duration::from_secs(300))
.env_var("KEY", "value")
.build()?;

View File

@@ -138,7 +138,7 @@ impl App {
description: "Job ID".to_string(),
},
RpcParam {
name: "runner_name".to_string(),
name: "runner".to_string(),
param_type: "String".to_string(),
required: true,
description: "Name of the runner to execute the job".to_string(),
@@ -374,7 +374,7 @@ impl App {
match param.name.as_str() {
"secret" => params["secret"] = value,
"job_id" => params["job_id"] = value,
"runner_name" => params["runner_name"] = value,
"runner" => params["runner"] = value,
"payload" => params["payload"] = value,
_ => {}
}
@@ -536,10 +536,10 @@ impl App {
}
}
"run_job" => {
if let (Some(secret), Some(job_id), Some(runner_name), Some(payload)) = (
if let (Some(secret), Some(job_id), Some(runner), Some(payload)) = (
params.get("secret").and_then(|v| v.as_str()),
params.get("job_id").and_then(|v| v.as_str()),
params.get("runner_name").and_then(|v| v.as_str()),
params.get("runner").and_then(|v| v.as_str()),
params.get("payload").and_then(|v| v.as_str())
) {
// Create a job object
@@ -549,7 +549,7 @@ impl App {
"context_id": "cli_context",
"payload": payload,
"job_type": "SAL",
"runner_name": runner_name,
"runner": runner,
"timeout": 30000000000u64, // 30 seconds in nanoseconds
"env_vars": {},
"created_at": chrono::Utc::now().to_rfc3339(),
@@ -567,7 +567,7 @@ impl App {
}
} else {
Err(hero_supervisor_openrpc_client::ClientError::from(
serde_json::Error::io(std::io::Error::new(std::io::ErrorKind::InvalidInput, "Missing required parameters: secret, job_id, runner_name, payload"))
serde_json::Error::io(std::io::Error::new(std::io::ErrorKind::InvalidInput, "Missing required parameters: secret, job_id, runner, payload"))
))
}
}

View File

@@ -28,7 +28,6 @@
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::time::Duration;
use thiserror::Error;
use serde_json;
use uuid::Uuid;
@@ -157,32 +156,34 @@ pub struct RunnerConfig {
pub redis_url: String,
}
/// Job type enumeration that maps to runner types
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum JobType {
/// SAL job type
SAL,
/// OSIS job type
OSIS,
/// V job type
V,
/// Python job type
Python,
}
/// Job status enumeration
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum JobStatus {
/// Job has been created but not yet dispatched
Created,
/// Job has been dispatched to a worker queue
Dispatched,
/// Job is currently being executed
WaitingForPrerequisites,
Started,
/// Job completed successfully
Finished,
/// Job completed with an error
Error,
Stopping,
Finished,
}
/// Job result response
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(untagged)]
pub enum JobResult {
Success { success: String },
Error { error: String },
}
/// Job status response
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct JobStatusResponse {
pub job_id: String,
pub status: String,
pub created_at: String,
pub started_at: Option<String>,
pub completed_at: Option<String>,
}
/// Job structure for creating and managing jobs
@@ -196,20 +197,18 @@ pub struct Job {
pub context_id: String,
/// Script content or payload to execute
pub payload: String,
/// Type of job (determines which actor will process it)
pub job_type: JobType,
/// Name of the specific runner/actor to execute this job
pub runner_name: String,
/// Current status of the job
pub status: JobStatus,
pub runner: String,
/// Name of the executor the runner will use to execute this job
pub executor: String,
/// Job execution timeout (in seconds)
pub timeout: u64,
/// Environment variables for job execution
pub env_vars: HashMap<String, String>,
/// Timestamp when the job was created
pub created_at: String,
/// Timestamp when the job was last updated
pub updated_at: String,
/// Job execution timeout
pub timeout: Duration,
/// Environment variables for job execution
pub env_vars: HashMap<String, String>,
}
/// Process status wrapper for OpenRPC serialization (matches server response)
@@ -257,7 +256,7 @@ impl SupervisorClient {
let server_url = server_url.into();
let client = HttpClientBuilder::default()
.request_timeout(Duration::from_secs(30))
.request_timeout(std::time::Duration::from_secs(30))
.build(&server_url)
.map_err(|e| ClientError::Http(e.to_string()))?;
@@ -299,15 +298,83 @@ impl SupervisorClient {
Ok(())
}
/// Run a job on the appropriate runner
pub async fn run_job(
/// Create a new job without queuing it to a runner
pub async fn jobs_create(
&self,
secret: &str,
job: serde_json::Value,
) -> ClientResult<Option<String>> {
let result: Option<String> = self
job: Job,
) -> ClientResult<String> {
let params = serde_json::json!({
"secret": secret,
"job": job
});
let job_id: String = self
.client
.request("run_job", rpc_params![secret, job])
.request("jobs.create", rpc_params![params])
.await.map_err(|e| ClientError::JsonRpc(e))?;
Ok(job_id)
}
/// List all jobs
pub async fn jobs_list(&self) -> ClientResult<Vec<Job>> {
let jobs: Vec<Job> = self
.client
.request("jobs.list", rpc_params![])
.await.map_err(|e| ClientError::JsonRpc(e))?;
Ok(jobs)
}
/// Run a job on the appropriate runner and return the result
pub async fn job_run(
&self,
secret: &str,
job: Job,
) -> ClientResult<JobResult> {
let params = serde_json::json!({
"secret": secret,
"job": job
});
let result: JobResult = self
.client
.request("job.run", rpc_params![params])
.await.map_err(|e| ClientError::JsonRpc(e))?;
Ok(result)
}
/// Start a previously created job by queuing it to its assigned runner
pub async fn job_start(
&self,
secret: &str,
job_id: &str,
) -> ClientResult<()> {
let params = serde_json::json!({
"secret": secret,
"job_id": job_id
});
let _: () = self
.client
.request("job.start", rpc_params![params])
.await.map_err(|e| ClientError::JsonRpc(e))?;
Ok(())
}
/// Get the current status of a job
pub async fn job_status(&self, job_id: &str) -> ClientResult<JobStatusResponse> {
let status: JobStatusResponse = self
.client
.request("job.status", rpc_params![job_id])
.await.map_err(|e| ClientError::JsonRpc(e))?;
Ok(status)
}
/// Get the result of a completed job (blocks until result is available)
pub async fn job_result(&self, job_id: &str) -> ClientResult<JobResult> {
let result: JobResult = self
.client
.request("job.result", rpc_params![job_id])
.await.map_err(|e| ClientError::JsonRpc(e))?;
Ok(result)
}
@@ -347,6 +414,15 @@ impl SupervisorClient {
.await.map_err(|e| ClientError::JsonRpc(e))?;
Ok(())
}
/// Add a runner to the supervisor
pub async fn add_runner(&self, config: RunnerConfig, process_manager: ProcessManagerType) -> ClientResult<()> {
let _: () = self
.client
.request("add_runner", rpc_params![config, process_manager])
.await.map_err(|e| ClientError::JsonRpc(e))?;
Ok(())
}
/// Get status of a specific runner
pub async fn get_runner_status(&self, actor_id: &str) -> ClientResult<ProcessStatus> {
@@ -408,9 +484,9 @@ impl SupervisorClient {
}
/// Queue a job to a specific runner
pub async fn queue_job_to_runner(&self, runner_name: &str, job: Job) -> ClientResult<()> {
pub async fn queue_job_to_runner(&self, runner: &str, job: Job) -> ClientResult<()> {
let params = serde_json::json!({
"runner_name": runner_name,
"runner": runner,
"job": job
});
@@ -423,9 +499,9 @@ impl SupervisorClient {
/// Queue a job to a specific runner and wait for the result
/// This implements the proper Hero job protocol with BLPOP on reply queue
pub async fn queue_and_wait(&self, runner_name: &str, job: Job, timeout_secs: u64) -> ClientResult<Option<String>> {
pub async fn queue_and_wait(&self, runner: &str, job: Job, timeout_secs: u64) -> ClientResult<Option<String>> {
let params = serde_json::json!({
"runner_name": runner_name,
"runner": runner,
"job": job,
"timeout_secs": timeout_secs
});
@@ -573,6 +649,30 @@ impl SupervisorClient {
Ok(info)
}
/// Stop a running job
pub async fn job_stop(&self, secret: &str, job_id: &str) -> ClientResult<()> {
let params = serde_json::json!({
"secret": secret,
"job_id": job_id
});
self.client
.request("job.stop", rpc_params![params])
.await.map_err(|e| ClientError::JsonRpc(e))
}
/// Delete a job from the system
pub async fn job_delete(&self, secret: &str, job_id: &str) -> ClientResult<()> {
let params = serde_json::json!({
"secret": secret,
"job_id": job_id
});
self.client
.request("job.delete", rpc_params![params])
.await.map_err(|e| ClientError::JsonRpc(e))
}
/// Get supervisor information including secret counts
pub async fn get_supervisor_info(&self, admin_secret: &str) -> ClientResult<SupervisorInfo> {
let info: SupervisorInfo = self
@@ -588,9 +688,9 @@ pub struct JobBuilder {
caller_id: String,
context_id: String,
payload: String,
job_type: JobType,
runner_name: String,
timeout: Duration,
runner: String,
executor: String,
timeout: u64, // timeout in seconds
env_vars: HashMap<String, String>,
}
@@ -601,9 +701,9 @@ impl JobBuilder {
caller_id: "".to_string(),
context_id: "".to_string(),
payload: "".to_string(),
job_type: JobType::SAL, // default
runner_name: "".to_string(),
timeout: Duration::from_secs(300), // 5 minutes default
runner: "".to_string(),
executor: "".to_string(),
timeout: 300, // 5 minutes default
env_vars: HashMap::new(),
}
}
@@ -626,20 +726,20 @@ impl JobBuilder {
self
}
/// Set the job type
pub fn job_type(mut self, job_type: JobType) -> Self {
self.job_type = job_type;
/// Set the executor for this job
pub fn executor(mut self, executor: impl Into<String>) -> Self {
self.executor = executor.into();
self
}
/// Set the runner name for this job
pub fn runner_name(mut self, runner_name: impl Into<String>) -> Self {
self.runner_name = runner_name.into();
pub fn runner(mut self, runner: impl Into<String>) -> Self {
self.runner = runner.into();
self
}
/// Set the timeout for job execution
pub fn timeout(mut self, timeout: Duration) -> Self {
/// Set the timeout for job execution (in seconds)
pub fn timeout(mut self, timeout: u64) -> Self {
self.timeout = timeout;
self
}
@@ -673,9 +773,14 @@ impl JobBuilder {
message: "payload is required".to_string(),
});
}
if self.runner_name.is_empty() {
if self.runner.is_empty() {
return Err(ClientError::Server {
message: "runner_name is required".to_string(),
message: "runner is required".to_string(),
});
}
if self.executor.is_empty() {
return Err(ClientError::Server {
message: "executor is required".to_string(),
});
}
@@ -686,13 +791,12 @@ impl JobBuilder {
caller_id: self.caller_id,
context_id: self.context_id,
payload: self.payload,
job_type: self.job_type,
runner_name: self.runner_name,
status: JobStatus::Created,
created_at: now.clone(),
updated_at: now,
runner: self.runner,
executor: self.executor,
timeout: self.timeout,
env_vars: self.env_vars,
created_at: now.clone(),
updated_at: now,
})
}
}
@@ -722,9 +826,9 @@ mod tests {
.caller_id("test_client")
.context_id("test_context")
.payload("print('Hello, World!');")
.job_type(JobType::OSIS)
.runner_name("test_runner")
.timeout(Duration::from_secs(60))
.executor("osis")
.runner("test_runner")
.timeout(60)
.env_var("TEST_VAR", "test_value")
.build();
@@ -734,11 +838,10 @@ mod tests {
assert_eq!(job.caller_id, "test_client");
assert_eq!(job.context_id, "test_context");
assert_eq!(job.payload, "print('Hello, World!');");
assert_eq!(job.job_type, JobType::OSIS);
assert_eq!(job.runner_name, "test_runner");
assert_eq!(job.timeout, Duration::from_secs(60));
assert_eq!(job.executor, "osis");
assert_eq!(job.runner, "test_runner");
assert_eq!(job.timeout, 60);
assert_eq!(job.env_vars.get("TEST_VAR"), Some(&"test_value".to_string()));
assert_eq!(job.status, JobStatus::Created);
}
#[test]
@@ -747,7 +850,7 @@ mod tests {
let result = JobBuilder::new()
.context_id("test")
.payload("test")
.runner_name("test")
.runner("test")
.build();
assert!(result.is_err());
@@ -755,7 +858,7 @@ mod tests {
let result = JobBuilder::new()
.caller_id("test")
.payload("test")
.runner_name("test")
.runner("test")
.build();
assert!(result.is_err());
@@ -763,15 +866,26 @@ mod tests {
let result = JobBuilder::new()
.caller_id("test")
.context_id("test")
.runner_name("test")
.runner("test")
.executor("test")
.build();
assert!(result.is_err());
// Missing runner_name
// Missing runner
let result = JobBuilder::new()
.caller_id("test")
.context_id("test")
.payload("test")
.executor("test")
.build();
assert!(result.is_err());
// Missing executor
let result = JobBuilder::new()
.caller_id("test")
.context_id("test")
.payload("test")
.runner("test")
.build();
assert!(result.is_err());
}
@@ -885,7 +999,7 @@ mod client_tests {
assert_eq!(job.id(), "test-id");
assert_eq!(job.payload(), "test payload");
assert_eq!(job.job_type(), "SAL");
assert_eq!(job.runner_name(), "test-runner");
assert_eq!(job.runner(), "test-runner");
assert_eq!(job.caller_id(), "wasm_client");
assert_eq!(job.context_id(), "wasm_context");
assert_eq!(job.timeout_secs(), 30);
@@ -940,7 +1054,7 @@ mod client_tests {
assert_eq!(job.id(), "func-test-id");
assert_eq!(job.payload(), "func test payload");
assert_eq!(job.job_type(), "OSIS");
assert_eq!(job.runner_name(), "func-test-runner");
assert_eq!(job.runner(), "func-test-runner");
}
#[wasm_bindgen_test]

View File

@@ -95,7 +95,7 @@ pub struct WasmJob {
caller_id: String,
context_id: String,
payload: String,
runner_name: String,
runner: String,
executor: String,
timeout_secs: u64,
env_vars: String, // JSON string of HashMap<String, String>
@@ -138,8 +138,8 @@ impl WasmSupervisorClient {
match self.call_method("register_runner", params).await {
Ok(result) => {
// Extract the runner name from the result
if let Some(runner_name) = result.as_str() {
Ok(runner_name.to_string())
if let Some(runner) = result.as_str() {
Ok(runner.to_string())
} else {
Err(JsValue::from_str("Invalid response format: expected runner name"))
}
@@ -159,7 +159,7 @@ impl WasmSupervisorClient {
"caller_id": job.caller_id,
"context_id": job.context_id,
"payload": job.payload,
"runner_name": job.runner_name,
"runner": job.runner,
"executor": job.executor,
"timeout": {
"secs": job.timeout_secs,
@@ -194,7 +194,7 @@ impl WasmSupervisorClient {
"caller_id": job.caller_id,
"context_id": job.context_id,
"payload": job.payload,
"runner_name": job.runner_name,
"runner": job.runner,
"executor": job.executor,
"timeout": {
"secs": job.timeout_secs,
@@ -258,7 +258,7 @@ impl WasmSupervisorClient {
let caller_id = job_value.get("caller_id").and_then(|v| v.as_str()).unwrap_or("").to_string();
let context_id = job_value.get("context_id").and_then(|v| v.as_str()).unwrap_or("").to_string();
let payload = job_value.get("payload").and_then(|v| v.as_str()).unwrap_or("").to_string();
let runner_name = job_value.get("runner_name").and_then(|v| v.as_str()).unwrap_or("").to_string();
let runner = job_value.get("runner").and_then(|v| v.as_str()).unwrap_or("").to_string();
let executor = job_value.get("executor").and_then(|v| v.as_str()).unwrap_or("").to_string();
let timeout_secs = job_value.get("timeout").and_then(|v| v.get("secs")).and_then(|v| v.as_u64()).unwrap_or(30);
let env_vars = job_value.get("env_vars").map(|v| v.to_string()).unwrap_or_else(|| "{}".to_string());
@@ -270,7 +270,7 @@ impl WasmSupervisorClient {
caller_id,
context_id,
payload,
runner_name,
runner,
executor,
timeout_secs,
env_vars,
@@ -477,14 +477,14 @@ impl WasmSupervisorClient {
impl WasmJob {
/// Create a new job with default values
#[wasm_bindgen(constructor)]
pub fn new(id: String, payload: String, executor: String, runner_name: String) -> Self {
pub fn new(id: String, payload: String, executor: String, runner: String) -> Self {
let now = js_sys::Date::new_0().to_iso_string().as_string().unwrap();
Self {
id,
caller_id: "wasm_client".to_string(),
context_id: "wasm_context".to_string(),
payload,
runner_name,
runner,
executor,
timeout_secs: 30,
env_vars: "{}".to_string(),
@@ -555,8 +555,8 @@ impl WasmJob {
/// Get the runner name
#[wasm_bindgen(getter)]
pub fn runner_name(&self) -> String {
self.runner_name.clone()
pub fn runner(&self) -> String {
self.runner.clone()
}
/// Get the timeout in seconds
@@ -657,8 +657,8 @@ pub fn init() {
/// Utility function to create a job from JavaScript
/// Create a new job (convenience function for JavaScript)
#[wasm_bindgen]
pub fn create_job(id: String, payload: String, executor: String, runner_name: String) -> WasmJob {
WasmJob::new(id, payload, executor, runner_name)
pub fn create_job(id: String, payload: String, executor: String, runner: String) -> WasmJob {
WasmJob::new(id, payload, executor, runner)
}
/// Utility function to create a client from JavaScript

View File

@@ -2,7 +2,7 @@
//!
//! Main supervisor binary that manages multiple actors and listens to jobs over Redis.
//! The supervisor builds with actor configuration, starts actors, and dispatches jobs
//! to the appropriate runners based on the job's runner_name field.
//! to the appropriate runners based on the job's runner field.

280
docs/README.md Normal file
View File

@@ -0,0 +1,280 @@
# Hero Supervisor Documentation
## Overview
Hero Supervisor is a distributed job execution system that manages runners and coordinates job processing across multiple worker nodes. It provides a robust OpenRPC API for job management and runner administration.
## Architecture
The supervisor consists of several key components:
- **Supervisor Core**: Central coordinator that manages runners and job dispatch
- **OpenRPC Server**: JSON-RPC API server for remote management
- **Redis Backend**: Job queue and state management
- **Process Manager**: Runner lifecycle management (Simple or Tmux)
- **Client Libraries**: Native Rust and WASM clients for integration
## Quick Start
### Starting the Supervisor
```bash
# With default configuration
./supervisor
# With custom configuration file
./supervisor --config /path/to/config.toml
```
### Example Configuration
```toml
# config.toml
redis_url = "redis://localhost:6379"
namespace = "hero"
bind_address = "127.0.0.1"
port = 3030
# Admin secrets for full access
admin_secrets = ["admin-secret-123"]
# User secrets for job operations
user_secrets = ["user-secret-456"]
# Register secrets for runner registration
register_secrets = ["register-secret-789"]
[[actors]]
id = "sal_runner_1"
name = "sal_runner_1"
binary_path = "/path/to/sal_runner"
db_path = "/tmp/sal_db"
redis_url = "redis://localhost:6379"
process_manager = "simple"
[[actors]]
id = "osis_runner_1"
name = "osis_runner_1"
binary_path = "/path/to/osis_runner"
db_path = "/tmp/osis_db"
redis_url = "redis://localhost:6379"
process_manager = "tmux:osis_session"
```
## API Documentation
### Job API Convention
The Hero Supervisor follows a consistent naming convention for job operations:
- **`jobs.`** - General job operations (create, list)
- **`job.`** - Specific job operations (run, start, status, result)
See [Job API Convention](job-api-convention.md) for detailed documentation.
### Core Methods
#### Runner Management
- `register_runner` - Register a new runner
- `list_runners` - List all registered runners
- `start_runner` / `stop_runner` - Control runner lifecycle
- `get_runner_status` - Get runner status
- `get_runner_logs` - Retrieve runner logs
#### Job Management
- `jobs.create` - Create a job without queuing
- `jobs.list` - List all jobs with full details
- `job.run` - Run a job and return result
- `job.start` - Start a created job
- `job.stop` - Stop a running job
- `job.delete` - Delete a job from the system
- `job.status` - Get job status (non-blocking)
- `job.result` - Get job result (blocking)
#### Administration
- `add_secret` / `remove_secret` - Manage authentication secrets
- `get_supervisor_info` - Get system information
- `rpc.discover` - OpenRPC specification discovery
## Client Usage
### Rust Client
```rust
use hero_supervisor_openrpc_client::{SupervisorClient, JobBuilder};
// Create client
let client = SupervisorClient::new("http://localhost:3030")?;
// Create a job
let job = JobBuilder::new()
.caller_id("my_client")
.context_id("my_context")
.payload("print('Hello World')")
.executor("osis")
.runner("osis_runner_1")
.timeout(60)
.build()?;
// Option 1: Fire-and-forget execution
let result = client.job_run("user-secret", job.clone()).await?;
match result {
JobResult::Success { success } => println!("Output: {}", success),
JobResult::Error { error } => println!("Error: {}", error),
}
// Option 2: Asynchronous execution
let job_id = client.jobs_create("user-secret", job).await?;
client.job_start("user-secret", &job_id).await?;
// Poll for completion
loop {
let status = client.job_status(&job_id).await?;
if status.status == "completed" || status.status == "failed" {
break;
}
tokio::time::sleep(Duration::from_secs(1)).await;
}
let result = client.job_result(&job_id).await?;
// Option 3: Job management
// Stop a running job
client.job_stop("user-secret", &job_id).await?;
// Delete a job
client.job_delete("user-secret", &job_id).await?;
// List all jobs (returns full Job objects)
let jobs = client.jobs_list("user-secret").await?;
for job in jobs {
println!("Job {}: {} ({})", job.id, job.executor, job.payload);
}
```
### WASM Client
```javascript
import { WasmSupervisorClient, WasmJob } from 'hero-supervisor-openrpc-client';
// Create client
const client = new WasmSupervisorClient('http://localhost:3030');
// Create and run job
const job = new WasmJob('job-id', 'print("Hello")', 'osis', 'osis_runner_1');
const result = await client.create_job('user-secret', job);
```
## Security
### Authentication Levels
1. **Admin Secrets**: Full system access
- All runner management operations
- All job operations
- Secret management
- System information access
2. **User Secrets**: Job operations only
- Create, run, start jobs
- Get job status and results
- No runner or secret management
3. **Register Secrets**: Runner registration only
- Register new runners
- No other operations
### Best Practices
- Use different secret types for different access levels
- Rotate secrets regularly
- Store secrets securely (environment variables, secret management systems)
- Use HTTPS in production environments
- Implement proper logging and monitoring
## Development
### Building
```bash
# Build supervisor binary
cargo build --release
# Build with OpenRPC feature
cargo build --release --features openrpc
# Build client library
cd clients/openrpc
cargo build --release
```
### Testing
```bash
# Run tests
cargo test
# Run with Redis (requires Redis server)
docker run -d -p 6379:6379 redis:alpine
cargo test -- --ignored
```
### Examples
See the `examples/` directory for:
- Basic supervisor setup
- Mock runner implementation
- Comprehensive OpenRPC client usage
- Configuration examples
## Troubleshooting
### Common Issues
1. **Redis Connection Failed**
- Ensure Redis server is running
- Check Redis URL in configuration
- Verify network connectivity
2. **Runner Registration Failed**
- Check register secret validity
- Verify runner binary path exists
- Ensure runner has proper permissions
3. **Job Execution Timeout**
- Increase job timeout value
- Check runner resource availability
- Monitor runner logs for issues
4. **OpenRPC Method Not Found**
- Verify method name spelling
- Check OpenRPC specification
- Ensure server supports the method
### Logging
Enable debug logging:
```bash
RUST_LOG=debug ./supervisor --config config.toml
```
### Monitoring
Monitor key metrics:
- Runner status and health
- Job queue lengths
- Job success/failure rates
- Response times
- Redis connection status
## Contributing
1. Fork the repository
2. Create a feature branch
3. Make changes with tests
4. Update documentation
5. Submit a pull request
## License
[License information here]

333
docs/job-api-convention.md Normal file
View File

@@ -0,0 +1,333 @@
# Hero Supervisor Job API Convention
## Overview
The Hero Supervisor OpenRPC API follows a consistent naming convention for job-related operations:
- **`jobs.`** - General job operations (plural)
- **`job.`** - Specific job operations (singular)
This convention provides a clear distinction between operations that work with multiple jobs or create new jobs versus operations that work with a specific existing job.
## API Methods
### General Job Operations (`jobs.`)
#### `jobs.create`
Creates a new job without immediately queuing it to a runner.
**Parameters:**
- `secret` (string): Authentication secret (admin or user)
- `job` (Job object): Complete job specification
**Returns:**
- `job_id` (string): Unique identifier of the created job
**Usage:**
```json
{
"method": "jobs.create",
"params": {
"secret": "your-secret",
"job": {
"id": "job-123",
"caller_id": "client-1",
"context_id": "context-1",
"payload": "print('Hello World')",
"executor": "osis",
"runner": "osis-runner-1",
"timeout": 300,
"env_vars": {},
"created_at": "2023-01-01T00:00:00Z",
"updated_at": "2023-01-01T00:00:00Z"
}
}
}
```
#### `jobs.list`
Lists all jobs in the system with full details.
**Parameters:** None
**Returns:**
- `jobs` (array of Job objects): List of all jobs with complete information
**Usage:**
```json
{
"method": "jobs.list",
"params": []
}
```
**Response:**
```json
[
{
"id": "job-123",
"caller_id": "client-1",
"context_id": "context-1",
"payload": "print('Hello World')",
"executor": "osis",
"runner": "osis-runner-1",
"timeout": 300,
"env_vars": {},
"created_at": "2023-01-01T00:00:00Z",
"updated_at": "2023-01-01T00:00:00Z"
}
]
```
### Specific Job Operations (`job.`)
#### `job.run`
Runs a job immediately on the appropriate runner and returns the result.
**Parameters:**
- `secret` (string): Authentication secret (admin or user)
- `job` (Job object): Complete job specification
**Returns:**
- `result` (JobResult): Either success or error result
**JobResult Format:**
```json
// Success case
{
"success": "Job completed successfully with output..."
}
// Error case
{
"error": "Job failed with error message..."
}
```
**Usage:**
```json
{
"method": "job.run",
"params": {
"secret": "your-secret",
"job": { /* job object */ }
}
}
```
#### `job.start`
Starts a previously created job by queuing it to its assigned runner.
**Parameters:**
- `secret` (string): Authentication secret (admin or user)
- `job_id` (string): ID of the job to start
**Returns:** `null` (void)
**Usage:**
```json
{
"method": "job.start",
"params": {
"secret": "your-secret",
"job_id": "job-123"
}
}
```
#### `job.status`
Gets the current status of a job.
**Parameters:**
- `job_id` (string): ID of the job to check
**Returns:**
- `status` (JobStatusResponse): Current job status information
**JobStatusResponse Format:**
```json
{
"job_id": "job-123",
"status": "running",
"created_at": "2023-01-01T00:00:00Z",
"started_at": "2023-01-01T00:00:05Z",
"completed_at": null
}
```
**Status Values:**
- `created` - Job has been created but not queued
- `queued` - Job has been queued to a runner
- `running` - Job is currently executing
- `completed` - Job finished successfully
- `failed` - Job failed with an error
- `timeout` - Job timed out
**Usage:**
```json
{
"method": "job.status",
"params": ["job-123"]
}
```
#### `job.result`
Gets the result of a completed job. This method blocks until the result is available.
**Parameters:**
- `job_id` (string): ID of the job to get results for
**Returns:**
- `result` (JobResult): Either success or error result
**Usage:**
```json
{
"method": "job.result",
"params": ["job-123"]
}
```
#### `job.stop`
Stops a running job.
**Parameters:**
- `secret` (string): Authentication secret (admin or user)
- `job_id` (string): ID of the job to stop
**Returns:** `null` (void)
**Usage:**
```json
{
"method": "job.stop",
"params": {
"secret": "your-secret",
"job_id": "job-123"
}
}
```
#### `job.delete`
Deletes a job from the system.
**Parameters:**
- `secret` (string): Authentication secret (admin or user)
- `job_id` (string): ID of the job to delete
**Returns:** `null` (void)
**Usage:**
```json
{
"method": "job.delete",
"params": {
"secret": "your-secret",
"job_id": "job-123"
}
}
```
## Workflow Examples
### Fire-and-Forget Job
```javascript
// Create and immediately run a job
const result = await client.job_run(secret, jobSpec);
if (result.success) {
console.log("Job completed:", result.success);
} else {
console.error("Job failed:", result.error);
}
```
### Asynchronous Job Processing
```javascript
// 1. Create the job
const jobId = await client.jobs_create(secret, jobSpec);
// 2. Start the job
await client.job_start(secret, jobId);
// 3. Poll for completion (non-blocking)
let status;
do {
status = await client.job_status(jobId);
if (status.status === 'running') {
await sleep(1000); // Wait 1 second
}
} while (status.status === 'running' || status.status === 'queued');
// 4. Get the result
const result = await client.job_result(jobId);
```
### Batch Job Management
```javascript
// Create multiple jobs
const jobIds = [];
for (const jobSpec of jobSpecs) {
const jobId = await client.jobs_create(secret, jobSpec);
jobIds.push(jobId);
}
// Start all jobs
for (const jobId of jobIds) {
await client.job_start(secret, jobId);
}
// Monitor progress
const results = [];
for (const jobId of jobIds) {
const result = await client.job_result(jobId); // Blocks until complete
results.push(result);
}
// Optional: Stop or delete jobs if needed
for (const jobId of jobIds) {
await client.job_stop(secret, jobId); // Stop running job
await client.job_delete(secret, jobId); // Delete from system
}
```
## Authentication
All job operations require authentication using one of the following secret types:
- **Admin secrets**: Full access to all operations
- **User secrets**: Access to job operations (`jobs.create`, `job.run`, `job.start`)
- **Register secrets**: Only access to runner registration
## Error Handling
All methods return standard JSON-RPC error responses for:
- **Authentication errors** (-32602): Invalid or missing secrets
- **Job not found errors** (-32000): Job ID doesn't exist
- **Internal errors** (-32603): Server-side processing errors
## Migration from Legacy API
### Old → New Method Names
| Legacy Method | New Method | Notes |
|---------------|------------|-------|
| `run_job` | `job.run` | Same functionality, new naming |
| `list_jobs` | `jobs.list` | Same functionality, new naming |
| `create_job` | `jobs.create` | Enhanced to not auto-queue |
### New Methods Added
- `job.start` - Start a created job
- `job.stop` - Stop a running job
- `job.delete` - Delete a job from the system
- `job.status` - Get job status (non-blocking)
- `job.result` - Get job result (blocking)
### API Changes
- **Job struct**: Replaced `job_type` field with `executor`
- **jobs.list**: Now returns full Job objects instead of just job IDs
- **Enhanced job lifecycle**: Added stop and delete operations
This provides much more granular control over job lifecycle management.

View File

@@ -3,7 +3,27 @@
"info": {
"title": "Hero Supervisor OpenRPC API",
"version": "1.0.0",
"description": "OpenRPC API for managing Hero Supervisor runners and jobs"
"description": "OpenRPC API for managing Hero Supervisor runners and jobs. Job operations follow the convention: 'jobs.' for general operations and 'job.' for specific job operations."
},
"components": {
"schemas": {
"Job": {
"type": "object",
"properties": {
"id": { "type": "string" },
"caller_id": { "type": "string" },
"context_id": { "type": "string" },
"payload": { "type": "string" },
"runner": { "type": "string" },
"executor": { "type": "string" },
"timeout": { "type": "number" },
"env_vars": { "type": "object" },
"created_at": { "type": "string" },
"updated_at": { "type": "string" }
},
"required": ["id", "caller_id", "context_id", "payload", "runner", "executor", "timeout", "env_vars", "created_at", "updated_at"]
}
}
},
"methods": [
{
@@ -41,8 +61,8 @@
}
},
{
"name": "run_job",
"description": "Run a job on the appropriate runner",
"name": "jobs.create",
"description": "Create a new job without queuing it to a runner",
"params": [
{
"name": "params",
@@ -51,20 +71,42 @@
"properties": {
"secret": { "type": "string" },
"job": {
"type": "object",
"properties": {
"id": { "type": "string" },
"caller_id": { "type": "string" },
"context_id": { "type": "string" },
"payload": { "type": "string" },
"job_type": { "type": "string" },
"runner_name": { "type": "string" },
"timeout": { "type": "number" },
"env_vars": { "type": "object" },
"created_at": { "type": "string" },
"updated_at": { "type": "string" }
},
"required": ["id", "caller_id", "context_id", "payload", "job_type", "runner_name", "timeout", "env_vars", "created_at", "updated_at"]
"$ref": "#/components/schemas/Job"
}
},
"required": ["secret", "job"]
}
}
],
"result": {
"name": "job_id",
"schema": { "type": "string" }
}
},
{
"name": "jobs.list",
"description": "List all jobs",
"params": [],
"result": {
"name": "jobs",
"schema": {
"type": "array",
"items": { "$ref": "#/components/schemas/Job" }
}
}
},
{
"name": "job.run",
"description": "Run a job on the appropriate runner and return the result",
"params": [
{
"name": "params",
"schema": {
"type": "object",
"properties": {
"secret": { "type": "string" },
"job": {
"$ref": "#/components/schemas/Job"
}
},
"required": ["secret", "job"]
@@ -74,7 +116,101 @@
"result": {
"name": "result",
"schema": {
"type": ["string", "null"]
"oneOf": [
{
"type": "object",
"properties": {
"success": { "type": "string" }
},
"required": ["success"]
},
{
"type": "object",
"properties": {
"error": { "type": "string" }
},
"required": ["error"]
}
]
}
}
},
{
"name": "job.start",
"description": "Start a previously created job by queuing it to its assigned runner",
"params": [
{
"name": "params",
"schema": {
"type": "object",
"properties": {
"secret": { "type": "string" },
"job_id": { "type": "string" }
},
"required": ["secret", "job_id"]
}
}
],
"result": {
"name": "result",
"schema": { "type": "null" }
}
},
{
"name": "job.status",
"description": "Get the current status of a job",
"params": [
{
"name": "job_id",
"schema": { "type": "string" }
}
],
"result": {
"name": "status",
"schema": {
"type": "object",
"properties": {
"job_id": { "type": "string" },
"status": {
"type": "string",
"enum": ["created", "queued", "running", "completed", "failed", "timeout"]
},
"created_at": { "type": "string" },
"started_at": { "type": ["string", "null"] },
"completed_at": { "type": ["string", "null"] }
},
"required": ["job_id", "status", "created_at"]
}
}
},
{
"name": "job.result",
"description": "Get the result of a completed job (blocks until result is available)",
"params": [
{
"name": "job_id",
"schema": { "type": "string" }
}
],
"result": {
"name": "result",
"schema": {
"oneOf": [
{
"type": "object",
"properties": {
"success": { "type": "string" }
},
"required": ["success"]
},
{
"type": "object",
"properties": {
"error": { "type": "string" }
},
"required": ["error"]
}
]
}
}
},
@@ -200,6 +336,48 @@
}
}
},
{
"name": "job.stop",
"description": "Stop a running job",
"params": [
{
"name": "params",
"schema": {
"type": "object",
"properties": {
"secret": { "type": "string" },
"job_id": { "type": "string" }
},
"required": ["secret", "job_id"]
}
}
],
"result": {
"name": "result",
"schema": { "type": "null" }
}
},
{
"name": "job.delete",
"description": "Delete a job from the system",
"params": [
{
"name": "params",
"schema": {
"type": "object",
"properties": {
"secret": { "type": "string" },
"job_id": { "type": "string" }
},
"required": ["secret", "job_id"]
}
}
],
"result": {
"name": "result",
"schema": { "type": "null" }
}
},
{
"name": "rpc.discover",
"description": "OpenRPC discovery method - returns the OpenRPC document describing this API",

182
examples/README.md Normal file
View File

@@ -0,0 +1,182 @@
# Hero Supervisor Examples
This directory contains examples demonstrating the new job API functionality and workflows.
## Examples Overview
### 1. `job_api_examples.rs` - Comprehensive API Demo
Complete demonstration of all new job API methods:
- **Fire-and-forget execution** using `job.run`
- **Asynchronous processing** with `jobs.create`, `job.start`, `job.status`, `job.result`
- **Batch job processing** for multiple jobs
- **Job listing** with `jobs.list`
**Run with:**
```bash
cargo run --example job_api_examples
```
### 2. `simple_job_workflow.rs` - Basic Workflow
Simple example showing the basic job lifecycle:
1. Create job with `jobs.create`
2. Start job with `job.start`
3. Monitor with `job.status`
4. Get result with `job.result`
**Run with:**
```bash
cargo run --example simple_job_workflow
```
### 3. `integration_test.rs` - Integration Tests
Comprehensive integration tests validating:
- Complete job lifecycle
- Immediate job execution
- Job listing functionality
- Authentication error handling
- Nonexistent job operations
**Run with:**
```bash
cargo test --test integration_test
```
## Prerequisites
Before running the examples, ensure:
1. **Redis is running:**
```bash
docker run -d -p 6379:6379 redis:alpine
```
2. **Supervisor is running:**
```bash
./target/debug/supervisor --config examples/supervisor/config.toml
```
3. **Runners are configured** in your config.toml:
```toml
[[actors]]
id = "osis_runner_1"
name = "osis_runner_1"
binary_path = "/path/to/osis_runner"
db_path = "/tmp/osis_db"
redis_url = "redis://localhost:6379"
process_manager = "simple"
```
## API Convention Summary
The examples demonstrate the new job API convention:
### General Operations (`jobs.`)
- `jobs.create` - Create a job without queuing it
- `jobs.list` - List all job IDs in the system
### Specific Operations (`job.`)
- `job.run` - Run a job immediately and return result
- `job.start` - Start a previously created job
- `job.status` - Get current job status (non-blocking)
- `job.result` - Get job result (blocking until complete)
## Workflow Patterns
### Pattern 1: Fire-and-Forget
```rust
let result = client.job_run(secret, job).await?;
match result {
JobResult::Success { success } => println!("Output: {}", success),
JobResult::Error { error } => println!("Error: {}", error),
}
```
### Pattern 2: Asynchronous Processing
```rust
// Create and start
let job_id = client.jobs_create(secret, job).await?;
client.job_start(secret, &job_id).await?;
// Monitor (non-blocking)
loop {
let status = client.job_status(&job_id).await?;
if status.status == "completed" { break; }
sleep(Duration::from_secs(1)).await;
}
// Get result
let result = client.job_result(&job_id).await?;
```
### Pattern 3: Batch Processing
```rust
// Create all jobs
let mut job_ids = Vec::new();
for job_spec in job_specs {
let job_id = client.jobs_create(secret, job_spec).await?;
job_ids.push(job_id);
}
// Start all jobs
for job_id in &job_ids {
client.job_start(secret, job_id).await?;
}
// Collect results
for job_id in &job_ids {
let result = client.job_result(job_id).await?;
// Process result...
}
```
## Error Handling
The examples demonstrate proper error handling for:
- **Authentication errors** - Invalid secrets
- **Job not found errors** - Nonexistent job IDs
- **Connection errors** - Supervisor not available
- **Execution errors** - Job failures
## Authentication
Examples use different secret types:
- **Admin secrets**: Full system access
- **User secrets**: Job operations only (used in examples)
- **Register secrets**: Runner registration only
Configure secrets in your supervisor config:
```toml
admin_secrets = ["admin-secret-123"]
user_secrets = ["user-secret-456"]
register_secrets = ["register-secret-789"]
```
## Troubleshooting
### Common Issues
1. **Connection refused**
- Ensure supervisor is running on localhost:3030
- Check supervisor logs for errors
2. **Authentication failed**
- Verify secret is configured in supervisor
- Check secret type matches operation requirements
3. **Job execution failed**
- Ensure runners are properly configured and running
- Check runner logs for execution errors
- Verify job payload is valid for the target runner
4. **Redis connection failed**
- Ensure Redis is running on localhost:6379
- Check Redis connectivity from supervisor
### Debug Mode
Run examples with debug logging:
```bash
RUST_LOG=debug cargo run --example job_api_examples
```
This will show detailed API calls and responses for troubleshooting.

View File

@@ -17,7 +17,7 @@
use hero_supervisor_openrpc_client::{
SupervisorClient, RunnerConfig, RunnerType, ProcessManagerType,
JobBuilder, JobType, ClientError
JobBuilder, JobType
};
use std::time::Duration;
use escargot::CargoBuild;
@@ -136,8 +136,8 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
.context_id("demo")
.payload(payload)
.job_type(JobType::OSIS)
.runner_name("basic_example_actor")
.timeout(Duration::from_secs(30))
.runner("basic_example_actor")
.timeout(30)
.build()?;
println!("📤 Queuing job '{}': {}", description, job.id);
@@ -164,8 +164,8 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
.context_id("sync_demo")
.payload(payload)
.job_type(JobType::OSIS)
.runner_name("basic_example_actor")
.timeout(Duration::from_secs(30))
.runner("basic_example_actor")
.timeout(30)
.build()?;
println!("🚀 Executing '{}' with result verification...", description);

View File

@@ -0,0 +1,190 @@
//! Integration test for the new job API
//!
//! This test demonstrates the complete job lifecycle and validates
//! that all new API methods work correctly together.
use hero_supervisor_openrpc_client::{SupervisorClient, JobBuilder, JobResult};
use std::time::Duration;
use tokio::time::sleep;
#[tokio::test]
async fn test_complete_job_lifecycle() -> Result<(), Box<dyn std::error::Error>> {
// Skip test if supervisor is not running
let client = match SupervisorClient::new("http://localhost:3030") {
Ok(c) => c,
Err(_) => {
println!("Skipping integration test - supervisor not available");
return Ok(());
}
};
// Test connection
if client.discover().await.is_err() {
println!("Skipping integration test - supervisor not responding");
return Ok(());
}
let secret = "user-secret-456";
// Test 1: Create job
let job = JobBuilder::new()
.caller_id("integration_test")
.context_id("test_lifecycle")
.payload("echo 'Integration test job'")
.executor("osis")
.runner("osis_runner_1")
.timeout(30)
.build()?;
let job_id = client.jobs_create(secret, job).await?;
assert!(!job_id.is_empty());
// Test 2: Start job
client.job_start(secret, &job_id).await?;
// Test 3: Monitor status
let mut attempts = 0;
let max_attempts = 15; // 15 seconds max
let mut final_status = String::new();
while attempts < max_attempts {
let status = client.job_status(&job_id).await?;
final_status = status.status.clone();
if final_status == "completed" || final_status == "failed" || final_status == "timeout" {
break;
}
attempts += 1;
sleep(Duration::from_secs(1)).await;
}
// Test 4: Get result
let result = client.job_result(&job_id).await?;
match result {
JobResult::Success { success: _ } => {
assert_eq!(final_status, "completed");
},
JobResult::Error { error: _ } => {
assert!(final_status == "failed" || final_status == "timeout");
}
}
Ok(())
}
#[tokio::test]
async fn test_job_run_immediate() -> Result<(), Box<dyn std::error::Error>> {
let client = match SupervisorClient::new("http://localhost:3030") {
Ok(c) => c,
Err(_) => return Ok(()), // Skip if not available
};
if client.discover().await.is_err() {
return Ok(()); // Skip if not responding
}
let secret = "user-secret-456";
let job = JobBuilder::new()
.caller_id("integration_test")
.context_id("test_immediate")
.payload("echo 'Immediate job test'")
.executor("osis")
.runner("osis_runner_1")
.timeout(30)
.build()?;
// Test immediate execution
let result = client.job_run(secret, job).await?;
// Should get either success or error, but not panic
match result {
JobResult::Success { success } => {
assert!(!success.is_empty());
},
JobResult::Error { error } => {
assert!(!error.is_empty());
}
}
Ok(())
}
#[tokio::test]
async fn test_jobs_list() -> Result<(), Box<dyn std::error::Error>> {
let client = match SupervisorClient::new("http://localhost:3030") {
Ok(c) => c,
Err(_) => return Ok(()), // Skip if not available
};
if client.discover().await.is_err() {
return Ok(()); // Skip if not responding
}
// Test listing jobs
let job_ids = client.jobs_list().await?;
// Should return a vector (might be empty)
assert!(job_ids.len() >= 0);
Ok(())
}
#[tokio::test]
async fn test_authentication_errors() -> Result<(), Box<dyn std::error::Error>> {
let client = match SupervisorClient::new("http://localhost:3030") {
Ok(c) => c,
Err(_) => return Ok(()), // Skip if not available
};
if client.discover().await.is_err() {
return Ok(()); // Skip if not responding
}
let invalid_secret = "invalid-secret";
let job = JobBuilder::new()
.caller_id("integration_test")
.context_id("test_auth")
.payload("echo 'Auth test'")
.executor("osis")
.runner("osis_runner_1")
.timeout(30)
.build()?;
// Test that invalid secret fails
let result = client.jobs_create(invalid_secret, job.clone()).await;
assert!(result.is_err());
let result = client.job_run(invalid_secret, job.clone()).await;
assert!(result.is_err());
let result = client.job_start(invalid_secret, "fake-job-id").await;
assert!(result.is_err());
Ok(())
}
#[tokio::test]
async fn test_nonexistent_job_operations() -> Result<(), Box<dyn std::error::Error>> {
let client = match SupervisorClient::new("http://localhost:3030") {
Ok(c) => c,
Err(_) => return Ok(()), // Skip if not available
};
if client.discover().await.is_err() {
return Ok(()); // Skip if not responding
}
let fake_job_id = "nonexistent-job-id";
// Test operations on nonexistent job
let result = client.job_status(fake_job_id).await;
assert!(result.is_err());
let result = client.job_result(fake_job_id).await;
assert!(result.is_err());
Ok(())
}

View File

@@ -0,0 +1,269 @@
//! Examples demonstrating the new job API workflows
//!
//! This example shows how to use the new job API methods:
//! - jobs.create: Create a job without queuing
//! - jobs.list: List all jobs
//! - job.run: Run a job and get result immediately
//! - job.start: Start a created job
//! - job.status: Get job status (non-blocking)
//! - job.result: Get job result (blocking)
use hero_supervisor_openrpc_client::{SupervisorClient, JobBuilder, JobResult};
use std::time::Duration;
use tokio::time::sleep;
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
// Initialize logging
env_logger::init();
println!("🚀 Hero Supervisor Job API Examples");
println!("===================================\n");
// Create client
let client = SupervisorClient::new("http://localhost:3030")?;
let secret = "user-secret-456"; // Use a user secret for job operations
// Test connection
println!("📡 Testing connection...");
match client.discover().await {
Ok(_) => println!("✅ Connected to supervisor\n"),
Err(e) => {
println!("❌ Failed to connect: {}", e);
println!("Make sure the supervisor is running with: ./supervisor --config examples/supervisor/config.toml\n");
return Ok(());
}
}
// Example 1: Fire-and-forget job execution
println!("🔥 Example 1: Fire-and-forget job execution");
println!("--------------------------------------------");
let job = JobBuilder::new()
.caller_id("example_client")
.context_id("fire_and_forget")
.payload("echo 'Hello from fire-and-forget job!'")
.executor("osis")
.runner("osis_runner_1")
.timeout(30)
.build()?;
println!("Running job immediately...");
match client.job_run(secret, job).await {
Ok(JobResult::Success { success }) => {
println!("✅ Job completed successfully:");
println!(" Output: {}", success);
},
Ok(JobResult::Error { error }) => {
println!("❌ Job failed:");
println!(" Error: {}", error);
},
Err(e) => {
println!("❌ API call failed: {}", e);
}
}
println!();
// Example 2: Asynchronous job processing
println!("⏰ Example 2: Asynchronous job processing");
println!("------------------------------------------");
let job = JobBuilder::new()
.caller_id("example_client")
.context_id("async_processing")
.payload("sleep 2 && echo 'Hello from async job!'")
.executor("osis")
.runner("osis_runner_1")
.timeout(60)
.build()?;
// Step 1: Create the job
println!("1. Creating job...");
let job_id = match client.jobs_create(secret, job).await {
Ok(id) => {
println!("✅ Job created with ID: {}", id);
id
},
Err(e) => {
println!("❌ Failed to create job: {}", e);
return Ok(());
}
};
// Step 2: Start the job
println!("2. Starting job...");
match client.job_start(secret, &job_id).await {
Ok(_) => println!("✅ Job started"),
Err(e) => {
println!("❌ Failed to start job: {}", e);
return Ok(());
}
}
// Step 3: Poll for completion (non-blocking)
println!("3. Monitoring job progress...");
let mut attempts = 0;
let max_attempts = 30; // 30 seconds max
loop {
attempts += 1;
match client.job_status(&job_id).await {
Ok(status) => {
println!(" Status: {} (attempt {})", status.status, attempts);
if status.status == "completed" || status.status == "failed" || status.status == "timeout" {
break;
}
if attempts >= max_attempts {
println!(" ⏰ Timeout waiting for job completion");
break;
}
sleep(Duration::from_secs(1)).await;
},
Err(e) => {
println!(" ❌ Failed to get job status: {}", e);
break;
}
}
}
// Step 4: Get the result
println!("4. Getting job result...");
match client.job_result(&job_id).await {
Ok(JobResult::Success { success }) => {
println!("✅ Job completed successfully:");
println!(" Output: {}", success);
},
Ok(JobResult::Error { error }) => {
println!("❌ Job failed:");
println!(" Error: {}", error);
},
Err(e) => {
println!("❌ Failed to get job result: {}", e);
}
}
println!();
// Example 3: Batch job processing
println!("📦 Example 3: Batch job processing");
println!("-----------------------------------");
let job_specs = vec![
("echo 'Batch job 1'", "batch_1"),
("echo 'Batch job 2'", "batch_2"),
("echo 'Batch job 3'", "batch_3"),
];
let mut job_ids = Vec::new();
// Create all jobs
println!("Creating batch jobs...");
for (i, (payload, context)) in job_specs.iter().enumerate() {
let job = JobBuilder::new()
.caller_id("example_client")
.context_id(context)
.payload(payload)
.executor("osis")
.runner("osis_runner_1")
.timeout(30)
.build()?;
match client.jobs_create(secret, job).await {
Ok(job_id) => {
println!("✅ Created job {}: {}", i + 1, job_id);
job_ids.push(job_id);
},
Err(e) => {
println!("❌ Failed to create job {}: {}", i + 1, e);
}
}
}
// Start all jobs
println!("Starting all batch jobs...");
for (i, job_id) in job_ids.iter().enumerate() {
match client.job_start(secret, job_id).await {
Ok(_) => println!("✅ Started job {}", i + 1),
Err(e) => println!("❌ Failed to start job {}: {}", i + 1, e),
}
}
// Collect results
println!("Collecting results...");
for (i, job_id) in job_ids.iter().enumerate() {
match client.job_result(job_id).await {
Ok(JobResult::Success { success }) => {
println!("✅ Job {} result: {}", i + 1, success);
},
Ok(JobResult::Error { error }) => {
println!("❌ Job {} failed: {}", i + 1, error);
},
Err(e) => {
println!("❌ Failed to get result for job {}: {}", i + 1, e);
}
}
}
println!();
// Example 4: List all jobs
println!("📋 Example 4: Listing all jobs");
println!("-------------------------------");
match client.jobs_list().await {
Ok(job_ids) => {
println!("✅ Found {} jobs in the system:", job_ids.len());
for (i, job_id) in job_ids.iter().take(10).enumerate() {
println!(" {}. {}", i + 1, job_id);
}
if job_ids.len() > 10 {
println!(" ... and {} more", job_ids.len() - 10);
}
},
Err(e) => {
println!("❌ Failed to list jobs: {}", e);
}
}
println!();
println!("🎉 All examples completed!");
println!("\nAPI Convention Summary:");
println!("- jobs.create: Create job without queuing");
println!("- jobs.list: List all job IDs");
println!("- job.run: Run job and return result immediately");
println!("- job.start: Start a created job");
println!("- job.status: Get job status (non-blocking)");
println!("- job.result: Get job result (blocking)");
Ok(())
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_job_builder() {
let job = JobBuilder::new()
.caller_id("test")
.context_id("test")
.payload("echo 'test'")
.executor("osis")
.runner("test_runner")
.build();
assert!(job.is_ok());
let job = job.unwrap();
assert_eq!(job.caller_id, "test");
assert_eq!(job.context_id, "test");
assert_eq!(job.payload, "echo 'test'");
}
#[tokio::test]
async fn test_client_creation() {
let client = SupervisorClient::new("http://localhost:3030");
assert!(client.is_ok());
}
}

View File

@@ -14,7 +14,7 @@ use std::time::Duration;
use tokio::time::sleep;
use redis::AsyncCommands;
use hero_supervisor::{
job::{Job, JobStatus, JobType, keys},
Job, JobStatus, JobError, client::{Client, ClientBuilder}
};
#[derive(Debug, Clone)]
@@ -43,6 +43,14 @@ impl MockRunnerConfig {
return Err("Missing value for --actor-id".into());
}
}
"--db-path" => {
if i + 1 < args.len() {
db_path = Some(args[i + 1].clone());
i += 2;
} else {
return Err("Missing value for --db-path".into());
}
}
"--redis-url" => {
if i + 1 < args.len() {
redis_url = Some(args[i + 1].clone());
@@ -65,16 +73,19 @@ impl MockRunnerConfig {
pub struct MockRunner {
config: MockRunnerConfig,
redis_client: redis::Client,
client: Client,
}
impl MockRunner {
pub fn new(config: MockRunnerConfig) -> Result<Self, Box<dyn std::error::Error>> {
let redis_client = redis::Client::open(config.redis_url.clone())?;
pub async fn new(config: MockRunnerConfig) -> Result<Self, Box<dyn std::error::Error>> {
let client = ClientBuilder::new()
.redis_url(&config.redis_url)
.build()
.await?;
Ok(MockRunner {
config,
redis_client,
client,
})
}
@@ -83,53 +94,52 @@ impl MockRunner {
println!("📂 DB Path: {}", self.config.db_path);
println!("🔗 Redis URL: {}", self.config.redis_url);
let mut conn = self.redis_client.get_multiplexed_async_connection().await?;
// Use the proper Hero job queue key for this actor instance
// Format: hero:q:work:type:{job_type}:group:{group}:inst:{instance}
let work_queue_key = keys::work_instance(&JobType::OSIS, "default", &self.config.actor_id);
let work_queue_key = format!("hero:q:work:type:osis:group:default:inst:{}", self.config.actor_id);
println!("👂 Listening for jobs on queue: {}", work_queue_key);
loop {
// Try to pop a job ID from the work queue using the Hero protocol
let result: redis::RedisResult<Option<String>> = conn.lpop(&work_queue_key, None).await;
let job_id = self.client.get_job_id(&work_queue_key).await?;
match result {
Ok(Some(job_id)) => {
match job_id {
Some(job_id) => {
println!("📨 Received job ID: {}", job_id);
if let Err(e) = self.process_job(&mut conn, &job_id).await {
if let Err(e) = self.process_job(&job_id).await {
eprintln!("❌ Error processing job {}: {}", job_id, e);
// Mark job as error
if let Err(e2) = Job::set_error(&mut conn, &job_id, &format!("Processing error: {}", e)).await {
if let Err(e2) = self.client.set_job_status(&job_id, JobStatus::Error).await {
eprintln!("❌ Failed to set job error status: {}", e2);
}
}
}
Ok(None) => {
None => {
// No jobs available, wait a bit
sleep(Duration::from_millis(100)).await;
}
Err(e) => {
eprintln!("❌ Redis error: {}", e);
sleep(Duration::from_secs(1)).await;
}
}
}
}
async fn process_job(&self, conn: &mut redis::aio::MultiplexedConnection, job_id: &str) -> Result<(), Box<dyn std::error::Error>> {
async fn process_job(&self, job_id: &str) -> Result<(), JobError> {
// Load the job from Redis using the Hero job system
let job = Job::load_from_redis(conn, job_id).await?;
let job = self.client.get_job(job_id).await?;
println!("📝 Processing job: {}", job.id);
println!("📝 Caller: {}", job.caller_id);
println!("📝 Context: {}", job.context_id);
println!("📝 Payload: {}", job.payload);
println!("📝 Job Type: {:?}", job.job_type);
self.process_job_internal(&self.client, job_id, &job).await
}
async fn process_job_internal(
&self,
client: &Client,
job_id: &str,
job: &Job,
) -> Result<(), JobError> {
println!("🔄 Processing job {} with payload: {}", job_id, job.payload);
// Mark job as started
Job::update_status(conn, job_id, JobStatus::Started).await?;
client.set_job_status(job_id, JobStatus::Started).await?;
println!("🚀 Job {} marked as Started", job_id);
// Simulate processing time
@@ -140,10 +150,8 @@ impl MockRunner {
println!("📤 Output: {}", output);
// Set the job result
Job::set_result(conn, job_id, &output).await?;
client.set_result(job_id, &output).await?;
// Mark job as finished
Job::update_status(conn, job_id, JobStatus::Finished).await?;
println!("✅ Job {} completed successfully", job_id);
Ok(())
@@ -156,7 +164,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
let config = MockRunnerConfig::from_args()?;
// Create and run the mock runner
let runner = MockRunner::new(config)?;
let runner = MockRunner::new(config).await?;
runner.run().await?;
Ok(())

View File

@@ -0,0 +1,64 @@
//! Simple job workflow example
//!
//! This example demonstrates the basic job lifecycle using the new API:
//! 1. Create a job
//! 2. Start the job
//! 3. Monitor its progress
//! 4. Get the result
use hero_supervisor_openrpc_client::{SupervisorClient, JobBuilder, JobResult};
use std::time::Duration;
use tokio::time::sleep;
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
println!("Simple Job Workflow Example");
println!("============================\n");
// Create client
let client = SupervisorClient::new("http://localhost:3030")?;
let secret = "user-secret-456";
// Create a simple job
let job = JobBuilder::new()
.caller_id("simple_example")
.context_id("demo")
.payload("echo 'Hello from Hero Supervisor!' && sleep 3 && echo 'Job completed!'")
.executor("osis")
.runner("osis_runner_1")
.timeout(60)
.env_var("EXAMPLE_VAR", "example_value")
.build()?;
println!("📝 Creating job...");
let job_id = client.jobs_create(secret, job).await?;
println!("✅ Job created: {}\n", job_id);
println!("🚀 Starting job...");
client.job_start(secret, &job_id).await?;
println!("✅ Job started\n");
println!("👀 Monitoring job progress...");
loop {
let status = client.job_status(&job_id).await?;
println!(" Status: {}", status.status);
if status.status == "completed" || status.status == "failed" {
break;
}
sleep(Duration::from_secs(2)).await;
}
println!("\n📋 Getting job result...");
match client.job_result(&job_id).await? {
JobResult::Success { success } => {
println!("✅ Success: {}", success);
},
JobResult::Error { error } => {
println!("❌ Error: {}", error);
}
}
Ok(())
}

View File

@@ -69,7 +69,7 @@ Once running, the supervisor will:
1. Load the configuration from `config.toml`
2. Initialize and start all configured actors
3. Listen for jobs on the Redis queue (`hero:supervisor:jobs`)
4. Dispatch jobs to appropriate actors based on the `runner_name` field
4. Dispatch jobs to appropriate actors based on the `runner` field
5. Monitor actor health and status
## Testing
@@ -78,7 +78,7 @@ You can test the supervisor by dispatching jobs to the Redis queue:
```bash
# Using redis-cli to add a test job
redis-cli LPUSH "hero:supervisor:jobs" '{"id":"test-123","runner_name":"sal_actor_1","script":"print(\"Hello from SAL actor!\")"}'
redis-cli LPUSH "hero:supervisor:jobs" '{"id":"test-123","runner":"sal_actor_1","script":"print(\"Hello from SAL actor!\")"}'
```
## Stopping

View File

@@ -1,59 +0,0 @@
//! Test to verify OpenRPC method registration
use hero_supervisor_openrpc_client::SupervisorClient;
use tokio::time::{sleep, Duration};
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
println!("🔍 Testing OpenRPC method registration");
// Start a local supervisor with OpenRPC (assume it's running)
println!("📡 Connecting to OpenRPC server...");
let client = SupervisorClient::new("http://127.0.0.1:3030").await?;
// Test basic methods first
println!("🧪 Testing basic methods...");
// Test list_runners (should work)
match client.list_runners().await {
Ok(runners) => println!("✅ list_runners works: {:?}", runners),
Err(e) => println!("❌ list_runners failed: {}", e),
}
// Test get_all_runner_status (might have serialization issues)
match client.get_all_runner_status().await {
Ok(statuses) => println!("✅ get_all_runner_status works: {} runners", statuses.len()),
Err(e) => println!("❌ get_all_runner_status failed: {}", e),
}
// Test the new queue_and_wait method
println!("🎯 Testing queue_and_wait method...");
// Create a simple test job
use hero_supervisor::job::{JobBuilder, JobType};
let job = JobBuilder::new()
.caller_id("test_client")
.context_id("method_test")
.payload("print('Testing queue_and_wait method registration');")
.job_type(JobType::OSIS)
.runner_name("osis_actor") // Use existing runner
.timeout(Duration::from_secs(10))
.build()?;
match client.queue_and_wait("osis_actor", job, 10).await {
Ok(Some(result)) => println!("✅ queue_and_wait works! Result: {}", result),
Ok(None) => println!("⏰ queue_and_wait timed out"),
Err(e) => {
println!("❌ queue_and_wait failed: {}", e);
// Check if it's a MethodNotFound error
if e.to_string().contains("Method not found") {
println!("🔍 Method not found - this suggests trait registration issue");
}
}
}
println!("🏁 OpenRPC method test completed");
Ok(())
}

View File

@@ -1,70 +0,0 @@
//! Simple test for the queue_and_wait functionality
use hero_supervisor::{
supervisor::{Supervisor, ProcessManagerType},
runner::RunnerConfig,
job::{JobBuilder, JobType},
};
use std::time::Duration;
use std::path::PathBuf;
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
println!("🧪 Testing queue_and_wait functionality directly");
// Create supervisor
let mut supervisor = Supervisor::new();
// Create a runner config
let config = RunnerConfig::new(
"test_actor".to_string(),
hero_supervisor::runner::RunnerType::OSISRunner,
PathBuf::from("./target/debug/examples/mock_runner"),
"/tmp/test_db".to_string(),
"redis://localhost:6379".to_string(),
);
// Add runner
println!(" Adding test runner...");
supervisor.add_runner(config, ProcessManagerType::Simple).await?;
// Start runner
println!("▶️ Starting test runner...");
supervisor.start_runner("test_actor").await?;
// Create a test job
let job = JobBuilder::new()
.caller_id("test_client")
.context_id("direct_test")
.payload("print('Direct queue_and_wait test!');")
.job_type(JobType::OSIS)
.runner_name("test_actor")
.timeout(Duration::from_secs(10))
.build()?;
println!("🚀 Testing queue_and_wait directly...");
println!("📋 Job ID: {}", job.id);
// Test queue_and_wait directly
match supervisor.queue_and_wait("test_actor", job, 10).await {
Ok(Some(result)) => {
println!("✅ queue_and_wait succeeded!");
println!("📤 Result: {}", result);
}
Ok(None) => {
println!("⏰ queue_and_wait timed out");
}
Err(e) => {
println!("❌ queue_and_wait failed: {}", e);
}
}
// Cleanup
println!("🧹 Cleaning up...");
supervisor.stop_runner("test_actor", false).await?;
supervisor.remove_runner("test_actor").await?;
println!("✅ Direct test completed!");
Ok(())
}

View File

@@ -1,46 +0,0 @@
//! Test program for register_runner functionality with secret authentication
use hero_supervisor::{SupervisorApp};
use log::info;
use tokio;
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
env_logger::init();
info!("Starting supervisor with test secrets...");
// Create supervisor app with test secrets
let mut app = SupervisorApp::builder()
.redis_url("redis://localhost:6379")
.db_path("/tmp/hero_test_db")
.queue_key("hero:test_queue")
.admin_secret("admin123")
.register_secret("register456")
.user_secret("user789")
.build()
.await?;
info!("Supervisor configured with secrets:");
info!(" Admin secrets: {:?}", app.supervisor.admin_secrets());
info!(" Register secrets: {:?}", app.supervisor.register_secrets());
info!(" User secrets: {:?}", app.supervisor.user_secrets());
// Start OpenRPC server
let supervisor_arc = std::sync::Arc::new(tokio::sync::Mutex::new(app.supervisor.clone()));
info!("Starting OpenRPC server...");
hero_supervisor::openrpc::start_openrpc_servers(supervisor_arc).await?;
info!("Supervisor is running with OpenRPC server on http://127.0.0.1:3030");
info!("Test secrets configured:");
info!(" Admin secret: admin123");
info!(" Register secret: register456");
info!(" User secret: user789");
info!("Press Ctrl+C to stop...");
// Keep running
loop {
tokio::time::sleep(tokio::time::Duration::from_secs(1)).await;
}
}

View File

@@ -1,76 +0,0 @@
#!/usr/bin/env bash
set -euo pipefail
# This script prepares the dev environment and (when sourced) exports env vars.
# Usage:
# source ./scripts/environment.sh # export env vars to current shell
# ./scripts/environment.sh # runs setup checks; prints sourcing hint
SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
REPO_ROOT=$(cd "$SCRIPT_DIR/.." && pwd)
cd "$REPO_ROOT"
# --- Helper: print next steps -------------------------------------------------
print_next_steps() {
echo ""
echo "Next steps:"
echo " 1) Start server (in ../server): cargo run -- --from-env --verbose"
echo " 2) Start portal: ./scripts/start.sh (or ./scripts/start.sh --port 8088)"
echo " 3) Dev (Trunk): set -a; source .env; set +a; trunk serve"
}
# --- Ensure .env exists (key=value style) -------------------------------------
if [ ! -f ".env" ]; then
echo "📝 Creating .env file..."
cat > .env << EOF
# Portal Client Configuration
# This file configures the frontend portal app
## Export-style so that 'source .env' exports to current shell
# API Key for server authentication (must match one of the API_KEYS in the server .env)
export API_KEY=dev_key_123
# Optional: Override server API base URL (defaults to http://127.0.0.1:3001/api)
# Example: API_URL=http://localhost:3001/api
# export API_URL=
EOF
echo "✅ Created .env file with default API key"
else
echo "✅ .env file already exists"
fi
# --- Install prerequisites ----------------------------------------------------
if ! command -v trunk >/dev/null 2>&1; then
echo "📦 Installing trunk..."
cargo install trunk
else
echo "✅ trunk is installed"
fi
if ! rustup target list --installed | grep -q "wasm32-unknown-unknown"; then
echo "🔧 Adding wasm32-unknown-unknown target..."
rustup target add wasm32-unknown-unknown
else
echo "✅ wasm32-unknown-unknown target present"
fi
# --- Detect if sourced vs executed --------------------------------------------
# Works for bash and zsh
is_sourced=false
# shellcheck disable=SC2296
if [ -n "${ZSH_EVAL_CONTEXT:-}" ]; then
case $ZSH_EVAL_CONTEXT in *:file:*) is_sourced=true;; esac
elif [ -n "${BASH_SOURCE:-}" ] && [ "${BASH_SOURCE[0]}" != "$0" ]; then
is_sourced=true
fi
if $is_sourced; then
echo "🔐 Sourcing .env (export-style) into current shell..."
# shellcheck disable=SC1091
source .env
echo "✅ Environment exported (API_KEY, optional API_URL)"
else
echo " Run 'source ./scripts/environment.sh' or 'source .env' to export env vars to your shell."
print_next_steps
fi

View File

@@ -2,32 +2,14 @@
use chrono::Utc;
use redis::AsyncCommands;
use sal_service_manager::{ProcessManager, SimpleProcessManager, TmuxProcessManager};
use std::collections::HashMap;
use std::path::PathBuf;
use std::sync::Arc;
use tokio::sync::Mutex;
use crate::{runner::{LogInfo, Runner, RunnerConfig, RunnerError, RunnerResult, RunnerStatus}, JobError, job::JobStatus};
use crate::{runner::{RunnerError, RunnerResult}, job::JobStatus, JobError};
use crate::{job::Job};
#[cfg(feature = "admin")]
use supervisor_admin_server::{AdminSupervisor, RunnerConfigInfo, JobInfo};
/// Process manager type for a runner
/// Client for managing jobs in Redis
#[derive(Debug, Clone)]
pub enum ProcessManagerType {
/// Simple process manager for direct process spawning
Simple,
/// Tmux process manager for session-based management
Tmux(String), // session name
}
/// Main supervisor that manages multiple runners
#[derive(Clone)]
pub struct Client {
redis_client: redis::Client,
/// Namespace for queue keys
namespace: String,
}
@@ -324,4 +306,23 @@ impl Client {
Ok(result)
}
/// Get a job ID from the work queue (blocking pop)
pub async fn get_job_id(&self, queue_key: &str) -> Result<Option<String>, JobError> {
let mut conn = self.redis_client
.get_multiplexed_async_connection()
.await
.map_err(|e| JobError::Redis(e))?;
// Use BRPOP with a short timeout to avoid blocking indefinitely
let result: Option<(String, String)> = conn.brpop(queue_key, 1.0).await
.map_err(|e| JobError::Redis(e))?;
Ok(result.map(|(_, job_id)| job_id))
}
/// Get a job by ID (alias for load_job_from_redis)
pub async fn get_job(&self, job_id: &str) -> Result<Job, JobError> {
self.load_job_from_redis(job_id).await
}
}

View File

@@ -1,9 +1,7 @@
use chrono::Utc;
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::time::Duration;
use uuid::Uuid;
use redis::AsyncCommands;
use thiserror::Error;
/// Job status enumeration
@@ -52,9 +50,9 @@ pub struct Job {
pub caller_id: String,
pub context_id: String,
pub payload: String,
pub runner_name: String, // name of the runner to execute this job
pub runner: String, // name of the runner to execute this job
pub executor: String, // name of the executor the runner will use to execute this job
pub timeout: Duration,
pub timeout: u64, // timeout in seconds
pub env_vars: HashMap<String, String>, // environment variables for script execution
pub created_at: chrono::DateTime<chrono::Utc>,
pub updated_at: chrono::DateTime<chrono::Utc>,
@@ -83,7 +81,7 @@ impl Job {
caller_id: String,
context_id: String,
payload: String,
runner_name: String,
runner: String,
executor: String,
) -> Self {
let now = Utc::now();
@@ -92,9 +90,9 @@ impl Job {
caller_id,
context_id,
payload,
runner_name,
runner,
executor,
timeout: Duration::from_secs(300), // 5 minutes default
timeout: 300, // 5 minutes default
env_vars: HashMap::new(),
created_at: now,
updated_at: now,
@@ -107,9 +105,9 @@ pub struct JobBuilder {
caller_id: String,
context_id: String,
payload: String,
runner_name: String,
runner: String,
executor: String,
timeout: Duration,
timeout: u64, // timeout in seconds
env_vars: HashMap<String, String>,
}
@@ -119,9 +117,9 @@ impl JobBuilder {
caller_id: "".to_string(),
context_id: "".to_string(),
payload: "".to_string(),
runner_name: "".to_string(),
runner: "".to_string(),
executor: "".to_string(),
timeout: Duration::from_secs(300), // 5 minutes default
timeout: 300, // 5 minutes default
env_vars: HashMap::new(),
}
}
@@ -145,8 +143,8 @@ impl JobBuilder {
}
/// Set the runner name for this job
pub fn runner_name(mut self, runner_name: &str) -> Self {
self.runner_name = runner_name.to_string();
pub fn runner(mut self, runner: &str) -> Self {
self.runner = runner.to_string();
self
}
@@ -156,8 +154,8 @@ impl JobBuilder {
self
}
/// Set the timeout for job execution
pub fn timeout(mut self, timeout: Duration) -> Self {
/// Set the timeout for job execution (in seconds)
pub fn timeout(mut self, timeout: u64) -> Self {
self.timeout = timeout;
self
}
@@ -191,8 +189,8 @@ impl JobBuilder {
if self.payload.is_empty() {
return Err(JobError::InvalidData("payload is required".to_string()));
}
if self.runner_name.is_empty() {
return Err(JobError::InvalidData("runner_name is required".to_string()));
if self.runner.is_empty() {
return Err(JobError::InvalidData("runner is required".to_string()));
}
if self.executor.is_empty() {
return Err(JobError::InvalidData("executor is required".to_string()));
@@ -202,7 +200,7 @@ impl JobBuilder {
self.caller_id,
self.context_id,
self.payload,
self.runner_name,
self.runner,
self.executor,
);

View File

@@ -18,4 +18,5 @@ pub use runner::{
pub use sal_service_manager::{ProcessManager, SimpleProcessManager, TmuxProcessManager};
pub use supervisor::{Supervisor, SupervisorBuilder, ProcessManagerType};
pub use job::{Job, JobBuilder, JobStatus, JobError};
pub use client::{Client, ClientBuilder};
pub use app::SupervisorApp;

View File

@@ -19,7 +19,6 @@ use sal_service_manager::{ProcessStatus, LogInfo};
use serde::{Deserialize, Serialize};
use std::net::SocketAddr;
use std::path::PathBuf;
use std::sync::Arc;
use std::fs;
use tokio::sync::Mutex;
@@ -84,6 +83,31 @@ pub struct RunJobParams {
pub job: Job,
}
/// Request parameters for starting a job
#[derive(Debug, Deserialize, Serialize)]
pub struct StartJobParams {
pub secret: String,
pub job_id: String,
}
/// Job result response
#[derive(Debug, Serialize, Clone)]
#[serde(untagged)]
pub enum JobResult {
Success { success: String },
Error { error: String },
}
/// Job status response
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct JobStatusResponse {
pub job_id: String,
pub status: String,
pub created_at: String,
pub started_at: Option<String>,
pub completed_at: Option<String>,
}
/// Request parameters for adding a new runner
#[derive(Debug, Deserialize, Serialize)]
pub struct AddRunnerParams {
@@ -98,18 +122,32 @@ pub struct AddRunnerParams {
/// Request parameters for queuing a job
#[derive(Debug, Deserialize, Serialize)]
pub struct QueueJobParams {
pub runner_name: String,
pub runner: String,
pub job: Job,
}
/// Request parameters for queue and wait operation
#[derive(Debug, Deserialize, Serialize)]
pub struct QueueAndWaitParams {
pub runner_name: String,
pub runner: String,
pub job: Job,
pub timeout_secs: u64,
}
/// Request parameters for stopping a job
#[derive(Debug, Deserialize, Serialize)]
pub struct StopJobParams {
pub secret: String,
pub job_id: String,
}
/// Request parameters for deleting a job
#[derive(Debug, Deserialize, Serialize)]
pub struct DeleteJobParams {
pub secret: String,
pub job_id: String,
}
/// Request parameters for getting runner logs
#[derive(Debug, Deserialize, Serialize)]
pub struct GetLogsParams {
@@ -236,13 +274,37 @@ pub trait SupervisorRpc {
#[method(name = "register_runner")]
async fn register_runner(&self, params: RegisterRunnerParams) -> RpcResult<String>;
/// Create a job (fire-and-forget, non-blocking)
#[method(name = "create_job")]
async fn create_job(&self, params: RunJobParams) -> RpcResult<String>;
/// Create a job without queuing it to a runner
#[method(name = "jobs.create")]
async fn jobs_create(&self, params: RunJobParams) -> RpcResult<String>;
/// Run a job on the appropriate runner (blocking, returns result)
#[method(name = "run_job")]
async fn run_job(&self, params: RunJobParams) -> RpcResult<Option<String>>;
/// List all jobs
#[method(name = "jobs.list")]
async fn jobs_list(&self) -> RpcResult<Vec<Job>>;
/// Run a job on the appropriate runner and return the result
#[method(name = "job.run")]
async fn job_run(&self, params: RunJobParams) -> RpcResult<JobResult>;
/// Start a previously created job by queuing it to its assigned runner
#[method(name = "job.start")]
async fn job_start(&self, params: StartJobParams) -> RpcResult<()>;
/// Get the current status of a job
#[method(name = "job.status")]
async fn job_status(&self, job_id: String) -> RpcResult<JobStatusResponse>;
/// Get the result of a completed job (blocks until result is available)
#[method(name = "job.result")]
async fn job_result(&self, job_id: String) -> RpcResult<JobResult>;
/// Stop a running job
#[method(name = "job.stop")]
async fn job_stop(&self, params: StopJobParams) -> RpcResult<()>;
/// Delete a job from the system
#[method(name = "job.delete")]
async fn job_delete(&self, params: DeleteJobParams) -> RpcResult<()>;
/// Remove a runner from the supervisor
#[method(name = "remove_runner")]
@@ -276,9 +338,6 @@ pub trait SupervisorRpc {
#[method(name = "queue_job_to_runner")]
async fn queue_job_to_runner(&self, params: QueueJobParams) -> RpcResult<()>;
/// List all job IDs from Redis
#[method(name = "list_jobs")]
async fn list_jobs(&self) -> RpcResult<Vec<String>>;
/// Get a job by job ID
#[method(name = "get_job")]
@@ -381,8 +440,8 @@ impl SupervisorRpcServer for Arc<Mutex<Supervisor>> {
Ok(params.name)
}
async fn create_job(&self, params: RunJobParams) -> RpcResult<String> {
debug!("OpenRPC request: create_job with params: {:?}", params);
async fn jobs_create(&self, params: RunJobParams) -> RpcResult<String> {
debug!("OpenRPC request: jobs.create with params: {:?}", params);
let mut supervisor = self.lock().await;
let job_id = supervisor
@@ -393,12 +452,85 @@ impl SupervisorRpcServer for Arc<Mutex<Supervisor>> {
Ok(job_id)
}
async fn run_job(&self, params: RunJobParams) -> RpcResult<Option<String>> {
debug!("OpenRPC request: run_job with params: {:?}", params);
async fn jobs_list(&self) -> RpcResult<Vec<Job>> {
debug!("OpenRPC request: jobs.list");
let supervisor = self.lock().await;
supervisor
.list_all_jobs()
.await
.map_err(runner_error_to_rpc_error)
}
async fn job_run(&self, params: RunJobParams) -> RpcResult<JobResult> {
debug!("OpenRPC request: job.run with params: {:?}", params);
let mut supervisor = self.lock().await;
match supervisor
.run_job(&params.secret, params.job)
.await
.map_err(runner_error_to_rpc_error)? {
Some(output) => Ok(JobResult::Success { success: output }),
None => Ok(JobResult::Error { error: "Job execution failed".to_string() })
}
}
async fn job_start(&self, params: StartJobParams) -> RpcResult<()> {
debug!("OpenRPC request: job.start with params: {:?}", params);
let mut supervisor = self.lock().await;
supervisor
.run_job(&params.secret, params.job)
.start_job(&params.secret, &params.job_id)
.await
.map_err(runner_error_to_rpc_error)
}
async fn job_status(&self, job_id: String) -> RpcResult<JobStatusResponse> {
debug!("OpenRPC request: job.status with job_id: {}", job_id);
let supervisor = self.lock().await;
let status = supervisor
.get_job_status(&job_id)
.await
.map_err(runner_error_to_rpc_error)?;
Ok(status)
}
async fn job_result(&self, job_id: String) -> RpcResult<JobResult> {
debug!("OpenRPC request: job.result with job_id: {}", job_id);
let supervisor = self.lock().await;
match supervisor
.get_job_result(&job_id)
.await
.map_err(runner_error_to_rpc_error)? {
Some(result) => {
if result.starts_with("Error:") {
Ok(JobResult::Error { error: result })
} else {
Ok(JobResult::Success { success: result })
}
},
None => Ok(JobResult::Error { error: "Job result not available".to_string() })
}
}
async fn job_stop(&self, params: StopJobParams) -> RpcResult<()> {
debug!("OpenRPC request: job.stop with params: {:?}", params);
let mut supervisor = self.lock().await;
supervisor
.stop_job(&params.job_id)
.await
.map_err(runner_error_to_rpc_error)
}
async fn job_delete(&self, params: DeleteJobParams) -> RpcResult<()> {
debug!("OpenRPC request: job.delete with params: {:?}", params);
let mut supervisor = self.lock().await;
supervisor
.delete_job(&params.job_id)
.await
.map_err(runner_error_to_rpc_error)
}
@@ -469,19 +601,11 @@ impl SupervisorRpcServer for Arc<Mutex<Supervisor>> {
debug!("OpenRPC request: queue_job_to_runner with params: {:?}", params);
let mut supervisor = self.lock().await;
supervisor
.queue_job_to_runner(&params.runner_name, params.job)
.queue_job_to_runner(&params.runner, params.job)
.await
.map_err(runner_error_to_rpc_error)
}
async fn list_jobs(&self) -> RpcResult<Vec<String>> {
debug!("OpenRPC request: list_jobs");
let supervisor = self.lock().await;
supervisor
.list_jobs()
.await
.map_err(runner_error_to_rpc_error)
}
async fn get_job(&self, job_id: String) -> RpcResult<Job> {
debug!("OpenRPC request: get_job with job_id: {}", job_id);
@@ -523,7 +647,7 @@ impl SupervisorRpcServer for Arc<Mutex<Supervisor>> {
debug!("OpenRPC request: queue_and_wait with params: {:?}", params);
let mut supervisor = self.lock().await;
supervisor
.queue_and_wait(&params.runner_name, params.job, params.timeout_secs)
.queue_and_wait(&params.runner, params.job, params.timeout_secs)
.await
.map_err(runner_error_to_rpc_error)
}
@@ -810,20 +934,108 @@ pub async fn start_openrpc_servers(
#[cfg(test)]
mod tests {
use super::*;
use crate::supervisor::Supervisor;
#[test]
fn test_supervisor_rpc_creation() {
let _rpc = SupervisorRpcImpl::new();
// Just test that we can create the RPC implementation
#[tokio::test]
async fn test_supervisor_rpc_creation() {
// Test that we can create a supervisor and use it with RPC
use crate::supervisor::SupervisorBuilder;
let supervisor = SupervisorBuilder::new()
.redis_url("redis://localhost:6379")
.namespace("test")
.build()
.await;
// Just test that we can build a supervisor
assert!(supervisor.is_ok() || supervisor.is_err()); // Either way is fine for this test
}
#[cfg(feature = "openrpc")]
#[test]
fn test_process_manager_type_parsing() {
assert!(SupervisorRpcImpl::parse_process_manager_type("simple").is_ok());
assert!(SupervisorRpcImpl::parse_process_manager_type("tmux").is_ok());
assert!(SupervisorRpcImpl::parse_process_manager_type("Simple").is_ok());
assert!(SupervisorRpcImpl::parse_process_manager_type("TMUX").is_ok());
assert!(SupervisorRpcImpl::parse_process_manager_type("invalid").is_err());
assert!(parse_process_manager_type("simple", None).is_ok());
assert!(parse_process_manager_type("tmux", Some("session".to_string())).is_ok());
assert!(parse_process_manager_type("Simple", None).is_ok());
assert!(parse_process_manager_type("TMUX", Some("session".to_string())).is_ok());
assert!(parse_process_manager_type("invalid", None).is_err());
}
#[tokio::test]
async fn test_job_api_methods() {
let supervisor = Arc::new(Mutex::new(Supervisor::default()));
let mut sup = supervisor.lock().await;
sup.add_user_secret("test-secret".to_string());
drop(sup);
// Test jobs.create
let job = crate::job::JobBuilder::new()
.caller_id("test")
.context_id("test")
.payload("test")
.runner("test_runner")
.executor("osis")
.build()
.unwrap();
let params = RunJobParams {
secret: "test-secret".to_string(),
job: job.clone(),
};
let result = supervisor.jobs_create(params).await;
// Should work or fail gracefully without Redis
assert!(result.is_ok() || result.is_err());
// Test job.start
let start_params = StartJobParams {
secret: "test-secret".to_string(),
job_id: "test-job".to_string(),
};
let result = supervisor.job_start(start_params).await;
// Should fail gracefully without Redis/job
assert!(result.is_err());
// Test invalid secret
let invalid_params = StartJobParams {
secret: "invalid".to_string(),
job_id: "test-job".to_string(),
};
let result = supervisor.job_start(invalid_params).await;
assert!(result.is_err());
}
#[test]
fn test_job_result_serialization() {
let success = JobResult::Success { success: "test output".to_string() };
let json = serde_json::to_string(&success).unwrap();
assert!(json.contains("success"));
assert!(json.contains("test output"));
let error = JobResult::Error { error: "test error".to_string() };
let json = serde_json::to_string(&error).unwrap();
assert!(json.contains("error"));
assert!(json.contains("test error"));
}
#[test]
fn test_job_status_response_serialization() {
let status = JobStatusResponse {
job_id: "test-job".to_string(),
status: "running".to_string(),
created_at: "2023-01-01T00:00:00Z".to_string(),
started_at: Some("2023-01-01T00:00:05Z".to_string()),
completed_at: None,
};
let json = serde_json::to_string(&status).unwrap();
assert!(json.contains("test-job"));
assert!(json.contains("running"));
assert!(json.contains("2023-01-01T00:00:00Z"));
let deserialized: JobStatusResponse = serde_json::from_str(&json).unwrap();
assert_eq!(deserialized.job_id, "test-job");
assert_eq!(deserialized.status, "running");
}
}

230
src/openrpc/tests.rs Normal file
View File

@@ -0,0 +1,230 @@
//! Tests for the new job API methods
#[cfg(test)]
mod job_api_tests {
use super::super::*;
use crate::supervisor::{Supervisor, SupervisorBuilder};
use crate::job::{Job, JobBuilder};
use std::sync::Arc;
use tokio::sync::Mutex;
use serde_json::json;
async fn create_test_supervisor() -> Arc<Mutex<Supervisor>> {
let supervisor = SupervisorBuilder::new()
.redis_url("redis://localhost:6379")
.namespace("test_job_api")
.build()
.await
.unwrap_or_else(|_| Supervisor::default());
let mut supervisor = supervisor;
supervisor.add_admin_secret("test-admin-secret".to_string());
supervisor.add_user_secret("test-user-secret".to_string());
Arc::new(Mutex::new(supervisor))
}
fn create_test_job() -> Job {
JobBuilder::new()
.id("test-job-123".to_string())
.caller_id("test-client".to_string())
.context_id("test-context".to_string())
.script("print('Hello World')".to_string())
.script_type(crate::job::ScriptType::Osis)
.timeout(30)
.build()
.unwrap()
}
#[tokio::test]
async fn test_jobs_create() {
let supervisor = create_test_supervisor().await;
let job = create_test_job();
let params = RunJobParams {
secret: "test-user-secret".to_string(),
job: job.clone(),
};
let result = supervisor.jobs_create(params).await;
assert!(result.is_ok());
let job_id = result.unwrap();
assert_eq!(job_id, job.id);
}
#[tokio::test]
async fn test_jobs_create_invalid_secret() {
let supervisor = create_test_supervisor().await;
let job = create_test_job();
let params = RunJobParams {
secret: "invalid-secret".to_string(),
job,
};
let result = supervisor.jobs_create(params).await;
assert!(result.is_err());
}
#[tokio::test]
async fn test_jobs_list() {
let supervisor = create_test_supervisor().await;
let result = supervisor.jobs_list().await;
// Should not error even if Redis is not available (will return empty list or error)
// The important thing is that the method signature works
assert!(result.is_ok() || result.is_err());
}
#[tokio::test]
async fn test_job_run_success_format() {
let supervisor = create_test_supervisor().await;
let job = create_test_job();
let params = RunJobParams {
secret: "test-user-secret".to_string(),
job,
};
let result = supervisor.job_run(params).await;
// The result should be a JobResult enum
match result {
Ok(JobResult::Success { success: _ }) => {
// Success case - job executed and returned output
},
Ok(JobResult::Error { error: _ }) => {
// Error case - job failed but method worked
},
Err(_) => {
// Method error (authentication, etc.)
// This is acceptable for testing without actual runners
}
}
}
#[tokio::test]
async fn test_job_start() {
let supervisor = create_test_supervisor().await;
let params = StartJobParams {
secret: "test-user-secret".to_string(),
job_id: "test-job-123".to_string(),
};
let result = supervisor.job_start(params).await;
// Should fail gracefully if job doesn't exist
assert!(result.is_err() || result.is_ok());
}
#[tokio::test]
async fn test_job_start_invalid_secret() {
let supervisor = create_test_supervisor().await;
let params = StartJobParams {
secret: "invalid-secret".to_string(),
job_id: "test-job-123".to_string(),
};
let result = supervisor.job_start(params).await;
assert!(result.is_err());
}
#[tokio::test]
async fn test_job_status() {
let supervisor = create_test_supervisor().await;
let result = supervisor.job_status("test-job-123".to_string()).await;
// Should return error for non-existent job
assert!(result.is_err());
}
#[tokio::test]
async fn test_job_result() {
let supervisor = create_test_supervisor().await;
let result = supervisor.job_result("test-job-123".to_string()).await;
// Should return error for non-existent job
assert!(result.is_err());
}
#[test]
fn test_job_result_enum_serialization() {
let success_result = JobResult::Success {
success: "Job completed successfully".to_string(),
};
let serialized = serde_json::to_string(&success_result).unwrap();
assert!(serialized.contains("success"));
assert!(serialized.contains("Job completed successfully"));
let error_result = JobResult::Error {
error: "Job failed with error".to_string(),
};
let serialized = serde_json::to_string(&error_result).unwrap();
assert!(serialized.contains("error"));
assert!(serialized.contains("Job failed with error"));
}
#[test]
fn test_job_status_response_serialization() {
let status_response = JobStatusResponse {
job_id: "test-job-123".to_string(),
status: "running".to_string(),
created_at: "2023-01-01T00:00:00Z".to_string(),
started_at: Some("2023-01-01T00:00:05Z".to_string()),
completed_at: None,
};
let serialized = serde_json::to_string(&status_response).unwrap();
assert!(serialized.contains("test-job-123"));
assert!(serialized.contains("running"));
assert!(serialized.contains("2023-01-01T00:00:00Z"));
assert!(serialized.contains("2023-01-01T00:00:05Z"));
let deserialized: JobStatusResponse = serde_json::from_str(&serialized).unwrap();
assert_eq!(deserialized.job_id, "test-job-123");
assert_eq!(deserialized.status, "running");
assert_eq!(deserialized.started_at, Some("2023-01-01T00:00:05Z".to_string()));
assert_eq!(deserialized.completed_at, None);
}
#[test]
fn test_start_job_params_serialization() {
let params = StartJobParams {
secret: "test-secret".to_string(),
job_id: "job-123".to_string(),
};
let serialized = serde_json::to_string(&params).unwrap();
assert!(serialized.contains("test-secret"));
assert!(serialized.contains("job-123"));
let deserialized: StartJobParams = serde_json::from_str(&serialized).unwrap();
assert_eq!(deserialized.secret, "test-secret");
assert_eq!(deserialized.job_id, "job-123");
}
#[test]
fn test_method_naming_convention() {
// Test that method names follow the jobs./job. convention
// These should be the actual method names in the trait
let jobs_methods = vec!["jobs.create", "jobs.list"];
let job_methods = vec!["job.run", "job.start", "job.status", "job.result"];
// Verify naming convention
for method in jobs_methods {
assert!(method.starts_with("jobs."));
}
for method in job_methods {
assert!(method.starts_with("job."));
}
}
}

View File

@@ -1,12 +1,7 @@
//! Runner implementation for actor process management.
use crate::job::{Job};
use log::{debug, info};
use redis::AsyncCommands;
use sal_service_manager::{ProcessManager, ProcessManagerError as ServiceProcessManagerError, ProcessStatus, ProcessConfig};
use sal_service_manager::{ProcessManagerError as ServiceProcessManagerError, ProcessStatus, ProcessConfig};
use std::path::PathBuf;
use std::sync::Arc;
use tokio::sync::Mutex;
/// Represents the current status of an actor/runner (alias for ProcessStatus)
pub type RunnerStatus = ProcessStatus;
@@ -127,8 +122,17 @@ pub enum RunnerError {
#[from]
source: crate::JobError,
},
#[error("Job '{job_id}' not found")]
JobNotFound { job_id: String },
#[error("Authentication error: {message}")]
AuthenticationError { message: String },
}
// Type alias for backward compatibility
pub type RunnerConfig = Runner;
/// Convert Runner to ProcessConfig
pub fn runner_to_process_config(config: &Runner) -> ProcessConfig {
ProcessConfig::new(config.id.clone(), config.command.clone())
@@ -137,98 +141,3 @@ pub fn runner_to_process_config(config: &Runner) -> ProcessConfig {
.with_arg("--redis-url".to_string())
.with_arg(config.redis_url.clone())
}
// Type alias for backward compatibility
pub type RunnerConfig = Runner;
#[cfg(test)]
mod tests {
use super::*;
use sal_service_manager::{ProcessManagerError, SimpleProcessManager};
use std::collections::HashMap;
#[derive(Debug)]
struct MockProcessManager {
processes: HashMap<String, ProcessStatus>,
}
impl MockProcessManager {
fn new() -> Self {
Self {
processes: HashMap::new(),
}
}
}
#[async_trait::async_trait]
impl ProcessManager for MockProcessManager {
async fn start_process(&mut self, config: &ProcessConfig) -> Result<(), ProcessManagerError> {
self.processes.insert(config.id.clone(), ProcessStatus::Running);
Ok(())
}
async fn stop_process(&mut self, process_id: &str, _force: bool) -> Result<(), ProcessManagerError> {
self.processes.insert(process_id.to_string(), ProcessStatus::Stopped);
Ok(())
}
async fn process_status(&self, process_id: &str) -> Result<ProcessStatus, ProcessManagerError> {
Ok(self.processes.get(process_id).cloned().unwrap_or(ProcessStatus::Stopped))
}
async fn process_logs(&self, _process_id: &str, _lines: Option<usize>, _follow: bool) -> Result<Vec<LogInfo>, ProcessManagerError> {
Ok(vec![])
}
async fn health_check(&self) -> Result<(), ProcessManagerError> {
Ok(())
}
async fn list_processes(&self) -> Result<Vec<String>, ProcessManagerError> {
Ok(self.processes.keys().cloned().collect())
}
}
#[test]
fn test_runner_creation() {
let runner = Runner::new(
"test_actor".to_string(),
"test_runner".to_string(),
"".to_string(),
PathBuf::from("/path/to/binary"),
"redis://localhost:6379".to_string(),
);
assert_eq!(runner.id, "test_actor");
assert_eq!(runner.name, "test_runner");
assert_eq!(runner.command, PathBuf::from("/path/to/binary"));
assert_eq!(runner.redis_url, "redis://localhost:6379");
}
#[test]
fn test_runner_get_queue() {
let runner = Runner::new(
"test_actor".to_string(),
"test_runner".to_string(),
"".to_string(),
PathBuf::from("/path/to/binary"),
"redis://localhost:6379".to_string(),
);
let queue_key = runner.get_queue();
assert_eq!(queue_key, "runner:test_runner");
}
#[test]
fn test_runner_error_types() {
let error = RunnerError::ActorNotFound {
actor_id: "test".to_string(),
};
assert!(error.to_string().contains("test"));
let error = RunnerError::ActorAlreadyRunning {
actor_id: "test".to_string(),
};
assert!(error.to_string().contains("already running"));
}
}

View File

@@ -1,17 +1,13 @@
//! Main supervisor implementation for managing multiple actor runners.
use chrono::Utc;
use sal_service_manager::{ProcessManager, SimpleProcessManager, TmuxProcessManager};
use std::collections::HashMap;
use std::path::PathBuf;
use std::sync::Arc;
use tokio::sync::Mutex;
use crate::{client::{Client, ClientBuilder}, job::JobStatus, runner::{LogInfo, Runner, RunnerConfig, RunnerError, RunnerResult, RunnerStatus}, JobError};
use crate::{job::Job};
use crate::{client::{Client, ClientBuilder}, job::JobStatus, runner::{LogInfo, Runner, RunnerConfig, RunnerError, RunnerResult, RunnerStatus}};
#[cfg(feature = "admin")]
use supervisor_admin_server::{AdminSupervisor, RunnerConfigInfo, JobInfo};
/// Process manager type for a runner
#[derive(Debug, Clone)]
@@ -201,7 +197,7 @@ impl Supervisor {
}
/// Register a new runner with secret-based authentication
pub async fn register_runner(&mut self, secret: &str, name: &str, queue: &str) -> RunnerResult<()> {
pub async fn register_runner(&mut self, secret: &str, name: &str, _queue: &str) -> RunnerResult<()> {
// Check if the secret is valid (admin or register secret)
if !self.admin_secrets.contains(&secret.to_string()) &&
!self.register_secrets.contains(&secret.to_string()) {
@@ -230,15 +226,15 @@ impl Supervisor {
}
// Find the runner by name
let runner_name = job.runner_name.clone();
let runner = job.runner.clone();
let job_id = job.id.clone(); // Store job ID before moving job
if let Some(_runner) = self.runners.get(&runner_name) {
if let Some(_runner) = self.runners.get(&runner) {
// Use the supervisor's queue_job_to_runner method (fire-and-forget)
self.queue_job_to_runner(&runner_name, job).await?;
self.queue_job_to_runner(&runner, job).await?;
Ok(job_id) // Return the job ID immediately
} else {
Err(RunnerError::ActorNotFound {
actor_id: job.runner_name.clone(),
actor_id: job.runner.clone(),
})
}
}
@@ -253,13 +249,13 @@ impl Supervisor {
}
// Find the runner by name
let runner_name = job.runner_name.clone();
if let Some(_runner) = self.runners.get(&runner_name) {
let runner = job.runner.clone();
if let Some(_runner) = self.runners.get(&runner) {
// Use the synchronous queue_and_wait method with a reasonable timeout (30 seconds)
self.queue_and_wait(&runner_name, job, 30).await
self.queue_and_wait(&runner, job, 30).await
} else {
Err(RunnerError::ActorNotFound {
actor_id: job.runner_name.clone(),
actor_id: job.runner.clone(),
})
}
}
@@ -280,9 +276,7 @@ impl Supervisor {
/// Get a job by job ID from Redis
pub async fn get_job(&self, job_id: &str) -> RunnerResult<crate::job::Job> {
use redis::AsyncCommands;
let mut conn = self.redis_client.get_multiplexed_async_connection().await
let _conn = self.redis_client.get_multiplexed_async_connection().await
.map_err(|e| RunnerError::RedisError {
source: e
})?;
@@ -296,9 +290,8 @@ impl Supervisor {
/// Ping a runner by dispatching a ping job to its queue
pub async fn ping_runner(&mut self, runner_id: &str) -> RunnerResult<String> {
use crate::job::{Job, JobBuilder};
use std::time::Duration;
use crate::job::JobBuilder;
// Check if runner exists
if !self.runners.contains_key(runner_id) {
return Err(RunnerError::ActorNotFound {
@@ -311,9 +304,9 @@ impl Supervisor {
.caller_id("supervisor_ping")
.context_id("ping_context")
.payload("ping")
.runner_name(runner_id)
.runner(runner_id)
.executor("ping")
.timeout(Duration::from_secs(10))
.timeout(10)
.build()
.map_err(|e| RunnerError::QueueError {
actor_id: runner_id.to_string(),
@@ -329,17 +322,15 @@ impl Supervisor {
/// Stop a job by ID
pub async fn stop_job(&mut self, job_id: &str) -> RunnerResult<()> {
use redis::AsyncCommands;
// For now, we'll implement a basic stop by removing the job from Redis
// In a more sophisticated implementation, you might send a stop signal to the runner
let mut conn = self.redis_client.get_multiplexed_async_connection().await
let _conn = self.redis_client.get_multiplexed_async_connection().await
.map_err(|e| RunnerError::QueueError {
actor_id: job_id.to_string(),
reason: format!("Failed to connect to Redis: {}", e),
})?;
let job_key = self.client.set_job_status(job_id, JobStatus::Stopping).await;
let _job_key = self.client.set_job_status(job_id, JobStatus::Stopping).await;
Ok(())
}
@@ -434,11 +425,11 @@ impl Supervisor {
}
/// Queue a job to a specific runner by name
pub async fn queue_job_to_runner(&mut self, runner_name: &str, job: crate::job::Job) -> RunnerResult<()> {
pub async fn queue_job_to_runner(&mut self, runner: &str, job: crate::job::Job) -> RunnerResult<()> {
use redis::AsyncCommands;
use log::{debug, info};
if let Some(runner) = self.runners.get(runner_name) {
if let Some(runner) = self.runners.get(runner) {
debug!("Queuing job {} for actor {}", job.id, runner.id);
let mut conn = self.redis_client.get_multiplexed_async_connection().await
@@ -467,7 +458,7 @@ impl Supervisor {
Ok(())
} else {
Err(RunnerError::ActorNotFound {
actor_id: runner_name.to_string(),
actor_id: runner.to_string(),
})
}
}
@@ -478,18 +469,18 @@ impl Supervisor {
/// 2. BLPOP on the reply queue for this job
/// 3. Get the job result from the job hash
/// 4. Return the complete result
pub async fn queue_and_wait(&mut self, runner_name: &str, job: crate::job::Job, timeout_secs: u64) -> RunnerResult<Option<String>> {
pub async fn queue_and_wait(&mut self, runner: &str, job: crate::job::Job, timeout_secs: u64) -> RunnerResult<Option<String>> {
use redis::AsyncCommands;
let job_id = job.id.clone();
// First queue the job
self.queue_job_to_runner(runner_name, job).await?;
self.queue_job_to_runner(runner, job).await?;
// Get Redis connection from the supervisor (shared Redis client)
let _runner = self.runners.get(runner_name)
let _runner = self.runners.get(runner)
.ok_or_else(|| RunnerError::ActorNotFound {
actor_id: runner_name.to_string(),
actor_id: runner.to_string(),
})?;
let mut conn = self.redis_client.get_multiplexed_async_connection().await
@@ -505,7 +496,7 @@ impl Supervisor {
})?;
match result {
Some(reply_data) => {
Some(_reply_data) => {
// Reply received, now get the job result from the job hash
let job_key = self.client.job_key(&job_id);
let job_result: Option<String> = conn.hget(&job_key, "result").await
@@ -526,7 +517,7 @@ impl Supervisor {
pub async fn get_all_runner_status(&self) -> RunnerResult<Vec<(String, RunnerStatus)>> {
let mut results = Vec::new();
for (actor_id, instance) in &self.runners {
for (actor_id, _instance) in &self.runners {
match self.get_runner_status(actor_id).await {
Ok(status) => results.push((actor_id.clone(), status)),
Err(_) => {
@@ -663,6 +654,107 @@ impl Supervisor {
self.client.list_jobs().await
}
/// List all jobs with full details from Redis
pub async fn list_all_jobs(&self) -> RunnerResult<Vec<crate::job::Job>> {
let job_ids = self.client.list_jobs().await?;
let mut jobs = Vec::new();
for job_id in job_ids {
if let Ok(job) = self.client.get_job(&job_id).await {
jobs.push(job);
}
}
Ok(jobs)
}
/// Start a previously created job by queuing it to its assigned runner
pub async fn start_job(&mut self, secret: &str, job_id: &str) -> RunnerResult<()> {
// Check if the secret is valid (admin or user secret)
if !self.admin_secrets.contains(&secret.to_string()) &&
!self.user_secrets.contains(&secret.to_string()) {
return Err(RunnerError::AuthenticationError {
message: "Invalid secret for job operations".to_string()
});
}
// Get the job from Redis
let job = self.get_job(job_id).await?;
let runner = job.runner.clone();
// Queue the job to its assigned runner
self.queue_job_to_runner(&runner, job).await
}
/// Get the status of a job
pub async fn get_job_status(&self, job_id: &str) -> RunnerResult<crate::openrpc::JobStatusResponse> {
use redis::AsyncCommands;
let mut conn = self.redis_client.get_multiplexed_async_connection().await
.map_err(|e| RunnerError::RedisError { source: e })?;
// Get job data from Redis hash
let job_data: std::collections::HashMap<String, String> = conn.hgetall(format!("{}:job:{}", self.namespace, job_id)).await
.map_err(|e| RunnerError::RedisError { source: e })?;
if job_data.is_empty() {
return Err(RunnerError::JobNotFound { job_id: job_id.to_string() });
}
let status = job_data.get("status").unwrap_or(&"unknown".to_string()).clone();
let created_at = job_data.get("created_at").unwrap_or(&"".to_string()).clone();
let started_at = job_data.get("started_at").cloned();
let completed_at = job_data.get("completed_at").cloned();
Ok(crate::openrpc::JobStatusResponse {
job_id: job_id.to_string(),
status,
created_at,
started_at,
completed_at,
})
}
/// Get the result of a job (blocks until result is available)
pub async fn get_job_result(&self, job_id: &str) -> RunnerResult<Option<String>> {
use redis::AsyncCommands;
use tokio::time::{sleep, Duration};
let mut conn = self.redis_client.get_multiplexed_async_connection().await
.map_err(|e| RunnerError::RedisError { source: e })?;
// Poll for job completion with timeout
for _ in 0..300 { // 5 minutes timeout (300 * 1 second)
let job_data: std::collections::HashMap<String, String> = conn.hgetall(format!("{}:job:{}", self.namespace, job_id)).await
.map_err(|e| RunnerError::RedisError { source: e })?;
if job_data.is_empty() {
return Err(RunnerError::JobNotFound { job_id: job_id.to_string() });
}
let status_str = "unknown".to_string();
let status = job_data.get("status").unwrap_or(&status_str);
match status.as_str() {
"completed" => {
return Ok(job_data.get("result").cloned());
},
"failed" | "timeout" => {
let default_error = "Job failed".to_string();
let error_msg = job_data.get("error").unwrap_or(&default_error).clone();
return Ok(Some(format!("Error: {}", error_msg)));
},
_ => {
// Job still running, wait and check again
tokio::time::sleep(tokio::time::Duration::from_secs(1)).await;
}
}
}
// Timeout reached
Ok(Some("Error: Timeout waiting for job result".to_string()))
}
/// Get runners count
pub fn runners_count(&self) -> usize {
self.runners.len()
@@ -702,9 +794,8 @@ impl Default for Supervisor {
}
mod tests {
#[allow(unused_imports)]
use super::*;
use std::path::PathBuf;
use sal_service_manager::SimpleProcessManager;
#[tokio::test]
async fn test_supervisor_creation() {

View File

@@ -0,0 +1,279 @@
//! Integration tests for the job API
//!
//! These tests validate the complete job lifecycle using a real supervisor instance.
//! They require Redis and a running supervisor to execute properly.
use hero_supervisor_openrpc_client::{SupervisorClient, JobBuilder, JobResult};
use std::time::Duration;
use tokio::time::sleep;
use uuid::Uuid;
/// Test helper to create a unique job for testing
fn create_test_job(context: &str) -> Result<hero_supervisor_openrpc_client::Job, Box<dyn std::error::Error>> {
JobBuilder::new()
.caller_id("integration_test")
.context_id(context)
.payload("echo 'Test job output'")
.executor("osis")
.runner("osis_runner_1")
.timeout(30)
.env_var("TEST_VAR", "test_value")
.build()
.map_err(|e| e.into())
}
/// Test helper to check if supervisor is available
async fn is_supervisor_available() -> bool {
match SupervisorClient::new("http://localhost:3030") {
Ok(client) => client.discover().await.is_ok(),
Err(_) => false,
}
}
#[tokio::test]
async fn test_jobs_create_and_start() {
if !is_supervisor_available().await {
println!("Skipping test - supervisor not available");
return;
}
let client = SupervisorClient::new("http://localhost:3030").unwrap();
let secret = "user-secret-456";
let job = create_test_job("create_and_start").unwrap();
// Test jobs.create
let job_id = client.jobs_create(secret, job).await.unwrap();
assert!(!job_id.is_empty());
// Test job.start
let result = client.job_start(secret, &job_id).await;
assert!(result.is_ok());
}
#[tokio::test]
async fn test_job_status_monitoring() {
if !is_supervisor_available().await {
println!("Skipping test - supervisor not available");
return;
}
let client = SupervisorClient::new("http://localhost:3030").unwrap();
let secret = "user-secret-456";
let job = create_test_job("status_monitoring").unwrap();
let job_id = client.jobs_create(secret, job).await.unwrap();
client.job_start(secret, &job_id).await.unwrap();
// Test job.status
let mut attempts = 0;
let max_attempts = 10;
while attempts < max_attempts {
let status = client.job_status(&job_id).await.unwrap();
assert!(!status.job_id.is_empty());
assert!(!status.status.is_empty());
assert!(!status.created_at.is_empty());
if status.status == "completed" || status.status == "failed" {
break;
}
attempts += 1;
sleep(Duration::from_secs(1)).await;
}
}
#[tokio::test]
async fn test_job_result_retrieval() {
if !is_supervisor_available().await {
println!("Skipping test - supervisor not available");
return;
}
let client = SupervisorClient::new("http://localhost:3030").unwrap();
let secret = "user-secret-456";
let job = create_test_job("result_retrieval").unwrap();
let job_id = client.jobs_create(secret, job).await.unwrap();
client.job_start(secret, &job_id).await.unwrap();
// Wait a bit for job to complete
sleep(Duration::from_secs(3)).await;
// Test job.result
let result = client.job_result(&job_id).await.unwrap();
match result {
JobResult::Success { success } => {
assert!(!success.is_empty());
},
JobResult::Error { error } => {
assert!(!error.is_empty());
}
}
}
#[tokio::test]
async fn test_job_run_immediate() {
if !is_supervisor_available().await {
println!("Skipping test - supervisor not available");
return;
}
let client = SupervisorClient::new("http://localhost:3030").unwrap();
let secret = "user-secret-456";
let job = create_test_job("immediate_run").unwrap();
// Test job.run (immediate execution)
let result = client.job_run(secret, job).await.unwrap();
match result {
JobResult::Success { success } => {
assert!(!success.is_empty());
},
JobResult::Error { error } => {
assert!(!error.is_empty());
}
}
}
#[tokio::test]
async fn test_jobs_list() {
if !is_supervisor_available().await {
println!("Skipping test - supervisor not available");
return;
}
let client = SupervisorClient::new("http://localhost:3030").unwrap();
// Test jobs.list
let job_ids = client.jobs_list().await.unwrap();
// Should return a vector (might be empty)
assert!(job_ids.len() >= 0);
}
#[tokio::test]
async fn test_authentication_failures() {
if !is_supervisor_available().await {
println!("Skipping test - supervisor not available");
return;
}
let client = SupervisorClient::new("http://localhost:3030").unwrap();
let invalid_secret = "invalid-secret-123";
let job = create_test_job("auth_failure").unwrap();
// Test that invalid secrets fail
let result = client.jobs_create(invalid_secret, job.clone()).await;
assert!(result.is_err());
let result = client.job_run(invalid_secret, job.clone()).await;
assert!(result.is_err());
let result = client.job_start(invalid_secret, "fake-job-id").await;
assert!(result.is_err());
}
#[tokio::test]
async fn test_nonexistent_job_operations() {
if !is_supervisor_available().await {
println!("Skipping test - supervisor not available");
return;
}
let client = SupervisorClient::new("http://localhost:3030").unwrap();
let fake_job_id = format!("nonexistent-{}", Uuid::new_v4());
// Test operations on nonexistent job should fail
let result = client.job_status(&fake_job_id).await;
assert!(result.is_err());
let result = client.job_result(&fake_job_id).await;
assert!(result.is_err());
}
#[tokio::test]
async fn test_complete_workflow() {
if !is_supervisor_available().await {
println!("Skipping test - supervisor not available");
return;
}
let client = SupervisorClient::new("http://localhost:3030").unwrap();
let secret = "user-secret-456";
let job = create_test_job("complete_workflow").unwrap();
// Complete workflow test
let job_id = client.jobs_create(secret, job).await.unwrap();
client.job_start(secret, &job_id).await.unwrap();
// Monitor until completion
let mut final_status = String::new();
for _ in 0..15 {
let status = client.job_status(&job_id).await.unwrap();
final_status = status.status.clone();
if final_status == "completed" || final_status == "failed" || final_status == "timeout" {
break;
}
sleep(Duration::from_secs(1)).await;
}
// Get final result
let result = client.job_result(&job_id).await.unwrap();
match result {
JobResult::Success { .. } => {
assert_eq!(final_status, "completed");
},
JobResult::Error { .. } => {
assert!(final_status == "failed" || final_status == "timeout");
}
}
}
#[tokio::test]
async fn test_batch_job_processing() {
if !is_supervisor_available().await {
println!("Skipping test - supervisor not available");
return;
}
let client = SupervisorClient::new("http://localhost:3030").unwrap();
let secret = "user-secret-456";
let job_count = 3;
let mut job_ids = Vec::new();
// Create multiple jobs
for i in 0..job_count {
let job = JobBuilder::new()
.caller_id("integration_test")
.context_id(&format!("batch_job_{}", i))
.payload(&format!("echo 'Batch job {}'", i))
.executor("osis")
.runner("osis_runner_1")
.timeout(30)
.build()
.unwrap();
let job_id = client.jobs_create(secret, job).await.unwrap();
job_ids.push(job_id);
}
// Start all jobs
for job_id in &job_ids {
client.job_start(secret, job_id).await.unwrap();
}
// Wait for all jobs to complete
sleep(Duration::from_secs(5)).await;
// Collect all results
let mut results = Vec::new();
for job_id in &job_ids {
let result = client.job_result(job_id).await.unwrap();
results.push(result);
}
// Verify we got results for all jobs
assert_eq!(results.len(), job_count);
}