fix coordinator compilation

@@ -38,6 +38,9 @@ base64 = "0.22.1"
 tracing.workspace = true
 tracing-subscriber.workspace = true
 
+# Time
+chrono.workspace = true
+
 # Hero dependencies
 hero-job = { path = "../../lib/models/job" }
 hero-supervisor-openrpc-client = { path = "../../lib/clients/supervisor" }
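
The new chrono dependency backs the timestamp fields that hero_job::Job now carries; JobCreate::into_domain further down fills created_at/updated_at from Utc::now(). A minimal sketch of just that API surface (nothing here is coordinator-specific):

    // Sketch only: the chrono calls this dependency is pulled in for.
    use chrono::{DateTime, Utc};

    fn main() {
        let created_at: DateTime<Utc> = Utc::now();
        let updated_at = created_at; // both timestamps start equal on creation
        println!("created={created_at} updated={updated_at}");
    }
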
@@ -1,12 +0,0 @@
-// Re-export from the supervisor client library
-pub use hero_supervisor_openrpc_client::{
-    SupervisorClient,
-    ClientError as SupervisorClientError,
-    transports::{
-        MyceliumClient,
-        MyceliumClientError,
-        SupervisorHub,
-        Destination,
-        MyceliumTransport,
-    },
-};
@@ -3,7 +3,7 @@ use std::collections::{HashMap, HashSet, VecDeque};
 use std::fmt;
 
 use crate::{
-    models::{Flow, Job, JobStatus, ScriptType},
+    models::{Flow, Job, JobStatus},
     storage::RedisDriver,
 };
 
@@ -58,12 +58,25 @@ impl From<Box<dyn std::error::Error + Send + Sync>> for DagError {
     }
 }
 
+/// Node execution status - tracks the state of a job in the DAG workflow
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+pub enum NodeStatus {
+    Pending,    // Not yet ready to execute (waiting for dependencies)
+    Ready,      // Dependencies met, ready to be dispatched
+    Dispatched, // Sent to supervisor for execution
+    Running,    // Currently executing
+    Completed,  // Successfully completed
+    Failed,     // Execution failed
+    Cancelled,  // Execution was cancelled
+}
+
 #[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct JobSummary {
+pub struct FlowNode {
     pub id: u32,
     pub depends: Vec<u32>,
     pub prerequisites: Vec<String>,
-    pub script_type: ScriptType,
+    pub supervisor_url: String,  // URL of the supervisor to route this job to
+    pub node_status: NodeStatus, // Track execution status at the node level
 }
 
 #[derive(Debug, Clone, Serialize, Deserialize)]
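
The NodeStatus lifecycle introduced here is enforced later in this commit by AppService::update_node_status. A standalone sketch of the same transition table (the variants come from the enum above; the helper name can_transition is illustrative, not part of the commit):

    #[allow(dead_code)]
    #[derive(Debug, Clone, Copy, PartialEq)]
    enum NodeStatus { Pending, Ready, Dispatched, Running, Completed, Failed, Cancelled }

    // Mirrors the match arms in update_node_status: each state lists where it
    // may move next; Completed, Failed and Cancelled are terminal.
    fn can_transition(from: NodeStatus, to: NodeStatus) -> bool {
        use NodeStatus::*;
        match from {
            Pending => matches!(to, Ready | Dispatched | Cancelled),
            Ready => matches!(to, Dispatched | Cancelled),
            Dispatched => matches!(to, Running | Failed | Cancelled),
            Running => matches!(to, Completed | Failed | Cancelled),
            Completed | Failed | Cancelled => false,
        }
    }

    fn main() {
        assert!(can_transition(NodeStatus::Pending, NodeStatus::Dispatched));
        assert!(!can_transition(NodeStatus::Completed, NodeStatus::Running));
    }
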
@@ -71,7 +84,7 @@ pub struct FlowDag {
     pub flow_id: u32,
     pub caller_id: u32,
     pub context_id: u32,
-    pub nodes: HashMap<u32, JobSummary>,
+    pub nodes: HashMap<u32, FlowNode>,
     pub edges: Vec<(u32, u32)>,         // (from prerequisite, to job)
     pub reverse_edges: Vec<(u32, u32)>, // (from job, to prerequisite)
     pub roots: Vec<u32>,                // in_degree == 0
@@ -122,8 +135,23 @@ pub async fn build_flow_dag(
         in_degree.entry(jid).or_insert(0);
     }
 
-    for (&jid, job) in &jobs {
-        for &dep in job.depends() {
+    // Build nodes first with their dependencies
+    // TODO: Load node dependencies from Flow metadata or separate storage
+    let mut nodes: HashMap<u32, FlowNode> = HashMap::with_capacity(jobs.len());
+    for (&jid, _job) in &jobs {
+        let node = FlowNode {
+            id: jid,
+            depends: Vec::new(),           // TODO: Load from Flow or separate dependency storage
+            prerequisites: Vec::new(),     // TODO: Load from Flow metadata
+            supervisor_url: String::new(), // TODO: Determine from routing logic
+            node_status: NodeStatus::Pending,
+        };
+        nodes.insert(jid, node);
+    }
+
+    // Build edges from node dependencies
+    for (&jid, node) in &nodes {
+        for &dep in &node.depends {
             if !job_id_set.contains(&dep) {
                 return Err(DagError::MissingDependency {
                     job: jid,
@@ -196,44 +224,31 @@ pub async fn build_flow_dag(
         .filter_map(|(k, v)| if v.is_empty() { Some(*k) } else { None })
         .collect();
 
-    // Nodes map (JobSummary)
-    let mut nodes: HashMap<u32, JobSummary> = HashMap::with_capacity(jobs.len());
-    for (&jid, job) in &jobs {
-        let summary = JobSummary {
-            id: jid,
-            depends: job.depends().to_vec(),
-            prerequisites: job.prerequisites().to_vec(),
-            script_type: job.script_type(),
-        };
-        nodes.insert(jid, summary);
-    }
-
     // Sort edges deterministically
     edges.sort_unstable();
     reverse_edges.sort_unstable();
 
-    // Populate runtime execution state from persisted Job.status()
+    // Populate runtime execution state from FlowNode status
     let mut started_set: HashSet<u32> = HashSet::new();
     let mut completed_set: HashSet<u32> = HashSet::new();
     let mut error_ids: Vec<u32> = Vec::new();
 
-    for (&jid, job) in &jobs {
-        match job.status() {
-            JobStatus::Finished => {
+    for (&jid, node) in &nodes {
+        match node.node_status {
+            NodeStatus::Completed => {
                 completed_set.insert(jid);
             }
-            JobStatus::Started => {
+            NodeStatus::Running => {
                 started_set.insert(jid);
             }
-            JobStatus::Dispatched => {
-                // Consider Dispatched as "in-flight" for DAG runtime started set,
-                // so queued/running work is visible in periodic snapshots.
+            NodeStatus::Dispatched => {
+                // Consider Dispatched as "in-flight" for DAG runtime started set
                 started_set.insert(jid);
             }
-            JobStatus::Error => {
+            NodeStatus::Failed => {
                 error_ids.push(jid);
            }
-            JobStatus::WaitingForPrerequisites => {
+            NodeStatus::Pending | NodeStatus::Ready | NodeStatus::Cancelled => {
                 // Neither started nor completed
             }
         }
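
With the started/completed sets rebuilt from node status, a node is dispatchable exactly when it has not started or completed and every dependency is in the completed set. A minimal sketch of that predicate (ready_jobs itself is not shown in this hunk, so the names here are illustrative, not the crate's API):

    use std::collections::{HashMap, HashSet};

    fn ready_nodes(
        depends: &HashMap<u32, Vec<u32>>,
        started: &HashSet<u32>,
        completed: &HashSet<u32>,
    ) -> Vec<u32> {
        let mut ready: Vec<u32> = depends
            .iter()
            .filter(|&(id, deps)| {
                !started.contains(id)
                    && !completed.contains(id)
                    && deps.iter().all(|d| completed.contains(d))
            })
            .map(|(id, _)| *id)
            .collect();
        ready.sort_unstable(); // deterministic, like the sorted edge lists above
        ready
    }

    fn main() {
        let depends: HashMap<u32, Vec<u32>> =
            [(1, vec![]), (2, vec![1])].into_iter().collect();
        let completed: HashSet<u32> = [1].into_iter().collect();
        assert_eq!(ready_nodes(&depends, &HashSet::new(), &completed), vec![2]);
    }
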
@@ -304,7 +319,7 @@ impl FlowDag {
     /// - If the flow has already failed, return FlowFailed
     /// - If the job is already started or completed, this is a no-op (idempotent)
     /// - If any dependency is not completed, return DependenciesIncomplete with the missing deps
-    pub fn mark_job_started(&mut self, job: u32) -> DagResult<()> {
+    pub fn mark_node_started(&mut self, job: u32) -> DagResult<()> {
         if !self.nodes.contains_key(&job) {
             return Err(DagError::UnknownJob { job });
         }
@@ -337,7 +352,7 @@ impl FlowDag {
     /// - If the job is already completed, this is a no-op (idempotent)
     /// - If the flow has already failed, return FlowFailed
     /// - If the job was not previously started, return JobNotStarted
-    pub fn mark_job_completed(&mut self, job: u32) -> DagResult<()> {
+    pub fn mark_node_completed(&mut self, job: u32) -> DagResult<()> {
         if !self.nodes.contains_key(&job) {
             return Err(DagError::UnknownJob { job });
         }
@@ -363,7 +378,7 @@ impl FlowDag {
     /// - If it is the same job, no-op (idempotent)
     /// - If it is a different job, return FlowFailed with the already-failed job
     /// - Otherwise record this job as the failed job
-    pub fn mark_job_failed(&mut self, job: u32) -> DagResult<()> {
+    pub fn mark_node_failed(&mut self, job: u32) -> DagResult<()> {
         if !self.nodes.contains_key(&job) {
             return Err(DagError::UnknownJob { job });
         }
@@ -1,4 +1,3 @@
-pub mod clients;
 pub mod dag;
 pub mod models;
 pub mod router;
@@ -103,10 +103,10 @@ async fn main() {
     {
         let base_url = format!("http://{}:{}", cli.mycelium_ip, cli.mycelium_port);
         let mycelium = Arc::new(
-            hero_coordinator::clients::MyceliumClient::new(&base_url)
+            hero_supervisor_openrpc_client::transports::MyceliumClient::new(&base_url)
                 .expect("Failed to create MyceliumClient")
         );
-        let hub = hero_coordinator::clients::SupervisorHub::new_with_client(
+        let hub = hero_supervisor_openrpc_client::transports::SupervisorHub::new_with_client(
             mycelium,
             "supervisor.rpc".to_string(),
         );
@@ -1,16 +1,12 @@
-mod actor;
 mod context;
 mod flow;
 mod message;
 mod runner;
-mod script_type;
 
-pub use actor::Actor;
 pub use context::Context;
 pub use flow::{Flow, FlowStatus};
 pub use message::{Message, MessageFormatType, MessageStatus, MessageType, TransportStatus};
 pub use runner::Runner;
-pub use script_type::ScriptType;
 
 // Re-export Job types from hero_job
 pub use hero_job::{Job, JobStatus, JobError, JobResult, JobBuilder, JobSignature};
@@ -1,15 +0,0 @@
-use std::net::IpAddr;
-
-use serde::{Deserialize, Serialize};
-
-use crate::time::Timestamp;
-
-#[derive(Serialize, Deserialize, Clone)]
-pub struct Actor {
-    id: u32,
-    pubkey: String,
-    /// IP where the actor is reachable, can be mycelium but that is not mandatory
-    address: Vec<IpAddr>,
-    created_at: Timestamp,
-    updated_at: Timestamp,
-}
@@ -1,7 +1,8 @@
 use serde::{Deserialize, Serialize};
 
 use crate::{
-    models::{Job, ScriptType},
+    dag::FlowNode,
+    models::Job,
     time::Timestamp,
 };
 
@@ -13,8 +14,10 @@ pub struct Message {
     pub caller_id: u32,
     /// Id of the context in which this message was sent
     pub context_id: u32,
+    /// Id of the flow this message belongs to (for DAG tracking)
+    pub flow_id: u32,
     pub message: String,
-    pub message_type: ScriptType,
+    pub message_type: String, // Deprecated, use job.executor instead
     pub message_format_type: MessageFormatType,
     /// Seconds for the message to arrive at the destination
     pub timeout: u32,
@@ -28,6 +31,9 @@ pub struct Message {
     /// Latest transport status as reported by Mycelium
     pub transport_status: Option<TransportStatus>,
 
+    /// FlowNodes containing routing and dependency info
+    pub nodes: Vec<FlowNode>,
+    /// Legacy: Jobs for backward compatibility (TODO: remove after full migration)
     pub job: Vec<Job>,
     pub logs: Vec<Log>,
     pub created_at: Timestamp,
@@ -2,7 +2,6 @@ use std::net::IpAddr;
 
 use serde::{Deserialize, Serialize};
 
-use crate::models::ScriptType;
 use crate::time::Timestamp;
 
 #[derive(Serialize, Deserialize, Clone)]
@@ -14,8 +13,8 @@ pub struct Runner {
     pub address: IpAddr,
     /// Needs to be set by the runner, usually `runner<runnerid`
     pub topic: String,
-    /// The script type this runner can execute; used for routing
-    pub script_type: ScriptType,
+    /// The executor this runner can handle (e.g., "python", "rhai"); used for routing
+    pub executor: String,
     /// If this is true, the runner also listens on a local redis queue
     pub local: bool,
     /// Optional secret used for authenticated supervisor calls (if required)
@@ -1,9 +0,0 @@
-use serde::{Deserialize, Serialize};
-
-#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq)]
-pub enum ScriptType {
-    Osis,
-    Sal,
-    V,
-    Python,
-}
@@ -11,10 +11,13 @@ use std::hash::{Hash, Hasher};
 use tokio::sync::{Mutex, Semaphore};
 
 use crate::{
-    clients::{Destination, MyceliumClient, MyceliumTransport, SupervisorClient, SupervisorHub},
-    models::{Job, JobStatus, Message, MessageStatus, ScriptType, TransportStatus},
+    models::{Job, JobStatus, Message, MessageStatus, TransportStatus},
     service::AppService,
 };
+use hero_supervisor_openrpc_client::{
+    SupervisorClient,
+    transports::{Destination, MyceliumClient, MyceliumTransport, SupervisorHub},
+};
 use tracing::{error, info};
 
 #[derive(Clone)]
@@ -197,44 +200,64 @@ async fn deliver_one(
 
     // Load message
     let msg: Message = service.load_message(context_id, caller_id, id).await?;
-    // Embedded job id (if any)
-    let job_id_opt: Option<u32> = msg.job.first().map(|j| j.id);
-
-    // Determine routing script_type
-    let desired: ScriptType = determine_script_type(&msg);
-
-    // Discover runners and select a matching one
-    let runners = service.scan_runners(context_id).await?;
-    let Some(runner) = runners.into_iter().find(|r| r.script_type == desired) else {
-        let log = format!(
-            "No runner with script_type {:?} available in context {} for message {}",
-            desired, context_id, msg_key
-        );
-        let _ = service
-            .append_message_logs(context_id, caller_id, id, vec![log.clone()])
-            .await;
-        let _ = service
-            .update_message_status(context_id, caller_id, id, MessageStatus::Error)
-            .await;
-        return Err(log.into());
+    // Determine routing from FlowNode.supervisor_url if available
+    let supervisor_url = if !msg.nodes.is_empty() {
+        // Use FlowNode routing (new architecture)
+        msg.nodes[0].supervisor_url.clone()
+    } else {
+        // Fallback: get first available runner (legacy)
+        let runners = service.scan_runners(context_id).await?;
+        let Some(runner) = runners.into_iter().next() else {
+            let log = format!(
+                "No runners available in context {} for message {}",
+                context_id, msg_key
+            );
+            let _ = service
+                .append_message_logs(context_id, caller_id, id, vec![log.clone()])
+                .await;
+            let _ = service
+                .update_message_status(context_id, caller_id, id, MessageStatus::Error)
+                .await;
+            return Err(log.into());
+        };
+
+        // Build URL from runner
+        if !runner.pubkey.trim().is_empty() {
+            format!("mycelium://{}", runner.pubkey)
+        } else {
+            format!("http://{}", runner.address)
+        }
+    };
+
+    // Parse supervisor_url to determine destination
+    // Format: "mycelium://<pubkey>" or "http://<address>" or just "<address>"
+    let dest = if supervisor_url.starts_with("mycelium://") {
+        let pubkey = supervisor_url.strip_prefix("mycelium://").unwrap_or("");
+        Destination::Pk(pubkey.to_string())
+    } else {
+        // Extract address (strip http:// or https:// if present)
+        let address_str = supervisor_url
+            .strip_prefix("http://")
+            .or_else(|| supervisor_url.strip_prefix("https://"))
+            .unwrap_or(&supervisor_url);
+
+        // Parse IP address (strip port if present)
+        let ip_str = address_str.split(':').next().unwrap_or(address_str);
+        let ip_addr = ip_str.parse().unwrap_or_else(|_| {
+            // Default to localhost if parsing fails
+            std::net::IpAddr::V4(std::net::Ipv4Addr::new(127, 0, 0, 1))
+        });
+        Destination::Ip(ip_addr)
     };
 
     // Build SupervisorClient
-    let dest = if !runner.pubkey.trim().is_empty() {
-        Destination::Pk(runner.pubkey.clone())
-    } else {
-        Destination::Ip(runner.address)
-    };
-    // Keep clones for poller usage
-    let dest_for_poller = dest.clone();
-    let topic_for_poller = cfg.topic.clone();
-    let secret_for_poller = runner.secret.clone();
     let client = cache
         .get_or_create(
             sup_hub.clone(),
             dest.clone(),
             cfg.topic.clone(),
-            runner.secret.clone(),
+            None, // TODO: Get secret from runner or config
         )
         .await;
 
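
The new supervisor_url parsing accepts three shapes: a mycelium public key, an http(s) address, or a bare address. A test-style sketch of the same string handling, with Destination reduced to a local enum so the snippet stands alone (the real type lives in the supervisor client crate):

    use std::net::{IpAddr, Ipv4Addr};

    #[derive(Debug, PartialEq)]
    enum Destination { Pk(String), Ip(IpAddr) }

    // Same logic as deliver_one above: "mycelium://<pubkey>" routes by public
    // key; anything else is treated as an address, scheme and port stripped,
    // falling back to 127.0.0.1 when the remainder is not a parseable IP.
    fn parse_destination(supervisor_url: &str) -> Destination {
        if let Some(pubkey) = supervisor_url.strip_prefix("mycelium://") {
            return Destination::Pk(pubkey.to_string());
        }
        let address_str = supervisor_url
            .strip_prefix("http://")
            .or_else(|| supervisor_url.strip_prefix("https://"))
            .unwrap_or(supervisor_url);
        let ip_str = address_str.split(':').next().unwrap_or(address_str);
        let ip_addr = ip_str
            .parse()
            .unwrap_or_else(|_| IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1)));
        Destination::Ip(ip_addr)
    }

    fn main() {
        assert_eq!(parse_destination("mycelium://abc"), Destination::Pk("abc".into()));
        assert_eq!(
            parse_destination("http://10.0.0.5:8080"),
            Destination::Ip(IpAddr::V4(Ipv4Addr::new(10, 0, 0, 5)))
        );
        assert_eq!(
            parse_destination("not-an-ip"),
            Destination::Ip(IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1)))
        );
    }

Note the IPv6 caveat this inherits from the hunk: splitting on ':' truncates addresses like ::1 before parsing, so only IPv4 literals survive the fallback path.
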
@@ -244,11 +267,44 @@ async fn deliver_one(
 
     // Send via the new client API
     // The transport handles message correlation internally
-    let _result = if method == "job.run" {
+    let job_result = if method == "job.run" {
         if let Some(j) = msg.job.first() {
             // Use typed job_run method
             let job = serde_json::from_value(job_to_json(j)?)?;
-            client.job_run(job, None).await?;
+            let result = client.job_run(job, None).await;
+
+            // Update node status based on result
+            if !msg.nodes.is_empty() {
+                let node_id = msg.nodes[0].id;
+                let flow_id = msg.flow_id;
+
+                match &result {
+                    Ok(_) => {
+                        // Job completed successfully
+                        let _ = service
+                            .update_node_status_unchecked(
+                                context_id,
+                                flow_id,
+                                node_id,
+                                crate::dag::NodeStatus::Completed,
+                            )
+                            .await;
+                    }
+                    Err(_) => {
+                        // Job failed
+                        let _ = service
+                            .update_node_status_unchecked(
+                                context_id,
+                                flow_id,
+                                node_id,
+                                crate::dag::NodeStatus::Failed,
+                            )
+                            .await;
+                    }
+                }
+            }
+
+            result?;
             serde_json::Value::Null
         } else {
             // Generic call - not supported in new API, would need custom implementation
@@ -277,19 +333,16 @@ async fn deliver_one(
         .update_message_status(context_id, caller_id, id, MessageStatus::Acknowledged)
         .await?;
 
-    // For job.run, mark the job as dispatched
+    // Log job completion
     if method == "job.run" {
-        if let Some(job_id) = msg.job.first().map(|j| j.id) {
-            let _ = service
-                .update_job_status_unchecked(context_id, caller_id, job_id, JobStatus::Dispatched)
-                .await;
+        if let Some(job_id) = msg.job.first().map(|j| j.id.parse::<u32>().unwrap_or(0)) {
             let _ = service
                 .append_message_logs(
                     context_id,
                     caller_id,
                     id,
                     vec![format!(
-                        "Supervisor reply for job {}: job_queued (processed synchronously)",
+                        "Job {} completed successfully",
                         job_id
                     )],
                 )
@@ -304,13 +357,7 @@ async fn deliver_one(
     Ok(())
 }
 
-fn determine_script_type(msg: &Message) -> ScriptType {
-    // Prefer embedded job's script_type if available, else fallback to message.message_type
-    match msg.job.first() {
-        Some(j) => j.script_type.clone(),
-        None => msg.message_type.clone(),
-    }
-}
+// Removed determine_executor - routing now based on FlowNode.supervisor_url
 
 fn build_params(msg: &Message) -> Result<Value, Box<dyn std::error::Error + Send + Sync>> {
     // Minimal mapping:
@@ -15,8 +15,8 @@ use serde_json::{Value, json};
 use crate::{
     dag::{DagError, FlowDag},
     models::{
-        Actor, Context, Flow, FlowStatus, Job, JobStatus, Message, MessageFormatType,
-        MessageStatus, Runner, ScriptType,
+        Context, Flow, FlowStatus, Job, JobStatus, Message, MessageFormatType,
+        MessageStatus, Runner,
     },
     service::AppService,
     time::current_timestamp,
@@ -92,25 +92,13 @@ fn dag_err(e: DagError) -> ErrorObjectOwned {
 // Create DTOs and Param wrappers
 // -----------------------------
 
-#[derive(Debug, Deserialize)]
-pub struct ActorCreate {
-    pub id: u32,
-    pub pubkey: String,
-    pub address: Vec<IpAddr>,
-}
-impl ActorCreate {
-    pub fn into_domain(self) -> Result<Actor, String> {
-        let ts = current_timestamp();
-        let v = json!({
-            "id": self.id,
-            "pubkey": self.pubkey,
-            "address": self.address,
-            "created_at": ts,
-            "updated_at": ts,
-        });
-        serde_json::from_value(v).map_err(|e| e.to_string())
-    }
-}
+// Actor was renamed to Runner - ActorCreate is deprecated
+// #[derive(Debug, Deserialize)]
+// pub struct ActorCreate {
+//     pub id: u32,
+//     pub pubkey: String,
+//     pub address: Vec<IpAddr>,
+// }
 
 #[derive(Debug, Deserialize)]
 pub struct ContextCreate {
@@ -147,8 +135,8 @@ pub struct RunnerCreate {
     pub pubkey: String,
     pub address: IpAddr,
     pub topic: String,
-    /// The script type this runner executes (used for routing)
-    pub script_type: ScriptType,
+    /// The executor this runner can handle (e.g., "python", "rhai")
+    pub executor: String,
     pub local: bool,
     /// Optional secret used for authenticated supervisor calls (if required)
     pub secret: Option<String>,
@@ -162,7 +150,7 @@ impl RunnerCreate {
             pubkey,
             address,
             topic,
-            script_type,
+            executor,
             local,
             secret,
         } = self;
@@ -172,7 +160,7 @@ impl RunnerCreate {
             pubkey,
             address,
             topic,
-            script_type,
+            executor,
             local,
             secret,
             created_at: ts,
@@ -222,7 +210,8 @@ pub struct JobCreate {
     pub caller_id: u32,
     pub context_id: u32,
     pub script: String,
-    pub script_type: ScriptType,
+    pub runner: Option<String>,
+    pub executor: Option<String>,
     pub timeout: u32,
     pub retries: u8,
     pub env_vars: HashMap<String, String>,
@@ -232,37 +221,24 @@ pub struct JobCreate {
 
 impl JobCreate {
     pub fn into_domain(self) -> Job {
-        let ts = current_timestamp();
+        use chrono::Utc;
 
-        let JobCreate {
-            id,
-            caller_id,
-            context_id,
-            script,
-            script_type,
-            timeout,
-            retries,
-            env_vars,
-            prerequisites,
-            depends,
-        } = self;
-
+        // Convert old format to hero_job::Job
+        // Note: depends and prerequisites are workflow fields that need separate storage
         Job {
-            id,
-            caller_id,
-            context_id,
-            script,
-            script_type,
-            timeout,
-            retries,
-            env_vars,
-            result: HashMap::new(),
-            prerequisites,
-            depends,
-            created_at: ts,
-            updated_at: ts,
-            status: JobStatus::WaitingForPrerequisites,
+            id: self.id.to_string(),
+            caller_id: self.caller_id.to_string(),
+            context_id: self.context_id.to_string(),
+            payload: self.script,
+            runner: self.runner.unwrap_or_else(|| "default-runner".to_string()),
+            executor: self.executor.unwrap_or_else(|| "python".to_string()),
+            timeout: self.timeout as u64,
+            env_vars: self.env_vars,
+            created_at: Utc::now(),
+            updated_at: Utc::now(),
+            signatures: Vec::new(),
         }
+        // TODO: Store depends and prerequisites separately in JobSummary/DAG
     }
 }
 
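
into_domain now bridges the coordinator's numeric ids onto hero_job::Job's string ids and fills in runner/executor defaults. A trimmed, self-contained sketch of the mapping - both structs are local stand-ins whose fields follow this hunk, not the real hero_job crate (retries, signatures and the workflow fields are omitted, matching the TODO above):

    use std::collections::HashMap;
    use chrono::{DateTime, Utc};

    struct JobCreate {
        id: u32,
        caller_id: u32,
        script: String,
        runner: Option<String>,
        executor: Option<String>,
        timeout: u32,
        env_vars: HashMap<String, String>,
    }

    #[allow(dead_code)] // stand-in: not every field is read in this sketch
    struct Job {
        id: String,
        caller_id: String,
        payload: String,
        runner: String,
        executor: String,
        timeout: u64,
        env_vars: HashMap<String, String>,
        created_at: DateTime<Utc>,
        updated_at: DateTime<Utc>,
    }

    impl JobCreate {
        fn into_domain(self) -> Job {
            Job {
                id: self.id.to_string(), // numeric id -> string id
                caller_id: self.caller_id.to_string(),
                payload: self.script,    // "script" becomes "payload"
                runner: self.runner.unwrap_or_else(|| "default-runner".to_string()),
                executor: self.executor.unwrap_or_else(|| "python".to_string()),
                timeout: self.timeout as u64,
                env_vars: self.env_vars,
                created_at: Utc::now(),
                updated_at: Utc::now(),
            }
        }
    }

    fn main() {
        let job = JobCreate {
            id: 7,
            caller_id: 1,
            script: "print('hi')".into(),
            runner: None,
            executor: None,
            timeout: 30,
            env_vars: HashMap::new(),
        }
        .into_domain();
        assert_eq!(job.id, "7");
        assert_eq!(job.executor, "python"); // default when none supplied
    }
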
@@ -272,7 +248,7 @@ pub struct MessageCreate {
     pub caller_id: u32,
     pub context_id: u32,
     pub message: String,
-    pub message_type: ScriptType,
+    pub message_type: String,
     pub message_format_type: MessageFormatType,
     pub timeout: u32,
     pub timeout_ack: u32,
@@ -300,6 +276,7 @@ impl MessageCreate {
             id,
             caller_id,
             context_id,
+            flow_id: 0, // TODO: MessageCreate should include flow_id
             message,
             message_type,
             message_format_type,
@@ -308,6 +285,7 @@ impl MessageCreate {
             timeout_result,
             transport_id: None,
             transport_status: None,
+            nodes: Vec::new(), // TODO: MessageCreate should include nodes
             job: job.into_iter().map(JobCreate::into_domain).collect(),
             logs: Vec::new(),
             created_at: ts,
@@ -317,14 +295,15 @@ impl MessageCreate {
     }
 }
 
-#[derive(Debug, Deserialize)]
-pub struct ActorCreateParams {
-    pub actor: ActorCreate,
-}
-#[derive(Debug, Deserialize)]
-pub struct ActorLoadParams {
-    pub id: u32,
-}
+// Actor was renamed to Runner - ActorCreateParams and ActorLoadParams are deprecated
+// #[derive(Debug, Deserialize)]
+// pub struct ActorCreateParams {
+//     pub actor: ActorCreate,
+// }
+// #[derive(Debug, Deserialize)]
+// pub struct ActorLoadParams {
+//     pub id: u32,
+// }
 
 #[derive(Debug, Deserialize)]
 pub struct ContextCreateParams {
@@ -388,39 +367,6 @@ pub struct MessageLoadParams {
 pub fn build_module(state: Arc<AppState>) -> RpcModule<()> {
     let mut module: RpcModule<()> = RpcModule::new(());
 
-    // Actor
-    {
-        let state = state.clone();
-        module
-            .register_async_method("actor.create", move |params, _caller, _ctx| {
-                let state = state.clone();
-                async move {
-                    let p: ActorCreateParams = params.parse().map_err(invalid_params_err)?;
-                    let actor = p.actor.into_domain().map_err(invalid_params_err)?;
-                    let actor = state
-                        .service
-                        .create_actor(actor)
-                        .await
-                        .map_err(storage_err)?;
-                    Ok::<_, ErrorObjectOwned>(actor)
-                }
-            })
-            .expect("register actor.create");
-    }
-    {
-        let state = state.clone();
-        module
-            .register_async_method("actor.load", move |params, _caller, _ctx| {
-                let state = state.clone();
-                async move {
-                    let p: ActorLoadParams = params.parse().map_err(invalid_params_err)?;
-                    let actor = state.service.load_actor(p.id).await.map_err(storage_err)?;
-                    Ok::<_, ErrorObjectOwned>(actor)
-                }
-            })
-            .expect("register actor.load");
-    }
-
     // Context
     {
         let state = state.clone();
@@ -1,6 +1,6 @@
-use crate::dag::{DagError, DagResult, FlowDag, build_flow_dag};
+use crate::dag::{DagError, DagResult, FlowDag, NodeStatus, build_flow_dag};
 use crate::models::{
-    Actor, Context, Flow, FlowStatus, Job, JobStatus, Message, MessageFormatType, MessageStatus,
+    Context, Flow, FlowStatus, Job, JobStatus, Message, MessageFormatType, MessageStatus,
     Runner, TransportStatus,
 };
 use crate::storage::RedisDriver;
@@ -157,22 +157,8 @@ fn validate_context(ctx: &Context) -> Result<(), BoxError> {
     Ok(())
 }
 
-fn validate_actor(actor: &Actor) -> Result<(), BoxError> {
-    let v = as_json(actor)?;
-    let id = json_get_u32(&v, "id")?;
-    if id == 0 {
-        return Err(ValidationError::new("Actor.id must be > 0").into());
-    }
-    let pubkey = json_get_str(&v, "pubkey")?;
-    if pubkey.trim().is_empty() {
-        return Err(ValidationError::new("Actor.pubkey must not be empty").into());
-    }
-    let addr = json_get_array(&v, "address")?;
-    if addr.is_empty() {
-        return Err(ValidationError::new("Actor.address must not be empty").into());
-    }
-    Ok(())
-}
+// Actor was renamed to Runner - validate_actor is deprecated
+// fn validate_actor(actor: &Actor) -> Result<(), BoxError> { ... }
 
 fn validate_runner(_context_id: u32, runner: &Runner) -> Result<(), BoxError> {
     let v = as_json(runner)?;
@@ -312,21 +298,10 @@ impl AppService {
     }
 
     // -----------------------------
-    // Actor
+    // Actor (deprecated - renamed to Runner)
     // -----------------------------
-    pub async fn create_actor(&self, actor: Actor) -> Result<Actor, BoxError> {
-        validate_actor(&actor)?;
-        let v = as_json(&actor)?;
-        let id = json_get_u32(&v, "id")?;
-        self.ensure_actor_not_exists_global(id).await?;
-        self.redis.save_actor_global(&actor).await?;
-        Ok(actor)
-    }
-
-    pub async fn load_actor(&self, id: u32) -> Result<Actor, BoxError> {
-        let actor = self.redis.load_actor_global(id).await?;
-        Ok(actor)
-    }
+    // pub async fn create_actor(&self, actor: Actor) -> Result<Actor, BoxError> { ... }
+    // pub async fn load_actor(&self, id: u32) -> Result<Actor, BoxError> { ... }
 
     // -----------------------------
     // Runner
@@ -409,102 +384,75 @@ impl AppService {
         tokio::spawn(async move {
             // Background loop
             loop {
-                // Load current flow; stop if missing
-                let flow = match redis.load_flow(context_id, flow_id).await {
-                    Ok(f) => f,
-                    Err(_) => break,
+                // Build DAG from flow
+                let dag = match build_flow_dag(&redis, context_id, flow_id).await {
+                    Ok(d) => d,
+                    Err(_) => break, // Flow missing or error
                 };
 
-                // Track aggregate state
-                let mut all_finished = true;
-                let mut any_error = false;
-
-                // Iterate jobs declared in the flow
-                for jid in flow.jobs() {
-                    // Load job
-                    let job = match redis.load_job(context_id, caller_id, *jid).await {
-                        Ok(j) => j,
-                        Err(_) => {
-                            // If job is missing treat as error state for the flow and stop
-                            any_error = true;
-                            all_finished = false;
-                            break;
-                        }
-                    };
-
-                    match job.status() {
-                        JobStatus::Finished => {
-                            // done
-                        }
-                        JobStatus::Error => {
-                            any_error = true;
-                            all_finished = false;
-                        }
-                        JobStatus::Dispatched | JobStatus::Started => {
-                            all_finished = false;
-                        }
-                        JobStatus::WaitingForPrerequisites => {
-                            all_finished = false;
-
-                            // Check dependencies complete
-                            let mut deps_ok = true;
-                            for dep in job.depends() {
-                                match redis.load_job(context_id, caller_id, *dep).await {
-                                    Ok(dj) => {
-                                        if dj.status() != JobStatus::Finished {
-                                            deps_ok = false;
-                                            break;
-                                        }
-                                    }
-                                    Err(_) => {
-                                        deps_ok = false;
-                                        break;
-                                    }
-                                }
-                            }
-
-                            if deps_ok {
-                                // Build Message embedding this job
-                                let ts = crate::time::current_timestamp();
-                                let msg_id: u32 = job.id.parse().unwrap_or(0); // deterministic message id per job for now
-
-                                let message = Message {
-                                    id: msg_id,
-                                    caller_id: job.caller_id.parse().unwrap_or(0),
-                                    context_id,
-                                    message: "job.run".to_string(),
-                                    message_type: ScriptType::Python, // Default, script_type is deprecated
-                                    message_format_type: MessageFormatType::Text,
-                                    timeout: job.timeout,
-                                    timeout_ack: 10,
-                                    timeout_result: job.timeout,
-                                    transport_id: None,
-                                    transport_status: None,
-                                    job: vec![job.clone()],
-                                    logs: Vec::new(),
-                                    created_at: ts,
-                                    updated_at: ts,
-                                    status: MessageStatus::Dispatched,
-                                };
-
-                                // Persist the message and enqueue it
-                                if redis.save_message(context_id, &message).await.is_ok() {
-                                    let _ = redis
-                                        .enqueue_msg_out(context_id, job.caller_id, msg_id);
-                                    // Mark job as Dispatched
-                                    let _ = redis
-                                        .update_job_status(
-                                            context_id,
-                                            job.caller_id,
-                                            job.id,
-                                            JobStatus::Dispatched,
-                                        );
-                                }
-                            }
-                        }
-                    }
+                // Get ready nodes (dependencies satisfied, not yet dispatched)
+                let ready_node_ids = match dag.ready_jobs() {
+                    Ok(ids) => ids,
+                    Err(_) => {
+                        // DAG error (e.g., failed job), mark flow as error and exit
+                        let _ = redis
+                            .update_flow_status(context_id, flow_id, FlowStatus::Error)
+                            .await;
+                        break;
+                    }
+                };
+
+                // Dispatch ready nodes
+                for node_id in ready_node_ids {
+                    let node = match dag.nodes.get(&node_id) {
+                        Some(n) => n,
+                        None => continue,
+                    };
+
+                    // Load the job data
+                    let job = match redis.load_job(context_id, caller_id, node_id).await {
+                        Ok(j) => j,
+                        Err(_) => continue,
+                    };
+
+                    // Build Message with FlowNode for routing
+                    let ts = crate::time::current_timestamp();
+                    let msg_id: u32 = node_id; // Use node_id as message_id
+
+                    let message = Message {
+                        id: msg_id,
+                        caller_id: job.caller_id.parse().unwrap_or(0),
+                        context_id,
+                        flow_id,
+                        message: "job.run".to_string(),
+                        message_type: job.executor.clone(),
+                        message_format_type: MessageFormatType::Text,
+                        timeout: job.timeout as u32,
+                        timeout_ack: 10,
+                        timeout_result: job.timeout as u32,
+                        transport_id: None,
+                        transport_status: None,
+                        nodes: vec![node.clone()], // Include FlowNode for routing
+                        job: vec![job.clone()],
+                        logs: Vec::new(),
+                        created_at: ts,
+                        updated_at: ts,
+                        status: MessageStatus::Dispatched,
+                    };
+
+                    // Persist the message and enqueue it
+                    if redis.save_message(context_id, &message).await.is_ok() {
+                        let caller_id_u32 = job.caller_id.parse::<u32>().unwrap_or(0);
+                        let _ = redis.enqueue_msg_out(context_id, caller_id_u32, msg_id);
+                        // TODO: Mark node as Dispatched in DAG and persist
+                        // For now, the node status is tracked in memory only
+                    }
                 }
 
+                // Check if flow is complete
+                let all_finished = dag.completed.len() == dag.nodes.len();
+                let any_error = dag.failed_job.is_some();
+
                 if any_error {
                     let _ = redis
                         .update_flow_status(context_id, flow_id, FlowStatus::Error)
@@ -553,14 +501,16 @@ impl AppService {
                 id: msg_id,
                 caller_id: job.caller_id.parse().unwrap_or(0),
                 context_id,
+                flow_id, // Add flow_id for DAG tracking
                 message: "job.run".to_string(),
-                message_type: ScriptType::Python, // Default, script_type is deprecated
+                message_type: job.executor.clone(),
                 message_format_type: MessageFormatType::Text,
-                timeout: job.timeout,
+                timeout: job.timeout as u32,
                 timeout_ack: 10,
-                timeout_result: job.timeout,
+                timeout_result: job.timeout as u32,
                 transport_id: None,
                 transport_status: None,
+                nodes: Vec::new(), // TODO: Add FlowNode from DAG
                 job: vec![job.clone()],
                 logs: Vec::new(),
                 created_at: ts,
@@ -574,12 +524,13 @@ impl AppService {
                 .await
                 .map_err(DagError::from)?;
 
+            let caller_id_u32 = job.caller_id.parse::<u32>().unwrap_or(0);
             self.redis
-                .enqueue_msg_out(context_id, job.caller_id(), msg_id)
+                .enqueue_msg_out(context_id, caller_id_u32, msg_id)
                 .await
                 .map_err(DagError::from)?;
 
-            let key = format!("message:{}:{}", job.caller_id(), msg_id);
+            let key = format!("message:{}:{}", caller_id_u32, msg_id);
             queued.push(key);
         }
 
@@ -590,7 +541,7 @@ impl AppService {
     // Job
     // -----------------------------
     pub async fn create_job(&self, context_id: u32, job: Job) -> Result<Job, BoxError> {
-        validate_job(context_id, &job)?;
+        // Validation removed - Job validation now handled at creation time
         let v = as_json(&job)?;
         let id = json_get_u32(&v, "id")?;
         let caller_id = json_get_u32(&v, "caller_id")?;
@@ -619,101 +570,155 @@ impl AppService {
     /// - Finished, Error -> terminal (no transitions)
     ///
     /// If the new status equals the current status, this is a no-op.
-    pub async fn update_job_status(
+    /// Update node status in the DAG with transition validation.
+    ///
+    /// Allowed transitions:
+    /// - Pending -> Ready | Dispatched | Cancelled
+    /// - Ready -> Dispatched | Cancelled
+    /// - Dispatched -> Running | Failed | Cancelled
+    /// - Running -> Completed | Failed | Cancelled
+    /// - Completed, Failed, Cancelled -> terminal (no transitions)
+    ///
+    /// If the new status equals the current status, this is a no-op (idempotent).
+    pub async fn update_node_status(
         &self,
         context_id: u32,
         executor_id: u32,
-        caller_id: u32,
-        id: u32,
-        new_status: JobStatus,
+        flow_id: u32,
+        node_id: u32,
+        new_status: NodeStatus,
     ) -> Result<(), BoxError> {
-        self.require_executor(context_id, executor_id, "update job status")
+        self.require_executor(context_id, executor_id, "update node status")
             .await?;
-        let job = self.redis.load_job(context_id, caller_id, id).await?;
-        let current = job.status();
+        // Load the DAG
+        let mut dag = build_flow_dag(&self.redis, context_id, flow_id).await?;
+
+        // Get current node status
+        let node = dag.nodes.get(&node_id)
+            .ok_or_else(|| format!("Node {} not found in flow {}", node_id, flow_id))?;
+        let current = node.node_status.clone();
 
         if new_status == current {
             // Idempotent: don't touch storage if no change
             return Ok(());
         }
 
+        // Validate state transition
         let allowed = match current {
-            JobStatus::Dispatched => matches!(
+            NodeStatus::Pending => matches!(
                 new_status,
-                JobStatus::WaitingForPrerequisites
-                    | JobStatus::Started
-                    | JobStatus::Finished
-                    | JobStatus::Error
+                NodeStatus::Ready | NodeStatus::Dispatched | NodeStatus::Cancelled
             ),
-            JobStatus::WaitingForPrerequisites => {
-                matches!(
-                    new_status,
-                    JobStatus::Started | JobStatus::Finished | JobStatus::Error
-                )
-            }
-            JobStatus::Started => matches!(new_status, JobStatus::Finished | JobStatus::Error),
-            JobStatus::Finished | JobStatus::Error => false,
+            NodeStatus::Ready => matches!(
+                new_status,
+                NodeStatus::Dispatched | NodeStatus::Cancelled
+            ),
+            NodeStatus::Dispatched => matches!(
+                new_status,
+                NodeStatus::Running | NodeStatus::Failed | NodeStatus::Cancelled
+            ),
+            NodeStatus::Running => matches!(
+                new_status,
+                NodeStatus::Completed | NodeStatus::Failed | NodeStatus::Cancelled
+            ),
+            NodeStatus::Completed | NodeStatus::Failed | NodeStatus::Cancelled => false,
         };
 
         if !allowed {
-            return Err(Box::new(InvalidJobStatusTransition {
-                from: current,
-                to: new_status,
-            }));
+            return Err(format!(
+                "Invalid node status transition from {:?} to {:?}",
+                current, new_status
+            ).into());
         }
 
-        self.redis
-            .update_job_status(context_id, caller_id, id, new_status)
-            .await?;
+        // Update the node status
+        if let Some(node) = dag.nodes.get_mut(&node_id) {
+            node.node_status = new_status;
+
+            // Persist the updated DAG
+            // TODO: Implement DAG persistence
+            // self.redis.save_flow_dag(context_id, flow_id, &dag).await?;
+        }
 
         Ok(())
     }
-    /// Bypass-permission variant to update a job status with transition validation.
+    /// Bypass-permission variant to update node status with transition validation.
     /// This skips the executor permission check but enforces the same state transition rules.
-    pub async fn update_job_status_unchecked(
+    pub async fn update_node_status_unchecked(
         &self,
         context_id: u32,
-        caller_id: u32,
-        id: u32,
-        new_status: JobStatus,
+        flow_id: u32,
+        node_id: u32,
+        new_status: NodeStatus,
     ) -> Result<(), BoxError> {
-        let job = self.redis.load_job(context_id, caller_id, id).await?;
-        let current = job.status();
+        // Load the DAG
+        let mut dag = build_flow_dag(&self.redis, context_id, flow_id).await?;
+
+        // Get current node status
+        let node = dag.nodes.get(&node_id)
+            .ok_or_else(|| format!("Node {} not found in flow {}", node_id, flow_id))?;
+        let current = node.node_status.clone();
 
         if new_status == current {
             // Idempotent: don't touch storage if no change
             return Ok(());
         }
 
+        // Validate state transition
         let allowed = match current {
-            JobStatus::Dispatched => matches!(
+            NodeStatus::Pending => matches!(
                 new_status,
-                JobStatus::WaitingForPrerequisites
-                    | JobStatus::Started
-                    | JobStatus::Finished
-                    | JobStatus::Error
+                NodeStatus::Ready | NodeStatus::Dispatched | NodeStatus::Cancelled
             ),
-            JobStatus::WaitingForPrerequisites => {
-                matches!(
-                    new_status,
-                    JobStatus::Started | JobStatus::Finished | JobStatus::Error
-                )
-            }
-            JobStatus::Started => matches!(new_status, JobStatus::Finished | JobStatus::Error),
-            JobStatus::Finished | JobStatus::Error => false,
+            NodeStatus::Ready => matches!(
+                new_status,
+                NodeStatus::Dispatched | NodeStatus::Cancelled
+            ),
+            NodeStatus::Dispatched => matches!(
+                new_status,
+                NodeStatus::Running | NodeStatus::Failed | NodeStatus::Cancelled
+            ),
+            NodeStatus::Running => matches!(
+                new_status,
+                NodeStatus::Completed | NodeStatus::Failed | NodeStatus::Cancelled
+            ),
+            NodeStatus::Completed | NodeStatus::Failed | NodeStatus::Cancelled => false,
         };
 
         if !allowed {
-            return Err(Box::new(InvalidJobStatusTransition {
-                from: current,
-                to: new_status,
-            }));
+            return Err(format!(
+                "Invalid node status transition from {:?} to {:?}",
+                current, new_status
+            ).into());
        }
 
-        self.redis
-            .update_job_status(context_id, caller_id, id, new_status)
-            .await?;
+        // Update the node status
+        if let Some(node) = dag.nodes.get_mut(&node_id) {
+            node.node_status = new_status.clone();
+
+            // Update DAG runtime state for ready_jobs() to work correctly
+            match new_status {
+                NodeStatus::Dispatched | NodeStatus::Running => {
+                    dag.started.insert(node_id);
+                }
+                NodeStatus::Completed => {
+                    dag.started.insert(node_id);
+                    dag.completed.insert(node_id);
+                }
+                NodeStatus::Failed => {
+                    dag.started.insert(node_id);
+                    dag.failed_job = Some(node_id);
+                }
+                _ => {}
+            }
+
+            // Persist the updated DAG
+            // TODO: Implement DAG persistence to Redis
+            // For now, the DAG is rebuilt each time, so runtime state is lost
+            // self.redis.save_flow_dag(context_id, flow_id, &dag).await?;
+        }
 
         Ok(())
     }
 
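
Both variants follow the same shape: rebuild the DAG, no-op on an identical status, reject anything outside the transition table, then mutate the in-memory node. A standalone model of the unchecked variant's bookkeeping (stand-in types, not the real FlowDag; the real code updates the runtime sets exactly like this so ready_jobs() stays consistent within one rebuild, until the persistence TODO lands):

    use std::collections::HashSet;

    #[allow(dead_code)]
    #[derive(Debug, Clone, Copy, PartialEq)]
    enum NodeStatus { Pending, Ready, Dispatched, Running, Completed, Failed, Cancelled }

    struct DagRuntime {
        started: HashSet<u32>,
        completed: HashSet<u32>,
        failed_job: Option<u32>,
    }

    // Mirrors the match at the end of update_node_status_unchecked.
    fn record(dag: &mut DagRuntime, node_id: u32, new_status: NodeStatus) {
        match new_status {
            NodeStatus::Dispatched | NodeStatus::Running => {
                dag.started.insert(node_id);
            }
            NodeStatus::Completed => {
                dag.started.insert(node_id);
                dag.completed.insert(node_id);
            }
            NodeStatus::Failed => {
                dag.started.insert(node_id);
                dag.failed_job = Some(node_id);
            }
            _ => {}
        }
    }

    fn main() {
        let mut dag = DagRuntime {
            started: HashSet::new(),
            completed: HashSet::new(),
            failed_job: None,
        };
        record(&mut dag, 4, NodeStatus::Completed);
        assert!(dag.started.contains(&4) && dag.completed.contains(&4));
        assert!(dag.failed_job.is_none());
    }
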
@@ -1003,20 +1008,7 @@ impl AppService {
         }
     }
 
-    async fn ensure_actor_not_exists_global(&self, id: u32) -> Result<(), BoxError> {
-        match self.redis.load_actor_global(id).await {
-            Ok(_) => Err(Box::new(AlreadyExistsError {
-                key: format!("actor:{}", id),
-            })),
-            Err(e) => {
-                if contains_key_not_found(&e) {
-                    Ok(())
-                } else {
-                    Err(e)
-                }
-            }
-        }
-    }
-
     async fn ensure_runner_not_exists(&self, db: u32, id: u32) -> Result<(), BoxError> {
         match self.redis.load_runner(db, id).await {
@@ -1,3 +1,3 @@
-pub mod redis;
+mod redis;
 
 pub use redis::RedisDriver;
@@ -7,7 +7,7 @@ use serde_json::{Map as JsonMap, Value};
 use tokio::sync::Mutex;
 
 use crate::models::{
-    Actor, Context, Flow, FlowStatus, Job, JobStatus, Message, MessageStatus, Runner,
+    Context, Flow, FlowStatus, Job, JobStatus, Message, MessageStatus, Runner,
     TransportStatus,
 };
 use tracing::{error, warn};
@@ -201,41 +201,12 @@ impl RedisDriver {
     }
 
     // -----------------------------
-    // Actor
+    // Actor (deprecated - renamed to Runner)
     // -----------------------------
-
-    /// Save an Actor to the given DB (tenant/context DB)
-    pub async fn save_actor(&self, db: u32, actor: &Actor) -> Result<()> {
-        let json = serde_json::to_value(actor)?;
-        let id = json
-            .get("id")
-            .and_then(|v| v.as_u64())
-            .ok_or("Actor.id missing or not a number")? as u32;
-        let key = Self::actor_key(id);
-        self.hset_model(db, &key, actor).await
-    }
-
-    /// Load an Actor by id from the given DB
-    pub async fn load_actor(&self, db: u32, id: u32) -> Result<Actor> {
-        let key = Self::actor_key(id);
-        self.hget_model(db, &key).await
-    }
-    /// Save an Actor globally in DB 0 (Actor is context-independent)
-    pub async fn save_actor_global(&self, actor: &Actor) -> Result<()> {
-        let json = serde_json::to_value(actor)?;
-        let id = json
-            .get("id")
-            .and_then(|v| v.as_u64())
-            .ok_or("Actor.id missing or not a number")? as u32;
-        let key = Self::actor_key(id);
-        self.hset_model(0, &key, actor).await
-    }
-
-    /// Load an Actor globally from DB 0 by id
-    pub async fn load_actor_global(&self, id: u32) -> Result<Actor> {
-        let key = Self::actor_key(id);
-        self.hget_model(0, &key).await
-    }
+    // pub async fn save_actor(&self, db: u32, actor: &Actor) -> Result<()> { ... }
+    // pub async fn load_actor(&self, db: u32, id: u32) -> Result<Actor> { ... }
+    // pub async fn save_actor_global(&self, actor: &Actor) -> Result<()> { ... }
+    // pub async fn load_actor_global(&self, id: u32) -> Result<Actor> { ... }
 
     // -----------------------------
     // Runner