Check job status in Redis DB as well before sending RPC call

Signed-off-by: Lee Smet <lee.smet@hotmail.com>
This commit is contained in:
Lee Smet
2025-09-05 19:58:52 +02:00
parent fb34b4e2f3
commit 25f35ea8fc

View File

@@ -305,6 +305,8 @@ async fn deliver_one(
let job_id_opt = job_id_opt; let job_id_opt = job_id_opt;
let mut last_status: Option<TransportStatus> = Some(TransportStatus::Sent); let mut last_status: Option<TransportStatus> = Some(TransportStatus::Sent);
// Ensure we only request supervisor job.status or job.result once per outbound message
let mut requested_job_check: bool = false;
loop { loop {
if start.elapsed() >= poll_timeout { if start.elapsed() >= poll_timeout {
@@ -337,48 +339,172 @@ async fn deliver_one(
// Stop on terminal states // Stop on terminal states
if matches!(s, TransportStatus::Delivered | TransportStatus::Read) { if matches!(s, TransportStatus::Delivered | TransportStatus::Read) {
// On Read, request supervisor job.status asynchronously; inbound listener will handle replies // Only request a single job status/result per message
// if matches!(s, TransportStatus::Read) if !requested_job_check {
// && let Some(job_id) = job_id_opt if let Some(job_id) = job_id_opt {
if let Some(job_id) = job_id_opt { // First consult Redis for the latest job state in case we already have a terminal update
let sup = cache match service_poll.load_job(context_id, caller_id, job_id).await {
.get_or_create( Ok(job) => {
client.clone(), match job.status() {
sup_dest.clone(), JobStatus::Finished | JobStatus::Error => {
sup_topic.clone(), // Local job is already terminal; skip supervisor job.status
secret_for_poller.clone(), let _ = service_poll
) .append_message_logs(
.await; context_id,
match sup.job_status_with_ids(job_id.to_string()).await { caller_id,
Ok((_out_id, inner_id)) => { id,
// Correlate this status request to the message/job vec![format!(
let _ = service_poll "Local job {} status is terminal ({:?}); skipping supervisor job.status",
.supcorr_set( job_id,
inner_id, context_id, caller_id, job_id, id, job.status()
) )],
.await; )
let _ = service_poll .await;
.append_message_logs(
context_id, // If result is still empty, immediately request supervisor job.result
caller_id, if job.result.is_empty() {
id, let sup = cache
vec![format!( .get_or_create(
"Requested supervisor job.status for job {}", client.clone(),
job_id sup_dest.clone(),
)], sup_topic.clone(),
) secret_for_poller.clone(),
.await; )
} .await;
Err(e) => { match sup.job_result_with_ids(job_id.to_string()).await {
let _ = service_poll Ok((_out2, inner2)) => {
.append_message_logs( let _ = service_poll
context_id, .supcorr_set(inner2, context_id, caller_id, job_id, id)
caller_id, .await;
id, let _ = service_poll
vec![format!("job.status request error: {}", e)], .append_message_logs(
) context_id,
.await; caller_id,
id,
vec![format!(
"Requested supervisor job.result for job {} (local terminal w/ empty result)",
job_id
)],
)
.await;
}
Err(e) => {
let _ = service_poll
.append_message_logs(
context_id,
caller_id,
id,
vec![format!(
"job.result request error for job {}: {}",
job_id, e
)],
)
.await;
}
}
} else {
// Result already present; nothing to fetch
let _ = service_poll
.append_message_logs(
context_id,
caller_id,
id,
vec![format!(
"Job {} already has result; no supervisor calls needed",
job_id
)],
)
.await;
}
}
// Not terminal yet -> request supervisor job.status as before
_ => {
let sup = cache
.get_or_create(
client.clone(),
sup_dest.clone(),
sup_topic.clone(),
secret_for_poller.clone(),
)
.await;
match sup.job_status_with_ids(job_id.to_string()).await {
Ok((_out_id, inner_id)) => {
// Correlate this status request to the message/job
let _ = service_poll
.supcorr_set(
inner_id, context_id, caller_id, job_id, id,
)
.await;
let _ = service_poll
.append_message_logs(
context_id,
caller_id,
id,
vec![format!(
"Requested supervisor job.status for job {}",
job_id
)],
)
.await;
}
Err(e) => {
let _ = service_poll
.append_message_logs(
context_id,
caller_id,
id,
vec![format!("job.status request error: {}", e)],
)
.await;
}
}
}
}
}
// If we cannot load the job, fall back to requesting job.status
Err(_) => {
let sup = cache
.get_or_create(
client.clone(),
sup_dest.clone(),
sup_topic.clone(),
secret_for_poller.clone(),
)
.await;
match sup.job_status_with_ids(job_id.to_string()).await {
Ok((_out_id, inner_id)) => {
let _ = service_poll
.supcorr_set(
inner_id, context_id, caller_id, job_id, id,
)
.await;
let _ = service_poll
.append_message_logs(
context_id,
caller_id,
id,
vec![format!(
"Requested supervisor job.status for job {} (fallback; load_job failed)",
job_id
)],
)
.await;
}
Err(e) => {
let _ = service_poll
.append_message_logs(
context_id,
caller_id,
id,
vec![format!("job.status request error: {}", e)],
)
.await;
}
}
}
} }
// Ensure we only do this once
requested_job_check = true;
} }
} }
// break; // break;