refactor: Improve service startup and management logic

- Add `create: true` to service `get` calls
- Update `running_check` to treat any HTTP status code returned by `curl` (including 405) as running
- Ensure redis addresses have `redis://` prefix
- Clean up and re-create zinit services before starting
- Remove redundant `service_monitor` call in `startupmanager.start`
This commit is contained in:
Mahmoud-Emad
2025-11-19 17:08:49 +02:00
parent 012a59b3d8
commit 8daca7328d
8 changed files with 159 additions and 105 deletions

View File

@@ -24,7 +24,7 @@ if coordinator_installer.running()! {
// Step 2: Start Supervisor
println('\n Step 2/5: Starting Supervisor...')
mut supervisor_inst := supervisor.get()!
mut supervisor_inst := supervisor.get(create: true)!
supervisor_inst.start()!
if supervisor_inst.running()! {
println(' Supervisor is running on HTTP:${supervisor_inst.http_port} WS:${supervisor_inst.ws_port}')
@@ -34,7 +34,7 @@ if supervisor_inst.running()! {
// Step 3: Start Hero Runner
println('\n Step 3/5: Starting Hero Runner...')
mut hero_runner := herorunner.get()!
mut hero_runner := herorunner.get(create: true)!
hero_runner.start()!
if hero_runner.running()! {
println(' Hero Runner is running')
@@ -44,7 +44,7 @@ if hero_runner.running()! {
// Step 4: Start Osiris Runner
println('\n Step 4/5: Starting Osiris Runner...')
mut osiris_runner := osirisrunner.get()!
mut osiris_runner := osirisrunner.get(create: true)!
osiris_runner.start()!
if osiris_runner.running()! {
println(' Osiris Runner is running')
@@ -54,7 +54,7 @@ if osiris_runner.running()! {
// Step 5: Start SAL Runner
println('\n Step 5/5: Starting SAL Runner...')
mut sal_runner := salrunner.get()!
mut sal_runner := salrunner.get(create: true)!
sal_runner.start()!
if sal_runner.running()! {
println(' SAL Runner is running')

View File

@@ -27,12 +27,14 @@ fn (self &Coordinator) startupcmd() ![]startupmanager.ZProcessNewArgs {
fn (self &Coordinator) running_check() !bool {
// Check if the process is running by checking the HTTP port
// The coordinator returns 405 for GET requests (requires POST), so we check if we get any response
res := osal.exec(
cmd: 'curl -fsSL http://127.0.0.1:${self.http_port} || exit 1'
cmd: 'curl -s -o /dev/null -w "%{http_code}" http://127.0.0.1:${self.http_port}'
stdout: false
raise_error: false
)!
return res.exit_code == 0
// Any HTTP response code (including 405) means the server is running
return res.output.len > 0 && res.output.int() > 0
}
fn (self &Coordinator) start_pre() ! {

View File

@@ -12,9 +12,16 @@ import os
fn (self &Herorunner) startupcmd() ![]startupmanager.ZProcessNewArgs {
mut res := []startupmanager.ZProcessNewArgs{}
// Ensure redis_addr has the redis:// prefix
redis_url := if self.redis_addr.starts_with('redis://') {
self.redis_addr
} else {
'redis://${self.redis_addr}'
}
res << startupmanager.ZProcessNewArgs{
name: 'herorunner'
cmd: '${self.binary_path} --redis-addr ${self.redis_addr}'
cmd: '${self.binary_path} --redis-url ${redis_url} 12001'
env: {
'HOME': os.home_dir()
'RUST_LOG': self.log_level
@@ -66,7 +73,6 @@ fn ulist_get() !ulist.UList {
fn upload() ! {
}
@[params]
pub struct InstallArgs {
pub mut:

View File

@@ -12,9 +12,16 @@ import os
fn (self &Osirisrunner) startupcmd() ![]startupmanager.ZProcessNewArgs {
mut res := []startupmanager.ZProcessNewArgs{}
// Ensure redis_addr has the redis:// prefix
redis_url := if self.redis_addr.starts_with('redis://') {
self.redis_addr
} else {
'redis://${self.redis_addr}'
}
res << startupmanager.ZProcessNewArgs{
name: 'runner_osiris'
cmd: '${self.binary_path} --redis-addr ${self.redis_addr}'
cmd: '${self.binary_path} --redis-url ${redis_url} 12002'
env: {
'HOME': os.home_dir()
'RUST_LOG': self.log_level
@@ -66,7 +73,6 @@ fn ulist_get() !ulist.UList {
fn upload() ! {
}
@[params]
pub struct InstallArgs {
pub mut:

View File

@@ -12,9 +12,16 @@ import os
fn (self &Salrunner) startupcmd() ![]startupmanager.ZProcessNewArgs {
mut res := []startupmanager.ZProcessNewArgs{}
// Ensure redis_addr has the redis:// prefix
redis_url := if self.redis_addr.starts_with('redis://') {
self.redis_addr
} else {
'redis://${self.redis_addr}'
}
res << startupmanager.ZProcessNewArgs{
name: 'runner_sal'
cmd: '${self.binary_path} --redis-addr ${self.redis_addr}'
cmd: '${self.binary_path} --redis-url ${redis_url} 12003'
env: {
'HOME': os.home_dir()
'RUST_LOG': self.log_level
@@ -66,7 +73,6 @@ fn ulist_get() !ulist.UList {
fn upload() ! {
}
@[params]
pub struct InstallArgs {
pub mut:

View File

@@ -13,9 +13,16 @@ import os
fn (self &Supervisor) startupcmd() ![]startupmanager.ZProcessNewArgs {
mut res := []startupmanager.ZProcessNewArgs{}
// Ensure redis_addr has the redis:// prefix
redis_url := if self.redis_addr.starts_with('redis://') {
self.redis_addr
} else {
'redis://${self.redis_addr}'
}
res << startupmanager.ZProcessNewArgs{
name: 'supervisor'
cmd: '${self.binary_path} --redis-addr ${self.redis_addr} --api-http-port ${self.http_port} --api-ws-port ${self.ws_port}'
cmd: '${self.binary_path} --redis-url ${redis_url} --port ${self.http_port} --admin-secret mysecret'
env: {
'HOME': os.home_dir()
'RUST_LOG': self.log_level
@@ -28,8 +35,14 @@ fn (self &Supervisor) startupcmd() ![]startupmanager.ZProcessNewArgs {
fn (self &Supervisor) running_check() !bool {
// Check if the process is running by checking the HTTP port
res := osal.exec(cmd: 'curl -fsSL http://127.0.0.1:${self.http_port} || exit 1', stdout: false, raise_error: false)!
return res.exit_code == 0
// The supervisor returns 405 for GET requests (requires POST), so we check if we get any response
res := osal.exec(
cmd: 'curl -s -o /dev/null -w "%{http_code}" http://127.0.0.1:${self.http_port}'
stdout: false
raise_error: false
)!
// Any HTTP response code (including 405) means the server is running
return res.output.len > 0 && res.output.int() > 0
}
fn (self &Supervisor) start_pre() ! {
@@ -71,7 +84,6 @@ fn upload() ! {
// )!
}
@[params]
pub struct InstallArgs {
pub mut:

View File

@@ -44,14 +44,19 @@ pub fn new(args ArgsGet) !&Supervisor {
pub fn get(args ArgsGet) !&Supervisor {
mut context := base.context()!
supervisor_default = args.name
if args.fromdb || args.name !in supervisor_global {
mut name := if args.name == 'default' && supervisor_default.len > 0 {
supervisor_default
} else {
args.name
}
supervisor_default = name
if args.fromdb || name !in supervisor_global {
mut r := context.redis()!
if r.hexists('context:supervisor', args.name)! {
data := r.hget('context:supervisor', args.name)!
if r.hexists('context:supervisor', name)! {
data := r.hget('context:supervisor', name)!
if data.len == 0 {
print_backtrace()
return error('Supervisor with name: ${args.name} does not exist, prob bug.')
return error('Supervisor with name: ${name} does not exist, prob bug.')
}
mut obj := json.decode(Supervisor, data)!
set_in_mem(obj)!
@@ -60,14 +65,14 @@ pub fn get(args ArgsGet) !&Supervisor {
new(args)!
} else {
print_backtrace()
return error("Supervisor with name '${args.name}' does not exist")
return error("Supervisor with name '${name}' does not exist")
}
}
return get(name: args.name)! // no longer from db nor create
return get(name: name)! // no longer from db nor create
}
return supervisor_global[args.name] or {
return supervisor_global[name] or {
print_backtrace()
return error('could not get config for supervisor with name:${args.name}')
return error('could not get config for supervisor with name:${name}')
}
}
@@ -168,7 +173,8 @@ pub fn play(mut plbook PlayBook) ! {
supervisor_obj.build()!
}
}
if other_action.name in ['start', 'stop', 'restart', 'start_pre', 'start_post', 'stop_pre', 'stop_post'] {
if other_action.name in ['start', 'stop', 'restart', 'start_pre', 'start_post', 'stop_pre',
'stop_post'] {
mut p := other_action.params
name := p.get('name')!
mut supervisor_obj := get(name: name)!
@@ -261,8 +267,6 @@ pub fn (mut self Supervisor) start() ! {
for zprocess in self.startupcmd()! {
mut sm := startupmanager_get(zprocess.startuptype)!
println('debugzo ${sm}')
console.print_debug('installer: supervisor starting with ${zprocess.startuptype}...')
sm.new(zprocess)!
@@ -319,7 +323,6 @@ pub fn (mut self Supervisor) running() !bool {
return self.running_check()!
}
// switch instance to be used for supervisor
pub fn switch(name string) {
supervisor_default = name

View File

@@ -102,19 +102,43 @@ pub fn (mut sm StartupManager) new(args ZProcessNewArgs) ! {
shutdown_timeout: 0 // Default, or add to ZProcessNewArgs if needed
}
// Check if service already exists
existing_service := zinit_client.service_get(args.name) or { zinit.ServiceConfig{} }
// If service exists, stop it, forget it (stop monitoring), and delete it first
if existing_service.exec.len > 0 {
console.print_debug('startupmanager: service ${args.name} already exists, cleaning up...')
// Stop the service first
zinit_client.service_stop(args.name) or {
console.print_debug('startupmanager: failed to stop service ${args.name}: ${err}')
}
// Forget (stop monitoring) the service
zinit_client.service_forget(args.name) or {
console.print_debug('startupmanager: failed to forget service ${args.name}: ${err}')
}
// Delete the service configuration
zinit_client.service_delete(args.name) or {
console.print_debug('startupmanager: failed to delete service ${args.name}: ${err}')
}
}
// Create the service configuration file in zinit
zinit_client.service_create(args.name, service_config) or {
return error('startupmanager: failed to create zinit service ${args.name}: ${err}')
}
// If 'start' is true, monitor and start the service immediately after creation
if args.start {
// Monitor loads the config and starts monitoring the service
zinit_client.service_monitor(args.name) or {
return error('startupmanager: failed to monitor zinit service ${args.name}: ${err}')
}
}
}
else {
panic('to implement, startup manager only support screen & systemd for now: ${mycat}')
}
}
// If 'start' is true, also monitor and start the service
if args.start {
sm.start(args.name)!
}
}
pub fn (mut sm StartupManager) start(name string) ! {
@@ -139,11 +163,6 @@ pub fn (mut sm StartupManager) start(name string) ! {
zinit_client.service_start(name) or {
return error('startupmanager: Failed to start zinit service ${name}: ${err}')
}
// Monitor loads the config, if it's new it starts it.
// If the service is already managed, this will bring it back up.
zinit_client.service_monitor(name) or {
return error('startupmanager: Failed to monitor zinit service ${name}: ${err}')
}
}
else {
panic('to implement, startup manager only support screen, systemd and zinit for now')