This commit is contained in:
2025-08-29 06:31:29 +02:00
parent 8d1656c679
commit b29468c0c2
6 changed files with 437 additions and 73 deletions

View File

@@ -2,14 +2,61 @@ module systemd
import freeflowuniverse.herolib.osal.core as osal
// JournalArgs selects which journal entries `journalctl` retrieves.
// String filters are skipped when empty, `follow` is off by default, and a
// `limit` of zero or less omits the line cap.
@[params]
pub struct JournalArgs {
pub:
	service  string // name of service for which logs will be retrieved
	limit    int = 100 // number of last log lines to be shown
	since    string // time since when to show logs (e.g., "1 hour ago", "2024-01-01")
	follow   bool   // follow logs in real-time
	priority string // log priority (emerg, alert, crit, err, warning, notice, info, debug)
	grep     string // filter logs containing this text
}
// journalctl runs `journalctl --no-pager` with the filters selected in `args`
// and returns the collected output.
//
// - `limit` > 0 adds `-n <limit>`
// - `service` adds `-u <unit>` (normalised through `name_fix`)
// - `since` adds `--since <value>`
// - `follow` adds `-f`
// - `priority` adds `-p <value>`
// - `grep` is applied afterwards, in-process: only output lines containing the
//   text are kept
//
// Returns an error that names the service when the command fails.
pub fn journalctl(args JournalArgs) !string {
	mut cmd_parts := ['journalctl', '--no-pager']
	if args.limit > 0 {
		cmd_parts << ['-n', args.limit.str()]
	}
	if args.service != '' {
		cmd_parts << ['-u', name_fix(args.service)]
	}
	if args.since != '' {
		// Free-form text such as "1 hour ago": must reach journalctl as one word.
		cmd_parts << ['--since', journal_shell_quote(args.since)]
	}
	if args.follow {
		// NOTE(review): `-f` keeps journalctl running until it is interrupted, so
		// this call presumably does not return on its own - confirm how
		// osal.execute_silent handles long-running commands.
		cmd_parts << ['-f']
	}
	if args.priority != '' {
		cmd_parts << ['-p', journal_shell_quote(args.priority)]
	}
	cmd := cmd_parts.join(' ')
	response := osal.execute_silent(cmd) or {
		return error('Failed to get journal logs for ${args.service}: ${err}')
	}
	if args.grep == '' {
		return response
	}
	return response.split('\n').filter(it.contains(args.grep)).join('\n')
}

// journal_shell_quote wraps `value` in single quotes, escaping embedded single
// quotes, so that the value is passed as one literal word.
// Assumes the command string is interpreted by a POSIX shell - TODO confirm
// against osal.execute_silent.
fn journal_shell_quote(value string) string {
	return "'" + value.replace("'", "'\\''") + "'"
}
// journalctl_errors returns up to 50 journal lines of priority `err` for `service`.
pub fn journalctl_errors(service string) !string {
	args := JournalArgs{
		service:  service
		priority: 'err'
		limit:    50
	}
	return journalctl(args)
}
// journalctl_recent returns up to 200 journal lines for `service`, starting at `since`.
pub fn journalctl_recent(service string, since string) !string {
	args := JournalArgs{
		service: service
		since:   since
		limit:   200
	}
	return journalctl(args)
}

View File

@@ -1,7 +1,254 @@
# a sal to work with systemd
# SystemD Module
A V module for managing systemd services with comprehensive error handling and monitoring capabilities.
> only basics implemented as we need for our installers
## Features
example see herolib/examples/...
- Create, start, stop, and delete systemd services
- Service status monitoring with detailed error reporting
- Journal log retrieval with filtering options
- Health checks for service validation
- Automatic retry logic for service operations
## Quick Start
```v
import freeflowuniverse.herolib.osal.systemd
// Create systemd factory
mut systemd := systemd.new()!
// Create a new service
mut redis_service := systemd.new(
name: 'redis_custom'
cmd: 'redis-server /etc/redis/redis.conf'
description: 'Custom Redis server'
start: true
)!
// Check service status
status := redis_service.status()!
println('Redis service status: ${status}')
// Get service logs
logs := redis_service.get_logs(50)!
println('Recent logs:\n${logs}')
```
## Creating Services
### Basic Service
```v
mut service := systemd.new(
name: 'my_service'
cmd: '/usr/bin/my_application --config /etc/my_app.conf'
description: 'My custom application'
start: true
)!
```
### Service with Environment Variables
```v
mut service := systemd.new(
name: 'web_app'
cmd: '/usr/bin/webapp'
description: 'Web application server'
env: {
'PORT': '8080'
'ENV': 'production'
'DB_HOST': 'localhost'
}
start: true
)!
```
### Service with Complex Command
```v
// For multi-line commands, systemd will create a script file
mut service := systemd.new(
name: 'backup_service'
cmd: '
#!/bin/bash
cd /var/backups
tar -czf backup_$(date +%Y%m%d).tar.gz /home/data/
aws s3 cp backup_$(date +%Y%m%d).tar.gz s3://my-bucket/
'
description: 'Daily backup service'
start: true
)!
```
## Service Management
### Starting and Stopping Services
```v
// Start service (with automatic verification)
service.start()! // Will wait and verify service started successfully
// Stop service (with verification)
service.stop()! // Will wait and verify service stopped
// Restart service
service.restart()!
```
### Checking Service Status
```v
// Get simple status
status := service.status()!
match status {
.active { println('Service is running') }
.failed { println('Service has failed') }
.inactive { println('Service is stopped') }
else { println('Service status: ${status}') }
}
// Get detailed status information
detailed_status := service.status_detailed()!
println(detailed_status)
```
## Log Management
### Basic Log Retrieval
```v
// Get last 100 lines
logs := service.get_logs(100)!
// Using journalctl directly
journal_logs := systemd.journalctl(service: 'my_service', limit: 50)!
```
### Advanced Log Filtering
```v
// Get error logs only
error_logs := systemd.journalctl_errors('my_service')!
// Get logs since specific time
recent_logs := systemd.journalctl_recent('my_service', '1 hour ago')!
// Custom log filtering
filtered_logs := systemd.journalctl(
service: 'my_service'
since: '2024-01-01'
priority: 'warning'
grep: 'connection'
limit: 200
)!
```
## Health Monitoring
### Individual Service Health Check
```v
is_healthy := systemd.validate_service('my_service')!
if !is_healthy {
println('Service needs attention')
}
```
### System-wide Health Check
```v
health_results := systemd.health_check()!
for service_name, is_healthy in health_results {
if !is_healthy {
println('Service ${service_name} is not healthy')
}
}
```
## Error Handling
The module provides detailed error messages with log context:
```v
// Service creation with error handling
mut service := systemd.new(
name: 'problematic_service'
cmd: '/nonexistent/binary'
start: true
) or {
println('Failed to create service: ${err}')
// Error will include recent logs showing why service failed
return
}
```
## Service Deletion
```v
// Stop and remove service completely
service.delete()!
// Or using systemd factory
systemd.destroy('service_name')!
```
## Best Practices
1. **Always handle errors**: Service operations can fail, always use `!` or `or` blocks
2. **Use descriptive names**: Service names should be clear and unique
3. **Check logs on failure**: When services fail, check logs for diagnostic information
4. **Validate service health**: Regularly check service status in production
5. **Use environment variables**: Keep configuration flexible with environment variables
## Common Patterns
### Conditional Service Creation
```v
if !systemd.exists('my_service') {
mut service := systemd.new(
name: 'my_service'
cmd: 'my_application'
start: true
)!
}
```
### Service with Dependency
```v
// Ensure dependency is running first
redis_status := systemd.get('redis')!.status()!
if redis_status != .active {
return error('Redis must be running before starting web service')
}
mut web_service := systemd.new(
name: 'web_service'
cmd: 'web_server --redis-host localhost:6379'
start: true
)!
```
## Troubleshooting
### Service Won't Start
1. Check service logs: `service.get_logs(100)!`
2. Verify command exists: `osal.cmd_exists('your_command')`
3. Check file permissions and paths
4. Review systemd unit file: `cat /etc/systemd/system/service_name.service`
### Service Keeps Failing
1. Get error logs: `systemd.journalctl_errors('service_name')!`
2. Check if command is executable
3. Verify environment variables and working directory
4. Test command manually: `your_command_here`
## Testing
```bash
# Test module
vtest ~/code/github/freeflowuniverse/herolib/lib/osal/systemd/systemd_process_test.v
```

View File

@@ -175,6 +175,42 @@ pub fn (mut systemd Systemd) destroy(name_ string) ! {
}
}
// validate_service looks up the service `name` and reports whether it is active.
// Returns true only for the `.active` state. For `.failed` the last 20 log lines
// are printed to stderr; for any other state the state itself is printed.
pub fn (mut systemd Systemd) validate_service(name string) !bool {
	process := systemd.get(name)!
	current := process.status()!
	if current == .active {
		console.print_item(' Service ${name} is running')
		return true
	}
	if current == .failed {
		recent := process.get_logs(20)!
		console.print_stderr(' Service ${name} has failed. Recent logs:\n${recent}')
		return false
	}
	console.print_stderr(' Service ${name} status: ${current}')
	return false
}
// health_check validates every known process and returns a map from process
// name to health flag. Processes whose name ends in `_test` or `testservice`
// are left out; a validation error counts as unhealthy.
pub fn (mut systemd Systemd) health_check() !map[string]bool {
	mut report := map[string]bool{}
	for proc in systemd.processes {
		is_test_service := proc.name.ends_with('_test') || proc.name.ends_with('testservice')
		if !is_test_service {
			report[proc.name] = systemd.validate_service(proc.name) or { false }
		}
	}
	return report
}
fn name_fix(name_ string) string {
mut name := texttools.name_fix(name_)
if name.contains('.service') {

View File

@@ -6,6 +6,7 @@ import freeflowuniverse.herolib.osal.core as osal
import freeflowuniverse.herolib.core.pathlib
import freeflowuniverse.herolib.ui.console
import os
import time
@[heap]
pub struct SystemdProcess {
@@ -42,15 +43,48 @@ pub fn (mut self SystemdProcess) write() ! {
// start reloads systemd, enables and starts the unit, then polls its state
// every 500 ms (at most 10 times).
//
// Returns as soon as the unit is `.active` (after refreshing this process).
// Fails immediately when the unit is `.failed`, and fails after the last poll
// when it never became active; both errors carry the most recent log lines.
pub fn (mut self SystemdProcess) start() ! {
	console.print_header('starting systemd process: ${self.name}')
	cmd := '
systemctl daemon-reload
systemctl enable ${self.name}
systemctl start ${self.name}
'
	_ = osal.exec(cmd: cmd, stdout: false)!

	max_attempts := 10
	wait_interval := 500 // milliseconds
	for _ in 0 .. max_attempts {
		time.sleep(wait_interval * time.millisecond)
		current := self.status()!
		match current {
			.active {
				console.print_header(' systemd process started successfully: ${self.name}')
				self.refresh()!
				return
			}
			.failed {
				// A failing log lookup must not hide the actual start failure.
				logs := self.get_logs(50) or { 'could not retrieve logs: ${err}' }
				return error('Service ${self.name} failed to start. Recent logs:\n${logs}')
			}
			else {
				// still activating or in another transitional state: poll again
			}
		}
	}

	// The polling budget is used up and the unit never reported active.
	logs := self.get_logs(50) or { 'could not retrieve logs: ${err}' }
	final_status := self.status()!
	return error('Service ${self.name} did not start within expected time. Status: ${final_status}. Recent logs:\n${logs}')
}
// get status from system
@@ -73,14 +107,32 @@ pub fn (mut self SystemdProcess) delete() ! {
}
// stop stops and disables the unit, reloads systemd, then polls the unit state
// every 500 ms (at most 10 times) until it reports `.inactive`.
//
// Errors of the systemctl commands are ignored (`ignore_error: true`). When the
// unit is still not inactive after the last poll, only a notice is printed; no
// error is returned.
pub fn (mut self SystemdProcess) stop() ! {
	console.print_header('stopping systemd process: ${self.name}')
	cmd := '
set +ex
systemctl stop ${self.name}
systemctl disable ${self.name}
systemctl daemon-reload
'
	_ = osal.exec(cmd: cmd, stdout: false, ignore_error: true)!

	max_attempts := 10
	for _ in 0 .. max_attempts {
		time.sleep(500 * time.millisecond)
		current := self.status()!
		if current == .inactive {
			console.print_header(' systemd process stopped: ${self.name}')
			return
		}
	}
	console.print_header(' systemd process may still be running: ${self.name}')
}
pub fn (mut self SystemdProcess) restart() ! {
@@ -101,40 +153,30 @@ enum SystemdStatus {
deactivating
}
// get_logs returns the last `lines` journal entries recorded for this unit.
pub fn (self SystemdProcess) get_logs(lines int) !string {
	args := JournalArgs{
		service: self.name
		limit:   lines
	}
	return journalctl(args)
}
// status asks `systemctl is-active` for the current state of this unit and maps
// the printed word onto SystemdStatus. Any word other than the five handled
// below yields `.unknown`.
pub fn (self SystemdProcess) status() !SystemdStatus {
	cmd := 'systemctl is-active ${name_fix(self.name)}'
	// ignore_error: the state is taken from the printed word, not from the exit
	// status (systemctl documents a non-zero exit for units that are not active).
	job := osal.exec(cmd: cmd, stdout: false, ignore_error: true)!
	match job.output.trim_space() {
		'active' { return .active }
		'inactive' { return .inactive }
		'failed' { return .failed }
		'activating' { return .activating }
		'deactivating' { return .deactivating }
		else { return .unknown }
	}
}
// parse_systemd_process_status extracts the unit state from `systemctl status`
// output by inspecting lines that contain `Active: `.
// Returns `.unknown` when no such line matches one of the known states.
fn parse_systemd_process_status(output string) SystemdStatus {
	for line in output.split_into_lines() {
		if !line.contains('Active: ') {
			continue
		}
		if line.contains('active (running)') {
			return .active
		}
		if line.contains('inactive (dead)') {
			return .inactive
		}
		if line.contains('failed') {
			return .failed
		}
		// 'deactivating' contains 'activating', so it has to be tested first.
		if line.contains('deactivating') {
			return .deactivating
		}
		if line.contains('activating') {
			return .activating
		}
	}
	return .unknown
}
// status_detailed returns the raw output of `systemctl status` for this unit,
// requested with `--lines=10`. Command errors are ignored, so the output is
// returned whatever the exit status.
pub fn (self SystemdProcess) status_detailed() !string {
	unit := name_fix(self.name)
	result := osal.exec(
		cmd:          'systemctl status --no-pager --lines=10 ${unit}'
		stdout:       false
		ignore_error: true
	)!
	return result.output
}

View File

@@ -29,33 +29,19 @@ pub fn testsuite_end() ! {
process.delete()!
}
// Starts the test unit, expects it to become active, then stops it and expects
// it to become inactive.
pub fn test_systemd_process_start_stop() ! {
	mut systemdfactory := new()!
	mut process := systemdfactory.new(
		cmd:   'redis-server'
		name:  'testservice'
		start: false
	)!

	process.start()!
	mut status := process.status()!
	assert status == .active

	process.stop()!
	status = process.status()!
	assert status == .inactive
}

// Feeds captured `systemctl status` output of a running unit to the parser.
pub fn test_parse_systemd_process_status() ! {
	output := 'testservice.service - testservice
Loaded: loaded (/etc/systemd/system/testservice.service; enabled; preset: disabled)
Active: active (running) since Mon 2024-06-10 12:51:24 CEST; 2ms ago
Main PID: 202537 (redis-server)
Tasks: 1 (limit: 154455)
Memory: 584.0K (peak: 584.0K)
CPU: 0
CGroup: /system.slice/testservice.service
202537 redis-server
Jun 10 12:51:24 myhost1 systemd[1]: testservice.service: Scheduled restart job, restart counter is at 1.
Jun 10 12:51:24 myhost1 systemd[1]: Started testservice.'
	status := parse_systemd_process_status(output)
	assert status == .active
}

View File

@@ -1,13 +1,19 @@
[Unit]
# Fall back to a name-based description when none was provided.
@if self.description == ''
Description=${self.name} service
@else
Description=${self.description}
@end
After=network.target

[Service]
Type=simple
ExecStart=${self.cmd}
WorkingDirectory=/tmp
StandardOutput=journal
StandardError=journal
# Restart policy follows the restart flag of the process.
@if self.restart
Restart=always
RestartSec=5
@else
Restart=no
@end