refactor: Integrate logger and refactor network operations

- Replace console logging with structured logger.log calls (see the sketch after this list)
- Improve network bridge creation robustness
- Enhance network IP allocation and cleanup logic
- Refactor network cleanup for better concurrency handling
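
Every converted call site follows the same pattern: a logger.Logger is created once in obj_init and stored on the factory, and each message is emitted with a category, message text, and log type; failures from logging itself are swallowed with an empty or {} block so they never interrupt container or network operations. Below is a minimal standalone sketch of that pattern (the log path and messages are placeholders; only the logger.new and log fields visible in the diff, namely path, console_output, cat, log and logtype, are assumed):

module main

import incubaid.herolib.core.logger

fn main() {
	// Build the logger once, mirroring obj_init: file output under a log
	// directory plus console output, with an empty Logger as fallback.
	mut l := logger.new(
		path: '/tmp/heropods_example/logs' // placeholder path
		console_output: true
	) or {
		eprintln('Warning: Failed to create logger: ${err}')
		logger.Logger{}
	}
	// Normal progress message: the category groups related subsystems
	// (container, images, network, cleanup in this commit).
	l.log(
		cat: 'images'
		log: 'Setting up default images...'
		logtype: .stdout
	) or {}
	// Error-level message; the trailing or {} keeps logging best-effort,
	// so a logging failure never becomes a container lifecycle error.
	l.log(
		cat: 'network'
		log: 'Failed to setup NAT rules'
		logtype: .error
	) or {}
}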
Author: Mahmoud-Emad
Date: 2025-11-12 11:28:56 +02:00
Parent: 7836a48ad4
Commit: ad7e1980a5
5 changed files with 480 additions and 118 deletions

View File

@@ -1,6 +1,5 @@
module heropods
import incubaid.herolib.ui.console
import incubaid.herolib.osal.tmux
import incubaid.herolib.osal.core as osal
import incubaid.herolib.virt.crun
@@ -55,7 +54,11 @@ pub fn (mut self Container) start() ! {
if !container_exists {
// Container doesn't exist, create it first
console.print_debug('Container ${self.name} does not exist, creating it...')
self.factory.logger.log(
cat: 'container'
log: 'Container ${self.name} does not exist, creating it...'
logtype: .stdout
) or {}
// Try to create the container; if it fails with a "File exists" error,
// try to force delete any leftover state and retry
crun_root := '${self.factory.base_dir}/runtime'
@@ -64,7 +67,11 @@ pub fn (mut self Container) start() ! {
stdout: true
) or {
if err.msg().contains('File exists') {
console.print_debug('Container creation failed with "File exists", attempting to clean up leftover state...')
self.factory.logger.log(
cat: 'container'
log: 'Container creation failed with "File exists", attempting to clean up leftover state...'
logtype: .stdout
) or {}
// Force delete any leftover state - try multiple cleanup approaches
osal.exec(cmd: 'crun --root ${crun_root} delete ${self.name}', stdout: false) or {}
osal.exec(cmd: 'crun delete ${self.name}', stdout: false) or {} // Also try default root
@@ -82,26 +89,42 @@ pub fn (mut self Container) start() ! {
return err
}
}
console.print_debug('Container ${self.name} created')
self.factory.logger.log(
cat: 'container'
log: 'Container ${self.name} created'
logtype: .stdout
) or {}
}
status := self.status()!
if status == .running {
console.print_debug('Container ${self.name} is already running')
self.factory.logger.log(
cat: 'container'
log: 'Container ${self.name} is already running'
logtype: .stdout
) or {}
return
}
// If container exists but is stopped, we need to delete and recreate it
// because crun doesn't allow restarting a stopped container
if container_exists && status != .running {
console.print_debug('Container ${self.name} exists but is stopped, recreating...')
self.factory.logger.log(
cat: 'container'
log: 'Container ${self.name} exists but is stopped, recreating...'
logtype: .stdout
) or {}
crun_root := '${self.factory.base_dir}/runtime'
osal.exec(cmd: 'crun --root ${crun_root} delete ${self.name}', stdout: false) or {}
osal.exec(
cmd: 'crun --root ${crun_root} create --bundle ${self.factory.base_dir}/configs/${self.name} ${self.name}'
stdout: true
)!
console.print_debug('Container ${self.name} recreated')
self.factory.logger.log(
cat: 'container'
log: 'Container ${self.name} recreated'
logtype: .stdout
) or {}
}
// start the container (crun start doesn't have --detach flag)
@@ -114,14 +137,28 @@ pub fn (mut self Container) start() ! {
// Setup network for the container (thread-safe)
// If this fails, stop the container to clean up
self.setup_network() or {
console.print_stderr('Network setup failed, stopping container: ${err}')
self.factory.logger.log(
cat: 'container'
log: 'Network setup failed, stopping container: ${err}'
logtype: .error
) or {}
// Use stop() method to properly clean up (kills process, cleans network, etc.)
// Ignore errors from stop since we're already in an error path
self.stop() or { console.print_debug('Failed to stop container during cleanup: ${err}') }
self.stop() or {
self.factory.logger.log(
cat: 'container'
log: 'Failed to stop container during cleanup: ${err}'
logtype: .error
) or {}
}
return error('Failed to setup network for container: ${err}')
}
console.print_green('Container ${self.name} started')
self.factory.logger.log(
cat: 'container'
log: 'Container ${self.name} started'
logtype: .stdout
) or {}
}
// Stop the container gracefully (SIGTERM) or forcefully (SIGKILL)
@@ -137,7 +174,11 @@ pub fn (mut self Container) start() ! {
pub fn (mut self Container) stop() ! {
status := self.status()!
if status == .stopped {
console.print_debug('Container ${self.name} is already stopped')
self.factory.logger.log(
cat: 'container'
log: 'Container ${self.name} is already stopped'
logtype: .stdout
) or {}
return
}
@@ -145,7 +186,11 @@ pub fn (mut self Container) stop() ! {
// Send SIGTERM for graceful shutdown
osal.exec(cmd: 'crun --root ${crun_root} kill ${self.name} SIGTERM', stdout: false) or {
console.print_debug('Failed to send SIGTERM (container may already be stopped): ${err}')
self.factory.logger.log(
cat: 'container'
log: 'Failed to send SIGTERM (container may already be stopped): ${err}'
logtype: .stdout
) or {}
}
// Wait up to sigterm_timeout_ms for graceful shutdown
@@ -158,18 +203,34 @@ pub fn (mut self Container) stop() ! {
ContainerStatus.stopped
}
if current_status == .stopped {
console.print_debug('Container ${self.name} stopped gracefully')
self.factory.logger.log(
cat: 'container'
log: 'Container ${self.name} stopped gracefully'
logtype: .stdout
) or {}
self.cleanup_network()! // Thread-safe network cleanup
console.print_green('Container ${self.name} stopped')
self.factory.logger.log(
cat: 'container'
log: 'Container ${self.name} stopped'
logtype: .stdout
) or {}
return
}
attempts++
}
// Force kill if still running after timeout
console.print_debug('Container ${self.name} did not stop gracefully, force killing')
self.factory.logger.log(
cat: 'container'
log: 'Container ${self.name} did not stop gracefully, force killing'
logtype: .stdout
) or {}
osal.exec(cmd: 'crun --root ${crun_root} kill ${self.name} SIGKILL', stdout: false) or {
console.print_debug('Failed to send SIGKILL: ${err}')
self.factory.logger.log(
cat: 'container'
log: 'Failed to send SIGKILL: ${err}'
logtype: .error
) or {}
}
// Wait for SIGKILL to take effect
@@ -187,7 +248,11 @@ pub fn (mut self Container) stop() ! {
// Cleanup network resources (thread-safe)
self.cleanup_network()!
console.print_green('Container ${self.name} stopped')
self.factory.logger.log(
cat: 'container'
log: 'Container ${self.name} stopped'
logtype: .stdout
) or {}
}
// Delete the container
@@ -203,16 +268,28 @@ pub fn (mut self Container) stop() ! {
pub fn (mut self Container) delete() ! {
// Check if container exists before trying to delete
if !self.container_exists_in_crun()! {
console.print_debug('Container ${self.name} does not exist in crun')
self.factory.logger.log(
cat: 'container'
log: 'Container ${self.name} does not exist in crun'
logtype: .stdout
) or {}
// Still cleanup network resources in case they exist (thread-safe)
self.cleanup_network() or {
console.print_debug('Network cleanup failed (may not exist): ${err}')
self.factory.logger.log(
cat: 'container'
log: 'Network cleanup failed (may not exist): ${err}'
logtype: .stdout
) or {}
}
// Remove from factory's container cache only after all cleanup is done
if self.name in self.factory.containers {
self.factory.containers.delete(self.name)
}
console.print_debug('Container ${self.name} removed from cache')
self.factory.logger.log(
cat: 'container'
log: 'Container ${self.name} removed from cache'
logtype: .stdout
) or {}
return
}
@@ -222,7 +299,11 @@ pub fn (mut self Container) delete() ! {
// Delete the container from crun
crun_root := '${self.factory.base_dir}/runtime'
osal.exec(cmd: 'crun --root ${crun_root} delete ${self.name}', stdout: false) or {
console.print_debug('Failed to delete container from crun: ${err}')
self.factory.logger.log(
cat: 'container'
log: 'Failed to delete container from crun: ${err}'
logtype: .error
) or {}
}
// Remove from factory's container cache only after all cleanup is complete
@@ -230,7 +311,11 @@ pub fn (mut self Container) delete() ! {
self.factory.containers.delete(self.name)
}
console.print_green('Container ${self.name} deleted')
self.factory.logger.log(
cat: 'container'
log: 'Container ${self.name} deleted'
logtype: .stdout
) or {}
}
// Execute command inside the container
@@ -242,7 +327,11 @@ pub fn (mut self Container) exec(cmd_ osal.Command) !string {
// Use the builder node to execute inside container
mut node := self.node()!
console.print_debug('Executing command in container ${self.name}: ${cmd_.cmd}')
self.factory.logger.log(
cat: 'container'
log: 'Executing command in container ${self.name}: ${cmd_.cmd}'
logtype: .stdout
) or {}
// Execute and provide better error context
return node.exec(cmd: cmd_.cmd, stdout: cmd_.stdout) or {
@@ -284,7 +373,7 @@ pub fn (self Container) status() !ContainerStatus {
ContainerStatus.paused
}
else {
console.print_debug('Unknown container status: ${state.status}')
// Unknown status - return unknown (cannot log here because the receiver is immutable)
ContainerStatus.unknown
}
}

View File

@@ -1,6 +1,5 @@
module heropods
import incubaid.herolib.ui.console
import incubaid.herolib.osal.core as osal
import incubaid.herolib.virt.crun
import incubaid.herolib.installers.virt.herorunner as herorunner_installer
@@ -74,7 +73,11 @@ pub fn (mut self HeroPods) container_new(args ContainerNewArgs) !&Container {
// If image not yet extracted, pull and unpack it
if !os.is_dir(rootfs_path) && args.docker_url != '' {
console.print_debug('Pulling image ${args.docker_url} with podman...')
self.logger.log(
cat: 'images'
log: 'Pulling image ${args.docker_url} with podman...'
logtype: .stdout
) or {}
self.podman_pull_and_export(args.docker_url, image_name, rootfs_path)!
}
}
@@ -197,12 +200,20 @@ fn (self HeroPods) podman_pull_and_export(docker_url string, image_name string,
// 3. If host binary doesn't exist, compiles it first
// 4. Makes the binary executable
fn (mut self HeroPods) install_hero_in_rootfs(rootfs_path string) ! {
console.print_debug('Installing hero binary into container rootfs: ${rootfs_path}')
self.logger.log(
cat: 'container'
log: 'Installing hero binary into container rootfs: ${rootfs_path}'
logtype: .stdout
) or {}
// Check if hero binary already exists in rootfs
hero_bin_path := '${rootfs_path}/usr/local/bin/hero'
if os.exists(hero_bin_path) {
console.print_debug('Hero binary already exists in rootfs, skipping installation')
self.logger.log(
cat: 'container'
log: 'Hero binary already exists in rootfs, skipping installation'
logtype: .stdout
) or {}
return
}
@@ -216,8 +227,16 @@ fn (mut self HeroPods) install_hero_in_rootfs(rootfs_path string) ! {
// If hero binary doesn't exist on host, compile it
if !os.exists(host_hero_path) {
console.print_debug('Hero binary not found on host at ${host_hero_path}')
console.print_debug('Compiling hero binary using compile script...')
self.logger.log(
cat: 'container'
log: 'Hero binary not found on host at ${host_hero_path}'
logtype: .stdout
) or {}
self.logger.log(
cat: 'container'
log: 'Compiling hero binary using compile script...'
logtype: .stdout
) or {}
// Get herolib directory
herolib_dir := os.join_path(os.home_dir(), 'code/github/incubaid/herolib')
@@ -243,11 +262,19 @@ fn (mut self HeroPods) install_hero_in_rootfs(rootfs_path string) ! {
}
// Copy hero binary to container rootfs
console.print_debug('Copying hero binary from ${host_hero_path} to ${hero_bin_path}')
self.logger.log(
cat: 'container'
log: 'Copying hero binary from ${host_hero_path} to ${hero_bin_path}'
logtype: .stdout
) or {}
os.cp(host_hero_path, hero_bin_path)!
// Make it executable
os.chmod(hero_bin_path, 0o755)!
console.print_debug('Hero binary successfully installed in container rootfs')
self.logger.log(
cat: 'container'
log: 'Hero binary successfully installed in container rootfs'
logtype: .stdout
) or {}
}

View File

@@ -1,6 +1,5 @@
module heropods
import incubaid.herolib.ui.console
import incubaid.herolib.osal.core as osal
import incubaid.herolib.core.texttools
import os
@@ -102,7 +101,11 @@ pub fn (mut self HeroPods) image_new(args ContainerImageArgs) !&ContainerImage {
// 3. Exports the rootfs to the images directory
// 4. Cleans up the temporary container
fn (mut self ContainerImage) download_from_docker(docker_url string, reset bool) ! {
console.print_header('Downloading image: ${docker_url}')
self.factory.logger.log(
cat: 'images'
log: 'Downloading image: ${docker_url}'
logtype: .stdout
) or {}
// Clean image name for local storage
image_dir := '${self.factory.base_dir}/images/${self.image_name}'
@@ -116,7 +119,11 @@ fn (mut self ContainerImage) download_from_docker(docker_url string, reset bool)
osal.exec(cmd: 'mkdir -p ${image_dir}', stdout: false)!
// Pull image using podman
console.print_debug('Pulling image: ${docker_url}')
self.factory.logger.log(
cat: 'images'
log: 'Pulling image: ${docker_url}'
logtype: .stdout
) or {}
osal.exec(cmd: 'podman pull ${docker_url}', stdout: true)!
// Create container from image (without running it)
@@ -138,7 +145,11 @@ fn (mut self ContainerImage) download_from_docker(docker_url string, reset bool)
// Remove the pulled image from podman to save space (optional)
osal.exec(cmd: 'podman rmi ${docker_url}', stdout: false) or {}
console.print_green('Image ${docker_url} extracted to ${self.rootfs_path}')
self.factory.logger.log(
cat: 'images'
log: 'Image ${docker_url} extracted to ${self.rootfs_path}'
logtype: .stdout
) or {}
}
// Update image metadata (size, creation time, etc.)
@@ -186,7 +197,11 @@ pub fn (mut self HeroPods) images_list() ![]&ContainerImage {
factory: &self
}
image.update_metadata() or {
console.print_stderr('Failed to update metadata for image ${dir}: ${err}')
self.logger.log(
cat: 'images'
log: 'Failed to update metadata for image ${dir}: ${err}'
logtype: .error
) or {}
continue
}
self.images[dir] = image
@@ -207,7 +222,11 @@ pub fn (mut self ContainerImage) export(args ImageExportArgs) ! {
return error('Image rootfs not found: ${self.rootfs_path}')
}
console.print_header('Exporting image ${self.image_name} to ${args.dest_path}')
self.factory.logger.log(
cat: 'images'
log: 'Exporting image ${self.image_name} to ${args.dest_path}'
logtype: .stdout
) or {}
// Ensure destination directory exists
dest_dir := os.dir(args.dest_path)
@@ -217,7 +236,11 @@ pub fn (mut self ContainerImage) export(args ImageExportArgs) ! {
cmd := 'tar -czf ${args.dest_path} -C ${os.dir(self.rootfs_path)} ${os.base(self.rootfs_path)}'
osal.exec(cmd: cmd, stdout: true)!
console.print_green('Image exported successfully to ${args.dest_path}')
self.factory.logger.log(
cat: 'images'
log: 'Image exported successfully to ${args.dest_path}'
logtype: .stdout
) or {}
}
// Import image from .tgz file
@@ -233,7 +256,11 @@ pub fn (mut self HeroPods) image_import(args ImageImportArgs) !&ContainerImage {
image_name := filename.replace('.tgz', '').replace('.tar.gz', '')
image_name_clean := texttools.name_fix(image_name)
console.print_header('Importing image from ${args.source_path}')
self.logger.log(
cat: 'images'
log: 'Importing image from ${args.source_path}'
logtype: .stdout
) or {}
image_dir := '${self.base_dir}/images/${image_name_clean}'
rootfs_path := '${image_dir}/rootfs'
@@ -264,7 +291,11 @@ pub fn (mut self HeroPods) image_import(args ImageImportArgs) !&ContainerImage {
image.update_metadata()!
self.images[image_name_clean] = image
console.print_green('Image imported successfully: ${image_name_clean}')
self.logger.log(
cat: 'images'
log: 'Image imported successfully: ${image_name_clean}'
logtype: .stdout
) or {}
return image
}
@@ -272,7 +303,11 @@ pub fn (mut self HeroPods) image_import(args ImageImportArgs) !&ContainerImage {
//
// Removes the image directory and removes from factory cache
pub fn (mut self ContainerImage) delete() ! {
console.print_header('Deleting image: ${self.image_name}')
self.factory.logger.log(
cat: 'images'
log: 'Deleting image: ${self.image_name}'
logtype: .stdout
) or {}
image_dir := os.dir(self.rootfs_path)
if os.is_dir(image_dir) {
@@ -284,7 +319,11 @@ pub fn (mut self ContainerImage) delete() ! {
self.factory.images.delete(self.image_name)
}
console.print_green('Image ${self.image_name} deleted successfully')
self.factory.logger.log(
cat: 'images'
log: 'Image ${self.image_name} deleted successfully'
logtype: .stdout
) or {}
}
// Get image info as map

View File

@@ -2,8 +2,8 @@ module heropods
import incubaid.herolib.data.encoderhero
import incubaid.herolib.osal.core as osal
import incubaid.herolib.ui.console
import incubaid.herolib.virt.crun
import incubaid.herolib.core.logger
import os
import sync
@@ -30,6 +30,7 @@ pub mut:
name string // name of the heropods
network_config NetworkConfig @[skip; str: skip] // network configuration (automatically initialized, not serialized)
network_mutex sync.Mutex @[skip; str: skip] // protects network_config for thread-safe concurrent access
logger logger.Logger @[skip; str: skip] // logger instance for debugging (not serialized)
}
// your checking & initialization code if needed
@@ -44,15 +45,23 @@ fn obj_init(mycfg_ HeroPods) !HeroPods {
stdout: false
)!
// Note: Logger not yet initialized at this point, so we use eprintln for early warnings
if args.use_podman {
if !osal.cmd_exists('podman') {
console.print_stderr('Warning: podman not found. Install podman for better image management.')
console.print_debug('Install with: apt install podman (Ubuntu) or brew install podman (macOS)')
} else {
console.print_debug('Using podman for image management')
eprintln('Warning: podman not found. Install podman for better image management.')
eprintln('Install with: apt install podman (Ubuntu) or brew install podman (macOS)')
}
}
// Initialize logger for debugging (with console output for visibility)
mut heropods_logger := logger.new(
path: '${args.base_dir}/logs'
console_output: true
) or {
eprintln('Warning: Failed to create logger: ${err}')
logger.Logger{} // Use empty logger as fallback
}
// Initialize HeroPods instance with network configuration
// Note: network_mutex is automatically initialized to zero value (unlocked state)
mut heropods := HeroPods{
@@ -67,6 +76,7 @@ fn obj_init(mycfg_ HeroPods) !HeroPods {
network_config: NetworkConfig{
allocated_ips: map[string]string{}
}
logger: heropods_logger
}
// Clean up any leftover crun state if reset is requested
@@ -97,7 +107,11 @@ pub fn heroscript_loads(heroscript string) !HeroPods {
}
fn (mut self HeroPods) setup_default_images(reset bool) ! {
console.print_header('Setting up default images...')
self.logger.log(
cat: 'images'
log: 'Setting up default images...'
logtype: .stdout
) or {}
default_images := [ContainerImageType.alpine_3_20, .ubuntu_24_04, .ubuntu_25_04]
@@ -107,7 +121,11 @@ fn (mut self HeroPods) setup_default_images(reset bool) ! {
reset: reset
}
if img.str() !in self.images || reset {
console.print_debug('Preparing default image: ${img.str()}')
self.logger.log(
cat: 'images'
log: 'Preparing default image: ${img.str()}'
logtype: .stdout
) or {}
self.image_new(args)!
}
}
@@ -132,11 +150,19 @@ fn (mut self HeroPods) load_existing_images() ! {
factory: &self
}
image.update_metadata() or {
console.print_stderr(' Failed to update metadata for image ${dir}: ${err}')
self.logger.log(
cat: 'images'
log: 'Failed to update metadata for image ${dir}: ${err}'
logtype: .error
) or {}
continue
}
self.images[dir] = image
console.print_debug('Loaded existing image: ${dir}')
self.logger.log(
cat: 'images'
log: 'Loaded existing image: ${dir}'
logtype: .stdout
) or {}
}
}
}
@@ -181,7 +207,11 @@ pub fn (self HeroPods) list() ![]Container {
// Clean up any leftover crun state
fn (mut self HeroPods) cleanup_crun_state() ! {
console.print_debug('Cleaning up leftover crun state...')
self.logger.log(
cat: 'cleanup'
log: 'Cleaning up leftover crun state...'
logtype: .stdout
) or {}
crun_root := '${self.base_dir}/runtime'
// Stop and delete all containers in our custom root
@@ -189,7 +219,11 @@ fn (mut self HeroPods) cleanup_crun_state() ! {
for container_name in result.output.split_into_lines() {
if container_name.trim_space() != '' {
console.print_debug('Cleaning up container: ${container_name}')
self.logger.log(
cat: 'cleanup'
log: 'Cleaning up container: ${container_name}'
logtype: .stdout
) or {}
osal.exec(cmd: 'crun --root ${crun_root} kill ${container_name} SIGKILL', stdout: false) or {}
osal.exec(cmd: 'crun --root ${crun_root} delete ${container_name}', stdout: false) or {}
}
@@ -199,7 +233,11 @@ fn (mut self HeroPods) cleanup_crun_state() ! {
result2 := osal.exec(cmd: 'crun list -q', stdout: false) or { return }
for container_name in result2.output.split_into_lines() {
if container_name.trim_space() != '' && container_name in self.containers {
console.print_debug('Cleaning up container from default root: ${container_name}')
self.logger.log(
cat: 'cleanup'
log: 'Cleaning up container from default root: ${container_name}'
logtype: .stdout
) or {}
osal.exec(cmd: 'crun kill ${container_name} SIGKILL', stdout: false) or {}
osal.exec(cmd: 'crun delete ${container_name}', stdout: false) or {}
}

View File

@@ -1,7 +1,6 @@
module heropods
import incubaid.herolib.osal.core as osal
import incubaid.herolib.ui.console
import os
import crypto.sha256
@@ -40,12 +39,25 @@ mut:
// Initialize network configuration in HeroPods factory
fn (mut self HeroPods) network_init() ! {
console.print_debug('Initializing HeroPods network layer...')
self.logger.log(
cat: 'network'
log: 'START network_init() - Initializing HeroPods network layer'
) or {}
// Setup host bridge if it doesn't exist
self.logger.log(
cat: 'network'
log: 'Calling network_setup_bridge()...'
logtype: .stdout
) or {}
self.network_setup_bridge()!
console.print_debug('HeroPods network layer initialized')
self.logger.log(
cat: 'network'
log: 'END network_init() - HeroPods network layer initialized successfully'
logtype: .stdout
) or {}
}
// Setup the host bridge network (one-time setup, idempotent)
@@ -54,83 +66,213 @@ fn (mut self HeroPods) network_setup_bridge() ! {
gateway_ip := '${self.network_config.gateway_ip}/${self.network_config.subnet.split('/')[1]}'
subnet := self.network_config.subnet
// Check if bridge already exists
result := osal.exec(
cmd: 'ip link show ${bridge_name}'
stdout: false
raise_error: false
) or {
osal.Job{
exit_code: 1
}
}
self.logger.log(
cat: 'network'
log: 'START network_setup_bridge() - bridge=${bridge_name}, gateway=${gateway_ip}, subnet=${subnet}'
logtype: .stdout
) or {}
if result.exit_code == 0 {
console.print_debug('Bridge ${bridge_name} already exists')
// Check if bridge already exists using os.execute (more reliable than osal.exec)
self.logger.log(
cat: 'network'
log: 'Checking if bridge ${bridge_name} exists (running: ip link show ${bridge_name})...'
logtype: .stdout
) or {}
check_result := os.execute('ip link show ${bridge_name} 2>/dev/null')
self.logger.log(
cat: 'network'
log: 'Bridge check result: exit_code=${check_result.exit_code}'
logtype: .stdout
) or {}
if check_result.exit_code == 0 {
self.logger.log(
cat: 'network'
log: 'Bridge ${bridge_name} already exists - skipping creation'
logtype: .stdout
) or {}
return
}
console.print_debug('Creating bridge ${bridge_name}...')
self.logger.log(
cat: 'network'
log: 'Bridge ${bridge_name} does not exist - creating new bridge'
logtype: .stdout
) or {}
// Create bridge
self.logger.log(
cat: 'network'
log: 'Step 1: Creating bridge (running: ip link add name ${bridge_name} type bridge)...'
logtype: .stdout
) or {}
osal.exec(
cmd: 'ip link add name ${bridge_name} type bridge'
stdout: false
)!
self.logger.log(
cat: 'network'
log: 'Step 1: Bridge created successfully'
logtype: .stdout
) or {}
// Assign IP to bridge
self.logger.log(
cat: 'network'
log: 'Step 2: Assigning IP to bridge (running: ip addr add ${gateway_ip} dev ${bridge_name})...'
logtype: .stdout
) or {}
osal.exec(
cmd: 'ip addr add ${gateway_ip} dev ${bridge_name}'
stdout: false
)!
self.logger.log(
cat: 'network'
log: 'Step 2: IP assigned successfully'
logtype: .stdout
) or {}
// Bring bridge up
self.logger.log(
cat: 'network'
log: 'Step 3: Bringing bridge up (running: ip link set ${bridge_name} up)...'
logtype: .stdout
) or {}
osal.exec(
cmd: 'ip link set ${bridge_name} up'
stdout: false
)!
// Enable IP forwarding (with error resilience)
osal.exec(
cmd: 'sysctl -w net.ipv4.ip_forward=1'
stdout: false
) or {
console.print_stderr('Warning: Failed to enable IPv4 forwarding. Containers may not have internet access.')
console.print_debug('You may need to run: sudo sysctl -w net.ipv4.ip_forward=1')
self.logger.log(
cat: 'network'
log: 'Step 3: Bridge brought up successfully'
logtype: .stdout
) or {}
// Enable IP forwarding
self.logger.log(
cat: 'network'
log: 'Step 4: Enabling IP forwarding (running: sysctl -w net.ipv4.ip_forward=1)...'
logtype: .stdout
) or {}
forward_result := os.execute('sysctl -w net.ipv4.ip_forward=1 2>/dev/null')
if forward_result.exit_code != 0 {
self.logger.log(
cat: 'network'
log: 'Step 4: WARNING - Failed to enable IPv4 forwarding (exit_code=${forward_result.exit_code})'
logtype: .error
) or {}
} else {
self.logger.log(
cat: 'network'
log: 'Step 4: IP forwarding enabled successfully'
logtype: .stdout
) or {}
}
// Get primary network interface for NAT
self.logger.log(
cat: 'network'
log: 'Step 5: Detecting primary network interface...'
logtype: .stdout
) or {}
primary_iface := self.network_get_primary_interface() or {
console.print_stderr('Warning: Could not detect primary network interface. NAT may not work.')
self.logger.log(
cat: 'network'
log: 'Step 5: WARNING - Could not detect primary interface: ${err}, using fallback eth0'
logtype: .error
) or {}
'eth0' // fallback
}
// Setup NAT for outbound traffic (with error resilience)
console.print_debug('Setting up NAT rules for ${primary_iface}...')
osal.exec(
cmd: 'iptables -t nat -C POSTROUTING -s ${subnet} -o ${primary_iface} -j MASQUERADE 2>/dev/null || iptables -t nat -A POSTROUTING -s ${subnet} -o ${primary_iface} -j MASQUERADE'
stdout: false
) or {
console.print_stderr('Warning: Failed to setup NAT rules. Containers may not have internet access.')
console.print_debug('You may need to run: sudo iptables -t nat -A POSTROUTING -s ${subnet} -o ${primary_iface} -j MASQUERADE')
self.logger.log(
cat: 'network'
log: 'Step 5: Primary interface detected: ${primary_iface}'
logtype: .stdout
) or {}
// Setup NAT for outbound traffic
self.logger.log(
cat: 'network'
log: 'Step 6: Setting up NAT rules for ${primary_iface} (running iptables command)...'
logtype: .stdout
) or {}
nat_result := os.execute('iptables -t nat -C POSTROUTING -s ${subnet} -o ${primary_iface} -j MASQUERADE 2>/dev/null || iptables -t nat -A POSTROUTING -s ${subnet} -o ${primary_iface} -j MASQUERADE')
if nat_result.exit_code != 0 {
self.logger.log(
cat: 'network'
log: 'Step 6: WARNING - Failed to setup NAT rules (exit_code=${nat_result.exit_code})'
logtype: .error
) or {}
} else {
self.logger.log(
cat: 'network'
log: 'Step 6: NAT rules configured successfully'
logtype: .stdout
) or {}
}
console.print_green('Bridge ${bridge_name} created and configured')
self.logger.log(
cat: 'network'
log: 'END network_setup_bridge() - Bridge ${bridge_name} created and configured successfully'
logtype: .stdout
) or {}
}
// Get the primary network interface for NAT
fn (self HeroPods) network_get_primary_interface() !string {
fn (mut self HeroPods) network_get_primary_interface() !string {
self.logger.log(
cat: 'network'
log: 'START network_get_primary_interface() - Detecting primary interface'
logtype: .stdout
) or {}
// Try to get the default route interface
cmd := "ip route | grep default | awk '{print \$5}' | head -n1"
self.logger.log(
cat: 'network'
log: 'Running command: ${cmd}'
logtype: .stdout
) or {}
result := osal.exec(
cmd: "ip route | grep default | awk '{print \$5}' | head -n1"
cmd: cmd
stdout: false
)!
self.logger.log(
cat: 'network'
log: 'Command completed, output: "${result.output.trim_space()}"'
logtype: .stdout
) or {}
iface := result.output.trim_space()
if iface == '' {
self.logger.log(
cat: 'network'
log: 'ERROR: Could not determine primary network interface (empty output)'
logtype: .error
) or {}
return error('Could not determine primary network interface')
}
self.logger.log(
cat: 'network'
log: 'END network_get_primary_interface() - Detected interface: ${iface}'
logtype: .stdout
) or {}
return iface
}
@@ -146,14 +288,49 @@ fn (self HeroPods) network_get_primary_interface() !string {
// Multiple concurrent container starts will be serialized at the IP allocation step,
// preventing race conditions where two containers could receive the same IP.
fn (mut self HeroPods) network_allocate_ip(container_name string) !string {
self.logger.log(
cat: 'network'
log: 'START network_allocate_ip() for container: ${container_name}'
logtype: .stdout
) or {}
self.logger.log(
cat: 'network'
log: 'Acquiring network_mutex lock...'
logtype: .stdout
) or {}
self.network_mutex.@lock()
self.logger.log(
cat: 'network'
log: 'network_mutex lock acquired'
logtype: .stdout
) or {}
defer {
self.logger.log(
cat: 'network'
log: 'Releasing network_mutex lock...'
logtype: .stdout
) or {}
self.network_mutex.unlock()
self.logger.log(
cat: 'network'
log: 'network_mutex lock released'
logtype: .stdout
) or {}
}
// Check if already allocated
if container_name in self.network_config.allocated_ips {
return self.network_config.allocated_ips[container_name]
existing_ip := self.network_config.allocated_ips[container_name]
self.logger.log(
cat: 'network'
log: 'Container ${container_name} already has IP: ${existing_ip}'
logtype: .stdout
) or {}
return existing_ip
}
// Extract base IP from subnet (e.g., "10.10.0.0/24" -> "10.10.0")
@@ -167,7 +344,11 @@ fn (mut self HeroPods) network_allocate_ip(container_name string) !string {
// Reuse a freed IP from the pool (LIFO - pop from end)
ip_offset = self.network_config.freed_ip_pool.last()
self.network_config.freed_ip_pool.delete_last()
console.print_debug('Reusing IP offset ${ip_offset} from freed pool (pool size: ${self.network_config.freed_ip_pool.len})')
self.logger.log(
cat: 'network'
log: 'Reusing IP offset ${ip_offset} from freed pool (pool size: ${self.network_config.freed_ip_pool.len})'
logtype: .stdout
) or {}
} else {
// No freed IPs available, allocate a new one
// This increment is atomic within the mutex lock
@@ -179,21 +360,27 @@ fn (mut self HeroPods) network_allocate_ip(container_name string) !string {
return error('IP address pool exhausted: subnet ${self.network_config.subnet} has no more available IPs. Consider using a larger subnet or multiple bridges.')
}
console.print_debug('Allocated new IP offset ${ip_offset} (next: ${self.network_config.next_ip_offset})')
self.logger.log(
cat: 'network'
log: 'Allocated new IP offset ${ip_offset} (next: ${self.network_config.next_ip_offset})'
logtype: .stdout
) or {}
}
// Build the full IP address
ip := '${base_ip}.${ip_offset}'
self.network_config.allocated_ips[container_name] = ip
console.print_debug('Allocated IP ${ip} to container ${container_name}')
self.logger.log(
cat: 'network'
log: 'Allocated IP ${ip} to container ${container_name}'
logtype: .stdout
) or {}
return ip
}
// Setup network for a container (creates veth pair, assigns IP, configures routing)
fn (mut self HeroPods) network_setup_container(container_name string, container_pid int) ! {
console.print_debug('Setting up network for container ${container_name} (PID: ${container_pid})...')
// Allocate IP address (thread-safe)
container_ip := self.network_allocate_ip(container_name)!
@@ -212,7 +399,7 @@ fn (mut self HeroPods) network_setup_container(container_name string, container_
osal.exec(cmd: 'ip link delete ${veth_bridge_short} 2>/dev/null', stdout: false) or {}
// Create veth pair
console.print_debug('Creating veth pair: ${veth_container_short} <-> ${veth_bridge_short}')
osal.exec(
cmd: 'ip link add ${veth_container_short} type veth peer name ${veth_bridge_short}'
stdout: false
@@ -230,14 +417,13 @@ fn (mut self HeroPods) network_setup_container(container_name string, container_
)!
// Move container end into container's network namespace
console.print_debug('Moving ${veth_container_short} into container namespace (PID: ${container_pid})')
osal.exec(
cmd: 'ip link set ${veth_container_short} netns ${container_pid}'
stdout: false
)!
// Configure network inside container
console.print_debug('Configuring network inside container: ${container_ip}/${subnet_mask}')
// Rename veth to eth0 inside container for consistency
osal.exec(
@@ -262,14 +448,10 @@ fn (mut self HeroPods) network_setup_container(container_name string, container_
cmd: 'nsenter -t ${container_pid} -n ip route add default via ${gateway_ip}'
stdout: false
)!
console.print_green('Network configured for container ${container_name}: ${container_ip}')
}
// Configure DNS inside container by writing resolv.conf
fn (self HeroPods) network_configure_dns(container_name string, rootfs_path string) ! {
console.print_debug('Configuring DNS for container ${container_name}...')
resolv_conf_path := '${rootfs_path}/etc/resolv.conf'
// Ensure /etc directory exists
@@ -288,7 +470,6 @@ fn (self HeroPods) network_configure_dns(container_name string, rootfs_path stri
os.write_file(resolv_conf_path, dns_content)!
dns_servers_str := self.network_config.dns_servers.join(', ')
console.print_debug('DNS configured: ${dns_servers_str}')
}
// Cleanup network for a container (removes veth pair and deallocates IP)
@@ -297,8 +478,6 @@ fn (self HeroPods) network_configure_dns(container_name string, rootfs_path stri
// IP deallocation is protected by network_mutex to prevent race conditions
// when multiple containers are being deleted concurrently.
fn (mut self HeroPods) network_cleanup_container(container_name string) ! {
console.print_debug('Cleaning up network for container ${container_name}...')
// Remove veth interfaces (they should be auto-removed when the container stops, but clean up anyway)
// Use same hash logic as setup to ensure we delete the correct interface
short_hash := sha256.hexhash(container_name)[..6]
@@ -307,7 +486,7 @@ fn (mut self HeroPods) network_cleanup_container(container_name string) ! {
osal.exec(
cmd: 'ip link delete ${veth_bridge_short} 2>/dev/null'
stdout: false
) or { console.print_debug('veth interface ${veth_bridge_short} already removed') }
) or {}
// Deallocate IP address and return it to the freed pool for reuse (thread-safe)
self.network_mutex.@lock()
@@ -326,13 +505,11 @@ fn (mut self HeroPods) network_cleanup_container(container_name string) ! {
// Add to freed pool for reuse (avoid duplicates)
if ip_offset !in self.network_config.freed_ip_pool {
self.network_config.freed_ip_pool << ip_offset
console.print_debug('Returned IP offset ${ip_offset} to freed pool (pool size: ${self.network_config.freed_ip_pool.len})')
}
}
// Remove from allocated IPs
self.network_config.allocated_ips.delete(container_name)
console.print_debug('Deallocated IP ${ip} from container ${container_name}')
}
}
@@ -346,8 +523,6 @@ fn (mut self HeroPods) network_cleanup_container(container_name string) ! {
// Uses separate lock/unlock calls for read and write operations to minimize
// lock contention. The container cleanup loop runs without holding the lock.
fn (mut self HeroPods) network_cleanup_all(full bool) ! {
console.print_debug('Cleaning up all HeroPods network resources (full=${full})...')
// Get list of containers to cleanup (thread-safe read)
self.network_mutex.@lock()
container_names := self.network_config.allocated_ips.keys()
@@ -356,7 +531,6 @@ fn (mut self HeroPods) network_cleanup_all(full bool) ! {
// Remove all veth interfaces (no lock needed - operates on local copy)
for container_name in container_names {
self.network_cleanup_container(container_name) or {
console.print_debug('Failed to cleanup network for ${container_name}: ${err}')
}
}
@@ -367,18 +541,13 @@ fn (mut self HeroPods) network_cleanup_all(full bool) ! {
self.network_config.next_ip_offset = 10
self.network_mutex.unlock()
console.print_debug('Cleared IP allocations and freed pool')
// Optionally remove the bridge for full cleanup
if full {
bridge_name := self.network_config.bridge_name
console.print_debug('Removing bridge ${bridge_name}...')
osal.exec(
cmd: 'ip link delete ${bridge_name}'
stdout: false
) or { console.print_debug('Bridge ${bridge_name} already removed or does not exist') }
) or {}
}
console.print_debug('Network cleanup complete')
}