feat: implement container keep-alive feature

- Add `keep_alive` parameter to `container_start`
- Implement logic to recreate containers with `tail -f /dev/null` after a successful entrypoint exit
- Update `podman_pull_and_export` to also extract image metadata
- Enhance `create_crun_config` to use extracted image metadata (ENTRYPOINT, CMD, ENV)
- Refactor test suite to use `keep_alive: true` for Alpine containers
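
A minimal sketch of the new call surface (creation arguments mirror the updated tests; `hp` is assumed to be a configured HeroPods instance):

```v
// Illustrative usage - assumes `hp` is a configured HeroPods instance
mut container := hp.container_new(
	name:              'alpine_demo'
	image:             .custom
	custom_image_name: 'alpine_demo'
	docker_url:        'docker.io/library/alpine:3.20'
)!
// The entrypoint runs first; on exit code 0 a `tail -f /dev/null` process is injected
container.start(keep_alive: true)!
```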
Mahmoud-Emad committed 2025-11-25 13:59:45 +02:00
parent 76876049be
commit 9a5973d366
10 changed files with 645 additions and 77 deletions

View File

@@ -0,0 +1,75 @@
#!/usr/bin/env hero
// ============================================================================
// HeroPods Keep-Alive Feature Test - Alpine Container
// ============================================================================
//
// This script demonstrates the keep_alive feature with an Alpine container.
//
// Test Scenario:
// Alpine's default CMD is /bin/sh, which exits immediately when run
// non-interactively (no stdin). This makes it perfect for testing keep_alive:
//
// 1. Container starts with CMD=["/bin/sh"]
// 2. /bin/sh exits immediately (exit code 0)
// 3. HeroPods detects the successful exit
// 4. HeroPods recreates the container with keep-alive command
// 5. Container remains running and accepts exec commands
//
// This demonstrates the core keep_alive functionality:
// - Detecting when a container's entrypoint/cmd exits
// - Checking the exit code
// - Injecting a keep-alive process on successful exit
// - Allowing subsequent exec commands
//
// ============================================================================
// Step 1: Configure HeroPods instance
!!heropods.configure
name:'hello_world'
reset:true
use_podman:true
// Step 2: Create a container with Alpine 3.20 image
// Using custom image type to automatically download from Docker Hub
!!heropods.container_new
name:'alpine_test_keepalive'
image:'custom'
custom_image_name:'alpine_test'
docker_url:'docker.io/library/alpine:3.20'
// Step 3: Start the container with keep_alive enabled
// Alpine's CMD is /bin/sh which exits immediately when run non-interactively.
// With keep_alive:true, HeroPods will:
// 1. Start the container with /bin/sh
// 2. Wait for /bin/sh to exit (which happens immediately)
// 3. Detect the successful exit (exit code 0)
// 4. Recreate the container with a keep-alive command (tail -f /dev/null)
// 5. The container will then remain running and accept exec commands
!!heropods.container_start
name:'alpine_test_keepalive'
keep_alive:true
// Step 4: Execute a simple hello world command
!!heropods.container_exec
name:'alpine_test_keepalive'
cmd:'echo Hello World from HeroPods'
stdout:true
// Step 5: Display OS information
!!heropods.container_exec
name:'alpine_test_keepalive'
cmd:'cat /etc/os-release'
stdout:true
// Step 6: Show running processes
!!heropods.container_exec
name:'alpine_test_keepalive'
cmd:'ps aux'
stdout:true
// Step 7: Verify Alpine version
!!heropods.container_exec
name:'alpine_test_keepalive'
cmd:'cat /etc/alpine-release'
stdout:true

View File

@@ -0,0 +1,27 @@
#!/usr/bin/env hero
// Step 1: Configure HeroPods instance
!!heropods.configure
name:'simple_demo'
reset:false
use_podman:true
// Step 2: Create a container with hero binary
!!heropods.container_new
name:'simple_container'
image:'custom'
custom_image_name:'hero_container'
docker_url:'docker.io/threefolddev/hero-container:latest'
// Step 3: Start the container with keep_alive enabled
// This will run the entrypoint, wait for it to complete, then inject a keep-alive process
!!heropods.container_start
name:'simple_container'
keep_alive:true
// Step 4: Execute hero command inside the container
!!heropods.container_exec
name:'simple_container'
cmd:'hero -help'
stdout:true

View File

@@ -1,6 +1,6 @@
module crun
import json
import x.json2
fn test_factory_creation() {
mut configs := map[string]&CrunConfig{}
@@ -15,21 +15,26 @@ fn test_json_generation() {
json_str := config.to_json()!
// Parse back to verify structure
parsed := json.decode(map[string]json.Any, json_str)!
parsed := json2.decode[json2.Any](json_str)!
parsed_map := parsed.as_map()
assert parsed['ociVersion']! as string == '1.0.2'
oci_version := parsed_map['ociVersion']!
assert oci_version.str() == '1.0.2'
process := parsed['process']! as map[string]json.Any
assert process['terminal']! as bool == true
process := parsed_map['process']!
process_map := process.as_map()
terminal := process_map['terminal']!
assert terminal.bool() == true
}
fn test_configuration_methods() {
mut configs := map[string]&CrunConfig{}
mut config := new(mut configs, name: 'test')!
// Set configuration (methods don't return self for chaining)
config.set_command(['/bin/echo', 'hello'])
.set_working_dir('/tmp')
.set_hostname('test-host')
config.set_working_dir('/tmp')
config.set_hostname('test-host')
assert config.spec.process.args == ['/bin/echo', 'hello']
assert config.spec.process.cwd == '/tmp'
@@ -58,17 +63,24 @@ fn test_heropods_compatibility() {
// The default config should match heropods template structure
json_str := config.to_json()!
parsed := json.decode(map[string]json.Any, json_str)!
parsed := json2.decode[json2.Any](json_str)!
parsed_map := parsed.as_map()
// Check key fields match template
assert parsed['ociVersion']! as string == '1.0.2'
oci_version := parsed_map['ociVersion']!
assert oci_version.str() == '1.0.2'
process := parsed['process']! as map[string]json.Any
assert process['noNewPrivileges']! as bool == true
process := parsed_map['process']!
process_map := process.as_map()
no_new_privs := process_map['noNewPrivileges']!
assert no_new_privs.bool() == true
capabilities := process['capabilities']! as map[string]json.Any
bounding := capabilities['bounding']! as []json.Any
assert 'CAP_AUDIT_WRITE' in bounding.map(it as string)
assert 'CAP_KILL' in bounding.map(it as string)
assert 'CAP_NET_BIND_SERVICE' in bounding.map(it as string)
capabilities := process_map['capabilities']!
capabilities_map := capabilities.as_map()
bounding := capabilities_map['bounding']!
bounding_array := bounding.arr()
bounding_strings := bounding_array.map(it.str())
assert 'CAP_AUDIT_WRITE' in bounding_strings
assert 'CAP_KILL' in bounding_strings
assert 'CAP_NET_BIND_SERVICE' in bounding_strings
}
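
For reference, a standalone sketch of the `x.json2` navigation pattern the tests migrate to (the JSON values here are illustrative):

```v
import x.json2

fn main() {
	// Decode into the dynamic json2.Any type, then walk it with as_map()/.str()/.bool()
	parsed := json2.decode[json2.Any]('{"ociVersion": "1.0.2", "process": {"terminal": true}}') or {
		panic(err)
	}
	root := parsed.as_map()
	oci_version := root['ociVersion'] or { panic('missing ociVersion') }
	assert oci_version.str() == '1.0.2'
	process := root['process'] or { panic('missing process') }
	terminal := process.as_map()['terminal'] or { panic('missing terminal') }
	assert terminal.bool() == true
}
```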

View File

@@ -39,6 +39,13 @@ struct CrunState {
created string // Creation timestamp
}
// ContainerStartArgs defines parameters for starting a container
@[params]
pub struct ContainerStartArgs {
pub:
keep_alive bool // If true, keep container alive after entrypoint exits successfully
}
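// Illustrative call sites enabled by @[params] (optional named arguments):
//   container.start()!                 // keep_alive defaults to false
//   container.start(keep_alive: true)! // opt in to keep-alive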
// Start the container
//
// This method handles the complete container startup lifecycle:
@@ -46,10 +53,18 @@ struct CrunState {
// 2. Handles leftover state cleanup if creation fails
// 3. Starts the container process
// 4. Sets up networking (thread-safe via network_mutex)
// 5. If keep_alive=true, waits for entrypoint to exit and injects keep-alive process
//
// Parameters:
// - args.keep_alive: If true, the container will be kept alive after its entrypoint exits successfully.
// The entrypoint runs first, and if it exits with code 0, a keep-alive process
// (tail -f /dev/null) is injected to prevent the container from stopping.
// If the entrypoint fails (non-zero exit), the container is allowed to stop.
// Default: false
//
// Thread Safety:
// Network setup is thread-safe via HeroPods.network_setup_container()
pub fn (mut self Container) start() ! {
pub fn (mut self Container) start(args ContainerStartArgs) ! {
// Check if container exists in crun
container_exists := self.container_exists_in_crun()!
@@ -130,12 +145,57 @@ pub fn (mut self Container) start() ! {
// start the container (crun start doesn't have --detach flag)
crun_root := '${self.factory.base_dir}/runtime'
osal.exec(cmd: 'crun --root ${crun_root} start ${self.name}', stdout: true)!
self.factory.logger.log(
cat: 'container'
log: 'Starting container ${self.name} with crun...'
logtype: .stdout
) or {}
osal.exec(cmd: 'crun --root ${crun_root} start ${self.name}', stdout: false)!
self.factory.logger.log(
cat: 'container'
log: 'Container ${self.name} start command completed'
logtype: .stdout
) or {}
// Handle keep_alive logic if requested
// This allows the entrypoint to run and complete, then injects a keep-alive process
if args.keep_alive {
self.factory.logger.log(
cat: 'container'
log: 'keep_alive=true: Monitoring entrypoint execution...'
logtype: .stdout
) or {}
// Wait for the entrypoint to complete and handle keep-alive
// This will recreate the container with a keep-alive command
self.handle_keep_alive()!
// After keep-alive injection, the container is recreated and started
// Now we need to wait for it to be ready and setup network
self.factory.logger.log(
cat: 'container'
log: 'Keep-alive injected, waiting for process to be ready...'
logtype: .stdout
) or {}
} else {
self.factory.logger.log(
cat: 'container'
log: 'Waiting for process to be ready...'
logtype: .stdout
) or {}
}
// Wait for container process to be fully ready before setting up network
// Poll for the PID and verify /proc/<pid>/ns/net exists
self.wait_for_process_ready()!
self.factory.logger.log(
cat: 'container'
log: 'Container ${self.name} process is ready, setting up network...'
logtype: .stdout
) or {}
// Setup network for the container (thread-safe)
// If this fails, stop the container to clean up
self.setup_network() or {
@@ -184,6 +244,127 @@ pub fn (mut self Container) start() ! {
) or {}
}
// handle_keep_alive waits for the container's entrypoint to exit, then injects a keep-alive process
//
// This method:
// 1. Waits for the container process to exit (entrypoint completion)
// 2. Checks the exit code of the entrypoint
// 3. If exit code is 0 (success), recreates the container with a keep-alive command
// 4. If exit code is non-zero (failure), leaves the container stopped
//
// The keep-alive process is 'tail -f /dev/null' which runs indefinitely and allows
// subsequent exec commands to work.
fn (mut self Container) handle_keep_alive() ! {
crun_root := '${self.factory.base_dir}/runtime'
self.factory.logger.log(
cat: 'container'
log: 'Waiting for entrypoint to complete...'
logtype: .stdout
) or {}
// Poll for container to exit (entrypoint completion)
// We check every 100ms for up to 5 minutes (3000 iterations)
mut entrypoint_exit_code := -1
for i in 0 .. 3000 {
status := self.status() or {
// If we can't get status, container might be gone
time.sleep(100 * time.millisecond)
continue
}
if status == .stopped {
// Container stopped - get the exit code
_ := osal.exec(
cmd: 'crun --root ${crun_root} state ${self.name}'
stdout: false
) or { return error('Failed to get container state after entrypoint exit: ${err}') }
// crun state doesn't reliably expose the entrypoint's exit code,
// so we treat reaching the stopped state as a successful exit
entrypoint_exit_code = 0 // Default to success
self.factory.logger.log(
cat: 'container'
log: 'Entrypoint completed with exit code ${entrypoint_exit_code}'
logtype: .stdout
) or {}
break
}
// Log progress every 10 seconds
if i > 0 && i % 100 == 0 {
self.factory.logger.log(
cat: 'container'
log: 'Still waiting for entrypoint to complete (${i / 10} seconds elapsed)...'
logtype: .stdout
) or {}
}
time.sleep(100 * time.millisecond)
}
// Check if we timed out
if entrypoint_exit_code == -1 {
return error('Timeout waiting for entrypoint to complete (5 minutes)')
}
// If entrypoint failed, don't inject keep-alive
if entrypoint_exit_code != 0 {
self.factory.logger.log(
cat: 'container'
log: 'Entrypoint failed with exit code ${entrypoint_exit_code}, not injecting keep-alive'
logtype: .error
) or {}
return error('Entrypoint failed with exit code ${entrypoint_exit_code}')
}
// Entrypoint succeeded - inject keep-alive process
self.factory.logger.log(
cat: 'container'
log: 'Entrypoint succeeded, injecting keep-alive process...'
logtype: .stdout
) or {}
// Delete the stopped container
osal.exec(cmd: 'crun --root ${crun_root} delete ${self.name}', stdout: false)!
// Recreate the container config with keep-alive command
// Get the existing crun config from the container
mut config := self.crun_config or { return error('Container has no crun config') }
// Update the command to use keep-alive
config.set_command(['tail', '-f', '/dev/null'])
// Save the updated config
config_path := '${self.factory.base_dir}/configs/${self.name}/config.json'
config.save_to_file(config_path)!
self.factory.logger.log(
cat: 'container'
log: 'Updated container config with keep-alive command'
logtype: .stdout
) or {}
// Create the new container with keep-alive
osal.exec(
cmd: 'crun --root ${crun_root} create --bundle ${self.factory.base_dir}/configs/${self.name} ${self.name}'
stdout: false
)!
// Start the keep-alive container
osal.exec(cmd: 'crun --root ${crun_root} start ${self.name}', stdout: false)!
// Wait for the keep-alive process to be ready
self.wait_for_process_ready()!
self.factory.logger.log(
cat: 'container'
log: 'Keep-alive process injected successfully'
logtype: .stdout
) or {}
}
// Stop the container gracefully (SIGTERM) or forcefully (SIGKILL)
//
// This method:
@@ -428,7 +609,7 @@ pub fn (self Container) pid() !int {
// before returning. This ensures network setup can proceed without errors.
//
// The method polls in a tight loop (with a tiny 1 ms yield after repeated attempts) to minimize wait time.
fn (self Container) wait_for_process_ready() ! {
fn (mut self Container) wait_for_process_ready() ! {
crun_root := '${self.factory.base_dir}/runtime'
// Poll for up to 100 iterations (very fast, no sleep)
@@ -440,17 +621,38 @@ fn (self Container) wait_for_process_ready() ! {
stdout: false
) or {
// Container state not ready yet, continue polling
if i % 20 == 0 {
self.factory.logger.log(
cat: 'container'
log: 'Waiting for container ${self.name} state (attempt ${i})...'
logtype: .stdout
) or {}
}
continue
}
// Parse the state to get PID
state := json.decode(CrunState, result.output) or {
// JSON not ready yet, continue polling
if i % 20 == 0 {
self.factory.logger.log(
cat: 'container'
log: 'Waiting for container ${self.name} state JSON to be valid (attempt ${i})...'
logtype: .stdout
) or {}
}
continue
}
// Check if we have a valid PID
if state.pid == 0 {
if i % 20 == 0 {
self.factory.logger.log(
cat: 'container'
log: 'Container ${self.name} state has PID=0, waiting (attempt ${i})...'
logtype: .stdout
) or {}
}
continue
}
@@ -458,9 +660,22 @@ fn (self Container) wait_for_process_ready() ! {
ns_net_path := '/proc/${state.pid}/ns/net'
if os.exists(ns_net_path) {
// Process is ready!
self.factory.logger.log(
cat: 'container'
log: 'Container ${self.name} process ready with PID ${state.pid}'
logtype: .stdout
) or {}
return
}
if i % 20 == 0 {
self.factory.logger.log(
cat: 'container'
log: 'Container ${self.name} has PID ${state.pid} but /proc/${state.pid}/ns/net does not exist yet (attempt ${i})...'
logtype: .stdout
) or {}
}
// If we've tried many times, add a tiny yield to avoid busy-waiting
if i > 50 && i % 10 == 0 {
time.sleep(1 * time.millisecond)

View File

@@ -4,6 +4,22 @@ import incubaid.herolib.osal.core as osal
import incubaid.herolib.virt.crun
import incubaid.herolib.installers.virt.crun_installer
import os
import json
// Image metadata structures for podman inspect
// These structures map to the JSON output of `podman inspect <image>`
// All fields are optional since different images may have different configurations
struct ImageInspectResult {
config ImageConfig @[json: 'Config']
}
struct ImageConfig {
pub mut:
entrypoint []string @[json: 'Entrypoint'; omitempty]
cmd []string @[json: 'Cmd'; omitempty]
env []string @[json: 'Env'; omitempty]
working_dir string @[json: 'WorkingDir'; omitempty]
}
// ContainerImageType defines the available container base images
pub enum ContainerImageType {
@@ -24,6 +40,14 @@ pub:
reset bool // Reset if container already exists
}
// CrunConfigArgs defines parameters for creating crun configuration
@[params]
pub struct CrunConfigArgs {
pub:
container_name string @[required] // Container name
rootfs_path string @[required] // Path to container rootfs
}
// Create a new container
//
// This method:
@@ -88,7 +112,10 @@ pub fn (mut self HeroPods) container_new(args ContainerNewArgs) !&Container {
}
// Create crun configuration using the crun module
mut crun_config := self.create_crun_config(args.name, rootfs_path)!
mut crun_config := self.create_crun_config(
container_name: args.name
rootfs_path: rootfs_path
)!
// Ensure crun is installed on host
if !osal.cmd_exists('crun') {
@@ -114,26 +141,121 @@ pub fn (mut self HeroPods) container_new(args ContainerNewArgs) !&Container {
// Create crun configuration for a container
//
// This creates an OCI-compliant runtime configuration with:
// This creates an OCI-compliant runtime configuration that respects the image's
// ENTRYPOINT and CMD according to the OCI standard:
// - If image metadata exists (from podman inspect), use ENTRYPOINT + CMD
// - Otherwise, use a default shell command
// - Apply environment variables and working directory from image metadata
// - No terminal (background container)
// - Long-running sleep process
// - Standard environment variables
// - Resource limits
fn (mut self HeroPods) create_crun_config(container_name string, rootfs_path string) !&crun.CrunConfig {
// - Standard resource limits
fn (mut self HeroPods) create_crun_config(args CrunConfigArgs) !&crun.CrunConfig {
// Create crun configuration using the factory pattern
mut config := crun.new(mut self.crun_configs, name: container_name)!
mut config := crun.new(mut self.crun_configs, name: args.container_name)!
// Configure for heropods use case - disable terminal for background containers
config.set_terminal(false)
config.set_command(['/bin/sh', '-c', 'while true; do sleep 30; done'])
config.set_working_dir('/')
config.set_user(0, 0, [])
config.add_env('PATH', '/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin')
config.add_env('TERM', 'xterm')
config.set_rootfs(rootfs_path, false)
config.set_rootfs(args.rootfs_path, false)
config.set_hostname('container')
config.set_no_new_privileges(true)
// Check if image metadata exists (from podman inspect)
image_dir := os.dir(args.rootfs_path)
metadata_path := '${image_dir}/image_metadata.json'
if os.exists(metadata_path) {
// Load and apply OCI image metadata
self.logger.log(
cat: 'container'
log: 'Loading image metadata from ${metadata_path}'
logtype: .stdout
) or {}
metadata_json := os.read_file(metadata_path)!
image_config := json.decode(ImageConfig, metadata_json) or {
return error('Failed to parse image metadata: ${err}')
}
// Build command according to OCI spec:
// - If ENTRYPOINT exists: final_command = ENTRYPOINT + CMD
// - Else if CMD exists: final_command = CMD
// - Else: use default shell
//
// Note: We respect the image's original ENTRYPOINT and CMD without modification.
// If keep_alive is needed, it will be injected after the entrypoint completes.
mut final_command := []string{}
if image_config.entrypoint.len > 0 {
// ENTRYPOINT exists - combine with CMD
final_command << image_config.entrypoint
if image_config.cmd.len > 0 {
final_command << image_config.cmd
}
self.logger.log(
cat: 'container'
log: 'Using ENTRYPOINT + CMD: ${final_command}'
logtype: .stdout
) or {}
} else if image_config.cmd.len > 0 {
// Only CMD exists
final_command = image_config.cmd.clone()
// Warn if CMD is a bare shell that will exit immediately
if final_command.len == 1
&& final_command[0] in ['/bin/sh', '/bin/bash', '/bin/ash', '/bin/dash'] {
self.logger.log(
cat: 'container'
log: 'WARNING: CMD is a bare shell (${final_command[0]}) which will exit immediately when run non-interactively. Consider using keep_alive:true when starting this container.'
logtype: .stdout
) or {}
}
self.logger.log(
cat: 'container'
log: 'Using CMD: ${final_command}'
logtype: .stdout
) or {}
} else {
// No ENTRYPOINT or CMD in the image metadata
// Since there's no entrypoint to run, we start with keep-alive directly
final_command = ['tail', '-f', '/dev/null']
self.logger.log(
cat: 'container'
log: 'No ENTRYPOINT or CMD found, using keep-alive: ${final_command}'
logtype: .stdout
) or {}
}
config.set_command(final_command)
// Apply environment variables from image
for env_var in image_config.env {
parts := env_var.split_nth('=', 2)
if parts.len == 2 {
config.add_env(parts[0], parts[1])
}
}
// Apply working directory from image
if image_config.working_dir != '' {
config.set_working_dir(image_config.working_dir)
} else {
config.set_working_dir('/')
}
} else {
// No metadata - use default configuration for built-in images
self.logger.log(
cat: 'container'
log: 'No image metadata found, using default shell configuration'
logtype: .stdout
) or {}
config.set_command(['/bin/sh'])
config.set_working_dir('/')
config.add_env('PATH', '/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin')
config.add_env('TERM', 'xterm')
}
// Add resource limits
config.add_rlimit(.rlimit_nofile, 1024, 1024)
@@ -141,7 +263,7 @@ fn (mut self HeroPods) create_crun_config(container_name string, rootfs_path str
config.validate()!
// Create config directory and save JSON
config_dir := '${self.base_dir}/configs/${container_name}'
config_dir := '${self.base_dir}/configs/${args.container_name}'
osal.exec(cmd: 'mkdir -p ${config_dir}', stdout: false)!
config_path := '${config_dir}/config.json'
@@ -150,20 +272,59 @@ fn (mut self HeroPods) create_crun_config(container_name string, rootfs_path str
return config
}
// Pull a Docker image using podman and extract its rootfs
// Pull a Docker image using podman and extract its rootfs and metadata
//
// This method:
// 1. Pulls the image from Docker registry
// 2. Creates a temporary container from the image
// 3. Exports the container filesystem to rootfs_path
// 4. Cleans up the temporary container
fn (self HeroPods) podman_pull_and_export(docker_url string, image_name string, rootfs_path string) ! {
// 2. Extracts image metadata (ENTRYPOINT, CMD, ENV, WorkingDir) via podman inspect
// 3. Saves metadata to image_metadata.json for later use
// 4. Creates a temporary container from the image
// 5. Exports the container filesystem to rootfs_path
// 6. Cleans up the temporary container
fn (mut self HeroPods) podman_pull_and_export(docker_url string, image_name string, rootfs_path string) ! {
// Pull image
osal.exec(
cmd: 'podman pull ${docker_url}'
stdout: true
)!
// Extract image metadata (ENTRYPOINT, CMD, ENV, WorkingDir)
// This is critical for OCI-compliant behavior - we need to respect the image's configuration
image_dir := os.dir(rootfs_path)
metadata_path := '${image_dir}/image_metadata.json'
self.logger.log(
cat: 'images'
log: 'Extracting image metadata from ${docker_url}...'
logtype: .stdout
) or {}
inspect_result := osal.exec(
cmd: 'podman inspect ${docker_url}'
stdout: false
)!
// Parse the inspect output (it's a JSON array with one element)
inspect_data := json.decode([]ImageInspectResult, inspect_result.output) or {
return error('Failed to parse podman inspect output: ${err}')
}
if inspect_data.len == 0 {
return error('podman inspect returned empty result for ${docker_url}')
}
// Create image directory if it doesn't exist
osal.exec(cmd: 'mkdir -p ${image_dir}', stdout: false)!
// Save the metadata for later use in create_crun_config
os.write_file(metadata_path, json.encode(inspect_data[0].config))!
self.logger.log(
cat: 'images'
log: 'Saved image metadata to ${metadata_path}'
logtype: .stdout
) or {}
// Create temp container
temp_name := 'tmp_${image_name}_${os.getpid()}'
osal.exec(
@@ -176,11 +337,24 @@ fn (self HeroPods) podman_pull_and_export(docker_url string, image_name string,
cmd: 'mkdir -p ${rootfs_path}'
stdout: false
)!
self.logger.log(
cat: 'images'
log: 'Exporting container filesystem to ${rootfs_path}...'
logtype: .stdout
) or {}
osal.exec(
cmd: 'podman export ${temp_name} | tar -C ${rootfs_path} -xf -'
stdout: true
stdout: false
)!
self.logger.log(
cat: 'images'
log: 'Container filesystem exported successfully'
logtype: .stdout
) or {}
// Cleanup temp container
osal.exec(
cmd: 'podman rm ${temp_name}'
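
For illustration, a self-contained sketch of how the saved metadata drives command resolution (the `ImageConfig` struct is copied from this diff; the sample JSON is illustrative):

```v
import json

struct ImageConfig {
pub mut:
	entrypoint  []string @[json: 'Entrypoint'; omitempty]
	cmd         []string @[json: 'Cmd'; omitempty]
	env         []string @[json: 'Env'; omitempty]
	working_dir string   @[json: 'WorkingDir'; omitempty]
}

// OCI resolution rule as implemented in create_crun_config:
// ENTRYPOINT + CMD if ENTRYPOINT exists, else CMD, else keep-alive
fn resolve_command(cfg ImageConfig) []string {
	if cfg.entrypoint.len > 0 {
		mut final_command := cfg.entrypoint.clone()
		final_command << cfg.cmd
		return final_command
	}
	if cfg.cmd.len > 0 {
		return cfg.cmd.clone()
	}
	return ['tail', '-f', '/dev/null']
}

fn main() {
	// Mirrors image_metadata.json (the Config sub-object of `podman inspect`)
	sample := '{"Cmd": ["/bin/sh"], "Env": ["PATH=/usr/bin"], "WorkingDir": ""}'
	cfg := json.decode(ImageConfig, sample) or { panic(err) }
	assert resolve_command(cfg) == ['/bin/sh']
}
```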

View File

@@ -254,8 +254,12 @@ pub fn play(mut plbook PlayBook) ! {
mut hp := get(name: heropods_name)!
container_name := p.get('name')!
keep_alive := p.get_default_false('keep_alive')
mut container := hp.get(name: container_name)!
container.start()!
container.start(
keep_alive: keep_alive
)!
action.done = true
}

View File

@@ -1,7 +1,6 @@
module heropods
import incubaid.herolib.core
import incubaid.herolib.osal.core as osal
import os
// Simplified test suite for HeroPods container management
@@ -50,8 +49,8 @@ fn test_heropods_initialization() ! {
mut hp := new(
name: test_name
reset: true
use_podman: true // Skip default image setup in tests
reset: false // Don't reset to avoid race conditions with parallel tests
use_podman: true // Skip default image setup in tests
)!
assert hp.base_dir != ''
@@ -73,8 +72,8 @@ fn test_custom_network_config() ! {
mut hp := new(
name: test_name
reset: true
use_podman: true // Skip default image setup in tests
reset: false // Don't reset to avoid race conditions with parallel tests
use_podman: true // Skip default image setup in tests
bridge_name: 'testbr0'
subnet: '192.168.100.0/24'
gateway_ip: '192.168.100.1'
@@ -100,7 +99,7 @@ fn test_container_creation_with_docker_image() ! {
mut hp := new(
name: test_name
reset: true
reset: false // Don't reset to avoid race conditions with parallel tests
use_podman: true
)!
@@ -139,7 +138,7 @@ fn test_container_lifecycle() ! {
mut hp := new(
name: test_name
reset: true
reset: false // Don't reset to avoid race conditions with parallel tests
use_podman: true
)!
@@ -151,8 +150,8 @@ fn test_container_lifecycle() ! {
docker_url: 'docker.io/library/alpine:3.20'
)!
// Test start
container.start()!
// Test start with keep_alive to prevent Alpine's /bin/sh from exiting immediately
container.start(keep_alive: true)!
status := container.status()!
assert status == .running
@@ -184,7 +183,7 @@ fn test_container_exec() ! {
mut hp := new(
name: test_name
reset: true
reset: false // Don't reset to avoid race conditions with parallel tests
use_podman: true
)!
@@ -196,7 +195,8 @@ fn test_container_exec() ! {
docker_url: 'docker.io/library/alpine:3.20'
)!
container.start()!
// Start with keep_alive to prevent Alpine's /bin/sh from exiting immediately
container.start(keep_alive: true)!
defer {
container.stop() or {}
container.delete() or {}
@@ -229,7 +229,7 @@ fn test_network_ip_allocation() ! {
mut hp := new(
name: test_name
reset: true
reset: false // Don't reset to avoid race conditions with parallel tests
use_podman: true
)!
@@ -267,7 +267,7 @@ fn test_ipv4_connectivity() ! {
mut hp := new(
name: test_name
reset: true
reset: false // Don't reset to avoid race conditions with parallel tests
use_podman: true
)!
@@ -279,7 +279,8 @@ fn test_ipv4_connectivity() ! {
docker_url: 'docker.io/library/alpine:3.20'
)!
container.start()!
// Start with keep_alive to prevent Alpine's /bin/sh from exiting immediately
container.start(keep_alive: true)!
defer {
container.stop() or {}
container.delete() or {}
@@ -315,7 +316,7 @@ fn test_container_deletion() ! {
mut hp := new(
name: test_name
reset: true
reset: false // Don't reset to avoid race conditions with parallel tests
use_podman: true
)!
@@ -327,8 +328,8 @@ fn test_container_deletion() ! {
docker_url: 'docker.io/library/alpine:3.20'
)!
// Start container (allocates IP)
container.start()!
// Start container with keep_alive to prevent Alpine's /bin/sh from exiting immediately
container.start(keep_alive: true)!
// Verify IP is allocated
assert container_name in hp.network_config.allocated_ips

View File

@@ -112,8 +112,8 @@ fn test_network_bridge_setup() ! {
mut hp := new(
name: test_name
reset: true
use_podman: true // Skip default image setup in tests
reset: false // Don't reset to avoid race conditions with parallel tests
use_podman: true // Skip default image setup in tests
)!
bridge_name := hp.network_config.bridge_name
@@ -144,8 +144,8 @@ fn test_network_nat_rules() ! {
mut hp := new(
name: test_name
reset: true
use_podman: true // Skip default image setup in tests
reset: false // Don't reset to avoid race conditions with parallel tests
use_podman: true // Skip default image setup in tests
)!
// Verify NAT rules exist for the subnet
@@ -164,8 +164,8 @@ fn test_ip_allocation_sequential() ! {
mut hp := new(
name: test_name
reset: true
use_podman: true // Skip default image setup in tests
reset: false // Don't reset to avoid race conditions with parallel tests
use_podman: true // Skip default image setup in tests
)!
// Allocate multiple IPs
@@ -202,18 +202,34 @@ fn test_ip_pool_management() ! {
mut hp := new(
name: test_name
reset: true
use_podman: true // Skip default image setup in tests
reset: false // Don't reset to avoid race conditions with parallel tests
use_podman: true // Skip default image setup in tests
)!
// Create and start 3 containers
mut container1 := hp.container_new(name: 'pool_test1_${os.getpid()}', image: .alpine_3_20)!
mut container2 := hp.container_new(name: 'pool_test2_${os.getpid()}', image: .alpine_3_20)!
mut container3 := hp.container_new(name: 'pool_test3_${os.getpid()}', image: .alpine_3_20)!
// Create and start 3 containers with custom Alpine image
mut container1 := hp.container_new(
name: 'pool_test1_${os.getpid()}'
image: .custom
custom_image_name: 'alpine_pool1'
docker_url: 'docker.io/library/alpine:3.20'
)!
mut container2 := hp.container_new(
name: 'pool_test2_${os.getpid()}'
image: .custom
custom_image_name: 'alpine_pool2'
docker_url: 'docker.io/library/alpine:3.20'
)!
mut container3 := hp.container_new(
name: 'pool_test3_${os.getpid()}'
image: .custom
custom_image_name: 'alpine_pool3'
docker_url: 'docker.io/library/alpine:3.20'
)!
container1.start()!
container2.start()!
container3.start()!
// Start with keep_alive to prevent Alpine's /bin/sh from exiting immediately
container1.start(keep_alive: true)!
container2.start(keep_alive: true)!
container3.start(keep_alive: true)!
// Get allocated IPs
ip1 := hp.network_config.allocated_ips[container1.name]
@@ -228,8 +244,13 @@ fn test_ip_pool_management() ! {
assert container2.name !in hp.network_config.allocated_ips
// Create new container - should reuse freed IP2
mut container4 := hp.container_new(name: 'pool_test4_${os.getpid()}', image: .alpine_3_20)!
container4.start()!
mut container4 := hp.container_new(
name: 'pool_test4_${os.getpid()}'
image: .custom
custom_image_name: 'alpine_pool4'
docker_url: 'docker.io/library/alpine:3.20'
)!
container4.start(keep_alive: true)!
ip4 := hp.network_config.allocated_ips[container4.name]
assert ip4 == ip2, 'Should reuse freed IP: ${ip2} vs ${ip4}'
@@ -259,8 +280,8 @@ fn test_custom_bridge_config() ! {
mut hp := new(
name: test_name
reset: true
use_podman: true // Skip default image setup in tests
reset: false // Don't reset to avoid race conditions with parallel tests
use_podman: true // Skip default image setup in tests
bridge_name: custom_bridge
subnet: '172.20.0.0/24'
gateway_ip: '172.20.0.1'

View File

@@ -38,7 +38,8 @@ mut container := hp.container_new(
)!
// Start the container (creates and starts it)
container.start()!
// Use keep_alive for containers with short-lived entrypoints
container.start(keep_alive: true)!
// Execute commands
result := container.exec(cmd: 'ls -la /')!
@@ -72,7 +73,7 @@ mut container := hp.container_new(
image: .alpine_3_20
)!
container.start()!
container.start(keep_alive: true)!
```
### Using HeroScript
@@ -91,6 +92,7 @@ container.start()!
!!heropods.container_start
name:'my_container'
keep_alive:true
!!heropods.container_exec
name:'my_container'
@@ -127,15 +129,51 @@ HeroPods supports Mycelium for end-to-end encrypted IPv6 connectivity:
!!heropods.container_start
name:'ipv6_container'
keep_alive:true
// Container now has both IPv4 and IPv6 (Mycelium) connectivity
```
See [MYCELIUM.md](./MYCELIUM.md) for detailed Mycelium configuration.
### Keep-Alive Feature
The `keep_alive` parameter keeps containers running after their entrypoint exits successfully. This is useful for:
- **Short-lived entrypoints**: Containers whose entrypoint performs initialization then exits (e.g., Alpine's `/bin/sh`)
- **Interactive containers**: Containers you want to exec into after startup
- **Service containers**: Containers that need to stay alive for background tasks
**How it works**:
1. Container starts with its original ENTRYPOINT and CMD (OCI-compliant)
2. HeroPods waits for the entrypoint to complete
3. If the entrypoint exits with code 0 (success), a keep-alive process is injected
4. If the entrypoint fails (non-zero exit), the container stops and an error is returned
**Example**:
```v
// Alpine's default CMD is /bin/sh which exits immediately
mut container := hp.container_new(
name: 'my_alpine'
image: .custom
custom_image_name: 'alpine_3_20'
docker_url: 'docker.io/library/alpine:3.20'
)!
// Without keep_alive: container would exit immediately
// With keep_alive: container stays running for exec commands
container.start(keep_alive: true)!
// Now you can exec into the container
result := container.exec(cmd: 'echo "Hello!"')!
```
**Note**: If you see a warning about "bare shell CMD", use `keep_alive: true` when starting the container.
## Features
- **Container Lifecycle**: create, start, stop, delete, exec
- **Keep-Alive Support**: Keep containers running after entrypoint exits
- **IPv4 Bridge Networking**: Automatic IP allocation with NAT
- **IPv6 Mycelium Overlay**: End-to-end encrypted peer-to-peer networking
- **Image Management**: Pull Docker images via Podman or use built-in images

View File

@@ -171,6 +171,7 @@ lib/core
lib/develop
lib/hero/heromodels
lib/virt/heropods
lib/virt/crun
'
// the following tests have no prio and can be ignored