#!/bin/bash

# Ubuntu VM Delete Script with Comprehensive Cleanup
#
# Usage: ubuntu_vm_delete.sh <vm_name|all>
#   vm_name - delete a single VM
#   all     - delete all VMs
#
# For each VM this script stops the VM process, removes its TAP interface,
# removes its socket/log files under /tmp and deletes its btrfs subvolume
# (or plain directory) under $VMS_SUBVOL. Must be run as root.

set -e

# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

# Configuration
VM_BASE_DIR="/var/lib/vms"
BTRFS_MOUNT_POINT="/var/lib/vms"
BASE_SUBVOL="$BTRFS_MOUNT_POINT/base"
VMS_SUBVOL="$BTRFS_MOUNT_POINT/vms"

# Print a timestamped informational message in green.
log() {
    echo -e "${GREEN}[$(date +'%Y-%m-%d %H:%M:%S')] $1${NC}"
}

# Print a timestamped warning in yellow; never exits.
warn() {
    echo -e "${YELLOW}[$(date +'%Y-%m-%d %H:%M:%S')] WARNING: $1${NC}"
}

# Print a timestamped error in red WITHOUT exiting. Used where the caller
# must keep going (e.g. while iterating over all VMs).
report_error() {
    echo -e "${RED}[$(date +'%Y-%m-%d %H:%M:%S')] ERROR: $1${NC}"
}

# Print a timestamped error in red and terminate the script with status 1.
error() {
    report_error "$1"
    exit 1
}

# Print a timestamped informational message in blue.
info() {
    echo -e "${BLUE}[$(date +'%Y-%m-%d %H:%M:%S')] INFO: $1${NC}"
}

# Print command-line help to stdout.
show_usage() {
    echo "Ubuntu VM Delete Script"
    echo ""
    echo "Usage: $0 <vm_name|all>"
    echo ""
    echo "Arguments:"
    echo " vm_name - Name of the VM to delete"
    echo " all - Delete ALL VMs (use with extreme caution)"
    echo ""
    echo "Examples:"
    echo " $0 test-vm # Delete specific VM"
    echo " $0 all # Delete all VMs"
    echo ""
    echo "This script will:"
    echo " - Stop running VM processes"
    echo " - Remove TAP network interfaces"
    echo " - Clean up sockets and temporary files"
    echo " - Delete btrfs subvolumes"
    echo " - Remove all VM data permanently"
}

# Check if running as root
if [ "$EUID" -ne 0 ]; then
    error "This script must be run as root for btrfs and network operations"
fi

# Parse arguments
if [ $# -ne 1 ]; then
    show_usage
    exit 1
fi

VM_TARGET="$1"

# An empty name would make the VM directory resolve to "$VMS_SUBVOL/" itself
# and the script would then delete the whole VMs subvolume.
if [ -z "$VM_TARGET" ]; then
    error "VM name must not be empty"
fi

# Validate VM name (unless it's 'all')
if [ "$VM_TARGET" != "all" ] && [[ "$VM_TARGET" =~ [^a-zA-Z0-9_-] ]]; then
    error "VM name can only contain alphanumeric characters, hyphens, and underscores"
fi

# Check if VMs directory exists
if [ ! -d "$VMS_SUBVOL" ]; then
    warn "VMs directory not found at $VMS_SUBVOL"
    info "No VMs to delete"
    exit 0
fi

# Check if the base directory is on btrfs. The fallback keeps a failing
# stat from silently aborting the script under 'set -e'.
FILESYSTEM_TYPE=$(stat -f -c %T "$VM_BASE_DIR" 2>/dev/null) || FILESYSTEM_TYPE="unknown"
if [ "$FILESYSTEM_TYPE" != "btrfs" ]; then
    error "Base directory $VM_BASE_DIR is not on a btrfs filesystem (detected: $FILESYSTEM_TYPE)"
fi

# Stop a VM process: SIGTERM first, SIGKILL after 10 seconds.
# Arguments: $1 - PID of the VM process (may be empty)
#            $2 - VM name (used in messages only)
# Returns:   always 0; failures are reported as warnings.
stop_vm_process() {
    local vm_pid="$1"
    local vm_name="$2"

    if [ -z "$vm_pid" ]; then
        return 0
    fi

    # Only a positive integer is a single-process target for kill:
    # 0 addresses the caller's process group and negative values address
    # process groups / all processes.
    if ! [[ "$vm_pid" =~ ^[1-9][0-9]*$ ]]; then
        warn "Ignoring invalid PID '$vm_pid' for VM '$vm_name'"
        return 0
    fi

    # Check if process exists
    if ! kill -0 "$vm_pid" 2>/dev/null; then
        info "VM process $vm_pid for '$vm_name' is not running"
        return 0
    fi

    log "Stopping VM process $vm_pid for '$vm_name'..."

    # Try graceful shutdown first
    if kill -TERM "$vm_pid" 2>/dev/null; then
        # Wait up to 10 seconds for graceful shutdown
        local count=0
        while [ $count -lt 10 ] && kill -0 "$vm_pid" 2>/dev/null; do
            sleep 1
            count=$((count + 1))
        done

        # Force kill if still running
        if kill -0 "$vm_pid" 2>/dev/null; then
            warn "Graceful shutdown failed, forcing termination..."
            kill -KILL "$vm_pid" 2>/dev/null || true
            sleep 1
        fi
    fi

    # Final check
    if kill -0 "$vm_pid" 2>/dev/null; then
        warn "Failed to stop VM process $vm_pid"
    else
        log "VM process $vm_pid stopped successfully"
    fi
}

# Remove a VM's TAP interface and report whether its bridge is now unused.
# Arguments: $1 - TAP interface name (may be empty)
#            $2 - bridge name (may be empty)
#            $3 - VM name (used in messages only)
cleanup_network() {
    local tap_name="$1"
    local bridge_name="$2"
    local vm_name="$3"

    # Remove TAP interface
    if [ -n "$tap_name" ]; then
        if ip link show "$tap_name" &>/dev/null; then
            log "Removing TAP interface '$tap_name' for VM '$vm_name'"
            ip link delete "$tap_name" 2>/dev/null || warn "Failed to remove TAP interface '$tap_name'"
        else
            info "TAP interface '$tap_name' not found (already removed)"
        fi
    fi

    # Check if bridge still has any TAP interfaces
    if [ -n "$bridge_name" ] && ip link show "$bridge_name" &>/dev/null; then
        local tap_count
        # grep -c prints "0" AND exits 1 when nothing matches, so the
        # fallback must not print a second "0" (that would yield "0\n0").
        tap_count=$(ip link show master "$bridge_name" 2>/dev/null | grep -c "tap-" || true)
        if [ "${tap_count:-0}" -eq 0 ]; then
            info "Bridge '$bridge_name' has no remaining TAP interfaces"
            # Note: We don't automatically remove the bridge as it might be used by other services
        fi
    fi
}

# Remove a VM's API socket, log file and other temporary files under /tmp.
# Arguments: $1 - path of the VM socket (may be empty)
#            $2 - VM name
cleanup_vm_files() {
    local vm_socket="$1"
    local vm_name="$2"

    # Remove VM socket
    if [ -n "$vm_socket" ] && [ -e "$vm_socket" ]; then
        log "Removing VM socket '$vm_socket'"
        rm -f "$vm_socket" || warn "Failed to remove VM socket '$vm_socket'"
    fi

    # Remove log files
    local log_file="/tmp/cloud-hypervisor-$vm_name.log"
    if [ -f "$log_file" ]; then
        log "Removing VM log file '$log_file'"
        rm -f "$log_file" || warn "Failed to remove log file '$log_file'"
    fi

    # Remove any other temporary files. The glob is anchored on "<name>."
    # so that deleting VM "test" does not also remove the files of a VM
    # called "test-2".
    rm -f "/tmp/cloud-hypervisor-$vm_name" "/tmp/cloud-hypervisor-$vm_name."* 2>/dev/null || true
}

# Delete a single VM: stop its process, clean up network and temp files,
# then delete its btrfs subvolume (or plain directory).
# Arguments: $1 - VM name (directory name under $VMS_SUBVOL)
# Returns:   0 on success, 1 if the VM does not exist or its storage could
#            not be removed. Never exits the script, so the 'all' loop can
#            continue with the remaining VMs and count failures.
delete_single_vm() {
    local vm_name="$1"

    # Defense in depth: an empty name would target $VMS_SUBVOL itself.
    if [ -z "$vm_name" ]; then
        warn "Refusing to delete a VM with an empty name"
        return 1
    fi

    local vm_dir="$VMS_SUBVOL/$vm_name"
    local vm_info_file="$vm_dir/vm-info.txt"

    if [ ! -d "$vm_dir" ]; then
        warn "VM '$vm_name' not found at '$vm_dir'"
        return 1
    fi

    log "Deleting VM: $vm_name"

    # Initialize variables with defaults
    local VM_PID=""
    local VM_SOCKET=""
    local TAP_NAME=""
    local BRIDGE_NAME=""
    local VM_IMAGE_PATH=""
    local CLOUD_INIT_PATH=""

    # Load VM info if available
    if [ -f "$vm_info_file" ]; then
        # NOTE(review): sourcing executes the file's contents as root; this
        # is only safe while vm-info.txt is writable by trusted users only.
        if source "$vm_info_file" 2>/dev/null; then
            info "Loaded VM configuration from '$vm_info_file'"
        else
            warn "Failed to load VM configuration from '$vm_info_file', proceeding with cleanup anyway"
        fi
    else
        warn "VM info file not found at '$vm_info_file', proceeding with best-effort cleanup"
        # Try to guess some values
        TAP_NAME="tap-$vm_name"
        BRIDGE_NAME="br0"
        VM_SOCKET="/tmp/cloud-hypervisor-$vm_name.sock"
    fi

    # Stop VM process
    if [ -n "$VM_PID" ]; then
        stop_vm_process "$VM_PID" "$vm_name"
    else
        # Try to find the process by name. The trailing group requires the
        # name to end at a non-name character (or end of command line) so
        # that "test" does not match the process of VM "test-2".
        local found_pids pid
        found_pids=$(pgrep -f "cloud-hypervisor.*${vm_name}([^a-zA-Z0-9_-]|\$)" 2>/dev/null || true)
        # pgrep may report several PIDs, one per line; handle each one.
        for pid in $found_pids; do
            # Never signal this script itself.
            if [ "$pid" = "$$" ]; then
                continue
            fi
            warn "Found VM process by name: $pid"
            stop_vm_process "$pid" "$vm_name"
        done
    fi

    # Clean up network interfaces
    cleanup_network "$TAP_NAME" "$BRIDGE_NAME" "$vm_name"

    # Clean up VM files and sockets
    cleanup_vm_files "$VM_SOCKET" "$vm_name"

    # Verify the directory is a btrfs subvolume before attempting deletion
    if btrfs subvolume show "$vm_dir" &>/dev/null; then
        log "Deleting btrfs subvolume '$vm_dir'"
        if ! btrfs subvolume delete "$vm_dir"; then
            report_error "Failed to delete btrfs subvolume '$vm_dir'"
            return 1
        fi
        log "Btrfs subvolume '$vm_dir' deleted successfully"
    else
        warn "Directory '$vm_dir' is not a btrfs subvolume, removing as regular directory"
        if ! rm -rf "$vm_dir"; then
            report_error "Failed to remove directory '$vm_dir'"
            return 1
        fi
        log "Directory '$vm_dir' removed successfully"
    fi

    log "VM '$vm_name' deleted successfully"
    return 0
}

# Print the name of every directory directly under $VMS_SUBVOL, one per
# line. Prints nothing when there are no VMs.
list_all_vms() {
    local vm_dir

    if [ ! -d "$VMS_SUBVOL" ]; then
        return 0
    fi

    for vm_dir in "$VMS_SUBVOL"/*; do
        # Also filters out the literal pattern left behind by an unmatched glob.
        if [ -d "$vm_dir" ]; then
            printf '%s\n' "${vm_dir##*/}"
        fi
    done
}

# Main deletion logic
if [ "$VM_TARGET" = "all" ]; then
    # Delete all VMs
    warn "You are about to delete ALL VMs!"
    echo ""

    # List all VMs. mapfile keeps each line as one element, so names are
    # neither word-split nor glob-expanded.
    mapfile -t vm_list < <(list_all_vms)

    if [ ${#vm_list[@]} -eq 0 ]; then
        info "No VMs found to delete"
        exit 0
    fi

    echo "VMs to be deleted:"
    for vm in "${vm_list[@]}"; do
        echo " - $vm"
    done
    echo ""

    warn "Deleting ALL VMs without confirmation..."
    log "Proceeding with deletion of all VMs..."

    success_count=0
    failure_count=0

    for vm_name in "${vm_list[@]}"; do
        if delete_single_vm "$vm_name"; then
            success_count=$((success_count + 1))
        else
            failure_count=$((failure_count + 1))
        fi
    done

    echo ""
    log "Deletion summary:"
    log " Successfully deleted: $success_count VMs"
    if [ $failure_count -gt 0 ]; then
        warn " Failed to delete: $failure_count VMs"
    fi

    # Clean up any remaining orphaned processes
    log "Checking for orphaned cloud-hypervisor processes..."
    # Exclude this script's own PID in case its path matches the pattern.
    orphaned_pids=$(pgrep -f "cloud-hypervisor" 2>/dev/null | grep -vx "$$" || true)
    if [ -n "$orphaned_pids" ]; then
        warn "Found orphaned cloud-hypervisor processes: $orphaned_pids"
        echo "$orphaned_pids" | xargs -r kill -TERM 2>/dev/null || true
        sleep 2
        echo "$orphaned_pids" | xargs -r kill -KILL 2>/dev/null || true
    fi

    # Clean up any remaining TAP interfaces
    log "Checking for orphaned TAP interfaces..."
    # 'ip -o' prints one record per interface, so only the name field is
    # matched; any "@peer" suffix is stripped from the name.
    orphaned_taps=$(ip -o link show | awk -F': ' '$2 ~ /tap-/ { sub(/@.*/, "", $2); print $2 }' || true)
    if [ -n "$orphaned_taps" ]; then
        warn "Found orphaned TAP interfaces: $orphaned_taps"
        echo "$orphaned_taps" | xargs -r -I {} ip link delete {} 2>/dev/null || true
    fi

    log "All VMs deletion completed"

    # Do not report overall success when some VMs could not be deleted.
    if [ "$failure_count" -gt 0 ]; then
        exit 1
    fi
else
    # Delete single VM
    vm_name="$VM_TARGET"

    if [ ! -d "$VMS_SUBVOL/$vm_name" ]; then
        error "VM '$vm_name' not found"
    fi

    log "Deleting VM '$vm_name' without confirmation..."
    # delete_single_vm has already printed the reason on failure.
    delete_single_vm "$vm_name" || exit 1
fi

log "VM deletion script completed successfully"