itenv_tools/tools/ubuntu_vm_delete.sh
2025-06-15 19:20:32 +02:00

351 lines
9.9 KiB
Bash
Executable File

#!/bin/bash
# Ubuntu VM Delete Script with Comprehensive Cleanup
# Usage: ubuntu_vm_delete.sh <vm_name|all>
# Use 'all' to delete all VMs
# Abort as soon as an unchecked command fails.
set -e

# --- Storage layout ---------------------------------------------------------
# Each VM lives in its own directory below VMS_SUBVOL; the checks further down
# require VM_BASE_DIR to be on a btrfs filesystem.
# NOTE(review): BASE_SUBVOL is not referenced in the visible part of this
# script; it is kept because other tooling may rely on it.
VM_BASE_DIR="/var/lib/vms"
BTRFS_MOUNT_POINT="/var/lib/vms"
VMS_SUBVOL="${BTRFS_MOUNT_POINT}/vms"
BASE_SUBVOL="${BTRFS_MOUNT_POINT}/base"

# --- Terminal colours -------------------------------------------------------
# Stored as literal backslash sequences; the logging helpers expand them at
# print time.
NC='\033[0m' # No Color
BLUE='\033[0;34m'
YELLOW='\033[1;33m'
GREEN='\033[0;32m'
RED='\033[0;31m'
# Print a timestamped progress message in green on stdout.
# Arguments: $1 - message text (backslash escapes in it are expanded)
log() {
    printf '%b\n' "${GREEN}[$(date +'%Y-%m-%d %H:%M:%S')] $1${NC}"
}
# Print a timestamped warning in yellow.
# Warnings are diagnostics, so they are written to stderr; stdout stays
# reserved for regular progress output and for data that callers capture.
# Arguments: $1 - message text (backslash escapes in it are expanded)
warn() {
    echo -e "${YELLOW}[$(date +'%Y-%m-%d %H:%M:%S')] WARNING: $1${NC}" >&2
}
# Print a timestamped error message in red to stderr and terminate.
# Arguments: $1 - message text (backslash escapes in it are expanded)
# Exits:     always, with status 1 (never returns to the caller)
error() {
    echo -e "${RED}[$(date +'%Y-%m-%d %H:%M:%S')] ERROR: $1${NC}" >&2
    exit 1
}
# Print a timestamped informational note in blue on stdout.
# Arguments: $1 - message text (backslash escapes in it are expanded)
info() {
    printf '%b\n' "${BLUE}[$(date +'%Y-%m-%d %H:%M:%S')] INFO: $1${NC}"
}
# Print the command-line help text on stdout.
# $0 is expanded so the usage line and the examples show the name the script
# was invoked under.
show_usage() {
    cat <<EOF
Ubuntu VM Delete Script

Usage: $0 <vm_name|all>

Arguments:
 vm_name - Name of the VM to delete
 all - Delete ALL VMs (use with extreme caution)

Examples:
 $0 test-vm # Delete specific VM
 $0 all # Delete all VMs

This script will:
 - Stop running VM processes
 - Remove TAP network interfaces
 - Clean up sockets and temporary files
 - Delete btrfs subvolumes
 - Remove all VM data permanently
EOF
}
# Check if running as root
if [ "$EUID" -ne 0 ]; then
    error "This script must be run as root for btrfs and network operations"
fi

# Parse arguments: exactly one positional argument is accepted.
if [ $# -ne 1 ]; then
    show_usage
    exit 1
fi
VM_TARGET="$1"

# Validate VM name (unless it's 'all').
# An empty string contains no forbidden character, so it has to be rejected
# explicitly: with an empty name the target path would be "$VMS_SUBVOL/",
# i.e. the directory holding every VM.
if [ "$VM_TARGET" != "all" ]; then
    if [ -z "$VM_TARGET" ]; then
        error "VM name must not be empty"
    fi
    if [[ "$VM_TARGET" =~ [^a-zA-Z0-9_-] ]]; then
        error "VM name can only contain alphanumeric characters, hyphens, and underscores"
    fi
fi

# Check if VMs directory exists; a missing directory simply means there is
# nothing to delete, which is not an error.
if [ ! -d "$VMS_SUBVOL" ]; then
    warn "VMs directory not found at $VMS_SUBVOL"
    info "No VMs to delete"
    exit 0
fi

# Check if the base directory is on btrfs.
# The fallback keeps 'set -e' from terminating the script silently when stat
# itself fails, so the explanatory error below is always reached.
FILESYSTEM_TYPE=$(stat -f -c %T "$VM_BASE_DIR" 2>/dev/null) || FILESYSTEM_TYPE="unknown"
if [ "$FILESYSTEM_TYPE" != "btrfs" ]; then
    error "Base directory $VM_BASE_DIR is not on a btrfs filesystem (detected: $FILESYSTEM_TYPE)"
fi
# Stop a VM process: send SIGTERM, wait up to 10 seconds, then send SIGKILL
# if the process is still alive.
# Arguments:
#   $1 - PID of the VM process; an empty value is a silent no-op
#   $2 - VM name, used only in messages
# Returns: 0 in every case; problems are reported through warn.
stop_vm_process() {
    local vm_pid="$1"
    local vm_name="$2"

    if [ -z "$vm_pid" ]; then
        return 0
    fi

    # Only a plain integer greater than 1 may be handed to kill. A PID of 0
    # addresses the caller's whole process group and a negative value
    # addresses a process group (or, for -1, every process), so a damaged PID
    # value must never reach kill. The 10# prefix stops a leading zero from
    # being read as octal.
    if [[ ! "$vm_pid" =~ ^[0-9]+$ ]] || (( 10#$vm_pid <= 1 )); then
        warn "Ignoring invalid PID '$vm_pid' for VM '$vm_name'"
        return 0
    fi

    # Check if process exists
    if ! kill -0 "$vm_pid" 2>/dev/null; then
        info "VM process $vm_pid for '$vm_name' is not running"
        return 0
    fi

    log "Stopping VM process $vm_pid for '$vm_name'..."

    # Try graceful shutdown first
    if kill -TERM "$vm_pid" 2>/dev/null; then
        # Poll once per second, for at most 10 seconds, until the process is gone
        local waited
        for (( waited = 0; waited < 10; waited++ )); do
            kill -0 "$vm_pid" 2>/dev/null || break
            sleep 1
        done

        # Force kill if still running
        if kill -0 "$vm_pid" 2>/dev/null; then
            warn "Graceful shutdown failed, forcing termination..."
            kill -KILL "$vm_pid" 2>/dev/null || true
            sleep 1
        fi
    fi

    # Final check
    if kill -0 "$vm_pid" 2>/dev/null; then
        warn "Failed to stop VM process $vm_pid"
    else
        log "VM process $vm_pid stopped successfully"
    fi
}
# Remove a VM's TAP interface and report whether its bridge is now empty.
# Arguments:
#   $1 - TAP interface name (empty: skip the TAP step)
#   $2 - bridge name (empty: skip the bridge check)
#   $3 - VM name, used only in messages
# The bridge itself is never removed; it might be used by other services.
cleanup_network() {
    local tap_name="$1"
    local bridge_name="$2"
    local vm_name="$3"

    # Step 1: delete the TAP device when it is still present.
    if [ -n "$tap_name" ]; then
        if ! ip link show "$tap_name" &>/dev/null; then
            info "TAP interface '$tap_name' not found (already removed)"
        else
            log "Removing TAP interface '$tap_name' for VM '$vm_name'"
            if ! ip link delete "$tap_name" 2>/dev/null; then
                warn "Failed to remove TAP interface '$tap_name'"
            fi
        fi
    fi

    # Step 2: informational check of the bridge; bail out early when there is
    # no bridge to inspect.
    if [ -z "$bridge_name" ]; then
        return 0
    fi
    if ! ip link show "$bridge_name" &>/dev/null; then
        return 0
    fi

    # Count the "tap-" lines among the links enslaved to the bridge. grep -c
    # exits non-zero on a zero count, hence the '|| true'.
    local remaining
    remaining=$(ip link show master "$bridge_name" 2>/dev/null | grep -c "tap-" || true)
    if [ "$remaining" -eq 0 ]; then
        info "Bridge '$bridge_name' has no remaining TAP interfaces"
    fi
    return 0
}
# Remove a VM's control socket, log file and other per-VM temporary files.
# Arguments:
#   $1 - path of the VM socket (empty: skip)
#   $2 - VM name
#   $3 - directory holding the per-VM temporary files (optional, default /tmp)
# Returns: 0; removal failures are reported through warn.
cleanup_vm_files() {
    local vm_socket="$1"
    local vm_name="$2"
    local tmp_dir="${3:-/tmp}"

    # Remove VM socket
    if [ -n "$vm_socket" ] && [ -e "$vm_socket" ]; then
        log "Removing VM socket '$vm_socket'"
        rm -f -- "$vm_socket" || warn "Failed to remove VM socket '$vm_socket'"
    fi

    # Without a name the prefix below would be "cloud-hypervisor-" and would
    # match the temporary files of every VM, so stop here.
    if [ -z "$vm_name" ]; then
        return 0
    fi

    local prefix="$tmp_dir/cloud-hypervisor-$vm_name"

    # Remove log files
    local log_file="$prefix.log"
    if [ -f "$log_file" ]; then
        log "Removing VM log file '$log_file'"
        rm -f -- "$log_file" || warn "Failed to remove log file '$log_file'"
    fi

    # Remove any other temporary files of this VM. Only the exact prefix and
    # "<prefix>.<suffix>" are matched: a bare "<prefix>*" glob would also hit
    # other VMs whose names merely start with this one (deleting "test" would
    # wipe the files of "test-vm" and "test2").
    # NOTE(review): files named "<prefix>-something" are no longer removed,
    # because they cannot be told apart from another VM's files.
    rm -f -- "$prefix" "$prefix".* 2>/dev/null || true
}
# Delete one VM: stop its process, remove its network interface and temporary
# files, then delete its directory (as a btrfs subvolume when it is one).
# Globals:   VMS_SUBVOL (read)
# Arguments: $1 - VM name (a single directory name below VMS_SUBVOL)
# Returns:   0 when the VM was deleted; 1 when the name is unsafe, the VM does
#            not exist, or its directory could not be removed. The function
#            never terminates the script itself, so a batch caller can count
#            the failure and continue with the next VM.
delete_single_vm() {
    local vm_name="$1"

    # An empty name, "." or ".." or anything containing a slash would make
    # vm_dir point at VMS_SUBVOL itself or outside of it.
    case "$vm_name" in
        "" | . | .. | */*)
            warn "Refusing to delete VM with empty or unsafe name '$vm_name'"
            return 1
            ;;
    esac

    local vm_dir="$VMS_SUBVOL/$vm_name"
    local vm_info_file="$vm_dir/vm-info.txt"

    if [ ! -d "$vm_dir" ]; then
        warn "VM '$vm_name' not found at '$vm_dir'"
        return 1
    fi

    log "Deleting VM: $vm_name"

    # Initialize variables with defaults; declared local so values loaded for
    # one VM cannot leak into the handling of the next one.
    local VM_PID=""
    local VM_SOCKET=""
    local TAP_NAME=""
    local BRIDGE_NAME=""
    local VM_IMAGE_PATH=""
    local CLOUD_INIT_PATH=""

    # Load VM info if available
    if [ -f "$vm_info_file" ]; then
        # NOTE(review): 'source' executes the file as shell code with this
        # script's (root) privileges and lets it overwrite any variable in
        # scope. That is only acceptable while vm-info.txt is written solely
        # by trusted tooling; consider parsing known KEY=value pairs instead.
        if source "$vm_info_file" 2>/dev/null; then
            info "Loaded VM configuration from '$vm_info_file'"
        else
            warn "Failed to load VM configuration from '$vm_info_file', proceeding with cleanup anyway"
        fi
    else
        warn "VM info file not found at '$vm_info_file', proceeding with best-effort cleanup"
        # Try to guess some values
        TAP_NAME="tap-$vm_name"
        BRIDGE_NAME="br0"
        VM_SOCKET="/tmp/cloud-hypervisor-$vm_name.sock"
    fi

    # Stop VM process
    if [ -n "$VM_PID" ]; then
        stop_vm_process "$VM_PID" "$vm_name"
    else
        # Try to find the process by name. The name has to appear as a whole
        # token: not preceded by a letter, digit or underscore and not followed
        # by a name character. A plain substring match would also select the
        # processes of "test-vm" and "test2" while deleting "test".
        # NOTE(review): assumes the name shows up in the command line as a
        # path component or as "cloud-hypervisor-<name>.<ext>" - confirm
        # against the script that launches the VMs.
        local found_pids pid
        found_pids=$(pgrep -f "cloud-hypervisor.*[^a-zA-Z0-9_]${vm_name}([^a-zA-Z0-9_-]|\$)" 2>/dev/null || true)
        if [ -n "$found_pids" ]; then
            warn "Found VM process(es) by name: $found_pids"
            # Process each PID separately (pgrep prints one per line)
            while IFS= read -r pid; do
                if [ -n "$pid" ]; then
                    stop_vm_process "$pid" "$vm_name"
                fi
            done <<< "$found_pids"
        fi
    fi

    # Clean up network interfaces
    cleanup_network "$TAP_NAME" "$BRIDGE_NAME" "$vm_name"

    # Clean up VM files and sockets
    cleanup_vm_files "$VM_SOCKET" "$vm_name"

    # Verify the directory is a btrfs subvolume before attempting deletion.
    # On failure the message is printed by running 'error' in a subshell, so
    # its 'exit' ends only that subshell and this function can return 1.
    if btrfs subvolume show "$vm_dir" &>/dev/null; then
        log "Deleting btrfs subvolume '$vm_dir'"
        if ! btrfs subvolume delete "$vm_dir"; then
            ( error "Failed to delete btrfs subvolume '$vm_dir'" ) || true
            return 1
        fi
        log "Btrfs subvolume '$vm_dir' deleted successfully"
    else
        warn "Directory '$vm_dir' is not a btrfs subvolume, removing as regular directory"
        # ${vm_dir:?} aborts instead of expanding to an empty path.
        if ! rm -rf -- "${vm_dir:?}"; then
            ( error "Failed to remove directory '$vm_dir'" ) || true
            return 1
        fi
        log "Directory '$vm_dir' removed successfully"
    fi

    log "VM '$vm_name' deleted successfully"
    return 0
}
# List all VMs: print the name of every directory directly below VMS_SUBVOL,
# one per line, on stdout.
# Globals: VMS_SUBVOL (read)
# Outputs: nothing at all when the directory is missing or holds no VM
#          directories, so line-based readers never see a bogus empty name.
# Returns: 0
list_all_vms() {
    local entry

    if [ ! -d "$VMS_SUBVOL" ]; then
        return 0
    fi

    # An unmatched glob stays literal ("…/*"); the -d test filters it out
    # together with plain files.
    for entry in "$VMS_SUBVOL"/*; do
        if [ -d "$entry" ]; then
            printf '%s\n' "${entry##*/}"
        fi
    done
    return 0
}
# Main deletion logic
if [ "$VM_TARGET" = "all" ]; then
    # Delete all VMs
    warn "You are about to delete ALL VMs!"
    echo ""

    # List all VMs. Names are read line by line instead of through an
    # unquoted $(...) expansion, so a directory name containing spaces or glob
    # characters stays one intact entry; empty lines are dropped so an empty
    # name can never be passed on for deletion.
    vm_list=()
    while IFS= read -r vm; do
        if [ -n "$vm" ]; then
            vm_list+=("$vm")
        fi
    done < <(list_all_vms)

    if [ ${#vm_list[@]} -eq 0 ]; then
        info "No VMs found to delete"
        exit 0
    fi

    echo "VMs to be deleted:"
    for vm in "${vm_list[@]}"; do
        echo " - $vm"
    done
    echo ""
    warn "Deleting ALL VMs without confirmation..."
    log "Proceeding with deletion of all VMs..."

    success_count=0
    failure_count=0
    for vm_name in "${vm_list[@]}"; do
        if delete_single_vm "$vm_name"; then
            success_count=$((success_count + 1))
        else
            failure_count=$((failure_count + 1))
        fi
    done

    echo ""
    log "Deletion summary:"
    log " Successfully deleted: $success_count VMs"
    if [ "$failure_count" -gt 0 ]; then
        warn " Failed to delete: $failure_count VMs"
    fi

    # Clean up any remaining orphaned processes.
    # NOTE(review): this matches every process whose command line contains
    # "cloud-hypervisor", whether or not it belongs to a VM managed here.
    log "Checking for orphaned cloud-hypervisor processes..."
    orphaned_pids=$(pgrep -f "cloud-hypervisor" 2>/dev/null || echo "")
    if [ -n "$orphaned_pids" ]; then
        warn "Found orphaned cloud-hypervisor processes: $orphaned_pids"
        echo "$orphaned_pids" | xargs -r kill -TERM 2>/dev/null || true
        sleep 2
        echo "$orphaned_pids" | xargs -r kill -KILL 2>/dev/null || true
    fi

    # Clean up any remaining TAP interfaces (every link whose line in
    # 'ip link show' contains "tap-").
    log "Checking for orphaned TAP interfaces..."
    orphaned_taps=$(ip link show | grep "tap-" | cut -d: -f2 | tr -d ' ' || echo "")
    if [ -n "$orphaned_taps" ]; then
        warn "Found orphaned TAP interfaces: $orphaned_taps"
        echo "$orphaned_taps" | xargs -r -I {} ip link delete {} 2>/dev/null || true
    fi

    log "All VMs deletion completed"

    # A batch run with failures must not report success or exit with 0.
    if [ "$failure_count" -gt 0 ]; then
        error "$failure_count VM(s) could not be deleted"
    fi
else
    # Delete single VM
    vm_name="$VM_TARGET"
    if [ ! -d "$VMS_SUBVOL/$vm_name" ]; then
        error "VM '$vm_name' not found"
    fi
    log "Deleting VM '$vm_name' without confirmation..."
    # Under 'set -e' a non-zero return here ends the script with that status,
    # so the success message below is printed only after a real deletion.
    delete_single_vm "$vm_name"
fi

log "VM deletion script completed successfully"