From 506c124578ce0a8d0b38391f14ff639f6c615da6 Mon Sep 17 00:00:00 2001 From: "kirby@puppetlabs.com" Date: Fri, 14 Oct 2016 12:40:33 -0700 Subject: [PATCH 01/11] Add vmpooler__migrating__vm at VM checkout --- lib/vmpooler/api/v1.rb | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/vmpooler/api/v1.rb b/lib/vmpooler/api/v1.rb index 3c8c8aa..22a0ef1 100644 --- a/lib/vmpooler/api/v1.rb +++ b/lib/vmpooler/api/v1.rb @@ -55,6 +55,7 @@ module Vmpooler def account_for_starting_vm(template, vm) backend.sadd('vmpooler__running__' + template, vm) + backend.sadd('vmpooler__migrating__' + template, vm) backend.hset('vmpooler__active__' + template, vm, Time.now) backend.hset('vmpooler__vm__' + vm, 'checkout', Time.now) From 538f30af8e5cf0d5a96cb321c2f1d159c35dec1e Mon Sep 17 00:00:00 2001 From: "kirby@puppetlabs.com" Date: Fri, 14 Oct 2016 12:46:37 -0700 Subject: [PATCH 02/11] Update find_least_used_host method to evaluate hosts based on utilization. Without this change the determination is based on VM count. Additionally, a method is added to find the least used host compatible with the provided VM in order to support migrating a VM at checkout. Lastly, a capability is added to migrate VMs to a provided host, which also supports migrating VMs at checkout. Add method to check if vsphere connection is alive. Replace repeated usage of checking the current time in a begin/rescue block with this method. --- lib/vmpooler/vsphere_helper.rb | 168 ++++++++++++++++++--------------- 1 file changed, 93 insertions(+), 75 deletions(-) diff --git a/lib/vmpooler/vsphere_helper.rb b/lib/vmpooler/vsphere_helper.rb index 622b66d..6b6f57d 100644 --- a/lib/vmpooler/vsphere_helper.rb +++ b/lib/vmpooler/vsphere_helper.rb @@ -17,11 +17,7 @@ module Vmpooler end def add_disk(vm, size, datastore) - begin - @connection.serviceInstance.CurrentTime - rescue - initialize - end + vsphere_connection_alive? 
@connection return false unless size.to_i > 0 @@ -71,22 +67,14 @@ module Vmpooler end def find_datastore(datastorename) - begin - @connection.serviceInstance.CurrentTime - rescue - initialize - end + vsphere_connection_alive? @connection datacenter = @connection.serviceInstance.find_datacenter datacenter.find_datastore(datastorename) end def find_device(vm, deviceName) - begin - @connection.serviceInstance.CurrentTime - rescue - initialize - end + vsphere_connection_alive? @connection vm.config.hardware.device.each do |device| return device if device.deviceInfo.label == deviceName @@ -96,11 +84,7 @@ module Vmpooler end def find_disk_controller(vm) - begin - @connection.serviceInstance.CurrentTime - rescue - initialize - end + vsphere_connection_alive? @connection devices = find_disk_devices(vm) @@ -114,11 +98,7 @@ module Vmpooler end def find_disk_devices(vm) - begin - @connection.serviceInstance.CurrentTime - rescue - initialize - end + vsphere_connection_alive? @connection devices = {} @@ -146,11 +126,7 @@ module Vmpooler end def find_disk_unit_number(vm, controller) - begin - @connection.serviceInstance.CurrentTime - rescue - initialize - end + vsphere_connection_alive? @connection used_unit_numbers = [] available_unit_numbers = [] @@ -175,11 +151,7 @@ module Vmpooler end def find_folder(foldername) - begin - @connection.serviceInstance.CurrentTime - rescue - initialize - end + vsphere_connection_alive? @connection datacenter = @connection.serviceInstance.find_datacenter base = datacenter.vmFolder @@ -196,39 +168,96 @@ module Vmpooler base end - def find_least_used_host(cluster) + # Returns an array containing cumulative CPU and memory utilization of a host, and its object reference + # Params: + # +model+:: CPU arch version to match on + # +limit+:: Hard limit for CPU or memory utilization beyond which a host is excluded for deployments + def get_host_utilization(host, model=nil, limit=90) + if model + return nil unless host_has_cpu_model? 
host, model + end + return nil if host.runtime.inMaintenanceMode + return nil unless host.overallStatus == 'green' + + cpu_utilization = cpu_utilization_for host + memory_utilization = memory_utilization_for host + + return nil if cpu_utilization > limit + return nil if memory_utilization > limit + + [ cpu_utilization + memory_utilization, host ] + end + + def host_has_cpu_model?(host, model) + get_host_cpu_arch_version(host) == model + end + + def get_host_cpu_arch_version(host) + cpu_model = host.hardware.cpuPkg[0].description + cpu_model_parts = cpu_model.split() + arch_version = cpu_model_parts[4] + arch_version + end + + def cpu_utilization_for(host) + cpu_usage = host.summary.quickStats.overallCpuUsage + cpu_size = host.summary.hardware.cpuMhz * host.summary.hardware.numCpuCores + (cpu_usage.to_f / cpu_size.to_f) * 100 + end + + def memory_utilization_for(host) + memory_usage = host.summary.quickStats.overallMemoryUsage + memory_size = host.summary.hardware.memorySize / 1024 / 1024 + (memory_usage.to_f / memory_size.to_f) * 100 + end + + def vsphere_connection_alive?(connection) begin - @connection.serviceInstance.CurrentTime + connection.serviceInstance.CurrentTime rescue initialize end + end - hosts = {} - hosts_sort = {} + def find_least_used_host(cluster) + vsphere_connection_alive? 
@connection + cluster_object = find_cluster(cluster) + target_hosts = get_cluster_host_utilization(cluster_object) + least_used_host = target_hosts.sort[0][1] + least_used_host + end + + def find_cluster(cluster) datacenter = @connection.serviceInstance.find_datacenter - datacenter.hostFolder.children.each do |folder| - next unless folder.name == cluster - folder.host.each do |host| - if - (host.overallStatus == 'green') && - (!host.runtime.inMaintenanceMode) + datacenter.hostFolder.children.find { |cluster_object| cluster_object.name == cluster } + end - hosts[host.name] = host - hosts_sort[host.name] = host.vm.length - end - end + def get_cluster_host_utilization(cluster) + cluster_hosts = [] + cluster.host.each do |host| + host_usage = get_host_utilization(host) + cluster_hosts << host_usage if host_usage end + cluster_hosts + end - hosts[hosts_sort.sort_by { |_k, v| v }[0][0]] + def find_least_used_compatible_host(vm) + vsphere_connection_alive? @connection + + source_host = vm.summary.runtime.host + model = get_host_cpu_arch_version(source_host) + cluster = source_host.parent + target_hosts = [] + cluster.host.each do |host| + host_usage = get_host_utilization(host, model) + target_hosts << host_usage if host_usage + end + target_hosts.sort[0][1] end def find_pool(poolname) - begin - @connection.serviceInstance.CurrentTime - rescue - initialize - end + vsphere_connection_alive? @connection datacenter = @connection.serviceInstance.find_datacenter base = datacenter.hostFolder @@ -257,21 +286,13 @@ module Vmpooler end def find_vm(vmname) - begin - @connection.serviceInstance.CurrentTime - rescue - initialize - end + vsphere_connection_alive? @connection @connection.searchIndex.FindByDnsName(vmSearch: true, dnsName: vmname) end def find_vm_heavy(vmname) - begin - @connection.serviceInstance.CurrentTime - rescue - initialize - end + vsphere_connection_alive? @connection vmname = vmname.is_a?(Array) ? 
vmname : [vmname] containerView = get_base_vm_container_from @connection @@ -321,11 +342,7 @@ module Vmpooler end def find_vmdks(vmname, datastore) - begin - connection.serviceInstance.CurrentTime - rescue - initialize - end + vsphere_connection_alive? @connection disks = [] @@ -344,11 +361,7 @@ module Vmpooler end def get_base_vm_container_from(connection) - begin - connection.serviceInstance.CurrentTime - rescue - initialize - end + vsphere_connection_alive? @connection viewManager = connection.serviceContent.viewManager viewManager.CreateContainerView( @@ -372,6 +385,11 @@ module Vmpooler snapshot end + def migrate_vm_host(vm, host) + relospec = RbVmomi::VIM.VirtualMachineRelocateSpec(host: host) + vm.RelocateVM_Task(spec: relospec).wait_for_completion + end + def close @connection.close end From 58a548bc90cc30c761e0e6b233e74c8deca0501b Mon Sep 17 00:00:00 2001 From: "kirby@puppetlabs.com" Date: Fri, 14 Oct 2016 12:55:36 -0700 Subject: [PATCH 03/11] Add support for migrating VMs to pool_manager. This commit adds a capability to pool_manager to migrate VMs placed in the migrating queue. When a VM is checked out an entry is created in vmpooler__migrating. The existing process for evaluating VM states executes the migrate_vm method for the provided VM, and removes it from the queue. The least used compatible host for the provided VM is selected and, if necessary, a migration to the lesser used host is performed. Migration time and time from the task being queued until completion are both tracked with the redis VM object in 'migration_time' and 'checkout_to_migration'. The migration time is logged in the vmpooler.log, or the VM is reported as not requiring migration. Without this change VMs are not evaluated for checkout at request time. Add a method to wrap find_vm and find_vm_heavy in order to allow a single operation to be performed that does both. 
This commit also adds support for a configuration setting called migration_limit that makes migration at checkout optional. Additionally, logging is added to report a VM parent host when it is checked out. Without this change vmpooler assumes that migration at checkout is always enabled. If this setting is not present, or if the setting is 0, then migration at checkout will be disabled. If the setting is greater than 0 then that setting will be used to enforce a limit for the number of simultaneous migrations that will be evaluated. Documentation of this configuration option is added to the vmpooler.yaml.example file. --- lib/vmpooler/pool_manager.rb | 63 +++++++++++++++++++++++++++++++++- lib/vmpooler/vsphere_helper.rb | 30 ++++++++-------- vmpooler.yaml.example | 7 ++++ 3 files changed, 84 insertions(+), 16 deletions(-) diff --git a/lib/vmpooler/pool_manager.rb b/lib/vmpooler/pool_manager.rb index 53895bb..7248be0 100644 --- a/lib/vmpooler/pool_manager.rb +++ b/lib/vmpooler/pool_manager.rb @@ -455,6 +455,55 @@ module Vmpooler end end + def find_vsphere_pool_vm(pool, vm) + $vsphere[pool].find_vm(vm) || $vsphere[pool].find_vm_heavy(vm)[vm] + end + + def migration_enabled?(migration_limit) + # Returns migration_limit setting when enabled + return false if migration_limit == 0 or not migration_limit + migration_limit if migration_limit >= 1 + end + + def migrate_vm(vm, pool) + Thread.new do + _migrate_vm(vm, pool) + end + end + + def _migrate_vm(vm, pool) + $redis.srem('vmpooler__migrating__' + pool, vm) + vm_object = find_vsphere_pool_vm(pool, vm) + parent_host = vm_object.summary.runtime.host + parent_host_name = parent_host.name + migration_limit = migration_enabled? 
$config[:config]['migration_limit'] + + if not migration_limit + $logger.log('s', "[ ] [#{pool}] '#{vm}' is running on #{parent_host_name}") + else + migration_count = $redis.smembers('vmpooler__migration').size + if migration_count >= migration_limit + $logger.log('s', "[ ] [#{pool}] '#{vm}' is running on #{parent_host_name}. No migration will be evaluated since the migration_limit has been reached") + else + $redis.sadd('vmpooler__migration', vm) + host = $vsphere[pool].find_least_used_compatible_host(vm_object) + if host == parent_host + $logger.log('s', "[ ] [#{pool}] No migration required for '#{vm}' running on #{parent_host_name}") + else + start = Time.now + $vsphere[pool].migrate_vm_host(vm_object, host) + finish = '%.2f' % (Time.now - start) + $metrics.timing("migrate.#{vm['template']}", finish) + checkout_to_migration = '%.2f' % (Time.now - Time.parse($redis.hget('vmpooler__vm__' + vm, 'checkout'))) + $redis.hset('vmpooler__vm__' + vm, 'migration_time', finish) + $redis.hset('vmpooler__vm__' + vm, 'checkout_to_migration', checkout_to_migration) + $logger.log('s', "[>] [#{pool}] '#{vm}' migrated from #{parent_host_name} to #{host.name} in #{finish} seconds") + end + $redis.srem('vmpooler__migration', vm) + end + end + end + def check_pool(pool) $logger.log('d', "[*] [#{pool['name']}] starting worker thread") @@ -480,7 +529,8 @@ module Vmpooler (! $redis.sismember('vmpooler__ready__' + pool['name'], vm['name'])) && (! $redis.sismember('vmpooler__pending__' + pool['name'], vm['name'])) && (! $redis.sismember('vmpooler__completed__' + pool['name'], vm['name'])) && - (! $redis.sismember('vmpooler__discovered__' + pool['name'], vm['name'])) + (! $redis.sismember('vmpooler__discovered__' + pool['name'], vm['name'])) && + (! 
$redis.sismember('vmpooler__migrating__' + pool['name'], vm['name'])) $redis.sadd('vmpooler__discovered__' + pool['name'], vm['name']) @@ -555,6 +605,17 @@ module Vmpooler end end + # MIGRATIONS + $redis.smembers('vmpooler__migrating__' + pool['name']).each do |vm| + if inventory[vm] + begin + migrate_vm(vm, pool['name']) + rescue => err + $logger.log('s', "[x] [#{pool['name']}] '#{vm}' failed to migrate: #{err}") + end + end + end + # REPOPULATE ready = $redis.scard('vmpooler__ready__' + pool['name']) total = $redis.scard('vmpooler__pending__' + pool['name']) + ready diff --git a/lib/vmpooler/vsphere_helper.rb b/lib/vmpooler/vsphere_helper.rb index 6b6f57d..6e65e30 100644 --- a/lib/vmpooler/vsphere_helper.rb +++ b/lib/vmpooler/vsphere_helper.rb @@ -17,7 +17,7 @@ module Vmpooler end def add_disk(vm, size, datastore) - vsphere_connection_alive? @connection + ensure_connected @connection return false unless size.to_i > 0 @@ -67,14 +67,14 @@ module Vmpooler end def find_datastore(datastorename) - vsphere_connection_alive? @connection + ensure_connected @connection datacenter = @connection.serviceInstance.find_datacenter datacenter.find_datastore(datastorename) end def find_device(vm, deviceName) - vsphere_connection_alive? @connection + ensure_connected @connection vm.config.hardware.device.each do |device| return device if device.deviceInfo.label == deviceName @@ -84,7 +84,7 @@ module Vmpooler end def find_disk_controller(vm) - vsphere_connection_alive? @connection + ensure_connected @connection devices = find_disk_devices(vm) @@ -98,7 +98,7 @@ module Vmpooler end def find_disk_devices(vm) - vsphere_connection_alive? @connection + ensure_connected @connection devices = {} @@ -126,7 +126,7 @@ module Vmpooler end def find_disk_unit_number(vm, controller) - vsphere_connection_alive? @connection + ensure_connected @connection used_unit_numbers = [] available_unit_numbers = [] @@ -151,7 +151,7 @@ module Vmpooler end def find_folder(foldername) - vsphere_connection_alive? 
@connection + ensure_connected @connection datacenter = @connection.serviceInstance.find_datacenter base = datacenter.vmFolder @@ -211,7 +211,7 @@ module Vmpooler (memory_usage.to_f / memory_size.to_f) * 100 end - def vsphere_connection_alive?(connection) + def ensure_connected(connection) begin connection.serviceInstance.CurrentTime rescue @@ -220,7 +220,7 @@ module Vmpooler end def find_least_used_host(cluster) - vsphere_connection_alive? @connection + ensure_connected @connection cluster_object = find_cluster(cluster) target_hosts = get_cluster_host_utilization(cluster_object) @@ -243,7 +243,7 @@ module Vmpooler end def find_least_used_compatible_host(vm) - vsphere_connection_alive? @connection + ensure_connected @connection source_host = vm.summary.runtime.host model = get_host_cpu_arch_version(source_host) @@ -257,7 +257,7 @@ module Vmpooler end def find_pool(poolname) - vsphere_connection_alive? @connection + ensure_connected @connection datacenter = @connection.serviceInstance.find_datacenter base = datacenter.hostFolder @@ -286,13 +286,13 @@ module Vmpooler end def find_vm(vmname) - vsphere_connection_alive? @connection + ensure_connected @connection @connection.searchIndex.FindByDnsName(vmSearch: true, dnsName: vmname) end def find_vm_heavy(vmname) - vsphere_connection_alive? @connection + ensure_connected @connection vmname = vmname.is_a?(Array) ? vmname : [vmname] containerView = get_base_vm_container_from @connection @@ -342,7 +342,7 @@ module Vmpooler end def find_vmdks(vmname, datastore) - vsphere_connection_alive? @connection + ensure_connected @connection disks = [] @@ -361,7 +361,7 @@ module Vmpooler end def get_base_vm_container_from(connection) - vsphere_connection_alive? 
@connection + ensure_connected @connection viewManager = connection.serviceContent.viewManager viewManager.CreateContainerView( diff --git a/vmpooler.yaml.example b/vmpooler.yaml.example index 4e54891..c83808c 100644 --- a/vmpooler.yaml.example +++ b/vmpooler.yaml.example @@ -225,6 +225,13 @@ # If set, prefixes all created VMs with this string. This should include # a separator. # (optional; default: '') +# +# - migration_limit +# When set to any value greater than 0 enable VM migration at checkout. +# When enabled this capability will evaluate a VM for migration when it is requested +# in an effort to maintain a more even distribution of load across compute resources. +# The migration_limit ensures that no more than n migrations will be evaluated at any one time +# and greatly reduces the possibility of VMs ending up bunched together on a particular host. # Example: From a244f9b92aabb8fef60b9c934680ae77ca24596a Mon Sep 17 00:00:00 2001 From: "kirby@puppetlabs.com" Date: Tue, 15 Nov 2016 13:49:12 -0800 Subject: [PATCH 04/11] Stop reloading configuration file from vspherehelper and instead source credentials from the configuration object that itself loads the configuration file when the application starts. Without this change the configuration file is reloaded every time vspherehelper is called. Additionally, this change makes it more straightforward to test vspherehelper connections. A method is added to make more clear what's happening when checking if a socket can be opened to a pending VM on port 22. Additionally, the connection appends domain from the configuration, when present, to the VM name so DNS search is not required. 
--- lib/vmpooler/pool_manager.rb | 22 ++++++++++++++-------- lib/vmpooler/vsphere_helper.rb | 31 ++++++++++++++++--------------- 2 files changed, 30 insertions(+), 23 deletions(-) diff --git a/lib/vmpooler/pool_manager.rb b/lib/vmpooler/pool_manager.rb index 7248be0..3c2836f 100644 --- a/lib/vmpooler/pool_manager.rb +++ b/lib/vmpooler/pool_manager.rb @@ -26,14 +26,20 @@ module Vmpooler end end + def open_socket(host, domain=nil, timeout=5, port=22) + Timeout.timeout(timeout) do + target_host = host + target_host = "#{host}.#{domain}" if domain + TCPSocket.new target_host, port + end + end + def _check_pending_vm(vm, pool, timeout) host = $vsphere[pool].find_vm(vm) if host begin - Timeout.timeout(5) do - TCPSocket.new vm, 22 - end + open_socket vm, $config[:config]['domain'], timeout move_pending_vm_to_ready(vm, pool, host) rescue fail_pending_vm(vm, pool, timeout) @@ -395,7 +401,7 @@ module Vmpooler def check_disk_queue $logger.log('d', "[*] [disk_manager] starting worker thread") - $vsphere['disk_manager'] ||= Vmpooler::VsphereHelper.new + $vsphere['disk_manager'] ||= Vmpooler::VsphereHelper.new $config[:vsphere] $threads['disk_manager'] = Thread.new do loop do @@ -421,7 +427,7 @@ module Vmpooler def check_snapshot_queue $logger.log('d', "[*] [snapshot_manager] starting worker thread") - $vsphere['snapshot_manager'] ||= Vmpooler::VsphereHelper.new + $vsphere['snapshot_manager'] ||= Vmpooler::VsphereHelper.new $config[:vsphere] $threads['snapshot_manager'] = Thread.new do loop do @@ -459,7 +465,7 @@ module Vmpooler $vsphere[pool].find_vm(vm) || $vsphere[pool].find_vm_heavy(vm)[vm] end - def migration_enabled?(migration_limit) + def migration_limit(migration_limit) # Returns migration_limit setting when enabled return false if migration_limit == 0 or not migration_limit migration_limit if migration_limit >= 1 @@ -476,7 +482,7 @@ module Vmpooler vm_object = find_vsphere_pool_vm(pool, vm) parent_host = vm_object.summary.runtime.host parent_host_name = parent_host.name - 
migration_limit = migration_enabled? $config[:config]['migration_limit'] + migration_limit = migration_limit $config[:config]['migration_limit'] if not migration_limit $logger.log('s', "[ ] [#{pool}] '#{vm}' is running on #{parent_host_name}") @@ -507,7 +513,7 @@ module Vmpooler def check_pool(pool) $logger.log('d', "[*] [#{pool['name']}] starting worker thread") - $vsphere[pool['name']] ||= Vmpooler::VsphereHelper.new + $vsphere[pool['name']] ||= Vmpooler::VsphereHelper.new $config[:vsphere] $threads[pool['name']] = Thread.new do loop do diff --git a/lib/vmpooler/vsphere_helper.rb b/lib/vmpooler/vsphere_helper.rb index 6e65e30..d72c8b0 100644 --- a/lib/vmpooler/vsphere_helper.rb +++ b/lib/vmpooler/vsphere_helper.rb @@ -6,14 +6,23 @@ module Vmpooler DISK_TYPE = 'thin' DISK_MODE = 'persistent' - def initialize(_vInfo = {}) - config_file = File.expand_path('vmpooler.yaml') - vsphere = YAML.load_file(config_file)[:vsphere] + def initialize(credentials) + $credentials = credentials + end - @connection = RbVmomi::VIM.connect host: vsphere['server'], - user: vsphere['username'], - password: vsphere['password'], - insecure: true + def ensure_connected(connection, credentials) + begin + connection.serviceInstance.CurrentTime + rescue + connect_to_vsphere $credentials + end + end + + def connect_to_vsphere(credentials) + @connection = RbVmomi::VIM.connect host: credentials['server'], + user: credentials['username'], + password: credentials['password'], + insecure: credentials['insecure'] || true end def add_disk(vm, size, datastore) @@ -211,14 +220,6 @@ module Vmpooler (memory_usage.to_f / memory_size.to_f) * 100 end - def ensure_connected(connection) - begin - connection.serviceInstance.CurrentTime - rescue - initialize - end - end - def find_least_used_host(cluster) ensure_connected @connection From f8bd79a8d92a93794a65f4773c47e109a1f74ae8 Mon Sep 17 00:00:00 2001 From: "kirby@puppetlabs.com" Date: Tue, 15 Nov 2016 14:43:13 -0800 Subject: [PATCH 05/11] Handle empty queues 
in pool manager Remove unneeded begin block in method Fix formatting of rescue block in fail_pending_vm --- lib/vmpooler/pool_manager.rb | 119 +++++++++++++++++++++-------------- 1 file changed, 71 insertions(+), 48 deletions(-) diff --git a/lib/vmpooler/pool_manager.rb b/lib/vmpooler/pool_manager.rb index 3c2836f..968b6bf 100644 --- a/lib/vmpooler/pool_manager.rb +++ b/lib/vmpooler/pool_manager.rb @@ -49,16 +49,21 @@ module Vmpooler end end - def fail_pending_vm(vm, pool, timeout) - clone_stamp = $redis.hget('vmpooler__vm__' + vm, 'clone') + def fail_pending_vm(vm, pool, timeout, exists=true) + clone_stamp = $redis.hget("vmpooler__vm__#{vm}", 'clone') + return if ! clone_stamp - if (clone_stamp) && - (((Time.now - Time.parse(clone_stamp)) / 60) > timeout) - - $redis.smove('vmpooler__pending__' + pool, 'vmpooler__completed__' + pool, vm) - - $logger.log('d', "[!] [#{pool}] '#{vm}' marked as 'failed' after #{timeout} minutes") + time_since_clone = (Time.now - Time.parse(clone_stamp)) / 60 + if time_since_clone > timeout + if exists + $redis.smove('vmpooler__pending__' + pool, 'vmpooler__completed__' + pool, vm) + $logger.log('d', "[!] 
[#{pool}] '#{vm}' marked as 'failed' after #{timeout} minutes") + else + remove_nonexistent_vm(vm, pool) + end end + rescue => err + $logger.log('d', "Fail pending VM failed with an error: #{err}") end def move_pending_vm_to_ready(vm, pool, host) @@ -549,75 +554,93 @@ module Vmpooler end # RUNNING - $redis.smembers('vmpooler__running__' + pool['name']).each do |vm| - if inventory[vm] - begin - check_running_vm(vm, pool['name'], $redis.hget('vmpooler__vm__' + vm, 'lifetime') || $config[:config]['vm_lifetime'] || 12) - rescue + running = $redis.smembers("vmpooler__running__#{pool['name']}") + if running + running.each do |vm| + if inventory[vm] + begin + check_running_vm(vm, pool['name'], $redis.hget('vmpooler__vm__' + vm, 'lifetime') || $config[:config]['vm_lifetime'] || 12) + rescue + end end end end # READY - $redis.smembers('vmpooler__ready__' + pool['name']).each do |vm| - if inventory[vm] - begin - check_ready_vm(vm, pool['name'], pool['ready_ttl'] || 0) - rescue + ready = $redis.smembers("vmpooler__ready__#{pool['name']}") + if ready + ready.each do |vm| if ready + if inventory[vm] + begin + check_ready_vm(vm, pool['name'], pool['ready_ttl'] || 0) + rescue + end end end end # PENDING - $redis.smembers('vmpooler__pending__' + pool['name']).each do |vm| - if inventory[vm] - begin - check_pending_vm(vm, pool['name'], pool['timeout'] || $config[:config]['timeout'] || 15) - rescue + pending = $redis.smembers('vmpooler__pending__' + pool['name']) + if pending + pending.each do |vm| + if inventory[vm] + begin + check_pending_vm(vm, pool['name'], pool['timeout'] || $config[:config]['timeout'] || 15) + rescue + end end end end # COMPLETED - $redis.smembers('vmpooler__completed__' + pool['name']).each do |vm| - if inventory[vm] - begin - destroy_vm(vm, pool['name']) - rescue - $logger.log('s', "[!] 
[#{pool['name']}] '#{vm}' destroy appears to have failed") + completed = $redis.smembers('vmpooler__completed__' + pool['name']) + if completed + completed.each do |vm| + if inventory[vm] + begin + destroy_vm(vm, pool['name']) + rescue + $logger.log('s', "[!] [#{pool['name']}] '#{vm}' destroy appears to have failed") + $redis.srem('vmpooler__completed__' + pool['name'], vm) + $redis.hdel('vmpooler__active__' + pool['name'], vm) + $redis.del('vmpooler__vm__' + vm) + end + else + $logger.log('s', "[!] [#{pool['name']}] '#{vm}' not found in inventory, removed from 'completed' queue") $redis.srem('vmpooler__completed__' + pool['name'], vm) $redis.hdel('vmpooler__active__' + pool['name'], vm) $redis.del('vmpooler__vm__' + vm) end - else - $logger.log('s', "[!] [#{pool['name']}] '#{vm}' not found in inventory, removed from 'completed' queue") - $redis.srem('vmpooler__completed__' + pool['name'], vm) - $redis.hdel('vmpooler__active__' + pool['name'], vm) - $redis.del('vmpooler__vm__' + vm) end end # DISCOVERED - $redis.smembers('vmpooler__discovered__' + pool['name']).each do |vm| - %w(pending ready running completed).each do |queue| - if $redis.sismember('vmpooler__' + queue + '__' + pool['name'], vm) - $logger.log('d', "[!] [#{pool['name']}] '#{vm}' found in '#{queue}', removed from 'discovered' queue") - $redis.srem('vmpooler__discovered__' + pool['name'], vm) + discovered = $redis.smembers("vmpooler__discovered__#{pool['name']}") + if discovered + discovered.each do |vm| + %w(pending ready running completed).each do |queue| + if $redis.sismember('vmpooler__' + queue + '__' + pool['name'], vm) + $logger.log('d', "[!] 
[#{pool['name']}] '#{vm}' found in '#{queue}', removed from 'discovered' queue") + $redis.srem('vmpooler__discovered__' + pool['name'], vm) + end end - end - if $redis.sismember('vmpooler__discovered__' + pool['name'], vm) - $redis.smove('vmpooler__discovered__' + pool['name'], 'vmpooler__completed__' + pool['name'], vm) + if $redis.sismember('vmpooler__discovered__' + pool['name'], vm) + $redis.smove('vmpooler__discovered__' + pool['name'], 'vmpooler__completed__' + pool['name'], vm) + end end end # MIGRATIONS - $redis.smembers('vmpooler__migrating__' + pool['name']).each do |vm| - if inventory[vm] - begin - migrate_vm(vm, pool['name']) - rescue => err - $logger.log('s', "[x] [#{pool['name']}] '#{vm}' failed to migrate: #{err}") + migrations = $redis.smembers('vmpooler__migrating__' + pool['name']) + if migrations + migrations.each do |vm| + if inventory[vm] + begin + migrate_vm(vm, pool['name']) + rescue => err + $logger.log('s', "[x] [#{pool['name']}] '#{vm}' failed to migrate: #{err}") + end end end end From 12c2c4a09b8656b870e3cfdfd8712088c54463da Mon Sep 17 00:00:00 2001 From: "kirby@puppetlabs.com" Date: Wed, 16 Nov 2016 03:18:52 -0800 Subject: [PATCH 06/11] Return host name with object when finding least used compatible host --- lib/vmpooler/vsphere_helper.rb | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/vmpooler/vsphere_helper.rb b/lib/vmpooler/vsphere_helper.rb index d72c8b0..66b387b 100644 --- a/lib/vmpooler/vsphere_helper.rb +++ b/lib/vmpooler/vsphere_helper.rb @@ -254,7 +254,8 @@ module Vmpooler host_usage = get_host_utilization(host, model) target_hosts << host_usage if host_usage end - target_hosts.sort[0][1] + target_host = target_hosts.sort[0][1] + [target_host, target_host.name] end def find_pool(poolname) From a15090e00529e114ac426f05a406a886782014ec Mon Sep 17 00:00:00 2001 From: "kirby@puppetlabs.com" Date: Wed, 16 Nov 2016 03:20:59 -0800 Subject: [PATCH 07/11] Simplify vsphere connection handling in order to make it 
reasonable to test. Simplify migrate_vm method by breaking out some components. Improve error handling around migrate_vm. Add helpers to support setting up redis for migration at checkout testing. --- lib/vmpooler/pool_manager.rb | 465 +++++++++++++++++---------------- lib/vmpooler/vsphere_helper.rb | 28 +- 2 files changed, 252 insertions(+), 241 deletions(-) diff --git a/lib/vmpooler/pool_manager.rb b/lib/vmpooler/pool_manager.rb index 968b6bf..dd17385 100644 --- a/lib/vmpooler/pool_manager.rb +++ b/lib/vmpooler/pool_manager.rb @@ -20,9 +20,9 @@ module Vmpooler end # Check the state of a VM - def check_pending_vm(vm, pool, timeout) + def check_pending_vm(vm, pool, timeout, vsphere) Thread.new do - _check_pending_vm(vm, pool, timeout) + _check_pending_vm(vm, pool, timeout, vsphere) end end @@ -34,12 +34,14 @@ module Vmpooler end end - def _check_pending_vm(vm, pool, timeout) - host = $vsphere[pool].find_vm(vm) + def _check_pending_vm(vm, pool, timeout, vsphere) + host = vsphere.find_vm(vm) if host begin - open_socket vm, $config[:config]['domain'], timeout + Timeout.timeout(5) do + TCPSocket.new vm, 22 + end move_pending_vm_to_ready(vm, pool, host) rescue fail_pending_vm(vm, pool, timeout) @@ -87,7 +89,7 @@ module Vmpooler end end - def check_ready_vm(vm, pool, ttl) + def check_ready_vm(vm, pool, ttl, vsphere) Thread.new do if ttl > 0 if (((Time.now - host.runtime.bootTime) / 60).to_s[/^\d+\.\d{1}/].to_f) > ttl @@ -105,8 +107,8 @@ module Vmpooler $redis.hset('vmpooler__vm__' + vm, 'check', Time.now) - host = $vsphere[pool].find_vm(vm) || - $vsphere[pool].find_vm_heavy(vm)[vm] + host = vsphere.find_vm(vm) || + vsphere.find_vm_heavy(vm)[vm] if host if @@ -147,14 +149,14 @@ module Vmpooler end end - def check_running_vm(vm, pool, ttl) + def check_running_vm(vm, pool, ttl, vsphere) Thread.new do - _check_running_vm(vm, pool, ttl) + _check_running_vm(vm, pool, ttl, vsphere) end end - def 
_check_running_vm(vm, pool, ttl, vsphere) + host = vsphere.find_vm(vm) if host queue_from, queue_to = 'running', 'completed' @@ -178,101 +180,105 @@ module Vmpooler end # Clone a VM - def clone_vm(template, folder, datastore, target) + def clone_vm(template, folder, datastore, target, vsphere) Thread.new do - vm = {} - - if template =~ /\// - templatefolders = template.split('/') - vm['template'] = templatefolders.pop - end - - if templatefolders - vm[vm['template']] = $vsphere[vm['template']].find_folder(templatefolders.join('/')).find(vm['template']) - else - fail 'Please provide a full path to the template' - end - - if vm['template'].length == 0 - fail "Unable to find template '#{vm['template']}'!" - end - - # Generate a randomized hostname - o = [('a'..'z'), ('0'..'9')].map(&:to_a).flatten - vm['hostname'] = $config[:config]['prefix'] + o[rand(25)] + (0...14).map { o[rand(o.length)] }.join - - # Add VM to Redis inventory ('pending' pool) - $redis.sadd('vmpooler__pending__' + vm['template'], vm['hostname']) - $redis.hset('vmpooler__vm__' + vm['hostname'], 'clone', Time.now) - $redis.hset('vmpooler__vm__' + vm['hostname'], 'template', vm['template']) - - # Annotate with creation time, origin template, etc. 
- # Add extraconfig options that can be queried by vmtools - configSpec = RbVmomi::VIM.VirtualMachineConfigSpec( - annotation: JSON.pretty_generate( - name: vm['hostname'], - created_by: $config[:vsphere]['username'], - base_template: vm['template'], - creation_timestamp: Time.now.utc - ), - extraConfig: [ - { key: 'guestinfo.hostname', - value: vm['hostname'] - } - ] - ) - - # Choose a clone target - if target - $clone_target = $vsphere[vm['template']].find_least_used_host(target) - elsif $config[:config]['clone_target'] - $clone_target = $vsphere[vm['template']].find_least_used_host($config[:config]['clone_target']) - end - - # Put the VM in the specified folder and resource pool - relocateSpec = RbVmomi::VIM.VirtualMachineRelocateSpec( - datastore: $vsphere[vm['template']].find_datastore(datastore), - host: $clone_target, - diskMoveType: :moveChildMostDiskBacking - ) - - # Create a clone spec - spec = RbVmomi::VIM.VirtualMachineCloneSpec( - location: relocateSpec, - config: configSpec, - powerOn: true, - template: false - ) - - # Clone the VM - $logger.log('d', "[ ] [#{vm['template']}] '#{vm['hostname']}' is being cloned from '#{vm['template']}'") - begin - start = Time.now - vm[vm['template']].CloneVM_Task( - folder: $vsphere[vm['template']].find_folder(folder), - name: vm['hostname'], - spec: spec - ).wait_for_completion - finish = '%.2f' % (Time.now - start) + vm = {} - $redis.hset('vmpooler__clone__' + Date.today.to_s, vm['template'] + ':' + vm['hostname'], finish) - $redis.hset('vmpooler__vm__' + vm['hostname'], 'clone_time', finish) + if template =~ /\// + templatefolders = template.split('/') + vm['template'] = templatefolders.pop + end - $logger.log('s', "[+] [#{vm['template']}] '#{vm['hostname']}' cloned from '#{vm['template']}' in #{finish} seconds") - rescue - $logger.log('s', "[!] 
[#{vm['template']}] '#{vm['hostname']}' clone appears to have failed") - $redis.srem('vmpooler__pending__' + vm['template'], vm['hostname']) + if templatefolders + vm[vm['template']] = vsphere.find_folder(templatefolders.join('/')).find(vm['template']) + else + fail 'Please provide a full path to the template' + end + + if vm['template'].length == 0 + fail "Unable to find template '#{vm['template']}'!" + end + + # Generate a randomized hostname + o = [('a'..'z'), ('0'..'9')].map(&:to_a).flatten + vm['hostname'] = $config[:config]['prefix'] + o[rand(25)] + (0...14).map { o[rand(o.length)] }.join + + # Add VM to Redis inventory ('pending' pool) + $redis.sadd('vmpooler__pending__' + vm['template'], vm['hostname']) + $redis.hset('vmpooler__vm__' + vm['hostname'], 'clone', Time.now) + $redis.hset('vmpooler__vm__' + vm['hostname'], 'template', vm['template']) + + # Annotate with creation time, origin template, etc. + # Add extraconfig options that can be queried by vmtools + configSpec = RbVmomi::VIM.VirtualMachineConfigSpec( + annotation: JSON.pretty_generate( + name: vm['hostname'], + created_by: $config[:vsphere]['username'], + base_template: vm['template'], + creation_timestamp: Time.now.utc + ), + extraConfig: [ + { key: 'guestinfo.hostname', + value: vm['hostname'] + } + ] + ) + + # Choose a clone target + if target + $clone_target = vsphere.find_least_used_host(target) + elsif $config[:config]['clone_target'] + $clone_target = vsphere.find_least_used_host($config[:config]['clone_target']) + end + + # Put the VM in the specified folder and resource pool + relocateSpec = RbVmomi::VIM.VirtualMachineRelocateSpec( + datastore: vsphere.find_datastore(datastore), + host: $clone_target, + diskMoveType: :moveChildMostDiskBacking + ) + + # Create a clone spec + spec = RbVmomi::VIM.VirtualMachineCloneSpec( + location: relocateSpec, + config: configSpec, + powerOn: true, + template: false + ) + + # Clone the VM + $logger.log('d', "[ ] [#{vm['template']}] '#{vm['hostname']}' 
is being cloned from '#{vm['template']}'") + + begin + start = Time.now + vm[vm['template']].CloneVM_Task( + folder: vsphere.find_folder(folder), + name: vm['hostname'], + spec: spec + ).wait_for_completion + finish = '%.2f' % (Time.now - start) + + $redis.hset('vmpooler__clone__' + Date.today.to_s, vm['template'] + ':' + vm['hostname'], finish) + $redis.hset('vmpooler__vm__' + vm['hostname'], 'clone_time', finish) + + $logger.log('s', "[+] [#{vm['template']}] '#{vm['hostname']}' cloned from '#{vm['template']}' in #{finish} seconds") + rescue => err + $logger.log('s', "[!] [#{vm['template']}] '#{vm['hostname']}' clone failed with an error: #{err}") + $redis.srem('vmpooler__pending__' + vm['template'], vm['hostname']) + end + + $redis.decr('vmpooler__tasks__clone') + + $metrics.timing("clone.#{vm['template']}", finish) + rescue => err + $logger.log('s', "[!] [#{vm['template']}] '#{vm['hostname']}' failed while preparing to clone with an error: #{err}") end - - $redis.decr('vmpooler__tasks__clone') - - $metrics.timing("clone.#{vm['template']}", finish) end end # Destroy a VM - def destroy_vm(vm, pool) + def destroy_vm(vm, pool, vsphere) Thread.new do $redis.srem('vmpooler__completed__' + pool, vm) $redis.hdel('vmpooler__active__' + pool, vm) @@ -281,8 +287,8 @@ module Vmpooler # Auto-expire metadata key $redis.expire('vmpooler__vm__' + vm, ($config[:redis]['data_ttl'].to_i * 60 * 60)) - host = $vsphere[pool].find_vm(vm) || - $vsphere[pool].find_vm_heavy(vm)[vm] + host = vsphere.find_vm(vm) || + vsphere.find_vm_heavy(vm)[vm] if host start = Time.now @@ -305,15 +311,15 @@ module Vmpooler end end - def create_vm_disk(vm, disk_size) + def create_vm_disk(vm, disk_size, vsphere) Thread.new do - _create_vm_disk(vm, disk_size) + _create_vm_disk(vm, disk_size, vsphere) end end - def _create_vm_disk(vm, disk_size) - host = $vsphere['disk_manager'].find_vm(vm) || - $vsphere['disk_manager'].find_vm_heavy(vm)[vm] + def _create_vm_disk(vm, disk_size, vsphere) + host = 
vsphere.find_vm(vm) || + vsphere.find_vm_heavy(vm)[vm] if (host) && ((! disk_size.nil?) && (! disk_size.empty?) && (disk_size.to_i > 0)) $logger.log('s', "[ ] [disk_manager] '#{vm}' is attaching a #{disk_size}gb disk") @@ -330,7 +336,7 @@ module Vmpooler end if ((! datastore.nil?) && (! datastore.empty?)) - $vsphere['disk_manager'].add_disk(host, disk_size, datastore) + vsphere.add_disk(host, disk_size, datastore) rdisks = $redis.hget('vmpooler__vm__' + vm, 'disk') disks = rdisks ? rdisks.split(':') : [] @@ -346,15 +352,15 @@ module Vmpooler end end - def create_vm_snapshot(vm, snapshot_name) + def create_vm_snapshot(vm, snapshot_name, vsphere) Thread.new do - _create_vm_snapshot(vm, snapshot_name) + _create_vm_snapshot(vm, snapshot_name, vsphere) end end - def _create_vm_snapshot(vm, snapshot_name) - host = $vsphere['snapshot_manager'].find_vm(vm) || - $vsphere['snapshot_manager'].find_vm_heavy(vm)[vm] + def _create_vm_snapshot(vm, snapshot_name, vsphere) + host = vsphere.find_vm(vm) || + vsphere.find_vm_heavy(vm)[vm] if (host) && ((! snapshot_name.nil?) && (! 
snapshot_name.empty?)) $logger.log('s', "[ ] [snapshot_manager] '#{vm}' is being snapshotted") @@ -376,18 +382,18 @@ module Vmpooler end end - def revert_vm_snapshot(vm, snapshot_name) + def revert_vm_snapshot(vm, snapshot_name, vsphere) Thread.new do - _revert_vm_snapshot(vm, snapshot_name) + _revert_vm_snapshot(vm, snapshot_name, vsphere) end end - def _revert_vm_snapshot(vm, snapshot_name) - host = $vsphere['snapshot_manager'].find_vm(vm) || - $vsphere['snapshot_manager'].find_vm_heavy(vm)[vm] + def _revert_vm_snapshot(vm, snapshot_name, vsphere) + host = vsphere.find_vm(vm) || + vsphere.find_vm_heavy(vm)[vm] if host - snapshot = $vsphere['snapshot_manager'].find_snapshot(host, snapshot_name) + snapshot = vsphere.find_snapshot(host, snapshot_name) if snapshot $logger.log('s', "[ ] [snapshot_manager] '#{vm}' is being reverted to snapshot '#{snapshot_name}'") @@ -410,19 +416,19 @@ module Vmpooler $threads['disk_manager'] = Thread.new do loop do - _check_disk_queue + _check_disk_queue $vsphere['disk_manager'] sleep(5) end end end - def _check_disk_queue + def _check_disk_queue(vsphere) vm = $redis.spop('vmpooler__tasks__disk') unless vm.nil? begin vm_name, disk_size = vm.split(':') - create_vm_disk(vm_name, disk_size) + create_vm_disk(vm_name, disk_size, vsphere) rescue $logger.log('s', "[!] [disk_manager] disk creation appears to have failed") end @@ -436,19 +442,19 @@ module Vmpooler $threads['snapshot_manager'] = Thread.new do loop do - _check_snapshot_queue + _check_snapshot_queue $vsphere['snapshot_manager'] sleep(5) end end end - def _check_snapshot_queue + def _check_snapshot_queue(vsphere) vm = $redis.spop('vmpooler__tasks__snapshot') unless vm.nil? begin vm_name, snapshot_name = vm.split(':') - create_vm_snapshot(vm_name, snapshot_name) + create_vm_snapshot(vm_name, snapshot_name, vsphere) rescue $logger.log('s', "[!] [snapshot_manager] snapshot appears to have failed") end @@ -459,15 +465,15 @@ module Vmpooler unless vm.nil? 
begin vm_name, snapshot_name = vm.split(':') - revert_vm_snapshot(vm_name, snapshot_name) + revert_vm_snapshot(vm_name, snapshot_name, vsphere) rescue $logger.log('s', "[!] [snapshot_manager] snapshot revert appears to have failed") end end end - def find_vsphere_pool_vm(pool, vm) - $vsphere[pool].find_vm(vm) || $vsphere[pool].find_vm_heavy(vm)[vm] + def find_vsphere_pool_vm(pool, vm, vsphere) + vsphere.find_vm(vm) || vsphere.find_vm_heavy(vm)[vm] end def migration_limit(migration_limit) @@ -476,45 +482,67 @@ module Vmpooler migration_limit if migration_limit >= 1 end - def migrate_vm(vm, pool) + def migrate_vm(vm, pool, vsphere) Thread.new do - _migrate_vm(vm, pool) + _migrate_vm(vm, pool, vsphere) end end - def _migrate_vm(vm, pool) - $redis.srem('vmpooler__migrating__' + pool, vm) - vm_object = find_vsphere_pool_vm(pool, vm) - parent_host = vm_object.summary.runtime.host - parent_host_name = parent_host.name - migration_limit = migration_limit $config[:config]['migration_limit'] + def _migrate_vm(vm, pool, vsphere) + begin + $redis.srem('vmpooler__migrating__' + pool, vm) + vm_object = find_vsphere_pool_vm(pool, vm, vsphere) + parent_host, parent_host_name = get_vm_host_info(vm_object) + migration_limit = migration_limit $config[:config]['migration_limit'] - if not migration_limit - $logger.log('s', "[ ] [#{pool}] '#{vm}' is running on #{parent_host_name}") - else - migration_count = $redis.smembers('vmpooler__migration').size - if migration_count >= migration_limit - $logger.log('s', "[ ] [#{pool}] '#{vm}' is running on #{parent_host_name}. 
No migration will be evaluated since the migration_limit has been reached") + if not migration_limit + $logger.log('s', "[ ] [#{pool}] '#{vm}' is running on #{parent_host_name}") else - $redis.sadd('vmpooler__migration', vm) - host = $vsphere[pool].find_least_used_compatible_host(vm_object) - if host == parent_host - $logger.log('s', "[ ] [#{pool}] No migration required for '#{vm}' running on #{parent_host_name}") + migration_count = $redis.smembers('vmpooler__migration').size + if migration_count >= migration_limit + $logger.log('s', "[ ] [#{pool}] '#{vm}' is running on #{parent_host_name}. No migration will be evaluated since the migration_limit has been reached") else - start = Time.now - $vsphere[pool].migrate_vm_host(vm_object, host) - finish = '%.2f' % (Time.now - start) - $metrics.timing("migrate.#{vm['template']}", finish) - checkout_to_migration = '%.2f' % (Time.now - Time.parse($redis.hget('vmpooler__vm__' + vm, 'checkout'))) - $redis.hset('vmpooler__vm__' + vm, 'migration_time', finish) - $redis.hset('vmpooler__vm__' + vm, 'checkout_to_migration', checkout_to_migration) - $logger.log('s', "[>] [#{pool}] '#{vm}' migrated from #{parent_host_name} to #{host.name} in #{finish} seconds") + $redis.sadd('vmpooler__migration', vm) + host, host_name = vsphere.find_least_used_compatible_host(vm_object) + if host == parent_host + $logger.log('s', "[ ] [#{pool}] No migration required for '#{vm}' running on #{parent_host_name}") + else + finish = migrate_vm_and_record_timing(vm_object, vm, host, vsphere) + $logger.log('s', "[>] [#{pool}] '#{vm}' migrated from #{parent_host_name} to #{host_name} in #{finish} seconds") + end + remove_vmpooler_migration_vm(pool, vm) end - $redis.srem('vmpooler__migration', vm) end + rescue => err + $logger.log('s', "[x] [#{pool}] '#{vm}' migration failed with an error: #{err}") + remove_vmpooler_migration_vm(pool, vm) end end + def get_vm_host_info(vm_object) + parent_host = vm_object.summary.runtime.host + [parent_host, 
parent_host.name] + end + + def remove_vmpooler_migration_vm(pool, vm) + begin + $redis.srem('vmpooler__migration', vm) + rescue => err + $logger.log('s', "[x] [#{pool}] '#{vm}' removal from vmpooler__migration failed with an error: #{err}") + end + end + + def migrate_vm_and_record_timing(vm_object, vm_name, host, vsphere) + start = Time.now + vsphere.migrate_vm_host(vm_object, host) + finish = '%.2f' % (Time.now - start) + $metrics.timing("migrate.#{vm_name}", finish) + checkout_to_migration = '%.2f' % (Time.now - Time.parse($redis.hget("vmpooler__vm__#{vm_name}", 'checkout'))) + $redis.hset("vmpooler__vm__#{vm_name}", 'migration_time', finish) + $redis.hset("vmpooler__vm__#{vm_name}", 'checkout_to_migration', checkout_to_migration) + finish + end + def check_pool(pool) $logger.log('d', "[*] [#{pool['name']}] starting worker thread") @@ -522,17 +550,17 @@ module Vmpooler $threads[pool['name']] = Thread.new do loop do - _check_pool(pool) + _check_pool(pool, $vsphere[pool['name']]) sleep(5) end end end - def _check_pool(pool) + def _check_pool(pool, vsphere) # INVENTORY inventory = {} begin - base = $vsphere[pool['name']].find_folder(pool['folder']) + base = vsphere.find_folder(pool['folder']) base.childEntity.each do |vm| if @@ -554,111 +582,93 @@ module Vmpooler end # RUNNING - running = $redis.smembers("vmpooler__running__#{pool['name']}") - if running - running.each do |vm| - if inventory[vm] - begin - check_running_vm(vm, pool['name'], $redis.hget('vmpooler__vm__' + vm, 'lifetime') || $config[:config]['vm_lifetime'] || 12) - rescue - end + $redis.smembers("vmpooler__running__#{pool['name']}").each do |vm| + if inventory[vm] + begin + check_running_vm(vm, pool['name'], $redis.hget('vmpooler__vm__' + vm, 'lifetime') || $config[:config]['vm_lifetime'] || 12, vsphere) + rescue end end end # READY - ready = $redis.smembers("vmpooler__ready__#{pool['name']}") - if ready - ready.each do |vm| - if inventory[vm] - begin - check_ready_vm(vm, pool['name'], 
pool['ready_ttl'] || 0) - rescue - end + $redis.smembers("vmpooler__ready__#{pool['name']}").each do |vm| + if inventory[vm] + begin + check_ready_vm(vm, pool['name'], pool['ready_ttl'] || 0, vsphere) + rescue end end end # PENDING - pending = $redis.smembers('vmpooler__pending__' + pool['name']) - if pending - pending.each do |vm| - if inventory[vm] - begin - check_pending_vm(vm, pool['name'], pool['timeout'] || $config[:config]['timeout'] || 15) - rescue - end + $redis.smembers("vmpooler__pending__#{pool['name']}").each do |vm| + if inventory[vm] + begin + check_pending_vm(vm, pool['name'], pool['timeout'] || $config[:config]['timeout'] || 15, vsphere) + rescue end end end # COMPLETED - completed = $redis.smembers('vmpooler__completed__' + pool['name']) - if completed - completed.each do |vm| - if inventory[vm] - begin - destroy_vm(vm, pool['name']) - rescue - $logger.log('s', "[!] [#{pool['name']}] '#{vm}' destroy appears to have failed") - $redis.srem('vmpooler__completed__' + pool['name'], vm) - $redis.hdel('vmpooler__active__' + pool['name'], vm) - $redis.del('vmpooler__vm__' + vm) - end - else - $logger.log('s', "[!] [#{pool['name']}] '#{vm}' not found in inventory, removed from 'completed' queue") - $redis.srem('vmpooler__completed__' + pool['name'], vm) - $redis.hdel('vmpooler__active__' + pool['name'], vm) - $redis.del('vmpooler__vm__' + vm) + $redis.smembers("vmpooler__completed__#{pool['name']}").each do |vm| + if inventory[vm] + begin + destroy_vm(vm, pool['name'], vsphere) + rescue + $logger.log('s', "[!] [#{pool['name']}] '#{vm}' destroy appears to have failed") + $redis.srem("vmpooler__completed__#{pool['name']}", vm) + $redis.hdel("vmpooler__active__#{pool['name']}", vm) + $redis.del("vmpooler__vm__#{vm}") end + else + $logger.log('s', "[!] 
[#{pool['name']}] '#{vm}' not found in inventory, removed from 'completed' queue") + $redis.srem("vmpooler__completed__#{pool['name']}", vm) + $redis.hdel("vmpooler__active__#{pool['name']}", vm) + $redis.del("vmpooler__vm__#{vm}") end end # DISCOVERED - discovered = $redis.smembers("vmpooler__discovered__#{pool['name']}") - if discovered - discovered.each do |vm| - %w(pending ready running completed).each do |queue| - if $redis.sismember('vmpooler__' + queue + '__' + pool['name'], vm) - $logger.log('d', "[!] [#{pool['name']}] '#{vm}' found in '#{queue}', removed from 'discovered' queue") - $redis.srem('vmpooler__discovered__' + pool['name'], vm) - end + $redis.smembers("vmpooler__discovered__#{pool['name']}").each do |vm| + %w(pending ready running completed).each do |queue| + if $redis.sismember("vmpooler__#{queue}__#{pool['name']}", vm) + $logger.log('d', "[!] [#{pool['name']}] '#{vm}' found in '#{queue}', removed from 'discovered' queue") + $redis.srem("vmpooler__discovered__#{pool['name']}", vm) end + end - if $redis.sismember('vmpooler__discovered__' + pool['name'], vm) - $redis.smove('vmpooler__discovered__' + pool['name'], 'vmpooler__completed__' + pool['name'], vm) - end + if $redis.sismember("vmpooler__discovered__#{pool['name']}", vm) + $redis.smove("vmpooler__discovered__#{pool['name']}", "vmpooler__completed__#{pool['name']}", vm) end end # MIGRATIONS - migrations = $redis.smembers('vmpooler__migrating__' + pool['name']) - if migrations - migrations.each do |vm| - if inventory[vm] - begin - migrate_vm(vm, pool['name']) - rescue => err - $logger.log('s', "[x] [#{pool['name']}] '#{vm}' failed to migrate: #{err}") - end + $redis.smembers("vmpooler__migrating__#{pool['name']}").each do |vm| + if inventory[vm] + begin + migrate_vm(vm, pool['name'], vsphere) + rescue => err + $logger.log('s', "[x] [#{pool['name']}] '#{vm}' failed to migrate: #{err}") end end end # REPOPULATE - ready = $redis.scard('vmpooler__ready__' + pool['name']) - total = 
$redis.scard('vmpooler__pending__' + pool['name']) + ready + ready = $redis.scard("vmpooler__ready__#{pool['name']}") + total = $redis.scard("vmpooler__pending__#{pool['name']}") + ready - $metrics.gauge('ready.' + pool['name'], $redis.scard('vmpooler__ready__' + pool['name'])) - $metrics.gauge('running.' + pool['name'], $redis.scard('vmpooler__running__' + pool['name'])) + $metrics.gauge("ready.#{pool['name']}", $redis.scard("vmpooler__ready__#{pool['name']}")) + $metrics.gauge("running.#{pool['name']}", $redis.scard("vmpooler__running__#{pool['name']}")) - if $redis.get('vmpooler__empty__' + pool['name']) + if $redis.get("vmpooler__empty__#{pool['name']}") unless ready == 0 - $redis.del('vmpooler__empty__' + pool['name']) + $redis.del("vmpooler__empty__#{pool['name']}") end else if ready == 0 - $redis.set('vmpooler__empty__' + pool['name'], 'true') + $redis.set("vmpooler__empty__#{pool['name']}", 'true') $logger.log('s', "[!] [#{pool['name']}] is empty") end end @@ -673,10 +683,11 @@ module Vmpooler pool['template'], pool['folder'], pool['datastore'], - pool['clone_target'] + pool['clone_target'], + vsphere ) - rescue - $logger.log('s', "[!] [#{pool['name']}] clone appears to have failed") + rescue => err + $logger.log('s', "[!] 
[#{pool['name']}] clone failed during check_pool with an error: #{err}") $redis.decr('vmpooler__tasks__clone') end end diff --git a/lib/vmpooler/vsphere_helper.rb b/lib/vmpooler/vsphere_helper.rb index 66b387b..c49bce4 100644 --- a/lib/vmpooler/vsphere_helper.rb +++ b/lib/vmpooler/vsphere_helper.rb @@ -26,7 +26,7 @@ module Vmpooler end def add_disk(vm, size, datastore) - ensure_connected @connection + ensure_connected @connection, $credentials return false unless size.to_i > 0 @@ -76,14 +76,14 @@ module Vmpooler end def find_datastore(datastorename) - ensure_connected @connection + ensure_connected @connection, $credentials datacenter = @connection.serviceInstance.find_datacenter datacenter.find_datastore(datastorename) end def find_device(vm, deviceName) - ensure_connected @connection + ensure_connected @connection, $credentials vm.config.hardware.device.each do |device| return device if device.deviceInfo.label == deviceName @@ -93,7 +93,7 @@ module Vmpooler end def find_disk_controller(vm) - ensure_connected @connection + ensure_connected @connection, $credentials devices = find_disk_devices(vm) @@ -107,7 +107,7 @@ module Vmpooler end def find_disk_devices(vm) - ensure_connected @connection + ensure_connected @connection, $credentials devices = {} @@ -135,7 +135,7 @@ module Vmpooler end def find_disk_unit_number(vm, controller) - ensure_connected @connection + ensure_connected @connection, $credentials used_unit_numbers = [] available_unit_numbers = [] @@ -160,7 +160,7 @@ module Vmpooler end def find_folder(foldername) - ensure_connected @connection + ensure_connected @connection, $credentials datacenter = @connection.serviceInstance.find_datacenter base = datacenter.vmFolder @@ -221,7 +221,7 @@ module Vmpooler end def find_least_used_host(cluster) - ensure_connected @connection + ensure_connected @connection, $credentials cluster_object = find_cluster(cluster) target_hosts = get_cluster_host_utilization(cluster_object) @@ -244,7 +244,7 @@ module Vmpooler end def 
find_least_used_compatible_host(vm) - ensure_connected @connection + ensure_connected @connection, $credentials source_host = vm.summary.runtime.host model = get_host_cpu_arch_version(source_host) @@ -259,7 +259,7 @@ module Vmpooler end def find_pool(poolname) - ensure_connected @connection + ensure_connected @connection, $credentials datacenter = @connection.serviceInstance.find_datacenter base = datacenter.hostFolder @@ -288,13 +288,13 @@ module Vmpooler end def find_vm(vmname) - ensure_connected @connection + ensure_connected @connection, $credentials @connection.searchIndex.FindByDnsName(vmSearch: true, dnsName: vmname) end def find_vm_heavy(vmname) - ensure_connected @connection + ensure_connected @connection, $credentials vmname = vmname.is_a?(Array) ? vmname : [vmname] containerView = get_base_vm_container_from @connection @@ -344,7 +344,7 @@ module Vmpooler end def find_vmdks(vmname, datastore) - ensure_connected @connection + ensure_connected @connection, $credentials disks = [] @@ -363,7 +363,7 @@ module Vmpooler end def get_base_vm_container_from(connection) - ensure_connected @connection + ensure_connected @connection, $credentials viewManager = connection.serviceContent.viewManager viewManager.CreateContainerView( From e6613d56a009ce9610074a3e9cfe805aef9c50bd Mon Sep 17 00:00:00 2001 From: "kirby@puppetlabs.com" Date: Fri, 14 Oct 2016 13:06:23 -0700 Subject: [PATCH 08/11] Update migration spec to call pool manager and validate results Use mock_redis instead of redis. Make passing of mock redis to helper calls more clear Update pool_manager_spec to specify vsphere argument where applicable. Update pool_helper calls to vsphere where needed for tests to pass. Without this change rspec tests for pool_manager_spec exhibit 12 failures. Update pool_manager_spec test with open_socket Pool_manager_spec stubs a tcpsocket connection to simulate this happening directly within _check_pending_vm. 
This commit updates this to look more like its usage with open_socket, which allows the test to pass. --- Gemfile | 1 + spec/helpers.rb | 20 +++-- spec/vmpooler/pool_manager_migration_spec.rb | 88 ++++++++++++++++++++ spec/vmpooler/pool_manager_spec.rb | 43 +++++----- 4 files changed, 125 insertions(+), 27 deletions(-) create mode 100644 spec/vmpooler/pool_manager_migration_spec.rb diff --git a/Gemfile b/Gemfile index 9c9a05c..b5116dd 100644 --- a/Gemfile +++ b/Gemfile @@ -16,6 +16,7 @@ gem 'statsd-ruby', '>= 1.3.0', :require => 'statsd' # Test deps group :test do + gem 'mock_redis', '>= 0.17.0' gem 'rack-test', '>= 0.6' gem 'rspec', '>= 3.2' gem 'simplecov', '>= 0.11.2' diff --git a/spec/helpers.rb b/spec/helpers.rb index 712cdab..9005ec5 100644 --- a/spec/helpers.rb +++ b/spec/helpers.rb @@ -50,11 +50,21 @@ def create_pending_vm(template, name, token = nil) redis.hset("vmpooler__vm__#{name}", "template", template) end -def create_vm(name, token = nil) - redis.hset("vmpooler__vm__#{name}", 'checkout', Time.now) - if token - redis.hset("vmpooler__vm__#{name}", 'token:token', token) - end +def create_vm(name, token = nil, redis_handle = nil) + redis_db = redis_handle ? redis_handle : redis + redis_db.hset("vmpooler__vm__#{name}", 'checkout', Time.now) + redis_db.hset("vmpooler__vm__#{name}", 'token:token', token) if token +end + +def create_migrating_vm(name, pool, redis_handle = nil) + redis_db = redis_handle ? redis_handle : redis + redis_db.hset("vmpooler__vm__#{name}", 'checkout', Time.now) + redis_db.sadd("vmpooler__migrating__#{pool}", name) +end + +def add_vm_to_migration_set(name, redis_handle = nil) + redis_db = redis_handle ? 
redis_handle : redis + redis_db.sadd('vmpooler__migration', name) end def fetch_vm(vm) diff --git a/spec/vmpooler/pool_manager_migration_spec.rb b/spec/vmpooler/pool_manager_migration_spec.rb new file mode 100644 index 0000000..6eab80e --- /dev/null +++ b/spec/vmpooler/pool_manager_migration_spec.rb @@ -0,0 +1,88 @@ +require 'spec_helper' +require 'mock_redis' +require 'time' + +describe 'Pool Manager' do + let(:logger) { double('logger') } + let(:redis) { MockRedis.new } + let(:metrics) { Vmpooler::DummyStatsd.new } + let(:config) { + { + config: { + 'site_name' => 'test pooler', + 'migration_limit' => 2, + vsphere: { + 'server' => 'vsphere.puppet.com', + 'username' => 'vmpooler@vsphere.local', + 'password' => '', + 'insecure' => true + }, + pools: [ {'name' => 'pool1', 'size' => 5, 'folder' => 'pool1_folder'} ], + statsd: { 'prefix' => 'stats_prefix'}, + pool_names: [ 'pool1' ] + } + } + } + let(:pool) { config[:config][:pools][0]['name'] } + let(:vm) { + { + 'name' => 'vm1', + 'host' => 'host1', + 'template' => pool, + } + } + + describe '#_migrate_vm' do + let(:vsphere) { double(pool) } + let(:pooler) { Vmpooler::PoolManager.new(config, logger, redis, metrics) } + context 'evaluates VM for migration and logs host' do + before do + create_migrating_vm vm['name'], pool, redis + allow(vsphere).to receive(:find_vm).and_return(vm) + allow(pooler).to receive(:get_vm_host_info).and_return([{'name' => 'host1'}, 'host1']) + expect(vsphere).to receive(:find_vm).with(vm['name']) + end + + it 'logs VM host when migration is disabled' do + config[:config]['migration_limit'] = nil + + expect(redis.sismember("vmpooler__migrating__#{pool}", vm['name'])).to be true + expect(logger).to receive(:log).with('s', "[ ] [#{pool}] '#{vm['name']}' is running on #{vm['host']}") + + pooler._migrate_vm(vm['name'], pool, vsphere) + + expect(redis.sismember("vmpooler__migrating__#{pool}", vm['name'])).to be false + end + + it 'verifies that migration_limit greater than or equal to migrations 
in progress and logs host' do + add_vm_to_migration_set vm['name'], redis + add_vm_to_migration_set 'vm2', redis + + expect(logger).to receive(:log).with('s', "[ ] [#{pool}] '#{vm['name']}' is running on #{vm['host']}. No migration will be evaluated since the migration_limit has been reached") + + pooler._migrate_vm(vm['name'], pool, vsphere) + end + + it 'verifies that migration_limit is less than migrations in progress and logs old host, new host and migration time' do + allow(vsphere).to receive(:find_least_used_compatible_host).and_return([{'name' => 'host2'}, 'host2']) + allow(vsphere).to receive(:migrate_vm_host) + + expect(redis.hget("vmpooler__vm__#{vm['name']}", 'migration_time')) + expect(redis.hget("vmpooler__vm__#{vm['name']}", 'checkout_to_migration')) + expect(logger).to receive(:log).with('s', "[>] [#{pool}] '#{vm['name']}' migrated from #{vm['host']} to host2 in 0.00 seconds") + + pooler._migrate_vm(vm['name'], pool, vsphere) + end + + it 'fails when no suitable host can be found' do + error = 'ArgumentError: No target host found' + allow(vsphere).to receive(:find_least_used_compatible_host) + allow(vsphere).to receive(:migrate_vm_host).and_raise(error) + + expect(logger).to receive(:log).with('s', "[x] [#{pool}] '#{vm['name']}' migration failed with an error: #{error}") + + pooler._migrate_vm(vm['name'], pool, vsphere) + end + end + end +end diff --git a/spec/vmpooler/pool_manager_spec.rb b/spec/vmpooler/pool_manager_spec.rb index c569578..76f5113 100644 --- a/spec/vmpooler/pool_manager_spec.rb +++ b/spec/vmpooler/pool_manager_spec.rb @@ -24,10 +24,10 @@ describe 'Pool Manager' do context 'host not in pool' do it 'calls fail_pending_vm' do - allow(pool_helper).to receive(:find_vm).and_return(nil) + allow(vsphere).to receive(:find_vm).and_return(nil) allow(redis).to receive(:hget) expect(redis).to receive(:hget).with(String, 'clone').once - subject._check_pending_vm(vm, pool, timeout) + subject._check_pending_vm(vm, pool, timeout, vsphere) end end 
@@ -36,16 +36,14 @@ describe 'Pool Manager' do let(:tcpsocket) { double('TCPSocket') } it 'calls move_pending_vm_to_ready' do - stub_const("TCPSocket", tcpsocket) - - allow(pool_helper).to receive(:find_vm).and_return(vm_finder) + allow(subject).to receive(:open_socket).and_return(true) + allow(vsphere).to receive(:find_vm).and_return(vm_finder) allow(vm_finder).to receive(:summary).and_return(nil) - allow(tcpsocket).to receive(:new).and_return(true) expect(vm_finder).to receive(:summary).once expect(redis).not_to receive(:hget).with(String, 'clone') - subject._check_pending_vm(vm, pool, timeout) + subject._check_pending_vm(vm, pool, timeout, vsphere) end end end @@ -156,16 +154,16 @@ describe 'Pool Manager' do end it 'does nothing with nil host' do - allow(pool_helper).to receive(:find_vm).and_return(nil) + allow(vsphere).to receive(:find_vm).and_return(nil) expect(redis).not_to receive(:smove) - subject._check_running_vm(vm, pool, timeout) + subject._check_running_vm(vm, pool, timeout, vsphere) end context 'valid host' do let(:vm_host) { double('vmhost') } it 'does not move vm when not poweredOn' do - allow(pool_helper).to receive(:find_vm).and_return vm_host + allow(vsphere).to receive(:find_vm).and_return vm_host allow(vm_host).to receive(:runtime).and_return true allow(vm_host).to receive_message_chain(:runtime, :powerState).and_return 'poweredOff' @@ -173,11 +171,11 @@ describe 'Pool Manager' do expect(redis).not_to receive(:smove) expect(logger).not_to receive(:log).with('d', "[!] 
[#{pool}] '#{vm}' appears to be powered off or dead") - subject._check_running_vm(vm, pool, timeout) + subject._check_running_vm(vm, pool, timeout, vsphere) end it 'moves vm when poweredOn, but past TTL' do - allow(pool_helper).to receive(:find_vm).and_return vm_host + allow(vsphere).to receive(:find_vm).and_return vm_host allow(vm_host).to receive(:runtime).and_return true allow(vm_host).to receive_message_chain(:runtime, :powerState).and_return 'poweredOn' @@ -185,7 +183,7 @@ describe 'Pool Manager' do expect(redis).to receive(:smove) expect(logger).to receive(:log).with('d', "[!] [#{pool}] '#{vm}' reached end of TTL after #{timeout} hours") - subject._check_running_vm(vm, pool, timeout) + subject._check_running_vm(vm, pool, timeout, vsphere) end end end @@ -228,6 +226,7 @@ describe 'Pool Manager' do allow(redis).to receive(:smembers).with('vmpooler__running__pool1').and_return([]) allow(redis).to receive(:smembers).with('vmpooler__completed__pool1').and_return([]) allow(redis).to receive(:smembers).with('vmpooler__discovered__pool1').and_return([]) + allow(redis).to receive(:smembers).with('vmpooler__migrating__pool1').and_return([]) allow(redis).to receive(:set) allow(redis).to receive(:get).with('vmpooler__tasks__clone').and_return(0) allow(redis).to receive(:get).with('vmpooler__empty__pool1').and_return(nil) @@ -240,7 +239,7 @@ describe 'Pool Manager' do allow(redis).to receive(:scard).with('vmpooler__running__pool1').and_return(0) expect(logger).to receive(:log).with('s', "[!] 
[pool1] is empty") - subject._check_pool(config[:pools][0]) + subject._check_pool(config[:pools][0], vsphere) end end end @@ -277,7 +276,7 @@ describe 'Pool Manager' do expect(metrics).to receive(:gauge).with('ready.pool1', 1) expect(metrics).to receive(:gauge).with('running.pool1', 5) - subject._check_pool(config[:pools][0]) + subject._check_pool(config[:pools][0], vsphere) end it 'increments metrics when ready with 0 when pool empty' do @@ -288,7 +287,7 @@ describe 'Pool Manager' do expect(metrics).to receive(:gauge).with('ready.pool1', 0) expect(metrics).to receive(:gauge).with('running.pool1', 5) - subject._check_pool(config[:pools][0]) + subject._check_pool(config[:pools][0], vsphere) end end end @@ -307,13 +306,13 @@ describe 'Pool Manager' do let(:vm_host) { double('vmhost') } it 'creates a snapshot' do - expect(pool_helper).to receive(:find_vm).and_return vm_host + expect(vsphere).to receive(:find_vm).and_return vm_host expect(logger).to receive(:log) expect(vm_host).to receive_message_chain(:CreateSnapshot_Task, :wait_for_completion) expect(redis).to receive(:hset).with('vmpooler__vm__testvm', 'snapshot:testsnapshot', Time.now.to_s) expect(logger).to receive(:log) - subject._create_vm_snapshot('testvm', 'testsnapshot') + subject._create_vm_snapshot('testvm', 'testsnapshot', vsphere) end end end @@ -333,13 +332,13 @@ describe 'Pool Manager' do let(:vm_snapshot) { double('vmsnapshot') } it 'reverts a snapshot' do - expect(pool_helper).to receive(:find_vm).and_return vm_host - expect(pool_helper).to receive(:find_snapshot).and_return vm_snapshot + expect(vsphere).to receive(:find_vm).and_return vm_host + expect(vsphere).to receive(:find_snapshot).and_return vm_snapshot expect(logger).to receive(:log) expect(vm_snapshot).to receive_message_chain(:RevertToSnapshot_Task, :wait_for_completion) expect(logger).to receive(:log) - subject._revert_vm_snapshot('testvm', 'testsnapshot') + subject._revert_vm_snapshot('testvm', 'testsnapshot', vsphere) end end end @@ 
-357,7 +356,7 @@ describe 'Pool Manager' do expect(redis).to receive(:spop).with('vmpooler__tasks__snapshot') expect(redis).to receive(:spop).with('vmpooler__tasks__snapshot-revert') - subject._check_snapshot_queue + subject._check_snapshot_queue(vsphere) end end end From 109f197fe7ddc3369b2f656bf0d4041cd26dcc14 Mon Sep 17 00:00:00 2001 From: "kirby@puppetlabs.com" Date: Wed, 16 Nov 2016 10:34:44 -0800 Subject: [PATCH 09/11] Update migration_count method for greater readability and predictability. Update migrate_vm to make clear when an if block is the end of the line by returning. Use scard instead of smembers.size() for determining migrations in progress. --- lib/vmpooler/pool_manager.rb | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/lib/vmpooler/pool_manager.rb b/lib/vmpooler/pool_manager.rb index dd17385..054ddd5 100644 --- a/lib/vmpooler/pool_manager.rb +++ b/lib/vmpooler/pool_manager.rb @@ -478,7 +478,7 @@ module Vmpooler def migration_limit(migration_limit) # Returns migration_limit setting when enabled - return false if migration_limit == 0 or not migration_limit + return false if migration_limit == 0 || ! migration_limit migration_limit if migration_limit >= 1 end @@ -494,13 +494,15 @@ module Vmpooler vm_object = find_vsphere_pool_vm(pool, vm, vsphere) parent_host, parent_host_name = get_vm_host_info(vm_object) migration_limit = migration_limit $config[:config]['migration_limit'] + migration_count = $redis.scard('vmpooler__migration') - if not migration_limit + if ! migration_limit $logger.log('s', "[ ] [#{pool}] '#{vm}' is running on #{parent_host_name}") + return else - migration_count = $redis.smembers('vmpooler__migration').size if migration_count >= migration_limit $logger.log('s', "[ ] [#{pool}] '#{vm}' is running on #{parent_host_name}. 
No migration will be evaluated since the migration_limit has been reached") + return else $redis.sadd('vmpooler__migration', vm) host, host_name = vsphere.find_least_used_compatible_host(vm_object) @@ -585,7 +587,8 @@ module Vmpooler $redis.smembers("vmpooler__running__#{pool['name']}").each do |vm| if inventory[vm] begin - check_running_vm(vm, pool['name'], $redis.hget('vmpooler__vm__' + vm, 'lifetime') || $config[:config]['vm_lifetime'] || 12, vsphere) + vm_lifetime = $redis.hget('vmpooler__vm__' + vm, 'lifetime') || $config[:config]['vm_lifetime'] || 12 + check_running_vm(vm, pool['name'], vm_lifetime, vsphere) rescue end end @@ -605,7 +608,8 @@ module Vmpooler $redis.smembers("vmpooler__pending__#{pool['name']}").each do |vm| if inventory[vm] begin - check_pending_vm(vm, pool['name'], pool['timeout'] || $config[:config]['timeout'] || 15, vsphere) + pool_timeout = pool['timeout'] || $config[:config]['timeout'] || 15 + check_pending_vm(vm, pool['name'], pool_timeout, vsphere) rescue end end From a6c8c76d310435403ce5862223f5bab699f9c451 Mon Sep 17 00:00:00 2001 From: "kirby@puppetlabs.com" Date: Wed, 16 Nov 2016 10:37:08 -0800 Subject: [PATCH 10/11] Use open socket method for opening socket This commit updates pool manager to use a method for opening a socket instead of opening it directly from check_pending_vm. Support is added for specifying the domain of the VM to connect to, which lays the groundwork for doing away with the assumption of having DNS search domains set for vmpooler to move VMs to the ready state. Additionally, this commit adds a block to ensure open_socket closes open connections. Without this change sockets are opened to each VM before moving to the ready state, and never explicitly closed. 
Also, use open socket for check_ready_vm --- lib/vmpooler/pool_manager.rb | 37 ++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/lib/vmpooler/pool_manager.rb b/lib/vmpooler/pool_manager.rb index 054ddd5..245a9b4 100644 --- a/lib/vmpooler/pool_manager.rb +++ b/lib/vmpooler/pool_manager.rb @@ -26,29 +26,30 @@ module Vmpooler end end - def open_socket(host, domain=nil, timeout=5, port=22) + def open_socket(host, domain=nil, timeout=5, port=22, &block) Timeout.timeout(timeout) do - target_host = vm - target_host = "#{vm}.#{domain}" if domain - TCPSocket.new target_host, port + target_host = host + target_host = "#{host}.#{domain}" if domain + sock = TCPSocket.new target_host, port + begin + yield sock if block_given? + ensure + sock.close + end end end def _check_pending_vm(vm, pool, timeout, vsphere) host = vsphere.find_vm(vm) - if host - begin - Timeout.timeout(5) do - TCPSocket.new vm, 22 - end - move_pending_vm_to_ready(vm, pool, host) - rescue - fail_pending_vm(vm, pool, timeout) - end - else - fail_pending_vm(vm, pool, timeout) + if ! host + fail_pending_vm(vm, pool, timeout, false) + return end + open_socket vm + move_pending_vm_to_ready(vm, pool, host) + rescue + fail_pending_vm(vm, pool, timeout) end def fail_pending_vm(vm, pool, timeout, exists=true) @@ -137,12 +138,12 @@ module Vmpooler end begin - Timeout.timeout(5) do - TCPSocket.new vm, 22 - end + open_socket vm rescue if $redis.smove('vmpooler__ready__' + pool, 'vmpooler__completed__' + pool, vm) $logger.log('d', "[!] [#{pool}] '#{vm}' is unreachable, removed from 'ready' queue") + else + $logger.log('d', "[!] 
[#{pool}] '#{vm}' is unreachable, and failed to remove from 'ready' queue") end end end From 02327dfcd6a59e021dd2c7cc09422df08e143e03 Mon Sep 17 00:00:00 2001 From: "kirby@puppetlabs.com" Date: Wed, 16 Nov 2016 16:33:45 -0800 Subject: [PATCH 11/11] (POOLER-26) Fix lost VMs getting stuck in pending This commit updates vmpooler to understand how to resolve a situation where a pending VM does not exist. Without this change a pending VM that does not exist in vmware inventory gets stuck in the pending state, preventing the pool from ever reaching its target capacity. As a part of this change the find_vm method is updated to perform a light, then heavy search each time find_vm is called and all usage of find_vm || find_vm_heavy is replaced. This makes find_vm usage consistent across pool_manager. Additionally, open_socket method is updated to resolve an incorrect reference to the host name. --- lib/vmpooler/pool_manager.rb | 30 +++++++++----------- lib/vmpooler/vsphere_helper.rb | 5 ++++ spec/vmpooler/pool_manager_migration_spec.rb | 1 - spec/vmpooler/pool_manager_spec.rb | 1 - 4 files changed, 19 insertions(+), 18 deletions(-) diff --git a/lib/vmpooler/pool_manager.rb b/lib/vmpooler/pool_manager.rb index 245a9b4..939d7bc 100644 --- a/lib/vmpooler/pool_manager.rb +++ b/lib/vmpooler/pool_manager.rb @@ -52,6 +52,11 @@ module Vmpooler fail_pending_vm(vm, pool, timeout) end + def remove_nonexistent_vm(vm, pool) + $redis.srem("vmpooler__pending__#{pool}", vm) + $logger.log('d', "[!] [#{pool}] '#{vm}' no longer exists. Removing from pending.") + end + def fail_pending_vm(vm, pool, timeout, exists=true) clone_stamp = $redis.hget("vmpooler__vm__#{vm}", 'clone') return if ! 
clone_stamp @@ -108,8 +113,7 @@ module Vmpooler $redis.hset('vmpooler__vm__' + vm, 'check', Time.now) - host = vsphere.find_vm(vm) || - vsphere.find_vm_heavy(vm)[vm] + host = vsphere.find_vm(vm) if host if @@ -288,8 +292,7 @@ module Vmpooler # Auto-expire metadata key $redis.expire('vmpooler__vm__' + vm, ($config[:redis]['data_ttl'].to_i * 60 * 60)) - host = vsphere.find_vm(vm) || - vsphere.find_vm_heavy(vm)[vm] + host = vsphere.find_vm(vm) if host start = Time.now @@ -319,8 +322,7 @@ module Vmpooler end def _create_vm_disk(vm, disk_size, vsphere) - host = vsphere.find_vm(vm) || - vsphere.find_vm_heavy(vm)[vm] + host = vsphere.find_vm(vm) if (host) && ((! disk_size.nil?) && (! disk_size.empty?) && (disk_size.to_i > 0)) $logger.log('s', "[ ] [disk_manager] '#{vm}' is attaching a #{disk_size}gb disk") @@ -360,8 +362,7 @@ module Vmpooler end def _create_vm_snapshot(vm, snapshot_name, vsphere) - host = vsphere.find_vm(vm) || - vsphere.find_vm_heavy(vm)[vm] + host = vsphere.find_vm(vm) if (host) && ((! snapshot_name.nil?) && (! snapshot_name.empty?)) $logger.log('s', "[ ] [snapshot_manager] '#{vm}' is being snapshotted") @@ -390,8 +391,7 @@ module Vmpooler end def _revert_vm_snapshot(vm, snapshot_name, vsphere) - host = vsphere.find_vm(vm) || - vsphere.find_vm_heavy(vm)[vm] + host = vsphere.find_vm(vm) if host snapshot = vsphere.find_snapshot(host, snapshot_name) @@ -473,10 +473,6 @@ module Vmpooler end end - def find_vsphere_pool_vm(pool, vm, vsphere) - vsphere.find_vm(vm) || vsphere.find_vm_heavy(vm)[vm] - end - def migration_limit(migration_limit) # Returns migration_limit setting when enabled return false if migration_limit == 0 || ! 
migration_limit @@ -492,7 +488,7 @@ module Vmpooler def _migrate_vm(vm, pool, vsphere) begin $redis.srem('vmpooler__migrating__' + pool, vm) - vm_object = find_vsphere_pool_vm(pool, vm, vsphere) + vm_object = vsphere.find_vm(vm) parent_host, parent_host_name = get_vm_host_info(vm_object) migration_limit = migration_limit $config[:config]['migration_limit'] migration_count = $redis.scard('vmpooler__migration') @@ -607,12 +603,14 @@ module Vmpooler # PENDING $redis.smembers("vmpooler__pending__#{pool['name']}").each do |vm| + pool_timeout = pool['timeout'] || $config[:config]['timeout'] || 15 if inventory[vm] begin - pool_timeout = pool['timeout'] || $config[:config]['timeout'] || 15 check_pending_vm(vm, pool['name'], pool_timeout, vsphere) rescue end + else + fail_pending_vm(vm, pool['name'], pool_timeout, false) end end diff --git a/lib/vmpooler/vsphere_helper.rb b/lib/vmpooler/vsphere_helper.rb index c49bce4..5d06b64 100644 --- a/lib/vmpooler/vsphere_helper.rb +++ b/lib/vmpooler/vsphere_helper.rb @@ -289,6 +289,11 @@ module Vmpooler def find_vm(vmname) ensure_connected @connection, $credentials + find_vm_light(vmname) || find_vm_heavy(vmname)[vmname] + end + + def find_vm_light(vmname) + ensure_connected @connection, $credentials @connection.searchIndex.FindByDnsName(vmSearch: true, dnsName: vmname) end diff --git a/spec/vmpooler/pool_manager_migration_spec.rb b/spec/vmpooler/pool_manager_migration_spec.rb index 6eab80e..9fd491b 100644 --- a/spec/vmpooler/pool_manager_migration_spec.rb +++ b/spec/vmpooler/pool_manager_migration_spec.rb @@ -40,7 +40,6 @@ describe 'Pool Manager' do create_migrating_vm vm['name'], pool, redis allow(vsphere).to receive(:find_vm).and_return(vm) allow(pooler).to receive(:get_vm_host_info).and_return([{'name' => 'host1'}, 'host1']) - expect(vsphere).to receive(:find_vm).with(vm['name']) end it 'logs VM host when migration is disabled' do diff --git a/spec/vmpooler/pool_manager_spec.rb b/spec/vmpooler/pool_manager_spec.rb index 
76f5113..6bf00bd 100644 --- a/spec/vmpooler/pool_manager_spec.rb +++ b/spec/vmpooler/pool_manager_spec.rb @@ -26,7 +26,6 @@ describe 'Pool Manager' do it 'calls fail_pending_vm' do allow(vsphere).to receive(:find_vm).and_return(nil) allow(redis).to receive(:hget) - expect(redis).to receive(:hget).with(String, 'clone').once subject._check_pending_vm(vm, pool, timeout, vsphere) end end