diff --git a/lib/vmpooler/api/v1.rb b/lib/vmpooler/api/v1.rb index 22a0ef1..3c8c8aa 100644 --- a/lib/vmpooler/api/v1.rb +++ b/lib/vmpooler/api/v1.rb @@ -55,7 +55,6 @@ module Vmpooler def account_for_starting_vm(template, vm) backend.sadd('vmpooler__running__' + template, vm) - backend.sadd('vmpooler__migrating__' + template, vm) backend.hset('vmpooler__active__' + template, vm, Time.now) backend.hset('vmpooler__vm__' + vm, 'checkout', Time.now) diff --git a/lib/vmpooler/pool_manager.rb b/lib/vmpooler/pool_manager.rb index 52f6d61..7e3b246 100644 --- a/lib/vmpooler/pool_manager.rb +++ b/lib/vmpooler/pool_manager.rb @@ -498,77 +498,6 @@ module Vmpooler end end - def migration_limit(migration_limit) - # Returns migration_limit setting when enabled - return false if migration_limit == 0 || ! migration_limit - migration_limit if migration_limit >= 1 - end - - def migrate_vm(vm, pool, vsphere) - Thread.new do - _migrate_vm(vm, pool, vsphere) - end - end - - def _migrate_vm(vm, pool, vsphere) - begin - $redis.srem('vmpooler__migrating__' + pool, vm) - vm_object = vsphere.find_vm(vm) - parent_host, parent_host_name = get_vm_host_info(vm_object) - migration_limit = migration_limit $config[:config]['migration_limit'] - migration_count = $redis.scard('vmpooler__migration') - - if ! migration_limit - $logger.log('s', "[ ] [#{pool}] '#{vm}' is running on #{parent_host_name}") - return - else - if migration_count >= migration_limit - $logger.log('s', "[ ] [#{pool}] '#{vm}' is running on #{parent_host_name}. No migration will be evaluated since the migration_limit has been reached") - return - else - $redis.sadd('vmpooler__migration', vm) - host, host_name = vsphere.find_least_used_compatible_host(vm_object) - if host == parent_host - $logger.log('s', "[ ] [#{pool}] No migration required for '#{vm}' running on #{parent_host_name}") - else - finish = migrate_vm_and_record_timing(vm_object, vm, pool, host, parent_host_name, host_name, vsphere) - $logger.log('s', "[>] [#{pool}] '#{vm}' migrated from #{parent_host_name} to #{host_name} in #{finish} seconds") - end - remove_vmpooler_migration_vm(pool, vm) - end - end - rescue => err - $logger.log('s', "[x] [#{pool}] '#{vm}' migration failed with an error: #{err}") - remove_vmpooler_migration_vm(pool, vm) - end - end - - def get_vm_host_info(vm_object) - parent_host = vm_object.summary.runtime.host - [parent_host, parent_host.name] - end - - def remove_vmpooler_migration_vm(pool, vm) - begin - $redis.srem('vmpooler__migration', vm) - rescue => err - $logger.log('s', "[x] [#{pool}] '#{vm}' removal from vmpooler__migration failed with an error: #{err}") - end - end - - def migrate_vm_and_record_timing(vm_object, vm_name, pool, host, source_host_name, dest_host_name, vsphere) - start = Time.now - vsphere.migrate_vm_host(vm_object, host) - finish = '%.2f' % (Time.now - start) - $metrics.timing("migrate.#{pool}", finish) - $metrics.increment("migrate_from.#{source_host_name}") - $metrics.increment("migrate_to.#{dest_host_name}") - checkout_to_migration = '%.2f' % (Time.now - Time.parse($redis.hget("vmpooler__vm__#{vm_name}", 'checkout'))) - $redis.hset("vmpooler__vm__#{vm_name}", 'migration_time', finish) - $redis.hset("vmpooler__vm__#{vm_name}", 'checkout_to_migration', checkout_to_migration) - finish - end - def check_pool(pool,maxloop = 0, loop_delay = 5) $logger.log('d', "[*] [#{pool['name']}] starting worker thread") @@ -601,7 +530,6 @@ module Vmpooler (! $redis.sismember('vmpooler__pending__' + pool['name'], vm['name'])) && (! $redis.sismember('vmpooler__completed__' + pool['name'], vm['name'])) && (! $redis.sismember('vmpooler__discovered__' + pool['name'], vm['name'])) && - (! $redis.sismember('vmpooler__migrating__' + pool['name'], vm['name'])) $redis.sadd('vmpooler__discovered__' + pool['name'], vm['name']) @@ -688,17 +616,6 @@ module Vmpooler $logger.log('d', "[!] [#{pool['name']}] _check_pool failed with an error while evaluating discovered VMs: #{err}") end - # MIGRATIONS - $redis.smembers("vmpooler__migrating__#{pool['name']}").each do |vm| - if inventory[vm] - begin - migrate_vm(vm, pool['name'], vsphere) - rescue => err - $logger.log('s', "[x] [#{pool['name']}] '#{vm}' failed to migrate: #{err}") - end - end - end - # REPOPULATE ready = $redis.scard("vmpooler__ready__#{pool['name']}") total = $redis.scard("vmpooler__pending__#{pool['name']}") + ready @@ -741,8 +658,6 @@ module Vmpooler # Clear out the tasks manager, as we don't know about any tasks at this point $redis.set('vmpooler__tasks__clone', 0) - # Clear out vmpooler__migrations since stale entries may be left after a restart - $redis.del('vmpooler__migration') loop_count = 1 loop do diff --git a/lib/vmpooler/vsphere_helper.rb b/lib/vmpooler/vsphere_helper.rb index cc93250..747cbf9 100644 --- a/lib/vmpooler/vsphere_helper.rb +++ b/lib/vmpooler/vsphere_helper.rb @@ -190,87 +190,11 @@ module Vmpooler base end - # Returns an array containing cumulative CPU and memory utilization of a host, and its object reference - # Params: - # +model+:: CPU arch version to match on - # +limit+:: Hard limit for CPU or memory utilization beyond which a host is excluded for deployments - def get_host_utilization(host, model=nil, limit=90) - if model - return nil unless host_has_cpu_model? host, model - end - return nil if host.runtime.inMaintenanceMode - return nil unless host.overallStatus == 'green' - - cpu_utilization = cpu_utilization_for host - memory_utilization = memory_utilization_for host - - return nil if cpu_utilization > limit - return nil if memory_utilization > limit - - [ cpu_utilization + memory_utilization, host ] - end - - def host_has_cpu_model?(host, model) - get_host_cpu_arch_version(host) == model - end - - def get_host_cpu_arch_version(host) - cpu_model = host.hardware.cpuPkg[0].description - cpu_model_parts = cpu_model.split() - arch_version = cpu_model_parts[4] - arch_version - end - - def cpu_utilization_for(host) - cpu_usage = host.summary.quickStats.overallCpuUsage - cpu_size = host.summary.hardware.cpuMhz * host.summary.hardware.numCpuCores - (cpu_usage.to_f / cpu_size.to_f) * 100 - end - - def memory_utilization_for(host) - memory_usage = host.summary.quickStats.overallMemoryUsage - memory_size = host.summary.hardware.memorySize / 1024 / 1024 - (memory_usage.to_f / memory_size.to_f) * 100 - end - - def find_least_used_host(cluster) - ensure_connected @connection, $credentials - - cluster_object = find_cluster(cluster) - target_hosts = get_cluster_host_utilization(cluster_object) - least_used_host = target_hosts.sort[0][1] - least_used_host - end - def find_cluster(cluster) datacenter = @connection.serviceInstance.find_datacenter datacenter.hostFolder.children.find { |cluster_object| cluster_object.name == cluster } end - def get_cluster_host_utilization(cluster) - cluster_hosts = [] - cluster.host.each do |host| - host_usage = get_host_utilization(host) - cluster_hosts << host_usage if host_usage - end - cluster_hosts - end - - def find_least_used_compatible_host(vm) - ensure_connected @connection, $credentials - - source_host = vm.summary.runtime.host - model = get_host_cpu_arch_version(source_host) - cluster = source_host.parent - target_hosts = [] - cluster.host.each do |host| - host_usage = get_host_utilization(host, model) - target_hosts << host_usage if host_usage - end - target_host = target_hosts.sort[0][1] - [target_host, target_host.name] - end - def find_pool(poolname) ensure_connected @connection, $credentials @@ -405,11 +329,6 @@ module Vmpooler snapshot end - def migrate_vm_host(vm, host) - relospec = RbVmomi::VIM.VirtualMachineRelocateSpec(host: host) - vm.RelocateVM_Task(spec: relospec).wait_for_completion - end - def close @connection.close end diff --git a/spec/helpers.rb b/spec/helpers.rb index 7fda0d9..cb69cbf 100644 --- a/spec/helpers.rb +++ b/spec/helpers.rb @@ -90,17 +90,6 @@ def create_discovered_vm(name, pool, redis_handle = nil) redis_db.sadd("vmpooler__discovered__#{pool}", name) end -def create_migrating_vm(name, pool, redis_handle = nil) - redis_db = redis_handle ? redis_handle : redis - redis_db.hset("vmpooler__vm__#{name}", 'checkout', Time.now) - redis_db.sadd("vmpooler__migrating__#{pool}", name) -end - -def add_vm_to_migration_set(name, redis_handle = nil) - redis_db = redis_handle ? redis_handle : redis - redis_db.sadd('vmpooler__migration', name) -end - def fetch_vm(vm) redis.hgetall("vmpooler__vm__#{vm}") end diff --git a/spec/unit/pool_manager_spec.rb b/spec/unit/pool_manager_spec.rb index 0c2a619..28df9f6 100644 --- a/spec/unit/pool_manager_spec.rb +++ b/spec/unit/pool_manager_spec.rb @@ -1461,216 +1461,6 @@ EOT end end - - describe '#migration_limit' do - # This is a little confusing. Is this supposed to return a boolean - # or integer type? - [false,0].each do |testvalue| - it "should return false for an input of #{testvalue}" do - expect(subject.migration_limit(testvalue)).to eq(false) - end - end - - [1,32768].each do |testvalue| - it "should return #{testvalue} for an input of #{testvalue}" do - expect(subject.migration_limit(testvalue)).to eq(testvalue) - end - end - - [-1,-32768].each do |testvalue| - it "should return nil for an input of #{testvalue}" do - expect(subject.migration_limit(testvalue)).to be_nil - end - end - end - - describe '#migrate_vm' do - let(:vsphere) { double('vsphere') } - - before do - expect(subject).not_to be_nil - end - - it 'calls _migrate_vm' do - expect(Thread).to receive(:new).and_yield - expect(subject).to receive(:_migrate_vm).with(vm, pool, vsphere) - - subject.migrate_vm(vm, pool, vsphere) - end - end - - describe "#_migrate_vm" do - let(:vsphere) { double('vsphere') } - let(:vm_parent_hostname) { 'parent1' } - let(:config) { - YAML.load(<<-EOT ---- -:config: - migration_limit: 5 -:pools: - - name: #{pool} -EOT - ) - } - - before do - expect(subject).not_to be_nil - end - - context 'when an error occurs' do - it 'should log an error message and attempt to remove from vmpooler_migration queue' do - expect(vsphere).to receive(:find_vm).with(vm).and_raise(RuntimeError,'MockError') - expect(logger).to receive(:log).with('s', "[x] [#{pool}] '#{vm}' migration failed with an error: MockError") - expect(subject).to receive(:remove_vmpooler_migration_vm) - subject._migrate_vm(vm, pool, vsphere) - end - end - - context 'when VM does not exist' do - it 'should log an error message when VM does not exist' do - expect(vsphere).to receive(:find_vm).with(vm).and_return(nil) - # This test is quite fragile. Should refactor the code to make this scenario easier to detect - expect(logger).to receive(:log).with('s', "[x] [#{pool}] '#{vm}' migration failed with an error: undefined method `summary' for nil:NilClass") - subject._migrate_vm(vm, pool, vsphere) - end - end - - context 'when VM exists but migration is disabled' do - before(:each) do - expect(vsphere).to receive(:find_vm).with(vm).and_return(host) - allow(subject).to receive(:get_vm_host_info).with(host).and_return([{'name' => vm_parent_hostname}, vm_parent_hostname]) - create_migrating_vm(vm, pool) - end - - [-1,-32768,false,0].each do |testvalue| - it "should not migrate a VM if the migration limit is #{testvalue}" do - config[:config]['migration_limit'] = testvalue - expect(logger).to receive(:log).with('s', "[ ] [#{pool}] '#{vm}' is running on #{vm_parent_hostname}") - subject._migrate_vm(vm, pool, vsphere) - end - - it "should remove the VM from vmpooler__migrating queue in redis if the migration limit is #{testvalue}" do - redis.sadd("vmpooler__migrating__#{pool}", vm) - config[:config]['migration_limit'] = testvalue - - expect(redis.sismember("vmpooler__migrating__#{pool}",vm)).to be_truthy - subject._migrate_vm(vm, pool, vsphere) - expect(redis.sismember("vmpooler__migrating__#{pool}",vm)).to be_falsey - end - end - end - - context 'when VM exists but migration limit is reached' do - before(:each) do - expect(vsphere).to receive(:find_vm).with(vm).and_return(host) - allow(subject).to receive(:get_vm_host_info).with(host).and_return([{'name' => vm_parent_hostname}, vm_parent_hostname]) - - create_migrating_vm(vm, pool) - redis.sadd('vmpooler__migration', 'fakevm1') - redis.sadd('vmpooler__migration', 'fakevm2') - redis.sadd('vmpooler__migration', 'fakevm3') - redis.sadd('vmpooler__migration', 'fakevm4') - redis.sadd('vmpooler__migration', 'fakevm5') - end - - it "should not migrate a VM if the migration limit is reached" do - expect(logger).to receive(:log).with('s',"[ ] [#{pool}] '#{vm}' is running on #{vm_parent_hostname}. No migration will be evaluated since the migration_limit has been reached") - subject._migrate_vm(vm, pool, vsphere) - end - - it "should remove the VM from vmpooler__migrating queue in redis if the migration limit is reached" do - expect(redis.sismember("vmpooler__migrating__#{pool}",vm)).to be_truthy - subject._migrate_vm(vm, pool, vsphere) - expect(redis.sismember("vmpooler__migrating__#{pool}",vm)).to be_falsey - end - end - - context 'when VM exists but migration limit is not yet reached' do - before(:each) do - expect(vsphere).to receive(:find_vm).with(vm).and_return(host) - allow(subject).to receive(:get_vm_host_info).with(host).and_return([{'name' => vm_parent_hostname}, vm_parent_hostname]) - - create_migrating_vm(vm, pool) - redis.sadd('vmpooler__migration', 'fakevm1') - redis.sadd('vmpooler__migration', 'fakevm2') - end - - context 'and host to migrate to is the same as the current host' do - before(:each) do - expect(vsphere).to receive(:find_least_used_compatible_host).with(host).and_return([{'name' => vm_parent_hostname}, vm_parent_hostname]) - end - - it "should not migrate the VM" do - expect(logger).to receive(:log).with('s', "[ ] [#{pool}] No migration required for '#{vm}' running on #{vm_parent_hostname}") - subject._migrate_vm(vm, pool, vsphere) - end - - it "should remove the VM from vmpooler__migrating queue in redis" do - expect(redis.sismember("vmpooler__migrating__#{pool}",vm)).to be_truthy - subject._migrate_vm(vm, pool, vsphere) - expect(redis.sismember("vmpooler__migrating__#{pool}",vm)).to be_falsey - end - - it "should not change the vmpooler_migration queue count" do - before_count = redis.scard('vmpooler__migration') - subject._migrate_vm(vm, pool, vsphere) - expect(redis.scard('vmpooler__migration')).to eq(before_count) - end - - it "should call remove_vmpooler_migration_vm" do - expect(subject).to receive(:remove_vmpooler_migration_vm) - subject._migrate_vm(vm, pool, vsphere) - end - end - - context 'and host to migrate to different to the current host' do - let(:vm_new_hostname) { 'new_hostname' } - before(:each) do - expect(vsphere).to receive(:find_least_used_compatible_host).with(host).and_return([{'name' => vm_new_hostname}, vm_new_hostname]) - expect(subject).to receive(:migrate_vm_and_record_timing).with(host, vm, pool, Object, vm_parent_hostname, vm_new_hostname, vsphere).and_return('1.00') - end - - it "should migrate the VM" do - expect(logger).to receive(:log).with('s', "[>] [#{pool}] '#{vm}' migrated from #{vm_parent_hostname} to #{vm_new_hostname} in 1.00 seconds") - subject._migrate_vm(vm, pool, vsphere) - end - - it "should remove the VM from vmpooler__migrating queue in redis" do - expect(redis.sismember("vmpooler__migrating__#{pool}",vm)).to be_truthy - subject._migrate_vm(vm, pool, vsphere) - expect(redis.sismember("vmpooler__migrating__#{pool}",vm)).to be_falsey - end - - it "should not change the vmpooler_migration queue count" do - before_count = redis.scard('vmpooler__migration') - subject._migrate_vm(vm, pool, vsphere) - expect(redis.scard('vmpooler__migration')).to eq(before_count) - end - - it "should call remove_vmpooler_migration_vm" do - expect(subject).to receive(:remove_vmpooler_migration_vm) - subject._migrate_vm(vm, pool, vsphere) - end - end - end - end - - describe "#get_vm_host_info" do - before do - expect(subject).not_to be_nil - end - - let(:vm_object) { double('vm_object') } - let(:parent_host) { double('parent_host') } - - it 'should return an array with host information' do - expect(vm_object).to receive_message_chain(:summary, :runtime, :host).and_return(parent_host) - expect(parent_host).to receive(:name).and_return('vmhostname') - - expect(subject.get_vm_host_info(vm_object)).to eq([parent_host,'vmhostname']) - end - end - describe "#execute!" do let(:threads) {{}} @@ -1703,12 +1493,6 @@ EOT expect(redis.get('vmpooler__tasks__clone')).to eq('0') end - it 'should clear migration tasks' do - redis.set('vmpooler__migration', 1) - subject.execute!(1,0) - expect(redis.get('vmpooler__migration')).to be_nil - end - it 'should run the check_disk_queue method' do expect(subject).to receive(:check_disk_queue) @@ -1965,69 +1749,6 @@ EOT end end - describe '#remove_vmpooler_migration_vm' do - before do - expect(subject).not_to be_nil - end - - it 'should remove the migration from redis' do - redis.sadd('vmpooler__migration', vm) - expect(redis.sismember('vmpooler__migration',vm)).to be(true) - subject.remove_vmpooler_migration_vm(pool, vm) - expect(redis.sismember('vmpooler__migration',vm)).to be(false) - end - - it 'should log a message and swallow an error if one occurs' do - expect(redis).to receive(:srem).and_raise(RuntimeError,'MockError') - expect(logger).to receive(:log).with('s', "[x] [#{pool}] '#{vm}' removal from vmpooler__migration failed with an error: MockError") - subject.remove_vmpooler_migration_vm(pool, vm) - end - end - - describe '#migrate_vm_and_record_timing' do - let(:vsphere) { double('vsphere') } - let(:vm_object) { double('vm_object') } - let(:source_host_name) { 'source_host' } - let(:dest_host_name) { 'dest_host' } - - before do - expect(subject).not_to be_nil - end - - before(:each) do - create_vm(vm,token) - expect(vsphere).to receive(:migrate_vm_host).with(vm_object, host) - end - - it 'should return the elapsed time for the migration' do - result = subject.migrate_vm_and_record_timing(vm_object, vm, pool, host, source_host_name, dest_host_name, vsphere) - expect(result).to match(/0\.[\d]+/) - end - - it 'should add timing metric' do - expect(metrics).to receive(:timing).with("migrate.#{pool}",String) - subject.migrate_vm_and_record_timing(vm_object, vm, pool, host, source_host_name, dest_host_name, vsphere) - end - - it 'should increment from_host and to_host metric' do - expect(metrics).to receive(:increment).with("migrate_from.#{source_host_name}") - expect(metrics).to receive(:increment).with("migrate_to.#{dest_host_name}") - subject.migrate_vm_and_record_timing(vm_object, vm, pool, host, source_host_name, dest_host_name, vsphere) - end - - it 'should set migration_time metric in redis' do - expect(redis.hget("vmpooler__vm__#{vm}", 'migration_time')).to be_nil - subject.migrate_vm_and_record_timing(vm_object, vm, pool, host, source_host_name, dest_host_name, vsphere) - expect(redis.hget("vmpooler__vm__#{vm}", 'migration_time')).to match(/0\.[\d]+/) - end - - it 'should set checkout_to_migration metric in redis' do - expect(redis.hget("vmpooler__vm__#{vm}", 'checkout_to_migration')).to be_nil - subject.migrate_vm_and_record_timing(vm_object, vm, pool, host, source_host_name, dest_host_name, vsphere) - expect(redis.hget("vmpooler__vm__#{vm}", 'checkout_to_migration')).to match(/0\.[\d]+/) - end - end - describe '#_check_pool' do # Default test fixtures will consist of; # - Empty Redis dataset @@ -2057,7 +1778,6 @@ EOT # INVENTORY context 'Conducting inventory' do before(:each) do - allow(subject).to receive(:migrate_vm) allow(subject).to receive(:check_running_vm) allow(subject).to receive(:check_ready_vm) allow(subject).to receive(:check_pending_vm) @@ -2092,7 +1812,7 @@ EOT expect(redis.sismember("vmpooler__completed__#{pool}", new_vm)).to be(true) end - ['running','ready','pending','completed','discovered','migrating'].each do |queue_name| + ['running','ready','pending','completed','discovered'].each do |queue_name| it "should not discover VMs in the #{queue_name} queue" do expect(vsphere).to receive(:find_folder).and_return(MockFindFolder.new([new_vm])) @@ -2356,7 +2076,6 @@ EOT ['pending','ready','running','completed'].each do |queue_name| context "exists in the #{queue_name} queue" do before(:each) do - allow(subject).to receive(:migrate_vm) allow(subject).to receive(:check_running_vm) allow(subject).to receive(:check_ready_vm) allow(subject).to receive(:check_pending_vm) @@ -2392,43 +2111,6 @@ EOT end end - # MIGRATIONS - context 'Migrating VM not in the inventory' do - before(:each) do - expect(vsphere).to receive(:find_folder).and_return(MockFindFolder.new([new_vm])) - expect(logger).to receive(:log).with('s', "[?] [#{pool}] '#{new_vm}' added to 'discovered' queue") - create_migrating_vm(vm,pool) - end - - it 'should not do anything' do - expect(subject).to receive(:migrate_vm).exactly(0).times - - subject._check_pool(pool_object,vsphere) - end - end - - context 'Migrating VM in the inventory' do - before(:each) do - expect(vsphere).to receive(:find_folder).and_return(MockFindFolder.new([vm])) - allow(subject).to receive(:check_ready_vm) - allow(logger).to receive(:log).with("s", "[!] [#{pool}] is empty") - create_migrating_vm(vm,pool) - end - - it 'should log an error if one occurs' do - expect(subject).to receive(:migrate_vm).and_raise(RuntimeError,'MockError') - expect(logger).to receive(:log).with('s', "[x] [#{pool}] '#{vm}' failed to migrate: MockError") - - subject._check_pool(pool_object,vsphere) - end - - it 'should call migrate_vm' do - expect(subject).to receive(:migrate_vm).with(vm,pool,vsphere) - - subject._check_pool(pool_object,vsphere) - end - end - # REPOPULATE context 'Repopulate a pool' do it 'should not call clone_vm when number of VMs is equal to the pool size' do @@ -2458,7 +2140,7 @@ EOT end end - ['running','completed','discovered','migrating'].each do |queue_name| + ['running','completed','discovered'].each do |queue_name| it "should not use VMs in #{queue_name} queue to caculate pool size" do expect(vsphere).to receive(:find_folder).and_return(MockFindFolder.new([vm])) expect(subject).to receive(:clone_vm) diff --git a/vmpooler.yaml.example b/vmpooler.yaml.example index c128b5d..a2c9fd0 100644 --- a/vmpooler.yaml.example +++ b/vmpooler.yaml.example @@ -247,13 +247,6 @@ # a separator. # (optional; default: '') # -# - migration_limit -# When set to any value greater than 0 enable VM migration at checkout. -# When enabled this capability will evaluate a VM for migration when it is requested -# in an effort to maintain a more even distribution of load across compute resources. -# The migration_limit ensures that no more than n migrations will be evaluated at any one time -# and greatly reduces the possibilty of VMs ending up bunched together on a particular host. -# # - max_tries # Set the max number of times a connection should retry in vsphere helper. # This optional setting allows a user to dial in retry limits to