Minimize duplicate checking of VMs

This commit updates check_pool pending, running and ready methods to greatly reduce instances in which the VM object is retrieved. Without this change get_vm is run for each of these check_pool steps even though the VM is already validated as being in inventory being running the check. This is left for checking running VMs when the VM is no longer ready. Without this change there is an unnecessarily large volume of VM object checks.
This commit is contained in:
kirby@puppetlabs.com 2018-06-28 13:49:20 -07:00
parent 84065dd059
commit 59a09a71af
2 changed files with 53 additions and 162 deletions

View file

@ -49,13 +49,8 @@ module Vmpooler
mutex = vm_mutex(vm) mutex = vm_mutex(vm)
return if mutex.locked? return if mutex.locked?
mutex.synchronize do mutex.synchronize do
host = provider.get_vm(pool, vm)
unless host
fail_pending_vm(vm, pool, timeout, false)
return
end
if provider.vm_ready?(pool, vm) if provider.vm_ready?(pool, vm)
move_pending_vm_to_ready(vm, pool, host) move_pending_vm_to_ready(vm, pool)
else else
fail_pending_vm(vm, pool, timeout) fail_pending_vm(vm, pool, timeout)
end end
@ -86,14 +81,7 @@ module Vmpooler
false false
end end
def move_pending_vm_to_ready(vm, pool, host) def move_pending_vm_to_ready(vm, pool)
if host['hostname'] == vm
begin
Socket.getaddrinfo(vm, nil) # WTF? I assume this is just priming the local DNS resolver cache?!?!
rescue # rubocop:disable Lint/HandleExceptions
# Do not care about errors what-so-ever
end
clone_time = $redis.hget('vmpooler__vm__' + vm, 'clone') clone_time = $redis.hget('vmpooler__vm__' + vm, 'clone')
finish = format('%.2f', Time.now - Time.parse(clone_time)) if clone_time finish = format('%.2f', Time.now - Time.parse(clone_time)) if clone_time
@ -102,9 +90,17 @@ module Vmpooler
# last boot time is displayed in API, and used by alarming script # last boot time is displayed in API, and used by alarming script
$redis.hset('vmpooler__lastboot', pool, Time.now) $redis.hset('vmpooler__lastboot', pool, Time.now)
$metrics.timing("time_to_ready_state.#{pool}", finish) $metrics.timing("time_to_ready_state.#{pool}", finish)
$logger.log('s', "[>] [#{pool}] '#{vm}' moved from 'pending' to 'ready' queue") $logger.log('s', "[>] [#{pool}] '#{vm}' moved from 'pending' to 'ready' queue")
end end
def vm_still_ready?(pool_name, vm_name, provider)
# Check if the VM is still ready/available
return true if provider.vm_ready?(pool_name, vm_name)
raise("VM #{vm_name} is not ready")
rescue
move_vm_queue(pool_name, vm_name, 'ready', 'completed', "is unreachable, removed from 'ready' queue")
end end
def check_ready_vm(vm, pool, ttl, provider) def check_ready_vm(vm, pool, ttl, provider)
@ -126,22 +122,7 @@ module Vmpooler
check_stamp = $redis.hget('vmpooler__vm__' + vm, 'check') check_stamp = $redis.hget('vmpooler__vm__' + vm, 'check')
return if check_stamp && (((Time.now - Time.parse(check_stamp)) / 60) <= $config[:config]['vm_checktime']) return if check_stamp && (((Time.now - Time.parse(check_stamp)) / 60) <= $config[:config]['vm_checktime'])
host = provider.get_vm(pool, vm)
# Check if the host even exists
unless host
$redis.srem('vmpooler__ready__' + pool, vm)
$logger.log('s', "[!] [#{pool}] '#{vm}' not found in inventory, removed from 'ready' queue")
return
end
$redis.hset('vmpooler__vm__' + vm, 'check', Time.now) $redis.hset('vmpooler__vm__' + vm, 'check', Time.now)
# Check if the VM is not powered on, before checking TTL
unless host['powerstate'].casecmp('poweredon').zero?
$redis.smove('vmpooler__ready__' + pool, 'vmpooler__completed__' + pool, vm)
$logger.log('d', "[!] [#{pool}] '#{vm}' appears to be powered off, removed from 'ready' queue")
return
end
# Check if the hosts TTL has expired # Check if the hosts TTL has expired
if ttl > 0 if ttl > 0
# host['boottime'] may be nil if host is not powered on # host['boottime'] may be nil if host is not powered on
@ -153,23 +134,7 @@ module Vmpooler
end end
end end
# Check if the hostname has magically changed from underneath Pooler vm_still_ready?(pool, vm, provider)
if host['hostname'] != vm
$redis.smove('vmpooler__ready__' + pool, 'vmpooler__completed__' + pool, vm)
$logger.log('d', "[!] [#{pool}] '#{vm}' has mismatched hostname, removed from 'ready' queue")
return
end
# Check if the VM is still ready/available
begin
raise("VM #{vm} is not ready") unless provider.vm_ready?(pool, vm)
rescue
if $redis.smove('vmpooler__ready__' + pool, 'vmpooler__completed__' + pool, vm)
$logger.log('d', "[!] [#{pool}] '#{vm}' is unreachable, removed from 'ready' queue")
else
$logger.log('d', "[!] [#{pool}] '#{vm}' is unreachable, and failed to remove from 'ready' queue")
end
end
end end
end end
@ -188,9 +153,6 @@ module Vmpooler
mutex = vm_mutex(vm) mutex = vm_mutex(vm)
return if mutex.locked? return if mutex.locked?
mutex.synchronize do mutex.synchronize do
host = provider.get_vm(pool, vm)
if host
# Check that VM is within defined lifetime # Check that VM is within defined lifetime
checkouttime = $redis.hget('vmpooler__active__' + pool, vm) checkouttime = $redis.hget('vmpooler__active__' + pool, vm)
if checkouttime if checkouttime
@ -198,8 +160,20 @@ module Vmpooler
if (ttl.to_i > 0) && (running.to_i >= ttl.to_i) if (ttl.to_i > 0) && (running.to_i >= ttl.to_i)
move_vm_queue(pool, vm, 'running', 'completed', "reached end of TTL after #{ttl} hours") move_vm_queue(pool, vm, 'running', 'completed', "reached end of TTL after #{ttl} hours")
return
end end
end end
if provider.vm_ready?(pool, vm)
return
else
host = provider.get_vm(pool, vm)
if host
return
else
move_vm_queue(pool, vm, 'running', 'completed', "is no longer in inventory, removing from running")
end
end end
end end
end end

View file

@ -64,23 +64,11 @@ EOT
expect(subject).not_to be_nil expect(subject).not_to be_nil
end end
context 'host does not exist or not in pool' do
it 'calls fail_pending_vm' do
expect(provider).to receive(:get_vm).with(pool,vm).and_return(nil)
expect(subject).to receive(:fail_pending_vm).with(vm, pool, timeout, false)
subject._check_pending_vm(vm, pool, timeout, provider)
end
end
context 'host is in pool' do context 'host is in pool' do
before do
expect(provider).to receive(:get_vm).with(pool,vm).and_return(host)
end
it 'calls move_pending_vm_to_ready if host is ready' do it 'calls move_pending_vm_to_ready if host is ready' do
expect(provider).to receive(:vm_ready?).with(pool,vm).and_return(true) expect(provider).to receive(:vm_ready?).with(pool,vm).and_return(true)
expect(subject).to receive(:move_pending_vm_to_ready).with(vm, pool, host) expect(subject).to receive(:move_pending_vm_to_ready).with(vm, pool)
subject._check_pending_vm(vm, pool, timeout, provider) subject._check_pending_vm(vm, pool, timeout, provider)
end end
@ -184,29 +172,17 @@ EOT
before do before do
expect(subject).not_to be_nil expect(subject).not_to be_nil
allow(Socket).to receive(:getaddrinfo)
end end
before(:each) do before(:each) do
create_pending_vm(pool,vm) create_pending_vm(pool,vm)
end end
context 'when hostname does not match VM name' do
it 'should not take any action' do
expect(logger).to receive(:log).exactly(0).times
expect(Socket).to receive(:getaddrinfo).exactly(0).times
host['hostname'] = 'different_name'
subject.move_pending_vm_to_ready(vm, pool, host)
end
end
context 'when hostname matches VM name' do context 'when hostname matches VM name' do
it 'should move the VM from pending to ready pool' do it 'should move the VM from pending to ready pool' do
expect(redis.sismember("vmpooler__pending__#{pool}", vm)).to be(true) expect(redis.sismember("vmpooler__pending__#{pool}", vm)).to be(true)
expect(redis.sismember("vmpooler__ready__#{pool}", vm)).to be(false) expect(redis.sismember("vmpooler__ready__#{pool}", vm)).to be(false)
subject.move_pending_vm_to_ready(vm, pool, host) subject.move_pending_vm_to_ready(vm, pool)
expect(redis.sismember("vmpooler__pending__#{pool}", vm)).to be(false) expect(redis.sismember("vmpooler__pending__#{pool}", vm)).to be(false)
expect(redis.sismember("vmpooler__ready__#{pool}", vm)).to be(true) expect(redis.sismember("vmpooler__ready__#{pool}", vm)).to be(true)
end end
@ -214,7 +190,7 @@ EOT
it 'should log a message' do it 'should log a message' do
expect(logger).to receive(:log).with('s', "[>] [#{pool}] '#{vm}' moved from 'pending' to 'ready' queue") expect(logger).to receive(:log).with('s', "[>] [#{pool}] '#{vm}' moved from 'pending' to 'ready' queue")
subject.move_pending_vm_to_ready(vm, pool, host) subject.move_pending_vm_to_ready(vm, pool)
end end
it 'should receive time_to_ready_state metric' do it 'should receive time_to_ready_state metric' do
@ -228,25 +204,25 @@ EOT
it 'should set the boot time in redis' do it 'should set the boot time in redis' do
redis.hset("vmpooler__vm__#{vm}", 'clone',Time.now.to_s) redis.hset("vmpooler__vm__#{vm}", 'clone',Time.now.to_s)
expect(redis.hget('vmpooler__boot__' + Date.today.to_s, pool + ':' + vm)).to be_nil expect(redis.hget('vmpooler__boot__' + Date.today.to_s, pool + ':' + vm)).to be_nil
subject.move_pending_vm_to_ready(vm, pool, host) subject.move_pending_vm_to_ready(vm, pool)
expect(redis.hget('vmpooler__boot__' + Date.today.to_s, pool + ':' + vm)).to_not be_nil expect(redis.hget('vmpooler__boot__' + Date.today.to_s, pool + ':' + vm)).to_not be_nil
# TODO Should we inspect the value to see if it's valid? # TODO Should we inspect the value to see if it's valid?
end end
it 'should not determine boot timespan if clone start time not set' do it 'should not determine boot timespan if clone start time not set' do
expect(redis.hget('vmpooler__boot__' + Date.today.to_s, pool + ':' + vm)).to be_nil expect(redis.hget('vmpooler__boot__' + Date.today.to_s, pool + ':' + vm)).to be_nil
subject.move_pending_vm_to_ready(vm, pool, host) subject.move_pending_vm_to_ready(vm, pool)
expect(redis.hget('vmpooler__boot__' + Date.today.to_s, pool + ':' + vm)).to eq("") # Possible implementation bug here. Should still be nil here expect(redis.hget('vmpooler__boot__' + Date.today.to_s, pool + ':' + vm)).to eq("") # Possible implementation bug here. Should still be nil here
end end
it 'should raise error if clone start time is not parsable' do it 'should raise error if clone start time is not parsable' do
redis.hset("vmpooler__vm__#{vm}", 'clone','iamnotparsable_asdate') redis.hset("vmpooler__vm__#{vm}", 'clone','iamnotparsable_asdate')
expect{subject.move_pending_vm_to_ready(vm, pool, host)}.to raise_error(/iamnotparsable_asdate/) expect{subject.move_pending_vm_to_ready(vm, pool)}.to raise_error(/iamnotparsable_asdate/)
end end
it 'should save the last boot time' do it 'should save the last boot time' do
expect(redis.hget('vmpooler__lastboot', pool)).to be(nil) expect(redis.hget('vmpooler__lastboot', pool)).to be(nil)
subject.move_pending_vm_to_ready(vm, pool, host) subject.move_pending_vm_to_ready(vm, pool)
expect(redis.hget('vmpooler__lastboot', pool)).to_not be(nil) expect(redis.hget('vmpooler__lastboot', pool)).to_not be(nil)
end end
end end
@ -292,29 +268,6 @@ EOT
end end
end end
context 'a VM that does not exist' do
before do
expect(provider).to receive(:get_vm).with(pool,vm).and_return(nil)
end
it 'should not set the current check timestamp' do
expect(redis.hget("vmpooler__vm__#{vm}", 'check')).to be_nil
subject._check_ready_vm(vm, pool, ttl, provider)
expect(redis.hget("vmpooler__vm__#{vm}", 'check')).to be_nil
end
it 'should log a message' do
expect(logger).to receive(:log).with('s', "[!] [#{pool}] '#{vm}' not found in inventory, removed from 'ready' queue")
subject._check_ready_vm(vm, pool, ttl, provider)
end
it 'should remove the VM from the ready queue' do
expect(redis.sismember("vmpooler__ready__#{pool}", vm)).to be(true)
subject._check_ready_vm(vm, pool, ttl, provider)
expect(redis.sismember("vmpooler__ready__#{pool}", vm)).to be(false)
end
end
context 'a VM that has never been checked' do context 'a VM that has never been checked' do
let(:last_check_date) { Date.new(2001,1,1).to_s } let(:last_check_date) { Date.new(2001,1,1).to_s }
@ -348,12 +301,9 @@ EOT
end end
context 'is turned off' do context 'is turned off' do
before(:each) do
host['powerstate'] = 'PoweredOff'
end
it 'should move the VM to the completed queue' do it 'should move the VM to the completed queue' do
expect(redis).to receive(:smove).with("vmpooler__ready__#{pool}", "vmpooler__completed__#{pool}", vm) expect(subject).to receive(:move_vm_queue).with(pool, vm, 'ready', 'completed', "is unreachable, removed from 'ready' queue")
subject._check_ready_vm(vm, pool, ttl, provider) subject._check_ready_vm(vm, pool, ttl, provider)
end end
@ -367,33 +317,8 @@ EOT
end end
it 'should log messages about being powered off' do it 'should log messages about being powered off' do
expect(logger).to receive(:log).with('d', "[!] [#{pool}] '#{vm}' appears to be powered off, removed from 'ready' queue") expect(provider).to receive(:vm_ready?).and_return(false)
expect(logger).to receive(:log).with('d', "[!] [#{pool}] '#{vm}' is unreachable, removed from 'ready' queue")
subject._check_ready_vm(vm, pool, ttl, provider)
end
end
context 'is turned on, a name mismatch' do
before(:each) do
host['hostname'] = 'different_name'
end
it 'should move the VM to the completed queue' do
expect(redis).to receive(:smove).with("vmpooler__ready__#{pool}", "vmpooler__completed__#{pool}", vm)
subject._check_ready_vm(vm, pool, ttl, provider)
end
it 'should move the VM to the completed queue in Redis' do
expect(redis.sismember("vmpooler__ready__#{pool}", vm)).to be(true)
expect(redis.sismember("vmpooler__completed__#{pool}", vm)).to be(false)
subject._check_ready_vm(vm, pool, ttl, provider)
expect(redis.sismember("vmpooler__ready__#{pool}", vm)).to be(false)
expect(redis.sismember("vmpooler__completed__#{pool}", vm)).to be(true)
end
it 'should log messages about being misnamed' do
expect(logger).to receive(:log).with('d', "[!] [#{pool}] '#{vm}' has mismatched hostname, removed from 'ready' queue")
subject._check_ready_vm(vm, pool, ttl, provider) subject._check_ready_vm(vm, pool, ttl, provider)
end end
@ -472,32 +397,24 @@ EOT
allow(provider).to receive(:get_vm).with(pool,vm).and_return(host) allow(provider).to receive(:get_vm).with(pool,vm).and_return(host)
end end
it 'does nothing with a missing VM' do it 'moves a missing VM to the completed queue' do
expect(provider).to receive(:vm_ready?).and_return(false)
expect(provider).to receive(:get_vm).with(pool,vm).and_return(nil) expect(provider).to receive(:get_vm).with(pool,vm).and_return(nil)
expect(redis.sismember("vmpooler__running__#{pool}", vm)).to be(true) expect(redis.sismember("vmpooler__running__#{pool}", vm)).to be(true)
subject._check_running_vm(vm, pool, timeout, provider) subject._check_running_vm(vm, pool, timeout, provider)
expect(redis.sismember("vmpooler__running__#{pool}", vm)).to be(true) expect(redis.sismember("vmpooler__running__#{pool}", vm)).to be(false)
end end
context 'valid host' do context 'valid host' do
it 'should not move VM when not poweredOn' do
# I'm not sure this test is useful. There is no codepath
# in _check_running_vm that looks at Power State
host['powerstate'] = 'PoweredOff'
expect(logger).not_to receive(:log).with('d', "[!] [#{pool}] '#{vm}' appears to be powered off or dead")
expect(redis.sismember("vmpooler__running__#{pool}", vm)).to be(true)
subject._check_running_vm(vm, pool, timeout, provider)
expect(redis.sismember("vmpooler__running__#{pool}", vm)).to be(true)
end
it 'should not move VM if it has no checkout time' do it 'should not move VM if it has no checkout time' do
expect(provider).to receive(:vm_ready?).and_return(true)
expect(redis.sismember("vmpooler__running__#{pool}", vm)).to be(true) expect(redis.sismember("vmpooler__running__#{pool}", vm)).to be(true)
subject._check_running_vm(vm, pool, 0, provider) subject._check_running_vm(vm, pool, 0, provider)
expect(redis.sismember("vmpooler__running__#{pool}", vm)).to be(true) expect(redis.sismember("vmpooler__running__#{pool}", vm)).to be(true)
end end
it 'should not move VM if TTL is zero' do it 'should not move VM if TTL is zero' do
expect(provider).to receive(:vm_ready?).and_return(true)
redis.hset("vmpooler__active__#{pool}", vm,(Time.now - timeout*60*60).to_s) redis.hset("vmpooler__active__#{pool}", vm,(Time.now - timeout*60*60).to_s)
expect(redis.sismember("vmpooler__running__#{pool}", vm)).to be(true) expect(redis.sismember("vmpooler__running__#{pool}", vm)).to be(true)
subject._check_running_vm(vm, pool, 0, provider) subject._check_running_vm(vm, pool, 0, provider)