Minimize duplicate checking of VMs

This commit updates check_pool pending, running and ready methods to greatly reduce instances in which the VM object is retrieved. Without this change get_vm is run for each of these check_pool steps even though the VM is already validated as being in inventory being running the check. This is left for checking running VMs when the VM is no longer ready. Without this change there is an unnecessarily large volume of VM object checks.
This commit is contained in:
kirby@puppetlabs.com 2018-06-28 13:49:20 -07:00
parent 84065dd059
commit 59a09a71af
2 changed files with 53 additions and 162 deletions

View file

@ -49,13 +49,8 @@ module Vmpooler
mutex = vm_mutex(vm)
return if mutex.locked?
mutex.synchronize do
host = provider.get_vm(pool, vm)
unless host
fail_pending_vm(vm, pool, timeout, false)
return
end
if provider.vm_ready?(pool, vm)
move_pending_vm_to_ready(vm, pool, host)
move_pending_vm_to_ready(vm, pool)
else
fail_pending_vm(vm, pool, timeout)
end
@ -86,14 +81,7 @@ module Vmpooler
false
end
def move_pending_vm_to_ready(vm, pool, host)
if host['hostname'] == vm
begin
Socket.getaddrinfo(vm, nil) # WTF? I assume this is just priming the local DNS resolver cache?!?!
rescue # rubocop:disable Lint/HandleExceptions
# Do not care about errors what-so-ever
end
def move_pending_vm_to_ready(vm, pool)
clone_time = $redis.hget('vmpooler__vm__' + vm, 'clone')
finish = format('%.2f', Time.now - Time.parse(clone_time)) if clone_time
@ -102,9 +90,17 @@ module Vmpooler
# last boot time is displayed in API, and used by alarming script
$redis.hset('vmpooler__lastboot', pool, Time.now)
$metrics.timing("time_to_ready_state.#{pool}", finish)
$logger.log('s', "[>] [#{pool}] '#{vm}' moved from 'pending' to 'ready' queue")
end
def vm_still_ready?(pool_name, vm_name, provider)
# Check if the VM is still ready/available
return true if provider.vm_ready?(pool_name, vm_name)
raise("VM #{vm_name} is not ready")
rescue
move_vm_queue(pool_name, vm_name, 'ready', 'completed', "is unreachable, removed from 'ready' queue")
end
def check_ready_vm(vm, pool, ttl, provider)
@ -126,22 +122,7 @@ module Vmpooler
check_stamp = $redis.hget('vmpooler__vm__' + vm, 'check')
return if check_stamp && (((Time.now - Time.parse(check_stamp)) / 60) <= $config[:config]['vm_checktime'])
host = provider.get_vm(pool, vm)
# Check if the host even exists
unless host
$redis.srem('vmpooler__ready__' + pool, vm)
$logger.log('s', "[!] [#{pool}] '#{vm}' not found in inventory, removed from 'ready' queue")
return
end
$redis.hset('vmpooler__vm__' + vm, 'check', Time.now)
# Check if the VM is not powered on, before checking TTL
unless host['powerstate'].casecmp('poweredon').zero?
$redis.smove('vmpooler__ready__' + pool, 'vmpooler__completed__' + pool, vm)
$logger.log('d', "[!] [#{pool}] '#{vm}' appears to be powered off, removed from 'ready' queue")
return
end
# Check if the hosts TTL has expired
if ttl > 0
# host['boottime'] may be nil if host is not powered on
@ -153,23 +134,7 @@ module Vmpooler
end
end
# Check if the hostname has magically changed from underneath Pooler
if host['hostname'] != vm
$redis.smove('vmpooler__ready__' + pool, 'vmpooler__completed__' + pool, vm)
$logger.log('d', "[!] [#{pool}] '#{vm}' has mismatched hostname, removed from 'ready' queue")
return
end
# Check if the VM is still ready/available
begin
raise("VM #{vm} is not ready") unless provider.vm_ready?(pool, vm)
rescue
if $redis.smove('vmpooler__ready__' + pool, 'vmpooler__completed__' + pool, vm)
$logger.log('d', "[!] [#{pool}] '#{vm}' is unreachable, removed from 'ready' queue")
else
$logger.log('d', "[!] [#{pool}] '#{vm}' is unreachable, and failed to remove from 'ready' queue")
end
end
vm_still_ready?(pool, vm, provider)
end
end
@ -188,9 +153,6 @@ module Vmpooler
mutex = vm_mutex(vm)
return if mutex.locked?
mutex.synchronize do
host = provider.get_vm(pool, vm)
if host
# Check that VM is within defined lifetime
checkouttime = $redis.hget('vmpooler__active__' + pool, vm)
if checkouttime
@ -198,8 +160,20 @@ module Vmpooler
if (ttl.to_i > 0) && (running.to_i >= ttl.to_i)
move_vm_queue(pool, vm, 'running', 'completed', "reached end of TTL after #{ttl} hours")
return
end
end
if provider.vm_ready?(pool, vm)
return
else
host = provider.get_vm(pool, vm)
if host
return
else
move_vm_queue(pool, vm, 'running', 'completed', "is no longer in inventory, removing from running")
end
end
end
end

View file

@ -64,23 +64,11 @@ EOT
expect(subject).not_to be_nil
end
context 'host does not exist or not in pool' do
it 'calls fail_pending_vm' do
expect(provider).to receive(:get_vm).with(pool,vm).and_return(nil)
expect(subject).to receive(:fail_pending_vm).with(vm, pool, timeout, false)
subject._check_pending_vm(vm, pool, timeout, provider)
end
end
context 'host is in pool' do
before do
expect(provider).to receive(:get_vm).with(pool,vm).and_return(host)
end
it 'calls move_pending_vm_to_ready if host is ready' do
expect(provider).to receive(:vm_ready?).with(pool,vm).and_return(true)
expect(subject).to receive(:move_pending_vm_to_ready).with(vm, pool, host)
expect(subject).to receive(:move_pending_vm_to_ready).with(vm, pool)
subject._check_pending_vm(vm, pool, timeout, provider)
end
@ -184,29 +172,17 @@ EOT
before do
expect(subject).not_to be_nil
allow(Socket).to receive(:getaddrinfo)
end
before(:each) do
create_pending_vm(pool,vm)
end
context 'when hostname does not match VM name' do
it 'should not take any action' do
expect(logger).to receive(:log).exactly(0).times
expect(Socket).to receive(:getaddrinfo).exactly(0).times
host['hostname'] = 'different_name'
subject.move_pending_vm_to_ready(vm, pool, host)
end
end
context 'when hostname matches VM name' do
it 'should move the VM from pending to ready pool' do
expect(redis.sismember("vmpooler__pending__#{pool}", vm)).to be(true)
expect(redis.sismember("vmpooler__ready__#{pool}", vm)).to be(false)
subject.move_pending_vm_to_ready(vm, pool, host)
subject.move_pending_vm_to_ready(vm, pool)
expect(redis.sismember("vmpooler__pending__#{pool}", vm)).to be(false)
expect(redis.sismember("vmpooler__ready__#{pool}", vm)).to be(true)
end
@ -214,7 +190,7 @@ EOT
it 'should log a message' do
expect(logger).to receive(:log).with('s', "[>] [#{pool}] '#{vm}' moved from 'pending' to 'ready' queue")
subject.move_pending_vm_to_ready(vm, pool, host)
subject.move_pending_vm_to_ready(vm, pool)
end
it 'should receive time_to_ready_state metric' do
@ -228,25 +204,25 @@ EOT
it 'should set the boot time in redis' do
redis.hset("vmpooler__vm__#{vm}", 'clone',Time.now.to_s)
expect(redis.hget('vmpooler__boot__' + Date.today.to_s, pool + ':' + vm)).to be_nil
subject.move_pending_vm_to_ready(vm, pool, host)
subject.move_pending_vm_to_ready(vm, pool)
expect(redis.hget('vmpooler__boot__' + Date.today.to_s, pool + ':' + vm)).to_not be_nil
# TODO Should we inspect the value to see if it's valid?
end
it 'should not determine boot timespan if clone start time not set' do
expect(redis.hget('vmpooler__boot__' + Date.today.to_s, pool + ':' + vm)).to be_nil
subject.move_pending_vm_to_ready(vm, pool, host)
subject.move_pending_vm_to_ready(vm, pool)
expect(redis.hget('vmpooler__boot__' + Date.today.to_s, pool + ':' + vm)).to eq("") # Possible implementation bug here. Should still be nil here
end
it 'should raise error if clone start time is not parsable' do
redis.hset("vmpooler__vm__#{vm}", 'clone','iamnotparsable_asdate')
expect{subject.move_pending_vm_to_ready(vm, pool, host)}.to raise_error(/iamnotparsable_asdate/)
expect{subject.move_pending_vm_to_ready(vm, pool)}.to raise_error(/iamnotparsable_asdate/)
end
it 'should save the last boot time' do
expect(redis.hget('vmpooler__lastboot', pool)).to be(nil)
subject.move_pending_vm_to_ready(vm, pool, host)
subject.move_pending_vm_to_ready(vm, pool)
expect(redis.hget('vmpooler__lastboot', pool)).to_not be(nil)
end
end
@ -292,29 +268,6 @@ EOT
end
end
context 'a VM that does not exist' do
before do
expect(provider).to receive(:get_vm).with(pool,vm).and_return(nil)
end
it 'should not set the current check timestamp' do
expect(redis.hget("vmpooler__vm__#{vm}", 'check')).to be_nil
subject._check_ready_vm(vm, pool, ttl, provider)
expect(redis.hget("vmpooler__vm__#{vm}", 'check')).to be_nil
end
it 'should log a message' do
expect(logger).to receive(:log).with('s', "[!] [#{pool}] '#{vm}' not found in inventory, removed from 'ready' queue")
subject._check_ready_vm(vm, pool, ttl, provider)
end
it 'should remove the VM from the ready queue' do
expect(redis.sismember("vmpooler__ready__#{pool}", vm)).to be(true)
subject._check_ready_vm(vm, pool, ttl, provider)
expect(redis.sismember("vmpooler__ready__#{pool}", vm)).to be(false)
end
end
context 'a VM that has never been checked' do
let(:last_check_date) { Date.new(2001,1,1).to_s }
@ -348,12 +301,9 @@ EOT
end
context 'is turned off' do
before(:each) do
host['powerstate'] = 'PoweredOff'
end
it 'should move the VM to the completed queue' do
expect(redis).to receive(:smove).with("vmpooler__ready__#{pool}", "vmpooler__completed__#{pool}", vm)
expect(subject).to receive(:move_vm_queue).with(pool, vm, 'ready', 'completed', "is unreachable, removed from 'ready' queue")
subject._check_ready_vm(vm, pool, ttl, provider)
end
@ -367,33 +317,8 @@ EOT
end
it 'should log messages about being powered off' do
expect(logger).to receive(:log).with('d', "[!] [#{pool}] '#{vm}' appears to be powered off, removed from 'ready' queue")
subject._check_ready_vm(vm, pool, ttl, provider)
end
end
context 'is turned on, a name mismatch' do
before(:each) do
host['hostname'] = 'different_name'
end
it 'should move the VM to the completed queue' do
expect(redis).to receive(:smove).with("vmpooler__ready__#{pool}", "vmpooler__completed__#{pool}", vm)
subject._check_ready_vm(vm, pool, ttl, provider)
end
it 'should move the VM to the completed queue in Redis' do
expect(redis.sismember("vmpooler__ready__#{pool}", vm)).to be(true)
expect(redis.sismember("vmpooler__completed__#{pool}", vm)).to be(false)
subject._check_ready_vm(vm, pool, ttl, provider)
expect(redis.sismember("vmpooler__ready__#{pool}", vm)).to be(false)
expect(redis.sismember("vmpooler__completed__#{pool}", vm)).to be(true)
end
it 'should log messages about being misnamed' do
expect(logger).to receive(:log).with('d', "[!] [#{pool}] '#{vm}' has mismatched hostname, removed from 'ready' queue")
expect(provider).to receive(:vm_ready?).and_return(false)
expect(logger).to receive(:log).with('d', "[!] [#{pool}] '#{vm}' is unreachable, removed from 'ready' queue")
subject._check_ready_vm(vm, pool, ttl, provider)
end
@ -472,32 +397,24 @@ EOT
allow(provider).to receive(:get_vm).with(pool,vm).and_return(host)
end
it 'does nothing with a missing VM' do
it 'moves a missing VM to the completed queue' do
expect(provider).to receive(:vm_ready?).and_return(false)
expect(provider).to receive(:get_vm).with(pool,vm).and_return(nil)
expect(redis.sismember("vmpooler__running__#{pool}", vm)).to be(true)
subject._check_running_vm(vm, pool, timeout, provider)
expect(redis.sismember("vmpooler__running__#{pool}", vm)).to be(true)
expect(redis.sismember("vmpooler__running__#{pool}", vm)).to be(false)
end
context 'valid host' do
it 'should not move VM when not poweredOn' do
# I'm not sure this test is useful. There is no codepath
# in _check_running_vm that looks at Power State
host['powerstate'] = 'PoweredOff'
expect(logger).not_to receive(:log).with('d', "[!] [#{pool}] '#{vm}' appears to be powered off or dead")
expect(redis.sismember("vmpooler__running__#{pool}", vm)).to be(true)
subject._check_running_vm(vm, pool, timeout, provider)
expect(redis.sismember("vmpooler__running__#{pool}", vm)).to be(true)
end
it 'should not move VM if it has no checkout time' do
expect(provider).to receive(:vm_ready?).and_return(true)
expect(redis.sismember("vmpooler__running__#{pool}", vm)).to be(true)
subject._check_running_vm(vm, pool, 0, provider)
expect(redis.sismember("vmpooler__running__#{pool}", vm)).to be(true)
end
it 'should not move VM if TTL is zero' do
expect(provider).to receive(:vm_ready?).and_return(true)
redis.hset("vmpooler__active__#{pool}", vm,(Time.now - timeout*60*60).to_s)
expect(redis.sismember("vmpooler__running__#{pool}", vm)).to be(true)
subject._check_running_vm(vm, pool, 0, provider)