(POOLER-112) Ensure a VM is only destroyed once
This commit implements a vm_mutex hash to allow synchronizing VM operations that should only happen once across threads. Without this change, pool_manager may evaluate or destroy a VM multiple times: when destroy attempts race, only one can succeed and the others throw errors, and even when no error occurs the duplicated work wastes resources unnecessarily.
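The guard applied to each of these operations is the same three-line pattern. Below is a minimal, self-contained sketch of that pattern; PoolManagerSketch and destroy_once are illustrative names (not vmpooler code), and the yielded block stands in for the real check/destroy logic in the diff:

    require 'thread' # Mutex; implicit on modern Rubies

    class PoolManagerSketch
      def initialize
        # One lazily created Mutex per VM name, as in initialize below.
        @vm_mutex = {}
      end

      # Same idea as the vm_mutex helper in the diff; `||=` is equivalent
      # to the diff's `@vm_mutex[vmname] || @vm_mutex[vmname] = Mutex.new`.
      def vm_mutex(vmname)
        @vm_mutex[vmname] ||= Mutex.new
      end

      # Run the block for a VM only if no other thread is already
      # operating on it; otherwise skip instead of queuing a duplicate.
      def destroy_once(vm)
        mutex = vm_mutex(vm)
        return if mutex.locked?
        mutex.synchronize { yield }
      end
    end

    sketch = PoolManagerSketch.new
    sketch.vm_mutex('vm1').lock
    # While the mutex is held (an in-flight destroy), a second attempt
    # is a no-op and returns nil:
    p sketch.destroy_once('vm1') { raise 'never reached' } # => nil
    sketch.vm_mutex('vm1').unlock
    sketch.destroy_once('vm1') { puts 'destroyed' }        # runs once

This mirrors the 'with a locked vm mutex' specs added below, which stub vm_mutex with a pre-locked Mutex and assert the call returns nil.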
parent 89e1f17738
commit 3a0f0880e7

2 changed files with 172 additions and 70 deletions
diff --git a/lib/vmpooler/pool_manager.rb b/lib/vmpooler/pool_manager.rb
@@ -24,6 +24,8 @@ module Vmpooler
       # Pool mutex
       @reconfigure_pool = {}
+
+      @vm_mutex = {}
     end
 
     def config
@@ -44,15 +46,19 @@ module Vmpooler
     end
 
     def _check_pending_vm(vm, pool, timeout, provider)
-      host = provider.get_vm(pool, vm)
-      unless host
-        fail_pending_vm(vm, pool, timeout, false)
-        return
-      end
-      if provider.vm_ready?(pool, vm)
-        move_pending_vm_to_ready(vm, pool, host)
-      else
-        fail_pending_vm(vm, pool, timeout)
+      mutex = vm_mutex(vm)
+      return if mutex.locked?
+      mutex.synchronize do
+        host = provider.get_vm(pool, vm)
+        unless host
+          fail_pending_vm(vm, pool, timeout, false)
+          return
+        end
+        if provider.vm_ready?(pool, vm)
+          move_pending_vm_to_ready(vm, pool, host)
+        else
+          fail_pending_vm(vm, pool, timeout)
+        end
       end
     end
 
@@ -114,51 +120,55 @@ module Vmpooler
 
     def _check_ready_vm(vm, pool, ttl, provider)
       # Periodically check that the VM is available
-      check_stamp = $redis.hget('vmpooler__vm__' + vm, 'check')
-      return if check_stamp && (((Time.now - Time.parse(check_stamp)) / 60) <= $config[:config]['vm_checktime'])
+      mutex = vm_mutex(vm)
+      return if mutex.locked?
+      mutex.synchronize do
+        check_stamp = $redis.hget('vmpooler__vm__' + vm, 'check')
+        return if check_stamp && (((Time.now - Time.parse(check_stamp)) / 60) <= $config[:config]['vm_checktime'])
 
-      host = provider.get_vm(pool, vm)
-      # Check if the host even exists
-      unless host
-        $redis.srem('vmpooler__ready__' + pool, vm)
-        $logger.log('s', "[!] [#{pool}] '#{vm}' not found in inventory, removed from 'ready' queue")
-        return
-      end
+        host = provider.get_vm(pool, vm)
+        # Check if the host even exists
+        unless host
+          $redis.srem('vmpooler__ready__' + pool, vm)
+          $logger.log('s', "[!] [#{pool}] '#{vm}' not found in inventory, removed from 'ready' queue")
+          return
+        end
 
-      $redis.hset('vmpooler__vm__' + vm, 'check', Time.now)
-      # Check if the VM is not powered on, before checking TTL
-      unless host['powerstate'].casecmp('poweredon').zero?
-        $redis.smove('vmpooler__ready__' + pool, 'vmpooler__completed__' + pool, vm)
-        $logger.log('d', "[!] [#{pool}] '#{vm}' appears to be powered off, removed from 'ready' queue")
-        return
-      end
+        $redis.hset('vmpooler__vm__' + vm, 'check', Time.now)
+        # Check if the VM is not powered on, before checking TTL
+        unless host['powerstate'].casecmp('poweredon').zero?
+          $redis.smove('vmpooler__ready__' + pool, 'vmpooler__completed__' + pool, vm)
+          $logger.log('d', "[!] [#{pool}] '#{vm}' appears to be powered off, removed from 'ready' queue")
+          return
+        end
 
-      # Check if the hosts TTL has expired
-      if ttl > 0
-        # host['boottime'] may be nil if host is not powered on
-        if ((Time.now - host['boottime']) / 60).to_s[/^\d+\.\d{1}/].to_f > ttl
-          $redis.smove('vmpooler__ready__' + pool, 'vmpooler__completed__' + pool, vm)
+        # Check if the hosts TTL has expired
+        if ttl > 0
+          # host['boottime'] may be nil if host is not powered on
+          if ((Time.now - host['boottime']) / 60).to_s[/^\d+\.\d{1}/].to_f > ttl
+            $redis.smove('vmpooler__ready__' + pool, 'vmpooler__completed__' + pool, vm)
 
-          $logger.log('d', "[!] [#{pool}] '#{vm}' reached end of TTL after #{ttl} minutes, removed from 'ready' queue")
-          return
+            $logger.log('d', "[!] [#{pool}] '#{vm}' reached end of TTL after #{ttl} minutes, removed from 'ready' queue")
+            return
+          end
         end
-      end
 
-      # Check if the hostname has magically changed from underneath Pooler
-      if host['hostname'] != vm
-        $redis.smove('vmpooler__ready__' + pool, 'vmpooler__completed__' + pool, vm)
-        $logger.log('d', "[!] [#{pool}] '#{vm}' has mismatched hostname, removed from 'ready' queue")
-        return
-      end
+        # Check if the hostname has magically changed from underneath Pooler
+        if host['hostname'] != vm
+          $redis.smove('vmpooler__ready__' + pool, 'vmpooler__completed__' + pool, vm)
+          $logger.log('d', "[!] [#{pool}] '#{vm}' has mismatched hostname, removed from 'ready' queue")
+          return
+        end
 
-      # Check if the VM is still ready/available
-      begin
-        raise("VM #{vm} is not ready") unless provider.vm_ready?(pool, vm)
-      rescue
-        if $redis.smove('vmpooler__ready__' + pool, 'vmpooler__completed__' + pool, vm)
-          $logger.log('d', "[!] [#{pool}] '#{vm}' is unreachable, removed from 'ready' queue")
-        else
-          $logger.log('d', "[!] [#{pool}] '#{vm}' is unreachable, and failed to remove from 'ready' queue")
+        # Check if the VM is still ready/available
+        begin
+          raise("VM #{vm} is not ready") unless provider.vm_ready?(pool, vm)
+        rescue
+          if $redis.smove('vmpooler__ready__' + pool, 'vmpooler__completed__' + pool, vm)
+            $logger.log('d', "[!] [#{pool}] '#{vm}' is unreachable, removed from 'ready' queue")
+          else
+            $logger.log('d', "[!] [#{pool}] '#{vm}' is unreachable, and failed to remove from 'ready' queue")
+          end
         end
       end
     end
@@ -175,16 +185,20 @@ module Vmpooler
     end
 
     def _check_running_vm(vm, pool, ttl, provider)
-      host = provider.get_vm(pool, vm)
+      mutex = vm_mutex(vm)
+      return if mutex.locked?
+      mutex.synchronize do
+        host = provider.get_vm(pool, vm)
 
-      if host
-        # Check that VM is within defined lifetime
-        checkouttime = $redis.hget('vmpooler__active__' + pool, vm)
-        if checkouttime
-          running = (Time.now - Time.parse(checkouttime)) / 60 / 60
+        if host
+          # Check that VM is within defined lifetime
+          checkouttime = $redis.hget('vmpooler__active__' + pool, vm)
+          if checkouttime
+            running = (Time.now - Time.parse(checkouttime)) / 60 / 60
 
-          if (ttl.to_i > 0) && (running.to_i >= ttl.to_i)
-            move_vm_queue(pool, vm, 'running', 'completed', "reached end of TTL after #{ttl} hours")
+            if (ttl.to_i > 0) && (running.to_i >= ttl.to_i)
+              move_vm_queue(pool, vm, 'running', 'completed', "reached end of TTL after #{ttl} hours")
+            end
           end
         end
       end
@@ -251,20 +265,24 @@ module Vmpooler
     end
 
     def _destroy_vm(vm, pool, provider)
-      $redis.srem('vmpooler__completed__' + pool, vm)
-      $redis.hdel('vmpooler__active__' + pool, vm)
-      $redis.hset('vmpooler__vm__' + vm, 'destroy', Time.now)
+      mutex = vm_mutex(vm)
+      return if mutex.locked?
+      mutex.synchronize do
+        $redis.srem('vmpooler__completed__' + pool, vm)
+        $redis.hdel('vmpooler__active__' + pool, vm)
+        $redis.hset('vmpooler__vm__' + vm, 'destroy', Time.now)
 
-      # Auto-expire metadata key
-      $redis.expire('vmpooler__vm__' + vm, ($config[:redis]['data_ttl'].to_i * 60 * 60))
+        # Auto-expire metadata key
+        $redis.expire('vmpooler__vm__' + vm, ($config[:redis]['data_ttl'].to_i * 60 * 60))
 
-      start = Time.now
+        start = Time.now
 
-      provider.destroy_vm(pool, vm)
+        provider.destroy_vm(pool, vm)
 
-      finish = format('%.2f', Time.now - start)
-      $logger.log('s', "[-] [#{pool}] '#{vm}' destroyed in #{finish} seconds")
-      $metrics.timing("destroy.#{pool}", finish)
+        finish = format('%.2f', Time.now - start)
+        $logger.log('s', "[-] [#{pool}] '#{vm}' destroyed in #{finish} seconds")
+        $metrics.timing("destroy.#{pool}", finish)
+      end
     end
 
     def create_vm_disk(pool_name, vm, disk_size, provider)
@@ -467,8 +485,11 @@ module Vmpooler
     def migrate_vm(vm_name, pool_name, provider)
       Thread.new do
         begin
-          $redis.srem("vmpooler__migrating__#{pool_name}", vm_name)
-          provider.migrate_vm(pool_name, vm_name)
+          mutex = vm_mutex(vm_name)
+          mutex.synchronize do
+            $redis.srem("vmpooler__migrating__#{pool_name}", vm_name)
+            provider.migrate_vm(pool_name, vm_name)
+          end
         rescue => err
           $logger.log('s', "[x] [#{pool_name}] '#{vm_name}' migration failed with an error: #{err}")
         end
@@ -579,6 +600,10 @@ module Vmpooler
       @reconfigure_pool[poolname] || @reconfigure_pool[poolname] = Mutex.new
     end
 
+    def vm_mutex(vmname)
+      @vm_mutex[vmname] || @vm_mutex[vmname] = Mutex.new
+    end
+
     def sync_pool_template(pool)
       pool_template = $redis.hget('vmpooler__config__template', pool['name'])
       if pool_template
diff --git a/spec/unit/pool_manager_spec.rb b/spec/unit/pool_manager_spec.rb
@@ -92,6 +92,19 @@ EOT
         subject._check_pending_vm(vm, pool, timeout, provider)
       end
     end
+
+    context 'with a locked vm mutex' do
+      let(:mutex) { Mutex.new }
+      before(:each) do
+        mutex.lock
+      end
+
+      it 'should return' do
+        expect(subject).to receive(:vm_mutex).and_return(mutex)
+
+        expect(subject._check_pending_vm(vm, pool, timeout, provider)).to be_nil
+      end
+    end
   end
 
   describe '#remove_nonexistent_vm' do
@@ -404,6 +417,19 @@ EOT
         end
       end
     end
+
+    context 'with a locked vm mutex' do
+      let(:mutex) { Mutex.new }
+      before(:each) do
+        mutex.lock
+      end
+
+      it 'should return' do
+        expect(subject).to receive(:vm_mutex).and_return(mutex)
+
+        expect(subject._check_ready_vm(vm, pool, ttl, provider)).to be_nil
+      end
+    end
   end
 
   describe '#check_running_vm' do
@@ -479,6 +505,19 @@ EOT
         expect(redis.sismember("vmpooler__completed__#{pool}", vm)).to be(true)
       end
     end
+
+    context 'with a locked vm mutex' do
+      let(:mutex) { Mutex.new }
+      before(:each) do
+        mutex.lock
+      end
+
+      it 'should return' do
+        expect(subject).to receive(:vm_mutex).and_return(mutex)
+
+        expect(subject._check_running_vm(vm, pool, timeout, provider)).to be_nil
+      end
+    end
   end
 
   describe '#move_vm_queue' do
@@ -681,7 +720,7 @@ EOT
       before(:each) do
         config[:redis] = nil
       end
 
       it 'should raise an error' do
         expect{ subject._destroy_vm(vm,pool,provider) }.to raise_error(NoMethodError)
       end
@@ -732,6 +771,19 @@ EOT
         expect{ subject._destroy_vm(vm,pool,provider) }.to raise_error(/MockError/)
       end
     end
+
+    context 'when the VM mutex is locked' do
+      let(:mutex) { Mutex.new }
+      before(:each) do
+        mutex.lock
+      end
+
+      it 'should return' do
+        expect(subject).to receive(:vm_mutex).with(vm).and_return(mutex)
+
+        expect(subject._destroy_vm(vm,pool,provider)).to eq(nil)
+      end
+    end
   end
 
   describe '#create_vm_disk' do
@@ -1501,6 +1553,31 @@ EOT
         subject.migrate_vm(vm, pool, provider)
       end
     end
+
+    context 'with a locked vm mutex' do
+      let(:mutex) { Mutex.new }
+      before(:each) do
+        mutex.lock
+      end
+
+      it 'should return' do
+        expect(subject).to receive(:vm_mutex).and_return(mutex)
+
+        expect(subject.migrate_vm(vm, pool, provider)).to be_nil
+      end
+    end
+  end
+
+  describe '#vm_mutex' do
+    it 'should return a mutex' do
+      expect(subject.vm_mutex(vm)).to be_a(Mutex)
+    end
+
+    it 'should return the same mutex when called twice' do
+      first = subject.vm_mutex(vm)
+      second = subject.vm_mutex(vm)
+      expect(first).to be(second)
+    end
   end
 
   describe 'sync_pool_template' do