(POOLER-133) Identify when a ready VM has failed

This commit fixes checking of a VM that has already been identified as ready. Without this change, a ready VM that has failed is identified as having failed but is not successfully removed from the ready queue. Additionally, the default vm_checktime value has been reduced from 15 minutes to 1 minute so that, by default, ready VMs are checked within one minute of reaching the ready state.

Lastly, the docker-compose files are updated to specify that the redis
instance used is a local redis instance.
This commit is contained in:
kirby@puppetlabs.com 2018-12-01 09:09:28 -08:00
parent 81b5f620bd
commit 3c856d7ae9
11 changed files with 68 additions and 42 deletions

View file

@ -14,6 +14,8 @@ git logs & PR history.
### Fixed ### Fixed
- Sync pool size before dashboard is displayed (POOLER-132) - Sync pool size before dashboard is displayed (POOLER-132)
- Remove a failed VM from the ready queue (POOLER-133)
- Begin checking ready VMs after 1 minute by default to ensure they are still alive
# [0.2.2](https://github.com/puppetlabs/vmpooler/compare/0.2.1...0.2.2) # [0.2.2](https://github.com/puppetlabs/vmpooler/compare/0.2.1...0.2.2)

View file

@ -16,11 +16,11 @@ services:
environment: environment:
- VMPOOLER_DEBUG=true # for use of dummy auth - VMPOOLER_DEBUG=true # for use of dummy auth
- VMPOOLER_CONFIG_FILE=/etc/vmpooler/vmpooler.yaml - VMPOOLER_CONFIG_FILE=/etc/vmpooler/vmpooler.yaml
- REDIS_SERVER=redis - REDIS_SERVER=redislocal
image: vmpooler-local image: vmpooler-local
depends_on: depends_on:
- redis - redislocal
redis: redislocal:
image: redis image: redis
ports: ports:
- "6379:6379" - "6379:6379"

View file

@ -100,7 +100,6 @@ Example minimal configuration file:
logfile: '/var/log/vmpooler.log' logfile: '/var/log/vmpooler.log'
task_limit: 10 task_limit: 10
timeout: 15 timeout: 15
vm_checktime: 15
vm_lifetime: 12 vm_lifetime: 12
vm_lifetime_auth: 24 vm_lifetime_auth: 24
allowed_tags: allowed_tags:

View file

@ -17,7 +17,7 @@
logfile: '/Users/samuel/workspace/vmpooler/vmpooler.log' logfile: '/Users/samuel/workspace/vmpooler/vmpooler.log'
task_limit: 10 task_limit: 10
timeout: 15 timeout: 15
vm_checktime: 15 vm_checktime: 1
vm_lifetime: 12 vm_lifetime: 12
vm_lifetime_auth: 24 vm_lifetime_auth: 24
allowed_tags: allowed_tags:

View file

@ -49,7 +49,7 @@ module Vmpooler
# Set some configuration defaults # Set some configuration defaults
parsed_config[:config]['task_limit'] = ENV['TASK_LIMIT'] || parsed_config[:config]['task_limit'] || 10 parsed_config[:config]['task_limit'] = ENV['TASK_LIMIT'] || parsed_config[:config]['task_limit'] || 10
parsed_config[:config]['migration_limit'] = ENV['MIGRATION_LIMIT'] if ENV['MIGRATION_LIMIT'] parsed_config[:config]['migration_limit'] = ENV['MIGRATION_LIMIT'] if ENV['MIGRATION_LIMIT']
parsed_config[:config]['vm_checktime'] = ENV['VM_CHECKTIME'] || parsed_config[:config]['vm_checktime'] || 15 parsed_config[:config]['vm_checktime'] = ENV['VM_CHECKTIME'] || parsed_config[:config]['vm_checktime'] || 1
parsed_config[:config]['vm_lifetime'] = ENV['VM_LIFETIME'] || parsed_config[:config]['vm_lifetime'] || 24 parsed_config[:config]['vm_lifetime'] = ENV['VM_LIFETIME'] || parsed_config[:config]['vm_lifetime'] || 24
parsed_config[:config]['prefix'] = ENV['VM_PREFIX'] || parsed_config[:config]['prefix'] || '' parsed_config[:config]['prefix'] = ENV['VM_PREFIX'] || parsed_config[:config]['prefix'] || ''
@ -100,6 +100,9 @@ module Vmpooler
parsed_config[:pools] = load_pools_from_redis(redis) parsed_config[:pools] = load_pools_from_redis(redis)
end end
# Create an index of pools by title
parsed_config[:pool_index] = pool_index(parsed_config[:pools])
parsed_config[:pools].each do |pool| parsed_config[:pools].each do |pool|
parsed_config[:pool_names] << pool['name'] parsed_config[:pool_names] << pool['name']
if pool['alias'] if pool['alias']
@ -161,4 +164,14 @@ module Vmpooler
def self.pools(conf) def self.pools(conf)
conf[:pools] conf[:pools]
end end
# Build a lookup table from pool name to that pool's position in +pools+.
#
# @param pools [Array] pool definitions; each entry is read via pool['name']
# @return [Hash] maps each entry's 'name' value to its zero-based index in
#   +pools+. If two entries share a name, the later index overwrites the earlier.
def self.pool_index(pools)
  pools_hash = {}
  # each_with_index supplies the position directly, replacing the manual
  # counter; unlike `for`, the block variables stay local to the block.
  pools.each_with_index do |pool, index|
    pools_hash[pool['name']] = index
  end
  pools_hash
end
end end

View file

@ -134,18 +134,18 @@ module Vmpooler
move_vm_queue(pool_name, vm_name, 'ready', 'completed', "is unreachable, removed from 'ready' queue") move_vm_queue(pool_name, vm_name, 'ready', 'completed', "is unreachable, removed from 'ready' queue")
end end
def check_ready_vm(vm, pool, ttl, provider) def check_ready_vm(vm, pool_name, ttl, provider)
Thread.new do Thread.new do
begin begin
_check_ready_vm(vm, pool, ttl, provider) _check_ready_vm(vm, pool_name, ttl, provider)
rescue => err rescue => err
$logger.log('s', "[!] [#{pool['name']}] '#{vm}' failed while checking a ready vm : #{err}") $logger.log('s', "[!] [#{pool_name}] '#{vm}' failed while checking a ready vm : #{err}")
raise raise
end end
end end
end end
def _check_ready_vm(vm, pool, ttl, provider) def _check_ready_vm(vm, pool_name, ttl, provider)
# Periodically check that the VM is available # Periodically check that the VM is available
mutex = vm_mutex(vm) mutex = vm_mutex(vm)
return if mutex.locked? return if mutex.locked?
@ -158,21 +158,22 @@ module Vmpooler
if ttl > 0 if ttl > 0
# host['boottime'] may be nil if host is not powered on # host['boottime'] may be nil if host is not powered on
if ((Time.now - host['boottime']) / 60).to_s[/^\d+\.\d{1}/].to_f > ttl if ((Time.now - host['boottime']) / 60).to_s[/^\d+\.\d{1}/].to_f > ttl
$redis.smove('vmpooler__ready__' + pool['name'], 'vmpooler__completed__' + pool['name'], vm) $redis.smove('vmpooler__ready__' + pool_name, 'vmpooler__completed__' + pool_name, vm)
$logger.log('d', "[!] [#{pool['name']}] '#{vm}' reached end of TTL after #{ttl} minutes, removed from 'ready' queue") $logger.log('d', "[!] [#{pool_name}] '#{vm}' reached end of TTL after #{ttl} minutes, removed from 'ready' queue")
return return
end end
end end
return if has_mismatched_hostname?(vm, pool, provider) return if has_mismatched_hostname?(vm, pool_name, provider)
vm_still_ready?(pool['name'], vm, provider) vm_still_ready?(pool_name, vm, provider)
end end
end end
def has_mismatched_hostname?(vm, pool, provider) def has_mismatched_hostname?(vm, pool_name, provider)
check_hostname = pool['check_hostname_for_mismatch'] pool_config = $config[:pools][$config[:pool_index][pool_name]]
check_hostname = pool_config['check_hostname_for_mismatch']
check_hostname = $config[:config]['check_ready_vm_hostname_for_mismatch'] if check_hostname.nil? check_hostname = $config[:config]['check_ready_vm_hostname_for_mismatch'] if check_hostname.nil?
return if check_hostname == false return if check_hostname == false
@ -187,15 +188,15 @@ module Vmpooler
end end
# Check if the hostname has magically changed from underneath Pooler # Check if the hostname has magically changed from underneath Pooler
vm_hash = provider.get_vm(pool['name'], vm) vm_hash = provider.get_vm(pool_name, vm)
return unless vm_hash.is_a? Hash return unless vm_hash.is_a? Hash
hostname = vm_hash['hostname'] hostname = vm_hash['hostname']
return if hostname.nil? return if hostname.nil?
return if hostname.empty? return if hostname.empty?
return if hostname == vm return if hostname == vm
$redis.smove('vmpooler__ready__' + pool['name'], 'vmpooler__completed__' + pool['name'], vm) $redis.smove('vmpooler__ready__' + pool_name, 'vmpooler__completed__' + pool_name, vm)
$logger.log('d', "[!] [#{pool['name']}] '#{vm}' has mismatched hostname #{hostname}, removed from 'ready' queue") $logger.log('d', "[!] [#{pool_name}] '#{vm}' has mismatched hostname #{hostname}, removed from 'ready' queue")
return true return true
end end

View file

@ -17,7 +17,7 @@
logfile: '/var/log/vmpooler.log' logfile: '/var/log/vmpooler.log'
task_limit: 10 task_limit: 10
timeout: 15 timeout: 15
vm_checktime: 15 vm_checktime: 1
vm_lifetime: 12 vm_lifetime: 12
vm_lifetime_auth: 24 vm_lifetime_auth: 24
allowed_tags: allowed_tags:
@ -38,4 +38,4 @@
provider: dummy provider: dummy
- name: 'pool02' - name: 'pool02'
size: 5 size: 5
provider: dummy provider: dummy

View file

@ -17,7 +17,7 @@
logfile: '/var/log/vmpooler.log' logfile: '/var/log/vmpooler.log'
task_limit: 10 task_limit: 10
timeout: 15 timeout: 15
vm_checktime: 15 vm_checktime: 1
vm_lifetime: 12 vm_lifetime: 12
vm_lifetime_auth: 24 vm_lifetime_auth: 24
allowed_tags: allowed_tags:
@ -38,4 +38,4 @@
provider: dummy provider: dummy
- name: 'pool04' - name: 'pool04'
size: 5 size: 5
provider: dummy provider: dummy

View file

@ -271,11 +271,22 @@ EOT
describe '#_check_ready_vm' do describe '#_check_ready_vm' do
let(:ttl) { 0 } let(:ttl) { 0 }
let(:host) { {} } let(:host) { {} }
let(:poolconfig) { config[:pools][0] } let(:config) { YAML.load(<<-EOT
---
:config: {}
:providers:
:mock:
:pools:
- name: '#{pool}'
size: 1
:pool_index:
'#{pool}': 0
EOT
)
}
before(:each) do before(:each) do
create_ready_vm(pool,vm) create_ready_vm(pool,vm)
config[:config] = {}
config[:config]['vm_checktime'] = 15 config[:config]['vm_checktime'] = 15
# Create a VM which is powered on # Create a VM which is powered on
@ -289,7 +300,7 @@ EOT
check_stamp = (Time.now - 60).to_s check_stamp = (Time.now - 60).to_s
redis.hset("vmpooler__vm__#{vm}", 'check', check_stamp) redis.hset("vmpooler__vm__#{vm}", 'check', check_stamp)
expect(provider).to receive(:get_vm).exactly(0).times expect(provider).to receive(:get_vm).exactly(0).times
subject._check_ready_vm(vm, poolconfig, ttl, provider) subject._check_ready_vm(vm, pool, ttl, provider)
expect(redis.hget("vmpooler__vm__#{vm}", 'check')).to eq(check_stamp) expect(redis.hget("vmpooler__vm__#{vm}", 'check')).to eq(check_stamp)
end end
end end
@ -299,7 +310,7 @@ EOT
it 'should set the current check timestamp' do it 'should set the current check timestamp' do
expect(redis.hget("vmpooler__vm__#{vm}", 'check')).to be_nil expect(redis.hget("vmpooler__vm__#{vm}", 'check')).to be_nil
subject._check_ready_vm(vm, poolconfig, ttl, provider) subject._check_ready_vm(vm, pool, ttl, provider)
expect(redis.hget("vmpooler__vm__#{vm}", 'check')).to_not be_nil expect(redis.hget("vmpooler__vm__#{vm}", 'check')).to_not be_nil
end end
end end
@ -312,7 +323,7 @@ EOT
it 'should set the current check timestamp' do it 'should set the current check timestamp' do
expect(redis.hget("vmpooler__vm__#{vm}", 'check')).to eq(last_check_date) expect(redis.hget("vmpooler__vm__#{vm}", 'check')).to eq(last_check_date)
subject._check_ready_vm(vm, poolconfig, ttl, provider) subject._check_ready_vm(vm, pool, ttl, provider)
expect(redis.hget("vmpooler__vm__#{vm}", 'check')).to_not eq(last_check_date) expect(redis.hget("vmpooler__vm__#{vm}", 'check')).to_not eq(last_check_date)
end end
@ -322,7 +333,7 @@ EOT
end end
it 'should only set the next check interval' do it 'should only set the next check interval' do
subject._check_ready_vm(vm, poolconfig, ttl, provider) subject._check_ready_vm(vm, pool, ttl, provider)
end end
end end
@ -334,13 +345,13 @@ EOT
it 'should move the VM to the completed queue' do it 'should move the VM to the completed queue' do
expect(redis).to receive(:smove).with("vmpooler__ready__#{pool}", "vmpooler__completed__#{pool}", vm) expect(redis).to receive(:smove).with("vmpooler__ready__#{pool}", "vmpooler__completed__#{pool}", vm)
subject._check_ready_vm(vm, poolconfig, ttl, provider) subject._check_ready_vm(vm, pool, ttl, provider)
end end
it 'should move the VM to the completed queue in Redis' do it 'should move the VM to the completed queue in Redis' do
expect(redis.sismember("vmpooler__ready__#{pool}", vm)).to be(true) expect(redis.sismember("vmpooler__ready__#{pool}", vm)).to be(true)
expect(redis.sismember("vmpooler__completed__#{pool}", vm)).to be(false) expect(redis.sismember("vmpooler__completed__#{pool}", vm)).to be(false)
subject._check_ready_vm(vm, poolconfig, ttl, provider) subject._check_ready_vm(vm, pool, ttl, provider)
expect(redis.sismember("vmpooler__ready__#{pool}", vm)).to be(false) expect(redis.sismember("vmpooler__ready__#{pool}", vm)).to be(false)
expect(redis.sismember("vmpooler__completed__#{pool}", vm)).to be(true) expect(redis.sismember("vmpooler__completed__#{pool}", vm)).to be(true)
end end
@ -348,7 +359,7 @@ EOT
it 'should log messages about being unreachable' do it 'should log messages about being unreachable' do
expect(logger).to receive(:log).with('d', "[!] [#{pool}] '#{vm}' is unreachable, removed from 'ready' queue") expect(logger).to receive(:log).with('d', "[!] [#{pool}] '#{vm}' is unreachable, removed from 'ready' queue")
subject._check_ready_vm(vm, poolconfig, ttl, provider) subject._check_ready_vm(vm, pool, ttl, provider)
end end
end end
@ -360,7 +371,7 @@ EOT
end end
it 'should return nil' do it 'should return nil' do
expect(subject._check_ready_vm(vm, poolconfig, ttl, provider)).to be_nil expect(subject._check_ready_vm(vm, pool, ttl, provider)).to be_nil
end end
end end
@ -374,13 +385,13 @@ EOT
it 'should move the VM to the completed queue' do it 'should move the VM to the completed queue' do
expect(redis).to receive(:smove).with("vmpooler__ready__#{pool}", "vmpooler__completed__#{pool}", vm) expect(redis).to receive(:smove).with("vmpooler__ready__#{pool}", "vmpooler__completed__#{pool}", vm)
subject._check_ready_vm(vm, poolconfig, ttl, provider) subject._check_ready_vm(vm, pool, ttl, provider)
end end
it 'should move the VM to the completed queue in Redis' do it 'should move the VM to the completed queue in Redis' do
expect(redis.sismember("vmpooler__ready__#{pool}", vm)).to be(true) expect(redis.sismember("vmpooler__ready__#{pool}", vm)).to be(true)
expect(redis.sismember("vmpooler__completed__#{pool}", vm)).to be(false) expect(redis.sismember("vmpooler__completed__#{pool}", vm)).to be(false)
subject._check_ready_vm(vm, poolconfig, ttl, provider) subject._check_ready_vm(vm, pool, ttl, provider)
expect(redis.sismember("vmpooler__ready__#{pool}", vm)).to be(false) expect(redis.sismember("vmpooler__ready__#{pool}", vm)).to be(false)
expect(redis.sismember("vmpooler__completed__#{pool}", vm)).to be(true) expect(redis.sismember("vmpooler__completed__#{pool}", vm)).to be(true)
end end
@ -388,7 +399,7 @@ EOT
it 'should log messages about being misnamed' do it 'should log messages about being misnamed' do
expect(logger).to receive(:log).with('d', "[!] [#{pool}] '#{vm}' has mismatched hostname #{different_hostname}, removed from 'ready' queue") expect(logger).to receive(:log).with('d', "[!] [#{pool}] '#{vm}' has mismatched hostname #{different_hostname}, removed from 'ready' queue")
subject._check_ready_vm(vm, poolconfig, ttl, provider) subject._check_ready_vm(vm, pool, ttl, provider)
end end
end end
end end
@ -401,7 +412,7 @@ EOT
it 'should not run get_vm' do it 'should not run get_vm' do
expect(provider).to_not receive(:get_vm) expect(provider).to_not receive(:get_vm)
subject._check_ready_vm(vm, poolconfig, ttl, provider) subject._check_ready_vm(vm, pool, ttl, provider)
end end
end end
@ -413,7 +424,7 @@ EOT
it 'should not run get_vm' do it 'should not run get_vm' do
expect(provider).to_not receive(:get_vm) expect(provider).to_not receive(:get_vm)
subject._check_ready_vm(vm, poolconfig, ttl, provider) subject._check_ready_vm(vm, pool, ttl, provider)
end end
end end
end end
@ -427,7 +438,7 @@ EOT
it 'should return' do it 'should return' do
expect(subject).to receive(:vm_mutex).and_return(mutex) expect(subject).to receive(:vm_mutex).and_return(mutex)
expect(subject._check_ready_vm(vm, poolconfig, ttl, provider)).to be_nil expect(subject._check_ready_vm(vm, pool, ttl, provider)).to be_nil
end end
end end
end end

View file

@ -17,7 +17,7 @@
logfile: '/var/log/vmpooler.log' logfile: '/var/log/vmpooler.log'
task_limit: 10 task_limit: 10
timeout: 15 timeout: 15
vm_checktime: 15 vm_checktime: 1
vm_lifetime: 12 vm_lifetime: 12
vm_lifetime_auth: 24 vm_lifetime_auth: 24
allowed_tags: allowed_tags:

View file

@ -376,7 +376,7 @@
# #
# - vm_checktime # - vm_checktime
# How often (in minutes) to check the sanity of VMs in 'ready' queues. # How often (in minutes) to check the sanity of VMs in 'ready' queues.
# (optional; default: '15') # (optional; default: '1')
# #
# - vm_lifetime # - vm_lifetime
# How long (in hours) to keep VMs in 'running' queues before destroying. # How long (in hours) to keep VMs in 'running' queues before destroying.
@ -492,7 +492,7 @@
logfile: '/var/log/vmpooler.log' logfile: '/var/log/vmpooler.log'
task_limit: 10 task_limit: 10
timeout: 15 timeout: 15
vm_checktime: 15 vm_checktime: 1
vm_lifetime: 12 vm_lifetime: 12
vm_lifetime_auth: 24 vm_lifetime_auth: 24
allowed_tags: allowed_tags: