(POOLER-133) Identify when a ready VM has failed

This commit fixes checking of a VM that has already been identified as ready. Without this change a ready VM that has failed will be identified as having failed, but will not successfully be removed from the ready queue. Additionally, the default vm_checktime value has been reduced from 15 to 1 to ensure that ready VMs are checked within one minute of the time they have reached the ready state by default.

Lastly, the docker-compose files are updated to specify that the redis
instance used is a local redis instance.
This commit is contained in:
kirby@puppetlabs.com 2018-12-01 09:09:28 -08:00
parent 81b5f620bd
commit 3c856d7ae9
11 changed files with 68 additions and 42 deletions

View file

@ -14,6 +14,8 @@ git logs & PR history.
### Fixed
- Sync pool size before dashboard is displayed (POOLER-132)
- Remove a failed VM from the ready queue (POOLER-133)
- Begin checking ready VMs to ensure alive after 1 minute by default
# [0.2.2](https://github.com/puppetlabs/vmpooler/compare/0.2.1...0.2.2)

View file

@ -16,11 +16,11 @@ services:
environment:
- VMPOOLER_DEBUG=true # for use of dummy auth
- VMPOOLER_CONFIG_FILE=/etc/vmpooler/vmpooler.yaml
- REDIS_SERVER=redis
- REDIS_SERVER=redislocal
image: vmpooler-local
depends_on:
- redis
redis:
- redislocal
redislocal:
image: redis
ports:
- "6379:6379"

View file

@ -100,7 +100,6 @@ Example minimal configuration file:
logfile: '/var/log/vmpooler.log'
task_limit: 10
timeout: 15
vm_checktime: 15
vm_lifetime: 12
vm_lifetime_auth: 24
allowed_tags:

View file

@ -17,7 +17,7 @@
logfile: '/Users/samuel/workspace/vmpooler/vmpooler.log'
task_limit: 10
timeout: 15
vm_checktime: 15
vm_checktime: 1
vm_lifetime: 12
vm_lifetime_auth: 24
allowed_tags:

View file

@ -49,7 +49,7 @@ module Vmpooler
# Set some configuration defaults
parsed_config[:config]['task_limit'] = ENV['TASK_LIMIT'] || parsed_config[:config]['task_limit'] || 10
parsed_config[:config]['migration_limit'] = ENV['MIGRATION_LIMIT'] if ENV['MIGRATION_LIMIT']
parsed_config[:config]['vm_checktime'] = ENV['VM_CHECKTIME'] || parsed_config[:config]['vm_checktime'] || 15
parsed_config[:config]['vm_checktime'] = ENV['VM_CHECKTIME'] || parsed_config[:config]['vm_checktime'] || 1
parsed_config[:config]['vm_lifetime'] = ENV['VM_LIFETIME'] || parsed_config[:config]['vm_lifetime'] || 24
parsed_config[:config]['prefix'] = ENV['VM_PREFIX'] || parsed_config[:config]['prefix'] || ''
@ -100,6 +100,9 @@ module Vmpooler
parsed_config[:pools] = load_pools_from_redis(redis)
end
# Create an index of pools by title
parsed_config[:pool_index] = pool_index(parsed_config[:pools])
parsed_config[:pools].each do |pool|
parsed_config[:pool_names] << pool['name']
if pool['alias']
@ -161,4 +164,14 @@ module Vmpooler
def self.pools(conf)
conf[:pools]
end
def self.pool_index(pools)
pools_hash = {}
index = 0
for pool in pools
pools_hash[pool['name']] = index
index += 1
end
pools_hash
end
end

View file

@ -134,18 +134,18 @@ module Vmpooler
move_vm_queue(pool_name, vm_name, 'ready', 'completed', "is unreachable, removed from 'ready' queue")
end
def check_ready_vm(vm, pool, ttl, provider)
def check_ready_vm(vm, pool_name, ttl, provider)
Thread.new do
begin
_check_ready_vm(vm, pool, ttl, provider)
_check_ready_vm(vm, pool_name, ttl, provider)
rescue => err
$logger.log('s', "[!] [#{pool['name']}] '#{vm}' failed while checking a ready vm : #{err}")
$logger.log('s', "[!] [#{pool_name}] '#{vm}' failed while checking a ready vm : #{err}")
raise
end
end
end
def _check_ready_vm(vm, pool, ttl, provider)
def _check_ready_vm(vm, pool_name, ttl, provider)
# Periodically check that the VM is available
mutex = vm_mutex(vm)
return if mutex.locked?
@ -158,21 +158,22 @@ module Vmpooler
if ttl > 0
# host['boottime'] may be nil if host is not powered on
if ((Time.now - host['boottime']) / 60).to_s[/^\d+\.\d{1}/].to_f > ttl
$redis.smove('vmpooler__ready__' + pool['name'], 'vmpooler__completed__' + pool['name'], vm)
$redis.smove('vmpooler__ready__' + pool_name, 'vmpooler__completed__' + pool_name, vm)
$logger.log('d', "[!] [#{pool['name']}] '#{vm}' reached end of TTL after #{ttl} minutes, removed from 'ready' queue")
$logger.log('d', "[!] [#{pool_name}] '#{vm}' reached end of TTL after #{ttl} minutes, removed from 'ready' queue")
return
end
end
return if has_mismatched_hostname?(vm, pool, provider)
return if has_mismatched_hostname?(vm, pool_name, provider)
vm_still_ready?(pool['name'], vm, provider)
vm_still_ready?(pool_name, vm, provider)
end
end
def has_mismatched_hostname?(vm, pool, provider)
check_hostname = pool['check_hostname_for_mismatch']
def has_mismatched_hostname?(vm, pool_name, provider)
pool_config = $config[:pools][$config[:pool_index][pool_name]]
check_hostname = pool_config['check_hostname_for_mismatch']
check_hostname = $config[:config]['check_ready_vm_hostname_for_mismatch'] if check_hostname.nil?
return if check_hostname == false
@ -187,15 +188,15 @@ module Vmpooler
end
# Check if the hostname has magically changed from underneath Pooler
vm_hash = provider.get_vm(pool['name'], vm)
vm_hash = provider.get_vm(pool_name, vm)
return unless vm_hash.is_a? Hash
hostname = vm_hash['hostname']
return if hostname.nil?
return if hostname.empty?
return if hostname == vm
$redis.smove('vmpooler__ready__' + pool['name'], 'vmpooler__completed__' + pool['name'], vm)
$logger.log('d', "[!] [#{pool['name']}] '#{vm}' has mismatched hostname #{hostname}, removed from 'ready' queue")
$redis.smove('vmpooler__ready__' + pool_name, 'vmpooler__completed__' + pool_name, vm)
$logger.log('d', "[!] [#{pool_name}] '#{vm}' has mismatched hostname #{hostname}, removed from 'ready' queue")
return true
end

View file

@ -17,7 +17,7 @@
logfile: '/var/log/vmpooler.log'
task_limit: 10
timeout: 15
vm_checktime: 15
vm_checktime: 1
vm_lifetime: 12
vm_lifetime_auth: 24
allowed_tags:
@ -38,4 +38,4 @@
provider: dummy
- name: 'pool02'
size: 5
provider: dummy
provider: dummy

View file

@ -17,7 +17,7 @@
logfile: '/var/log/vmpooler.log'
task_limit: 10
timeout: 15
vm_checktime: 15
vm_checktime: 1
vm_lifetime: 12
vm_lifetime_auth: 24
allowed_tags:
@ -38,4 +38,4 @@
provider: dummy
- name: 'pool04'
size: 5
provider: dummy
provider: dummy

View file

@ -271,11 +271,22 @@ EOT
describe '#_check_ready_vm' do
let(:ttl) { 0 }
let(:host) { {} }
let(:poolconfig) { config[:pools][0] }
let(:config) { YAML.load(<<-EOT
---
:config: {}
:providers:
:mock:
:pools:
- name: '#{pool}'
size: 1
:pool_index:
'#{pool}': 0
EOT
)
}
before(:each) do
create_ready_vm(pool,vm)
config[:config] = {}
config[:config]['vm_checktime'] = 15
# Create a VM which is powered on
@ -289,7 +300,7 @@ EOT
check_stamp = (Time.now - 60).to_s
redis.hset("vmpooler__vm__#{vm}", 'check', check_stamp)
expect(provider).to receive(:get_vm).exactly(0).times
subject._check_ready_vm(vm, poolconfig, ttl, provider)
subject._check_ready_vm(vm, pool, ttl, provider)
expect(redis.hget("vmpooler__vm__#{vm}", 'check')).to eq(check_stamp)
end
end
@ -299,7 +310,7 @@ EOT
it 'should set the current check timestamp' do
expect(redis.hget("vmpooler__vm__#{vm}", 'check')).to be_nil
subject._check_ready_vm(vm, poolconfig, ttl, provider)
subject._check_ready_vm(vm, pool, ttl, provider)
expect(redis.hget("vmpooler__vm__#{vm}", 'check')).to_not be_nil
end
end
@ -312,7 +323,7 @@ EOT
it 'should set the current check timestamp' do
expect(redis.hget("vmpooler__vm__#{vm}", 'check')).to eq(last_check_date)
subject._check_ready_vm(vm, poolconfig, ttl, provider)
subject._check_ready_vm(vm, pool, ttl, provider)
expect(redis.hget("vmpooler__vm__#{vm}", 'check')).to_not eq(last_check_date)
end
@ -322,7 +333,7 @@ EOT
end
it 'should only set the next check interval' do
subject._check_ready_vm(vm, poolconfig, ttl, provider)
subject._check_ready_vm(vm, pool, ttl, provider)
end
end
@ -334,13 +345,13 @@ EOT
it 'should move the VM to the completed queue' do
expect(redis).to receive(:smove).with("vmpooler__ready__#{pool}", "vmpooler__completed__#{pool}", vm)
subject._check_ready_vm(vm, poolconfig, ttl, provider)
subject._check_ready_vm(vm, pool, ttl, provider)
end
it 'should move the VM to the completed queue in Redis' do
expect(redis.sismember("vmpooler__ready__#{pool}", vm)).to be(true)
expect(redis.sismember("vmpooler__completed__#{pool}", vm)).to be(false)
subject._check_ready_vm(vm, poolconfig, ttl, provider)
subject._check_ready_vm(vm, pool, ttl, provider)
expect(redis.sismember("vmpooler__ready__#{pool}", vm)).to be(false)
expect(redis.sismember("vmpooler__completed__#{pool}", vm)).to be(true)
end
@ -348,7 +359,7 @@ EOT
it 'should log messages about being unreachable' do
expect(logger).to receive(:log).with('d', "[!] [#{pool}] '#{vm}' is unreachable, removed from 'ready' queue")
subject._check_ready_vm(vm, poolconfig, ttl, provider)
subject._check_ready_vm(vm, pool, ttl, provider)
end
end
@ -360,7 +371,7 @@ EOT
end
it 'should return nil' do
expect(subject._check_ready_vm(vm, poolconfig, ttl, provider)).to be_nil
expect(subject._check_ready_vm(vm, pool, ttl, provider)).to be_nil
end
end
@ -374,13 +385,13 @@ EOT
it 'should move the VM to the completed queue' do
expect(redis).to receive(:smove).with("vmpooler__ready__#{pool}", "vmpooler__completed__#{pool}", vm)
subject._check_ready_vm(vm, poolconfig, ttl, provider)
subject._check_ready_vm(vm, pool, ttl, provider)
end
it 'should move the VM to the completed queue in Redis' do
expect(redis.sismember("vmpooler__ready__#{pool}", vm)).to be(true)
expect(redis.sismember("vmpooler__completed__#{pool}", vm)).to be(false)
subject._check_ready_vm(vm, poolconfig, ttl, provider)
subject._check_ready_vm(vm, pool, ttl, provider)
expect(redis.sismember("vmpooler__ready__#{pool}", vm)).to be(false)
expect(redis.sismember("vmpooler__completed__#{pool}", vm)).to be(true)
end
@ -388,7 +399,7 @@ EOT
it 'should log messages about being misnamed' do
expect(logger).to receive(:log).with('d', "[!] [#{pool}] '#{vm}' has mismatched hostname #{different_hostname}, removed from 'ready' queue")
subject._check_ready_vm(vm, poolconfig, ttl, provider)
subject._check_ready_vm(vm, pool, ttl, provider)
end
end
end
@ -401,7 +412,7 @@ EOT
it 'should not run get_vm' do
expect(provider).to_not receive(:get_vm)
subject._check_ready_vm(vm, poolconfig, ttl, provider)
subject._check_ready_vm(vm, pool, ttl, provider)
end
end
@ -413,7 +424,7 @@ EOT
it 'should not run get_vm' do
expect(provider).to_not receive(:get_vm)
subject._check_ready_vm(vm, poolconfig, ttl, provider)
subject._check_ready_vm(vm, pool, ttl, provider)
end
end
end
@ -427,7 +438,7 @@ EOT
it 'should return' do
expect(subject).to receive(:vm_mutex).and_return(mutex)
expect(subject._check_ready_vm(vm, poolconfig, ttl, provider)).to be_nil
expect(subject._check_ready_vm(vm, pool, ttl, provider)).to be_nil
end
end
end

View file

@ -17,7 +17,7 @@
logfile: '/var/log/vmpooler.log'
task_limit: 10
timeout: 15
vm_checktime: 15
vm_checktime: 1
vm_lifetime: 12
vm_lifetime_auth: 24
allowed_tags:

View file

@ -376,7 +376,7 @@
#
# - vm_checktime
# How often (in minutes) to check the sanity of VMs in 'ready' queues.
# (optional; default: '15')
# (optional; default: '1')
#
# - vm_lifetime
# How long (in hours) to keep VMs in 'running' queues before destroying.
@ -492,7 +492,7 @@
logfile: '/var/log/vmpooler.log'
task_limit: 10
timeout: 15
vm_checktime: 15
vm_checktime: 1
vm_lifetime: 12
vm_lifetime_auth: 24
allowed_tags: