From 3c856d7ae98c0210d1b30adc3766c826e872439c Mon Sep 17 00:00:00 2001 From: "kirby@puppetlabs.com" Date: Sat, 1 Dec 2018 09:09:28 -0800 Subject: [PATCH] (POOLER-133) Identify when a ready VM has failed This commit fixes checking of a VM that has already been identified as ready. Without this change a ready VM that has failed will be identified as having failed, but will not successfully be removed from the ready queue. Additionally, the default vm_checktime value has been reduced from 15 to 1 to ensure that ready VMs are checked within one minute of the time they have reached the ready state by default. Lastly, the docker-compose files are updated to specify that the redis instance used is a local redis instance. --- CHANGELOG.md | 2 + docker/docker-compose.yml | 6 +-- docs/dev-setup.md | 1 - .../vmpooler.yaml.dummy-example.aliasedpools | 2 +- lib/vmpooler.rb | 15 ++++++- lib/vmpooler/pool_manager.rb | 27 ++++++------ spec/fixtures/vmpooler.yaml | 4 +- spec/fixtures/vmpooler2.yaml | 4 +- spec/unit/pool_manager_spec.rb | 43 ++++++++++++------- vmpooler.yaml.dummy-example | 2 +- vmpooler.yaml.example | 4 +- 11 files changed, 68 insertions(+), 42 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d8141a0..e7e1b1a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,8 @@ git logs & PR history. ### Fixed - Sync pool size before dashboard is displayed (POOLER-132) +- Remove a failed VM from the ready queue (POOLER-133) +- Begin checking ready VMs to ensure alive after 1 minute by default # [0.2.2](https://github.com/puppetlabs/vmpooler/compare/0.2.1...0.2.2) diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index 1146a70..b007866 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -16,11 +16,11 @@ services: environment: - VMPOOLER_DEBUG=true # for use of dummy auth - VMPOOLER_CONFIG_FILE=/etc/vmpooler/vmpooler.yaml - - REDIS_SERVER=redis + - REDIS_SERVER=redislocal image: vmpooler-local depends_on: - - redis - redis: + - redislocal + redislocal: image: redis ports: - "6379:6379" diff --git a/docs/dev-setup.md b/docs/dev-setup.md index d308c38..3a128b0 100644 --- a/docs/dev-setup.md +++ b/docs/dev-setup.md @@ -100,7 +100,6 @@ Example minimal configuration file: logfile: '/var/log/vmpooler.log' task_limit: 10 timeout: 15 - vm_checktime: 15 vm_lifetime: 12 vm_lifetime_auth: 24 allowed_tags: diff --git a/examples/vmpooler.yaml.dummy-example.aliasedpools b/examples/vmpooler.yaml.dummy-example.aliasedpools index 6a25e77..efe0ce2 100644 --- a/examples/vmpooler.yaml.dummy-example.aliasedpools +++ b/examples/vmpooler.yaml.dummy-example.aliasedpools @@ -17,7 +17,7 @@ logfile: '/Users/samuel/workspace/vmpooler/vmpooler.log' task_limit: 10 timeout: 15 - vm_checktime: 15 + vm_checktime: 1 vm_lifetime: 12 vm_lifetime_auth: 24 allowed_tags: diff --git a/lib/vmpooler.rb b/lib/vmpooler.rb index 0b05599..a50be52 100644 --- a/lib/vmpooler.rb +++ b/lib/vmpooler.rb @@ -49,7 +49,7 @@ module Vmpooler # Set some configuration defaults parsed_config[:config]['task_limit'] = ENV['TASK_LIMIT'] || parsed_config[:config]['task_limit'] || 10 parsed_config[:config]['migration_limit'] = ENV['MIGRATION_LIMIT'] if ENV['MIGRATION_LIMIT'] - parsed_config[:config]['vm_checktime'] = ENV['VM_CHECKTIME'] || parsed_config[:config]['vm_checktime'] || 15 + parsed_config[:config]['vm_checktime'] = ENV['VM_CHECKTIME'] || parsed_config[:config]['vm_checktime'] || 1 parsed_config[:config]['vm_lifetime'] = ENV['VM_LIFETIME'] || parsed_config[:config]['vm_lifetime'] || 24 parsed_config[:config]['prefix'] = ENV['VM_PREFIX'] || parsed_config[:config]['prefix'] || '' @@ -100,6 +100,9 @@ module Vmpooler parsed_config[:pools] = load_pools_from_redis(redis) end + # Create an index of pools by title + parsed_config[:pool_index] = pool_index(parsed_config[:pools]) + parsed_config[:pools].each do |pool| parsed_config[:pool_names] << pool['name'] if pool['alias'] @@ -161,4 +164,14 @@ module Vmpooler def self.pools(conf) conf[:pools] end + + def self.pool_index(pools) + pools_hash = {} + index = 0 + for pool in pools + pools_hash[pool['name']] = index + index += 1 + end + pools_hash + end end diff --git a/lib/vmpooler/pool_manager.rb b/lib/vmpooler/pool_manager.rb index 561d642..3346da3 100644 --- a/lib/vmpooler/pool_manager.rb +++ b/lib/vmpooler/pool_manager.rb @@ -134,18 +134,18 @@ module Vmpooler move_vm_queue(pool_name, vm_name, 'ready', 'completed', "is unreachable, removed from 'ready' queue") end - def check_ready_vm(vm, pool, ttl, provider) + def check_ready_vm(vm, pool_name, ttl, provider) Thread.new do begin - _check_ready_vm(vm, pool, ttl, provider) + _check_ready_vm(vm, pool_name, ttl, provider) rescue => err - $logger.log('s', "[!] [#{pool['name']}] '#{vm}' failed while checking a ready vm : #{err}") + $logger.log('s', "[!] [#{pool_name}] '#{vm}' failed while checking a ready vm : #{err}") raise end end end - def _check_ready_vm(vm, pool, ttl, provider) + def _check_ready_vm(vm, pool_name, ttl, provider) # Periodically check that the VM is available mutex = vm_mutex(vm) return if mutex.locked? @@ -158,21 +158,22 @@ module Vmpooler if ttl > 0 # host['boottime'] may be nil if host is not powered on if ((Time.now - host['boottime']) / 60).to_s[/^\d+\.\d{1}/].to_f > ttl - $redis.smove('vmpooler__ready__' + pool['name'], 'vmpooler__completed__' + pool['name'], vm) + $redis.smove('vmpooler__ready__' + pool_name, 'vmpooler__completed__' + pool_name, vm) - $logger.log('d', "[!] [#{pool['name']}] '#{vm}' reached end of TTL after #{ttl} minutes, removed from 'ready' queue") + $logger.log('d', "[!] [#{pool_name}] '#{vm}' reached end of TTL after #{ttl} minutes, removed from 'ready' queue") return end end - return if has_mismatched_hostname?(vm, pool, provider) + return if has_mismatched_hostname?(vm, pool_name, provider) - vm_still_ready?(pool['name'], vm, provider) + vm_still_ready?(pool_name, vm, provider) end end - def has_mismatched_hostname?(vm, pool, provider) - check_hostname = pool['check_hostname_for_mismatch'] + def has_mismatched_hostname?(vm, pool_name, provider) + pool_config = $config[:pools][$config[:pool_index][pool_name]] + check_hostname = pool_config['check_hostname_for_mismatch'] check_hostname = $config[:config]['check_ready_vm_hostname_for_mismatch'] if check_hostname.nil? return if check_hostname == false @@ -187,15 +188,15 @@ module Vmpooler end # Check if the hostname has magically changed from underneath Pooler - vm_hash = provider.get_vm(pool['name'], vm) + vm_hash = provider.get_vm(pool_name, vm) return unless vm_hash.is_a? Hash hostname = vm_hash['hostname'] return if hostname.nil? return if hostname.empty? return if hostname == vm - $redis.smove('vmpooler__ready__' + pool['name'], 'vmpooler__completed__' + pool['name'], vm) - $logger.log('d', "[!] [#{pool['name']}] '#{vm}' has mismatched hostname #{hostname}, removed from 'ready' queue") + $redis.smove('vmpooler__ready__' + pool_name, 'vmpooler__completed__' + pool_name, vm) + $logger.log('d', "[!] [#{pool_name}] '#{vm}' has mismatched hostname #{hostname}, removed from 'ready' queue") return true end diff --git a/spec/fixtures/vmpooler.yaml b/spec/fixtures/vmpooler.yaml index 7ad26d5..1e3e78f 100644 --- a/spec/fixtures/vmpooler.yaml +++ b/spec/fixtures/vmpooler.yaml @@ -17,7 +17,7 @@ logfile: '/var/log/vmpooler.log' task_limit: 10 timeout: 15 - vm_checktime: 15 + vm_checktime: 1 vm_lifetime: 12 vm_lifetime_auth: 24 allowed_tags: @@ -38,4 +38,4 @@ provider: dummy - name: 'pool02' size: 5 - provider: dummy \ No newline at end of file + provider: dummy diff --git a/spec/fixtures/vmpooler2.yaml b/spec/fixtures/vmpooler2.yaml index 3c7b557..39a617a 100644 --- a/spec/fixtures/vmpooler2.yaml +++ b/spec/fixtures/vmpooler2.yaml @@ -17,7 +17,7 @@ logfile: '/var/log/vmpooler.log' task_limit: 10 timeout: 15 - vm_checktime: 15 + vm_checktime: 1 vm_lifetime: 12 vm_lifetime_auth: 24 allowed_tags: @@ -38,4 +38,4 @@ provider: dummy - name: 'pool04' size: 5 - provider: dummy \ No newline at end of file + provider: dummy diff --git a/spec/unit/pool_manager_spec.rb b/spec/unit/pool_manager_spec.rb index 4e4ff96..e9d2d67 100644 --- a/spec/unit/pool_manager_spec.rb +++ b/spec/unit/pool_manager_spec.rb @@ -271,11 +271,22 @@ EOT describe '#_check_ready_vm' do let(:ttl) { 0 } let(:host) { {} } - let(:poolconfig) { config[:pools][0] } + let(:config) { YAML.load(<<-EOT +--- +:config: {} +:providers: + :mock: +:pools: + - name: '#{pool}' + size: 1 +:pool_index: + '#{pool}': 0 +EOT + ) + } before(:each) do create_ready_vm(pool,vm) - config[:config] = {} config[:config]['vm_checktime'] = 15 # Create a VM which is powered on @@ -289,7 +300,7 @@ EOT check_stamp = (Time.now - 60).to_s redis.hset("vmpooler__vm__#{vm}", 'check', check_stamp) expect(provider).to receive(:get_vm).exactly(0).times - subject._check_ready_vm(vm, poolconfig, ttl, provider) + subject._check_ready_vm(vm, pool, ttl, provider) expect(redis.hget("vmpooler__vm__#{vm}", 'check')).to eq(check_stamp) end end @@ -299,7 +310,7 @@ EOT it 'should set the current check timestamp' do expect(redis.hget("vmpooler__vm__#{vm}", 'check')).to be_nil - subject._check_ready_vm(vm, poolconfig, ttl, provider) + subject._check_ready_vm(vm, pool, ttl, provider) expect(redis.hget("vmpooler__vm__#{vm}", 'check')).to_not be_nil end end @@ -312,7 +323,7 @@ EOT it 'should set the current check timestamp' do expect(redis.hget("vmpooler__vm__#{vm}", 'check')).to eq(last_check_date) - subject._check_ready_vm(vm, poolconfig, ttl, provider) + subject._check_ready_vm(vm, pool, ttl, provider) expect(redis.hget("vmpooler__vm__#{vm}", 'check')).to_not eq(last_check_date) end @@ -322,7 +333,7 @@ EOT end it 'should only set the next check interval' do - subject._check_ready_vm(vm, poolconfig, ttl, provider) + subject._check_ready_vm(vm, pool, ttl, provider) end end @@ -334,13 +345,13 @@ EOT it 'should move the VM to the completed queue' do expect(redis).to receive(:smove).with("vmpooler__ready__#{pool}", "vmpooler__completed__#{pool}", vm) - subject._check_ready_vm(vm, poolconfig, ttl, provider) + subject._check_ready_vm(vm, pool, ttl, provider) end it 'should move the VM to the completed queue in Redis' do expect(redis.sismember("vmpooler__ready__#{pool}", vm)).to be(true) expect(redis.sismember("vmpooler__completed__#{pool}", vm)).to be(false) - subject._check_ready_vm(vm, poolconfig, ttl, provider) + subject._check_ready_vm(vm, pool, ttl, provider) expect(redis.sismember("vmpooler__ready__#{pool}", vm)).to be(false) expect(redis.sismember("vmpooler__completed__#{pool}", vm)).to be(true) end @@ -348,7 +359,7 @@ EOT it 'should log messages about being unreachable' do expect(logger).to receive(:log).with('d', "[!] [#{pool}] '#{vm}' is unreachable, removed from 'ready' queue") - subject._check_ready_vm(vm, poolconfig, ttl, provider) + subject._check_ready_vm(vm, pool, ttl, provider) end end @@ -360,7 +371,7 @@ EOT end it 'should return nil' do - expect(subject._check_ready_vm(vm, poolconfig, ttl, provider)).to be_nil + expect(subject._check_ready_vm(vm, pool, ttl, provider)).to be_nil end end @@ -374,13 +385,13 @@ EOT it 'should move the VM to the completed queue' do expect(redis).to receive(:smove).with("vmpooler__ready__#{pool}", "vmpooler__completed__#{pool}", vm) - subject._check_ready_vm(vm, poolconfig, ttl, provider) + subject._check_ready_vm(vm, pool, ttl, provider) end it 'should move the VM to the completed queue in Redis' do expect(redis.sismember("vmpooler__ready__#{pool}", vm)).to be(true) expect(redis.sismember("vmpooler__completed__#{pool}", vm)).to be(false) - subject._check_ready_vm(vm, poolconfig, ttl, provider) + subject._check_ready_vm(vm, pool, ttl, provider) expect(redis.sismember("vmpooler__ready__#{pool}", vm)).to be(false) expect(redis.sismember("vmpooler__completed__#{pool}", vm)).to be(true) end @@ -388,7 +399,7 @@ EOT it 'should log messages about being misnamed' do expect(logger).to receive(:log).with('d', "[!] [#{pool}] '#{vm}' has mismatched hostname #{different_hostname}, removed from 'ready' queue") - subject._check_ready_vm(vm, poolconfig, ttl, provider) + subject._check_ready_vm(vm, pool, ttl, provider) end end end @@ -401,7 +412,7 @@ EOT it 'should not run get_vm' do expect(provider).to_not receive(:get_vm) - subject._check_ready_vm(vm, poolconfig, ttl, provider) + subject._check_ready_vm(vm, pool, ttl, provider) end end @@ -413,7 +424,7 @@ EOT it 'should not run get_vm' do expect(provider).to_not receive(:get_vm) - subject._check_ready_vm(vm, poolconfig, ttl, provider) + subject._check_ready_vm(vm, pool, ttl, provider) end end end @@ -427,7 +438,7 @@ EOT it 'should return' do expect(subject).to receive(:vm_mutex).and_return(mutex) - expect(subject._check_ready_vm(vm, poolconfig, ttl, provider)).to be_nil + expect(subject._check_ready_vm(vm, pool, ttl, provider)).to be_nil end end end diff --git a/vmpooler.yaml.dummy-example b/vmpooler.yaml.dummy-example index 0820757..69321ba 100644 --- a/vmpooler.yaml.dummy-example +++ b/vmpooler.yaml.dummy-example @@ -17,7 +17,7 @@ logfile: '/var/log/vmpooler.log' task_limit: 10 timeout: 15 - vm_checktime: 15 + vm_checktime: 1 vm_lifetime: 12 vm_lifetime_auth: 24 allowed_tags: diff --git a/vmpooler.yaml.example b/vmpooler.yaml.example index c25e362..1480972 100644 --- a/vmpooler.yaml.example +++ b/vmpooler.yaml.example @@ -376,7 +376,7 @@ # # - vm_checktime # How often (in minutes) to check the sanity of VMs in 'ready' queues. -# (optional; default: '15') +# (optional; default: '1') # # - vm_lifetime # How long (in hours) to keep VMs in 'running' queues before destroying. @@ -492,7 +492,7 @@ logfile: '/var/log/vmpooler.log' task_limit: 10 timeout: 15 - vm_checktime: 15 + vm_checktime: 1 vm_lifetime: 12 vm_lifetime_auth: 24 allowed_tags: