vmpooler/lib/vmpooler/pool_manager.rb
Colin f580f9915e (MAINT) Make Port Check Configurable
This change makes the port and port timeout configurable via the YAML
config file. Before these were hard coded to 22 and 5, respectively.

Tests have been added to verify the default is chosen when not
configured and the configuration is picked up when supplied.
2015-12-21 11:35:33 -08:00

574 lines
18 KiB
Ruby

module Vmpooler
class PoolManager
# Defaults, used when the matching key is absent from $config[:config]
# port number to verify pending VM is ready (overridden by the 'port' key)
DEFAULT_PORT_NO = 22
# timeout, in seconds, when connecting to the readiness port
# (overridden by the 'port_timeout' key)
DEFAULT_PORT_TIMEOUT = 5
# Stores the collaborators in the process-wide globals that every worker
# method of this class reads.
#
# config   - configuration hash
# logger   - object used through $logger.log
# redis    - Redis client
# graphite - optional metrics client; when nil, $graphite is left unassigned
#            so the `defined? $graphite` guards elsewhere stay false
def initialize(config, logger, redis, graphite=nil)
  # Collaborators handed in by the caller
  $config = config
  $logger = logger
  $redis = redis
  $graphite = graphite unless graphite.nil?

  # vSphere helpers and worker threads, keyed by pool name and filled in
  # when the workers are started
  $vsphere = {}
  $threads = {}
end
# Check the state of a VM
# Runs _check_pending_vm for the given VM on a new thread and returns that
# Thread.
def check_pending_vm(vm, pool, timeout)
  Thread.new { _check_pending_vm(vm, pool, timeout) }
end
# Decides whether a pending VM is ready.
#
# The VM must be known to the pool's vSphere helper and must accept a TCP
# connection on the configured port ($config[:config]['port'], default
# DEFAULT_PORT_NO) within the configured timeout
# ($config[:config]['port_timeout'], default DEFAULT_PORT_TIMEOUT seconds).
# On success the VM is handed to move_pending_vm_to_ready; on any failure
# (VM not found, connection error, timeout, or an error while promoting it)
# fail_pending_vm is called instead.
#
# vm      - VM name, also used as the hostname to connect to
# pool    - pool name
# timeout - minutes a VM may stay pending, forwarded to fail_pending_vm
def _check_pending_vm(vm, pool, timeout)
  host = $vsphere[pool].find_vm(vm)

  if host
    begin
      port_timeout = $config[:config]['port_timeout'] || DEFAULT_PORT_TIMEOUT
      port = $config[:config]['port'] || DEFAULT_PORT_NO

      Timeout.timeout(port_timeout) do
        # Only reachability matters: close the socket right away instead of
        # leaking one descriptor per check until the GC gets to it.
        TCPSocket.new(vm, port).close
      end

      move_pending_vm_to_ready(vm, pool, host)
    rescue
      fail_pending_vm(vm, pool, timeout)
    end
  else
    fail_pending_vm(vm, pool, timeout)
  end
end
# Gives up on a pending VM once more than +timeout+ minutes have passed since
# its 'clone' timestamp: the VM is moved from the pool's 'pending' set to its
# 'completed' set and the event is logged. Does nothing when the VM has no
# 'clone' timestamp or is still within the allowed time.
def fail_pending_vm(vm, pool, timeout)
  clone_stamp = $redis.hget('vmpooler__vm__' + vm, 'clone')
  return unless clone_stamp

  minutes_pending = (Time.now - Time.parse(clone_stamp)) / 60
  return unless minutes_pending > timeout

  pending_key = 'vmpooler__pending__' + pool
  completed_key = 'vmpooler__completed__' + pool
  $redis.smove(pending_key, completed_key, vm)
  $logger.log('d', "[!] [#{pool}] '#{vm}' marked as 'failed' after #{timeout} minutes")
end
# Promotes a pending VM to the pool's 'ready' set, but only when the guest
# hostname reported on +host+ equals +vm+. The time elapsed since the VM's
# 'clone' timestamp is stored in the per-day 'vmpooler__boot__<date>' hash
# (nil when no 'clone' timestamp exists).
def move_pending_vm_to_ready(vm, pool, host)
  summary = host.summary
  guest = summary && summary.guest
  reported_name = guest && guest.hostName
  return unless reported_name && reported_name == vm

  begin
    Socket.getaddrinfo(vm, nil)
  rescue
    # lookup failures are deliberately ignored
  end

  clone_time = $redis.hget('vmpooler__vm__' + vm, 'clone')
  finish = clone_time ? '%.2f' % (Time.now - Time.parse(clone_time)) : nil

  pending_key = 'vmpooler__pending__' + pool
  ready_key = 'vmpooler__ready__' + pool
  $redis.smove(pending_key, ready_key, vm)
  $redis.hset('vmpooler__boot__' + Date.today.to_s, pool + ':' + vm, finish)

  $logger.log('s', "[>] [#{pool}] '#{vm}' moved to 'ready' queue")
end
# Health-checks a VM sitting in the pool's 'ready' set on a new thread and
# returns that Thread.
#
# vm   - VM name, also used as the hostname for the reachability probe
# pool - pool name
# ttl  - minutes a VM may stay ready, measured from its boot time; 0 or less
#        disables the TTL check
#
# Steps performed by the thread:
# 1. When ttl > 0, move the VM to 'completed' once it has been up longer
#    than ttl minutes.
# 2. At most once per $config[:config]['vm_checktime'] minutes (tracked in
#    the VM's 'check' timestamp), verify that the VM still exists, is
#    powered on, reports the expected hostname and accepts a TCP connection;
#    otherwise remove it from 'ready'.
def check_ready_vm(vm, pool, ttl)
  Thread.new do
    # Looked up at most once and shared by both checks below. The original
    # code read `host` in the TTL check before it was ever assigned, which
    # raised NameError whenever ttl > 0.
    host = nil

    if ttl > 0
      host = $vsphere[pool].find_vm(vm)
      boot_time = host && host.runtime && host.runtime.bootTime

      # Uptime in minutes, truncated to one decimal place
      if boot_time &&
         (((Time.now - boot_time) / 60).to_s[/^\d+\.\d{1}/].to_f) > ttl
        $redis.smove('vmpooler__ready__' + pool, 'vmpooler__completed__' + pool, vm)

        $logger.log('d', "[!] [#{pool}] '#{vm}' reached end of TTL after #{ttl} minutes, removed from 'ready' queue")
      end
    end

    check_stamp = $redis.hget('vmpooler__vm__' + vm, 'check')

    if (!check_stamp) ||
       (((Time.now - Time.parse(check_stamp)) / 60) > $config[:config]['vm_checktime'])

      $redis.hset('vmpooler__vm__' + vm, 'check', Time.now)

      host ||= $vsphere[pool].find_vm(vm) ||
               $vsphere[pool].find_vm_heavy(vm)[vm]

      if host
        if (host.runtime) &&
           (host.runtime.powerState) &&
           (host.runtime.powerState != 'poweredOn')
          $redis.smove('vmpooler__ready__' + pool, 'vmpooler__completed__' + pool, vm)

          $logger.log('d', "[!] [#{pool}] '#{vm}' appears to be powered off, removed from 'ready' queue")
        end

        if (host.summary.guest) &&
           (host.summary.guest.hostName) &&
           (host.summary.guest.hostName != vm)
          $redis.smove('vmpooler__ready__' + pool, 'vmpooler__completed__' + pool, vm)

          $logger.log('d', "[!] [#{pool}] '#{vm}' has mismatched hostname, removed from 'ready' queue")
        end
      else
        $redis.srem('vmpooler__ready__' + pool, vm)

        $logger.log('s', "[!] [#{pool}] '#{vm}' not found in vCenter inventory, removed from 'ready' queue")
      end

      begin
        # Probe the same port, with the same timeout, that qualified the VM
        # as ready in _check_pending_vm; both fall back to the old
        # hard-coded values (22 and 5 seconds) when unconfigured.
        port_timeout = $config[:config]['port_timeout'] || DEFAULT_PORT_TIMEOUT
        port = $config[:config]['port'] || DEFAULT_PORT_NO

        Timeout.timeout(port_timeout) do
          # Close immediately so the probe does not leak a descriptor
          TCPSocket.new(vm, port).close
        end
      rescue
        # smove is falsy when the VM was already taken out of 'ready' above,
        # which keeps the log free of duplicate removals
        if $redis.smove('vmpooler__ready__' + pool, 'vmpooler__completed__' + pool, vm)
          $logger.log('d', "[!] [#{pool}] '#{vm}' is unreachable, removed from 'ready' queue")
        end
      end
    end
  end
end
# Runs _check_running_vm for the given VM on a new thread and returns that
# Thread.
def check_running_vm(vm, pool, ttl)
  Thread.new { _check_running_vm(vm, pool, ttl) }
end
# Retires a checked-out VM whose lifetime has expired.
#
# The checkout time is read from the pool's 'vmpooler__active__' hash. When
# +ttl+ (hours, compared as integers) is positive and the VM has been
# running for at least that many whole hours, it is moved from 'running' to
# 'completed'. Nothing happens when vSphere does not know the VM or no
# checkout time is recorded.
def _check_running_vm(vm, pool, ttl)
  return unless $vsphere[pool].find_vm(vm)

  # Check that VM is within defined lifetime
  checkouttime = $redis.hget('vmpooler__active__' + pool, vm)
  return unless checkouttime

  hours_running = (Time.now - Time.parse(checkouttime)) / 60 / 60
  expired = (ttl.to_i > 0) && (hours_running.to_i >= ttl.to_i)
  return unless expired

  move_vm_queue(pool, vm, 'running', 'completed', "reached end of TTL after #{ttl} hours")
end
# Moves +vm+ between two of the pool's Redis sets
# ('vmpooler__<queue>__<pool>') and logs +msg+ at debug level.
def move_vm_queue(pool, vm, queue_from, queue_to, msg)
  source_key, target_key = [queue_from, queue_to].map do |queue|
    "vmpooler__#{queue}__#{pool}"
  end

  $redis.smove(source_key, target_key, vm)
  $logger.log('d', "[!] [#{pool}] '#{vm}' #{msg}")
end
# Clone a VM
# Clones a new VM from +template+ on a new thread and returns that Thread.
#
# template  - full inventory path of the template ('folder/.../name'); the
#             last path segment is also used as the pool name for the Redis
#             keys and the $vsphere lookup
# folder    - vSphere folder the clone is placed in
# datastore - datastore name for the clone
# target    - optional name passed to find_least_used_host; falls back to
#             $config[:config]['clone_target']
#
# The caller increments 'vmpooler__tasks__clone' before calling; this method
# always decrements it again, including when the thread aborts with an
# exception (e.g. a template path without a folder). Previously such early
# failures leaked a task slot for good.
def clone_vm(template, folder, datastore, target)
  Thread.new do
    vm = {}
    finish = nil

    begin
      if template =~ /\//
        templatefolders = template.split('/')
        vm['template'] = templatefolders.pop
      end

      if templatefolders
        vm[vm['template']] = $vsphere[vm['template']].find_folder(templatefolders.join('/')).find(vm['template'])
      else
        fail 'Please provide a full path to the template'
      end

      if vm['template'].length == 0
        fail "Unable to find template '#{vm['template']}'!"
      end

      # Generate a randomized hostname: one leading letter followed by 14
      # letters or digits.
      # NOTE(review): rand(25) never selects index 25 ('z') — looks like an
      # off-by-one, left unchanged here.
      o = [('a'..'z'), ('0'..'9')].map(&:to_a).flatten
      vm['hostname'] = o[rand(25)] + (0...14).map { o[rand(o.length)] }.join

      # Add VM to Redis inventory ('pending' pool)
      $redis.sadd('vmpooler__pending__' + vm['template'], vm['hostname'])
      $redis.hset('vmpooler__vm__' + vm['hostname'], 'clone', Time.now)
      $redis.hset('vmpooler__vm__' + vm['hostname'], 'template', vm['template'])

      # Annotate with creation time, origin template, etc.
      # Add extraconfig options that can be queried by vmtools
      configSpec = RbVmomi::VIM.VirtualMachineConfigSpec(
        annotation: JSON.pretty_generate(
          name: vm['hostname'],
          created_by: $config[:vsphere]['username'],
          base_template: vm['template'],
          creation_timestamp: Time.now.utc
        ),
        extraConfig: [
          { key: 'guestinfo.hostname',
            value: vm['hostname']
          }
        ]
      )

      # Choose a clone target. A local is used instead of the former
      # $clone_target global so that a pool without any configured target
      # cannot inherit the host picked for a previous clone.
      clone_target = nil
      if target
        clone_target = $vsphere[vm['template']].find_least_used_host(target)
      elsif $config[:config]['clone_target']
        clone_target = $vsphere[vm['template']].find_least_used_host($config[:config]['clone_target'])
      end

      # Put the VM in the specified folder and resource pool
      relocateSpec = RbVmomi::VIM.VirtualMachineRelocateSpec(
        datastore: $vsphere[vm['template']].find_datastore(datastore),
        host: clone_target,
        diskMoveType: :moveChildMostDiskBacking
      )

      # Create a clone spec
      spec = RbVmomi::VIM.VirtualMachineCloneSpec(
        location: relocateSpec,
        config: configSpec,
        powerOn: true,
        template: false
      )

      # Clone the VM
      $logger.log('d', "[ ] [#{vm['template']}] '#{vm['hostname']}' is being cloned from '#{vm['template']}'")

      begin
        start = Time.now
        vm[vm['template']].CloneVM_Task(
          folder: $vsphere[vm['template']].find_folder(folder),
          name: vm['hostname'],
          spec: spec
        ).wait_for_completion
        finish = '%.2f' % (Time.now - start)

        $redis.hset('vmpooler__clone__' + Date.today.to_s, vm['template'] + ':' + vm['hostname'], finish)
        $redis.hset('vmpooler__vm__' + vm['hostname'], 'clone_time', finish)

        $logger.log('s', "[+] [#{vm['template']}] '#{vm['hostname']}' cloned from '#{vm['template']}' in #{finish} seconds")
      rescue
        $logger.log('s', "[!] [#{vm['template']}] '#{vm['hostname']}' clone appears to have failed")
        $redis.srem('vmpooler__pending__' + vm['template'], vm['hostname'])
      end
    ensure
      # Release the task slot no matter how the attempt ended
      $redis.decr('vmpooler__tasks__clone')
    end

    begin
      $graphite.log($config[:graphite]['prefix'] + ".clone.#{vm['template']}", finish) if defined? $graphite
    rescue
      # metrics are best-effort
    end
  end
end
# Destroy a VM
# Removes a VM's Redis bookkeeping and destroys the VM itself on a new
# thread; returns that Thread.
#
# The VM leaves the pool's 'completed' set and 'active' hash, its metadata
# hash gets a 'destroy' timestamp and an expiry of
# $config[:redis]['data_ttl'] hours. When vSphere still knows the VM it is
# powered off (if running) and destroyed, and the duration is logged and,
# when $graphite is defined, reported.
def destroy_vm(vm, pool)
  Thread.new do
    $redis.srem('vmpooler__completed__' + pool, vm)
    $redis.hdel('vmpooler__active__' + pool, vm)

    vm_key = 'vmpooler__vm__' + vm
    $redis.hset(vm_key, 'destroy', Time.now)

    # Auto-expire metadata key
    $redis.expire(vm_key, ($config[:redis]['data_ttl'].to_i * 60 * 60))

    vsphere = $vsphere[pool]
    host = vsphere.find_vm(vm) || vsphere.find_vm_heavy(vm)[vm]

    # Nothing left to destroy when vSphere no longer knows the VM
    next unless host

    start = Time.now

    runtime = host.runtime
    if runtime && runtime.powerState == 'poweredOn'
      $logger.log('d', "[ ] [#{pool}] '#{vm}' is being shut down")
      host.PowerOffVM_Task.wait_for_completion
    end

    host.Destroy_Task.wait_for_completion
    finish = '%.2f' % (Time.now - start)

    $logger.log('s', "[-] [#{pool}] '#{vm}' destroyed in #{finish} seconds")

    $graphite.log($config[:graphite]['prefix'] + ".destroy.#{pool}", finish) if defined? $graphite
  end
end
# Runs _create_vm_snapshot on a new thread and returns that Thread.
def create_vm_snapshot(vm, snapshot_name)
  Thread.new { _create_vm_snapshot(vm, snapshot_name) }
end
# Creates a memory-inclusive, quiesced snapshot named +snapshot_name+ of
# +vm+ through the 'snapshot_manager' vSphere helper and records the
# creation time in the VM's Redis hash under 'snapshot:<name>'.
#
# Does nothing when +snapshot_name+ is nil or empty, or when the VM cannot
# be found. (The original guard joined the two name checks with `||`, which
# let an empty name through and raised NoMethodError for nil.)
def _create_vm_snapshot(vm, snapshot_name)
  return if snapshot_name.nil? || snapshot_name.empty?

  host = $vsphere['snapshot_manager'].find_vm(vm) ||
         $vsphere['snapshot_manager'].find_vm_heavy(vm)[vm]
  return unless host

  $logger.log('s', "[ ] [snapshot_manager] '#{vm}' is being snapshotted")

  start = Time.now

  host.CreateSnapshot_Task(
    name: snapshot_name,
    description: 'vmpooler',
    memory: true,
    quiesce: true
  ).wait_for_completion

  finish = '%.2f' % (Time.now - start)

  $redis.hset('vmpooler__vm__' + vm, 'snapshot:' + snapshot_name, Time.now.to_s)

  $logger.log('s', "[+] [snapshot_manager] '#{vm}' snapshot created in #{finish} seconds")
end
# Runs _revert_vm_snapshot on a new thread and returns that Thread.
def revert_vm_snapshot(vm, snapshot_name)
  Thread.new { _revert_vm_snapshot(vm, snapshot_name) }
end
# Reverts +vm+ to the snapshot named +snapshot_name+ through the
# 'snapshot_manager' vSphere helper and logs how long the revert took.
# Does nothing when either the VM or the snapshot cannot be found.
def _revert_vm_snapshot(vm, snapshot_name)
  manager = $vsphere['snapshot_manager']

  host = manager.find_vm(vm) || manager.find_vm_heavy(vm)[vm]
  return unless host

  snapshot = manager.find_snapshot(host, snapshot_name)
  return unless snapshot

  $logger.log('s', "[ ] [snapshot_manager] '#{vm}' is being reverted to snapshot '#{snapshot_name}'")

  start = Time.now
  snapshot.RevertToSnapshot_Task.wait_for_completion
  finish = '%.2f' % (Time.now - start)

  $logger.log('s', "[<] [snapshot_manager] '#{vm}' reverted to snapshot in #{finish} seconds")
end
# Starts the snapshot worker: a thread that calls _check_snapshot_queue
# every 5 seconds. The thread is stored in $threads['snapshot_manager'] and
# returned; a vSphere helper is created for it unless one already exists.
def check_snapshot_queue
  $logger.log('d', "[*] [snapshot_manager] starting worker thread")

  $vsphere['snapshot_manager'] ||= Vmpooler::VsphereHelper.new

  worker = Thread.new do
    loop do
      _check_snapshot_queue
      sleep(5)
    end
  end

  $threads['snapshot_manager'] = worker
end
# Takes at most one entry from each snapshot task set in Redis and starts
# the matching operation. Entries have the form '<vm>:<snapshot name>'.
# 'vmpooler__tasks__snapshot' entries create a snapshot,
# 'vmpooler__tasks__snapshot-revert' entries revert to one. An error raised
# while dispatching an entry is logged rather than propagated.
def _check_snapshot_queue
  run_task = lambda do |queue, handler, failure_msg|
    task = $redis.spop(queue)

    unless task.nil?
      begin
        vm_name, snapshot_name = task.split(':')
        send(handler, vm_name, snapshot_name)
      rescue
        $logger.log('s', failure_msg)
      end
    end
  end

  run_task.call('vmpooler__tasks__snapshot', :create_vm_snapshot,
                "[!] [snapshot_manager] snapshot appears to have failed")
  run_task.call('vmpooler__tasks__snapshot-revert', :revert_vm_snapshot,
                "[!] [snapshot_manager] snapshot revert appears to have failed")
end
# Starts the worker for one pool: a thread that calls _check_pool(pool)
# every 5 seconds. The thread is stored in $threads under the pool's name
# and returned; a vSphere helper is created for the pool unless one already
# exists.
def check_pool(pool)
  name = pool['name']

  $logger.log('d', "[*] [#{name}] starting worker thread")

  $vsphere[name] ||= Vmpooler::VsphereHelper.new

  worker = Thread.new do
    loop do
      _check_pool(pool)
      sleep(5)
    end
  end

  $threads[name] = worker
end
# Performs one maintenance pass over a pool.
#
# pool - pool configuration hash; reads 'name', 'folder', 'size',
#        'template', 'datastore', 'clone_target', 'ready_ttl' and 'timeout'
#
# The pass runs these stages in order:
#   INVENTORY  - list the VMs in the pool's vSphere folder; VMs unknown to
#                Redis are added to the 'discovered' set
#   RUNNING / READY / PENDING - start the matching check for every VM that
#                is present in the inventory
#   COMPLETED  - destroy VMs present in the inventory, drop the Redis records
#                of those that are not
#   DISCOVERED - move still-unclaimed discovered VMs to 'completed'
#   REPOPULATE - report metrics, maintain the 'empty' flag and start clones
#                until ready + pending reaches pool['size'], limited by
#                $config[:config]['task_limit'] concurrent clone tasks
#
# If the inventory cannot be read the pass is aborted after logging. An empty
# inventory caused by a vSphere error would otherwise make the COMPLETED
# stage delete the Redis records of VMs that still exist, so they would never
# be destroyed.
def _check_pool(pool)
  # INVENTORY
  inventory = {}
  begin
    base = $vsphere[pool['name']].find_folder(pool['folder'])

    base.childEntity.each do |vm|
      if (! $redis.sismember('vmpooler__running__' + pool['name'], vm['name'])) &&
         (! $redis.sismember('vmpooler__ready__' + pool['name'], vm['name'])) &&
         (! $redis.sismember('vmpooler__pending__' + pool['name'], vm['name'])) &&
         (! $redis.sismember('vmpooler__completed__' + pool['name'], vm['name'])) &&
         (! $redis.sismember('vmpooler__discovered__' + pool['name'], vm['name']))

        $redis.sadd('vmpooler__discovered__' + pool['name'], vm['name'])

        $logger.log('s', "[?] [#{pool['name']}] '#{vm['name']}' added to 'discovered' queue")
      end

      inventory[vm['name']] = 1
    end
  rescue => err
    # Without a trustworthy inventory every later stage would act on wrong
    # data, so skip this pass; the worker loop will try again.
    $logger.log('s', "[!] [#{pool['name']}] failed to read inventory, skipping pool check: #{err}")
    return
  end

  # RUNNING
  $redis.smembers('vmpooler__running__' + pool['name']).each do |vm|
    if inventory[vm]
      begin
        check_running_vm(vm, pool['name'], $redis.hget('vmpooler__vm__' + vm, 'lifetime') || $config[:config]['vm_lifetime'] || 12)
      rescue
      end
    end
  end

  # READY
  $redis.smembers('vmpooler__ready__' + pool['name']).each do |vm|
    if inventory[vm]
      begin
        check_ready_vm(vm, pool['name'], pool['ready_ttl'] || 0)
      rescue
      end
    end
  end

  # PENDING
  $redis.smembers('vmpooler__pending__' + pool['name']).each do |vm|
    if inventory[vm]
      begin
        check_pending_vm(vm, pool['name'], pool['timeout'] || $config[:config]['timeout'] || 15)
      rescue
      end
    end
  end

  # COMPLETED
  $redis.smembers('vmpooler__completed__' + pool['name']).each do |vm|
    if inventory[vm]
      begin
        destroy_vm(vm, pool['name'])
      rescue
        $logger.log('s', "[!] [#{pool['name']}] '#{vm}' destroy appears to have failed")
        $redis.srem('vmpooler__completed__' + pool['name'], vm)
        $redis.hdel('vmpooler__active__' + pool['name'], vm)
        $redis.del('vmpooler__vm__' + vm)
      end
    else
      $logger.log('s', "[!] [#{pool['name']}] '#{vm}' not found in inventory, removed from 'completed' queue")
      $redis.srem('vmpooler__completed__' + pool['name'], vm)
      $redis.hdel('vmpooler__active__' + pool['name'], vm)
      $redis.del('vmpooler__vm__' + vm)
    end
  end

  # DISCOVERED
  $redis.smembers('vmpooler__discovered__' + pool['name']).each do |vm|
    %w(pending ready running completed).each do |queue|
      if $redis.sismember('vmpooler__' + queue + '__' + pool['name'], vm)
        $logger.log('d', "[!] [#{pool['name']}] '#{vm}' found in '#{queue}', removed from 'discovered' queue")
        $redis.srem('vmpooler__discovered__' + pool['name'], vm)
      end
    end

    if $redis.sismember('vmpooler__discovered__' + pool['name'], vm)
      $redis.smove('vmpooler__discovered__' + pool['name'], 'vmpooler__completed__' + pool['name'], vm)
    end
  end

  # REPOPULATE
  ready = $redis.scard('vmpooler__ready__' + pool['name'])
  total = $redis.scard('vmpooler__pending__' + pool['name']) + ready

  begin
    if defined? $graphite
      $graphite.log($config[:graphite]['prefix'] + '.ready.' + pool['name'], $redis.scard('vmpooler__ready__' + pool['name']))
      $graphite.log($config[:graphite]['prefix'] + '.running.' + pool['name'], $redis.scard('vmpooler__running__' + pool['name']))
    end
  rescue
    # metrics are best-effort
  end

  # The 'empty' flag makes sure "is empty" is logged once per empty period
  if $redis.get('vmpooler__empty__' + pool['name'])
    unless ready == 0
      $redis.del('vmpooler__empty__' + pool['name'])
    end
  else
    if ready == 0
      $redis.set('vmpooler__empty__' + pool['name'], 'true')
      $logger.log('s', "[!] [#{pool['name']}] is empty")
    end
  end

  if total < pool['size']
    (1..(pool['size'] - total)).each do |_i|
      if $redis.get('vmpooler__tasks__clone').to_i < $config[:config]['task_limit'].to_i
        begin
          # clone_vm releases the task slot again when it finishes
          $redis.incr('vmpooler__tasks__clone')

          clone_vm(
            pool['template'],
            pool['folder'],
            pool['datastore'],
            pool['clone_target']
          )
        rescue
          $logger.log('s', "[!] [#{pool['name']}] clone appears to have failed")
          $redis.decr('vmpooler__tasks__clone')
        end
      end
    end
  end
end
# Main supervisor loop. Resets the clone task counter, then once per second
# makes sure the snapshot worker and one worker per configured pool exist
# and are alive, (re)starting any that are missing or have died. Does not
# return under normal operation.
def execute!
  $logger.log('d', 'starting vmpooler')

  # Clear out the tasks manager, as we don't know about any tasks at this point
  $redis.set('vmpooler__tasks__clone', 0)

  loop do
    snapshot_worker = $threads['snapshot_manager']
    snapshot_died = snapshot_worker && !snapshot_worker.alive?
    $logger.log('d', "[!] [snapshot_manager] worker thread died, restarting") if snapshot_died
    check_snapshot_queue if !snapshot_worker || snapshot_died

    $config[:pools].each do |pool|
      name = pool['name']
      worker = $threads[name]
      died = worker && !worker.alive?

      $logger.log('d', "[!] [#{name}] worker thread died, restarting") if died
      check_pool(pool) if !worker || died
    end

    sleep(1)
  end
end
end
end