vmpooler/vmware-host-pooler
Scott Schneider 087be6014d Store 'clone' timestamps in Redis database
Using Redis keys to store clone timestamps will allow for a global TTL
for VMs in the 'pending' pool; failed clones should be cleaned up and
retried after a set period of time (e.g., 15m).
2014-02-04 15:05:24 -08:00

438 lines
13 KiB
Ruby
Executable file

#!/usr/bin/ruby
require 'rbvmomi'
require 'redis'
require 'time'
require 'yaml'
$:.unshift(File.dirname(__FILE__))
require 'lib/logger'
require 'lib/require_relative'
require 'lib/vsphere_helper'
# Work from the script's own directory so the relative config path below
# is resolved against it.
Dir.chdir(File.dirname(__FILE__))

# Load the configuration file
config_file = File.expand_path('vmware-host-pooler.yaml')
$config = YAML.load_file(config_file)

pools = $config[:pools]
vsphere = $config[:vsphere]

# Load logger library
$logger = Logger.new $config[:config]['logfile']

# Load Graphite helper library (if configured).
# The value itself is tested: `defined?` applied to a hash lookup is truthy
# whether or not the key exists, which would leave $graphite set even when
# no 'graphite' entry is present in the configuration.
if $config[:config]['graphite']
  require 'lib/graphite'
  $graphite = Graphite.new $config[:config]['graphite']
end

# Set some defaults
$config[:config]['task_limit'] ||= 10

# Connect to Redis
$redis = Redis.new

# vSphere helper objects, keyed by pool name
$vsphere = {}

# Our thread-tracker object: worker threads, keyed by pool name
$threads = {}
# Check a VM that is sitting in the 'pending' queue.
#
# vm      - name of the VM; also its member name in the Redis queues
# pool    - pool name, used to pick the $vsphere helper and the Redis queues
# timeout - minutes allowed since the VM's recorded 'clone' timestamp
#
# The work happens on a new Thread, which is returned:
#   * VM found and its guest reports a hostName equal to +vm+:
#     moved from 'pending' to 'ready'.
#   * VM not found and its 'clone' timestamp is older than +timeout+ minutes:
#     moved from 'pending' to 'completed'.
#   * Anything else: left untouched.
#
# NOTE(review): the timeout is only evaluated when the VM cannot be found;
# a VM that exists but never reports its hostname stays in 'pending'.
# Confirm whether that is intended.
def check_pending_vm vm, pool, timeout
  Thread.new do
    host = $vsphere[pool].find_vm(vm)

    unless host
      cloned_at = $redis.hget('vmware_host_pool__vm__'+vm, 'clone')
      expired = cloned_at && (((Time.now - Time.parse(cloned_at))/60) > timeout)

      if expired
        $redis.smove('vmware_host_pool__pending__'+pool, 'vmware_host_pool__completed__'+pool, vm)
        $logger.log('d', "[!] [#{pool}] '#{vm}' marked as 'failed' after #{timeout} minutes")
      end

      next
    end

    summary = host.summary
    guest = summary && summary.guest
    reported_name = guest && guest.hostName

    next unless reported_name && reported_name == vm

    # Best-effort name lookup; its result (or failure) is deliberately ignored.
    begin
      Socket.getaddrinfo(vm, nil)
    rescue
    end

    $redis.smove('vmware_host_pool__pending__'+pool, 'vmware_host_pool__ready__'+pool, vm)
    $logger.log('s', "[>] [#{pool}] '#{vm}' moved to 'ready' queue")
  end
end
# Check a VM that is sitting in the 'ready' queue.
#
# vm   - name of the VM; also its member name in the Redis queues
# pool - pool name, used to pick the $vsphere helper and the Redis queues
# ttl  - maximum number of minutes since the VM's boot time
#
# The work happens on a new Thread, which is returned:
#   * VM not found: removed from the 'ready' queue.
#   * VM reports a power state other than 'poweredOn': moved to 'completed'.
#   * VM has a boot time more than +ttl+ minutes in the past: moved to
#     'completed'.
#   * Runtime information or boot time missing: left untouched (previously
#     this case raised inside the thread).
def check_ready_vm vm, pool, ttl
  Thread.new {
    host = $vsphere[pool].find_vm(vm)

    if host
      runtime = host.runtime

      if runtime and runtime.powerState and runtime.powerState != 'poweredOn'
        $redis.smove('vmware_host_pool__ready__'+pool, 'vmware_host_pool__completed__'+pool, vm)
        $logger.log('d', "[!] [#{pool}] '#{vm}' appears to be powered off or dead")
      elsif runtime and runtime.bootTime
        # Minutes since boot, cut down to one decimal place before comparing.
        uptime = ((Time.now - runtime.bootTime)/60).to_s[/^\d+\.\d{1}/].to_f

        if uptime > ttl
          $redis.smove('vmware_host_pool__ready__'+pool, 'vmware_host_pool__completed__'+pool, vm)
          $logger.log('d', "[!] [#{pool}] '#{vm}' reached end of TTL after #{ttl} minutes")
        end
      end
    else
      $redis.srem('vmware_host_pool__ready__'+pool, vm)
      $logger.log('s', "[!] [#{pool}] '#{vm}' not found in vCenter inventory")
    end
  }
end
# Check a VM that is sitting in the 'running' queue.
#
# vm   - name of the VM; also its member name in the Redis queues
# pool - pool name, used to pick the $vsphere helper and the Redis queues
# ttl  - maximum number of minutes since the VM's boot time; a number or a
#        numeric String (it is converted with #to_f before comparing)
#
# The work happens on a new Thread, which is returned:
#   * VM not found: nothing happens.
#   * VM has runtime information and its power state is not 'poweredOn':
#     moved to 'completed'.
#   * VM has a boot time more than +ttl+ minutes in the past: moved to
#     'completed'.
#   * Runtime information or boot time missing: left untouched.
def check_running_vm vm, pool, ttl
  Thread.new {
    host = $vsphere[pool].find_vm(vm)

    if host
      runtime = host.runtime

      if runtime and runtime.powerState != 'poweredOn'
        $redis.smove('vmware_host_pool__running__'+pool, 'vmware_host_pool__completed__'+pool, vm)
        $logger.log('d', "[!] [#{pool}] '#{vm}' appears to be powered off or dead")
      elsif runtime and runtime.bootTime
        # Minutes since boot, cut down to one decimal place before comparing.
        uptime = ((Time.now - runtime.bootTime)/60).to_s[/^\d+\.\d{1}/].to_f

        # ttl.to_f keeps a String ttl from raising in the Float comparison.
        if uptime > ttl.to_f
          $redis.smove('vmware_host_pool__running__'+pool, 'vmware_host_pool__completed__'+pool, vm)
          $logger.log('d', "[!] [#{pool}] '#{vm}' reached end of TTL after #{ttl} minutes")
        end
      end
    end
  }
end
# Clone a VM from a template.
#
# template  - path to the template, folders separated by '/'; the last
#             segment is the template's name
# pool      - name of the resource pool the clone is placed in
# folder    - name of the folder the clone is placed in
# datastore - name of the datastore the clone is placed on
#
# The work happens on a new Thread, which is returned. The thread:
#   1. looks the template up in its folder,
#   2. generates a random 15-character hostname,
#   3. adds the hostname to the 'pending' queue and records a 'clone'
#      timestamp for it in Redis,
#   4. runs the clone task and logs the outcome; on failure the hostname is
#      removed from the 'pending' queue again,
#   5. reports the clone duration to Graphite when $graphite is set.
#
# The 'vmware_host_pool__tasks__clone' counter is decremented when the thread
# finishes, including when it is aborted by an exception (for example the
# RuntimeError raised when +template+ contains no '/'), so a failed lookup
# cannot permanently use up a clone slot.
#
# NOTE(review): the $vsphere helper and the Redis 'pending' queue are looked
# up by the template's name rather than by a pool name, so they only line up
# with the keys used by the pool worker when a pool is named after its
# template - confirm against the configuration.
def clone_vm template, pool, folder, datastore
  Thread.new {
    begin
      vm = {}

      if template =~ /\//
        templatefolders = template.split('/')
        vm['template'] = templatefolders.pop
      end

      if templatefolders
        vm[vm['template']] = $vsphere[vm['template']].find_folder(templatefolders.join('/')).find(vm['template'])
      else
        raise "Please provide a full path to the template"
      end

      if vm['template'].length == 0
        raise "Unable to find template '#{vm['template']}'!"
      end

      # Generate a randomized hostname: one letter followed by 14
      # letters or digits
      letters = ('a'..'z').to_a
      alphanumerics = letters + ('0'..'9').to_a
      vm['hostname'] = letters[rand(letters.length)] +
        (0...14).map { alphanumerics[rand(alphanumerics.length)] }.join

      # Add VM to Redis inventory ('pending' pool)
      $redis.sadd('vmware_host_pool__pending__'+vm['template'], vm['hostname'])
      $redis.hset('vmware_host_pool__vm__'+vm['hostname'], 'clone', Time.now)

      # Annotate with creation time, origin template, etc.
      configSpec = RbVmomi::VIM.VirtualMachineConfigSpec(
        :annotation =>
          'Base template: ' + vm['template'] + "\n" +
          'Creation time: ' + Time.now.strftime("%Y-%m-%d %H:%M")
      )

      # Put the VM in the specified folder and resource pool
      relocateSpec = RbVmomi::VIM.VirtualMachineRelocateSpec(
        :datastore => $vsphere[vm['template']].find_datastore(datastore),
        :pool => $vsphere[vm['template']].find_pool(pool),
        :diskMoveType => :moveChildMostDiskBacking
      )

      # Create a clone spec
      spec = RbVmomi::VIM.VirtualMachineCloneSpec(
        :location => relocateSpec,
        :config => configSpec,
        :powerOn => true,
        :template => false
      )

      # Clone the VM
      $logger.log('d', "[ ] [#{vm['template']}] '#{vm['hostname']}' is being cloned from '#{vm['template']}'")

      # Stays nil when the clone fails, so no duration is reported below
      finish = nil

      begin
        start = Time.now
        vm[vm['template']].CloneVM_Task(
          :folder => $vsphere[vm['template']].find_folder(folder),
          :name => vm['hostname'],
          :spec => spec
        ).wait_for_completion
        finish = '%.2f' % (Time.now-start)

        $logger.log('s', "[+] [#{vm['template']}] '#{vm['hostname']}' cloned from '#{vm['template']}' in #{finish} seconds")
      rescue
        $logger.log('s', "[!] [#{vm['template']}] '#{vm['hostname']}' clone appears to have failed")
        $redis.srem('vmware_host_pool__pending__'+vm['template'], vm['hostname'])
      end

      # Metrics are best-effort: a Graphite failure must not abort the thread
      begin
        $graphite.log("vcloud.clone.#{vm['template']}", finish) if finish and defined?($graphite)
      rescue
      end
    ensure
      # Always give the clone slot back, whatever happened above
      $redis.decr('vmware_host_pool__tasks__clone')
    end
  }
end
# Destroy a VM.
#
# vm   - name of the VM; also its member name in the Redis queues
# pool - pool name, used to pick the $vsphere helper and the Redis keys
#
# The work happens on a new Thread, which is returned. The thread first
# removes the VM's bookkeeping from Redis (its 'completed' queue entry, its
# 'active' hash field and its per-VM key). It then looks the VM up with
# find_vm, falling back to find_vm_heavy when that finds nothing. When the VM
# is found it is powered off (if it reports 'poweredOn') and destroyed, and
# the elapsed time is logged and, when $graphite is set, reported to Graphite.
#
# Both lookup paths share one code path, so the timer is always started
# before it is read (the fallback path previously read an unset start time).
def destroy_vm vm, pool
  Thread.new {
    $redis.srem('vmware_host_pool__completed__'+pool, vm)
    $redis.hdel('vmware_host_pool__active__'+pool, vm)
    $redis.del('vmware_host_pool__vm__'+vm)

    host = $vsphere[pool].find_vm(vm)
    host = $vsphere[pool].find_vm_heavy(vm)[vm] unless host

    if host
      start = Time.now

      if (
        (host.runtime) and
        (host.runtime.powerState) and
        (host.runtime.powerState == 'poweredOn')
      )
        $logger.log('d', "[ ] [#{pool}] '#{vm}' is being shut down")
        host.PowerOffVM_Task.wait_for_completion
      end

      host.Destroy_Task.wait_for_completion
      finish = '%.2f' % (Time.now-start)

      $logger.log('s', "[-] [#{pool}] '#{vm}' destroyed in #{finish} seconds")
      $graphite.log("vcloud.destroy.#{pool}", finish) if defined? $graphite
    end
  }
end
# Start the worker thread that manages one pool.
#
# pool - the pool's configuration Hash. This method reads its 'name', 'pool',
#        'template', 'folder', 'datastore' and 'size' entries and the optional
#        'running_ttl', 'ready_ttl' and 'timeout' entries ('timeout' is
#        written back as 15 when unset and a pending VM is examined).
#
# The thread is stored in $threads under the pool's name and returned. It
# creates the pool's VsphereHelper on first use and then loops forever, once
# per second:
#
#   INVENTORY    - lists the VMs in the pool's resource pool; VMs not in any
#                  Redis queue are added to 'discovered'
#   RUNNING      - checks running VMs against 'running_ttl' (default 720
#                  minutes)
#   READY        - checks ready VMs against 'ready_ttl' (only when set)
#   PENDING      - checks pending VMs against 'timeout'
#   COMPLETED    - destroys completed VMs; VMs missing from the inventory are
#                  only dropped from Redis
#   DISCOVERED   - moves discovered VMs that are in no other queue to
#                  'completed'
#   LONG-RUNNING - moves running VMs active for more than 12 hours to
#                  'completed'
#   REPOPULATE   - starts clones until ready + pending reaches 'size',
#                  limited by the configured 'task_limit'
#
# The check_*/destroy_vm/clone_vm helpers each start their own thread, so the
# `rescue` clauses around those calls only cover failures to start them.
def check_pool pool
  $logger.log('d', "[*] [#{pool['name']}] starting worker thread")

  $threads[pool['name']] = Thread.new {
    $vsphere[pool['name']] ||= VsphereHelper.new

    # Drops every trace of a VM that is done with (or gone) from Redis
    forget_vm = lambda { |vm|
      $redis.srem('vmware_host_pool__completed__'+pool['name'], vm)
      $redis.hdel('vmware_host_pool__active__'+pool['name'], vm)
      $redis.del('vmware_host_pool__vm__'+vm)
    }

    loop do
      # INVENTORY
      inventory = {}

      begin
        base = $vsphere[pool['name']].find_pool(pool['pool'])

        base.vm.each do |vm|
          tracked = ['running', 'ready', 'pending', 'completed', 'discovered'].any? { |queue|
            $redis.sismember('vmware_host_pool__'+queue+'__'+pool['name'], vm['name'])
          }

          unless tracked
            $redis.sadd('vmware_host_pool__discovered__'+pool['name'], vm['name'])
            $logger.log('s', "[?] [#{pool['name']}] '#{vm['name']}' added to 'discovered' queue")
          end

          inventory[vm['name']] = 1
        end
      rescue
        # Best effort: on failure this pass continues with whatever part of
        # the inventory was collected.
      end

      # RUNNING
      # The default is numeric: a String here cannot be compared with the
      # VM's uptime and would make the check raise.
      running_ttl = pool['running_ttl'] || 720

      $redis.smembers('vmware_host_pool__running__'+pool['name']).each do |vm|
        if inventory[vm]
          begin
            check_running_vm(vm, pool['name'], running_ttl)
          rescue
          end
        end
      end

      # READY
      $redis.smembers('vmware_host_pool__ready__'+pool['name']).each do |vm|
        if inventory[vm] and pool['ready_ttl']
          begin
            check_ready_vm(vm, pool['name'], pool['ready_ttl'])
          rescue
          end
        end
      end

      # PENDING
      $redis.smembers('vmware_host_pool__pending__'+pool['name']).each do |vm|
        pool['timeout'] ||= 15

        if inventory[vm]
          begin
            check_pending_vm(vm, pool['name'], pool['timeout'])
          rescue
          end
        end
      end

      # COMPLETED
      $redis.smembers('vmware_host_pool__completed__'+pool['name']).each do |vm|
        if inventory[vm]
          begin
            destroy_vm(vm, pool['name'])
          rescue
            $logger.log('s', "[!] [#{pool['name']}] '#{vm}' destroy appears to have failed")
            forget_vm.call(vm)
          end
        else
          $logger.log('s', "[!] [#{pool['name']}] '#{vm}' not found in inventory, removing from 'completed' queue")
          forget_vm.call(vm)
        end
      end

      # DISCOVERED
      $redis.smembers('vmware_host_pool__discovered__'+pool['name']).each do |vm|
        ['pending', 'ready', 'running', 'completed'].each do |queue|
          if $redis.sismember('vmware_host_pool__'+queue+'__'+pool['name'], vm)
            $logger.log('d', "[!] [#{pool['name']}] '#{vm}' found in '#{queue}', removing from 'discovered'")
            $redis.srem('vmware_host_pool__discovered__'+pool['name'], vm)
          end
        end

        # Still only 'discovered': queue it for destruction
        if $redis.sismember('vmware_host_pool__discovered__'+pool['name'], vm)
          $redis.smove('vmware_host_pool__discovered__'+pool['name'], 'vmware_host_pool__completed__'+pool['name'], vm)
        end
      end

      # LONG-RUNNING
      $redis.smembers('vmware_host_pool__running__'+pool['name']).each do |vm|
        active_since = $redis.hget('vmware_host_pool__active__'+pool['name'], vm)

        if active_since
          running = (Time.now - Time.parse(active_since))/60/60

          if running > 12
            $redis.smove('vmware_host_pool__running__'+pool['name'], 'vmware_host_pool__completed__'+pool['name'], vm)
            $logger.log('d', "[!] [#{pool['name']}] '#{vm}' reached end of TTL after 12 hours")
          end
        end
      end

      # REPOPULATE
      total = $redis.scard('vmware_host_pool__ready__'+pool['name']) +
              $redis.scard('vmware_host_pool__pending__'+pool['name'])

      begin
        $graphite.log(
          'vcloud.ready.'+pool['name'], $redis.scard('vmware_host_pool__ready__'+pool['name'])
        ) if defined? $graphite
      rescue
      end

      if total < pool['size']
        (pool['size'] - total).times do
          if $redis.get('vmware_host_pool__tasks__clone').to_i < $config[:config]['task_limit']
            begin
              $redis.incr('vmware_host_pool__tasks__clone')

              clone_vm(
                pool['template'],
                pool['pool'],
                pool['folder'],
                pool['datastore']
              )
            rescue
              $logger.log('s', "[!] [#{pool['name']}] clone appears to have failed")
              $redis.decr('vmware_host_pool__tasks__clone')
            end
          end
        end
      end

      sleep(1)
    end
  }
end
$logger.log('d', "starting vmware-host-pooler")

# No clone tasks can be in flight at startup, so reset the task counter
$redis.set('vmware_host_pool__tasks__clone', 0)

# Supervisor: once per second, make sure every configured pool has a live
# worker thread, starting it the first time and restarting it if it has died.
loop do
  pools.each do |pool|
    worker = $threads[pool['name']]

    # Nothing to do while the pool's worker is healthy
    next if worker and worker.alive?

    # A worker existed but is no longer alive
    $logger.log('d', "[!] [#{pool['name']}] worker thread died, restarting") if worker

    check_pool(pool)
  end

  sleep(1)
end