(QENG-5305) Check cluster utilization once at a time

This commit adds a global provider_hosts concept so that cluster utilization is checked only once per interval for a given cluster. The results are retained and reused for that interval, tracking the least used set of hosts. Without this change, each migration and clone operation inspects host utilization and state for every host in the cluster, which is computationally expensive for vSphere.
This commit is contained in:
kirby@puppetlabs.com 2017-10-13 17:56:22 -07:00 committed by mattkirby
parent 62643b237f
commit ada79e81f4

View file

@ -21,6 +21,9 @@ module Vmpooler
# Our thread-tracker object # Our thread-tracker object
$threads = {} $threads = {}
# Host tracking object
$provider_hosts = {}
end end
def config def config
@ -459,6 +462,77 @@ module Vmpooler
end end
end end
# Resolve the name of the provider that services a pool.
#
# Lookup order: the pool's own 'provider' entry, then the first key of
# config[:providers], then the literal 'default'.
#
# @param pool_name [String] value compared against each pool's 'name' entry
# @param config [Hash] configuration with :pools and, optionally, :providers
# @return [String] the resolved provider name
# @raise [NoMethodError] when no pool named pool_name is present in config[:pools]
def get_provider_name(pool_name, config = $config)
  pool = config[:pools].find { |candidate| candidate['name'] == pool_name }
  provider_name = pool['provider']

  if provider_name.nil?
    providers = config[:providers]
    # A missing, nil or empty :providers section must fall through to
    # 'default' rather than fail while indexing a nil "first" entry.
    first_provider = providers.first unless providers.nil?
    provider_name = first_provider[0].to_s unless first_provider.nil?
  end

  provider_name.nil? ? 'default' : provider_name
end
# Work out which cluster and datacenter a pool is placed in.
#
# A pool-level 'clone_target' / 'datacenter' wins; otherwise the value from
# the global $config[:config] section is used.
#
# @param pool_name [String] value compared against each pool's 'name' entry
# @return [Hash, nil] { 'cluster' => ..., 'datacenter' => ... }, or nil when
#   either value cannot be determined
def get_cluster(pool_name)
  global_settings = $config[:config]
  fallback_cluster = global_settings.fetch('clone_target', nil)
  fallback_datacenter = global_settings.fetch('datacenter', nil)

  pool_settings = $config[:pools].find { |entry| entry['name'] == pool_name }

  cluster = pool_settings.fetch('clone_target', nil)
  cluster = fallback_cluster if cluster.nil?

  datacenter = pool_settings.fetch('datacenter', nil)
  datacenter = fallback_datacenter if datacenter.nil?

  return if cluster.nil? || datacenter.nil?

  { 'cluster' => cluster, 'datacenter' => datacenter }
end
# Ask the provider for the target hosts of a cluster and store the answer in
# $provider_hosts[provider_name][datacenter][cluster].
#
# While the provider call is running the cluster entry carries a 'checking'
# key. On success the entry is replaced by the provider's hash with a
# 'check_time_finished' timestamp added.
#
# @param pool_name [String] not used by this method; kept for callers
# @param provider [#select_target_hosts] object queried for the host set
# @param provider_name [String] first-level key in $provider_hosts
# @param cluster [String] third-level key in $provider_hosts
# @param datacenter [String] second-level key in $provider_hosts
# @param percentage [Numeric] passed through to provider.select_target_hosts
# @return [Time] the timestamp stored as 'check_time_finished'
# @raise re-raises any StandardError from the provider after clearing the
#   'checking' flag
def select_hosts(pool_name, provider, provider_name, cluster, datacenter, percentage)
  $provider_hosts[provider_name] ||= {}
  $provider_hosts[provider_name][datacenter] ||= {}
  clusters = $provider_hosts[provider_name][datacenter]
  clusters[cluster] ||= {}
  clusters[cluster]['checking'] = true

  begin
    hosts_hash = provider.select_target_hosts(cluster, datacenter, percentage)
    finished_at = Time.now
    # Stamp the result before publishing it, so the shared entry is never
    # visible without either 'checking' or 'check_time_finished'.
    hosts_hash['check_time_finished'] = finished_at
    clusters[cluster] = hosts_hash
    finished_at
  rescue StandardError
    # A failed check must not leave 'checking' set: run_select_hosts would
    # send every later caller into wait_for_host_selection with nothing
    # left to finish the check.
    clusters[cluster].delete('checking')
    raise
  end
end
# Decide whether the host set for a cluster has to be (re)computed.
#
# - entry has 'checking'            -> wait_for_host_selection
# - entry has 'check_time_finished' -> select_hosts only when older than max_age
# - nothing recorded for the cluster -> select_hosts
#
# @param provider [Object] handed to select_hosts
# @param pool_name [String] handed to select_hosts / wait_for_host_selection
# @param provider_name [String] first-level key in $provider_hosts
# @param cluster [String] third-level key in $provider_hosts
# @param datacenter [String] second-level key in $provider_hosts
# @param max_age [Numeric] seconds compared against the age of the last check
# @param percentage [Numeric] handed to select_hosts
# @return whatever the chosen helper returns, or nil when nothing is done
def run_select_hosts(provider, pool_name, provider_name, cluster, datacenter, max_age, percentage)
  now = Time.now

  tracked = $provider_hosts.key?(provider_name) &&
            $provider_hosts[provider_name].key?(datacenter) &&
            $provider_hosts[provider_name][datacenter].key?(cluster)
  # An untracked cluster behaves like an entry with no markers at all.
  status = tracked ? $provider_hosts[provider_name][datacenter][cluster] : {}

  if status.key?('checking')
    return wait_for_host_selection(pool_name, provider_name, cluster, datacenter)
  end

  if status.key?('check_time_finished')
    stale = now - status['check_time_finished'] > max_age
    return unless stale
  end

  select_hosts(pool_name, provider, provider_name, cluster, datacenter, percentage)
end
# Block until the cluster entry in $provider_hosts carries a
# 'check_time_finished' timestamp that is no older than max_age.
#
# Polling happens in two phases: first until the timestamp key exists, then
# for as long as that timestamp is older than max_age. Each phase sleeps
# loop_delay seconds per poll and gives up after maxloop polls, unless
# maxloop is zero, in which case it polls without limit.
#
# @param pool_name [String] not used by this method
# @param provider_name [String] first-level key in $provider_hosts
# @param cluster [String] third-level key in $provider_hosts
# @param datacenter [String] second-level key in $provider_hosts
# @param maxloop [Integer] polls allowed per phase; 0 means unlimited
# @param loop_delay [Numeric] seconds slept between polls
# @param max_age [Numeric] seconds after which a timestamp counts as too old
# @return [nil]
def wait_for_host_selection(pool_name, provider_name, cluster, datacenter, maxloop = 0, loop_delay = 5, max_age = 60)
  # Re-read the shared entry on every poll; it may be replaced wholesale.
  cluster_state = -> { $provider_hosts[provider_name][datacenter][cluster] }

  # Sleep-then-count polling shared by both phases.
  poll_while = lambda do |still_pending|
    polls = 0
    while still_pending.call
      sleep(loop_delay)
      polls += 1
      break if !maxloop.zero? && polls >= maxloop
    end
  end

  poll_while.call(-> { !cluster_state.call.key?('check_time_finished') })
  return unless cluster_state.call.key?('check_time_finished')

  poll_while.call(-> { Time.now - cluster_state.call['check_time_finished'] > max_age })
  nil
end
# Hand out the host at the head of an architecture's list and move it to the
# back of that list.
#
# The list read is
# $provider_hosts[provider_name][datacenter][cluster]['architectures'][architecture].
#
# @param provider_name [String] first-level key in $provider_hosts
# @param datacenter [String] second-level key in $provider_hosts
# @param cluster [String] third-level key in $provider_hosts
# @param architecture [String] key into the cluster's 'architectures' hash
# @return the selected host, or nil when the head of the list is nil
#   (for example, an empty list)
def select_next_host(provider_name, datacenter, cluster, architecture)
  candidates = $provider_hosts[provider_name][datacenter][cluster]['architectures'][architecture]
  chosen = candidates.first
  return if chosen.nil?

  # delete drops every entry equal to the chosen host before it is re-appended
  candidates.delete(chosen)
  candidates.push(chosen)
  chosen
end
def migration_limit(migration_limit) def migration_limit(migration_limit)
# Returns migration_limit setting when enabled # Returns migration_limit setting when enabled
return false if migration_limit == 0 || !migration_limit # rubocop:disable Style/NumericPredicate return false if migration_limit == 0 || !migration_limit # rubocop:disable Style/NumericPredicate
@ -468,7 +542,7 @@ module Vmpooler
def migrate_vm(vm_name, pool_name, provider) def migrate_vm(vm_name, pool_name, provider)
Thread.new do Thread.new do
begin begin
_migrate_vm(vm_name, pool_name, provider) _migrate_vm(vm_name, pool_name, provider, $provider_hosts)
rescue => err rescue => err
$logger.log('s', "[x] [#{pool_name}] '#{vm_name}' migration failed with an error: #{err}") $logger.log('s', "[x] [#{pool_name}] '#{vm_name}' migration failed with an error: #{err}")
remove_vmpooler_migration_vm(pool_name, vm_name) remove_vmpooler_migration_vm(pool_name, vm_name)
@ -477,30 +551,33 @@ module Vmpooler
end end
def _migrate_vm(vm_name, pool_name, provider) def _migrate_vm(vm_name, pool_name, provider)
$redis.srem('vmpooler__migrating__' + pool_name, vm_name) $redis.srem("vmpooler__migrating__#{pool_name}", vm_name)
parent_host_name = provider.get_vm_host(pool_name, vm_name) provider_name = get_provider_name(pool_name)
raise('Unable to determine which host the VM is running on') if parent_host_name.nil? vm = provider.get_vm_details(pool_name, vm_name)
raise('Unable to determine which host the VM is running on') if vm['host'].nil?
migration_limit = migration_limit $config[:config]['migration_limit'] migration_limit = migration_limit $config[:config]['migration_limit']
migration_count = $redis.scard('vmpooler__migration') migration_count = $redis.scard('vmpooler__migration')
if !migration_limit if migration_limit
$logger.log('s', "[ ] [#{pool_name}] '#{vm_name}' is running on #{parent_host_name}") max_age = 60
return percentage_of_hosts_below_average = 100
elsif migration_count >= migration_limit run_select_hosts(provider, pool_name, provider_name, vm['cluster'], vm['datacenter'], max_age, percentage_of_hosts_below_average)
$logger.log('s', "[ ] [#{pool_name}] '#{vm_name}' is running on #{parent_host_name}. No migration will be evaluated since the migration_limit has been reached") if migration_count >= migration_limit
return $logger.log('s', "[ ] [#{pool_name}] '#{vm_name}' is running on #{vm['host']}. No migration will be evaluated since the migration_limit has been reached")
elsif $provider_hosts[provider_name][vm['datacenter']][vm['cluster']]['architectures'][vm['architecture']].include?(vm['host'])
$logger.log('s', "[ ] [#{pool_name}] No migration required for '#{vm_name}' running on #{vm['host']}")
else else
$redis.sadd('vmpooler__migration', vm_name) $redis.sadd('vmpooler__migration', vm_name)
host_name = provider.find_least_used_compatible_host(pool_name, vm_name) target_host_name = select_next_host(provider_name, vm['datacenter'], vm['cluster'], vm['architecture'])
if host_name == parent_host_name finish = migrate_vm_and_record_timing(vm_name, pool_name, vm['host'], target_host_name, provider)
$logger.log('s', "[ ] [#{pool_name}] No migration required for '#{vm_name}' running on #{parent_host_name}") $logger.log('s', "[>] [#{pool_name}] '#{vm_name}' migrated from #{vm['host']} to #{target_host_name} in #{finish} seconds")
else
finish = migrate_vm_and_record_timing(vm_name, pool_name, parent_host_name, host_name, provider)
$logger.log('s', "[>] [#{pool_name}] '#{vm_name}' migrated from #{parent_host_name} to #{host_name} in #{finish} seconds")
end
remove_vmpooler_migration_vm(pool_name, vm_name) remove_vmpooler_migration_vm(pool_name, vm_name)
end end
return
else
$logger.log('s', "[ ] [#{pool_name}] '#{vm_name}' is running on #{vm['host']}")
end
end end
def remove_vmpooler_migration_vm(pool, vm) def remove_vmpooler_migration_vm(pool, vm)