mirror of
https://github.com/puppetlabs/vmpooler.git
synced 2026-01-26 10:08:40 -05:00
(QENG-5305) Check cluster utilization once at a time
This commit adds a global provider_hosts concept so that cluster utilization is checked once per interval for a given cluster; the results are retained and reused for that interval, and the least used set of hosts is tracked. Without this change, each migration and clone operation inspects host utilization and state for every host in the cluster, which is computationally expensive for vSphere.
This commit is contained in:
parent
62643b237f
commit
ada79e81f4
1 changed files with 95 additions and 18 deletions
|
|
@ -21,6 +21,9 @@ module Vmpooler
|
||||||
|
|
||||||
# Our thread-tracker object
|
# Our thread-tracker object
|
||||||
$threads = {}
|
$threads = {}
|
||||||
|
|
||||||
|
# Host tracking object
|
||||||
|
$provider_hosts = {}
|
||||||
end
|
end
|
||||||
|
|
||||||
def config
|
def config
|
||||||
|
|
@ -459,6 +462,77 @@ module Vmpooler
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Resolves the name of the provider to use for a pool.
#
# Lookup order:
#   1. the pool's own 'provider' setting,
#   2. the name of the first entry in config[:providers],
#   3. the literal 'default'.
#
# pool_name - pool name, matched against the 'name' key of each pool
# config    - configuration hash with a :pools array and an optional
#             :providers hash (defaults to the global $config)
#
# Returns the provider name. A pool that is not listed in config[:pools]
# is treated like a pool without a 'provider' setting rather than raising
# NoMethodError on nil.
def get_provider_name(pool_name, config = $config)
  pool = config[:pools].find { |p| p['name'] == pool_name }
  provider_name = pool['provider'] if pool

  if provider_name.nil?
    # An empty (or nil) :providers section has no first entry to fall back on.
    first_provider = (config[:providers] || {}).first
    provider_name = first_provider[0].to_s if first_provider
  end

  provider_name.nil? ? 'default' : provider_name
end
|
||||||
|
|
||||||
|
# Determines the cluster and datacenter a pool is deployed to.
#
# The pool's own 'clone_target' and 'datacenter' settings win; when a
# setting is absent the value from the global $config[:config] section is
# used instead.
#
# pool_name - pool name, matched against the 'name' key of each entry in
#             $config[:pools]
#
# Returns a hash of the form { 'cluster' => ..., 'datacenter' => ... }, or
# nil when the pool is not configured or either value cannot be determined.
def get_cluster(pool_name)
  defaults = $config[:config]
  pool = $config[:pools].find { |p| p['name'] == pool_name }
  # An unknown pool has no cluster; previously this raised NoMethodError on nil.
  return if pool.nil?

  cluster = pool['clone_target']
  cluster = defaults['clone_target'] if cluster.nil?
  datacenter = pool['datacenter']
  datacenter = defaults['datacenter'] if datacenter.nil?
  return if cluster.nil? || datacenter.nil?

  { 'cluster' => cluster, 'datacenter' => datacenter }
end
|
||||||
|
|
||||||
|
# Asks the provider for the target hosts of a cluster and stores the result
# in the global $provider_hosts[provider_name][datacenter][cluster].
#
# While the provider call is running, the existing entry carries a
# 'checking' marker. On success the entry is replaced by the provider's
# result with a 'check_time_finished' timestamp added.
#
# pool_name     - unused here; kept for interface compatibility
# provider      - object responding to select_target_hosts(cluster, datacenter, percentage)
# provider_name - first-level key into $provider_hosts
# cluster       - cluster name (third-level key)
# datacenter    - datacenter name (second-level key)
# percentage    - passed through to provider.select_target_hosts
#
# Returns the Time at which the check finished.
# Raises whatever the provider raises, or RuntimeError when the provider
# returns nil. In both cases the previous entry is kept and its 'checking'
# marker is removed, so other callers are not left waiting forever.
def select_hosts(pool_name, provider, provider_name, cluster, datacenter, percentage)
  $provider_hosts[provider_name] ||= {}
  $provider_hosts[provider_name][datacenter] ||= {}
  $provider_hosts[provider_name][datacenter][cluster] ||= {}

  in_progress = $provider_hosts[provider_name][datacenter][cluster]
  in_progress['checking'] = true

  begin
    hosts_hash = provider.select_target_hosts(cluster, datacenter, percentage)
    raise("No host selection returned for cluster #{cluster} in datacenter #{datacenter}") if hosts_hash.nil?

    # Stamp the result before publishing it, so readers never see an entry
    # that has neither 'checking' nor 'check_time_finished'.
    finished = Time.now
    hosts_hash['check_time_finished'] = finished
    $provider_hosts[provider_name][datacenter][cluster] = hosts_hash
    finished
  ensure
    # On success this hash is no longer published; on failure removing the
    # marker lets the next caller retry instead of waiting indefinitely.
    in_progress.delete('checking')
  end
end
|
||||||
|
|
||||||
|
# Makes sure host selection results exist for a cluster, based on the entry
# in $provider_hosts[provider_name][datacenter][cluster]:
#
#   * entry has a 'checking' marker          -> wait_for_host_selection
#   * entry has no 'check_time_finished'     -> select_hosts
#   * 'check_time_finished' is older than
#     max_age seconds                        -> select_hosts
#   * otherwise                              -> nothing to do
#
# provider      - passed through to select_hosts
# pool_name     - passed through to select_hosts / wait_for_host_selection
# provider_name - first-level key into $provider_hosts
# cluster       - cluster name (third-level key)
# datacenter    - datacenter name (second-level key)
# max_age       - maximum age, in seconds, of reusable results
# percentage    - passed through to select_hosts
#
# Returns the value of whichever helper ran, or nil when nothing ran.
def run_select_hosts(provider, pool_name, provider_name, cluster, datacenter, max_age, percentage)
  now = Time.now
  # One nil-tolerant lookup; a missing level is treated as an empty entry.
  cluster_state = (($provider_hosts[provider_name] || {})[datacenter] || {})[cluster] || {}

  if cluster_state.key?('checking')
    wait_for_host_selection(pool_name, provider_name, cluster, datacenter)
  elsif !cluster_state.key?('check_time_finished') || now - cluster_state['check_time_finished'] > max_age
    select_hosts(pool_name, provider, provider_name, cluster, datacenter, percentage)
  end
end
|
||||||
|
|
||||||
|
# Blocks until host selection results for a cluster are available and
# recent, polling $provider_hosts[provider_name][datacenter][cluster].
#
# Two phases run in order, each polling every loop_delay seconds:
#   1. wait until the entry has a 'check_time_finished' timestamp;
#   2. wait until that timestamp is no older than max_age seconds.
# If phase 1 ends without a timestamp the method returns immediately.
#
# pool_name     - unused here; kept for interface compatibility
# provider_name - first-level key into $provider_hosts
# cluster       - cluster name (third-level key)
# datacenter    - datacenter name (second-level key)
# maxloop       - maximum number of polls per phase; 0 means no limit
# loop_delay    - seconds to sleep between polls
# max_age       - maximum acceptable age, in seconds, of the results
#
# Returns nil.
def wait_for_host_selection(pool_name, provider_name, cluster, datacenter, maxloop = 0, loop_delay = 5, max_age = 60)
  # Re-read the global on every poll: the entry may be replaced wholesale
  # while we wait. A missing level counts as "not finished" instead of
  # raising NoMethodError on nil.
  finished_at = lambda do
    cluster_state = (($provider_hosts[provider_name] || {})[datacenter] || {})[cluster] || {}
    cluster_state['check_time_finished']
  end

  phases = [
    -> { !finished_at.call.nil? },
    lambda do
      finished = finished_at.call
      # A timestamp that disappears mid-wait means "not fresh yet", not an error.
      !finished.nil? && Time.now - finished <= max_age
    end
  ]

  phases.each do |satisfied|
    polls = 0
    until satisfied.call
      sleep(loop_delay)
      polls += 1
      break if !maxloop.zero? && polls >= maxloop
    end
    # Without a timestamp there is nothing whose freshness could be awaited.
    return if finished_at.call.nil?
  end

  nil
end
|
||||||
|
|
||||||
|
# Picks the next host for an architecture and moves it to the back of the
# list, so consecutive calls cycle through the hosts.
#
# The list is read from
# $provider_hosts[provider_name][datacenter][cluster]['architectures'][architecture]
# and is modified in place.
#
# provider_name - first-level key into $provider_hosts
# datacenter    - datacenter name (second-level key)
# cluster       - cluster name (third-level key)
# architecture  - key into the cluster entry's 'architectures' hash
#
# Returns the selected host, or nil when the list is empty or any level of
# the lookup is missing (previously a missing level raised NoMethodError).
def select_next_host(provider_name, datacenter, cluster, architecture)
  cluster_state = (($provider_hosts[provider_name] || {})[datacenter] || {})[cluster] || {}
  hosts = (cluster_state['architectures'] || {})[architecture]
  return if hosts.nil?

  host = hosts.first
  return if host.nil?

  # Remove and re-append so the chosen host becomes the last candidate.
  hosts.delete(host)
  hosts << host
  host
end
|
||||||
|
|
||||||
def migration_limit(migration_limit)
|
def migration_limit(migration_limit)
|
||||||
# Returns migration_limit setting when enabled
|
# Returns migration_limit setting when enabled
|
||||||
return false if migration_limit == 0 || !migration_limit # rubocop:disable Style/NumericPredicate
|
return false if migration_limit == 0 || !migration_limit # rubocop:disable Style/NumericPredicate
|
||||||
|
|
@ -468,7 +542,7 @@ module Vmpooler
|
||||||
def migrate_vm(vm_name, pool_name, provider)
|
def migrate_vm(vm_name, pool_name, provider)
|
||||||
Thread.new do
|
Thread.new do
|
||||||
begin
|
begin
|
||||||
_migrate_vm(vm_name, pool_name, provider)
|
_migrate_vm(vm_name, pool_name, provider, $provider_hosts)
|
||||||
rescue => err
|
rescue => err
|
||||||
$logger.log('s', "[x] [#{pool_name}] '#{vm_name}' migration failed with an error: #{err}")
|
$logger.log('s', "[x] [#{pool_name}] '#{vm_name}' migration failed with an error: #{err}")
|
||||||
remove_vmpooler_migration_vm(pool_name, vm_name)
|
remove_vmpooler_migration_vm(pool_name, vm_name)
|
||||||
|
|
@ -477,29 +551,32 @@ module Vmpooler
|
||||||
end
|
end
|
||||||
|
|
||||||
def _migrate_vm(vm_name, pool_name, provider)
|
def _migrate_vm(vm_name, pool_name, provider)
|
||||||
$redis.srem('vmpooler__migrating__' + pool_name, vm_name)
|
$redis.srem("vmpooler__migrating__#{pool_name}", vm_name)
|
||||||
|
|
||||||
parent_host_name = provider.get_vm_host(pool_name, vm_name)
|
provider_name = get_provider_name(pool_name)
|
||||||
raise('Unable to determine which host the VM is running on') if parent_host_name.nil?
|
vm = provider.get_vm_details(pool_name, vm_name)
|
||||||
|
raise('Unable to determine which host the VM is running on') if vm['host'].nil?
|
||||||
migration_limit = migration_limit $config[:config]['migration_limit']
|
migration_limit = migration_limit $config[:config]['migration_limit']
|
||||||
migration_count = $redis.scard('vmpooler__migration')
|
migration_count = $redis.scard('vmpooler__migration')
|
||||||
|
|
||||||
if !migration_limit
|
if migration_limit
|
||||||
$logger.log('s', "[ ] [#{pool_name}] '#{vm_name}' is running on #{parent_host_name}")
|
max_age = 60
|
||||||
return
|
percentage_of_hosts_below_average = 100
|
||||||
elsif migration_count >= migration_limit
|
run_select_hosts(provider, pool_name, provider_name, vm['cluster'], vm['datacenter'], max_age, percentage_of_hosts_below_average)
|
||||||
$logger.log('s', "[ ] [#{pool_name}] '#{vm_name}' is running on #{parent_host_name}. No migration will be evaluated since the migration_limit has been reached")
|
if migration_count >= migration_limit
|
||||||
return
|
$logger.log('s', "[ ] [#{pool_name}] '#{vm_name}' is running on #{vm['host']}. No migration will be evaluated since the migration_limit has been reached")
|
||||||
else
|
elsif $provider_hosts[provider_name][vm['datacenter']][vm['cluster']]['architectures'][vm['architecture']].include?(vm['host'])
|
||||||
$redis.sadd('vmpooler__migration', vm_name)
|
$logger.log('s', "[ ] [#{pool_name}] No migration required for '#{vm_name}' running on #{vm['host']}")
|
||||||
host_name = provider.find_least_used_compatible_host(pool_name, vm_name)
|
|
||||||
if host_name == parent_host_name
|
|
||||||
$logger.log('s', "[ ] [#{pool_name}] No migration required for '#{vm_name}' running on #{parent_host_name}")
|
|
||||||
else
|
else
|
||||||
finish = migrate_vm_and_record_timing(vm_name, pool_name, parent_host_name, host_name, provider)
|
$redis.sadd('vmpooler__migration', vm_name)
|
||||||
$logger.log('s', "[>] [#{pool_name}] '#{vm_name}' migrated from #{parent_host_name} to #{host_name} in #{finish} seconds")
|
target_host_name = select_next_host(provider_name, vm['datacenter'], vm['cluster'], vm['architecture'])
|
||||||
|
finish = migrate_vm_and_record_timing(vm_name, pool_name, vm['host'], target_host_name, provider)
|
||||||
|
$logger.log('s', "[>] [#{pool_name}] '#{vm_name}' migrated from #{vm['host']} to #{target_host_name} in #{finish} seconds")
|
||||||
|
remove_vmpooler_migration_vm(pool_name, vm_name)
|
||||||
end
|
end
|
||||||
remove_vmpooler_migration_vm(pool_name, vm_name)
|
return
|
||||||
|
else
|
||||||
|
$logger.log('s', "[ ] [#{pool_name}] '#{vm_name}' is running on #{vm['host']}")
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue