refactor the connection and add debug logs

This commit is contained in:
Samuel Beaulieu 2021-12-09 17:52:19 -06:00
parent 04cc86689d
commit 662f965c0f
No known key found for this signature in database
GPG key ID: 12030F74136D0F34
2 changed files with 282 additions and 264 deletions

View file

@ -49,6 +49,12 @@ module Vmpooler
'gce'
end
def connection
@connection_pool.with_metrics do |pool_object|
return ensured_gce_connection(pool_object)
end
end
# main configuration options
def project
provider_config['project']
@ -81,12 +87,11 @@ module Vmpooler
# [Hashtable]
# [String] name : the name of the VM instance (unique for whole project)
def vms_in_pool(pool_name)
debug_logger("vms_in_pool")
vms = []
pool = pool_config(pool_name)
raise("Pool #{pool_name} does not exist for the provider #{name}") if pool.nil?
zone = zone(pool_name)
@connection_pool.with_metrics do |pool_object|
connection = ensured_gce_connection(pool_object)
filter = "(labels.pool = #{pool_name})"
instance_list = connection.list_instances(project, zone, filter: filter)
@ -95,7 +100,7 @@ module Vmpooler
instance_list.items.each do |vm|
vms << { 'name' => vm.name }
end
end
debug_logger(vms)
vms
end
@ -116,9 +121,8 @@ module Vmpooler
# [String] zone : URL of the zone where the instance resides.
# [String] machine_type : Full or partial URL of the machine type resource to use for this instance, in the format: zones/zone/machineTypes/machine-type.
def get_vm(pool_name, vm_name)
debug_logger("get_vm")
vm_hash = nil
@connection_pool.with_metrics do |pool_object|
connection = ensured_gce_connection(pool_object)
begin
vm_object = connection.get_instance(project, zone(pool_name), vm_name)
rescue ::Google::Apis::ClientError => e
@ -130,7 +134,7 @@ module Vmpooler
return vm_hash if vm_object.nil?
vm_hash = generate_vm_hash(vm_object, pool_name)
end
debug_logger("vm_hash #{vm_hash}")
vm_hash
end
@ -146,9 +150,9 @@ module Vmpooler
# returns
# [Hashtable] of the VM as per get_vm(pool_name, vm_name)
def create_vm(pool_name, new_vmname)
debug_logger("create_vm")
pool = pool_config(pool_name)
raise("Pool #{pool_name} does not exist for the provider #{name}") if pool.nil?
vm_hash = nil
# harcoded network info
network_interfaces = Google::Apis::ComputeV1::NetworkInterface.new(
@ -164,8 +168,7 @@ module Vmpooler
:boot => true,
:initialize_params => Google::Apis::ComputeV1::AttachedDiskInitializeParams.new(initParams)
)
@connection_pool.with_metrics do |pool_object|
connection = ensured_gce_connection(pool_object)
# Assume all pool config is valid i.e. not missing
client = ::Google::Apis::ComputeV1::Instance.new(
:name => new_vmname,
@ -174,10 +177,10 @@ module Vmpooler
:network_interfaces => [network_interfaces],
:labels => {'vm' => new_vmname, 'pool' => pool_name}
)
debug_logger("trigger insert_instance")
result = connection.insert_instance(project, zone(pool_name), client)
result = wait_for_operation(project, pool_name, result, connection)
result = wait_for_operation(project, pool_name, result)
vm_hash = get_vm(pool_name, new_vmname)
end
vm_hash
end
@ -197,11 +200,10 @@ module Vmpooler
# returns
# [boolean] true : once the operations are finished
def create_disk(pool_name, vm_name, disk_size)
debug_logger("create_disk")
pool = pool_config(pool_name)
raise("Pool #{pool_name} does not exist for the provider #{name}") if pool.nil?
@connection_pool.with_metrics do |pool_object|
connection = ensured_gce_connection(pool_object)
begin
vm_object = connection.get_instance(project, zone(pool_name), vm_name)
rescue ::Google::Apis::ClientError => e
@ -219,8 +221,10 @@ module Vmpooler
:size_gb => disk_size,
:labels => {"pool" => pool_name, "vm" => vm_name}
)
debug_logger("trigger insert_disk #{disk_name} for #{vm_name}")
result = connection.insert_disk(project, zone(pool_name), disk)
wait_for_operation(project, pool_name, result, connection)
wait_for_operation(project, pool_name, result)
debug_logger("trigger get_disk #{disk_name} for #{vm_name}")
new_disk = connection.get_disk(project, zone(pool_name), disk_name)
attached_disk = Google::Apis::ComputeV1::AttachedDisk.new(
@ -228,9 +232,9 @@ module Vmpooler
:boot => false,
:source => new_disk.self_link
)
debug_logger("trigger attach_disk #{disk_name} for #{vm_name}")
result = connection.attach_disk(project, zone(pool_name), vm_object.name, attached_disk)
wait_for_operation(project, pool_name, result, connection)
end
wait_for_operation(project, pool_name, result)
true
end
@ -250,8 +254,7 @@ module Vmpooler
# RuntimeError if the vm_name cannot be found
# RuntimeError if the snapshot_name already exists for this VM
def create_snapshot(pool_name, vm_name, new_snapshot_name)
@connection_pool.with_metrics do |pool_object|
connection = ensured_gce_connection(pool_object)
debug_logger("create_snapshot")
begin
vm_object = connection.get_instance(project, zone(pool_name), vm_name)
rescue ::Google::Apis::ClientError => e
@ -260,7 +263,7 @@ module Vmpooler
raise("VM #{vm_name} in pool #{pool_name} does not exist for the provider #{name}")
end
old_snap = find_snapshot(vm_name, new_snapshot_name, connection)
old_snap = find_snapshot(vm_name, new_snapshot_name)
raise("Snapshot #{new_snapshot_name} for VM #{vm_name} in pool #{pool_name} already exists for the provider #{name}") unless old_snap.nil?
result_list = []
@ -276,14 +279,14 @@ module Vmpooler
"boot" => attached_disk.boot.to_s
}
)
debug_logger("trigger async create_disk_snapshot #{vm_name}: #{new_snapshot_name}-#{disk_name}")
result = connection.create_disk_snapshot(project, zone(pool_name), disk_name, snapshot_obj)
# do them all async, keep a list, check later
result_list << result
end
#now check they are done
result_list.each do |result|
wait_for_operation(project, pool_name, result, connection)
end
wait_for_operation(project, pool_name, result)
end
true
end
@ -307,8 +310,7 @@ module Vmpooler
# RuntimeError if the vm_name cannot be found
# RuntimeError if the snapshot_name already exists for this VM
def revert_snapshot(pool_name, vm_name, snapshot_name)
@connection_pool.with_metrics do |pool_object|
connection = ensured_gce_connection(pool_object)
debug_logger("revert_snapshot")
begin
vm_object = connection.get_instance(project, zone(pool_name), vm_name)
rescue ::Google::Apis::ClientError => e
@ -317,21 +319,24 @@ module Vmpooler
raise("VM #{vm_name} in pool #{pool_name} does not exist for the provider #{name}")
end
snapshot_object = find_snapshot(vm_name, snapshot_name, connection)
snapshot_object = find_snapshot(vm_name, snapshot_name)
raise("Snapshot #{snapshot_name} for VM #{vm_name} in pool #{pool_name} does not exist for the provider #{name}") if snapshot_object.nil?
# Shutdown instance
debug_logger("trigger stop_instance #{vm_name}")
result = connection.stop_instance(project, zone(pool_name), vm_name)
wait_for_operation(project, pool_name, result, connection)
wait_for_operation(project, pool_name, result)
# Delete existing disks
if vm_object.disks
vm_object.disks.each do |attached_disk|
debug_logger("trigger detach_disk #{vm_name}: #{attached_disk.device_name}")
result = connection.detach_disk(project, zone(pool_name), vm_name, attached_disk.device_name)
wait_for_operation(project, pool_name, result, connection)
wait_for_operation(project, pool_name, result)
current_disk_name = disk_name_from_source(attached_disk)
debug_logger("trigger delete_disk #{vm_name}: #{current_disk_name}")
result = connection.delete_disk(project, zone(pool_name), current_disk_name)
wait_for_operation(project, pool_name, result, connection)
wait_for_operation(project, pool_name, result)
end
end
@ -345,8 +350,9 @@ module Vmpooler
:source_snapshot => snapshot.self_link
)
# create disk in GCE as a separate resource
debug_logger("trigger insert_disk #{vm_name}: #{current_disk_name} based on #{snapshot.self_link}")
result = connection.insert_disk(project, zone(pool_name), disk)
wait_for_operation(project, pool_name, result, connection)
wait_for_operation(project, pool_name, result)
# read the new disk info
new_disk_info = connection.get_disk(project, zone(pool_name), current_disk_name)
new_attached_disk = Google::Apis::ComputeV1::AttachedDisk.new(
@ -355,13 +361,14 @@ module Vmpooler
:source => new_disk_info.self_link
)
# attach the new disk to existing instance
debug_logger("trigger attach_disk #{vm_name}: #{current_disk_name}")
result = connection.attach_disk(project, zone(pool_name), vm_name, new_attached_disk)
wait_for_operation(project, pool_name, result, connection)
wait_for_operation(project, pool_name, result)
end
debug_logger("trigger start_instance #{vm_name}")
result = connection.start_instance(project, zone(pool_name), vm_name)
wait_for_operation(project, pool_name, result, connection)
end
wait_for_operation(project, pool_name, result)
true
end
@ -373,8 +380,7 @@ module Vmpooler
# returns
# [boolean] true : once the operations are finished
def destroy_vm(pool_name, vm_name)
@connection_pool.with_metrics do |pool_object|
connection = ensured_gce_connection(pool_object)
debug_logger("destroy_vm")
deleted = false
begin
vm_object = connection.get_instance(project, zone(pool_name), vm_name)
@ -382,11 +388,13 @@ module Vmpooler
raise e unless e.status_code == 404
# If a VM doesn't exist then it is effectively deleted
deleted = true
debug_logger("instance #{vm_name} already deleted")
end
if(!deleted)
debug_logger("trigger delete_instance #{vm_name}")
result = connection.delete_instance(project, zone(pool_name), vm_name)
wait_for_operation(project, pool_name, result, connection, 10)
wait_for_operation(project, pool_name, result, 10)
end
# list and delete any leftover disk, for instance if they were detached from the instance
@ -395,6 +403,7 @@ module Vmpooler
result_list = []
unless disk_list.items.nil?
disk_list.items.each do |disk|
debug_logger("trigger delete_disk #{disk.name}")
result = connection.delete_disk(project, zone(pool_name), disk.name)
# do them all async, keep a list, check later
result_list << result
@ -402,15 +411,16 @@ module Vmpooler
end
#now check they are done
result_list.each do |result|
wait_for_operation(project, pool_name, result, connection)
wait_for_operation(project, pool_name, result)
end
# list and delete leftover snapshots, this could happen if snapshots were taken,
# as they are not removed when the original disk is deleted or the instance is detroyed
snapshot_list = find_all_snapshots(vm_name, connection)
snapshot_list = find_all_snapshots(vm_name)
result_list = []
unless snapshot_list.nil?
snapshot_list.each do |snapshot|
debug_logger("trigger delete_snapshot #{snapshot.name}")
result = connection.delete_snapshot(project, snapshot.name)
# do them all async, keep a list, check later
result_list << result
@ -418,8 +428,7 @@ module Vmpooler
end
#now check they are done
result_list.each do |result|
wait_for_operation(project, pool_name, result, connection)
end
wait_for_operation(project, pool_name, result)
end
true
end
@ -437,8 +446,7 @@ module Vmpooler
# Scans zones that are configured for list of resources (VM, disks, snapshots) that do not have the label.pool set
# to one of the configured pools. If it is also not in the allowlist, the resource is destroyed
def purge_unconfigured_resources(allowlist)
@connection_pool.with_metrics do |pool_object|
connection = ensured_gce_connection(pool_object)
debug_logger("purge_unconfigured_resources")
pools_array = provided_pools
filter = {}
# we have to group things by zone, because the API search feature is done against a zone and not global
@ -458,13 +466,14 @@ module Vmpooler
unless instance_list.items.nil?
instance_list.items.each do |vm|
next if should_be_ignored(vm, allowlist)
debug_logger("trigger async delete_instance #{vm.name}")
result = connection.delete_instance(project, zone, vm.name)
result_list << result
end
end
#now check they are done
result_list.each do |result|
wait_for_zone_operation(project, zone, result, connection)
wait_for_zone_operation(project, zone, result)
end
#Disks
@ -472,6 +481,7 @@ module Vmpooler
unless disks_list.items.nil?
disks_list.items.each do |disk|
next if should_be_ignored(disk, allowlist)
debug_logger("trigger async no wait delete_disk #{disk.name}")
result = connection.delete_disk(project, zone, disk.name)
end
end
@ -481,12 +491,12 @@ module Vmpooler
unless snapshot_list.items.nil?
snapshot_list.items.each do |sn|
next if should_be_ignored(sn, allowlist)
debug_logger("trigger async no wait delete_snapshot #{sn.name}")
result = connection.delete_snapshot(project, sn.name)
end
end
end
end
end
# tag_vm_user This method is called once we know who is using the VM (it is running). This method enables seeing
# who is using what in the provider pools.
@ -497,7 +507,6 @@ module Vmpooler
# returns
# [Boolean] : true if successful, false if an error occurred and it should retry
def tag_vm_user(pool, vm)
@redis.with_metrics do |redis|
user = get_current_user(vm)
vm_hash = get_vm(pool, vm)
return false if vm_hash.nil?
@ -509,17 +518,17 @@ module Vmpooler
begin
instances_set_labels_request_object = Google::Apis::ComputeV1::InstancesSetLabelsRequest.new(label_fingerprint:vm_hash['label_fingerprint'], labels: new_labels)
result = connection.set_instance_labels(project, zone(pool), vm, instances_set_labels_request_object)
wait_for_zone_operation(project, zone(pool), result, connection)
wait_for_zone_operation(project, zone(pool), result)
rescue StandardError => _e
return false
end
return true
end
end
# END BASE METHODS
def should_be_ignored(item, allowlist)
return false if allowlist.nil?
allowlist.map!(&:downcase) # remove uppercase from configured values because its not valid as resource label
array_flattened_labels = []
unless item.labels.nil?
@ -546,9 +555,10 @@ module Vmpooler
end
# Compute resource wait for operation to be DONE (synchronous operation)
def wait_for_zone_operation(project, zone, result, connection, retries=5)
def wait_for_zone_operation(project, zone, result, retries=5)
while result.status != 'DONE'
result = connection.wait_zone_operation(project, zone, result.name)
debug_logger(" -> wait_for_zone_operation status #{result.status} (#{result.name})")
end
if result.error # unsure what kind of error can be stored here
error_message = ""
@ -573,8 +583,8 @@ module Vmpooler
puts "waited on #{result.name} but was not found, so skipping"
end
def wait_for_operation(project, pool_name, result, connection, retries=5)
wait_for_zone_operation(project, zone(pool_name), result, connection, retries)
def wait_for_operation(project, pool_name, result, retries=5)
wait_for_zone_operation(project, zone(pool_name), result, retries)
end
# Return a hash of VM data
@ -643,7 +653,7 @@ module Vmpooler
# this is used because for one vm, with the same snapshot name there could be multiple snapshots,
# one for each disk
def find_snapshot(vm, snapshotname, connection)
def find_snapshot(vm, snapshotname)
filter = "(labels.vm = #{vm}) AND (labels.snapshot_name = #{snapshotname})"
snapshot_list = connection.list_snapshots(project,filter: filter)
return snapshot_list.items #array of snapshot objects
@ -651,7 +661,7 @@ module Vmpooler
# find all snapshots ever created for one vm,
# regardless of snapshot name, for example when deleting it all
def find_all_snapshots(vm, connection)
def find_all_snapshots(vm)
filter = "(labels.vm = #{vm})"
snapshot_list = connection.list_snapshots(project,filter: filter)
return snapshot_list.items #array of snapshot objects
@ -660,6 +670,14 @@ module Vmpooler
def disk_name_from_source(attached_disk)
attached_disk.source.split('/')[-1] # disk name is after the last / of the full source URL
end
# used in local dev environment, set DEBUG_FLAG=true
# this way the upstream vmpooler manager does not get polluted with logs
def debug_logger(message, send_to_upstream=false)
#the default logger is simple and does not enforce debug levels (the first argument)
puts message if ENV['DEBUG_FLAG']
logger.log("[g]", message) if send_to_upstream
end
end
end
end

View file

@ -9,7 +9,7 @@ end
describe 'Vmpooler::PoolManager::Provider::Gce' do
let(:logger) { MockLogger.new }
let(:metrics) { Vmpooler::Metrics::DummyStatsd.new }
let(:poolname) { 'pool1' }
let(:poolname) { 'debian-9' }
let(:provider_options) { { 'param' => 'value' } }
let(:project) { 'dio-samuel-dev' }
let(:zone){ 'us-west1-b' }
@ -38,7 +38,7 @@ EOT
)
}
let(:vmname) { 'vm15' }
let(:vmname) { 'vm16' }
let(:connection) { MockComputeServiceConnection.new }
let(:redis_connection_pool) { Vmpooler::PoolManager::GenericConnectionPool.new(
metrics: metrics,
@ -62,7 +62,8 @@ EOT
puts "creating"
result = subject.create_vm(poolname, vmname)
subject.get_vm(poolname, vmname)
=begin
subject.vms_in_pool(poolname)
puts "create snapshot w/ one disk"
result = subject.create_snapshot(poolname, vmname, "sams")
puts "create disk"
@ -71,8 +72,7 @@ EOT
result = subject.create_snapshot(poolname, vmname, "sams2")
puts "revert snapshot"
result = subject.revert_snapshot(poolname, vmname, "sams")
=end
#result = subject.destroy_vm(poolname, vmname)
result = subject.destroy_vm(poolname, vmname)
end
skip 'runs existing' do