# frozen_string_literal: true

require 'bigdecimal'
require 'bigdecimal/util'
require 'socket'
require 'timeout'
require 'vmpooler/providers/base'
require 'aws-sdk-ec2'

module Vmpooler
  class PoolManager
    class Provider
      # This class represents an AWS provider to CRUD resources in an AWS (EC2) cloud.
      class Aws < Vmpooler::PoolManager::Provider::Base
        # The connection_pool method is normally used only for testing
        attr_reader :connection_pool

        def initialize(config, logger, metrics, redis_connection_pool, name, options)
          super(config, logger, metrics, redis_connection_pool, name, options)

          @aws_access_key = ENV['ABS_AWS_ACCESS_KEY']
          @aws_secret_key = ENV['ABS_AWS_SECRET_KEY']

          task_limit = global_config[:config].nil? || global_config[:config]['task_limit'].nil? ? 10 : global_config[:config]['task_limit'].to_i
          # The default connection pool size is:
          # Whatever is biggest from:
          #   - How many pools this provider services
          #   - Maximum number of cloning tasks allowed
          #   - Need at least 2 connections so that a pool can have inventory functions performed while cloning etc.
          default_connpool_size = [provided_pools.count, task_limit, 2].max
          connpool_size = provider_config['connection_pool_size'].nil? ? default_connpool_size : provider_config['connection_pool_size'].to_i
          # The default connection pool timeout should be quite large - 60 seconds
          connpool_timeout = provider_config['connection_pool_timeout'].nil? ? 60 : provider_config['connection_pool_timeout'].to_i
          logger.log('d', "[#{name}] ConnPool - Creating a connection pool of size #{connpool_size} with timeout #{connpool_timeout}")
          @connection_pool = Vmpooler::PoolManager::GenericConnectionPool.new(
            metrics: metrics,
            connpool_type: 'provider_connection_pool',
            connpool_provider: name,
            size: connpool_size,
            timeout: connpool_timeout
          ) do
            logger.log('d', "[#{name}] Connection Pool - Creating a connection object")
            # Need to wrap the AWS connection object in another object. The generic connection pooler will preserve
            # the object reference for the connection, which means it cannot "reconnect" by creating an entirely new connection
            # object. Instead by wrapping it in a Hash, the Hash object reference itself never changes but the content of the
            # Hash can change, and is preserved across invocations.
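            # The pooled object is the one-key Hash built below; ensured_aws_connection repopulates
            # :connection in place whenever a checked-out wrapper has lost its connection.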
            new_conn = connect_to_aws
            { connection: new_conn }
          end
          @redis = redis_connection_pool
        end

        # name of the provider class
        def name
          'aws'
        end

        def connection
          @connection_pool.with_metrics do |pool_object|
            return ensured_aws_connection(pool_object)
          end
        end

        def dns
          @dns
        end

        # main configuration options
        def region
          return provider_config['region'] if provider_config['region']
        end

        # main configuration options, overridable for each pool
        def zone(pool_name)
          return pool_config(pool_name)['zone'] if pool_config(pool_name)['zone']
          return provider_config['zone'] if provider_config['zone']
        end

        def amisize(pool_name)
          return pool_config(pool_name)['amisize'] if pool_config(pool_name)['amisize']
          return provider_config['amisize'] if provider_config['amisize']
        end

        def volume_size(pool_name)
          return pool_config(pool_name)['volume_size'] if pool_config(pool_name)['volume_size']
          return provider_config['volume_size'] if provider_config['volume_size']
        end

        # dns
        def domain
          provider_config['domain']
        end

        def dns_zone_resource_name
          provider_config['dns_zone_resource_name']
        end

        # subnets
        def get_subnet_id(pool_name)
          case zone(pool_name)
          when 'us-west-2b'
            return 'subnet-0fe90a688844f6f26'
          when 'us-west-2a'
            return 'subnet-091b436f'
          end
        end

        # Base methods that are implemented:

        # vms_in_pool lists all the VM names in a pool, which is based on the VMs
        # having a tag "pool" that matches a pool config name.
        # inputs
        #   [String] pool_name : Name of the pool
        # returns
        #   empty array [] if no VMs found in the pool
        #   [Array]
        #     [Hashtable]
        #       [String] name : the name of the VM instance (unique for whole project)
        def vms_in_pool(pool_name)
          debug_logger('vms_in_pool')
          vms = []
          pool = pool_config(pool_name)
          raise("Pool #{pool_name} does not exist for the provider #{name}") if pool.nil?

          filters = [{
            name: "tag:pool",
            values: [pool_name]
          }]
          instance_list = connection.instances(filters: filters)
          return vms if instance_list.first.nil?

          instance_list.each do |vm|
            vms << {
              'name' => vm.tags.detect { |f| f.key == 'vm_name' }&.value || "vm_name not found in tags"
            }
          end
          debug_logger(vms)
          vms
        end

        # inputs
        #   [String] pool_name : Name of the pool
        #   [String] vm_name   : Name of the VM to find
        # returns
        #   nil if VM doesn't exist
        #   [Hashtable] of the VM: name, template, poolname, boottime, status, image_size, private_ip_address
        #     [String] name : The name of the resource, provided by the client when initially creating the resource
        #     [String] template : This is the name of template
        #     [String] poolname : Name of the pool the VM is in
        #     [Time]   boottime : Time when the VM was created/booted
        #     [String] status : One of the following values: pending, running, shutting-down, terminated, stopping, stopped
        #     [String] image_size : The EC2 image size eg a1.large
        #     [String] private_ip_address : The private IPv4 address
        def get_vm(pool_name, vm_name)
          debug_logger('get_vm')
          vm_hash = nil
          filters = [{
            name: "tag:vm_name",
            values: [vm_name]
          }]
          instances = connection.instances(filters: filters).first
          return vm_hash if instances.nil?

          vm_hash = generate_vm_hash(instances, pool_name)
          debug_logger("vm_hash #{vm_hash}")
          vm_hash
        end

        # create_vm creates a new VM with a default network from the config,
        # an initial disk named #{new_vmname}-disk0 that uses the 'template' as its source image
        # and labels added for vm and pool
        # and an instance configuration for machine_type from the config and
        # labels vm and pool
        # having a label "pool" that matches a pool config name.
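        # For example (illustrative names only), creating "spicy-proton" in pool "redhat-8-x86_64"
        # results in an instance tagged vm_name=spicy-proton and pool=redhat-8-x86_64.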
        # inputs
        #   [String] pool : Name of the pool
        #   [String] new_vmname : Name to give the new VM
        # returns
        #   [Hashtable] of the VM as per get_vm(pool_name, vm_name)
        def create_vm(pool_name, new_vmname)
          debug_logger('create_vm')
          pool = pool_config(pool_name)
          raise("Pool #{pool_name} does not exist for the provider #{name}") if pool.nil?
          raise("Instance creation not attempted, #{new_vmname} already exists") if get_vm(pool_name, new_vmname)

          subnet_id = get_subnet_id(pool_name)
          tag = [
            {
              resource_type: "instance", # accepts capacity-reservation, client-vpn-endpoint, customer-gateway, carrier-gateway, dedicated-host, dhcp-options, egress-only-internet-gateway, elastic-ip, elastic-gpu, export-image-task, export-instance-task, fleet, fpga-image, host-reservation, image, import-image-task, import-snapshot-task, instance, instance-event-window, internet-gateway, ipam, ipam-pool, ipam-scope, ipv4pool-ec2, ipv6pool-ec2, key-pair, launch-template, local-gateway, local-gateway-route-table, local-gateway-virtual-interface, local-gateway-virtual-interface-group, local-gateway-route-table-vpc-association, local-gateway-route-table-virtual-interface-group-association, natgateway, network-acl, network-interface, network-insights-analysis, network-insights-path, network-insights-access-scope, network-insights-access-scope-analysis, placement-group, prefix-list, replace-root-volume-task, reserved-instances, route-table, security-group, security-group-rule, snapshot, spot-fleet-request, spot-instances-request, subnet, subnet-cidr-reservation, traffic-mirror-filter, traffic-mirror-session, traffic-mirror-target, transit-gateway, transit-gateway-attachment, transit-gateway-connect-peer, transit-gateway-multicast-domain, transit-gateway-route-table, volume, vpc, vpc-endpoint, vpc-endpoint-service, vpc-peering-connection, vpn-connection, vpn-gateway, vpc-flow-log
              tags: [
                { key: "vm_name", value: new_vmname },
                { key: "pool", value: pool_name },
                { key: "lifetime", value: get_current_lifetime(new_vmname) },
                { key: "created_by", value: get_current_user(new_vmname) },
                { key: "job_url", value: get_current_job_url(new_vmname) },
                { key: "organization", value: "engineering" },
                { key: "portfolio", value: "ds-ci" }
              ]
            }
          ]
          config = {
            min_count: 1,
            max_count: 1,
            image_id: pool['template'],
            monitoring: { enabled: true },
            key_name: 'always-be-scheduling',
            security_group_ids: ['sg-697fb015'],
            instance_type: amisize(pool_name),
            disable_api_termination: false,
            instance_initiated_shutdown_behavior: 'terminate',
            tag_specifications: tag,
            subnet_id: subnet_id
          }
          if volume_size(pool_name)
            config[:block_device_mappings] = get_block_device_mappings(config[:image_id], volume_size(pool_name))
          end
          debug_logger('trigger insert_instance')
          batch_instance = connection.create_instances(config)
          instance_id = batch_instance.first.instance_id
          connection.client.wait_until(:instance_running, { instance_ids: [instance_id] })
          created_instance = get_vm(pool_name, new_vmname)
          created_instance
        end

        def get_block_device_mappings(image_id, volume_size)
          ec2_client = connection.client
          image = ec2_client.describe_images(image_ids: [image_id]).images.first
          raise RuntimeError, "Image not found: #{image_id}" if image.nil?

          # Transform the images block_device_mappings output into a format
          # ready for a create.
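          # Illustrative shape of the result (the device name comes from the AMI):
          #   [{ device_name: "/dev/sda1", ebs: { volume_size: 40, delete_on_termination: true } }]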
          block_device_mappings = []
          if image.root_device_type == "ebs"
            orig_bdm = image.block_device_mappings
            orig_bdm.each do |block_device|
              block_device_mappings << {
                device_name: block_device.device_name,
                ebs: {
                  # Change the default size of the root volume.
                  volume_size: volume_size,
                  # This is required to override the images default for
                  # delete_on_termination, forcing all volumes to be deleted once the
                  # instance is terminated.
                  delete_on_termination: true
                }
              }
            end
          else
            raise "#{image_id} does not have an ebs root device type"
          end
          block_device_mappings
        end

        # create_disk creates an additional disk for an existing VM. It will name the new
        # disk #{vm_name}-disk#{number_disk} where number_disk is the next logical disk number
        # starting with 1 when adding an additional disk to a VM with only the boot disk:
        #   #{vm_name}-disk0 == boot disk
        #   #{vm_name}-disk1 == additional disk added via create_disk
        #   #{vm_name}-disk2 == additional disk added via create_disk if run a second time etc
        # the new disk has labels added for vm and pool
        # The AWS lifecycle is to create a new disk (lives independently of the instance) then to attach
        # it to the existing instance.
        # inputs
        #   [String] pool_name : Name of the pool
        #   [String] vm_name   : Name of the existing VM
        #   [String] disk_size : The new disk size in GB
        # returns
        #   [boolean] true : once the operations are finished
        def create_disk(pool_name, vm_name, disk_size)
          debug_logger('create_disk')
          pool = pool_config(pool_name)
          raise("Pool #{pool_name} does not exist for the provider #{name}") if pool.nil?

          filters = [{
            name: "tag:vm_name",
            values: [vm_name]
          }]
          instances = connection.instances(filters: filters).first
          raise("VM #{vm_name} in pool #{pool_name} does not exist for the provider #{name}") if instances.nil?

          # this number should start at 1 when there is only the boot disk,
          # eg the new disk will be named spicy-proton-disk1
          number_disk = instances.block_device_mappings.length
          disk_name = "#{vm_name}-disk#{number_disk}"
          disk = {
            availability_zone: zone(pool_name),
            size: disk_size.to_i,
            tag_specifications: [
              {
                resource_type: "volume",
                tags: [
                  { key: "pool", value: pool_name },
                  { key: "vm", value: vm_name },
                  { key: "disk_name", value: disk_name }
                ]
              }
            ]
          }
          debug_logger("trigger insert_disk #{disk_name} for #{vm_name}")
          volume = connection.create_volume(disk)
          # Aws::EC2::Errors::UnauthorizedOperation:
          #   You are not authorized to perform this operation.
          connection.client.wait_until(:volume_available, { volume_ids: [volume.id] })

          debug_logger("trigger attach_disk #{disk_name} for #{vm_name}")
          instances.attach_volume(
            {
              device: "/dev/xvdb",
              volume_id: volume.id
            }
          )
          connection.client.wait_until(:volume_in_use, { volume_ids: [volume.id] })
          true
        end

        # create_snapshot creates new snapshots with the unique name {new_snapshot_name}-#{disk.name};
        # for one vm and one create_snapshot() call there could be multiple snapshots created, one for each drive.
        # since the snapshot resource needs a unique name in the gce project,
        # we create a unique name by concatenating {new_snapshot_name}-#{disk.name}
        # the disk name is based on vm_name which makes it unique.
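        # (In this AWS port the uniqueness comes from tags rather than resource names; see find_snapshot,
        # which filters on tag:vm_name and tag:snapshot_name.)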
        # The snapshot is added tags snapshot_name, vm, pool, diskname and boot
        # inputs
        #   [String] pool_name : Name of the pool
        #   [String] vm_name   : Name of the existing VM
        #   [String] new_snapshot_name : a unique name for this snapshot, which would be used to refer to it when reverting
        # returns
        #   [boolean] true : once the operations are finished
        # raises
        #   RuntimeError if the vm_name cannot be found
        #   RuntimeError if the snapshot_name already exists for this VM
        def create_snapshot(pool_name, vm_name, new_snapshot_name)
          debug_logger('create_snapshot')
          filters = [{
            name: "tag:vm_name",
            values: [vm_name]
          }]
          instances = connection.instances(filters: filters).first
          raise("VM #{vm_name} in pool #{pool_name} does not exist for the provider #{name}") if instances.nil?

          old_snap = find_snapshot(vm_name, new_snapshot_name)
          raise("Snapshot #{new_snapshot_name} for VM #{vm_name} in pool #{pool_name} already exists for the provider #{name}") unless old_snap.first.nil?

          result_list = []
          instances.block_device_mappings.each do |attached_disk|
            volume_id = attached_disk.ebs.volume_id
            snapshot = connection.create_snapshot({
              description: new_snapshot_name,
              volume_id: volume_id,
              tag_specifications: [
                {
                  resource_type: "snapshot", # accepts capacity-reservation, client-vpn-endpoint, customer-gateway, carrier-gateway, dedicated-host, dhcp-options, egress-only-internet-gateway, elastic-ip, elastic-gpu, export-image-task, export-instance-task, fleet, fpga-image, host-reservation, image, import-image-task, import-snapshot-task, instance, instance-event-window, internet-gateway, ipam, ipam-pool, ipam-scope, ipv4pool-ec2, ipv6pool-ec2, key-pair, launch-template, local-gateway, local-gateway-route-table, local-gateway-virtual-interface, local-gateway-virtual-interface-group, local-gateway-route-table-vpc-association, local-gateway-route-table-virtual-interface-group-association, natgateway, network-acl, network-interface, network-insights-analysis, network-insights-path, network-insights-access-scope, network-insights-access-scope-analysis, placement-group, prefix-list, replace-root-volume-task, reserved-instances, route-table, security-group, security-group-rule, snapshot, spot-fleet-request, spot-instances-request, subnet, subnet-cidr-reservation, traffic-mirror-filter, traffic-mirror-session, traffic-mirror-target, transit-gateway, transit-gateway-attachment, transit-gateway-connect-peer, transit-gateway-multicast-domain, transit-gateway-route-table, volume, vpc, vpc-endpoint, vpc-endpoint-service, vpc-peering-connection, vpn-connection, vpn-gateway, vpc-flow-log
                  tags: [
                    { key: "vm_name", value: vm_name },
                    { key: "pool_name", value: pool_name },
                    { key: "snapshot_name", value: new_snapshot_name }
                  ]
                }
              ]
            })
            # Aws::EC2::Errors::UnauthorizedOperation:
            #   You are not authorized to perform this operation.
          end
          true
        end

        # revert_snapshot reverts an existing VM's disks to an existing snapshot_name
        # reverting in aws entails
        #   1. shutting down the VM,
        #   2. detaching and deleting the drives,
        #   3. creating new disks with the same name from the snapshot for each disk
        #   4. attach disks and start instance
        # for one vm, there might be multiple snapshots in time.
        # We select the ones referred to by the snapshot_name, but that may be multiple snapshots, one for each disk
        # The new disk is added tags vm and pool
        # inputs
        #   [String] pool_name : Name of the pool
        #   [String] vm_name   : Name of the existing VM
        #   [String] snapshot_name : Name of an existing snapshot
        # returns
        #   [boolean] true : once the operations are finished
        # raises
        #   RuntimeError if the vm_name cannot be found
        #   RuntimeError if the snapshot_name does not exist for this VM
        def revert_snapshot(pool_name, vm_name, snapshot_name)
          debug_logger('revert_snapshot')
          begin
            vm_object = connection.get_instance(project, zone(pool_name), vm_name)
          rescue ::Google::Apis::ClientError => e
            raise e unless e.status_code == 404

            # if it does not exist
            raise("VM #{vm_name} in pool #{pool_name} does not exist for the provider #{name}")
          end

          snapshot_object = find_snapshot(vm_name, snapshot_name)
          raise("Snapshot #{snapshot_name} for VM #{vm_name} in pool #{pool_name} does not exist for the provider #{name}") if snapshot_object.first.nil?

          # Shutdown instance
          debug_logger("trigger stop_instance #{vm_name}")
          result = connection.stop_instance(project, zone(pool_name), vm_name)
          wait_for_operation(project, pool_name, result)

          # Delete existing disks
          vm_object.disks&.each do |attached_disk|
            debug_logger("trigger detach_disk #{vm_name}: #{attached_disk.device_name}")
            result = connection.detach_disk(project, zone(pool_name), vm_name, attached_disk.device_name)
            wait_for_operation(project, pool_name, result)
            current_disk_name = disk_name_from_source(attached_disk)
            debug_logger("trigger delete_disk #{vm_name}: #{current_disk_name}")
            result = connection.delete_disk(project, zone(pool_name), current_disk_name)
            wait_for_operation(project, pool_name, result)
          end

          # this block is sensitive to disruptions, for example if vmpooler is stopped while this is running
          snapshot_object.each do |snapshot|
            current_disk_name = snapshot.labels['diskname']
            bootable = (snapshot.labels['boot'] == 'true')
            disk = Google::Apis::ComputeV1::Disk.new(
              name: current_disk_name,
              labels: { 'pool' => pool_name, 'vm' => vm_name },
              source_snapshot: snapshot.self_link
            )
            # create disk in GCE as a separate resource
            debug_logger("trigger insert_disk #{vm_name}: #{current_disk_name} based on #{snapshot.self_link}")
            result = connection.insert_disk(project, zone(pool_name), disk)
            wait_for_operation(project, pool_name, result)
            # read the new disk info
            new_disk_info = connection.get_disk(project, zone(pool_name), current_disk_name)
            new_attached_disk = Google::Apis::ComputeV1::AttachedDisk.new(
              auto_delete: true,
              boot: bootable,
              source: new_disk_info.self_link
            )
            # attach the new disk to existing instance
            debug_logger("trigger attach_disk #{vm_name}: #{current_disk_name}")
            result = connection.attach_disk(project, zone(pool_name), vm_name, new_attached_disk)
            wait_for_operation(project, pool_name, result)
          end

          debug_logger("trigger start_instance #{vm_name}")
          result = connection.start_instance(project, zone(pool_name), vm_name)
          wait_for_operation(project, pool_name, result)
          true
        end

        # destroy_vm deletes an existing VM instance and any disks and snapshots via the labels
        # in gce instances, disks and snapshots are resources that can exist independent of each other
        # inputs
        #   [String] pool_name : Name of the pool
        #   [String] vm_name   : Name of the existing VM
        # returns
        #   [boolean] true : once the operations are finished
        def destroy_vm(pool_name, vm_name)
          debug_logger('destroy_vm')
          deleted = false
          filters = [{
            name: "tag:vm_name",
            values: [vm_name]
          }]
          instances = connection.instances(filters: filters).first
          return true if instances.nil?

          debug_logger("trigger delete_instance #{vm_name}")
          # vm_hash = get_vm(pool_name, vm_name)
          instances.terminate
          begin
            connection.client.wait_until(:instance_terminated, { instance_ids: [instances.id] })
            deleted = true
          rescue ::Aws::Waiters::Errors::WaiterFailed => _error
            debug_logger("failed waiting for instance terminated #{vm_name}")
          end

          return deleted
        end

        # check if a vm is ready by opening a socket on port 22
        # if a domain is set, it will use vm_name.domain,
        # if not then it will use the ip directly (AWS workaround)
        def vm_ready?(_pool_name, vm_name)
          begin
            # TODO: we could use a healthcheck resource attached to instance
            domain_set = domain || global_config[:config]['domain']
            if domain_set.nil?
              vm_ip = get_vm(_pool_name, vm_name)['private_ip_address']
              vm_name = vm_ip unless vm_ip.nil?
            end
            open_socket(vm_name, domain_set)
          rescue StandardError => _e
            return false
          end
          true
        end

        # Scans zones that are configured for a list of resources (VMs, disks, snapshots) that do not have the label.pool set
        # to one of the configured pools. If it is also not in the allowlist, the resource is destroyed
        def purge_unconfigured_resources(allowlist)
          debug_logger('purge_unconfigured_resources')
          pools_array = provided_pools
          filter = {}
          # we have to group things by zone, because the API search feature is done against a zone and not global
          # so we will do the searches in each configured zone
          pools_array.each do |pool|
            filter[zone(pool)] = [] if filter[zone(pool)].nil?
            filter[zone(pool)] << "(labels.pool != #{pool})"
          end
          filter.each_key do |zone|
            # this filter should return any item that has a labels.pool that is not in the config OR
            # does not have a pool label at all
            filter_string = "#{filter[zone].join(' AND ')} OR -labels.pool:*"
            # VMs
            instance_list = connection.list_instances(project, zone, filter: filter_string)
            result_list = []
            instance_list.items&.each do |vm|
              next if should_be_ignored(vm, allowlist)

              debug_logger("trigger async delete_instance #{vm.name}")
              result = connection.delete_instance(project, zone, vm.name)
              vm_pool = vm.labels&.key?('pool') ? vm.labels['pool'] : nil
              existing_vm = generate_vm_hash(vm, vm_pool)
              dns_teardown(existing_vm)
              result_list << result
            end
            # now check they are done
            result_list.each do |result|
              wait_for_zone_operation(project, zone, result)
            end
            # Disks
            disks_list = connection.list_disks(project, zone, filter: filter_string)
            disks_list.items&.each do |disk|
              next if should_be_ignored(disk, allowlist)

              debug_logger("trigger async no wait delete_disk #{disk.name}")
              connection.delete_disk(project, zone, disk.name)
            end
            # Snapshots
            snapshot_list = connection.list_snapshots(project, filter: filter_string)
            next if snapshot_list.items.nil?

            snapshot_list.items.each do |sn|
              next if should_be_ignored(sn, allowlist)

              debug_logger("trigger async no wait delete_snapshot #{sn.name}")
              connection.delete_snapshot(project, sn.name)
            end
          end
        end

        # tag_vm_user This method is called once we know who is using the VM (it is running). This method enables seeing
        # who is using what in the provider pools.
        #
        # inputs
        #   [String] pool_name : Name of the pool
        #   [String] vm_name   : Name of the VM to check if ready
        # returns
        #   [Boolean] : true if successful, false if an error occurred and it should retry
        def tag_vm_user(pool, vm_name)
          user = get_current_user(vm_name)
          vm_hash = get_vm(pool, vm_name)
          return false if vm_hash.nil?
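          # vm_hash comes from generate_vm_hash, which currently builds its data from EC2 tags
          # and does not populate 'labels' or 'label_fingerprint'.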
          new_labels = vm_hash['labels']
          # bailing in this case since labels should exist, and continuing would mean losing them
          return false if new_labels.nil?

          # add new label called token-user, with value as user
          new_labels['token-user'] = user
          begin
            instances_set_labels_request_object = Google::Apis::ComputeV1::InstancesSetLabelsRequest.new(label_fingerprint: vm_hash['label_fingerprint'], labels: new_labels)
            result = connection.set_instance_labels(project, zone(pool), vm_name, instances_set_labels_request_object)
            wait_for_zone_operation(project, zone(pool), result)
          rescue StandardError => _e
            return false
          end
          true
        end

        # END BASE METHODS

        def dns_setup(created_instance)
          dns_zone = dns.zone(dns_zone_resource_name) if dns_zone_resource_name
          return unless dns_zone && created_instance && created_instance['name'] && created_instance['ip']

          name = created_instance['name']
          begin
            change = dns_zone.add(name, 'A', 60, [created_instance['ip']])
            debug_logger("#{change.id} - #{change.started_at} - #{change.status} DNS address added") if change
          rescue Google::Cloud::AlreadyExistsError => _e
            # DNS setup is done only for new instances, so in the rare case where a DNS record already exists (it is stale), we replace it.
            # the error is Google::Cloud::AlreadyExistsError: alreadyExists: The resource 'entity.change.additions[0]' named 'instance-8.test.vmpooler.net. (A)' already exists
            change = dns_zone.replace(name, 'A', 60, [created_instance['ip']])
            debug_logger("#{change.id} - #{change.started_at} - #{change.status} DNS address previously existed and was replaced") if change
          end
        end

        def dns_teardown(created_instance)
          dns_zone = dns.zone(dns_zone_resource_name) if dns_zone_resource_name
          return unless dns_zone && created_instance

          name = created_instance['name']
          change = dns_zone.remove(name, 'A')
          debug_logger("#{change.id} - #{change.started_at} - #{change.status} DNS address removed") if change
        end

        def should_be_ignored(item, allowlist)
          return false if allowlist.nil?

          allowlist.map!(&:downcase) # remove uppercase from configured values because it's not valid as a resource label
          array_flattened_labels = []
          item.labels&.each do |k, v|
            array_flattened_labels << "#{k}=#{v}"
          end
          (!item.labels.nil? && allowlist&.include?(item.labels['pool'])) || # the allow list specifies the value within the pool label
            (allowlist&.include?('') && !item.labels&.keys&.include?('pool')) || # the allow list specifies the "" string and the pool label is not set
            !(allowlist & array_flattened_labels).empty? # the allow list specifies a fully qualified label eg user=Bob and the item has it
        end

        def get_current_user(vm_name)
          @redis.with_metrics do |redis|
            user = redis.hget("vmpooler__vm__#{vm_name}", 'token:user')
            return '' if user.nil?

            # cleanup so it's a valid label value
            # can't have uppercase
            user = user.downcase
            # replace invalid chars with a dash
            user = user.gsub(/[^0-9a-z_-]/, '-')
            return user
          end
        end

        def get_current_lifetime(vm_name)
          @redis.with_metrics do |redis|
            lifetime = redis.hget("vmpooler__vm__#{vm_name}", 'lifetime') || '1h'
            return lifetime
          end
        end

        def get_current_job_url(vm_name)
          @redis.with_metrics do |redis|
            job = redis.hget("vmpooler__vm__#{vm_name}", 'tag:jenkins_build_url') || ''
            return job
          end
        end

        # Return a hash of VM data
        # Provides name, template, poolname, boottime, status, image_size, private_ip_address
        def generate_vm_hash(vm_object, pool_name)
          pool_configuration = pool_config(pool_name)
          return nil if pool_configuration.nil?
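          # Illustrative return value (values here are made up):
          #   { 'name' => 'spicy-proton', 'template' => 'ami-0abc...', 'poolname' => 'redhat-8-x86_64',
          #     'boottime' => <Time>, 'status' => 'running',
          #     'image_size' => 'a1.large', 'private_ip_address' => '10.0.0.5' }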
          {
            'name' => vm_object.tags.detect { |f| f.key == 'vm_name' }&.value,
            # 'hostname' => vm_object.hostname,
            'template' => pool_configuration&.key?('template') ? pool_configuration['template'] : nil, # was expecting to get it from API, not from config, but this is what vSphere does too!
            'poolname' => vm_object.tags.detect { |f| f.key == 'pool' }&.value,
            'boottime' => vm_object.launch_time,
            'status' => vm_object.state.name, # One of the following values: pending, running, shutting-down, terminated, stopping, stopped
            # 'zone' => vm_object.zone,
            'image_size' => vm_object.instance_type,
            'private_ip_address' => vm_object.private_ip_address
          }
        end

        def ensured_aws_connection(connection_pool_object)
          connection_pool_object[:connection] = connect_to_aws unless connection_pool_object[:connection]
          connection_pool_object[:connection]
        end

        def connect_to_aws
          max_tries = global_config[:config]['max_tries'] || 3
          retry_factor = global_config[:config]['retry_factor'] || 10
          try = 1
          begin
            compute = ::Aws::EC2::Resource.new(
              region: region,
              credentials: ::Aws::Credentials.new(@aws_access_key, @aws_secret_key),
              log_level: :debug
            )
            metrics.increment('connect.open')
            compute
          rescue StandardError => e # is that even a thing?
            metrics.increment('connect.fail')
            raise e if try >= max_tries

            sleep(try * retry_factor)
            try += 1
            retry
          end
        end

        # This should supersede the open_socket method in the Pool Manager
        def open_socket(host, domain = nil, timeout = 5, port = 22, &_block)
          Timeout.timeout(timeout) do
            target_host = host
            target_host = "#{host}.#{domain}" if domain
            sock = TCPSocket.new target_host, port
            begin
              yield sock if block_given?
            ensure
              sock.close
            end
          end
        end

        # this is used because for one vm, with the same snapshot name there could be multiple snapshots,
        # one for each disk
        def find_snapshot(vm_name, snapshotname)
          filters = [
            {
              name: "tag:vm_name",
              values: [vm_name]
            },
            {
              name: "tag:snapshot_name",
              values: [snapshotname]
            }
          ]
          snapshot_list = connection.snapshots({ filters: filters })
          snapshot_list
        end

        # find all snapshots ever created for one vm,
        # regardless of snapshot name, for example when deleting it all
        def find_all_snapshots(vm_name)
          filter = "(labels.vm = #{vm_name})"
          snapshot_list = connection.list_snapshots(project, filter: filter)
          snapshot_list.items # array of snapshot objects
        end

        def disk_name_from_source(attached_disk)
          attached_disk.source.split('/')[-1] # disk name is after the last / of the full source URL
        end

        # used in local dev environment, set DEBUG_FLAG=true
        # this way the upstream vmpooler manager does not get polluted with logs
        def debug_logger(message, send_to_upstream: false)
          # the default logger is simple and does not enforce debug levels (the first argument)
          puts message if ENV['DEBUG_FLAG']
          logger.log('[g]', message) if send_to_upstream
        end
      end
    end
  end
end
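# Illustrative (not authoritative) provider configuration consumed by the accessors above;
# key names match the provider_config/pool_config lookups in this class, values are examples only:
#
#   :providers:
#     :aws:
#       region: 'us-west-2'
#       zone: 'us-west-2a'
#       amisize: 't3.small'
#       volume_size: 40
#       domain: 'example.vmpooler.net'
#       dns_zone_resource_name: 'example-vmpooler-net'
#       connection_pool_size: 10
#       connection_pool_timeout: 60
#   :pools:
#     - name: 'redhat-8-x86_64'
#       template: 'ami-0123456789abcdef0'
#       size: 2
#       provider: 'aws'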