diff --git a/README.md b/README.md index 2fb385c..2909f3f 100644 --- a/README.md +++ b/README.md @@ -25,7 +25,16 @@ These steps expect two environment vars ### DNS AWS will setup a private ip and private dns hostname for the VM once running. Optionally we can setup a human readable DNS entry to resolve the VMPooler provider `spicy-proton` fqdn -DNS is integrated via Google's CloudDNS service. To enable, a CloudDNS zone name must be provided in the config (see the example yaml file dns_zone_resource_name) +DNS is integrated via Google's CloudDNS service. +GCE authorization is handled via a service account (or personal account) private key (json format) and can be configured via + +1. GOOGLE_APPLICATION_CREDENTIALS environment variable eg GOOGLE_APPLICATION_CREDENTIALS=/my/home/directory/my_account_key.json + +Provider config needed: +1. domain +2. project +3. dns_zone_resource_name +(see the example yaml file) An A record is then created in that zone upon instance creation with the VM's internal IP, and deleted when the instance is destroyed. @@ -44,8 +53,7 @@ do not have the pool label, and can be configured to allow a specific list of un ### Pre-requisite - An IAM user must exist in the target AWS account with permissions to create, delete vms etc -- if using DNS, a DNS zone needs to be created in CloudDNS, and configured in the provider's config section with the name of that zone (dns_zone_resource_name). When not specified, the DNS setup and teardown is skipped. 
- +- if using DNS, see the DNS section above; a service account with permissions to change Cloud DNS needs to exist ## License diff --git a/lib/vmpooler/providers/ec2.rb b/lib/vmpooler/providers/ec2.rb index 7ef5e36..f857f86 100644 --- a/lib/vmpooler/providers/ec2.rb +++ b/lib/vmpooler/providers/ec2.rb @@ -3,6 +3,7 @@ require 'bigdecimal' require 'bigdecimal/util' require 'vmpooler/providers/base' +require 'vmpooler/cloud_dns' require 'aws-sdk-ec2' require 'vmpooler/aws_setup' @@ -61,30 +62,35 @@ module Vmpooler end end - attr_reader :dns - # main configuration options def region - return provider_config['region'] if provider_config['region'] + provider_config['region'] end # main configuration options, overridable for each pool def zone(pool_name) return pool_config(pool_name)['zone'] if pool_config(pool_name)['zone'] - return provider_config['zone'] if provider_config['zone'] + + provider_config['zone'] end def amisize(pool_name) return pool_config(pool_name)['amisize'] if pool_config(pool_name)['amisize'] - return provider_config['amisize'] if provider_config['amisize'] + + provider_config['amisize'] end def volume_size(pool_name) return pool_config(pool_name)['volume_size'] if pool_config(pool_name)['volume_size'] - return provider_config['volume_size'] if provider_config['volume_size'] + + provider_config['volume_size'] end # dns + def project + provider_config['project'] + end + def domain provider_config['domain'] end @@ -104,7 +110,7 @@ module Vmpooler end def to_provision(pool_name) - return pool_config(pool_name)['provision'] if pool_config(pool_name)['provision'] + pool_config(pool_name)['provision'] end # Base methods that are implemented: @@ -192,6 +198,13 @@ module Vmpooler raise("Instance creation not attempted, #{new_vmname} already exists") if get_vm(pool_name, new_vmname) subnet_id = get_subnet_id(pool_name) + domain_set = domain + name_to_use = if domain_set.nil? 
+ new_vmname + else + "#{new_vmname}.#{domain_set}" + end + tag = [ { resource_type: 'instance', # accepts capacity-reservation, client-vpn-endpoint, customer-gateway, carrier-gateway, dedicated-host, dhcp-options, egress-only-internet-gateway, elastic-ip, elastic-gpu, export-image-task, export-instance-task, fleet, fpga-image, host-reservation, image, import-image-task, import-snapshot-task, instance, instance-event-window, internet-gateway, ipam, ipam-pool, ipam-scope, ipv4pool-ec2, ipv6pool-ec2, key-pair, launch-template, local-gateway, local-gateway-route-table, local-gateway-virtual-interface, local-gateway-virtual-interface-group, local-gateway-route-table-vpc-association, local-gateway-route-table-virtual-interface-group-association, natgateway, network-acl, network-interface, network-insights-analysis, network-insights-path, network-insights-access-scope, network-insights-access-scope-analysis, placement-group, prefix-list, replace-root-volume-task, reserved-instances, route-table, security-group, security-group-rule, snapshot, spot-fleet-request, spot-instances-request, subnet, subnet-cidr-reservation, traffic-mirror-filter, traffic-mirror-session, traffic-mirror-target, transit-gateway, transit-gateway-attachment, transit-gateway-connect-peer, transit-gateway-multicast-domain, transit-gateway-route-table, volume, vpc, vpc-endpoint, vpc-endpoint-service, vpc-peering-connection, vpn-connection, vpn-gateway, vpc-flow-log @@ -206,7 +219,7 @@ module Vmpooler }, { key: 'lifetime', # required by AWS reaper - value: get_current_lifetime(new_vmname) + value: max_lifetime }, { key: 'created_by', # required by AWS reaper @@ -226,7 +239,7 @@ module Vmpooler }, { key: 'Name', - value: new_vmname + value: name_to_use } ] } @@ -252,16 +265,25 @@ module Vmpooler instance_id = batch_instance.first.instance_id connection.client.wait_until(:instance_running, { instance_ids: [instance_id] }) @logger.log('s', "[>] [#{pool_name}] '#{new_vmname}' instance running") + 
created_instance = get_vm(pool_name, new_vmname) + dns_setup(created_instance) if domain + ### System status checks # This check verifies that your instance is reachable. Amazon EC2 tests that network packets can get to your instance. ### Instance status checks # This check verifies that your instance's operating system is accepting traffic. connection.client.wait_until(:instance_status_ok, { instance_ids: [instance_id] }) @logger.log('s', "[>] [#{pool_name}] '#{new_vmname}' instance ready to accept traffic") - created_instance = get_vm(pool_name, new_vmname) - # extra setup steps - provision_node_aws(created_instance['private_dns_name'], pool_name, new_vmname) if to_provision(pool_name) == 'true' || to_provision(pool_name) == true + @redis.with_metrics do |redis| + redis.hset("vmpooler__vm__#{new_vmname}", 'host', created_instance['private_dns_name']) + end + + if domain + provision_node_aws(created_instance['name'], pool_name, new_vmname) if to_provision(pool_name) == 'true' || to_provision(pool_name) == true + elsif to_provision(pool_name) == 'true' || to_provision(pool_name) == true + provision_node_aws(created_instance['private_dns_name'], pool_name, new_vmname) + end created_instance end @@ -355,7 +377,7 @@ module Vmpooler # [String] vm_name : Name of the existing VM # returns # [boolean] true : once the operations are finished - def destroy_vm(_pool_name, vm_name) + def destroy_vm(pool_name, vm_name) debug_logger('destroy_vm') deleted = false @@ -366,8 +388,8 @@ module Vmpooler instances = connection.instances(filters: filters).first return true if instances.nil? 
+ instance_hash = get_vm(pool_name, vm_name) debug_logger("trigger delete_instance #{vm_name}") - # vm_hash = get_vm(pool_name, vm_name) instances.terminate begin connection.client.wait_until(:instance_terminated, { instance_ids: [instances.id] }) @@ -376,15 +398,16 @@ module Vmpooler debug_logger("failed waiting for instance terminated #{vm_name}: #{e}") end + dns_teardown(instance_hash) if domain + deleted end # check if a vm is ready by opening a socket on port 22 # if a domain is set, it will use vn_name.domain, - # if not then it will use the ip directly (AWS workaround) + # if not then it will use the private dns name directly (AWS workaround) def vm_ready?(pool_name, vm_name) begin - # TODO: we could use a healthcheck resource attached to instance domain_set = domain if domain_set.nil? vm_ip = get_vm(pool_name, vm_name)['private_dns_name'] @@ -411,24 +434,32 @@ module Vmpooler vm_hash = get_vm(pool, vm_name) return false if vm_hash.nil? - new_labels = vm_hash['labels'] - # bailing in this case since labels should exist, and continuing would mean losing them - return false if new_labels.nil? + filters = [{ + name: 'tag:vm_name', + values: [vm_name] + }] + instances = connection.instances(filters: filters).first + return false if instances.nil? 
# add new label called token-user, with value as user - new_labels['token-user'] = user - begin - instances_set_labels_request_object = Google::Apis::ComputeV1::InstancesSetLabelsRequest.new(label_fingerprint: vm_hash['label_fingerprint'], labels: new_labels) - result = connection.set_instance_labels(project, zone(pool), vm_name, instances_set_labels_request_object) - wait_for_zone_operation(project, zone(pool), result) - rescue StandardError => _e - return false - end + instances.create_tags(tags: [key: 'token-user', value: user]) true + rescue StandardError => _e + false end # END BASE METHODS + def dns_setup(created_instance) + dns = Vmpooler::PoolManager::CloudDns.new(project, dns_zone_resource_name) + dns.dns_create_or_replace(created_instance) + end + + def dns_teardown(created_instance) + dns = Vmpooler::PoolManager::CloudDns.new(project, dns_zone_resource_name) + dns.dns_teardown(created_instance) + end + def get_current_user(vm_name) @redis.with_metrics do |redis| user = redis.hget("vmpooler__vm__#{vm_name}", 'token:user') @@ -451,6 +482,12 @@ module Vmpooler end end + # returns max_lifetime_upper_limit in hours in the format Xh defaults to 12h + def max_lifetime + max_hours = global_config[:config]['max_lifetime_upper_limit'] || '12' + "#{max_hours}h" + end + def get_current_job_url(vm_name) @redis.with_metrics do |redis| job = redis.hget("vmpooler__vm__#{vm_name}", 'tag:jenkins_build_url') || '' @@ -465,7 +502,7 @@ module Vmpooler return nil if pool_configuration.nil? { - 'name' => vm_object.tags.detect { |f| f.key == 'vm_name' }&.value, + 'name' => vm_object.tags.detect { |f| f.key == 'Name' }&.value, # 'hostname' => vm_object.hostname, 'template' => pool_configuration&.key?('template') ? pool_configuration['template'] : nil, # was expecting to get it from API, not from config, but this is what vSphere does too! 
'poolname' => vm_object.tags.detect { |f| f.key == 'pool' }&.value, @@ -473,6 +510,7 @@ module Vmpooler 'status' => vm_object.state&.name, # One of the following values: pending, running, shutting-down, terminated, stopping, stopped # 'zone' => vm_object.zone, 'image_size' => vm_object.instance_type, + 'ip' => vm_object.private_ip_address, # used by the cloud dns class to set the record to this value 'private_ip_address' => vm_object.private_ip_address, 'private_dns_name' => vm_object.private_dns_name } diff --git a/spec/unit/providers/ec2_spec.rb b/spec/unit/providers/ec2_spec.rb index 5225835..1125100 100644 --- a/spec/unit/providers/ec2_spec.rb +++ b/spec/unit/providers/ec2_spec.rb @@ -53,23 +53,28 @@ EOT describe '#manual tests live' do context 'in itsysops' do - let(:vmname) { "instance-50" } - let(:poolname) { "ubuntu-2004-arm64" } + let(:vmname) { "instance-60" } + let(:poolname) { "amazon-7-x86_64-local" } + let(:amisize) { "c5.xlarge" } let(:config) { YAML.load(<<~EOT --- :config: max_tries: 3 retry_factor: 10 + site_name: 'vmpooler-local-dev' :providers: :ec2: connection_pool_timeout: 1 zone: '#{zone}' region: '#{region}' + project: 'vmpooler-test' + dns_zone_resource_name: 'vmpooler-test-puppet-net' + domain: 'vmpooler-test.puppet.net' :pools: - name: '#{poolname}' alias: [ 'mockpool' ] - amisize: 'a1.large' - template: 'ami-03c1b544a7566b3e5' + amisize: '#{amisize}' + template: 'ami-31394949' size: 5 timeout: 10 ready_ttl: 1440 @@ -83,6 +88,8 @@ EOT } skip 'gets a vm' do result = subject.create_vm(poolname, vmname) + subject.tag_vm_user(poolname, vmname) + #result = subject.destroy_vm(poolname, vmname) #subject.vms_in_pool("amazon-6-x86_64-ec2") #subject.provision_node_aws("ip-10-227-4-97.amz-dev.puppet.net", poolname) # subject.create_snapshot(poolname, vmname, "foo") @@ -153,7 +160,10 @@ EOT context 'when VM exists but is missing information' do before(:each) do - tags = [MockTag.new(key: "vm_name", value: vmname)] + tags = [ + MockTag.new(key: 
"Name", value: vmname), + MockTag.new(key: "vm_name", value: vmname) + ] allow(connection).to receive(:instances).and_return([MockInstance.new(tags: tags)]) end @@ -161,7 +171,8 @@ EOT expect(subject.get_vm(poolname, vmname)).to be_kind_of(Hash) end - it 'should return the VM name' do + it 'should return the VM name when domain set' do + config[:providers][:ec2]['domain'] = "foobar.com" result = subject.get_vm(poolname, vmname) expect(result['name']).to eq(vmname) @@ -188,7 +199,7 @@ EOT instance_type: "a1.large", private_ip_address: "1.1.1.1", tags: [ - MockTag.new(key: "vm_name", value: vmname), + MockTag.new(key: "Name", value: vmname), MockTag.new(key: "pool", value: poolname) ] ) @@ -322,6 +333,8 @@ EOT allow(connection).to receive(:client).and_return(client) allow(client).to receive(:wait_until) allow(instance).to receive(:id) + allow(subject).to receive(:get_vm).and_return({}) + allow(subject).to receive(:dns_teardown).and_return(true) end it 'should return true' do diff --git a/vmpooler-provider-ec2.gemspec b/vmpooler-provider-ec2.gemspec index dd6986a..e084e70 100644 --- a/vmpooler-provider-ec2.gemspec +++ b/vmpooler-provider-ec2.gemspec @@ -16,9 +16,11 @@ Gem::Specification.new do |s| s.files = Dir[ "lib/**/*" ] s.require_paths = ["lib"] s.add_dependency 'aws-sdk-ec2', '~> 1' - s.add_dependency 'net-ssh', '~> 6.2.0.rc2' + s.add_dependency 'net-ssh', '>= 6.2', '< 7.1' s.add_development_dependency 'vmpooler', '>= 1.3.0', '~> 2.3' + #s.add_development_dependency 'vmpooler-provider-gce', '>= 0.4.0', '~> 0.4' + s.add_development_dependency 'vmpooler-provider-gce', '>= 0.4.0', '~> 0.4' # Testing dependencies s.add_development_dependency 'climate_control', '>= 0.2.0' diff --git a/vmpooler.yaml.example b/vmpooler.yaml.example index 28b92a3..24fdb64 100644 --- a/vmpooler.yaml.example +++ b/vmpooler.yaml.example @@ -81,7 +81,10 @@ # Overwrites the global domain parameter. This should match the dns zone domain set for the dns_zone_resource_name. 
# It is used to infer the domain part of the FQDN ie $vm_name.$domain # When setting multiple providers at the same time, this value should be set for each GCE pools. -# (optional) If not explicitely set, the FQDN is inferred using the global 'domain' config parameter +# (optional) when not set, the dns setup / teardown is skipped and the instance is reachable via the private_dns_name +# - project +# The GCP project name where the DNS zone resource exists +# (optional) # Example: :aws: @@ -91,6 +94,7 @@ volume_size: '10' dns_zone_resource_name: 'subdomain-example-com' domain: 'subdomain.example.com' + project: 'gcp-project-1' # :pools: #