Merge pull request #7 from puppetlabs/dio-3163

(DIO-3163) Implement Cloud DNS for EC2 VMs
This commit is contained in:
Samuel 2022-08-02 09:26:19 -05:00 committed by GitHub
commit 4e311e1c3c
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 105 additions and 40 deletions

View file

@ -25,7 +25,16 @@ These steps expect two environment vars
### DNS ### DNS
AWS will set up a private IP and private DNS hostname for the VM once it is running. Optionally, we can set up a human-readable DNS entry to resolve the VMPooler provider `spicy-proton` FQDN AWS will set up a private IP and private DNS hostname for the VM once it is running. Optionally, we can set up a human-readable DNS entry to resolve the VMPooler provider `spicy-proton` FQDN
DNS is integrated via Google's CloudDNS service. To enable, a CloudDNS zone name must be provided in the config (see the example yaml file dns_zone_resource_name) DNS is integrated via Google's CloudDNS service.
GCE authorization is handled via a service account (or personal account) private key (json format) and can be configured via
1. GOOGLE_APPLICATION_CREDENTIALS environment variable eg GOOGLE_APPLICATION_CREDENTIALS=/my/home/directory/my_account_key.json
Provider config needed:
1. domain
2. project
3. dns_zone_resource_name
(see the example yaml file)
An A record is then created in that zone upon instance creation with the VM's internal IP, and deleted when the instance is destroyed. An A record is then created in that zone upon instance creation with the VM's internal IP, and deleted when the instance is destroyed.
@ -44,8 +53,7 @@ do not have the pool label, and can be configured to allow a specific list of un
### Pre-requisite ### Pre-requisite
- An IAM user must exist in the target AWS account with permissions to create, delete vms etc - An IAM user must exist in the target AWS account with permissions to create, delete vms etc
- if using DNS, a DNS zone needs to be created in CloudDNS, and configured in the provider's config section with the name of that zone (dns_zone_resource_name). When not specified, the DNS setup and teardown is skipped. - if using DNS, see the section above; a service account with permissions to change Cloud DNS needs to exist
## License ## License

View file

@ -3,6 +3,7 @@
require 'bigdecimal' require 'bigdecimal'
require 'bigdecimal/util' require 'bigdecimal/util'
require 'vmpooler/providers/base' require 'vmpooler/providers/base'
require 'vmpooler/cloud_dns'
require 'aws-sdk-ec2' require 'aws-sdk-ec2'
require 'vmpooler/aws_setup' require 'vmpooler/aws_setup'
@ -61,30 +62,35 @@ module Vmpooler
end end
end end
attr_reader :dns
# main configuration options # main configuration options
def region def region
return provider_config['region'] if provider_config['region'] provider_config['region']
end end
# main configuration options, overridable for each pool # main configuration options, overridable for each pool
def zone(pool_name) def zone(pool_name)
return pool_config(pool_name)['zone'] if pool_config(pool_name)['zone'] return pool_config(pool_name)['zone'] if pool_config(pool_name)['zone']
return provider_config['zone'] if provider_config['zone']
provider_config['zone']
end end
def amisize(pool_name) def amisize(pool_name)
return pool_config(pool_name)['amisize'] if pool_config(pool_name)['amisize'] return pool_config(pool_name)['amisize'] if pool_config(pool_name)['amisize']
return provider_config['amisize'] if provider_config['amisize']
provider_config['amisize']
end end
def volume_size(pool_name) def volume_size(pool_name)
return pool_config(pool_name)['volume_size'] if pool_config(pool_name)['volume_size'] return pool_config(pool_name)['volume_size'] if pool_config(pool_name)['volume_size']
return provider_config['volume_size'] if provider_config['volume_size']
provider_config['volume_size']
end end
# dns # dns
def project
provider_config['project']
end
def domain def domain
provider_config['domain'] provider_config['domain']
end end
@ -104,7 +110,7 @@ module Vmpooler
end end
def to_provision(pool_name) def to_provision(pool_name)
return pool_config(pool_name)['provision'] if pool_config(pool_name)['provision'] pool_config(pool_name)['provision']
end end
# Base methods that are implemented: # Base methods that are implemented:
@ -192,6 +198,13 @@ module Vmpooler
raise("Instance creation not attempted, #{new_vmname} already exists") if get_vm(pool_name, new_vmname) raise("Instance creation not attempted, #{new_vmname} already exists") if get_vm(pool_name, new_vmname)
subnet_id = get_subnet_id(pool_name) subnet_id = get_subnet_id(pool_name)
domain_set = domain
name_to_use = if domain_set.nil?
new_vmname
else
"#{new_vmname}.#{domain_set}"
end
tag = [ tag = [
{ {
resource_type: 'instance', # accepts capacity-reservation, client-vpn-endpoint, customer-gateway, carrier-gateway, dedicated-host, dhcp-options, egress-only-internet-gateway, elastic-ip, elastic-gpu, export-image-task, export-instance-task, fleet, fpga-image, host-reservation, image, import-image-task, import-snapshot-task, instance, instance-event-window, internet-gateway, ipam, ipam-pool, ipam-scope, ipv4pool-ec2, ipv6pool-ec2, key-pair, launch-template, local-gateway, local-gateway-route-table, local-gateway-virtual-interface, local-gateway-virtual-interface-group, local-gateway-route-table-vpc-association, local-gateway-route-table-virtual-interface-group-association, natgateway, network-acl, network-interface, network-insights-analysis, network-insights-path, network-insights-access-scope, network-insights-access-scope-analysis, placement-group, prefix-list, replace-root-volume-task, reserved-instances, route-table, security-group, security-group-rule, snapshot, spot-fleet-request, spot-instances-request, subnet, subnet-cidr-reservation, traffic-mirror-filter, traffic-mirror-session, traffic-mirror-target, transit-gateway, transit-gateway-attachment, transit-gateway-connect-peer, transit-gateway-multicast-domain, transit-gateway-route-table, volume, vpc, vpc-endpoint, vpc-endpoint-service, vpc-peering-connection, vpn-connection, vpn-gateway, vpc-flow-log resource_type: 'instance', # accepts capacity-reservation, client-vpn-endpoint, customer-gateway, carrier-gateway, dedicated-host, dhcp-options, egress-only-internet-gateway, elastic-ip, elastic-gpu, export-image-task, export-instance-task, fleet, fpga-image, host-reservation, image, import-image-task, import-snapshot-task, instance, instance-event-window, internet-gateway, ipam, ipam-pool, ipam-scope, ipv4pool-ec2, ipv6pool-ec2, key-pair, launch-template, local-gateway, local-gateway-route-table, local-gateway-virtual-interface, local-gateway-virtual-interface-group, local-gateway-route-table-vpc-association, 
local-gateway-route-table-virtual-interface-group-association, natgateway, network-acl, network-interface, network-insights-analysis, network-insights-path, network-insights-access-scope, network-insights-access-scope-analysis, placement-group, prefix-list, replace-root-volume-task, reserved-instances, route-table, security-group, security-group-rule, snapshot, spot-fleet-request, spot-instances-request, subnet, subnet-cidr-reservation, traffic-mirror-filter, traffic-mirror-session, traffic-mirror-target, transit-gateway, transit-gateway-attachment, transit-gateway-connect-peer, transit-gateway-multicast-domain, transit-gateway-route-table, volume, vpc, vpc-endpoint, vpc-endpoint-service, vpc-peering-connection, vpn-connection, vpn-gateway, vpc-flow-log
@ -206,7 +219,7 @@ module Vmpooler
}, },
{ {
key: 'lifetime', # required by AWS reaper key: 'lifetime', # required by AWS reaper
value: get_current_lifetime(new_vmname) value: max_lifetime
}, },
{ {
key: 'created_by', # required by AWS reaper key: 'created_by', # required by AWS reaper
@ -226,7 +239,7 @@ module Vmpooler
}, },
{ {
key: 'Name', key: 'Name',
value: new_vmname value: name_to_use
} }
] ]
} }
@ -252,16 +265,25 @@ module Vmpooler
instance_id = batch_instance.first.instance_id instance_id = batch_instance.first.instance_id
connection.client.wait_until(:instance_running, { instance_ids: [instance_id] }) connection.client.wait_until(:instance_running, { instance_ids: [instance_id] })
@logger.log('s', "[>] [#{pool_name}] '#{new_vmname}' instance running") @logger.log('s', "[>] [#{pool_name}] '#{new_vmname}' instance running")
created_instance = get_vm(pool_name, new_vmname)
dns_setup(created_instance) if domain
### System status checks ### System status checks
# This check verifies that your instance is reachable. Amazon EC2 tests that network packets can get to your instance. # This check verifies that your instance is reachable. Amazon EC2 tests that network packets can get to your instance.
### Instance status checks ### Instance status checks
# This check verifies that your instance's operating system is accepting traffic. # This check verifies that your instance's operating system is accepting traffic.
connection.client.wait_until(:instance_status_ok, { instance_ids: [instance_id] }) connection.client.wait_until(:instance_status_ok, { instance_ids: [instance_id] })
@logger.log('s', "[>] [#{pool_name}] '#{new_vmname}' instance ready to accept traffic") @logger.log('s', "[>] [#{pool_name}] '#{new_vmname}' instance ready to accept traffic")
created_instance = get_vm(pool_name, new_vmname)
# extra setup steps @redis.with_metrics do |redis|
provision_node_aws(created_instance['private_dns_name'], pool_name, new_vmname) if to_provision(pool_name) == 'true' || to_provision(pool_name) == true redis.hset("vmpooler__vm__#{new_vmname}", 'host', created_instance['private_dns_name'])
end
if domain
provision_node_aws(created_instance['name'], pool_name, new_vmname) if to_provision(pool_name) == 'true' || to_provision(pool_name) == true
elsif to_provision(pool_name) == 'true' || to_provision(pool_name) == true
provision_node_aws(created_instance['private_dns_name'], pool_name, new_vmname)
end
created_instance created_instance
end end
@ -355,7 +377,7 @@ module Vmpooler
# [String] vm_name : Name of the existing VM # [String] vm_name : Name of the existing VM
# returns # returns
# [boolean] true : once the operations are finished # [boolean] true : once the operations are finished
def destroy_vm(_pool_name, vm_name) def destroy_vm(pool_name, vm_name)
debug_logger('destroy_vm') debug_logger('destroy_vm')
deleted = false deleted = false
@ -366,8 +388,8 @@ module Vmpooler
instances = connection.instances(filters: filters).first instances = connection.instances(filters: filters).first
return true if instances.nil? return true if instances.nil?
instance_hash = get_vm(pool_name, vm_name)
debug_logger("trigger delete_instance #{vm_name}") debug_logger("trigger delete_instance #{vm_name}")
# vm_hash = get_vm(pool_name, vm_name)
instances.terminate instances.terminate
begin begin
connection.client.wait_until(:instance_terminated, { instance_ids: [instances.id] }) connection.client.wait_until(:instance_terminated, { instance_ids: [instances.id] })
@ -376,15 +398,16 @@ module Vmpooler
debug_logger("failed waiting for instance terminated #{vm_name}: #{e}") debug_logger("failed waiting for instance terminated #{vm_name}: #{e}")
end end
dns_teardown(instance_hash) if domain
deleted deleted
end end
# check if a vm is ready by opening a socket on port 22 # check if a vm is ready by opening a socket on port 22
# if a domain is set, it will use vm_name.domain, # if a domain is set, it will use vm_name.domain,
# if not then it will use the ip directly (AWS workaround) # if not then it will use the private dns name directly (AWS workaround)
def vm_ready?(pool_name, vm_name) def vm_ready?(pool_name, vm_name)
begin begin
# TODO: we could use a healthcheck resource attached to instance
domain_set = domain domain_set = domain
if domain_set.nil? if domain_set.nil?
vm_ip = get_vm(pool_name, vm_name)['private_dns_name'] vm_ip = get_vm(pool_name, vm_name)['private_dns_name']
@ -411,24 +434,32 @@ module Vmpooler
vm_hash = get_vm(pool, vm_name) vm_hash = get_vm(pool, vm_name)
return false if vm_hash.nil? return false if vm_hash.nil?
new_labels = vm_hash['labels'] filters = [{
# bailing in this case since labels should exist, and continuing would mean losing them name: 'tag:vm_name',
return false if new_labels.nil? values: [vm_name]
}]
instances = connection.instances(filters: filters).first
return false if instances.nil?
# add new label called token-user, with value as user # add new label called token-user, with value as user
new_labels['token-user'] = user instances.create_tags(tags: [key: 'token-user', value: user])
begin
instances_set_labels_request_object = Google::Apis::ComputeV1::InstancesSetLabelsRequest.new(label_fingerprint: vm_hash['label_fingerprint'], labels: new_labels)
result = connection.set_instance_labels(project, zone(pool), vm_name, instances_set_labels_request_object)
wait_for_zone_operation(project, zone(pool), result)
rescue StandardError => _e
return false
end
true true
rescue StandardError => _e
false
end end
# END BASE METHODS # END BASE METHODS
def dns_setup(created_instance)
dns = Vmpooler::PoolManager::CloudDns.new(project, dns_zone_resource_name)
dns.dns_create_or_replace(created_instance)
end
def dns_teardown(created_instance)
dns = Vmpooler::PoolManager::CloudDns.new(project, dns_zone_resource_name)
dns.dns_teardown(created_instance)
end
def get_current_user(vm_name) def get_current_user(vm_name)
@redis.with_metrics do |redis| @redis.with_metrics do |redis|
user = redis.hget("vmpooler__vm__#{vm_name}", 'token:user') user = redis.hget("vmpooler__vm__#{vm_name}", 'token:user')
@ -451,6 +482,12 @@ module Vmpooler
end end
end end
# returns max_lifetime_upper_limit in hours, in the format Xh; defaults to 12h
def max_lifetime
max_hours = global_config[:config]['max_lifetime_upper_limit'] || '12'
"#{max_hours}h"
end
def get_current_job_url(vm_name) def get_current_job_url(vm_name)
@redis.with_metrics do |redis| @redis.with_metrics do |redis|
job = redis.hget("vmpooler__vm__#{vm_name}", 'tag:jenkins_build_url') || '' job = redis.hget("vmpooler__vm__#{vm_name}", 'tag:jenkins_build_url') || ''
@ -465,7 +502,7 @@ module Vmpooler
return nil if pool_configuration.nil? return nil if pool_configuration.nil?
{ {
'name' => vm_object.tags.detect { |f| f.key == 'vm_name' }&.value, 'name' => vm_object.tags.detect { |f| f.key == 'Name' }&.value,
# 'hostname' => vm_object.hostname, # 'hostname' => vm_object.hostname,
'template' => pool_configuration&.key?('template') ? pool_configuration['template'] : nil, # was expecting to get it from API, not from config, but this is what vSphere does too! 'template' => pool_configuration&.key?('template') ? pool_configuration['template'] : nil, # was expecting to get it from API, not from config, but this is what vSphere does too!
'poolname' => vm_object.tags.detect { |f| f.key == 'pool' }&.value, 'poolname' => vm_object.tags.detect { |f| f.key == 'pool' }&.value,
@ -473,6 +510,7 @@ module Vmpooler
'status' => vm_object.state&.name, # One of the following values: pending, running, shutting-down, terminated, stopping, stopped 'status' => vm_object.state&.name, # One of the following values: pending, running, shutting-down, terminated, stopping, stopped
# 'zone' => vm_object.zone, # 'zone' => vm_object.zone,
'image_size' => vm_object.instance_type, 'image_size' => vm_object.instance_type,
'ip' => vm_object.private_ip_address, # used by the cloud dns class to set the record to this value
'private_ip_address' => vm_object.private_ip_address, 'private_ip_address' => vm_object.private_ip_address,
'private_dns_name' => vm_object.private_dns_name 'private_dns_name' => vm_object.private_dns_name
} }

View file

@ -53,23 +53,28 @@ EOT
describe '#manual tests live' do describe '#manual tests live' do
context 'in itsysops' do context 'in itsysops' do
let(:vmname) { "instance-50" } let(:vmname) { "instance-60" }
let(:poolname) { "ubuntu-2004-arm64" } let(:poolname) { "amazon-7-x86_64-local" }
let(:amisize) { "c5.xlarge" }
let(:config) { YAML.load(<<~EOT let(:config) { YAML.load(<<~EOT
--- ---
:config: :config:
max_tries: 3 max_tries: 3
retry_factor: 10 retry_factor: 10
site_name: 'vmpooler-local-dev'
:providers: :providers:
:ec2: :ec2:
connection_pool_timeout: 1 connection_pool_timeout: 1
zone: '#{zone}' zone: '#{zone}'
region: '#{region}' region: '#{region}'
project: 'vmpooler-test'
dns_zone_resource_name: 'vmpooler-test-puppet-net'
domain: 'vmpooler-test.puppet.net'
:pools: :pools:
- name: '#{poolname}' - name: '#{poolname}'
alias: [ 'mockpool' ] alias: [ 'mockpool' ]
amisize: 'a1.large' amisize: '#{amisize}'
template: 'ami-03c1b544a7566b3e5' template: 'ami-31394949'
size: 5 size: 5
timeout: 10 timeout: 10
ready_ttl: 1440 ready_ttl: 1440
@ -83,6 +88,8 @@ EOT
} }
skip 'gets a vm' do skip 'gets a vm' do
result = subject.create_vm(poolname, vmname) result = subject.create_vm(poolname, vmname)
subject.tag_vm_user(poolname, vmname)
#result = subject.destroy_vm(poolname, vmname)
#subject.vms_in_pool("amazon-6-x86_64-ec2") #subject.vms_in_pool("amazon-6-x86_64-ec2")
#subject.provision_node_aws("ip-10-227-4-97.amz-dev.puppet.net", poolname) #subject.provision_node_aws("ip-10-227-4-97.amz-dev.puppet.net", poolname)
# subject.create_snapshot(poolname, vmname, "foo") # subject.create_snapshot(poolname, vmname, "foo")
@ -153,7 +160,10 @@ EOT
context 'when VM exists but is missing information' do context 'when VM exists but is missing information' do
before(:each) do before(:each) do
tags = [MockTag.new(key: "vm_name", value: vmname)] tags = [
MockTag.new(key: "Name", value: vmname),
MockTag.new(key: "vm_name", value: vmname)
]
allow(connection).to receive(:instances).and_return([MockInstance.new(tags: tags)]) allow(connection).to receive(:instances).and_return([MockInstance.new(tags: tags)])
end end
@ -161,7 +171,8 @@ EOT
expect(subject.get_vm(poolname, vmname)).to be_kind_of(Hash) expect(subject.get_vm(poolname, vmname)).to be_kind_of(Hash)
end end
it 'should return the VM name' do it 'should return the VM name when domain set' do
config[:providers][:ec2]['domain'] = "foobar.com"
result = subject.get_vm(poolname, vmname) result = subject.get_vm(poolname, vmname)
expect(result['name']).to eq(vmname) expect(result['name']).to eq(vmname)
@ -188,7 +199,7 @@ EOT
instance_type: "a1.large", instance_type: "a1.large",
private_ip_address: "1.1.1.1", private_ip_address: "1.1.1.1",
tags: [ tags: [
MockTag.new(key: "vm_name", value: vmname), MockTag.new(key: "Name", value: vmname),
MockTag.new(key: "pool", value: poolname) MockTag.new(key: "pool", value: poolname)
] ]
) )
@ -322,6 +333,8 @@ EOT
allow(connection).to receive(:client).and_return(client) allow(connection).to receive(:client).and_return(client)
allow(client).to receive(:wait_until) allow(client).to receive(:wait_until)
allow(instance).to receive(:id) allow(instance).to receive(:id)
allow(subject).to receive(:get_vm).and_return({})
allow(subject).to receive(:dns_teardown).and_return(true)
end end
it 'should return true' do it 'should return true' do

View file

@ -16,9 +16,11 @@ Gem::Specification.new do |s|
s.files = Dir[ "lib/**/*" ] s.files = Dir[ "lib/**/*" ]
s.require_paths = ["lib"] s.require_paths = ["lib"]
s.add_dependency 'aws-sdk-ec2', '~> 1' s.add_dependency 'aws-sdk-ec2', '~> 1'
s.add_dependency 'net-ssh', '~> 6.2.0.rc2' s.add_dependency 'net-ssh', '>= 6.2', '< 7.1'
s.add_development_dependency 'vmpooler', '>= 1.3.0', '~> 2.3' s.add_development_dependency 'vmpooler', '>= 1.3.0', '~> 2.3'
#s.add_development_dependency 'vmpooler-provider-gce', '>= 0.4.0', '~> 0.4'
s.add_development_dependency 'vmpooler-provider-gce', '>= 0.4.0', '~> 0.4'
# Testing dependencies # Testing dependencies
s.add_development_dependency 'climate_control', '>= 0.2.0' s.add_development_dependency 'climate_control', '>= 0.2.0'

View file

@ -81,7 +81,10 @@
# Overwrites the global domain parameter. This should match the dns zone domain set for the dns_zone_resource_name. # Overwrites the global domain parameter. This should match the dns zone domain set for the dns_zone_resource_name.
# It is used to infer the domain part of the FQDN ie $vm_name.$domain # It is used to infer the domain part of the FQDN ie $vm_name.$domain
# When setting multiple providers at the same time, this value should be set for each GCE pool. # When setting multiple providers at the same time, this value should be set for each GCE pool.
# (optional) If not explicitely set, the FQDN is inferred using the global 'domain' config parameter # (optional) when not set, the dns setup / teardown is skipped and the instance is reachable via the private_dns_name
# - project
# The GCP project name where the DNS zone resource exists
# (optional)
# Example: # Example:
:aws: :aws:
@ -91,6 +94,7 @@
volume_size: '10' volume_size: '10'
dns_zone_resource_name: 'subdomain-example-com' dns_zone_resource_name: 'subdomain-example-com'
domain: 'subdomain.example.com' domain: 'subdomain.example.com'
project: 'gcp-project-1'
# :pools: # :pools:
# #