Merge pull request #6 from puppetlabs/dio-3163

(DIO-3163) Code improvements after initial testing in vmpooler
This commit is contained in:
Samuel 2022-07-26 12:17:12 -05:00 committed by GitHub
commit 13d0de6dc0
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 60 additions and 29 deletions

View file

@ -20,10 +20,10 @@ jobs:
draft: false draft: false
prerelease: false prerelease: false
generateReleaseNotes: true generateReleaseNotes: true
- name: Install Ruby 2.5.8 - name: Install Ruby jruby-9.3.6.0
uses: ruby/setup-ruby@v1 uses: ruby/setup-ruby@v1
with: with:
ruby-version: '2.5.8' ruby-version: 'jruby-9.3.6.0'
- name: Build gem - name: Build gem
run: gem build *.gemspec run: gem build *.gemspec
- name: Publish gem - name: Publish gem

View file

@ -18,7 +18,7 @@ jobs:
strategy: strategy:
matrix: matrix:
ruby-version: ruby-version:
- '2.5.8' - 'jruby-9.3.6.0'
steps: steps:
- uses: actions/checkout@v2 - uses: actions/checkout@v2
- name: Set up Ruby - name: Set up Ruby
@ -34,8 +34,7 @@ jobs:
strategy: strategy:
matrix: matrix:
ruby-version: ruby-version:
- '2.5.8' - 'jruby-9.3.6.0'
- 'jruby-9.2.12.0'
steps: steps:
- uses: actions/checkout@v2 - uses: actions/checkout@v2
- name: Set up Ruby - name: Set up Ruby

View file

@ -10,10 +10,19 @@ module Vmpooler
ROOT_KEYS_SCRIPT = ENV['ROOT_KEYS_SCRIPT'] ROOT_KEYS_SCRIPT = ENV['ROOT_KEYS_SCRIPT']
ROOT_KEYS_SYNC_CMD = "curl -k -o - -L #{ROOT_KEYS_SCRIPT} | %s" ROOT_KEYS_SYNC_CMD = "curl -k -o - -L #{ROOT_KEYS_SCRIPT} | %s"
def self.setup_node_by_ssh(host, platform) def initialize(logger, new_vmname)
@logger = logger
@key_file = ENV['AWS_KEY_FILE_LOCATION'] @key_file = ENV['AWS_KEY_FILE_LOCATION']
@vm_name = new_vmname
end
def setup_node_by_ssh(host, platform)
conn = check_ssh_accepting_connections(host, platform) conn = check_ssh_accepting_connections(host, platform)
return unless conn
@logger.log('s', "[>] [#{platform}] '#{@vm_name}' net:ssh connected")
configure_host(host, platform, conn) configure_host(host, platform, conn)
@logger.log('s', "[>] [#{platform}] '#{@vm_name}' configured")
end end
# For an Amazon Linux AMI, the user name is ec2-user. # For an Amazon Linux AMI, the user name is ec2-user.
@ -30,7 +39,7 @@ module Vmpooler
# #
# For an Ubuntu AMI, the user name is ubuntu. # For an Ubuntu AMI, the user name is ubuntu.
def self.get_user(platform) def get_user(platform)
if platform =~ /centos/ if platform =~ /centos/
'centos' 'centos'
elsif platform =~ /ubuntu/ elsif platform =~ /ubuntu/
@ -42,22 +51,29 @@ module Vmpooler
end end
end end
def self.check_ssh_accepting_connections(host, platform) def check_ssh_accepting_connections(host, platform)
retries = 0 retries = 0
begin begin
user = get_user(platform) user = get_user(platform)
netssh_jruby_workaround netssh_jruby_workaround
Net::SSH.start(host, user, keys: @key_file, timeout: 10) Net::SSH.start(host, user, keys: @key_file, timeout: 10)
rescue Net::SSH::ConnectionTimeout, Errno::ECONNREFUSED => e rescue Net::SSH::ConnectionTimeout, Errno::ECONNREFUSED => e
puts "Requested instances do not have sshd ready yet, try again: #{e}" @logger.log('s', "[>] [#{platform}] '#{@vm_name}' net:ssh requested instances do not have sshd ready yet, try again for 300s (#{retries}/300): #{e}")
sleep 1 sleep 1
retry if (retries += 1) < 300 retry if (retries += 1) < 300
rescue Errno::EBADF => e
@logger.log('s', "[>] [#{platform}] '#{@vm_name}' net:ssh jruby error, try again for 300s (#{retries}/30): #{e}")
sleep 10
retry if (retries += 1) < 30
rescue StandardError => e
@logger.log('s', "[>] [#{platform}] '#{@vm_name}' net:ssh other error, skipping aws_setup: #{e}")
puts e.backtrace
end end
end end
# Configure the aws host by enabling root and setting the hostname # Configure the aws host by enabling root and setting the hostname
# @param host [String] the internal dns name of the instance # @param host [String] the internal dns name of the instance
def self.configure_host(host, platform, ssh) def configure_host(host, platform, ssh)
ssh.exec!('sudo cp -r .ssh /root/.') ssh.exec!('sudo cp -r .ssh /root/.')
ssh.exec!("sudo sed -ri 's/^#?PermitRootLogin.*/PermitRootLogin yes/' /etc/ssh/sshd_config") ssh.exec!("sudo sed -ri 's/^#?PermitRootLogin.*/PermitRootLogin yes/' /etc/ssh/sshd_config")
ssh.exec!("sudo hostname #{host}") ssh.exec!("sudo hostname #{host}")
@ -69,7 +85,7 @@ module Vmpooler
sync_root_keys(host, platform) sync_root_keys(host, platform)
end end
def self.restart_sshd(host, platform, ssh) def restart_sshd(host, platform, ssh)
ssh.open_channel do |channel| ssh.open_channel do |channel|
channel.request_pty do |ch, success| channel.request_pty do |ch, success|
raise "can't get pty request" unless success raise "can't get pty request" unless success
@ -88,7 +104,7 @@ module Vmpooler
ssh.loop ssh.loop
end end
def self.sync_root_keys(host, _platform) def sync_root_keys(host, _platform)
return if ROOT_KEYS_SCRIPT.nil? return if ROOT_KEYS_SCRIPT.nil?
user = 'root' user = 'root'
@ -101,7 +117,7 @@ module Vmpooler
# issue when using net ssh 6.1.0 with jruby # issue when using net ssh 6.1.0 with jruby
# https://github.com/jruby/jruby-openssl/issues/105 # https://github.com/jruby/jruby-openssl/issues/105
# this will turn off some algos that match /^ecd(sa|h)-sha2/ # this will turn off some algos that match /^ecd(sa|h)-sha2/
def self.netssh_jruby_workaround def netssh_jruby_workaround
Net::SSH::Transport::Algorithms::ALGORITHMS.each_value { |algs| algs.reject! { |a| a =~ /^ecd(sa|h)-sha2/ } } Net::SSH::Transport::Algorithms::ALGORITHMS.each_value { |algs| algs.reject! { |a| a =~ /^ecd(sa|h)-sha2/ } }
Net::SSH::KnownHosts::SUPPORTED_TYPE.reject! { |t| t =~ /^ecd(sa|h)-sha2/ } Net::SSH::KnownHosts::SUPPORTED_TYPE.reject! { |t| t =~ /^ecd(sa|h)-sha2/ }
end end

View file

@ -31,6 +31,7 @@ module Vmpooler
# The default connection pool timeout should be quite large - 60 seconds # The default connection pool timeout should be quite large - 60 seconds
connpool_timeout = provider_config['connection_pool_timeout'].nil? ? 60 : provider_config['connection_pool_timeout'].to_i connpool_timeout = provider_config['connection_pool_timeout'].nil? ? 60 : provider_config['connection_pool_timeout'].to_i
logger.log('d', "[#{name}] ConnPool - Creating a connection pool of size #{connpool_size} with timeout #{connpool_timeout}") logger.log('d', "[#{name}] ConnPool - Creating a connection pool of size #{connpool_size} with timeout #{connpool_timeout}")
@logger = logger
@connection_pool = Vmpooler::PoolManager::GenericConnectionPool.new( @connection_pool = Vmpooler::PoolManager::GenericConnectionPool.new(
metrics: metrics, metrics: metrics,
connpool_type: 'provider_connection_pool', connpool_type: 'provider_connection_pool',
@ -204,11 +205,11 @@ module Vmpooler
value: pool_name value: pool_name
}, },
{ {
key: 'lifetime', key: 'lifetime', # required by AWS reaper
value: get_current_lifetime(new_vmname) value: get_current_lifetime(new_vmname)
}, },
{ {
key: 'created_by', key: 'created_by', # required by AWS reaper
value: get_current_user(new_vmname) value: get_current_user(new_vmname)
}, },
{ {
@ -216,14 +217,17 @@ module Vmpooler
value: get_current_job_url(new_vmname) value: get_current_job_url(new_vmname)
}, },
{ {
key: 'organization', key: 'organization', # required by AWS reaper
value: 'engineering' value: 'engineering'
}, },
{ {
key: 'portfolio', key: 'portfolio', # required by AWS reaper
value: 'ds-ci' value: 'ds-ci'
},
{
key: 'Name',
value: new_vmname
} }
] ]
} }
] ]
@ -247,16 +251,24 @@ module Vmpooler
batch_instance = connection.create_instances(config) batch_instance = connection.create_instances(config)
instance_id = batch_instance.first.instance_id instance_id = batch_instance.first.instance_id
connection.client.wait_until(:instance_running, { instance_ids: [instance_id] }) connection.client.wait_until(:instance_running, { instance_ids: [instance_id] })
@logger.log('s', "[>] [#{pool_name}] '#{new_vmname}' instance running")
### System status checks
# This check verifies that your instance is reachable. Amazon EC2 tests that network packets can get to your instance.
### Instance status checks
# This check verifies that your instance's operating system is accepting traffic.
connection.client.wait_until(:instance_status_ok, { instance_ids: [instance_id] })
@logger.log('s', "[>] [#{pool_name}] '#{new_vmname}' instance ready to accept traffic")
created_instance = get_vm(pool_name, new_vmname) created_instance = get_vm(pool_name, new_vmname)
# extra setup steps # extra setup steps
provision_node_aws(created_instance['private_dns_name'], pool_name) if to_provision(pool_name) == 'true' || to_provision(pool_name) == true provision_node_aws(created_instance['private_dns_name'], pool_name, new_vmname) if to_provision(pool_name) == 'true' || to_provision(pool_name) == true
created_instance created_instance
end end
def provision_node_aws(vm, pool_name) def provision_node_aws(vm, pool_name, new_vmname)
AwsSetup.setup_node_by_ssh(vm, pool_name) aws_setup = AwsSetup.new(@logger, new_vmname)
aws_setup.setup_node_by_ssh(vm, pool_name)
end end
def get_block_device_mappings(image_id, volume_size) def get_block_device_mappings(image_id, volume_size)
@ -373,13 +385,14 @@ module Vmpooler
def vm_ready?(pool_name, vm_name) def vm_ready?(pool_name, vm_name)
begin begin
# TODO: we could use a healthcheck resource attached to instance # TODO: we could use a healthcheck resource attached to instance
domain_set = domain || global_config[:config]['domain'] domain_set = domain
if domain_set.nil? if domain_set.nil?
vm_ip = get_vm(pool_name, vm_name)['private_ip_address'] vm_ip = get_vm(pool_name, vm_name)['private_dns_name']
vm_name = vm_ip unless vm_ip.nil? vm_name = vm_ip unless vm_ip.nil?
end end
open_socket(vm_name, domain_set) open_socket(vm_name, domain_set)
rescue StandardError => _e rescue StandardError => e
@logger.log('s', "[!] [#{pool_name}] '#{vm_name}' instance cannot be reached by vmpooler on tcp port 22; #{e}")
return false return false
end end
true true
@ -430,10 +443,11 @@ module Vmpooler
end end
end end
# Returns the lifetime in hours, formatted as "Xh" (for example "2h"); defaults to "1h".
def get_current_lifetime(vm_name) def get_current_lifetime(vm_name)
@redis.with_metrics do |redis| @redis.with_metrics do |redis|
lifetime = redis.hget("vmpooler__vm__#{vm_name}", 'lifetime') || '1h' lifetime = redis.hget("vmpooler__vm__#{vm_name}", 'lifetime') || '1'
return lifetime return "#{lifetime}h"
end end
end end
@ -511,7 +525,7 @@ module Vmpooler
def debug_logger(message, send_to_upstream: false) def debug_logger(message, send_to_upstream: false)
# the default logger is simple and does not enforce debug levels (the first argument) # the default logger is simple and does not enforce debug levels (the first argument)
puts message if ENV['DEBUG_FLAG'] puts message if ENV['DEBUG_FLAG']
logger.log('[g]', message) if send_to_upstream @logger.log('[g]', message) if send_to_upstream
end end
end end
end end

View file

@ -10,7 +10,9 @@ end
# Mock an object which represents a Logger. This stops the proliferation # Mock an object which represents a Logger. This stops the proliferation
# of allow(logger).to .... expectations in tests. # of allow(logger).to .... expectations in tests.
class MockLogger class MockLogger
def log(_level, string); end def log(_level, string);
puts "#{string}"
end
end end
def expect_json(ok = true, http = 200) def expect_json(ok = true, http = 200)

View file

@ -26,7 +26,7 @@ Gem::Specification.new do |s|
s.add_development_dependency 'pry' s.add_development_dependency 'pry'
s.add_development_dependency 'rack-test', '>= 0.6' s.add_development_dependency 'rack-test', '>= 0.6'
s.add_development_dependency 'rspec', '>= 3.2' s.add_development_dependency 'rspec', '>= 3.2'
s.add_development_dependency 'rubocop', '~> 1.1.0' s.add_development_dependency 'rubocop', '~> 1.28.2'
s.add_development_dependency 'simplecov', '>= 0.11.2' s.add_development_dependency 'simplecov', '>= 0.11.2'
s.add_development_dependency 'thor', '~> 1.0', '>= 1.0.1' s.add_development_dependency 'thor', '~> 1.0', '>= 1.0.1'
s.add_development_dependency 'yarjuf', '>= 2.0' s.add_development_dependency 'yarjuf', '>= 2.0'