Merge pull request #6 from puppetlabs/dio-3163

(DIO-3163) Code improvements after initial testing in vmpooler
This commit is contained in:
Samuel 2022-07-26 12:17:12 -05:00 committed by GitHub
commit 13d0de6dc0
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 60 additions and 29 deletions

View file

@ -20,10 +20,10 @@ jobs:
draft: false
prerelease: false
generateReleaseNotes: true
- name: Install Ruby 2.5.8
- name: Install Ruby jruby-9.3.6.0
uses: ruby/setup-ruby@v1
with:
ruby-version: '2.5.8'
ruby-version: 'jruby-9.3.6.0'
- name: Build gem
run: gem build *.gemspec
- name: Publish gem

View file

@ -18,7 +18,7 @@ jobs:
strategy:
matrix:
ruby-version:
- '2.5.8'
- 'jruby-9.3.6.0'
steps:
- uses: actions/checkout@v2
- name: Set up Ruby
@ -34,8 +34,7 @@ jobs:
strategy:
matrix:
ruby-version:
- '2.5.8'
- 'jruby-9.2.12.0'
- 'jruby-9.3.6.0'
steps:
- uses: actions/checkout@v2
- name: Set up Ruby

View file

@ -10,10 +10,19 @@ module Vmpooler
ROOT_KEYS_SCRIPT = ENV['ROOT_KEYS_SCRIPT']
ROOT_KEYS_SYNC_CMD = "curl -k -o - -L #{ROOT_KEYS_SCRIPT} | %s"
def self.setup_node_by_ssh(host, platform)
# Build a setup helper bound to one VM.
#
# @param logger [#log] object whose `log(level, message)` receives progress messages
# @param new_vmname [String] VM name interpolated into the log lines
def initialize(logger, new_vmname)
  @vm_name = new_vmname
  # Key file location is read from the environment once, at construction; nil when unset.
  @key_file = ENV.fetch('AWS_KEY_FILE_LOCATION', nil)
  @logger = logger
end
# Connect to the host over SSH and, when a connection is obtained, configure it.
# Does nothing further (and returns nil) when no connection could be established.
#
# @param host [String] address passed to the connection check and to configure_host
# @param platform [String] platform name; forwarded to the helpers and used in log lines
def setup_node_by_ssh(host, platform)
  ssh = check_ssh_accepting_connections(host, platform)
  if ssh
    @logger.log('s', "[>] [#{platform}] '#{@vm_name}' net:ssh connected")
    configure_host(host, platform, ssh)
    @logger.log('s', "[>] [#{platform}] '#{@vm_name}' configured")
  end
end
# For an Amazon Linux AMI, the user name is ec2-user.
@ -30,7 +39,7 @@ module Vmpooler
#
# For an Ubuntu AMI, the user name is ubuntu.
def self.get_user(platform)
def get_user(platform)
if platform =~ /centos/
'centos'
elsif platform =~ /ubuntu/
@ -42,22 +51,29 @@ module Vmpooler
end
end
def self.check_ssh_accepting_connections(host, platform)
# Open an SSH session to the host, waiting for sshd to start accepting connections.
#
# Retry policy (a single counter is shared by both retryable branches):
# - Net::SSH::ConnectionTimeout / Errno::ECONNREFUSED: sleep 1s, retry while the counter is below 300
# - Errno::EBADF (logged as a jruby error): sleep 10s, retry while the counter is below 30
# Any other StandardError is logged, its backtrace is printed, and no retry happens.
#
# Progress is reported through @logger only; the former stdout `puts` that
# duplicated the "sshd not ready" message has been removed.
#
# @param host [String] address handed to Net::SSH.start
# @param platform [String] passed to get_user to choose the login user; also used in log lines
# @return [Object, nil] the value of Net::SSH.start on success; nil once retries are
#   exhausted or after a non-retryable error
def check_ssh_accepting_connections(host, platform)
  retries = 0
  # Explicit begin block: `retry` re-runs only the connection attempt and must not reset the counter.
  begin
    user = get_user(platform)
    netssh_jruby_workaround
    Net::SSH.start(host, user, keys: @key_file, timeout: 10)
  rescue Net::SSH::ConnectionTimeout, Errno::ECONNREFUSED => e
    @logger.log('s', "[>] [#{platform}] '#{@vm_name}' net:ssh requested instances do not have sshd ready yet, try again for 300s (#{retries}/300): #{e}")
    sleep 1
    retry if (retries += 1) < 300
  rescue Errno::EBADF => e
    @logger.log('s', "[>] [#{platform}] '#{@vm_name}' net:ssh jruby error, try again for 300s (#{retries}/30): #{e}")
    sleep 10
    retry if (retries += 1) < 30
  rescue StandardError => e
    @logger.log('s', "[>] [#{platform}] '#{@vm_name}' net:ssh other error, skipping aws_setup: #{e}")
    puts e.backtrace
  end
end
# Configure the aws host by enabling root and setting the hostname
# @param host [String] the internal dns name of the instance
def self.configure_host(host, platform, ssh)
def configure_host(host, platform, ssh)
ssh.exec!('sudo cp -r .ssh /root/.')
ssh.exec!("sudo sed -ri 's/^#?PermitRootLogin.*/PermitRootLogin yes/' /etc/ssh/sshd_config")
ssh.exec!("sudo hostname #{host}")
@ -69,7 +85,7 @@ module Vmpooler
sync_root_keys(host, platform)
end
def self.restart_sshd(host, platform, ssh)
def restart_sshd(host, platform, ssh)
ssh.open_channel do |channel|
channel.request_pty do |ch, success|
raise "can't get pty request" unless success
@ -88,7 +104,7 @@ module Vmpooler
ssh.loop
end
def self.sync_root_keys(host, _platform)
def sync_root_keys(host, _platform)
return if ROOT_KEYS_SCRIPT.nil?
user = 'root'
@ -101,7 +117,7 @@ module Vmpooler
# issue when using net ssh 6.1.0 with jruby
# https://github.com/jruby/jruby-openssl/issues/105
# this will turn off some algos that match /^ecd(sa|h)-sha2/
def self.netssh_jruby_workaround
# Strip every algorithm / known-host type whose name matches /^ecd(sa|h)-sha2/
# from Net::SSH's lists. Both constants are mutated in place.
def netssh_jruby_workaround
  ecd_pattern = /^ecd(sa|h)-sha2/
  Net::SSH::Transport::Algorithms::ALGORITHMS.each_value do |algs|
    algs.reject! { |name| name =~ ecd_pattern }
  end
  Net::SSH::KnownHosts::SUPPORTED_TYPE.reject! { |type| type =~ ecd_pattern }
end

View file

@ -31,6 +31,7 @@ module Vmpooler
# The default connection pool timeout should be quite large - 60 seconds
connpool_timeout = provider_config['connection_pool_timeout'].nil? ? 60 : provider_config['connection_pool_timeout'].to_i
logger.log('d', "[#{name}] ConnPool - Creating a connection pool of size #{connpool_size} with timeout #{connpool_timeout}")
@logger = logger
@connection_pool = Vmpooler::PoolManager::GenericConnectionPool.new(
metrics: metrics,
connpool_type: 'provider_connection_pool',
@ -204,11 +205,11 @@ module Vmpooler
value: pool_name
},
{
key: 'lifetime',
key: 'lifetime', # required by AWS reaper
value: get_current_lifetime(new_vmname)
},
{
key: 'created_by',
key: 'created_by', # required by AWS reaper
value: get_current_user(new_vmname)
},
{
@ -216,14 +217,17 @@ module Vmpooler
value: get_current_job_url(new_vmname)
},
{
key: 'organization',
key: 'organization', # required by AWS reaper
value: 'engineering'
},
{
key: 'portfolio',
key: 'portfolio', # required by AWS reaper
value: 'ds-ci'
},
{
key: 'Name',
value: new_vmname
}
]
}
]
@ -247,16 +251,24 @@ module Vmpooler
batch_instance = connection.create_instances(config)
instance_id = batch_instance.first.instance_id
connection.client.wait_until(:instance_running, { instance_ids: [instance_id] })
@logger.log('s', "[>] [#{pool_name}] '#{new_vmname}' instance running")
### System status checks
# This check verifies that your instance is reachable. Amazon EC2 tests that network packets can get to your instance.
### Instance status checks
# This check verifies that your instance's operating system is accepting traffic.
connection.client.wait_until(:instance_status_ok, { instance_ids: [instance_id] })
@logger.log('s', "[>] [#{pool_name}] '#{new_vmname}' instance ready to accept traffic")
created_instance = get_vm(pool_name, new_vmname)
# extra setup steps
provision_node_aws(created_instance['private_dns_name'], pool_name) if to_provision(pool_name) == 'true' || to_provision(pool_name) == true
provision_node_aws(created_instance['private_dns_name'], pool_name, new_vmname) if to_provision(pool_name) == 'true' || to_provision(pool_name) == true
created_instance
end
def provision_node_aws(vm, pool_name)
AwsSetup.setup_node_by_ssh(vm, pool_name)
# Run the SSH-based setup for a newly created instance.
#
# @param vm [String] value handed to AwsSetup#setup_node_by_ssh as the host
# @param pool_name [String] value handed to AwsSetup#setup_node_by_ssh as the platform
# @param new_vmname [String] VM name given to AwsSetup for its log messages
def provision_node_aws(vm, pool_name, new_vmname)
  AwsSetup.new(@logger, new_vmname).setup_node_by_ssh(vm, pool_name)
end
def get_block_device_mappings(image_id, volume_size)
@ -373,13 +385,14 @@ module Vmpooler
def vm_ready?(pool_name, vm_name)
begin
# TODO: we could use a healthcheck resource attached to instance
domain_set = domain || global_config[:config]['domain']
domain_set = domain
if domain_set.nil?
vm_ip = get_vm(pool_name, vm_name)['private_ip_address']
vm_ip = get_vm(pool_name, vm_name)['private_dns_name']
vm_name = vm_ip unless vm_ip.nil?
end
open_socket(vm_name, domain_set)
rescue StandardError => _e
rescue StandardError => e
@logger.log('s', "[!] [#{pool_name}] '#{vm_name}' instance cannot be reached by vmpooler on tcp port 22; #{e}")
return false
end
true
@ -430,10 +443,11 @@ module Vmpooler
end
end
# Returns the lifetime, in hours, recorded for a VM, formatted as "<n>h".
# Falls back to "1h" when the `lifetime` field of the
# "vmpooler__vm__<vm_name>" hash is absent.
#
# @param vm_name [String] VM name used to build the redis key
# @return [String] the stored lifetime with an "h" suffix, e.g. "3h"
def get_current_lifetime(vm_name)
  @redis.with_metrics do |redis|
    hours = redis.hget("vmpooler__vm__#{vm_name}", 'lifetime') || '1'
    # Explicit return exits the method from inside the block, so the result
    # does not depend on what with_metrics itself evaluates to.
    return "#{hours}h"
  end
end
@ -511,7 +525,7 @@ module Vmpooler
# Emit a debug message.
#
# The message is printed to stdout only when the DEBUG_FLAG environment
# variable is set, and is forwarded to @logger (exactly once) only when
# send_to_upstream is true.
#
# @param message [String] text to emit
# @param send_to_upstream [Boolean] also pass the message to @logger
def debug_logger(message, send_to_upstream: false)
  # the default logger is simple and does not enforce debug levels (the first argument)
  puts message if ENV['DEBUG_FLAG']
  @logger.log('[g]', message) if send_to_upstream
end
end
end

View file

@ -10,7 +10,9 @@ end
# Mock an object which represents a Logger. This stops the proliferation
# of allow(logger).to .... expectations in tests.
class MockLogger
  # Stand-in for the logger's `log`: ignores the level and echoes the
  # message to stdout.
  #
  # @param _level [Object] ignored
  # @param string [Object] message; printed via its string form
  def log(_level, string)
    puts string.to_s
  end
end
def expect_json(ok = true, http = 200)

View file

@ -26,7 +26,7 @@ Gem::Specification.new do |s|
s.add_development_dependency 'pry'
s.add_development_dependency 'rack-test', '>= 0.6'
s.add_development_dependency 'rspec', '>= 3.2'
s.add_development_dependency 'rubocop', '~> 1.1.0'
s.add_development_dependency 'rubocop', '~> 1.28.2'
s.add_development_dependency 'simplecov', '>= 0.11.2'
s.add_development_dependency 'thor', '~> 1.0', '>= 1.0.1'
s.add_development_dependency 'yarjuf', '>= 2.0'