diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 17ec95b..e0538bf 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -20,10 +20,10 @@ jobs: draft: false prerelease: false generateReleaseNotes: true - - name: Install Ruby 2.5.8 + - name: Install Ruby jruby-9.3.6.0 uses: ruby/setup-ruby@v1 with: - ruby-version: '2.5.8' + ruby-version: 'jruby-9.3.6.0' - name: Build gem run: gem build *.gemspec - name: Publish gem diff --git a/.github/workflows/testing.yml b/.github/workflows/testing.yml index 501403f..068495e 100644 --- a/.github/workflows/testing.yml +++ b/.github/workflows/testing.yml @@ -18,7 +18,7 @@ jobs: strategy: matrix: ruby-version: - - '2.5.8' + - 'jruby-9.3.6.0' steps: - uses: actions/checkout@v2 - name: Set up Ruby @@ -34,8 +34,7 @@ jobs: strategy: matrix: ruby-version: - - '2.5.8' - - 'jruby-9.2.12.0' + - 'jruby-9.3.6.0' steps: - uses: actions/checkout@v2 - name: Set up Ruby diff --git a/lib/vmpooler/aws_setup.rb b/lib/vmpooler/aws_setup.rb index 90a0825..8077faf 100644 --- a/lib/vmpooler/aws_setup.rb +++ b/lib/vmpooler/aws_setup.rb @@ -10,10 +10,19 @@ module Vmpooler ROOT_KEYS_SCRIPT = ENV['ROOT_KEYS_SCRIPT'] ROOT_KEYS_SYNC_CMD = "curl -k -o - -L #{ROOT_KEYS_SCRIPT} | %s" - def self.setup_node_by_ssh(host, platform) + def initialize(logger, new_vmname) + @logger = logger @key_file = ENV['AWS_KEY_FILE_LOCATION'] + @vm_name = new_vmname + end + + def setup_node_by_ssh(host, platform) conn = check_ssh_accepting_connections(host, platform) + return unless conn + + @logger.log('s', "[>] [#{platform}] '#{@vm_name}' net:ssh connected") configure_host(host, platform, conn) + @logger.log('s', "[>] [#{platform}] '#{@vm_name}' configured") end # For an Amazon Linux AMI, the user name is ec2-user. @@ -30,7 +39,7 @@ module Vmpooler # # For an Ubuntu AMI, the user name is ubuntu. - def self.get_user(platform) + def get_user(platform) if platform =~ /centos/ 'centos' elsif platform =~ /ubuntu/ @@ -42,22 +51,29 @@ module Vmpooler end end - def self.check_ssh_accepting_connections(host, platform) + def check_ssh_accepting_connections(host, platform) retries = 0 begin user = get_user(platform) netssh_jruby_workaround Net::SSH.start(host, user, keys: @key_file, timeout: 10) rescue Net::SSH::ConnectionTimeout, Errno::ECONNREFUSED => e - puts "Requested instances do not have sshd ready yet, try again: #{e}" + @logger.log('s', "[>] [#{platform}] '#{@vm_name}' net:ssh requested instances do not have sshd ready yet, try again for 300s (#{retries}/300): #{e}") sleep 1 retry if (retries += 1) < 300 + rescue Errno::EBADF => e + @logger.log('s', "[>] [#{platform}] '#{@vm_name}' net:ssh jruby error, try again for 300s (#{retries}/30): #{e}") + sleep 10 + retry if (retries += 1) < 30 + rescue StandardError => e + @logger.log('s', "[>] [#{platform}] '#{@vm_name}' net:ssh other error, skipping aws_setup: #{e}") + puts e.backtrace end end # Configure the aws host by enabling root and setting the hostname # @param host [String] the internal dns name of the instance - def self.configure_host(host, platform, ssh) + def configure_host(host, platform, ssh) ssh.exec!('sudo cp -r .ssh /root/.') ssh.exec!("sudo sed -ri 's/^#?PermitRootLogin.*/PermitRootLogin yes/' /etc/ssh/sshd_config") ssh.exec!("sudo hostname #{host}") @@ -69,7 +85,7 @@ module Vmpooler sync_root_keys(host, platform) end - def self.restart_sshd(host, platform, ssh) + def restart_sshd(host, platform, ssh) ssh.open_channel do |channel| channel.request_pty do |ch, success| raise "can't get pty request" unless success @@ -88,7 +104,7 @@ module Vmpooler ssh.loop end - def self.sync_root_keys(host, _platform) + def sync_root_keys(host, _platform) return if ROOT_KEYS_SCRIPT.nil? user = 'root' @@ -101,7 +117,7 @@ module Vmpooler # issue when using net ssh 6.1.0 with jruby # https://github.com/jruby/jruby-openssl/issues/105 # this will turn off some algos that match /^ecd(sa|h)-sha2/ - def self.netssh_jruby_workaround + def netssh_jruby_workaround Net::SSH::Transport::Algorithms::ALGORITHMS.each_value { |algs| algs.reject! { |a| a =~ /^ecd(sa|h)-sha2/ } } Net::SSH::KnownHosts::SUPPORTED_TYPE.reject! { |t| t =~ /^ecd(sa|h)-sha2/ } end diff --git a/lib/vmpooler/providers/ec2.rb b/lib/vmpooler/providers/ec2.rb index 4a7c26c..7ef5e36 100644 --- a/lib/vmpooler/providers/ec2.rb +++ b/lib/vmpooler/providers/ec2.rb @@ -31,6 +31,7 @@ module Vmpooler # The default connection pool timeout should be quite large - 60 seconds connpool_timeout = provider_config['connection_pool_timeout'].nil? ? 60 : provider_config['connection_pool_timeout'].to_i logger.log('d', "[#{name}] ConnPool - Creating a connection pool of size #{connpool_size} with timeout #{connpool_timeout}") + @logger = logger @connection_pool = Vmpooler::PoolManager::GenericConnectionPool.new( metrics: metrics, connpool_type: 'provider_connection_pool', @@ -204,11 +205,11 @@ module Vmpooler value: pool_name }, { - key: 'lifetime', + key: 'lifetime', # required by AWS reaper value: get_current_lifetime(new_vmname) }, { - key: 'created_by', + key: 'created_by', # required by AWS reaper value: get_current_user(new_vmname) }, { @@ -216,14 +217,17 @@ module Vmpooler value: get_current_job_url(new_vmname) }, { - key: 'organization', + key: 'organization', # required by AWS reaper value: 'engineering' }, { - key: 'portfolio', + key: 'portfolio', # required by AWS reaper value: 'ds-ci' + }, + { + key: 'Name', + value: new_vmname } - ] } ] @@ -247,16 +251,24 @@ module Vmpooler batch_instance = connection.create_instances(config) instance_id = batch_instance.first.instance_id connection.client.wait_until(:instance_running, { instance_ids: [instance_id] }) + @logger.log('s', "[>] [#{pool_name}] '#{new_vmname}' instance running") + ### System status checks + # This check verifies that your instance is reachable. Amazon EC2 tests that network packets can get to your instance. + ### Instance status checks + # This check verifies that your instance's operating system is accepting traffic. + connection.client.wait_until(:instance_status_ok, { instance_ids: [instance_id] }) + @logger.log('s', "[>] [#{pool_name}] '#{new_vmname}' instance ready to accept traffic") created_instance = get_vm(pool_name, new_vmname) # extra setup steps - provision_node_aws(created_instance['private_dns_name'], pool_name) if to_provision(pool_name) == 'true' || to_provision(pool_name) == true + provision_node_aws(created_instance['private_dns_name'], pool_name, new_vmname) if to_provision(pool_name) == 'true' || to_provision(pool_name) == true created_instance end - def provision_node_aws(vm, pool_name) - AwsSetup.setup_node_by_ssh(vm, pool_name) + def provision_node_aws(vm, pool_name, new_vmname) + aws_setup = AwsSetup.new(@logger, new_vmname) + aws_setup.setup_node_by_ssh(vm, pool_name) end def get_block_device_mappings(image_id, volume_size) @@ -373,13 +385,14 @@ module Vmpooler def vm_ready?(pool_name, vm_name) begin # TODO: we could use a healthcheck resource attached to instance - domain_set = domain || global_config[:config]['domain'] + domain_set = domain if domain_set.nil? - vm_ip = get_vm(pool_name, vm_name)['private_ip_address'] + vm_ip = get_vm(pool_name, vm_name)['private_dns_name'] vm_name = vm_ip unless vm_ip.nil? end open_socket(vm_name, domain_set) - rescue StandardError => _e + rescue StandardError => e + @logger.log('s', "[!] [#{pool_name}] '#{vm_name}' instance cannot be reached by vmpooler on tcp port 22; #{e}") return false end true @@ -430,10 +443,11 @@ module Vmpooler end end + # returns lifetime in hours in the format Xh defaults to 1h def get_current_lifetime(vm_name) @redis.with_metrics do |redis| - lifetime = redis.hget("vmpooler__vm__#{vm_name}", 'lifetime') || '1h' - return lifetime + lifetime = redis.hget("vmpooler__vm__#{vm_name}", 'lifetime') || '1' + return "#{lifetime}h" end end @@ -511,7 +525,7 @@ module Vmpooler def debug_logger(message, send_to_upstream: false) # the default logger is simple and does not enforce debug levels (the first argument) puts message if ENV['DEBUG_FLAG'] - logger.log('[g]', message) if send_to_upstream + @logger.log('[g]', message) if send_to_upstream end end end diff --git a/spec/helpers.rb b/spec/helpers.rb index 4b2dff6..eefcb04 100644 --- a/spec/helpers.rb +++ b/spec/helpers.rb @@ -10,7 +10,9 @@ end # Mock an object which represents a Logger. This stops the proliferation # of allow(logger).to .... expectations in tests. class MockLogger - def log(_level, string); end + def log(_level, string); + puts "#{string}" + end end def expect_json(ok = true, http = 200) diff --git a/vmpooler-provider-ec2.gemspec b/vmpooler-provider-ec2.gemspec index a569143..dd6986a 100644 --- a/vmpooler-provider-ec2.gemspec +++ b/vmpooler-provider-ec2.gemspec @@ -26,7 +26,7 @@ Gem::Specification.new do |s| s.add_development_dependency 'pry' s.add_development_dependency 'rack-test', '>= 0.6' s.add_development_dependency 'rspec', '>= 3.2' - s.add_development_dependency 'rubocop', '~> 1.1.0' + s.add_development_dependency 'rubocop', '~> 1.28.2' s.add_development_dependency 'simplecov', '>= 0.11.2' s.add_development_dependency 'thor', '~> 1.0', '>= 1.0.1' s.add_development_dependency 'yarjuf', '>= 2.0'