From cb955a1bedae2ec67cb44eaac10c184095a9f825 Mon Sep 17 00:00:00 2001 From: John O'Connor Date: Wed, 17 Jun 2020 22:32:56 +0100 Subject: [PATCH] (POOLER-177) Filter hostname from API Paths Use the example provided in the Ruby Client to provide a customised collector appropriate to log all calls to the API. The customised filtering is used to replace individual node names and templates for the /vm and request ID's for the /ondemand endpoints. This module was failing our rubocop checks so have updated it since it now forms part of vmpooler. Separate trapping for litmus jobs is also included so that they don't interfere with stats from the jenkins pipelines. --- lib/vmpooler.rb | 3 - lib/vmpooler/api.rb | 5 +- lib/vmpooler/api/v1.rb | 12 ++ .../metrics/promstats/collector_middleware.rb | 119 +++++++++++++++ lib/vmpooler/pool_manager.rb | 7 +- spec/unit/collector_middleware_spec.rb | 135 ++++++++++++++++++ spec/unit/generic_connection_pool_spec.rb | 2 +- 7 files changed, 277 insertions(+), 6 deletions(-) create mode 100644 lib/vmpooler/metrics/promstats/collector_middleware.rb create mode 100644 spec/unit/collector_middleware_spec.rb diff --git a/lib/vmpooler.rb b/lib/vmpooler.rb index 98b4bd6..20eb26b 100644 --- a/lib/vmpooler.rb +++ b/lib/vmpooler.rb @@ -15,9 +15,6 @@ module Vmpooler require 'timeout' require 'yaml' - require 'prometheus/middleware/collector' - require 'prometheus/middleware/exporter' - %w[api metrics logger pool_manager generic_connection_pool].each do |lib| require "vmpooler/#{lib}" end diff --git a/lib/vmpooler/api.rb b/lib/vmpooler/api.rb index 068f024..cf516a7 100644 --- a/lib/vmpooler/api.rb +++ b/lib/vmpooler/api.rb @@ -35,7 +35,10 @@ module Vmpooler # in the config file. metrics.setup_prometheus_metrics - use Prometheus::Middleware::Collector, metrics_prefix: "#{metrics.metrics_prefix}_http" + # Using customised collector that filters out hostnames on API paths + require 'vmpooler/metrics/promstats/collector_middleware' + require 'prometheus/middleware/exporter' + use Vmpooler::Metrics::Promstats::CollectorMiddleware, metrics_prefix: "#{metrics.metrics_prefix}_http" use Prometheus::Middleware::Exporter, path: metrics.endpoint end diff --git a/lib/vmpooler/api/v1.rb b/lib/vmpooler/api/v1.rb index 5ecb2b5..c5cb0bf 100644 --- a/lib/vmpooler/api/v1.rb +++ b/lib/vmpooler/api/v1.rb @@ -809,6 +809,7 @@ module Vmpooler post "#{api_prefix}/ondemandvm/?" do content_type :json + metrics.increment('api_vm.post.ondemand.requestid') need_token! if Vmpooler::API.settings.config[:auth] @@ -846,6 +847,7 @@ module Vmpooler post "#{api_prefix}/ondemandvm/:template/?" do content_type :json result = { 'ok' => false } + metrics.increment('api_vm.delete.ondemand.template') need_token! if Vmpooler::API.settings.config[:auth] @@ -872,6 +874,7 @@ module Vmpooler get "#{api_prefix}/ondemandvm/:requestid/?" do content_type :json + metrics.increment('api_vm.get.ondemand.request') status 404 result = check_ondemand_request(params[:requestid]) @@ -882,6 +885,7 @@ module Vmpooler delete "#{api_prefix}/ondemandvm/:requestid/?" do content_type :json need_token! if Vmpooler::API.settings.config[:auth] + metrics.increment('api_vm.delete.ondemand.request') status 404 result = delete_ondemand_request(params[:requestid]) @@ -892,6 +896,7 @@ module Vmpooler post "#{api_prefix}/vm/?" do content_type :json result = { 'ok' => false } + metrics.increment('api_vm.post.vm.checkout') payload = JSON.parse(request.body.read) @@ -1034,6 +1039,7 @@ module Vmpooler post "#{api_prefix}/vm/:template/?" do content_type :json result = { 'ok' => false } + metrics.increment('api_vm.get.vm.template') payload = extract_templates_from_query_params(params[:template]) @@ -1057,6 +1063,7 @@ module Vmpooler get "#{api_prefix}/vm/:hostname/?" do content_type :json + metrics.increment('api_vm.get.vm.hostname') result = {} @@ -1129,6 +1136,7 @@ module Vmpooler delete "#{api_prefix}/vm/:hostname/?" do content_type :json + metrics.increment('api_vm.delete.vm.hostname') result = {} @@ -1156,6 +1164,7 @@ module Vmpooler put "#{api_prefix}/vm/:hostname/?" do content_type :json + metrics.increment('api_vm.put.vm.modify') status 404 result = { 'ok' => false } @@ -1232,6 +1241,7 @@ module Vmpooler post "#{api_prefix}/vm/:hostname/disk/:size/?" do content_type :json + metrics.increment('api_vm.post.vm.disksize') need_token! if Vmpooler::API.settings.config[:auth] @@ -1255,6 +1265,7 @@ module Vmpooler post "#{api_prefix}/vm/:hostname/snapshot/?" do content_type :json + metrics.increment('api_vm.post.vm.snapshot') need_token! if Vmpooler::API.settings.config[:auth] @@ -1280,6 +1291,7 @@ module Vmpooler post "#{api_prefix}/vm/:hostname/snapshot/:snapshot/?" do content_type :json + metrics.increment('api_vm.post.vm.disksize') need_token! if Vmpooler::API.settings.config[:auth] diff --git a/lib/vmpooler/metrics/promstats/collector_middleware.rb b/lib/vmpooler/metrics/promstats/collector_middleware.rb new file mode 100644 index 0000000..c05c866 --- /dev/null +++ b/lib/vmpooler/metrics/promstats/collector_middleware.rb @@ -0,0 +1,119 @@ +# frozen_string_literal: true + +# This is an adapted Collector module for vmpooler based on the sample implementation +# available in the prometheus client_ruby library +# https://github.com/prometheus/client_ruby/blob/master/lib/prometheus/middleware/collector.rb +# +# The code was also failing Rubocop on PR check, so have addressed all the offenses. +# +# The method strip_ids_from_path has been adapted to add a match for hostnames in paths +# to replace with a single ":hostname" string to avoid proliferation of stat lines for +# each new vm hostname deleted, modified or otherwise queried. + +require 'benchmark' +require 'prometheus/client' +require 'vmpooler/logger' + +module Vmpooler + class Metrics + class Promstats + # CollectorMiddleware is an implementation of Rack Middleware customised + # for vmpooler use. + # + # By default metrics are registered on the global registry. Set the + # `:registry` option to use a custom registry. + # + # By default metrics all have the prefix "http_server". Set to something + # else if you like. + # + # The request counter metric is broken down by code, method and path by + # default. Set the `:counter_label_builder` option to use a custom label + # builder. + # + # The request duration metric is broken down by method and path by default. + # Set the `:duration_label_builder` option to use a custom label builder. + # + # Label Builder functions will receive a Rack env and a status code, and must + # return a hash with the labels for that request. They must also accept an empty + # env, and return a hash with the correct keys. This is necessary to initialize + # the metrics with the correct set of labels. + class CollectorMiddleware + attr_reader :app, :registry + + def initialize(app, options = {}) + @app = app + @registry = options[:registry] || Prometheus::Client.registry + @metrics_prefix = options[:metrics_prefix] || 'http_server' + + init_request_metrics + init_exception_metrics + end + + def call(env) # :nodoc: + trace(env) { @app.call(env) } + end + + protected + + def init_request_metrics + @requests = @registry.counter( + :"#{@metrics_prefix}_requests_total", + docstring: + 'The total number of HTTP requests handled by the Rack application.', + labels: %i[code method path] + ) + @durations = @registry.histogram( + :"#{@metrics_prefix}_request_duration_seconds", + docstring: 'The HTTP response duration of the Rack application.', + labels: %i[method path] + ) + end + + def init_exception_metrics + @exceptions = @registry.counter( + :"#{@metrics_prefix}_exceptions_total", + docstring: 'The total number of exceptions raised by the Rack application.', + labels: [:exception] + ) + end + + def trace(env) + response = nil + duration = Benchmark.realtime { response = yield } + record(env, response.first.to_s, duration) + response + rescue StandardError => e + @exceptions.increment(labels: { exception: e.class.name }) + raise + end + + def record(env, code, duration) + counter_labels = { + code: code, + method: env['REQUEST_METHOD'].downcase, + path: strip_ids_from_path(env['PATH_INFO']) + } + + duration_labels = { + method: env['REQUEST_METHOD'].downcase, + path: strip_ids_from_path(env['PATH_INFO']) + } + + @requests.increment(labels: counter_labels) + @durations.observe(duration, labels: duration_labels) + rescue # rubocop:disable Style/RescueStandardError + nil + end + + def strip_ids_from_path(path) + # Custom for /vm path - so we just collect aggrate stats for all usage along this one + # path. Custom counters are then added more specific endpoints in v1.rb + # Since we aren't parsing UID/GIDs as in the original example, these are removed. + path + .gsub(%r{/vm/.+$}, '/vm') + .gsub(%r{/ondemand/.+$}, '/ondemand') + end + end + end + end +end diff --git a/lib/vmpooler/pool_manager.rb b/lib/vmpooler/pool_manager.rb index cdd6f63..0d56fbc 100644 --- a/lib/vmpooler/pool_manager.rb +++ b/lib/vmpooler/pool_manager.rb @@ -496,7 +496,12 @@ module Vmpooler return unless jenkins_build_url - # TBD - Add Filter for Litmus here as well - to ignore for the moment. + if jenkins_build_url.include? 'litmus' + # Very simple filter for Litmus jobs - just count them coming through for the moment. + $metrics.increment("usage_litmus.#{user}.#{poolname}") + return + end + url_parts = jenkins_build_url.split('/')[2..-1] jenkins_instance = url_parts[0].gsub('.', '_') value_stream_parts = url_parts[2].split('_') diff --git a/spec/unit/collector_middleware_spec.rb b/spec/unit/collector_middleware_spec.rb new file mode 100644 index 0000000..671e118 --- /dev/null +++ b/spec/unit/collector_middleware_spec.rb @@ -0,0 +1,135 @@ + +require 'rack/test' +require 'vmpooler/metrics/promstats/collector_middleware' + + +describe Vmpooler::Metrics::Promstats::CollectorMiddleware do + include Rack::Test::Methods + + # Reset the data store + before do + Prometheus::Client.config.data_store = Prometheus::Client::DataStores::Synchronized.new + end + + let(:registry) do + Prometheus::Client::Registry.new + end + + let(:original_app) do + ->(_) { [200, { 'Content-Type' => 'text/html' }, ['OK']] } + end + + let!(:app) do + described_class.new(original_app, registry: registry) + end + + let(:dummy_error) { RuntimeError.new("Dummy error from tests") } + + it 'returns the app response' do + get '/foo' + + expect(last_response).to be_ok + expect(last_response.body).to eql('OK') + end + + it 'handles errors in the registry gracefully' do + counter = registry.get(:http_server_requests_total) + expect(counter).to receive(:increment).and_raise(dummy_error) + + get '/foo' + + expect(last_response).to be_ok + end + + it 'traces request information' do + expect(Benchmark).to receive(:realtime).and_yield.and_return(0.2) + + get '/foo' + + metric = :http_server_requests_total + labels = { method: 'get', path: '/foo', code: '200' } + expect(registry.get(metric).get(labels: labels)).to eql(1.0) + + metric = :http_server_request_duration_seconds + labels = { method: 'get', path: '/foo' } + expect(registry.get(metric).get(labels: labels)).to include("0.1" => 0, "0.25" => 1) + end + + it 'normalizes paths cotaining /vm by default' do + expect(Benchmark).to receive(:realtime).and_yield.and_return(0.3) + + get '/foo/vm/bar-mumble-flame' + + metric = :http_server_requests_total + labels = { method: 'get', path: '/foo/vm', code: '200' } + expect(registry.get(metric).get(labels: labels)).to eql(1.0) + + metric = :http_server_request_duration_seconds + labels = { method: 'get', path: '/foo/vm' } + expect(registry.get(metric).get(labels: labels)).to include("0.1" => 0, "0.5" => 1) + end + + it 'normalizes paths containing /ondemandvm by ' do + expect(Benchmark).to receive(:realtime).and_yield.and_return(0.3) + + get '/foo/ondemand/bar/fatman' + + metric = :http_server_requests_total + labels = { method: 'get', path: '/foo/ondemand', code: '200' } + expect(registry.get(metric).get(labels: labels)).to eql(1.0) + + metric = :http_server_request_duration_seconds + labels = { method: 'get', path: '/foo/ondemand' } + expect(registry.get(metric).get(labels: labels)).to include("0.1" => 0, "0.5" => 1) + end + + context 'when the app raises an exception' do + let(:original_app) do + lambda do |env| + raise dummy_error if env['PATH_INFO'] == '/broken' + + [200, { 'Content-Type' => 'text/html' }, ['OK']] + end + end + + before do + get '/foo' + end + + it 'traces exceptions' do + expect { get '/broken' }.to raise_error RuntimeError + + metric = :http_server_exceptions_total + labels = { exception: 'RuntimeError' } + expect(registry.get(metric).get(labels: labels)).to eql(1.0) + end + end + + context 'when provided a custom metrics_prefix' do + let!(:app) do + described_class.new( + original_app, + registry: registry, + metrics_prefix: 'lolrus', + ) + end + + it 'provides alternate metric names' do + expect( + registry.get(:lolrus_requests_total), + ).to be_a(Prometheus::Client::Counter) + expect( + registry.get(:lolrus_request_duration_seconds), + ).to be_a(Prometheus::Client::Histogram) + expect( + registry.get(:lolrus_exceptions_total), + ).to be_a(Prometheus::Client::Counter) + end + + it "doesn't register the default metrics" do + expect(registry.get(:http_server_requests_total)).to be(nil) + expect(registry.get(:http_server_request_duration_seconds)).to be(nil) + expect(registry.get(:http_server_exceptions_total)).to be(nil) + end + end +end diff --git a/spec/unit/generic_connection_pool_spec.rb b/spec/unit/generic_connection_pool_spec.rb index 350e6f5..5c24215 100644 --- a/spec/unit/generic_connection_pool_spec.rb +++ b/spec/unit/generic_connection_pool_spec.rb @@ -1,7 +1,7 @@ require 'spec_helper' describe 'GenericConnectionPool' do - let(:metrics) { Vmpooler::DummyStatsd.new } + let(:metrics) { Vmpooler::Metrics::DummyStatsd.new } let(:connpool_type) { 'test_connection_pool' } let(:connpool_provider) { 'testprovider' } let(:default_connpool_type) { 'connectionpool' }