(POOLER-177) Filter hostname from API Paths

Use the example provided in the Ruby Client to provide a customised collector appropriate to log all calls to the API. The customised filtering is used to replace individual node names and templates for the /vm and request ID's for the /ondemand endpoints. This module was failing our rubocop checks so have updated it since it now forms part of vmpooler. Separate trapping for litmus jobs is also included so that they don't interfere with stats from the jenkins pipelines.
2026-01-26 01:58:41 -05:00 · 2020-06-17 22:32:56 +01:00 · 2020-06-17 22:32:56 +01:00 · cb955a1bed
commit cb955a1bed
parent b6dcd77228
7 changed files with 277 additions and 6 deletions
--- a/lib/vmpooler.rb
+++ b/lib/vmpooler.rb
@ -15,9 +15,6 @@ module Vmpooler
  require 'timeout'
  require 'yaml'

-  require 'prometheus/middleware/collector'
-  require 'prometheus/middleware/exporter'
-
  %w[api metrics logger pool_manager generic_connection_pool].each do |lib|
    require "vmpooler/#{lib}"
  end
--- a/lib/vmpooler/api.rb
+++ b/lib/vmpooler/api.rb
@ -35,7 +35,10 @@ module Vmpooler
        # in the config file.
        metrics.setup_prometheus_metrics

-        use Prometheus::Middleware::Collector, metrics_prefix: "#{metrics.metrics_prefix}_http"
+        # Using customised collector that filters out hostnames on API paths
+        require 'vmpooler/metrics/promstats/collector_middleware'
+        require 'prometheus/middleware/exporter'
+        use Vmpooler::Metrics::Promstats::CollectorMiddleware, metrics_prefix: "#{metrics.metrics_prefix}_http"
        use Prometheus::Middleware::Exporter, path: metrics.endpoint
      end

--- a/lib/vmpooler/api/v1.rb
+++ b/lib/vmpooler/api/v1.rb
@ -809,6 +809,7 @@ module Vmpooler

    post "#{api_prefix}/ondemandvm/?" do
      content_type :json
+      metrics.increment('api_vm.post.ondemand.requestid')

      need_token! if Vmpooler::API.settings.config[:auth]

@ -846,6 +847,7 @@ module Vmpooler
    post "#{api_prefix}/ondemandvm/:template/?" do
      content_type :json
      result = { 'ok' => false }
+      metrics.increment('api_vm.delete.ondemand.template')

      need_token! if Vmpooler::API.settings.config[:auth]

@ -872,6 +874,7 @@ module Vmpooler

    get "#{api_prefix}/ondemandvm/:requestid/?" do
      content_type :json
+      metrics.increment('api_vm.get.ondemand.request')

      status 404
      result = check_ondemand_request(params[:requestid])
@ -882,6 +885,7 @@ module Vmpooler
    delete "#{api_prefix}/ondemandvm/:requestid/?" do
      content_type :json
      need_token! if Vmpooler::API.settings.config[:auth]
+      metrics.increment('api_vm.delete.ondemand.request')

      status 404
      result = delete_ondemand_request(params[:requestid])
@ -892,6 +896,7 @@ module Vmpooler
    post "#{api_prefix}/vm/?" do
      content_type :json
      result = { 'ok' => false }
+      metrics.increment('api_vm.post.vm.checkout')

      payload = JSON.parse(request.body.read)

@ -1034,6 +1039,7 @@ module Vmpooler
    post "#{api_prefix}/vm/:template/?" do
      content_type :json
      result = { 'ok' => false }
+      metrics.increment('api_vm.get.vm.template')

      payload = extract_templates_from_query_params(params[:template])

@ -1057,6 +1063,7 @@ module Vmpooler

    get "#{api_prefix}/vm/:hostname/?" do
      content_type :json
+      metrics.increment('api_vm.get.vm.hostname')

      result = {}

@ -1129,6 +1136,7 @@ module Vmpooler

    delete "#{api_prefix}/vm/:hostname/?" do
      content_type :json
+      metrics.increment('api_vm.delete.vm.hostname')

      result = {}

@ -1156,6 +1164,7 @@ module Vmpooler

    put "#{api_prefix}/vm/:hostname/?" do
      content_type :json
+      metrics.increment('api_vm.put.vm.modify')

      status 404
      result = { 'ok' => false }
@ -1232,6 +1241,7 @@ module Vmpooler

    post "#{api_prefix}/vm/:hostname/disk/:size/?" do
      content_type :json
+      metrics.increment('api_vm.post.vm.disksize')

      need_token! if Vmpooler::API.settings.config[:auth]

@ -1255,6 +1265,7 @@ module Vmpooler

    post "#{api_prefix}/vm/:hostname/snapshot/?" do
      content_type :json
+      metrics.increment('api_vm.post.vm.snapshot')

      need_token! if Vmpooler::API.settings.config[:auth]

@ -1280,6 +1291,7 @@ module Vmpooler

    post "#{api_prefix}/vm/:hostname/snapshot/:snapshot/?" do
      content_type :json
+      metrics.increment('api_vm.post.vm.disksize')

      need_token! if Vmpooler::API.settings.config[:auth]

--- a/lib/vmpooler/metrics/promstats/collector_middleware.rb
+++ b/lib/vmpooler/metrics/promstats/collector_middleware.rb
@ -0,0 +1,119 @@
+# frozen_string_literal: true
+
+# This is an adapted Collector module for vmpooler based on the sample implementation
+# available in the prometheus client_ruby library
+# https://github.com/prometheus/client_ruby/blob/master/lib/prometheus/middleware/collector.rb
+#
+# The code was also failing Rubocop on PR check, so have addressed all the offenses.
+#
+# The method strip_ids_from_path has been adapted to add a match for hostnames in paths
+# to replace with a single ":hostname" string to avoid proliferation of stat lines for
+# each new vm hostname deleted, modified or otherwise queried.
+
+require 'benchmark'
+require 'prometheus/client'
+require 'vmpooler/logger'
+
+module Vmpooler
+  class Metrics
+    class Promstats
+      # CollectorMiddleware is an implementation of Rack Middleware customised
+      # for vmpooler use.
+      #
+      # By default metrics are registered on the global registry. Set the
+      # `:registry` option to use a custom registry.
+      #
+      # By default metrics all have the prefix "http_server". Set to something
+      # else if you like.
+      #
+      # The request counter metric is broken down by code, method and path by
+      # default. Set the `:counter_label_builder` option to use a custom label
+      # builder.
+      #
+      # The request duration metric is broken down by method and path by default.
+      # Set the `:duration_label_builder` option to use a custom label builder.
+      #
+      # Label Builder functions will receive a Rack env and a status code, and must
+      # return a hash with the labels for that request. They must also accept an empty
+      # env, and return a hash with the correct keys. This is necessary to initialize
+      # the metrics with the correct set of labels.
+      class CollectorMiddleware
+        attr_reader :app, :registry
+
+        def initialize(app, options = {})
+          @app = app
+          @registry = options[:registry] || Prometheus::Client.registry
+          @metrics_prefix = options[:metrics_prefix] || 'http_server'
+
+          init_request_metrics
+          init_exception_metrics
+        end
+
+        def call(env) # :nodoc:
+          trace(env) { @app.call(env) }
+        end
+
+        protected
+
+        def init_request_metrics
+          @requests = @registry.counter(
+            :"#{@metrics_prefix}_requests_total",
+            docstring:
+              'The total number of HTTP requests handled by the Rack application.',
+            labels: %i[code method path]
+          )
+          @durations = @registry.histogram(
+            :"#{@metrics_prefix}_request_duration_seconds",
+            docstring: 'The HTTP response duration of the Rack application.',
+            labels: %i[method path]
+          )
+        end
+
+        def init_exception_metrics
+          @exceptions = @registry.counter(
+            :"#{@metrics_prefix}_exceptions_total",
+            docstring: 'The total number of exceptions raised by the Rack application.',
+            labels: [:exception]
+          )
+        end
+
+        def trace(env)
+          response = nil
+          duration = Benchmark.realtime { response = yield }
+          record(env, response.first.to_s, duration)
+          response
+        rescue StandardError => e
+          @exceptions.increment(labels: { exception: e.class.name })
+          raise
+        end
+
+        def record(env, code, duration)
+          counter_labels = {
+            code: code,
+            method: env['REQUEST_METHOD'].downcase,
+            path: strip_ids_from_path(env['PATH_INFO'])
+          }
+
+          duration_labels = {
+            method: env['REQUEST_METHOD'].downcase,
+            path: strip_ids_from_path(env['PATH_INFO'])
+          }
+
+          @requests.increment(labels: counter_labels)
+          @durations.observe(duration, labels: duration_labels)
+        rescue # rubocop:disable Style/RescueStandardError
+          nil
+        end
+
+        def strip_ids_from_path(path)
+          # Custom for /vm path - so we just collect aggrate stats for all usage along this one
+          # path. Custom counters are then added more specific endpoints in v1.rb
+          # Since we aren't parsing UID/GIDs as in the original example, these are removed.
+          path
+            .gsub(%r{/vm/.+$}, '/vm')
+            .gsub(%r{/ondemand/.+$}, '/ondemand')
+        end
+      end
+    end
+  end
+end
--- a/lib/vmpooler/pool_manager.rb
+++ b/lib/vmpooler/pool_manager.rb
@ -496,7 +496,12 @@ module Vmpooler

      return unless jenkins_build_url

-      # TBD - Add Filter for Litmus here as well - to ignore for the moment.
+      if jenkins_build_url.include? 'litmus'
+        # Very simple filter for Litmus jobs - just count them coming through for the moment.
+        $metrics.increment("usage_litmus.#{user}.#{poolname}")
+        return
+      end
+
      url_parts = jenkins_build_url.split('/')[2..-1]
      jenkins_instance = url_parts[0].gsub('.', '_')
      value_stream_parts = url_parts[2].split('_')
--- a/spec/unit/collector_middleware_spec.rb
+++ b/spec/unit/collector_middleware_spec.rb
@ -0,0 +1,135 @@
+
+require 'rack/test'
+require 'vmpooler/metrics/promstats/collector_middleware'
+
+
+describe Vmpooler::Metrics::Promstats::CollectorMiddleware do
+  include Rack::Test::Methods
+
+  # Reset the data store
+  before do
+    Prometheus::Client.config.data_store = Prometheus::Client::DataStores::Synchronized.new
+  end
+
+  let(:registry) do
+    Prometheus::Client::Registry.new
+  end
+
+  let(:original_app) do
+    ->(_) { [200, { 'Content-Type' => 'text/html' }, ['OK']] }
+  end
+
+  let!(:app) do
+    described_class.new(original_app, registry: registry)
+  end
+
+  let(:dummy_error) { RuntimeError.new("Dummy error from tests") }
+
+  it 'returns the app response' do
+    get '/foo'
+
+    expect(last_response).to be_ok
+    expect(last_response.body).to eql('OK')
+  end
+
+  it 'handles errors in the registry gracefully' do
+    counter = registry.get(:http_server_requests_total)
+    expect(counter).to receive(:increment).and_raise(dummy_error)
+
+    get '/foo'
+
+    expect(last_response).to be_ok
+  end
+
+  it 'traces request information' do
+    expect(Benchmark).to receive(:realtime).and_yield.and_return(0.2)
+
+    get '/foo'
+
+    metric = :http_server_requests_total
+    labels = { method: 'get', path: '/foo', code: '200' }
+    expect(registry.get(metric).get(labels: labels)).to eql(1.0)
+
+    metric = :http_server_request_duration_seconds
+    labels = { method: 'get', path: '/foo' }
+    expect(registry.get(metric).get(labels: labels)).to include("0.1" => 0, "0.25" => 1)
+  end
+
+  it 'normalizes paths cotaining /vm by default' do
+    expect(Benchmark).to receive(:realtime).and_yield.and_return(0.3)
+
+    get '/foo/vm/bar-mumble-flame'
+
+    metric = :http_server_requests_total
+    labels = { method: 'get', path: '/foo/vm', code: '200' }
+    expect(registry.get(metric).get(labels: labels)).to eql(1.0)
+
+    metric = :http_server_request_duration_seconds
+    labels = { method: 'get', path: '/foo/vm' }
+    expect(registry.get(metric).get(labels: labels)).to include("0.1" => 0, "0.5" => 1)
+  end
+
+  it 'normalizes paths containing /ondemandvm by ' do
+    expect(Benchmark).to receive(:realtime).and_yield.and_return(0.3)
+
+    get '/foo/ondemand/bar/fatman'
+
+    metric = :http_server_requests_total
+    labels = { method: 'get', path: '/foo/ondemand', code: '200' }
+    expect(registry.get(metric).get(labels: labels)).to eql(1.0)
+
+    metric = :http_server_request_duration_seconds
+    labels = { method: 'get', path: '/foo/ondemand' }
+    expect(registry.get(metric).get(labels: labels)).to include("0.1" => 0, "0.5" => 1)
+  end
+
+  context 'when the app raises an exception' do
+    let(:original_app) do
+      lambda do |env|
+        raise dummy_error if env['PATH_INFO'] == '/broken'
+
+        [200, { 'Content-Type' => 'text/html' }, ['OK']]
+      end
+    end
+
+    before do
+      get '/foo'
+    end
+
+    it 'traces exceptions' do
+      expect { get '/broken' }.to raise_error RuntimeError
+
+      metric = :http_server_exceptions_total
+      labels = { exception: 'RuntimeError' }
+      expect(registry.get(metric).get(labels: labels)).to eql(1.0)
+    end
+  end
+
+  context 'when provided a custom metrics_prefix' do
+    let!(:app) do
+      described_class.new(
+        original_app,
+        registry: registry,
+        metrics_prefix: 'lolrus',
+      )
+    end
+
+    it 'provides alternate metric names' do
+      expect(
+        registry.get(:lolrus_requests_total),
+      ).to be_a(Prometheus::Client::Counter)
+      expect(
+        registry.get(:lolrus_request_duration_seconds),
+      ).to be_a(Prometheus::Client::Histogram)
+      expect(
+        registry.get(:lolrus_exceptions_total),
+      ).to be_a(Prometheus::Client::Counter)
+    end
+
+    it "doesn't register the default metrics" do
+      expect(registry.get(:http_server_requests_total)).to be(nil)
+      expect(registry.get(:http_server_request_duration_seconds)).to be(nil)
+      expect(registry.get(:http_server_exceptions_total)).to be(nil)
+    end
+  end
+end
--- a/spec/unit/generic_connection_pool_spec.rb
+++ b/spec/unit/generic_connection_pool_spec.rb
@ -1,7 +1,7 @@
 require 'spec_helper'

 describe 'GenericConnectionPool' do
-  let(:metrics) { Vmpooler::DummyStatsd.new }
+  let(:metrics) { Vmpooler::Metrics::DummyStatsd.new }
  let(:connpool_type) { 'test_connection_pool' }
  let(:connpool_provider) { 'testprovider' }
  let(:default_connpool_type) { 'connectionpool' }