Compare commits

..

No commits in common. "main" and "3.4.0" have entirely different histories.
main ... 3.4.0

32 changed files with 386 additions and 2758 deletions

View file

@ -1,12 +0,0 @@
# Workflow: manually-triggered release prep, delegated to the shared
# release-engineering reusable workflow (pinned to its v1 tag).
name: Automated release prep
on:
  workflow_dispatch:
jobs:
  auto_release_prep:
    uses: puppetlabs/release-engineering-repo-standards/.github/workflows/auto_release_prep.yml@v1
    secrets: inherit
    with:
      # Tells the reusable workflow how to bump the version for a Ruby gem.
      project-type: ruby
      version-file-path: lib/vmpooler/version.rb

View file

@ -1,8 +0,0 @@
# Workflow: auto-merge Dependabot PRs via the shared reusable workflow
# (pinned to v1); runs on every pull_request event.
name: Dependabot auto-merge
on: pull_request
jobs:
  dependabot_merge:
    uses: puppetlabs/release-engineering-repo-standards/.github/workflows/dependabot_merge.yml@v1
    secrets: inherit

View file

@ -1,8 +0,0 @@
# Workflow: enforce PR labeling via the shared reusable workflow
# (pinned to v1); runs on every pull_request event.
name: Ensure label
on: pull_request
jobs:
  ensure_label:
    uses: puppetlabs/release-engineering-repo-standards/.github/workflows/ensure_label.yml@v1
    secrets: inherit

View file

@ -7,10 +7,10 @@ jobs:
runs-on: ubuntu-latest runs-on: ubuntu-latest
if: github.repository == 'puppetlabs/vmpooler' if: github.repository == 'puppetlabs/vmpooler'
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v3
- name: Get Current Version - name: Get Current Version
uses: actions/github-script@v7 uses: actions/github-script@v6
id: cv id: cv
with: with:
script: | script: |
@ -29,6 +29,37 @@ jobs:
echo "version=$version" >> $GITHUB_OUTPUT echo "version=$version" >> $GITHUB_OUTPUT
echo "Found version $version from lib/vmpooler/version.rb" echo "Found version $version from lib/vmpooler/version.rb"
# Regenerate CHANGELOG.md up to the upcoming release version so the
# validation step below can detect uncommitted changelog entries.
- name: Generate Changelog
  uses: docker://githubchangeloggenerator/github-changelog-generator:1.16.2
  with:
    args: >-
      --future-release ${{ steps.nv.outputs.version }}
  env:
    CHANGELOG_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
# Fail the run if regenerating the changelog produced a diff, i.e. a
# release-prep PR (running ./update-changelog) is still needed.
- name: Validate Changelog
  run: |
    set -e
    if [[ -n $(git status --porcelain) ]]; then
      echo "Here is the current git status:"
      git status
      echo
      echo "The following changes were detected:"
      git --no-pager diff
      # Single quotes keep the backticks literal; inside double quotes
      # they would command-substitute and actually run ./update-changelog.
      echo 'Uncommitted PRs found in the changelog. Please submit a release prep PR of changes after running `./update-changelog`'
      exit 1
    fi
# Produce release-notes.md covering only the span since the current
# version (steps.cv) up to the next version (steps.nv).
- name: Generate Release Notes
  uses: docker://githubchangeloggenerator/github-changelog-generator:1.16.2
  with:
    args: >-
      --since-tag ${{ steps.cv.outputs.result }}
      --future-release ${{ steps.nv.outputs.version }}
      --output release-notes.md
  env:
    CHANGELOG_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Tag Release - name: Tag Release
uses: ncipollo/release-action@v1 uses: ncipollo/release-action@v1
with: with:
@ -39,10 +70,10 @@ jobs:
prerelease: false prerelease: false
# This step should closely match what is used in `docker/Dockerfile` in vmpooler-deployment # This step should closely match what is used in `docker/Dockerfile` in vmpooler-deployment
- name: Install Ruby jruby-9.4.12.1 - name: Install Ruby jruby-9.4.3.0
uses: ruby/setup-ruby@v1 uses: ruby/setup-ruby@v1
with: with:
ruby-version: 'jruby-9.4.12.1' ruby-version: 'jruby-9.4.3.0'
- name: Build gem - name: Build gem
run: gem build *.gemspec run: gem build *.gemspec

View file

@ -11,7 +11,7 @@ jobs:
runs-on: ubuntu-latest runs-on: ubuntu-latest
steps: steps:
- name: checkout repo content - name: checkout repo content
uses: actions/checkout@v4 uses: actions/checkout@v3
with: with:
fetch-depth: 1 fetch-depth: 1
- name: setup ruby - name: setup ruby
@ -22,7 +22,7 @@ jobs:
- name: check lock - name: check lock
run: '[ -f "Gemfile.lock" ] && echo "package lock file exists, skipping" || bundle lock' run: '[ -f "Gemfile.lock" ] && echo "package lock file exists, skipping" || bundle lock'
# install java # install java
- uses: actions/setup-java@v4 - uses: actions/setup-java@v3
with: with:
distribution: 'temurin' # See 'Supported distributions' for available options distribution: 'temurin' # See 'Supported distributions' for available options
java-version: '17' java-version: '17'

View file

@ -18,9 +18,9 @@ jobs:
strategy: strategy:
matrix: matrix:
ruby-version: ruby-version:
- 'jruby-9.4.12.1' - 'jruby-9.4.3.0'
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v3
- name: Set up Ruby - name: Set up Ruby
uses: ruby/setup-ruby@v1 uses: ruby/setup-ruby@v1
with: with:
@ -34,9 +34,9 @@ jobs:
strategy: strategy:
matrix: matrix:
ruby-version: ruby-version:
- 'jruby-9.4.12.1' - 'jruby-9.4.3.0'
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v3
- name: Set up Ruby - name: Set up Ruby
uses: ruby/setup-ruby@v1 uses: ruby/setup-ruby@v1
with: with:

View file

@ -1,5 +1,3 @@
project=vmpooler project=vmpooler
user=puppetlabs user=puppetlabs
exclude_labels=maintenance exclude_labels=maintenance
github-api=https://api.github.com
release-branch=main

View file

@ -1,106 +1,5 @@
# Changelog # Changelog
## [3.8.1](https://github.com/puppetlabs/vmpooler/tree/3.8.1) (2026-01-14)
[Full Changelog](https://github.com/puppetlabs/vmpooler/compare/3.7.0...3.8.1)
**Implemented enhancements:**
- \(P4DEVOPS-9434\) Add rate limiting and input validation security enhancements [\#690](https://github.com/puppetlabs/vmpooler/pull/690) ([mahima-singh](https://github.com/mahima-singh))
- \(P4DEVOPS-8570\) Add Phase 2 optimizations: status API caching and improved Redis pipelining [\#689](https://github.com/puppetlabs/vmpooler/pull/689) ([mahima-singh](https://github.com/mahima-singh))
- \(P4DEVOPS-8567\) Add DLQ, auto-purge, and health checks for Redis queues [\#688](https://github.com/puppetlabs/vmpooler/pull/688) ([mahima-singh](https://github.com/mahima-singh))
- Add retry logic for immediate clone failures [\#687](https://github.com/puppetlabs/vmpooler/pull/687) ([mahima-singh](https://github.com/mahima-singh))
**Fixed bugs:**
- \(P4DEVOPS-8567\) Prevent VM allocation for already-deleted request-ids [\#688](https://github.com/puppetlabs/vmpooler/pull/688) ([mahima-singh](https://github.com/mahima-singh))
- Prevent re-queueing requests already marked as failed [\#687](https://github.com/puppetlabs/vmpooler/pull/687) ([mahima-singh](https://github.com/mahima-singh))
## [3.7.0](https://github.com/puppetlabs/vmpooler/tree/3.7.0) (2025-06-04)
[Full Changelog](https://github.com/puppetlabs/vmpooler/compare/3.6.0...3.7.0)
**Implemented enhancements:**
- \(P4DEVOPS-6096\) Include VMs that have been requested but not moved to pending when getting queue metrics [\#681](https://github.com/puppetlabs/vmpooler/pull/681) ([isaac-hammes](https://github.com/isaac-hammes))
- Bump redis from 5.1.0 to 5.2.0 [\#675](https://github.com/puppetlabs/vmpooler/pull/675) ([dependabot[bot]](https://github.com/apps/dependabot))
- Bump rake from 13.1.0 to 13.2.1 [\#673](https://github.com/puppetlabs/vmpooler/pull/673) ([dependabot[bot]](https://github.com/apps/dependabot))
- Bump redis from 5.0.8 to 5.1.0 [\#665](https://github.com/puppetlabs/vmpooler/pull/665) ([dependabot[bot]](https://github.com/apps/dependabot))
- Bump rspec from 3.12.0 to 3.13.0 [\#664](https://github.com/puppetlabs/vmpooler/pull/664) ([dependabot[bot]](https://github.com/apps/dependabot))
- Bump opentelemetry-sdk from 1.3.1 to 1.4.0 [\#663](https://github.com/puppetlabs/vmpooler/pull/663) ([dependabot[bot]](https://github.com/apps/dependabot))
- Bump mock\_redis from 0.43.0 to 0.44.0 [\#662](https://github.com/puppetlabs/vmpooler/pull/662) ([dependabot[bot]](https://github.com/apps/dependabot))
- Bump mock\_redis from 0.41.0 to 0.43.0 [\#658](https://github.com/puppetlabs/vmpooler/pull/658) ([dependabot[bot]](https://github.com/apps/dependabot))
- Bump net-ldap from 0.18.0 to 0.19.0 [\#653](https://github.com/puppetlabs/vmpooler/pull/653) ([dependabot[bot]](https://github.com/apps/dependabot))
- Bump sinatra from 3.1.0 to 3.2.0 [\#652](https://github.com/puppetlabs/vmpooler/pull/652) ([dependabot[bot]](https://github.com/apps/dependabot))
- Bump mock\_redis from 0.40.0 to 0.41.0 [\#650](https://github.com/puppetlabs/vmpooler/pull/650) ([dependabot[bot]](https://github.com/apps/dependabot))
- Bump mock\_redis from 0.37.0 to 0.40.0 [\#643](https://github.com/puppetlabs/vmpooler/pull/643) ([dependabot[bot]](https://github.com/apps/dependabot))
- Bump rake from 13.0.6 to 13.1.0 [\#638](https://github.com/puppetlabs/vmpooler/pull/638) ([dependabot[bot]](https://github.com/apps/dependabot))
- Bump thor from 1.2.2 to 1.3.0 [\#635](https://github.com/puppetlabs/vmpooler/pull/635) ([dependabot[bot]](https://github.com/apps/dependabot))
**Fixed bugs:**
- Bump opentelemetry-sdk from 1.4.0 to 1.4.1 [\#672](https://github.com/puppetlabs/vmpooler/pull/672) ([dependabot[bot]](https://github.com/apps/dependabot))
- Bump rack from 2.2.8.1 to 2.2.9 [\#671](https://github.com/puppetlabs/vmpooler/pull/671) ([dependabot[bot]](https://github.com/apps/dependabot))
- Bump thor from 1.3.0 to 1.3.1 [\#668](https://github.com/puppetlabs/vmpooler/pull/668) ([dependabot[bot]](https://github.com/apps/dependabot))
- Bump rack from 2.2.8 to 2.2.8.1 [\#666](https://github.com/puppetlabs/vmpooler/pull/666) ([dependabot[bot]](https://github.com/apps/dependabot))
- Bump concurrent-ruby from 1.2.2 to 1.2.3 [\#660](https://github.com/puppetlabs/vmpooler/pull/660) ([dependabot[bot]](https://github.com/apps/dependabot))
- Bump puma from 6.4.1 to 6.4.2 [\#655](https://github.com/puppetlabs/vmpooler/pull/655) ([dependabot[bot]](https://github.com/apps/dependabot))
- Bump puma from 6.4.0 to 6.4.1 [\#654](https://github.com/puppetlabs/vmpooler/pull/654) ([dependabot[bot]](https://github.com/apps/dependabot))
- Update opentelemetry-instrumentation-http\_client requirement from = 0.22.2 to = 0.22.3 [\#646](https://github.com/puppetlabs/vmpooler/pull/646) ([dependabot[bot]](https://github.com/apps/dependabot))
- Update opentelemetry-instrumentation-concurrent\_ruby requirement from = 0.21.1 to = 0.21.2 [\#645](https://github.com/puppetlabs/vmpooler/pull/645) ([dependabot[bot]](https://github.com/apps/dependabot))
- Bump opentelemetry-sdk from 1.3.0 to 1.3.1 [\#642](https://github.com/puppetlabs/vmpooler/pull/642) ([dependabot[bot]](https://github.com/apps/dependabot))
- Bump prometheus-client from 4.2.1 to 4.2.2 [\#641](https://github.com/puppetlabs/vmpooler/pull/641) ([dependabot[bot]](https://github.com/apps/dependabot))
- Bump redis from 5.0.7 to 5.0.8 [\#637](https://github.com/puppetlabs/vmpooler/pull/637) ([dependabot[bot]](https://github.com/apps/dependabot))
- \(RE-15817\) Reword fail warning and get error from redis before generating message [\#633](https://github.com/puppetlabs/vmpooler/pull/633) ([isaac-hammes](https://github.com/isaac-hammes))
**Merged pull requests:**
- \(P4DEVOPS-6096\) Fix gems to prevent warnings in logs [\#685](https://github.com/puppetlabs/vmpooler/pull/685) ([isaac-hammes](https://github.com/isaac-hammes))
- \(maint\) Revert gems to last release [\#683](https://github.com/puppetlabs/vmpooler/pull/683) ([isaac-hammes](https://github.com/isaac-hammes))
- Bump actions/setup-java from 3 to 4 [\#648](https://github.com/puppetlabs/vmpooler/pull/648) ([dependabot[bot]](https://github.com/apps/dependabot))
- Bump actions/github-script from 6 to 7 [\#644](https://github.com/puppetlabs/vmpooler/pull/644) ([dependabot[bot]](https://github.com/apps/dependabot))
## [3.6.0](https://github.com/puppetlabs/vmpooler/tree/3.6.0) (2023-10-05)
[Full Changelog](https://github.com/puppetlabs/vmpooler/compare/3.5.1...3.6.0)
**Fixed bugs:**
- \(maint\) Fix message for timeout notification. [\#624](https://github.com/puppetlabs/vmpooler/pull/624) ([isaac-hammes](https://github.com/isaac-hammes))
**Merged pull requests:**
- Bump rubocop from 1.56.3 to 1.56.4 [\#631](https://github.com/puppetlabs/vmpooler/pull/631) ([dependabot[bot]](https://github.com/apps/dependabot))
- Bump puma from 6.3.1 to 6.4.0 [\#630](https://github.com/puppetlabs/vmpooler/pull/630) ([dependabot[bot]](https://github.com/apps/dependabot))
- Bump rubocop from 1.56.2 to 1.56.3 [\#628](https://github.com/puppetlabs/vmpooler/pull/628) ([dependabot[bot]](https://github.com/apps/dependabot))
- Bump actions/checkout from 3 to 4 [\#627](https://github.com/puppetlabs/vmpooler/pull/627) ([dependabot[bot]](https://github.com/apps/dependabot))
- Update opentelemetry-resource\_detectors requirement from = 0.24.1 to = 0.24.2 [\#626](https://github.com/puppetlabs/vmpooler/pull/626) ([dependabot[bot]](https://github.com/apps/dependabot))
- Bump rubocop from 1.56.1 to 1.56.2 [\#625](https://github.com/puppetlabs/vmpooler/pull/625) ([dependabot[bot]](https://github.com/apps/dependabot))
- Bump rubocop from 1.56.0 to 1.56.1 [\#623](https://github.com/puppetlabs/vmpooler/pull/623) ([dependabot[bot]](https://github.com/apps/dependabot))
## [3.5.1](https://github.com/puppetlabs/vmpooler/tree/3.5.1) (2023-08-24)
[Full Changelog](https://github.com/puppetlabs/vmpooler/compare/3.5.0...3.5.1)
**Fixed bugs:**
- \(maint\) Fix bugs from redis and timeout notification updates. [\#621](https://github.com/puppetlabs/vmpooler/pull/621) ([isaac-hammes](https://github.com/isaac-hammes))
## [3.5.0](https://github.com/puppetlabs/vmpooler/tree/3.5.0) (2023-08-23)
[Full Changelog](https://github.com/puppetlabs/vmpooler/compare/3.4.0...3.5.0)
**Implemented enhancements:**
- Improve LDAP auth [\#616](https://github.com/puppetlabs/vmpooler/issues/616)
- \(maint\) Raise error when ip address is not given to vm after clone. [\#619](https://github.com/puppetlabs/vmpooler/pull/619) ([isaac-hammes](https://github.com/isaac-hammes))
- \(POD-8\) Add timeout\_notification config to log warning before vm is destroyed. [\#618](https://github.com/puppetlabs/vmpooler/pull/618) ([isaac-hammes](https://github.com/isaac-hammes))
- \(RE-15565\) Add ability to use bind\_as with a service account [\#617](https://github.com/puppetlabs/vmpooler/pull/617) ([yachub](https://github.com/yachub))
**Merged pull requests:**
- Bump puma from 6.3.0 to 6.3.1 [\#615](https://github.com/puppetlabs/vmpooler/pull/615) ([dependabot[bot]](https://github.com/apps/dependabot))
## [3.4.0](https://github.com/puppetlabs/vmpooler/tree/3.4.0) (2023-08-18) ## [3.4.0](https://github.com/puppetlabs/vmpooler/tree/3.4.0) (2023-08-18)
[Full Changelog](https://github.com/puppetlabs/vmpooler/compare/3.3.0...3.4.0) [Full Changelog](https://github.com/puppetlabs/vmpooler/compare/3.3.0...3.4.0)
@ -246,7 +145,6 @@
- \(maint\) Adding a provider method tag\_vm\_user [\#469](https://github.com/puppetlabs/vmpooler/pull/469) ([sbeaulie](https://github.com/sbeaulie)) - \(maint\) Adding a provider method tag\_vm\_user [\#469](https://github.com/puppetlabs/vmpooler/pull/469) ([sbeaulie](https://github.com/sbeaulie))
- Update testing.yml [\#468](https://github.com/puppetlabs/vmpooler/pull/468) ([sbeaulie](https://github.com/sbeaulie)) - Update testing.yml [\#468](https://github.com/puppetlabs/vmpooler/pull/468) ([sbeaulie](https://github.com/sbeaulie))
- Move vsphere specific methods out of vmpooler [\#467](https://github.com/puppetlabs/vmpooler/pull/467) ([sbeaulie](https://github.com/sbeaulie)) - Move vsphere specific methods out of vmpooler [\#467](https://github.com/puppetlabs/vmpooler/pull/467) ([sbeaulie](https://github.com/sbeaulie))
- Release prep for v2.0.0 [\#465](https://github.com/puppetlabs/vmpooler/pull/465) ([genebean](https://github.com/genebean))
## [2.0.0](https://github.com/puppetlabs/vmpooler/tree/2.0.0) (2021-12-08) ## [2.0.0](https://github.com/puppetlabs/vmpooler/tree/2.0.0) (2021-12-08)
@ -255,6 +153,7 @@
**Merged pull requests:** **Merged pull requests:**
- Use credentials file for Rubygems auth [\#466](https://github.com/puppetlabs/vmpooler/pull/466) ([genebean](https://github.com/genebean)) - Use credentials file for Rubygems auth [\#466](https://github.com/puppetlabs/vmpooler/pull/466) ([genebean](https://github.com/genebean))
- Release prep for v2.0.0 [\#465](https://github.com/puppetlabs/vmpooler/pull/465) ([genebean](https://github.com/genebean))
- Add Gem release workflow [\#464](https://github.com/puppetlabs/vmpooler/pull/464) ([genebean](https://github.com/genebean)) - Add Gem release workflow [\#464](https://github.com/puppetlabs/vmpooler/pull/464) ([genebean](https://github.com/genebean))
- Update icon in the readme to reference this repo [\#463](https://github.com/puppetlabs/vmpooler/pull/463) ([genebean](https://github.com/genebean)) - Update icon in the readme to reference this repo [\#463](https://github.com/puppetlabs/vmpooler/pull/463) ([genebean](https://github.com/genebean))
- \(DIO-2769\) Move vsphere provider to its own gem [\#462](https://github.com/puppetlabs/vmpooler/pull/462) ([genebean](https://github.com/genebean)) - \(DIO-2769\) Move vsphere provider to its own gem [\#462](https://github.com/puppetlabs/vmpooler/pull/462) ([genebean](https://github.com/genebean))
@ -299,17 +198,13 @@
**Merged pull requests:** **Merged pull requests:**
- \(POOLER-176\) Add Operation Label to User Metric [\#455](https://github.com/puppetlabs/vmpooler/pull/455) ([yachub](https://github.com/yachub)) - \(POOLER-176\) Add Operation Label to User Metric [\#455](https://github.com/puppetlabs/vmpooler/pull/455) ([yachub](https://github.com/yachub))
- Update OTel gems to 0.15.0 [\#450](https://github.com/puppetlabs/vmpooler/pull/450) ([genebean](https://github.com/genebean))
- Migrate testing to GH Actions from Travis [\#446](https://github.com/puppetlabs/vmpooler/pull/446) ([genebean](https://github.com/genebean))
## [1.1.0-rc.1](https://github.com/puppetlabs/vmpooler/tree/1.1.0-rc.1) (2021-08-11) ## [1.1.0-rc.1](https://github.com/puppetlabs/vmpooler/tree/1.1.0-rc.1) (2021-08-11)
[Full Changelog](https://github.com/puppetlabs/vmpooler/compare/1.0.0...1.1.0-rc.1) [Full Changelog](https://github.com/puppetlabs/vmpooler/compare/1.0.0...1.1.0-rc.1)
**Merged pull requests:**
- \(POOLER-176\) Add Operation Label to User Metric [\#454](https://github.com/puppetlabs/vmpooler/pull/454) ([yachub](https://github.com/yachub))
- Update OTel gems to 0.15.0 [\#450](https://github.com/puppetlabs/vmpooler/pull/450) ([genebean](https://github.com/genebean))
- Migrate testing to GH Actions from Travis [\#446](https://github.com/puppetlabs/vmpooler/pull/446) ([genebean](https://github.com/genebean))
## [1.0.0](https://github.com/puppetlabs/vmpooler/tree/1.0.0) (2021-02-02) ## [1.0.0](https://github.com/puppetlabs/vmpooler/tree/1.0.0) (2021-02-02)
[Full Changelog](https://github.com/puppetlabs/vmpooler/compare/0.18.2...1.0.0) [Full Changelog](https://github.com/puppetlabs/vmpooler/compare/0.18.2...1.0.0)
@ -818,13 +713,13 @@
- Do not have a hardcoded list of VM providers [\#230](https://github.com/puppetlabs/vmpooler/issues/230) - Do not have a hardcoded list of VM providers [\#230](https://github.com/puppetlabs/vmpooler/issues/230)
- Use a dynamic check\_pool period [\#226](https://github.com/puppetlabs/vmpooler/issues/226) - Use a dynamic check\_pool period [\#226](https://github.com/puppetlabs/vmpooler/issues/226)
- vmpooler doesn't seem to recognize ready VMs [\#218](https://github.com/puppetlabs/vmpooler/issues/218) - vmpooler doesn't seem to recognize ready VMs [\#218](https://github.com/puppetlabs/vmpooler/issues/218)
- `find\_vmdks` in `vsphere\_helper` should not use `vmdk\_datastore.\_connection` [\#213](https://github.com/puppetlabs/vmpooler/issues/213) - `find_vmdks` in `vsphere_helper` should not use `vmdk_datastore._connection` [\#213](https://github.com/puppetlabs/vmpooler/issues/213)
- `get\_base\_vm\_container\_from` in `vsphere\_helper` ensures the wrong connection [\#212](https://github.com/puppetlabs/vmpooler/issues/212) - `get_base_vm_container_from` in `vsphere_helper` ensures the wrong connection [\#212](https://github.com/puppetlabs/vmpooler/issues/212)
- `close` in vsphere\_helper throws an error if a connection was never made [\#211](https://github.com/puppetlabs/vmpooler/issues/211) - `close` in vsphere\_helper throws an error if a connection was never made [\#211](https://github.com/puppetlabs/vmpooler/issues/211)
- `find\_pool` in vsphere\_helper.rb has subtle errors [\#210](https://github.com/puppetlabs/vmpooler/issues/210) - `find_pool` in vsphere\_helper.rb has subtle errors [\#210](https://github.com/puppetlabs/vmpooler/issues/210)
- `find\_pool` in vsphere\_helper tends to throw instead of returning nil for missing pools [\#209](https://github.com/puppetlabs/vmpooler/issues/209) - `find_pool` in vsphere\_helper tends to throw instead of returning nil for missing pools [\#209](https://github.com/puppetlabs/vmpooler/issues/209)
- Vsphere connections are always insecure \(Ignore cert errors\) [\#207](https://github.com/puppetlabs/vmpooler/issues/207) - Vsphere connections are always insecure \(Ignore cert errors\) [\#207](https://github.com/puppetlabs/vmpooler/issues/207)
- `find\_folder` in vsphere\_helper.rb has subtle errors [\#204](https://github.com/puppetlabs/vmpooler/issues/204) - `find_folder` in vsphere\_helper.rb has subtle errors [\#204](https://github.com/puppetlabs/vmpooler/issues/204)
- Should not use `abort` in vsphere\_helper [\#203](https://github.com/puppetlabs/vmpooler/issues/203) - Should not use `abort` in vsphere\_helper [\#203](https://github.com/puppetlabs/vmpooler/issues/203)
- No reason why get\_snapshot\_list is defined in vsphere\_helper [\#202](https://github.com/puppetlabs/vmpooler/issues/202) - No reason why get\_snapshot\_list is defined in vsphere\_helper [\#202](https://github.com/puppetlabs/vmpooler/issues/202)
- Setting max\_tries in configuration results in vSphereHelper going into infinite loop [\#199](https://github.com/puppetlabs/vmpooler/issues/199) - Setting max\_tries in configuration results in vSphereHelper going into infinite loop [\#199](https://github.com/puppetlabs/vmpooler/issues/199)
@ -886,7 +781,7 @@
- \(POOLER-93\) Extend API endpoint to provide just what is needed [\#245](https://github.com/puppetlabs/vmpooler/pull/245) ([sbeaulie](https://github.com/sbeaulie)) - \(POOLER-93\) Extend API endpoint to provide just what is needed [\#245](https://github.com/puppetlabs/vmpooler/pull/245) ([sbeaulie](https://github.com/sbeaulie))
- \(POOLER-92\) Add the alias information in the API status page for each… [\#244](https://github.com/puppetlabs/vmpooler/pull/244) ([sbeaulie](https://github.com/sbeaulie)) - \(POOLER-92\) Add the alias information in the API status page for each… [\#244](https://github.com/puppetlabs/vmpooler/pull/244) ([sbeaulie](https://github.com/sbeaulie))
- \(QENG-5305\) Improve vmpooler host selection [\#242](https://github.com/puppetlabs/vmpooler/pull/242) ([mattkirby](https://github.com/mattkirby)) - \(QENG-5305\) Improve vmpooler host selection [\#242](https://github.com/puppetlabs/vmpooler/pull/242) ([mattkirby](https://github.com/mattkirby))
- Allow user to specify a configuration file in VMPOOLER\_CONFIG\_FILE variable [\#241](https://github.com/puppetlabs/vmpooler/pull/241) ([amcdson](https://github.com/amcdson)) - Allow user to specify a configuration file in VMPOOLER\_CONFIG\_FILE variable [\#241](https://github.com/puppetlabs/vmpooler/pull/241) ([adamdav](https://github.com/adamdav))
- Fix no implicit conversion to rational from nil [\#239](https://github.com/puppetlabs/vmpooler/pull/239) ([sbeaulie](https://github.com/sbeaulie)) - Fix no implicit conversion to rational from nil [\#239](https://github.com/puppetlabs/vmpooler/pull/239) ([sbeaulie](https://github.com/sbeaulie))
- Updated Vagrant box and associated docs [\#237](https://github.com/puppetlabs/vmpooler/pull/237) ([genebean](https://github.com/genebean)) - Updated Vagrant box and associated docs [\#237](https://github.com/puppetlabs/vmpooler/pull/237) ([genebean](https://github.com/genebean))
- \(GH-226\) Respond quickly to VMs being consumed [\#236](https://github.com/puppetlabs/vmpooler/pull/236) ([glennsarti](https://github.com/glennsarti)) - \(GH-226\) Respond quickly to VMs being consumed [\#236](https://github.com/puppetlabs/vmpooler/pull/236) ([glennsarti](https://github.com/glennsarti))
@ -920,7 +815,88 @@
- \(maint\) Add rubocop and allow failures in Travis CI [\#183](https://github.com/puppetlabs/vmpooler/pull/183) ([glennsarti](https://github.com/glennsarti)) - \(maint\) Add rubocop and allow failures in Travis CI [\#183](https://github.com/puppetlabs/vmpooler/pull/183) ([glennsarti](https://github.com/glennsarti))
- \(POOLER-73\) Update unit tests prior to refactoring [\#182](https://github.com/puppetlabs/vmpooler/pull/182) ([glennsarti](https://github.com/glennsarti)) - \(POOLER-73\) Update unit tests prior to refactoring [\#182](https://github.com/puppetlabs/vmpooler/pull/182) ([glennsarti](https://github.com/glennsarti))
- \(POOLER-71\) Add dummy authentication provider [\#180](https://github.com/puppetlabs/vmpooler/pull/180) ([glennsarti](https://github.com/glennsarti)) - \(POOLER-71\) Add dummy authentication provider [\#180](https://github.com/puppetlabs/vmpooler/pull/180) ([glennsarti](https://github.com/glennsarti))
- \(maint\) Remove Ruby 1.9.3 testing from Travis [\#178](https://github.com/puppetlabs/vmpooler/pull/178) ([glennsarti](https://github.com/glennsarti))
- \(maint\) Enhance VM Pooler developer experience [\#177](https://github.com/puppetlabs/vmpooler/pull/177) ([glennsarti](https://github.com/glennsarti)) - \(maint\) Enhance VM Pooler developer experience [\#177](https://github.com/puppetlabs/vmpooler/pull/177) ([glennsarti](https://github.com/glennsarti))
- \(POOLER-47\) Send clone errors up [\#175](https://github.com/puppetlabs/vmpooler/pull/175) ([mattkirby](https://github.com/mattkirby))
- \(POOLER-48\) Clear migrations at application start time [\#174](https://github.com/puppetlabs/vmpooler/pull/174) ([mattkirby](https://github.com/mattkirby))
- Add retry logic with a delay for vsphere connections [\#173](https://github.com/puppetlabs/vmpooler/pull/173) ([mattkirby](https://github.com/mattkirby))
- \(POOLER-44\) Fix vmpooler.migrate reference [\#172](https://github.com/puppetlabs/vmpooler/pull/172) ([mattkirby](https://github.com/mattkirby))
- Add `puma` as required gem [\#171](https://github.com/puppetlabs/vmpooler/pull/171) ([sschneid](https://github.com/sschneid))
- Fix JavaScript error on nil `weekly_data` [\#170](https://github.com/puppetlabs/vmpooler/pull/170) ([sschneid](https://github.com/sschneid))
- Containerize vmpooler [\#169](https://github.com/puppetlabs/vmpooler/pull/169) ([sschneid](https://github.com/sschneid))
- Add vagrant-vmpooler plugin to readme [\#168](https://github.com/puppetlabs/vmpooler/pull/168) ([briancain](https://github.com/briancain))
- Improve vmpooler scheduling logic [\#167](https://github.com/puppetlabs/vmpooler/pull/167) ([mattkirby](https://github.com/mattkirby))
- \[QENG-4181\] Add per-pool stats to `/status` API [\#162](https://github.com/puppetlabs/vmpooler/pull/162) ([rick](https://github.com/rick))
- Merge CI.next into Master [\#161](https://github.com/puppetlabs/vmpooler/pull/161) ([shermdog](https://github.com/shermdog))
- \(maint\) update README.md and LICENSE to reflect rebranding [\#157](https://github.com/puppetlabs/vmpooler/pull/157) ([erosa](https://github.com/erosa))
- Add info about vmfloaty [\#156](https://github.com/puppetlabs/vmpooler/pull/156) ([briancain](https://github.com/briancain))
- Added IP lookup functionality for /vm/hostname [\#154](https://github.com/puppetlabs/vmpooler/pull/154) ([frozenfoxx](https://github.com/frozenfoxx))
- Improved tests for vmpooler [\#152](https://github.com/puppetlabs/vmpooler/pull/152) ([rick](https://github.com/rick))
- Added prefix parameter to the vmpooler configuration [\#149](https://github.com/puppetlabs/vmpooler/pull/149) ([frozenfoxx](https://github.com/frozenfoxx))
- Update license copyright [\#148](https://github.com/puppetlabs/vmpooler/pull/148) ([sschneid](https://github.com/sschneid))
- Allow new disks to be added to running VMs via vmpooler API [\#147](https://github.com/puppetlabs/vmpooler/pull/147) ([sschneid](https://github.com/sschneid))
- Updated YAML config variables in create\_template\_deltas.rb [\#145](https://github.com/puppetlabs/vmpooler/pull/145) ([frozenfoxx](https://github.com/frozenfoxx))
- \(QA-2036\) Update README for Client Utility [\#143](https://github.com/puppetlabs/vmpooler/pull/143) ([cowofevil](https://github.com/cowofevil))
- add guestinfo.hostname to VirtualMachineConfigSpecs [\#139](https://github.com/puppetlabs/vmpooler/pull/139) ([heathseals](https://github.com/heathseals))
- \(QENG-2807\) Allow pool 'alias' names [\#138](https://github.com/puppetlabs/vmpooler/pull/138) ([sschneid](https://github.com/sschneid))
- \(QENG-2995\) Display associated VMs in GET /token/:token endpoint [\#137](https://github.com/puppetlabs/vmpooler/pull/137) ([sschneid](https://github.com/sschneid))
- Update API docs to include "domain" key for get vm requests [\#136](https://github.com/puppetlabs/vmpooler/pull/136) ([briancain](https://github.com/briancain))
- \(MAINT\) Remove Ping Check on Running VMs [\#133](https://github.com/puppetlabs/vmpooler/pull/133) ([colinPL](https://github.com/colinPL))
- \(maint\) Move VM Only When SSH Check Succeeds [\#131](https://github.com/puppetlabs/vmpooler/pull/131) ([colinPL](https://github.com/colinPL))
- \(QENG-2952\) Check that SSH is available [\#130](https://github.com/puppetlabs/vmpooler/pull/130) ([sschneid](https://github.com/sschneid))
- \(maint\) Update license copyright [\#128](https://github.com/puppetlabs/vmpooler/pull/128) ([sschneid](https://github.com/sschneid))
- \(maint\) Remove duplicate \(nested\) "ok" responses [\#127](https://github.com/puppetlabs/vmpooler/pull/127) ([sschneid](https://github.com/sschneid))
- \(maint\) Documentation updates [\#126](https://github.com/puppetlabs/vmpooler/pull/126) ([sschneid](https://github.com/sschneid))
- Track token use times [\#125](https://github.com/puppetlabs/vmpooler/pull/125) ([sschneid](https://github.com/sschneid))
- Docs update [\#124](https://github.com/puppetlabs/vmpooler/pull/124) ([sschneid](https://github.com/sschneid))
- User token list [\#123](https://github.com/puppetlabs/vmpooler/pull/123) ([sschneid](https://github.com/sschneid))
- \(maint\) Additional utility and reporting scripts [\#122](https://github.com/puppetlabs/vmpooler/pull/122) ([sschneid](https://github.com/sschneid))
- \(maint\) Syntax fixup [\#121](https://github.com/puppetlabs/vmpooler/pull/121) ([sschneid](https://github.com/sschneid))
- \(MAINT\) Reduce redis Calls in API [\#120](https://github.com/puppetlabs/vmpooler/pull/120) ([colinPL](https://github.com/colinPL))
- \(maint\) Use expect\_json helper method for determining JSON response status [\#119](https://github.com/puppetlabs/vmpooler/pull/119) ([sschneid](https://github.com/sschneid))
- \(QENG-1304\) vmpooler should require an auth key for VM destruction [\#118](https://github.com/puppetlabs/vmpooler/pull/118) ([sschneid](https://github.com/sschneid))
- \(QENG-2636\) Host snapshots [\#117](https://github.com/puppetlabs/vmpooler/pull/117) ([sschneid](https://github.com/sschneid))
- \(maint\) Use dep caching and containers [\#116](https://github.com/puppetlabs/vmpooler/pull/116) ([sschneid](https://github.com/sschneid))
- \(maint\) Include travis-ci build status in README [\#115](https://github.com/puppetlabs/vmpooler/pull/115) ([sschneid](https://github.com/sschneid))
- Show test contexts and names [\#114](https://github.com/puppetlabs/vmpooler/pull/114) ([sschneid](https://github.com/sschneid))
- \(QENG-2246\) Add Default Rake Task [\#113](https://github.com/puppetlabs/vmpooler/pull/113) ([colinPL](https://github.com/colinPL))
- Log empty pools [\#112](https://github.com/puppetlabs/vmpooler/pull/112) ([sschneid](https://github.com/sschneid))
- \(QENG-2246\) Add Travis CI [\#111](https://github.com/puppetlabs/vmpooler/pull/111) ([colinPL](https://github.com/colinPL))
- \(QENG-2388\) Tagging restrictions [\#110](https://github.com/puppetlabs/vmpooler/pull/110) ([sschneid](https://github.com/sschneid))
- An updated dashboard [\#109](https://github.com/puppetlabs/vmpooler/pull/109) ([sschneid](https://github.com/sschneid))
- API summary rework [\#108](https://github.com/puppetlabs/vmpooler/pull/108) ([sschneid](https://github.com/sschneid))
- Only filter regex matches [\#106](https://github.com/puppetlabs/vmpooler/pull/106) ([sschneid](https://github.com/sschneid))
- \(QENG-2518\) Tag-filtering [\#105](https://github.com/puppetlabs/vmpooler/pull/105) ([sschneid](https://github.com/sschneid))
- \(QENG-2360\) check\_running\_vm Spec Tests [\#104](https://github.com/puppetlabs/vmpooler/pull/104) ([colinPL](https://github.com/colinPL))
- \(QENG-2056\) Create daily tag indexes, report in /summary [\#102](https://github.com/puppetlabs/vmpooler/pull/102) ([sschneid](https://github.com/sschneid))
- Store token metadata in vmpooler\_\_vm\_\_ Redis hash [\#101](https://github.com/puppetlabs/vmpooler/pull/101) ([sschneid](https://github.com/sschneid))
- Display VM state in GET /vm/:hostname route [\#100](https://github.com/puppetlabs/vmpooler/pull/100) ([sschneid](https://github.com/sschneid))
- Add basic auth token functionality [\#98](https://github.com/puppetlabs/vmpooler/pull/98) ([sschneid](https://github.com/sschneid))
- Add basic HTTP authentication and /token routes [\#97](https://github.com/puppetlabs/vmpooler/pull/97) ([sschneid](https://github.com/sschneid))
- \(QENG-2208\) Add more helper tests [\#95](https://github.com/puppetlabs/vmpooler/pull/95) ([colinPL](https://github.com/colinPL))
- \(QENG-2208\) Move Sinatra Helpers to own file [\#94](https://github.com/puppetlabs/vmpooler/pull/94) ([colinPL](https://github.com/colinPL))
- Fix rspec tests broken in f9de28236b726e37977123cea9b4f3a562bfdcdb [\#93](https://github.com/puppetlabs/vmpooler/pull/93) ([sschneid](https://github.com/sschneid))
- Redirect / to /dashboard [\#92](https://github.com/puppetlabs/vmpooler/pull/92) ([sschneid](https://github.com/sschneid))
- Ensure 'lifetime' val returned by GET /vm/:hostname is an int [\#91](https://github.com/puppetlabs/vmpooler/pull/91) ([sschneid](https://github.com/sschneid))
- running-to-lifetime comparison should be 'greater than or equal to' [\#90](https://github.com/puppetlabs/vmpooler/pull/90) ([sschneid](https://github.com/sschneid))
- Auto-expire Redis metadata key via Redis EXPIRE [\#89](https://github.com/puppetlabs/vmpooler/pull/89) ([sschneid](https://github.com/sschneid))
- \(QENG-1906\) Add specs for Dashboard and root API class [\#88](https://github.com/puppetlabs/vmpooler/pull/88) ([colinPL](https://github.com/colinPL))
- \(maint\) Fix bad redis reference [\#87](https://github.com/puppetlabs/vmpooler/pull/87) ([colinPL](https://github.com/colinPL))
- \(QENG-1906\) Break apart check\_pending\_vm and add spec tests [\#86](https://github.com/puppetlabs/vmpooler/pull/86) ([colinPL](https://github.com/colinPL))
- Remove defined? when checking configuration for graphite server. [\#85](https://github.com/puppetlabs/vmpooler/pull/85) ([colinPL](https://github.com/colinPL))
- \(QENG-1906\) Add spec tests for Janitor [\#78](https://github.com/puppetlabs/vmpooler/pull/78) ([colinPL](https://github.com/colinPL))
- \(QENG-1906\) Refactor initialize to allow config passing [\#77](https://github.com/puppetlabs/vmpooler/pull/77) ([colinPL](https://github.com/colinPL))
- Use 'checkout' time to calculate 'running' time [\#75](https://github.com/puppetlabs/vmpooler/pull/75) ([sschneid](https://github.com/sschneid))
- Catch improperly-formatted data payloads [\#73](https://github.com/puppetlabs/vmpooler/pull/73) ([sschneid](https://github.com/sschneid))
- \(QENG-1905\) Adding VM-tagging support via PUT /vm/:hostname endpoint [\#72](https://github.com/puppetlabs/vmpooler/pull/72) ([sschneid](https://github.com/sschneid))
- \(QENG-2057\) Historic Redis VM metadata [\#71](https://github.com/puppetlabs/vmpooler/pull/71) ([sschneid](https://github.com/sschneid))
- \(QENG-1899\) Add documentation for /summary [\#67](https://github.com/puppetlabs/vmpooler/pull/67) ([colinPL](https://github.com/colinPL))
- Use $redis.hgetall rather than hget in a loop [\#66](https://github.com/puppetlabs/vmpooler/pull/66) ([sschneid](https://github.com/sschneid))
- /summary per-pool metrics [\#65](https://github.com/puppetlabs/vmpooler/pull/65) ([sschneid](https://github.com/sschneid))
- Show boot metrics in /status and /summary endpoints [\#64](https://github.com/puppetlabs/vmpooler/pull/64) ([sschneid](https://github.com/sschneid))
- \(maint\) Fixing spacing [\#63](https://github.com/puppetlabs/vmpooler/pull/63) ([sschneid](https://github.com/sschneid))
- Metric calc via helpers [\#62](https://github.com/puppetlabs/vmpooler/pull/62) ([sschneid](https://github.com/sschneid))
- More granular metrics [\#61](https://github.com/puppetlabs/vmpooler/pull/61) ([sschneid](https://github.com/sschneid))

View file

@ -3,11 +3,11 @@ source ENV['GEM_SOURCE'] || 'https://rubygems.org'
gemspec gemspec
# Evaluate Gemfile.local if it exists # Evaluate Gemfile.local if it exists
if File.exist? "#{__FILE__}.local" if File.exists? "#{__FILE__}.local"
instance_eval(File.read("#{__FILE__}.local")) instance_eval(File.read("#{__FILE__}.local"))
end end
# Evaluate ~/.gemfile if it exists # Evaluate ~/.gemfile if it exists
if File.exist?(File.join(Dir.home, '.gemfile')) if File.exists?(File.join(Dir.home, '.gemfile'))
instance_eval(File.read(File.join(Dir.home, '.gemfile'))) instance_eval(File.read(File.join(Dir.home, '.gemfile')))
end end

View file

@ -1,7 +1,7 @@
PATH PATH
remote: . remote: .
specs: specs:
vmpooler (3.8.1) vmpooler (3.4.0)
concurrent-ruby (~> 1.1) concurrent-ruby (~> 1.1)
connection_pool (~> 2.4) connection_pool (~> 2.4)
deep_merge (~> 1.2) deep_merge (~> 1.2)
@ -9,11 +9,10 @@ PATH
opentelemetry-exporter-jaeger (= 0.23.0) opentelemetry-exporter-jaeger (= 0.23.0)
opentelemetry-instrumentation-concurrent_ruby (= 0.21.1) opentelemetry-instrumentation-concurrent_ruby (= 0.21.1)
opentelemetry-instrumentation-http_client (= 0.22.2) opentelemetry-instrumentation-http_client (= 0.22.2)
opentelemetry-instrumentation-rack (= 0.23.4)
opentelemetry-instrumentation-redis (= 0.25.3) opentelemetry-instrumentation-redis (= 0.25.3)
opentelemetry-instrumentation-sinatra (= 0.23.2) opentelemetry-instrumentation-sinatra (= 0.23.2)
opentelemetry-resource_detectors (= 0.24.2) opentelemetry-resource_detectors (= 0.24.1)
opentelemetry-sdk (~> 1.8) opentelemetry-sdk (~> 1.3, >= 1.3.0)
pickup (~> 0.0.11) pickup (~> 0.0.11)
prometheus-client (>= 2, < 5) prometheus-client (>= 2, < 5)
puma (>= 5.0.4, < 7) puma (>= 5.0.4, < 7)
@ -27,41 +26,36 @@ PATH
GEM GEM
remote: https://rubygems.org/ remote: https://rubygems.org/
specs: specs:
ast (2.4.3) ast (2.4.2)
base64 (0.1.2) base64 (0.1.1)
bindata (2.5.1) bindata (2.4.15)
builder (3.3.0) builder (3.2.4)
climate_control (1.2.0) climate_control (1.2.0)
coderay (1.1.3) coderay (1.1.3)
concurrent-ruby (1.3.5) concurrent-ruby (1.2.2)
connection_pool (2.5.3) connection_pool (2.4.1)
deep_merge (1.2.2) deep_merge (1.2.2)
diff-lcs (1.6.2) diff-lcs (1.5.0)
docile (1.4.1) docile (1.4.0)
faraday (2.13.1) faraday (2.7.10)
faraday-net_http (>= 2.0, < 3.5) faraday-net_http (>= 2.0, < 3.1)
json ruby2_keywords (>= 0.0.4)
logger faraday-net_http (3.0.2)
faraday-net_http (3.4.0) ffi (1.15.5-java)
net-http (>= 0.5.0) google-cloud-env (1.6.0)
ffi (1.17.2-java) faraday (>= 0.17.3, < 3.0)
google-cloud-env (2.2.1) json (2.6.3)
faraday (>= 1.0, < 3.a) json (2.6.3-java)
json (2.12.2) language_server-protocol (3.17.0.3)
json (2.12.2-java) method_source (1.0.0)
language_server-protocol (3.17.0.5)
logger (1.7.0)
method_source (1.1.0)
mock_redis (0.37.0) mock_redis (0.37.0)
mustermann (3.0.3) mustermann (3.0.0)
ruby2_keywords (~> 0.0.1) ruby2_keywords (~> 0.0.1)
net-http (0.6.0) net-ldap (0.18.0)
uri nio4r (2.5.9)
net-ldap (0.19.0) nio4r (2.5.9-java)
nio4r (2.7.4) opentelemetry-api (1.2.1)
nio4r (2.7.4-java) opentelemetry-common (0.20.0)
opentelemetry-api (1.5.0)
opentelemetry-common (0.20.1)
opentelemetry-api (~> 1.0) opentelemetry-api (~> 1.0)
opentelemetry-exporter-jaeger (0.23.0) opentelemetry-exporter-jaeger (0.23.0)
opentelemetry-api (~> 1.1) opentelemetry-api (~> 1.1)
@ -69,7 +63,7 @@ GEM
opentelemetry-sdk (~> 1.2) opentelemetry-sdk (~> 1.2)
opentelemetry-semantic_conventions opentelemetry-semantic_conventions
thrift thrift
opentelemetry-instrumentation-base (0.22.3) opentelemetry-instrumentation-base (0.22.2)
opentelemetry-api (~> 1.0) opentelemetry-api (~> 1.0)
opentelemetry-registry (~> 0.1) opentelemetry-registry (~> 0.1)
opentelemetry-instrumentation-concurrent_ruby (0.21.1) opentelemetry-instrumentation-concurrent_ruby (0.21.1)
@ -92,67 +86,64 @@ GEM
opentelemetry-common (~> 0.20.0) opentelemetry-common (~> 0.20.0)
opentelemetry-instrumentation-base (~> 0.22.1) opentelemetry-instrumentation-base (~> 0.22.1)
opentelemetry-instrumentation-rack (~> 0.21) opentelemetry-instrumentation-rack (~> 0.21)
opentelemetry-registry (0.4.0) opentelemetry-registry (0.3.0)
opentelemetry-api (~> 1.1) opentelemetry-api (~> 1.1)
opentelemetry-resource_detectors (0.24.2) opentelemetry-resource_detectors (0.24.1)
google-cloud-env google-cloud-env
opentelemetry-sdk (~> 1.0) opentelemetry-sdk (~> 1.0)
opentelemetry-sdk (1.8.0) opentelemetry-sdk (1.3.0)
opentelemetry-api (~> 1.1) opentelemetry-api (~> 1.1)
opentelemetry-common (~> 0.20) opentelemetry-common (~> 0.20)
opentelemetry-registry (~> 0.2) opentelemetry-registry (~> 0.2)
opentelemetry-semantic_conventions opentelemetry-semantic_conventions
opentelemetry-semantic_conventions (1.11.0) opentelemetry-semantic_conventions (1.10.0)
opentelemetry-api (~> 1.0) opentelemetry-api (~> 1.0)
parallel (1.27.0) parallel (1.23.0)
parser (3.3.8.0) parser (3.2.2.3)
ast (~> 2.4.1) ast (~> 2.4.1)
racc racc
pickup (0.0.11) pickup (0.0.11)
prism (1.4.0) prometheus-client (4.2.1)
prometheus-client (4.2.4) pry (0.14.2)
base64
pry (0.15.2)
coderay (~> 1.1) coderay (~> 1.1)
method_source (~> 1.0) method_source (~> 1.0)
pry (0.15.2-java) pry (0.14.2-java)
coderay (~> 1.1) coderay (~> 1.1)
method_source (~> 1.0) method_source (~> 1.0)
spoon (~> 0.0) spoon (~> 0.0)
puma (6.6.0) puma (6.3.0)
nio4r (~> 2.0) nio4r (~> 2.0)
puma (6.6.0-java) puma (6.3.0-java)
nio4r (~> 2.0) nio4r (~> 2.0)
racc (1.8.1) racc (1.7.1)
racc (1.8.1-java) racc (1.7.1-java)
rack (2.2.17) rack (2.2.8)
rack-protection (3.2.0) rack-protection (3.1.0)
base64 (>= 0.1.0)
rack (~> 2.2, >= 2.2.4) rack (~> 2.2, >= 2.2.4)
rack-test (2.2.0) rack-test (2.1.0)
rack (>= 1.3) rack (>= 1.3)
rainbow (3.1.1) rainbow (3.1.1)
rake (13.3.0) rake (13.0.6)
redis (5.4.0) redis (5.0.7)
redis-client (>= 0.22.0) redis-client (>= 0.9.0)
redis-client (0.24.0) redis-client (0.15.0)
connection_pool connection_pool
regexp_parser (2.10.0) regexp_parser (2.8.1)
rexml (3.4.1) rexml (3.2.6)
rspec (3.13.1) rspec (3.12.0)
rspec-core (~> 3.13.0) rspec-core (~> 3.12.0)
rspec-expectations (~> 3.13.0) rspec-expectations (~> 3.12.0)
rspec-mocks (~> 3.13.0) rspec-mocks (~> 3.12.0)
rspec-core (3.13.4) rspec-core (3.12.2)
rspec-support (~> 3.13.0) rspec-support (~> 3.12.0)
rspec-expectations (3.13.5) rspec-expectations (3.12.3)
diff-lcs (>= 1.2.0, < 2.0) diff-lcs (>= 1.2.0, < 2.0)
rspec-support (~> 3.13.0) rspec-support (~> 3.12.0)
rspec-mocks (3.13.5) rspec-mocks (3.12.6)
diff-lcs (>= 1.2.0, < 2.0) diff-lcs (>= 1.2.0, < 2.0)
rspec-support (~> 3.13.0) rspec-support (~> 3.12.0)
rspec-support (3.13.4) rspec-support (3.12.1)
rubocop (1.56.4) rubocop (1.56.0)
base64 (~> 0.1.1) base64 (~> 0.1.1)
json (~> 2.3) json (~> 2.3)
language_server-protocol (>= 3.17.0) language_server-protocol (>= 3.17.0)
@ -164,48 +155,41 @@ GEM
rubocop-ast (>= 1.28.1, < 2.0) rubocop-ast (>= 1.28.1, < 2.0)
ruby-progressbar (~> 1.7) ruby-progressbar (~> 1.7)
unicode-display_width (>= 2.4.0, < 3.0) unicode-display_width (>= 2.4.0, < 3.0)
rubocop-ast (1.44.1) rubocop-ast (1.29.0)
parser (>= 3.3.7.2) parser (>= 3.2.1.0)
prism (~> 1.4)
ruby-progressbar (1.13.0) ruby-progressbar (1.13.0)
ruby2_keywords (0.0.5) ruby2_keywords (0.0.5)
simplecov (0.22.0) simplecov (0.22.0)
docile (~> 1.1) docile (~> 1.1)
simplecov-html (~> 0.11) simplecov-html (~> 0.11)
simplecov_json_formatter (~> 0.1) simplecov_json_formatter (~> 0.1)
simplecov-html (0.13.1) simplecov-html (0.12.3)
simplecov_json_formatter (0.1.4) simplecov_json_formatter (0.1.4)
sinatra (3.2.0) sinatra (3.1.0)
mustermann (~> 3.0) mustermann (~> 3.0)
rack (~> 2.2, >= 2.2.4) rack (~> 2.2, >= 2.2.4)
rack-protection (= 3.2.0) rack-protection (= 3.1.0)
tilt (~> 2.0) tilt (~> 2.0)
spicy-proton (2.1.15) spicy-proton (2.1.15)
bindata (~> 2.3) bindata (~> 2.3)
spoon (0.0.6) spoon (0.0.6)
ffi ffi
statsd-ruby (1.5.0) statsd-ruby (1.5.0)
thor (1.3.2) thor (1.2.2)
thrift (0.22.0) thrift (0.18.1)
tilt (2.6.0) tilt (2.2.0)
unicode-display_width (2.6.0) unicode-display_width (2.4.2)
uri (1.0.3)
yarjuf (2.0.0) yarjuf (2.0.0)
builder builder
rspec (~> 3) rspec (~> 3)
PLATFORMS PLATFORMS
arm64-darwin-22
arm64-darwin-23
arm64-darwin-25
universal-java-11 universal-java-11
universal-java-17
x86_64-darwin-22
x86_64-linux x86_64-linux
DEPENDENCIES DEPENDENCIES
climate_control (>= 0.2.0) climate_control (>= 0.2.0)
mock_redis (= 0.37.0) mock_redis (>= 0.17.0)
pry pry
rack-test (>= 0.6) rack-test (>= 0.6)
rspec (>= 3.2) rspec (>= 3.2)

View file

@ -246,18 +246,6 @@ This can be a string providing a single DN. For multiple DNs please specify the
The LDAP object-type used to designate a user object. The LDAP object-type used to designate a user object.
(optional) (optional)
### LDAP\_SERVICE_ACCOUNT\_HASH
A hash containing the following parameters for a service account to perform the
initial bind. After the initial bind, then a search query is performed using the
'base' and 'user_object', then re-binds as the returned user.
- :user_dn: The full distinguished name (DN) of the service account used to bind.
- :password: The password for the service account used to bind.
(optional)
### SITE\_NAME ### SITE\_NAME
The name of your deployment. The name of your deployment.

View file

@ -17,7 +17,6 @@
logfile: '/Users/samuel/workspace/vmpooler/vmpooler.log' logfile: '/Users/samuel/workspace/vmpooler/vmpooler.log'
task_limit: 10 task_limit: 10
timeout: 15 timeout: 15
timeout_notification: 5
vm_checktime: 1 vm_checktime: 1
vm_lifetime: 12 vm_lifetime: 12
vm_lifetime_auth: 24 vm_lifetime_auth: 24
@ -39,7 +38,6 @@
datastore: 'vmstorage' datastore: 'vmstorage'
size: 5 size: 5
timeout: 15 timeout: 15
timeout_notification: 5
ready_ttl: 1440 ready_ttl: 1440
provider: dummy provider: dummy
dns_plugin: 'example' dns_plugin: 'example'
@ -50,7 +48,6 @@
datastore: 'vmstorage' datastore: 'vmstorage'
size: 5 size: 5
timeout: 15 timeout: 15
timeout_notification: 5
ready_ttl: 1440 ready_ttl: 1440
provider: dummy provider: dummy
dns_plugin: 'example' dns_plugin: 'example'
@ -61,7 +58,6 @@
datastore: 'vmstorage' datastore: 'vmstorage'
size: 5 size: 5
timeout: 15 timeout: 15
timeout_notification: 5
ready_ttl: 1440 ready_ttl: 1440
provider: dummy provider: dummy
dns_plugin: 'example' dns_plugin: 'example'
@ -71,7 +67,6 @@
datastore: 'vmstorage' datastore: 'vmstorage'
size: 5 size: 5
timeout: 15 timeout: 15
timeout_notification: 5
ready_ttl: 1440 ready_ttl: 1440
provider: dummy provider: dummy
dns_plugin: 'example' dns_plugin: 'example'
@ -82,7 +77,6 @@
datastore: 'other-vmstorage' datastore: 'other-vmstorage'
size: 5 size: 5
timeout: 15 timeout: 15
timeout_notification: 5
ready_ttl: 1440 ready_ttl: 1440
provider: dummy provider: dummy
dns_plugin: 'example' dns_plugin: 'example'

View file

@ -82,7 +82,6 @@ module Vmpooler
end end
parsed_config[:config]['clone_target'] = ENV['CLONE_TARGET'] if ENV['CLONE_TARGET'] parsed_config[:config]['clone_target'] = ENV['CLONE_TARGET'] if ENV['CLONE_TARGET']
parsed_config[:config]['timeout'] = string_to_int(ENV['TIMEOUT']) if ENV['TIMEOUT'] parsed_config[:config]['timeout'] = string_to_int(ENV['TIMEOUT']) if ENV['TIMEOUT']
parsed_config[:config]['timeout_notification'] = string_to_int(ENV['TIMEOUT_NOTIFICATION']) if ENV['TIMEOUT_NOTIFICATION']
parsed_config[:config]['vm_lifetime_auth'] = string_to_int(ENV['VM_LIFETIME_AUTH']) if ENV['VM_LIFETIME_AUTH'] parsed_config[:config]['vm_lifetime_auth'] = string_to_int(ENV['VM_LIFETIME_AUTH']) if ENV['VM_LIFETIME_AUTH']
parsed_config[:config]['max_tries'] = string_to_int(ENV['MAX_TRIES']) if ENV['MAX_TRIES'] parsed_config[:config]['max_tries'] = string_to_int(ENV['MAX_TRIES']) if ENV['MAX_TRIES']
parsed_config[:config]['retry_factor'] = string_to_int(ENV['RETRY_FACTOR']) if ENV['RETRY_FACTOR'] parsed_config[:config]['retry_factor'] = string_to_int(ENV['RETRY_FACTOR']) if ENV['RETRY_FACTOR']

View file

@ -1,13 +1,10 @@
# frozen_string_literal: true # frozen_string_literal: true
require 'vmpooler/api/input_validator'
module Vmpooler module Vmpooler
class API class API
module Helpers module Helpers
include InputValidator
def tracer def tracer
@tracer ||= OpenTelemetry.tracer_provider.tracer('api', Vmpooler::VERSION) @tracer ||= OpenTelemetry.tracer_provider.tracer('api', Vmpooler::VERSION)
@ -71,7 +68,7 @@ module Vmpooler
end end
end end
def authenticate_ldap(port, host, encryption_hash, user_object, base, username_str, password_str, service_account_hash = nil) def authenticate_ldap(port, host, encryption_hash, user_object, base, username_str, password_str)
tracer.in_span( tracer.in_span(
"Vmpooler::API::Helpers.#{__method__}", "Vmpooler::API::Helpers.#{__method__}",
attributes: { attributes: {
@ -82,14 +79,6 @@ module Vmpooler
}, },
kind: :client kind: :client
) do ) do
if service_account_hash
username = service_account_hash[:user_dn]
password = service_account_hash[:password]
else
username = "#{user_object}=#{username_str},#{base}"
password = password_str
end
ldap = Net::LDAP.new( ldap = Net::LDAP.new(
:host => host, :host => host,
:port => port, :port => port,
@ -97,22 +86,12 @@ module Vmpooler
:base => base, :base => base,
:auth => { :auth => {
:method => :simple, :method => :simple,
:username => username, :username => "#{user_object}=#{username_str},#{base}",
:password => password :password => password_str
} }
) )
if service_account_hash return true if ldap.bind
return true if ldap.bind_as(
:base => base,
:filter => "(#{user_object}=#{username_str})",
:password => password_str
)
elsif ldap.bind
return true
else
return false
end
return false return false
end end
@ -137,7 +116,6 @@ module Vmpooler
:method => :start_tls, :method => :start_tls,
:tls_options => { :ssl_version => 'TLSv1' } :tls_options => { :ssl_version => 'TLSv1' }
} }
service_account_hash = auth[:ldap]['service_account_hash']
unless ldap_base.is_a? Array unless ldap_base.is_a? Array
ldap_base = ldap_base.split ldap_base = ldap_base.split
@ -156,8 +134,7 @@ module Vmpooler
search_user_obj, search_user_obj,
search_base, search_base,
username_str, username_str,
password_str, password_str
service_account_hash
) )
return true if result return true if result
end end
@ -292,7 +269,6 @@ module Vmpooler
def get_queue_metrics(pools, backend) def get_queue_metrics(pools, backend)
tracer.in_span("Vmpooler::API::Helpers.#{__method__}") do tracer.in_span("Vmpooler::API::Helpers.#{__method__}") do
queue = { queue = {
requested: 0,
pending: 0, pending: 0,
cloning: 0, cloning: 0,
booting: 0, booting: 0,
@ -302,35 +278,15 @@ module Vmpooler
total: 0 total: 0
} }
# Use a single pipeline to fetch all queue counts at once for better performance queue[:pending] = get_total_across_pools_redis_scard(pools, 'vmpooler__pending__', backend)
results = backend.pipelined do |pipeline| queue[:ready] = get_total_across_pools_redis_scard(pools, 'vmpooler__ready__', backend)
# Order matters - we'll use indices to extract values queue[:running] = get_total_across_pools_redis_scard(pools, 'vmpooler__running__', backend)
pools.each do |pool| queue[:completed] = get_total_across_pools_redis_scard(pools, 'vmpooler__completed__', backend)
pipeline.scard("vmpooler__provisioning__request#{pool['name']}") # 0..n-1
pipeline.scard("vmpooler__provisioning__processing#{pool['name']}") # n..2n-1
pipeline.scard("vmpooler__odcreate__task#{pool['name']}") # 2n..3n-1
pipeline.scard("vmpooler__pending__#{pool['name']}") # 3n..4n-1
pipeline.scard("vmpooler__ready__#{pool['name']}") # 4n..5n-1
pipeline.scard("vmpooler__running__#{pool['name']}") # 5n..6n-1
pipeline.scard("vmpooler__completed__#{pool['name']}") # 6n..7n-1
end
pipeline.get('vmpooler__tasks__clone') # 7n
pipeline.get('vmpooler__tasks__ondemandclone') # 7n+1
end
n = pools.length queue[:cloning] = backend.get('vmpooler__tasks__clone').to_i + backend.get('vmpooler__tasks__ondemandclone').to_i
# Safely extract results with default to empty array if slice returns nil queue[:booting] = queue[:pending].to_i - queue[:cloning].to_i
queue[:requested] = (results[0...n] || []).sum(&:to_i) + queue[:booting] = 0 if queue[:booting] < 0
(results[n...(2 * n)] || []).sum(&:to_i) + queue[:total] = queue[:pending].to_i + queue[:ready].to_i + queue[:running].to_i + queue[:completed].to_i
(results[(2 * n)...(3 * n)] || []).sum(&:to_i)
queue[:pending] = (results[(3 * n)...(4 * n)] || []).sum(&:to_i)
queue[:ready] = (results[(4 * n)...(5 * n)] || []).sum(&:to_i)
queue[:running] = (results[(5 * n)...(6 * n)] || []).sum(&:to_i)
queue[:completed] = (results[(6 * n)...(7 * n)] || []).sum(&:to_i)
queue[:cloning] = (results[7 * n] || 0).to_i + (results[7 * n + 1] || 0).to_i
queue[:booting] = queue[:pending].to_i - queue[:cloning].to_i
queue[:booting] = 0 if queue[:booting] < 0
queue[:total] = queue[:requested] + queue[:pending].to_i + queue[:ready].to_i + queue[:running].to_i + queue[:completed].to_i
queue queue
end end

View file

@ -1,159 +0,0 @@
# frozen_string_literal: true
module Vmpooler
class API
# Input validation helpers to enhance security
module InputValidator
# Maximum lengths to prevent abuse
MAX_HOSTNAME_LENGTH = 253
MAX_TAG_KEY_LENGTH = 50
MAX_TAG_VALUE_LENGTH = 255
MAX_REASON_LENGTH = 500
MAX_POOL_NAME_LENGTH = 100
MAX_TOKEN_LENGTH = 64
# Valid patterns
HOSTNAME_PATTERN = /\A[a-z0-9]([a-z0-9-]{0,61}[a-z0-9])?(\.[a-z0-9]([a-z0-9-]{0,61}[a-z0-9])?)* \z/ix.freeze
POOL_NAME_PATTERN = /\A[a-zA-Z0-9_-]+\z/.freeze
TAG_KEY_PATTERN = /\A[a-zA-Z0-9_\-.]+\z/.freeze
TOKEN_PATTERN = /\A[a-zA-Z0-9\-_]+\z/.freeze
INTEGER_PATTERN = /\A\d+\z/.freeze
class ValidationError < StandardError; end
# Validate hostname format and length
def validate_hostname(hostname)
return error_response('Hostname is required') if hostname.nil? || hostname.empty?
return error_response('Hostname too long') if hostname.length > MAX_HOSTNAME_LENGTH
return error_response('Invalid hostname format') unless hostname.match?(HOSTNAME_PATTERN)
true
end
# Validate pool/template name
def validate_pool_name(pool_name)
return error_response('Pool name is required') if pool_name.nil? || pool_name.empty?
return error_response('Pool name too long') if pool_name.length > MAX_POOL_NAME_LENGTH
return error_response('Invalid pool name format') unless pool_name.match?(POOL_NAME_PATTERN)
true
end
# Validate tag key and value
def validate_tag(key, value)
return error_response('Tag key is required') if key.nil? || key.empty?
return error_response('Tag key too long') if key.length > MAX_TAG_KEY_LENGTH
return error_response('Invalid tag key format') unless key.match?(TAG_KEY_PATTERN)
if value
return error_response('Tag value too long') if value.length > MAX_TAG_VALUE_LENGTH
# Sanitize value to prevent injection attacks
sanitized_value = value.gsub(/[^\w\s\-.@:\/]/, '')
return error_response('Tag value contains invalid characters') if sanitized_value != value
end
true
end
# Validate token format
def validate_token_format(token)
return error_response('Token is required') if token.nil? || token.empty?
return error_response('Token too long') if token.length > MAX_TOKEN_LENGTH
return error_response('Invalid token format') unless token.match?(TOKEN_PATTERN)
true
end
# Validate integer parameter
def validate_integer(value, name = 'value', min: nil, max: nil)
return error_response("#{name} is required") if value.nil?
value_str = value.to_s
return error_response("#{name} must be a valid integer") unless value_str.match?(INTEGER_PATTERN)
int_value = value.to_i
return error_response("#{name} must be at least #{min}") if min && int_value < min
return error_response("#{name} must be at most #{max}") if max && int_value > max
int_value
end
# Validate VM request count
def validate_vm_count(count)
validated = validate_integer(count, 'VM count', min: 1, max: 100)
return validated if validated.is_a?(Hash) # error response
validated
end
# Validate disk size
def validate_disk_size(size)
validated = validate_integer(size, 'Disk size', min: 1, max: 2048)
return validated if validated.is_a?(Hash) # error response
validated
end
# Validate lifetime (TTL) in hours
def validate_lifetime(lifetime)
validated = validate_integer(lifetime, 'Lifetime', min: 1, max: 168) # max 1 week
return validated if validated.is_a?(Hash) # error response
validated
end
# Validate reason text
def validate_reason(reason)
return true if reason.nil? || reason.empty?
return error_response('Reason too long') if reason.length > MAX_REASON_LENGTH
# Sanitize to prevent XSS/injection
sanitized = reason.gsub(/[<>"']/, '')
return error_response('Reason contains invalid characters') if sanitized != reason
true
end
# Sanitize JSON body to prevent injection
def sanitize_json_body(body)
return {} if body.nil? || body.empty?
begin
parsed = JSON.parse(body)
return error_response('Request body must be a JSON object') unless parsed.is_a?(Hash)
# Limit depth and size to prevent DoS
return error_response('Request body too complex') if json_depth(parsed) > 5
return error_response('Request body too large') if body.length > 10_240 # 10KB max
parsed
rescue JSON::ParserError => e
error_response("Invalid JSON: #{e.message}")
end
end
# Check if validation result is an error
def validation_error?(result)
result.is_a?(Hash) && result['ok'] == false
end
private
def error_response(message)
{ 'ok' => false, 'error' => message }
end
def json_depth(obj, depth = 0)
return depth unless obj.is_a?(Hash) || obj.is_a?(Array)
return depth + 1 if obj.empty?
if obj.is_a?(Hash)
depth + 1 + obj.values.map { |v| json_depth(v, 0) }.max
else
depth + 1 + obj.map { |v| json_depth(v, 0) }.max
end
end
end
end
end

View file

@ -1,116 +0,0 @@
# frozen_string_literal: true
module Vmpooler
class API
# Rate limiter middleware to protect against abuse
# Uses Redis to track request counts per IP and token
class RateLimiter
DEFAULT_LIMITS = {
global_per_ip: { limit: 100, period: 60 }, # 100 requests per minute per IP
authenticated: { limit: 500, period: 60 }, # 500 requests per minute with token
vm_creation: { limit: 20, period: 60 }, # 20 VM creations per minute
vm_deletion: { limit: 50, period: 60 } # 50 VM deletions per minute
}.freeze
def initialize(app, redis, config = {})
@app = app
@redis = redis
@config = DEFAULT_LIMITS.merge(config[:rate_limits] || {})
@enabled = config.fetch(:rate_limiting_enabled, true)
end
def call(env)
return @app.call(env) unless @enabled
request = Rack::Request.new(env)
client_id = identify_client(request)
endpoint_type = classify_endpoint(request)
# Check rate limits
return rate_limit_response(client_id, endpoint_type) if rate_limit_exceeded?(client_id, endpoint_type, request)
# Track the request
increment_request_count(client_id, endpoint_type)
@app.call(env)
end
private
def identify_client(request)
# Prioritize token-based identification for authenticated requests
token = request.env['HTTP_X_AUTH_TOKEN']
return "token:#{token}" if token && !token.empty?
# Fall back to IP address
ip = request.ip || request.env['REMOTE_ADDR'] || 'unknown'
"ip:#{ip}"
end
def classify_endpoint(request)
path = request.path
method = request.request_method
return :vm_creation if method == 'POST' && path.include?('/vm')
return :vm_deletion if method == 'DELETE' && path.include?('/vm')
return :authenticated if request.env['HTTP_X_AUTH_TOKEN']
:global_per_ip
end
def rate_limit_exceeded?(client_id, endpoint_type, _request)
limit_config = @config[endpoint_type] || @config[:global_per_ip]
key = "vmpooler__ratelimit__#{endpoint_type}__#{client_id}"
current_count = @redis.get(key).to_i
current_count >= limit_config[:limit]
rescue StandardError => e
# If Redis fails, allow the request through (fail open)
warn "Rate limiter Redis error: #{e.message}"
false
end
def increment_request_count(client_id, endpoint_type)
limit_config = @config[endpoint_type] || @config[:global_per_ip]
key = "vmpooler__ratelimit__#{endpoint_type}__#{client_id}"
@redis.pipelined do |pipeline|
pipeline.incr(key)
pipeline.expire(key, limit_config[:period])
end
rescue StandardError => e
# Log error but don't fail the request
warn "Rate limiter increment error: #{e.message}"
end
def rate_limit_response(client_id, endpoint_type)
limit_config = @config[endpoint_type] || @config[:global_per_ip]
key = "vmpooler__ratelimit__#{endpoint_type}__#{client_id}"
begin
ttl = @redis.ttl(key)
rescue StandardError
ttl = limit_config[:period]
end
headers = {
'Content-Type' => 'application/json',
'X-RateLimit-Limit' => limit_config[:limit].to_s,
'X-RateLimit-Remaining' => '0',
'X-RateLimit-Reset' => (Time.now.to_i + ttl).to_s,
'Retry-After' => ttl.to_s
}
body = JSON.pretty_generate({
'ok' => false,
'error' => 'Rate limit exceeded',
'limit' => limit_config[:limit],
'period' => limit_config[:period],
'retry_after' => ttl
})
[429, headers, [body]]
end
end
end
end

View file

@ -9,20 +9,6 @@ module Vmpooler
api_version = '3' api_version = '3'
api_prefix = "/api/v#{api_version}" api_prefix = "/api/v#{api_version}"
# Simple in-memory cache for status endpoint
# rubocop:disable Style/ClassVars
@@status_cache = {}
@@status_cache_mutex = Mutex.new
# rubocop:enable Style/ClassVars
STATUS_CACHE_TTL = 30 # seconds
# Clear cache (useful for testing)
def self.clear_status_cache
@@status_cache_mutex.synchronize do
@@status_cache.clear
end
end
helpers do helpers do
include Vmpooler::API::Helpers include Vmpooler::API::Helpers
end end
@ -297,9 +283,11 @@ module Vmpooler
def update_user_metrics(operation, vmname) def update_user_metrics(operation, vmname)
tracer.in_span("Vmpooler::API::V3.#{__method__}") do |span| tracer.in_span("Vmpooler::API::V3.#{__method__}") do |span|
begin begin
jenkins_build_url = backend.hget("vmpooler__vm__#{vmname}", 'tag:jenkins_build_url') backend.multi
user = backend.hget("vmpooler__vm__#{vmname}", 'token:user') backend.hget("vmpooler__vm__#{vmname}", 'tag:jenkins_build_url')
poolname = backend.hget("vmpooler__vm__#{vmname}", 'template') backend.hget("vmpooler__vm__#{vmname}", 'token:user')
backend.hget("vmpooler__vm__#{vmname}", 'template')
jenkins_build_url, user, poolname = backend.exec
poolname = poolname.gsub('.', '_') poolname = poolname.gsub('.', '_')
if user if user
@ -478,32 +466,6 @@ module Vmpooler
end end
end end
# Cache helper methods for status endpoint
def get_cached_status(cache_key)
@@status_cache_mutex.synchronize do
cached = @@status_cache[cache_key]
if cached && (Time.now - cached[:timestamp]) < STATUS_CACHE_TTL
return cached[:data]
end
nil
end
end
def set_cached_status(cache_key, data)
@@status_cache_mutex.synchronize do
@@status_cache[cache_key] = {
data: data,
timestamp: Time.now
}
# Cleanup old cache entries (keep only last 10 unique view combinations)
if @@status_cache.size > 10
oldest = @@status_cache.min_by { |_k, v| v[:timestamp] }
@@status_cache.delete(oldest[0])
end
end
end
def sync_pool_templates def sync_pool_templates
tracer.in_span("Vmpooler::API::V3.#{__method__}") do tracer.in_span("Vmpooler::API::V3.#{__method__}") do
pool_index = pool_index(pools) pool_index = pool_index(pools)
@ -686,13 +648,6 @@ module Vmpooler
get "#{api_prefix}/status/?" do get "#{api_prefix}/status/?" do
content_type :json content_type :json
# Create cache key based on view parameters
cache_key = params[:view] ? "status_#{params[:view]}" : "status_all"
# Try to get cached response
cached_response = get_cached_status(cache_key)
return cached_response if cached_response
if params[:view] if params[:view]
views = params[:view].split(",") views = params[:view].split(",")
end end
@ -753,12 +708,7 @@ module Vmpooler
result[:status][:uptime] = (Time.now - Vmpooler::API.settings.config[:uptime]).round(1) if Vmpooler::API.settings.config[:uptime] result[:status][:uptime] = (Time.now - Vmpooler::API.settings.config[:uptime]).round(1) if Vmpooler::API.settings.config[:uptime]
response = JSON.pretty_generate(Hash[result.sort_by { |k, _v| k }]) JSON.pretty_generate(Hash[result.sort_by { |k, _v| k }])
# Cache the response
set_cached_status(cache_key, response)
response
end end
# request statistics for specific pools by passing parameter 'pool' # request statistics for specific pools by passing parameter 'pool'
@ -1137,29 +1087,9 @@ module Vmpooler
result = { 'ok' => false } result = { 'ok' => false }
metrics.increment('http_requests_vm_total.post.vm.checkout') metrics.increment('http_requests_vm_total.post.vm.checkout')
# Validate and sanitize JSON body payload = JSON.parse(request.body.read)
payload = sanitize_json_body(request.body.read)
if validation_error?(payload)
status 400
return JSON.pretty_generate(payload)
end
# Validate each template and count if payload
payload.each do |template, count|
validation = validate_pool_name(template)
if validation_error?(validation)
status 400
return JSON.pretty_generate(validation)
end
validated_count = validate_vm_count(count)
if validation_error?(validated_count)
status 400
return JSON.pretty_generate(validated_count)
end
end
if payload && !payload.empty?
invalid = invalid_templates(payload) invalid = invalid_templates(payload)
if invalid.empty? if invalid.empty?
result = atomically_allocate_vms(payload) result = atomically_allocate_vms(payload)
@ -1278,7 +1208,6 @@ module Vmpooler
result = { 'ok' => false } result = { 'ok' => false }
metrics.increment('http_requests_vm_total.get.vm.template') metrics.increment('http_requests_vm_total.get.vm.template')
# Template can contain multiple pools separated by +, so validate after parsing
payload = extract_templates_from_query_params(params[:template]) payload = extract_templates_from_query_params(params[:template])
if payload if payload
@ -1308,13 +1237,6 @@ module Vmpooler
status 404 status 404
result['ok'] = false result['ok'] = false
# Validate hostname
validation = validate_hostname(params[:hostname])
if validation_error?(validation)
status 400
return JSON.pretty_generate(validation)
end
params[:hostname] = hostname_shorten(params[:hostname]) params[:hostname] = hostname_shorten(params[:hostname])
rdata = backend.hgetall("vmpooler__vm__#{params[:hostname]}") rdata = backend.hgetall("vmpooler__vm__#{params[:hostname]}")
@ -1453,13 +1375,6 @@ module Vmpooler
status 404 status 404
result['ok'] = false result['ok'] = false
# Validate hostname
validation = validate_hostname(params[:hostname])
if validation_error?(validation)
status 400
return JSON.pretty_generate(validation)
end
params[:hostname] = hostname_shorten(params[:hostname]) params[:hostname] = hostname_shorten(params[:hostname])
rdata = backend.hgetall("vmpooler__vm__#{params[:hostname]}") rdata = backend.hgetall("vmpooler__vm__#{params[:hostname]}")
@ -1490,21 +1405,16 @@ module Vmpooler
failure = [] failure = []
# Validate hostname
validation = validate_hostname(params[:hostname])
if validation_error?(validation)
status 400
return JSON.pretty_generate(validation)
end
params[:hostname] = hostname_shorten(params[:hostname]) params[:hostname] = hostname_shorten(params[:hostname])
if backend.exists?("vmpooler__vm__#{params[:hostname]}") if backend.exists?("vmpooler__vm__#{params[:hostname]}")
# Validate and sanitize JSON body begin
jdata = sanitize_json_body(request.body.read) jdata = JSON.parse(request.body.read)
if validation_error?(jdata) rescue StandardError => e
status 400 span = OpenTelemetry::Trace.current_span
return JSON.pretty_generate(jdata) span.record_exception(e)
span.status = OpenTelemetry::Trace::Status.error(e.to_s)
halt 400, JSON.pretty_generate(result)
end end
# Validate data payload # Validate data payload
@ -1513,13 +1423,6 @@ module Vmpooler
when 'lifetime' when 'lifetime'
need_token! if Vmpooler::API.settings.config[:auth] need_token! if Vmpooler::API.settings.config[:auth]
# Validate lifetime is a positive integer
lifetime_int = arg.to_i
if lifetime_int <= 0
failure.push("Lifetime must be a positive integer (got #{arg})")
next
end
# in hours, defaults to one week # in hours, defaults to one week
max_lifetime_upper_limit = config['max_lifetime_upper_limit'] max_lifetime_upper_limit = config['max_lifetime_upper_limit']
if max_lifetime_upper_limit if max_lifetime_upper_limit
@ -1529,17 +1432,13 @@ module Vmpooler
end end
end end
when 'tags' # validate lifetime is within boundaries
failure.push("You provided tags (#{arg}) as something other than a hash.") unless arg.is_a?(Hash) unless arg.to_i > 0
failure.push("You provided a lifetime (#{arg}) but you must provide a positive number.")
# Validate each tag key and value
arg.each do |key, value|
tag_validation = validate_tag(key, value)
if validation_error?(tag_validation)
failure.push(tag_validation['error'])
end
end end
when 'tags'
failure.push("You provided tags (#{arg}) as something other than a hash.") unless arg.is_a?(Hash)
failure.push("You provided unsuppored tags (#{arg}).") if config['allowed_tags'] && !(arg.keys - config['allowed_tags']).empty? failure.push("You provided unsuppored tags (#{arg}).") if config['allowed_tags'] && !(arg.keys - config['allowed_tags']).empty?
else else
failure.push("Unknown argument #{arg}.") failure.push("Unknown argument #{arg}.")
@ -1581,23 +1480,9 @@ module Vmpooler
status 404 status 404
result = { 'ok' => false } result = { 'ok' => false }
# Validate hostname
validation = validate_hostname(params[:hostname])
if validation_error?(validation)
status 400
return JSON.pretty_generate(validation)
end
# Validate disk size
validated_size = validate_disk_size(params[:size])
if validation_error?(validated_size)
status 400
return JSON.pretty_generate(validated_size)
end
params[:hostname] = hostname_shorten(params[:hostname]) params[:hostname] = hostname_shorten(params[:hostname])
if backend.exists?("vmpooler__vm__#{params[:hostname]}") if ((params[:size].to_i > 0 )and (backend.exists?("vmpooler__vm__#{params[:hostname]}")))
result[params[:hostname]] = {} result[params[:hostname]] = {}
result[params[:hostname]]['disk'] = "+#{params[:size]}gb" result[params[:hostname]]['disk'] = "+#{params[:size]}gb"

View file

@ -329,30 +329,6 @@ module Vmpooler
buckets: REDIS_CONNECT_BUCKETS, buckets: REDIS_CONNECT_BUCKETS,
docstring: 'vmpooler redis connection wait time', docstring: 'vmpooler redis connection wait time',
param_labels: %i[type provider] param_labels: %i[type provider]
},
vmpooler_health: {
mtype: M_GAUGE,
torun: %i[manager],
docstring: 'vmpooler health check metrics',
param_labels: %i[metric_path]
},
vmpooler_purge: {
mtype: M_GAUGE,
torun: %i[manager],
docstring: 'vmpooler purge metrics',
param_labels: %i[metric_path]
},
vmpooler_destroy: {
mtype: M_GAUGE,
torun: %i[manager],
docstring: 'vmpooler destroy metrics',
param_labels: %i[poolname]
},
vmpooler_clone: {
mtype: M_GAUGE,
torun: %i[manager],
docstring: 'vmpooler clone metrics',
param_labels: %i[poolname]
} }
} }
end end

View file

@ -82,21 +82,21 @@ module Vmpooler
end end
# Check the state of a VM # Check the state of a VM
def check_pending_vm(vm, pool, timeout, timeout_notification, provider) def check_pending_vm(vm, pool, timeout, provider)
Thread.new do Thread.new do
begin begin
_check_pending_vm(vm, pool, timeout, timeout_notification, provider) _check_pending_vm(vm, pool, timeout, provider)
rescue StandardError => e rescue StandardError => e
$logger.log('s', "[!] [#{pool}] '#{vm}' #{timeout} #{provider} errored while checking a pending vm : #{e}") $logger.log('s', "[!] [#{pool}] '#{vm}' #{timeout} #{provider} errored while checking a pending vm : #{e}")
@redis.with_metrics do |redis| @redis.with_metrics do |redis|
fail_pending_vm(vm, pool, timeout, timeout_notification, redis) fail_pending_vm(vm, pool, timeout, redis)
end end
raise raise
end end
end end
end end
def _check_pending_vm(vm, pool, timeout, timeout_notification, provider) def _check_pending_vm(vm, pool, timeout, provider)
mutex = vm_mutex(vm) mutex = vm_mutex(vm)
return if mutex.locked? return if mutex.locked?
@ -106,7 +106,7 @@ module Vmpooler
if provider.vm_ready?(pool, vm, redis) if provider.vm_ready?(pool, vm, redis)
move_pending_vm_to_ready(vm, pool, redis, request_id) move_pending_vm_to_ready(vm, pool, redis, request_id)
else else
fail_pending_vm(vm, pool, timeout, timeout_notification, redis) fail_pending_vm(vm, pool, timeout, redis)
end end
end end
end end
@ -122,121 +122,35 @@ module Vmpooler
$logger.log('d', "[!] [#{pool}] '#{vm}' no longer exists. Removing from pending.") $logger.log('d', "[!] [#{pool}] '#{vm}' no longer exists. Removing from pending.")
end end
def fail_pending_vm(vm, pool, timeout, timeout_notification, redis, exists: true) def fail_pending_vm(vm, pool, timeout, redis, exists: true)
clone_stamp = redis.hget("vmpooler__vm__#{vm}", 'clone') clone_stamp = redis.hget("vmpooler__vm__#{vm}", 'clone')
time_since_clone = (Time.now - Time.parse(clone_stamp)) / 60 time_since_clone = (Time.now - Time.parse(clone_stamp)) / 60
if time_since_clone > timeout
already_timed_out = time_since_clone > timeout if exists
timing_out_soon = time_since_clone > timeout_notification && !redis.hget("vmpooler__vm__#{vm}", 'timeout_notification') request_id = redis.hget("vmpooler__vm__#{vm}", 'request_id')
pool_alias = redis.hget("vmpooler__vm__#{vm}", 'pool_alias') if request_id
return true if !already_timed_out && !timing_out_soon open_socket_error = redis.hget("vmpooler__vm__#{vm}", 'open_socket_error')
redis.smove("vmpooler__pending__#{pool}", "vmpooler__completed__#{pool}", vm)
if already_timed_out if request_id
unless exists ondemandrequest_hash = redis.hgetall("vmpooler__odrequest__#{request_id}")
if ondemandrequest_hash && ondemandrequest_hash['status'] != 'failed' && ondemandrequest_hash['status'] != 'deleted'
# will retry a VM that did not come up as vm_ready? only if it has not been market failed or deleted
redis.zadd('vmpooler__odcreate__task', 1, "#{pool_alias}:#{pool}:1:#{request_id}")
end
end
$metrics.increment("errors.markedasfailed.#{pool}")
$logger.log('d', "[!] [#{pool}] '#{vm}' marked as 'failed' after #{timeout} minutes with error: #{open_socket_error}")
else
remove_nonexistent_vm(vm, pool, redis) remove_nonexistent_vm(vm, pool, redis)
return true
end end
open_socket_error = handle_timed_out_vm(vm, pool, redis)
end end
redis.hset("vmpooler__vm__#{vm}", 'timeout_notification', 1) if timing_out_soon
nonexist_warning = if already_timed_out
"[!] [#{pool}] '#{vm}' marked as 'failed' after #{timeout} minutes with error: #{open_socket_error}"
elsif timing_out_soon
time_remaining = timeout - timeout_notification
open_socket_error = redis.hget("vmpooler__vm__#{vm}", 'open_socket_error')
"[!] [#{pool}] '#{vm}' impending failure in #{time_remaining} minutes with error: #{open_socket_error}"
else
"[!] [#{pool}] '#{vm}' This error is wholly unexpected"
end
$logger.log('d', nonexist_warning)
true true
rescue StandardError => e rescue StandardError => e
$logger.log('d', "Fail pending VM failed with an error: #{e}") $logger.log('d', "Fail pending VM failed with an error: #{e}")
false false
end end
def handle_timed_out_vm(vm, pool, redis)
request_id = redis.hget("vmpooler__vm__#{vm}", 'request_id')
pool_alias = redis.hget("vmpooler__vm__#{vm}", 'pool_alias') if request_id
open_socket_error = redis.hget("vmpooler__vm__#{vm}", 'open_socket_error')
retry_count = redis.hget("vmpooler__odrequest__#{request_id}", 'retry_count').to_i if request_id
# Move to DLQ before moving to completed queue
move_to_dlq(vm, pool, 'pending', 'Timeout',
open_socket_error || 'VM timed out during pending phase',
redis, request_id: request_id, pool_alias: pool_alias, retry_count: retry_count)
clone_error = redis.hget("vmpooler__vm__#{vm}", 'clone_error')
clone_error_class = redis.hget("vmpooler__vm__#{vm}", 'clone_error_class')
redis.smove("vmpooler__pending__#{pool}", "vmpooler__completed__#{pool}", vm)
if request_id
ondemandrequest_hash = redis.hgetall("vmpooler__odrequest__#{request_id}")
if ondemandrequest_hash && ondemandrequest_hash['status'] != 'failed' && ondemandrequest_hash['status'] != 'deleted'
# Check retry count and max retry limit before retrying
retry_count = (redis.hget("vmpooler__odrequest__#{request_id}", 'retry_count') || '0').to_i
max_retries = $config[:config]['max_vm_retries'] || 3
$logger.log('s', "[!] [#{pool}] '#{vm}' checking retry logic: error='#{clone_error}', error_class='#{clone_error_class}', retry_count=#{retry_count}, max_retries=#{max_retries}")
# Determine if error is likely permanent (configuration issues)
permanent_error = permanent_error?(clone_error, clone_error_class)
$logger.log('s', "[!] [#{pool}] '#{vm}' permanent_error check result: #{permanent_error}")
if retry_count < max_retries && !permanent_error
# Increment retry count and retry VM creation
redis.hset("vmpooler__odrequest__#{request_id}", 'retry_count', retry_count + 1)
redis.zadd('vmpooler__odcreate__task', 1, "#{pool_alias}:#{pool}:1:#{request_id}")
$logger.log('s', "[!] [#{pool}] '#{vm}' failed, retrying (attempt #{retry_count + 1}/#{max_retries})")
else
# Max retries exceeded or permanent error, mark request as permanently failed
failure_reason = if permanent_error
"Configuration error: #{clone_error}"
else
'Max retry attempts exceeded'
end
redis.hset("vmpooler__odrequest__#{request_id}", 'status', 'failed')
redis.hset("vmpooler__odrequest__#{request_id}", 'failure_reason', failure_reason)
$logger.log('s', "[!] [#{pool}] '#{vm}' permanently failed: #{failure_reason}")
$metrics.increment("vmpooler_errors.permanently_failed.#{pool}")
end
end
end
$metrics.increment("vmpooler_errors.markedasfailed.#{pool}")
open_socket_error || clone_error
end
# Determine if an error is likely permanent (configuration issue) vs transient
def permanent_error?(error_message, error_class)
return false if error_message.nil? || error_class.nil?
permanent_error_patterns = [
/template.*not found/i,
/template.*does not exist/i,
/invalid.*path/i,
/folder.*not found/i,
/datastore.*not found/i,
/resource pool.*not found/i,
/permission.*denied/i,
/authentication.*failed/i,
/invalid.*credentials/i,
/configuration.*error/i
]
permanent_error_classes = [
'ArgumentError',
'NoMethodError',
'NameError'
]
# Check error message patterns
permanent_error_patterns.any? { |pattern| error_message.match?(pattern) } ||
# Check error class types
permanent_error_classes.include?(error_class)
end
def move_pending_vm_to_ready(vm, pool, redis, request_id = nil) def move_pending_vm_to_ready(vm, pool, redis, request_id = nil)
clone_time = redis.hget("vmpooler__vm__#{vm}", 'clone') clone_time = redis.hget("vmpooler__vm__#{vm}", 'clone')
finish = format('%<time>.2f', time: Time.now - Time.parse(clone_time)) finish = format('%<time>.2f', time: Time.now - Time.parse(clone_time))
@ -287,16 +201,8 @@ module Vmpooler
return true if provider.vm_ready?(pool_name, vm_name, redis) return true if provider.vm_ready?(pool_name, vm_name, redis)
raise("VM #{vm_name} is not ready") raise("VM #{vm_name} is not ready")
rescue StandardError => e rescue StandardError
open_socket_error = redis.hget("vmpooler__vm__#{vm_name}", 'open_socket_error') open_socket_error = redis.hget("vmpooler__vm__#{vm_name}", 'open_socket_error')
request_id = redis.hget("vmpooler__vm__#{vm_name}", 'request_id')
pool_alias = redis.hget("vmpooler__vm__#{vm_name}", 'pool_alias')
# Move to DLQ before moving to completed queue
move_to_dlq(vm_name, pool_name, 'ready', e.class.name,
open_socket_error || 'VM became unreachable in ready queue',
redis, request_id: request_id, pool_alias: pool_alias)
move_vm_queue(pool_name, vm_name, 'ready', 'completed', redis, "removed from 'ready' queue. vm unreachable with error: #{open_socket_error}") move_vm_queue(pool_name, vm_name, 'ready', 'completed', redis, "removed from 'ready' queue. vm unreachable with error: #{open_socket_error}")
end end
@ -429,60 +335,6 @@ module Vmpooler
$logger.log('d', "[!] [#{pool}] '#{vm}' #{msg}") if msg $logger.log('d', "[!] [#{pool}] '#{vm}' #{msg}") if msg
end end
# Dead-Letter Queue (DLQ) helper methods
def dlq_enabled?
$config[:config] && $config[:config]['dlq_enabled'] == true
end
def dlq_ttl
($config[:config] && $config[:config]['dlq_ttl']) || 168 # default 7 days in hours
end
def dlq_max_entries
($config[:config] && $config[:config]['dlq_max_entries']) || 10_000
end
def move_to_dlq(vm, pool, queue_type, error_class, error_message, redis, request_id: nil, pool_alias: nil, retry_count: 0, skip_metrics: false)
return unless dlq_enabled?
dlq_key = "vmpooler__dlq__#{queue_type}"
timestamp = Time.now.to_i
# Build DLQ entry
dlq_entry = {
'vm' => vm,
'pool' => pool,
'queue_from' => queue_type,
'error_class' => error_class.to_s,
'error_message' => error_message.to_s,
'failed_at' => Time.now.iso8601,
'retry_count' => retry_count,
'request_id' => request_id,
'pool_alias' => pool_alias
}.compact
# Use sorted set with timestamp as score for easy age-based queries and TTL
dlq_entry_json = dlq_entry.to_json
redis.zadd(dlq_key, timestamp, "#{vm}:#{timestamp}:#{dlq_entry_json}")
# Enforce max entries limit by removing oldest entries
current_size = redis.zcard(dlq_key)
if current_size > dlq_max_entries
remove_count = current_size - dlq_max_entries
redis.zremrangebyrank(dlq_key, 0, remove_count - 1)
$logger.log('d', "[!] [dlq] Trimmed #{remove_count} oldest entries from #{dlq_key}")
end
# Set expiration on the entire DLQ (will be refreshed on next write)
ttl_seconds = dlq_ttl * 3600
redis.expire(dlq_key, ttl_seconds)
$metrics.increment("vmpooler_dlq.#{queue_type}.count") unless skip_metrics
$logger.log('d', "[!] [dlq] Moved '#{vm}' from '#{queue_type}' queue to DLQ: #{error_message}")
rescue StandardError => e
$logger.log('s', "[!] [dlq] Failed to move '#{vm}' to DLQ: #{e}")
end
# Clone a VM # Clone a VM
def clone_vm(pool_name, provider, dns_plugin, request_id = nil, pool_alias = nil) def clone_vm(pool_name, provider, dns_plugin, request_id = nil, pool_alias = nil)
Thread.new do Thread.new do
@ -492,13 +344,7 @@ module Vmpooler
if request_id if request_id
$logger.log('s', "[!] [#{pool_name}] failed while cloning VM for request #{request_id} with an error: #{e}") $logger.log('s', "[!] [#{pool_name}] failed while cloning VM for request #{request_id} with an error: #{e}")
@redis.with_metrics do |redis| @redis.with_metrics do |redis|
# Only re-queue if the request wasn't already marked as failed (e.g., by permanent error detection) redis.zadd('vmpooler__odcreate__task', 1, "#{pool_alias}:#{pool_name}:1:#{request_id}")
request_status = redis.hget("vmpooler__odrequest__#{request_id}", 'status')
if request_status != 'failed'
redis.zadd('vmpooler__odcreate__task', 1, "#{pool_alias}:#{pool_name}:1:#{request_id}")
else
$logger.log('s', "[!] [#{pool_name}] Request #{request_id} already marked as failed, not re-queueing")
end
end end
else else
$logger.log('s', "[!] [#{pool_name}] failed while cloning VM with an error: #{e}") $logger.log('s', "[!] [#{pool_name}] failed while cloning VM with an error: #{e}")
@ -551,10 +397,10 @@ module Vmpooler
hostname_retries += 1 hostname_retries += 1
if !hostname_available if !hostname_available
$metrics.increment("vmpooler_errors.duplicatehostname.#{pool_name}") $metrics.increment("errors.duplicatehostname.#{pool_name}")
$logger.log('s', "[!] [#{pool_name}] Generated hostname #{fqdn} was not unique (attempt \##{hostname_retries} of #{max_hostname_retries})") $logger.log('s', "[!] [#{pool_name}] Generated hostname #{fqdn} was not unique (attempt \##{hostname_retries} of #{max_hostname_retries})")
elsif !dns_available elsif !dns_available
$metrics.increment("vmpooler_errors.staledns.#{pool_name}") $metrics.increment("errors.staledns.#{pool_name}")
$logger.log('s', "[!] [#{pool_name}] Generated hostname #{fqdn} already exists in DNS records (#{dns_ip}), stale DNS") $logger.log('s', "[!] [#{pool_name}] Generated hostname #{fqdn} already exists in DNS records (#{dns_ip}), stale DNS")
end end
end end
@ -600,15 +446,12 @@ module Vmpooler
provider.create_vm(pool_name, new_vmname) provider.create_vm(pool_name, new_vmname)
finish = format('%<time>.2f', time: Time.now - start) finish = format('%<time>.2f', time: Time.now - start)
$logger.log('s', "[+] [#{pool_name}] '#{new_vmname}' cloned in #{finish} seconds") $logger.log('s', "[+] [#{pool_name}] '#{new_vmname}' cloned in #{finish} seconds")
$metrics.gauge("vmpooler_clone.#{pool_name}", finish) $metrics.timing("clone.#{pool_name}", finish)
$logger.log('d', "[ ] [#{pool_name}] Obtaining IP for '#{new_vmname}'") $logger.log('d', "[ ] [#{pool_name}] Obtaining IP for '#{new_vmname}'")
ip_start = Time.now ip_start = Time.now
ip = provider.get_vm_ip_address(new_vmname, pool_name) ip = provider.get_vm_ip_address(new_vmname, pool_name)
ip_finish = format('%<time>.2f', time: Time.now - ip_start) ip_finish = format('%<time>.2f', time: Time.now - ip_start)
raise StandardError, "failed to obtain IP after #{ip_finish} seconds" if ip.nil?
$logger.log('s', "[+] [#{pool_name}] Obtained IP for '#{new_vmname}' in #{ip_finish} seconds") $logger.log('s', "[+] [#{pool_name}] Obtained IP for '#{new_vmname}' in #{ip_finish} seconds")
@redis.with_metrics do |redis| @redis.with_metrics do |redis|
@ -621,50 +464,14 @@ module Vmpooler
dns_plugin_class_name = get_dns_plugin_class_name_for_pool(pool_name) dns_plugin_class_name = get_dns_plugin_class_name_for_pool(pool_name)
dns_plugin.create_or_replace_record(new_vmname) unless dns_plugin_class_name == 'dynamic-dns' dns_plugin.create_or_replace_record(new_vmname) unless dns_plugin_class_name == 'dynamic-dns'
rescue StandardError => e rescue StandardError
# Store error details for retry decision making
@redis.with_metrics do |redis| @redis.with_metrics do |redis|
# Get retry count before moving to DLQ
retry_count = 0
if request_id
ondemandrequest_hash = redis.hgetall("vmpooler__odrequest__#{request_id}")
retry_count = ondemandrequest_hash['retry_count'].to_i if ondemandrequest_hash
end
# Move to DLQ before removing from pending queue
move_to_dlq(new_vmname, pool_name, 'clone', e.class.name, e.message,
redis, request_id: request_id, pool_alias: pool_alias, retry_count: retry_count)
redis.pipelined do |pipeline| redis.pipelined do |pipeline|
pipeline.srem("vmpooler__pending__#{pool_name}", new_vmname) pipeline.srem("vmpooler__pending__#{pool_name}", new_vmname)
pipeline.hset("vmpooler__vm__#{new_vmname}", 'clone_error', e.message)
pipeline.hset("vmpooler__vm__#{new_vmname}", 'clone_error_class', e.class.name)
expiration_ttl = $config[:redis]['data_ttl'].to_i * 60 * 60 expiration_ttl = $config[:redis]['data_ttl'].to_i * 60 * 60
pipeline.expire("vmpooler__vm__#{new_vmname}", expiration_ttl) pipeline.expire("vmpooler__vm__#{new_vmname}", expiration_ttl)
end end
# Handle retry logic for on-demand requests
if request_id
retry_count = (redis.hget("vmpooler__odrequest__#{request_id}", 'retry_count') || '0').to_i
max_retries = $config[:config]['max_vm_retries'] || 3
is_permanent = permanent_error?(e.message, e.class.name)
$logger.log('s', "[!] [#{pool_name}] '#{new_vmname}' checking immediate failure retry: error='#{e.message}', error_class='#{e.class.name}', retry_count=#{retry_count}, max_retries=#{max_retries}, permanent_error=#{is_permanent}")
if is_permanent || retry_count >= max_retries
reason = is_permanent ? 'permanent error detected' : 'max retries exceeded'
$logger.log('s', "[!] [#{pool_name}] Cancelling request #{request_id} due to #{reason}")
redis.hset("vmpooler__odrequest__#{request_id}", 'status', 'failed')
redis.zadd('vmpooler__odcreate__task', 0, "#{pool_alias}:#{pool_name}:0:#{request_id}")
else
# Increment retry count and re-queue for retry
redis.hincrby("vmpooler__odrequest__#{request_id}", 'retry_count', 1)
$logger.log('s', "[+] [#{pool_name}] Request #{request_id} will be retried (attempt #{retry_count + 1}/#{max_retries})")
redis.zadd('vmpooler__odcreate__task', 1, "#{pool_alias}:#{pool_name}:1:#{request_id}")
end
end
end end
$logger.log('s', "[!] [#{pool_name}] '#{new_vmname}' clone failed: #{e.class}: #{e.message}")
raise raise
ensure ensure
@redis.with_metrics do |redis| @redis.with_metrics do |redis|
@ -714,7 +521,7 @@ module Vmpooler
finish = format('%<time>.2f', time: Time.now - start) finish = format('%<time>.2f', time: Time.now - start)
$logger.log('s', "[-] [#{pool}] '#{vm}' destroyed in #{finish} seconds") $logger.log('s', "[-] [#{pool}] '#{vm}' destroyed in #{finish} seconds")
$metrics.gauge("vmpooler_destroy.#{pool}", finish) $metrics.timing("destroy.#{pool}", finish)
end end
end end
dereference_mutex(vm) dereference_mutex(vm)
@ -750,543 +557,6 @@ module Vmpooler
provider.purge_unconfigured_resources(allowlist) provider.purge_unconfigured_resources(allowlist)
end end
# Auto-purge stale queue entries
def purge_enabled?
$config[:config] && $config[:config]['purge_enabled'] == true
end
def purge_dry_run?
$config[:config] && $config[:config]['purge_dry_run'] == true
end
def max_pending_age
($config[:config] && $config[:config]['max_pending_age']) || 7200 # default 2 hours in seconds
end
def max_ready_age
($config[:config] && $config[:config]['max_ready_age']) || 86_400 # default 24 hours in seconds
end
def max_completed_age
($config[:config] && $config[:config]['max_completed_age']) || 3600 # default 1 hour in seconds
end
def max_orphaned_age
($config[:config] && $config[:config]['max_orphaned_age']) || 86_400 # default 24 hours in seconds
end
def purge_stale_queue_entries
return unless purge_enabled?
Thread.new do
begin
$logger.log('d', '[*] [purge] Starting stale queue entry purge cycle')
purge_start = Time.now
@redis.with_metrics do |redis|
total_purged = 0
# Purge stale entries from each pool
$config[:pools].each do |pool|
pool_name = pool['name']
# Purge pending queue
purged_pending = purge_pending_queue(pool_name, redis)
total_purged += purged_pending
# Purge ready queue
purged_ready = purge_ready_queue(pool_name, redis)
total_purged += purged_ready
# Purge completed queue
purged_completed = purge_completed_queue(pool_name, redis)
total_purged += purged_completed
end
# Purge orphaned VM metadata
purged_orphaned = purge_orphaned_metadata(redis)
total_purged += purged_orphaned
purge_duration = Time.now - purge_start
$logger.log('s', "[*] [purge] Completed purge cycle in #{purge_duration.round(2)}s: #{total_purged} entries purged")
$metrics.gauge('vmpooler_purge.cycle.duration', purge_duration)
$metrics.gauge('vmpooler_purge.total.count', total_purged)
end
rescue StandardError => e
$logger.log('s', "[!] [purge] Failed during purge cycle: #{e}")
end
end
end
def purge_pending_queue(pool_name, redis)
queue_key = "vmpooler__pending__#{pool_name}"
vms = redis.smembers(queue_key)
purged_count = 0
vms.each do |vm|
begin
clone_time_str = redis.hget("vmpooler__vm__#{vm}", 'clone')
next unless clone_time_str
clone_time = Time.parse(clone_time_str)
age = Time.now - clone_time
if age > max_pending_age
request_id = redis.hget("vmpooler__vm__#{vm}", 'request_id')
pool_alias = redis.hget("vmpooler__vm__#{vm}", 'pool_alias')
purged_count += 1
if purge_dry_run?
$logger.log('d', "[*] [purge][dry-run] Would purge stale pending VM '#{vm}' (age: #{age.round(0)}s, max: #{max_pending_age}s)")
else
# Move to DLQ before removing (skip DLQ metric since we're tracking purge metric)
move_to_dlq(vm, pool_name, 'pending', 'Purge',
"Stale pending VM (age: #{age.round(0)}s > max: #{max_pending_age}s)",
redis, request_id: request_id, pool_alias: pool_alias, skip_metrics: true)
redis.srem(queue_key, vm)
# Set expiration on VM metadata if data_ttl is configured
if $config[:redis] && $config[:redis]['data_ttl']
expiration_ttl = $config[:redis]['data_ttl'].to_i * 60 * 60
redis.expire("vmpooler__vm__#{vm}", expiration_ttl)
end
$logger.log('d', "[!] [purge] Purged stale pending VM '#{vm}' from '#{pool_name}' (age: #{age.round(0)}s)")
$metrics.increment("vmpooler_purge.pending.#{pool_name}.count")
end
end
rescue StandardError => e
$logger.log('d', "[!] [purge] Error checking pending VM '#{vm}': #{e}")
end
end
purged_count
end
def purge_ready_queue(pool_name, redis)
queue_key = "vmpooler__ready__#{pool_name}"
vms = redis.smembers(queue_key)
purged_count = 0
vms.each do |vm|
begin
ready_time_str = redis.hget("vmpooler__vm__#{vm}", 'ready')
next unless ready_time_str
ready_time = Time.parse(ready_time_str)
age = Time.now - ready_time
if age > max_ready_age
if purge_dry_run?
$logger.log('d', "[*] [purge][dry-run] Would purge stale ready VM '#{vm}' (age: #{age.round(0)}s, max: #{max_ready_age}s)")
else
redis.smove(queue_key, "vmpooler__completed__#{pool_name}", vm)
$logger.log('d', "[!] [purge] Moved stale ready VM '#{vm}' from '#{pool_name}' to completed (age: #{age.round(0)}s)")
$metrics.increment("vmpooler_purge.ready.#{pool_name}.count")
end
purged_count += 1
end
rescue StandardError => e
$logger.log('d', "[!] [purge] Error checking ready VM '#{vm}': #{e}")
end
end
purged_count
end
def purge_completed_queue(pool_name, redis)
queue_key = "vmpooler__completed__#{pool_name}"
vms = redis.smembers(queue_key)
purged_count = 0
vms.each do |vm|
begin
# Check destroy time or last activity time
destroy_time_str = redis.hget("vmpooler__vm__#{vm}", 'destroy')
checkout_time_str = redis.hget("vmpooler__vm__#{vm}", 'checkout')
# Use the most recent timestamp
timestamp_str = destroy_time_str || checkout_time_str
next unless timestamp_str
timestamp = Time.parse(timestamp_str)
age = Time.now - timestamp
if age > max_completed_age
if purge_dry_run?
$logger.log('d', "[*] [purge][dry-run] Would purge stale completed VM '#{vm}' (age: #{age.round(0)}s, max: #{max_completed_age}s)")
else
redis.srem(queue_key, vm)
$logger.log('d', "[!] [purge] Removed stale completed VM '#{vm}' from '#{pool_name}' (age: #{age.round(0)}s)")
$metrics.increment("vmpooler_purge.completed.#{pool_name}.count")
end
purged_count += 1
end
rescue StandardError => e
$logger.log('d', "[!] [purge] Error checking completed VM '#{vm}': #{e}")
end
end
purged_count
end
def purge_orphaned_metadata(redis)
# Find VM metadata that doesn't belong to any queue
all_vm_keys = redis.keys('vmpooler__vm__*')
purged_count = 0
all_vm_keys.each do |vm_key|
begin
vm = vm_key.sub('vmpooler__vm__', '')
# Check if VM exists in any queue
pool_name = redis.hget(vm_key, 'pool')
next unless pool_name
in_pending = redis.sismember("vmpooler__pending__#{pool_name}", vm)
in_ready = redis.sismember("vmpooler__ready__#{pool_name}", vm)
in_running = redis.sismember("vmpooler__running__#{pool_name}", vm)
in_completed = redis.sismember("vmpooler__completed__#{pool_name}", vm)
in_discovered = redis.sismember("vmpooler__discovered__#{pool_name}", vm)
in_migrating = redis.sismember("vmpooler__migrating__#{pool_name}", vm)
# VM is orphaned if not in any queue
unless in_pending || in_ready || in_running || in_completed || in_discovered || in_migrating
# Check age
clone_time_str = redis.hget(vm_key, 'clone')
next unless clone_time_str
clone_time = Time.parse(clone_time_str)
age = Time.now - clone_time
if age > max_orphaned_age
if purge_dry_run?
$logger.log('d', "[*] [purge][dry-run] Would purge orphaned metadata for '#{vm}' (age: #{age.round(0)}s, max: #{max_orphaned_age}s)")
else
expiration_ttl = 3600 # 1 hour
redis.expire(vm_key, expiration_ttl)
$logger.log('d', "[!] [purge] Set expiration on orphaned metadata for '#{vm}' (age: #{age.round(0)}s)")
$metrics.increment('vmpooler_purge.orphaned.count')
end
purged_count += 1
end
end
rescue StandardError => e
$logger.log('d', "[!] [purge] Error checking orphaned metadata '#{vm_key}': #{e}")
end
end
purged_count
end
# Health checks for Redis queues
def health_check_enabled?
$config[:config] && $config[:config]['health_check_enabled'] == true
end
def health_thresholds
  # Built-in limits used by queue health evaluation. Any subset may be
  # overridden via config['health_thresholds']; unspecified keys keep
  # their defaults.
  base = {
    'pending_queue_max' => 100,
    'ready_queue_max' => 500,
    'dlq_max_warning' => 100,
    'dlq_max_critical' => 1000,
    'stuck_vm_age_threshold' => 7200, # 2 hours
    'stuck_vm_max_warning' => 10,
    'stuck_vm_max_critical' => 50
  }
  overrides = $config[:config] && $config[:config]['health_thresholds']
  overrides ? base.merge(overrides) : base
end
# Run one queue health check in a background thread: compute metrics,
# classify overall status, persist both to Redis for API consumption,
# then log a summary and push gauges. No-op unless enabled in config.
# Errors are caught and logged so a failed check never kills the caller.
def check_queue_health
  return unless health_check_enabled?
  Thread.new do
    begin
      $logger.log('d', '[*] [health] Running queue health check')
      health_start = Time.now
      @redis.with_metrics do |redis|
        health_metrics = calculate_health_metrics(redis)
        health_status = determine_health_status(health_metrics)
        # Store health metrics in Redis for API consumption.
        # Convert nested hash to JSON for storage.
        require 'json'
        redis.hset('vmpooler__health', 'metrics', health_metrics.to_json)
        redis.hset('vmpooler__health', 'status', health_status)
        redis.hset('vmpooler__health', 'last_check', Time.now.iso8601)
        redis.expire('vmpooler__health', 3600) # Expire after 1 hour
        # Log health summary
        log_health_summary(health_metrics, health_status)
        # Push metrics
        push_health_metrics(health_metrics, health_status)
        # Record how long the check itself took.
        health_duration = Time.now - health_start
        $metrics.gauge('vmpooler_health.check.duration', health_duration)
      end
    rescue StandardError => e
      $logger.log('s', "[!] [health] Failed during health check: #{e}")
    end
  end
end
def calculate_health_metrics(redis)
  # Build the nested health-metrics structure consumed by the health API:
  # per-pool queue stats under 'queues', task counters under 'tasks', and
  # aggregate error counts under 'errors'.
  thresholds = health_thresholds
  metrics = { 'queues' => {}, 'tasks' => {}, 'errors' => {} }
  stuck_total = 0
  $config[:pools].each do |pool|
    pool_name = pool['name']
    # Pending queue: ages are measured from each VM's 'clone' timestamp;
    # anything older than the stuck threshold counts as stuck.
    pending_vms = redis.smembers("vmpooler__pending__#{pool_name}")
    pending_ages = calculate_queue_ages(pending_vms, 'clone', redis)
    stuck_pending = pending_ages.count { |age| age > thresholds['stuck_vm_age_threshold'] }
    stuck_total += stuck_pending
    # Ready queue: ages measured from the 'ready' timestamp.
    ready_vms = redis.smembers("vmpooler__ready__#{pool_name}")
    ready_ages = calculate_queue_ages(ready_vms, 'ready', redis)
    metrics['queues'][pool_name] = {
      'pending' => {
        'size' => pending_vms.size,
        'oldest_age' => pending_ages.max || 0,
        'avg_age' => pending_ages.empty? ? 0 : (pending_ages.sum / pending_ages.size).round(0),
        'stuck_count' => stuck_pending
      },
      'ready' => {
        'size' => ready_vms.size,
        'oldest_age' => ready_ages.max || 0,
        'avg_age' => ready_ages.empty? ? 0 : (ready_ages.sum / ready_ages.size).round(0)
      },
      'completed' => { 'size' => redis.scard("vmpooler__completed__#{pool_name}") }
    }
  end
  # Global task counters.
  metrics['tasks']['clone'] = { 'active' => redis.get('vmpooler__tasks__clone').to_i }
  metrics['tasks']['ondemand'] = {
    'active' => redis.get('vmpooler__tasks__ondemandclone').to_i,
    'pending' => redis.zcard('vmpooler__odcreate__task')
  }
  # Dead-letter queues, reported per queue type when DLQ support is on.
  dlq_total = 0
  if dlq_enabled?
    redis.keys('vmpooler__dlq__*').each do |dlq_key|
      size = redis.zcard(dlq_key)
      dlq_total += size
      (metrics['queues']['dlq'] ||= {})[dlq_key.sub('vmpooler__dlq__', '')] = { 'size' => size }
    end
  end
  metrics['errors']['dlq_total_size'] = dlq_total
  metrics['errors']['stuck_vm_count'] = stuck_total
  metrics['errors']['orphaned_metadata_count'] = count_orphaned_metadata(redis)
  metrics
end
def calculate_queue_ages(vms, timestamp_field, redis)
  # Return the age in whole seconds of each VM's `timestamp_field` value,
  # read from its vmpooler__vm__<name> metadata hash. VMs whose timestamp
  # is missing or unparseable are silently omitted from the result.
  vms.each_with_object([]) do |vm, ages|
    begin
      stamp = redis.hget("vmpooler__vm__#{vm}", timestamp_field)
      ages << (Time.now - Time.parse(stamp)).to_i if stamp
    rescue StandardError
      # Skip VMs with invalid timestamps.
    end
  end
end
def count_orphaned_metadata(redis)
  # Count VM metadata hashes whose VM is absent from every queue of its
  # pool. Entries with no 'pool' field, or that error while being checked,
  # are not counted.
  queue_names = %w[pending ready running completed discovered migrating]
  redis.keys('vmpooler__vm__*').count do |vm_key|
    begin
      vm = vm_key.sub('vmpooler__vm__', '')
      pool_name = redis.hget(vm_key, 'pool')
      if pool_name
        # none? short-circuits on the first queue that still tracks the VM.
        queue_names.none? { |queue| redis.sismember("vmpooler__#{queue}__#{pool_name}", vm) }
      else
        false
      end
    rescue StandardError
      false
    end
  end
end
def determine_health_status(metrics)
  # Classify overall queue health as 'healthy', 'degraded' or 'unhealthy'.
  # Critical limits (the *_critical thresholds and 2x queue maxima) are
  # evaluated before warning limits, so the worst applicable status wins.
  limits = health_thresholds
  dlq_size = metrics['errors']['dlq_total_size']
  stuck_count = metrics['errors']['stuck_vm_count']
  return 'unhealthy' if dlq_size > limits['dlq_max_critical']
  return 'unhealthy' if stuck_count > limits['stuck_vm_max_critical']
  # Defensive size lookup: a malformed pool entry counts as size 0.
  queue_size = proc do |queues, queue_name|
    begin
      queues[queue_name]['size']
    rescue StandardError
      0
    end
  end
  pool_queues = metrics['queues'].reject { |pool_name, _| pool_name == 'dlq' }
  pool_queues.each_value do |queues|
    return 'unhealthy' if queue_size.call(queues, 'pending') > limits['pending_queue_max'] * 2
    return 'unhealthy' if queue_size.call(queues, 'ready') > limits['ready_queue_max'] * 2
  end
  return 'degraded' if dlq_size > limits['dlq_max_warning']
  return 'degraded' if stuck_count > limits['stuck_vm_max_warning']
  pool_queues.each_value do |queues|
    return 'degraded' if queue_size.call(queues, 'pending') > limits['pending_queue_max']
    return 'degraded' if queue_size.call(queues, 'ready') > limits['ready_queue_max']
  end
  'healthy'
end
def log_health_summary(metrics, status)
  # Log a one-line health summary: status plus queue, DLQ, stuck-VM and
  # orphaned-metadata totals. Healthy summaries log at level 's', anything
  # else at 'd'. Malformed pool entries contribute 0 to the totals.
  totals = Hash.new(0)
  metrics['queues'].each do |pool_name, queues|
    next if pool_name == 'dlq'
    %w[pending ready completed].each do |queue_name|
      totals[queue_name] += begin
        queues[queue_name]['size']
      rescue StandardError
        0
      end
    end
  end
  errors = metrics['errors']
  parts = [
    "[*] [health] Status: #{status.upcase}",
    "Queues: P=#{totals['pending']} R=#{totals['ready']} C=#{totals['completed']}",
    "DLQ=#{errors['dlq_total_size']}",
    "Stuck=#{errors['stuck_vm_count']}",
    "Orphaned=#{errors['orphaned_metadata_count']}"
  ]
  $logger.log(status == 'healthy' ? 's' : 'd', parts.join(' | '))
end
def push_health_metrics(metrics, status)
  # Emit the computed health metrics as gauges: error totals first, then
  # per-pool queue stats, DLQ sizes, task counters, and finally the status
  # encoded numerically (0=healthy, 1=degraded, 2=unhealthy; unknown => 2).
  errors = metrics['errors']
  $metrics.gauge('vmpooler_health.dlq.total_size', errors['dlq_total_size'])
  $metrics.gauge('vmpooler_health.stuck_vms.count', errors['stuck_vm_count'])
  $metrics.gauge('vmpooler_health.orphaned_metadata.count', errors['orphaned_metadata_count'])
  # Which fields are gauged for each queue type.
  queue_fields = {
    'pending' => %w[size oldest_age stuck_count],
    'ready' => %w[size oldest_age],
    'completed' => %w[size]
  }
  metrics['queues'].each do |pool_name, queues|
    next if pool_name == 'dlq'
    queue_fields.each do |queue_name, fields|
      fields.each do |field|
        $metrics.gauge("vmpooler_health.queue.#{pool_name}.#{queue_name}.#{field}", queues[queue_name][field])
      end
    end
  end
  (metrics['queues']['dlq'] || {}).each do |queue_type, dlq_metrics|
    $metrics.gauge("vmpooler_health.dlq.#{queue_type}.size", dlq_metrics['size'])
  end
  $metrics.gauge('vmpooler_health.tasks.clone.active', metrics['tasks']['clone']['active'])
  $metrics.gauge('vmpooler_health.tasks.ondemand.active', metrics['tasks']['ondemand']['active'])
  $metrics.gauge('vmpooler_health.tasks.ondemand.pending', metrics['tasks']['ondemand']['pending'])
  status_value = { 'healthy' => 0, 'degraded' => 1, 'unhealthy' => 2 }.fetch(status, 2)
  $metrics.gauge('vmpooler_health.status', status_value)
end
def create_vm_disk(pool_name, vm, disk_size, provider) def create_vm_disk(pool_name, vm, disk_size, provider)
Thread.new do Thread.new do
begin begin
@ -1687,12 +957,7 @@ module Vmpooler
sync_pool_template(pool) sync_pool_template(pool)
loop do loop do
start_time = Time.now
result = _check_pool(pool, provider) result = _check_pool(pool, provider)
duration = Time.now - start_time
$metrics.gauge("vmpooler_performance.check_pool.#{pool['name']}", duration)
$logger.log('d', "[!] check_pool for #{pool['name']} took #{duration.round(2)}s") if duration > 5
if result[:cloned_vms] > 0 || result[:checked_pending_vms] > 0 || result[:discovered_vms] > 0 if result[:cloned_vms] > 0 || result[:checked_pending_vms] > 0 || result[:discovered_vms] > 0
loop_delay = loop_delay_min loop_delay = loop_delay_min
@ -1985,20 +1250,19 @@ module Vmpooler
end end
end end
def check_pending_pool_vms(pool_name, provider, pool_check_response, inventory, pool_timeout, pool_timeout_notification) def check_pending_pool_vms(pool_name, provider, pool_check_response, inventory, pool_timeout)
pool_timeout ||= $config[:config]['timeout'] || 15 pool_timeout ||= $config[:config]['timeout'] || 15
pool_timeout_notification ||= $config[:config]['timeout_notification'] || 5
@redis.with_metrics do |redis| @redis.with_metrics do |redis|
redis.smembers("vmpooler__pending__#{pool_name}").reverse.each do |vm| redis.smembers("vmpooler__pending__#{pool_name}").reverse.each do |vm|
if inventory[vm] if inventory[vm]
begin begin
pool_check_response[:checked_pending_vms] += 1 pool_check_response[:checked_pending_vms] += 1
check_pending_vm(vm, pool_name, pool_timeout, pool_timeout_notification, provider) check_pending_vm(vm, pool_name, pool_timeout, provider)
rescue StandardError => e rescue StandardError => e
$logger.log('d', "[!] [#{pool_name}] _check_pool failed with an error while evaluating pending VMs: #{e}") $logger.log('d', "[!] [#{pool_name}] _check_pool failed with an error while evaluating pending VMs: #{e}")
end end
else else
fail_pending_vm(vm, pool_name, pool_timeout, pool_timeout_notification, redis, exists: false) fail_pending_vm(vm, pool_name, pool_timeout, redis, exists: false)
end end
end end
end end
@ -2125,7 +1389,7 @@ module Vmpooler
check_ready_pool_vms(pool['name'], provider, pool_check_response, inventory, pool['ready_ttl'] || $config[:config]['ready_ttl']) check_ready_pool_vms(pool['name'], provider, pool_check_response, inventory, pool['ready_ttl'] || $config[:config]['ready_ttl'])
check_pending_pool_vms(pool['name'], provider, pool_check_response, inventory, pool['timeout'], pool['timeout_notification']) check_pending_pool_vms(pool['name'], provider, pool_check_response, inventory, pool['timeout'])
check_completed_pool_vms(pool['name'], provider, pool_check_response, inventory) check_completed_pool_vms(pool['name'], provider, pool_check_response, inventory)
@ -2251,15 +1515,6 @@ module Vmpooler
redis.zrem('vmpooler__provisioning__request', request_id) redis.zrem('vmpooler__provisioning__request', request_id)
return return
end end
# Check if request was already marked as failed (e.g., by delete endpoint)
request_status = redis.hget("vmpooler__odrequest__#{request_id}", 'status')
if request_status == 'failed'
$logger.log('s', "Request '#{request_id}' already marked as failed, skipping VM creation")
redis.zrem('vmpooler__provisioning__request', request_id)
return
end
score = redis.zscore('vmpooler__provisioning__request', request_id) score = redis.zscore('vmpooler__provisioning__request', request_id)
requested = requested.split(',') requested = requested.split(',')
@ -2483,48 +1738,6 @@ module Vmpooler
check_ondemand_requests(check_loop_delay_min, check_loop_delay_max, check_loop_delay_decay) check_ondemand_requests(check_loop_delay_min, check_loop_delay_max, check_loop_delay_decay)
end end
# Queue purge thread
if purge_enabled?
purge_interval = ($config[:config] && $config[:config]['purge_interval']) || 3600 # default 1 hour
if !$threads['queue_purge']
$threads['queue_purge'] = Thread.new do
loop do
purge_stale_queue_entries
sleep(purge_interval)
end
end
elsif !$threads['queue_purge'].alive?
$logger.log('d', '[!] [queue_purge] worker thread died, restarting')
$threads['queue_purge'] = Thread.new do
loop do
purge_stale_queue_entries
sleep(purge_interval)
end
end
end
end
# Health check thread
if health_check_enabled?
health_interval = ($config[:config] && $config[:config]['health_check_interval']) || 300 # default 5 minutes
if !$threads['health_check']
$threads['health_check'] = Thread.new do
loop do
check_queue_health
sleep(health_interval)
end
end
elsif !$threads['health_check'].alive?
$logger.log('d', '[!] [health_check] worker thread died, restarting')
$threads['health_check'] = Thread.new do
loop do
check_queue_health
sleep(health_interval)
end
end
end
end
sleep(loop_delay) sleep(loop_delay)
unless maxloop == 0 unless maxloop == 0

View file

@ -1,5 +1,5 @@
# frozen_string_literal: true # frozen_string_literal: true
module Vmpooler module Vmpooler
VERSION = '3.8.1' VERSION = '3.4.0'
end end

View file

@ -1,15 +0,0 @@
## [3.8.1](https://github.com/puppetlabs/vmpooler/tree/3.8.1) (2026-01-14)
[Full Changelog](https://github.com/puppetlabs/vmpooler/compare/3.7.0...3.8.1)
**Implemented enhancements:**
- \(P4DEVOPS-9434\) Add rate limiting and input validation security enhancements [\#690](https://github.com/puppetlabs/vmpooler/pull/690) ([mahima-singh](https://github.com/mahima-singh))
- \(P4DEVOPS-8570\) Add Phase 2 optimizations: status API caching and improved Redis pipelining [\#689](https://github.com/puppetlabs/vmpooler/pull/689) ([mahima-singh](https://github.com/mahima-singh))
- \(P4DEVOPS-8567\) Add DLQ, auto-purge, and health checks for Redis queues [\#688](https://github.com/puppetlabs/vmpooler/pull/688) ([mahima-singh](https://github.com/mahima-singh))
- Add retry logic for immediate clone failures [\#687](https://github.com/puppetlabs/vmpooler/pull/687) ([mahima-singh](https://github.com/mahima-singh))
**Fixed bugs:**
- \(P4DEVOPS-8567\) Prevent VM allocation for already-deleted request-ids [\#688](https://github.com/puppetlabs/vmpooler/pull/688) ([mahima-singh](https://github.com/mahima-singh))
- Prevent re-queueing requests already marked as failed [\#687](https://github.com/puppetlabs/vmpooler/pull/687) ([mahima-singh](https://github.com/mahima-singh))

View file

@ -3,14 +3,13 @@
# The container tag should closely match what is used in `docker/Dockerfile` in vmpooler-deployment # The container tag should closely match what is used in `docker/Dockerfile` in vmpooler-deployment
# #
# Update Gemfile.lock # Update Gemfile.lock
docker run -t --rm \ docker run -it --rm \
-v $(pwd):/app \ -v $(pwd):/app \
jruby:9.4.12.1-jdk11 \ jruby:9.4.3.0-jdk11 \
/bin/bash -c 'apt-get update -qq && apt-get install -y --no-install-recommends git make netbase && cd /app && gem install bundler && bundle install --jobs 3; echo "LOCK_FILE_UPDATE_EXIT_CODE=$?"' /bin/bash -c 'apt-get update -qq && apt-get install -y --no-install-recommends git make netbase && cd /app && gem install bundler && bundle install --jobs 3; echo "LOCK_FILE_UPDATE_EXIT_CODE=$?"'
# Update Changelog # Update Changelog
docker run -t --rm -e CHANGELOG_GITHUB_TOKEN -v $(pwd):/usr/local/src/your-app \ docker run -it --rm -e CHANGELOG_GITHUB_TOKEN -v $(pwd):/usr/local/src/your-app \
githubchangeloggenerator/github-changelog-generator:1.16.4 \ githubchangeloggenerator/github-changelog-generator:1.16.2 \
github_changelog_generator --future-release $(grep VERSION lib/vmpooler/version.rb |rev |cut -d "'" -f2 |rev) \ github_changelog_generator --future-release $(grep VERSION lib/vmpooler/version.rb |rev |cut -d "'" -f2 |rev)
--token $CHANGELOG_GITHUB_TOKEN --release-branch main

View file

@ -17,8 +17,6 @@ describe Vmpooler::API::V3 do
# https://rubydoc.info/gems/sinatra/Sinatra/Base#reset!-class_method # https://rubydoc.info/gems/sinatra/Sinatra/Base#reset!-class_method
before(:each) do before(:each) do
app.reset! app.reset!
# Clear status cache to prevent test interference
Vmpooler::API::V3.clear_status_cache
end end
describe 'status and metrics endpoints' do describe 'status and metrics endpoints' do

View file

@ -116,7 +116,7 @@ describe Vmpooler::API::Helpers do
allow(redis).to receive(:pipelined).with(no_args).and_return [0] allow(redis).to receive(:pipelined).with(no_args).and_return [0]
allow(redis).to receive(:get).and_return 0 allow(redis).to receive(:get).and_return 0
expect(subject.get_queue_metrics([], redis)).to eq({requested: 0, pending: 0, cloning: 0, booting: 0, ready: 0, running: 0, completed: 0, total: 0}) expect(subject.get_queue_metrics([], redis)).to eq({pending: 0, cloning: 0, booting: 0, ready: 0, running: 0, completed: 0, total: 0})
end end
it 'adds pool queues correctly' do it 'adds pool queues correctly' do
@ -125,14 +125,10 @@ describe Vmpooler::API::Helpers do
{'name' => 'p2'} {'name' => 'p2'}
] ]
# Mock returns 7*2 + 2 = 16 results (7 queue types for 2 pools + 2 global counters) allow(redis).to receive(:pipelined).with(no_args).and_return [1,1]
# For each pool: [request, processing, odcreate, pending, ready, running, completed] allow(redis).to receive(:get).and_return(1,0)
# Plus 2 global counters: clone (1), ondemandclone (0)
# Results array: [1,1, 1,1, 1,1, 1,1, 1,1, 1,1, 1,1, 1, 0]
# [req, proc, odc, pend, rdy, run, comp, clone, odc]
allow(redis).to receive(:pipelined).with(no_args).and_return [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0]
expect(subject.get_queue_metrics(pools, redis)).to eq({requested: 6, pending: 2, cloning: 1, booting: 1, ready: 2, running: 2, completed: 2, total: 14}) expect(subject.get_queue_metrics(pools, redis)).to eq({pending: 2, cloning: 1, booting: 1, ready: 2, running: 2, completed: 2, total: 8})
end end
it 'sets booting to 0 when negative calculation' do it 'sets booting to 0 when negative calculation' do
@ -141,10 +137,10 @@ describe Vmpooler::API::Helpers do
{'name' => 'p2'} {'name' => 'p2'}
] ]
# Mock returns 7*2 + 2 = 16 results with clone=5 to cause negative booting allow(redis).to receive(:pipelined).with(no_args).and_return [1,1]
allow(redis).to receive(:pipelined).with(no_args).and_return [1,1,1,1,1,1,1,1,1,1,1,1,1,1,5,0] allow(redis).to receive(:get).and_return(5,0)
expect(subject.get_queue_metrics(pools, redis)).to eq({requested: 6, pending: 2, cloning: 5, booting: 0, ready: 2, running: 2, completed: 2, total: 14}) expect(subject.get_queue_metrics(pools, redis)).to eq({pending: 2, cloning: 5, booting: 0, ready: 2, running: 2, completed: 2, total: 8})
end end
end end
@ -272,62 +268,22 @@ describe Vmpooler::API::Helpers do
:tls_options => { :ssl_version => 'TLSv1' } :tls_options => { :ssl_version => 'TLSv1' }
} }
end end
it 'should attempt ldap authentication' do
expect(subject).to receive(:authenticate_ldap).with(default_port, host, default_encryption, user_object, base, username_str, password_str)
context 'without a service account' do subject.authenticate(auth, username_str, password_str)
it 'should attempt ldap authentication' do
expect(subject).to receive(:authenticate_ldap).with(default_port, host, default_encryption, user_object, base, username_str, password_str, nil)
subject.authenticate(auth, username_str, password_str)
end
it 'should return true when authentication is successful' do
expect(subject).to receive(:authenticate_ldap).with(default_port, host, default_encryption, user_object, base, username_str, password_str, nil).and_return(true)
expect(subject.authenticate(auth, username_str, password_str)).to be true
end
it 'should return false when authentication fails' do
expect(subject).to receive(:authenticate_ldap).with(default_port, host, default_encryption, user_object, base, username_str, password_str, nil).and_return(false)
expect(subject.authenticate(auth, username_str, password_str)).to be false
end
end end
context 'with a service account' do it 'should return true when authentication is successful' do
let(:service_account_hash) do expect(subject).to receive(:authenticate_ldap).with(default_port, host, default_encryption, user_object, base, username_str, password_str).and_return(true)
{
:user_dn => 'cn=Service Account,ou=users,dc=example,dc=com',
:password => 's3cr3t'
}
end
let(:auth) {
{
'provider' => 'ldap',
ldap: {
'host' => host,
'base' => base,
'user_object' => user_object,
'service_account_hash' => service_account_hash
}
}
}
it 'should attempt ldap authentication' do
expect(subject).to receive(:authenticate_ldap).with(default_port, host, default_encryption, user_object, base, username_str, password_str, service_account_hash)
subject.authenticate(auth, username_str, password_str) expect(subject.authenticate(auth, username_str, password_str)).to be true
end end
it 'should return true when authentication is successful' do it 'should return false when authentication fails' do
expect(subject).to receive(:authenticate_ldap).with(default_port, host, default_encryption, user_object, base, username_str, password_str, service_account_hash).and_return(true) expect(subject).to receive(:authenticate_ldap).with(default_port, host, default_encryption, user_object, base, username_str, password_str).and_return(false)
expect(subject.authenticate(auth, username_str, password_str)).to be true expect(subject.authenticate(auth, username_str, password_str)).to be false
end
it 'should return false when authentication fails' do
expect(subject).to receive(:authenticate_ldap).with(default_port, host, default_encryption, user_object, base, username_str, password_str, service_account_hash).and_return(false)
expect(subject.authenticate(auth, username_str, password_str)).to be false
end
end end
context 'with an alternate ssl_version' do context 'with an alternate ssl_version' do
@ -342,7 +298,7 @@ describe Vmpooler::API::Helpers do
end end
it 'should specify the alternate ssl_version when authenticating' do it 'should specify the alternate ssl_version when authenticating' do
expect(subject).to receive(:authenticate_ldap).with(default_port, host, secure_encryption, user_object, base, username_str, password_str, nil) expect(subject).to receive(:authenticate_ldap).with(default_port, host, secure_encryption, user_object, base, username_str, password_str)
subject.authenticate(auth, username_str, password_str) subject.authenticate(auth, username_str, password_str)
end end
@ -355,7 +311,7 @@ describe Vmpooler::API::Helpers do
end end
it 'should specify the alternate port when authenticating' do it 'should specify the alternate port when authenticating' do
expect(subject).to receive(:authenticate_ldap).with(alternate_port, host, default_encryption, user_object, base, username_str, password_str, nil) expect(subject).to receive(:authenticate_ldap).with(alternate_port, host, default_encryption, user_object, base, username_str, password_str)
subject.authenticate(auth, username_str, password_str) subject.authenticate(auth, username_str, password_str)
end end
@ -375,7 +331,7 @@ describe Vmpooler::API::Helpers do
end end
it 'should specify the secure port and encryption options when authenticating' do it 'should specify the secure port and encryption options when authenticating' do
expect(subject).to receive(:authenticate_ldap).with(secure_port, host, secure_encryption, user_object, base, username_str, password_str, nil) expect(subject).to receive(:authenticate_ldap).with(secure_port, host, secure_encryption, user_object, base, username_str, password_str)
subject.authenticate(auth, username_str, password_str) subject.authenticate(auth, username_str, password_str)
end end
@ -393,36 +349,36 @@ describe Vmpooler::API::Helpers do
end end
it 'should attempt to bind with each base' do it 'should attempt to bind with each base' do
expect(subject).to receive(:authenticate_ldap).with(default_port, host, default_encryption, user_object, base[0], username_str, password_str, nil) expect(subject).to receive(:authenticate_ldap).with(default_port, host, default_encryption, user_object, base[0], username_str, password_str)
expect(subject).to receive(:authenticate_ldap).with(default_port, host, default_encryption, user_object, base[1], username_str, password_str, nil) expect(subject).to receive(:authenticate_ldap).with(default_port, host, default_encryption, user_object, base[1], username_str, password_str)
subject.authenticate(auth, username_str, password_str) subject.authenticate(auth, username_str, password_str)
end end
it 'should not search the second base when the first binds' do it 'should not search the second base when the first binds' do
expect(subject).to receive(:authenticate_ldap).with(default_port, host, default_encryption, user_object, base[0], username_str, password_str, nil).and_return(true) expect(subject).to receive(:authenticate_ldap).with(default_port, host, default_encryption, user_object, base[0], username_str, password_str).and_return(true)
expect(subject).to_not receive(:authenticate_ldap).with(default_port, host, default_encryption, user_object, base[1], username_str, password_str, nil) expect(subject).to_not receive(:authenticate_ldap).with(default_port, host, default_encryption, user_object, base[1], username_str, password_str)
subject.authenticate(auth, username_str, password_str) subject.authenticate(auth, username_str, password_str)
end end
it 'should search the second base when the first bind fails' do it 'should search the second base when the first bind fails' do
expect(subject).to receive(:authenticate_ldap).with(default_port, host, default_encryption, user_object, base[0], username_str, password_str, nil).and_return(false) expect(subject).to receive(:authenticate_ldap).with(default_port, host, default_encryption, user_object, base[0], username_str, password_str).and_return(false)
expect(subject).to receive(:authenticate_ldap).with(default_port, host, default_encryption, user_object, base[1], username_str, password_str, nil) expect(subject).to receive(:authenticate_ldap).with(default_port, host, default_encryption, user_object, base[1], username_str, password_str)
subject.authenticate(auth, username_str, password_str) subject.authenticate(auth, username_str, password_str)
end end
it 'should return true when any bind succeeds' do it 'should return true when any bind succeeds' do
expect(subject).to receive(:authenticate_ldap).with(default_port, host, default_encryption, user_object, base[0], username_str, password_str, nil).and_return(false) expect(subject).to receive(:authenticate_ldap).with(default_port, host, default_encryption, user_object, base[0], username_str, password_str).and_return(false)
expect(subject).to receive(:authenticate_ldap).with(default_port, host, default_encryption, user_object, base[1], username_str, password_str, nil).and_return(true) expect(subject).to receive(:authenticate_ldap).with(default_port, host, default_encryption, user_object, base[1], username_str, password_str).and_return(true)
expect(subject.authenticate(auth, username_str, password_str)).to be true expect(subject.authenticate(auth, username_str, password_str)).to be true
end end
it 'should return false when all bind attempts fail' do it 'should return false when all bind attempts fail' do
expect(subject).to receive(:authenticate_ldap).with(default_port, host, default_encryption, user_object, base[0], username_str, password_str, nil).and_return(false) expect(subject).to receive(:authenticate_ldap).with(default_port, host, default_encryption, user_object, base[0], username_str, password_str).and_return(false)
expect(subject).to receive(:authenticate_ldap).with(default_port, host, default_encryption, user_object, base[1], username_str, password_str, nil).and_return(false) expect(subject).to receive(:authenticate_ldap).with(default_port, host, default_encryption, user_object, base[1], username_str, password_str).and_return(false)
expect(subject.authenticate(auth, username_str, password_str)).to be false expect(subject.authenticate(auth, username_str, password_str)).to be false
end end
@ -440,36 +396,36 @@ describe Vmpooler::API::Helpers do
end end
it 'should attempt to bind with each user object' do it 'should attempt to bind with each user object' do
expect(subject).to receive(:authenticate_ldap).with(default_port, host, default_encryption, user_object[0], base, username_str, password_str, nil) expect(subject).to receive(:authenticate_ldap).with(default_port, host, default_encryption, user_object[0], base, username_str, password_str)
expect(subject).to receive(:authenticate_ldap).with(default_port, host, default_encryption, user_object[1], base, username_str, password_str, nil) expect(subject).to receive(:authenticate_ldap).with(default_port, host, default_encryption, user_object[1], base, username_str, password_str)
subject.authenticate(auth, username_str, password_str) subject.authenticate(auth, username_str, password_str)
end end
it 'should not search the second user object when the first binds' do it 'should not search the second user object when the first binds' do
expect(subject).to receive(:authenticate_ldap).with(default_port, host, default_encryption, user_object[0], base, username_str, password_str, nil).and_return(true) expect(subject).to receive(:authenticate_ldap).with(default_port, host, default_encryption, user_object[0], base, username_str, password_str).and_return(true)
expect(subject).to_not receive(:authenticate_ldap).with(default_port, host, default_encryption, user_object[1], base, username_str, password_str, nil) expect(subject).to_not receive(:authenticate_ldap).with(default_port, host, default_encryption, user_object[1], base, username_str, password_str)
subject.authenticate(auth, username_str, password_str) subject.authenticate(auth, username_str, password_str)
end end
it 'should search the second user object when the first bind fails' do it 'should search the second user object when the first bind fails' do
expect(subject).to receive(:authenticate_ldap).with(default_port, host, default_encryption, user_object[0], base, username_str, password_str, nil).and_return(false) expect(subject).to receive(:authenticate_ldap).with(default_port, host, default_encryption, user_object[0], base, username_str, password_str).and_return(false)
expect(subject).to receive(:authenticate_ldap).with(default_port, host, default_encryption, user_object[1], base, username_str, password_str, nil) expect(subject).to receive(:authenticate_ldap).with(default_port, host, default_encryption, user_object[1], base, username_str, password_str)
subject.authenticate(auth, username_str, password_str) subject.authenticate(auth, username_str, password_str)
end end
it 'should return true when any bind succeeds' do it 'should return true when any bind succeeds' do
expect(subject).to receive(:authenticate_ldap).with(default_port, host, default_encryption, user_object[0], base, username_str, password_str, nil).and_return(false) expect(subject).to receive(:authenticate_ldap).with(default_port, host, default_encryption, user_object[0], base, username_str, password_str).and_return(false)
expect(subject).to receive(:authenticate_ldap).with(default_port, host, default_encryption, user_object[1], base, username_str, password_str, nil).and_return(true) expect(subject).to receive(:authenticate_ldap).with(default_port, host, default_encryption, user_object[1], base, username_str, password_str).and_return(true)
expect(subject.authenticate(auth, username_str, password_str)).to be true expect(subject.authenticate(auth, username_str, password_str)).to be true
end end
it 'should return false when all bind attempts fail' do it 'should return false when all bind attempts fail' do
expect(subject).to receive(:authenticate_ldap).with(default_port, host, default_encryption, user_object[0], base, username_str, password_str, nil).and_return(false) expect(subject).to receive(:authenticate_ldap).with(default_port, host, default_encryption, user_object[0], base, username_str, password_str).and_return(false)
expect(subject).to receive(:authenticate_ldap).with(default_port, host, default_encryption, user_object[1], base, username_str, password_str, nil).and_return(false) expect(subject).to receive(:authenticate_ldap).with(default_port, host, default_encryption, user_object[1], base, username_str, password_str).and_return(false)
expect(subject.authenticate(auth, username_str, password_str)).to be false expect(subject.authenticate(auth, username_str, password_str)).to be false
end end
@ -494,64 +450,64 @@ describe Vmpooler::API::Helpers do
end end
it 'should attempt to bind with each user object and base' do it 'should attempt to bind with each user object and base' do
expect(subject).to receive(:authenticate_ldap).with(default_port, host, default_encryption, user_object[0], base[0], username_str, password_str, nil) expect(subject).to receive(:authenticate_ldap).with(default_port, host, default_encryption, user_object[0], base[0], username_str, password_str)
expect(subject).to receive(:authenticate_ldap).with(default_port, host, default_encryption, user_object[1], base[0], username_str, password_str, nil) expect(subject).to receive(:authenticate_ldap).with(default_port, host, default_encryption, user_object[1], base[0], username_str, password_str)
expect(subject).to receive(:authenticate_ldap).with(default_port, host, default_encryption, user_object[0], base[1], username_str, password_str, nil) expect(subject).to receive(:authenticate_ldap).with(default_port, host, default_encryption, user_object[0], base[1], username_str, password_str)
expect(subject).to receive(:authenticate_ldap).with(default_port, host, default_encryption, user_object[1], base[1], username_str, password_str, nil) expect(subject).to receive(:authenticate_ldap).with(default_port, host, default_encryption, user_object[1], base[1], username_str, password_str)
subject.authenticate(auth, username_str, password_str) subject.authenticate(auth, username_str, password_str)
end end
it 'should not continue searching when the first combination binds' do it 'should not continue searching when the first combination binds' do
expect(subject).to receive(:authenticate_ldap).with(default_port, host, default_encryption, user_object[0], base[0], username_str, password_str, nil).and_return(true) expect(subject).to receive(:authenticate_ldap).with(default_port, host, default_encryption, user_object[0], base[0], username_str, password_str).and_return(true)
expect(subject).to_not receive(:authenticate_ldap).with(default_port, host, default_encryption, user_object[1], base[0], username_str, password_str, nil) expect(subject).to_not receive(:authenticate_ldap).with(default_port, host, default_encryption, user_object[1], base[0], username_str, password_str)
expect(subject).to_not receive(:authenticate_ldap).with(default_port, host, default_encryption, user_object[0], base[1], username_str, password_str, nil) expect(subject).to_not receive(:authenticate_ldap).with(default_port, host, default_encryption, user_object[0], base[1], username_str, password_str)
expect(subject).to_not receive(:authenticate_ldap).with(default_port, host, default_encryption, user_object[1], base[1], username_str, password_str, nil) expect(subject).to_not receive(:authenticate_ldap).with(default_port, host, default_encryption, user_object[1], base[1], username_str, password_str)
subject.authenticate(auth, username_str, password_str) subject.authenticate(auth, username_str, password_str)
end end
it 'should search the remaining combinations when the first bind fails' do it 'should search the remaining combinations when the first bind fails' do
expect(subject).to receive(:authenticate_ldap).with(default_port, host, default_encryption, user_object[0], base[0], username_str, password_str, nil).and_return(false) expect(subject).to receive(:authenticate_ldap).with(default_port, host, default_encryption, user_object[0], base[0], username_str, password_str).and_return(false)
expect(subject).to receive(:authenticate_ldap).with(default_port, host, default_encryption, user_object[1], base[0], username_str, password_str, nil) expect(subject).to receive(:authenticate_ldap).with(default_port, host, default_encryption, user_object[1], base[0], username_str, password_str)
expect(subject).to receive(:authenticate_ldap).with(default_port, host, default_encryption, user_object[0], base[1], username_str, password_str, nil) expect(subject).to receive(:authenticate_ldap).with(default_port, host, default_encryption, user_object[0], base[1], username_str, password_str)
expect(subject).to receive(:authenticate_ldap).with(default_port, host, default_encryption, user_object[1], base[1], username_str, password_str, nil) expect(subject).to receive(:authenticate_ldap).with(default_port, host, default_encryption, user_object[1], base[1], username_str, password_str)
subject.authenticate(auth, username_str, password_str) subject.authenticate(auth, username_str, password_str)
end end
it 'should search the remaining combinations when the first two binds fail' do it 'should search the remaining combinations when the first two binds fail' do
expect(subject).to receive(:authenticate_ldap).with(default_port, host, default_encryption, user_object[0], base[0], username_str, password_str, nil).and_return(false) expect(subject).to receive(:authenticate_ldap).with(default_port, host, default_encryption, user_object[0], base[0], username_str, password_str).and_return(false)
expect(subject).to receive(:authenticate_ldap).with(default_port, host, default_encryption, user_object[1], base[0], username_str, password_str, nil).and_return(false) expect(subject).to receive(:authenticate_ldap).with(default_port, host, default_encryption, user_object[1], base[0], username_str, password_str).and_return(false)
expect(subject).to receive(:authenticate_ldap).with(default_port, host, default_encryption, user_object[0], base[1], username_str, password_str, nil) expect(subject).to receive(:authenticate_ldap).with(default_port, host, default_encryption, user_object[0], base[1], username_str, password_str)
expect(subject).to receive(:authenticate_ldap).with(default_port, host, default_encryption, user_object[1], base[1], username_str, password_str, nil) expect(subject).to receive(:authenticate_ldap).with(default_port, host, default_encryption, user_object[1], base[1], username_str, password_str)
subject.authenticate(auth, username_str, password_str) subject.authenticate(auth, username_str, password_str)
end end
it 'should search the remaining combination when the first three binds fail' do it 'should search the remaining combination when the first three binds fail' do
expect(subject).to receive(:authenticate_ldap).with(default_port, host, default_encryption, user_object[0], base[0], username_str, password_str, nil).and_return(false) expect(subject).to receive(:authenticate_ldap).with(default_port, host, default_encryption, user_object[0], base[0], username_str, password_str).and_return(false)
expect(subject).to receive(:authenticate_ldap).with(default_port, host, default_encryption, user_object[1], base[0], username_str, password_str, nil).and_return(false) expect(subject).to receive(:authenticate_ldap).with(default_port, host, default_encryption, user_object[1], base[0], username_str, password_str).and_return(false)
expect(subject).to receive(:authenticate_ldap).with(default_port, host, default_encryption, user_object[0], base[1], username_str, password_str, nil).and_return(false) expect(subject).to receive(:authenticate_ldap).with(default_port, host, default_encryption, user_object[0], base[1], username_str, password_str).and_return(false)
expect(subject).to receive(:authenticate_ldap).with(default_port, host, default_encryption, user_object[1], base[1], username_str, password_str, nil) expect(subject).to receive(:authenticate_ldap).with(default_port, host, default_encryption, user_object[1], base[1], username_str, password_str)
subject.authenticate(auth, username_str, password_str) subject.authenticate(auth, username_str, password_str)
end end
it 'should return true when any bind succeeds' do it 'should return true when any bind succeeds' do
expect(subject).to receive(:authenticate_ldap).with(default_port, host, default_encryption, user_object[0], base[0], username_str, password_str, nil).and_return(false) expect(subject).to receive(:authenticate_ldap).with(default_port, host, default_encryption, user_object[0], base[0], username_str, password_str).and_return(false)
expect(subject).to receive(:authenticate_ldap).with(default_port, host, default_encryption, user_object[1], base[0], username_str, password_str, nil).and_return(false) expect(subject).to receive(:authenticate_ldap).with(default_port, host, default_encryption, user_object[1], base[0], username_str, password_str).and_return(false)
expect(subject).to receive(:authenticate_ldap).with(default_port, host, default_encryption, user_object[0], base[1], username_str, password_str, nil).and_return(false) expect(subject).to receive(:authenticate_ldap).with(default_port, host, default_encryption, user_object[0], base[1], username_str, password_str).and_return(false)
expect(subject).to receive(:authenticate_ldap).with(default_port, host, default_encryption, user_object[1], base[1], username_str, password_str, nil).and_return(true) expect(subject).to receive(:authenticate_ldap).with(default_port, host, default_encryption, user_object[1], base[1], username_str, password_str).and_return(true)
expect(subject.authenticate(auth, username_str, password_str)).to be true expect(subject.authenticate(auth, username_str, password_str)).to be true
end end
it 'should return false when all bind attempts fail' do it 'should return false when all bind attempts fail' do
expect(subject).to receive(:authenticate_ldap).with(default_port, host, default_encryption, user_object[0], base[0], username_str, password_str, nil).and_return(false) expect(subject).to receive(:authenticate_ldap).with(default_port, host, default_encryption, user_object[0], base[0], username_str, password_str).and_return(false)
expect(subject).to receive(:authenticate_ldap).with(default_port, host, default_encryption, user_object[1], base[0], username_str, password_str, nil).and_return(false) expect(subject).to receive(:authenticate_ldap).with(default_port, host, default_encryption, user_object[1], base[0], username_str, password_str).and_return(false)
expect(subject).to receive(:authenticate_ldap).with(default_port, host, default_encryption, user_object[0], base[1], username_str, password_str, nil).and_return(false) expect(subject).to receive(:authenticate_ldap).with(default_port, host, default_encryption, user_object[0], base[1], username_str, password_str).and_return(false)
expect(subject).to receive(:authenticate_ldap).with(default_port, host, default_encryption, user_object[1], base[1], username_str, password_str, nil).and_return(false) expect(subject).to receive(:authenticate_ldap).with(default_port, host, default_encryption, user_object[1], base[1], username_str, password_str).and_return(false)
expect(subject.authenticate(auth, username_str, password_str)).to be false expect(subject.authenticate(auth, username_str, password_str)).to be false
end end
@ -585,12 +541,6 @@ describe Vmpooler::API::Helpers do
:tls_options => { :ssl_version => 'TLSv1' } :tls_options => { :ssl_version => 'TLSv1' }
} }
end end
let(:service_account_hash) do
{
:user_dn => 'cn=Service Account,ou=users,dc=example,dc=com',
:password => 's3cr3t'
}
end
let(:ldap) { double('ldap') } let(:ldap) { double('ldap') }
it 'should create a new ldap connection' do it 'should create a new ldap connection' do
allow(ldap).to receive(:bind) allow(ldap).to receive(:bind)
@ -622,20 +572,6 @@ describe Vmpooler::API::Helpers do
expect(subject.authenticate_ldap(port, host, encryption, user_object, base, username_str, password_str)).to be false expect(subject.authenticate_ldap(port, host, encryption, user_object, base, username_str, password_str)).to be false
end end
it 'should return true when a bind_as is successful' do
expect(Net::LDAP).to receive(:new).and_return(ldap)
expect(ldap).to receive(:bind_as).and_return(true)
expect(subject.authenticate_ldap(port, host, encryption, user_object, base, username_str, password_str, service_account_hash)).to be true
end
it 'should return false when a bind_as fails' do
expect(Net::LDAP).to receive(:new).and_return(ldap)
expect(ldap).to receive(:bind_as).and_return(false)
expect(subject.authenticate_ldap(port, host, encryption, user_object, base, username_str, password_str, service_account_hash)).to be false
end
end end
end end

View file

@ -1,184 +0,0 @@
# frozen_string_literal: true
require 'spec_helper'
require 'rack/test'
require 'vmpooler/api/input_validator'
describe Vmpooler::API::InputValidator do
let(:test_class) do
Class.new do
include Vmpooler::API::InputValidator
end
end
let(:validator) { test_class.new }
describe '#validate_hostname' do
it 'accepts valid hostnames' do
expect(validator.validate_hostname('test-host.example.com')).to be true
expect(validator.validate_hostname('host123')).to be true
end
it 'rejects invalid hostnames' do
result = validator.validate_hostname('invalid_host!')
expect(result['ok']).to be false
expect(result['error']).to include('Invalid hostname format')
end
it 'rejects hostnames that are too long' do
long_hostname = 'a' * 300
result = validator.validate_hostname(long_hostname)
expect(result['ok']).to be false
expect(result['error']).to include('too long')
end
it 'rejects empty hostnames' do
result = validator.validate_hostname('')
expect(result['ok']).to be false
expect(result['error']).to include('required')
end
end
describe '#validate_pool_name' do
it 'accepts valid pool names' do
expect(validator.validate_pool_name('centos-7-x86_64')).to be true
expect(validator.validate_pool_name('ubuntu-2204')).to be true
end
it 'rejects invalid pool names' do
result = validator.validate_pool_name('invalid pool!')
expect(result['ok']).to be false
expect(result['error']).to include('Invalid pool name format')
end
it 'rejects pool names that are too long' do
result = validator.validate_pool_name('a' * 150)
expect(result['ok']).to be false
expect(result['error']).to include('too long')
end
end
describe '#validate_tag' do
it 'accepts valid tags' do
expect(validator.validate_tag('project', 'test-123')).to be true
expect(validator.validate_tag('owner', 'user@example.com')).to be true
end
it 'rejects tags with invalid keys' do
result = validator.validate_tag('invalid key!', 'value')
expect(result['ok']).to be false
expect(result['error']).to include('Invalid tag key format')
end
it 'rejects tags with invalid characters in value' do
result = validator.validate_tag('key', 'value<script>')
expect(result['ok']).to be false
expect(result['error']).to include('invalid characters')
end
it 'rejects tags that are too long' do
result = validator.validate_tag('key', 'a' * 300)
expect(result['ok']).to be false
expect(result['error']).to include('too long')
end
end
describe '#validate_vm_count' do
it 'accepts valid VM counts' do
expect(validator.validate_vm_count(5)).to eq(5)
expect(validator.validate_vm_count('10')).to eq(10)
end
it 'rejects counts less than 1' do
result = validator.validate_vm_count(0)
expect(result['ok']).to be false
expect(result['error']).to include('at least 1')
end
it 'rejects counts greater than 100' do
result = validator.validate_vm_count(150)
expect(result['ok']).to be false
expect(result['error']).to include('at most 100')
end
it 'rejects non-integer values' do
result = validator.validate_vm_count('abc')
expect(result['ok']).to be false
expect(result['error']).to include('valid integer')
end
end
describe '#validate_disk_size' do
it 'accepts valid disk sizes' do
expect(validator.validate_disk_size(50)).to eq(50)
expect(validator.validate_disk_size('100')).to eq(100)
end
it 'rejects sizes less than 1' do
result = validator.validate_disk_size(0)
expect(result['ok']).to be false
end
it 'rejects sizes greater than 2048' do
result = validator.validate_disk_size(3000)
expect(result['ok']).to be false
end
end
describe '#validate_lifetime' do
it 'accepts valid lifetimes' do
expect(validator.validate_lifetime(24)).to eq(24)
expect(validator.validate_lifetime('48')).to eq(48)
end
it 'rejects lifetimes greater than 168 hours (1 week)' do
result = validator.validate_lifetime(200)
expect(result['ok']).to be false
expect(result['error']).to include('at most 168')
end
end
describe '#sanitize_json_body' do
it 'parses valid JSON' do
result = validator.sanitize_json_body('{"key": "value"}')
expect(result).to eq('key' => 'value')
end
it 'rejects invalid JSON' do
result = validator.sanitize_json_body('{invalid}')
expect(result['ok']).to be false
expect(result['error']).to include('Invalid JSON')
end
it 'rejects non-object JSON' do
result = validator.sanitize_json_body('["array"]')
expect(result['ok']).to be false
expect(result['error']).to include('must be a JSON object')
end
it 'rejects deeply nested JSON' do
deep_json = '{"a":{"b":{"c":{"d":{"e":{"f":"too deep"}}}}}}'
result = validator.sanitize_json_body(deep_json)
expect(result['ok']).to be false
expect(result['error']).to include('too complex')
end
it 'rejects bodies that are too large' do
large_json = '{"data":"' + ('a' * 20000) + '"}'
result = validator.sanitize_json_body(large_json)
expect(result['ok']).to be false
expect(result['error']).to include('too large')
end
end
describe '#validation_error?' do
it 'returns true for error responses' do
error = { 'ok' => false, 'error' => 'test error' }
expect(validator.validation_error?(error)).to be true
end
it 'returns false for successful responses' do
expect(validator.validation_error?(true)).to be false
expect(validator.validation_error?(5)).to be false
end
end
end

View file

@ -14,7 +14,6 @@ describe 'Pool Manager' do
let(:pool) { 'pool1' } let(:pool) { 'pool1' }
let(:vm) { 'vm1' } let(:vm) { 'vm1' }
let(:timeout) { 5 } let(:timeout) { 5 }
let(:timeout_notification) { 2 }
let(:host) { double('host') } let(:host) { double('host') }
let(:token) { 'token1234' } let(:token) { 'token1234' }
let(:request_id) { '1234' } let(:request_id) { '1234' }
@ -190,9 +189,9 @@ EOT
it 'calls _check_pending_vm' do it 'calls _check_pending_vm' do
expect(Thread).to receive(:new).and_yield expect(Thread).to receive(:new).and_yield
expect(subject).to receive(:_check_pending_vm).with(vm, pool, timeout, timeout_notification, provider) expect(subject).to receive(:_check_pending_vm).with(vm,pool,timeout,provider)
subject.check_pending_vm(vm, pool, timeout, timeout_notification, provider) subject.check_pending_vm(vm, pool, timeout, provider)
end end
end end
@ -209,16 +208,16 @@ EOT
expect(subject).to receive(:move_pending_vm_to_ready).with(vm, pool, redis, nil) expect(subject).to receive(:move_pending_vm_to_ready).with(vm, pool, redis, nil)
end end
subject._check_pending_vm(vm, pool, timeout, timeout_notification, provider) subject._check_pending_vm(vm, pool, timeout, provider)
end end
it 'calls fail_pending_vm if host is not ready' do it 'calls fail_pending_vm if host is not ready' do
redis_connection_pool.with do |redis| redis_connection_pool.with do |redis|
expect(provider).to receive(:vm_ready?).with(pool, vm, redis).and_return(false) expect(provider).to receive(:vm_ready?).with(pool, vm, redis).and_return(false)
expect(subject).to receive(:fail_pending_vm).with(vm, pool, timeout, timeout_notification, redis) expect(subject).to receive(:fail_pending_vm).with(vm, pool, timeout, redis)
end end
subject._check_pending_vm(vm, pool, timeout, timeout_notification, provider) subject._check_pending_vm(vm, pool, timeout, provider)
end end
end end
@ -231,7 +230,7 @@ EOT
it 'should return' do it 'should return' do
expect(subject).to receive(:vm_mutex).and_return(mutex) expect(subject).to receive(:vm_mutex).and_return(mutex)
expect(subject._check_pending_vm(vm, pool, timeout, timeout_notification, provider)).to be_nil expect(subject._check_pending_vm(vm, pool, timeout, provider)).to be_nil
end end
end end
end end
@ -275,16 +274,14 @@ EOT
it 'takes no action if VM is not cloning' do it 'takes no action if VM is not cloning' do
redis_connection_pool.with do |redis| redis_connection_pool.with do |redis|
expect(logger).to_not receive(:log) expect(subject.fail_pending_vm(vm, pool, timeout, redis)).to eq(true)
expect(subject.fail_pending_vm(vm, pool, timeout, timeout_notification, redis)).to eq(true)
end end
end end
it 'takes no action if VM is within timeout' do it 'takes no action if VM is within timeout' do
redis_connection_pool.with do |redis| redis_connection_pool.with do |redis|
redis.hset("vmpooler__vm__#{vm}", 'clone',Time.now.to_s) redis.hset("vmpooler__vm__#{vm}", 'clone',Time.now.to_s)
expect(logger).to_not receive(:log) expect(subject.fail_pending_vm(vm, pool, timeout, redis)).to eq(true)
expect(subject.fail_pending_vm(vm, pool, timeout, timeout_notification, redis)).to eq(true)
expect(redis.sismember("vmpooler__pending__#{pool}", vm)).to be(true) expect(redis.sismember("vmpooler__pending__#{pool}", vm)).to be(true)
end end
end end
@ -292,7 +289,7 @@ EOT
it 'moves VM to completed queue if VM has exceeded timeout and exists' do it 'moves VM to completed queue if VM has exceeded timeout and exists' do
redis_connection_pool.with do |redis| redis_connection_pool.with do |redis|
redis.hset("vmpooler__vm__#{vm}", 'clone',Date.new(2001,1,1).to_s) redis.hset("vmpooler__vm__#{vm}", 'clone',Date.new(2001,1,1).to_s)
expect(subject.fail_pending_vm(vm, pool, timeout, timeout_notification, redis, exists: true)).to eq(true) expect(subject.fail_pending_vm(vm, pool, timeout, redis, exists: true)).to eq(true)
expect(redis.sismember("vmpooler__pending__#{pool}", vm)).to be(false) expect(redis.sismember("vmpooler__pending__#{pool}", vm)).to be(false)
expect(redis.sismember("vmpooler__completed__#{pool}", vm)).to be(true) expect(redis.sismember("vmpooler__completed__#{pool}", vm)).to be(true)
end end
@ -302,23 +299,22 @@ EOT
redis_connection_pool.with do |redis| redis_connection_pool.with do |redis|
redis.hset("vmpooler__vm__#{vm}", 'clone',Date.new(2001,1,1).to_s) redis.hset("vmpooler__vm__#{vm}", 'clone',Date.new(2001,1,1).to_s)
expect(logger).to receive(:log).with('d', "[!] [#{pool}] '#{vm}' marked as 'failed' after #{timeout} minutes with error: ") expect(logger).to receive(:log).with('d', "[!] [#{pool}] '#{vm}' marked as 'failed' after #{timeout} minutes with error: ")
expect(subject.fail_pending_vm(vm, pool, timeout, timeout_notification, redis, exists: true)).to eq(true) expect(subject.fail_pending_vm(vm, pool, timeout, redis, exists: true)).to eq(true)
end end
end end
it 'calls remove_nonexistent_vm if VM has exceeded timeout and does not exist' do it 'calls remove_nonexistent_vm if VM has exceeded timeout and does not exist' do
redis_connection_pool.with do |redis| redis_connection_pool.with do |redis|
redis.hset("vmpooler__vm__#{vm}", 'clone',Date.new(2001,1,1).to_s) redis.hset("vmpooler__vm__#{vm}", 'clone',Date.new(2001,1,1).to_s)
expect(logger).to_not receive(:log)
expect(subject).to receive(:remove_nonexistent_vm).with(vm, pool, redis) expect(subject).to receive(:remove_nonexistent_vm).with(vm, pool, redis)
expect(subject.fail_pending_vm(vm, pool, timeout, timeout_notification, redis, exists: false)).to eq(true) expect(subject.fail_pending_vm(vm, pool, timeout, redis, exists: false)).to eq(true)
end end
end end
it 'swallows error if an error is raised' do it 'swallows error if an error is raised' do
redis_connection_pool.with do |redis| redis_connection_pool.with do |redis|
redis.hset("vmpooler__vm__#{vm}", 'clone','iamnotparsable_asdate') redis.hset("vmpooler__vm__#{vm}", 'clone','iamnotparsable_asdate')
expect(subject.fail_pending_vm(vm, pool, timeout, timeout_notification, redis, exists: true)).to eq(false) expect(subject.fail_pending_vm(vm, pool, timeout, redis, exists: true)).to eq(false)
end end
end end
@ -327,7 +323,7 @@ EOT
redis.hset("vmpooler__vm__#{vm}", 'clone','iamnotparsable_asdate') redis.hset("vmpooler__vm__#{vm}", 'clone','iamnotparsable_asdate')
expect(logger).to receive(:log).with('d', String) expect(logger).to receive(:log).with('d', String)
subject.fail_pending_vm(vm, pool, timeout, timeout_notification, redis, exists: true) subject.fail_pending_vm(vm, pool, timeout, redis, exists: true)
end end
end end
@ -338,130 +334,13 @@ EOT
redis.hset("vmpooler__vm__#{vm}", 'clone',(Time.now - 900).to_s) redis.hset("vmpooler__vm__#{vm}", 'clone',(Time.now - 900).to_s)
redis.hset("vmpooler__vm__#{vm}", 'pool_alias', pool) redis.hset("vmpooler__vm__#{vm}", 'pool_alias', pool)
redis.hset("vmpooler__vm__#{vm}", 'request_id', request_id) redis.hset("vmpooler__vm__#{vm}", 'request_id', request_id)
subject.fail_pending_vm(vm, pool, timeout, timeout_notification, redis, exists: true) subject.fail_pending_vm(vm, pool, timeout, redis, exists: true)
expect(redis.zrange('vmpooler__odcreate__task', 0, -1)).to eq(["#{pool}:#{pool}:1:#{request_id}"]) expect(redis.zrange('vmpooler__odcreate__task', 0, -1)).to eq(["#{pool}:#{pool}:1:#{request_id}"])
end end
end end
end end
end end
describe '#handle_timed_out_vm' do
before do
expect(subject).not_to be_nil
end
before(:each) do
redis_connection_pool.with do |redis|
create_pending_vm(pool, vm, redis)
config[:config]['max_vm_retries'] = 3
end
end
context 'without request_id' do
it 'moves VM to completed queue and returns error' do
redis_connection_pool.with do |redis|
redis.hset("vmpooler__vm__#{vm}", 'open_socket_error', 'connection failed')
result = subject.handle_timed_out_vm(vm, pool, redis)
expect(redis.sismember("vmpooler__pending__#{pool}", vm)).to be(false)
expect(redis.sismember("vmpooler__completed__#{pool}", vm)).to be(true)
expect(result).to eq('connection failed')
end
end
end
context 'with request_id and transient error' do
before(:each) do
redis_connection_pool.with do |redis|
redis.hset("vmpooler__vm__#{vm}", 'request_id', request_id)
redis.hset("vmpooler__vm__#{vm}", 'pool_alias', pool)
redis.hset("vmpooler__odrequest__#{request_id}", 'status', 'pending')
redis.hset("vmpooler__vm__#{vm}", 'clone_error', 'network timeout')
redis.hset("vmpooler__vm__#{vm}", 'clone_error_class', 'Timeout::Error')
end
end
it 'retries on first failure' do
redis_connection_pool.with do |redis|
subject.handle_timed_out_vm(vm, pool, redis)
expect(redis.hget("vmpooler__odrequest__#{request_id}", 'retry_count')).to eq('1')
expect(redis.zrange('vmpooler__odcreate__task', 0, -1)).to include("#{pool}:#{pool}:1:#{request_id}")
end
end
it 'marks as failed after max retries' do
redis_connection_pool.with do |redis|
redis.hset("vmpooler__odrequest__#{request_id}", 'retry_count', '3')
subject.handle_timed_out_vm(vm, pool, redis)
expect(redis.hget("vmpooler__odrequest__#{request_id}", 'status')).to eq('failed')
expect(redis.hget("vmpooler__odrequest__#{request_id}", 'failure_reason')).to eq('Max retry attempts exceeded')
expect(redis.zrange('vmpooler__odcreate__task', 0, -1)).not_to include("#{pool}:#{pool}:1:#{request_id}")
end
end
end
context 'with request_id and permanent error' do
before(:each) do
redis_connection_pool.with do |redis|
redis.hset("vmpooler__vm__#{vm}", 'request_id', request_id)
redis.hset("vmpooler__vm__#{vm}", 'pool_alias', pool)
redis.hset("vmpooler__odrequest__#{request_id}", 'status', 'pending')
redis.hset("vmpooler__vm__#{vm}", 'clone_error', 'template not found')
redis.hset("vmpooler__vm__#{vm}", 'clone_error_class', 'RuntimeError')
end
end
it 'immediately marks as failed without retrying' do
redis_connection_pool.with do |redis|
subject.handle_timed_out_vm(vm, pool, redis)
expect(redis.hget("vmpooler__odrequest__#{request_id}", 'status')).to eq('failed')
expect(redis.hget("vmpooler__odrequest__#{request_id}", 'failure_reason')).to include('Configuration error')
expect(redis.zrange('vmpooler__odcreate__task', 0, -1)).not_to include("#{pool}:#{pool}:1:#{request_id}")
end
end
end
end
describe '#permanent_error?' do
before do
expect(subject).not_to be_nil
end
it 'identifies template not found errors as permanent' do
expect(subject.permanent_error?('template not found', 'RuntimeError')).to be(true)
end
it 'identifies invalid path errors as permanent' do
expect(subject.permanent_error?('invalid path specified', 'ArgumentError')).to be(true)
end
it 'identifies permission denied errors as permanent' do
expect(subject.permanent_error?('permission denied', 'SecurityError')).to be(true)
end
it 'identifies ArgumentError class as permanent' do
expect(subject.permanent_error?('some argument error', 'ArgumentError')).to be(true)
end
it 'identifies network errors as transient' do
expect(subject.permanent_error?('connection timeout', 'Timeout::Error')).to be(false)
end
it 'identifies socket errors as transient' do
expect(subject.permanent_error?('connection refused', 'Errno::ECONNREFUSED')).to be(false)
end
it 'returns false for nil inputs' do
expect(subject.permanent_error?(nil, nil)).to be(false)
expect(subject.permanent_error?('error', nil)).to be(false)
expect(subject.permanent_error?(nil, 'Error')).to be(false)
end
end
describe '#move_pending_vm_to_ready' do describe '#move_pending_vm_to_ready' do
let(:host) { { 'hostname' => vm }} let(:host) { { 'hostname' => vm }}
@ -1107,10 +986,9 @@ EOT
context 'with no errors during cloning' do context 'with no errors during cloning' do
before(:each) do before(:each) do
allow(metrics).to receive(:timing) allow(metrics).to receive(:timing)
allow(metrics).to receive(:gauge) expect(metrics).to receive(:timing).with(/clone\./,/0/)
expect(metrics).to receive(:gauge).with(/vmpooler_clone\./,/0/)
expect(provider).to receive(:create_vm).with(pool, String) expect(provider).to receive(:create_vm).with(pool, String)
allow(provider).to receive(:get_vm_ip_address).and_return(1) allow(provider).to receive(:get_vm_ip_address)
allow(subject).to receive(:get_domain_for_pool).and_return('example.com') allow(subject).to receive(:get_domain_for_pool).and_return('example.com')
allow(subject).to receive(:get_dns_plugin_class_name_for_pool).and_return(dns_plugin) allow(subject).to receive(:get_dns_plugin_class_name_for_pool).and_return(dns_plugin)
allow(logger).to receive(:log) allow(logger).to receive(:log)
@ -1156,18 +1034,6 @@ EOT
end end
end end
context 'with a failure to get ip address after cloning' do
it 'should log a message that it completed being cloned' do
allow(metrics).to receive(:timing)
allow(metrics).to receive(:gauge)
expect(metrics).to receive(:gauge).with(/vmpooler_clone\./,/0/)
expect(provider).to receive(:create_vm).with(pool, String)
allow(provider).to receive(:get_vm_ip_address).and_return(nil)
expect{subject._clone_vm(pool,provider,dns_plugin)}.to raise_error(StandardError)
end
end
context 'with an error during cloning' do context 'with an error during cloning' do
before(:each) do before(:each) do
expect(provider).to receive(:create_vm).with(pool, String).and_raise('MockError') expect(provider).to receive(:create_vm).with(pool, String).and_raise('MockError')
@ -1219,10 +1085,9 @@ EOT
context 'with request_id' do context 'with request_id' do
before(:each) do before(:each) do
allow(metrics).to receive(:timing) allow(metrics).to receive(:timing)
allow(metrics).to receive(:gauge) expect(metrics).to receive(:timing).with(/clone\./,/0/)
expect(metrics).to receive(:gauge).with(/vmpooler_clone\./,/0/)
expect(provider).to receive(:create_vm).with(pool, String) expect(provider).to receive(:create_vm).with(pool, String)
allow(provider).to receive(:get_vm_ip_address).with(vm,pool).and_return(1) allow(provider).to receive(:get_vm_ip_address).with(vm,pool)
allow(subject).to receive(:get_dns_plugin_class_name_for_pool).and_return(dns_plugin) allow(subject).to receive(:get_dns_plugin_class_name_for_pool).and_return(dns_plugin)
expect(dns_plugin).to receive(:create_or_replace_record) expect(dns_plugin).to receive(:create_or_replace_record)
allow(logger).to receive(:log) allow(logger).to receive(:log)
@ -1258,7 +1123,7 @@ EOT
resolv = class_double("Resolv").as_stubbed_const(:transfer_nested_constants => true) resolv = class_double("Resolv").as_stubbed_const(:transfer_nested_constants => true)
expect(subject).to receive(:generate_and_check_hostname).exactly(3).times.and_return([vm_name, true]) #skip this, make it available all times expect(subject).to receive(:generate_and_check_hostname).exactly(3).times.and_return([vm_name, true]) #skip this, make it available all times
expect(resolv).to receive(:getaddress).exactly(3).times.and_return("1.2.3.4") expect(resolv).to receive(:getaddress).exactly(3).times.and_return("1.2.3.4")
expect(metrics).to receive(:increment).with("vmpooler_errors.staledns.#{pool}").exactly(3).times expect(metrics).to receive(:increment).with("errors.staledns.#{pool}").exactly(3).times
expect{subject._clone_vm(pool,provider,dns_plugin)}.to raise_error(/Unable to generate a unique hostname after/) expect{subject._clone_vm(pool,provider,dns_plugin)}.to raise_error(/Unable to generate a unique hostname after/)
end end
it 'should be successful if DNS does not exist' do it 'should be successful if DNS does not exist' do
@ -1356,8 +1221,7 @@ EOT
it 'should emit a timing metric' do it 'should emit a timing metric' do
allow(subject).to receive(:get_vm_usage_labels) allow(subject).to receive(:get_vm_usage_labels)
allow(metrics).to receive(:timing) allow(metrics).to receive(:timing)
allow(metrics).to receive(:gauge) expect(metrics).to receive(:timing).with("destroy.#{pool}", String)
expect(metrics).to receive(:gauge).with("vmpooler_destroy.#{pool}", String)
subject._destroy_vm(vm,pool,provider,dns_plugin) subject._destroy_vm(vm,pool,provider,dns_plugin)
end end
@ -4261,10 +4125,10 @@ EOT
it 'should call fail_pending_vm' do it 'should call fail_pending_vm' do
redis_connection_pool.with do |redis| redis_connection_pool.with do |redis|
expect(subject).to receive(:fail_pending_vm).with(vm, pool, Integer, Integer, redis, exists: false) expect(subject).to receive(:fail_pending_vm).with(vm, pool, Integer, redis, exists: false)
end end
subject.check_pending_pool_vms(pool, provider, pool_check_response, inventory, timeout, timeout_notification) subject.check_pending_pool_vms(pool, provider, pool_check_response, inventory, timeout)
end end
end end
@ -4282,7 +4146,7 @@ EOT
it 'should return the number of checked pending VMs' do it 'should return the number of checked pending VMs' do
allow(subject).to receive(:check_pending_vm) allow(subject).to receive(:check_pending_vm)
subject.check_pending_pool_vms(pool, provider, pool_check_response, inventory, timeout, timeout_notification) subject.check_pending_pool_vms(pool, provider, pool_check_response, inventory, timeout)
expect(pool_check_response[:checked_pending_vms]).to be(1) expect(pool_check_response[:checked_pending_vms]).to be(1)
end end
@ -4291,31 +4155,31 @@ EOT
expect(subject).to receive(:check_pending_vm).and_raise(RuntimeError,'MockError') expect(subject).to receive(:check_pending_vm).and_raise(RuntimeError,'MockError')
expect(logger).to receive(:log).with('d', "[!] [#{pool}] _check_pool failed with an error while evaluating pending VMs: MockError") expect(logger).to receive(:log).with('d', "[!] [#{pool}] _check_pool failed with an error while evaluating pending VMs: MockError")
subject.check_pending_pool_vms(pool, provider, pool_check_response, inventory, timeout, timeout_notification) subject.check_pending_pool_vms(pool, provider, pool_check_response, inventory, timeout)
end end
it 'should use the pool timeout if set' do it 'should use the pool timeout if set' do
big_lifetime = 2000 big_lifetime = 2000
config[:pools][0]['timeout'] = big_lifetime config[:pools][0]['timeout'] = big_lifetime
expect(subject).to receive(:check_pending_vm).with(vm,pool, big_lifetime, timeout_notification, provider) expect(subject).to receive(:check_pending_vm).with(vm,pool,big_lifetime,provider)
subject.check_pending_pool_vms(pool, provider, pool_check_response, inventory, big_lifetime, timeout_notification) subject.check_pending_pool_vms(pool, provider, pool_check_response, inventory, big_lifetime)
end end
it 'should use the configuration setting if the pool timeout is not set' do it 'should use the configuration setting if the pool timeout is not set' do
big_lifetime = 2000 big_lifetime = 2000
config[:config]['timeout'] = big_lifetime config[:config]['timeout'] = big_lifetime
expect(subject).to receive(:check_pending_vm).with(vm, pool, big_lifetime, timeout_notification, provider) expect(subject).to receive(:check_pending_vm).with(vm,pool,big_lifetime,provider)
subject.check_pending_pool_vms(pool, provider, pool_check_response, inventory, big_lifetime, timeout_notification) subject.check_pending_pool_vms(pool, provider, pool_check_response, inventory, big_lifetime)
end end
it 'should use a pool timeout of 15 if nothing is set' do it 'should use a pool timeout of 15 if nothing is set' do
expect(subject).to receive(:check_pending_vm).with(vm, pool, timeout, timeout_notification, provider) expect(subject).to receive(:check_pending_vm).with(vm,pool,timeout,provider)
subject.check_pending_pool_vms(pool, provider, pool_check_response, inventory, timeout, timeout_notification) subject.check_pending_pool_vms(pool, provider, pool_check_response, inventory, timeout)
end end
end end
end end
@ -4977,7 +4841,7 @@ EOT
it 'should call #check_ready_pool_vms' do it 'should call #check_ready_pool_vms' do
allow(subject).to receive(:create_inventory).and_return({}) allow(subject).to receive(:create_inventory).and_return({})
expect(subject).to receive(:check_pending_pool_vms).with(pool, provider, pool_check_response, {}, pool_object['timeout'], pool_object['timeout_notification']) expect(subject).to receive(:check_pending_pool_vms).with(pool, provider, pool_check_response, {}, pool_object['timeout'])
subject._check_pool(pool_object,provider) subject._check_pool(pool_object,provider)
end end
@ -5178,44 +5042,6 @@ EOT
end end
end end
context 'when request is already marked as failed' do
let(:request_string) { "#{pool}:#{pool}:1" }
before(:each) do
redis_connection_pool.with do |redis|
create_ondemand_request_for_test(request_id, current_time.to_i, request_string, redis)
set_ondemand_request_status(request_id, 'failed', redis)
end
end
it 'logs that the request is already failed' do
redis_connection_pool.with do |redis|
expect(logger).to receive(:log).with('s', "Request '#{request_id}' already marked as failed, skipping VM creation")
subject.create_ondemand_vms(request_id, redis)
end
end
it 'removes the request from provisioning__request queue' do
redis_connection_pool.with do |redis|
subject.create_ondemand_vms(request_id, redis)
expect(redis.zscore('vmpooler__provisioning__request', request_id)).to be_nil
end
end
it 'does not create VM tasks' do
redis_connection_pool.with do |redis|
subject.create_ondemand_vms(request_id, redis)
expect(redis.zcard('vmpooler__odcreate__task')).to eq(0)
end
end
it 'does not add to provisioning__processing queue' do
redis_connection_pool.with do |redis|
subject.create_ondemand_vms(request_id, redis)
expect(redis.zscore('vmpooler__provisioning__processing', request_id)).to be_nil
end
end
end
context 'with a request that has data' do context 'with a request that has data' do
let(:request_string) { "#{pool}:#{pool}:1" } let(:request_string) { "#{pool}:#{pool}:1" }
before(:each) do before(:each) do

View file

@ -1,497 +0,0 @@
# frozen_string_literal: true
require 'spec_helper'
require 'vmpooler/pool_manager'
describe 'Vmpooler::PoolManager - Queue Reliability Features' do
let(:logger) { MockLogger.new }
let(:redis_connection_pool) { ConnectionPool.new(size: 1) { redis } }
let(:metrics) { Vmpooler::Metrics::DummyStatsd.new }
let(:config) { YAML.load(<<~EOT
---
:config:
task_limit: 10
vm_checktime: 1
vm_lifetime: 12
prefix: 'pooler-'
dlq_enabled: true
dlq_ttl: 168
dlq_max_entries: 100
purge_enabled: true
purge_dry_run: false
max_pending_age: 7200
max_ready_age: 86400
max_completed_age: 3600
health_check_enabled: true
health_check_interval: 300
health_thresholds:
pending_queue_max: 100
ready_queue_max: 500
dlq_max_warning: 100
dlq_max_critical: 1000
stuck_vm_age_threshold: 7200
:providers:
:dummy: {}
:pools:
- name: 'test-pool'
size: 5
provider: 'dummy'
EOT
)
}
subject { Vmpooler::PoolManager.new(config, logger, redis_connection_pool, metrics) }
describe 'Dead-Letter Queue (DLQ)' do
let(:vm) { 'vm-abc123' }
let(:pool) { 'test-pool' }
let(:error_class) { 'StandardError' }
let(:error_message) { 'template does not exist' }
let(:request_id) { 'req-123' }
let(:pool_alias) { 'test-alias' }
before(:each) do
redis_connection_pool.with do |redis_connection|
allow(redis_connection).to receive(:zadd)
allow(redis_connection).to receive(:zcard).and_return(0)
allow(redis_connection).to receive(:expire)
end
end
describe '#dlq_enabled?' do
it 'returns true when dlq_enabled is true in config' do
expect(subject.dlq_enabled?).to be true
end
it 'returns false when dlq_enabled is false in config' do
config[:config]['dlq_enabled'] = false
expect(subject.dlq_enabled?).to be false
end
end
describe '#dlq_ttl' do
it 'returns configured TTL' do
expect(subject.dlq_ttl).to eq(168)
end
it 'returns default TTL when not configured' do
config[:config].delete('dlq_ttl')
expect(subject.dlq_ttl).to eq(168)
end
end
describe '#dlq_max_entries' do
it 'returns configured max entries' do
expect(subject.dlq_max_entries).to eq(100)
end
it 'returns default max entries when not configured' do
config[:config].delete('dlq_max_entries')
expect(subject.dlq_max_entries).to eq(10000)
end
end
describe '#move_to_dlq' do
context 'when DLQ is enabled' do
it 'adds entry to DLQ sorted set' do
redis_connection_pool.with do |redis_connection|
dlq_key = 'vmpooler__dlq__pending'
expect(redis_connection).to receive(:zadd).with(dlq_key, anything, anything)
expect(redis_connection).to receive(:expire).with(dlq_key, anything)
subject.move_to_dlq(vm, pool, 'pending', error_class, error_message,
redis_connection, request_id: request_id, pool_alias: pool_alias)
end
end
it 'includes error details in DLQ entry' do
redis_connection_pool.with do |redis_connection|
expect(redis_connection).to receive(:zadd) do |_key, _score, entry|
expect(entry).to include(vm)
expect(entry).to include(error_message)
expect(entry).to include(error_class)
end
subject.move_to_dlq(vm, pool, 'pending', error_class, error_message, redis_connection)
end
end
it 'increments DLQ metrics' do
redis_connection_pool.with do |redis_connection|
expect(metrics).to receive(:increment).with('vmpooler_dlq.pending.count')
subject.move_to_dlq(vm, pool, 'pending', error_class, error_message, redis_connection)
end
end
it 'enforces max entries limit' do
redis_connection_pool.with do |redis_connection|
allow(redis_connection).to receive(:zcard).and_return(150)
expect(redis_connection).to receive(:zremrangebyrank).with(anything, 0, 49)
subject.move_to_dlq(vm, pool, 'pending', error_class, error_message, redis_connection)
end
end
end
context 'when DLQ is disabled' do
before { config[:config]['dlq_enabled'] = false }
it 'does not add entry to DLQ' do
redis_connection_pool.with do |redis_connection|
expect(redis_connection).not_to receive(:zadd)
subject.move_to_dlq(vm, pool, 'pending', error_class, error_message, redis_connection)
end
end
end
end
end
describe 'Auto-Purge' do
describe '#purge_enabled?' do
it 'returns true when purge_enabled is true in config' do
expect(subject.purge_enabled?).to be true
end
it 'returns false when purge_enabled is false in config' do
config[:config]['purge_enabled'] = false
expect(subject.purge_enabled?).to be false
end
end
describe '#purge_dry_run?' do
it 'returns false when purge_dry_run is false in config' do
expect(subject.purge_dry_run?).to be false
end
it 'returns true when purge_dry_run is true in config' do
config[:config]['purge_dry_run'] = true
expect(subject.purge_dry_run?).to be true
end
end
describe '#max_pending_age' do
it 'returns configured max age' do
expect(subject.max_pending_age).to eq(7200)
end
it 'returns default max age when not configured' do
config[:config].delete('max_pending_age')
expect(subject.max_pending_age).to eq(7200)
end
end
describe '#purge_pending_queue' do
let(:pool) { 'test-pool' }
let(:old_vm) { 'vm-old' }
let(:new_vm) { 'vm-new' }
before(:each) do
redis_connection_pool.with do |redis_connection|
# Old VM (3 hours old, exceeds 2 hour threshold)
redis_connection.sadd("vmpooler__pending__#{pool}", old_vm)
redis_connection.hset("vmpooler__vm__#{old_vm}", 'clone', (Time.now - 10800).to_s)
# New VM (30 minutes old, within threshold)
redis_connection.sadd("vmpooler__pending__#{pool}", new_vm)
redis_connection.hset("vmpooler__vm__#{new_vm}", 'clone', (Time.now - 1800).to_s)
end
end
context 'when not in dry-run mode' do
it 'purges stale pending VMs' do
redis_connection_pool.with do |redis_connection|
purged_count = subject.purge_pending_queue(pool, redis_connection)
expect(purged_count).to eq(1)
expect(redis_connection.sismember("vmpooler__pending__#{pool}", old_vm)).to be false
expect(redis_connection.sismember("vmpooler__pending__#{pool}", new_vm)).to be true
end
end
it 'moves purged VMs to DLQ' do
redis_connection_pool.with do |redis_connection|
expect(subject).to receive(:move_to_dlq).with(
old_vm, pool, 'pending', 'Purge', anything, redis_connection, anything
)
subject.purge_pending_queue(pool, redis_connection)
end
end
it 'increments purge metrics' do
redis_connection_pool.with do |redis_connection|
expect(metrics).to receive(:increment).with("vmpooler_purge.pending.#{pool}.count")
subject.purge_pending_queue(pool, redis_connection)
end
end
end
context 'when in dry-run mode' do
before { config[:config]['purge_dry_run'] = true }
it 'detects but does not purge stale VMs' do
redis_connection_pool.with do |redis_connection|
purged_count = subject.purge_pending_queue(pool, redis_connection)
expect(purged_count).to eq(1)
expect(redis_connection.sismember("vmpooler__pending__#{pool}", old_vm)).to be true
end
end
it 'does not move to DLQ' do
redis_connection_pool.with do |redis_connection|
expect(subject).not_to receive(:move_to_dlq)
subject.purge_pending_queue(pool, redis_connection)
end
end
end
end
describe '#purge_ready_queue' do
let(:pool) { 'test-pool' }
let(:old_vm) { 'vm-old-ready' }
let(:new_vm) { 'vm-new-ready' }
before(:each) do
redis_connection_pool.with do |redis_connection|
# Old VM (25 hours old, exceeds 24 hour threshold)
redis_connection.sadd("vmpooler__ready__#{pool}", old_vm)
redis_connection.hset("vmpooler__vm__#{old_vm}", 'ready', (Time.now - 90000).to_s)
# New VM (2 hours old, within threshold)
redis_connection.sadd("vmpooler__ready__#{pool}", new_vm)
redis_connection.hset("vmpooler__vm__#{new_vm}", 'ready', (Time.now - 7200).to_s)
end
end
it 'moves stale ready VMs to completed queue' do
redis_connection_pool.with do |redis_connection|
purged_count = subject.purge_ready_queue(pool, redis_connection)
expect(purged_count).to eq(1)
expect(redis_connection.sismember("vmpooler__ready__#{pool}", old_vm)).to be false
expect(redis_connection.sismember("vmpooler__completed__#{pool}", old_vm)).to be true
expect(redis_connection.sismember("vmpooler__ready__#{pool}", new_vm)).to be true
end
end
end
describe '#purge_completed_queue' do
let(:pool) { 'test-pool' }
let(:old_vm) { 'vm-old-completed' }
let(:new_vm) { 'vm-new-completed' }
before(:each) do
redis_connection_pool.with do |redis_connection|
# Old VM (2 hours old, exceeds 1 hour threshold)
redis_connection.sadd("vmpooler__completed__#{pool}", old_vm)
redis_connection.hset("vmpooler__vm__#{old_vm}", 'destroy', (Time.now - 7200).to_s)
# New VM (30 minutes old, within threshold)
redis_connection.sadd("vmpooler__completed__#{pool}", new_vm)
redis_connection.hset("vmpooler__vm__#{new_vm}", 'destroy', (Time.now - 1800).to_s)
end
end
it 'removes stale completed VMs' do
redis_connection_pool.with do |redis_connection|
purged_count = subject.purge_completed_queue(pool, redis_connection)
expect(purged_count).to eq(1)
expect(redis_connection.sismember("vmpooler__completed__#{pool}", old_vm)).to be false
expect(redis_connection.sismember("vmpooler__completed__#{pool}", new_vm)).to be true
end
end
end
end
describe 'Health Checks' do
describe '#health_check_enabled?' do
it 'returns true when health_check_enabled is true in config' do
expect(subject.health_check_enabled?).to be true
end
it 'returns false when health_check_enabled is false in config' do
config[:config]['health_check_enabled'] = false
expect(subject.health_check_enabled?).to be false
end
end
describe '#health_thresholds' do
it 'returns configured thresholds' do
thresholds = subject.health_thresholds
expect(thresholds['pending_queue_max']).to eq(100)
expect(thresholds['stuck_vm_age_threshold']).to eq(7200)
end
it 'merges with defaults when partially configured' do
config[:config]['health_thresholds'] = { 'pending_queue_max' => 200 }
thresholds = subject.health_thresholds
expect(thresholds['pending_queue_max']).to eq(200)
expect(thresholds['ready_queue_max']).to eq(500) # default
end
end
describe '#calculate_queue_ages' do
let(:pool) { 'test-pool' }
let(:vm1) { 'vm-1' }
let(:vm2) { 'vm-2' }
let(:vm3) { 'vm-3' }
before(:each) do
redis_connection_pool.with do |redis_connection|
redis_connection.hset("vmpooler__vm__#{vm1}", 'clone', (Time.now - 3600).to_s)
redis_connection.hset("vmpooler__vm__#{vm2}", 'clone', (Time.now - 7200).to_s)
redis_connection.hset("vmpooler__vm__#{vm3}", 'clone', (Time.now - 1800).to_s)
end
end
it 'calculates ages for all VMs' do
redis_connection_pool.with do |redis_connection|
vms = [vm1, vm2, vm3]
ages = subject.calculate_queue_ages(vms, 'clone', redis_connection)
expect(ages.length).to eq(3)
expect(ages[0]).to be_within(5).of(3600)
expect(ages[1]).to be_within(5).of(7200)
expect(ages[2]).to be_within(5).of(1800)
end
end
it 'skips VMs with missing timestamps' do
redis_connection_pool.with do |redis_connection|
vms = [vm1, 'vm-nonexistent', vm3]
ages = subject.calculate_queue_ages(vms, 'clone', redis_connection)
expect(ages.length).to eq(2)
end
end
end
describe '#determine_health_status' do
let(:base_metrics) do
{
'queues' => {
'test-pool' => {
'pending' => { 'size' => 10, 'stuck_count' => 2 },
'ready' => { 'size' => 50 }
}
},
'errors' => {
'dlq_total_size' => 50,
'stuck_vm_count' => 2
}
}
end
it 'returns healthy when all metrics are within thresholds' do
status = subject.determine_health_status(base_metrics)
expect(status).to eq('healthy')
end
it 'returns degraded when DLQ size exceeds warning threshold' do
metrics = base_metrics.dup
metrics['errors']['dlq_total_size'] = 150
status = subject.determine_health_status(metrics)
expect(status).to eq('degraded')
end
it 'returns unhealthy when DLQ size exceeds critical threshold' do
metrics = base_metrics.dup
metrics['errors']['dlq_total_size'] = 1500
status = subject.determine_health_status(metrics)
expect(status).to eq('unhealthy')
end
it 'returns degraded when pending queue exceeds warning threshold' do
metrics = base_metrics.dup
metrics['queues']['test-pool']['pending']['size'] = 120
status = subject.determine_health_status(metrics)
expect(status).to eq('degraded')
end
it 'returns unhealthy when pending queue exceeds critical threshold' do
metrics = base_metrics.dup
metrics['queues']['test-pool']['pending']['size'] = 250
status = subject.determine_health_status(metrics)
expect(status).to eq('unhealthy')
end
it 'returns unhealthy when stuck VM count exceeds critical threshold' do
metrics = base_metrics.dup
metrics['errors']['stuck_vm_count'] = 60
status = subject.determine_health_status(metrics)
expect(status).to eq('unhealthy')
end
end
describe '#push_health_metrics' do
let(:metrics_data) do
{
'queues' => {
'test-pool' => {
'pending' => { 'size' => 10, 'oldest_age' => 3600, 'stuck_count' => 2 },
'ready' => { 'size' => 50, 'oldest_age' => 7200 },
'completed' => { 'size' => 5 }
}
},
'tasks' => {
'clone' => { 'active' => 3 },
'ondemand' => { 'active' => 2, 'pending' => 5 }
},
'errors' => {
'dlq_total_size' => 25,
'stuck_vm_count' => 2,
'orphaned_metadata_count' => 3
}
}
end
it 'pushes status metric' do
allow(metrics).to receive(:gauge)
expect(metrics).to receive(:gauge).with('vmpooler_health.status', 0)
subject.push_health_metrics(metrics_data, 'healthy')
end
it 'pushes error metrics' do
allow(metrics).to receive(:gauge)
expect(metrics).to receive(:gauge).with('vmpooler_health.dlq.total_size', 25)
expect(metrics).to receive(:gauge).with('vmpooler_health.stuck_vms.count', 2)
expect(metrics).to receive(:gauge).with('vmpooler_health.orphaned_metadata.count', 3)
subject.push_health_metrics(metrics_data, 'healthy')
end
it 'pushes per-pool queue metrics' do
allow(metrics).to receive(:gauge)
expect(metrics).to receive(:gauge).with('vmpooler_health.queue.test-pool.pending.size', 10)
expect(metrics).to receive(:gauge).with('vmpooler_health.queue.test-pool.pending.oldest_age', 3600)
expect(metrics).to receive(:gauge).with('vmpooler_health.queue.test-pool.pending.stuck_count', 2)
expect(metrics).to receive(:gauge).with('vmpooler_health.queue.test-pool.ready.size', 50)
subject.push_health_metrics(metrics_data, 'healthy')
end
it 'pushes task metrics' do
allow(metrics).to receive(:gauge)
expect(metrics).to receive(:gauge).with('vmpooler_health.tasks.clone.active', 3)
expect(metrics).to receive(:gauge).with('vmpooler_health.tasks.ondemand.active', 2)
expect(metrics).to receive(:gauge).with('vmpooler_health.tasks.ondemand.pending', 5)
subject.push_health_metrics(metrics_data, 'healthy')
end
end
end
end

View file

@ -3,5 +3,5 @@
# The container tag should closely match what is used in `docker/Dockerfile` in vmpooler-deployment # The container tag should closely match what is used in `docker/Dockerfile` in vmpooler-deployment
docker run -it --rm \ docker run -it --rm \
-v $(pwd):/app \ -v $(pwd):/app \
jruby:9.4.12.1-jdk11 \ jruby:9.4.3.0-jdk11 \
/bin/bash -c 'apt-get update -qq && apt-get install -y --no-install-recommends git make netbase build-essential && cd /app && gem install bundler && bundle install --jobs 3 && bundle update; echo "LOCK_FILE_UPDATE_EXIT_CODE=$?"' /bin/bash -c 'apt-get update -qq && apt-get install -y --no-install-recommends git make netbase && cd /app && gem install bundler && bundle install --jobs 3 && bundle update; echo "LOCK_FILE_UPDATE_EXIT_CODE=$?"'

View file

@ -23,11 +23,10 @@ Gem::Specification.new do |s|
s.add_dependency 'opentelemetry-exporter-jaeger', '= 0.23.0' s.add_dependency 'opentelemetry-exporter-jaeger', '= 0.23.0'
s.add_dependency 'opentelemetry-instrumentation-concurrent_ruby', '= 0.21.1' s.add_dependency 'opentelemetry-instrumentation-concurrent_ruby', '= 0.21.1'
s.add_dependency 'opentelemetry-instrumentation-http_client', '= 0.22.2' s.add_dependency 'opentelemetry-instrumentation-http_client', '= 0.22.2'
s.add_dependency 'opentelemetry-instrumentation-rack', '= 0.23.4'
s.add_dependency 'opentelemetry-instrumentation-redis', '= 0.25.3' s.add_dependency 'opentelemetry-instrumentation-redis', '= 0.25.3'
s.add_dependency 'opentelemetry-instrumentation-sinatra', '= 0.23.2' s.add_dependency 'opentelemetry-instrumentation-sinatra', '= 0.23.2'
s.add_dependency 'opentelemetry-resource_detectors', '= 0.24.2' s.add_dependency 'opentelemetry-resource_detectors', '= 0.24.1'
s.add_dependency 'opentelemetry-sdk', '~> 1.8' s.add_dependency 'opentelemetry-sdk', '~> 1.3', '>= 1.3.0'
s.add_dependency 'pickup', '~> 0.0.11' s.add_dependency 'pickup', '~> 0.0.11'
s.add_dependency 'prometheus-client', '>= 2', '< 5' s.add_dependency 'prometheus-client', '>= 2', '< 5'
s.add_dependency 'puma', '>= 5.0.4', '< 7' s.add_dependency 'puma', '>= 5.0.4', '< 7'
@ -40,7 +39,7 @@ Gem::Specification.new do |s|
# Testing dependencies # Testing dependencies
s.add_development_dependency 'climate_control', '>= 0.2.0' s.add_development_dependency 'climate_control', '>= 0.2.0'
s.add_development_dependency 'mock_redis', '= 0.37.0' s.add_development_dependency 'mock_redis', '>= 0.17.0'
s.add_development_dependency 'pry' s.add_development_dependency 'pry'
s.add_development_dependency 'rack-test', '>= 0.6' s.add_development_dependency 'rack-test', '>= 0.6'
s.add_development_dependency 'rspec', '>= 3.2' s.add_development_dependency 'rspec', '>= 3.2'

View file

@ -23,7 +23,6 @@
logfile: '/var/log/vmpooler.log' logfile: '/var/log/vmpooler.log'
task_limit: 10 task_limit: 10
timeout: 15 timeout: 15
timeout_notification: 5
vm_checktime: 1 vm_checktime: 1
vm_lifetime: 12 vm_lifetime: 12
vm_lifetime_auth: 24 vm_lifetime_auth: 24
@ -40,7 +39,6 @@
datastore: 'vmstorage' datastore: 'vmstorage'
size: 5 size: 5
timeout: 15 timeout: 15
timeout_notification: 5
ready_ttl: 1440 ready_ttl: 1440
provider: dummy provider: dummy
dns_plugin: 'example' dns_plugin: 'example'
@ -51,7 +49,6 @@
datastore: 'vmstorage' datastore: 'vmstorage'
size: 5 size: 5
timeout: 15 timeout: 15
timeout_notification: 5
ready_ttl: 1440 ready_ttl: 1440
provider: dummy provider: dummy
dns_plugin: 'example' dns_plugin: 'example'

View file

@ -367,15 +367,6 @@
# - user_object # - user_object
# The LDAP object-type used to designate a user object. # The LDAP object-type used to designate a user object.
# #
# - service_account_hash
# A hash containing the following parameters for a service account to perform the
# initial bind. After the initial bind, then a search query is performed using the
# 'base' and 'user_object', then re-binds as the returned user.
# - :user_dn
# The full distinguished name (DN) of the service account used to bind.
# - :password
# The password for the service account used to bind.
#
# Example: # Example:
# :auth: # :auth:
# provider: 'ldap' # provider: 'ldap'
@ -384,23 +375,6 @@
# port: 389 # port: 389
# base: 'ou=users,dc=company,dc=com' # base: 'ou=users,dc=company,dc=com'
# user_object: 'uid' # user_object: 'uid'
#
# :auth:
# provider: 'ldap'
# :ldap:
# host: 'ldap.example.com'
# port: 636
# service_account_hash:
# :user_dn: 'cn=Service Account,ou=Accounts,dc=ldap,dc=example,dc=com'
# :password: 'service-account-password'
# encryption:
# :method: :simple_tls
# :tls_options:
# :ssl_version: 'TLSv1_2'
# base:
# - 'ou=Accounts,dc=company,dc=com'
# user_object:
# - 'samAccountName'
:auth: :auth:
provider: 'ldap' provider: 'ldap'
@ -456,12 +430,6 @@
# How long (in minutes) before marking a clone in 'pending' queues as 'failed' and retrying. # How long (in minutes) before marking a clone in 'pending' queues as 'failed' and retrying.
# (default: 15) # (default: 15)
# #
# - max_vm_retries
# Maximum number of times to retry VM creation for a failed request before marking it as permanently failed.
# This helps prevent infinite retry loops when there are configuration issues like invalid template paths.
# Permanent errors (like invalid template paths) are detected and will not be retried.
# (default: 3)
#
# - vm_checktime # - vm_checktime
# How often (in minutes) to check the sanity of VMs in 'ready' queues. # How often (in minutes) to check the sanity of VMs in 'ready' queues.
# (default: 1) # (default: 1)
@ -621,11 +589,9 @@
logfile: '/var/log/vmpooler.log' logfile: '/var/log/vmpooler.log'
task_limit: 10 task_limit: 10
timeout: 15 timeout: 15
timeout_notification: 5
vm_checktime: 1 vm_checktime: 1
vm_lifetime: 12 vm_lifetime: 12
vm_lifetime_auth: 24 vm_lifetime_auth: 24
max_vm_retries: 3
allowed_tags: allowed_tags:
- 'created_by' - 'created_by'
- 'project' - 'project'
@ -745,7 +711,6 @@
datastore: 'vmstorage' datastore: 'vmstorage'
size: 5 size: 5
timeout: 15 timeout: 15
timeout_notification: 5
ready_ttl: 1440 ready_ttl: 1440
provider: vsphere provider: vsphere
create_linked_clone: true create_linked_clone: true
@ -756,7 +721,6 @@
datastore: 'vmstorage' datastore: 'vmstorage'
size: 5 size: 5
timeout: 15 timeout: 15
timeout_notification: 5
ready_ttl: 1440 ready_ttl: 1440
provider: vsphere provider: vsphere
create_linked_clone: false create_linked_clone: false

View file

@ -1,92 +0,0 @@
---
# VMPooler Configuration Example with Dead-Letter Queue, Auto-Purge, and Health Checks
# Redis Configuration
:redis:
server: 'localhost'
port: 6379
data_ttl: 168 # hours - how long to keep VM metadata in Redis
# Dead-Letter Queue (DLQ) Configuration
dlq_enabled: true
dlq_ttl: 168 # hours (7 days) - how long to keep DLQ entries
dlq_max_entries: 10000 # maximum entries per DLQ queue before trimming
# Application Configuration
:config:
# ... other existing config ...
# Dead-Letter Queue (DLQ) - Optional, defaults shown
dlq_enabled: false # Set to true to enable DLQ
dlq_ttl: 168 # hours (7 days)
dlq_max_entries: 10000 # per DLQ queue
# Auto-Purge Stale Queue Entries
purge_enabled: false # Set to true to enable auto-purge
purge_interval: 3600 # seconds (1 hour) - how often to run purge cycle
purge_dry_run: false # Set to true to log what would be purged without actually purging
# Auto-Purge Age Thresholds (in seconds)
max_pending_age: 7200 # 2 hours - VMs stuck in pending
max_ready_age: 86400 # 24 hours - VMs idle in ready queue
max_completed_age: 3600 # 1 hour - VMs in completed queue
max_orphaned_age: 86400 # 24 hours - orphaned VM metadata
max_request_age: 86400 # 24 hours - stale on-demand requests
# Health Checks
health_check_enabled: false # Set to true to enable health checks
health_check_interval: 300 # seconds (5 minutes) - how often to run health checks
# Health Check Thresholds
health_thresholds:
pending_queue_max: 100 # Warning threshold for pending queue size
ready_queue_max: 500 # Warning threshold for ready queue size
dlq_max_warning: 100 # Warning threshold for DLQ size
dlq_max_critical: 1000 # Critical threshold for DLQ size
stuck_vm_age_threshold: 7200 # 2 hours - age at which VM is considered "stuck"
stuck_vm_max_warning: 10 # Warning threshold for stuck VM count
stuck_vm_max_critical: 50 # Critical threshold for stuck VM count
# Pool Configuration
:pools:
- name: 'centos-7-x86_64'
size: 5
provider: 'vsphere'
# ... other pool settings ...
# Provider Configuration
:providers:
:vsphere:
server: 'vcenter.example.com'
username: 'vmpooler'
password: 'secret'
# ... other provider settings ...
# Example: Production Configuration
# For production use, you might want:
# :config:
# dlq_enabled: true
# dlq_ttl: 168 # Keep failed VMs for a week
#
# purge_enabled: true
# purge_interval: 1800 # Run every 30 minutes
# purge_dry_run: false
# max_pending_age: 3600 # Purge pending VMs after 1 hour
# max_ready_age: 172800 # Purge ready VMs after 2 days
#
# health_check_enabled: true
# health_check_interval: 300 # Check every 5 minutes
# Example: Development Configuration
# For development/testing, you might want:
# :config:
# dlq_enabled: true
# dlq_ttl: 24 # Keep failed VMs for a day
#
# purge_enabled: true
# purge_interval: 600 # Run every 10 minutes
# purge_dry_run: true # Test mode - log but don't actually purge
# max_pending_age: 1800 # More aggressive - 30 minutes
#
# health_check_enabled: true
# health_check_interval: 60 # Check every minute