vcd

package module
v0.3.2 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Mar 17, 2026 License: BSD-3-Clause Imports: 35 Imported by: 0

README

fleeting-plugin-vcd

A GitLab Fleeting plugin for VMware Cloud Director.

Overview

This plugin enables GitLab Runner to dynamically provision and manage virtual machines in VMware Cloud Director for CI/CD job execution. It's part of GitLab's Fleeting ecosystem, which replaces Docker Machine for autoscaling runners.

Fleeting is an abstraction layer for cloud providers' instance groups, allowing for the provisioning of multiple identical instances with a minimal API focused on creation, connection, and deletion.

Features

  • Dynamic provisioning of VMs in VMware Cloud Director
  • Support for both Linux and Windows VMs
  • SSH key and password-based authentication
  • Customizable VM resources (CPU, memory, disk size)
  • Automatic garbage collection of stuck/failed instances
  • Debug HTTP server for monitoring instance states

Requirements

  • VMware Cloud Director 10.4+ (for API token authentication)
  • GitLab Runner with Fleeting support
  • VM template with VMware Tools installed

Installation

Download the latest release from GitHub Releases and place the binary in a location accessible by GitLab Runner.

Assumptions

  • The vApp template must have a single VM
  • The OS template must have VMware Tools (or open-vm-tools for Linux) installed
  • If using the Docker Autoscaler executor, the Docker daemon must be installed and running on the VM
  • For Windows machines, the OpenSSH service must be enabled (WinRM is not supported)

Configuration

The plugin is configured via the GitLab Runner's config.toml file under [runners.autoscaler.plugin_config].

Required Configuration
| Parameter | Description |
| --- | --- |
| `name` | Unique name for this runner instance |
| `url` | VMware Cloud Director API URL |
| `org` | VCD Organization name |
| `token` | API token for authentication (VCD 10.4+) |
| `virtual_datacenter` | Virtual Datacenter name |
| `network` | Network to attach VMs to |
| `ip_allocation_mode` | IP allocation mode: `POOL` or `DHCP` |
| `instance_group_name` | Metadata tag to identify VMs belonging to this group |
| `vapp_name_prefix` | Prefix for vApp names |
| `catalog` | VCD Catalog containing the VM template |
| `template` | Template name within the catalog |
| `storage_profile` | Storage profile for VM disks |
| `cpu_count` | Number of vCPUs |
| `cores_per_socket` | CPU cores per socket |
| `memory_mb` | Memory in MB |
Optional Configuration
| Parameter | Description | Default |
| --- | --- | --- |
| `disk_size_gb` | Disk size in GB (0 = use template default) | `0` |
| `debug_server_addr` | Debug HTTP server address (e.g., `127.0.0.1:27060`) | disabled |
| `max_concurrent_creates` | Maximum number of VMs being created simultaneously | `3` |
| `max_concurrent_deletes` | Maximum number of VMs being deleted simultaneously | `5` |
Example Configuration

See config.example.toml for a complete example configuration.

[[runners]]
executor = "docker-autoscaler"

[runners.autoscaler]
plugin = "fleeting-plugin-vcd"
capacity_per_instance = 1
max_instances = 5

[runners.autoscaler.plugin_config]
name = "my-runner"
url = "https://vcd.example.com/api"
org = "MyOrg"
token = "your-api-token"
virtual_datacenter = "MyVDC"
network = "MyNetwork"
ip_allocation_mode = "POOL"
instance_group_name = "gitlab-runners"
vapp_name_prefix = "gitlab-runner"
catalog = "MyCatalog"
template = "Ubuntu-22.04"
storage_profile = "MyStorageProfile"
cpu_count = 4
cores_per_socket = 2
memory_mb = 8192
disk_size_gb = 100

# Optional: Enable debug server
debug_server_addr = "127.0.0.1:27060"

[runners.autoscaler.connector_config]
use_static_credentials = true
username = "root"
password = "your-password"

Debug Server

When debug_server_addr is configured, the plugin exposes an HTTP server showing the current state of all instances. Open http://<debug_server_addr>/ (for example, http://127.0.0.1:27060/) to see a table with instance states, creation times, and lifecycle events.

Building from Source

# Using goreleaser
goreleaser build --single-target --snapshot --clean

# Or using make
make build

Running Integration Tests

Integration tests require the following environment variables. They are read only by the tests; the plugin itself does not use them:

export VCD_URL="https://vcd.example.com/api"
export VCD_ORG="MyOrg"
export VCD_TOKEN="your-api-token"
export VCD_VDC="MyVDC"
export VCD_NETWORK="MyNetwork"
export VCD_NETWORK_ALLOCATION_MODE="POOL"
export VCD_CATALOG="MyCatalog"
export VCD_TEMPLATE="Ubuntu-22.04"
export VCD_VAPP_NAME_PREFIX="test-runner"
export VCD_STORAGE_PROFILE="MyStorageProfile"
export VCD_CPU_COUNT="2"
export VCD_CORES_PER_SOCKET="2"
export VCD_MEMORY_MB="4096"
export VCD_DISK_SIZE_GB="50"

make test

History

This plugin is based on:

Contributing

Contributions are welcome! Please feel free to submit a Pull Request.

License

See LICENSE file.

Documentation

Index

Constants

This section is empty.

Variables

View Source
// Prometheus collectors exported by the plugin.
//
// Every collector is created through promauto, which registers it with the
// default Prometheus registerer as part of package initialisation and panics
// if that registration fails. All collectors carry the instanceGroupLabel
// label (declared elsewhere in the package); some add a second "operation" or
// "state" label. All histogram buckets are expressed in seconds.
var (
	// Instance Lifecycle Metrics

	// InstancesCreatedTotal is a counter of instances created.
	InstancesCreatedTotal = promauto.NewCounterVec(prometheus.CounterOpts{
		Name: "fleeting_vcd_instances_created_total",
		Help: "Total number of instances created",
	}, []string{instanceGroupLabel})

	// InstancesDeletedTotal is a counter of instances deleted.
	InstancesDeletedTotal = promauto.NewCounterVec(prometheus.CounterOpts{
		Name: "fleeting_vcd_instances_deleted_total",
		Help: "Total number of instances deleted",
	}, []string{instanceGroupLabel})

	// InstancesFailedTotal is a counter of failed instance operations,
	// additionally labelled with the "operation" that failed.
	InstancesFailedTotal = promauto.NewCounterVec(prometheus.CounterOpts{
		Name: "fleeting_vcd_instances_failed_total",
		Help: "Total number of failed instance operations",
	}, []string{instanceGroupLabel, "operation"})

	// InstancesCurrentByState is a gauge of the current number of instances,
	// additionally labelled with their "state".
	InstancesCurrentByState = promauto.NewGaugeVec(prometheus.GaugeOpts{
		Name: "fleeting_vcd_instances_current",
		Help: "Current number of instances by state",
	}, []string{instanceGroupLabel, "state"})

	// InstanceCreationDuration is a histogram of instance creation time.
	// Buckets span 30 seconds to 20 minutes.
	InstanceCreationDuration = promauto.NewHistogramVec(prometheus.HistogramOpts{
		Name:    "fleeting_vcd_instance_creation_duration_seconds",
		Help:    "Time taken to create an instance",
		Buckets: []float64{30, 60, 90, 120, 180, 300, 600, 900, 1200},
	}, []string{instanceGroupLabel})

	// InstanceDeletionDuration is a histogram of instance deletion time.
	// Buckets span 10 seconds to 1 hour.
	InstanceDeletionDuration = promauto.NewHistogramVec(prometheus.HistogramOpts{
		Name:    "fleeting_vcd_instance_deletion_duration_seconds",
		Help:    "Time taken to delete an instance",
		Buckets: []float64{10, 30, 60, 120, 300, 600, 1200, 1800, 3600},
	}, []string{instanceGroupLabel})

	// Garbage Collection Metrics

	// GCRunsTotal is a counter of garbage collection runs.
	GCRunsTotal = promauto.NewCounterVec(prometheus.CounterOpts{
		Name: "fleeting_vcd_gc_runs_total",
		Help: "Total number of garbage collection runs",
	}, []string{instanceGroupLabel})

	// GCInstancesCollectedTotal is a counter of instances cleaned up by
	// garbage collection.
	GCInstancesCollectedTotal = promauto.NewCounterVec(prometheus.CounterOpts{
		Name: "fleeting_vcd_gc_instances_collected_total",
		Help: "Total number of instances cleaned up by garbage collection",
	}, []string{instanceGroupLabel})

	// GCDuration is a histogram of garbage collection run time.
	// Buckets span 1 second to 5 minutes.
	GCDuration = promauto.NewHistogramVec(prometheus.HistogramOpts{
		Name:    "fleeting_vcd_gc_duration_seconds",
		Help:    "Time taken to run garbage collection",
		Buckets: []float64{1, 5, 10, 30, 60, 120, 300},
	}, []string{instanceGroupLabel})

	// VCD API Metrics

	// APICallsTotal is a counter of VCD API calls, additionally labelled with
	// the "operation" performed.
	APICallsTotal = promauto.NewCounterVec(prometheus.CounterOpts{
		Name: "fleeting_vcd_api_calls_total",
		Help: "Total number of VCD API calls",
	}, []string{instanceGroupLabel, "operation"})

	// APIErrorsTotal is a counter of VCD API errors, additionally labelled
	// with the "operation" performed.
	APIErrorsTotal = promauto.NewCounterVec(prometheus.CounterOpts{
		Name: "fleeting_vcd_api_errors_total",
		Help: "Total number of VCD API errors",
	}, []string{instanceGroupLabel, "operation"})

	// APIDuration is a histogram of VCD API call time, additionally labelled
	// with the "operation" performed. Buckets span 100 milliseconds to
	// 1 minute.
	APIDuration = promauto.NewHistogramVec(prometheus.HistogramOpts{
		Name:    "fleeting_vcd_api_duration_seconds",
		Help:    "Time taken for VCD API calls",
		Buckets: []float64{0.1, 0.5, 1, 2, 5, 10, 30, 60},
	}, []string{instanceGroupLabel, "operation"})

	// Pool Status Metrics

	// PoolSize is a gauge of the current pool size.
	PoolSize = promauto.NewGaugeVec(prometheus.GaugeOpts{
		Name: "fleeting_vcd_pool_size",
		Help: "Current pool size",
	}, []string{instanceGroupLabel})

	// PoolMaxSize is a gauge of the configured maximum pool size.
	PoolMaxSize = promauto.NewGaugeVec(prometheus.GaugeOpts{
		Name: "fleeting_vcd_pool_max_size",
		Help: "Configured maximum pool size",
	}, []string{instanceGroupLabel})

	// State Manager Metrics (now backed by desiredStateStore)

	// StateManagerInstancesTotal is a gauge of the instances tracked in the
	// state store.
	// NOTE(review): this is a gauge, yet its metric name ends in "_total", a
	// suffix that Prometheus naming guidance reserves for counters. Renaming
	// it would break existing dashboards and alerts, so it is only flagged
	// here.
	StateManagerInstancesTotal = promauto.NewGaugeVec(prometheus.GaugeOpts{
		Name: "fleeting_vcd_state_manager_instances_total",
		Help: "Total instances tracked in state store",
	}, []string{instanceGroupLabel})

	// StateManagerInstancesByState is a gauge of the instances in the state
	// store, additionally labelled with their fleeting "state".
	StateManagerInstancesByState = promauto.NewGaugeVec(prometheus.GaugeOpts{
		Name: "fleeting_vcd_state_manager_instances_by_state",
		Help: "Instances by fleeting state in state store",
	}, []string{instanceGroupLabel, "state"})

	// Reconciler Metrics

	// ReconcileDuration is a histogram of the time taken by a single
	// reconciliation cycle. Buckets span 100 milliseconds to 2 minutes.
	ReconcileDuration = promauto.NewHistogramVec(prometheus.HistogramOpts{
		Name:    "fleeting_vcd_reconcile_duration_seconds",
		Help:    "Time taken for a single reconciliation cycle",
		Buckets: []float64{0.1, 0.5, 1, 2, 5, 10, 30, 60, 120},
	}, []string{instanceGroupLabel})

	// ReconcileTotal is a counter of reconciliation cycles.
	ReconcileTotal = promauto.NewCounterVec(prometheus.CounterOpts{
		Name: "fleeting_vcd_reconcile_total",
		Help: "Total number of reconciliation cycles",
	}, []string{instanceGroupLabel})
)
View Source
// Build metadata for the plugin binary. The string values below are the
// defaults compiled into the source.
// NOTE(review): "dev", "HEAD" and "now" look like placeholders meant to be
// overridden at build time (e.g. with -ldflags "-X ..."); confirm against the
// build configuration. These are variables rather than constants, which is
// what such an override would require.
var (
	// NAME is the plugin name.
	NAME      = "fleeting-plugin-vcd"
	// VERSION is the plugin version string; defaults to "dev".
	VERSION   = "dev"
	// REVISION defaults to "HEAD" (presumably the source revision — confirm).
	REVISION  = "HEAD"
	// REFERENCE defaults to "HEAD" (presumably the source ref/branch — confirm).
	REFERENCE = "HEAD"
	// BUILT defaults to "now" (presumably the build timestamp — confirm).
	BUILT     = "now"

	// Version is the plugin.VersionInfo value for this build. It is not
	// initialised in this declaration.
	// NOTE(review): presumably populated from the variables above elsewhere in
	// the package — confirm.
	Version plugin.VersionInfo
)

Functions

func GenerateRandomBytes

func GenerateRandomBytes(n int) ([]byte, error)

GenerateRandomBytes returns securely generated random bytes. It will return an error if the system's secure random number generator fails to function correctly, in which case the caller should not continue.

func GenerateRandomStringURLSafe

func GenerateRandomStringURLSafe(n int) (string, error)

GenerateRandomStringURLSafe returns a URL-safe, base64 encoded securely generated random string. It will return an error if the system's secure random number generator fails to function correctly, in which case the caller should not continue.

func GenerateRandomStringVMNameSafe

func GenerateRandomStringVMNameSafe(size int) (string, error)

GenerateRandomStringVMNameSafe returns a string that can be used as a VM name. It will return an error if the system's secure random number generator fails to function correctly, in which case the caller should not continue.

Types

type DebugServer added in v0.0.3

// DebugServer is the plugin's debug HTTP handler. It has a ServeHTTP method,
// so a *DebugServer can be used wherever an http.Handler is expected. Values
// are built with NewDebugServer, which takes a logger, the desired-state store
// and the instance group name.
type DebugServer struct {
	// contains filtered or unexported fields
}

func NewDebugServer added in v0.0.3

func NewDebugServer(log hclog.Logger, store *desiredStateStore, instanceGroupName string) *DebugServer

func (*DebugServer) ServeHTTP added in v0.0.3

func (ds *DebugServer) ServeHTTP(w http.ResponseWriter, r *http.Request)

type Instance added in v0.1.0

// Instance is the record kept for one tracked instance: its identifiers,
// current Phase, status strings, lifecycle timestamps and retry bookkeeping.
type Instance struct {
	ID         string // VApp HREF (empty until VApp created)
	IntentID   string // UUID, assigned at Increase time
	Name       string // VApp name
	VMName     string // Name of the VM (presumably the single VM inside the vApp — confirm).
	Phase      Phase // Current lifecycle phase; one of the Phase* constants.
	IPAddress  string // IP address recorded for the instance (presumably the VM's address — confirm).
	VAppStatus string // Status string for the vApp (presumably as reported by VCD — confirm).
	VMStatus   string // Status string for the VM (presumably as reported by VCD — confirm).
	OSType     string // Cached OS type from VM spec (e.g. "windows9Server64Guest")

	// Timestamps
	// NOTE(review): all are pointers, presumably so nil can mean "has not
	// happened yet" — confirm against the code that sets them.
	CreatedAt         *time.Time
	CreateStartedAt   *time.Time
	CreateCompletedAt *time.Time
	DeleteRequestedAt *time.Time
	DeleteStartedAt   *time.Time
	DeleteCompletedAt *time.Time
	GCMarkedAt        *time.Time
	LastUpdated       *time.Time

	// Retry
	RetryCount     int // Number of retries recorded for this instance.
	LastError      string // Text of the most recent error (presumably empty when there is none — confirm).
	NextRetryAfter *time.Time // Presumably the earliest time the next retry may run — confirm.

	// MissedPolls tracks how many consecutive polls this instance was not found in VCD.
	// Used to avoid marking instances as disappeared due to eventual consistency.
	MissedPolls int
}

Instance represents a tracked VCD instance with its full lifecycle state.

type InstanceGroup

// InstanceGroup holds the plugin configuration and carries the
// provider.InstanceGroup methods (Init, Update, Increase, Decrease,
// ConnectInfo, Heartbeat, Shutdown). The JSON tags match the keys documented
// for the [runners.autoscaler.plugin_config] section of GitLab Runner's
// config.toml; the field descriptions below are taken from that documentation.
type InstanceGroup struct {
	Name string `json:"name"` // Unique name for this runner instance.

	// Cloud Director connection config
	StrURL               string `json:"url"` // VMware Cloud Director API URL.
	Org                  string `json:"org"` // VCD organization name.
	Token                string `json:"token"` // API token for authentication (VCD 10.4+).
	VirtualDatacenter    string `json:"virtual_datacenter"` // Virtual Datacenter name.
	Network              string `json:"network"` // Network to attach VMs to.
	IPAllocationMode     string `json:"ip_allocation_mode"` // IP allocation mode: POOL or DHCP.
	InstanceGroupName    string `json:"instance_group_name"` // Metadata tag identifying VMs that belong to this group.
	VAppNamePrefix       string `json:"vapp_name_prefix"` // Prefix for vApp names.
	Catalog              string `json:"catalog"` // VCD catalog containing the VM template.
	Template             string `json:"template"` // Template name within the catalog.
	StorageProfile       string `json:"storage_profile"` // Storage profile for VM disks.
	CPUCount             int    `json:"cpu_count"` // Number of vCPUs.
	CoresPerSocket       int    `json:"cores_per_socket"` // CPU cores per socket.
	MemoryMB             int64  `json:"memory_mb"` // Memory in MB.
	DiskSizeGB           int    `json:"disk_size_gb"` // Optional: disk size in GB; 0 means use the template default.
	DebugServerAddr      string `json:"debug_server_addr"` // Optional: debug HTTP server address (e.g. 127.0.0.1:27060); documented as disabled by default.
	MaxConcurrentCreates int    `json:"max_concurrent_creates"` // Optional: maximum number of VMs being created simultaneously; documented default is 3.
	MaxConcurrentDeletes int    `json:"max_concurrent_deletes"` // Optional: maximum number of VMs being deleted simultaneously; documented default is 5.
	// contains filtered or unexported fields
}

func (*InstanceGroup) ConnectInfo

func (g *InstanceGroup) ConnectInfo(ctx context.Context, id string) (provider.ConnectInfo, error)

ConnectInfo implements provider.InstanceGroup

func (*InstanceGroup) Decrease

func (g *InstanceGroup) Decrease(ctx context.Context, instancesToDelete []string) ([]string, error)

Decrease implements provider.InstanceGroup

func (*InstanceGroup) Heartbeat added in v0.0.2

func (g *InstanceGroup) Heartbeat(ctx context.Context, id string) error

func (*InstanceGroup) Increase

func (g *InstanceGroup) Increase(ctx context.Context, delta int) (int, error)

func (*InstanceGroup) Init

Init implements provider.InstanceGroup

func (*InstanceGroup) Shutdown

func (g *InstanceGroup) Shutdown(ctx context.Context) error

func (*InstanceGroup) Update

func (g *InstanceGroup) Update(ctx context.Context, update func(instance string, state provider.State)) error

Update implements provider.InstanceGroup

type Phase added in v0.1.0

// Phase is an int-backed enumeration of instance lifecycle phases. Its values
// are the Phase* constants, and it has a String method.
type Phase int

Phase represents the lifecycle phase of an instance.

// Lifecycle phases, numbered from 0 by iota in declaration order. Reordering
// or inserting entries changes the numeric value of every later constant.
// NOTE(review): the names suggest the progression pending-create -> creating
// -> running -> pending-delete -> deleting -> deleted; the actual transitions
// are not visible here — confirm against the reconciler.
const (
	// PhasePendingCreate is 0 and is therefore the zero value of Phase.
	PhasePendingCreate Phase = iota
	PhaseCreating // 1
	PhaseRunning // 2
	PhasePendingDelete // 3
	PhaseDeleting // 4
	PhaseDeleted // 5
)

func (Phase) String added in v0.1.0

func (p Phase) String() string

type PrivPub

// PrivPub is a private key that can also hand out its matching public key.
// crypto.PrivateKey places no method requirements of its own, so Public is the
// only method a type must provide. The private key types in the standard
// library (for example *rsa.PrivateKey, *ecdsa.PrivateKey and
// ed25519.PrivateKey) have this method.
type PrivPub interface {
	crypto.PrivateKey
	// Public returns the public key corresponding to the private key.
	Public() crypto.PublicKey
}

type ReconcilerConfig added in v0.1.0

// ReconcilerConfig groups the tunables of the reconciliation loop.
// NOTE(review): the field meanings below are inferred from the names and from
// the similarly named plugin options; confirm against the reconciler.
type ReconcilerConfig struct {
	MaxConcurrentCreates int // Presumably the cap on create operations running at once.
	MaxConcurrentDeletes int // Presumably the cap on delete operations running at once.
	Interval             time.Duration // Presumably the time between reconciliation cycles.
	MaxInstanceAge       time.Duration // Presumably the age after which an instance is eligible for cleanup.
}

ReconcilerConfig holds configuration for the reconciliation loop.

type VCDInstanceGroup added in v0.1.0

// VCDInstanceGroup is the cache-backed, reconciler-driven view of a group of
// VCD instances. Apart from Shutdown, its methods take no context and return
// no error. The Increase and Decrease signatures differ from the
// context-taking InstanceGroup methods of the same names, so *InstanceGroup
// does not satisfy this interface.
type VCDInstanceGroup interface {
	// Increase requests n new instances. Returns intent IDs.
	// An intent ID is the value stored in Instance.IntentID.
	Increase(n int) []string

	// Decrease marks instances for deletion.
	// NOTE(review): instanceIDs are presumably VApp HREFs, as for Instance —
	// confirm.
	Decrease(instanceIDs []string)

	// Instances returns all tracked instances from cache.
	Instances() []Instance

	// Instance returns a specific instance by VApp HREF from cache.
	// The bool result presumably reports whether the instance was found —
	// confirm.
	Instance(id string) (Instance, bool)

	// Shutdown stops the reconciler, waits for in-flight ops, cleans up all instances.
	Shutdown(ctx context.Context) error
}

VCDInstanceGroup manages a group of VCD instances with reconciliation.

Directories

Path Synopsis
cmd

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL