types

package
v1.0.0 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Oct 13, 2021 License: Apache-2.0, BSD-3-Clause, MIT Imports: 19 Imported by: 0

Documentation

Index

Constants

View Source
// Detection-related string identifiers.
const (
	// ExpresstionAutoDetect is the literal "auto".
	//
	// Deprecated: the identifier is misspelled and is kept only for backward
	// compatibility; use ExpressionAutoDetect, which has the same value.
	ExpresstionAutoDetect = "auto"
	// ExpressionAutoDetect is the correctly spelled alias of ExpresstionAutoDetect.
	ExpressionAutoDetect = ExpresstionAutoDetect

	// Detection identifiers — presumably the names of the supported detection
	// methods; confirm against the code that consumes them.
	DetectionExpression = "expression"
	DetectionEWMA       = "ewma"
	DetectionUnion      = "union"
)
View Source
// LocalPredictorType is the name of the local predictor.
const LocalPredictorType = "local"

// VPAPredictorType is the name of the remote VPA predictor.
const VPAPredictorType = "vpa"

// NodeResourceTypeOnlinePredict is the literal "online_predict".
const NodeResourceTypeOnlinePredict = "online_predict"
View Source
// Task types, alarm types, cpu manage policies, unit conversion factors and
// cgroup paths shared across the package.
const (
	// Task types. The online and offline k8s types share the value "k8s".
	OnlineTypeOnK8s      = "k8s"
	OnlineTypeOnLocal    = "local"
	OfflineTypeOnk8s     = "k8s"
	OfflineTypeYarnOnk8s = "yarn_on_k8s"

	// Alarm types.
	AlarmTypeLocal  = "local"
	AlarmTypeRemote = "remote"

	// CpuManagePolicyBT is just for tencent OS
	CpuManagePolicyBT       = "bt"
	CpuManagePolicySet      = "cpuset"
	CpuManagePolicyQuota    = "quota"
	CpuManagePolicyAdaptive = "adaptive"

	// MemUnit translates Mb to byte
	MemUnit = int64(1024 * 1024)
	// MemGbUnit translates Gb to byte
	MemGbUnit = int64(1024 * 1024 * 1024)
	// CpuUnit translates milli core
	CpuUnit = int64(1000)
	// DiskUnit translates Gi to byte.
	// The middle factor used to be 1020, which made the result about 0.4%
	// smaller than one GiB; it is 1024 so that DiskUnit is exactly 2^30.
	DiskUnit = int64(1024 * 1024 * 1024)

	// PodAnnotationPrefix is the fixed prefix of pod annotations
	PodAnnotationPrefix = "mixer.kubernetes.io/"

	// RootFS is the root directory in container.
	RootFS         = "/rootfs"
	CgroupKubePods = "/kubepods"
	CgroupOffline  = "/kubepods/offline"
	// CgroupOfflineSystem is the "system" child of CgroupOffline.
	CgroupOfflineSystem        = CgroupOffline + "/system"
	SystemComponentOomScoreAdj = "500"
	CgroupYarn                 = "hadoop-yarn"
	// CgroupNonK8sOnline is the cgroup for online jobs, which are not running on k8s, we need to create the cgroup
	// and children cgroup manually.
	CgroupNonK8sOnline = "/onlinejobs"
)
View Source
// ContainerRuntimeDocker is the container runtime name "docker".
const ContainerRuntimeDocker = "docker"

Variables

View Source
// Sets of accepted predictor type names.
var (
	// AvailablePredictType contains the local and VPA predictor types.
	AvailablePredictType = sets.NewString(
		LocalPredictorType,
		VPAPredictorType,
	)
	// AvailableLocalPredictType contains only the local predictor type.
	AvailableLocalPredictType = sets.NewString(
		LocalPredictorType,
	)
)
View Source
// Sets of accepted values for task types, alarm types, cpu manage policies
// and compressible resources.
var (
	// AvailableOnlineTaskType describes the available online tasks, which may be a pod or a local process.
	AvailableOnlineTaskType = sets.NewString(
		OnlineTypeOnK8s,
		OnlineTypeOnLocal,
	)
	// AvailableOfflineTaskType describes the available offline tasks, which may be a pod or a yarn job.
	AvailableOfflineTaskType = sets.NewString(
		OfflineTypeOnk8s,
		OfflineTypeYarnOnk8s,
	)

	// AvailableAlarmType shows the available alarm types.
	AvailableAlarmType = sets.NewString(
		AlarmTypeLocal,
		AlarmTypeRemote,
	)

	// AvailableCpuManagePolicy shows the available cpu manage policies.
	AvailableCpuManagePolicy = sets.NewString(
		CpuManagePolicyBT,
		CpuManagePolicySet,
		CpuManagePolicyQuota,
		CpuManagePolicyAdaptive,
	)

	// CompressibleRes is the set of compressible resource names; it contains only the CPU resource name.
	CompressibleRes = sets.NewString(string(v1.ResourceCPU))
)

Functions

func AllResCompressible

func AllResCompressible(res []string) bool

AllResCompressible checks whether the resources are compressible.

func GetDeviceNameFromMetric

func GetDeviceNameFromMetric(metric string) (dev, devMetric, originalMetric string)

GetDeviceNameFromMetric parses the metric name and outputs the dev and devMetric names.

func InitHealthCheckConfigFunc

func InitHealthCheckConfigFunc(nodeMetrics *MetricsNodeConfig,
	predictReserved *Resource) func(string) (*HealthCheckConfig, error)

InitHealthCheckConfigFunc returns a function that gets the health check config.

func InitPredictConfig

func InitPredictConfig(config *PredictConfig)

InitPredictConfig validates and formats the predict config.

func OfflineOnYarn

func OfflineOnYarn(config *TaskTypeConfig) bool

OfflineOnYarn checks whether the offline job is running on YARN.

Types

type ActionConfig

// ActionConfig defines the configuration of an action.
// ArgsStr holds the raw JSON found under the "args" key. Args is excluded
// from JSON (tag "-"); presumably it is filled in by decoding ArgsStr —
// confirm against the config loader.
type ActionConfig struct {
	Name    string          `json:"name"`
	ArgsStr json.RawMessage `json:"args"`
	Args    interface{}     `json:"-"`
}

ActionConfig define action config

type AggregationsConfig

// AggregationsConfig is used to configure aggregation behaviour.
type AggregationsConfig struct {
	// MemoryAggregationInterval is the length of a single interval, for
	// which the peak memory usage is computed.
	// Memory usage peaks are aggregated in multiples of this interval. In other words
	// there is one memory usage sample per interval (the maximum usage over that
	// interval).
	MemoryAggregationInterval times.Duration `json:"memory_aggregation_interval"`
	// MemoryAggregationIntervalCount is the number of consecutive MemoryAggregationIntervals
	// which make up the memory aggregation window, which in turn is the period for memory
	// usage aggregation by VPA.
	MemoryAggregationIntervalCount int64 `json:"memory_aggregation_interval_count"`
	// MemoryHistogramDecayHalfLife is the amount of time it takes a historical
	// memory usage sample to lose half of its weight. In other words, a fresh
	// usage sample is twice as 'important' as one with age equal to the half
	// life period.
	MemoryHistogramDecayHalfLife times.Duration `json:"memory_histogram_decay_half_life"`
	// CPUHistogramDecayHalfLife is the amount of time it takes a historical
	// CPU usage sample to lose half of its weight.
	CPUHistogramDecayHalfLife times.Duration `json:"cpu_histogram_decay_half_life"`
}

AggregationsConfig is used to configure aggregation behaviour.

type AlarmChannel

// AlarmChannel describes the alarm channel: a local alarm (JSON key "local")
// and a remote alarm (JSON key "remote"). Both are pointers, so either may be nil.
type AlarmChannel struct {
	LocalAlarm  *LocalAlarm  `json:"local"`
	RemoteAlarm *RemoteAlarm `json:"remote"`
}

AlarmChannel struct is used to show alarm channel

type AlarmConfig

// AlarmConfig groups the options for sending alarm messages.
// AlarmChannel is embedded but carries an explicit JSON name, so with
// encoding/json it is read from the nested "alarm_channel" object rather than
// being flattened into this struct.
type AlarmConfig struct {
	Enable                 bool           `json:"enable"`
	Cluster                string         `json:"cluster"`
	MessageBatch           int            `json:"message_batch"`
	MessageDelay           times.Duration `json:"message_delay"`
	ChannelName            string         `json:"channel_name"`
	IgnoreAlarmWhenSilence bool           `json:"ignore_alarm_when_silence"`
	AlarmChannel           `json:"alarm_channel"`
}

AlarmConfig group options to send alarm message

type CPIManagerConfig

// CPIManagerConfig shows the configuration for cpi detecting.
// PrometheusAddr is excluded from JSON (tag "-"); presumably it is parsed
// from PrometheusAddrStr — confirm against the config loader.
type CPIManagerConfig struct {
	// Enable turns the feature on; the zero value (false) keeps the feature
	// disabled by default, which is the author's stated intent.
	Enable            bool           `json:"enable"`
	WindowDuration    times.Duration `json:"window_duration"`
	PrometheusAddrStr string         `json:"prometheus_addr"`
	PrometheusAddr    url.URL        `json:"-"`
	MaxJobSpecRange   times.Duration `json:"max_job_spec_range"`
}

CPIManagerConfig show the configuration for cpi detecting

type CaelusConfig

// CaelusConfig is the configuration for Caelus. It aggregates the per-feature
// configuration sections, each read from its own JSON key.
type CaelusConfig struct {
	K8sConfig    K8sConfig          `json:"k8s_config"`
	CheckPoint   CheckPointConfig   `json:"check_point"`
	TaskType     TaskTypeConfig     `json:"task_type"`
	NodeResource NodeResourceConfig `json:"node_resource"`
	// If multiple predicts are configured, the first one is used for real prediction. The rest are experimental
	// predicts: caelus only feeds samples to them and exposes predict metrics for them.
	Predicts        []PredictConfig       `json:"predicts"`
	Metrics         MetricsCollectConfig  `json:"metrics"`
	ResourceIsolate ResourceIsolateConfig `json:"resource_isolate"`
	CpiManager      CPIManagerConfig      `json:"cpi_manager"`
	Alarm           AlarmConfig           `json:"alarm"`
	Online          OnlineConfig          `json:"online"`
	DiskQuota       DiskQuotaConfig       `json:"disk_quota"`
}

CaelusConfig is the configuration for Caelus

func ParseJsonConfig

func ParseJsonConfig(configFile string) (*CaelusConfig, error)

ParseJsonConfig parses the JSON config file.

type CheckPointConfig

// CheckPointConfig groups info related to the checkpoint, which saves state
// to a local file.
type CheckPointConfig struct {
	CheckPointDir   string `json:"check_point_dir"`
	NodeResourceKey string `json:"node_resource_key"`
}

CheckPointConfig groups info related to the checkpoint, which saves state to a local file.

type ComponentConfig

// ComponentConfig is the config used to specify a non-containerized component
// by its cgroup and command.
type ComponentConfig struct {
	Cgroup  string `json:"cgroup"`
	Command string `json:"command"`
}

ComponentConfig is the config used to specify a non-containerized component.

type CpuIsolateConfig

// CpuIsolateConfig is the configuration for cpu isolation.
type CpuIsolateConfig struct {
	// AutoDetect will enable the bt feature if supported, with quota as the second choice.
	AutoDetect bool `json:"auto_detect"`
	// ManagePolicy assigns the cpu manage policy.
	ManagePolicy   string         `json:"manage_policy"`
	CpuSetConfig   CpuSetConfig   `json:"cpuset_config"`
	CpuQuotaConfig CpuQuotaConfig `json:"cpu_quota_config"`
	// KubeletStatic records whether the cpu manager policy for kubelet is static.
	// It is excluded from JSON (tag "-").
	KubeletStatic bool `json:"-"`
}

CpuIsolateConfig is the configuration for cpu isolation

type CpuQuotaConfig

// CpuQuotaConfig describes the configs for the cpu quota isolation policy.
type CpuQuotaConfig struct {
	// OfflineShare sets the offline job weights, just for the quota policy.
	// It is a pointer, so an absent value (nil) is distinguishable from zero.
	OfflineShare *uint64 `json:"offline_share"`
}

CpuQuotaConfig describe configs for cpu quota isolation policy

type CpuSetConfig

// CpuSetConfig describes the configs for the cpuset isolation policy.
type CpuSetConfig struct {
	// EnableOnlineIsolate isolates online jobs from offline jobs.
	EnableOnlineIsolate bool `json:"enable_online_isolate"`
	// ReservedCpus is the cpu list to which offline jobs will not be assigned.
	ReservedCpus string `json:"reserved_cpus"`
}

CpuSetConfig describe configs for cpuset isolation policy

type CustomMetric

// CustomMetric defines the custom metric config: the metric server address
// and the collect interval.
type CustomMetric struct {
	MetricServerAddr string         `json:"metric_server_addr"`
	CollectInterval  times.Duration `json:"collect_interval"`
}

CustomMetric define custom metric config

type DetectActionConfig

// DetectActionConfig defines a list of detectors and a list of actions.
type DetectActionConfig struct {
	Detects []*DetectConfig `json:"detects"`
	Actions []*ActionConfig `json:"actions"`
}

DetectActionConfig define detectors and actions

type DetectConfig

// DetectConfig defines the configuration of a detector.
// ArgsStr holds the raw JSON found under the "args" key. Args is excluded
// from JSON (tag "-"); presumably it is filled in by decoding ArgsStr —
// confirm against the config loader.
type DetectConfig struct {
	Name    string          `json:"name"`
	ArgsStr json.RawMessage `json:"args"`
	Args    interface{}     `json:"-"`
}

DetectConfig define detector config

type Devices

// Devices groups network and disk devices.
type Devices struct {
	// Ifaces are the network interfaces, e.g. eth0; those that do not exist or are down will be filtered out.
	// These ifaces will be assigned to metrics.node.ifaces.
	// NOTE(review): IfacesWithProperty is read from the JSON key "ifaces_xxx" while Ifaces is excluded
	// from JSON; presumably Ifaces holds the filtered result — confirm against the initializer.
	IfacesWithProperty []string `json:"ifaces_xxx"`
	Ifaces             []string `json:"-"`
	// DiskNames are the disk names, e.g. sda, vda; those that do not exist will be filtered out.
	// These disk names will be assigned to metrics.node.deviceNames.
	DiskNames []string `json:"disk_names"`
}

Devices group network and disk devices

type DiskQuotaConfig

// DiskQuotaConfig groups the disk quota configurations.
type DiskQuotaConfig struct {
	Enabled     bool           `json:"enabled"`
	CheckPeriod times.Duration `json:"check_period"`
	// ContainerRuntime is the container runtime, such as docker or containerd.
	ContainerRuntime string `json:"container_runtime"`
	// VolumeSizes is the quota size per volume type, just for offline jobs; online jobs need to
	// announce their quota in annotations.
	VolumeSizes map[VolumeType]*DiskQuotaSize `json:"volume_sizes"`
}

DiskQuotaConfig group disk quota configurations

type DiskQuotaSize

// DiskQuotaSize holds a quota and an inode limit, together with their used
// counterparts. QuotaUsed and InodesUsed are excluded from JSON (tag "-").
// Open question left by the author: shall we support the soft feature?
type DiskQuotaSize struct {
	Quota      uint64 `json:"quota"`
	Inodes     uint64 `json:"inodes"`
	QuotaUsed  uint64 `json:"-"`
	InodesUsed uint64 `json:"-"`
}

shall we support soft feature ?

type EWMAArgs

// EWMAArgs groups the args used for ewma detection.
type EWMAArgs struct {
	Metric string `json:"metric"`
	Nr     int    `json:"nr"`
}

EWMAArgs group args used for ewma detection

type ExpressionArgs

// ExpressionArgs groups the args used for expression detection.
type ExpressionArgs struct {
	Expression      string         `json:"expression"`
	WarningCount    int            `json:"warning_count"`
	WarningDuration times.Duration `json:"warning_duration"`
}

ExpressionArgs group args used for expression detection

type HealthCheckConfig

// HealthCheckConfig is the config for checking health, such as node load or
// online job interference.
type HealthCheckConfig struct {
	Disable      bool         `json:"disable"`
	RuleNodes    []string     `json:"rule_nodes"`
	RuleCheck    RuleCheck    `json:"rule_check"`
	CgroupNotify NotifyConfig `json:"cgroup_notify"`
	// PredictReserved is assigned during initialization; it is excluded from JSON (tag "-").
	PredictReserved *Resource `json:"-"`
}

HealthCheckConfig is the config for checking health, such as node load or online job interference

type K8sConfig

// K8sConfig shows the kubernetes config.
type K8sConfig struct {
	KubeletRootDir string `json:"kubelet_root_dir"`
}

K8sConfig show kubernetes config

type LocalAlarm

// LocalAlarm describes the local alarm body.
type LocalAlarm struct {
	Executor string `json:"executor"`
}

LocalAlarm struct is used to describe local alarm body

type LocalPredictConfig

// LocalPredictConfig groups the options for the local predictor.
type LocalPredictConfig struct {
	// Minimum CPU recommendation for a pod
	PodMinCPUMillicores float64 `json:"pod_min_cpu_millicores"`
	// Minimum memory recommendation for a pod
	PodMinMemoryMb float64 `json:"pod_min_memory_mb"`
	// Fraction of usage added as the safety margin to the recommended request
	SafetyMarginFraction float64 `json:"safety_margin_fraction"`
	// cpu usage percentile to recommend cpu resource
	CPUPercentile float64 `json:"cpu_percentile"`
	// memory usage peaks percentile to recommend memory resource
	MemoryPeaksPercentile float64 `json:"memory_peaks_percentile"`
	// AggregationsConfig is used to configure aggregation behaviour.
	// NOTE(review): encoding/json has no "inline" option; an embedded struct whose tag has an
	// empty name is flattened into the parent by default — confirm which decoder reads this.
	AggregationsConfig `json:",inline"`
	// Enable tuning of the cpu weight if the cpu usage is anomalous
	EnableTuneCPUWeight bool `json:"enable_tune_cpu_weight"`
	// AnomalyDetectorMovingWindow defines how long the moving window of anomaly detector should keep
	AnomalyDetectorMovingWindow times.Duration `json:"anomaly_detector_moving_window"`
	// If detect cpu usage increasing anomaly, the weight of the anomaly sample
	// Base weight is 100
	IncreasingAnomalyWeightFactor int64 `json:"increasing_anomaly_weight_factor"`
	// If detect cpu usage decreasing anomaly, the weight of the anomaly sample
	// Base weight is 100
	DecreasingAnomalyWeightFactor int64 `json:"decreasing_anomaly_weight_factor"`
}

LocalPredictConfig group options for local predictor

type MemoryNotifyConfig

// MemoryNotifyConfig describes the memory cgroup notify settings: a list of
// pressure notifications and a list of usage notifications.
type MemoryNotifyConfig struct {
	Pressures []MemoryPressureNotifyConfig `json:"pressures"`
	Usages    []MemoryUsageNotifyConfig    `json:"usages"`
}

MemoryNotifyConfig describe memory cgroup notify

type MemoryPressureNotifyConfig

// MemoryPressureNotifyConfig describes the memory.pressure_level notify data.
type MemoryPressureNotifyConfig struct {
	Cgroups       []string `json:"cgroups"`
	PressureLevel string   `json:"pressure_level"`
	// Duration assigns the time duration the pressure has kept.
	Duration times.Duration `json:"duration"`
	// Count assigns the event number within the duration time.
	Count int `json:"count"`
}

MemoryPressureNotifyConfig describe memory.pressure_level notify data

type MemoryUsageNotifyConfig

// MemoryUsageNotifyConfig describes the memory.usage_in_bytes notify data.
type MemoryUsageNotifyConfig struct {
	Cgroups []string `json:"cgroups"`
	// MarginMb is the distance between the limit and the threshold.
	MarginMb int `json:"margin_mb"`
	// Duration is when to handle the event after receiving it.
	Duration times.Duration `json:"duration"`
}

MemoryUsageNotifyConfig describe memory.usage_in_bytes notify data

type MetricKind

// MetricKind represents the kind of metrics that cAdvisor exposes.
type MetricKind string

MetricKind represent the kind of metrics that cAdvisor exposes.

type MetricsCollectConfig

// MetricsCollectConfig is the configuration for metrics collection, with one
// section each for node, container, perf, rdt and prometheus metrics.
type MetricsCollectConfig struct {
	Node       MetricsNodeConfig      `json:"node"`
	Container  MetricsContainerConfig `json:"container"`
	Perf       MetricsPerfConfig      `json:"perf"`
	Rdt        MetricsRdtConfig       `json:"rdt"`
	Prometheus MetricsPrometheus      `json:"prometheus"`
}

MetricsCollectConfig is the configuration for metrics collection

type MetricsContainerConfig

// MetricsContainerConfig is the configuration for container metrics collection.
type MetricsContainerConfig struct {
	Resources               []string       `json:"resources"`
	Cgroups                 []string       `json:"cgroups"`
	CollectInterval         times.Duration `json:"collect_interval"`
	MaxHousekeepingInterval times.Duration `json:"max_housekeeping_interval"`
}

MetricsContainerConfig is the configuration for container metrics collection

type MetricsNodeConfig

// MetricsNodeConfig is the configuration for node metrics collection.
// OfflineType is excluded from JSON (tag "-"). Devices is embedded with an
// empty JSON name, so with encoding/json its fields are read from the same
// object as the other fields.
type MetricsNodeConfig struct {
	CollectInterval times.Duration `json:"collect_interval"`
	SystemProcesses []string       `json:"system_processes"`
	OfflineType     string         `json:"-"`
	Devices         `json:",inline"`
}

MetricsNodeConfig is the configuration for node metrics collection

type MetricsPerfConfig

// MetricsPerfConfig is the configuration for perf metrics collection.
type MetricsPerfConfig struct {
	Disable         bool           `json:"disable"`
	CollectInterval times.Duration `json:"collect_interval"`
	CollectDuration times.Duration `json:"collect_duration"`
	IgnoredCgroups  []string       `json:"ignored_cgroups"`
}

MetricsPerfConfig is the configuration for perf metrics collection

type MetricsPrometheus

// MetricsPrometheus describes how to collect prometheus metrics.
type MetricsPrometheus struct {
	CollectInterval times.Duration `json:"collect_interval"`
	// DisableShow relates to whether these metrics need to be shown with the prefix "caelus_".
	// NOTE(review): the original note only says "if need to show"; confirm the polarity in the consumer.
	DisableShow bool              `json:"disable_show"`
	Items       []*PrometheusData `json:"items"`
}

MetricsPrometheus describe how to collect prometheus metrics

type MetricsRdtConfig

// MetricsRdtConfig is the configuration for RDT metrics collection.
type MetricsRdtConfig struct {
	Disable         bool           `json:"disable"`
	RdtCommand      string         `json:"rdt_command"`
	CollectInterval times.Duration `json:"collect_interval"`
	CollectDuration times.Duration `json:"collect_duration"`
	ExecuteInterval times.Duration `json:"execute_interval"`
}

MetricsRdtConfig is the configuration for RDT metrics collection

type MetricsSource

// MetricsSource defines the metrics source of online services: either a
// command (MetricsCommand) or a URL (MetricsURL).
type MetricsSource struct {
	CheckInterval times.Duration `json:"check_interval"`
	// MetricsCommand is a command to get the job's current metrics value; it must return data in this format:
	// {"code":0,"msg":"success","data":[{"job_name":"","metric_name":"","key1":xx,"key2":xx,...}]}
	MetricsCommand []string `json:"metrics_command"`
	// CmdNeedChroot indicates whether chroot is needed when executing the metrics command.
	// It is a pointer, so an absent value (nil) is distinguishable from false.
	CmdNeedChroot *bool `json:"cmd_need_chroot"`
	// MetricsURL is a url to get the job's metrics value; it must return data in this format:
	// <slo>,<metrics>.
	MetricsURL string `json:"metrics_url"`
}

MetricsSource define metrics source of online services

type NodeResourceConfig

// NodeResourceConfig groups the configuration for the node.
// OfflineType is excluded from JSON (tag "-").
type NodeResourceConfig struct {
	Disable        bool           `json:"disable"`
	UpdateInterval times.Duration `json:"update_interval"`
	OfflineType    string         `json:"-"`
	// DisableKillIfNormal means pods are not killed when no resource is in conflicting status.
	DisableKillIfNormal         bool                   `json:"disable_kill_if_normal"`
	OnlyKillIfIncompressibleRes bool                   `json:"only_kill_if_incompressible_res"`
	YarnConfig                  YarnNodeResourceConfig `json:"yarn_config"`
	Silence                     SilenceConfig          `json:"silence"`
}

NodeResourceConfig group configuration for node

type NotifyConfig

// NotifyConfig configures resource monitoring by kernel notify.
// MemoryCgroup is a pointer, so it may be nil.
type NotifyConfig struct {
	MemoryCgroup *MemoryNotifyConfig `json:"memory_cgroup"`
}

NotifyConfig monitor resource by kernel notify

type OfflineJobs

// OfflineJobs describes offline job features, such as resource and state.
// The fields carry no JSON tags.
type OfflineJobs struct {
	Metadata interface{}
	Request  v1.ResourceList
	Used     v1.ResourceList
	State    string
}

OfflineJobs describe offline job features, such as resource and state

type OnlineConfig

// OnlineConfig shows the online job configuration.
type OnlineConfig struct {
	Enable       bool              `json:"enable"`
	PidToCgroup  PidToCgroup       `json:"pid_to_cgroup"`
	Jobs         []OnlineJobConfig `json:"jobs"`
	CustomMetric CustomMetric      `json:"custom_metric"`
}

OnlineConfig show online job configuration

type OnlineJobConfig

// OnlineJobConfig is the configuration of an online job.
type OnlineJobConfig struct {
	Name string `json:"name"`
	// Command is the job's command expression
	Command string          `json:"command"`
	Metrics []OnlineMetrics `json:"metrics"`
}

OnlineJobConfig is the configuration of a online job

type OnlineMetrics

// OnlineMetrics defines the metric config of online services: a metric name
// and the source it is read from.
type OnlineMetrics struct {
	Name   string        `json:"name"`
	Source MetricsSource `json:"source"`
}

OnlineMetrics define metric config of online services

type OverCommit

// OverCommit sets the overcommit percent for a resource, with optional
// per-time-range values in Periods.
type OverCommit struct {
	Enable            bool                  `json:"enable"`
	OverCommitPercent float64               `json:"over_commit_percent"`
	Periods           []TimeRangeOverCommit `json:"periods"`
}

OverCommit set overcommit percent for resource

type PathInfo

// PathInfo groups a path and its quota options. The fields carry no JSON tags.
type PathInfo struct {
	Path string
	Size *DiskQuotaSize
	// If a shared limit is set, SharedInfo contains the project id name;
	// if not, SharedInfo is nil.
	SharedInfo *SharedInfo
}

PathInfo group path and quota options

type PidToCgroup

// PidToCgroup defines the online config of the pid check.
type PidToCgroup struct {
	// PidCheckInterval could be zero
	PidCheckInterval    times.Duration `json:"pids_check_interval"`
	CgroupCheckInterval times.Duration `json:"cgroup_check_interval"`
	BatchNum            int            `json:"batch_num"`
}

PidToCgroup define online config of pid check

type PredictConfig

// PredictConfig groups the options for a predictor.
type PredictConfig struct {
	Disable       bool           `json:"disable"`
	CheckInterval times.Duration `json:"check_interval"`
	// PredictType must be one of [local, localv2, vpa].
	// NOTE(review): AvailablePredictType in this package lists only "local" and "vpa";
	// confirm whether "localv2" is still accepted.
	PredictType       string   `json:"predict_type"`
	PredictServerAddr string   `json:"predict_server_addr"`
	ReserveResource   Resource `json:"reserve_resource"`
	// PrintInterval is the time interval to print the detailed predict log for debug
	PrintInterval times.Duration `json:"print_interval"`
	// LocalPredictConfig is the configuration for local predictor
	LocalPredictConfig `json:",inline"`
	// The type value of online predict metrics caelus_node_resource{type=""}
	// It's used by experiment predict
	PredictMetricsType string `json:"predict_metrics_type"`
}

PredictConfig group options for predictor

type PrometheusData

// PrometheusData describes which metrics to collect or not collect.
// CollectMap and NoCollectMap are excluded from JSON (tag "-"); presumably
// they are built from Collect and NoCollect — confirm against the initializer.
type PrometheusData struct {
	Address      string      `json:"address"`
	Collect      []string    `json:"collect"`
	NoCollect    []string    `json:"no_collect"`
	CollectMap   sets.String `json:"-"`
	NoCollectMap sets.String `json:"-"`
}

PrometheusData describe which metrics to collect or not collect

type RangeResource

// RangeResource is used to check whether a resource change is worth applying:
// there is no need to update the node resource when the changed quantity is small.
type RangeResource struct {
	CPUMilli RangeState `json:"cpu_milli"`
	MemMB    RangeState `json:"mem_mb"`
}

RangeResource is used to check whether a resource change is worth applying: there is no need to update the node resource when the changed quantity is small.

type RangeState

// RangeState describes the resource range used to drop small changes.
type RangeState struct {
	// Min is the minimum range quantity
	Min float64 `json:"min"`
	// Max is the maximum range quantity
	Max float64 `json:"max"`
	// Ratio is used to calculate the change range quantity
	Ratio float64 `json:"ratio"`
}

RangeState describes the resource range used to drop small changes.

type RemoteAlarm

// RemoteAlarm describes the remote alarm body.
// Note that its JSON keys are camelCase ("remoteWebhook", "weWorkWebhook"),
// unlike the snake_case keys used by the other config types in this package.
type RemoteAlarm struct {
	RemoteWebhook string `json:"remoteWebhook"`
	WeWorkWebhook string `json:"weWorkWebhook"`
}

RemoteAlarm struct is used to describe remote alarm body

type Resource

// Resource is the cpu and memory configuration.
// CpuPercent and MemPercent are excluded from JSON (tag "-"); presumably they
// are parsed from CpuPercentStr and MemPercentStr — confirm against the
// initializer. The numeric fields are pointers, so an absent value (nil) is
// distinguishable from zero.
type Resource struct {
	CpuMilli      *float64 `json:"cpu_milli"`
	MemMB         *float64 `json:"mem_mb"`
	CpuPercentStr string   `json:"cpu_percent"`
	CpuPercent    *float64 `json:"-"`
	MemPercentStr string   `json:"mem_percent"`
	MemPercent    *float64 `json:"-"`
}

Resource is the cpu and memory configuration

type ResourceIsolateConfig

// ResourceIsolateConfig is the offline job quota limit configuration for resources.
// DiskNames, EniIface, Iface, OnlineType and OfflineType are excluded from
// JSON (tag "-").
type ResourceIsolateConfig struct {
	Disable         bool            `json:"disable"`
	ResourceDisable map[string]bool `json:"resource_disable"`
	UpdatePeriod    times.Duration  `json:"update_period"`
	// DiskNames are the disks that need an io weight to be set
	DiskNames []string `json:"-"`
	// EniIface is the eni iface for eni network pods
	EniIface string `json:"-"`
	// Iface is the normal iface for host network and global route network pods
	Iface              string            `json:"-"`
	CpuConfig          CpuIsolateConfig  `json:"cpu_config"`
	OnlineType         string            `json:"-"`
	OfflineType        string            `json:"-"`
	ExternalComponents []ComponentConfig `json:"external_components"`
}

ResourceIsolateConfig is the offline job quota limit configuration for resources

type ResourceUpdateEvent

// ResourceUpdateEvent defines the event raised when offline resources need
// to be updated. The fields carry no JSON tags.
type ResourceUpdateEvent struct {
	ConflictRes []string
	Reason      string
}

ResourceUpdateEvent define the event when need to update offline resources

type RoundOffResource

// RoundOffResource is used to round resource quantities. For example, if the
// original memory is 1027Mi, rounding off yields 1024Mi, i.e. 2 times 512Mi.
type RoundOffResource struct {
	CPUMilli float64 `json:"cpu_milli"`
	MemMB    float64 `json:"mem_mb"`
}

RoundOffResource is used to round resource quantities. For example, if the original memory is 1027Mi, rounding off yields 1024Mi, i.e. 2 times 512Mi.

type RuleCheck

// RuleCheck groups all rules: container rules, node rules and app rules.
type RuleCheck struct {
	ContainerRules []*RuleCheckConfig `json:"container_rules"`
	NodeRules      []*RuleCheckConfig `json:"node_rules"`
	AppRules       []*RuleCheckConfig `json:"app_rules"`
}

RuleCheck group all rules

type RuleCheckConfig

// RuleCheckConfig defines the rule config.
type RuleCheckConfig struct {
	Name    string   `json:"name"`
	Metrics []string `json:"metrics"`
	// CheckInterval describes the interval to trigger detection
	CheckInterval times.Duration `json:"check_interval"`
	// HandleInterval describes the interval to handle conflicts after detecting an abnormal result
	HandleInterval times.Duration `json:"handle_interval"`
	// RecoverInterval describes the interval to recover conflicts after detecting a normal result
	RecoverInterval times.Duration        `json:"recover_interval"`
	Rules           []*DetectActionConfig `json:"rules"`
	RecoverRules    []*DetectActionConfig `json:"recover_rules"`
}

RuleCheckConfig define the rule config

type SharedInfo

// SharedInfo indicates whether a path has a shared quota or not.
type SharedInfo struct {
	PodName string
}

SharedInfo indicate a path has shared quota or not

type SilenceConfig

// SilenceConfig describes the time periods during which running offline jobs
// is not allowed.
type SilenceConfig struct {
	// Periods lists [start, end] pairs of times within a day, e.g. [0:00:00, 5:00:00]
	Periods [][2]times.SecondsInDay `json:"periods"`
	// AheadOfUnSchedule: disable schedule before silence — presumably how long before a silence
	// period scheduling is disabled; confirm against the consumer.
	AheadOfUnSchedule times.Duration `json:"ahead_of_unSchedule"`
}

SilenceConfig describes the time periods during which running offline jobs is not allowed.

type TaskTypeConfig

// TaskTypeConfig shows the online and offline task types, such as offline
// being yarn on k8s.
type TaskTypeConfig struct {
	OnlineType  string `json:"online_type"`
	OfflineType string `json:"offline_type"`
}

TaskTypeConfig show the online and offline task type, such as offline is yarn on k8s.

type TimeRangeOverCommit

// TimeRangeOverCommit sets the overcommit percent for a resource in a
// specific time range, given as a pair of times within a day.
type TimeRangeOverCommit struct {
	Range             [2]times.SecondsInDay `json:"range"`
	OverCommitPercent float64               `json:"over_commit_percent"`
}

TimeRangeOverCommit set overcommit percent for resource in specific time range

type VolumeType

// VolumeType is the string name of a kind of volume.
type VolumeType string

// Known volume types.
var (
	VolumeTypeRootFs   VolumeType = "rootFs"
	VolumeTypeEmptyDir VolumeType = "emptyDir"
	VolumeTypeHostPath VolumeType = "hostPath"
)

// AvailableVolumeTypes is the set holding the String() form of every known
// volume type.
var AvailableVolumeTypes = sets.NewString(
	VolumeTypeRootFs.String(),
	VolumeTypeEmptyDir.String(),
	VolumeTypeHostPath.String(),
)

func (VolumeType) String

func (vt VolumeType) String() string

String returns the volume type as a string.

type YarnDisksConfig

// YarnDisksConfig groups the disks config.
type YarnDisksConfig struct {
	// RatioToCore translates disk space to core numbers
	RatioToCore      int64 `json:"ratio_to_core"`
	MultiDiskDisable bool  `json:"multi_disk_disable"`
	// DiskMinCapacityGb drops disks with little disk space
	DiskMinCapacityGb int64          `json:"disk_min_capacity_gb"`
	SpaceCheckEnabled bool           `json:"space_check_enabled"`
	SpaceCheckPeriod  times.Duration `json:"space_check_period"`
	// SpaceCheckReservedGb is used for checking disk space; cleaning space will start if the free disk space
	// is less than SpaceCheckReservedGb
	SpaceCheckReservedGb      int64   `json:"space_check_reserved_gb"`
	SpaceCheckReservedPercent float64 `json:"space_check_reserved_percent"`
	SpaceCleanDisable         bool    `json:"space_clean_disable"`
	// If SpaceCleanJustData is enabled, it will just restart the nodemanager pod to release /data space, and
	// does not care about other disk partitions
	SpaceCleanJustData bool `json:"space_clean_just_data"`
	// OfflineExitedCleanDelay is used to clean the nodemanager local or log path when an offline pod has
	// exited for a long time
	OfflineExitedCleanDelay times.Duration `json:"offline_exited_clean_delay"`
}

YarnDisksConfig group disks config

type YarnNodeResourceConfig

// YarnNodeResourceConfig shows the yarn related configuration.
type YarnNodeResourceConfig struct {
	// CapacityIncInterval is used to keep nodemanager capacity increases from happening too frequently
	CapacityIncInterval times.Duration    `json:"capacity_inc_interval"`
	NMServer            string            `json:"nm_server"`
	NMReserve           Resource          `json:"nm_reserve"`
	ResourceRoundOff    RoundOffResource  `json:"resource_roundoff"`
	ResourceRange       RangeResource     `json:"resource_range"`
	ScheduleServerPort  string            `json:"schedule_server_port"`
	PortAutoDetect      bool              `json:"port_auto_detect"`
	Properties          map[string]string `json:"properties"`
	Disks               YarnDisksConfig   `json:"disks"`
	ShimServer          string            `json:"shim_server"`
	CpuOverCommit       OverCommit        `json:"cpu_over_commit"`
}

YarnNodeResourceConfig is used to show yarn related configuration

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL