Documentation ¶
Index ¶
- Constants
- Variables
- func AllResCompressible(res []string) bool
- func GetDeviceNameFromMetric(metric string) (dev, devMetric, originalMetric string)
- func InitHealthCheckConfigFunc(nodeMetrics *MetricsNodeConfig, predictReserved *Resource) func(string) (*HealthCheckConfig, error)
- func InitPredictConfig(config *PredictConfig)
- func OfflineOnYarn(config *TaskTypeConfig) bool
- type ActionConfig
- type AggregationsConfig
- type AlarmChannel
- type AlarmConfig
- type CPIManagerConfig
- type CaelusConfig
- type CheckPointConfig
- type ComponentConfig
- type CpuIsolateConfig
- type CpuQuotaConfig
- type CpuSetConfig
- type CustomMetric
- type DetectActionConfig
- type DetectConfig
- type Devices
- type DiskQuotaConfig
- type DiskQuotaSize
- type EWMAArgs
- type ExpressionArgs
- type HealthCheckConfig
- type K8sConfig
- type LocalAlarm
- type LocalPredictConfig
- type MemoryNotifyConfig
- type MemoryPressureNotifyConfig
- type MemoryUsageNotifyConfig
- type MetricKind
- type MetricsCollectConfig
- type MetricsContainerConfig
- type MetricsNodeConfig
- type MetricsPerfConfig
- type MetricsPrometheus
- type MetricsRdtConfig
- type MetricsSource
- type NodeResourceConfig
- type NotifyConfig
- type OfflineJobs
- type OnlineConfig
- type OnlineJobConfig
- type OnlineMetrics
- type OverCommit
- type PathInfo
- type PidToCgroup
- type PredictConfig
- type PrometheusData
- type RangeResource
- type RangeState
- type RemoteAlarm
- type Resource
- type ResourceIsolateConfig
- type ResourceUpdateEvent
- type RoundOffResource
- type RuleCheck
- type RuleCheckConfig
- type SharedInfo
- type SilenceConfig
- type TaskTypeConfig
- type TimeRangeOverCommit
- type VolumeType
- type YarnDisksConfig
- type YarnNodeResourceConfig
Constants ¶
// String identifiers used by the detection configuration.
// NOTE(review): the three Detection* values look like detector kind names and
// ExpresstionAutoDetect (sic, exported spelling) like a special expression
// value — confirm against the code that consumes them.
const (
	ExpresstionAutoDetect = "auto"
	DetectionExpression   = "expression"
	DetectionEWMA         = "ewma"
	DetectionUnion        = "union"
)
// Predictor type names and the node resource type label for online prediction.
const (
	// LocalPredictorType is the local predictor
	LocalPredictorType = "local"
	// VPAPredictorType is the remote VPA predictor
	VPAPredictorType = "vpa"

	// NodeResourceTypeOnlinePredict is the "online_predict" type string.
	NodeResourceTypeOnlinePredict = "online_predict"
)
// Task types, alarm types, cpu manage policies, unit multipliers and cgroup
// paths shared across the package.
const (
	// Task types. Note that the online and offline "k8s" types share the same
	// string value.
	OnlineTypeOnK8s      = "k8s"
	OnlineTypeOnLocal    = "local"
	OfflineTypeOnk8s     = "k8s"
	OfflineTypeYarnOnk8s = "yarn_on_k8s"

	// Alarm types.
	AlarmTypeLocal  = "local"
	AlarmTypeRemote = "remote"

	// CpuManagePolicyBT is just for tencent OS
	CpuManagePolicyBT       = "bt"
	CpuManagePolicySet      = "cpuset"
	CpuManagePolicyQuota    = "quota"
	CpuManagePolicyAdaptive = "adaptive"

	// MemUnit translates Mb to byte
	MemUnit = int64(1024 * 1024)
	// MemGbUnit translates Gb to byte
	MemGbUnit = int64(1024 * 1024 * 1024)
	// CpuUnit translates milli core
	CpuUnit = int64(1000)
	// DiskUnit translates Gi to byte (1024^3; the former 1024*1020*1024 was a typo)
	DiskUnit = int64(1024 * 1024 * 1024)

	// PodAnnotationPrefix is the fixed prefix of pod annotations
	PodAnnotationPrefix = "mixer.kubernetes.io/"

	// RootFS is the root directory in container.
	RootFS = "/rootfs"

	// Cgroup paths. CgroupOfflineSystem is derived from CgroupOffline.
	CgroupKubePods      = "/kubepods"
	CgroupOffline       = "/kubepods/offline"
	CgroupOfflineSystem = CgroupOffline + "/system"

	// SystemComponentOomScoreAdj is kept as a string ("500").
	SystemComponentOomScoreAdj = "500"

	CgroupYarn = "hadoop-yarn"
	// CgroupNonK8sOnline is the cgroup for online jobs, which are not running on k8s, we need to create the cgroup
	// and children cgroup manually.
	CgroupNonK8sOnline = "/onlinejobs"
)
const (
	// ContainerRuntimeDocker is the "docker" container runtime name.
	ContainerRuntimeDocker = "docker"
)
Variables ¶
var (
	// AvailablePredictType is the set of accepted predictor types: local and vpa.
	AvailablePredictType = sets.NewString(LocalPredictorType, VPAPredictorType)
	// AvailableLocalPredictType is the set of predictor types that are local;
	// it contains only LocalPredictorType.
	AvailableLocalPredictType = sets.NewString(LocalPredictorType)
)
var (
	// AvailableOnlineTaskType describes the available online task types, which may be pod or local process
	AvailableOnlineTaskType = sets.NewString(OnlineTypeOnK8s, OnlineTypeOnLocal)
	// AvailableOfflineTaskType describes the available offline task types, which may be pod or yarn job
	AvailableOfflineTaskType = sets.NewString(OfflineTypeOnk8s, OfflineTypeYarnOnk8s)
	// AvailableAlarmType shows the available alarm types
	AvailableAlarmType = sets.NewString(AlarmTypeLocal, AlarmTypeRemote)
	// AvailableCpuManagePolicy shows the available cpu manage policies
	AvailableCpuManagePolicy = sets.NewString(CpuManagePolicyBT, CpuManagePolicySet,
		CpuManagePolicyQuota, CpuManagePolicyAdaptive)

	// CompressibleRes is the set of resource names treated as compressible;
	// it contains only cpu.
	CompressibleRes = sets.NewString(string(v1.ResourceCPU))
)
Functions ¶
func AllResCompressible ¶
func AllResCompressible(res []string) bool
AllResCompressible checks whether the given resources are all compressible.
func GetDeviceNameFromMetric ¶
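func GetDeviceNameFromMetric(metric string) (dev, devMetric, originalMetric string)
GetDeviceNameFromMetric parses the metric name and returns the device name (dev), the device metric name (devMetric) and the original metric name (originalMetric).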
func InitHealthCheckConfigFunc ¶
func InitHealthCheckConfigFunc(nodeMetrics *MetricsNodeConfig, predictReserved *Resource) func(string) (*HealthCheckConfig, error)
InitHealthCheckConfigFunc return function to get health check config
func InitPredictConfig ¶
func InitPredictConfig(config *PredictConfig)
InitPredictConfig validate and format predict config
func OfflineOnYarn ¶
func OfflineOnYarn(config *TaskTypeConfig) bool
OfflineOnYarn check if offline job is running on YARN
Types ¶
type ActionConfig ¶
// ActionConfig defines the configuration of one action.
type ActionConfig struct {
	// Name is read from the JSON key "name".
	Name string `json:"name"`
	// ArgsStr keeps the raw, undecoded JSON found under the key "args".
	ArgsStr json.RawMessage `json:"args"`
	// Args is excluded from JSON (tag "-").
	// NOTE(review): presumably filled from ArgsStr once the action kind is known — confirm.
	Args interface{} `json:"-"`
}
ActionConfig define action config
type AggregationsConfig ¶
// AggregationsConfig is used to configure aggregation behaviour.
type AggregationsConfig struct {
	// MemoryAggregationInterval is the length of a single interval, for
	// which the peak memory usage is computed.
	// Memory usage peaks are aggregated in multiples of this interval. In other words
	// there is one memory usage sample per interval (the maximum usage over that
	// interval).
	MemoryAggregationInterval times.Duration `json:"memory_aggregation_interval"`
	// MemoryAggregationIntervalCount is the number of consecutive MemoryAggregationIntervals
	// which make up the MemoryAggregationWindowLength which in turn is the period for memory
	// usage aggregation by VPA.
	MemoryAggregationIntervalCount int64 `json:"memory_aggregation_interval_count"`
	// MemoryHistogramDecayHalfLife is the amount of time it takes a historical
	// memory usage sample to lose half of its weight. In other words, a fresh
	// usage sample is twice as 'important' as one with age equal to the half
	// life period.
	MemoryHistogramDecayHalfLife times.Duration `json:"memory_histogram_decay_half_life"`
	// CPUHistogramDecayHalfLife is the amount of time it takes a historical
	// CPU usage sample to lose half of its weight.
	CPUHistogramDecayHalfLife times.Duration `json:"cpu_histogram_decay_half_life"`
}
AggregationsConfig is used to configure aggregation behaviour.
type AlarmChannel ¶
// AlarmChannel groups the alarm channels: a local one and a remote one.
// Both fields are pointers, so either may be nil.
type AlarmChannel struct {
	// LocalAlarm is read from the JSON key "local".
	LocalAlarm *LocalAlarm `json:"local"`
	// RemoteAlarm is read from the JSON key "remote".
	RemoteAlarm *RemoteAlarm `json:"remote"`
}
AlarmChannel struct is used to show alarm channel
type AlarmConfig ¶
// AlarmConfig groups the options used to send alarm messages.
type AlarmConfig struct {
	Enable  bool   `json:"enable"`
	Cluster string `json:"cluster"`
	// NOTE(review): MessageBatch and MessageDelay look like batching controls
	// for outgoing messages — confirm semantics with the alarm sender.
	MessageBatch           int            `json:"message_batch"`
	MessageDelay           times.Duration `json:"message_delay"`
	ChannelName            string         `json:"channel_name"`
	IgnoreAlarmWhenSilence bool           `json:"ignore_alarm_when_silence"`
	// AlarmChannel is embedded and tagged with the JSON key "alarm_channel".
	AlarmChannel `json:"alarm_channel"`
}
AlarmConfig group options to send alarm message
type CPIManagerConfig ¶
// CPIManagerConfig shows the configuration for cpi detecting.
type CPIManagerConfig struct {
	// Enable turns the feature on. The zero value (false) keeps it disabled,
	// which is the intended default.
	Enable         bool           `json:"enable"`
	WindowDuration times.Duration `json:"window_duration"`
	// PrometheusAddrStr is the address as written in JSON ("prometheus_addr").
	PrometheusAddrStr string `json:"prometheus_addr"`
	// PrometheusAddr is excluded from JSON (tag "-").
	// NOTE(review): presumably parsed from PrometheusAddrStr — confirm.
	PrometheusAddr  url.URL        `json:"-"`
	MaxJobSpecRange times.Duration `json:"max_job_spec_range"`
}
CPIManagerConfig show the configuration for cpi detecting
type CaelusConfig ¶
// CaelusConfig is the configuration for Caelus; it aggregates the
// configuration of every sub-component under its own JSON key.
type CaelusConfig struct {
	K8sConfig    K8sConfig          `json:"k8s_config"`
	CheckPoint   CheckPointConfig   `json:"check_point"`
	TaskType     TaskTypeConfig     `json:"task_type"`
	NodeResource NodeResourceConfig `json:"node_resource"`
	// If there are multiple predicts, the first one is used for real prediction. The rest are experiment
	// predicts: caelus only feeds samples to them and exposes predict metrics for them.
	Predicts        []PredictConfig       `json:"predicts"`
	Metrics         MetricsCollectConfig  `json:"metrics"`
	ResourceIsolate ResourceIsolateConfig `json:"resource_isolate"`
	CpiManager      CPIManagerConfig      `json:"cpi_manager"`
	Alarm           AlarmConfig           `json:"alarm"`
	Online          OnlineConfig          `json:"online"`
	DiskQuota       DiskQuotaConfig       `json:"disk_quota"`
}
CaelusConfig is the configuration for Caelus
func ParseJsonConfig ¶
func ParseJsonConfig(configFile string) (*CaelusConfig, error)
ParseJsonConfig parse json config
type CheckPointConfig ¶
// CheckPointConfig groups the info related to the check point, which saves
// state to a local file.
type CheckPointConfig struct {
	// CheckPointDir is read from the JSON key "check_point_dir".
	CheckPointDir string `json:"check_point_dir"`
	// NodeResourceKey is read from the JSON key "node_resource_key".
	NodeResourceKey string `json:"node_resource_key"`
}
CheckPointConfig groups the info related to the check point, which saves state to a local file
type ComponentConfig ¶
ComponentConfig is the config used to specify a non-containerized component
type CpuIsolateConfig ¶
// CpuIsolateConfig is the configuration for cpu isolation.
type CpuIsolateConfig struct {
	// AutoDetect will enable bt feature if supported, and quota as the second choice.
	AutoDetect bool `json:"auto_detect"`
	// ManagePolicy assigns cpu manage policy
	ManagePolicy   string         `json:"manage_policy"`
	CpuSetConfig   CpuSetConfig   `json:"cpuset_config"`
	CpuQuotaConfig CpuQuotaConfig `json:"cpu_quota_config"`
	// KubeletStatic tells whether the cpu manager policy for kubelet is static.
	// It is excluded from JSON (tag "-").
	KubeletStatic bool `json:"-"`
}
CpuIsolateConfig is the configuration for cpu isolation
type CpuQuotaConfig ¶
// CpuQuotaConfig describes the configuration for the cpu quota isolation policy.
type CpuQuotaConfig struct {
	// OfflineShare is read from the JSON key "offline_share". It is a pointer,
	// so a missing key (nil) can be told apart from an explicit zero.
	OfflineShare *uint64 `json:"offline_share"`
}
CpuQuotaConfig describe configs for cpu quota isolation policy
type CpuSetConfig ¶
// CpuSetConfig describes the configuration for the cpuset isolation policy.
type CpuSetConfig struct {
	// EnableOnlineIsolate isolates online jobs from offline jobs.
	EnableOnlineIsolate bool `json:"enable_online_isolate"`
	// ReservedCpus is the cpu list which will not be assigned to offline jobs.
	ReservedCpus string `json:"reserved_cpus"`
}
CpuSetConfig describe configs for cpuset isolation policy
type CustomMetric ¶
// CustomMetric defines the custom metric config.
type CustomMetric struct {
	// MetricServerAddr is read from the JSON key "metric_server_addr".
	MetricServerAddr string `json:"metric_server_addr"`
	// CollectInterval is read from the JSON key "collect_interval".
	CollectInterval times.Duration `json:"collect_interval"`
}
CustomMetric define custom metric config
type DetectActionConfig ¶
// DetectActionConfig defines a group of detectors together with a group of actions.
type DetectActionConfig struct {
	// Detects is read from the JSON key "detects".
	Detects []*DetectConfig `json:"detects"`
	// Actions is read from the JSON key "actions".
	Actions []*ActionConfig `json:"actions"`
}
DetectActionConfig define detectors and actions
type DetectConfig ¶
// DetectConfig defines the configuration of one detector.
type DetectConfig struct {
	// Name is read from the JSON key "name".
	Name string `json:"name"`
	// ArgsStr keeps the raw, undecoded JSON found under the key "args".
	ArgsStr json.RawMessage `json:"args"`
	// Args is excluded from JSON (tag "-").
	// NOTE(review): presumably filled from ArgsStr once the detector kind is known — confirm.
	Args interface{} `json:"-"`
}
DetectConfig define detector config
type Devices ¶
// Devices groups network and disk devices.
type Devices struct {
	// Ifaces are the network interfaces, e.g. eth0; those that do not exist or are down will be filtered out.
	// These ifaces will be assigned to metrics.node.ifaces.
	// NOTE(review): the JSON key is "ifaces_xxx", which looks like a placeholder — confirm it is intended.
	IfacesWithProperty []string `json:"ifaces_xxx"`
	// Ifaces is excluded from JSON (tag "-").
	Ifaces []string `json:"-"`
	// DiskNames are the disk names, e.g. sda, vda; those that do not exist will be filtered out.
	// These disks will be assigned to metrics.node.deviceNames.
	DiskNames []string `json:"disk_names"`
}
Devices group network and disk devices
type DiskQuotaConfig ¶
// DiskQuotaConfig groups the disk quota configurations.
type DiskQuotaConfig struct {
	Enabled     bool           `json:"enabled"`
	CheckPeriod times.Duration `json:"check_period"`
	// ContainerRuntime is the container runtime name, such as docker or containerd.
	ContainerRuntime string `json:"container_runtime"`
	// VolumeSizes is the quota size per volume type, just for offline jobs;
	// online jobs need to announce their quota in annotations.
	VolumeSizes map[VolumeType]*DiskQuotaSize `json:"volume_sizes"`
}
DiskQuotaConfig group disk quota configurations
type DiskQuotaSize ¶
// DiskQuotaSize holds a quota and inode limit plus the corresponding usage.
// Only the limits are read from JSON; the *Used fields are excluded (tag "-").
// Open question from the original author: shall we support the soft feature?
type DiskQuotaSize struct {
	Quota      uint64 `json:"quota"`
	Inodes     uint64 `json:"inodes"`
	QuotaUsed  uint64 `json:"-"`
	InodesUsed uint64 `json:"-"`
}
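DiskQuotaSize groups the quota and inode limits together with their used amounts. Open question: shall we support the soft feature?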
type ExpressionArgs ¶
// ExpressionArgs groups the args used for expression detection.
type ExpressionArgs struct {
	// Expression is read from the JSON key "expression".
	Expression string `json:"expression"`
	// NOTE(review): WarningCount / WarningDuration presumably bound how many
	// warnings within which time span trigger the detector — confirm.
	WarningCount    int            `json:"warning_count"`
	WarningDuration times.Duration `json:"warning_duration"`
}
ExpressionArgs group args used for expression detection
type HealthCheckConfig ¶
// HealthCheckConfig is the config for checking health, such as node load or
// online job interference.
type HealthCheckConfig struct {
	Disable      bool         `json:"disable"`
	RuleNodes    []string     `json:"rule_nodes"`
	RuleCheck    RuleCheck    `json:"rule_check"`
	CgroupNotify NotifyConfig `json:"cgroup_notify"`
	// PredictReserved is assigned during initialization; it is excluded from JSON (tag "-").
	PredictReserved *Resource `json:"-"`
}
HealthCheckConfig is the config for checking health, such as node load or online job interference
type K8sConfig ¶
// K8sConfig shows the kubernetes config.
type K8sConfig struct {
	// KubeletRootDir is read from the JSON key "kubelet_root_dir".
	KubeletRootDir string `json:"kubelet_root_dir"`
}
K8sConfig show kubernetes config
type LocalAlarm ¶
// LocalAlarm describes the local alarm body.
type LocalAlarm struct {
	// Executor is read from the JSON key "executor".
	Executor string `json:"executor"`
}
LocalAlarm struct is used to describe local alarm body
type LocalPredictConfig ¶
// LocalPredictConfig groups the options for the local predictor.
type LocalPredictConfig struct {
	// Minimum CPU recommendation for a pod
	PodMinCPUMillicores float64 `json:"pod_min_cpu_millicores"`
	// Minimum memory recommendation for a pod
	PodMinMemoryMb float64 `json:"pod_min_memory_mb"`
	// Fraction of usage added as the safety margin to the recommended request
	SafetyMarginFraction float64 `json:"safety_margin_fraction"`
	// cpu usage percentile to recommend cpu resource
	CPUPercentile float64 `json:"cpu_percentile"`
	// memory usage (peaks) percentile to recommend memory resource
	MemoryPeaksPercentile float64 `json:"memory_peaks_percentile"`
	// AggregationsConfig is used to configure aggregation behaviour.
	AggregationsConfig `json:",inline"`
	// Enable tuning the cpu weight if cpu usage is anomalous
	EnableTuneCPUWeight bool `json:"enable_tune_cpu_weight"`
	// AnomalyDetectorMovingWindow defines how long the moving window of anomaly detector should keep
	AnomalyDetectorMovingWindow times.Duration `json:"anomaly_detector_moving_window"`
	// If a cpu usage increasing anomaly is detected, the weight of the anomaly sample.
	// Base weight is 100
	IncreasingAnomalyWeightFactor int64 `json:"increasing_anomaly_weight_factor"`
	// If a cpu usage decreasing anomaly is detected, the weight of the anomaly sample.
	// Base weight is 100
	DecreasingAnomalyWeightFactor int64 `json:"decreasing_anomaly_weight_factor"`
}
LocalPredictConfig group options for local predictor
type MemoryNotifyConfig ¶
// MemoryNotifyConfig describes the memory cgroup notify settings: a list of
// pressure-level notifications and a list of usage notifications.
type MemoryNotifyConfig struct {
	Pressures []MemoryPressureNotifyConfig `json:"pressures"`
	Usages    []MemoryUsageNotifyConfig    `json:"usages"`
}
MemoryNotifyConfig describe memory cgroup notify
type MemoryPressureNotifyConfig ¶
// MemoryPressureNotifyConfig describes the memory.pressure_level notify data.
type MemoryPressureNotifyConfig struct {
	Cgroups       []string `json:"cgroups"`
	PressureLevel string   `json:"pressure_level"`
	// Duration is the time duration the pressure has been kept.
	Duration times.Duration `json:"duration"`
	// Count is the number of events within the duration time.
	Count int `json:"count"`
}
MemoryPressureNotifyConfig describe memory.pressure_level notify data
type MemoryUsageNotifyConfig ¶
// MemoryUsageNotifyConfig describes the memory.usage_in_bytes notify data.
type MemoryUsageNotifyConfig struct {
	Cgroups []string `json:"cgroups"`
	// MarginMb is the distance between limit and threshold.
	MarginMb int `json:"margin_mb"`
	// Duration tells when to handle the event after receiving it.
	Duration times.Duration `json:"duration"`
}
MemoryUsageNotifyConfig describe memory.usage_in_bytes notify data
type MetricKind ¶
// MetricKind represents the kind of metrics that cAdvisor exposes.
type MetricKind string
MetricKind represent the kind of metrics that cAdvisor exposes.
type MetricsCollectConfig ¶
// MetricsCollectConfig is the configuration for metrics collection; it holds
// one sub-config per metrics source.
type MetricsCollectConfig struct {
	Node       MetricsNodeConfig      `json:"node"`
	Container  MetricsContainerConfig `json:"container"`
	Perf       MetricsPerfConfig      `json:"perf"`
	Rdt        MetricsRdtConfig       `json:"rdt"`
	Prometheus MetricsPrometheus      `json:"prometheus"`
}
MetricsCollectConfig is the configuration for metrics collection
type MetricsContainerConfig ¶
// MetricsContainerConfig is the configuration for container metrics collection.
type MetricsContainerConfig struct {
	Resources               []string       `json:"resources"`
	Cgroups                 []string       `json:"cgroups"`
	CollectInterval         times.Duration `json:"collect_interval"`
	MaxHousekeepingInterval times.Duration `json:"max_housekeeping_interval"`
}
MetricsContainerConfig is the configuration for container metrics collection
type MetricsNodeConfig ¶
// MetricsNodeConfig is the configuration for node metrics collection.
type MetricsNodeConfig struct {
	CollectInterval times.Duration `json:"collect_interval"`
	SystemProcesses []string       `json:"system_processes"`
	// OfflineType is excluded from JSON (tag "-").
	OfflineType string `json:"-"`
	// Devices is embedded with the "inline" tag option.
	Devices `json:",inline"`
}
MetricsNodeConfig is the configuration for node metrics collection
type MetricsPerfConfig ¶
// MetricsPerfConfig is the configuration for perf metrics collection.
type MetricsPerfConfig struct {
	Disable         bool           `json:"disable"`
	CollectInterval times.Duration `json:"collect_interval"`
	CollectDuration times.Duration `json:"collect_duration"`
	IgnoredCgroups  []string       `json:"ignored_cgroups"`
}
MetricsPerfConfig is the configuration for perf metrics collection
type MetricsPrometheus ¶
// MetricsPrometheus describes how to collect prometheus metrics.
type MetricsPrometheus struct {
	CollectInterval times.Duration `json:"collect_interval"`
	// DisableShow controls whether these metrics need to be shown with the prefix "caelus_".
	DisableShow bool              `json:"disable_show"`
	Items       []*PrometheusData `json:"items"`
}
MetricsPrometheus describe how to collect prometheus metrics
type MetricsRdtConfig ¶
// MetricsRdtConfig is the configuration for RDT metrics collection.
type MetricsRdtConfig struct {
	Disable         bool           `json:"disable"`
	RdtCommand      string         `json:"rdt_command"`
	CollectInterval times.Duration `json:"collect_interval"`
	CollectDuration times.Duration `json:"collect_duration"`
	ExecuteInterval times.Duration `json:"execute_interval"`
}
MetricsRdtConfig is the configuration for RDT metrics collection
type MetricsSource ¶
// MetricsSource defines the metrics source of online services: either a
// command (MetricsCommand) or a URL (MetricsURL).
type MetricsSource struct {
	CheckInterval times.Duration `json:"check_interval"`
	// MetricsCommand is a command to get the job's current metrics value. It must return data in this format:
	// {"code":0,"msg":"success","data":[{"job_name":"","metric_name":"","key1":xx,"key2":xx,...}]}
	MetricsCommand []string `json:"metrics_command"`
	// CmdNeedChroot tells whether chroot is needed when executing the metrics command.
	CmdNeedChroot *bool `json:"cmd_need_chroot"`
	// MetricsURL is a url to get the job's metrics value. It must return data in this format:
	// <slo>,<metrics>.
	MetricsURL string `json:"metrics_url"`
}
MetricsSource define metrics source of online services
type NodeResourceConfig ¶
// NodeResourceConfig groups the configuration for node resources.
type NodeResourceConfig struct {
	Disable        bool           `json:"disable"`
	UpdateInterval times.Duration `json:"update_interval"`
	// OfflineType is excluded from JSON (tag "-").
	OfflineType string `json:"-"`
	// DisableKillIfNormal does not kill pod when no resource in conflicting status
	DisableKillIfNormal         bool                   `json:"disable_kill_if_normal"`
	OnlyKillIfIncompressibleRes bool                   `json:"only_kill_if_incompressible_res"`
	YarnConfig                  YarnNodeResourceConfig `json:"yarn_config"`
	Silence                     SilenceConfig          `json:"silence"`
}
NodeResourceConfig group configuration for node
type NotifyConfig ¶
// NotifyConfig monitors resources by kernel notify.
type NotifyConfig struct {
	// MemoryCgroup is read from the JSON key "memory_cgroup"; it may be nil.
	MemoryCgroup *MemoryNotifyConfig `json:"memory_cgroup"`
}
NotifyConfig monitor resource by kernel notify
type OfflineJobs ¶
// OfflineJobs describes offline job features, such as resource and state.
// The fields carry no JSON tags.
type OfflineJobs struct {
	Metadata interface{}
	Request  v1.ResourceList
	Used     v1.ResourceList
	State    string
}
OfflineJobs describe offline job features, such as resource and state
type OnlineConfig ¶
// OnlineConfig shows the online job configuration.
type OnlineConfig struct {
	Enable       bool              `json:"enable"`
	PidToCgroup  PidToCgroup       `json:"pid_to_cgroup"`
	Jobs         []OnlineJobConfig `json:"jobs"`
	CustomMetric CustomMetric      `json:"custom_metric"`
}
OnlineConfig show online job configuration
type OnlineJobConfig ¶
// OnlineJobConfig is the configuration of an online job.
type OnlineJobConfig struct {
	Name string `json:"name"`
	// Command is the job's command expression
	Command string          `json:"command"`
	Metrics []OnlineMetrics `json:"metrics"`
}
OnlineJobConfig is the configuration of a online job
type OnlineMetrics ¶
// OnlineMetrics defines the metric config of online services: a metric name
// and the source it is obtained from.
type OnlineMetrics struct {
	Name   string        `json:"name"`
	Source MetricsSource `json:"source"`
}
OnlineMetrics define metric config of online services
type OverCommit ¶
// OverCommit sets the overcommit percent for a resource.
type OverCommit struct {
	Enable            bool    `json:"enable"`
	OverCommitPercent float64 `json:"over_commit_percent"`
	// Periods lists time ranges, each with its own overcommit percent.
	// NOTE(review): presumably these override OverCommitPercent inside the range — confirm.
	Periods []TimeRangeOverCommit `json:"periods"`
}
OverCommit set overcommit percent for resource
type PathInfo ¶
// PathInfo groups a path with its quota options.
type PathInfo struct {
	Path string
	Size *DiskQuotaSize
	// SharedInfo is nil if the path is not shared.
	SharedInfo *SharedInfo
}
PathInfo group path and quota options
type PidToCgroup ¶
// PidToCgroup defines the online config of the pid check.
type PidToCgroup struct {
	// PidCheckInterval could be zero
	PidCheckInterval    times.Duration `json:"pids_check_interval"`
	CgroupCheckInterval times.Duration `json:"cgroup_check_interval"`
	BatchNum            int            `json:"batch_num"`
}
PidToCgroup define online config of pid check
type PredictConfig ¶
// PredictConfig groups the options for a predictor.
type PredictConfig struct {
	Disable       bool           `json:"disable"`
	CheckInterval times.Duration `json:"check_interval"`
	// PredictType must be in [local, localv2, vpa]
	// NOTE(review): AvailablePredictType only contains "local" and "vpa" — confirm whether "localv2" is still valid.
	PredictType       string   `json:"predict_type"`
	PredictServerAddr string   `json:"predict_server_addr"`
	ReserveResource   Resource `json:"reserve_resource"`
	// PrintInterval is the time interval to print the detailed predict log for debugging
	PrintInterval times.Duration `json:"print_interval"`
	// LocalPredictConfig is the configuration for local predictor
	LocalPredictConfig `json:",inline"`
	// The type value of online predict metrics caelus_node_resource{type=""}
	// It's used by experiment predict
	PredictMetricsType string `json:"predict_metrics_type"`
}
PredictConfig group options for predictor
type PrometheusData ¶
// PrometheusData describes which metrics to collect or not collect.
type PrometheusData struct {
	Address   string   `json:"address"`
	Collect   []string `json:"collect"`
	NoCollect []string `json:"no_collect"`
	// CollectMap and NoCollectMap are excluded from JSON (tag "-").
	// NOTE(review): presumably set forms of Collect / NoCollect — confirm.
	CollectMap   sets.String `json:"-"`
	NoCollectMap sets.String `json:"-"`
}
PrometheusData describe which metrics to collect or not collect
type RangeResource ¶
// RangeResource is used to check whether a resource change is worth applying:
// there is no need to update the node resource when the changed quantity is small.
type RangeResource struct {
	CPUMilli RangeState `json:"cpu_milli"`
	MemMB    RangeState `json:"mem_mb"`
}
RangeResource is used to check whether a resource change is worth applying; there is no need to update the node resource when the changed quantity is small.
type RangeState ¶
// RangeState describes the resource range used to drop small changes.
type RangeState struct {
	// Min is the minimum range quantity
	Min float64 `json:"min"`
	// Max is the maximum range quantity
	Max float64 `json:"max"`
	// Ratio is used to calculate the change range quantity
	Ratio float64 `json:"ratio"`
}
RangeState describe range resource to drop little changing
type RemoteAlarm ¶
// RemoteAlarm describes the remote alarm body.
// Note that, unlike most keys in this package, these JSON keys are camelCase.
type RemoteAlarm struct {
	RemoteWebhook string `json:"remoteWebhook"`
	WeWorkWebhook string `json:"weWorkWebhook"`
}
RemoteAlarm struct is used to describe remote alarm body
type Resource ¶
// Resource is the cpu and memory configuration.
type Resource struct {
	CpuMilli *float64 `json:"cpu_milli"`
	MemMB    *float64 `json:"mem_mb"`
	// CpuPercentStr is the JSON value of "cpu_percent"; CpuPercent is excluded from JSON.
	// NOTE(review): CpuPercent is presumably parsed from CpuPercentStr — confirm.
	CpuPercentStr string   `json:"cpu_percent"`
	CpuPercent    *float64 `json:"-"`
	// MemPercentStr is the JSON value of "mem_percent"; MemPercent is excluded from JSON.
	// NOTE(review): MemPercent is presumably parsed from MemPercentStr — confirm.
	MemPercentStr string   `json:"mem_percent"`
	MemPercent    *float64 `json:"-"`
}
Resource is the cpu and memory configuration
type ResourceIsolateConfig ¶
// ResourceIsolateConfig is the offline job quota limit configuration for resources.
type ResourceIsolateConfig struct {
	Disable         bool            `json:"disable"`
	ResourceDisable map[string]bool `json:"resource_disable"`
	UpdatePeriod    times.Duration  `json:"update_period"`
	// DiskNames are the disks that need their io weight set; excluded from JSON.
	DiskNames []string `json:"-"`
	// EniIface is the eni iface for eni network pods; excluded from JSON.
	EniIface string `json:"-"`
	// Iface is the normal iface for host network and global route network pods; excluded from JSON.
	Iface     string           `json:"-"`
	CpuConfig CpuIsolateConfig `json:"cpu_config"`
	// OnlineType and OfflineType are excluded from JSON (tag "-").
	OnlineType         string            `json:"-"`
	OfflineType        string            `json:"-"`
	ExternalComponents []ComponentConfig `json:"external_components"`
}
ResourceIsolateConfig is the offline job quota limit configuration for resources
type ResourceUpdateEvent ¶
ResourceUpdateEvent define the event when need to update offline resources
type RoundOffResource ¶
RoundOffResource is used to format a resource quantity. For example, if the original memory is 1027Mi, rounding off gives 1024Mi, which makes the memory a multiple (2 times) of 512Mi.
type RuleCheck ¶
// RuleCheck groups all rules: container rules, node rules and app rules.
type RuleCheck struct {
	ContainerRules []*RuleCheckConfig `json:"container_rules"`
	NodeRules      []*RuleCheckConfig `json:"node_rules"`
	AppRules       []*RuleCheckConfig `json:"app_rules"`
}
RuleCheck group all rules
type RuleCheckConfig ¶
// RuleCheckConfig defines the rule config.
type RuleCheckConfig struct {
	Name    string   `json:"name"`
	Metrics []string `json:"metrics"`
	// CheckInterval describes the interval to trigger detection
	CheckInterval times.Duration `json:"check_interval"`
	// HandleInterval describes the interval to handle conflicts after detecting abnormal result
	HandleInterval times.Duration `json:"handle_interval"`
	// RecoverInterval describes the interval to recover conflicts after detecting normal result
	RecoverInterval times.Duration `json:"recover_interval"`
	// Rules and RecoverRules are read from the JSON keys "rules" and "recover_rules".
	Rules        []*DetectActionConfig `json:"rules"`
	RecoverRules []*DetectActionConfig `json:"recover_rules"`
}
RuleCheckConfig define the rule config
type SilenceConfig ¶
// SilenceConfig describes the time periods during which running offline jobs
// is not allowed.
type SilenceConfig struct {
	// Periods is a list of [start, end] pairs, e.g. [0:00:00, 5:00:00]
	Periods [][2]times.SecondsInDay `json:"periods"`
	// AheadOfUnSchedule disables scheduling this long before the silence starts.
	AheadOfUnSchedule times.Duration `json:"ahead_of_unSchedule"`
}
SilenceConfig describes the time periods during which running offline jobs is not allowed
type TaskTypeConfig ¶
// TaskTypeConfig shows the online and offline task type, such as offline is
// yarn on k8s.
type TaskTypeConfig struct {
	OnlineType  string `json:"online_type"`
	OfflineType string `json:"offline_type"`
}
TaskTypeConfig show the online and offline task type, such as offline is yarn on k8s.
type TimeRangeOverCommit ¶
// TimeRangeOverCommit sets the overcommit percent for a resource in a
// specific time range.
type TimeRangeOverCommit struct {
	// Range is a [start, end] pair of times within a day.
	Range             [2]times.SecondsInDay `json:"range"`
	OverCommitPercent float64               `json:"over_commit_percent"`
}
TimeRangeOverCommit set overcommit percent for resource in specific time range
type VolumeType ¶
// VolumeType is the string name of a volume type; it is used as the key of
// DiskQuotaConfig.VolumeSizes.
type VolumeType string

var (
	// The known volume types.
	VolumeTypeRootFs   VolumeType = "rootFs"
	VolumeTypeEmptyDir VolumeType = "emptyDir"
	VolumeTypeHostPath VolumeType = "hostPath"

	// AvailableVolumeTypes is the set of the three known volume type names.
	AvailableVolumeTypes = sets.NewString(
		VolumeTypeRootFs.String(), VolumeTypeEmptyDir.String(), VolumeTypeHostPath.String())
)
type YarnDisksConfig ¶
// YarnDisksConfig groups the disks config.
type YarnDisksConfig struct {
	// RatioToCore translates disk space to core numbers
	RatioToCore      int64 `json:"ratio_to_core"`
	MultiDiskDisable bool  `json:"multi_disk_disable"`
	// DiskMinCapacityGb drops disks with little disk space
	DiskMinCapacityGb int64          `json:"disk_min_capacity_gb"`
	SpaceCheckEnabled bool           `json:"space_check_enabled"`
	SpaceCheckPeriod  times.Duration `json:"space_check_period"`
	// SpaceCheckReservedGb is used for checking disk space, it will start cleaning space if free disk space is less
	// than SpaceCheckReservedGb
	SpaceCheckReservedGb      int64   `json:"space_check_reserved_gb"`
	SpaceCheckReservedPercent float64 `json:"space_check_reserved_percent"`
	SpaceCleanDisable         bool    `json:"space_clean_disable"`
	// If SpaceCleanJustData is enabled, it will just restart the nodemanager pod to release /data space, and
	// does not care about other disk partitions
	SpaceCleanJustData bool `json:"space_clean_just_data"`
	// OfflineExitedCleanDelay is used to clean nodemanager local or log path when offline pod exited for long time
	OfflineExitedCleanDelay times.Duration `json:"offline_exited_clean_delay"`
}
YarnDisksConfig group disks config
type YarnNodeResourceConfig ¶
// YarnNodeResourceConfig is used to show the yarn related configuration.
type YarnNodeResourceConfig struct {
	// CapacityIncInterval is used to make nodemanager capacity increase not very frequently
	CapacityIncInterval times.Duration    `json:"capacity_inc_interval"`
	NMServer            string            `json:"nm_server"`
	NMReserve           Resource          `json:"nm_reserve"`
	ResourceRoundOff    RoundOffResource  `json:"resource_roundoff"`
	ResourceRange       RangeResource     `json:"resource_range"`
	ScheduleServerPort  string            `json:"schedule_server_port"`
	PortAutoDetect      bool              `json:"port_auto_detect"`
	Properties          map[string]string `json:"properties"`
	Disks               YarnDisksConfig   `json:"disks"`
	ShimServer          string            `json:"shim_server"`
	CpuOverCommit       OverCommit        `json:"cpu_over_commit"`
}
YarnNodeResourceConfig is used to show yarn related configuration