sysdump

package
v0.16.5 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Apr 26, 2024 License: Apache-2.0 Imports: 44 Imported by: 0

Documentation

Index

Constants

View Source
// Default values exported by the sysdump package. Several label selectors are
// built from labelPrefix, an unexported constant that is declared elsewhere in
// the package and is not visible in this listing.
const (
	DefaultCiliumLabelSelector               = labelPrefix + "cilium"
	DefaultCiliumEnvoyLabelSelector          = labelPrefix + "cilium-envoy"
	DefaultCiliumOperatorLabelSelector       = "io.cilium/app=operator"
	DefaultClustermeshApiserverLabelSelector = labelPrefix + "clustermesh-apiserver"
	DefaultCiliumNodeInitLabelSelector       = "app=cilium-node-init"
	DefaultCiliumSpireAgentLabelSelector     = "app=spire-agent"
	DefaultCiliumSpireServerLabelSelector    = "app=spire-server"
	DefaultDebug                             = false
	DefaultProfiling                         = true
	DefaultTracing                           = false
	DefaultHubbleLabelSelector               = labelPrefix + "hubble"
	DefaultHubbleFlowsCount                  = 10000
	DefaultHubbleFlowsTimeout                = 5 * time.Second
	DefaultHubbleRelayLabelSelector          = labelPrefix + "hubble-relay"
	DefaultHubbleUILabelSelector             = labelPrefix + "hubble-ui"
	DefaultHubbleGenerateCertsLabelSelector  = labelPrefix + "hubble-generate-certs"
	DefaultLargeSysdumpAbortTimeout          = 5 * time.Second
	DefaultLargeSysdumpThreshold             = 20
	DefaultLogsSinceTime                     = 8760 * time.Hour // 8760h = 365 days (1y)
	DefaultLogsLimitBytes                    = 1073741824       // 1GiB (2^30 bytes)
	DefaultNodeList                          = ""
	DefaultQuick                             = false
	DefaultOutputFileName                    = "cilium-sysdump-<ts>" // "<ts>" will be replaced with the timestamp
	DefaultDetectGopsPID                     = false
	DefaultCNIConfigDirectory                = "/etc/cni/net.d/"
	DefaultCNIConfigMapName                  = "cni-configuration"
	DefaultTetragonNamespace                 = "kube-system"
	DefaultTetragonLabelSelector             = "app.kubernetes.io/name=tetragon"
	DefaultTetragonOperatorLabelSelector     = "app.kubernetes.io/name=tetragon-operator"
	DefaultTetragonAgentContainerName        = "tetragon"
	DefaultTetragonConfigMapName             = "tetragon-config"
	DefaultTetragonBugtoolPrefix             = "tetragon-bugtool"
	DefaultTetragonCLICommand                = "tetra"
	DefaultTetragonPodInfo                   = "tetragonpodinfo-<ts>.yaml"
	DefaultTetragonTracingPolicy             = "tetragontracingpolicy-<ts>.yaml"
	DefaultTetragonTracingPolicyNamespaced   = "tetragontracingpolicynamespaced-<ts>.yaml"
)

Variables

View Source
// Package-level defaults that are variables rather than constants.
var (
	// DefaultWorkerCount is initialized to runtime.NumCPU(), i.e. the number of
	// logical CPUs usable by the current process.
	DefaultWorkerCount = runtime.NumCPU()

	// DefaultCopyRetryLimit limits retries done while copying files from pods.
	DefaultCopyRetryLimit = 100

	// DefaultCiliumNamespaces will be used to attempt to autodetect what namespace Cilium is installed in
	// unless otherwise specified.
	DefaultCiliumNamespaces = []string{"kube-system", "cilium"}

	// DefaultCiliumSPIRENamespaces will be used to attempt to autodetect what namespace Cilium SPIRE is installed in
	// unless otherwise specified.
	DefaultCiliumSPIRENamespaces = []string{"kube-system", "cilium", "cilium-spire"}
)

Functions

func AllPods added in v0.15.14

func AllPods(l *corev1.PodList) []*corev1.Pod

AllPods converts a PodList into a slice of Pod objects.

func FilterPods added in v0.9.2

func FilterPods(l *corev1.PodList, n []string) []*corev1.Pod

FilterPods filters a list of pods by node names.

func InitSysdumpFlags added in v0.16.0

func InitSysdumpFlags(cmd *cobra.Command, options *Options, optionPrefix string, hooks Hooks)

Types

type Collector

// Collector knows how to collect information required to troubleshoot issues
// with Cilium and Hubble.
type Collector struct {
	Client  KubernetesClient       // Kubernetes client (presumably the one passed to NewCollector; TODO confirm).
	Options Options                // Sysdump options (presumably the ones passed to NewCollector; TODO confirm).
	Pool    *workerpool.WorkerPool // Worker pool (presumably the pool Task.CreatesSubtasks refers to; TODO confirm).

	// NodeList is a list of nodes to collect sysdump information from.
	NodeList []string
	// CiliumPods is a list of Cilium agent pods running on nodes in NodeList.
	CiliumPods []*corev1.Pod
	// CiliumConfigMap is a pointer to cilium-config ConfigMap.
	CiliumConfigMap *corev1.ConfigMap

	// FeatureSet is a map of enabled / disabled features based on the contents of cilium-config ConfigMap.
	FeatureSet features.Set
	// contains filtered or unexported fields
}

Collector knows how to collect information required to troubleshoot issues with Cilium and Hubble.

func NewCollector

func NewCollector(k KubernetesClient, o Options, startTime time.Time, cliVersion string) (*Collector, error)

NewCollector returns a new sysdump collector.

func (*Collector) AbsoluteTempPath added in v0.9.2

func (c *Collector) AbsoluteTempPath(f string) string

AbsoluteTempPath returns the absolute path where to store the specified filename temporarily.

func (*Collector) AddTasks added in v0.9.2

func (c *Collector) AddTasks(tasks []Task)

AddTasks adds extra tasks for the collector to execute. Must be called before Run().

func (*Collector) GatherResourceUnstructured added in v0.16.1

func (c *Collector) GatherResourceUnstructured(ctx context.Context, r schema.GroupVersionResource, fname string, keep ...string) error

GatherResourceUnstructured queries resources with the given GroupVersionResource, storing them in the file specified by fname. If keep is non-empty, then it will filter the items returned, keeping only those with names listed in keep. If keep is empty, it will not filter the resources returned.

func (*Collector) Run

func (c *Collector) Run() error

Run performs the actual sysdump collection.

func (*Collector) SubmitCniConflistSubtask added in v0.10.5

func (c *Collector) SubmitCniConflistSubtask(pods []*corev1.Pod, containerName string) error

func (*Collector) SubmitGopsSubtasks added in v0.9.2

func (c *Collector) SubmitGopsSubtasks(pods []*corev1.Pod, containerName string) error

SubmitGopsSubtasks submits tasks to collect gops statistics from pods.

func (*Collector) SubmitLogsTasks added in v0.9.2

func (c *Collector) SubmitLogsTasks(pods []*corev1.Pod, since time.Duration, limitBytes int64) error

SubmitLogsTasks submits tasks to collect kubernetes logs from pods.

func (*Collector) SubmitProfilingGopsSubtasks added in v0.13.1

func (c *Collector) SubmitProfilingGopsSubtasks(pods []*corev1.Pod, containerName string) error

SubmitProfilingGopsSubtasks submits tasks to collect profiling data from pods.

func (*Collector) SubmitTetragonBugtoolTasks added in v0.14.5

func (c *Collector) SubmitTetragonBugtoolTasks(pods []*corev1.Pod, tetragonAgentContainerName,
	tetragonBugtoolPrefix, tetragonCLICommand string) error

func (*Collector) SubmitTracingGopsSubtask added in v0.15.14

func (c *Collector) SubmitTracingGopsSubtask(pods []*corev1.Pod, containerName string) error

SubmitTracingGopsSubtask submits a task to collect tracing data from pods.

func (*Collector) WriteBytes added in v0.9.2

func (c *Collector) WriteBytes(filename string, value []byte) error

WriteBytes writes a byte array to a file.

func (*Collector) WriteString added in v0.9.2

func (c *Collector) WriteString(filename string, value string) error

WriteString writes a string to a file.

func (*Collector) WriteTable added in v0.9.2

func (c *Collector) WriteTable(filename string, value *metav1.Table) error

WriteTable writes a kubernetes table to a file.

func (*Collector) WriteYAML added in v0.9.2

func (c *Collector) WriteYAML(filename string, o runtime.Object) error

WriteYAML writes a kubernetes object to a file as YAML.

type Hooks added in v0.16.0

// Hooks lets callers extend cilium-cli with additional sysdump tasks and
// related flags.
type Hooks interface {
	// AddSysdumpFlags receives a flag set; presumably implementations register
	// their additional sysdump flags on it (TODO confirm against implementations).
	AddSysdumpFlags(flags *pflag.FlagSet)
	// AddSysdumpTasks receives the Collector; presumably implementations add
	// their additional tasks to it, e.g. via Collector.AddTasks (TODO confirm).
	// A non-nil error reports failure.
	AddSysdumpTasks(*Collector) error
}

Hooks to extend cilium-cli with additional sysdump tasks and related flags.

type KubernetesClient

// KubernetesClient declares the Kubernetes operations available to the sysdump
// collector. It is the type of Collector.Client and of the first parameter of
// NewCollector.
//
// NOTE(review): the ListOptions parameter is named inconsistently across
// methods (opts, options, o); the names are kept as-is here.
type KubernetesClient interface {
	AutodetectFlavor(ctx context.Context) k8s.Flavor
	CopyFromPod(ctx context.Context, namespace, pod, container, fromFile, destFile string, retryLimit int) error
	CreateEphemeralContainer(ctx context.Context, pod *corev1.Pod, ec *corev1.EphemeralContainer) (*corev1.Pod, error)
	CreatePod(ctx context.Context, namespace string, pod *corev1.Pod, opts metav1.CreateOptions) (*corev1.Pod, error)
	GetPod(ctx context.Context, namespace, name string, opts metav1.GetOptions) (*corev1.Pod, error)
	GetRaw(ctx context.Context, path string) (string, error)
	DeletePod(ctx context.Context, namespace, name string, opts metav1.DeleteOptions) error
	ExecInPod(ctx context.Context, namespace, pod, container string, command []string) (bytes.Buffer, error)
	ExecInPodWithStderr(ctx context.Context, namespace, pod, container string, command []string) (bytes.Buffer, bytes.Buffer, error)
	GetConfigMap(ctx context.Context, namespace, name string, opts metav1.GetOptions) (*corev1.ConfigMap, error)
	GetNamespace(ctx context.Context, namespace string, options metav1.GetOptions) (*corev1.Namespace, error)
	GetDaemonSet(ctx context.Context, namespace, name string, opts metav1.GetOptions) (*appsv1.DaemonSet, error)
	GetStatefulSet(ctx context.Context, namespace, name string, opts metav1.GetOptions) (*appsv1.StatefulSet, error)
	GetDeployment(ctx context.Context, namespace, name string, opts metav1.GetOptions) (*appsv1.Deployment, error)
	GetCronJob(ctx context.Context, namespace, name string, opts metav1.GetOptions) (*batchv1.CronJob, error)
	GetLogs(ctx context.Context, namespace, name, container string, opts corev1.PodLogOptions) (string, error)
	GetPodsTable(ctx context.Context) (*metav1.Table, error)
	ProxyGet(ctx context.Context, namespace, name, url string) (string, error)
	GetSecret(ctx context.Context, namespace, name string, opts metav1.GetOptions) (*corev1.Secret, error)
	GetCiliumVersion(ctx context.Context, p *corev1.Pod) (*semver.Version, error)
	GetVersion(ctx context.Context) (string, error)
	GetHelmMetadata(ctx context.Context, releaseName string, namespace string) (string, error)
	GetHelmValues(ctx context.Context, releaseName string, namespace string) (string, error)
	ListCiliumBGPPeeringPolicies(ctx context.Context, opts metav1.ListOptions) (*ciliumv2alpha1.CiliumBGPPeeringPolicyList, error)
	ListCiliumCIDRGroups(ctx context.Context, opts metav1.ListOptions) (*ciliumv2alpha1.CiliumCIDRGroupList, error)
	ListCiliumClusterwideNetworkPolicies(ctx context.Context, opts metav1.ListOptions) (*ciliumv2.CiliumClusterwideNetworkPolicyList, error)
	ListCiliumClusterwideEnvoyConfigs(ctx context.Context, opts metav1.ListOptions) (*ciliumv2.CiliumClusterwideEnvoyConfigList, error)
	ListCiliumIdentities(ctx context.Context) (*ciliumv2.CiliumIdentityList, error)
	ListCiliumEgressGatewayPolicies(ctx context.Context, opts metav1.ListOptions) (*ciliumv2.CiliumEgressGatewayPolicyList, error)
	ListCiliumEndpoints(ctx context.Context, namespace string, options metav1.ListOptions) (*ciliumv2.CiliumEndpointList, error)
	ListCiliumEndpointSlices(ctx context.Context, options metav1.ListOptions) (*ciliumv2alpha1.CiliumEndpointSliceList, error)
	ListCiliumEnvoyConfigs(ctx context.Context, namespace string, options metav1.ListOptions) (*ciliumv2.CiliumEnvoyConfigList, error)
	ListCiliumExternalWorkloads(ctx context.Context, options metav1.ListOptions) (*ciliumv2.CiliumExternalWorkloadList, error)
	ListCiliumLoadBalancerIPPools(ctx context.Context, opts metav1.ListOptions) (*ciliumv2alpha1.CiliumLoadBalancerIPPoolList, error)
	ListCiliumLocalRedirectPolicies(ctx context.Context, namespace string, options metav1.ListOptions) (*ciliumv2.CiliumLocalRedirectPolicyList, error)
	ListCiliumNetworkPolicies(ctx context.Context, namespace string, opts metav1.ListOptions) (*ciliumv2.CiliumNetworkPolicyList, error)
	ListCiliumNodes(ctx context.Context) (*ciliumv2.CiliumNodeList, error)
	ListCiliumNodeConfigs(ctx context.Context, namespace string, opts metav1.ListOptions) (*ciliumv2alpha1.CiliumNodeConfigList, error)
	ListCiliumPodIPPools(ctx context.Context, opts metav1.ListOptions) (*ciliumv2alpha1.CiliumPodIPPoolList, error)
	ListDaemonSet(ctx context.Context, namespace string, o metav1.ListOptions) (*appsv1.DaemonSetList, error)
	ListEvents(ctx context.Context, o metav1.ListOptions) (*corev1.EventList, error)
	ListEndpoints(ctx context.Context, o metav1.ListOptions) (*corev1.EndpointsList, error)
	ListIngressClasses(ctx context.Context, o metav1.ListOptions) (*networkingv1.IngressClassList, error)
	ListIngresses(ctx context.Context, o metav1.ListOptions) (*networkingv1.IngressList, error)
	ListNamespaces(ctx context.Context, o metav1.ListOptions) (*corev1.NamespaceList, error)
	ListNetworkPolicies(ctx context.Context, o metav1.ListOptions) (*networkingv1.NetworkPolicyList, error)
	ListNodes(ctx context.Context, options metav1.ListOptions) (*corev1.NodeList, error)
	ListPods(ctx context.Context, namespace string, options metav1.ListOptions) (*corev1.PodList, error)
	ListServices(ctx context.Context, namespace string, options metav1.ListOptions) (*corev1.ServiceList, error)
	ListUnstructured(ctx context.Context, gvr schema.GroupVersionResource, namespace *string, o metav1.ListOptions) (*unstructured.UnstructuredList, error)
}

type Options

// Options groups together the set of options required to collect a sysdump.
type Options struct {
	// The labels used to target Cilium pods.
	CiliumLabelSelector string
	// The namespace Cilium is running in.
	CiliumNamespace string
	// The namespace Cilium operator is running in.
	CiliumOperatorNamespace string
	// The namespace Cilium SPIRE installation is running in.
	CiliumSPIRENamespace string
	// The labels used to target Cilium daemon set. Usually, this label is the same as CiliumLabelSelector.
	CiliumDaemonSetSelector string
	// The labels used to target Cilium Envoy pods.
	CiliumEnvoyLabelSelector string
	// The release name of Cilium Helm chart.
	CiliumHelmReleaseName string
	// The labels used to target Cilium Node Init daemon set. Usually, this label is the same as CiliumNodeInitLabelSelector.
	CiliumNodeInitDaemonSetSelector string
	// The labels used to target Cilium Node Init pods.
	CiliumNodeInitLabelSelector string
	// The labels used to target Cilium operator pods.
	CiliumOperatorLabelSelector string
	// The labels used to target 'clustermesh-apiserver' pods.
	ClustermeshApiserverLabelSelector string
	// The labels used to target Cilium SPIRE server pods.
	CiliumSPIREServerLabelSelector string
	// The labels used to target Cilium SPIRE agent pods.
	CiliumSPIREAgentLabelSelector string
	// Whether to enable debug logging.
	Debug bool
	// Whether to enable scraping profiling data.
	Profiling bool
	// Whether to enable scraping tracing data.
	Tracing bool
	// The labels used to target additional pods.
	ExtraLabelSelectors []string
	// The labels used to target Hubble pods.
	HubbleLabelSelector string
	// Number of Hubble flows to collect.
	HubbleFlowsCount int64
	// Timeout for collecting Hubble flows.
	HubbleFlowsTimeout time.Duration
	// The labels used to target Hubble Relay pods.
	HubbleRelayLabelSelector string
	// The labels used to target Hubble UI pods.
	HubbleUILabelSelector string
	// The labels used to target Hubble generate certs pods.
	HubbleGenerateCertsLabelSelector string
	// The amount of time to wait for the user to cancel the sysdump on a large cluster.
	LargeSysdumpAbortTimeout time.Duration
	// The threshold on the number of nodes present in the cluster that triggers a warning message.
	LargeSysdumpThreshold int
	// The limit on the number of bytes to retrieve when collecting logs.
	LogsLimitBytes int64
	// How far back in time to go when collecting logs.
	LogsSinceTime time.Duration
	// Comma-separated list of node IPs or names to filter pods for which to collect gops and logs.
	NodeList string
	// The name of the resulting file (without extension).
	// '<ts>' can be used as the placeholder for the timestamp.
	OutputFileName string
	// Whether to enable quick mode (i.e. skip collection of 'cilium-bugtool' output and logs).
	Quick bool
	// A 'RESTClientGetter' that can be used to create REST clients for the Kubernetes API.
	// Required at least for getting the proper output of 'kubectl get pod -o wide' without actually using 'kubectl'.
	RESTClientGetter genericclioptions.RESTClientGetter
	// The number of workers to use.
	WorkerCount int
	// The writer used for logging.
	Writer io.Writer
	// Flags to pass to the cilium-bugtool command.
	CiliumBugtoolFlags []string
	// Whether to automatically detect the gops agent PID.
	DetectGopsPID bool
	// Directory where CNI configs are located.
	CNIConfigDirectory string
	// The name of the CNI config map.
	CNIConfigMapName string
	// The labels used to target Tetragon pods.
	TetragonLabelSelector string
	// The labels used to target Tetragon operator pods.
	TetragonOperatorLabelSelector string
	// The namespace Tetragon is running in.
	TetragonNamespace string
	// Retry limit for copying files from pods.
	CopyRetryLimit int
}

Options groups together the set of options required to collect a sysdump.

type Task added in v0.9.2

// Task defines a task for the sysdump collector to execute.
type Task struct {
	// CreatesSubtasks MUST be set to true if the task submits additional tasks to the worker pool.
	CreatesSubtasks bool
	// Description is the description of the task.
	Description string
	// Quick reports whether this task runs when running in quick mode.
	Quick bool
	// Task is the task itself. It receives a context and returns an error.
	Task func(context.Context) error
}

Task defines a task for the sysdump collector to execute.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL