Documentation
¶
Overview ¶
Package telemetry provides OpenTelemetry integration with rich attributes for agent observability.
SPDX-License-Identifier: Apache-2.0
Package telemetry provides observability for Kairos error handling. See docs/ERROR_HANDLING.md for metric integration patterns.
SPDX-License-Identifier: Apache-2.0
Package telemetry configures OpenTelemetry exporters and propagators. See docs/ERROR_HANDLING.md for error handling integration.
Index ¶
- Constants
- func AgentAttributes(agentID, role, model, runID string, iteration, maxIter int) []attribute.KeyValue
- func ConfigureSlog(output io.Writer, level, format string) *slog.Logger
- func GuardrailInputAttributes(blocked bool, guardrailID string, confidence float64) []attribute.KeyValue
- func GuardrailOutputAttributes(modified bool, redactions int) []attribute.KeyValue
- func LLMAttributes(model, provider string, msgCount int, toolCallCount int) []attribute.KeyValue
- func LLMUsageAttributes(inputTokens, outputTokens int, durationMs float64, finishReason string) []attribute.KeyValue
- func MemoryAttributes(enabled bool, memType string, retrieved int, stored bool) []attribute.KeyValue
- func PlannerAttributes(planID, runID string) []attribute.KeyValue
- func PlannerNodeAttributes(nodeID, nodeType, status, planID, runID string) []attribute.KeyValue
- func PlannerNodeIO(input, output string, maxLen int) []attribute.KeyValue
- func PolicyAttributes(evaluated, allowed bool, reason string) []attribute.KeyValue
- func RecordError(span trace.Span, err error)
- func SessionAttributes(sessionID string, enabled bool, msgCount int, strategy string) []attribute.KeyValue
- func SkillAttributes(name, action, resource string) []attribute.KeyValue
- func TaskAttributes(taskID, goal, status string) []attribute.KeyValue
- func ToolCallArgsResult(args, result string, maxLen int) []attribute.KeyValue
- func ToolCallAttributes(name, callID, source string, durationMs float64, success bool) []attribute.KeyValue
- func ToolsetAttributes(total, local, mcp, skill int, names []string) []attribute.KeyValue
- type Config
- type ErrorMetrics
- func NewErrorMetrics(ctx context.Context) (*ErrorMetrics, error)
- func (em *ErrorMetrics) RecordCircuitBreakerState(ctx context.Context, component string, state int64)
- func (em *ErrorMetrics) RecordErrorMetric(ctx context.Context, err error, component string)
- func (em *ErrorMetrics) RecordErrorRate(ctx context.Context, component string, ratePerMinute float64)
- func (em *ErrorMetrics) RecordHealthStatus(ctx context.Context, component string, status int64)
- func (em *ErrorMetrics) RecordRecovery(ctx context.Context, errorCode errors.ErrorCode)
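- type ShutdownFunc
- func Init(serviceName, version string) (ShutdownFunc, error)
- func InitWithConfig(serviceName, version string, cfg Config) (ShutdownFunc, error)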
Constants ¶
// Semantic conventions for Kairos agent telemetry.
// These follow OpenTelemetry naming conventions where applicable.
//
// Keys under the "kairos." prefix are project-specific; the LLM keys use the
// "gen_ai." prefix instead.
const (
	// Agent attributes
	AttrAgentID        = "kairos.agent.id"
	AttrAgentRole      = "kairos.agent.role"
	AttrAgentModel     = "kairos.agent.model"
	AttrAgentRunID     = "kairos.agent.run_id"
	AttrAgentIteration = "kairos.agent.iteration"
	AttrAgentMaxIter   = "kairos.agent.max_iterations"

	// Session/Conversation attributes
	AttrSessionID            = "kairos.session.id"
	AttrConversationEnabled  = "kairos.conversation.enabled"
	AttrConversationMsgCount = "kairos.conversation.message_count"
	AttrConversationStrategy = "kairos.conversation.truncation_strategy"

	// Memory attributes
	AttrMemoryEnabled   = "kairos.memory.enabled"
	AttrMemoryType      = "kairos.memory.type"
	AttrMemoryRetrieved = "kairos.memory.retrieved_count"
	AttrMemoryStored    = "kairos.memory.stored"

	// Tool attributes
	AttrToolName       = "kairos.tool.name"
	AttrToolCallID     = "kairos.tool.call_id"
	AttrToolArgs       = "kairos.tool.arguments"
	AttrToolResult     = "kairos.tool.result"
	AttrToolDurationMs = "kairos.tool.duration_ms"
	AttrToolSuccess    = "kairos.tool.success"
	AttrToolSource     = "kairos.tool.source" // "local", "mcp", "skill"

	// Tool set attributes
	AttrToolsCount      = "kairos.tools.count"
	AttrToolsNames      = "kairos.tools.names"
	AttrToolsMCPCount   = "kairos.tools.mcp_count"
	AttrToolsLocalCount = "kairos.tools.local_count"
	AttrToolsSkillCount = "kairos.tools.skill_count"

	// LLM attributes (extending standard gen_ai conventions)
	AttrLLMModel        = "gen_ai.request.model"
	AttrLLMProvider     = "gen_ai.system"
	AttrLLMMessages     = "gen_ai.request.messages"
	AttrLLMTokensInput  = "gen_ai.usage.input_tokens"
	AttrLLMTokensOutput = "gen_ai.usage.output_tokens"
	AttrLLMTokensTotal  = "gen_ai.usage.total_tokens"
	AttrLLMDurationMs   = "gen_ai.duration_ms"
	AttrLLMToolCalls    = "gen_ai.tool_calls"
	AttrLLMFinishReason = "gen_ai.finish_reason"

	// Skill attributes
	AttrSkillName     = "kairos.skill.name"
	AttrSkillAction   = "kairos.skill.action"
	AttrSkillResource = "kairos.skill.resource"

	// Governance attributes
	AttrPolicyEvaluated = "kairos.policy.evaluated"
	AttrPolicyAllowed   = "kairos.policy.allowed"
	AttrPolicyReason    = "kairos.policy.reason"

	// Task attributes
	AttrTaskID     = "kairos.task.id"
	AttrTaskGoal   = "kairos.task.goal"
	AttrTaskStatus = "kairos.task.status"

	// Planner attributes
	AttrPlannerID         = "kairos.planner.id"
	AttrPlannerRunID      = "kairos.planner.run_id"
	AttrPlannerNodeID     = "kairos.planner.node.id"
	AttrPlannerNodeType   = "kairos.planner.node.type"
	AttrPlannerNodeStatus = "kairos.planner.node.status"
	AttrPlannerNodeInput  = "kairos.planner.node.input"
	AttrPlannerNodeOutput = "kairos.planner.node.output"

	// Guardrails attributes
	AttrGuardrailsInputBlocked     = "kairos.guardrails.input.blocked"
	AttrGuardrailsInputID          = "kairos.guardrails.input.id"
	AttrGuardrailsInputConfidence  = "kairos.guardrails.input.confidence"
	AttrGuardrailsOutputModified   = "kairos.guardrails.output.modified"
	AttrGuardrailsOutputRedactions = "kairos.guardrails.output.redactions"

	// Event attributes
	AttrEventType    = "kairos.event.type"
	AttrEventPayload = "kairos.event.payload"
)
Semantic conventions for Kairos agent telemetry. These follow OpenTelemetry naming conventions where applicable.
Variables ¶
This section is empty.
Functions ¶
func AgentAttributes ¶
func AgentAttributes(agentID, role, model, runID string, iteration, maxIter int) []attribute.KeyValue
AgentAttributes returns common attributes for agent spans.
func ConfigureSlog ¶
ConfigureSlog sets the global slog logger with trace-aware attributes.
func GuardrailInputAttributes ¶
func GuardrailInputAttributes(blocked bool, guardrailID string, confidence float64) []attribute.KeyValue
GuardrailInputAttributes returns attributes for guardrails input checks.
func GuardrailOutputAttributes ¶
GuardrailOutputAttributes returns attributes for guardrails output filtering.
func LLMAttributes ¶
LLMAttributes returns attributes for LLM call spans.
func LLMUsageAttributes ¶
func LLMUsageAttributes(inputTokens, outputTokens int, durationMs float64, finishReason string) []attribute.KeyValue
LLMUsageAttributes returns token usage attributes.
func MemoryAttributes ¶
func MemoryAttributes(enabled bool, memType string, retrieved int, stored bool) []attribute.KeyValue
MemoryAttributes returns attributes for memory operations.
func PlannerAttributes ¶
PlannerAttributes returns attributes for planner executions.
func PlannerNodeAttributes ¶
PlannerNodeAttributes returns attributes for planner node spans.
func PlannerNodeIO ¶
PlannerNodeIO returns truncated input/output attributes for planner nodes.
func PolicyAttributes ¶
PolicyAttributes returns attributes for policy evaluation.
func RecordError ¶
RecordError records a Kairos error, together with its full context, on the span. This ties error handling into OTEL observability.
func SessionAttributes ¶
func SessionAttributes(sessionID string, enabled bool, msgCount int, strategy string) []attribute.KeyValue
SessionAttributes returns attributes for session/conversation tracking.
func SkillAttributes ¶
SkillAttributes returns attributes for skill activation spans.
func TaskAttributes ¶
TaskAttributes returns attributes for task tracking.
func ToolCallArgsResult ¶
ToolCallArgsResult returns attributes with tool arguments and result (truncated for safety).
Types ¶
type Config ¶
// Config controls telemetry exporter behavior and OTLP connection settings.
type Config struct {
	// Exporter presumably names the exporter to initialize.
	// NOTE(review): the accepted values are not visible here — confirm
	// against InitWithConfig.
	Exporter string

	// OTLP connection settings.
	// NOTE(review): endpoint format and how a zero timeout is treated are not
	// shown in this view — confirm against the exporter setup code.
	OTLPEndpoint       string
	OTLPInsecure       bool
	OTLPTimeoutSeconds int
	OTLPHeaders        map[string]string

	// NOTE(review): presumably credentials for the OTLP endpoint; how they are
	// transmitted is not visible here — verify before documenting further.
	OTLPUser  string
	OTLPToken string
}
Config controls telemetry exporter behavior and OTLP connection settings.
type ErrorMetrics ¶
// ErrorMetrics tracks error rates, types, and recovery patterns for production monitoring.
type ErrorMetrics struct {
// contains filtered or unexported fields
}
ErrorMetrics tracks error rates, types, and recovery patterns for production monitoring.
func NewErrorMetrics ¶
func NewErrorMetrics(ctx context.Context) (*ErrorMetrics, error)
NewErrorMetrics creates a new error metrics tracker with OTEL meters.
func (*ErrorMetrics) RecordCircuitBreakerState ¶
func (em *ErrorMetrics) RecordCircuitBreakerState(ctx context.Context, component string, state int64)
RecordCircuitBreakerState records the circuit breaker state (0=open, 1=half-open, 2=closed).
func (*ErrorMetrics) RecordErrorMetric ¶
func (em *ErrorMetrics) RecordErrorMetric(ctx context.Context, err error, component string)
RecordErrorMetric increments the error counter for the given error code and component. This is called by error handling code to track error rates.
func (*ErrorMetrics) RecordErrorRate ¶
func (em *ErrorMetrics) RecordErrorRate(ctx context.Context, component string, ratePerMinute float64)
RecordErrorRate records the current error rate for a component (errors per minute).
func (*ErrorMetrics) RecordHealthStatus ¶
func (em *ErrorMetrics) RecordHealthStatus(ctx context.Context, component string, status int64)
RecordHealthStatus records the health status of a component (0=unhealthy, 1=degraded, 2=healthy).
func (*ErrorMetrics) RecordRecovery ¶
func (em *ErrorMetrics) RecordRecovery(ctx context.Context, errorCode errors.ErrorCode)
RecordRecovery increments the recovery counter for the given error code. This is called when an error is successfully handled (retry succeeded, fallback used, etc.).
type ShutdownFunc ¶
ShutdownFunc releases telemetry resources created by Init or InitWithConfig.
func Init ¶
func Init(serviceName, version string) (ShutdownFunc, error)
Init initializes OpenTelemetry with stdout exporters using default settings.
func InitWithConfig ¶
func InitWithConfig(serviceName, version string, cfg Config) (ShutdownFunc, error)
InitWithConfig initializes OpenTelemetry with the specified exporter config.